author    Stephen Hines <srhines@google.com>  2014-02-11 20:01:10 -0800
committer Stephen Hines <srhines@google.com>  2014-02-11 20:01:10 -0800
commit    ce9904c6ea8fd669978a8eefb854b330eb9828ff (patch)
tree      2418ee2e96ea220977c8fb74959192036ab5b133 /lib/Target
parent    c27b10b198c1d9e9b51f2303994313ec2778edd7 (diff)
parent    dbb832b83351cec97b025b61c26536ef50c3181c (diff)
Merge remote-tracking branch 'upstream/release_34' into merge-20140211
Conflicts:
	lib/Linker/LinkModules.cpp
	lib/Support/Unix/Signals.inc

Change-Id: Ia54f291fa5dc828052d2412736e8495c1282aa64
Diffstat (limited to 'lib/Target')
-rw-r--r--  lib/Target/AArch64/AArch64.td | 7
-rw-r--r--  lib/Target/AArch64/AArch64AsmPrinter.cpp | 130
-rw-r--r--  lib/Target/AArch64/AArch64CallingConv.td | 9
-rw-r--r--  lib/Target/AArch64/AArch64ISelDAGToDAG.cpp | 1027
-rw-r--r--  lib/Target/AArch64/AArch64ISelLowering.cpp | 1183
-rw-r--r--  lib/Target/AArch64/AArch64ISelLowering.h | 86
-rw-r--r--  lib/Target/AArch64/AArch64InstrFormats.td | 451
-rw-r--r--  lib/Target/AArch64/AArch64InstrInfo.cpp | 92
-rw-r--r--  lib/Target/AArch64/AArch64InstrInfo.td | 50
-rw-r--r--  lib/Target/AArch64/AArch64InstrNEON.td | 7329
-rw-r--r--  lib/Target/AArch64/AArch64MCInstLower.cpp | 2
-rw-r--r--  lib/Target/AArch64/AArch64RegisterInfo.td | 164
-rw-r--r--  lib/Target/AArch64/AArch64Subtarget.cpp | 26
-rw-r--r--  lib/Target/AArch64/AArch64Subtarget.h | 19
-rw-r--r--  lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp | 353
-rw-r--r--  lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp | 762
-rw-r--r--  lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp | 54
-rw-r--r--  lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h | 7
-rw-r--r--  lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp | 4
-rw-r--r--  lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp | 11
-rw-r--r--  lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp | 5
-rw-r--r--  lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h | 10
-rw-r--r--  lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp | 56
-rw-r--r--  lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h | 5
-rw-r--r--  lib/Target/AArch64/Utils/AArch64BaseInfo.h | 59
-rw-r--r--  lib/Target/ARM/A15SDOptimizer.cpp | 7
-rw-r--r--  lib/Target/ARM/ARM.td | 81
-rw-r--r--  lib/Target/ARM/ARMAsmPrinter.cpp | 425
-rw-r--r--  lib/Target/ARM/ARMBaseInstrInfo.cpp | 299
-rw-r--r--  lib/Target/ARM/ARMBaseInstrInfo.h | 20
-rw-r--r--  lib/Target/ARM/ARMBaseRegisterInfo.cpp | 44
-rw-r--r--  lib/Target/ARM/ARMBaseRegisterInfo.h | 12
-rw-r--r--  lib/Target/ARM/ARMBuildAttrs.h | 71
-rw-r--r--  lib/Target/ARM/ARMCallingConv.td | 22
-rw-r--r--  lib/Target/ARM/ARMConstantPoolValue.cpp | 50
-rw-r--r--  lib/Target/ARM/ARMConstantPoolValue.h | 33
-rw-r--r--  lib/Target/ARM/ARMExpandPseudoInsts.cpp | 75
-rw-r--r--  lib/Target/ARM/ARMFPUName.def | 32
-rw-r--r--  lib/Target/ARM/ARMFPUName.h | 26
-rw-r--r--  lib/Target/ARM/ARMFastISel.cpp | 104
-rw-r--r--  lib/Target/ARM/ARMFeatures.h | 93
-rw-r--r--  lib/Target/ARM/ARMFrameLowering.cpp | 150
-rw-r--r--  lib/Target/ARM/ARMISelDAGToDAG.cpp | 387
-rw-r--r--  lib/Target/ARM/ARMISelLowering.cpp | 1026
-rw-r--r--  lib/Target/ARM/ARMISelLowering.h | 29
-rw-r--r--  lib/Target/ARM/ARMInstrFormats.td | 76
-rw-r--r--  lib/Target/ARM/ARMInstrInfo.cpp | 17
-rw-r--r--  lib/Target/ARM/ARMInstrInfo.td | 603
-rw-r--r--  lib/Target/ARM/ARMInstrNEON.td | 227
-rw-r--r--  lib/Target/ARM/ARMInstrThumb.td | 57
-rw-r--r--  lib/Target/ARM/ARMInstrThumb2.td | 430
-rw-r--r--  lib/Target/ARM/ARMInstrVFP.td | 165
-rw-r--r--  lib/Target/ARM/ARMLoadStoreOptimizer.cpp | 82
-rw-r--r--  lib/Target/ARM/ARMMCInstLower.cpp | 2
-rw-r--r--  lib/Target/ARM/ARMMachineFunctionInfo.h | 61
-rw-r--r--  lib/Target/ARM/ARMRegisterInfo.td | 3
-rw-r--r--  lib/Target/ARM/ARMScheduleA9.td | 132
-rw-r--r--  lib/Target/ARM/ARMScheduleSwift.td | 13
-rw-r--r--  lib/Target/ARM/ARMSubtarget.cpp | 68
-rw-r--r--  lib/Target/ARM/ARMSubtarget.h | 59
-rw-r--r--  lib/Target/ARM/ARMTargetMachine.cpp | 7
-rw-r--r--  lib/Target/ARM/ARMTargetObjectFile.cpp | 2
-rw-r--r--  lib/Target/ARM/ARMTargetTransformInfo.cpp | 94
-rw-r--r--  lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 516
-rw-r--r--  lib/Target/ARM/Disassembler/ARMDisassembler.cpp | 89
-rw-r--r--  lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp | 167
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp | 36
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h | 18
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp | 540
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMELFStreamer.h | 27
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp | 2
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h | 3
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp | 21
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp | 64
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h | 12
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp | 129
-rw-r--r--  lib/Target/ARM/Thumb1FrameLowering.cpp | 30
-rw-r--r--  lib/Target/ARM/Thumb1RegisterInfo.cpp | 8
-rw-r--r--  lib/Target/ARM/Thumb2ITBlockPass.cpp | 63
-rw-r--r--  lib/Target/ARM/Thumb2InstrInfo.cpp | 17
-rw-r--r--  lib/Target/CppBackend/CPPBackend.cpp | 25
-rw-r--r--  lib/Target/Hexagon/CMakeLists.txt | 1
-rw-r--r--  lib/Target/Hexagon/HexagonAsmPrinter.cpp | 4
-rw-r--r--  lib/Target/Hexagon/HexagonHardwareLoops.cpp | 2
-rw-r--r--  lib/Target/Hexagon/HexagonISelDAGToDAG.cpp | 4
-rw-r--r--  lib/Target/Hexagon/HexagonISelLowering.cpp | 37
-rw-r--r--  lib/Target/Hexagon/HexagonISelLowering.h | 7
-rw-r--r--  lib/Target/Hexagon/HexagonInstrFormats.td | 2
-rw-r--r--  lib/Target/Hexagon/HexagonInstrInfo.cpp | 4
-rw-r--r--  lib/Target/Hexagon/HexagonInstrInfo.h | 1
-rw-r--r--  lib/Target/Hexagon/HexagonInstrInfoV4.td | 1
-rw-r--r--  lib/Target/Hexagon/HexagonMCInstLower.cpp | 2
-rw-r--r--  lib/Target/Hexagon/HexagonMachineFunctionInfo.cpp | 16
-rw-r--r--  lib/Target/Hexagon/HexagonMachineFunctionInfo.h | 6
-rw-r--r--  lib/Target/Hexagon/HexagonMachineScheduler.cpp | 14
-rw-r--r--  lib/Target/Hexagon/HexagonMachineScheduler.h | 2
-rw-r--r--  lib/Target/Hexagon/HexagonPeephole.cpp | 2
-rw-r--r--  lib/Target/Hexagon/HexagonRegisterInfo.cpp | 8
-rw-r--r--  lib/Target/Hexagon/HexagonRegisterInfo.h | 4
-rw-r--r--  lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp | 24
-rw-r--r--  lib/Target/Hexagon/HexagonSubtarget.cpp | 2
-rw-r--r--  lib/Target/Hexagon/HexagonSubtarget.h | 2
-rw-r--r--  lib/Target/Hexagon/HexagonTargetMachine.cpp | 16
-rw-r--r--  lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.cpp | 1
-rw-r--r--  lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h | 2
-rw-r--r--  lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp | 4
-rw-r--r--  lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.h | 5
-rw-r--r--  lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.cpp | 3
-rw-r--r--  lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.h | 4
-rw-r--r--  lib/Target/MSP430/MSP430AsmPrinter.cpp | 2
-rw-r--r--  lib/Target/MSP430/MSP430CallingConv.td | 7
-rw-r--r--  lib/Target/MSP430/MSP430FrameLowering.h | 4
-rw-r--r--  lib/Target/MSP430/MSP430ISelDAGToDAG.cpp | 1
-rw-r--r--  lib/Target/MSP430/MSP430ISelLowering.cpp | 128
-rw-r--r--  lib/Target/MSP430/MSP430InstrInfo.cpp | 5
-rw-r--r--  lib/Target/MSP430/MSP430InstrInfo.h | 1
-rw-r--r--  lib/Target/MSP430/MSP430MCInstLower.cpp | 2
-rw-r--r--  lib/Target/Mangler.cpp | 113
-rw-r--r--  lib/Target/Mips/AsmParser/MipsAsmParser.cpp | 1314
-rw-r--r--  lib/Target/Mips/CMakeLists.txt | 1
-rw-r--r--  lib/Target/Mips/Disassembler/MipsDisassembler.cpp | 347
-rw-r--r--  lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp | 20
-rw-r--r--  lib/Target/Mips/InstPrinter/MipsInstPrinter.h | 2
-rw-r--r--  lib/Target/Mips/MCTargetDesc/CMakeLists.txt | 2
-rw-r--r--  lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp | 43
-rw-r--r--  lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp | 39
-rw-r--r--  lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp | 93
-rw-r--r--  lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h | 43
-rw-r--r--  lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h | 39
-rw-r--r--  lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp | 1
-rw-r--r--  lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.h | 4
-rw-r--r--  lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp | 140
-rw-r--r--  lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp | 39
-rw-r--r--  lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h | 16
-rw-r--r--  lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp | 67
-rw-r--r--  lib/Target/Mips/MSA.txt | 78
-rw-r--r--  lib/Target/Mips/MicroMipsInstrFormats.td | 196
-rw-r--r--  lib/Target/Mips/MicroMipsInstrInfo.td | 182
-rw-r--r--  lib/Target/Mips/Mips.h | 1
-rw-r--r--  lib/Target/Mips/Mips.td | 2
-rw-r--r--  lib/Target/Mips/Mips16FrameLowering.h | 2
-rw-r--r--  lib/Target/Mips/Mips16HardFloat.cpp | 64
-rw-r--r--  lib/Target/Mips/Mips16ISelDAGToDAG.cpp | 9
-rw-r--r--  lib/Target/Mips/Mips16ISelLowering.cpp | 61
-rw-r--r--  lib/Target/Mips/Mips16InstrInfo.cpp | 62
-rw-r--r--  lib/Target/Mips/Mips16InstrInfo.h | 2
-rw-r--r--  lib/Target/Mips/Mips16InstrInfo.td | 148
-rw-r--r--  lib/Target/Mips/Mips64InstrInfo.td | 213
-rw-r--r--  lib/Target/Mips/MipsAnalyzeImmediate.h | 10
-rw-r--r--  lib/Target/Mips/MipsAsmPrinter.cpp | 118
-rw-r--r--  lib/Target/Mips/MipsAsmPrinter.h | 24
-rw-r--r--  lib/Target/Mips/MipsCallingConv.td | 20
-rw-r--r--  lib/Target/Mips/MipsCodeEmitter.cpp | 37
-rw-r--r--  lib/Target/Mips/MipsCondMov.td | 81
-rw-r--r--  lib/Target/Mips/MipsConstantIslandPass.cpp | 1469
-rw-r--r--  lib/Target/Mips/MipsDSPInstrInfo.td | 468
-rw-r--r--  lib/Target/Mips/MipsDelaySlotFiller.cpp | 29
-rw-r--r--  lib/Target/Mips/MipsISelDAGToDAG.cpp | 78
-rw-r--r--  lib/Target/Mips/MipsISelDAGToDAG.h | 38
-rw-r--r--  lib/Target/Mips/MipsISelLowering.cpp | 562
-rw-r--r--  lib/Target/Mips/MipsISelLowering.h | 151
-rw-r--r--  lib/Target/Mips/MipsInstrFPU.td | 326
-rw-r--r--  lib/Target/Mips/MipsInstrFormats.td | 66
-rw-r--r--  lib/Target/Mips/MipsInstrInfo.cpp | 11
-rw-r--r--  lib/Target/Mips/MipsInstrInfo.h | 1
-rw-r--r--  lib/Target/Mips/MipsInstrInfo.td | 661
-rw-r--r--  lib/Target/Mips/MipsLongBranch.cpp | 9
-rw-r--r--  lib/Target/Mips/MipsMCInstLower.cpp | 5
-rw-r--r--  lib/Target/Mips/MipsMCInstLower.h | 4
-rw-r--r--  lib/Target/Mips/MipsMSAInstrFormats.td | 406
-rw-r--r--  lib/Target/Mips/MipsMSAInstrInfo.td | 3694
-rw-r--r--  lib/Target/Mips/MipsMachineFunction.cpp | 66
-rw-r--r--  lib/Target/Mips/MipsMachineFunction.h | 103
-rw-r--r--  lib/Target/Mips/MipsOptimizeMathLibCalls.cpp | 175
-rw-r--r--  lib/Target/Mips/MipsOs16.cpp | 45
-rw-r--r--  lib/Target/Mips/MipsRegisterInfo.cpp | 57
-rw-r--r--  lib/Target/Mips/MipsRegisterInfo.h | 7
-rw-r--r--  lib/Target/Mips/MipsRegisterInfo.td | 219
-rw-r--r--  lib/Target/Mips/MipsSEFrameLowering.cpp | 93
-rw-r--r--  lib/Target/Mips/MipsSEFrameLowering.h | 2
-rw-r--r--  lib/Target/Mips/MipsSEISelDAGToDAG.cpp | 429
-rw-r--r--  lib/Target/Mips/MipsSEISelDAGToDAG.h | 44
-rw-r--r--  lib/Target/Mips/MipsSEISelLowering.cpp | 2269
-rw-r--r--  lib/Target/Mips/MipsSEISelLowering.h | 48
-rw-r--r--  lib/Target/Mips/MipsSEInstrInfo.cpp | 266
-rw-r--r--  lib/Target/Mips/MipsSEInstrInfo.h | 14
-rw-r--r--  lib/Target/Mips/MipsSERegisterInfo.cpp | 78
-rw-r--r--  lib/Target/Mips/MipsSubtarget.cpp | 24
-rw-r--r--  lib/Target/Mips/MipsSubtarget.h | 19
-rw-r--r--  lib/Target/Mips/MipsTargetMachine.cpp | 14
-rw-r--r--  lib/Target/Mips/MipsTargetStreamer.h | 44
-rw-r--r--  lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.cpp | 10
-rw-r--r--  lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.h | 3
-rw-r--r--  lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp | 2
-rw-r--r--  lib/Target/NVPTX/NVPTXAsmPrinter.cpp | 215
-rw-r--r--  lib/Target/NVPTX/NVPTXAsmPrinter.h | 16
-rw-r--r--  lib/Target/NVPTX/NVPTXGenericToNVVM.cpp | 2
-rw-r--r--  lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp | 17
-rw-r--r--  lib/Target/NVPTX/NVPTXISelLowering.cpp | 101
-rw-r--r--  lib/Target/NVPTX/NVPTXISelLowering.h | 1
-rw-r--r--  lib/Target/NVPTX/NVPTXInstrInfo.cpp | 6
-rw-r--r--  lib/Target/NVPTX/NVPTXInstrInfo.h | 1
-rw-r--r--  lib/Target/NVPTX/NVPTXInstrInfo.td | 14
-rw-r--r--  lib/Target/NVPTX/NVPTXSection.h | 4
-rw-r--r--  lib/Target/NVPTX/NVPTXSplitBBatBar.cpp | 2
-rw-r--r--  lib/Target/NVPTX/NVPTXSubtarget.cpp | 3
-rw-r--r--  lib/Target/NVPTX/NVPTXSubtarget.h | 2
-rw-r--r--  lib/Target/NVPTX/NVPTXTargetMachine.cpp | 28
-rw-r--r--  lib/Target/NVPTX/NVPTXTargetObjectFile.h | 24
-rw-r--r--  lib/Target/NVPTX/NVVMReflect.cpp | 4
-rw-r--r--  lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp | 44
-rw-r--r--  lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp | 22
-rw-r--r--  lib/Target/PowerPC/MCTargetDesc/CMakeLists.txt | 1
-rw-r--r--  lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp | 44
-rw-r--r--  lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp | 6
-rw-r--r--  lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h | 3
-rw-r--r--  lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp | 9
-rw-r--r--  lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp | 2
-rw-r--r--  lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp | 49
-rw-r--r--  lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h | 7
-rw-r--r--  lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp | 389
-rw-r--r--  lib/Target/PowerPC/PPC.td | 51
-rw-r--r--  lib/Target/PowerPC/PPCAsmPrinter.cpp | 58
-rw-r--r--  lib/Target/PowerPC/PPCCTRLoops.cpp | 14
-rw-r--r--  lib/Target/PowerPC/PPCCallingConv.td | 31
-rw-r--r--  lib/Target/PowerPC/PPCFastISel.cpp | 1946
-rw-r--r--  lib/Target/PowerPC/PPCFrameLowering.cpp | 540
-rw-r--r--  lib/Target/PowerPC/PPCISelDAGToDAG.cpp | 4
-rw-r--r--  lib/Target/PowerPC/PPCISelLowering.cpp | 184
-rw-r--r--  lib/Target/PowerPC/PPCISelLowering.h | 2
-rw-r--r--  lib/Target/PowerPC/PPCInstr64Bit.td | 41
-rw-r--r--  lib/Target/PowerPC/PPCInstrAltivec.td | 30
-rw-r--r--  lib/Target/PowerPC/PPCInstrFormats.td | 33
-rw-r--r--  lib/Target/PowerPC/PPCInstrInfo.cpp | 9
-rw-r--r--  lib/Target/PowerPC/PPCInstrInfo.h | 1
-rw-r--r--  lib/Target/PowerPC/PPCInstrInfo.td | 117
-rw-r--r--  lib/Target/PowerPC/PPCMCInstLower.cpp | 4
-rw-r--r--  lib/Target/PowerPC/PPCRegisterInfo.cpp | 10
-rw-r--r--  lib/Target/PowerPC/PPCRegisterInfo.h | 4
-rw-r--r--  lib/Target/PowerPC/PPCSchedule.td | 8
-rw-r--r--  lib/Target/PowerPC/PPCScheduleA2.td | 841
-rw-r--r--  lib/Target/PowerPC/PPCScheduleE500mc.td | 2
-rw-r--r--  lib/Target/PowerPC/PPCScheduleE5500.td | 1
-rw-r--r--  lib/Target/PowerPC/PPCSubtarget.cpp | 54
-rw-r--r--  lib/Target/PowerPC/PPCSubtarget.h | 15
-rw-r--r--  lib/Target/PowerPC/PPCTargetStreamer.h | 23
-rw-r--r--  lib/Target/PowerPC/PPCTargetTransformInfo.cpp | 9
-rw-r--r--  lib/Target/R600/AMDGPU.h | 3
-rw-r--r--  lib/Target/R600/AMDGPU.td | 12
-rw-r--r--  lib/Target/R600/AMDGPUAsmPrinter.cpp | 67
-rw-r--r--  lib/Target/R600/AMDGPUAsmPrinter.h | 12
-rw-r--r--  lib/Target/R600/AMDGPUCallingConv.td | 35
-rw-r--r--  lib/Target/R600/AMDGPUISelDAGToDAG.cpp | 536
-rw-r--r--  lib/Target/R600/AMDGPUISelLowering.cpp | 360
-rw-r--r--  lib/Target/R600/AMDGPUISelLowering.h | 37
-rw-r--r--  lib/Target/R600/AMDGPUIndirectAddressing.cpp | 345
-rw-r--r--  lib/Target/R600/AMDGPUInstrInfo.cpp | 117
-rw-r--r--  lib/Target/R600/AMDGPUInstrInfo.h | 45
-rw-r--r--  lib/Target/R600/AMDGPUInstrInfo.td | 14
-rw-r--r--  lib/Target/R600/AMDGPUInstructions.td | 192
-rw-r--r--  lib/Target/R600/AMDGPUMCInstLower.cpp | 43
-rw-r--r--  lib/Target/R600/AMDGPUMachineFunction.cpp | 3
-rw-r--r--  lib/Target/R600/AMDGPUMachineFunction.h | 5
-rw-r--r--  lib/Target/R600/AMDGPURegisterInfo.cpp | 32
-rw-r--r--  lib/Target/R600/AMDGPURegisterInfo.h | 8
-rw-r--r--  lib/Target/R600/AMDGPUSubtarget.cpp | 15
-rw-r--r--  lib/Target/R600/AMDGPUSubtarget.h | 11
-rw-r--r--  lib/Target/R600/AMDGPUTargetMachine.cpp | 43
-rw-r--r--  lib/Target/R600/AMDILCFGStructurizer.cpp | 112
-rw-r--r--  lib/Target/R600/AMDILInstrInfo.td | 10
-rw-r--r--  lib/Target/R600/CMakeLists.txt | 3
-rw-r--r--  lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp | 82
-rw-r--r--  lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h | 2
-rw-r--r--  lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp | 4
-rw-r--r--  lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp | 6
-rw-r--r--  lib/Target/R600/MCTargetDesc/AMDGPUMCCodeEmitter.cpp | 21
-rw-r--r--  lib/Target/R600/MCTargetDesc/AMDGPUMCCodeEmitter.h | 1
-rw-r--r--  lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.cpp | 2
-rw-r--r--  lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.h | 4
-rw-r--r--  lib/Target/R600/MCTargetDesc/CMakeLists.txt | 1
-rw-r--r--  lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp | 1
-rw-r--r--  lib/Target/R600/Processors.td | 7
-rw-r--r--  lib/Target/R600/R600ClauseMergePass.cpp | 204
-rw-r--r--  lib/Target/R600/R600ControlFlowFinalizer.cpp | 13
-rw-r--r--  lib/Target/R600/R600Defines.h | 4
-rw-r--r--  lib/Target/R600/R600EmitClauseMarkers.cpp | 80
-rw-r--r--  lib/Target/R600/R600ExpandSpecialInstrs.cpp | 17
-rw-r--r--  lib/Target/R600/R600ISelLowering.cpp | 609
-rw-r--r--  lib/Target/R600/R600ISelLowering.h | 3
-rw-r--r--  lib/Target/R600/R600InstrFormats.td | 6
-rw-r--r--  lib/Target/R600/R600InstrInfo.cpp | 199
-rw-r--r--  lib/Target/R600/R600InstrInfo.h | 47
-rw-r--r--  lib/Target/R600/R600Instructions.td | 342
-rw-r--r--  lib/Target/R600/R600Intrinsics.td | 7
-rw-r--r--  lib/Target/R600/R600MachineFunctionInfo.cpp | 6
-rw-r--r--  lib/Target/R600/R600MachineFunctionInfo.h | 1
-rw-r--r--  lib/Target/R600/R600MachineScheduler.cpp | 69
-rw-r--r--  lib/Target/R600/R600MachineScheduler.h | 7
-rw-r--r--  lib/Target/R600/R600OptimizeVectorRegisters.cpp | 3
-rw-r--r--  lib/Target/R600/R600Packetizer.cpp | 52
-rw-r--r--  lib/Target/R600/R600RegisterInfo.cpp | 44
-rw-r--r--  lib/Target/R600/R600RegisterInfo.h | 8
-rw-r--r--  lib/Target/R600/R600RegisterInfo.td | 45
-rw-r--r--  lib/Target/R600/R600TextureIntrinsicsReplacer.cpp | 24
-rw-r--r--  lib/Target/R600/SIDefines.h | 12
-rw-r--r--  lib/Target/R600/SIFixSGPRCopies.cpp | 153
-rw-r--r--  lib/Target/R600/SIISelLowering.cpp | 391
-rw-r--r--  lib/Target/R600/SIISelLowering.h | 10
-rw-r--r--  lib/Target/R600/SIInsertWaits.cpp | 21
-rw-r--r--  lib/Target/R600/SIInstrFormats.td | 30
-rw-r--r--  lib/Target/R600/SIInstrInfo.cpp | 512
-rw-r--r--  lib/Target/R600/SIInstrInfo.h | 80
-rw-r--r--  lib/Target/R600/SIInstrInfo.td | 174
-rw-r--r--  lib/Target/R600/SIInstructions.td | 572
-rw-r--r--  lib/Target/R600/SIIntrinsics.td | 24
-rw-r--r--  lib/Target/R600/SILowerControlFlow.cpp | 17
-rw-r--r--  lib/Target/R600/SIMachineFunctionInfo.cpp | 4
-rw-r--r--  lib/Target/R600/SIMachineFunctionInfo.h | 1
-rw-r--r--  lib/Target/R600/SIRegisterInfo.cpp | 59
-rw-r--r--  lib/Target/R600/SIRegisterInfo.h | 18
-rw-r--r--  lib/Target/R600/SIRegisterInfo.td | 16
-rw-r--r--  lib/Target/R600/SITypeRewriter.cpp | 162
-rw-r--r--  lib/Target/Sparc/CMakeLists.txt | 3
-rw-r--r--  lib/Target/Sparc/DelaySlotFiller.cpp | 20
-rw-r--r--  lib/Target/Sparc/LLVMBuild.txt | 1
-rw-r--r--  lib/Target/Sparc/MCTargetDesc/SparcBaseInfo.h | 22
-rw-r--r--  lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp | 13
-rw-r--r--  lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.h | 4
-rw-r--r--  lib/Target/Sparc/Makefile | 3
-rw-r--r--  lib/Target/Sparc/Sparc.h | 19
-rw-r--r--  lib/Target/Sparc/Sparc.td | 4
-rw-r--r--  lib/Target/Sparc/SparcAsmPrinter.cpp | 78
-rw-r--r--  lib/Target/Sparc/SparcCallingConv.td | 11
-rw-r--r--  lib/Target/Sparc/SparcCodeEmitter.cpp | 245
-rw-r--r--  lib/Target/Sparc/SparcFrameLowering.cpp | 104
-rw-r--r--  lib/Target/Sparc/SparcFrameLowering.h | 8
-rw-r--r--  lib/Target/Sparc/SparcISelDAGToDAG.cpp | 10
-rw-r--r--  lib/Target/Sparc/SparcISelLowering.cpp | 994
-rw-r--r--  lib/Target/Sparc/SparcISelLowering.h | 35
-rw-r--r--  lib/Target/Sparc/SparcInstr64Bit.td | 66
-rw-r--r--  lib/Target/Sparc/SparcInstrFormats.td | 85
-rw-r--r--  lib/Target/Sparc/SparcInstrInfo.cpp | 104
-rw-r--r--  lib/Target/Sparc/SparcInstrInfo.h | 1
-rw-r--r--  lib/Target/Sparc/SparcInstrInfo.td | 451
-rw-r--r--  lib/Target/Sparc/SparcJITInfo.cpp | 165
-rw-r--r--  lib/Target/Sparc/SparcJITInfo.h | 67
-rw-r--r--  lib/Target/Sparc/SparcRegisterInfo.cpp | 133
-rw-r--r--  lib/Target/Sparc/SparcRegisterInfo.h | 7
-rw-r--r--  lib/Target/Sparc/SparcRegisterInfo.td | 67
-rw-r--r--  lib/Target/Sparc/SparcRelocations.h | 41
-rw-r--r--  lib/Target/Sparc/SparcSubtarget.cpp | 3
-rw-r--r--  lib/Target/Sparc/SparcSubtarget.h | 2
-rw-r--r--  lib/Target/Sparc/SparcTargetMachine.cpp | 7
-rw-r--r--  lib/Target/Sparc/SparcTargetMachine.h | 6
-rw-r--r--  lib/Target/Sparc/TargetInfo/SparcTargetInfo.cpp | 8
-rw-r--r--  lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp | 16
-rw-r--r--  lib/Target/SystemZ/CMakeLists.txt | 2
-rw-r--r--  lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp | 19
-rw-r--r--  lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp | 12
-rw-r--r--  lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h | 1
-rw-r--r--  lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp | 48
-rw-r--r--  lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp | 2
-rw-r--r--  lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.h | 4
-rw-r--r--  lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp | 8
-rw-r--r--  lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp | 35
-rw-r--r--  lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h | 25
-rw-r--r--  lib/Target/SystemZ/README.txt | 48
-rw-r--r--  lib/Target/SystemZ/SystemZ.h | 24
-rw-r--r--  lib/Target/SystemZ/SystemZAsmPrinter.cpp | 134
-rw-r--r--  lib/Target/SystemZ/SystemZCallingConv.td | 4
-rw-r--r--  lib/Target/SystemZ/SystemZConstantPoolValue.cpp | 2
-rw-r--r--  lib/Target/SystemZ/SystemZFrameLowering.cpp | 5
-rw-r--r--  lib/Target/SystemZ/SystemZISelDAGToDAG.cpp | 296
-rw-r--r--  lib/Target/SystemZ/SystemZISelLowering.cpp | 1187
-rw-r--r--  lib/Target/SystemZ/SystemZISelLowering.h | 109
-rw-r--r--  lib/Target/SystemZ/SystemZInstrFP.td | 93
-rw-r--r--  lib/Target/SystemZ/SystemZInstrFormats.td | 241
-rw-r--r--  lib/Target/SystemZ/SystemZInstrInfo.cpp | 502
-rw-r--r--  lib/Target/SystemZ/SystemZInstrInfo.h | 28
-rw-r--r--  lib/Target/SystemZ/SystemZInstrInfo.td | 721
-rw-r--r--  lib/Target/SystemZ/SystemZLongBranch.cpp | 23
-rw-r--r--  lib/Target/SystemZ/SystemZMCInstLower.cpp | 102
-rw-r--r--  lib/Target/SystemZ/SystemZMCInstLower.h | 13
-rw-r--r--  lib/Target/SystemZ/SystemZMachineFunctionInfo.cpp | 17
-rw-r--r--  lib/Target/SystemZ/SystemZMachineFunctionInfo.h | 1
-rw-r--r--  lib/Target/SystemZ/SystemZOperands.td | 30
-rw-r--r--  lib/Target/SystemZ/SystemZOperators.td | 155
-rw-r--r--  lib/Target/SystemZ/SystemZPatterns.td | 93
-rw-r--r--  lib/Target/SystemZ/SystemZProcessors.td | 16
-rw-r--r--  lib/Target/SystemZ/SystemZRegisterInfo.cpp | 6
-rw-r--r--  lib/Target/SystemZ/SystemZRegisterInfo.h | 8
-rw-r--r--  lib/Target/SystemZ/SystemZRegisterInfo.td | 66
-rw-r--r--  lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp | 212
-rw-r--r--  lib/Target/SystemZ/SystemZSelectionDAGInfo.h | 36
-rw-r--r--  lib/Target/SystemZ/SystemZShortenInst.cpp | 163
-rw-r--r--  lib/Target/SystemZ/SystemZSubtarget.cpp | 13
-rw-r--r--  lib/Target/SystemZ/SystemZSubtarget.h | 8
-rw-r--r--  lib/Target/SystemZ/SystemZTargetMachine.cpp | 9
-rw-r--r--  lib/Target/Target.cpp | 8
-rw-r--r--  lib/Target/TargetLibraryInfo.cpp | 37
-rw-r--r--  lib/Target/TargetLoweringObjectFile.cpp | 14
-rw-r--r--  lib/Target/TargetMachine.cpp | 5
-rw-r--r--  lib/Target/TargetMachineC.cpp | 54
-rw-r--r--  lib/Target/TargetSubtargetInfo.cpp | 19
-rw-r--r--  lib/Target/X86/AsmParser/X86AsmParser.cpp | 410
-rw-r--r--  lib/Target/X86/Disassembler/X86Disassembler.cpp | 31
-rw-r--r--  lib/Target/X86/Disassembler/X86DisassemblerDecoder.c | 244
-rw-r--r--  lib/Target/X86/Disassembler/X86DisassemblerDecoder.h | 32
-rw-r--r--  lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h | 69
-rw-r--r--  lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp | 16
-rw-r--r--  lib/Target/X86/InstPrinter/X86ATTInstPrinter.h | 16
-rw-r--r--  lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp | 16
-rw-r--r--  lib/Target/X86/InstPrinter/X86IntelInstPrinter.h | 20
-rw-r--r--  lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp | 401
-rw-r--r--  lib/Target/X86/MCTargetDesc/X86BaseInfo.h | 3
-rw-r--r--  lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp | 9
-rw-r--r--  lib/Target/X86/MCTargetDesc/X86ELFRelocationInfo.cpp | 2
-rw-r--r--  lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp | 6
-rw-r--r--  lib/Target/X86/MCTargetDesc/X86MCAsmInfo.h | 3
-rw-r--r--  lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp | 30
-rw-r--r--  lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp | 2
-rw-r--r--  lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h | 6
-rw-r--r--  lib/Target/X86/MCTargetDesc/X86MachORelocationInfo.cpp | 24
-rw-r--r--  lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp | 177
-rw-r--r--  lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp | 2
-rw-r--r--  lib/Target/X86/X86.td | 57
-rw-r--r--  lib/Target/X86/X86AsmPrinter.cpp | 44
-rw-r--r--  lib/Target/X86/X86AsmPrinter.h | 15
-rw-r--r--  lib/Target/X86/X86CallingConv.h | 35
-rw-r--r--  lib/Target/X86/X86CallingConv.td | 57
-rw-r--r--  lib/Target/X86/X86CodeEmitter.cpp | 36
-rw-r--r--  lib/Target/X86/X86FastISel.cpp | 311
-rw-r--r--  lib/Target/X86/X86FixupLEAs.cpp | 9
-rw-r--r--  lib/Target/X86/X86FrameLowering.cpp | 268
-rw-r--r--  lib/Target/X86/X86FrameLowering.h | 27
-rw-r--r--  lib/Target/X86/X86ISelDAGToDAG.cpp | 66
-rw-r--r--  lib/Target/X86/X86ISelLowering.cpp | 1800
-rw-r--r--  lib/Target/X86/X86ISelLowering.h | 50
-rw-r--r--  lib/Target/X86/X86InstrAVX512.td | 2815
-rw-r--r--  lib/Target/X86/X86InstrArithmetic.td | 84
-rw-r--r--  lib/Target/X86/X86InstrCompiler.td | 24
-rw-r--r--  lib/Target/X86/X86InstrControl.td | 5
-rw-r--r--  lib/Target/X86/X86InstrExtension.td | 12
-rw-r--r--  lib/Target/X86/X86InstrFMA.td | 77
-rw-r--r--  lib/Target/X86/X86InstrFormats.td | 39
-rw-r--r--  lib/Target/X86/X86InstrFragmentsSIMD.td | 78
-rw-r--r--  lib/Target/X86/X86InstrInfo.cpp | 397
-rw-r--r--  lib/Target/X86/X86InstrInfo.h | 4
-rw-r--r--  lib/Target/X86/X86InstrInfo.td | 344
-rw-r--r--  lib/Target/X86/X86InstrMMX.td | 64
-rw-r--r--  lib/Target/X86/X86InstrSSE.td | 1554
-rw-r--r--  lib/Target/X86/X86InstrXOP.td | 146
-rw-r--r--  lib/Target/X86/X86JITInfo.cpp | 2
-rw-r--r--  lib/Target/X86/X86MCInstLower.cpp | 184
-rw-r--r--  lib/Target/X86/X86RegisterInfo.cpp | 19
-rw-r--r--  lib/Target/X86/X86RegisterInfo.h | 4
-rw-r--r--  lib/Target/X86/X86SchedHaswell.td | 4
-rw-r--r--  lib/Target/X86/X86SchedSandyBridge.td | 4
-rw-r--r--  lib/Target/X86/X86Schedule.td | 48
-rw-r--r--  lib/Target/X86/X86ScheduleAtom.td | 24
-rw-r--r--  lib/Target/X86/X86ScheduleSLM.td | 668
-rw-r--r--  lib/Target/X86/X86SelectionDAGInfo.cpp | 2
-rw-r--r--  lib/Target/X86/X86Subtarget.cpp | 59
-rw-r--r--  lib/Target/X86/X86Subtarget.h | 32
-rw-r--r--  lib/Target/X86/X86TargetMachine.cpp | 6
-rw-r--r--  lib/Target/X86/X86TargetObjectFile.cpp | 4
-rw-r--r--  lib/Target/X86/X86TargetTransformInfo.cpp | 149
-rw-r--r--  lib/Target/X86/X86VZeroUpper.cpp | 34
-rw-r--r--  lib/Target/XCore/CMakeLists.txt | 1
-rw-r--r--  lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.cpp | 8
-rw-r--r--  lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.h | 4
-rw-r--r--  lib/Target/XCore/XCore.h | 2
-rw-r--r--  lib/Target/XCore/XCoreAsmPrinter.cpp | 15
-rw-r--r--  lib/Target/XCore/XCoreFrameLowering.cpp | 44
-rw-r--r--  lib/Target/XCore/XCoreISelLowering.cpp | 42
-rw-r--r--  lib/Target/XCore/XCoreISelLowering.h | 10
-rw-r--r--  lib/Target/XCore/XCoreInstrInfo.cpp | 6
-rw-r--r--  lib/Target/XCore/XCoreInstrInfo.h | 1
-rw-r--r--  lib/Target/XCore/XCoreInstrInfo.td | 9
-rw-r--r--  lib/Target/XCore/XCoreLowerThreadLocal.cpp | 114
-rw-r--r--  lib/Target/XCore/XCoreMCInstLower.cpp | 2
-rw-r--r--  lib/Target/XCore/XCoreTargetMachine.cpp | 8
-rw-r--r--  lib/Target/XCore/XCoreTargetMachine.h | 2
-rw-r--r--  lib/Target/XCore/XCoreTargetTransformInfo.cpp | 83
484 files changed, 52416 insertions, 12585 deletions
diff --git a/lib/Target/AArch64/AArch64.td b/lib/Target/AArch64/AArch64.td
index e17052b..9c2c69a 100644
--- a/lib/Target/AArch64/AArch64.td
+++ b/lib/Target/AArch64/AArch64.td
@@ -21,8 +21,11 @@ include "llvm/Target/Target.td"
// AArch64 Subtarget features.
//
+def FeatureFPARMv8 : SubtargetFeature<"fp-armv8", "HasFPARMv8", "true",
+ "Enable ARMv8 FP">;
+
def FeatureNEON : SubtargetFeature<"neon", "HasNEON", "true",
- "Enable Advanced SIMD instructions">;
+ "Enable Advanced SIMD instructions", [FeatureFPARMv8]>;
def FeatureCrypto : SubtargetFeature<"crypto", "HasCrypto", "true",
"Enable cryptographic instructions">;
@@ -33,7 +36,7 @@ def FeatureCrypto : SubtargetFeature<"crypto", "HasCrypto", "true",
include "AArch64Schedule.td"
-def : Processor<"generic", GenericItineraries, [FeatureNEON, FeatureCrypto]>;
+def : Processor<"generic", GenericItineraries, [FeatureFPARMv8]>;
//===----------------------------------------------------------------------===//
// Register File Description
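
The net effect of this hunk is that NEON now implies FP support, and the generic processor enables only the FP feature; anything requesting "+neon" still gets "+fp-armv8" through the implication list. A minimal standalone C++ sketch of that transitive-implication resolution (hypothetical names, plain C++ rather than TableGen's real machinery):

#include <map>
#include <set>
#include <string>
#include <vector>

// Feature -> features it implies, mirroring FeatureNEON -> [FeatureFPARMv8].
static const std::map<std::string, std::vector<std::string>> Implies = {
    {"neon",     {"fp-armv8"}},
    {"fp-armv8", {}},
    {"crypto",   {}},
};

// Enable Feat plus everything it transitively implies.
static void enableFeature(const std::string &Feat,
                          std::set<std::string> &Enabled) {
  if (!Enabled.insert(Feat).second)
    return;                             // already enabled; also breaks cycles
  auto It = Implies.find(Feat);
  if (It != Implies.end())
    for (const std::string &Dep : It->second)
      enableFeature(Dep, Enabled);
}

Under this model, enableFeature("neon", S) leaves S == {"fp-armv8", "neon"}, which is why the generic processor above can drop FeatureNEON without users of "+neon" losing the FP unit.
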
diff --git a/lib/Target/AArch64/AArch64AsmPrinter.cpp b/lib/Target/AArch64/AArch64AsmPrinter.cpp
index 9498722..d59ca56 100644
--- a/lib/Target/AArch64/AArch64AsmPrinter.cpp
+++ b/lib/Target/AArch64/AArch64AsmPrinter.cpp
@@ -32,17 +32,18 @@ using namespace llvm;
/// argument to be printed as "bN".
static bool printModifiedFPRAsmOperand(const MachineOperand &MO,
const TargetRegisterInfo *TRI,
- const TargetRegisterClass &RegClass,
- raw_ostream &O) {
+ char RegType, raw_ostream &O) {
if (!MO.isReg())
return true;
for (MCRegAliasIterator AR(MO.getReg(), TRI, true); AR.isValid(); ++AR) {
- if (RegClass.contains(*AR)) {
- O << AArch64InstPrinter::getRegisterName(*AR);
+ if (AArch64::FPR8RegClass.contains(*AR)) {
+ O << RegType << TRI->getEncodingValue(MO.getReg());
return false;
}
}
+
+ // The register doesn't correspond to anything floating-point like.
return true;
}
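
Under the new scheme the printer no longer needs one register class per operand width: any alias in FPR8 identifies the operand as a scalar FPR, and the output is simply the constraint letter glued to the register's encoding. A hypothetical standalone illustration (not the real LLVM API):

#include <cstdio>

// Print an FPR asm operand as its sized view: encoding 5 becomes "b5",
// "h5", "s5", "d5" or "q5" depending on the constraint letter.
static void printFPROperand(char RegType, unsigned Encoding) {
  std::printf("%c%u", RegType, Encoding);
}

int main() {
  printFPROperand('s', 5);   // "s5"  (32-bit view of FPR5)
  std::printf("\n");
  printFPROperand('q', 31);  // "q31" (128-bit view of FPR31)
  std::printf("\n");
}
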
@@ -81,9 +82,9 @@ bool AArch64AsmPrinter::printSymbolicAddress(const MachineOperand &MO,
StringRef Modifier;
switch (MO.getType()) {
default:
- llvm_unreachable("Unexpected operand for symbolic address constraint");
+ return true;
case MachineOperand::MO_GlobalAddress:
- Name = Mang->getSymbol(MO.getGlobal())->getName();
+ Name = getSymbol(MO.getGlobal())->getName();
// Global variables may be accessed either via a GOT or in various fun and
// interesting TLS-model specific ways. Set the prefix modifier as
@@ -145,57 +146,29 @@ bool AArch64AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
unsigned AsmVariant,
const char *ExtraCode, raw_ostream &O) {
const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo();
- if (!ExtraCode || !ExtraCode[0]) {
- // There's actually no operand modifier, which leads to a slightly eclectic
- // set of behaviour which we have to handle here.
- const MachineOperand &MO = MI->getOperand(OpNum);
- switch (MO.getType()) {
- default:
- llvm_unreachable("Unexpected operand for inline assembly");
- case MachineOperand::MO_Register:
- // GCC prints the unmodified operand of a 'w' constraint as the vector
- // register. Technically, we could allocate the argument as a VPR128, but
- // that leads to extremely dodgy copies being generated to get the data
- // there.
- if (printModifiedFPRAsmOperand(MO, TRI, AArch64::VPR128RegClass, O))
- O << AArch64InstPrinter::getRegisterName(MO.getReg());
- break;
- case MachineOperand::MO_Immediate:
- O << '#' << MO.getImm();
- break;
- case MachineOperand::MO_FPImmediate:
- assert(MO.getFPImm()->isExactlyValue(0.0) && "Only FP 0.0 expected");
- O << "#0.0";
- break;
- case MachineOperand::MO_BlockAddress:
- case MachineOperand::MO_ConstantPoolIndex:
- case MachineOperand::MO_GlobalAddress:
- case MachineOperand::MO_ExternalSymbol:
- return printSymbolicAddress(MO, false, "", O);
- }
- return false;
- }
- // We have a real modifier to handle.
+ if (!ExtraCode)
+ ExtraCode = "";
+
switch(ExtraCode[0]) {
default:
- // See if this is a generic operand
- return AsmPrinter::PrintAsmOperand(MI, OpNum, AsmVariant, ExtraCode, O);
- case 'c': // Don't print "#" before an immediate operand.
- if (!MI->getOperand(OpNum).isImm())
- return true;
- O << MI->getOperand(OpNum).getImm();
- return false;
+ if (!AsmPrinter::PrintAsmOperand(MI, OpNum, AsmVariant, ExtraCode, O))
+ return false;
+ break;
case 'w':
// Output 32-bit general register operand, constant zero as wzr, or stack
// pointer as wsp. Ignored when used with other operand types.
- return printModifiedGPRAsmOperand(MI->getOperand(OpNum), TRI,
- AArch64::GPR32RegClass, O);
+ if (!printModifiedGPRAsmOperand(MI->getOperand(OpNum), TRI,
+ AArch64::GPR32RegClass, O))
+ return false;
+ break;
case 'x':
// Output 64-bit general register operand, constant zero as xzr, or stack
// pointer as sp. Ignored when used with other operand types.
- return printModifiedGPRAsmOperand(MI->getOperand(OpNum), TRI,
- AArch64::GPR64RegClass, O);
+ if (!printModifiedGPRAsmOperand(MI->getOperand(OpNum), TRI,
+ AArch64::GPR64RegClass, O))
+ return false;
+ break;
case 'H':
// Output higher numbered of a 64-bit general register pair
case 'Q':
@@ -211,40 +184,65 @@ bool AArch64AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
// copies ...).
llvm_unreachable("FIXME: Unimplemented register pairs");
case 'b':
- // Output 8-bit FP/SIMD scalar register operand, prefixed with b.
- return printModifiedFPRAsmOperand(MI->getOperand(OpNum), TRI,
- AArch64::FPR8RegClass, O);
case 'h':
- // Output 16-bit FP/SIMD scalar register operand, prefixed with h.
- return printModifiedFPRAsmOperand(MI->getOperand(OpNum), TRI,
- AArch64::FPR16RegClass, O);
case 's':
- // Output 32-bit FP/SIMD scalar register operand, prefixed with s.
- return printModifiedFPRAsmOperand(MI->getOperand(OpNum), TRI,
- AArch64::FPR32RegClass, O);
case 'd':
- // Output 64-bit FP/SIMD scalar register operand, prefixed with d.
- return printModifiedFPRAsmOperand(MI->getOperand(OpNum), TRI,
- AArch64::FPR64RegClass, O);
case 'q':
- // Output 128-bit FP/SIMD scalar register operand, prefixed with q.
- return printModifiedFPRAsmOperand(MI->getOperand(OpNum), TRI,
- AArch64::FPR128RegClass, O);
+ if (!printModifiedFPRAsmOperand(MI->getOperand(OpNum), TRI,
+ ExtraCode[0], O))
+ return false;
+ break;
case 'A':
// Output symbolic address with appropriate relocation modifier (also
// suitable for ADRP).
- return printSymbolicAddress(MI->getOperand(OpNum), false, "", O);
+ if (!printSymbolicAddress(MI->getOperand(OpNum), false, "", O))
+ return false;
+ break;
case 'L':
// Output bits 11:0 of symbolic address with appropriate :lo12: relocation
// modifier.
- return printSymbolicAddress(MI->getOperand(OpNum), true, "lo12", O);
+ if (!printSymbolicAddress(MI->getOperand(OpNum), true, "lo12", O))
+ return false;
+ break;
case 'G':
// Output bits 23:12 of symbolic address with appropriate :hi12: relocation
// modifier (currently only for TLS local exec).
- return printSymbolicAddress(MI->getOperand(OpNum), true, "hi12", O);
+ if (!printSymbolicAddress(MI->getOperand(OpNum), true, "hi12", O))
+ return false;
+ break;
+ case 'a':
+ return PrintAsmMemoryOperand(MI, OpNum, AsmVariant, ExtraCode, O);
}
+ // There's actually no operand modifier, which leads to a slightly eclectic
+ // set of behaviour which we have to handle here.
+ const MachineOperand &MO = MI->getOperand(OpNum);
+ switch (MO.getType()) {
+ default:
+ llvm_unreachable("Unexpected operand for inline assembly");
+ case MachineOperand::MO_Register:
+ // GCC prints the unmodified operand of a 'w' constraint as the vector
+ // register. Technically, we could allocate the argument as a VPR128, but
+ // that leads to extremely dodgy copies being generated to get the data
+ // there.
+ if (printModifiedFPRAsmOperand(MO, TRI, 'v', O))
+ O << AArch64InstPrinter::getRegisterName(MO.getReg());
+ break;
+ case MachineOperand::MO_Immediate:
+ O << '#' << MO.getImm();
+ break;
+ case MachineOperand::MO_FPImmediate:
+ assert(MO.getFPImm()->isExactlyValue(0.0) && "Only FP 0.0 expected");
+ O << "#0.0";
+ break;
+ case MachineOperand::MO_BlockAddress:
+ case MachineOperand::MO_ConstantPoolIndex:
+ case MachineOperand::MO_GlobalAddress:
+ case MachineOperand::MO_ExternalSymbol:
+ return printSymbolicAddress(MO, false, "", O);
+ }
+ return false;
}
bool AArch64AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
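
Structurally, the rewrite inverts the old logic: modifier cases now return only on success and break on failure, so both a missing modifier and an unhandled one fall through to the shared unmodified-operand code at the bottom. A skeletal sketch of that control-flow pattern (stubbed helpers, hypothetical names, following the LLVM convention that false means success):

#include <string>

// Stubs standing in for the real per-modifier and generic printers.
static bool handleModifier(char, std::string &) { return true; }
static bool handleDefault(std::string &Out) { Out = "<operand>"; return false; }

static bool printOperand(const char *ExtraCode, std::string &Out) {
  if (!ExtraCode)
    ExtraCode = "";                // no modifier behaves like an empty one
  switch (ExtraCode[0]) {
  case 'w': case 'x': case 'b': case 'h': case 's': case 'd': case 'q':
    if (!handleModifier(ExtraCode[0], Out))
      return false;                // the modifier printed the operand
    break;                         // else fall through to the default path
  default:
    break;
  }
  return handleDefault(Out);       // shared "no modifier" handling
}
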
diff --git a/lib/Target/AArch64/AArch64CallingConv.td b/lib/Target/AArch64/AArch64CallingConv.td
index bff7eeb..a2a9f3f 100644
--- a/lib/Target/AArch64/AArch64CallingConv.td
+++ b/lib/Target/AArch64/AArch64CallingConv.td
@@ -59,9 +59,9 @@ def CC_A64_APCS : CallingConv<[
// Canonicalise the various types that live in different floating-point
// registers. This makes sense because the PCS does not distinguish Short
// Vectors and Floating-point types.
- CCIfType<[v2i8], CCBitConvertToType<f16>>,
- CCIfType<[v4i8, v2i16], CCBitConvertToType<f32>>,
- CCIfType<[v8i8, v4i16, v2i32, v2f32, v1i64], CCBitConvertToType<f64>>,
+ CCIfType<[v1i16, v2i8], CCBitConvertToType<f16>>,
+ CCIfType<[v1i32, v4i8, v2i16, v1f32], CCBitConvertToType<f32>>,
+ CCIfType<[v8i8, v4i16, v2i32, v2f32, v1i64, v1f64], CCBitConvertToType<f64>>,
CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
CCBitConvertToType<f128>>,
@@ -70,7 +70,8 @@ def CC_A64_APCS : CallingConv<[
// argument is allocated to the least significant bits of register
// v[NSRN]. The NSRN is incremented by one. The argument has now been
// allocated."
- CCIfType<[f16], CCAssignToReg<[B0, B1, B2, B3, B4, B5, B6, B7]>>,
+ CCIfType<[v1i8], CCAssignToReg<[B0, B1, B2, B3, B4, B5, B6, B7]>>,
+ CCIfType<[f16], CCAssignToReg<[H0, H1, H2, H3, H4, H5, H6, H7]>>,
CCIfType<[f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7]>>,
CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>,
CCIfType<[f128], CCAssignToReg<[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
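
The rule the added types follow: every short vector is bit-converted to the scalar floating-point type of the same total width, so the calling convention only needs register assignments for f16/f32/f64/f128 (plus the new v1i8-in-B-register case). A hypothetical helper expressing the width mapping (plain C++, not TableGen):

#include <cassert>

// Map a short vector (e.g. v4i8: NumElts=4, EltBits=8) to the FP type of
// equal total width that the PCS actually assigns to a register.
static const char *canonicalFPType(unsigned NumElts, unsigned EltBits) {
  switch (NumElts * EltBits) {
  case 16:  return "f16";   // v2i8, v1i16
  case 32:  return "f32";   // v4i8, v2i16, v1i32, v1f32
  case 64:  return "f64";   // v8i8, v4i16, v2i32, v2f32, v1i64, v1f64
  case 128: return "f128";  // v16i8, v8i16, v4i32, v2i64, v4f32, v2f64
  default:  assert(false && "not a short-vector width"); return nullptr;
  }
}
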
diff --git a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index ee819e0..ef99541 100644
--- a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -109,6 +109,45 @@ public:
SDNode* Select(SDNode*);
private:
+ /// Get the opcode for table lookup instruction
+ unsigned getTBLOpc(bool IsExt, bool Is64Bit, unsigned NumOfVec);
+
+ /// Select NEON table lookup intrinsics. NumVecs should be 1, 2, 3 or 4.
+ /// IsExt is to indicate if the result will be extended with an argument.
+ SDNode *SelectVTBL(SDNode *N, unsigned NumVecs, bool IsExt);
+
+ /// Select NEON load intrinsics. NumVecs should be 1, 2, 3 or 4.
+ SDNode *SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
+ const uint16_t *Opcode);
+
+ /// Select NEON store intrinsics. NumVecs should be 1, 2, 3 or 4.
+ SDNode *SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
+ const uint16_t *Opcodes);
+
+ /// Form sequences of consecutive 64/128-bit registers for use in NEON
+ /// instructions making use of a vector-list (e.g. ldN, tbl). Vecs must have
+ /// between 1 and 4 elements. If it contains a single element that is returned
+ /// unchanged; otherwise a REG_SEQUENCE value is returned.
+ SDValue createDTuple(ArrayRef<SDValue> Vecs);
+ SDValue createQTuple(ArrayRef<SDValue> Vecs);
+
+ /// Generic helper for the createDTuple/createQTuple
+ /// functions. Those should almost always be called instead.
+ SDValue createTuple(ArrayRef<SDValue> Vecs, unsigned RegClassIDs[],
+ unsigned SubRegs[]);
+
+ /// Select NEON load-duplicate intrinsics. NumVecs should be 2, 3 or 4.
+ /// The opcode array specifies the instructions used for load.
+ SDNode *SelectVLDDup(SDNode *N, bool isUpdating, unsigned NumVecs,
+ const uint16_t *Opcodes);
+
+ /// Select NEON load/store lane intrinsics. NumVecs should be 2, 3 or 4.
+ /// The opcode arrays specify the instructions used for load/store.
+ SDNode *SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
+ unsigned NumVecs, const uint16_t *Opcodes);
+
+ SDValue getTargetSubregToReg(int SRIdx, SDLoc DL, EVT VT, EVT VTD,
+ SDValue Operand);
};
}
@@ -390,12 +429,607 @@ SDNode *AArch64DAGToDAGISel::SelectAtomic(SDNode *Node, unsigned Op8,
&Ops[0], Ops.size());
}
+SDValue AArch64DAGToDAGISel::createDTuple(ArrayRef<SDValue> Regs) {
+ static unsigned RegClassIDs[] = { AArch64::DPairRegClassID,
+ AArch64::DTripleRegClassID,
+ AArch64::DQuadRegClassID };
+ static unsigned SubRegs[] = { AArch64::dsub_0, AArch64::dsub_1,
+ AArch64::dsub_2, AArch64::dsub_3 };
+
+ return createTuple(Regs, RegClassIDs, SubRegs);
+}
+
+SDValue AArch64DAGToDAGISel::createQTuple(ArrayRef<SDValue> Regs) {
+ static unsigned RegClassIDs[] = { AArch64::QPairRegClassID,
+ AArch64::QTripleRegClassID,
+ AArch64::QQuadRegClassID };
+ static unsigned SubRegs[] = { AArch64::qsub_0, AArch64::qsub_1,
+ AArch64::qsub_2, AArch64::qsub_3 };
+
+ return createTuple(Regs, RegClassIDs, SubRegs);
+}
+
+SDValue AArch64DAGToDAGISel::createTuple(ArrayRef<SDValue> Regs,
+ unsigned RegClassIDs[],
+ unsigned SubRegs[]) {
+ // There's no special register-class for a vector-list of 1 element: it's just
+ // a vector.
+ if (Regs.size() == 1)
+ return Regs[0];
+
+ assert(Regs.size() >= 2 && Regs.size() <= 4);
+
+ SDLoc DL(Regs[0].getNode());
+
+ SmallVector<SDValue, 4> Ops;
+
+ // First operand of REG_SEQUENCE is the desired RegClass.
+ Ops.push_back(
+ CurDAG->getTargetConstant(RegClassIDs[Regs.size() - 2], MVT::i32));
+
+ // Then we get pairs of source & subregister-position for the components.
+ for (unsigned i = 0; i < Regs.size(); ++i) {
+ Ops.push_back(Regs[i]);
+ Ops.push_back(CurDAG->getTargetConstant(SubRegs[i], MVT::i32));
+ }
+
+ SDNode *N =
+ CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops);
+ return SDValue(N, 0);
+}
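
The REG_SEQUENCE operand layout createTuple builds is: the register class for an N-element tuple first (indexed by N-2, since only pairs, triples and quads have classes), then one (register, sub-register slot) pair per component. A standalone model with plain ints standing in for SDValues (hypothetical):

#include <cassert>
#include <vector>

// Assemble the REG_SEQUENCE operand list for an N-register tuple (N = 2..4):
// [class-id, reg0, slot0, reg1, slot1, ...].
static std::vector<int> buildRegSequenceOps(const std::vector<int> &Regs,
                                            const int RegClassIDs[3],
                                            const int SubRegs[4]) {
  assert(Regs.size() >= 2 && Regs.size() <= 4);
  std::vector<int> Ops;
  Ops.push_back(RegClassIDs[Regs.size() - 2]); // pair/triple/quad class
  for (unsigned i = 0; i < Regs.size(); ++i) {
    Ops.push_back(Regs[i]);                    // component register
    Ops.push_back(SubRegs[i]);                 // its slot (dsub_i / qsub_i)
  }
  return Ops;
}
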
+
+
+// Get the register stride update opcode of a VLD/VST instruction that
+// is otherwise equivalent to the given fixed stride updating instruction.
+static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) {
+ switch (Opc) {
+ default: break;
+ case AArch64::LD1WB_8B_fixed: return AArch64::LD1WB_8B_register;
+ case AArch64::LD1WB_4H_fixed: return AArch64::LD1WB_4H_register;
+ case AArch64::LD1WB_2S_fixed: return AArch64::LD1WB_2S_register;
+ case AArch64::LD1WB_1D_fixed: return AArch64::LD1WB_1D_register;
+ case AArch64::LD1WB_16B_fixed: return AArch64::LD1WB_16B_register;
+ case AArch64::LD1WB_8H_fixed: return AArch64::LD1WB_8H_register;
+ case AArch64::LD1WB_4S_fixed: return AArch64::LD1WB_4S_register;
+ case AArch64::LD1WB_2D_fixed: return AArch64::LD1WB_2D_register;
+
+ case AArch64::LD2WB_8B_fixed: return AArch64::LD2WB_8B_register;
+ case AArch64::LD2WB_4H_fixed: return AArch64::LD2WB_4H_register;
+ case AArch64::LD2WB_2S_fixed: return AArch64::LD2WB_2S_register;
+ case AArch64::LD2WB_16B_fixed: return AArch64::LD2WB_16B_register;
+ case AArch64::LD2WB_8H_fixed: return AArch64::LD2WB_8H_register;
+ case AArch64::LD2WB_4S_fixed: return AArch64::LD2WB_4S_register;
+ case AArch64::LD2WB_2D_fixed: return AArch64::LD2WB_2D_register;
+
+ case AArch64::LD3WB_8B_fixed: return AArch64::LD3WB_8B_register;
+ case AArch64::LD3WB_4H_fixed: return AArch64::LD3WB_4H_register;
+ case AArch64::LD3WB_2S_fixed: return AArch64::LD3WB_2S_register;
+ case AArch64::LD3WB_16B_fixed: return AArch64::LD3WB_16B_register;
+ case AArch64::LD3WB_8H_fixed: return AArch64::LD3WB_8H_register;
+ case AArch64::LD3WB_4S_fixed: return AArch64::LD3WB_4S_register;
+ case AArch64::LD3WB_2D_fixed: return AArch64::LD3WB_2D_register;
+
+ case AArch64::LD4WB_8B_fixed: return AArch64::LD4WB_8B_register;
+ case AArch64::LD4WB_4H_fixed: return AArch64::LD4WB_4H_register;
+ case AArch64::LD4WB_2S_fixed: return AArch64::LD4WB_2S_register;
+ case AArch64::LD4WB_16B_fixed: return AArch64::LD4WB_16B_register;
+ case AArch64::LD4WB_8H_fixed: return AArch64::LD4WB_8H_register;
+ case AArch64::LD4WB_4S_fixed: return AArch64::LD4WB_4S_register;
+ case AArch64::LD4WB_2D_fixed: return AArch64::LD4WB_2D_register;
+
+ case AArch64::LD1x2WB_8B_fixed: return AArch64::LD1x2WB_8B_register;
+ case AArch64::LD1x2WB_4H_fixed: return AArch64::LD1x2WB_4H_register;
+ case AArch64::LD1x2WB_2S_fixed: return AArch64::LD1x2WB_2S_register;
+ case AArch64::LD1x2WB_1D_fixed: return AArch64::LD1x2WB_1D_register;
+ case AArch64::LD1x2WB_16B_fixed: return AArch64::LD1x2WB_16B_register;
+ case AArch64::LD1x2WB_8H_fixed: return AArch64::LD1x2WB_8H_register;
+ case AArch64::LD1x2WB_4S_fixed: return AArch64::LD1x2WB_4S_register;
+ case AArch64::LD1x2WB_2D_fixed: return AArch64::LD1x2WB_2D_register;
+
+ case AArch64::LD1x3WB_8B_fixed: return AArch64::LD1x3WB_8B_register;
+ case AArch64::LD1x3WB_4H_fixed: return AArch64::LD1x3WB_4H_register;
+ case AArch64::LD1x3WB_2S_fixed: return AArch64::LD1x3WB_2S_register;
+ case AArch64::LD1x3WB_1D_fixed: return AArch64::LD1x3WB_1D_register;
+ case AArch64::LD1x3WB_16B_fixed: return AArch64::LD1x3WB_16B_register;
+ case AArch64::LD1x3WB_8H_fixed: return AArch64::LD1x3WB_8H_register;
+ case AArch64::LD1x3WB_4S_fixed: return AArch64::LD1x3WB_4S_register;
+ case AArch64::LD1x3WB_2D_fixed: return AArch64::LD1x3WB_2D_register;
+
+ case AArch64::LD1x4WB_8B_fixed: return AArch64::LD1x4WB_8B_register;
+ case AArch64::LD1x4WB_4H_fixed: return AArch64::LD1x4WB_4H_register;
+ case AArch64::LD1x4WB_2S_fixed: return AArch64::LD1x4WB_2S_register;
+ case AArch64::LD1x4WB_1D_fixed: return AArch64::LD1x4WB_1D_register;
+ case AArch64::LD1x4WB_16B_fixed: return AArch64::LD1x4WB_16B_register;
+ case AArch64::LD1x4WB_8H_fixed: return AArch64::LD1x4WB_8H_register;
+ case AArch64::LD1x4WB_4S_fixed: return AArch64::LD1x4WB_4S_register;
+ case AArch64::LD1x4WB_2D_fixed: return AArch64::LD1x4WB_2D_register;
+
+ case AArch64::ST1WB_8B_fixed: return AArch64::ST1WB_8B_register;
+ case AArch64::ST1WB_4H_fixed: return AArch64::ST1WB_4H_register;
+ case AArch64::ST1WB_2S_fixed: return AArch64::ST1WB_2S_register;
+ case AArch64::ST1WB_1D_fixed: return AArch64::ST1WB_1D_register;
+ case AArch64::ST1WB_16B_fixed: return AArch64::ST1WB_16B_register;
+ case AArch64::ST1WB_8H_fixed: return AArch64::ST1WB_8H_register;
+ case AArch64::ST1WB_4S_fixed: return AArch64::ST1WB_4S_register;
+ case AArch64::ST1WB_2D_fixed: return AArch64::ST1WB_2D_register;
+
+ case AArch64::ST2WB_8B_fixed: return AArch64::ST2WB_8B_register;
+ case AArch64::ST2WB_4H_fixed: return AArch64::ST2WB_4H_register;
+ case AArch64::ST2WB_2S_fixed: return AArch64::ST2WB_2S_register;
+ case AArch64::ST2WB_16B_fixed: return AArch64::ST2WB_16B_register;
+ case AArch64::ST2WB_8H_fixed: return AArch64::ST2WB_8H_register;
+ case AArch64::ST2WB_4S_fixed: return AArch64::ST2WB_4S_register;
+ case AArch64::ST2WB_2D_fixed: return AArch64::ST2WB_2D_register;
+
+ case AArch64::ST3WB_8B_fixed: return AArch64::ST3WB_8B_register;
+ case AArch64::ST3WB_4H_fixed: return AArch64::ST3WB_4H_register;
+ case AArch64::ST3WB_2S_fixed: return AArch64::ST3WB_2S_register;
+ case AArch64::ST3WB_16B_fixed: return AArch64::ST3WB_16B_register;
+ case AArch64::ST3WB_8H_fixed: return AArch64::ST3WB_8H_register;
+ case AArch64::ST3WB_4S_fixed: return AArch64::ST3WB_4S_register;
+ case AArch64::ST3WB_2D_fixed: return AArch64::ST3WB_2D_register;
+
+ case AArch64::ST4WB_8B_fixed: return AArch64::ST4WB_8B_register;
+ case AArch64::ST4WB_4H_fixed: return AArch64::ST4WB_4H_register;
+ case AArch64::ST4WB_2S_fixed: return AArch64::ST4WB_2S_register;
+ case AArch64::ST4WB_16B_fixed: return AArch64::ST4WB_16B_register;
+ case AArch64::ST4WB_8H_fixed: return AArch64::ST4WB_8H_register;
+ case AArch64::ST4WB_4S_fixed: return AArch64::ST4WB_4S_register;
+ case AArch64::ST4WB_2D_fixed: return AArch64::ST4WB_2D_register;
+
+ case AArch64::ST1x2WB_8B_fixed: return AArch64::ST1x2WB_8B_register;
+ case AArch64::ST1x2WB_4H_fixed: return AArch64::ST1x2WB_4H_register;
+ case AArch64::ST1x2WB_2S_fixed: return AArch64::ST1x2WB_2S_register;
+ case AArch64::ST1x2WB_1D_fixed: return AArch64::ST1x2WB_1D_register;
+ case AArch64::ST1x2WB_16B_fixed: return AArch64::ST1x2WB_16B_register;
+ case AArch64::ST1x2WB_8H_fixed: return AArch64::ST1x2WB_8H_register;
+ case AArch64::ST1x2WB_4S_fixed: return AArch64::ST1x2WB_4S_register;
+ case AArch64::ST1x2WB_2D_fixed: return AArch64::ST1x2WB_2D_register;
+
+ case AArch64::ST1x3WB_8B_fixed: return AArch64::ST1x3WB_8B_register;
+ case AArch64::ST1x3WB_4H_fixed: return AArch64::ST1x3WB_4H_register;
+ case AArch64::ST1x3WB_2S_fixed: return AArch64::ST1x3WB_2S_register;
+ case AArch64::ST1x3WB_1D_fixed: return AArch64::ST1x3WB_1D_register;
+ case AArch64::ST1x3WB_16B_fixed: return AArch64::ST1x3WB_16B_register;
+ case AArch64::ST1x3WB_8H_fixed: return AArch64::ST1x3WB_8H_register;
+ case AArch64::ST1x3WB_4S_fixed: return AArch64::ST1x3WB_4S_register;
+ case AArch64::ST1x3WB_2D_fixed: return AArch64::ST1x3WB_2D_register;
+
+ case AArch64::ST1x4WB_8B_fixed: return AArch64::ST1x4WB_8B_register;
+ case AArch64::ST1x4WB_4H_fixed: return AArch64::ST1x4WB_4H_register;
+ case AArch64::ST1x4WB_2S_fixed: return AArch64::ST1x4WB_2S_register;
+ case AArch64::ST1x4WB_1D_fixed: return AArch64::ST1x4WB_1D_register;
+ case AArch64::ST1x4WB_16B_fixed: return AArch64::ST1x4WB_16B_register;
+ case AArch64::ST1x4WB_8H_fixed: return AArch64::ST1x4WB_8H_register;
+ case AArch64::ST1x4WB_4S_fixed: return AArch64::ST1x4WB_4S_register;
+ case AArch64::ST1x4WB_2D_fixed: return AArch64::ST1x4WB_2D_register;
+
+ // Post-index of duplicate loads
+ case AArch64::LD2R_WB_8B_fixed: return AArch64::LD2R_WB_8B_register;
+ case AArch64::LD2R_WB_4H_fixed: return AArch64::LD2R_WB_4H_register;
+ case AArch64::LD2R_WB_2S_fixed: return AArch64::LD2R_WB_2S_register;
+ case AArch64::LD2R_WB_1D_fixed: return AArch64::LD2R_WB_1D_register;
+ case AArch64::LD2R_WB_16B_fixed: return AArch64::LD2R_WB_16B_register;
+ case AArch64::LD2R_WB_8H_fixed: return AArch64::LD2R_WB_8H_register;
+ case AArch64::LD2R_WB_4S_fixed: return AArch64::LD2R_WB_4S_register;
+ case AArch64::LD2R_WB_2D_fixed: return AArch64::LD2R_WB_2D_register;
+
+ case AArch64::LD3R_WB_8B_fixed: return AArch64::LD3R_WB_8B_register;
+ case AArch64::LD3R_WB_4H_fixed: return AArch64::LD3R_WB_4H_register;
+ case AArch64::LD3R_WB_2S_fixed: return AArch64::LD3R_WB_2S_register;
+ case AArch64::LD3R_WB_1D_fixed: return AArch64::LD3R_WB_1D_register;
+ case AArch64::LD3R_WB_16B_fixed: return AArch64::LD3R_WB_16B_register;
+ case AArch64::LD3R_WB_8H_fixed: return AArch64::LD3R_WB_8H_register;
+ case AArch64::LD3R_WB_4S_fixed: return AArch64::LD3R_WB_4S_register;
+ case AArch64::LD3R_WB_2D_fixed: return AArch64::LD3R_WB_2D_register;
+
+ case AArch64::LD4R_WB_8B_fixed: return AArch64::LD4R_WB_8B_register;
+ case AArch64::LD4R_WB_4H_fixed: return AArch64::LD4R_WB_4H_register;
+ case AArch64::LD4R_WB_2S_fixed: return AArch64::LD4R_WB_2S_register;
+ case AArch64::LD4R_WB_1D_fixed: return AArch64::LD4R_WB_1D_register;
+ case AArch64::LD4R_WB_16B_fixed: return AArch64::LD4R_WB_16B_register;
+ case AArch64::LD4R_WB_8H_fixed: return AArch64::LD4R_WB_8H_register;
+ case AArch64::LD4R_WB_4S_fixed: return AArch64::LD4R_WB_4S_register;
+ case AArch64::LD4R_WB_2D_fixed: return AArch64::LD4R_WB_2D_register;
+
+ // Post-index of lane loads
+ case AArch64::LD2LN_WB_B_fixed: return AArch64::LD2LN_WB_B_register;
+ case AArch64::LD2LN_WB_H_fixed: return AArch64::LD2LN_WB_H_register;
+ case AArch64::LD2LN_WB_S_fixed: return AArch64::LD2LN_WB_S_register;
+ case AArch64::LD2LN_WB_D_fixed: return AArch64::LD2LN_WB_D_register;
+
+ case AArch64::LD3LN_WB_B_fixed: return AArch64::LD3LN_WB_B_register;
+ case AArch64::LD3LN_WB_H_fixed: return AArch64::LD3LN_WB_H_register;
+ case AArch64::LD3LN_WB_S_fixed: return AArch64::LD3LN_WB_S_register;
+ case AArch64::LD3LN_WB_D_fixed: return AArch64::LD3LN_WB_D_register;
+
+ case AArch64::LD4LN_WB_B_fixed: return AArch64::LD4LN_WB_B_register;
+ case AArch64::LD4LN_WB_H_fixed: return AArch64::LD4LN_WB_H_register;
+ case AArch64::LD4LN_WB_S_fixed: return AArch64::LD4LN_WB_S_register;
+ case AArch64::LD4LN_WB_D_fixed: return AArch64::LD4LN_WB_D_register;
+
+ // Post-index of lane stores
+ case AArch64::ST2LN_WB_B_fixed: return AArch64::ST2LN_WB_B_register;
+ case AArch64::ST2LN_WB_H_fixed: return AArch64::ST2LN_WB_H_register;
+ case AArch64::ST2LN_WB_S_fixed: return AArch64::ST2LN_WB_S_register;
+ case AArch64::ST2LN_WB_D_fixed: return AArch64::ST2LN_WB_D_register;
+
+ case AArch64::ST3LN_WB_B_fixed: return AArch64::ST3LN_WB_B_register;
+ case AArch64::ST3LN_WB_H_fixed: return AArch64::ST3LN_WB_H_register;
+ case AArch64::ST3LN_WB_S_fixed: return AArch64::ST3LN_WB_S_register;
+ case AArch64::ST3LN_WB_D_fixed: return AArch64::ST3LN_WB_D_register;
+
+ case AArch64::ST4LN_WB_B_fixed: return AArch64::ST4LN_WB_B_register;
+ case AArch64::ST4LN_WB_H_fixed: return AArch64::ST4LN_WB_H_register;
+ case AArch64::ST4LN_WB_S_fixed: return AArch64::ST4LN_WB_S_register;
+ case AArch64::ST4LN_WB_D_fixed: return AArch64::ST4LN_WB_D_register;
+ }
+ return Opc; // If not one we handle, return it unchanged.
+}
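
The switch above is purely mechanical: every post-indexed NEON load/store opcode comes in a _fixed flavour (immediate increment) and a _register flavour (increment held in a register), and the mapping only swaps the suffix. Because opcodes are enum values the real code must spell each case out; at the level of names the transformation is just (hypothetical string-level sketch):

#include <string>

// Rewrite "..._fixed" to "..._register"; other names pass through unchanged,
// matching the switch's default behaviour.
static std::string toRegisterUpdateForm(std::string Name) {
  const std::string Fixed = "_fixed";
  if (Name.size() >= Fixed.size() &&
      Name.compare(Name.size() - Fixed.size(), Fixed.size(), Fixed) == 0)
    Name.replace(Name.size() - Fixed.size(), Fixed.size(), "_register");
  return Name;
}
// toRegisterUpdateForm("LD2WB_8B_fixed") == "LD2WB_8B_register"
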
+
+SDNode *AArch64DAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating,
+ unsigned NumVecs,
+ const uint16_t *Opcodes) {
+ assert(NumVecs >= 1 && NumVecs <= 4 && "VLD NumVecs out-of-range");
+
+ EVT VT = N->getValueType(0);
+ unsigned OpcodeIndex;
+ bool is64BitVector = VT.is64BitVector();
+ switch (VT.getScalarType().getSizeInBits()) {
+ case 8: OpcodeIndex = is64BitVector ? 0 : 4; break;
+ case 16: OpcodeIndex = is64BitVector ? 1 : 5; break;
+ case 32: OpcodeIndex = is64BitVector ? 2 : 6; break;
+ case 64: OpcodeIndex = is64BitVector ? 3 : 7; break;
+ default: llvm_unreachable("unhandled vector load type");
+ }
+ unsigned Opc = Opcodes[OpcodeIndex];
+
+ SmallVector<SDValue, 2> Ops;
+ unsigned AddrOpIdx = isUpdating ? 1 : 2;
+ Ops.push_back(N->getOperand(AddrOpIdx)); // Push back the Memory Address
+
+ if (isUpdating) {
+ SDValue Inc = N->getOperand(AddrOpIdx + 1);
+ if (!isa<ConstantSDNode>(Inc.getNode())) // Increment in Register
+ Opc = getVLDSTRegisterUpdateOpcode(Opc);
+ Ops.push_back(Inc);
+ }
+
+ Ops.push_back(N->getOperand(0)); // Push back the Chain
+
+ SmallVector<EVT, 3> ResTys;
+ // Push back the type of return super register
+ if (NumVecs == 1)
+ ResTys.push_back(VT);
+ else if (NumVecs == 3)
+ ResTys.push_back(MVT::Untyped);
+ else {
+ EVT ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64,
+ is64BitVector ? NumVecs : NumVecs * 2);
+ ResTys.push_back(ResTy);
+ }
+
+ if (isUpdating)
+ ResTys.push_back(MVT::i64); // Type of the updated register
+ ResTys.push_back(MVT::Other); // Type of the Chain
+ SDLoc dl(N);
+ SDNode *VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
+
+ // Transfer memoperands.
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
+ cast<MachineSDNode>(VLd)->setMemRefs(MemOp, MemOp + 1);
+
+ if (NumVecs == 1)
+ return VLd;
+
+ // If NumVecs > 1, the return result is a super register containing 2-4
+ // consecutive vector registers.
+ SDValue SuperReg = SDValue(VLd, 0);
+
+ unsigned Sub0 = is64BitVector ? AArch64::dsub_0 : AArch64::qsub_0;
+ for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
+ ReplaceUses(SDValue(N, Vec),
+ CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
+ // Update users of the Chain
+ ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, 1));
+ if (isUpdating)
+ ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLd, 2));
+
+ return NULL;
+}
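
SelectVLD (and SelectVST below) index their eight-entry opcode tables the same way: rows 0-3 cover 64-bit vectors with 8/16/32/64-bit elements, rows 4-7 the 128-bit equivalents. The indexing in isolation (standalone sketch):

#include <cassert>

// Table row for a NEON load/store opcode: element size picks 0..3,
// and 128-bit vectors are offset by 4.
static unsigned opcodeIndex(unsigned EltBits, bool Is64BitVector) {
  unsigned Row = 0;
  switch (EltBits) {
  case 8:  Row = 0; break;
  case 16: Row = 1; break;
  case 32: Row = 2; break;
  case 64: Row = 3; break;
  default: assert(false && "unhandled element size");
  }
  return Is64BitVector ? Row : Row + 4;
}
// opcodeIndex(16, /*Is64BitVector=*/false) == 5, as in the switches above.
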
+
+SDNode *AArch64DAGToDAGISel::SelectVST(SDNode *N, bool isUpdating,
+ unsigned NumVecs,
+ const uint16_t *Opcodes) {
+ assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range");
+ SDLoc dl(N);
+
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
+
+ unsigned AddrOpIdx = isUpdating ? 1 : 2;
+ unsigned Vec0Idx = 3;
+ EVT VT = N->getOperand(Vec0Idx).getValueType();
+ unsigned OpcodeIndex;
+ bool is64BitVector = VT.is64BitVector();
+ switch (VT.getScalarType().getSizeInBits()) {
+ case 8: OpcodeIndex = is64BitVector ? 0 : 4; break;
+ case 16: OpcodeIndex = is64BitVector ? 1 : 5; break;
+ case 32: OpcodeIndex = is64BitVector ? 2 : 6; break;
+ case 64: OpcodeIndex = is64BitVector ? 3 : 7; break;
+ default: llvm_unreachable("unhandled vector store type");
+ }
+ unsigned Opc = Opcodes[OpcodeIndex];
+
+ SmallVector<EVT, 2> ResTys;
+ if (isUpdating)
+ ResTys.push_back(MVT::i64);
+ ResTys.push_back(MVT::Other); // Type for the Chain
+
+ SmallVector<SDValue, 6> Ops;
+ Ops.push_back(N->getOperand(AddrOpIdx)); // Push back the Memory Address
+
+ if (isUpdating) {
+ SDValue Inc = N->getOperand(AddrOpIdx + 1);
+ if (!isa<ConstantSDNode>(Inc.getNode())) // Increment in Register
+ Opc = getVLDSTRegisterUpdateOpcode(Opc);
+ Ops.push_back(Inc);
+ }
+
+ SmallVector<SDValue, 4> Regs(N->op_begin() + Vec0Idx,
+ N->op_begin() + Vec0Idx + NumVecs);
+ SDValue SrcReg = is64BitVector ? createDTuple(Regs) : createQTuple(Regs);
+ Ops.push_back(SrcReg);
+
+ // Push back the Chain
+ Ops.push_back(N->getOperand(0));
+
+ // Transfer memoperands.
+ SDNode *VSt = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
+ cast<MachineSDNode>(VSt)->setMemRefs(MemOp, MemOp + 1);
+
+ return VSt;
+}
+
+SDValue
+AArch64DAGToDAGISel::getTargetSubregToReg(int SRIdx, SDLoc DL, EVT VT, EVT VTD,
+ SDValue Operand) {
+ SDNode *Reg = CurDAG->getMachineNode(TargetOpcode::SUBREG_TO_REG, DL,
+ VT, VTD, MVT::Other,
+ CurDAG->getTargetConstant(0, MVT::i64),
+ Operand,
+ CurDAG->getTargetConstant(AArch64::sub_64, MVT::i32));
+ return SDValue(Reg, 0);
+}
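
getTargetSubregToReg wraps a SUBREG_TO_REG node: it places a 64-bit value in the sub_64 slot of a wider register so the 128-bit-only lane instructions below can consume it (the leading constant-0 operand records that the remaining high bits may be treated as zero). Modeled on plain data (hypothetical types):

#include <cstdint>

struct V64  { uint64_t Bits; };
struct V128 { uint64_t Lo, Hi; };

// Place a 64-bit vector value in the low sub-register of a 128-bit one.
static V128 subregToReg(V64 Src) {
  V128 Wide;
  Wide.Lo = Src.Bits; // payload lands in the sub_64 slot
  Wide.Hi = 0;        // high half treated as zero, per the constant-0 operand
  return Wide;
}
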
+
+SDNode *AArch64DAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating,
+ unsigned NumVecs,
+ const uint16_t *Opcodes) {
+ assert(NumVecs >=2 && NumVecs <= 4 && "Load Dup NumVecs out-of-range");
+ SDLoc dl(N);
+
+ EVT VT = N->getValueType(0);
+ unsigned OpcodeIndex;
+ bool is64BitVector = VT.is64BitVector();
+ switch (VT.getScalarType().getSizeInBits()) {
+ case 8: OpcodeIndex = is64BitVector ? 0 : 4; break;
+ case 16: OpcodeIndex = is64BitVector ? 1 : 5; break;
+ case 32: OpcodeIndex = is64BitVector ? 2 : 6; break;
+ case 64: OpcodeIndex = is64BitVector ? 3 : 7; break;
+ default: llvm_unreachable("unhandled vector duplicate lane load type");
+ }
+ unsigned Opc = Opcodes[OpcodeIndex];
+
+ SDValue SuperReg;
+ SmallVector<SDValue, 6> Ops;
+ Ops.push_back(N->getOperand(1)); // Push back the Memory Address
+ if (isUpdating) {
+ SDValue Inc = N->getOperand(2);
+ if (!isa<ConstantSDNode>(Inc.getNode())) // Increment in Register
+ Opc = getVLDSTRegisterUpdateOpcode(Opc);
+ Ops.push_back(Inc);
+ }
+ Ops.push_back(N->getOperand(0)); // Push back the Chain
+
+ SmallVector<EVT, 3> ResTys;
+ // Push back the type of return super register
+ if (NumVecs == 3)
+ ResTys.push_back(MVT::Untyped);
+ else {
+ EVT ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64,
+ is64BitVector ? NumVecs : NumVecs * 2);
+ ResTys.push_back(ResTy);
+ }
+ if (isUpdating)
+ ResTys.push_back(MVT::i64); // Type of the updated register
+ ResTys.push_back(MVT::Other); // Type of the Chain
+ SDNode *VLdDup = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
+
+ // Transfer memoperands.
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
+ cast<MachineSDNode>(VLdDup)->setMemRefs(MemOp, MemOp + 1);
+
+ SuperReg = SDValue(VLdDup, 0);
+ unsigned Sub0 = is64BitVector ? AArch64::dsub_0 : AArch64::qsub_0;
+ // Update the uses of each register in the super register
+ for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
+ ReplaceUses(SDValue(N, Vec),
+ CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
+ // Update uses of the Chain
+ ReplaceUses(SDValue(N, NumVecs), SDValue(VLdDup, 1));
+ if (isUpdating)
+ ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdDup, 2));
+ return NULL;
+}
+
+// Load/store lane instructions only exist in 128-bit vector forms, so a
+// 64-bit vector is also selected to the 128-bit instruction: SUBREG_TO_REG
+// widens the input to a 128-bit vector, and EXTRACT_SUBREG recovers the
+// 64-bit vector from the 128-bit output.
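+// For example, a 64-bit v2f32 operand of a 2-vector lane load is widened and
+// selected as the 128-bit form (illustrative assembly):
+//   ld2 { v0.s, v1.s }[1], [x0]
+// with an EXTRACT_SUBREG of sub_64 applied to each result afterwards.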
+SDNode *AArch64DAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad,
+ bool isUpdating, unsigned NumVecs,
+ const uint16_t *Opcodes) {
+ assert(NumVecs >= 2 && NumVecs <= 4 && "VLDSTLane NumVecs out-of-range");
+ SDLoc dl(N);
+ unsigned AddrOpIdx = isUpdating ? 1 : 2;
+ unsigned Vec0Idx = 3;
+
+ SDValue Chain = N->getOperand(0);
+ unsigned Lane =
+ cast<ConstantSDNode>(N->getOperand(Vec0Idx + NumVecs))->getZExtValue();
+ EVT VT = N->getOperand(Vec0Idx).getValueType();
+ bool is64BitVector = VT.is64BitVector();
+ EVT VT64; // 64-bit Vector Type
+
+ if (is64BitVector) {
+ VT64 = VT;
+ VT = EVT::getVectorVT(*CurDAG->getContext(), VT.getVectorElementType(),
+ VT.getVectorNumElements() * 2);
+ }
+
+ unsigned OpcodeIndex;
+ switch (VT.getScalarType().getSizeInBits()) {
+ case 8: OpcodeIndex = 0; break;
+ case 16: OpcodeIndex = 1; break;
+ case 32: OpcodeIndex = 2; break;
+ case 64: OpcodeIndex = 3; break;
+ default: llvm_unreachable("unhandled vector lane load/store type");
+ }
+ unsigned Opc = Opcodes[OpcodeIndex];
+
+ SmallVector<EVT, 3> ResTys;
+ if (IsLoad) {
+ // Push back the type of the returned super register
+ if (NumVecs == 3)
+ ResTys.push_back(MVT::Untyped);
+ else {
+ EVT ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64,
+ is64BitVector ? NumVecs : NumVecs * 2);
+ ResTys.push_back(ResTy);
+ }
+ }
+ if (isUpdating)
+ ResTys.push_back(MVT::i64); // Type of the updated register
+ ResTys.push_back(MVT::Other); // Type of Chain
+ SmallVector<SDValue, 5> Ops;
+ Ops.push_back(N->getOperand(AddrOpIdx)); // Push back the Memory Address
+ if (isUpdating) {
+ SDValue Inc = N->getOperand(AddrOpIdx + 1);
+ if (!isa<ConstantSDNode>(Inc.getNode())) // Increment in Register
+ Opc = getVLDSTRegisterUpdateOpcode(Opc);
+ Ops.push_back(Inc);
+ }
+
+ SmallVector<SDValue, 4> Regs(N->op_begin() + Vec0Idx,
+ N->op_begin() + Vec0Idx + NumVecs);
+ if (is64BitVector)
+ for (unsigned i = 0; i < Regs.size(); i++)
+ Regs[i] = getTargetSubregToReg(AArch64::sub_64, dl, VT, VT64, Regs[i]);
+ SDValue SuperReg = createQTuple(Regs);
+
+ Ops.push_back(SuperReg); // Source Reg
+ SDValue LaneValue = CurDAG->getTargetConstant(Lane, MVT::i32);
+ Ops.push_back(LaneValue);
+ Ops.push_back(Chain); // Push back the Chain
+
+ SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
+ cast<MachineSDNode>(VLdLn)->setMemRefs(MemOp, MemOp + 1);
+ if (!IsLoad)
+ return VLdLn;
+
+ // Extract the subregisters.
+ SuperReg = SDValue(VLdLn, 0);
+ unsigned Sub0 = AArch64::qsub_0;
+ // Update the uses of each register in the super register
+ for (unsigned Vec = 0; Vec < NumVecs; ++Vec) {
+ SDValue SUB0 = CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg);
+ if (is64BitVector) {
+ SUB0 = CurDAG->getTargetExtractSubreg(AArch64::sub_64, dl, VT64, SUB0);
+ }
+ ReplaceUses(SDValue(N, Vec), SUB0);
+ }
+ ReplaceUses(SDValue(N, NumVecs), SDValue(VLdLn, 1));
+ if (isUpdating)
+ ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdLn, 2));
+ return NULL;
+}
+
+unsigned AArch64DAGToDAGISel::getTBLOpc(bool IsExt, bool Is64Bit,
+ unsigned NumOfVec) {
+ assert(NumOfVec >= 1 && NumOfVec <= 4 && "TBL/TBX NumOfVec out-of-range");
+
+ unsigned Opc = 0;
+ switch (NumOfVec) {
+ default:
+ break;
+ case 1:
+ if (IsExt)
+ Opc = Is64Bit ? AArch64::TBX1_8b : AArch64::TBX1_16b;
+ else
+ Opc = Is64Bit ? AArch64::TBL1_8b : AArch64::TBL1_16b;
+ break;
+ case 2:
+ if (IsExt)
+ Opc = Is64Bit ? AArch64::TBX2_8b : AArch64::TBX2_16b;
+ else
+ Opc = Is64Bit ? AArch64::TBL2_8b : AArch64::TBL2_16b;
+ break;
+ case 3:
+ if (IsExt)
+ Opc = Is64Bit ? AArch64::TBX3_8b : AArch64::TBX3_16b;
+ else
+ Opc = Is64Bit ? AArch64::TBL3_8b : AArch64::TBL3_16b;
+ break;
+ case 4:
+ if (IsExt)
+ Opc = Is64Bit ? AArch64::TBX4_8b : AArch64::TBX4_16b;
+ else
+ Opc = Is64Bit ? AArch64::TBL4_8b : AArch64::TBL4_16b;
+ break;
+ }
+
+ return Opc;
+}
+
+SDNode *AArch64DAGToDAGISel::SelectVTBL(SDNode *N, unsigned NumVecs,
+ bool IsExt) {
+ assert(NumVecs >= 1 && NumVecs <= 4 && "VTBL NumVecs out-of-range");
+ SDLoc dl(N);
+
+ // The lookup-table operands of vtbl/vtbx must be 128-bit vectors.
+ unsigned Vec0Idx = IsExt ? 2 : 1;
+ assert(!N->getOperand(Vec0Idx + 0).getValueType().is64BitVector() &&
+ "Lookup-table operands of vtbl and vtbx must be 128-bit");
+
+ // Check whether the result type is a 64-bit vector.
+ EVT ResVT = N->getValueType(0);
+ bool is64BitRes = ResVT.is64BitVector();
+
+ // Create new SDValue for vector list
+ SmallVector<SDValue, 4> Regs(N->op_begin() + Vec0Idx,
+ N->op_begin() + Vec0Idx + NumVecs);
+ SDValue TblReg = createQTuple(Regs);
+ unsigned Opc = getTBLOpc(IsExt, is64BitRes, NumVecs);
+
+ SmallVector<SDValue, 3> Ops;
+ if (IsExt)
+ Ops.push_back(N->getOperand(1));
+ Ops.push_back(TblReg);
+ Ops.push_back(N->getOperand(Vec0Idx + NumVecs));
+ return CurDAG->getMachineNode(Opc, dl, ResVT, Ops);
+}
+
SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
// Dump information about the Node being selected
DEBUG(dbgs() << "Selecting: "; Node->dump(CurDAG); dbgs() << "\n");
if (Node->isMachineOpcode()) {
DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << "\n");
+ Node->setNodeId(-1);
return NULL;
}
@@ -535,6 +1169,399 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
Node = ResNode;
break;
}
+ case AArch64ISD::NEON_LD1_UPD: {
+ static const uint16_t Opcodes[] = {
+ AArch64::LD1WB_8B_fixed, AArch64::LD1WB_4H_fixed,
+ AArch64::LD1WB_2S_fixed, AArch64::LD1WB_1D_fixed,
+ AArch64::LD1WB_16B_fixed, AArch64::LD1WB_8H_fixed,
+ AArch64::LD1WB_4S_fixed, AArch64::LD1WB_2D_fixed
+ };
+ return SelectVLD(Node, true, 1, Opcodes);
+ }
+ case AArch64ISD::NEON_LD2_UPD: {
+ static const uint16_t Opcodes[] = {
+ AArch64::LD2WB_8B_fixed, AArch64::LD2WB_4H_fixed,
+ AArch64::LD2WB_2S_fixed, AArch64::LD1x2WB_1D_fixed,
+ AArch64::LD2WB_16B_fixed, AArch64::LD2WB_8H_fixed,
+ AArch64::LD2WB_4S_fixed, AArch64::LD2WB_2D_fixed
+ };
+ return SelectVLD(Node, true, 2, Opcodes);
+ }
+ case AArch64ISD::NEON_LD3_UPD: {
+ static const uint16_t Opcodes[] = {
+ AArch64::LD3WB_8B_fixed, AArch64::LD3WB_4H_fixed,
+ AArch64::LD3WB_2S_fixed, AArch64::LD1x3WB_1D_fixed,
+ AArch64::LD3WB_16B_fixed, AArch64::LD3WB_8H_fixed,
+ AArch64::LD3WB_4S_fixed, AArch64::LD3WB_2D_fixed
+ };
+ return SelectVLD(Node, true, 3, Opcodes);
+ }
+ case AArch64ISD::NEON_LD4_UPD: {
+ static const uint16_t Opcodes[] = {
+ AArch64::LD4WB_8B_fixed, AArch64::LD4WB_4H_fixed,
+ AArch64::LD4WB_2S_fixed, AArch64::LD1x4WB_1D_fixed,
+ AArch64::LD4WB_16B_fixed, AArch64::LD4WB_8H_fixed,
+ AArch64::LD4WB_4S_fixed, AArch64::LD4WB_2D_fixed
+ };
+ return SelectVLD(Node, true, 4, Opcodes);
+ }
+ case AArch64ISD::NEON_LD1x2_UPD: {
+ static const uint16_t Opcodes[] = {
+ AArch64::LD1x2WB_8B_fixed, AArch64::LD1x2WB_4H_fixed,
+ AArch64::LD1x2WB_2S_fixed, AArch64::LD1x2WB_1D_fixed,
+ AArch64::LD1x2WB_16B_fixed, AArch64::LD1x2WB_8H_fixed,
+ AArch64::LD1x2WB_4S_fixed, AArch64::LD1x2WB_2D_fixed
+ };
+ return SelectVLD(Node, true, 2, Opcodes);
+ }
+ case AArch64ISD::NEON_LD1x3_UPD: {
+ static const uint16_t Opcodes[] = {
+ AArch64::LD1x3WB_8B_fixed, AArch64::LD1x3WB_4H_fixed,
+ AArch64::LD1x3WB_2S_fixed, AArch64::LD1x3WB_1D_fixed,
+ AArch64::LD1x3WB_16B_fixed, AArch64::LD1x3WB_8H_fixed,
+ AArch64::LD1x3WB_4S_fixed, AArch64::LD1x3WB_2D_fixed
+ };
+ return SelectVLD(Node, true, 3, Opcodes);
+ }
+ case AArch64ISD::NEON_LD1x4_UPD: {
+ static const uint16_t Opcodes[] = {
+ AArch64::LD1x4WB_8B_fixed, AArch64::LD1x4WB_4H_fixed,
+ AArch64::LD1x4WB_2S_fixed, AArch64::LD1x4WB_1D_fixed,
+ AArch64::LD1x4WB_16B_fixed, AArch64::LD1x4WB_8H_fixed,
+ AArch64::LD1x4WB_4S_fixed, AArch64::LD1x4WB_2D_fixed
+ };
+ return SelectVLD(Node, true, 4, Opcodes);
+ }
+ case AArch64ISD::NEON_ST1_UPD: {
+ static const uint16_t Opcodes[] = {
+ AArch64::ST1WB_8B_fixed, AArch64::ST1WB_4H_fixed,
+ AArch64::ST1WB_2S_fixed, AArch64::ST1WB_1D_fixed,
+ AArch64::ST1WB_16B_fixed, AArch64::ST1WB_8H_fixed,
+ AArch64::ST1WB_4S_fixed, AArch64::ST1WB_2D_fixed
+ };
+ return SelectVST(Node, true, 1, Opcodes);
+ }
+ case AArch64ISD::NEON_ST2_UPD: {
+ static const uint16_t Opcodes[] = {
+ AArch64::ST2WB_8B_fixed, AArch64::ST2WB_4H_fixed,
+ AArch64::ST2WB_2S_fixed, AArch64::ST1x2WB_1D_fixed,
+ AArch64::ST2WB_16B_fixed, AArch64::ST2WB_8H_fixed,
+ AArch64::ST2WB_4S_fixed, AArch64::ST2WB_2D_fixed
+ };
+ return SelectVST(Node, true, 2, Opcodes);
+ }
+ case AArch64ISD::NEON_ST3_UPD: {
+ static const uint16_t Opcodes[] = {
+ AArch64::ST3WB_8B_fixed, AArch64::ST3WB_4H_fixed,
+ AArch64::ST3WB_2S_fixed, AArch64::ST1x3WB_1D_fixed,
+ AArch64::ST3WB_16B_fixed, AArch64::ST3WB_8H_fixed,
+ AArch64::ST3WB_4S_fixed, AArch64::ST3WB_2D_fixed
+ };
+ return SelectVST(Node, true, 3, Opcodes);
+ }
+ case AArch64ISD::NEON_ST4_UPD: {
+ static const uint16_t Opcodes[] = {
+ AArch64::ST4WB_8B_fixed, AArch64::ST4WB_4H_fixed,
+ AArch64::ST4WB_2S_fixed, AArch64::ST1x4WB_1D_fixed,
+ AArch64::ST4WB_16B_fixed, AArch64::ST4WB_8H_fixed,
+ AArch64::ST4WB_4S_fixed, AArch64::ST4WB_2D_fixed
+ };
+ return SelectVST(Node, true, 4, Opcodes);
+ }
+ case AArch64ISD::NEON_LD2DUP: {
+ static const uint16_t Opcodes[] = {
+ AArch64::LD2R_8B, AArch64::LD2R_4H, AArch64::LD2R_2S,
+ AArch64::LD2R_1D, AArch64::LD2R_16B, AArch64::LD2R_8H,
+ AArch64::LD2R_4S, AArch64::LD2R_2D
+ };
+ return SelectVLDDup(Node, false, 2, Opcodes);
+ }
+ case AArch64ISD::NEON_LD3DUP: {
+ static const uint16_t Opcodes[] = {
+ AArch64::LD3R_8B, AArch64::LD3R_4H, AArch64::LD3R_2S,
+ AArch64::LD3R_1D, AArch64::LD3R_16B, AArch64::LD3R_8H,
+ AArch64::LD3R_4S, AArch64::LD3R_2D
+ };
+ return SelectVLDDup(Node, false, 3, Opcodes);
+ }
+ case AArch64ISD::NEON_LD4DUP: {
+ static const uint16_t Opcodes[] = {
+ AArch64::LD4R_8B, AArch64::LD4R_4H, AArch64::LD4R_2S,
+ AArch64::LD4R_1D, AArch64::LD4R_16B, AArch64::LD4R_8H,
+ AArch64::LD4R_4S, AArch64::LD4R_2D
+ };
+ return SelectVLDDup(Node, false, 4, Opcodes);
+ }
+ case AArch64ISD::NEON_LD2DUP_UPD: {
+ static const uint16_t Opcodes[] = {
+ AArch64::LD2R_WB_8B_fixed, AArch64::LD2R_WB_4H_fixed,
+ AArch64::LD2R_WB_2S_fixed, AArch64::LD2R_WB_1D_fixed,
+ AArch64::LD2R_WB_16B_fixed, AArch64::LD2R_WB_8H_fixed,
+ AArch64::LD2R_WB_4S_fixed, AArch64::LD2R_WB_2D_fixed
+ };
+ return SelectVLDDup(Node, true, 2, Opcodes);
+ }
+ case AArch64ISD::NEON_LD3DUP_UPD: {
+ static const uint16_t Opcodes[] = {
+ AArch64::LD3R_WB_8B_fixed, AArch64::LD3R_WB_4H_fixed,
+ AArch64::LD3R_WB_2S_fixed, AArch64::LD3R_WB_1D_fixed,
+ AArch64::LD3R_WB_16B_fixed, AArch64::LD3R_WB_8H_fixed,
+ AArch64::LD3R_WB_4S_fixed, AArch64::LD3R_WB_2D_fixed
+ };
+ return SelectVLDDup(Node, true, 3, Opcodes);
+ }
+ case AArch64ISD::NEON_LD4DUP_UPD: {
+ static const uint16_t Opcodes[] = {
+ AArch64::LD4R_WB_8B_fixed, AArch64::LD4R_WB_4H_fixed,
+ AArch64::LD4R_WB_2S_fixed, AArch64::LD4R_WB_1D_fixed,
+ AArch64::LD4R_WB_16B_fixed, AArch64::LD4R_WB_8H_fixed,
+ AArch64::LD4R_WB_4S_fixed, AArch64::LD4R_WB_2D_fixed
+ };
+ return SelectVLDDup(Node, true, 4, Opcodes);
+ }
+ case AArch64ISD::NEON_LD2LN_UPD: {
+ static const uint16_t Opcodes[] = {
+ AArch64::LD2LN_WB_B_fixed, AArch64::LD2LN_WB_H_fixed,
+ AArch64::LD2LN_WB_S_fixed, AArch64::LD2LN_WB_D_fixed
+ };
+ return SelectVLDSTLane(Node, true, true, 2, Opcodes);
+ }
+ case AArch64ISD::NEON_LD3LN_UPD: {
+ static const uint16_t Opcodes[] = {
+ AArch64::LD3LN_WB_B_fixed, AArch64::LD3LN_WB_H_fixed,
+ AArch64::LD3LN_WB_S_fixed, AArch64::LD3LN_WB_D_fixed
+ };
+ return SelectVLDSTLane(Node, true, true, 3, Opcodes);
+ }
+ case AArch64ISD::NEON_LD4LN_UPD: {
+ static const uint16_t Opcodes[] = {
+ AArch64::LD4LN_WB_B_fixed, AArch64::LD4LN_WB_H_fixed,
+ AArch64::LD4LN_WB_S_fixed, AArch64::LD4LN_WB_D_fixed
+ };
+ return SelectVLDSTLane(Node, true, true, 4, Opcodes);
+ }
+ case AArch64ISD::NEON_ST2LN_UPD: {
+ static const uint16_t Opcodes[] = {
+ AArch64::ST2LN_WB_B_fixed, AArch64::ST2LN_WB_H_fixed,
+ AArch64::ST2LN_WB_S_fixed, AArch64::ST2LN_WB_D_fixed
+ };
+ return SelectVLDSTLane(Node, false, true, 2, Opcodes);
+ }
+ case AArch64ISD::NEON_ST3LN_UPD: {
+ static const uint16_t Opcodes[] = {
+ AArch64::ST3LN_WB_B_fixed, AArch64::ST3LN_WB_H_fixed,
+ AArch64::ST3LN_WB_S_fixed, AArch64::ST3LN_WB_D_fixed
+ };
+ return SelectVLDSTLane(Node, false, true, 3, Opcodes);
+ }
+ case AArch64ISD::NEON_ST4LN_UPD: {
+ static const uint16_t Opcodes[] = {
+ AArch64::ST4LN_WB_B_fixed, AArch64::ST4LN_WB_H_fixed,
+ AArch64::ST4LN_WB_S_fixed, AArch64::ST4LN_WB_D_fixed
+ };
+ return SelectVLDSTLane(Node, false, true, 4, Opcodes);
+ }
+ case AArch64ISD::NEON_ST1x2_UPD: {
+ static const uint16_t Opcodes[] = {
+ AArch64::ST1x2WB_8B_fixed, AArch64::ST1x2WB_4H_fixed,
+ AArch64::ST1x2WB_2S_fixed, AArch64::ST1x2WB_1D_fixed,
+ AArch64::ST1x2WB_16B_fixed, AArch64::ST1x2WB_8H_fixed,
+ AArch64::ST1x2WB_4S_fixed, AArch64::ST1x2WB_2D_fixed
+ };
+ return SelectVST(Node, true, 2, Opcodes);
+ }
+ case AArch64ISD::NEON_ST1x3_UPD: {
+ static const uint16_t Opcodes[] = {
+ AArch64::ST1x3WB_8B_fixed, AArch64::ST1x3WB_4H_fixed,
+ AArch64::ST1x3WB_2S_fixed, AArch64::ST1x3WB_1D_fixed,
+ AArch64::ST1x3WB_16B_fixed, AArch64::ST1x3WB_8H_fixed,
+ AArch64::ST1x3WB_4S_fixed, AArch64::ST1x3WB_2D_fixed
+ };
+ return SelectVST(Node, true, 3, Opcodes);
+ }
+ case AArch64ISD::NEON_ST1x4_UPD: {
+ static const uint16_t Opcodes[] = {
+ AArch64::ST1x4WB_8B_fixed, AArch64::ST1x4WB_4H_fixed,
+ AArch64::ST1x4WB_2S_fixed, AArch64::ST1x4WB_1D_fixed,
+ AArch64::ST1x4WB_16B_fixed, AArch64::ST1x4WB_8H_fixed,
+ AArch64::ST1x4WB_4S_fixed, AArch64::ST1x4WB_2D_fixed
+ };
+ return SelectVST(Node, true, 4, Opcodes);
+ }
+ case ISD::INTRINSIC_WO_CHAIN: {
+ unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue();
+ bool IsExt = false;
+ switch (IntNo) {
+ default:
+ break;
+ case Intrinsic::aarch64_neon_vtbx1:
+ IsExt = true;
+ // Fallthrough
+ case Intrinsic::aarch64_neon_vtbl1:
+ return SelectVTBL(Node, 1, IsExt);
+ case Intrinsic::aarch64_neon_vtbx2:
+ IsExt = true;
+ // Fallthrough
+ case Intrinsic::aarch64_neon_vtbl2:
+ return SelectVTBL(Node, 2, IsExt);
+ case Intrinsic::aarch64_neon_vtbx3:
+ IsExt = true;
+ // Fallthrough
+ case Intrinsic::aarch64_neon_vtbl3:
+ return SelectVTBL(Node, 3, IsExt);
+ case Intrinsic::aarch64_neon_vtbx4:
+ IsExt = true;
+ // Fallthrough
+ case Intrinsic::aarch64_neon_vtbl4:
+ return SelectVTBL(Node, 4, IsExt);
+ }
+ break;
+ }
+ case ISD::INTRINSIC_VOID:
+ case ISD::INTRINSIC_W_CHAIN: {
+ unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
+ switch (IntNo) {
+ default:
+ break;
+ case Intrinsic::arm_neon_vld1: {
+ static const uint16_t Opcodes[] = {
+ AArch64::LD1_8B, AArch64::LD1_4H, AArch64::LD1_2S, AArch64::LD1_1D,
+ AArch64::LD1_16B, AArch64::LD1_8H, AArch64::LD1_4S, AArch64::LD1_2D
+ };
+ return SelectVLD(Node, false, 1, Opcodes);
+ }
+ case Intrinsic::arm_neon_vld2: {
+ static const uint16_t Opcodes[] = {
+ AArch64::LD2_8B, AArch64::LD2_4H, AArch64::LD2_2S, AArch64::LD1x2_1D,
+ AArch64::LD2_16B, AArch64::LD2_8H, AArch64::LD2_4S, AArch64::LD2_2D
+ };
+ return SelectVLD(Node, false, 2, Opcodes);
+ }
+ case Intrinsic::arm_neon_vld3: {
+ static const uint16_t Opcodes[] = {
+ AArch64::LD3_8B, AArch64::LD3_4H, AArch64::LD3_2S, AArch64::LD1x3_1D,
+ AArch64::LD3_16B, AArch64::LD3_8H, AArch64::LD3_4S, AArch64::LD3_2D
+ };
+ return SelectVLD(Node, false, 3, Opcodes);
+ }
+ case Intrinsic::arm_neon_vld4: {
+ static const uint16_t Opcodes[] = {
+ AArch64::LD4_8B, AArch64::LD4_4H, AArch64::LD4_2S, AArch64::LD1x4_1D,
+ AArch64::LD4_16B, AArch64::LD4_8H, AArch64::LD4_4S, AArch64::LD4_2D
+ };
+ return SelectVLD(Node, false, 4, Opcodes);
+ }
+ case Intrinsic::aarch64_neon_vld1x2: {
+ static const uint16_t Opcodes[] = {
+ AArch64::LD1x2_8B, AArch64::LD1x2_4H, AArch64::LD1x2_2S,
+ AArch64::LD1x2_1D, AArch64::LD1x2_16B, AArch64::LD1x2_8H,
+ AArch64::LD1x2_4S, AArch64::LD1x2_2D
+ };
+ return SelectVLD(Node, false, 2, Opcodes);
+ }
+ case Intrinsic::aarch64_neon_vld1x3: {
+ static const uint16_t Opcodes[] = {
+ AArch64::LD1x3_8B, AArch64::LD1x3_4H, AArch64::LD1x3_2S,
+ AArch64::LD1x3_1D, AArch64::LD1x3_16B, AArch64::LD1x3_8H,
+ AArch64::LD1x3_4S, AArch64::LD1x3_2D
+ };
+ return SelectVLD(Node, false, 3, Opcodes);
+ }
+ case Intrinsic::aarch64_neon_vld1x4: {
+ static const uint16_t Opcodes[] = {
+ AArch64::LD1x4_8B, AArch64::LD1x4_4H, AArch64::LD1x4_2S,
+ AArch64::LD1x4_1D, AArch64::LD1x4_16B, AArch64::LD1x4_8H,
+ AArch64::LD1x4_4S, AArch64::LD1x4_2D
+ };
+ return SelectVLD(Node, false, 4, Opcodes);
+ }
+ case Intrinsic::arm_neon_vst1: {
+ static const uint16_t Opcodes[] = {
+ AArch64::ST1_8B, AArch64::ST1_4H, AArch64::ST1_2S, AArch64::ST1_1D,
+ AArch64::ST1_16B, AArch64::ST1_8H, AArch64::ST1_4S, AArch64::ST1_2D
+ };
+ return SelectVST(Node, false, 1, Opcodes);
+ }
+ case Intrinsic::arm_neon_vst2: {
+ static const uint16_t Opcodes[] = {
+ AArch64::ST2_8B, AArch64::ST2_4H, AArch64::ST2_2S, AArch64::ST1x2_1D,
+ AArch64::ST2_16B, AArch64::ST2_8H, AArch64::ST2_4S, AArch64::ST2_2D
+ };
+ return SelectVST(Node, false, 2, Opcodes);
+ }
+ case Intrinsic::arm_neon_vst3: {
+ static const uint16_t Opcodes[] = {
+ AArch64::ST3_8B, AArch64::ST3_4H, AArch64::ST3_2S, AArch64::ST1x3_1D,
+ AArch64::ST3_16B, AArch64::ST3_8H, AArch64::ST3_4S, AArch64::ST3_2D
+ };
+ return SelectVST(Node, false, 3, Opcodes);
+ }
+ case Intrinsic::arm_neon_vst4: {
+ static const uint16_t Opcodes[] = {
+ AArch64::ST4_8B, AArch64::ST4_4H, AArch64::ST4_2S, AArch64::ST1x4_1D,
+ AArch64::ST4_16B, AArch64::ST4_8H, AArch64::ST4_4S, AArch64::ST4_2D
+ };
+ return SelectVST(Node, false, 4, Opcodes);
+ }
+ case Intrinsic::aarch64_neon_vst1x2: {
+ static const uint16_t Opcodes[] = {
+ AArch64::ST1x2_8B, AArch64::ST1x2_4H, AArch64::ST1x2_2S,
+ AArch64::ST1x2_1D, AArch64::ST1x2_16B, AArch64::ST1x2_8H,
+ AArch64::ST1x2_4S, AArch64::ST1x2_2D
+ };
+ return SelectVST(Node, false, 2, Opcodes);
+ }
+ case Intrinsic::aarch64_neon_vst1x3: {
+ static const uint16_t Opcodes[] = {
+ AArch64::ST1x3_8B, AArch64::ST1x3_4H, AArch64::ST1x3_2S,
+ AArch64::ST1x3_1D, AArch64::ST1x3_16B, AArch64::ST1x3_8H,
+ AArch64::ST1x3_4S, AArch64::ST1x3_2D
+ };
+ return SelectVST(Node, false, 3, Opcodes);
+ }
+ case Intrinsic::aarch64_neon_vst1x4: {
+ static const uint16_t Opcodes[] = {
+ AArch64::ST1x4_8B, AArch64::ST1x4_4H, AArch64::ST1x4_2S,
+ AArch64::ST1x4_1D, AArch64::ST1x4_16B, AArch64::ST1x4_8H,
+ AArch64::ST1x4_4S, AArch64::ST1x4_2D
+ };
+ return SelectVST(Node, false, 4, Opcodes);
+ }
+ case Intrinsic::arm_neon_vld2lane: {
+ static const uint16_t Opcodes[] = {
+ AArch64::LD2LN_B, AArch64::LD2LN_H, AArch64::LD2LN_S, AArch64::LD2LN_D
+ };
+ return SelectVLDSTLane(Node, true, false, 2, Opcodes);
+ }
+ case Intrinsic::arm_neon_vld3lane: {
+ static const uint16_t Opcodes[] = {
+ AArch64::LD3LN_B, AArch64::LD3LN_H, AArch64::LD3LN_S, AArch64::LD3LN_D
+ };
+ return SelectVLDSTLane(Node, true, false, 3, Opcodes);
+ }
+ case Intrinsic::arm_neon_vld4lane: {
+ static const uint16_t Opcodes[] = {
+ AArch64::LD4LN_B, AArch64::LD4LN_H, AArch64::LD4LN_S, AArch64::LD4LN_D
+ };
+ return SelectVLDSTLane(Node, true, false, 4, Opcodes);
+ }
+ case Intrinsic::arm_neon_vst2lane: {
+ static const uint16_t Opcodes[] = {
+ AArch64::ST2LN_B, AArch64::ST2LN_H, AArch64::ST2LN_S, AArch64::ST2LN_D
+ };
+ return SelectVLDSTLane(Node, false, false, 2, Opcodes);
+ }
+ case Intrinsic::arm_neon_vst3lane: {
+ static const uint16_t Opcodes[] = {
+ AArch64::ST3LN_B, AArch64::ST3LN_H, AArch64::ST3LN_S, AArch64::ST3LN_D
+ };
+ return SelectVLDSTLane(Node, false, false, 3, Opcodes);
+ }
+ case Intrinsic::arm_neon_vst4lane: {
+ static const uint16_t Opcodes[] = {
+ AArch64::ST4LN_B, AArch64::ST4LN_H, AArch64::ST4LN_S, AArch64::ST4LN_D
+ };
+ return SelectVLDSTLane(Node, false, false, 4, Opcodes);
+ }
+ } // End of switch IntNo
+ break;
+ } // End of case ISD::INTRINSIC_VOID and ISD::INTRINSIC_W_CHAIN
default:
break; // Let generic code handle it
}
diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp
index 44b691b..4fdb667 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -50,24 +50,33 @@ AArch64TargetLowering::AArch64TargetLowering(AArch64TargetMachine &TM)
// Scalar register <-> type mapping
addRegisterClass(MVT::i32, &AArch64::GPR32RegClass);
addRegisterClass(MVT::i64, &AArch64::GPR64RegClass);
- addRegisterClass(MVT::f16, &AArch64::FPR16RegClass);
- addRegisterClass(MVT::f32, &AArch64::FPR32RegClass);
- addRegisterClass(MVT::f64, &AArch64::FPR64RegClass);
- addRegisterClass(MVT::f128, &AArch64::FPR128RegClass);
+
+ if (Subtarget->hasFPARMv8()) {
+ addRegisterClass(MVT::f16, &AArch64::FPR16RegClass);
+ addRegisterClass(MVT::f32, &AArch64::FPR32RegClass);
+ addRegisterClass(MVT::f64, &AArch64::FPR64RegClass);
+ addRegisterClass(MVT::f128, &AArch64::FPR128RegClass);
+ }
if (Subtarget->hasNEON()) {
// And the vectors
- addRegisterClass(MVT::v8i8, &AArch64::VPR64RegClass);
- addRegisterClass(MVT::v4i16, &AArch64::VPR64RegClass);
- addRegisterClass(MVT::v2i32, &AArch64::VPR64RegClass);
- addRegisterClass(MVT::v1i64, &AArch64::VPR64RegClass);
- addRegisterClass(MVT::v2f32, &AArch64::VPR64RegClass);
- addRegisterClass(MVT::v16i8, &AArch64::VPR128RegClass);
- addRegisterClass(MVT::v8i16, &AArch64::VPR128RegClass);
- addRegisterClass(MVT::v4i32, &AArch64::VPR128RegClass);
- addRegisterClass(MVT::v2i64, &AArch64::VPR128RegClass);
- addRegisterClass(MVT::v4f32, &AArch64::VPR128RegClass);
- addRegisterClass(MVT::v2f64, &AArch64::VPR128RegClass);
+ addRegisterClass(MVT::v1i8, &AArch64::FPR8RegClass);
+ addRegisterClass(MVT::v1i16, &AArch64::FPR16RegClass);
+ addRegisterClass(MVT::v1i32, &AArch64::FPR32RegClass);
+ addRegisterClass(MVT::v1i64, &AArch64::FPR64RegClass);
+ addRegisterClass(MVT::v1f32, &AArch64::FPR32RegClass);
+ addRegisterClass(MVT::v1f64, &AArch64::FPR64RegClass);
+ addRegisterClass(MVT::v8i8, &AArch64::FPR64RegClass);
+ addRegisterClass(MVT::v4i16, &AArch64::FPR64RegClass);
+ addRegisterClass(MVT::v2i32, &AArch64::FPR64RegClass);
+ addRegisterClass(MVT::v2f32, &AArch64::FPR64RegClass);
+ addRegisterClass(MVT::v16i8, &AArch64::FPR128RegClass);
+ addRegisterClass(MVT::v8i16, &AArch64::FPR128RegClass);
+ addRegisterClass(MVT::v4i32, &AArch64::FPR128RegClass);
+ addRegisterClass(MVT::v2i64, &AArch64::FPR128RegClass);
+ addRegisterClass(MVT::v4f32, &AArch64::FPR128RegClass);
+ addRegisterClass(MVT::v2f64, &AArch64::FPR128RegClass);
}
computeRegisterProperties();
@@ -77,6 +86,12 @@ AArch64TargetLowering::AArch64TargetLowering(AArch64TargetMachine &TM)
setTargetDAGCombine(ISD::AND);
setTargetDAGCombine(ISD::SRA);
+ setTargetDAGCombine(ISD::SRL);
+ setTargetDAGCombine(ISD::SHL);
+
+ setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
+ setTargetDAGCombine(ISD::INTRINSIC_VOID);
+ setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
// AArch64 does not have i1 loads, or much of anything for i1 really.
setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
@@ -270,28 +285,89 @@ AArch64TargetLowering::AArch64TargetLowering(AArch64TargetMachine &TM)
setExceptionSelectorRegister(AArch64::X1);
if (Subtarget->hasNEON()) {
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v1i8, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v8i8, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v16i8, Custom);
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v1i16, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v4i16, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Custom);
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v1i32, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v2i32, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v1i64, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom);
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v1f32, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v2f32, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v1f64, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i8, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i8, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i16, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i16, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i32, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i32, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v1i64, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f32, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f32, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v1f64, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Custom);
+
+ setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i8, Legal);
+ setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i16, Legal);
+ setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i32, Legal);
+ setOperationAction(ISD::CONCAT_VECTORS, MVT::v2i64, Legal);
+ setOperationAction(ISD::CONCAT_VECTORS, MVT::v4f32, Legal);
+ setOperationAction(ISD::CONCAT_VECTORS, MVT::v2f64, Legal);
+
setOperationAction(ISD::SETCC, MVT::v8i8, Custom);
setOperationAction(ISD::SETCC, MVT::v16i8, Custom);
setOperationAction(ISD::SETCC, MVT::v4i16, Custom);
setOperationAction(ISD::SETCC, MVT::v8i16, Custom);
setOperationAction(ISD::SETCC, MVT::v2i32, Custom);
setOperationAction(ISD::SETCC, MVT::v4i32, Custom);
+ setOperationAction(ISD::SETCC, MVT::v1i64, Custom);
setOperationAction(ISD::SETCC, MVT::v2i64, Custom);
+ setOperationAction(ISD::SETCC, MVT::v1f32, Custom);
setOperationAction(ISD::SETCC, MVT::v2f32, Custom);
setOperationAction(ISD::SETCC, MVT::v4f32, Custom);
+ setOperationAction(ISD::SETCC, MVT::v1f64, Custom);
setOperationAction(ISD::SETCC, MVT::v2f64, Custom);
+
+ setOperationAction(ISD::FFLOOR, MVT::v2f32, Legal);
+ setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
+ setOperationAction(ISD::FFLOOR, MVT::v1f64, Legal);
+ setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
+
+ setOperationAction(ISD::FCEIL, MVT::v2f32, Legal);
+ setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
+ setOperationAction(ISD::FCEIL, MVT::v1f64, Legal);
+ setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
+
+ setOperationAction(ISD::FTRUNC, MVT::v2f32, Legal);
+ setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
+ setOperationAction(ISD::FTRUNC, MVT::v1f64, Legal);
+ setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
+
+ setOperationAction(ISD::FRINT, MVT::v2f32, Legal);
+ setOperationAction(ISD::FRINT, MVT::v4f32, Legal);
+ setOperationAction(ISD::FRINT, MVT::v1f64, Legal);
+ setOperationAction(ISD::FRINT, MVT::v2f64, Legal);
+
+ setOperationAction(ISD::FNEARBYINT, MVT::v2f32, Legal);
+ setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);
+ setOperationAction(ISD::FNEARBYINT, MVT::v1f64, Legal);
+ setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal);
+
+ setOperationAction(ISD::FROUND, MVT::v2f32, Legal);
+ setOperationAction(ISD::FROUND, MVT::v4f32, Legal);
+ setOperationAction(ISD::FROUND, MVT::v1f64, Legal);
+ setOperationAction(ISD::FROUND, MVT::v2f64, Legal);
}
}
@@ -333,6 +409,29 @@ static void getExclusiveOperation(unsigned Size, AtomicOrdering Ord,
StrOpc = StoreOps[Log2_32(Size)];
}
+// FIXME: AArch64::DTripleRegClass and AArch64::QTripleRegClass don't really
+// have a value type mapped; both are defined as MVT::untyped. Without knowing
+// the MVT, MachineLICM::getRegisterClassIDAndCost cannot figure out the
+// register pressure correctly.
+std::pair<const TargetRegisterClass*, uint8_t>
+AArch64TargetLowering::findRepresentativeClass(MVT VT) const{
+ const TargetRegisterClass *RRC = 0;
+ uint8_t Cost = 1;
+ switch (VT.SimpleTy) {
+ default:
+ return TargetLowering::findRepresentativeClass(VT);
+ case MVT::v4i64:
+ RRC = &AArch64::QPairRegClass;
+ Cost = 2;
+ break;
+ case MVT::v8i64:
+ RRC = &AArch64::QQuadRegClass;
+ Cost = 4;
+ break;
+ }
+ return std::make_pair(RRC, Cost);
+}
+
MachineBasicBlock *
AArch64TargetLowering::emitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
unsigned Size,
@@ -658,6 +757,12 @@ AArch64TargetLowering::EmitF128CSEL(MachineInstr *MI,
MBB->addSuccessor(TrueBB);
MBB->addSuccessor(EndBB);
+ if (!NZCVKilled) {
+ // NZCV is live-through TrueBB.
+ TrueBB->addLiveIn(AArch64::NZCV);
+ EndBB->addLiveIn(AArch64::NZCV);
+ }
+
// IfTrue:
// str qIFTRUE, [sp]
BuildMI(TrueBB, DL, TII->get(AArch64::LSFP128_STR))
@@ -672,8 +777,6 @@ AArch64TargetLowering::EmitF128CSEL(MachineInstr *MI,
// Done:
// ldr qDEST, [sp]
// [... rest of incoming MBB ...]
- if (!NZCVKilled)
- EndBB->addLiveIn(AArch64::NZCV);
MachineInstr *StartOfEnd = EndBB->begin();
BuildMI(*EndBB, StartOfEnd, DL, TII->get(AArch64::LSFP128_LDR), DestReg)
.addFrameIndex(ScratchFI)
@@ -833,6 +936,86 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
return "AArch64ISD::NEON_CMPZ";
case AArch64ISD::NEON_TST:
return "AArch64ISD::NEON_TST";
+ case AArch64ISD::NEON_QSHLs:
+ return "AArch64ISD::NEON_QSHLs";
+ case AArch64ISD::NEON_QSHLu:
+ return "AArch64ISD::NEON_QSHLu";
+ case AArch64ISD::NEON_VDUP:
+ return "AArch64ISD::NEON_VDUP";
+ case AArch64ISD::NEON_VDUPLANE:
+ return "AArch64ISD::NEON_VDUPLANE";
+ case AArch64ISD::NEON_REV16:
+ return "AArch64ISD::NEON_REV16";
+ case AArch64ISD::NEON_REV32:
+ return "AArch64ISD::NEON_REV32";
+ case AArch64ISD::NEON_REV64:
+ return "AArch64ISD::NEON_REV64";
+ case AArch64ISD::NEON_UZP1:
+ return "AArch64ISD::NEON_UZP1";
+ case AArch64ISD::NEON_UZP2:
+ return "AArch64ISD::NEON_UZP2";
+ case AArch64ISD::NEON_ZIP1:
+ return "AArch64ISD::NEON_ZIP1";
+ case AArch64ISD::NEON_ZIP2:
+ return "AArch64ISD::NEON_ZIP2";
+ case AArch64ISD::NEON_TRN1:
+ return "AArch64ISD::NEON_TRN1";
+ case AArch64ISD::NEON_TRN2:
+ return "AArch64ISD::NEON_TRN2";
+ case AArch64ISD::NEON_LD1_UPD:
+ return "AArch64ISD::NEON_LD1_UPD";
+ case AArch64ISD::NEON_LD2_UPD:
+ return "AArch64ISD::NEON_LD2_UPD";
+ case AArch64ISD::NEON_LD3_UPD:
+ return "AArch64ISD::NEON_LD3_UPD";
+ case AArch64ISD::NEON_LD4_UPD:
+ return "AArch64ISD::NEON_LD4_UPD";
+ case AArch64ISD::NEON_ST1_UPD:
+ return "AArch64ISD::NEON_ST1_UPD";
+ case AArch64ISD::NEON_ST2_UPD:
+ return "AArch64ISD::NEON_ST2_UPD";
+ case AArch64ISD::NEON_ST3_UPD:
+ return "AArch64ISD::NEON_ST3_UPD";
+ case AArch64ISD::NEON_ST4_UPD:
+ return "AArch64ISD::NEON_ST4_UPD";
+ case AArch64ISD::NEON_LD1x2_UPD:
+ return "AArch64ISD::NEON_LD1x2_UPD";
+ case AArch64ISD::NEON_LD1x3_UPD:
+ return "AArch64ISD::NEON_LD1x3_UPD";
+ case AArch64ISD::NEON_LD1x4_UPD:
+ return "AArch64ISD::NEON_LD1x4_UPD";
+ case AArch64ISD::NEON_ST1x2_UPD:
+ return "AArch64ISD::NEON_ST1x2_UPD";
+ case AArch64ISD::NEON_ST1x3_UPD:
+ return "AArch64ISD::NEON_ST1x3_UPD";
+ case AArch64ISD::NEON_ST1x4_UPD:
+ return "AArch64ISD::NEON_ST1x4_UPD";
+ case AArch64ISD::NEON_LD2DUP:
+ return "AArch64ISD::NEON_LD2DUP";
+ case AArch64ISD::NEON_LD3DUP:
+ return "AArch64ISD::NEON_LD3DUP";
+ case AArch64ISD::NEON_LD4DUP:
+ return "AArch64ISD::NEON_LD4DUP";
+ case AArch64ISD::NEON_LD2DUP_UPD:
+ return "AArch64ISD::NEON_LD2DUP_UPD";
+ case AArch64ISD::NEON_LD3DUP_UPD:
+ return "AArch64ISD::NEON_LD3DUP_UPD";
+ case AArch64ISD::NEON_LD4DUP_UPD:
+ return "AArch64ISD::NEON_LD4DUP_UPD";
+ case AArch64ISD::NEON_LD2LN_UPD:
+ return "AArch64ISD::NEON_LD2LN_UPD";
+ case AArch64ISD::NEON_LD3LN_UPD:
+ return "AArch64ISD::NEON_LD3LN_UPD";
+ case AArch64ISD::NEON_LD4LN_UPD:
+ return "AArch64ISD::NEON_LD4LN_UPD";
+ case AArch64ISD::NEON_ST2LN_UPD:
+ return "AArch64ISD::NEON_ST2LN_UPD";
+ case AArch64ISD::NEON_ST3LN_UPD:
+ return "AArch64ISD::NEON_ST3LN_UPD";
+ case AArch64ISD::NEON_ST4LN_UPD:
+ return "AArch64ISD::NEON_ST4LN_UPD";
+ case AArch64ISD::NEON_VEXTRACT:
+ return "AArch64ISD::NEON_VEXTRACT";
default:
return NULL;
}
@@ -908,24 +1091,31 @@ AArch64TargetLowering::SaveVarArgRegisters(CCState &CCInfo, SelectionDAG &DAG,
}
}
+ if (getSubtarget()->hasFPARMv8()) {
unsigned FPRSaveSize = 16 * (NumFPRArgRegs - FirstVariadicFPR);
int FPRIdx = 0;
- if (FPRSaveSize != 0) {
- FPRIdx = MFI->CreateStackObject(FPRSaveSize, 16, false);
-
- SDValue FIN = DAG.getFrameIndex(FPRIdx, getPointerTy());
-
- for (unsigned i = FirstVariadicFPR; i < NumFPRArgRegs; ++i) {
- unsigned VReg = MF.addLiveIn(AArch64FPRArgRegs[i],
- &AArch64::FPR128RegClass);
- SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, MVT::f128);
- SDValue Store = DAG.getStore(Val.getValue(1), DL, Val, FIN,
- MachinePointerInfo::getStack(i * 16),
- false, false, 0);
- MemOps.push_back(Store);
- FIN = DAG.getNode(ISD::ADD, DL, getPointerTy(), FIN,
- DAG.getConstant(16, getPointerTy()));
+ // According to the AArch64 Procedure Call Standard, section B.1/B.3, we
+ // can omit a register save area if we know we'll never use registers of
+ // that class.
+ if (FPRSaveSize != 0) {
+ FPRIdx = MFI->CreateStackObject(FPRSaveSize, 16, false);
+
+ SDValue FIN = DAG.getFrameIndex(FPRIdx, getPointerTy());
+
+ for (unsigned i = FirstVariadicFPR; i < NumFPRArgRegs; ++i) {
+ unsigned VReg = MF.addLiveIn(AArch64FPRArgRegs[i],
+ &AArch64::FPR128RegClass);
+ SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, MVT::f128);
+ SDValue Store = DAG.getStore(Val.getValue(1), DL, Val, FIN,
+ MachinePointerInfo::getStack(i * 16),
+ false, false, 0);
+ MemOps.push_back(Store);
+ FIN = DAG.getNode(ISD::ADD, DL, getPointerTy(), FIN,
+ DAG.getConstant(16, getPointerTy()));
+ }
}
+ FuncInfo->setVariadicFPRIdx(FPRIdx);
+ FuncInfo->setVariadicFPRSize(FPRSaveSize);
}
int StackIdx = MFI->CreateFixedObject(8, CCInfo.getNextStackOffset(), true);
@@ -933,8 +1123,6 @@ AArch64TargetLowering::SaveVarArgRegisters(CCState &CCInfo, SelectionDAG &DAG,
FuncInfo->setVariadicStackIdx(StackIdx);
FuncInfo->setVariadicGPRIdx(GPRIdx);
FuncInfo->setVariadicGPRSize(GPRSaveSize);
- FuncInfo->setVariadicFPRIdx(FPRIdx);
- FuncInfo->setVariadicFPRSize(FPRSaveSize);
if (!MemOps.empty()) {
Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, &MemOps[0],
@@ -1875,7 +2063,7 @@ AArch64TargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
SDValue SrcVal = Op.getOperand(0);
return makeLibCall(DAG, LC, Op.getValueType(), &SrcVal, 1,
- /*isSigned*/ false, SDLoc(Op));
+ /*isSigned*/ false, SDLoc(Op)).first;
}
SDValue
@@ -1905,6 +2093,45 @@ AArch64TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
return LowerF128ToCall(Op, DAG, LC);
}
+SDValue AArch64TargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MFI->setReturnAddressIsTaken(true);
+
+ EVT VT = Op.getValueType();
+ SDLoc dl(Op);
+ unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ if (Depth) {
+ SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
+ SDValue Offset = DAG.getConstant(8, MVT::i64);
+ return DAG.getLoad(VT, dl, DAG.getEntryNode(),
+ DAG.getNode(ISD::ADD, dl, VT, FrameAddr, Offset),
+ MachinePointerInfo(), false, false, false, 0);
+ }
+
+ // Return X30, which contains the return address. Mark it an implicit live-in.
+ unsigned Reg = MF.addLiveIn(AArch64::X30, getRegClassFor(MVT::i64));
+ return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, MVT::i64);
+}
+
+SDValue AArch64TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG)
+ const {
+ MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+ MFI->setFrameAddressIsTaken(true);
+
+ EVT VT = Op.getValueType();
+ SDLoc dl(Op);
+ unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ unsigned FrameReg = AArch64::X29;
+ SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT);
+ while (Depth--)
+ FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr,
+ MachinePointerInfo(),
+ false, false, false, 0);
+ return FrameAddr;
+}
+
SDValue
AArch64TargetLowering::LowerGlobalAddressELFLarge(SDValue Op,
SelectionDAG &DAG) const {
@@ -2650,6 +2877,8 @@ AArch64TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::UINT_TO_FP: return LowerINT_TO_FP(Op, DAG, false);
case ISD::FP_ROUND: return LowerFP_ROUND(Op, DAG);
case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG);
+ case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
+ case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
case ISD::BRCOND: return LowerBRCOND(Op, DAG);
@@ -2664,6 +2893,7 @@ AArch64TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::VASTART: return LowerVASTART(Op, DAG);
case ISD::BUILD_VECTOR:
return LowerBUILD_VECTOR(Op, DAG, getSubtarget());
+ case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
}
return SDValue();
@@ -3235,6 +3465,336 @@ static SDValue PerformSRACombine(SDNode *N,
DAG.getConstant(LSB + Width - 1, MVT::i64));
}
+/// Check if this is a valid build_vector for the immediate operand of
+/// a vector shift operation, where all the elements of the build_vector
+/// must have the same constant integer value.
+static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt) {
+ // Ignore bit_converts.
+ while (Op.getOpcode() == ISD::BITCAST)
+ Op = Op.getOperand(0);
+ BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
+ APInt SplatBits, SplatUndef;
+ unsigned SplatBitSize;
+ bool HasAnyUndefs;
+ if (!BVN || !BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize,
+ HasAnyUndefs, ElementBits) ||
+ SplatBitSize > ElementBits)
+ return false;
+ Cnt = SplatBits.getSExtValue();
+ return true;
+}
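+
+// For instance (a hypothetical example): the shift-amount operand
+//   <4 x i16> <i16 3, i16 3, i16 3, i16 3>
+// is accepted here with Cnt == 3, while a build_vector that mixes different
+// constants is rejected.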
+
+/// Check if this is a valid build_vector for the immediate operand of
+/// a vector shift left operation. That value must be in the range:
+/// 0 <= Value < ElementBits
+static bool isVShiftLImm(SDValue Op, EVT VT, int64_t &Cnt) {
+ assert(VT.isVector() && "vector shift count is not a vector type");
+ unsigned ElementBits = VT.getVectorElementType().getSizeInBits();
+ if (!getVShiftImm(Op, ElementBits, Cnt))
+ return false;
+ return (Cnt >= 0 && Cnt < ElementBits);
+}
+
+/// Check if this is a valid build_vector for the immediate operand of a
+/// vector shift right operation. The value must be in the range:
+/// 1 <= Value <= ElementBits
+static bool isVShiftRImm(SDValue Op, EVT VT, int64_t &Cnt) {
+ assert(VT.isVector() && "vector shift count is not a vector type");
+ unsigned ElementBits = VT.getVectorElementType().getSizeInBits();
+ if (!getVShiftImm(Op, ElementBits, Cnt))
+ return false;
+ return (Cnt >= 1 && Cnt <= ElementBits);
+}
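+
+// Note the asymmetry between the two checks, mirroring the instructions: for
+// an i16 element, a shift-left immediate covers 0..15 while a shift-right
+// immediate covers 1..16.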
+
+/// Checks for immediate versions of vector shifts and lowers them.
+static SDValue PerformShiftCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const AArch64Subtarget *ST) {
+ SelectionDAG &DAG = DCI.DAG;
+ EVT VT = N->getValueType(0);
+ if (N->getOpcode() == ISD::SRA && (VT == MVT::i32 || VT == MVT::i64))
+ return PerformSRACombine(N, DCI);
+
+ // Nothing to be done for scalar shifts.
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (!VT.isVector() || !TLI.isTypeLegal(VT))
+ return SDValue();
+
+ assert(ST->hasNEON() && "unexpected vector shift");
+ int64_t Cnt;
+
+ switch (N->getOpcode()) {
+ default:
+ llvm_unreachable("unexpected shift opcode");
+
+ case ISD::SHL:
+ if (isVShiftLImm(N->getOperand(1), VT, Cnt)) {
+ SDValue RHS =
+ DAG.getNode(AArch64ISD::NEON_VDUP, SDLoc(N->getOperand(1)), VT,
+ DAG.getConstant(Cnt, MVT::i32));
+ return DAG.getNode(ISD::SHL, SDLoc(N), VT, N->getOperand(0), RHS);
+ }
+ break;
+
+ case ISD::SRA:
+ case ISD::SRL:
+ if (isVShiftRImm(N->getOperand(1), VT, Cnt)) {
+ SDValue RHS =
+ DAG.getNode(AArch64ISD::NEON_VDUP, SDLoc(N->getOperand(1)), VT,
+ DAG.getConstant(Cnt, MVT::i32));
+ return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N->getOperand(0), RHS);
+ }
+ break;
+ }
+
+ return SDValue();
+}
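+
+// As an illustrative example: (shl <4 x i32> %x, <i32 5, i32 5, i32 5, i32 5>)
+// is rewritten with a NEON_VDUP of the constant 5, which the patterns can then
+// match as the immediate form, e.g. shl v0.4s, v1.4s, #5.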
+
+/// AArch64-specific DAG combining for intrinsics.
+static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG) {
+ unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
+
+ switch (IntNo) {
+ default:
+ // Don't do anything for most intrinsics.
+ break;
+
+ case Intrinsic::arm_neon_vqshifts:
+ case Intrinsic::arm_neon_vqshiftu:
+ EVT VT = N->getOperand(1).getValueType();
+ int64_t Cnt;
+ if (!isVShiftLImm(N->getOperand(2), VT, Cnt))
+ break;
+ unsigned VShiftOpc = (IntNo == Intrinsic::arm_neon_vqshifts)
+ ? AArch64ISD::NEON_QSHLs
+ : AArch64ISD::NEON_QSHLu;
+ return DAG.getNode(VShiftOpc, SDLoc(N), N->getValueType(0),
+ N->getOperand(1), DAG.getConstant(Cnt, MVT::i32));
+ }
+
+ return SDValue();
+}
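+
+// Sketch of the effect (operands abridged): a vqshifts call whose shift
+// amount is a splatted constant 3 becomes (NEON_QSHLs x, 3), which can then
+// be matched against the immediate form of SQSHL.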
+
+/// Target-specific DAG combine function for NEON load/store intrinsics
+/// to merge base address updates.
+static SDValue CombineBaseUpdate(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
+ return SDValue();
+
+ SelectionDAG &DAG = DCI.DAG;
+ bool isIntrinsic = (N->getOpcode() == ISD::INTRINSIC_VOID ||
+ N->getOpcode() == ISD::INTRINSIC_W_CHAIN);
+ unsigned AddrOpIdx = (isIntrinsic ? 2 : 1);
+ SDValue Addr = N->getOperand(AddrOpIdx);
+
+ // Search for a use of the address operand that is an increment.
+ for (SDNode::use_iterator UI = Addr.getNode()->use_begin(),
+ UE = Addr.getNode()->use_end(); UI != UE; ++UI) {
+ SDNode *User = *UI;
+ if (User->getOpcode() != ISD::ADD ||
+ UI.getUse().getResNo() != Addr.getResNo())
+ continue;
+
+ // Check that the add is independent of the load/store. Otherwise, folding
+ // it would create a cycle.
+ if (User->isPredecessorOf(N) || N->isPredecessorOf(User))
+ continue;
+
+ // Find the new opcode for the updating load/store.
+ bool isLoad = true;
+ bool isLaneOp = false;
+ unsigned NewOpc = 0;
+ unsigned NumVecs = 0;
+ if (isIntrinsic) {
+ unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
+ switch (IntNo) {
+ default: llvm_unreachable("unexpected intrinsic for Neon base update");
+ case Intrinsic::arm_neon_vld1: NewOpc = AArch64ISD::NEON_LD1_UPD;
+ NumVecs = 1; break;
+ case Intrinsic::arm_neon_vld2: NewOpc = AArch64ISD::NEON_LD2_UPD;
+ NumVecs = 2; break;
+ case Intrinsic::arm_neon_vld3: NewOpc = AArch64ISD::NEON_LD3_UPD;
+ NumVecs = 3; break;
+ case Intrinsic::arm_neon_vld4: NewOpc = AArch64ISD::NEON_LD4_UPD;
+ NumVecs = 4; break;
+ case Intrinsic::arm_neon_vst1: NewOpc = AArch64ISD::NEON_ST1_UPD;
+ NumVecs = 1; isLoad = false; break;
+ case Intrinsic::arm_neon_vst2: NewOpc = AArch64ISD::NEON_ST2_UPD;
+ NumVecs = 2; isLoad = false; break;
+ case Intrinsic::arm_neon_vst3: NewOpc = AArch64ISD::NEON_ST3_UPD;
+ NumVecs = 3; isLoad = false; break;
+ case Intrinsic::arm_neon_vst4: NewOpc = AArch64ISD::NEON_ST4_UPD;
+ NumVecs = 4; isLoad = false; break;
+ case Intrinsic::aarch64_neon_vld1x2: NewOpc = AArch64ISD::NEON_LD1x2_UPD;
+ NumVecs = 2; break;
+ case Intrinsic::aarch64_neon_vld1x3: NewOpc = AArch64ISD::NEON_LD1x3_UPD;
+ NumVecs = 3; break;
+ case Intrinsic::aarch64_neon_vld1x4: NewOpc = AArch64ISD::NEON_LD1x4_UPD;
+ NumVecs = 4; break;
+ case Intrinsic::aarch64_neon_vst1x2: NewOpc = AArch64ISD::NEON_ST1x2_UPD;
+ NumVecs = 2; isLoad = false; break;
+ case Intrinsic::aarch64_neon_vst1x3: NewOpc = AArch64ISD::NEON_ST1x3_UPD;
+ NumVecs = 3; isLoad = false; break;
+ case Intrinsic::aarch64_neon_vst1x4: NewOpc = AArch64ISD::NEON_ST1x4_UPD;
+ NumVecs = 4; isLoad = false; break;
+ case Intrinsic::arm_neon_vld2lane: NewOpc = AArch64ISD::NEON_LD2LN_UPD;
+ NumVecs = 2; isLaneOp = true; break;
+ case Intrinsic::arm_neon_vld3lane: NewOpc = AArch64ISD::NEON_LD3LN_UPD;
+ NumVecs = 3; isLaneOp = true; break;
+ case Intrinsic::arm_neon_vld4lane: NewOpc = AArch64ISD::NEON_LD4LN_UPD;
+ NumVecs = 4; isLaneOp = true; break;
+ case Intrinsic::arm_neon_vst2lane: NewOpc = AArch64ISD::NEON_ST2LN_UPD;
+ NumVecs = 2; isLoad = false; isLaneOp = true; break;
+ case Intrinsic::arm_neon_vst3lane: NewOpc = AArch64ISD::NEON_ST3LN_UPD;
+ NumVecs = 3; isLoad = false; isLaneOp = true; break;
+ case Intrinsic::arm_neon_vst4lane: NewOpc = AArch64ISD::NEON_ST4LN_UPD;
+ NumVecs = 4; isLoad = false; isLaneOp = true; break;
+ }
+ } else {
+ isLaneOp = true;
+ switch (N->getOpcode()) {
+ default: llvm_unreachable("unexpected opcode for Neon base update");
+ case AArch64ISD::NEON_LD2DUP: NewOpc = AArch64ISD::NEON_LD2DUP_UPD;
+ NumVecs = 2; break;
+ case AArch64ISD::NEON_LD3DUP: NewOpc = AArch64ISD::NEON_LD3DUP_UPD;
+ NumVecs = 3; break;
+ case AArch64ISD::NEON_LD4DUP: NewOpc = AArch64ISD::NEON_LD4DUP_UPD;
+ NumVecs = 4; break;
+ }
+ }
+
+ // Find the size of memory referenced by the load/store.
+ EVT VecTy;
+ if (isLoad)
+ VecTy = N->getValueType(0);
+ else
+ VecTy = N->getOperand(AddrOpIdx + 1).getValueType();
+ unsigned NumBytes = NumVecs * VecTy.getSizeInBits() / 8;
+ if (isLaneOp)
+ NumBytes /= VecTy.getVectorNumElements();
+
+ // If the increment is a constant, it must match the memory ref size.
+ SDValue Inc = User->getOperand(User->getOperand(0) == Addr ? 1 : 0);
+ if (ConstantSDNode *CInc = dyn_cast<ConstantSDNode>(Inc.getNode())) {
+ uint32_t IncVal = CInc->getZExtValue();
+ if (IncVal != NumBytes)
+ continue;
+ Inc = DAG.getTargetConstant(IncVal, MVT::i32);
+ }
+
+ // Create the new updating load/store node.
+ EVT Tys[6];
+ unsigned NumResultVecs = (isLoad ? NumVecs : 0);
+ unsigned n;
+ for (n = 0; n < NumResultVecs; ++n)
+ Tys[n] = VecTy;
+ Tys[n++] = MVT::i64;
+ Tys[n] = MVT::Other;
+ SDVTList SDTys = DAG.getVTList(Tys, NumResultVecs + 2);
+ SmallVector<SDValue, 8> Ops;
+ Ops.push_back(N->getOperand(0)); // incoming chain
+ Ops.push_back(N->getOperand(AddrOpIdx));
+ Ops.push_back(Inc);
+ for (unsigned i = AddrOpIdx + 1; i < N->getNumOperands(); ++i) {
+ Ops.push_back(N->getOperand(i));
+ }
+ MemIntrinsicSDNode *MemInt = cast<MemIntrinsicSDNode>(N);
+ SDValue UpdN = DAG.getMemIntrinsicNode(NewOpc, SDLoc(N), SDTys,
+ Ops.data(), Ops.size(),
+ MemInt->getMemoryVT(),
+ MemInt->getMemOperand());
+
+ // Update the uses.
+ std::vector<SDValue> NewResults;
+ for (unsigned i = 0; i < NumResultVecs; ++i) {
+ NewResults.push_back(SDValue(UpdN.getNode(), i));
+ }
+ NewResults.push_back(SDValue(UpdN.getNode(), NumResultVecs + 1)); // chain
+ DCI.CombineTo(N, NewResults);
+ DCI.CombineTo(User, SDValue(UpdN.getNode(), NumResultVecs));
+
+ break;
+ }
+ return SDValue();
+}
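+
+// Minimal sketch of the rewrite: an arm_neon_vld2 of two v8i8 vectors whose
+// base pointer also feeds an (add ptr, 16) folds into NEON_LD2_UPD, since
+// 16 bytes == NumVecs (2) * 8 bytes; the new node yields the loaded vectors
+// plus the incremented address, and both old nodes are replaced.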
+
+/// For a NEON_VDUPLANE node N, check if its source operand is a vldN-lane
+/// (N > 1) intrinsic, and if all the other uses of that intrinsic are also
+/// VDUPLANEs. If so, combine them to a vldN-dup operation and return the
+/// combined node.
+static SDValue CombineVLDDUP(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
+ SelectionDAG &DAG = DCI.DAG;
+ EVT VT = N->getValueType(0);
+
+ // Check if the VDUPLANE operand is a vldN-lane intrinsic.
+ SDNode *VLD = N->getOperand(0).getNode();
+ if (VLD->getOpcode() != ISD::INTRINSIC_W_CHAIN)
+ return SDValue();
+ unsigned NumVecs = 0;
+ unsigned NewOpc = 0;
+ unsigned IntNo = cast<ConstantSDNode>(VLD->getOperand(1))->getZExtValue();
+ if (IntNo == Intrinsic::arm_neon_vld2lane) {
+ NumVecs = 2;
+ NewOpc = AArch64ISD::NEON_LD2DUP;
+ } else if (IntNo == Intrinsic::arm_neon_vld3lane) {
+ NumVecs = 3;
+ NewOpc = AArch64ISD::NEON_LD3DUP;
+ } else if (IntNo == Intrinsic::arm_neon_vld4lane) {
+ NumVecs = 4;
+ NewOpc = AArch64ISD::NEON_LD4DUP;
+ } else {
+ return SDValue();
+ }
+
+ // First check that all the vldN-lane uses are VDUPLANEs and that the lane
+ // numbers match the load.
+ unsigned VLDLaneNo =
+ cast<ConstantSDNode>(VLD->getOperand(NumVecs + 3))->getZExtValue();
+ for (SDNode::use_iterator UI = VLD->use_begin(), UE = VLD->use_end();
+ UI != UE; ++UI) {
+ // Ignore uses of the chain result.
+ if (UI.getUse().getResNo() == NumVecs)
+ continue;
+ SDNode *User = *UI;
+ if (User->getOpcode() != AArch64ISD::NEON_VDUPLANE ||
+ VLDLaneNo != cast<ConstantSDNode>(User->getOperand(1))->getZExtValue())
+ return SDValue();
+ }
+
+ // Create the vldN-dup node.
+ EVT Tys[5];
+ unsigned n;
+ for (n = 0; n < NumVecs; ++n)
+ Tys[n] = VT;
+ Tys[n] = MVT::Other;
+ SDVTList SDTys = DAG.getVTList(Tys, NumVecs + 1);
+ SDValue Ops[] = { VLD->getOperand(0), VLD->getOperand(2) };
+ MemIntrinsicSDNode *VLDMemInt = cast<MemIntrinsicSDNode>(VLD);
+ SDValue VLDDup = DAG.getMemIntrinsicNode(NewOpc, SDLoc(VLD), SDTys, Ops, 2,
+ VLDMemInt->getMemoryVT(),
+ VLDMemInt->getMemOperand());
+
+ // Update the uses.
+ for (SDNode::use_iterator UI = VLD->use_begin(), UE = VLD->use_end();
+ UI != UE; ++UI) {
+ unsigned ResNo = UI.getUse().getResNo();
+ // Ignore uses of the chain result.
+ if (ResNo == NumVecs)
+ continue;
+ SDNode *User = *UI;
+ DCI.CombineTo(User, SDValue(VLDDup.getNode(), ResNo));
+ }
+
+ // Now the vldN-lane intrinsic is dead except for its chain result.
+ // Update uses of the chain.
+ std::vector<SDValue> VLDDupResults;
+ for (unsigned n = 0; n < NumVecs; ++n)
+ VLDDupResults.push_back(SDValue(VLDDup.getNode(), n));
+ VLDDupResults.push_back(SDValue(VLDDup.getNode(), NumVecs));
+ DCI.CombineTo(VLD, VLDDupResults);
+
+ return SDValue(N, 0);
+}
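+
+// Illustrative effect: if both results of a vld2lane feed NEON_VDUPLANE nodes
+// with the same lane number as the load, the whole group collapses into a
+// NEON_LD2DUP, i.e. an ld2r that replicates the loaded elements across all
+// lanes directly.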
SDValue
AArch64TargetLowering::PerformDAGCombine(SDNode *N,
@@ -3243,7 +3803,45 @@ AArch64TargetLowering::PerformDAGCombine(SDNode *N,
default: break;
case ISD::AND: return PerformANDCombine(N, DCI);
case ISD::OR: return PerformORCombine(N, DCI, getSubtarget());
- case ISD::SRA: return PerformSRACombine(N, DCI);
+ case ISD::SHL:
+ case ISD::SRA:
+ case ISD::SRL:
+ return PerformShiftCombine(N, DCI, getSubtarget());
+ case ISD::INTRINSIC_WO_CHAIN:
+ return PerformIntrinsicCombine(N, DCI.DAG);
+ case AArch64ISD::NEON_VDUPLANE:
+ return CombineVLDDUP(N, DCI);
+ case AArch64ISD::NEON_LD2DUP:
+ case AArch64ISD::NEON_LD3DUP:
+ case AArch64ISD::NEON_LD4DUP:
+ return CombineBaseUpdate(N, DCI);
+ case ISD::INTRINSIC_VOID:
+ case ISD::INTRINSIC_W_CHAIN:
+ switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
+ case Intrinsic::arm_neon_vld1:
+ case Intrinsic::arm_neon_vld2:
+ case Intrinsic::arm_neon_vld3:
+ case Intrinsic::arm_neon_vld4:
+ case Intrinsic::arm_neon_vst1:
+ case Intrinsic::arm_neon_vst2:
+ case Intrinsic::arm_neon_vst3:
+ case Intrinsic::arm_neon_vst4:
+ case Intrinsic::arm_neon_vld2lane:
+ case Intrinsic::arm_neon_vld3lane:
+ case Intrinsic::arm_neon_vld4lane:
+ case Intrinsic::aarch64_neon_vld1x2:
+ case Intrinsic::aarch64_neon_vld1x3:
+ case Intrinsic::aarch64_neon_vld1x4:
+ case Intrinsic::aarch64_neon_vst1x2:
+ case Intrinsic::aarch64_neon_vst1x3:
+ case Intrinsic::aarch64_neon_vst1x4:
+ case Intrinsic::arm_neon_vst2lane:
+ case Intrinsic::arm_neon_vst3lane:
+ case Intrinsic::arm_neon_vst4lane:
+ return CombineBaseUpdate(N, DCI);
+ default:
+ break;
+ }
}
return SDValue();
}
@@ -3269,6 +3867,59 @@ AArch64TargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
return false;
}
+// Check whether a BUILD_VECTOR can be represented as a VECTOR_SHUFFLE of its
+// source vectors; if so, lower it via LowerVECTOR_SHUFFLE.
+bool AArch64TargetLowering::isKnownShuffleVector(SDValue Op, SelectionDAG &DAG,
+ SDValue &Res) const {
+ SDLoc DL(Op);
+ EVT VT = Op.getValueType();
+ unsigned NumElts = VT.getVectorNumElements();
+ unsigned V0NumElts = 0;
+ int Mask[16];
+ SDValue V0, V1;
+
+ // Check that all elements are extracted from at most two source vectors.
+ for (unsigned i = 0; i < NumElts; ++i) {
+ SDValue Elt = Op.getOperand(i);
+ if (Elt.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
+ return false;
+
+ if (V0.getNode() == 0) {
+ V0 = Elt.getOperand(0);
+ V0NumElts = V0.getValueType().getVectorNumElements();
+ }
+ if (Elt.getOperand(0) == V0) {
+ Mask[i] = (cast<ConstantSDNode>(Elt->getOperand(1))->getZExtValue());
+ continue;
+ } else if (V1.getNode() == 0) {
+ V1 = Elt.getOperand(0);
+ }
+ if (Elt.getOperand(0) == V1) {
+ unsigned Lane = cast<ConstantSDNode>(Elt->getOperand(1))->getZExtValue();
+ Mask[i] = (Lane + V0NumElts);
+ continue;
+ } else {
+ return false;
+ }
+ }
+
+ // All elements came from a single, double-width vector; split it in half
+ // to form the two shuffle inputs.
+ if (!V1.getNode() && V0NumElts == NumElts * 2) {
+ V1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V0,
+ DAG.getConstant(NumElts, MVT::i64));
+ V0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V0,
+ DAG.getConstant(0, MVT::i64));
+ V0NumElts = V0.getValueType().getVectorNumElements();
+ }
+
+ if (V1.getNode() && NumElts == V0NumElts &&
+ V0NumElts == V1.getValueType().getVectorNumElements()) {
+ SDValue Shuffle = DAG.getVectorShuffle(VT, DL, V0, V1, Mask);
+ Res = LowerVECTOR_SHUFFLE(Shuffle, DAG);
+ return true;
+ } else
+ return false;
+}
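+
+// For example (hypothetical lanes): a v4i32 build_vector whose elements are
+// extracts of V0 at lanes 0 and 2 and of V1 at lanes 1 and 3 produces the
+// mask <0, 2, 5, 7>, which LowerVECTOR_SHUFFLE can then lower.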
+
// If this is a case we can't handle, return null and let the default
// expansion code take care of it.
SDValue
@@ -3283,12 +3934,15 @@ AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
unsigned SplatBitSize;
bool HasAnyUndefs;
+ bool UseNeonMov = VT.getSizeInBits() >= 64;
+
// Note we favor lowering MOVI over MVNI.
// This has implications on the definition of patterns in TableGen to select
// BIC immediate instructions but not ORR immediate instructions.
// If this lowering order is changed, TableGen patterns for BIC immediate and
// ORR immediate instructions have to be updated.
- if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
+ if (UseNeonMov &&
+ BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
if (SplatBitSize <= 64) {
// First attempt to use vector immediate-form MOVI
EVT NeonMovVT;
@@ -3336,9 +3990,390 @@ AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
}
}
}
+
+ unsigned NumElts = VT.getVectorNumElements();
+ bool isOnlyLowElement = true;
+ bool usesOnlyOneValue = true;
+ bool hasDominantValue = false;
+ bool isConstant = true;
+
+ // Map of the number of times a particular SDValue appears in the
+ // element list.
+ DenseMap<SDValue, unsigned> ValueCounts;
+ SDValue Value;
+ for (unsigned i = 0; i < NumElts; ++i) {
+ SDValue V = Op.getOperand(i);
+ if (V.getOpcode() == ISD::UNDEF)
+ continue;
+ if (i > 0)
+ isOnlyLowElement = false;
+ if (!isa<ConstantFPSDNode>(V) && !isa<ConstantSDNode>(V))
+ isConstant = false;
+
+ ValueCounts.insert(std::make_pair(V, 0));
+ unsigned &Count = ValueCounts[V];
+
+ // Is this value dominant? (takes up more than half of the lanes)
+ if (++Count > (NumElts / 2)) {
+ hasDominantValue = true;
+ Value = V;
+ }
+ }
+ if (ValueCounts.size() != 1)
+ usesOnlyOneValue = false;
+ if (!Value.getNode() && ValueCounts.size() > 0)
+ Value = ValueCounts.begin()->first;
+
+ if (ValueCounts.size() == 0)
+ return DAG.getUNDEF(VT);
+
+ // If only the low element is non-undef and it is not a load, use
+ // SCALAR_TO_VECTOR; loads are better lowered with insert_vector_elt, so
+ // keep going in that case.
+ if (isOnlyLowElement && !ISD::isNormalLoad(Value.getNode()))
+ return DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, Value);
+
+ unsigned EltSize = VT.getVectorElementType().getSizeInBits();
+ if (hasDominantValue && EltSize <= 64) {
+ // Use VDUP for non-constant splats.
+ if (!isConstant) {
+ SDValue N;
+
+ // If we are DUPing a value that comes directly from a vector, we could
+ // just use DUPLANE. We can only do this if the lane being extracted
+ // is at a constant index, as the DUP from lane instructions only have
+ // constant-index forms.
+ if (Value->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+ isa<ConstantSDNode>(Value->getOperand(1))) {
+ N = DAG.getNode(AArch64ISD::NEON_VDUPLANE, DL, VT,
+ Value->getOperand(0), Value->getOperand(1));
+ } else
+ N = DAG.getNode(AArch64ISD::NEON_VDUP, DL, VT, Value);
+
+ if (!usesOnlyOneValue) {
+ // The dominant value was splatted as 'N', but we now have to insert
+ // all differing elements.
+ for (unsigned I = 0; I < NumElts; ++I) {
+ if (Op.getOperand(I) == Value)
+ continue;
+ SmallVector<SDValue, 3> Ops;
+ Ops.push_back(N);
+ Ops.push_back(Op.getOperand(I));
+ Ops.push_back(DAG.getConstant(I, MVT::i64));
+ N = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, &Ops[0], 3);
+ }
+ }
+ return N;
+ }
+ if (usesOnlyOneValue && isConstant) {
+ return DAG.getNode(AArch64ISD::NEON_VDUP, DL, VT, Value);
+ }
+ }
+ // If all elements are constants and the case above didn't get hit, fall back
+ // to the default expansion, which will generate a load from the constant
+ // pool.
+ if (isConstant)
+ return SDValue();
+
+ // Try to lower this BUILD_VECTOR as a shuffle of its source vectors.
+ SDValue Shuf;
+ if (isKnownShuffleVector(Op, DAG, Shuf))
+ return Shuf;
+
+ // If all else fails, just use a sequence of INSERT_VECTOR_ELT when we
+ // know the default expansion would otherwise fall back on something even
+ // worse. For a vector with one or two non-undef values, that default is
+ // scalar_to_vector for the elements followed by a shuffle (provided the
+ // shuffle is valid for the target); for everything else it is
+ // materialization element by element on the stack followed by a load.
+ if (!isConstant && !usesOnlyOneValue) {
+ SDValue Vec = DAG.getUNDEF(VT);
+ for (unsigned i = 0 ; i < NumElts; ++i) {
+ SDValue V = Op.getOperand(i);
+ if (V.getOpcode() == ISD::UNDEF)
+ continue;
+ SDValue LaneIdx = DAG.getConstant(i, MVT::i64);
+ Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Vec, V, LaneIdx);
+ }
+ return Vec;
+ }
return SDValue();
}
+/// isREVMask - Check if a vector shuffle corresponds to a REV
+/// instruction with the specified blocksize. (The order of the elements
+/// within each block of the vector is reversed.)
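+/// For example, for v8i8 and BlockSize 32 the mask
+/// <3, 2, 1, 0, 7, 6, 5, 4> is a REV32 mask: each 32-bit block has its
+/// byte order reversed.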
+static bool isREVMask(ArrayRef<int> M, EVT VT, unsigned BlockSize) {
+ assert((BlockSize == 16 || BlockSize == 32 || BlockSize == 64) &&
+ "Only possible block sizes for REV are: 16, 32, 64");
+
+ unsigned EltSz = VT.getVectorElementType().getSizeInBits();
+ if (EltSz == 64)
+ return false;
+
+ unsigned NumElts = VT.getVectorNumElements();
+ unsigned BlockElts = M[0] + 1;
+ // If the first shuffle index is UNDEF, be optimistic.
+ if (M[0] < 0)
+ BlockElts = BlockSize / EltSz;
+
+ if (BlockSize <= EltSz || BlockSize != BlockElts * EltSz)
+ return false;
+
+ for (unsigned i = 0; i < NumElts; ++i) {
+ if (M[i] < 0)
+ continue; // ignore UNDEF indices
+ if ((unsigned)M[i] != (i - i % BlockElts) + (BlockElts - 1 - i % BlockElts))
+ return false;
+ }
+
+ return true;
+}
+
+// isPermuteMask - Check whether the vector shuffle mask matches a UZP, ZIP
+// or TRN instruction, returning the corresponding target opcode (0 if none).
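+// For v8i8, for example, the masks checked below are:
+//   UZP1 <0, 2, 4, 6, 8, 10, 12, 14>, ZIP1 <0, 8, 1, 9, 2, 10, 3, 11>,
+//   TRN1 <0, 8, 2, 10, 4, 12, 6, 14>, plus the corresponding *2 variants.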
+static unsigned isPermuteMask(ArrayRef<int> M, EVT VT) {
+ unsigned NumElts = VT.getVectorNumElements();
+ if (NumElts < 4)
+ return 0;
+
+ bool ismatch = true;
+
+ // Check UZP1
+ for (unsigned i = 0; i < NumElts; ++i) {
+ if ((unsigned)M[i] != i * 2) {
+ ismatch = false;
+ break;
+ }
+ }
+ if (ismatch)
+ return AArch64ISD::NEON_UZP1;
+
+ // Check UZP2
+ ismatch = true;
+ for (unsigned i = 0; i < NumElts; ++i) {
+ if ((unsigned)M[i] != i * 2 + 1) {
+ ismatch = false;
+ break;
+ }
+ }
+ if (ismatch)
+ return AArch64ISD::NEON_UZP2;
+
+ // Check ZIP1
+ ismatch = true;
+ for (unsigned i = 0; i < NumElts; ++i) {
+ if ((unsigned)M[i] != i / 2 + NumElts * (i % 2)) {
+ ismatch = false;
+ break;
+ }
+ }
+ if (ismatch)
+ return AArch64ISD::NEON_ZIP1;
+
+ // Check ZIP2
+ ismatch = true;
+ for (unsigned i = 0; i < NumElts; ++i) {
+ if ((unsigned)M[i] != (NumElts + i) / 2 + NumElts * (i % 2)) {
+ ismatch = false;
+ break;
+ }
+ }
+ if (ismatch)
+ return AArch64ISD::NEON_ZIP2;
+
+ // Check TRN1
+ ismatch = true;
+ for (unsigned i = 0; i < NumElts; ++i) {
+ if ((unsigned)M[i] != i + (NumElts - 1) * (i % 2)) {
+ ismatch = false;
+ break;
+ }
+ }
+ if (ismatch)
+ return AArch64ISD::NEON_TRN1;
+
+ // Check TRN2
+ ismatch = true;
+ for (unsigned i = 0; i < NumElts; ++i) {
+ if ((unsigned)M[i] != 1 + i + (NumElts - 1) * (i % 2)) {
+ ismatch = false;
+ break;
+ }
+ }
+ if (ismatch)
+ return AArch64ISD::NEON_TRN2;
+
+ return 0;
+}
+
+SDValue
+AArch64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDValue V1 = Op.getOperand(0);
+ SDValue V2 = Op.getOperand(1);
+ SDLoc dl(Op);
+ EVT VT = Op.getValueType();
+ ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
+
+ // Convert shuffles that are directly supported on NEON to target-specific
+ // DAG nodes, instead of keeping them as shuffles and matching them again
+ // during code selection. This is more efficient and avoids the possibility
+ // of inconsistencies between legalization and selection.
+ ArrayRef<int> ShuffleMask = SVN->getMask();
+
+ unsigned EltSize = VT.getVectorElementType().getSizeInBits();
+ if (EltSize > 64)
+ return SDValue();
+
+ if (isREVMask(ShuffleMask, VT, 64))
+ return DAG.getNode(AArch64ISD::NEON_REV64, dl, VT, V1);
+ if (isREVMask(ShuffleMask, VT, 32))
+ return DAG.getNode(AArch64ISD::NEON_REV32, dl, VT, V1);
+ if (isREVMask(ShuffleMask, VT, 16))
+ return DAG.getNode(AArch64ISD::NEON_REV16, dl, VT, V1);
+
+ unsigned ISDNo = isPermuteMask(ShuffleMask, VT);
+ if (ISDNo)
+ return DAG.getNode(ISDNo, dl, VT, V1, V2);
+
+ // If the elements of the shuffle mask are all the same constant, we can
+ // transform it into either NEON_VDUP or NEON_VDUPLANE
+ if (ShuffleVectorSDNode::isSplatMask(&ShuffleMask[0], VT)) {
+ int Lane = SVN->getSplatIndex();
+ // If this is an undef splat, treat it as a splat of lane 0 so that it
+ // can still be generated via VDUP, if possible.
+ if (Lane == -1) Lane = 0;
+
+ // Test if V1 is a SCALAR_TO_VECTOR.
+ if (V1.getOpcode() == ISD::SCALAR_TO_VECTOR) {
+ return DAG.getNode(AArch64ISD::NEON_VDUP, dl, VT, V1.getOperand(0));
+ }
+ // Test if V1 is a BUILD_VECTOR which is equivalent to a SCALAR_TO_VECTOR.
+ if (V1.getOpcode() == ISD::BUILD_VECTOR) {
+ bool IsScalarToVector = true;
+ for (unsigned i = 0, e = V1.getNumOperands(); i != e; ++i)
+ if (V1.getOperand(i).getOpcode() != ISD::UNDEF &&
+ i != (unsigned)Lane) {
+ IsScalarToVector = false;
+ break;
+ }
+ if (IsScalarToVector)
+ return DAG.getNode(AArch64ISD::NEON_VDUP, dl, VT,
+ V1.getOperand(Lane));
+ }
+
+ // Test if V1 is an EXTRACT_SUBVECTOR.
+ if (V1.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
+ int ExtLane = cast<ConstantSDNode>(V1.getOperand(1))->getZExtValue();
+ return DAG.getNode(AArch64ISD::NEON_VDUPLANE, dl, VT, V1.getOperand(0),
+ DAG.getConstant(Lane + ExtLane, MVT::i64));
+ }
+ // Test if V1 is a CONCAT_VECTORS.
+ if (V1.getOpcode() == ISD::CONCAT_VECTORS &&
+ V1.getOperand(1).getOpcode() == ISD::UNDEF) {
+ SDValue Op0 = V1.getOperand(0);
+ assert((unsigned)Lane < Op0.getValueType().getVectorNumElements() &&
+ "Invalid vector lane access");
+ return DAG.getNode(AArch64ISD::NEON_VDUPLANE, dl, VT, Op0,
+ DAG.getConstant(Lane, MVT::i64));
+ }
+
+ return DAG.getNode(AArch64ISD::NEON_VDUPLANE, dl, VT, V1,
+ DAG.getConstant(Lane, MVT::i64));
+ }
+
+ int Length = ShuffleMask.size();
+ int V1EltNum = V1.getValueType().getVectorNumElements();
+
+ // If the number of V1 elements is the same as the number of shuffle mask
+ // elements and the mask values are sequential, we can transform this
+ // shuffle into NEON_VEXTRACT.
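+ // For example, an 8-element shuffle of V1 and V2 with mask
+ // <3, 4, 5, 6, 7, 8, 9, 10> is sequential starting at 3, and becomes a
+ // NEON_VEXTRACT with byte index (EltSize / 8) * 3.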
+ if (V1EltNum == Length) {
+ // Check if the shuffle mask is sequential.
+ bool IsSequential = true;
+ int CurMask = ShuffleMask[0];
+ for (int I = 0; I < Length; ++I) {
+ if (ShuffleMask[I] != CurMask) {
+ IsSequential = false;
+ break;
+ }
+ CurMask++;
+ }
+ if (IsSequential) {
+ assert((EltSize % 8 == 0) && "Bitsize of vector element is incorrect");
+ unsigned VecSize = EltSize * V1EltNum;
+ unsigned Index = (EltSize/8) * ShuffleMask[0];
+ if (VecSize == 64 || VecSize == 128)
+ return DAG.getNode(AArch64ISD::NEON_VEXTRACT, dl, VT, V1, V2,
+ DAG.getConstant(Index, MVT::i64));
+ }
+ }
+
+ // For a shuffle mask like "0, 1, 2, 3, 4, 5, 13, 7", try to generate an
+ // insert by element from V2 into V1.
+ // If the shuffle mask is like "0, 1, 10, 11, 12, 13, 14, 15", V2 is a
+ // better insert target than V1 since it needs fewer inserts, so we count
+ // the elements to be inserted for both V1 and V2 and pick whichever
+ // needs fewer as the insert target.
+
+ // Collect the elements that need to be inserted and their indices.
+ SmallVector<int, 8> NV1Elt;
+ SmallVector<int, 8> N1Index;
+ SmallVector<int, 8> NV2Elt;
+ SmallVector<int, 8> N2Index;
+ for (int I = 0; I != Length; ++I) {
+ if (ShuffleMask[I] != I) {
+ NV1Elt.push_back(ShuffleMask[I]);
+ N1Index.push_back(I);
+ }
+ }
+ for (int I = 0; I != Length; ++I) {
+ if (ShuffleMask[I] != (I + V1EltNum)) {
+ NV2Elt.push_back(ShuffleMask[I]);
+ N2Index.push_back(I);
+ }
+ }
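+ // For example, with mask <0, 1, 10, 11, 12, 13, 14, 15> and V1EltNum 8,
+ // six lanes differ from V1's identity but only two from V2's, so V2 is
+ // chosen as the insert target below.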
+
+ // Decide which vector to insert into. If every lane mismatches both V1
+ // and V2, neither is reused and we start from an UNDEF vector instead.
+ SDValue InsV = V1;
+ SmallVector<int, 8> InsMasks = NV1Elt;
+ SmallVector<int, 8> InsIndex = N1Index;
+ if ((int)NV1Elt.size() != Length || (int)NV2Elt.size() != Length) {
+ if (NV1Elt.size() > NV2Elt.size()) {
+ InsV = V2;
+ InsMasks = NV2Elt;
+ InsIndex = N2Index;
+ }
+ } else {
+ InsV = DAG.getNode(ISD::UNDEF, dl, VT);
+ }
+
+ for (int I = 0, E = InsMasks.size(); I != E; ++I) {
+ SDValue ExtV = V1;
+ int Mask = InsMasks[I];
+ if (Mask >= V1EltNum) {
+ ExtV = V2;
+ Mask -= V1EltNum;
+ }
+ // Any value type smaller than i32 is illegal in AArch64, and this
+ // lowering function runs after the legalize pass, so the extracted
+ // scalar must be widened to a legal type here.
+ EVT EltVT;
+ if (VT.getVectorElementType().isFloatingPoint())
+ EltVT = (EltSize == 64) ? MVT::f64 : MVT::f32;
+ else
+ EltVT = (EltSize == 64) ? MVT::i64 : MVT::i32;
+
+ if (Mask >= 0) {
+ ExtV = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, ExtV,
+ DAG.getConstant(Mask, MVT::i64));
+ InsV = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, InsV, ExtV,
+ DAG.getConstant(InsIndex[I], MVT::i64));
+ }
+ }
+ return InsV;
+}
+
AArch64TargetLowering::ConstraintType
AArch64TargetLowering::getConstraintType(const std::string &Constraint) const {
if (Constraint.size() == 1) {
@@ -3484,14 +4519,10 @@ AArch64TargetLowering::getRegForInlineAsmConstraint(
return std::make_pair(0U, &AArch64::FPR16RegClass);
else if (VT == MVT::f32)
return std::make_pair(0U, &AArch64::FPR32RegClass);
- else if (VT == MVT::f64)
- return std::make_pair(0U, &AArch64::FPR64RegClass);
else if (VT.getSizeInBits() == 64)
- return std::make_pair(0U, &AArch64::VPR64RegClass);
- else if (VT == MVT::f128)
- return std::make_pair(0U, &AArch64::FPR128RegClass);
+ return std::make_pair(0U, &AArch64::FPR64RegClass);
else if (VT.getSizeInBits() == 128)
- return std::make_pair(0U, &AArch64::VPR128RegClass);
+ return std::make_pair(0U, &AArch64::FPR128RegClass);
break;
}
}
@@ -3500,3 +4531,69 @@ AArch64TargetLowering::getRegForInlineAsmConstraint(
// constraint into a member of a register class.
return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
}
+
+/// Represent NEON load and store intrinsics as MemIntrinsicNodes.
+/// The associated MachineMemOperands record the alignment specified
+/// in the intrinsic calls.
+bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
+ const CallInst &I,
+ unsigned Intrinsic) const {
+ switch (Intrinsic) {
+ case Intrinsic::arm_neon_vld1:
+ case Intrinsic::arm_neon_vld2:
+ case Intrinsic::arm_neon_vld3:
+ case Intrinsic::arm_neon_vld4:
+ case Intrinsic::aarch64_neon_vld1x2:
+ case Intrinsic::aarch64_neon_vld1x3:
+ case Intrinsic::aarch64_neon_vld1x4:
+ case Intrinsic::arm_neon_vld2lane:
+ case Intrinsic::arm_neon_vld3lane:
+ case Intrinsic::arm_neon_vld4lane: {
+ Info.opc = ISD::INTRINSIC_W_CHAIN;
+ // Conservatively set memVT to the entire set of vectors loaded.
+ uint64_t NumElts = getDataLayout()->getTypeAllocSize(I.getType()) / 8;
+ Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
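+ // For example, a vld3 returning three v4i32 values loads a 48-byte
+ // aggregate, so NumElts is 6 and memVT is conservatively set to v6i64.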
+ Info.ptrVal = I.getArgOperand(0);
+ Info.offset = 0;
+ Value *AlignArg = I.getArgOperand(I.getNumArgOperands() - 1);
+ Info.align = cast<ConstantInt>(AlignArg)->getZExtValue();
+ Info.vol = false; // volatile loads with NEON intrinsics not supported
+ Info.readMem = true;
+ Info.writeMem = false;
+ return true;
+ }
+ case Intrinsic::arm_neon_vst1:
+ case Intrinsic::arm_neon_vst2:
+ case Intrinsic::arm_neon_vst3:
+ case Intrinsic::arm_neon_vst4:
+ case Intrinsic::aarch64_neon_vst1x2:
+ case Intrinsic::aarch64_neon_vst1x3:
+ case Intrinsic::aarch64_neon_vst1x4:
+ case Intrinsic::arm_neon_vst2lane:
+ case Intrinsic::arm_neon_vst3lane:
+ case Intrinsic::arm_neon_vst4lane: {
+ Info.opc = ISD::INTRINSIC_VOID;
+ // Conservatively set memVT to the entire set of vectors stored.
+ unsigned NumElts = 0;
+ for (unsigned ArgI = 1, ArgE = I.getNumArgOperands(); ArgI < ArgE; ++ArgI) {
+ Type *ArgTy = I.getArgOperand(ArgI)->getType();
+ if (!ArgTy->isVectorTy())
+ break;
+ NumElts += getDataLayout()->getTypeAllocSize(ArgTy) / 8;
+ }
+ Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
+ Info.ptrVal = I.getArgOperand(0);
+ Info.offset = 0;
+ Value *AlignArg = I.getArgOperand(I.getNumArgOperands() - 1);
+ Info.align = cast<ConstantInt>(AlignArg)->getZExtValue();
+ Info.vol = false; // volatile stores with NEON intrinsics not supported
+ Info.readMem = false;
+ Info.writeMem = true;
+ return true;
+ }
+ default:
+ break;
+ }
+
+ return false;
+}
diff --git a/lib/Target/AArch64/AArch64ISelLowering.h b/lib/Target/AArch64/AArch64ISelLowering.h
index 67a908e..8ad5a79 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/lib/Target/AArch64/AArch64ISelLowering.h
@@ -19,7 +19,7 @@
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/Target/TargetLowering.h"
-
+#include "llvm/IR/Intrinsics.h"
namespace llvm {
namespace AArch64ISD {
@@ -125,6 +125,19 @@ namespace AArch64ISD {
// Vector FP move immediate
NEON_FMOVIMM,
+ // Vector permute
+ NEON_UZP1,
+ NEON_UZP2,
+ NEON_ZIP1,
+ NEON_ZIP2,
+ NEON_TRN1,
+ NEON_TRN2,
+
+ // Vector element reverse
+ NEON_REV64,
+ NEON_REV32,
+ NEON_REV16,
+
// Vector compare
NEON_CMP,
@@ -132,7 +145,58 @@ namespace AArch64ISD {
NEON_CMPZ,
// Vector compare bitwise test
- NEON_TST
+ NEON_TST,
+
+ // Vector saturating shift
+ NEON_QSHLs,
+ NEON_QSHLu,
+
+ // Vector dup
+ NEON_VDUP,
+
+ // Vector dup by lane
+ NEON_VDUPLANE,
+
+ // Vector extract
+ NEON_VEXTRACT,
+
+ // NEON duplicate lane loads
+ NEON_LD2DUP = ISD::FIRST_TARGET_MEMORY_OPCODE,
+ NEON_LD3DUP,
+ NEON_LD4DUP,
+
+ // NEON loads with post-increment base updates:
+ NEON_LD1_UPD,
+ NEON_LD2_UPD,
+ NEON_LD3_UPD,
+ NEON_LD4_UPD,
+ NEON_LD1x2_UPD,
+ NEON_LD1x3_UPD,
+ NEON_LD1x4_UPD,
+
+ // NEON stores with post-increment base updates:
+ NEON_ST1_UPD,
+ NEON_ST2_UPD,
+ NEON_ST3_UPD,
+ NEON_ST4_UPD,
+ NEON_ST1x2_UPD,
+ NEON_ST1x3_UPD,
+ NEON_ST1x4_UPD,
+
+ // NEON duplicate lane loads with post-increment base updates:
+ NEON_LD2DUP_UPD,
+ NEON_LD3DUP_UPD,
+ NEON_LD4DUP_UPD,
+
+ // NEON lane loads with post-increment base updates:
+ NEON_LD2LN_UPD,
+ NEON_LD3LN_UPD,
+ NEON_LD4LN_UPD,
+
+ // NEON lane stores with post-increment base updates:
+ NEON_ST2LN_UPD,
+ NEON_ST3LN_UPD,
+ NEON_ST4LN_UPD
};
}
@@ -169,9 +233,13 @@ public:
SDLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const;
+ bool isKnownShuffleVector(SDValue Op, SelectionDAG &DAG, SDValue &Res) const;
+
SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
const AArch64Subtarget *ST) const;
+ SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
+
void SaveVarArgRegisters(CCState &CCInfo, SelectionDAG &DAG, SDLoc DL,
SDValue &Chain) const;
@@ -234,6 +302,8 @@ public:
SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, bool IsSigned) const;
+ SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerGlobalAddressELFSmall(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerGlobalAddressELFLarge(SDValue Op, SelectionDAG &DAG) const;
@@ -269,6 +339,14 @@ public:
std::pair<unsigned, const TargetRegisterClass*>
getRegForInlineAsmConstraint(const std::string &Constraint, MVT VT) const;
+
+ virtual bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
+ unsigned Intrinsic) const LLVM_OVERRIDE;
+
+protected:
+ std::pair<const TargetRegisterClass*, uint8_t>
+ findRepresentativeClass(MVT VT) const;
+
private:
const InstrItineraryData *Itins;
@@ -280,6 +358,10 @@ enum NeonModImmType {
Neon_Mov_Imm,
Neon_Mvn_Imm
};
+
+extern SDValue ScanBUILD_VECTOR(SDValue Op, bool &isOnlyLowElement,
+ bool &usesOnlyOneValue, bool &hasDominantValue,
+ bool &isConstant, bool &isUNDEF);
} // namespace llvm
#endif // LLVM_TARGET_AARCH64_ISELLOWERING_H
diff --git a/lib/Target/AArch64/AArch64InstrFormats.td b/lib/Target/AArch64/AArch64InstrFormats.td
index 09451fd..34f917c 100644
--- a/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/lib/Target/AArch64/AArch64InstrFormats.td
@@ -120,6 +120,14 @@ class A64InstRdnm<dag outs, dag ins, string asmstr,
let Inst{20-16} = Rm;
}
+class A64InstRtnm<dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRtn<outs, ins, asmstr, patterns, itin> {
+ bits<5> Rm;
+
+ let Inst{20-16} = Rm;
+}
+
//===----------------------------------------------------------------------===//
//
// Actual A64 Instruction Formats
@@ -383,6 +391,8 @@ class A64I_extract<bit sf, bits<3> op, bit n,
// Inherits Rd in 4-0
}
+let Predicates = [HasFPARMv8] in {
+
// Format for floating-point compare instructions.
class A64I_fpcmp<bit m, bit s, bits<2> type, bits<2> op, bits<5> opcode2,
dag outs, dag ins, string asmstr,
@@ -562,6 +572,8 @@ class A64I_fpimm<bit m, bit s, bits<2> type, bits<5> imm5,
// Inherit Rd in 4-0
}
+}
+
// Format for load-register (literal) instructions.
class A64I_LDRlit<bits<2> opc, bit v,
dag outs, dag ins, string asmstr,
@@ -971,31 +983,123 @@ class NeonInstAlias<string Asm, dag Result, bit Emit = 0b1>
: InstAlias<Asm, Result, Emit> {
}
+// Format AdvSIMD bitwise extract
+class NeonI_BitExtract<bit q, bits<2> op2,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRdnm<outs, ins, asmstr, patterns, itin> {
+ let Inst{31} = 0b0;
+ let Inst{30} = q;
+ let Inst{29-24} = 0b101110;
+ let Inst{23-22} = op2;
+ let Inst{21} = 0b0;
+ // Inherit Rm in 20-16
+ let Inst{15} = 0b0;
+ // imm4 in 14-11
+ let Inst{10} = 0b0;
+ // Inherit Rn in 9-5
+ // Inherit Rd in 4-0
+}
+
+// Format AdvSIMD perm
+class NeonI_Perm<bit q, bits<2> size, bits<3> opcode,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRdnm<outs, ins, asmstr, patterns, itin> {
+ let Inst{31} = 0b0;
+ let Inst{30} = q;
+ let Inst{29-24} = 0b001110;
+ let Inst{23-22} = size;
+ let Inst{21} = 0b0;
+ // Inherit Rm in 20-16
+ let Inst{15} = 0b0;
+ let Inst{14-12} = opcode;
+ let Inst{11-10} = 0b10;
+ // Inherit Rn in 9-5
+ // Inherit Rd in 4-0
+}
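+// For example, "zip1 v0.16b, v1.16b, v2.16b" corresponds to a NeonI_Perm
+// with q = 1, size = 0b00 and opcode = 0b011 in the AArch64 AdvSIMD
+// permute encoding.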
+
+// Format AdvSIMD table lookup
+class NeonI_TBL<bit q, bits<2> op2, bits<2> len, bit op,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRdnm<outs, ins, asmstr, patterns, itin> {
+ let Inst{31} = 0b0;
+ let Inst{30} = q;
+ let Inst{29-24} = 0b001110;
+ let Inst{23-22} = op2;
+ let Inst{21} = 0b0;
+ // Inherit Rm in 20-16
+ let Inst{15} = 0b0;
+ let Inst{14-13} = len;
+ let Inst{12} = op;
+ let Inst{11-10} = 0b00;
+ // Inherit Rn in 9-5
+ // Inherit Rd in 4-0
+}
+
// Format AdvSIMD 3 vector registers with same vector type
class NeonI_3VSame<bit q, bit u, bits<2> size, bits<5> opcode,
dag outs, dag ins, string asmstr,
list<dag> patterns, InstrItinClass itin>
- : A64InstRdnm<outs, ins, asmstr, patterns, itin>
-{
+ : A64InstRdnm<outs, ins, asmstr, patterns, itin> {
let Inst{31} = 0b0;
let Inst{30} = q;
let Inst{29} = u;
let Inst{28-24} = 0b01110;
let Inst{23-22} = size;
let Inst{21} = 0b1;
- // Inherit Rm in 20-16
+ // Inherit Rm in 20-16
let Inst{15-11} = opcode;
let Inst{10} = 0b1;
// Inherit Rn in 9-5
// Inherit Rd in 4-0
}
+// Format AdvSIMD 3 vector registers with different vector type
+class NeonI_3VDiff<bit q, bit u, bits<2> size, bits<4> opcode,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRdnm<outs, ins, asmstr, patterns, itin> {
+ let Inst{31} = 0b0;
+ let Inst{30} = q;
+ let Inst{29} = u;
+ let Inst{28-24} = 0b01110;
+ let Inst{23-22} = size;
+ let Inst{21} = 0b1;
+ // Inherit Rm in 20-16
+ let Inst{15-12} = opcode;
+ let Inst{11} = 0b0;
+ let Inst{10} = 0b0;
+ // Inherit Rn in 9-5
+ // Inherit Rd in 4-0
+}
+
+// Format AdvSIMD two registers and an element
+class NeonI_2VElem<bit q, bit u, bits<2> size, bits<4> opcode,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRdnm<outs, ins, asmstr, patterns, itin> {
+ let Inst{31} = 0b0;
+ let Inst{30} = q;
+ let Inst{29} = u;
+ let Inst{28-24} = 0b01111;
+ let Inst{23-22} = size;
+ // l in Inst{21}
+ // m in Inst{20}
+ // Inherit Rm in 19-16
+ let Inst{15-12} = opcode;
+ // h in Inst{11}
+ let Inst{10} = 0b0;
+ // Inherit Rn in 9-5
+ // Inherit Rd in 4-0
+}
+
// Format AdvSIMD 1 vector register with modified immediate
class NeonI_1VModImm<bit q, bit op,
dag outs, dag ins, string asmstr,
list<dag> patterns, InstrItinClass itin>
- : A64InstRd<outs,ins, asmstr, patterns, itin>
-{
+ : A64InstRd<outs,ins, asmstr, patterns, itin> {
bits<8> Imm;
bits<4> cmode;
let Inst{31} = 0b0;
@@ -1015,15 +1119,14 @@ class NeonI_1VModImm<bit q, bit op,
class NeonI_Scalar3Same<bit u, bits<2> size, bits<5> opcode,
dag outs, dag ins, string asmstr,
list<dag> patterns, InstrItinClass itin>
- : A64InstRdnm<outs, ins, asmstr, patterns, itin>
-{
+ : A64InstRdnm<outs, ins, asmstr, patterns, itin> {
let Inst{31} = 0b0;
let Inst{30} = 0b1;
let Inst{29} = u;
let Inst{28-24} = 0b11110;
let Inst{23-22} = size;
let Inst{21} = 0b1;
- // Inherit Rm in 20-16
+ // Inherit Rm in 20-16
let Inst{15-11} = opcode;
let Inst{10} = 0b1;
// Inherit Rn in 9-5
@@ -1035,6 +1138,98 @@ class NeonI_Scalar3Same<bit u, bits<2> size, bits<5> opcode,
class NeonI_2VMisc<bit q, bit u, bits<2> size, bits<5> opcode,
dag outs, dag ins, string asmstr,
list<dag> patterns, InstrItinClass itin>
+ : A64InstRdn<outs, ins, asmstr, patterns, itin> {
+ let Inst{31} = 0b0;
+ let Inst{30} = q;
+ let Inst{29} = u;
+ let Inst{28-24} = 0b01110;
+ let Inst{23-22} = size;
+ let Inst{21-17} = 0b10000;
+ let Inst{16-12} = opcode;
+ let Inst{11-10} = 0b10;
+
+ // Inherit Rn in 9-5
+ // Inherit Rd in 4-0
+}
+
+// Format AdvSIMD 2 vector 1 immediate shift
+class NeonI_2VShiftImm<bit q, bit u, bits<5> opcode,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRdn<outs, ins, asmstr, patterns, itin> {
+ bits<7> Imm;
+ let Inst{31} = 0b0;
+ let Inst{30} = q;
+ let Inst{29} = u;
+ let Inst{28-23} = 0b011110;
+ let Inst{22-16} = Imm;
+ let Inst{15-11} = opcode;
+ let Inst{10} = 0b1;
+
+ // Inherit Rn in 9-5
+ // Inherit Rd in 4-0
+}
+
+// Format AdvSIMD duplicate and insert
+class NeonI_copy<bit q, bit op, bits<4> imm4,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRdn<outs, ins, asmstr, patterns, itin> {
+ bits<5> Imm5;
+ let Inst{31} = 0b0;
+ let Inst{30} = q;
+ let Inst{29} = op;
+ let Inst{28-21} = 0b01110000;
+ let Inst{20-16} = Imm5;
+ let Inst{15} = 0b0;
+ let Inst{14-11} = imm4;
+ let Inst{10} = 0b1;
+
+ // Inherit Rn in 9-5
+ // Inherit Rd in 4-0
+}
+// Format AdvSIMD insert from element to vector
+class NeonI_insert<bit q, bit op,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRdn<outs, ins, asmstr, patterns, itin> {
+ bits<5> Imm5;
+ bits<4> Imm4;
+ let Inst{31} = 0b0;
+ let Inst{30} = q;
+ let Inst{29} = op;
+ let Inst{28-21} = 0b01110000;
+ let Inst{20-16} = Imm5;
+ let Inst{15} = 0b0;
+ let Inst{14-11} = Imm4;
+ let Inst{10} = 0b1;
+
+ // Inherit Rn in 9-5
+ // Inherit Rd in 4-0
+}
+
+// Format AdvSIMD scalar pairwise
+class NeonI_ScalarPair<bit u, bits<2> size, bits<5> opcode,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRdn<outs, ins, asmstr, patterns, itin> {
+ let Inst{31} = 0b0;
+ let Inst{30} = 0b1;
+ let Inst{29} = u;
+ let Inst{28-24} = 0b11110;
+ let Inst{23-22} = size;
+ let Inst{21-17} = 0b11000;
+ let Inst{16-12} = opcode;
+ let Inst{11-10} = 0b10;
+
+ // Inherit Rn in 9-5
+ // Inherit Rd in 4-0
+}
+
+// Format AdvSIMD 2 vector across lanes
+class NeonI_2VAcross<bit q, bit u, bits<2> size, bits<5> opcode,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
: A64InstRdn<outs, ins, asmstr, patterns, itin>
{
let Inst{31} = 0b0;
@@ -1042,13 +1237,253 @@ class NeonI_2VMisc<bit q, bit u, bits<2> size, bits<5> opcode,
let Inst{29} = u;
let Inst{28-24} = 0b01110;
let Inst{23-22} = size;
+ let Inst{21-17} = 0b11000;
+ let Inst{16-12} = opcode;
+ let Inst{11-10} = 0b10;
+
+ // Inherit Rn in 9-5
+ // Inherit Rd in 4-0
+}
+
+// Format AdvSIMD scalar two registers miscellaneous
+class NeonI_Scalar2SameMisc<bit u, bits<2> size, bits<5> opcode, dag outs, dag ins,
+ string asmstr, list<dag> patterns, InstrItinClass itin>
+ : A64InstRdn<outs, ins, asmstr, patterns, itin> {
+ let Inst{31} = 0b0;
+ let Inst{30} = 0b1;
+ let Inst{29} = u;
+ let Inst{28-24} = 0b11110;
+ let Inst{23-22} = size;
let Inst{21-17} = 0b10000;
let Inst{16-12} = opcode;
let Inst{11-10} = 0b10;
+ // Inherit Rn in 9-5
+ // Inherit Rd in 4-0
+}
+
+// Format AdvSIMD vector load/store multiple N-element structure
+class NeonI_LdStMult<bit q, bit l, bits<4> opcode, bits<2> size,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRtn<outs, ins, asmstr, patterns, itin> {
+ let Inst{31} = 0b0;
+ let Inst{30} = q;
+ let Inst{29-23} = 0b0011000;
+ let Inst{22} = l;
+ let Inst{21-16} = 0b000000;
+ let Inst{15-12} = opcode;
+ let Inst{11-10} = size;
+
+ // Inherit Rn in 9-5
+ // Inherit Rt in 4-0
+}
+
+// Format AdvSIMD vector load/store multiple N-element structure (post-index)
+class NeonI_LdStMult_Post<bit q, bit l, bits<4> opcode, bits<2> size,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRtnm<outs, ins, asmstr, patterns, itin> {
+ let Inst{31} = 0b0;
+ let Inst{30} = q;
+ let Inst{29-23} = 0b0011001;
+ let Inst{22} = l;
+ let Inst{21} = 0b0;
+ // Inherit Rm in 20-16
+ let Inst{15-12} = opcode;
+ let Inst{11-10} = size;
+ // Inherit Rn in 9-5
+ // Inherit Rt in 4-0
+}
+
+// Format AdvSIMD vector load Single N-element structure to all lanes
+class NeonI_LdOne_Dup<bit q, bit r, bits<3> opcode, bits<2> size, dag outs,
+ dag ins, string asmstr, list<dag> patterns,
+ InstrItinClass itin>
+ : A64InstRtn<outs, ins, asmstr, patterns, itin> {
+ let Inst{31} = 0b0;
+ let Inst{30} = q;
+ let Inst{29-23} = 0b0011010;
+ let Inst{22} = 0b1;
+ let Inst{21} = r;
+ let Inst{20-16} = 0b00000;
+ let Inst{15-13} = opcode;
+ let Inst{12} = 0b0;
+ let Inst{11-10} = size;
+
+ // Inherit Rn in 9-5
+ // Inherit Rt in 4-0
+}
+
+// Format AdvSIMD vector load/store Single N-element structure to/from one lane
+class NeonI_LdStOne_Lane<bit l, bit r, bits<2> op2_1, bit op0, dag outs,
+ dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRtn<outs, ins, asmstr, patterns, itin> {
+ bits<4> lane;
+ let Inst{31} = 0b0;
+ let Inst{29-23} = 0b0011010;
+ let Inst{22} = l;
+ let Inst{21} = r;
+ let Inst{20-16} = 0b00000;
+ let Inst{15-14} = op2_1;
+ let Inst{13} = op0;
+
+ // Inherit Rn in 9-5
+ // Inherit Rt in 4-0
+}
+
+// Format AdvSIMD post-index vector load Single N-element structure to all lanes
+class NeonI_LdOne_Dup_Post<bit q, bit r, bits<3> opcode, bits<2> size, dag outs,
+ dag ins, string asmstr, list<dag> patterns,
+ InstrItinClass itin>
+ : A64InstRtnm<outs, ins, asmstr, patterns, itin> {
+ let Inst{31} = 0b0;
+ let Inst{30} = q;
+ let Inst{29-23} = 0b0011011;
+ let Inst{22} = 0b1;
+ let Inst{21} = r;
+ // Inherit Rm in 20-16
+ let Inst{15-13} = opcode;
+ let Inst{12} = 0b0;
+ let Inst{11-10} = size;
+
+ // Inherit Rn in 9-5
+ // Inherit Rt in 4-0
+}
+
+// Format AdvSIMD post-index vector load/store Single N-element structure
+// to/from one lane
+class NeonI_LdStOne_Lane_Post<bit l, bit r, bits<2> op2_1, bit op0, dag outs,
+ dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRtnm<outs, ins, asmstr, patterns, itin> {
+ bits<4> lane;
+ let Inst{31} = 0b0;
+ let Inst{29-23} = 0b0011011;
+ let Inst{22} = l;
+ let Inst{21} = r;
+ // Inherit Rm in 20-16
+ let Inst{15-14} = op2_1;
+ let Inst{13} = op0;
+
+ // Inherit Rn in 9-5
+ // Inherit Rt in 4-0
+}
+
+// Format AdvSIMD 3 scalar registers with different type
+
+class NeonI_Scalar3Diff<bit u, bits<2> size, bits<4> opcode,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRdnm<outs, ins, asmstr, patterns, itin> {
+ let Inst{31-30} = 0b01;
+ let Inst{29} = u;
+ let Inst{28-24} = 0b11110;
+ let Inst{23-22} = size;
+ let Inst{21} = 0b1;
+ // Inherit Rm in 20-16
+ let Inst{15-12} = opcode;
+ let Inst{11-10} = 0b00;
+ // Inherit Rn in 9-5
+ // Inherit Rd in 4-0
+}
+
+// Format AdvSIMD scalar shift by immediate
+
+class NeonI_ScalarShiftImm<bit u, bits<5> opcode,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRdn<outs, ins, asmstr, patterns, itin> {
+ bits<4> Imm4;
+ bits<3> Imm3;
+ let Inst{31-30} = 0b01;
+ let Inst{29} = u;
+ let Inst{28-23} = 0b111110;
+ let Inst{22-19} = Imm4;
+ let Inst{18-16} = Imm3;
+ let Inst{15-11} = opcode;
+ let Inst{10} = 0b1;
+ // Inherit Rn in 9-5
+ // Inherit Rd in 4-0
+}
+
+// Format AdvSIMD crypto AES
+class NeonI_Crypto_AES<bits<2> size, bits<5> opcode,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRdn<outs, ins, asmstr, patterns, itin> {
+ let Inst{31-24} = 0b01001110;
+ let Inst{23-22} = size;
+ let Inst{21-17} = 0b10100;
+ let Inst{16-12} = opcode;
+ let Inst{11-10} = 0b10;
+ // Inherit Rn in 9-5
+ // Inherit Rd in 4-0
+}
+// Format AdvSIMD crypto SHA
+class NeonI_Crypto_SHA<bits<2> size, bits<5> opcode,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRdn<outs, ins, asmstr, patterns, itin> {
+ let Inst{31-24} = 0b01011110;
+ let Inst{23-22} = size;
+ let Inst{21-17} = 0b10100;
+ let Inst{16-12} = opcode;
+ let Inst{11-10} = 0b10;
// Inherit Rn in 9-5
// Inherit Rd in 4-0
}
+// Format AdvSIMD crypto 3V SHA
+class NeonI_Crypto_3VSHA<bits<2> size, bits<3> opcode,
+ dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : A64InstRdnm<outs, ins, asmstr, patterns, itin> {
+ let Inst{31-24} = 0b01011110;
+ let Inst{23-22} = size;
+ let Inst{21} = 0b0;
+ // Inherit Rm in 20-16
+ let Inst{15} = 0b0;
+ let Inst{14-12} = opcode;
+ let Inst{11-10} = 0b00;
+ // Inherit Rn in 9-5
+ // Inherit Rd in 4-0
+}
+
+// Format AdvSIMD scalar x indexed element
+class NeonI_ScalarXIndexedElem<bit u, bit szhi, bit szlo,
+ bits<4> opcode, dag outs, dag ins,
+ string asmstr, list<dag> patterns,
+ InstrItinClass itin>
+ : A64InstRdnm<outs, ins, asmstr, patterns, itin> {
+ let Inst{31} = 0b0;
+ let Inst{30} = 0b1;
+ let Inst{29} = u;
+ let Inst{28-24} = 0b11111;
+ let Inst{23} = szhi;
+ let Inst{22} = szlo;
+ // l in Inst{21}
+ // m in Inst{20}
+ // Inherit Rm in 19-16
+ let Inst{15-12} = opcode;
+ // h in Inst{11}
+ let Inst{10} = 0b0;
+ // Inherit Rn in 9-5
+ // Inherit Rd in 4-0
+}
+// Format AdvSIMD scalar copy - insert from element to scalar
+class NeonI_ScalarCopy<dag outs, dag ins, string asmstr,
+ list<dag> patterns, InstrItinClass itin>
+ : NeonI_copy<0b1, 0b0, 0b0000, outs, ins, asmstr, patterns, itin> {
+ let Inst{28} = 0b1;
+}
}
diff --git a/lib/Target/AArch64/AArch64InstrInfo.cpp b/lib/Target/AArch64/AArch64InstrInfo.cpp
index d8f45eb..180110a 100644
--- a/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -29,7 +29,7 @@
#include <algorithm>
-#define GET_INSTRINFO_CTOR
+#define GET_INSTRINFO_CTOR_DTOR
#include "AArch64GenInstrInfo.inc"
using namespace llvm;
@@ -68,43 +68,71 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
BuildMI(MBB, I, DL, get(AArch64::MRSxi), DestReg)
.addImm(A64SysReg::NZCV);
} else if (AArch64::GPR64RegClass.contains(DestReg)) {
- assert(AArch64::GPR64RegClass.contains(SrcReg));
- Opc = AArch64::ORRxxx_lsl;
- ZeroReg = AArch64::XZR;
+ if (AArch64::GPR64RegClass.contains(SrcReg)) {
+ Opc = AArch64::ORRxxx_lsl;
+ ZeroReg = AArch64::XZR;
+ } else {
+ assert(AArch64::FPR64RegClass.contains(SrcReg));
+ BuildMI(MBB, I, DL, get(AArch64::FMOVxd), DestReg)
+ .addReg(SrcReg);
+ return;
+ }
} else if (AArch64::GPR32RegClass.contains(DestReg)) {
- assert(AArch64::GPR32RegClass.contains(SrcReg));
- Opc = AArch64::ORRwww_lsl;
- ZeroReg = AArch64::WZR;
+ if (AArch64::GPR32RegClass.contains(SrcReg)) {
+ Opc = AArch64::ORRwww_lsl;
+ ZeroReg = AArch64::WZR;
+ } else {
+ assert(AArch64::FPR32RegClass.contains(SrcReg));
+ BuildMI(MBB, I, DL, get(AArch64::FMOVws), DestReg)
+ .addReg(SrcReg);
+ return;
+ }
} else if (AArch64::FPR32RegClass.contains(DestReg)) {
- assert(AArch64::FPR32RegClass.contains(SrcReg));
- BuildMI(MBB, I, DL, get(AArch64::FMOVss), DestReg)
- .addReg(SrcReg);
- return;
+ if (AArch64::FPR32RegClass.contains(SrcReg)) {
+ BuildMI(MBB, I, DL, get(AArch64::FMOVss), DestReg)
+ .addReg(SrcReg);
+ return;
+ } else {
+ assert(AArch64::GPR32RegClass.contains(SrcReg));
+ BuildMI(MBB, I, DL, get(AArch64::FMOVsw), DestReg)
+ .addReg(SrcReg);
+ return;
+ }
} else if (AArch64::FPR64RegClass.contains(DestReg)) {
- assert(AArch64::FPR64RegClass.contains(SrcReg));
- BuildMI(MBB, I, DL, get(AArch64::FMOVdd), DestReg)
- .addReg(SrcReg);
- return;
+ if (AArch64::FPR64RegClass.contains(SrcReg)) {
+ BuildMI(MBB, I, DL, get(AArch64::FMOVdd), DestReg)
+ .addReg(SrcReg);
+ return;
+ } else {
+ assert(AArch64::GPR64RegClass.contains(SrcReg));
+ BuildMI(MBB, I, DL, get(AArch64::FMOVdx), DestReg)
+ .addReg(SrcReg);
+ return;
+ }
} else if (AArch64::FPR128RegClass.contains(DestReg)) {
assert(AArch64::FPR128RegClass.contains(SrcReg));
- // FIXME: there's no good way to do this, at least without NEON:
- // + There's no single move instruction for q-registers
- // + We can't create a spill slot and use normal STR/LDR because stack
- // allocation has already happened
- // + We can't go via X-registers with FMOV because register allocation has
- // already happened.
- // This may not be efficient, but at least it works.
- BuildMI(MBB, I, DL, get(AArch64::LSFP128_PreInd_STR), AArch64::XSP)
- .addReg(SrcReg)
- .addReg(AArch64::XSP)
- .addImm(0x1ff & -16);
-
- BuildMI(MBB, I, DL, get(AArch64::LSFP128_PostInd_LDR), DestReg)
- .addReg(AArch64::XSP, RegState::Define)
- .addReg(AArch64::XSP)
- .addImm(16);
- return;
+ // If NEON is enabled, use ORR to implement this copy.
+ // If NEON isn't available, emit a store/load pair through the stack.
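+ // For example, a Q1 -> Q0 copy becomes "orr v0.16b, v1.16b, v1.16b".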
+ if (getSubTarget().hasNEON()) {
+ BuildMI(MBB, I, DL, get(AArch64::ORRvvv_16B), DestReg)
+ .addReg(SrcReg)
+ .addReg(SrcReg);
+ return;
+ } else {
+ BuildMI(MBB, I, DL, get(AArch64::LSFP128_PreInd_STR), AArch64::XSP)
+ .addReg(SrcReg)
+ .addReg(AArch64::XSP)
+ .addImm(0x1ff & -16);
+
+ BuildMI(MBB, I, DL, get(AArch64::LSFP128_PostInd_LDR), DestReg)
+ .addReg(AArch64::XSP, RegState::Define)
+ .addReg(AArch64::XSP)
+ .addImm(16);
+ return;
+ }
} else {
llvm_unreachable("Unknown register class in copyPhysReg");
}
diff --git a/lib/Target/AArch64/AArch64InstrInfo.td b/lib/Target/AArch64/AArch64InstrInfo.td
index 07289b0..23d81fc 100644
--- a/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/lib/Target/AArch64/AArch64InstrInfo.td
@@ -14,6 +14,8 @@
//===----------------------------------------------------------------------===//
// ARM Instruction Predicate Definitions.
//
+def HasFPARMv8 : Predicate<"Subtarget->hasFPARMv8()">,
+ AssemblerPredicate<"FeatureFPARMv8", "fp-armv8">;
def HasNEON : Predicate<"Subtarget->hasNEON()">,
AssemblerPredicate<"FeatureNEON", "neon">;
def HasCrypto : Predicate<"Subtarget->hasCrypto()">,
@@ -125,6 +127,8 @@ def A64Sbfx : SDNode<"AArch64ISD::SBFX", SDTA64BFX>;
def A64Ubfx : SDNode<"AArch64ISD::UBFX", SDTA64BFX>;
+class BinOpFrag<dag res> : PatFrag<(ops node:$LHS, node:$RHS), res>;
+
//===----------------------------------------------------------------------===//
// Call sequence pseudo-instructions
//===----------------------------------------------------------------------===//
@@ -1274,7 +1278,7 @@ def : Pat<(i64 (sext_inreg (anyext i32:$Rn), i1)),
// UBFX makes sense as an implementation of a 64-bit zero-extension too. Could
// use either 64-bit or 32-bit variant, but 32-bit might be more efficient.
-def : Pat<(zext i32:$Rn), (SUBREG_TO_REG (i64 0), (UBFXwwii $Rn, 0, 31),
+def : Pat<(i64 (zext i32:$Rn)), (SUBREG_TO_REG (i64 0), (UBFXwwii $Rn, 0, 31),
sub_32)>;
//===-------------------------------
@@ -1978,6 +1982,13 @@ def fpz64 : Operand<f64>,
let DecoderMethod = "DecodeFPZeroOperand";
}
+def fpz64movi : Operand<i64>,
+ ComplexPattern<f64, 1, "SelectFPZeroOperand", [fpimm]> {
+ let ParserMatchClass = fpzero_asmoperand;
+ let PrintMethod = "printFPZeroOperand";
+ let DecoderMethod = "DecodeFPZeroOperand";
+}
+
multiclass A64I_fpcmpSignal<bits<2> type, bit imm, dag ins, dag pattern> {
def _quiet : A64I_fpcmp<0b0, 0b0, type, 0b00, {0b0, imm, 0b0, 0b0, 0b0},
(outs), ins, "fcmp\t$Rn, $Rm", [pattern],
@@ -2186,23 +2197,23 @@ def FNMSUBdddd : A64I_fpdp3Impl<"fnmsub", FPR64, f64, 0b01, 0b1, 0b1, fnmsub>;
// Extra patterns for when we're allowed to optimise separate multiplication and
// addition.
-let Predicates = [UseFusedMAC] in {
-def : Pat<(fadd FPR32:$Ra, (fmul FPR32:$Rn, FPR32:$Rm)),
+let Predicates = [HasFPARMv8, UseFusedMAC] in {
+def : Pat<(f32 (fadd FPR32:$Ra, (f32 (fmul FPR32:$Rn, FPR32:$Rm)))),
(FMADDssss FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>;
-def : Pat<(fsub FPR32:$Ra, (fmul FPR32:$Rn, FPR32:$Rm)),
+def : Pat<(f32 (fsub FPR32:$Ra, (f32 (fmul FPR32:$Rn, FPR32:$Rm)))),
(FMSUBssss FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>;
-def : Pat<(fsub (fmul FPR32:$Rn, FPR32:$Rm), FPR32:$Ra),
+def : Pat<(f32 (fsub (f32 (fmul FPR32:$Rn, FPR32:$Rm)), FPR32:$Ra)),
(FNMADDssss FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>;
-def : Pat<(fsub (fneg FPR32:$Ra), (fmul FPR32:$Rn, FPR32:$Rm)),
+def : Pat<(f32 (fsub (f32 (fneg FPR32:$Ra)), (f32 (fmul FPR32:$Rn, FPR32:$Rm)))),
(FNMSUBssss FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>;
-def : Pat<(fadd FPR64:$Ra, (fmul FPR64:$Rn, FPR64:$Rm)),
+def : Pat<(f64 (fadd FPR64:$Ra, (f64 (fmul FPR64:$Rn, FPR64:$Rm)))),
(FMADDdddd FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>;
-def : Pat<(fsub FPR64:$Ra, (fmul FPR64:$Rn, FPR64:$Rm)),
+def : Pat<(f64 (fsub FPR64:$Ra, (f64 (fmul FPR64:$Rn, FPR64:$Rm)))),
(FMSUBdddd FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>;
-def : Pat<(fsub (fmul FPR64:$Rn, FPR64:$Rm), FPR64:$Ra),
+def : Pat<(f64 (fsub (f64 (fmul FPR64:$Rn, FPR64:$Rm)), FPR64:$Ra)),
(FNMADDdddd FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>;
-def : Pat<(fsub (fneg FPR64:$Ra), (fmul FPR64:$Rn, FPR64:$Rm)),
+def : Pat<(f64 (fsub (f64 (fneg FPR64:$Ra)), (f64 (fmul FPR64:$Rn, FPR64:$Rm)))),
(FNMSUBdddd FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>;
}
@@ -2342,6 +2353,7 @@ defm FCVTM : A64I_fptointRM<0b10, 0b0, "fcvtm">;
defm FCVTZ : A64I_fptointRM<0b11, 0b0, "fcvtz">;
defm FCVTA : A64I_fptointRM<0b00, 0b1, "fcvta">;
+let Predicates = [HasFPARMv8] in {
def : Pat<(i32 (fp_to_sint f32:$Rn)), (FCVTZSws $Rn)>;
def : Pat<(i64 (fp_to_sint f32:$Rn)), (FCVTZSxs $Rn)>;
def : Pat<(i32 (fp_to_uint f32:$Rn)), (FCVTZUws $Rn)>;
@@ -2350,6 +2362,7 @@ def : Pat<(i32 (fp_to_sint f64:$Rn)), (FCVTZSwd $Rn)>;
def : Pat<(i64 (fp_to_sint f64:$Rn)), (FCVTZSxd $Rn)>;
def : Pat<(i32 (fp_to_uint f64:$Rn)), (FCVTZUwd $Rn)>;
def : Pat<(i64 (fp_to_uint f64:$Rn)), (FCVTZUxd $Rn)>;
+}
multiclass A64I_inttofp<bit o0, string asmop> {
def CVTFsw : A64I_fpintI<0b0, 0b00, 0b00, {0, 1, o0}, FPR32, GPR32, asmop>;
@@ -2361,6 +2374,7 @@ multiclass A64I_inttofp<bit o0, string asmop> {
defm S : A64I_inttofp<0b0, "scvtf">;
defm U : A64I_inttofp<0b1, "ucvtf">;
+let Predicates = [HasFPARMv8] in {
def : Pat<(f32 (sint_to_fp i32:$Rn)), (SCVTFsw $Rn)>;
def : Pat<(f32 (sint_to_fp i64:$Rn)), (SCVTFsx $Rn)>;
def : Pat<(f64 (sint_to_fp i32:$Rn)), (SCVTFdw $Rn)>;
@@ -2369,16 +2383,19 @@ def : Pat<(f32 (uint_to_fp i32:$Rn)), (UCVTFsw $Rn)>;
def : Pat<(f32 (uint_to_fp i64:$Rn)), (UCVTFsx $Rn)>;
def : Pat<(f64 (uint_to_fp i32:$Rn)), (UCVTFdw $Rn)>;
def : Pat<(f64 (uint_to_fp i64:$Rn)), (UCVTFdx $Rn)>;
+}
def FMOVws : A64I_fpintI<0b0, 0b00, 0b00, 0b110, GPR32, FPR32, "fmov">;
def FMOVsw : A64I_fpintI<0b0, 0b00, 0b00, 0b111, FPR32, GPR32, "fmov">;
def FMOVxd : A64I_fpintI<0b1, 0b01, 0b00, 0b110, GPR64, FPR64, "fmov">;
def FMOVdx : A64I_fpintI<0b1, 0b01, 0b00, 0b111, FPR64, GPR64, "fmov">;
+let Predicates = [HasFPARMv8] in {
def : Pat<(i32 (bitconvert f32:$Rn)), (FMOVws $Rn)>;
def : Pat<(f32 (bitconvert i32:$Rn)), (FMOVsw $Rn)>;
def : Pat<(i64 (bitconvert f64:$Rn)), (FMOVxd $Rn)>;
def : Pat<(f64 (bitconvert i64:$Rn)), (FMOVdx $Rn)>;
+}
def lane1_asmoperand : AsmOperandClass {
let Name = "Lane1";
@@ -2401,11 +2418,13 @@ let DecoderMethod = "DecodeFMOVLaneInstruction" in {
"fmov\t$Rd.d[$Lane], $Rn", [], NoItinerary>;
}
+let Predicates = [HasFPARMv8] in {
def : InstAlias<"fmov $Rd, $Rn.2d[$Lane]",
(FMOVxv GPR64:$Rd, VPR128:$Rn, lane1:$Lane), 0b0>;
def : InstAlias<"fmov $Rd.2d[$Lane], $Rn",
(FMOVvx VPR128:$Rd, GPR64:$Rn, lane1:$Lane), 0b0>;
+}
//===----------------------------------------------------------------------===//
// Floating-point immediate instructions
@@ -2499,11 +2518,15 @@ let mayLoad = 1 in {
def LDRx_lit : A64I_LDRlitSimple<0b01, 0b0, GPR64>;
}
+let Predicates = [HasFPARMv8] in {
def LDRs_lit : A64I_LDRlitSimple<0b00, 0b1, FPR32>;
def LDRd_lit : A64I_LDRlitSimple<0b01, 0b1, FPR64>;
+}
let mayLoad = 1 in {
+ let Predicates = [HasFPARMv8] in {
def LDRq_lit : A64I_LDRlitSimple<0b10, 0b1, FPR128>;
+ }
def LDRSWx_lit : A64I_LDRlit<0b10, 0b0,
@@ -3097,6 +3120,7 @@ defm LS32
defm LS64
: A64I_LDRSTR_unsigned<"LS64", 0b11, 0b0, 0b0, "", GPR64, dword_addrparams>;
+let Predicates = [HasFPARMv8] in {
// STR/LDR to/from a B register
defm LSFP8
: A64I_LDRSTR_unsigned<"LSFP8", 0b00, 0b1, 0b0, "", FPR8, byte_addrparams>;
@@ -3115,6 +3139,7 @@ defm LSFP64
defm LSFP128
: A64I_LDRSTR_unsigned<"LSFP128", 0b00, 0b1, 0b1, "", FPR128,
qword_addrparams>;
+}
//===------------------------------
// 2.3 Signed loads
@@ -3570,10 +3595,13 @@ multiclass A64I_LSPsimple<bits<2> opc, bit v, RegisterClass SomeReg,
defm LSPair32 : A64I_LSPsimple<0b00, 0b0, GPR32, word_simm7, "LSPair32">;
defm LSPair64 : A64I_LSPsimple<0b10, 0b0, GPR64, dword_simm7, "LSPair64">;
+
+let Predicates = [HasFPARMv8] in {
defm LSFPPair32 : A64I_LSPsimple<0b00, 0b1, FPR32, word_simm7, "LSFPPair32">;
defm LSFPPair64 : A64I_LSPsimple<0b01, 0b1, FPR64, dword_simm7, "LSFPPair64">;
defm LSFPPair128 : A64I_LSPsimple<0b10, 0b1, FPR128, qword_simm7,
"LSFPPair128">;
+}
def LDPSWx : A64I_LSPoffset<0b01, 0b0, 0b1,
@@ -5162,4 +5190,4 @@ defm : regoff_pats<"Xm", (add i64:$Rn, (shl i64:$Rm, SHIFT)),
// Advanced SIMD (NEON) Support
//
-include "AArch64InstrNEON.td" \ No newline at end of file
+include "AArch64InstrNEON.td"
diff --git a/lib/Target/AArch64/AArch64InstrNEON.td b/lib/Target/AArch64/AArch64InstrNEON.td
index 98b9e3e..d71749d 100644
--- a/lib/Target/AArch64/AArch64InstrNEON.td
+++ b/lib/Target/AArch64/AArch64InstrNEON.td
@@ -41,6 +41,37 @@ def Neon_cmpz : SDNode<"AArch64ISD::NEON_CMPZ", SDTypeProfile<1, 3,
def Neon_tst : SDNode<"AArch64ISD::NEON_TST", SDTypeProfile<1, 2,
[SDTCisVec<0>, SDTCisSameAs<1, 2>]>>;
+def SDTARMVSH : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
+ SDTCisVT<2, i32>]>;
+def Neon_sqrshlImm : SDNode<"AArch64ISD::NEON_QSHLs", SDTARMVSH>;
+def Neon_uqrshlImm : SDNode<"AArch64ISD::NEON_QSHLu", SDTARMVSH>;
+
+def SDTPERMUTE : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
+ SDTCisSameAs<0, 2>]>;
+def Neon_uzp1 : SDNode<"AArch64ISD::NEON_UZP1", SDTPERMUTE>;
+def Neon_uzp2 : SDNode<"AArch64ISD::NEON_UZP2", SDTPERMUTE>;
+def Neon_zip1 : SDNode<"AArch64ISD::NEON_ZIP1", SDTPERMUTE>;
+def Neon_zip2 : SDNode<"AArch64ISD::NEON_ZIP2", SDTPERMUTE>;
+def Neon_trn1 : SDNode<"AArch64ISD::NEON_TRN1", SDTPERMUTE>;
+def Neon_trn2 : SDNode<"AArch64ISD::NEON_TRN2", SDTPERMUTE>;
+
+def SDTVSHUF : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0, 1>]>;
+def Neon_rev64 : SDNode<"AArch64ISD::NEON_REV64", SDTVSHUF>;
+def Neon_rev32 : SDNode<"AArch64ISD::NEON_REV32", SDTVSHUF>;
+def Neon_rev16 : SDNode<"AArch64ISD::NEON_REV16", SDTVSHUF>;
+def Neon_vdup : SDNode<"AArch64ISD::NEON_VDUP", SDTypeProfile<1, 1,
+ [SDTCisVec<0>]>>;
+def Neon_vduplane : SDNode<"AArch64ISD::NEON_VDUPLANE", SDTypeProfile<1, 2,
+ [SDTCisVec<0>, SDTCisVec<1>, SDTCisVT<2, i64>]>>;
+def Neon_vextract : SDNode<"AArch64ISD::NEON_VEXTRACT", SDTypeProfile<1, 3,
+ [SDTCisVec<0>, SDTCisSameAs<0, 1>,
+ SDTCisSameAs<0, 2>, SDTCisVT<3, i64>]>>;
+
+def SDT_assertext : SDTypeProfile<1, 1,
+ [SDTCisInt<0>, SDTCisInt<1>, SDTCisSameAs<1, 0>]>;
+def assertsext : SDNode<"ISD::AssertSext", SDT_assertext>;
+def assertzext : SDNode<"ISD::AssertZext", SDT_assertext>;
+
//===----------------------------------------------------------------------===//
// Multiclasses
//===----------------------------------------------------------------------===//
@@ -48,8 +79,7 @@ def Neon_tst : SDNode<"AArch64ISD::NEON_TST", SDTypeProfile<1, 2,
multiclass NeonI_3VSame_B_sizes<bit u, bits<2> size, bits<5> opcode,
string asmop, SDPatternOperator opnode8B,
SDPatternOperator opnode16B,
- bit Commutable = 0>
-{
+ bit Commutable = 0> {
let isCommutable = Commutable in {
def _8B : NeonI_3VSame<0b0, u, size, opcode,
(outs VPR64:$Rd), (ins VPR64:$Rn, VPR64:$Rm),
@@ -70,8 +100,7 @@ multiclass NeonI_3VSame_B_sizes<bit u, bits<2> size, bits<5> opcode,
multiclass NeonI_3VSame_HS_sizes<bit u, bits<5> opcode,
string asmop, SDPatternOperator opnode,
- bit Commutable = 0>
-{
+ bit Commutable = 0> {
let isCommutable = Commutable in {
def _4H : NeonI_3VSame<0b0, u, 0b01, opcode,
(outs VPR64:$Rd), (ins VPR64:$Rn, VPR64:$Rm),
@@ -105,8 +134,7 @@ multiclass NeonI_3VSame_HS_sizes<bit u, bits<5> opcode,
multiclass NeonI_3VSame_BHS_sizes<bit u, bits<5> opcode,
string asmop, SDPatternOperator opnode,
bit Commutable = 0>
- : NeonI_3VSame_HS_sizes<u, opcode, asmop, opnode, Commutable>
-{
+ : NeonI_3VSame_HS_sizes<u, opcode, asmop, opnode, Commutable> {
let isCommutable = Commutable in {
def _8B : NeonI_3VSame<0b0, u, 0b00, opcode,
(outs VPR64:$Rd), (ins VPR64:$Rn, VPR64:$Rm),
@@ -127,8 +155,7 @@ multiclass NeonI_3VSame_BHS_sizes<bit u, bits<5> opcode,
multiclass NeonI_3VSame_BHSD_sizes<bit u, bits<5> opcode,
string asmop, SDPatternOperator opnode,
bit Commutable = 0>
- : NeonI_3VSame_BHS_sizes<u, opcode, asmop, opnode, Commutable>
-{
+ : NeonI_3VSame_BHS_sizes<u, opcode, asmop, opnode, Commutable> {
let isCommutable = Commutable in {
def _2D : NeonI_3VSame<0b1, u, 0b11, opcode,
(outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
@@ -146,8 +173,7 @@ multiclass NeonI_3VSame_SD_sizes<bit u, bit size, bits<5> opcode,
SDPatternOperator opnode4S,
SDPatternOperator opnode2D,
ValueType ResTy2S, ValueType ResTy4S,
- ValueType ResTy2D, bit Commutable = 0>
-{
+ ValueType ResTy2D, bit Commutable = 0> {
let isCommutable = Commutable in {
def _2S : NeonI_3VSame<0b0, u, {size, 0b0}, opcode,
(outs VPR64:$Rd), (ins VPR64:$Rn, VPR64:$Rm),
@@ -206,8 +232,8 @@ defm PMULvvv : NeonI_3VSame_B_sizes<0b1, 0b00, 0b10011, "pmul",
// class NeonI_3VSame_Constraint_impl: NeonI_3VSame with no data type and
// two operands constraints.
class NeonI_3VSame_Constraint_impl<string asmop, string asmlane,
- RegisterClass VPRC, ValueType OpTy, bit q, bit u, bits<2> size, bits<5> opcode,
- SDPatternOperator opnode>
+ RegisterOperand VPRC, ValueType OpTy, bit q, bit u, bits<2> size,
+ bits<5> opcode, SDPatternOperator opnode>
: NeonI_3VSame<q, u, size, opcode,
(outs VPRC:$Rd), (ins VPRC:$src, VPRC:$Rn, VPRC:$Rm),
asmop # "\t$Rd" # asmlane # ", $Rn" # asmlane # ", $Rm" # asmlane,
@@ -312,26 +338,24 @@ defm ORRvvv : NeonI_3VSame_B_sizes<0b0, 0b10, 0b00011, "orr", or, or, 1>;
// ORR disassembled as MOV if Vn==Vm
// Vector Move - register
-// Alias for ORR if Vn=Vm and it is the preferred syntax
+// Alias for ORR if Vn=Vm.
+// FIXME: This is actually the preferred syntax but TableGen can't deal with
+// custom printing of aliases.
def : NeonInstAlias<"mov $Rd.8b, $Rn.8b",
- (ORRvvv_8B VPR64:$Rd, VPR64:$Rn, VPR64:$Rn)>;
+ (ORRvvv_8B VPR64:$Rd, VPR64:$Rn, VPR64:$Rn), 0>;
def : NeonInstAlias<"mov $Rd.16b, $Rn.16b",
- (ORRvvv_16B VPR128:$Rd, VPR128:$Rn, VPR128:$Rn)>;
-
-def Neon_immAllOnes: PatLeaf<(Neon_movi (i32 timm), (i32 imm)), [{
- ConstantSDNode *ImmConstVal = cast<ConstantSDNode>(N->getOperand(0));
- ConstantSDNode *OpCmodeConstVal = cast<ConstantSDNode>(N->getOperand(1));
- unsigned EltBits;
- uint64_t EltVal = A64Imms::decodeNeonModImm(ImmConstVal->getZExtValue(),
- OpCmodeConstVal->getZExtValue(), EltBits);
- return (EltBits == 8 && EltVal == 0xff);
-}]>;
+ (ORRvvv_16B VPR128:$Rd, VPR128:$Rn, VPR128:$Rn), 0>;
+// The MOVI instruction takes two immediate operands. The first is the
+// immediate encoding, while the second is the cmode. A cmode of 14, or
+// 0b1110, produces a MOVI operation, rather than a MVNI, ORR, or BIC.
+def Neon_AllZero : PatFrag<(ops), (Neon_movi (i32 0), (i32 14))>;
+def Neon_AllOne : PatFrag<(ops), (Neon_movi (i32 255), (i32 14))>;
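+// Neon_AllZero thus matches an all-zero vector, and Neon_AllOne a vector
+// with every byte lane set to 0xff (all ones).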
def Neon_not8B : PatFrag<(ops node:$in),
- (xor node:$in, (bitconvert (v8i8 Neon_immAllOnes)))>;
+ (xor node:$in, (bitconvert (v8i8 Neon_AllOne)))>;
def Neon_not16B : PatFrag<(ops node:$in),
- (xor node:$in, (bitconvert (v16i8 Neon_immAllOnes)))>;
+ (xor node:$in, (bitconvert (v16i8 Neon_AllOne)))>;
def Neon_orn8B : PatFrag<(ops node:$Rn, node:$Rm),
(or node:$Rn, (Neon_not8B node:$Rm))>;
@@ -447,6 +471,9 @@ multiclass Neon_bitwise3V_patterns<SDPatternOperator opnode,
def : Pat<(v2f32 (int_arm_neon_vbsl (v2f32 VPR64:$src),
(v2f32 VPR64:$Rn), (v2f32 VPR64:$Rm))),
(INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
+ def : Pat<(v1f64 (int_arm_neon_vbsl (v1f64 VPR64:$src),
+ (v1f64 VPR64:$Rn), (v1f64 VPR64:$Rm))),
+ (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
def : Pat<(v16i8 (int_arm_neon_vbsl (v16i8 VPR128:$src),
(v16i8 VPR128:$Rn), (v16i8 VPR128:$Rm))),
(INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
@@ -562,7 +589,7 @@ def Neon_cmgt : PatFrag<(ops node:$lhs, node:$rhs),
// NeonI_compare_aliases class: swaps register operands to implement
// comparison aliases, e.g., CMLE is alias for CMGE with operands reversed.
class NeonI_compare_aliases<string asmop, string asmlane,
- Instruction inst, RegisterClass VPRC>
+ Instruction inst, RegisterOperand VPRC>
: NeonInstAlias<asmop # "\t$Rd" # asmlane #", $Rn" # asmlane #
", $Rm" # asmlane,
(inst VPRC:$Rd, VPRC:$Rm, VPRC:$Rn), 0b0>;
@@ -1023,6 +1050,20 @@ defm neon_mov_imm_LSLH : neon_mov_imm_shift_operands<"LSL", "H", "true", [{
return (HasShift && !ShiftOnesIn);
}]>;
+def neon_uimm1_asmoperand : AsmOperandClass
+{
+ let Name = "UImm1";
+ let PredicateMethod = "isUImm<1>";
+ let RenderMethod = "addImmOperands";
+}
+
+def neon_uimm2_asmoperand : AsmOperandClass
+{
+ let Name = "UImm2";
+ let PredicateMethod = "isUImm<2>";
+ let RenderMethod = "addImmOperands";
+}
+
def neon_uimm8_asmoperand : AsmOperandClass
{
let Name = "UImm8";
@@ -1032,7 +1073,7 @@ def neon_uimm8_asmoperand : AsmOperandClass
def neon_uimm8 : Operand<i32>, ImmLeaf<i32, [{(void)Imm; return true;}]> {
let ParserMatchClass = neon_uimm8_asmoperand;
- let PrintMethod = "printNeonUImm8Operand";
+ let PrintMethod = "printUImmHexOperand";
}
def neon_uimm64_mask_asmoperand : AsmOperandClass
@@ -1057,7 +1098,7 @@ multiclass NeonI_mov_imm_lsl_sizes<string asmop, bit op,
(outs VPR64:$Rd),
(ins neon_uimm8:$Imm,
neon_mov_imm_LSL_operand:$Simm),
- !strconcat(asmop, " $Rd.2s, $Imm$Simm"),
+ !strconcat(asmop, "\t$Rd.2s, $Imm$Simm"),
[(set (v2i32 VPR64:$Rd),
(v2i32 (opnode (timm:$Imm),
(neon_mov_imm_LSL_operand:$Simm))))],
@@ -1070,7 +1111,7 @@ multiclass NeonI_mov_imm_lsl_sizes<string asmop, bit op,
(outs VPR128:$Rd),
(ins neon_uimm8:$Imm,
neon_mov_imm_LSL_operand:$Simm),
- !strconcat(asmop, " $Rd.4s, $Imm$Simm"),
+ !strconcat(asmop, "\t$Rd.4s, $Imm$Simm"),
[(set (v4i32 VPR128:$Rd),
(v4i32 (opnode (timm:$Imm),
(neon_mov_imm_LSL_operand:$Simm))))],
@@ -1084,7 +1125,7 @@ multiclass NeonI_mov_imm_lsl_sizes<string asmop, bit op,
(outs VPR64:$Rd),
(ins neon_uimm8:$Imm,
neon_mov_imm_LSLH_operand:$Simm),
- !strconcat(asmop, " $Rd.4h, $Imm$Simm"),
+ !strconcat(asmop, "\t$Rd.4h, $Imm$Simm"),
[(set (v4i16 VPR64:$Rd),
(v4i16 (opnode (timm:$Imm),
(neon_mov_imm_LSLH_operand:$Simm))))],
@@ -1097,7 +1138,7 @@ multiclass NeonI_mov_imm_lsl_sizes<string asmop, bit op,
(outs VPR128:$Rd),
(ins neon_uimm8:$Imm,
neon_mov_imm_LSLH_operand:$Simm),
- !strconcat(asmop, " $Rd.8h, $Imm$Simm"),
+ !strconcat(asmop, "\t$Rd.8h, $Imm$Simm"),
[(set (v8i16 VPR128:$Rd),
(v8i16 (opnode (timm:$Imm),
(neon_mov_imm_LSLH_operand:$Simm))))],
@@ -1117,7 +1158,7 @@ multiclass NeonI_mov_imm_with_constraint_lsl_sizes<string asmop, bit op,
(outs VPR64:$Rd),
(ins VPR64:$src, neon_uimm8:$Imm,
neon_mov_imm_LSL_operand:$Simm),
- !strconcat(asmop, " $Rd.2s, $Imm$Simm"),
+ !strconcat(asmop, "\t$Rd.2s, $Imm$Simm"),
[(set (v2i32 VPR64:$Rd),
(v2i32 (opnode (v2i32 VPR64:$src),
(v2i32 (bitconvert (v2i32 (neonopnode timm:$Imm,
@@ -1131,7 +1172,7 @@ multiclass NeonI_mov_imm_with_constraint_lsl_sizes<string asmop, bit op,
(outs VPR128:$Rd),
(ins VPR128:$src, neon_uimm8:$Imm,
neon_mov_imm_LSL_operand:$Simm),
- !strconcat(asmop, " $Rd.4s, $Imm$Simm"),
+ !strconcat(asmop, "\t$Rd.4s, $Imm$Simm"),
[(set (v4i32 VPR128:$Rd),
(v4i32 (opnode (v4i32 VPR128:$src),
(v4i32 (bitconvert (v4i32 (neonopnode timm:$Imm,
@@ -1146,7 +1187,7 @@ multiclass NeonI_mov_imm_with_constraint_lsl_sizes<string asmop, bit op,
(outs VPR64:$Rd),
(ins VPR64:$src, neon_uimm8:$Imm,
neon_mov_imm_LSLH_operand:$Simm),
- !strconcat(asmop, " $Rd.4h, $Imm$Simm"),
+ !strconcat(asmop, "\t$Rd.4h, $Imm$Simm"),
[(set (v4i16 VPR64:$Rd),
(v4i16 (opnode (v4i16 VPR64:$src),
(v4i16 (bitconvert (v4i16 (neonopnode timm:$Imm,
@@ -1160,7 +1201,7 @@ multiclass NeonI_mov_imm_with_constraint_lsl_sizes<string asmop, bit op,
(outs VPR128:$Rd),
(ins VPR128:$src, neon_uimm8:$Imm,
neon_mov_imm_LSLH_operand:$Simm),
- !strconcat(asmop, " $Rd.8h, $Imm$Simm"),
+ !strconcat(asmop, "\t$Rd.8h, $Imm$Simm"),
[(set (v8i16 VPR128:$Rd),
(v8i16 (opnode (v8i16 VPR128:$src),
(v8i16 (bitconvert (v8i16 (neonopnode timm:$Imm,
@@ -1180,7 +1221,7 @@ multiclass NeonI_mov_imm_msl_sizes<string asmop, bit op,
(outs VPR64:$Rd),
(ins neon_uimm8:$Imm,
neon_mov_imm_MSL_operand:$Simm),
- !strconcat(asmop, " $Rd.2s, $Imm$Simm"),
+ !strconcat(asmop, "\t$Rd.2s, $Imm$Simm"),
[(set (v2i32 VPR64:$Rd),
(v2i32 (opnode (timm:$Imm),
(neon_mov_imm_MSL_operand:$Simm))))],
@@ -1193,7 +1234,7 @@ multiclass NeonI_mov_imm_msl_sizes<string asmop, bit op,
(outs VPR128:$Rd),
(ins neon_uimm8:$Imm,
neon_mov_imm_MSL_operand:$Simm),
- !strconcat(asmop, " $Rd.4s, $Imm$Simm"),
+ !strconcat(asmop, "\t$Rd.4s, $Imm$Simm"),
[(set (v4i32 VPR128:$Rd),
(v4i32 (opnode (timm:$Imm),
(neon_mov_imm_MSL_operand:$Simm))))],
@@ -1315,8 +1356,8 @@ defm MVNIvi_msl : NeonI_mov_imm_msl_sizes<"mvni", 0b1, Neon_mvni>;
}
class NeonI_mov_imm_lsl_aliases<string asmop, string asmlane,
- Instruction inst, RegisterClass VPRC>
- : NeonInstAlias<!strconcat(asmop, " $Rd," # asmlane # ", $Imm"),
+ Instruction inst, RegisterOperand VPRC>
+ : NeonInstAlias<!strconcat(asmop, "\t$Rd," # asmlane # ", $Imm"),
(inst VPRC:$Rd, neon_uimm8:$Imm, 0), 0b0>;
// Aliases for Vector Move Immediate Shifted
@@ -1382,9 +1423,8 @@ let isReMaterializable = 1 in {
def MOVIdi : NeonI_1VModImm<0b0, 0b1,
(outs FPR64:$Rd), (ins neon_uimm64_mask:$Imm),
"movi\t $Rd, $Imm",
- [(set (f64 FPR64:$Rd),
- (f64 (bitconvert
- (v1i64 (Neon_movi (timm:$Imm), (i32 imm))))))],
+ [(set (v1i64 FPR64:$Rd),
+ (v1i64 (Neon_movi (timm:$Imm), (i32 imm))))],
NoItinerary> {
let cmode = 0b1110;
}
@@ -1392,7 +1432,7 @@ def MOVIdi : NeonI_1VModImm<0b0, 0b1,
// Vector Floating Point Move Immediate
-class NeonI_FMOV_impl<string asmlane, RegisterClass VPRC, ValueType OpTy,
+class NeonI_FMOV_impl<string asmlane, RegisterOperand VPRC, ValueType OpTy,
Operand immOpType, bit q, bit op>
: NeonI_1VModImm<q, op,
(outs VPRC:$Rd), (ins immOpType:$Imm),
@@ -1409,49 +1449,3339 @@ def FMOVvi_4S : NeonI_FMOV_impl<".4s", VPR128, v4f32, fmov32_operand, 0b1, 0b0>;
def FMOVvi_2D : NeonI_FMOV_impl<".2d", VPR128, v2f64, fmov64_operand, 0b1, 0b1>;
}
-// Scalar Arithmetic
+// Vector Shift (Immediate)
+// Immediate in [0, 63]
+def imm0_63 : Operand<i32> {
+ let ParserMatchClass = uimm6_asmoperand;
+}
-class NeonI_Scalar3Same_D_size<bit u, bits<5> opcode, string asmop>
- : NeonI_Scalar3Same<u, 0b11, opcode,
- (outs FPR64:$Rd), (ins FPR64:$Rn, FPR64:$Rm),
- !strconcat(asmop, " $Rd, $Rn, $Rm"),
- [],
+// Shift Right/Left Immediate - The immh:immb field of these shifts is encoded
+// as follows:
+//
+// Offset Encoding
+// 8 immh:immb<6:3> = '0001xxx', <imm> is encoded in immh:immb<2:0>
+// 16 immh:immb<6:4> = '001xxxx', <imm> is encoded in immh:immb<3:0>
+// 32 immh:immb<6:5> = '01xxxxx', <imm> is encoded in immh:immb<4:0>
+// 64 immh:immb<6> = '1xxxxxx', <imm> is encoded in immh:immb<5:0>
+//
+// The shift right immediate amount, in the range 1 to element bits, is
+// computed as 2 * Offset - UInt(immh:immb). The shift left immediate amount,
+// in the range 0 to element bits - 1, is computed as UInt(immh:immb) - Offset.
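+// For example, "sshr v0.8b, v1.8b, #3" has 8-bit elements (Offset 8), so
+// immh:immb = 2 * 8 - 3 = 13 = 0b0001101, matching the '0001xxx' row above.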
+
+class shr_imm_asmoperands<string OFFSET> : AsmOperandClass {
+ let Name = "ShrImm" # OFFSET;
+ let RenderMethod = "addImmOperands";
+ let DiagnosticType = "ShrImm" # OFFSET;
+}
+
+class shr_imm<string OFFSET> : Operand<i32> {
+ let EncoderMethod = "getShiftRightImm" # OFFSET;
+ let DecoderMethod = "DecodeShiftRightImm" # OFFSET;
+ let ParserMatchClass =
+ !cast<AsmOperandClass>("shr_imm" # OFFSET # "_asmoperand");
+}
+
+def shr_imm8_asmoperand : shr_imm_asmoperands<"8">;
+def shr_imm16_asmoperand : shr_imm_asmoperands<"16">;
+def shr_imm32_asmoperand : shr_imm_asmoperands<"32">;
+def shr_imm64_asmoperand : shr_imm_asmoperands<"64">;
+
+def shr_imm8 : shr_imm<"8">, ImmLeaf<i32, [{return Imm > 0 && Imm <= 8;}]>;
+def shr_imm16 : shr_imm<"16">, ImmLeaf<i32, [{return Imm > 0 && Imm <= 16;}]>;
+def shr_imm32 : shr_imm<"32">, ImmLeaf<i32, [{return Imm > 0 && Imm <= 32;}]>;
+def shr_imm64 : shr_imm<"64">, ImmLeaf<i32, [{return Imm > 0 && Imm <= 64;}]>;
+
+class shl_imm_asmoperands<string OFFSET> : AsmOperandClass {
+ let Name = "ShlImm" # OFFSET;
+ let RenderMethod = "addImmOperands";
+ let DiagnosticType = "ShlImm" # OFFSET;
+}
+
+class shl_imm<string OFFSET> : Operand<i32> {
+ let EncoderMethod = "getShiftLeftImm" # OFFSET;
+ let DecoderMethod = "DecodeShiftLeftImm" # OFFSET;
+ let ParserMatchClass =
+ !cast<AsmOperandClass>("shl_imm" # OFFSET # "_asmoperand");
+}
+
+def shl_imm8_asmoperand : shl_imm_asmoperands<"8">;
+def shl_imm16_asmoperand : shl_imm_asmoperands<"16">;
+def shl_imm32_asmoperand : shl_imm_asmoperands<"32">;
+def shl_imm64_asmoperand : shl_imm_asmoperands<"64">;
+
+def shl_imm8 : shl_imm<"8">, ImmLeaf<i32, [{return Imm >= 0 && Imm < 8;}]>;
+def shl_imm16 : shl_imm<"16">, ImmLeaf<i32, [{return Imm >= 0 && Imm < 16;}]>;
+def shl_imm32 : shl_imm<"32">, ImmLeaf<i32, [{return Imm >= 0 && Imm < 32;}]>;
+def shl_imm64 : shl_imm<"64">, ImmLeaf<i32, [{return Imm >= 0 && Imm < 64;}]>;
+
+class N2VShift<bit q, bit u, bits<5> opcode, string asmop, string T,
+ RegisterOperand VPRC, ValueType Ty, Operand ImmTy, SDNode OpNode>
+ : NeonI_2VShiftImm<q, u, opcode,
+ (outs VPRC:$Rd), (ins VPRC:$Rn, ImmTy:$Imm),
+ asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm",
+ [(set (Ty VPRC:$Rd),
+ (Ty (OpNode (Ty VPRC:$Rn),
+ (Ty (Neon_vdup (i32 ImmTy:$Imm))))))],
+ NoItinerary>;
+
+multiclass NeonI_N2VShL<bit u, bits<5> opcode, string asmop> {
+ // 64-bit vector types.
+ def _8B : N2VShift<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shl_imm8, shl> {
+ let Inst{22-19} = 0b0001; // immh:immb = 0001xxx
+ }
+
+ def _4H : N2VShift<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shl_imm16, shl> {
+ let Inst{22-20} = 0b001; // immh:immb = 001xxxx
+ }
+
+ def _2S : N2VShift<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shl_imm32, shl> {
+ let Inst{22-21} = 0b01; // immh:immb = 01xxxxx
+ }
+
+ // 128-bit vector types.
+ def _16B : N2VShift<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shl_imm8, shl> {
+ let Inst{22-19} = 0b0001; // immh:immb = 0001xxx
+ }
+
+ def _8H : N2VShift<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shl_imm16, shl> {
+ let Inst{22-20} = 0b001; // immh:immb = 001xxxx
+ }
+
+ def _4S : N2VShift<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shl_imm32, shl> {
+ let Inst{22-21} = 0b01; // immh:immb = 01xxxxx
+ }
+
+ def _2D : N2VShift<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shl_imm64, shl> {
+ let Inst{22} = 0b1; // immh:immb = 1xxxxxx
+ }
+}
+
+multiclass NeonI_N2VShR<bit u, bits<5> opcode, string asmop, SDNode OpNode> {
+ def _8B : N2VShift<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shr_imm8,
+ OpNode> {
+ let Inst{22-19} = 0b0001;
+ }
+
+ def _4H : N2VShift<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shr_imm16,
+ OpNode> {
+ let Inst{22-20} = 0b001;
+ }
+
+ def _2S : N2VShift<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shr_imm32,
+ OpNode> {
+ let Inst{22-21} = 0b01;
+ }
+
+ def _16B : N2VShift<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shr_imm8,
+ OpNode> {
+ let Inst{22-19} = 0b0001;
+ }
+
+ def _8H : N2VShift<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shr_imm16,
+ OpNode> {
+ let Inst{22-20} = 0b001;
+ }
+
+ def _4S : N2VShift<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shr_imm32,
+ OpNode> {
+ let Inst{22-21} = 0b01;
+ }
+
+ def _2D : N2VShift<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shr_imm64,
+ OpNode> {
+ let Inst{22} = 0b1;
+ }
+}
+
+// Shift left
+defm SHLvvi : NeonI_N2VShL<0b0, 0b01010, "shl">;
+
+// Shift right
+defm SSHRvvi : NeonI_N2VShR<0b0, 0b00000, "sshr", sra>;
+defm USHRvvi : NeonI_N2VShR<0b1, 0b00000, "ushr", srl>;
+
+def Neon_High16B : PatFrag<(ops node:$in),
+ (extract_subvector (v16i8 node:$in), (iPTR 8))>;
+def Neon_High8H : PatFrag<(ops node:$in),
+ (extract_subvector (v8i16 node:$in), (iPTR 4))>;
+def Neon_High4S : PatFrag<(ops node:$in),
+ (extract_subvector (v4i32 node:$in), (iPTR 2))>;
+def Neon_High2D : PatFrag<(ops node:$in),
+ (extract_subvector (v2i64 node:$in), (iPTR 1))>;
+def Neon_High4float : PatFrag<(ops node:$in),
+ (extract_subvector (v4f32 node:$in), (iPTR 2))>;
+def Neon_High2double : PatFrag<(ops node:$in),
+ (extract_subvector (v2f64 node:$in), (iPTR 1))>;
+
+def Neon_Low16B : PatFrag<(ops node:$in),
+ (v8i8 (extract_subvector (v16i8 node:$in),
+ (iPTR 0)))>;
+def Neon_Low8H : PatFrag<(ops node:$in),
+ (v4i16 (extract_subvector (v8i16 node:$in),
+ (iPTR 0)))>;
+def Neon_Low4S : PatFrag<(ops node:$in),
+ (v2i32 (extract_subvector (v4i32 node:$in),
+ (iPTR 0)))>;
+def Neon_Low2D : PatFrag<(ops node:$in),
+ (v1i64 (extract_subvector (v2i64 node:$in),
+ (iPTR 0)))>;
+def Neon_Low4float : PatFrag<(ops node:$in),
+ (v2f32 (extract_subvector (v4f32 node:$in),
+ (iPTR 0)))>;
+def Neon_Low2double : PatFrag<(ops node:$in),
+ (v1f64 (extract_subvector (v2f64 node:$in),
+ (iPTR 0)))>;
+
+class N2VShiftLong<bit q, bit u, bits<5> opcode, string asmop, string DestT,
+ string SrcT, ValueType DestTy, ValueType SrcTy,
+ Operand ImmTy, SDPatternOperator ExtOp>
+ : NeonI_2VShiftImm<q, u, opcode, (outs VPR128:$Rd),
+ (ins VPR64:$Rn, ImmTy:$Imm),
+ asmop # "\t$Rd." # DestT # ", $Rn." # SrcT # ", $Imm",
+ [(set (DestTy VPR128:$Rd),
+ (DestTy (shl
+ (DestTy (ExtOp (SrcTy VPR64:$Rn))),
+ (DestTy (Neon_vdup (i32 ImmTy:$Imm))))))],
+ NoItinerary>;
+
+class N2VShiftLongHigh<bit q, bit u, bits<5> opcode, string asmop, string DestT,
+ string SrcT, ValueType DestTy, ValueType SrcTy,
+ int StartIndex, Operand ImmTy,
+ SDPatternOperator ExtOp, PatFrag getTop>
+ : NeonI_2VShiftImm<q, u, opcode, (outs VPR128:$Rd),
+ (ins VPR128:$Rn, ImmTy:$Imm),
+ asmop # "2\t$Rd." # DestT # ", $Rn." # SrcT # ", $Imm",
+ [(set (DestTy VPR128:$Rd),
+ (DestTy (shl
+ (DestTy (ExtOp
+ (SrcTy (getTop VPR128:$Rn)))),
+ (DestTy (Neon_vdup (i32 ImmTy:$Imm))))))],
+ NoItinerary>;
+
+multiclass NeonI_N2VShLL<string prefix, bit u, bits<5> opcode, string asmop,
+ SDNode ExtOp> {
+ // 64-bit vector types.
+ def _8B : N2VShiftLong<0b0, u, opcode, asmop, "8h", "8b", v8i16, v8i8,
+ shl_imm8, ExtOp> {
+ let Inst{22-19} = 0b0001; // immh:immb = 0001xxx
+ }
+
+ def _4H : N2VShiftLong<0b0, u, opcode, asmop, "4s", "4h", v4i32, v4i16,
+ shl_imm16, ExtOp> {
+ let Inst{22-20} = 0b001; // immh:immb = 001xxxx
+ }
+
+ def _2S : N2VShiftLong<0b0, u, opcode, asmop, "2d", "2s", v2i64, v2i32,
+ shl_imm32, ExtOp> {
+ let Inst{22-21} = 0b01; // immh:immb = 01xxxxx
+ }
+
+ // 128-bit vector types
+ def _16B : N2VShiftLongHigh<0b1, u, opcode, asmop, "8h", "16b", v8i16, v8i8,
+ 8, shl_imm8, ExtOp, Neon_High16B> {
+ let Inst{22-19} = 0b0001; // immh:immb = 0001xxx
+ }
+
+ def _8H : N2VShiftLongHigh<0b1, u, opcode, asmop, "4s", "8h", v4i32, v4i16,
+ 4, shl_imm16, ExtOp, Neon_High8H> {
+ let Inst{22-20} = 0b001; // immh:immb = 001xxxx
+ }
+
+ def _4S : N2VShiftLongHigh<0b1, u, opcode, asmop, "2d", "4s", v2i64, v2i32,
+ 2, shl_imm32, ExtOp, Neon_High4S> {
+ let Inst{22-21} = 0b01; // immh:immb = 01xxxxx
+ }
+
+ // Use other patterns to match when the immediate is 0.
+ def : Pat<(v8i16 (ExtOp (v8i8 VPR64:$Rn))),
+ (!cast<Instruction>(prefix # "_8B") VPR64:$Rn, 0)>;
+
+ def : Pat<(v4i32 (ExtOp (v4i16 VPR64:$Rn))),
+ (!cast<Instruction>(prefix # "_4H") VPR64:$Rn, 0)>;
+
+ def : Pat<(v2i64 (ExtOp (v2i32 VPR64:$Rn))),
+ (!cast<Instruction>(prefix # "_2S") VPR64:$Rn, 0)>;
+
+ def : Pat<(v8i16 (ExtOp (v8i8 (Neon_High16B VPR128:$Rn)))),
+ (!cast<Instruction>(prefix # "_16B") VPR128:$Rn, 0)>;
+
+ def : Pat<(v4i32 (ExtOp (v4i16 (Neon_High8H VPR128:$Rn)))),
+ (!cast<Instruction>(prefix # "_8H") VPR128:$Rn, 0)>;
+
+ def : Pat<(v2i64 (ExtOp (v2i32 (Neon_High4S VPR128:$Rn)))),
+ (!cast<Instruction>(prefix # "_4S") VPR128:$Rn, 0)>;
+}
+
+// Shift left long
+defm SSHLLvvi : NeonI_N2VShLL<"SSHLLvvi", 0b0, 0b10100, "sshll", sext>;
+defm USHLLvvi : NeonI_N2VShLL<"USHLLvvi", 0b1, 0b10100, "ushll", zext>;
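+
+// Note: via the zero-immediate patterns above, a plain extension is selected
+// to these instructions as well, e.g. (v8i16 (sext (v8i8 $Rn))) becomes
+// "sshll $Rd.8h, $Rn.8b, #0".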
+
+// Rounding/Saturating shift
+class N2VShift_RQ<bit q, bit u, bits<5> opcode, string asmop, string T,
+ RegisterOperand VPRC, ValueType Ty, Operand ImmTy,
+ SDPatternOperator OpNode>
+ : NeonI_2VShiftImm<q, u, opcode,
+ (outs VPRC:$Rd), (ins VPRC:$Rn, ImmTy:$Imm),
+ asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm",
+ [(set (Ty VPRC:$Rd), (Ty (OpNode (Ty VPRC:$Rn),
+ (i32 ImmTy:$Imm))))],
+ NoItinerary>;
+
+// shift right (vector by immediate)
+multiclass NeonI_N2VShR_RQ<bit u, bits<5> opcode, string asmop,
+ SDPatternOperator OpNode> {
+ def _8B : N2VShift_RQ<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shr_imm8,
+ OpNode> {
+ let Inst{22-19} = 0b0001;
+ }
+
+ def _4H : N2VShift_RQ<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shr_imm16,
+ OpNode> {
+ let Inst{22-20} = 0b001;
+ }
+
+ def _2S : N2VShift_RQ<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shr_imm32,
+ OpNode> {
+ let Inst{22-21} = 0b01;
+ }
+
+ def _16B : N2VShift_RQ<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shr_imm8,
+ OpNode> {
+ let Inst{22-19} = 0b0001;
+ }
+
+ def _8H : N2VShift_RQ<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shr_imm16,
+ OpNode> {
+ let Inst{22-20} = 0b001;
+ }
+
+ def _4S : N2VShift_RQ<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shr_imm32,
+ OpNode> {
+ let Inst{22-21} = 0b01;
+ }
+
+ def _2D : N2VShift_RQ<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shr_imm64,
+ OpNode> {
+ let Inst{22} = 0b1;
+ }
+}
+
+multiclass NeonI_N2VShL_Q<bit u, bits<5> opcode, string asmop,
+ SDPatternOperator OpNode> {
+ // 64-bit vector types.
+ def _8B : N2VShift_RQ<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shl_imm8,
+ OpNode> {
+ let Inst{22-19} = 0b0001;
+ }
+
+ def _4H : N2VShift_RQ<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shl_imm16,
+ OpNode> {
+ let Inst{22-20} = 0b001;
+ }
+
+ def _2S : N2VShift_RQ<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shl_imm32,
+ OpNode> {
+ let Inst{22-21} = 0b01;
+ }
+
+ // 128-bit vector types.
+ def _16B : N2VShift_RQ<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shl_imm8,
+ OpNode> {
+ let Inst{22-19} = 0b0001;
+ }
+
+ def _8H : N2VShift_RQ<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shl_imm16,
+ OpNode> {
+ let Inst{22-20} = 0b001;
+ }
+
+ def _4S : N2VShift_RQ<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shl_imm32,
+ OpNode> {
+ let Inst{22-21} = 0b01;
+ }
+
+ def _2D : N2VShift_RQ<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shl_imm64,
+ OpNode> {
+ let Inst{22} = 0b1;
+ }
+}
+
+// Rounding shift right
+defm SRSHRvvi : NeonI_N2VShR_RQ<0b0, 0b00100, "srshr",
+ int_aarch64_neon_vsrshr>;
+defm URSHRvvi : NeonI_N2VShR_RQ<0b1, 0b00100, "urshr",
+ int_aarch64_neon_vurshr>;
+
+// Saturating shift left unsigned
+defm SQSHLUvvi : NeonI_N2VShL_Q<0b1, 0b01100, "sqshlu", int_aarch64_neon_vsqshlu>;
+
+// Saturating shift left
+defm SQSHLvvi : NeonI_N2VShL_Q<0b0, 0b01110, "sqshl", Neon_sqrshlImm>;
+defm UQSHLvvi : NeonI_N2VShL_Q<0b1, 0b01110, "uqshl", Neon_uqrshlImm>;
+
+class N2VShiftAdd<bit q, bit u, bits<5> opcode, string asmop, string T,
+ RegisterOperand VPRC, ValueType Ty, Operand ImmTy,
+ SDNode OpNode>
+ : NeonI_2VShiftImm<q, u, opcode,
+ (outs VPRC:$Rd), (ins VPRC:$src, VPRC:$Rn, ImmTy:$Imm),
+ asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm",
+ [(set (Ty VPRC:$Rd), (Ty (add (Ty VPRC:$src),
+ (Ty (OpNode (Ty VPRC:$Rn),
+ (Ty (Neon_vdup (i32 ImmTy:$Imm))))))))],
+ NoItinerary> {
+ let Constraints = "$src = $Rd";
+}
+
+// Shift Right accumulate
+multiclass NeonI_N2VShRAdd<bit u, bits<5> opcode, string asmop, SDNode OpNode> {
+ def _8B : N2VShiftAdd<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shr_imm8,
+ OpNode> {
+ let Inst{22-19} = 0b0001;
+ }
+
+ def _4H : N2VShiftAdd<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shr_imm16,
+ OpNode> {
+ let Inst{22-20} = 0b001;
+ }
+
+ def _2S : N2VShiftAdd<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shr_imm32,
+ OpNode> {
+ let Inst{22-21} = 0b01;
+ }
+
+ def _16B : N2VShiftAdd<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shr_imm8,
+ OpNode> {
+ let Inst{22-19} = 0b0001;
+ }
+
+ def _8H : N2VShiftAdd<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shr_imm16,
+ OpNode> {
+ let Inst{22-20} = 0b001;
+ }
+
+ def _4S : N2VShiftAdd<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shr_imm32,
+ OpNode> {
+ let Inst{22-21} = 0b01;
+ }
+
+ def _2D : N2VShiftAdd<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shr_imm64,
+ OpNode> {
+ let Inst{22} = 0b1;
+ }
+}
+
+// Shift right and accumulate
+defm SSRAvvi : NeonI_N2VShRAdd<0, 0b00010, "ssra", sra>;
+defm USRAvvi : NeonI_N2VShRAdd<1, 0b00010, "usra", srl>;
+
+// Rounding shift accumulate
+class N2VShiftAdd_R<bit q, bit u, bits<5> opcode, string asmop, string T,
+ RegisterOperand VPRC, ValueType Ty, Operand ImmTy,
+ SDPatternOperator OpNode>
+ : NeonI_2VShiftImm<q, u, opcode,
+ (outs VPRC:$Rd), (ins VPRC:$src, VPRC:$Rn, ImmTy:$Imm),
+ asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm",
+ [(set (Ty VPRC:$Rd), (Ty (add (Ty VPRC:$src),
+ (Ty (OpNode (Ty VPRC:$Rn), (i32 ImmTy:$Imm))))))],
+ NoItinerary> {
+ let Constraints = "$src = $Rd";
+}
+
+multiclass NeonI_N2VShRAdd_R<bit u, bits<5> opcode, string asmop,
+ SDPatternOperator OpNode> {
+ def _8B : N2VShiftAdd_R<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shr_imm8,
+ OpNode> {
+ let Inst{22-19} = 0b0001;
+ }
+
+ def _4H : N2VShiftAdd_R<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shr_imm16,
+ OpNode> {
+ let Inst{22-20} = 0b001;
+ }
+
+ def _2S : N2VShiftAdd_R<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shr_imm32,
+ OpNode> {
+ let Inst{22-21} = 0b01;
+ }
+
+ def _16B : N2VShiftAdd_R<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shr_imm8,
+ OpNode> {
+ let Inst{22-19} = 0b0001;
+ }
+
+ def _8H : N2VShiftAdd_R<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shr_imm16,
+ OpNode> {
+ let Inst{22-20} = 0b001;
+ }
+
+ def _4S : N2VShiftAdd_R<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shr_imm32,
+ OpNode> {
+ let Inst{22-21} = 0b01;
+ }
+
+ def _2D : N2VShiftAdd_R<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shr_imm64,
+ OpNode> {
+ let Inst{22} = 0b1;
+ }
+}
+
+// Rounding shift right and accumulate
+defm SRSRAvvi : NeonI_N2VShRAdd_R<0, 0b00110, "srsra", int_aarch64_neon_vsrshr>;
+defm URSRAvvi : NeonI_N2VShRAdd_R<1, 0b00110, "ursra", int_aarch64_neon_vurshr>;
+
+// Shift insert by immediate
+class N2VShiftIns<bit q, bit u, bits<5> opcode, string asmop, string T,
+ RegisterOperand VPRC, ValueType Ty, Operand ImmTy,
+ SDPatternOperator OpNode>
+ : NeonI_2VShiftImm<q, u, opcode,
+ (outs VPRC:$Rd), (ins VPRC:$src, VPRC:$Rn, ImmTy:$Imm),
+ asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm",
+ [(set (Ty VPRC:$Rd), (Ty (OpNode (Ty VPRC:$src), (Ty VPRC:$Rn),
+ (i32 ImmTy:$Imm))))],
+ NoItinerary> {
+ let Constraints = "$src = $Rd";
+}
+
+// shift left insert (vector by immediate)
+multiclass NeonI_N2VShLIns<bit u, bits<5> opcode, string asmop> {
+ def _8B : N2VShiftIns<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shl_imm8,
+ int_aarch64_neon_vsli> {
+ let Inst{22-19} = 0b0001;
+ }
+
+ def _4H : N2VShiftIns<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shl_imm16,
+ int_aarch64_neon_vsli> {
+ let Inst{22-20} = 0b001;
+ }
+
+ def _2S : N2VShiftIns<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shl_imm32,
+ int_aarch64_neon_vsli> {
+ let Inst{22-21} = 0b01;
+ }
+
+ // 128-bit vector types
+ def _16B : N2VShiftIns<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shl_imm8,
+ int_aarch64_neon_vsli> {
+ let Inst{22-19} = 0b0001;
+ }
+
+ def _8H : N2VShiftIns<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shl_imm16,
+ int_aarch64_neon_vsli> {
+ let Inst{22-20} = 0b001;
+ }
+
+ def _4S : N2VShiftIns<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shl_imm32,
+ int_aarch64_neon_vsli> {
+ let Inst{22-21} = 0b01;
+ }
+
+ def _2D : N2VShiftIns<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shl_imm64,
+ int_aarch64_neon_vsli> {
+ let Inst{22} = 0b1;
+ }
+}
+
+// shift right insert (vector by immediate)
+multiclass NeonI_N2VShRIns<bit u, bits<5> opcode, string asmop> {
+ // 64-bit vector types.
+ def _8B : N2VShiftIns<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shr_imm8,
+ int_aarch64_neon_vsri> {
+ let Inst{22-19} = 0b0001;
+ }
+
+ def _4H : N2VShiftIns<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shr_imm16,
+ int_aarch64_neon_vsri> {
+ let Inst{22-20} = 0b001;
+ }
+
+ def _2S : N2VShiftIns<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shr_imm32,
+ int_aarch64_neon_vsri> {
+ let Inst{22-21} = 0b01;
+ }
+
+ // 128-bit vector types
+ def _16B : N2VShiftIns<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shr_imm8,
+ int_aarch64_neon_vsri> {
+ let Inst{22-19} = 0b0001;
+ }
+
+ def _8H : N2VShiftIns<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shr_imm16,
+ int_aarch64_neon_vsri> {
+ let Inst{22-20} = 0b001;
+ }
+
+ def _4S : N2VShiftIns<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shr_imm32,
+ int_aarch64_neon_vsri> {
+ let Inst{22-21} = 0b01;
+ }
+
+ def _2D : N2VShiftIns<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shr_imm64,
+ int_aarch64_neon_vsri> {
+ let Inst{22} = 0b1;
+ }
+}
+
+// Shift left and insert
+defm SLIvvi : NeonI_N2VShLIns<0b1, 0b01010, "sli">;
+
+// Shift right and insert
+defm SRIvvi : NeonI_N2VShRIns<0b1, 0b01000, "sri">;
+
+class N2VShR_Narrow<bit q, bit u, bits<5> opcode, string asmop, string DestT,
+ string SrcT, Operand ImmTy>
+ : NeonI_2VShiftImm<q, u, opcode,
+ (outs VPR64:$Rd), (ins VPR128:$Rn, ImmTy:$Imm),
+ asmop # "\t$Rd." # DestT # ", $Rn." # SrcT # ", $Imm",
+ [], NoItinerary>;
+
+class N2VShR_Narrow_Hi<bit q, bit u, bits<5> opcode, string asmop, string DestT,
+ string SrcT, Operand ImmTy>
+ : NeonI_2VShiftImm<q, u, opcode, (outs VPR128:$Rd),
+ (ins VPR128:$src, VPR128:$Rn, ImmTy:$Imm),
+ asmop # "\t$Rd." # DestT # ", $Rn." # SrcT # ", $Imm",
+ [], NoItinerary> {
+ let Constraints = "$src = $Rd";
+}
+
+// Shift right narrow (vector by immediate)
+multiclass NeonI_N2VShR_Narrow<bit u, bits<5> opcode, string asmop> {
+ def _8B : N2VShR_Narrow<0b0, u, opcode, asmop, "8b", "8h", shr_imm8> {
+ let Inst{22-19} = 0b0001;
+ }
+
+ def _4H : N2VShR_Narrow<0b0, u, opcode, asmop, "4h", "4s", shr_imm16> {
+ let Inst{22-20} = 0b001;
+ }
+
+ def _2S : N2VShR_Narrow<0b0, u, opcode, asmop, "2s", "2d", shr_imm32> {
+ let Inst{22-21} = 0b01;
+ }
+
+ // Shift Narrow High
+ def _16B : N2VShR_Narrow_Hi<0b1, u, opcode, asmop # "2", "16b", "8h",
+ shr_imm8> {
+ let Inst{22-19} = 0b0001;
+ }
+
+ def _8H : N2VShR_Narrow_Hi<0b1, u, opcode, asmop # "2", "8h", "4s",
+ shr_imm16> {
+ let Inst{22-20} = 0b001;
+ }
+
+ def _4S : N2VShR_Narrow_Hi<0b1, u, opcode, asmop # "2", "4s", "2d",
+ shr_imm32> {
+ let Inst{22-21} = 0b01;
+ }
+}
+
+// Shift right narrow
+defm SHRNvvi : NeonI_N2VShR_Narrow<0b0, 0b10000, "shrn">;
+
+// Shift right narrow (prefix Q is saturating, prefix R is rounding)
+defm QSHRUNvvi : NeonI_N2VShR_Narrow<0b1, 0b10000, "sqshrun">;
+defm RSHRNvvi : NeonI_N2VShR_Narrow<0b0, 0b10001, "rshrn">;
+defm QRSHRUNvvi : NeonI_N2VShR_Narrow<0b1, 0b10001, "sqrshrun">;
+defm SQSHRNvvi : NeonI_N2VShR_Narrow<0b0, 0b10010, "sqshrn">;
+defm UQSHRNvvi : NeonI_N2VShR_Narrow<0b1, 0b10010, "uqshrn">;
+defm SQRSHRNvvi : NeonI_N2VShR_Narrow<0b0, 0b10011, "sqrshrn">;
+defm UQRSHRNvvi : NeonI_N2VShR_Narrow<0b1, 0b10011, "uqrshrn">;
+
+def Neon_combine_2D : PatFrag<(ops node:$Rm, node:$Rn),
+ (v2i64 (concat_vectors (v1i64 node:$Rm),
+ (v1i64 node:$Rn)))>;
+def Neon_combine_8H : PatFrag<(ops node:$Rm, node:$Rn),
+ (v8i16 (concat_vectors (v4i16 node:$Rm),
+ (v4i16 node:$Rn)))>;
+def Neon_combine_4S : PatFrag<(ops node:$Rm, node:$Rn),
+ (v4i32 (concat_vectors (v2i32 node:$Rm),
+ (v2i32 node:$Rn)))>;
+def Neon_combine_4f : PatFrag<(ops node:$Rm, node:$Rn),
+ (v4f32 (concat_vectors (v2f32 node:$Rm),
+ (v2f32 node:$Rn)))>;
+def Neon_combine_2d : PatFrag<(ops node:$Rm, node:$Rn),
+ (v2f64 (concat_vectors (v1f64 node:$Rm),
+ (v1f64 node:$Rn)))>;
+
+def Neon_lshrImm8H : PatFrag<(ops node:$lhs, node:$rhs),
+ (v8i16 (srl (v8i16 node:$lhs),
+ (v8i16 (Neon_vdup (i32 node:$rhs)))))>;
+def Neon_lshrImm4S : PatFrag<(ops node:$lhs, node:$rhs),
+ (v4i32 (srl (v4i32 node:$lhs),
+ (v4i32 (Neon_vdup (i32 node:$rhs)))))>;
+def Neon_lshrImm2D : PatFrag<(ops node:$lhs, node:$rhs),
+ (v2i64 (srl (v2i64 node:$lhs),
+ (v2i64 (Neon_vdup (i32 node:$rhs)))))>;
+def Neon_ashrImm8H : PatFrag<(ops node:$lhs, node:$rhs),
+ (v8i16 (sra (v8i16 node:$lhs),
+ (v8i16 (Neon_vdup (i32 node:$rhs)))))>;
+def Neon_ashrImm4S : PatFrag<(ops node:$lhs, node:$rhs),
+ (v4i32 (sra (v4i32 node:$lhs),
+ (v4i32 (Neon_vdup (i32 node:$rhs)))))>;
+def Neon_ashrImm2D : PatFrag<(ops node:$lhs, node:$rhs),
+ (v2i64 (sra (v2i64 node:$lhs),
+ (v2i64 (Neon_vdup (i32 node:$rhs)))))>;
+
+// Normal shift right narrow is matched by IR (srl/sra, trunc, concat_vectors)
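+// e.g. (v8i8 (trunc (srl (v8i16 $Rn), (Neon_vdup 3)))) is selected to
+// "shrn $Rd.8b, $Rn.8h, #3".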
+multiclass Neon_shiftNarrow_patterns<string shr> {
+ def : Pat<(v8i8 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm8H") VPR128:$Rn,
+ (i32 shr_imm8:$Imm)))),
+ (SHRNvvi_8B VPR128:$Rn, imm:$Imm)>;
+ def : Pat<(v4i16 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm4S") VPR128:$Rn,
+ (i32 shr_imm16:$Imm)))),
+ (SHRNvvi_4H VPR128:$Rn, imm:$Imm)>;
+ def : Pat<(v2i32 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm2D") VPR128:$Rn,
+ (i32 shr_imm32:$Imm)))),
+ (SHRNvvi_2S VPR128:$Rn, imm:$Imm)>;
+
+ def : Pat<(Neon_combine_2D (v1i64 VPR64:$src), (v1i64 (bitconvert
+ (v8i8 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm8H")
+ VPR128:$Rn, (i32 shr_imm8:$Imm))))))),
+ (SHRNvvi_16B (v2i64 (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64)),
+ VPR128:$Rn, imm:$Imm)>;
+ def : Pat<(Neon_combine_2D (v1i64 VPR64:$src), (v1i64 (bitconvert
+ (v4i16 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm4S")
+ VPR128:$Rn, (i32 shr_imm16:$Imm))))))),
+ (SHRNvvi_8H (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
+ VPR128:$Rn, imm:$Imm)>;
+ def : Pat<(Neon_combine_2D (v1i64 VPR64:$src), (v1i64 (bitconvert
+ (v2i32 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm2D")
+ VPR128:$Rn, (i32 shr_imm32:$Imm))))))),
+ (SHRNvvi_4S (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
+ VPR128:$Rn, imm:$Imm)>;
+}
+
+multiclass Neon_shiftNarrow_QR_patterns<SDPatternOperator op, string prefix> {
+ def : Pat<(v8i8 (op (v8i16 VPR128:$Rn), shr_imm8:$Imm)),
+ (!cast<Instruction>(prefix # "_8B") VPR128:$Rn, imm:$Imm)>;
+ def : Pat<(v4i16 (op (v4i32 VPR128:$Rn), shr_imm16:$Imm)),
+ (!cast<Instruction>(prefix # "_4H") VPR128:$Rn, imm:$Imm)>;
+ def : Pat<(v2i32 (op (v2i64 VPR128:$Rn), shr_imm32:$Imm)),
+ (!cast<Instruction>(prefix # "_2S") VPR128:$Rn, imm:$Imm)>;
+
+ def : Pat<(Neon_combine_2D (v1i64 VPR64:$src),
+ (v1i64 (bitconvert (v8i8
+ (op (v8i16 VPR128:$Rn), shr_imm8:$Imm))))),
+ (!cast<Instruction>(prefix # "_16B")
+ (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
+ VPR128:$Rn, imm:$Imm)>;
+ def : Pat<(Neon_combine_2D (v1i64 VPR64:$src),
+ (v1i64 (bitconvert (v4i16
+ (op (v4i32 VPR128:$Rn), shr_imm16:$Imm))))),
+ (!cast<Instruction>(prefix # "_8H")
+ (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
+ VPR128:$Rn, imm:$Imm)>;
+ def : Pat<(Neon_combine_2D (v1i64 VPR64:$src),
+ (v1i64 (bitconvert (v2i32
+ (op (v2i64 VPR128:$Rn), shr_imm32:$Imm))))),
+ (!cast<Instruction>(prefix # "_4S")
+ (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
+ VPR128:$Rn, imm:$Imm)>;
+}
+
+defm : Neon_shiftNarrow_patterns<"lshr">;
+defm : Neon_shiftNarrow_patterns<"ashr">;
+
+defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vsqshrun, "QSHRUNvvi">;
+defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vrshrn, "RSHRNvvi">;
+defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vsqrshrun, "QRSHRUNvvi">;
+defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vsqshrn, "SQSHRNvvi">;
+defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vuqshrn, "UQSHRNvvi">;
+defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vsqrshrn, "SQRSHRNvvi">;
+defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vuqrshrn, "UQRSHRNvvi">;
+
+// Convert between fixed-point and floating-point
+class N2VCvt_Fx<bit q, bit u, bits<5> opcode, string asmop, string T,
+ RegisterOperand VPRC, ValueType DestTy, ValueType SrcTy,
+ Operand ImmTy, SDPatternOperator IntOp>
+ : NeonI_2VShiftImm<q, u, opcode,
+ (outs VPRC:$Rd), (ins VPRC:$Rn, ImmTy:$Imm),
+ asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm",
+ [(set (DestTy VPRC:$Rd), (DestTy (IntOp (SrcTy VPRC:$Rn),
+ (i32 ImmTy:$Imm))))],
+ NoItinerary>;
+
+multiclass NeonI_N2VCvt_Fx2fp<bit u, bits<5> opcode, string asmop,
+ SDPatternOperator IntOp> {
+ def _2S : N2VCvt_Fx<0, u, opcode, asmop, "2s", VPR64, v2f32, v2i32,
+ shr_imm32, IntOp> {
+ let Inst{22-21} = 0b01;
+ }
+
+ def _4S : N2VCvt_Fx<1, u, opcode, asmop, "4s", VPR128, v4f32, v4i32,
+ shr_imm32, IntOp> {
+ let Inst{22-21} = 0b01;
+ }
+
+ def _2D : N2VCvt_Fx<1, u, opcode, asmop, "2d", VPR128, v2f64, v2i64,
+ shr_imm64, IntOp> {
+ let Inst{22} = 0b1;
+ }
+}
+
+multiclass NeonI_N2VCvt_Fp2fx<bit u, bits<5> opcode, string asmop,
+ SDPatternOperator IntOp> {
+ def _2S : N2VCvt_Fx<0, u, opcode, asmop, "2s", VPR64, v2i32, v2f32,
+ shr_imm32, IntOp> {
+ let Inst{22-21} = 0b01;
+ }
+
+ def _4S : N2VCvt_Fx<1, u, opcode, asmop, "4s", VPR128, v4i32, v4f32,
+ shr_imm32, IntOp> {
+ let Inst{22-21} = 0b01;
+ }
+
+ def _2D : N2VCvt_Fx<1, u, opcode, asmop, "2d", VPR128, v2i64, v2f64,
+ shr_imm64, IntOp> {
+ let Inst{22} = 0b1;
+ }
+}
+
+// Convert fixed-point to floating-point
+defm VCVTxs2f : NeonI_N2VCvt_Fx2fp<0, 0b11100, "scvtf",
+ int_arm_neon_vcvtfxs2fp>;
+defm VCVTxu2f : NeonI_N2VCvt_Fx2fp<1, 0b11100, "ucvtf",
+ int_arm_neon_vcvtfxu2fp>;
+
+// Convert floating-point to fixed-point
+defm VCVTf2xs : NeonI_N2VCvt_Fp2fx<0, 0b11111, "fcvtzs",
+ int_arm_neon_vcvtfp2fxs>;
+defm VCVTf2xu : NeonI_N2VCvt_Fp2fx<1, 0b11111, "fcvtzu",
+ int_arm_neon_vcvtfp2fxu>;
+
+multiclass Neon_sshll2_0<SDNode ext>
+{
+ def _v8i8 : PatFrag<(ops node:$Rn),
+ (v8i16 (ext (v8i8 (Neon_High16B node:$Rn))))>;
+ def _v4i16 : PatFrag<(ops node:$Rn),
+ (v4i32 (ext (v4i16 (Neon_High8H node:$Rn))))>;
+ def _v2i32 : PatFrag<(ops node:$Rn),
+ (v2i64 (ext (v2i32 (Neon_High4S node:$Rn))))>;
+}
+
+defm NI_sext_high : Neon_sshll2_0<sext>;
+defm NI_zext_high : Neon_sshll2_0<zext>;
+
+
+//===----------------------------------------------------------------------===//
+// Multiclasses for NeonI_Across
+//===----------------------------------------------------------------------===//
+
+// Variant 1
+
+multiclass NeonI_2VAcross_1<bit u, bits<5> opcode,
+ string asmop, SDPatternOperator opnode>
+{
+ def _1h8b: NeonI_2VAcross<0b0, u, 0b00, opcode,
+ (outs FPR16:$Rd), (ins VPR64:$Rn),
+ asmop # "\t$Rd, $Rn.8b",
+ [(set (v1i16 FPR16:$Rd),
+ (v1i16 (opnode (v8i8 VPR64:$Rn))))],
NoItinerary>;
-multiclass NeonI_Scalar3Same_BHSD_sizes<bit u, bits<5> opcode,
- string asmop, bit Commutable = 0>
+ def _1h16b: NeonI_2VAcross<0b1, u, 0b00, opcode,
+ (outs FPR16:$Rd), (ins VPR128:$Rn),
+ asmop # "\t$Rd, $Rn.16b",
+ [(set (v1i16 FPR16:$Rd),
+ (v1i16 (opnode (v16i8 VPR128:$Rn))))],
+ NoItinerary>;
+
+ def _1s4h: NeonI_2VAcross<0b0, u, 0b01, opcode,
+ (outs FPR32:$Rd), (ins VPR64:$Rn),
+ asmop # "\t$Rd, $Rn.4h",
+ [(set (v1i32 FPR32:$Rd),
+ (v1i32 (opnode (v4i16 VPR64:$Rn))))],
+ NoItinerary>;
+
+ def _1s8h: NeonI_2VAcross<0b1, u, 0b01, opcode,
+ (outs FPR32:$Rd), (ins VPR128:$Rn),
+ asmop # "\t$Rd, $Rn.8h",
+ [(set (v1i32 FPR32:$Rd),
+ (v1i32 (opnode (v8i16 VPR128:$Rn))))],
+ NoItinerary>;
+
+ // _1d2s doesn't exist!
+
+ def _1d4s: NeonI_2VAcross<0b1, u, 0b10, opcode,
+ (outs FPR64:$Rd), (ins VPR128:$Rn),
+ asmop # "\t$Rd, $Rn.4s",
+ [(set (v1i64 FPR64:$Rd),
+ (v1i64 (opnode (v4i32 VPR128:$Rn))))],
+ NoItinerary>;
+}
+
+defm SADDLV : NeonI_2VAcross_1<0b0, 0b00011, "saddlv", int_aarch64_neon_saddlv>;
+defm UADDLV : NeonI_2VAcross_1<0b1, 0b00011, "uaddlv", int_aarch64_neon_uaddlv>;
+
+// Variant 2
+
+multiclass NeonI_2VAcross_2<bit u, bits<5> opcode,
+ string asmop, SDPatternOperator opnode>
{
+ def _1b8b: NeonI_2VAcross<0b0, u, 0b00, opcode,
+ (outs FPR8:$Rd), (ins VPR64:$Rn),
+ asmop # "\t$Rd, $Rn.8b",
+ [(set (v1i8 FPR8:$Rd),
+ (v1i8 (opnode (v8i8 VPR64:$Rn))))],
+ NoItinerary>;
+
+ def _1b16b: NeonI_2VAcross<0b1, u, 0b00, opcode,
+ (outs FPR8:$Rd), (ins VPR128:$Rn),
+ asmop # "\t$Rd, $Rn.16b",
+ [(set (v1i8 FPR8:$Rd),
+ (v1i8 (opnode (v16i8 VPR128:$Rn))))],
+ NoItinerary>;
+
+ def _1h4h: NeonI_2VAcross<0b0, u, 0b01, opcode,
+ (outs FPR16:$Rd), (ins VPR64:$Rn),
+ asmop # "\t$Rd, $Rn.4h",
+ [(set (v1i16 FPR16:$Rd),
+ (v1i16 (opnode (v4i16 VPR64:$Rn))))],
+ NoItinerary>;
+
+ def _1h8h: NeonI_2VAcross<0b1, u, 0b01, opcode,
+ (outs FPR16:$Rd), (ins VPR128:$Rn),
+ asmop # "\t$Rd, $Rn.8h",
+ [(set (v1i16 FPR16:$Rd),
+ (v1i16 (opnode (v8i16 VPR128:$Rn))))],
+ NoItinerary>;
+
+ // _1s2s doesn't exist!
+
+ def _1s4s: NeonI_2VAcross<0b1, u, 0b10, opcode,
+ (outs FPR32:$Rd), (ins VPR128:$Rn),
+ asmop # "\t$Rd, $Rn.4s",
+ [(set (v1i32 FPR32:$Rd),
+ (v1i32 (opnode (v4i32 VPR128:$Rn))))],
+ NoItinerary>;
+}
+
+defm SMAXV : NeonI_2VAcross_2<0b0, 0b01010, "smaxv", int_aarch64_neon_smaxv>;
+defm UMAXV : NeonI_2VAcross_2<0b1, 0b01010, "umaxv", int_aarch64_neon_umaxv>;
+
+defm SMINV : NeonI_2VAcross_2<0b0, 0b11010, "sminv", int_aarch64_neon_sminv>;
+defm UMINV : NeonI_2VAcross_2<0b1, 0b11010, "uminv", int_aarch64_neon_uminv>;
+
+defm ADDV : NeonI_2VAcross_2<0b0, 0b11011, "addv", int_aarch64_neon_vaddv>;
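+
+// e.g. "addv h0, v1.8h" writes the sum of all eight halfwords to h0, and
+// "smaxv b0, v1.16b" the signed maximum of all sixteen bytes to b0
+// (illustrative register choices).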
+
+// Variant 3
+
+multiclass NeonI_2VAcross_3<bit u, bits<5> opcode, bits<2> size,
+ string asmop, SDPatternOperator opnode> {
+ def _1s4s: NeonI_2VAcross<0b1, u, size, opcode,
+ (outs FPR32:$Rd), (ins VPR128:$Rn),
+ asmop # "\t$Rd, $Rn.4s",
+ [(set (v1f32 FPR32:$Rd),
+ (v1f32 (opnode (v4f32 VPR128:$Rn))))],
+ NoItinerary>;
+}
+
+defm FMAXNMV : NeonI_2VAcross_3<0b1, 0b01100, 0b00, "fmaxnmv",
+ int_aarch64_neon_vmaxnmv>;
+defm FMINNMV : NeonI_2VAcross_3<0b1, 0b01100, 0b10, "fminnmv",
+ int_aarch64_neon_vminnmv>;
+
+defm FMAXV : NeonI_2VAcross_3<0b1, 0b01111, 0b00, "fmaxv",
+ int_aarch64_neon_vmaxv>;
+defm FMINV : NeonI_2VAcross_3<0b1, 0b01111, 0b10, "fminv",
+ int_aarch64_neon_vminv>;
+
+// The following are for the instruction class (Perm)
+
+class NeonI_Permute<bit q, bits<2> size, bits<3> opcode,
+ string asmop, RegisterOperand OpVPR, string OpS,
+ SDPatternOperator opnode, ValueType Ty>
+ : NeonI_Perm<q, size, opcode,
+ (outs OpVPR:$Rd), (ins OpVPR:$Rn, OpVPR:$Rm),
+ asmop # "\t$Rd." # OpS # ", $Rn." # OpS # ", $Rm." # OpS,
+ [(set (Ty OpVPR:$Rd),
+ (Ty (opnode (Ty OpVPR:$Rn), (Ty OpVPR:$Rm))))],
+ NoItinerary>;
+
+multiclass NeonI_Perm_pat<bits<3> opcode, string asmop,
+ SDPatternOperator opnode> {
+ def _8b : NeonI_Permute<0b0, 0b00, opcode, asmop,
+ VPR64, "8b", opnode, v8i8>;
+ def _16b : NeonI_Permute<0b1, 0b00, opcode, asmop,
+ VPR128, "16b", opnode, v16i8>;
+ def _4h : NeonI_Permute<0b0, 0b01, opcode, asmop,
+ VPR64, "4h", opnode, v4i16>;
+ def _8h : NeonI_Permute<0b1, 0b01, opcode, asmop,
+ VPR128, "8h", opnode, v8i16>;
+ def _2s : NeonI_Permute<0b0, 0b10, opcode, asmop,
+ VPR64, "2s", opnode, v2i32>;
+ def _4s : NeonI_Permute<0b1, 0b10, opcode, asmop,
+ VPR128, "4s", opnode, v4i32>;
+ def _2d : NeonI_Permute<0b1, 0b11, opcode, asmop,
+ VPR128, "2d", opnode, v2i64>;
+}
+
+defm UZP1vvv : NeonI_Perm_pat<0b001, "uzp1", Neon_uzp1>;
+defm TRN1vvv : NeonI_Perm_pat<0b010, "trn1", Neon_trn1>;
+defm ZIP1vvv : NeonI_Perm_pat<0b011, "zip1", Neon_zip1>;
+defm UZP2vvv : NeonI_Perm_pat<0b101, "uzp2", Neon_uzp2>;
+defm TRN2vvv : NeonI_Perm_pat<0b110, "trn2", Neon_trn2>;
+defm ZIP2vvv : NeonI_Perm_pat<0b111, "zip2", Neon_zip2>;
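+
+// e.g. "zip1 v0.8b, v1.8b, v2.8b" interleaves the low halves of its sources,
+// giving {v1[0], v2[0], v1[1], v2[1], ...}, while "uzp1" concatenates the
+// even-indexed elements of both sources (illustrative register choices).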
+
+multiclass NeonI_Perm_float_pat<string INS, SDPatternOperator opnode> {
+ def : Pat<(v2f32 (opnode (v2f32 VPR64:$Rn), (v2f32 VPR64:$Rm))),
+ (!cast<Instruction>(INS # "_2s") VPR64:$Rn, VPR64:$Rm)>;
+
+ def : Pat<(v4f32 (opnode (v4f32 VPR128:$Rn), (v4f32 VPR128:$Rm))),
+ (!cast<Instruction>(INS # "_4s") VPR128:$Rn, VPR128:$Rm)>;
+
+ def : Pat<(v2f64 (opnode (v2f64 VPR128:$Rn), (v2f64 VPR128:$Rm))),
+ (!cast<Instruction>(INS # "_2d") VPR128:$Rn, VPR128:$Rm)>;
+}
+
+defm : NeonI_Perm_float_pat<"UZP1vvv", Neon_uzp1>;
+defm : NeonI_Perm_float_pat<"UZP2vvv", Neon_uzp2>;
+defm : NeonI_Perm_float_pat<"ZIP1vvv", Neon_zip1>;
+defm : NeonI_Perm_float_pat<"ZIP2vvv", Neon_zip2>;
+defm : NeonI_Perm_float_pat<"TRN1vvv", Neon_trn1>;
+defm : NeonI_Perm_float_pat<"TRN2vvv", Neon_trn2>;
+
+// The following are for the instruction class (3V Diff)
+
+// normal long/long2 pattern
+class NeonI_3VDL<bit q, bit u, bits<2> size, bits<4> opcode,
+ string asmop, string ResS, string OpS,
+ SDPatternOperator opnode, SDPatternOperator ext,
+ RegisterOperand OpVPR,
+ ValueType ResTy, ValueType OpTy>
+ : NeonI_3VDiff<q, u, size, opcode,
+ (outs VPR128:$Rd), (ins OpVPR:$Rn, OpVPR:$Rm),
+ asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
+ [(set (ResTy VPR128:$Rd),
+ (ResTy (opnode (ResTy (ext (OpTy OpVPR:$Rn))),
+ (ResTy (ext (OpTy OpVPR:$Rm))))))],
+ NoItinerary>;
+
+multiclass NeonI_3VDL_s<bit u, bits<4> opcode,
+ string asmop, SDPatternOperator opnode,
+ bit Commutable = 0> {
+ let isCommutable = Commutable in {
+ def _8h8b : NeonI_3VDL<0b0, u, 0b00, opcode, asmop, "8h", "8b",
+ opnode, sext, VPR64, v8i16, v8i8>;
+ def _4s4h : NeonI_3VDL<0b0, u, 0b01, opcode, asmop, "4s", "4h",
+ opnode, sext, VPR64, v4i32, v4i16>;
+ def _2d2s : NeonI_3VDL<0b0, u, 0b10, opcode, asmop, "2d", "2s",
+ opnode, sext, VPR64, v2i64, v2i32>;
+ }
+}
+
+multiclass NeonI_3VDL2_s<bit u, bits<4> opcode, string asmop,
+ SDPatternOperator opnode, bit Commutable = 0> {
+ let isCommutable = Commutable in {
+ def _8h16b : NeonI_3VDL<0b1, u, 0b00, opcode, asmop, "8h", "16b",
+ opnode, NI_sext_high_v8i8, VPR128, v8i16, v16i8>;
+ def _4s8h : NeonI_3VDL<0b1, u, 0b01, opcode, asmop, "4s", "8h",
+ opnode, NI_sext_high_v4i16, VPR128, v4i32, v8i16>;
+ def _2d4s : NeonI_3VDL<0b1, u, 0b10, opcode, asmop, "2d", "4s",
+ opnode, NI_sext_high_v2i32, VPR128, v2i64, v4i32>;
+ }
+}
+
+multiclass NeonI_3VDL_u<bit u, bits<4> opcode, string asmop,
+ SDPatternOperator opnode, bit Commutable = 0> {
+ let isCommutable = Commutable in {
+ def _8h8b : NeonI_3VDL<0b0, u, 0b00, opcode, asmop, "8h", "8b",
+ opnode, zext, VPR64, v8i16, v8i8>;
+ def _4s4h : NeonI_3VDL<0b0, u, 0b01, opcode, asmop, "4s", "4h",
+ opnode, zext, VPR64, v4i32, v4i16>;
+ def _2d2s : NeonI_3VDL<0b0, u, 0b10, opcode, asmop, "2d", "2s",
+ opnode, zext, VPR64, v2i64, v2i32>;
+ }
+}
+
+multiclass NeonI_3VDL2_u<bit u, bits<4> opcode, string asmop,
+ SDPatternOperator opnode, bit Commutable = 0> {
+ let isCommutable = Commutable in {
+ def _8h16b : NeonI_3VDL<0b1, u, 0b00, opcode, asmop, "8h", "16b",
+ opnode, NI_zext_high_v8i8, VPR128, v8i16, v16i8>;
+ def _4s8h : NeonI_3VDL<0b1, u, 0b01, opcode, asmop, "4s", "8h",
+ opnode, NI_zext_high_v4i16, VPR128, v4i32, v8i16>;
+ def _2d4s : NeonI_3VDL<0b1, u, 0b10, opcode, asmop, "2d", "4s",
+ opnode, NI_zext_high_v2i32, VPR128, v2i64, v4i32>;
+ }
+}
+
+defm SADDLvvv : NeonI_3VDL_s<0b0, 0b0000, "saddl", add, 1>;
+defm UADDLvvv : NeonI_3VDL_u<0b1, 0b0000, "uaddl", add, 1>;
+
+defm SADDL2vvv : NeonI_3VDL2_s<0b0, 0b0000, "saddl2", add, 1>;
+defm UADDL2vvv : NeonI_3VDL2_u<0b1, 0b0000, "uaddl2", add, 1>;
+
+defm SSUBLvvv : NeonI_3VDL_s<0b0, 0b0010, "ssubl", sub, 0>;
+defm USUBLvvv : NeonI_3VDL_u<0b1, 0b0010, "usubl", sub, 0>;
+
+defm SSUBL2vvv : NeonI_3VDL2_s<0b0, 0b0010, "ssubl2", sub, 0>;
+defm USUBL2vvv : NeonI_3VDL2_u<0b1, 0b0010, "usubl2", sub, 0>;
+
+// normal wide/wide2 pattern
+class NeonI_3VDW<bit q, bit u, bits<2> size, bits<4> opcode,
+ string asmop, string ResS, string OpS,
+ SDPatternOperator opnode, SDPatternOperator ext,
+ RegisterOperand OpVPR,
+ ValueType ResTy, ValueType OpTy>
+ : NeonI_3VDiff<q, u, size, opcode,
+ (outs VPR128:$Rd), (ins VPR128:$Rn, OpVPR:$Rm),
+ asmop # "\t$Rd." # ResS # ", $Rn." # ResS # ", $Rm." # OpS,
+ [(set (ResTy VPR128:$Rd),
+ (ResTy (opnode (ResTy VPR128:$Rn),
+ (ResTy (ext (OpTy OpVPR:$Rm))))))],
+ NoItinerary>;
+
+multiclass NeonI_3VDW_s<bit u, bits<4> opcode, string asmop,
+ SDPatternOperator opnode> {
+ def _8h8b : NeonI_3VDW<0b0, u, 0b00, opcode, asmop, "8h", "8b",
+ opnode, sext, VPR64, v8i16, v8i8>;
+ def _4s4h : NeonI_3VDW<0b0, u, 0b01, opcode, asmop, "4s", "4h",
+ opnode, sext, VPR64, v4i32, v4i16>;
+ def _2d2s : NeonI_3VDW<0b0, u, 0b10, opcode, asmop, "2d", "2s",
+ opnode, sext, VPR64, v2i64, v2i32>;
+}
+
+defm SADDWvvv : NeonI_3VDW_s<0b0, 0b0001, "saddw", add>;
+defm SSUBWvvv : NeonI_3VDW_s<0b0, 0b0011, "ssubw", sub>;
+
+multiclass NeonI_3VDW2_s<bit u, bits<4> opcode, string asmop,
+ SDPatternOperator opnode> {
+ def _8h16b : NeonI_3VDW<0b1, u, 0b00, opcode, asmop, "8h", "16b",
+ opnode, NI_sext_high_v8i8, VPR128, v8i16, v16i8>;
+ def _4s8h : NeonI_3VDW<0b1, u, 0b01, opcode, asmop, "4s", "8h",
+ opnode, NI_sext_high_v4i16, VPR128, v4i32, v8i16>;
+ def _2d4s : NeonI_3VDW<0b1, u, 0b10, opcode, asmop, "2d", "4s",
+ opnode, NI_sext_high_v2i32, VPR128, v2i64, v4i32>;
+}
+
+defm SADDW2vvv : NeonI_3VDW2_s<0b0, 0b0001, "saddw2", add>;
+defm SSUBW2vvv : NeonI_3VDW2_s<0b0, 0b0011, "ssubw2", sub>;
+
+multiclass NeonI_3VDW_u<bit u, bits<4> opcode, string asmop,
+ SDPatternOperator opnode> {
+ def _8h8b : NeonI_3VDW<0b0, u, 0b00, opcode, asmop, "8h", "8b",
+ opnode, zext, VPR64, v8i16, v8i8>;
+ def _4s4h : NeonI_3VDW<0b0, u, 0b01, opcode, asmop, "4s", "4h",
+ opnode, zext, VPR64, v4i32, v4i16>;
+ def _2d2s : NeonI_3VDW<0b0, u, 0b10, opcode, asmop, "2d", "2s",
+ opnode, zext, VPR64, v2i64, v2i32>;
+}
+
+defm UADDWvvv : NeonI_3VDW_u<0b1, 0b0001, "uaddw", add>;
+defm USUBWvvv : NeonI_3VDW_u<0b1, 0b0011, "usubw", sub>;
+
+multiclass NeonI_3VDW2_u<bit u, bits<4> opcode, string asmop,
+ SDPatternOperator opnode> {
+ def _8h16b : NeonI_3VDW<0b1, u, 0b00, opcode, asmop, "8h", "16b",
+ opnode, NI_zext_high_v8i8, VPR128, v8i16, v16i8>;
+ def _4s8h : NeonI_3VDW<0b1, u, 0b01, opcode, asmop, "4s", "8h",
+ opnode, NI_zext_high_v4i16, VPR128, v4i32, v8i16>;
+ def _2d4s : NeonI_3VDW<0b1, u, 0b10, opcode, asmop, "2d", "4s",
+ opnode, NI_zext_high_v2i32, VPR128, v2i64, v4i32>;
+}
+
+defm UADDW2vvv : NeonI_3VDW2_u<0b1, 0b0001, "uaddw2", add>;
+defm USUBW2vvv : NeonI_3VDW2_u<0b1, 0b0011, "usubw2", sub>;
+
+// Get the high half of each vector element.
+multiclass NeonI_get_high {
+ def _8h : PatFrag<(ops node:$Rn),
+ (v8i8 (trunc (v8i16 (srl (v8i16 node:$Rn),
+ (v8i16 (Neon_vdup (i32 8)))))))>;
+ def _4s : PatFrag<(ops node:$Rn),
+ (v4i16 (trunc (v4i32 (srl (v4i32 node:$Rn),
+ (v4i32 (Neon_vdup (i32 16)))))))>;
+ def _2d : PatFrag<(ops node:$Rn),
+ (v2i32 (trunc (v2i64 (srl (v2i64 node:$Rn),
+ (v2i64 (Neon_vdup (i32 32)))))))>;
+}
+
+defm NI_get_hi : NeonI_get_high;
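+
+// e.g. NI_get_hi_8h yields, per 16-bit element, its high byte:
+// trunc(x >> 8). This is the "high half" that addhn/subhn below keep from
+// each widened sum or difference.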
+
+// pattern for addhn/subhn with 2 operands
+class NeonI_3VDN_addhn_2Op<bit q, bit u, bits<2> size, bits<4> opcode,
+ string asmop, string ResS, string OpS,
+ SDPatternOperator opnode, SDPatternOperator get_hi,
+ ValueType ResTy, ValueType OpTy>
+ : NeonI_3VDiff<q, u, size, opcode,
+ (outs VPR64:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
+ asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
+ [(set (ResTy VPR64:$Rd),
+ (ResTy (get_hi
+ (OpTy (opnode (OpTy VPR128:$Rn),
+ (OpTy VPR128:$Rm))))))],
+ NoItinerary>;
+
+multiclass NeonI_3VDN_addhn_2Op<bit u, bits<4> opcode, string asmop,
+ SDPatternOperator opnode, bit Commutable = 0> {
+ let isCommutable = Commutable in {
+ def _8b8h : NeonI_3VDN_addhn_2Op<0b0, u, 0b00, opcode, asmop, "8b", "8h",
+ opnode, NI_get_hi_8h, v8i8, v8i16>;
+ def _4h4s : NeonI_3VDN_addhn_2Op<0b0, u, 0b01, opcode, asmop, "4h", "4s",
+ opnode, NI_get_hi_4s, v4i16, v4i32>;
+ def _2s2d : NeonI_3VDN_addhn_2Op<0b0, u, 0b10, opcode, asmop, "2s", "2d",
+ opnode, NI_get_hi_2d, v2i32, v2i64>;
+ }
+}
+
+defm ADDHNvvv : NeonI_3VDN_addhn_2Op<0b0, 0b0100, "addhn", add, 1>;
+defm SUBHNvvv : NeonI_3VDN_addhn_2Op<0b0, 0b0110, "subhn", sub, 0>;
+
+// Pattern for operations with 2 operands
+class NeonI_3VD_2Op<bit q, bit u, bits<2> size, bits<4> opcode,
+ string asmop, string ResS, string OpS,
+ SDPatternOperator opnode,
+ RegisterOperand ResVPR, RegisterOperand OpVPR,
+ ValueType ResTy, ValueType OpTy>
+ : NeonI_3VDiff<q, u, size, opcode,
+ (outs ResVPR:$Rd), (ins OpVPR:$Rn, OpVPR:$Rm),
+ asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
+ [(set (ResTy ResVPR:$Rd),
+ (ResTy (opnode (OpTy OpVPR:$Rn), (OpTy OpVPR:$Rm))))],
+ NoItinerary>;
+
+// normal narrow pattern
+multiclass NeonI_3VDN_2Op<bit u, bits<4> opcode, string asmop,
+ SDPatternOperator opnode, bit Commutable = 0> {
+ let isCommutable = Commutable in {
+ def _8b8h : NeonI_3VD_2Op<0b0, u, 0b00, opcode, asmop, "8b", "8h",
+ opnode, VPR64, VPR128, v8i8, v8i16>;
+ def _4h4s : NeonI_3VD_2Op<0b0, u, 0b01, opcode, asmop, "4h", "4s",
+ opnode, VPR64, VPR128, v4i16, v4i32>;
+ def _2s2d : NeonI_3VD_2Op<0b0, u, 0b10, opcode, asmop, "2s", "2d",
+ opnode, VPR64, VPR128, v2i32, v2i64>;
+ }
+}
+
+defm RADDHNvvv : NeonI_3VDN_2Op<0b1, 0b0100, "raddhn", int_arm_neon_vraddhn, 1>;
+defm RSUBHNvvv : NeonI_3VDN_2Op<0b1, 0b0110, "rsubhn", int_arm_neon_vrsubhn, 0>;
+
+// Pattern for ACLE intrinsics with 3 operands
+class NeonI_3VDN_3Op<bit q, bit u, bits<2> size, bits<4> opcode,
+ string asmop, string ResS, string OpS>
+ : NeonI_3VDiff<q, u, size, opcode,
+ (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn, VPR128:$Rm),
+ asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
+ [], NoItinerary> {
+ let Constraints = "$src = $Rd";
+ let neverHasSideEffects = 1;
+}
+
+multiclass NeonI_3VDN_3Op_v1<bit u, bits<4> opcode, string asmop> {
+ def _16b8h : NeonI_3VDN_3Op<0b1, u, 0b00, opcode, asmop, "16b", "8h">;
+ def _8h4s : NeonI_3VDN_3Op<0b1, u, 0b01, opcode, asmop, "8h", "4s">;
+ def _4s2d : NeonI_3VDN_3Op<0b1, u, 0b10, opcode, asmop, "4s", "2d">;
+}
+
+defm ADDHN2vvv : NeonI_3VDN_3Op_v1<0b0, 0b0100, "addhn2">;
+defm SUBHN2vvv : NeonI_3VDN_3Op_v1<0b0, 0b0110, "subhn2">;
+
+defm RADDHN2vvv : NeonI_3VDN_3Op_v1<0b1, 0b0100, "raddhn2">;
+defm RSUBHN2vvv : NeonI_3VDN_3Op_v1<0b1, 0b0110, "rsubhn2">;
+
+// Patterns have to be separate because there's a SUBREG_TO_REG in the output
+// part.
+class NarrowHighHalfPat<Instruction INST, ValueType DstTy, ValueType SrcTy,
+ SDPatternOperator coreop>
+ : Pat<(Neon_combine_2D (v1i64 VPR64:$src),
+ (v1i64 (bitconvert (DstTy (coreop (SrcTy VPR128:$Rn),
+ (SrcTy VPR128:$Rm)))))),
+ (INST (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
+ VPR128:$Rn, VPR128:$Rm)>;
+
+// addhn2 patterns
+def : NarrowHighHalfPat<ADDHN2vvv_16b8h, v8i8, v8i16,
+ BinOpFrag<(NI_get_hi_8h (add node:$LHS, node:$RHS))>>;
+def : NarrowHighHalfPat<ADDHN2vvv_8h4s, v4i16, v4i32,
+ BinOpFrag<(NI_get_hi_4s (add node:$LHS, node:$RHS))>>;
+def : NarrowHighHalfPat<ADDHN2vvv_4s2d, v2i32, v2i64,
+ BinOpFrag<(NI_get_hi_2d (add node:$LHS, node:$RHS))>>;
+
+// subhn2 patterns
+def : NarrowHighHalfPat<SUBHN2vvv_16b8h, v8i8, v8i16,
+ BinOpFrag<(NI_get_hi_8h (sub node:$LHS, node:$RHS))>>;
+def : NarrowHighHalfPat<SUBHN2vvv_8h4s, v4i16, v4i32,
+ BinOpFrag<(NI_get_hi_4s (sub node:$LHS, node:$RHS))>>;
+def : NarrowHighHalfPat<SUBHN2vvv_4s2d, v2i32, v2i64,
+ BinOpFrag<(NI_get_hi_2d (sub node:$LHS, node:$RHS))>>;
+
+// raddhn2 patterns
+def : NarrowHighHalfPat<RADDHN2vvv_16b8h, v8i8, v8i16, int_arm_neon_vraddhn>;
+def : NarrowHighHalfPat<RADDHN2vvv_8h4s, v4i16, v4i32, int_arm_neon_vraddhn>;
+def : NarrowHighHalfPat<RADDHN2vvv_4s2d, v2i32, v2i64, int_arm_neon_vraddhn>;
+
+// rsubhn2 patterns
+def : NarrowHighHalfPat<RSUBHN2vvv_16b8h, v8i8, v8i16, int_arm_neon_vrsubhn>;
+def : NarrowHighHalfPat<RSUBHN2vvv_8h4s, v4i16, v4i32, int_arm_neon_vrsubhn>;
+def : NarrowHighHalfPat<RSUBHN2vvv_4s2d, v2i32, v2i64, int_arm_neon_vrsubhn>;
+
+// Patterns that need to extend the result
+class NeonI_3VDL_Ext<bit q, bit u, bits<2> size, bits<4> opcode,
+ string asmop, string ResS, string OpS,
+ SDPatternOperator opnode,
+ RegisterOperand OpVPR,
+ ValueType ResTy, ValueType OpTy, ValueType OpSTy>
+ : NeonI_3VDiff<q, u, size, opcode,
+ (outs VPR128:$Rd), (ins OpVPR:$Rn, OpVPR:$Rm),
+ asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
+ [(set (ResTy VPR128:$Rd),
+ (ResTy (zext (OpSTy (opnode (OpTy OpVPR:$Rn),
+ (OpTy OpVPR:$Rm))))))],
+ NoItinerary>;
+
+multiclass NeonI_3VDL_zext<bit u, bits<4> opcode, string asmop,
+ SDPatternOperator opnode, bit Commutable = 0> {
+ let isCommutable = Commutable in {
+ def _8h8b : NeonI_3VDL_Ext<0b0, u, 0b00, opcode, asmop, "8h", "8b",
+ opnode, VPR64, v8i16, v8i8, v8i8>;
+ def _4s4h : NeonI_3VDL_Ext<0b0, u, 0b01, opcode, asmop, "4s", "4h",
+ opnode, VPR64, v4i32, v4i16, v4i16>;
+ def _2d2s : NeonI_3VDL_Ext<0b0, u, 0b10, opcode, asmop, "2d", "2s",
+ opnode, VPR64, v2i64, v2i32, v2i32>;
+ }
+}
+
+defm SABDLvvv : NeonI_3VDL_zext<0b0, 0b0111, "sabdl", int_arm_neon_vabds, 1>;
+defm UABDLvvv : NeonI_3VDL_zext<0b1, 0b0111, "uabdl", int_arm_neon_vabdu, 1>;
+
+multiclass NeonI_Op_High<SDPatternOperator op> {
+ def _16B : PatFrag<(ops node:$Rn, node:$Rm),
+ (op (v8i8 (Neon_High16B node:$Rn)),
+ (v8i8 (Neon_High16B node:$Rm)))>;
+ def _8H : PatFrag<(ops node:$Rn, node:$Rm),
+ (op (v4i16 (Neon_High8H node:$Rn)),
+ (v4i16 (Neon_High8H node:$Rm)))>;
+ def _4S : PatFrag<(ops node:$Rn, node:$Rm),
+ (op (v2i32 (Neon_High4S node:$Rn)),
+ (v2i32 (Neon_High4S node:$Rm)))>;
+}
+
+defm NI_sabdl_hi : NeonI_Op_High<int_arm_neon_vabds>;
+defm NI_uabdl_hi : NeonI_Op_High<int_arm_neon_vabdu>;
+defm NI_smull_hi : NeonI_Op_High<int_arm_neon_vmulls>;
+defm NI_umull_hi : NeonI_Op_High<int_arm_neon_vmullu>;
+defm NI_qdmull_hi : NeonI_Op_High<int_arm_neon_vqdmull>;
+defm NI_pmull_hi : NeonI_Op_High<int_arm_neon_vmullp>;
+
+multiclass NeonI_3VDL_Abd_u<bit u, bits<4> opcode, string asmop, string opnode,
+ bit Commutable = 0> {
+ let isCommutable = Commutable in {
+ def _8h8b : NeonI_3VDL_Ext<0b1, u, 0b00, opcode, asmop, "8h", "16b",
+ !cast<PatFrag>(opnode # "_16B"),
+ VPR128, v8i16, v16i8, v8i8>;
+ def _4s4h : NeonI_3VDL_Ext<0b1, u, 0b01, opcode, asmop, "4s", "8h",
+ !cast<PatFrag>(opnode # "_8H"),
+ VPR128, v4i32, v8i16, v4i16>;
+ def _2d2s : NeonI_3VDL_Ext<0b1, u, 0b10, opcode, asmop, "2d", "4s",
+ !cast<PatFrag>(opnode # "_4S"),
+ VPR128, v2i64, v4i32, v2i32>;
+ }
+}
+
+defm SABDL2vvv : NeonI_3VDL_Abd_u<0b0, 0b0111, "sabdl2", "NI_sabdl_hi", 1>;
+defm UABDL2vvv : NeonI_3VDL_Abd_u<0b1, 0b0111, "uabdl2", "NI_uabdl_hi", 1>;
+
+// For patterns that need two operators chained together.
+class NeonI_3VDL_Aba<bit q, bit u, bits<2> size, bits<4> opcode,
+ string asmop, string ResS, string OpS,
+ SDPatternOperator opnode, SDPatternOperator subop,
+ RegisterOperand OpVPR,
+ ValueType ResTy, ValueType OpTy, ValueType OpSTy>
+ : NeonI_3VDiff<q, u, size, opcode,
+ (outs VPR128:$Rd), (ins VPR128:$src, OpVPR:$Rn, OpVPR:$Rm),
+ asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
+ [(set (ResTy VPR128:$Rd),
+ (ResTy (opnode
+ (ResTy VPR128:$src),
+ (ResTy (zext (OpSTy (subop (OpTy OpVPR:$Rn),
+ (OpTy OpVPR:$Rm))))))))],
+ NoItinerary> {
+ let Constraints = "$src = $Rd";
+}
+
+multiclass NeonI_3VDL_Aba_v1<bit u, bits<4> opcode, string asmop,
+ SDPatternOperator opnode, SDPatternOperator subop>{
+ def _8h8b : NeonI_3VDL_Aba<0b0, u, 0b00, opcode, asmop, "8h", "8b",
+ opnode, subop, VPR64, v8i16, v8i8, v8i8>;
+ def _4s4h : NeonI_3VDL_Aba<0b0, u, 0b01, opcode, asmop, "4s", "4h",
+ opnode, subop, VPR64, v4i32, v4i16, v4i16>;
+ def _2d2s : NeonI_3VDL_Aba<0b0, u, 0b10, opcode, asmop, "2d", "2s",
+ opnode, subop, VPR64, v2i64, v2i32, v2i32>;
+}
+
+defm SABALvvv : NeonI_3VDL_Aba_v1<0b0, 0b0101, "sabal",
+ add, int_arm_neon_vabds>;
+defm UABALvvv : NeonI_3VDL_Aba_v1<0b1, 0b0101, "uabal",
+ add, int_arm_neon_vabdu>;
+
+multiclass NeonI_3VDL2_Aba_v1<bit u, bits<4> opcode, string asmop,
+ SDPatternOperator opnode, string subop> {
+ def _8h8b : NeonI_3VDL_Aba<0b1, u, 0b00, opcode, asmop, "8h", "16b",
+ opnode, !cast<PatFrag>(subop # "_16B"),
+ VPR128, v8i16, v16i8, v8i8>;
+ def _4s4h : NeonI_3VDL_Aba<0b1, u, 0b01, opcode, asmop, "4s", "8h",
+ opnode, !cast<PatFrag>(subop # "_8H"),
+ VPR128, v4i32, v8i16, v4i16>;
+ def _2d2s : NeonI_3VDL_Aba<0b1, u, 0b10, opcode, asmop, "2d", "4s",
+ opnode, !cast<PatFrag>(subop # "_4S"),
+ VPR128, v2i64, v4i32, v2i32>;
+}
+
+defm SABAL2vvv : NeonI_3VDL2_Aba_v1<0b0, 0b0101, "sabal2", add,
+ "NI_sabdl_hi">;
+defm UABAL2vvv : NeonI_3VDL2_Aba_v1<0b1, 0b0101, "uabal2", add,
+ "NI_uabdl_hi">;
+
+// Long pattern with 2 operands
+multiclass NeonI_3VDL_2Op<bit u, bits<4> opcode, string asmop,
+ SDPatternOperator opnode, bit Commutable = 0> {
+ let isCommutable = Commutable in {
+ def _8h8b : NeonI_3VD_2Op<0b0, u, 0b00, opcode, asmop, "8h", "8b",
+ opnode, VPR128, VPR64, v8i16, v8i8>;
+ def _4s4h : NeonI_3VD_2Op<0b0, u, 0b01, opcode, asmop, "4s", "4h",
+ opnode, VPR128, VPR64, v4i32, v4i16>;
+ def _2d2s : NeonI_3VD_2Op<0b0, u, 0b10, opcode, asmop, "2d", "2s",
+ opnode, VPR128, VPR64, v2i64, v2i32>;
+ }
+}
+
+defm SMULLvvv : NeonI_3VDL_2Op<0b0, 0b1100, "smull", int_arm_neon_vmulls, 1>;
+defm UMULLvvv : NeonI_3VDL_2Op<0b1, 0b1100, "umull", int_arm_neon_vmullu, 1>;
+
+class NeonI_3VDL2_2Op_mull<bit q, bit u, bits<2> size, bits<4> opcode,
+ string asmop, string ResS, string OpS,
+ SDPatternOperator opnode,
+ ValueType ResTy, ValueType OpTy>
+ : NeonI_3VDiff<q, u, size, opcode,
+ (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
+ asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
+ [(set (ResTy VPR128:$Rd),
+ (ResTy (opnode (OpTy VPR128:$Rn), (OpTy VPR128:$Rm))))],
+ NoItinerary>;
+
+multiclass NeonI_3VDL2_2Op_mull_v1<bit u, bits<4> opcode, string asmop,
+ string opnode, bit Commutable = 0> {
let isCommutable = Commutable in {
- def bbb : NeonI_Scalar3Same<u, 0b00, opcode,
- (outs FPR8:$Rd), (ins FPR8:$Rn, FPR8:$Rm),
- !strconcat(asmop, " $Rd, $Rn, $Rm"),
+ def _8h16b : NeonI_3VDL2_2Op_mull<0b1, u, 0b00, opcode, asmop, "8h", "16b",
+ !cast<PatFrag>(opnode # "_16B"),
+ v8i16, v16i8>;
+ def _4s8h : NeonI_3VDL2_2Op_mull<0b1, u, 0b01, opcode, asmop, "4s", "8h",
+ !cast<PatFrag>(opnode # "_8H"),
+ v4i32, v8i16>;
+ def _2d4s : NeonI_3VDL2_2Op_mull<0b1, u, 0b10, opcode, asmop, "2d", "4s",
+ !cast<PatFrag>(opnode # "_4S"),
+ v2i64, v4i32>;
+ }
+}
+
+defm SMULL2vvv : NeonI_3VDL2_2Op_mull_v1<0b0, 0b1100, "smull2",
+ "NI_smull_hi", 1>;
+defm UMULL2vvv : NeonI_3VDL2_2Op_mull_v1<0b1, 0b1100, "umull2",
+ "NI_umull_hi", 1>;
+
+// Long pattern with 3 operands
+class NeonI_3VDL_3Op<bit q, bit u, bits<2> size, bits<4> opcode,
+ string asmop, string ResS, string OpS,
+ SDPatternOperator opnode,
+ ValueType ResTy, ValueType OpTy>
+ : NeonI_3VDiff<q, u, size, opcode,
+ (outs VPR128:$Rd), (ins VPR128:$src, VPR64:$Rn, VPR64:$Rm),
+ asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
+ [(set (ResTy VPR128:$Rd),
+ (ResTy (opnode
+ (ResTy VPR128:$src),
+ (OpTy VPR64:$Rn), (OpTy VPR64:$Rm))))],
+ NoItinerary> {
+ let Constraints = "$src = $Rd";
+}
+
+multiclass NeonI_3VDL_3Op_v1<bit u, bits<4> opcode, string asmop,
+ SDPatternOperator opnode> {
+ def _8h8b : NeonI_3VDL_3Op<0b0, u, 0b00, opcode, asmop, "8h", "8b",
+ opnode, v8i16, v8i8>;
+ def _4s4h : NeonI_3VDL_3Op<0b0, u, 0b01, opcode, asmop, "4s", "4h",
+ opnode, v4i32, v4i16>;
+ def _2d2s : NeonI_3VDL_3Op<0b0, u, 0b10, opcode, asmop, "2d", "2s",
+ opnode, v2i64, v2i32>;
+}
+
+def Neon_smlal : PatFrag<(ops node:$Rd, node:$Rn, node:$Rm),
+ (add node:$Rd,
+ (int_arm_neon_vmulls node:$Rn, node:$Rm))>;
+
+def Neon_umlal : PatFrag<(ops node:$Rd, node:$Rn, node:$Rm),
+ (add node:$Rd,
+ (int_arm_neon_vmullu node:$Rn, node:$Rm))>;
+
+def Neon_smlsl : PatFrag<(ops node:$Rd, node:$Rn, node:$Rm),
+ (sub node:$Rd,
+ (int_arm_neon_vmulls node:$Rn, node:$Rm))>;
+
+def Neon_umlsl : PatFrag<(ops node:$Rd, node:$Rn, node:$Rm),
+ (sub node:$Rd,
+ (int_arm_neon_vmullu node:$Rn, node:$Rm))>;
+
+defm SMLALvvv : NeonI_3VDL_3Op_v1<0b0, 0b1000, "smlal", Neon_smlal>;
+defm UMLALvvv : NeonI_3VDL_3Op_v1<0b1, 0b1000, "umlal", Neon_umlal>;
+
+defm SMLSLvvv : NeonI_3VDL_3Op_v1<0b0, 0b1010, "smlsl", Neon_smlsl>;
+defm UMLSLvvv : NeonI_3VDL_3Op_v1<0b1, 0b1010, "umlsl", Neon_umlsl>;
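+
+// e.g. "smlal v0.4s, v1.4h, v2.4h" computes, per lane,
+// v0.4s += sext(v1.4h) * sext(v2.4h), matching the Neon_smlal fragment above
+// (illustrative register choices).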
+
+class NeonI_3VDL2_3Op_mlas<bit q, bit u, bits<2> size, bits<4> opcode,
+ string asmop, string ResS, string OpS,
+ SDPatternOperator subop, SDPatternOperator opnode,
+ RegisterOperand OpVPR,
+ ValueType ResTy, ValueType OpTy>
+ : NeonI_3VDiff<q, u, size, opcode,
+ (outs VPR128:$Rd), (ins VPR128:$src, OpVPR:$Rn, OpVPR:$Rm),
+ asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
+ [(set (ResTy VPR128:$Rd),
+ (ResTy (subop
+ (ResTy VPR128:$src),
+ (ResTy (opnode (OpTy OpVPR:$Rn), (OpTy OpVPR:$Rm))))))],
+ NoItinerary> {
+ let Constraints = "$src = $Rd";
+}
+
+multiclass NeonI_3VDL2_3Op_mlas_v1<bit u, bits<4> opcode, string asmop,
+ SDPatternOperator subop, string opnode> {
+ def _8h16b : NeonI_3VDL2_3Op_mlas<0b1, u, 0b00, opcode, asmop, "8h", "16b",
+ subop, !cast<PatFrag>(opnode # "_16B"),
+ VPR128, v8i16, v16i8>;
+ def _4s8h : NeonI_3VDL2_3Op_mlas<0b1, u, 0b01, opcode, asmop, "4s", "8h",
+ subop, !cast<PatFrag>(opnode # "_8H"),
+ VPR128, v4i32, v8i16>;
+ def _2d4s : NeonI_3VDL2_3Op_mlas<0b1, u, 0b10, opcode, asmop, "2d", "4s",
+ subop, !cast<PatFrag>(opnode # "_4S"),
+ VPR128, v2i64, v4i32>;
+}
+
+defm SMLAL2vvv : NeonI_3VDL2_3Op_mlas_v1<0b0, 0b1000, "smlal2",
+ add, "NI_smull_hi">;
+defm UMLAL2vvv : NeonI_3VDL2_3Op_mlas_v1<0b1, 0b1000, "umlal2",
+ add, "NI_umull_hi">;
+
+defm SMLSL2vvv : NeonI_3VDL2_3Op_mlas_v1<0b0, 0b1010, "smlsl2",
+ sub, "NI_smull_hi">;
+defm UMLSL2vvv : NeonI_3VDL2_3Op_mlas_v1<0b1, 0b1010, "umlsl2",
+ sub, "NI_umull_hi">;
+
+multiclass NeonI_3VDL_qdmlal_3Op_v2<bit u, bits<4> opcode, string asmop,
+ SDPatternOperator opnode> {
+ def _4s4h : NeonI_3VDL2_3Op_mlas<0b0, u, 0b01, opcode, asmop, "4s", "4h",
+ opnode, int_arm_neon_vqdmull,
+ VPR64, v4i32, v4i16>;
+ def _2d2s : NeonI_3VDL2_3Op_mlas<0b0, u, 0b10, opcode, asmop, "2d", "2s",
+ opnode, int_arm_neon_vqdmull,
+ VPR64, v2i64, v2i32>;
+}
+
+defm SQDMLALvvv : NeonI_3VDL_qdmlal_3Op_v2<0b0, 0b1001, "sqdmlal",
+ int_arm_neon_vqadds>;
+defm SQDMLSLvvv : NeonI_3VDL_qdmlal_3Op_v2<0b0, 0b1011, "sqdmlsl",
+ int_arm_neon_vqsubs>;
+
+multiclass NeonI_3VDL_v2<bit u, bits<4> opcode, string asmop,
+ SDPatternOperator opnode, bit Commutable = 0> {
+ let isCommutable = Commutable in {
+ def _4s4h : NeonI_3VD_2Op<0b0, u, 0b01, opcode, asmop, "4s", "4h",
+ opnode, VPR128, VPR64, v4i32, v4i16>;
+ def _2d2s : NeonI_3VD_2Op<0b0, u, 0b10, opcode, asmop, "2d", "2s",
+ opnode, VPR128, VPR64, v2i64, v2i32>;
+ }
+}
+
+defm SQDMULLvvv : NeonI_3VDL_v2<0b0, 0b1101, "sqdmull",
+ int_arm_neon_vqdmull, 1>;
+
+multiclass NeonI_3VDL2_2Op_mull_v2<bit u, bits<4> opcode, string asmop,
+ string opnode, bit Commutable = 0> {
+ let isCommutable = Commutable in {
+ def _4s8h : NeonI_3VDL2_2Op_mull<0b1, u, 0b01, opcode, asmop, "4s", "8h",
+ !cast<PatFrag>(opnode # "_8H"),
+ v4i32, v8i16>;
+ def _2d4s : NeonI_3VDL2_2Op_mull<0b1, u, 0b10, opcode, asmop, "2d", "4s",
+ !cast<PatFrag>(opnode # "_4S"),
+ v2i64, v4i32>;
+ }
+}
+
+defm SQDMULL2vvv : NeonI_3VDL2_2Op_mull_v2<0b0, 0b1101, "sqdmull2",
+ "NI_qdmull_hi", 1>;
+
+multiclass NeonI_3VDL2_3Op_qdmlal_v2<bit u, bits<4> opcode, string asmop,
+ SDPatternOperator opnode> {
+ def _4s8h : NeonI_3VDL2_3Op_mlas<0b1, u, 0b01, opcode, asmop, "4s", "8h",
+ opnode, NI_qdmull_hi_8H,
+ VPR128, v4i32, v8i16>;
+ def _2d4s : NeonI_3VDL2_3Op_mlas<0b1, u, 0b10, opcode, asmop, "2d", "4s",
+ opnode, NI_qdmull_hi_4S,
+ VPR128, v2i64, v4i32>;
+}
+
+defm SQDMLAL2vvv : NeonI_3VDL2_3Op_qdmlal_v2<0b0, 0b1001, "sqdmlal2",
+ int_arm_neon_vqadds>;
+defm SQDMLSL2vvv : NeonI_3VDL2_3Op_qdmlal_v2<0b0, 0b1011, "sqdmlsl2",
+ int_arm_neon_vqsubs>;
+
+multiclass NeonI_3VDL_v3<bit u, bits<4> opcode, string asmop,
+ SDPatternOperator opnode, bit Commutable = 0> {
+ let isCommutable = Commutable in {
+ def _8h8b : NeonI_3VD_2Op<0b0, u, 0b00, opcode, asmop, "8h", "8b",
+ opnode, VPR128, VPR64, v8i16, v8i8>;
+
+ def _1q1d : NeonI_3VDiff<0b0, u, 0b11, opcode,
+ (outs VPR128:$Rd), (ins VPR64:$Rn, VPR64:$Rm),
+ asmop # "\t$Rd.1q, $Rn.1d, $Rm.1d",
+ [], NoItinerary>;
+ }
+}
+
+defm PMULLvvv : NeonI_3VDL_v3<0b0, 0b1110, "pmull", int_arm_neon_vmullp, 1>;
+
+multiclass NeonI_3VDL2_2Op_mull_v3<bit u, bits<4> opcode, string asmop,
+ string opnode, bit Commutable = 0> {
+ let isCommutable = Commutable in {
+ def _8h16b : NeonI_3VDL2_2Op_mull<0b1, u, 0b00, opcode, asmop, "8h", "16b",
+ !cast<PatFrag>(opnode # "_16B"),
+ v8i16, v16i8>;
+
+ def _1q2d : NeonI_3VDiff<0b1, u, 0b11, opcode,
+ (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
+ asmop # "\t$Rd.1q, $Rn.2d, $Rm.2d",
+ [], NoItinerary>;
+ }
+}
+
+defm PMULL2vvv : NeonI_3VDL2_2Op_mull_v3<0b0, 0b1110, "pmull2", "NI_pmull_hi",
+ 1>;
+
+// End of implementation for instruction class (3V Diff)
+
+// The following are the vector load/store multiple N-element structure
+// instructions (class SIMD lselem).
+
+// ld1: load multiple 1-element structures to 1/2/3/4 registers.
+// ld2/ld3/ld4: load multiple N-element structures to N registers (N = 2, 3, 4).
+// The structure consists of a sequence of sets of N values.
+// The first element of the structure is placed in the first lane
+// of the first vector, the second element in the first lane
+// of the second vector, and so on.
+// E.g. LD1_3V_2S will load 32-bit elements {A, B, C, D, E, F} sequentially into
+// the three 64-bit vector list {BA, DC, FE}.
+// E.g. LD3_2S will load 32-bit elements {A, B, C, D, E, F} into the three
+// 64-bit vector list {DA, EB, FC}.
+// Store instructions store multiple structures from N registers, mirroring
+// the loads.
+
+
+class NeonI_LDVList<bit q, bits<4> opcode, bits<2> size,
+ RegisterOperand VecList, string asmop>
+ : NeonI_LdStMult<q, 1, opcode, size,
+ (outs VecList:$Rt), (ins GPR64xsp:$Rn),
+ asmop # "\t$Rt, [$Rn]",
+ [],
+ NoItinerary> {
+ let mayLoad = 1;
+ let neverHasSideEffects = 1;
+}
+
+multiclass LDVList_BHSD<bits<4> opcode, string List, string asmop> {
+ def _8B : NeonI_LDVList<0, opcode, 0b00,
+ !cast<RegisterOperand>(List # "8B_operand"), asmop>;
+
+ def _4H : NeonI_LDVList<0, opcode, 0b01,
+ !cast<RegisterOperand>(List # "4H_operand"), asmop>;
+
+ def _2S : NeonI_LDVList<0, opcode, 0b10,
+ !cast<RegisterOperand>(List # "2S_operand"), asmop>;
+
+ def _16B : NeonI_LDVList<1, opcode, 0b00,
+ !cast<RegisterOperand>(List # "16B_operand"), asmop>;
+
+ def _8H : NeonI_LDVList<1, opcode, 0b01,
+ !cast<RegisterOperand>(List # "8H_operand"), asmop>;
+
+ def _4S : NeonI_LDVList<1, opcode, 0b10,
+ !cast<RegisterOperand>(List # "4S_operand"), asmop>;
+
+ def _2D : NeonI_LDVList<1, opcode, 0b11,
+ !cast<RegisterOperand>(List # "2D_operand"), asmop>;
+}
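+
+// Note that the BHSD multiclass covers only the arrangements that are legal
+// for every ldN/stN; the .1d arrangement exists solely for the ld1/st1
+// family, so the corresponding _1D variants are defined individually below.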
+
+// Load multiple N-element structure to N consecutive registers (N = 1,2,3,4)
+defm LD1 : LDVList_BHSD<0b0111, "VOne", "ld1">;
+def LD1_1D : NeonI_LDVList<0, 0b0111, 0b11, VOne1D_operand, "ld1">;
+
+defm LD2 : LDVList_BHSD<0b1000, "VPair", "ld2">;
+
+defm LD3 : LDVList_BHSD<0b0100, "VTriple", "ld3">;
+
+defm LD4 : LDVList_BHSD<0b0000, "VQuad", "ld4">;
+
+// Load multiple 1-element structure to N consecutive registers (N = 2,3,4)
+defm LD1x2 : LDVList_BHSD<0b1010, "VPair", "ld1">;
+def LD1x2_1D : NeonI_LDVList<0, 0b1010, 0b11, VPair1D_operand, "ld1">;
+
+defm LD1x3 : LDVList_BHSD<0b0110, "VTriple", "ld1">;
+def LD1x3_1D : NeonI_LDVList<0, 0b0110, 0b11, VTriple1D_operand, "ld1">;
+
+defm LD1x4 : LDVList_BHSD<0b0010, "VQuad", "ld1">;
+def LD1x4_1D : NeonI_LDVList<0, 0b0010, 0b11, VQuad1D_operand, "ld1">;
+
+class NeonI_STVList<bit q, bits<4> opcode, bits<2> size,
+ RegisterOperand VecList, string asmop>
+ : NeonI_LdStMult<q, 0, opcode, size,
+ (outs), (ins GPR64xsp:$Rn, VecList:$Rt),
+ asmop # "\t$Rt, [$Rn]",
+ [],
+ NoItinerary> {
+ let mayStore = 1;
+ let neverHasSideEffects = 1;
+}
+
+multiclass STVList_BHSD<bits<4> opcode, string List, string asmop> {
+ def _8B : NeonI_STVList<0, opcode, 0b00,
+ !cast<RegisterOperand>(List # "8B_operand"), asmop>;
+
+ def _4H : NeonI_STVList<0, opcode, 0b01,
+ !cast<RegisterOperand>(List # "4H_operand"), asmop>;
+
+ def _2S : NeonI_STVList<0, opcode, 0b10,
+ !cast<RegisterOperand>(List # "2S_operand"), asmop>;
+
+ def _16B : NeonI_STVList<1, opcode, 0b00,
+ !cast<RegisterOperand>(List # "16B_operand"), asmop>;
+
+ def _8H : NeonI_STVList<1, opcode, 0b01,
+ !cast<RegisterOperand>(List # "8H_operand"), asmop>;
+
+ def _4S : NeonI_STVList<1, opcode, 0b10,
+ !cast<RegisterOperand>(List # "4S_operand"), asmop>;
+
+ def _2D : NeonI_STVList<1, opcode, 0b11,
+ !cast<RegisterOperand>(List # "2D_operand"), asmop>;
+}
+
+// Store multiple N-element structures from N registers (N = 1,2,3,4)
+defm ST1 : STVList_BHSD<0b0111, "VOne", "st1">;
+def ST1_1D : NeonI_STVList<0, 0b0111, 0b11, VOne1D_operand, "st1">;
+
+defm ST2 : STVList_BHSD<0b1000, "VPair", "st2">;
+
+defm ST3 : STVList_BHSD<0b0100, "VTriple", "st3">;
+
+defm ST4 : STVList_BHSD<0b0000, "VQuad", "st4">;
+
+// Store multiple 1-element structures from N consecutive registers (N = 2,3,4)
+defm ST1x2 : STVList_BHSD<0b1010, "VPair", "st1">;
+def ST1x2_1D : NeonI_STVList<0, 0b1010, 0b11, VPair1D_operand, "st1">;
+
+defm ST1x3 : STVList_BHSD<0b0110, "VTriple", "st1">;
+def ST1x3_1D : NeonI_STVList<0, 0b0110, 0b11, VTriple1D_operand, "st1">;
+
+defm ST1x4 : STVList_BHSD<0b0010, "VQuad", "st1">;
+def ST1x4_1D : NeonI_STVList<0, 0b0010, 0b11, VQuad1D_operand, "st1">;
+
+def : Pat<(v2f64 (load GPR64xsp:$addr)), (LD1_2D GPR64xsp:$addr)>;
+def : Pat<(v2i64 (load GPR64xsp:$addr)), (LD1_2D GPR64xsp:$addr)>;
+
+def : Pat<(v4f32 (load GPR64xsp:$addr)), (LD1_4S GPR64xsp:$addr)>;
+def : Pat<(v4i32 (load GPR64xsp:$addr)), (LD1_4S GPR64xsp:$addr)>;
+
+def : Pat<(v8i16 (load GPR64xsp:$addr)), (LD1_8H GPR64xsp:$addr)>;
+def : Pat<(v16i8 (load GPR64xsp:$addr)), (LD1_16B GPR64xsp:$addr)>;
+
+def : Pat<(v1f64 (load GPR64xsp:$addr)), (LD1_1D GPR64xsp:$addr)>;
+def : Pat<(v1i64 (load GPR64xsp:$addr)), (LD1_1D GPR64xsp:$addr)>;
+
+def : Pat<(v2f32 (load GPR64xsp:$addr)), (LD1_2S GPR64xsp:$addr)>;
+def : Pat<(v2i32 (load GPR64xsp:$addr)), (LD1_2S GPR64xsp:$addr)>;
+
+def : Pat<(v4i16 (load GPR64xsp:$addr)), (LD1_4H GPR64xsp:$addr)>;
+def : Pat<(v8i8 (load GPR64xsp:$addr)), (LD1_8B GPR64xsp:$addr)>;
+
+def : Pat<(store (v2i64 VPR128:$value), GPR64xsp:$addr),
+ (ST1_2D GPR64xsp:$addr, VPR128:$value)>;
+def : Pat<(store (v2f64 VPR128:$value), GPR64xsp:$addr),
+ (ST1_2D GPR64xsp:$addr, VPR128:$value)>;
+
+def : Pat<(store (v4i32 VPR128:$value), GPR64xsp:$addr),
+ (ST1_4S GPR64xsp:$addr, VPR128:$value)>;
+def : Pat<(store (v4f32 VPR128:$value), GPR64xsp:$addr),
+ (ST1_4S GPR64xsp:$addr, VPR128:$value)>;
+
+def : Pat<(store (v8i16 VPR128:$value), GPR64xsp:$addr),
+ (ST1_8H GPR64xsp:$addr, VPR128:$value)>;
+def : Pat<(store (v16i8 VPR128:$value), GPR64xsp:$addr),
+ (ST1_16B GPR64xsp:$addr, VPR128:$value)>;
+
+def : Pat<(store (v1i64 VPR64:$value), GPR64xsp:$addr),
+ (ST1_1D GPR64xsp:$addr, VPR64:$value)>;
+def : Pat<(store (v1f64 VPR64:$value), GPR64xsp:$addr),
+ (ST1_1D GPR64xsp:$addr, VPR64:$value)>;
+
+def : Pat<(store (v2i32 VPR64:$value), GPR64xsp:$addr),
+ (ST1_2S GPR64xsp:$addr, VPR64:$value)>;
+def : Pat<(store (v2f32 VPR64:$value), GPR64xsp:$addr),
+ (ST1_2S GPR64xsp:$addr, VPR64:$value)>;
+
+def : Pat<(store (v4i16 VPR64:$value), GPR64xsp:$addr),
+ (ST1_4H GPR64xsp:$addr, VPR64:$value)>;
+def : Pat<(store (v8i8 VPR64:$value), GPR64xsp:$addr),
+ (ST1_8B GPR64xsp:$addr, VPR64:$value)>;
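+
+// E.g. with the patterns above, a plain vector load such as
+//   (v4i32 (load GPR64xsp:$addr))
+// is selected directly to "ld1 {v0.4s}, [x0]", and the matching stores map
+// onto st1 in the same way.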
+
+// End of vector load/store multiple N-element structure (class SIMD lselem)
+
+// The following are the post-index vector load/store multiple N-element
+// structure instructions (class SIMD lselem-post).
+def exact1_asmoperand : AsmOperandClass {
+ let Name = "Exact1";
+ let PredicateMethod = "isExactImm<1>";
+ let RenderMethod = "addImmOperands";
+}
+def uimm_exact1 : Operand<i32>, ImmLeaf<i32, [{return Imm == 1;}]> {
+ let ParserMatchClass = exact1_asmoperand;
+}
+
+def exact2_asmoperand : AsmOperandClass {
+ let Name = "Exact2";
+ let PredicateMethod = "isExactImm<2>";
+ let RenderMethod = "addImmOperands";
+}
+def uimm_exact2 : Operand<i32>, ImmLeaf<i32, [{return Imm == 2;}]> {
+ let ParserMatchClass = exact2_asmoperand;
+}
+
+def exact3_asmoperand : AsmOperandClass {
+ let Name = "Exact3";
+ let PredicateMethod = "isExactImm<3>";
+ let RenderMethod = "addImmOperands";
+}
+def uimm_exact3 : Operand<i32>, ImmLeaf<i32, [{return Imm == 3;}]> {
+ let ParserMatchClass = exact3_asmoperand;
+}
+
+def exact4_asmoperand : AsmOperandClass {
+ let Name = "Exact4";
+ let PredicateMethod = "isExactImm<4>";
+ let RenderMethod = "addImmOperands";
+}
+def uimm_exact4 : Operand<i32>, ImmLeaf<i32, [{return Imm == 4;}]> {
+ let ParserMatchClass = exact4_asmoperand;
+}
+
+def exact6_asmoperand : AsmOperandClass {
+ let Name = "Exact6";
+ let PredicateMethod = "isExactImm<6>";
+ let RenderMethod = "addImmOperands";
+}
+def uimm_exact6 : Operand<i32>, ImmLeaf<i32, [{return Imm == 6;}]> {
+ let ParserMatchClass = exact6_asmoperand;
+}
+
+def exact8_asmoperand : AsmOperandClass {
+ let Name = "Exact8";
+ let PredicateMethod = "isExactImm<8>";
+ let RenderMethod = "addImmOperands";
+}
+def uimm_exact8 : Operand<i32>, ImmLeaf<i32, [{return Imm == 8;}]> {
+ let ParserMatchClass = exact8_asmoperand;
+}
+
+def exact12_asmoperand : AsmOperandClass {
+ let Name = "Exact12";
+ let PredicateMethod = "isExactImm<12>";
+ let RenderMethod = "addImmOperands";
+}
+def uimm_exact12 : Operand<i32>, ImmLeaf<i32, [{return Imm == 12;}]> {
+ let ParserMatchClass = exact12_asmoperand;
+}
+
+def exact16_asmoperand : AsmOperandClass {
+ let Name = "Exact16";
+ let PredicateMethod = "isExactImm<16>";
+ let RenderMethod = "addImmOperands";
+}
+def uimm_exact16 : Operand<i32>, ImmLeaf<i32, [{return Imm == 16;}]> {
+ let ParserMatchClass = exact16_asmoperand;
+}
+
+def exact24_asmoperand : AsmOperandClass {
+ let Name = "Exact24";
+ let PredicateMethod = "isExactImm<24>";
+ let RenderMethod = "addImmOperands";
+}
+def uimm_exact24 : Operand<i32>, ImmLeaf<i32, [{return Imm == 24;}]> {
+ let ParserMatchClass = exact24_asmoperand;
+}
+
+def exact32_asmoperand : AsmOperandClass {
+ let Name = "Exact32";
+ let PredicateMethod = "isExactImm<32>";
+ let RenderMethod = "addImmOperands";
+}
+def uimm_exact32 : Operand<i32>, ImmLeaf<i32, [{return Imm == 32;}]> {
+ let ParserMatchClass = exact32_asmoperand;
+}
+
+def exact48_asmoperand : AsmOperandClass {
+ let Name = "Exact48";
+ let PredicateMethod = "isExactImm<48>";
+ let RenderMethod = "addImmOperands";
+}
+def uimm_exact48 : Operand<i32>, ImmLeaf<i32, [{return Imm == 48;}]> {
+ let ParserMatchClass = exact48_asmoperand;
+}
+
+def exact64_asmoperand : AsmOperandClass {
+ let Name = "Exact64";
+ let PredicateMethod = "isExactImm<64>";
+ let RenderMethod = "addImmOperands";
+}
+def uimm_exact64 : Operand<i32>, ImmLeaf<i32, [{return Imm == 64;}]> {
+ let ParserMatchClass = exact64_asmoperand;
+}
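+
+// Each uimm_exactN operand above matches only the literal immediate N. The
+// post-index forms below use them so that a fixed increment must equal the
+// number of bytes transferred, e.g. "ld1 {v0.16b, v1.16b}, [x0], #32"
+// (2 registers x 16 bytes) requires uimm_exact32.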
+
+multiclass NeonI_LDWB_VList<bit q, bits<4> opcode, bits<2> size,
+ RegisterOperand VecList, Operand ImmTy,
+ string asmop> {
+ let Constraints = "$Rn = $wb", mayLoad = 1, neverHasSideEffects = 1,
+ DecoderMethod = "DecodeVLDSTPostInstruction" in {
+ def _fixed : NeonI_LdStMult_Post<q, 1, opcode, size,
+ (outs VecList:$Rt, GPR64xsp:$wb),
+ (ins GPR64xsp:$Rn, ImmTy:$amt),
+ asmop # "\t$Rt, [$Rn], $amt",
+ [],
+ NoItinerary> {
+ let Rm = 0b11111;
+ }
+
+ def _register : NeonI_LdStMult_Post<q, 1, opcode, size,
+ (outs VecList:$Rt, GPR64xsp:$wb),
+ (ins GPR64xsp:$Rn, GPR64noxzr:$Rm),
+ asmop # "\t$Rt, [$Rn], $Rm",
+ [],
+ NoItinerary>;
+ }
+}
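+
+// In the A64 encoding, Rm == 0b11111 selects the immediate (fixed-increment)
+// post-index form, so the _fixed variant hard-codes Rm and takes an exact
+// immediate, while the _register variant takes any GPR64 other than xzr.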
+
+multiclass LDWB_VList_BHSD<bits<4> opcode, string List, Operand ImmTy,
+ Operand ImmTy2, string asmop> {
+ defm _8B : NeonI_LDWB_VList<0, opcode, 0b00,
+ !cast<RegisterOperand>(List # "8B_operand"),
+ ImmTy, asmop>;
+
+ defm _4H : NeonI_LDWB_VList<0, opcode, 0b01,
+ !cast<RegisterOperand>(List # "4H_operand"),
+ ImmTy, asmop>;
+
+ defm _2S : NeonI_LDWB_VList<0, opcode, 0b10,
+ !cast<RegisterOperand>(List # "2S_operand"),
+ ImmTy, asmop>;
+
+ defm _16B : NeonI_LDWB_VList<1, opcode, 0b00,
+ !cast<RegisterOperand>(List # "16B_operand"),
+ ImmTy2, asmop>;
+
+ defm _8H : NeonI_LDWB_VList<1, opcode, 0b01,
+ !cast<RegisterOperand>(List # "8H_operand"),
+ ImmTy2, asmop>;
+
+ defm _4S : NeonI_LDWB_VList<1, opcode, 0b10,
+ !cast<RegisterOperand>(List # "4S_operand"),
+ ImmTy2, asmop>;
+
+ defm _2D : NeonI_LDWB_VList<1, opcode, 0b11,
+ !cast<RegisterOperand>(List # "2D_operand"),
+ ImmTy2, asmop>;
+}
+
+// Post-index load multiple N-element structures from N registers (N = 1,2,3,4)
+defm LD1WB : LDWB_VList_BHSD<0b0111, "VOne", uimm_exact8, uimm_exact16, "ld1">;
+defm LD1WB_1D : NeonI_LDWB_VList<0, 0b0111, 0b11, VOne1D_operand, uimm_exact8,
+ "ld1">;
+
+defm LD2WB : LDWB_VList_BHSD<0b1000, "VPair", uimm_exact16, uimm_exact32, "ld2">;
+
+defm LD3WB : LDWB_VList_BHSD<0b0100, "VTriple", uimm_exact24, uimm_exact48,
+ "ld3">;
+
+defm LD4WB : LDWB_VList_BHSD<0b0000, "VQuad", uimm_exact32, uimm_exact64, "ld4">;
+
+// Post-index load multiple 1-element structures from N consecutive registers
+// (N = 2,3,4)
+defm LD1x2WB : LDWB_VList_BHSD<0b1010, "VPair", uimm_exact16, uimm_exact32,
+ "ld1">;
+defm LD1x2WB_1D : NeonI_LDWB_VList<0, 0b1010, 0b11, VPair1D_operand,
+ uimm_exact16, "ld1">;
+
+defm LD1x3WB : LDWB_VList_BHSD<0b0110, "VTriple", uimm_exact24, uimm_exact48,
+ "ld1">;
+defm LD1x3WB_1D : NeonI_LDWB_VList<0, 0b0110, 0b11, VTriple1D_operand,
+ uimm_exact24, "ld1">;
+
+defm LD1x4WB : LDWB_VList_BHSD<0b0010, "VQuad", uimm_exact32, uimm_exact64,
+ "ld1">;
+defm LD1x4WB_1D : NeonI_LDWB_VList<0, 0b0010, 0b11, VQuad1D_operand,
+ uimm_exact32, "ld1">;
+
+multiclass NeonI_STWB_VList<bit q, bits<4> opcode, bits<2> size,
+ RegisterOperand VecList, Operand ImmTy,
+ string asmop> {
+ let Constraints = "$Rn = $wb", mayStore = 1, neverHasSideEffects = 1,
+ DecoderMethod = "DecodeVLDSTPostInstruction" in {
+ def _fixed : NeonI_LdStMult_Post<q, 0, opcode, size,
+ (outs GPR64xsp:$wb),
+ (ins GPR64xsp:$Rn, ImmTy:$amt, VecList:$Rt),
+ asmop # "\t$Rt, [$Rn], $amt",
+ [],
+ NoItinerary> {
+ let Rm = 0b11111;
+ }
+
+ def _register : NeonI_LdStMult_Post<q, 0, opcode, size,
+ (outs GPR64xsp:$wb),
+ (ins GPR64xsp:$Rn, GPR64noxzr:$Rm, VecList:$Rt),
+ asmop # "\t$Rt, [$Rn], $Rm",
+ [],
+ NoItinerary>;
+ }
+}
+
+multiclass STWB_VList_BHSD<bits<4> opcode, string List, Operand ImmTy,
+ Operand ImmTy2, string asmop> {
+  defm _8B : NeonI_STWB_VList<0, opcode, 0b00,
+                              !cast<RegisterOperand>(List # "8B_operand"),
+                              ImmTy, asmop>;
+
+ defm _4H : NeonI_STWB_VList<0, opcode, 0b01,
+ !cast<RegisterOperand>(List # "4H_operand"),
+ ImmTy, asmop>;
+
+ defm _2S : NeonI_STWB_VList<0, opcode, 0b10,
+ !cast<RegisterOperand>(List # "2S_operand"),
+ ImmTy, asmop>;
+
+ defm _16B : NeonI_STWB_VList<1, opcode, 0b00,
+ !cast<RegisterOperand>(List # "16B_operand"),
+ ImmTy2, asmop>;
+
+ defm _8H : NeonI_STWB_VList<1, opcode, 0b01,
+ !cast<RegisterOperand>(List # "8H_operand"),
+ ImmTy2, asmop>;
+
+ defm _4S : NeonI_STWB_VList<1, opcode, 0b10,
+ !cast<RegisterOperand>(List # "4S_operand"),
+ ImmTy2, asmop>;
+
+ defm _2D : NeonI_STWB_VList<1, opcode, 0b11,
+ !cast<RegisterOperand>(List # "2D_operand"),
+ ImmTy2, asmop>;
+}
+
+// Post-index store multiple N-element structures from N registers (N = 1,2,3,4)
+defm ST1WB : STWB_VList_BHSD<0b0111, "VOne", uimm_exact8, uimm_exact16, "st1">;
+defm ST1WB_1D : NeonI_STWB_VList<0, 0b0111, 0b11, VOne1D_operand, uimm_exact8,
+ "st1">;
+
+defm ST2WB : STWB_VList_BHSD<0b1000, "VPair", uimm_exact16, uimm_exact32, "st2">;
+
+defm ST3WB : STWB_VList_BHSD<0b0100, "VTriple", uimm_exact24, uimm_exact48,
+ "st3">;
+
+defm ST4WB : STWB_VList_BHSD<0b0000, "VQuad", uimm_exact32, uimm_exact64, "st4">;
+
+// Post-index store multiple 1-element structures from N consecutive registers
+// (N = 2,3,4)
+defm ST1x2WB : STWB_VList_BHSD<0b1010, "VPair", uimm_exact16, uimm_exact32,
+ "st1">;
+defm ST1x2WB_1D : NeonI_STWB_VList<0, 0b1010, 0b11, VPair1D_operand,
+ uimm_exact16, "st1">;
+
+defm ST1x3WB : STWB_VList_BHSD<0b0110, "VTriple", uimm_exact24, uimm_exact48,
+ "st1">;
+defm ST1x3WB_1D : NeonI_STWB_VList<0, 0b0110, 0b11, VTriple1D_operand,
+ uimm_exact24, "st1">;
+
+defm ST1x4WB : STWB_VList_BHSD<0b0010, "VQuad", uimm_exact32, uimm_exact64,
+ "st1">;
+defm ST1x4WB_1D : NeonI_STWB_VList<0, 0b0010, 0b11, VQuad1D_operand,
+ uimm_exact32, "st1">;
+
+// End of post-index vector load/store multiple N-element structure
+// (class SIMD lselem-post)
+
+// The following are the vector load/store single N-element structure
+// instructions (class SIMD lsone).
+def neon_uimm0_bare : Operand<i64>,
+ ImmLeaf<i64, [{return Imm == 0;}]> {
+ let ParserMatchClass = neon_uimm0_asmoperand;
+ let PrintMethod = "printUImmBareOperand";
+}
+
+def neon_uimm1_bare : Operand<i64>,
+ ImmLeaf<i64, [{return Imm < 2;}]> {
+ let ParserMatchClass = neon_uimm1_asmoperand;
+ let PrintMethod = "printUImmBareOperand";
+}
+
+def neon_uimm2_bare : Operand<i64>,
+ ImmLeaf<i64, [{return Imm < 4;}]> {
+ let ParserMatchClass = neon_uimm2_asmoperand;
+ let PrintMethod = "printUImmBareOperand";
+}
+
+def neon_uimm3_bare : Operand<i64>,
+ ImmLeaf<i64, [{return Imm < 8;}]> {
+ let ParserMatchClass = uimm3_asmoperand;
+ let PrintMethod = "printUImmBareOperand";
+}
+
+def neon_uimm4_bare : Operand<i64>,
+ ImmLeaf<i64, [{return Imm < 16;}]> {
+ let ParserMatchClass = uimm4_asmoperand;
+ let PrintMethod = "printUImmBareOperand";
+}
+
+class NeonI_LDN_Dup<bit q, bit r, bits<3> opcode, bits<2> size,
+ RegisterOperand VecList, string asmop>
+ : NeonI_LdOne_Dup<q, r, opcode, size,
+ (outs VecList:$Rt), (ins GPR64xsp:$Rn),
+ asmop # "\t$Rt, [$Rn]",
+ [],
+ NoItinerary> {
+ let mayLoad = 1;
+ let neverHasSideEffects = 1;
+}
+
+multiclass LDN_Dup_BHSD<bit r, bits<3> opcode, string List, string asmop> {
+ def _8B : NeonI_LDN_Dup<0, r, opcode, 0b00,
+ !cast<RegisterOperand>(List # "8B_operand"), asmop>;
+
+ def _4H : NeonI_LDN_Dup<0, r, opcode, 0b01,
+ !cast<RegisterOperand>(List # "4H_operand"), asmop>;
+
+ def _2S : NeonI_LDN_Dup<0, r, opcode, 0b10,
+ !cast<RegisterOperand>(List # "2S_operand"), asmop>;
+
+ def _1D : NeonI_LDN_Dup<0, r, opcode, 0b11,
+ !cast<RegisterOperand>(List # "1D_operand"), asmop>;
+
+ def _16B : NeonI_LDN_Dup<1, r, opcode, 0b00,
+ !cast<RegisterOperand>(List # "16B_operand"), asmop>;
+
+ def _8H : NeonI_LDN_Dup<1, r, opcode, 0b01,
+ !cast<RegisterOperand>(List # "8H_operand"), asmop>;
+
+ def _4S : NeonI_LDN_Dup<1, r, opcode, 0b10,
+ !cast<RegisterOperand>(List # "4S_operand"), asmop>;
+
+ def _2D : NeonI_LDN_Dup<1, r, opcode, 0b11,
+ !cast<RegisterOperand>(List # "2D_operand"), asmop>;
+}
+
+// Load single 1-element structure to all lanes of 1 register
+defm LD1R : LDN_Dup_BHSD<0b0, 0b110, "VOne", "ld1r">;
+
+// Load single N-element structure to all lanes of N consecutive
+// registers (N = 2,3,4)
+defm LD2R : LDN_Dup_BHSD<0b1, 0b110, "VPair", "ld2r">;
+defm LD3R : LDN_Dup_BHSD<0b0, 0b111, "VTriple", "ld3r">;
+defm LD4R : LDN_Dup_BHSD<0b1, 0b111, "VQuad", "ld4r">;
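+
+// E.g. "ld2r {v0.4s, v1.4s}, [x0]" loads one 2-element structure (two
+// consecutive 32-bit values) and replicates the first value into every lane
+// of v0 and the second into every lane of v1.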
+
+
+class LD1R_pattern <ValueType VTy, ValueType DTy, PatFrag LoadOp,
+ Instruction INST>
+ : Pat<(VTy (Neon_vdup (DTy (LoadOp GPR64xsp:$Rn)))),
+ (VTy (INST GPR64xsp:$Rn))>;
+
+// Match all LD1R instructions
+def : LD1R_pattern<v8i8, i32, extloadi8, LD1R_8B>;
+
+def : LD1R_pattern<v16i8, i32, extloadi8, LD1R_16B>;
+
+def : LD1R_pattern<v4i16, i32, extloadi16, LD1R_4H>;
+
+def : LD1R_pattern<v8i16, i32, extloadi16, LD1R_8H>;
+
+def : LD1R_pattern<v2i32, i32, load, LD1R_2S>;
+def : LD1R_pattern<v2f32, f32, load, LD1R_2S>;
+
+def : LD1R_pattern<v4i32, i32, load, LD1R_4S>;
+def : LD1R_pattern<v4f32, f32, load, LD1R_4S>;
+
+def : LD1R_pattern<v1i64, i64, load, LD1R_1D>;
+def : LD1R_pattern<v1f64, f64, load, LD1R_1D>;
+
+def : LD1R_pattern<v2i64, i64, load, LD1R_2D>;
+def : LD1R_pattern<v2f64, f64, load, LD1R_2D>;
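+
+// E.g. the patterns above turn a scalar load feeding a vector duplicate,
+//   (v4f32 (Neon_vdup (f32 (load GPR64xsp:$Rn)))),
+// into a single "ld1r {v0.4s}, [x0]" instead of a load followed by a dup.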
+
+
+multiclass VectorList_Bare_BHSD<string PREFIX, int Count,
+ RegisterClass RegList> {
+ defm B : VectorList_operands<PREFIX, "B", Count, RegList>;
+ defm H : VectorList_operands<PREFIX, "H", Count, RegList>;
+ defm S : VectorList_operands<PREFIX, "S", Count, RegList>;
+ defm D : VectorList_operands<PREFIX, "D", Count, RegList>;
+}
+
+// Special vector list operands of 128-bit vectors with bare layout, i.e. only
+// the element type (".b", ".h", ".s", ".d") is printed, without a lane count.
+defm VOne : VectorList_Bare_BHSD<"VOne", 1, FPR128>;
+defm VPair : VectorList_Bare_BHSD<"VPair", 2, QPair>;
+defm VTriple : VectorList_Bare_BHSD<"VTriple", 3, QTriple>;
+defm VQuad : VectorList_Bare_BHSD<"VQuad", 4, QQuad>;
+
+class NeonI_LDN_Lane<bit r, bits<2> op2_1, bit op0, RegisterOperand VList,
+ Operand ImmOp, string asmop>
+ : NeonI_LdStOne_Lane<1, r, op2_1, op0,
+ (outs VList:$Rt),
+ (ins GPR64xsp:$Rn, VList:$src, ImmOp:$lane),
+ asmop # "\t$Rt[$lane], [$Rn]",
+ [],
+ NoItinerary> {
+ let mayLoad = 1;
+ let neverHasSideEffects = 1;
+ let hasExtraDefRegAllocReq = 1;
+ let Constraints = "$src = $Rt";
+}
+
+multiclass LDN_Lane_BHSD<bit r, bit op0, string List, string asmop> {
+ def _B : NeonI_LDN_Lane<r, 0b00, op0,
+ !cast<RegisterOperand>(List # "B_operand"),
+ neon_uimm4_bare, asmop> {
+ let Inst{12-10} = lane{2-0};
+ let Inst{30} = lane{3};
+ }
+
+ def _H : NeonI_LDN_Lane<r, 0b01, op0,
+ !cast<RegisterOperand>(List # "H_operand"),
+ neon_uimm3_bare, asmop> {
+ let Inst{12-10} = {lane{1}, lane{0}, 0b0};
+ let Inst{30} = lane{2};
+ }
+
+ def _S : NeonI_LDN_Lane<r, 0b10, op0,
+ !cast<RegisterOperand>(List # "S_operand"),
+ neon_uimm2_bare, asmop> {
+ let Inst{12-10} = {lane{0}, 0b0, 0b0};
+ let Inst{30} = lane{1};
+ }
+
+ def _D : NeonI_LDN_Lane<r, 0b10, op0,
+ !cast<RegisterOperand>(List # "D_operand"),
+ neon_uimm1_bare, asmop> {
+ let Inst{12-10} = 0b001;
+ let Inst{30} = lane{0};
+ }
+}
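+
+// The full lane index is split across the encoding: Inst{30} (the Q bit)
+// holds the top bit and Inst{12-10} the low bits. E.g. for the _B variant,
+// lane 11 (0b1011) gives Inst{30} = 1 and Inst{12-10} = 0b011.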
+
+// Load single 1-element structure to one lane of 1 register.
+defm LD1LN : LDN_Lane_BHSD<0b0, 0b0, "VOne", "ld1">;
+
+// Load single N-element structure to one lane of N consecutive registers
+// (N = 2,3,4)
+defm LD2LN : LDN_Lane_BHSD<0b1, 0b0, "VPair", "ld2">;
+defm LD3LN : LDN_Lane_BHSD<0b0, 0b1, "VTriple", "ld3">;
+defm LD4LN : LDN_Lane_BHSD<0b1, 0b1, "VQuad", "ld4">;
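+
+// E.g. "ld3 {v0.b, v1.b, v2.b}[4], [x0]" loads three consecutive bytes into
+// lane 4 of v0, v1 and v2, leaving all other lanes unchanged.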
+
+multiclass LD1LN_patterns<ValueType VTy, ValueType VTy2, ValueType DTy,
+ Operand ImmOp, Operand ImmOp2, PatFrag LoadOp,
+ Instruction INST> {
+ def : Pat<(VTy (vector_insert (VTy VPR64:$src),
+ (DTy (LoadOp GPR64xsp:$Rn)), (ImmOp:$lane))),
+ (VTy (EXTRACT_SUBREG
+ (INST GPR64xsp:$Rn,
+ (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
+ ImmOp:$lane),
+ sub_64))>;
+
+ def : Pat<(VTy2 (vector_insert (VTy2 VPR128:$src),
+ (DTy (LoadOp GPR64xsp:$Rn)), (ImmOp2:$lane))),
+ (VTy2 (INST GPR64xsp:$Rn, VPR128:$src, ImmOp2:$lane))>;
+}
+
+// Match all LD1LN instructions
+defm : LD1LN_patterns<v8i8, v16i8, i32, neon_uimm3_bare, neon_uimm4_bare,
+ extloadi8, LD1LN_B>;
+
+defm : LD1LN_patterns<v4i16, v8i16, i32, neon_uimm2_bare, neon_uimm3_bare,
+ extloadi16, LD1LN_H>;
+
+defm : LD1LN_patterns<v2i32, v4i32, i32, neon_uimm1_bare, neon_uimm2_bare,
+ load, LD1LN_S>;
+defm : LD1LN_patterns<v2f32, v4f32, f32, neon_uimm1_bare, neon_uimm2_bare,
+ load, LD1LN_S>;
+
+defm : LD1LN_patterns<v1i64, v2i64, i64, neon_uimm0_bare, neon_uimm1_bare,
+ load, LD1LN_D>;
+defm : LD1LN_patterns<v1f64, v2f64, f64, neon_uimm0_bare, neon_uimm1_bare,
+ load, LD1LN_D>;
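+
+// For the 64-bit vector types the pattern above first widens $src to a
+// 128-bit register with SUBREG_TO_REG, performs the lane load on that Q
+// register (the instruction is only defined on 128-bit vector lists), and
+// then takes sub_64 back out with EXTRACT_SUBREG.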
+
+class NeonI_STN_Lane<bit r, bits<2> op2_1, bit op0, RegisterOperand VList,
+ Operand ImmOp, string asmop>
+ : NeonI_LdStOne_Lane<0, r, op2_1, op0,
+ (outs), (ins GPR64xsp:$Rn, VList:$Rt, ImmOp:$lane),
+ asmop # "\t$Rt[$lane], [$Rn]",
+ [],
+ NoItinerary> {
+ let mayStore = 1;
+ let neverHasSideEffects = 1;
+ let hasExtraDefRegAllocReq = 1;
+}
+
+multiclass STN_Lane_BHSD<bit r, bit op0, string List, string asmop> {
+ def _B : NeonI_STN_Lane<r, 0b00, op0,
+ !cast<RegisterOperand>(List # "B_operand"),
+ neon_uimm4_bare, asmop> {
+ let Inst{12-10} = lane{2-0};
+ let Inst{30} = lane{3};
+ }
+
+ def _H : NeonI_STN_Lane<r, 0b01, op0,
+ !cast<RegisterOperand>(List # "H_operand"),
+ neon_uimm3_bare, asmop> {
+ let Inst{12-10} = {lane{1}, lane{0}, 0b0};
+ let Inst{30} = lane{2};
+ }
+
+ def _S : NeonI_STN_Lane<r, 0b10, op0,
+ !cast<RegisterOperand>(List # "S_operand"),
+ neon_uimm2_bare, asmop> {
+ let Inst{12-10} = {lane{0}, 0b0, 0b0};
+ let Inst{30} = lane{1};
+ }
+
+ def _D : NeonI_STN_Lane<r, 0b10, op0,
+ !cast<RegisterOperand>(List # "D_operand"),
+                          neon_uimm1_bare, asmop> {
+ let Inst{12-10} = 0b001;
+ let Inst{30} = lane{0};
+ }
+}
+
+// Store single 1-element structure from one lane of 1 register.
+defm ST1LN : STN_Lane_BHSD<0b0, 0b0, "VOne", "st1">;
+
+// Store single N-element structure from one lane of N consecutive registers
+// (N = 2,3,4)
+defm ST2LN : STN_Lane_BHSD<0b1, 0b0, "VPair", "st2">;
+defm ST3LN : STN_Lane_BHSD<0b0, 0b1, "VTriple", "st3">;
+defm ST4LN : STN_Lane_BHSD<0b1, 0b1, "VQuad", "st4">;
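+
+// E.g. "st2 {v0.h, v1.h}[3], [x0]" stores lane 3 of v0 and lane 3 of v1 as
+// two consecutive halfwords at [x0].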
+
+multiclass ST1LN_patterns<ValueType VTy, ValueType VTy2, ValueType DTy,
+ Operand ImmOp, Operand ImmOp2, PatFrag StoreOp,
+ Instruction INST> {
+ def : Pat<(StoreOp (DTy (vector_extract (VTy VPR64:$Rt), ImmOp:$lane)),
+ GPR64xsp:$Rn),
+ (INST GPR64xsp:$Rn,
+ (SUBREG_TO_REG (i64 0), VPR64:$Rt, sub_64),
+ ImmOp:$lane)>;
+
+ def : Pat<(StoreOp (DTy (vector_extract (VTy2 VPR128:$Rt), ImmOp2:$lane)),
+ GPR64xsp:$Rn),
+ (INST GPR64xsp:$Rn, VPR128:$Rt, ImmOp2:$lane)>;
+}
+
+// Match all ST1LN instructions
+defm : ST1LN_patterns<v8i8, v16i8, i32, neon_uimm3_bare, neon_uimm4_bare,
+ truncstorei8, ST1LN_B>;
+
+defm : ST1LN_patterns<v4i16, v8i16, i32, neon_uimm2_bare, neon_uimm3_bare,
+ truncstorei16, ST1LN_H>;
+
+defm : ST1LN_patterns<v2i32, v4i32, i32, neon_uimm1_bare, neon_uimm2_bare,
+ store, ST1LN_S>;
+defm : ST1LN_patterns<v2f32, v4f32, f32, neon_uimm1_bare, neon_uimm2_bare,
+ store, ST1LN_S>;
+
+defm : ST1LN_patterns<v1i64, v2i64, i64, neon_uimm0_bare, neon_uimm1_bare,
+ store, ST1LN_D>;
+defm : ST1LN_patterns<v1f64, v2f64, f64, neon_uimm0_bare, neon_uimm1_bare,
+ store, ST1LN_D>;
+
+// End of vector load/store single N-element structure (class SIMD lsone).
+
+
+// The following are post-index load/store single N-element instructions
+// (class SIMD lsone-post)
+
+multiclass NeonI_LDN_WB_Dup<bit q, bit r, bits<3> opcode, bits<2> size,
+ RegisterOperand VecList, Operand ImmTy,
+ string asmop> {
+ let mayLoad = 1, neverHasSideEffects = 1, Constraints = "$wb = $Rn",
+ DecoderMethod = "DecodeVLDSTLanePostInstruction" in {
+ def _fixed : NeonI_LdOne_Dup_Post<q, r, opcode, size,
+ (outs VecList:$Rt, GPR64xsp:$wb),
+ (ins GPR64xsp:$Rn, ImmTy:$amt),
+ asmop # "\t$Rt, [$Rn], $amt",
+ [],
+ NoItinerary> {
+ let Rm = 0b11111;
+ }
+
+ def _register : NeonI_LdOne_Dup_Post<q, r, opcode, size,
+ (outs VecList:$Rt, GPR64xsp:$wb),
+ (ins GPR64xsp:$Rn, GPR64noxzr:$Rm),
+ asmop # "\t$Rt, [$Rn], $Rm",
+ [],
+ NoItinerary>;
+ }
+}
+
+multiclass LDWB_Dup_BHSD<bit r, bits<3> opcode, string List, string asmop,
+ Operand uimm_b, Operand uimm_h,
+ Operand uimm_s, Operand uimm_d> {
+ defm _8B : NeonI_LDN_WB_Dup<0, r, opcode, 0b00,
+ !cast<RegisterOperand>(List # "8B_operand"),
+ uimm_b, asmop>;
+
+ defm _4H : NeonI_LDN_WB_Dup<0, r, opcode, 0b01,
+ !cast<RegisterOperand>(List # "4H_operand"),
+ uimm_h, asmop>;
+
+ defm _2S : NeonI_LDN_WB_Dup<0, r, opcode, 0b10,
+ !cast<RegisterOperand>(List # "2S_operand"),
+ uimm_s, asmop>;
+
+ defm _1D : NeonI_LDN_WB_Dup<0, r, opcode, 0b11,
+ !cast<RegisterOperand>(List # "1D_operand"),
+ uimm_d, asmop>;
+
+ defm _16B : NeonI_LDN_WB_Dup<1, r, opcode, 0b00,
+ !cast<RegisterOperand>(List # "16B_operand"),
+ uimm_b, asmop>;
+
+ defm _8H : NeonI_LDN_WB_Dup<1, r, opcode, 0b01,
+ !cast<RegisterOperand>(List # "8H_operand"),
+ uimm_h, asmop>;
+
+ defm _4S : NeonI_LDN_WB_Dup<1, r, opcode, 0b10,
+ !cast<RegisterOperand>(List # "4S_operand"),
+ uimm_s, asmop>;
+
+ defm _2D : NeonI_LDN_WB_Dup<1, r, opcode, 0b11,
+ !cast<RegisterOperand>(List # "2D_operand"),
+ uimm_d, asmop>;
+}
+
+// Post-index load single 1-element structure to all lanes of 1 register
+defm LD1R_WB : LDWB_Dup_BHSD<0b0, 0b110, "VOne", "ld1r", uimm_exact1,
+ uimm_exact2, uimm_exact4, uimm_exact8>;
+
+// Post-index load single N-element structure to all lanes of N consecutive
+// registers (N = 2,3,4)
+defm LD2R_WB : LDWB_Dup_BHSD<0b1, 0b110, "VPair", "ld2r", uimm_exact2,
+ uimm_exact4, uimm_exact8, uimm_exact16>;
+defm LD3R_WB : LDWB_Dup_BHSD<0b0, 0b111, "VTriple", "ld3r", uimm_exact3,
+ uimm_exact6, uimm_exact12, uimm_exact24>;
+defm LD4R_WB : LDWB_Dup_BHSD<0b1, 0b111, "VQuad", "ld4r", uimm_exact4,
+ uimm_exact8, uimm_exact16, uimm_exact32>;
+
+let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1,
+ Constraints = "$Rn = $wb, $Rt = $src",
+ DecoderMethod = "DecodeVLDSTLanePostInstruction" in {
+ class LDN_WBFx_Lane<bit r, bits<2> op2_1, bit op0, RegisterOperand VList,
+ Operand ImmTy, Operand ImmOp, string asmop>
+ : NeonI_LdStOne_Lane_Post<1, r, op2_1, op0,
+ (outs VList:$Rt, GPR64xsp:$wb),
+ (ins GPR64xsp:$Rn, ImmTy:$amt,
+ VList:$src, ImmOp:$lane),
+ asmop # "\t$Rt[$lane], [$Rn], $amt",
[],
- NoItinerary>;
- def hhh : NeonI_Scalar3Same<u, 0b01, opcode,
- (outs FPR16:$Rd), (ins FPR16:$Rn, FPR16:$Rm),
- !strconcat(asmop, " $Rd, $Rn, $Rm"),
+ NoItinerary> {
+ let Rm = 0b11111;
+ }
+
+ class LDN_WBReg_Lane<bit r, bits<2> op2_1, bit op0, RegisterOperand VList,
+ Operand ImmTy, Operand ImmOp, string asmop>
+ : NeonI_LdStOne_Lane_Post<1, r, op2_1, op0,
+ (outs VList:$Rt, GPR64xsp:$wb),
+ (ins GPR64xsp:$Rn, GPR64noxzr:$Rm,
+ VList:$src, ImmOp:$lane),
+ asmop # "\t$Rt[$lane], [$Rn], $Rm",
[],
NoItinerary>;
- def sss : NeonI_Scalar3Same<u, 0b10, opcode,
- (outs FPR32:$Rd), (ins FPR32:$Rn, FPR32:$Rm),
- !strconcat(asmop, " $Rd, $Rn, $Rm"),
+}
+
+multiclass LD_Lane_WB_BHSD<bit r, bit op0, string List, string asmop,
+ Operand uimm_b, Operand uimm_h,
+ Operand uimm_s, Operand uimm_d> {
+ def _B_fixed : LDN_WBFx_Lane<r, 0b00, op0,
+ !cast<RegisterOperand>(List # "B_operand"),
+ uimm_b, neon_uimm4_bare, asmop> {
+ let Inst{12-10} = lane{2-0};
+ let Inst{30} = lane{3};
+ }
+
+ def _B_register : LDN_WBReg_Lane<r, 0b00, op0,
+ !cast<RegisterOperand>(List # "B_operand"),
+ uimm_b, neon_uimm4_bare, asmop> {
+ let Inst{12-10} = lane{2-0};
+ let Inst{30} = lane{3};
+ }
+
+ def _H_fixed : LDN_WBFx_Lane<r, 0b01, op0,
+ !cast<RegisterOperand>(List # "H_operand"),
+ uimm_h, neon_uimm3_bare, asmop> {
+ let Inst{12-10} = {lane{1}, lane{0}, 0b0};
+ let Inst{30} = lane{2};
+ }
+
+ def _H_register : LDN_WBReg_Lane<r, 0b01, op0,
+ !cast<RegisterOperand>(List # "H_operand"),
+ uimm_h, neon_uimm3_bare, asmop> {
+ let Inst{12-10} = {lane{1}, lane{0}, 0b0};
+ let Inst{30} = lane{2};
+ }
+
+ def _S_fixed : LDN_WBFx_Lane<r, 0b10, op0,
+ !cast<RegisterOperand>(List # "S_operand"),
+ uimm_s, neon_uimm2_bare, asmop> {
+ let Inst{12-10} = {lane{0}, 0b0, 0b0};
+ let Inst{30} = lane{1};
+ }
+
+ def _S_register : LDN_WBReg_Lane<r, 0b10, op0,
+ !cast<RegisterOperand>(List # "S_operand"),
+ uimm_s, neon_uimm2_bare, asmop> {
+ let Inst{12-10} = {lane{0}, 0b0, 0b0};
+ let Inst{30} = lane{1};
+ }
+
+ def _D_fixed : LDN_WBFx_Lane<r, 0b10, op0,
+ !cast<RegisterOperand>(List # "D_operand"),
+ uimm_d, neon_uimm1_bare, asmop> {
+ let Inst{12-10} = 0b001;
+ let Inst{30} = lane{0};
+ }
+
+ def _D_register : LDN_WBReg_Lane<r, 0b10, op0,
+ !cast<RegisterOperand>(List # "D_operand"),
+ uimm_d, neon_uimm1_bare, asmop> {
+ let Inst{12-10} = 0b001;
+ let Inst{30} = lane{0};
+ }
+}
+
+// Post-index load single 1-element structure to one lane of 1 register.
+defm LD1LN_WB : LD_Lane_WB_BHSD<0b0, 0b0, "VOne", "ld1", uimm_exact1,
+ uimm_exact2, uimm_exact4, uimm_exact8>;
+
+// Post-index load single N-element structure to one lane of N consecutive
+// registers (N = 2,3,4)
+defm LD2LN_WB : LD_Lane_WB_BHSD<0b1, 0b0, "VPair", "ld2", uimm_exact2,
+ uimm_exact4, uimm_exact8, uimm_exact16>;
+defm LD3LN_WB : LD_Lane_WB_BHSD<0b0, 0b1, "VTriple", "ld3", uimm_exact3,
+ uimm_exact6, uimm_exact12, uimm_exact24>;
+defm LD4LN_WB : LD_Lane_WB_BHSD<0b1, 0b1, "VQuad", "ld4", uimm_exact4,
+ uimm_exact8, uimm_exact16, uimm_exact32>;
+
+let mayStore = 1, neverHasSideEffects = 1,
+ hasExtraDefRegAllocReq = 1, Constraints = "$Rn = $wb",
+ DecoderMethod = "DecodeVLDSTLanePostInstruction" in {
+ class STN_WBFx_Lane<bit r, bits<2> op2_1, bit op0, RegisterOperand VList,
+ Operand ImmTy, Operand ImmOp, string asmop>
+ : NeonI_LdStOne_Lane_Post<0, r, op2_1, op0,
+ (outs GPR64xsp:$wb),
+ (ins GPR64xsp:$Rn, ImmTy:$amt,
+ VList:$Rt, ImmOp:$lane),
+ asmop # "\t$Rt[$lane], [$Rn], $amt",
+ [],
+ NoItinerary> {
+ let Rm = 0b11111;
+ }
+
+ class STN_WBReg_Lane<bit r, bits<2> op2_1, bit op0, RegisterOperand VList,
+ Operand ImmTy, Operand ImmOp, string asmop>
+ : NeonI_LdStOne_Lane_Post<0, r, op2_1, op0,
+ (outs GPR64xsp:$wb),
+ (ins GPR64xsp:$Rn, GPR64noxzr:$Rm, VList:$Rt,
+ ImmOp:$lane),
+ asmop # "\t$Rt[$lane], [$Rn], $Rm",
[],
NoItinerary>;
- def ddd : NeonI_Scalar3Same<u, 0b11, opcode,
- (outs FPR64:$Rd), (ins FPR64:$Rn, FPR64:$Rm),
- !strconcat(asmop, " $Rd, $Rn, $Rm"),
- [],
- NoItinerary>;
+}
+
+multiclass ST_Lane_WB_BHSD<bit r, bit op0, string List, string asmop,
+ Operand uimm_b, Operand uimm_h,
+ Operand uimm_s, Operand uimm_d> {
+ def _B_fixed : STN_WBFx_Lane<r, 0b00, op0,
+ !cast<RegisterOperand>(List # "B_operand"),
+ uimm_b, neon_uimm4_bare, asmop> {
+ let Inst{12-10} = lane{2-0};
+ let Inst{30} = lane{3};
+ }
+
+ def _B_register : STN_WBReg_Lane<r, 0b00, op0,
+ !cast<RegisterOperand>(List # "B_operand"),
+ uimm_b, neon_uimm4_bare, asmop> {
+ let Inst{12-10} = lane{2-0};
+ let Inst{30} = lane{3};
+ }
+
+ def _H_fixed : STN_WBFx_Lane<r, 0b01, op0,
+ !cast<RegisterOperand>(List # "H_operand"),
+ uimm_h, neon_uimm3_bare, asmop> {
+ let Inst{12-10} = {lane{1}, lane{0}, 0b0};
+ let Inst{30} = lane{2};
+ }
+
+ def _H_register : STN_WBReg_Lane<r, 0b01, op0,
+ !cast<RegisterOperand>(List # "H_operand"),
+ uimm_h, neon_uimm3_bare, asmop> {
+ let Inst{12-10} = {lane{1}, lane{0}, 0b0};
+ let Inst{30} = lane{2};
+ }
+
+ def _S_fixed : STN_WBFx_Lane<r, 0b10, op0,
+ !cast<RegisterOperand>(List # "S_operand"),
+ uimm_s, neon_uimm2_bare, asmop> {
+ let Inst{12-10} = {lane{0}, 0b0, 0b0};
+ let Inst{30} = lane{1};
+ }
+
+ def _S_register : STN_WBReg_Lane<r, 0b10, op0,
+ !cast<RegisterOperand>(List # "S_operand"),
+ uimm_s, neon_uimm2_bare, asmop> {
+ let Inst{12-10} = {lane{0}, 0b0, 0b0};
+ let Inst{30} = lane{1};
+ }
+
+ def _D_fixed : STN_WBFx_Lane<r, 0b10, op0,
+ !cast<RegisterOperand>(List # "D_operand"),
+ uimm_d, neon_uimm1_bare, asmop> {
+ let Inst{12-10} = 0b001;
+ let Inst{30} = lane{0};
+ }
+
+ def _D_register : STN_WBReg_Lane<r, 0b10, op0,
+ !cast<RegisterOperand>(List # "D_operand"),
+ uimm_d, neon_uimm1_bare, asmop> {
+ let Inst{12-10} = 0b001;
+ let Inst{30} = lane{0};
}
}
-class Neon_Scalar_D_size_patterns<SDPatternOperator opnode, Instruction INSTD>
- : Pat<(v1i64 (opnode (v1i64 VPR64:$Rn), (v1i64 VPR64:$Rm))),
- (SUBREG_TO_REG (i64 0),
- (INSTD (EXTRACT_SUBREG VPR64:$Rn, sub_64),
- (EXTRACT_SUBREG VPR64:$Rm, sub_64)),
- sub_64)>;
+// Post-index store single 1-element structure from one lane of 1 register.
+defm ST1LN_WB : ST_Lane_WB_BHSD<0b0, 0b0, "VOne", "st1", uimm_exact1,
+ uimm_exact2, uimm_exact4, uimm_exact8>;
+
+// Post-index store single N-element structure from one lane of N consecutive
+// registers (N = 2,3,4)
+defm ST2LN_WB : ST_Lane_WB_BHSD<0b1, 0b0, "VPair", "st2", uimm_exact2,
+ uimm_exact4, uimm_exact8, uimm_exact16>;
+defm ST3LN_WB : ST_Lane_WB_BHSD<0b0, 0b1, "VTriple", "st3", uimm_exact3,
+ uimm_exact6, uimm_exact12, uimm_exact24>;
+defm ST4LN_WB : ST_Lane_WB_BHSD<0b1, 0b1, "VQuad", "st4", uimm_exact4,
+ uimm_exact8, uimm_exact16, uimm_exact32>;
+
+// End of post-index load/store single N-element instructions
+// (class SIMD lsone-post)
+
+// Implementation of the NEON scalar instructions
+// Scalar Three Same
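+// All operands of a "three same" scalar instruction share one element size,
+// e.g. "add d0, d1, d2" or "sqadd b0, b1, b2".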
+
+class NeonI_Scalar3Same_size<bit u, bits<2> size, bits<5> opcode, string asmop,
+ RegisterClass FPRC>
+ : NeonI_Scalar3Same<u, size, opcode,
+ (outs FPRC:$Rd), (ins FPRC:$Rn, FPRC:$Rm),
+ !strconcat(asmop, "\t$Rd, $Rn, $Rm"),
+ [],
+ NoItinerary>;
+
+class NeonI_Scalar3Same_D_size<bit u, bits<5> opcode, string asmop>
+ : NeonI_Scalar3Same_size<u, 0b11, opcode, asmop, FPR64>;
+
+multiclass NeonI_Scalar3Same_HS_sizes<bit u, bits<5> opcode, string asmop,
+ bit Commutable = 0> {
+ let isCommutable = Commutable in {
+ def hhh : NeonI_Scalar3Same_size<u, 0b01, opcode, asmop, FPR16>;
+ def sss : NeonI_Scalar3Same_size<u, 0b10, opcode, asmop, FPR32>;
+ }
+}
+
+multiclass NeonI_Scalar3Same_SD_sizes<bit u, bit size_high, bits<5> opcode,
+ string asmop, bit Commutable = 0> {
+ let isCommutable = Commutable in {
+ def sss : NeonI_Scalar3Same_size<u, {size_high, 0b0}, opcode, asmop, FPR32>;
+ def ddd : NeonI_Scalar3Same_size<u, {size_high, 0b1}, opcode, asmop, FPR64>;
+ }
+}
+
+multiclass NeonI_Scalar3Same_BHSD_sizes<bit u, bits<5> opcode,
+ string asmop, bit Commutable = 0> {
+ let isCommutable = Commutable in {
+ def bbb : NeonI_Scalar3Same_size<u, 0b00, opcode, asmop, FPR8>;
+ def hhh : NeonI_Scalar3Same_size<u, 0b01, opcode, asmop, FPR16>;
+ def sss : NeonI_Scalar3Same_size<u, 0b10, opcode, asmop, FPR32>;
+ def ddd : NeonI_Scalar3Same_size<u, 0b11, opcode, asmop, FPR64>;
+ }
+}
+
+multiclass Neon_Scalar3Same_D_size_patterns<SDPatternOperator opnode,
+ Instruction INSTD> {
+ def : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn), (v1i64 FPR64:$Rm))),
+ (INSTD FPR64:$Rn, FPR64:$Rm)>;
+}
+
+multiclass Neon_Scalar3Same_BHSD_size_patterns<SDPatternOperator opnode,
+ Instruction INSTB,
+ Instruction INSTH,
+ Instruction INSTS,
+ Instruction INSTD>
+ : Neon_Scalar3Same_D_size_patterns<opnode, INSTD> {
+  def : Pat<(v1i8 (opnode (v1i8 FPR8:$Rn), (v1i8 FPR8:$Rm))),
+            (INSTB FPR8:$Rn, FPR8:$Rm)>;
+  def : Pat<(v1i16 (opnode (v1i16 FPR16:$Rn), (v1i16 FPR16:$Rm))),
+            (INSTH FPR16:$Rn, FPR16:$Rm)>;
+  def : Pat<(v1i32 (opnode (v1i32 FPR32:$Rn), (v1i32 FPR32:$Rm))),
+            (INSTS FPR32:$Rn, FPR32:$Rm)>;
+}
+
+class Neon_Scalar3Same_cmp_D_size_patterns<SDPatternOperator opnode,
+ Instruction INSTD>
+ : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn), (v1i64 FPR64:$Rm))),
+ (INSTD FPR64:$Rn, FPR64:$Rm)>;
+
+multiclass Neon_Scalar3Same_HS_size_patterns<SDPatternOperator opnode,
+ Instruction INSTH,
+ Instruction INSTS> {
+ def : Pat<(v1i16 (opnode (v1i16 FPR16:$Rn), (v1i16 FPR16:$Rm))),
+ (INSTH FPR16:$Rn, FPR16:$Rm)>;
+ def : Pat<(v1i32 (opnode (v1i32 FPR32:$Rn), (v1i32 FPR32:$Rm))),
+ (INSTS FPR32:$Rn, FPR32:$Rm)>;
+}
+
+multiclass Neon_Scalar3Same_SD_size_patterns<SDPatternOperator opnode,
+ Instruction INSTS,
+ Instruction INSTD> {
+ def : Pat<(v1f32 (opnode (v1f32 FPR32:$Rn), (v1f32 FPR32:$Rm))),
+ (INSTS FPR32:$Rn, FPR32:$Rm)>;
+ def : Pat<(v1f64 (opnode (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
+ (INSTD FPR64:$Rn, FPR64:$Rm)>;
+}
+
+multiclass Neon_Scalar3Same_cmp_SD_size_patterns<SDPatternOperator opnode,
+ Instruction INSTS,
+ Instruction INSTD> {
+ def : Pat<(v1i32 (opnode (v1f32 FPR32:$Rn), (v1f32 FPR32:$Rm))),
+ (INSTS FPR32:$Rn, FPR32:$Rm)>;
+ def : Pat<(v1i64 (opnode (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
+ (INSTD FPR64:$Rn, FPR64:$Rm)>;
+}
+
+class Neon_Scalar3Same_cmp_V1_D_size_patterns<CondCode CC,
+ Instruction INSTD>
+ : Pat<(v1i64 (Neon_cmp (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm), CC)),
+ (INSTD FPR64:$Rn, FPR64:$Rm)>;
+
+// Scalar Three Different
+
+class NeonI_Scalar3Diff_size<bit u, bits<2> size, bits<4> opcode, string asmop,
+ RegisterClass FPRCD, RegisterClass FPRCS>
+ : NeonI_Scalar3Diff<u, size, opcode,
+ (outs FPRCD:$Rd), (ins FPRCS:$Rn, FPRCS:$Rm),
+ !strconcat(asmop, "\t$Rd, $Rn, $Rm"),
+ [],
+ NoItinerary>;
+
+multiclass NeonI_Scalar3Diff_HS_size<bit u, bits<4> opcode, string asmop> {
+ def shh : NeonI_Scalar3Diff_size<u, 0b01, opcode, asmop, FPR32, FPR16>;
+ def dss : NeonI_Scalar3Diff_size<u, 0b10, opcode, asmop, FPR64, FPR32>;
+}
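+
+// In the "three different" class the destination element is twice the width
+// of the sources, e.g. "sqdmull s0, h1, h2" and "sqdmull d0, s1, s2".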
+
+multiclass NeonI_Scalar3Diff_ml_HS_size<bit u, bits<4> opcode, string asmop> {
+ let Constraints = "$Src = $Rd" in {
+ def shh : NeonI_Scalar3Diff<u, 0b01, opcode,
+ (outs FPR32:$Rd), (ins FPR32:$Src, FPR16:$Rn, FPR16:$Rm),
+ !strconcat(asmop, "\t$Rd, $Rn, $Rm"),
+ [],
+ NoItinerary>;
+ def dss : NeonI_Scalar3Diff<u, 0b10, opcode,
+ (outs FPR64:$Rd), (ins FPR64:$Src, FPR32:$Rn, FPR32:$Rm),
+ !strconcat(asmop, "\t$Rd, $Rn, $Rm"),
+ [],
+ NoItinerary>;
+ }
+}
+
+multiclass Neon_Scalar3Diff_HS_size_patterns<SDPatternOperator opnode,
+ Instruction INSTH,
+ Instruction INSTS> {
+ def : Pat<(v1i32 (opnode (v1i16 FPR16:$Rn), (v1i16 FPR16:$Rm))),
+ (INSTH FPR16:$Rn, FPR16:$Rm)>;
+ def : Pat<(v1i64 (opnode (v1i32 FPR32:$Rn), (v1i32 FPR32:$Rm))),
+ (INSTS FPR32:$Rn, FPR32:$Rm)>;
+}
+
+multiclass Neon_Scalar3Diff_ml_HS_size_patterns<SDPatternOperator opnode,
+ Instruction INSTH,
+ Instruction INSTS> {
+ def : Pat<(v1i32 (opnode (v1i32 FPR32:$Src), (v1i16 FPR16:$Rn), (v1i16 FPR16:$Rm))),
+ (INSTH FPR32:$Src, FPR16:$Rn, FPR16:$Rm)>;
+ def : Pat<(v1i64 (opnode (v1i64 FPR64:$Src), (v1i32 FPR32:$Rn), (v1i32 FPR32:$Rm))),
+ (INSTS FPR64:$Src, FPR32:$Rn, FPR32:$Rm)>;
+}
+
+// Scalar Two Registers Miscellaneous
+
+class NeonI_Scalar2SameMisc_size<bit u, bits<2> size, bits<5> opcode, string asmop,
+ RegisterClass FPRCD, RegisterClass FPRCS>
+ : NeonI_Scalar2SameMisc<u, size, opcode,
+ (outs FPRCD:$Rd), (ins FPRCS:$Rn),
+ !strconcat(asmop, "\t$Rd, $Rn"),
+ [],
+ NoItinerary>;
+
+multiclass NeonI_Scalar2SameMisc_SD_size<bit u, bit size_high, bits<5> opcode,
+ string asmop> {
+ def ss : NeonI_Scalar2SameMisc_size<u, {size_high, 0b0}, opcode, asmop, FPR32,
+ FPR32>;
+ def dd : NeonI_Scalar2SameMisc_size<u, {size_high, 0b1}, opcode, asmop, FPR64,
+ FPR64>;
+}
+
+multiclass NeonI_Scalar2SameMisc_D_size<bit u, bits<5> opcode, string asmop> {
+ def dd : NeonI_Scalar2SameMisc_size<u, 0b11, opcode, asmop, FPR64, FPR64>;
+}
+
+multiclass NeonI_Scalar2SameMisc_BHSD_size<bit u, bits<5> opcode, string asmop>
+ : NeonI_Scalar2SameMisc_D_size<u, opcode, asmop> {
+ def bb : NeonI_Scalar2SameMisc_size<u, 0b00, opcode, asmop, FPR8, FPR8>;
+ def hh : NeonI_Scalar2SameMisc_size<u, 0b01, opcode, asmop, FPR16, FPR16>;
+ def ss : NeonI_Scalar2SameMisc_size<u, 0b10, opcode, asmop, FPR32, FPR32>;
+}
+
+class NeonI_Scalar2SameMisc_fcvtxn_D_size<bit u, bits<5> opcode, string asmop>
+ : NeonI_Scalar2SameMisc_size<u, 0b01, opcode, asmop, FPR32, FPR64>;
+
+multiclass NeonI_Scalar2SameMisc_narrow_HSD_size<bit u, bits<5> opcode,
+ string asmop> {
+ def bh : NeonI_Scalar2SameMisc_size<u, 0b00, opcode, asmop, FPR8, FPR16>;
+ def hs : NeonI_Scalar2SameMisc_size<u, 0b01, opcode, asmop, FPR16, FPR32>;
+ def sd : NeonI_Scalar2SameMisc_size<u, 0b10, opcode, asmop, FPR32, FPR64>;
+}
+
+class NeonI_Scalar2SameMisc_accum_size<bit u, bits<2> size, bits<5> opcode,
+ string asmop, RegisterClass FPRC>
+ : NeonI_Scalar2SameMisc<u, size, opcode,
+ (outs FPRC:$Rd), (ins FPRC:$Src, FPRC:$Rn),
+ !strconcat(asmop, "\t$Rd, $Rn"),
+ [],
+ NoItinerary>;
+
+multiclass NeonI_Scalar2SameMisc_accum_BHSD_size<bit u, bits<5> opcode,
+ string asmop> {
+
+ let Constraints = "$Src = $Rd" in {
+ def bb : NeonI_Scalar2SameMisc_accum_size<u, 0b00, opcode, asmop, FPR8>;
+ def hh : NeonI_Scalar2SameMisc_accum_size<u, 0b01, opcode, asmop, FPR16>;
+ def ss : NeonI_Scalar2SameMisc_accum_size<u, 0b10, opcode, asmop, FPR32>;
+ def dd : NeonI_Scalar2SameMisc_accum_size<u, 0b11, opcode, asmop, FPR64>;
+ }
+}
+
+class Neon_Scalar2SameMisc_fcvtxn_D_size_patterns<SDPatternOperator opnode,
+ Instruction INSTD>
+ : Pat<(v1f32 (opnode (v1f64 FPR64:$Rn))),
+ (INSTD FPR64:$Rn)>;
+
+multiclass Neon_Scalar2SameMisc_fcvt_SD_size_patterns<SDPatternOperator opnode,
+ Instruction INSTS,
+ Instruction INSTD> {
+ def : Pat<(v1i32 (opnode (v1f32 FPR32:$Rn))),
+ (INSTS FPR32:$Rn)>;
+ def : Pat<(v1i64 (opnode (v1f64 FPR64:$Rn))),
+ (INSTD FPR64:$Rn)>;
+}
+
+multiclass Neon_Scalar2SameMisc_cvt_SD_size_patterns<SDPatternOperator Sopnode,
+ SDPatternOperator Dopnode,
+ Instruction INSTS,
+ Instruction INSTD> {
+ def : Pat<(f32 (Sopnode (v1i32 FPR32:$Rn))),
+ (INSTS FPR32:$Rn)>;
+ def : Pat<(f64 (Dopnode (v1i64 FPR64:$Rn))),
+ (INSTD FPR64:$Rn)>;
+}
+
+multiclass Neon_Scalar2SameMisc_SD_size_patterns<SDPatternOperator opnode,
+ Instruction INSTS,
+ Instruction INSTD> {
+ def : Pat<(v1f32 (opnode (v1f32 FPR32:$Rn))),
+ (INSTS FPR32:$Rn)>;
+ def : Pat<(v1f64 (opnode (v1f64 FPR64:$Rn))),
+ (INSTD FPR64:$Rn)>;
+}
+
+class NeonI_Scalar2SameMisc_cmpz_D_size<bit u, bits<5> opcode, string asmop>
+ : NeonI_Scalar2SameMisc<u, 0b11, opcode,
+ (outs FPR64:$Rd), (ins FPR64:$Rn, neon_uimm0:$Imm),
+ !strconcat(asmop, "\t$Rd, $Rn, $Imm"),
+ [],
+ NoItinerary>;
+
+multiclass NeonI_Scalar2SameMisc_cmpz_SD_size<bit u, bits<5> opcode,
+ string asmop> {
+ def ssi : NeonI_Scalar2SameMisc<u, 0b10, opcode,
+ (outs FPR32:$Rd), (ins FPR32:$Rn, fpz32:$FPImm),
+ !strconcat(asmop, "\t$Rd, $Rn, $FPImm"),
+ [],
+ NoItinerary>;
+ def ddi : NeonI_Scalar2SameMisc<u, 0b11, opcode,
+ (outs FPR64:$Rd), (ins FPR64:$Rn, fpz32:$FPImm),
+ !strconcat(asmop, "\t$Rd, $Rn, $FPImm"),
+ [],
+ NoItinerary>;
+}
+
+class Neon_Scalar2SameMisc_cmpz_D_size_patterns<SDPatternOperator opnode,
+ Instruction INSTD>
+ : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn),
+ (v1i64 (bitconvert (v8i8 Neon_AllZero))))),
+ (INSTD FPR64:$Rn, 0)>;
+
+class Neon_Scalar2SameMisc_cmpz_D_V1_size_patterns<CondCode CC,
+ Instruction INSTD>
+ : Pat<(v1i64 (Neon_cmpz (v1i64 FPR64:$Rn),
+ (i32 neon_uimm0:$Imm), CC)),
+ (INSTD FPR64:$Rn, neon_uimm0:$Imm)>;
+
+multiclass Neon_Scalar2SameMisc_cmpz_SD_size_patterns<SDPatternOperator opnode,
+ Instruction INSTS,
+ Instruction INSTD> {
+ def : Pat<(v1i32 (opnode (v1f32 FPR32:$Rn),
+ (v1f32 (scalar_to_vector (f32 fpz32:$FPImm))))),
+ (INSTS FPR32:$Rn, fpz32:$FPImm)>;
+ def : Pat<(v1i64 (opnode (v1f64 FPR64:$Rn),
+ (v1f32 (scalar_to_vector (f32 fpz32:$FPImm))))),
+ (INSTD FPR64:$Rn, fpz32:$FPImm)>;
+}
+
+multiclass Neon_Scalar2SameMisc_D_size_patterns<SDPatternOperator opnode,
+ Instruction INSTD> {
+ def : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn))),
+ (INSTD FPR64:$Rn)>;
+}
+
+multiclass Neon_Scalar2SameMisc_BHSD_size_patterns<SDPatternOperator opnode,
+ Instruction INSTB,
+ Instruction INSTH,
+ Instruction INSTS,
+ Instruction INSTD>
+ : Neon_Scalar2SameMisc_D_size_patterns<opnode, INSTD> {
+ def : Pat<(v1i8 (opnode (v1i8 FPR8:$Rn))),
+ (INSTB FPR8:$Rn)>;
+ def : Pat<(v1i16 (opnode (v1i16 FPR16:$Rn))),
+ (INSTH FPR16:$Rn)>;
+ def : Pat<(v1i32 (opnode (v1i32 FPR32:$Rn))),
+ (INSTS FPR32:$Rn)>;
+}
+
+multiclass Neon_Scalar2SameMisc_narrow_HSD_size_patterns<
+ SDPatternOperator opnode,
+ Instruction INSTH,
+ Instruction INSTS,
+ Instruction INSTD> {
+ def : Pat<(v1i8 (opnode (v1i16 FPR16:$Rn))),
+ (INSTH FPR16:$Rn)>;
+ def : Pat<(v1i16 (opnode (v1i32 FPR32:$Rn))),
+ (INSTS FPR32:$Rn)>;
+ def : Pat<(v1i32 (opnode (v1i64 FPR64:$Rn))),
+ (INSTD FPR64:$Rn)>;
+}
+
+multiclass Neon_Scalar2SameMisc_accum_BHSD_size_patterns<
+ SDPatternOperator opnode,
+ Instruction INSTB,
+ Instruction INSTH,
+ Instruction INSTS,
+ Instruction INSTD> {
+ def : Pat<(v1i8 (opnode (v1i8 FPR8:$Src), (v1i8 FPR8:$Rn))),
+ (INSTB FPR8:$Src, FPR8:$Rn)>;
+ def : Pat<(v1i16 (opnode (v1i16 FPR16:$Src), (v1i16 FPR16:$Rn))),
+ (INSTH FPR16:$Src, FPR16:$Rn)>;
+ def : Pat<(v1i32 (opnode (v1i32 FPR32:$Src), (v1i32 FPR32:$Rn))),
+ (INSTS FPR32:$Src, FPR32:$Rn)>;
+ def : Pat<(v1i64 (opnode (v1i64 FPR64:$Src), (v1i64 FPR64:$Rn))),
+ (INSTD FPR64:$Src, FPR64:$Rn)>;
+}
+
+// Scalar Shift By Immediate
+
+class NeonI_ScalarShiftImm_size<bit u, bits<5> opcode, string asmop,
+ RegisterClass FPRC, Operand ImmTy>
+ : NeonI_ScalarShiftImm<u, opcode,
+ (outs FPRC:$Rd), (ins FPRC:$Rn, ImmTy:$Imm),
+ !strconcat(asmop, "\t$Rd, $Rn, $Imm"),
+ [], NoItinerary>;
+
+multiclass NeonI_ScalarShiftRightImm_D_size<bit u, bits<5> opcode,
+ string asmop> {
+ def ddi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR64, shr_imm64> {
+ bits<6> Imm;
+ let Inst{22} = 0b1; // immh:immb = 1xxxxxx
+ let Inst{21-16} = Imm;
+ }
+}
+
+multiclass NeonI_ScalarShiftRightImm_BHSD_size<bit u, bits<5> opcode,
+ string asmop>
+ : NeonI_ScalarShiftRightImm_D_size<u, opcode, asmop> {
+ def bbi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR8, shr_imm8> {
+ bits<3> Imm;
+ let Inst{22-19} = 0b0001; // immh:immb = 0001xxx
+ let Inst{18-16} = Imm;
+ }
+ def hhi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR16, shr_imm16> {
+ bits<4> Imm;
+ let Inst{22-20} = 0b001; // immh:immb = 001xxxx
+ let Inst{19-16} = Imm;
+ }
+ def ssi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR32, shr_imm32> {
+ bits<5> Imm;
+ let Inst{22-21} = 0b01; // immh:immb = 01xxxxx
+ let Inst{20-16} = Imm;
+ }
+}
+
+multiclass NeonI_ScalarShiftLeftImm_D_size<bit u, bits<5> opcode,
+ string asmop> {
+ def ddi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR64, shl_imm64> {
+ bits<6> Imm;
+ let Inst{22} = 0b1; // immh:immb = 1xxxxxx
+ let Inst{21-16} = Imm;
+ }
+}
+
+multiclass NeonI_ScalarShiftLeftImm_BHSD_size<bit u, bits<5> opcode,
+ string asmop>
+ : NeonI_ScalarShiftLeftImm_D_size<u, opcode, asmop> {
+ def bbi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR8, shl_imm8> {
+ bits<3> Imm;
+ let Inst{22-19} = 0b0001; // immh:immb = 0001xxx
+ let Inst{18-16} = Imm;
+ }
+ def hhi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR16, shl_imm16> {
+ bits<4> Imm;
+ let Inst{22-20} = 0b001; // immh:immb = 001xxxx
+ let Inst{19-16} = Imm;
+ }
+ def ssi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR32, shl_imm32> {
+ bits<5> Imm;
+ let Inst{22-21} = 0b01; // immh:immb = 01xxxxx
+ let Inst{20-16} = Imm;
+ }
+}
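+
+// Architecturally, immh:immb encodes a right shift as (2 * esize) - shift and
+// a left shift as esize + shift; e.g. "sshr d0, d1, #8" has
+// immh:immb = 0b1111000 (128 - 8). The variants above fix the high immh bits
+// per element size and splice the operand into the remaining bits, so the
+// shr_imm*/shl_imm* operands are assumed to encode the transformed value.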
+
+class NeonI_ScalarShiftRightImm_accum_D_size<bit u, bits<5> opcode, string asmop>
+ : NeonI_ScalarShiftImm<u, opcode,
+ (outs FPR64:$Rd),
+ (ins FPR64:$Src, FPR64:$Rn, shr_imm64:$Imm),
+ !strconcat(asmop, "\t$Rd, $Rn, $Imm"),
+ [], NoItinerary> {
+ bits<6> Imm;
+ let Inst{22} = 0b1; // immh:immb = 1xxxxxx
+ let Inst{21-16} = Imm;
+ let Constraints = "$Src = $Rd";
+}
+
+class NeonI_ScalarShiftLeftImm_accum_D_size<bit u, bits<5> opcode, string asmop>
+ : NeonI_ScalarShiftImm<u, opcode,
+ (outs FPR64:$Rd),
+ (ins FPR64:$Src, FPR64:$Rn, shl_imm64:$Imm),
+ !strconcat(asmop, "\t$Rd, $Rn, $Imm"),
+ [], NoItinerary> {
+ bits<6> Imm;
+ let Inst{22} = 0b1; // immh:immb = 1xxxxxx
+ let Inst{21-16} = Imm;
+ let Constraints = "$Src = $Rd";
+}
+
+class NeonI_ScalarShiftImm_narrow_size<bit u, bits<5> opcode, string asmop,
+ RegisterClass FPRCD, RegisterClass FPRCS,
+ Operand ImmTy>
+ : NeonI_ScalarShiftImm<u, opcode,
+ (outs FPRCD:$Rd), (ins FPRCS:$Rn, ImmTy:$Imm),
+ !strconcat(asmop, "\t$Rd, $Rn, $Imm"),
+ [], NoItinerary>;
+
+multiclass NeonI_ScalarShiftImm_narrow_HSD_size<bit u, bits<5> opcode,
+ string asmop> {
+ def bhi : NeonI_ScalarShiftImm_narrow_size<u, opcode, asmop, FPR8, FPR16,
+ shr_imm8> {
+ bits<3> Imm;
+ let Inst{22-19} = 0b0001; // immh:immb = 0001xxx
+ let Inst{18-16} = Imm;
+ }
+ def hsi : NeonI_ScalarShiftImm_narrow_size<u, opcode, asmop, FPR16, FPR32,
+ shr_imm16> {
+ bits<4> Imm;
+ let Inst{22-20} = 0b001; // immh:immb = 001xxxx
+ let Inst{19-16} = Imm;
+ }
+ def sdi : NeonI_ScalarShiftImm_narrow_size<u, opcode, asmop, FPR32, FPR64,
+ shr_imm32> {
+ bits<5> Imm;
+ let Inst{22-21} = 0b01; // immh:immb = 01xxxxx
+ let Inst{20-16} = Imm;
+ }
+}
+
+multiclass NeonI_ScalarShiftImm_cvt_SD_size<bit u, bits<5> opcode, string asmop> {
+ def ssi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR32, shr_imm32> {
+ bits<5> Imm;
+ let Inst{22-21} = 0b01; // immh:immb = 01xxxxx
+ let Inst{20-16} = Imm;
+ }
+ def ddi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR64, shr_imm64> {
+ bits<6> Imm;
+ let Inst{22} = 0b1; // immh:immb = 1xxxxxx
+ let Inst{21-16} = Imm;
+ }
+}
+
+multiclass Neon_ScalarShiftRImm_D_size_patterns<SDPatternOperator opnode,
+ Instruction INSTD> {
+ def ddi : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn), (i32 shr_imm64:$Imm))),
+ (INSTD FPR64:$Rn, imm:$Imm)>;
+}
+
+multiclass Neon_ScalarShiftLImm_D_size_patterns<SDPatternOperator opnode,
+ Instruction INSTD> {
+ def ddi : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn), (i32 shl_imm64:$Imm))),
+ (INSTD FPR64:$Rn, imm:$Imm)>;
+}
+
+class Neon_ScalarShiftImm_arm_D_size_patterns<SDPatternOperator opnode,
+ Instruction INSTD>
+ : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn),
+ (v1i64 (Neon_vdup (i32 shr_imm64:$Imm))))),
+ (INSTD FPR64:$Rn, imm:$Imm)>;
+
+multiclass Neon_ScalarShiftLImm_BHSD_size_patterns<SDPatternOperator opnode,
+ Instruction INSTB,
+ Instruction INSTH,
+ Instruction INSTS,
+ Instruction INSTD>
+ : Neon_ScalarShiftLImm_D_size_patterns<opnode, INSTD> {
+ def bbi : Pat<(v1i8 (opnode (v1i8 FPR8:$Rn), (i32 shl_imm8:$Imm))),
+ (INSTB FPR8:$Rn, imm:$Imm)>;
+ def hhi : Pat<(v1i16 (opnode (v1i16 FPR16:$Rn), (i32 shl_imm16:$Imm))),
+ (INSTH FPR16:$Rn, imm:$Imm)>;
+ def ssi : Pat<(v1i32 (opnode (v1i32 FPR32:$Rn), (i32 shl_imm32:$Imm))),
+ (INSTS FPR32:$Rn, imm:$Imm)>;
+}
+
+class Neon_ScalarShiftLImm_accum_D_size_patterns<SDPatternOperator opnode,
+ Instruction INSTD>
+ : Pat<(v1i64 (opnode (v1i64 FPR64:$Src), (v1i64 FPR64:$Rn),
+ (i32 shl_imm64:$Imm))),
+ (INSTD FPR64:$Src, FPR64:$Rn, imm:$Imm)>;
+
+class Neon_ScalarShiftRImm_accum_D_size_patterns<SDPatternOperator opnode,
+ Instruction INSTD>
+ : Pat<(v1i64 (opnode (v1i64 FPR64:$Src), (v1i64 FPR64:$Rn),
+ (i32 shr_imm64:$Imm))),
+ (INSTD FPR64:$Src, FPR64:$Rn, imm:$Imm)>;
+
+multiclass Neon_ScalarShiftImm_narrow_HSD_size_patterns<
+ SDPatternOperator opnode,
+ Instruction INSTH,
+ Instruction INSTS,
+ Instruction INSTD> {
+ def bhi : Pat<(v1i8 (opnode (v1i16 FPR16:$Rn), (i32 shr_imm16:$Imm))),
+ (INSTH FPR16:$Rn, imm:$Imm)>;
+ def hsi : Pat<(v1i16 (opnode (v1i32 FPR32:$Rn), (i32 shr_imm32:$Imm))),
+ (INSTS FPR32:$Rn, imm:$Imm)>;
+ def sdi : Pat<(v1i32 (opnode (v1i64 FPR64:$Rn), (i32 shr_imm64:$Imm))),
+ (INSTD FPR64:$Rn, imm:$Imm)>;
+}
+
+multiclass Neon_ScalarShiftImm_scvtf_SD_size_patterns<SDPatternOperator Sopnode,
+ SDPatternOperator Dopnode,
+ Instruction INSTS,
+ Instruction INSTD> {
+ def ssi : Pat<(f32 (Sopnode (v1i32 FPR32:$Rn), (i32 shr_imm32:$Imm))),
+ (INSTS FPR32:$Rn, imm:$Imm)>;
+ def ddi : Pat<(f64 (Dopnode (v1i64 FPR64:$Rn), (i32 shr_imm64:$Imm))),
+ (INSTD FPR64:$Rn, imm:$Imm)>;
+}
+
+multiclass Neon_ScalarShiftImm_fcvts_SD_size_patterns<SDPatternOperator Sopnode,
+ SDPatternOperator Dopnode,
+ Instruction INSTS,
+ Instruction INSTD> {
+ def ssi : Pat<(v1i32 (Sopnode (v1f32 FPR32:$Rn), (i32 shr_imm32:$Imm))),
+ (INSTS FPR32:$Rn, imm:$Imm)>;
+ def ddi : Pat<(v1i64 (Dopnode (v1f64 FPR64:$Rn), (i32 shr_imm64:$Imm))),
+ (INSTD FPR64:$Rn, imm:$Imm)>;
+}
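+
+// E.g. "scvtf s0, s1, #8" interprets the integer in s1 as a fixed-point
+// value with 8 fractional bits (i.e. divides by 2^8 after conversion), and
+// "fcvtzs s0, s1, #8" performs the inverse, multiplying by 2^8 before
+// truncating.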
+
+// Scalar Signed Shift Right (Immediate)
+defm SSHR : NeonI_ScalarShiftRightImm_D_size<0b0, 0b00000, "sshr">;
+defm : Neon_ScalarShiftRImm_D_size_patterns<int_aarch64_neon_vshrds_n, SSHRddi>;
+// Pattern to match llvm.arm.* intrinsic.
+def : Neon_ScalarShiftImm_arm_D_size_patterns<sra, SSHRddi>;
+
+// Scalar Unsigned Shift Right (Immediate)
+defm USHR : NeonI_ScalarShiftRightImm_D_size<0b1, 0b00000, "ushr">;
+defm : Neon_ScalarShiftRImm_D_size_patterns<int_aarch64_neon_vshrdu_n, USHRddi>;
+// Pattern to match llvm.arm.* intrinsic.
+def : Neon_ScalarShiftImm_arm_D_size_patterns<srl, USHRddi>;
+
+// Scalar Signed Rounding Shift Right (Immediate)
+defm SRSHR : NeonI_ScalarShiftRightImm_D_size<0b0, 0b00100, "srshr">;
+defm : Neon_ScalarShiftRImm_D_size_patterns<int_aarch64_neon_vsrshr, SRSHRddi>;
+
+// Scalar Unsigned Rounding Shift Right (Immediate)
+defm URSHR : NeonI_ScalarShiftRightImm_D_size<0b1, 0b00100, "urshr">;
+defm : Neon_ScalarShiftRImm_D_size_patterns<int_aarch64_neon_vurshr, URSHRddi>;
+
+// Scalar Signed Shift Right and Accumulate (Immediate)
+def SSRA : NeonI_ScalarShiftRightImm_accum_D_size<0b0, 0b00010, "ssra">;
+def : Neon_ScalarShiftRImm_accum_D_size_patterns
+ <int_aarch64_neon_vsrads_n, SSRA>;
+
+// Scalar Unsigned Shift Right and Accumulate (Immediate)
+def USRA : NeonI_ScalarShiftRightImm_accum_D_size<0b1, 0b00010, "usra">;
+def : Neon_ScalarShiftRImm_accum_D_size_patterns
+ <int_aarch64_neon_vsradu_n, USRA>;
+
+// Scalar Signed Rounding Shift Right and Accumulate (Immediate)
+def SRSRA : NeonI_ScalarShiftRightImm_accum_D_size<0b0, 0b00110, "srsra">;
+def : Neon_ScalarShiftRImm_accum_D_size_patterns
+ <int_aarch64_neon_vrsrads_n, SRSRA>;
+
+// Scalar Unsigned Rounding Shift Right and Accumulate (Immediate)
+def URSRA : NeonI_ScalarShiftRightImm_accum_D_size<0b1, 0b00110, "ursra">;
+def : Neon_ScalarShiftRImm_accum_D_size_patterns
+ <int_aarch64_neon_vrsradu_n, URSRA>;
+
+// Scalar Shift Left (Immediate)
+defm SHL : NeonI_ScalarShiftLeftImm_D_size<0b0, 0b01010, "shl">;
+defm : Neon_ScalarShiftLImm_D_size_patterns<int_aarch64_neon_vshld_n, SHLddi>;
+// Pattern to match llvm.arm.* intrinsic.
+def : Neon_ScalarShiftImm_arm_D_size_patterns<shl, SHLddi>;
+
+// Signed Saturating Shift Left (Immediate)
+defm SQSHL : NeonI_ScalarShiftLeftImm_BHSD_size<0b0, 0b01110, "sqshl">;
+defm : Neon_ScalarShiftLImm_BHSD_size_patterns<int_aarch64_neon_vqshls_n,
+ SQSHLbbi, SQSHLhhi,
+ SQSHLssi, SQSHLddi>;
+// Pattern to match llvm.arm.* intrinsic.
+defm : Neon_ScalarShiftLImm_D_size_patterns<Neon_sqrshlImm, SQSHLddi>;
+
+// Unsigned Saturating Shift Left (Immediate)
+defm UQSHL : NeonI_ScalarShiftLeftImm_BHSD_size<0b1, 0b01110, "uqshl">;
+defm : Neon_ScalarShiftLImm_BHSD_size_patterns<int_aarch64_neon_vqshlu_n,
+ UQSHLbbi, UQSHLhhi,
+ UQSHLssi, UQSHLddi>;
+// Pattern to match llvm.arm.* intrinsic.
+defm : Neon_ScalarShiftLImm_D_size_patterns<Neon_uqrshlImm, UQSHLddi>;
+
+// Signed Saturating Shift Left Unsigned (Immediate)
+defm SQSHLU : NeonI_ScalarShiftLeftImm_BHSD_size<0b1, 0b01100, "sqshlu">;
+defm : Neon_ScalarShiftLImm_BHSD_size_patterns<int_aarch64_neon_vsqshlu,
+ SQSHLUbbi, SQSHLUhhi,
+ SQSHLUssi, SQSHLUddi>;
+
+// Shift Right And Insert (Immediate)
+def SRI : NeonI_ScalarShiftRightImm_accum_D_size<0b1, 0b01000, "sri">;
+def : Neon_ScalarShiftRImm_accum_D_size_patterns
+ <int_aarch64_neon_vsri, SRI>;
+
+// Shift Left And Insert (Immediate)
+def SLI : NeonI_ScalarShiftLeftImm_accum_D_size<0b1, 0b01010, "sli">;
+def : Neon_ScalarShiftLImm_accum_D_size_patterns
+ <int_aarch64_neon_vsli, SLI>;
+
+// Signed Saturating Shift Right Narrow (Immediate)
+defm SQSHRN : NeonI_ScalarShiftImm_narrow_HSD_size<0b0, 0b10010, "sqshrn">;
+defm : Neon_ScalarShiftImm_narrow_HSD_size_patterns<int_aarch64_neon_vsqshrn,
+ SQSHRNbhi, SQSHRNhsi,
+ SQSHRNsdi>;
+
+// Unsigned Saturating Shift Right Narrow (Immediate)
+defm UQSHRN : NeonI_ScalarShiftImm_narrow_HSD_size<0b1, 0b10010, "uqshrn">;
+defm : Neon_ScalarShiftImm_narrow_HSD_size_patterns<int_aarch64_neon_vuqshrn,
+ UQSHRNbhi, UQSHRNhsi,
+ UQSHRNsdi>;
+
+// Signed Saturating Rounded Shift Right Narrow (Immediate)
+defm SQRSHRN : NeonI_ScalarShiftImm_narrow_HSD_size<0b0, 0b10011, "sqrshrn">;
+defm : Neon_ScalarShiftImm_narrow_HSD_size_patterns<int_aarch64_neon_vsqrshrn,
+ SQRSHRNbhi, SQRSHRNhsi,
+ SQRSHRNsdi>;
+
+// Unsigned Saturating Rounded Shift Right Narrow (Immediate)
+defm UQRSHRN : NeonI_ScalarShiftImm_narrow_HSD_size<0b1, 0b10011, "uqrshrn">;
+defm : Neon_ScalarShiftImm_narrow_HSD_size_patterns<int_aarch64_neon_vuqrshrn,
+ UQRSHRNbhi, UQRSHRNhsi,
+ UQRSHRNsdi>;
+
+// Signed Saturating Shift Right Unsigned Narrow (Immediate)
+defm SQSHRUN : NeonI_ScalarShiftImm_narrow_HSD_size<0b1, 0b10000, "sqshrun">;
+defm : Neon_ScalarShiftImm_narrow_HSD_size_patterns<int_aarch64_neon_vsqshrun,
+ SQSHRUNbhi, SQSHRUNhsi,
+ SQSHRUNsdi>;
+
+// Signed Saturating Rounded Shift Right Unsigned Narrow (Immediate)
+defm SQRSHRUN : NeonI_ScalarShiftImm_narrow_HSD_size<0b1, 0b10001, "sqrshrun">;
+defm : Neon_ScalarShiftImm_narrow_HSD_size_patterns<int_aarch64_neon_vsqrshrun,
+ SQRSHRUNbhi, SQRSHRUNhsi,
+ SQRSHRUNsdi>;
+
+// Scalar Signed Fixed-point Convert To Floating-Point (Immediate)
+defm SCVTF_N : NeonI_ScalarShiftImm_cvt_SD_size<0b0, 0b11100, "scvtf">;
+defm : Neon_ScalarShiftImm_scvtf_SD_size_patterns<int_aarch64_neon_vcvtf32_n_s32,
+ int_aarch64_neon_vcvtf64_n_s64,
+ SCVTF_Nssi, SCVTF_Nddi>;
+
+// Scalar Unsigned Fixed-point Convert To Floating-Point (Immediate)
+defm UCVTF_N : NeonI_ScalarShiftImm_cvt_SD_size<0b1, 0b11100, "ucvtf">;
+defm : Neon_ScalarShiftImm_scvtf_SD_size_patterns<int_aarch64_neon_vcvtf32_n_u32,
+ int_aarch64_neon_vcvtf64_n_u64,
+ UCVTF_Nssi, UCVTF_Nddi>;
+
+// Scalar Floating-point Convert To Signed Fixed-point (Immediate)
+defm FCVTZS_N : NeonI_ScalarShiftImm_cvt_SD_size<0b0, 0b11111, "fcvtzs">;
+defm : Neon_ScalarShiftImm_fcvts_SD_size_patterns<int_aarch64_neon_vcvts_n_s32_f32,
+ int_aarch64_neon_vcvtd_n_s64_f64,
+ FCVTZS_Nssi, FCVTZS_Nddi>;
+
+// Scalar Floating-point Convert To Unsigned Fixed-point (Immediate)
+defm FCVTZU_N : NeonI_ScalarShiftImm_cvt_SD_size<0b1, 0b11111, "fcvtzu">;
+defm : Neon_ScalarShiftImm_fcvts_SD_size_patterns<int_aarch64_neon_vcvts_n_u32_f32,
+ int_aarch64_neon_vcvtd_n_u64_f64,
+ FCVTZU_Nssi, FCVTZU_Nddi>;
+
+// Patterns For Convert Instructions Between v1f64 and v1i64
+class Neon_ScalarShiftImm_cvtf_v1f64_pattern<SDPatternOperator opnode,
+ Instruction INST>
+ : Pat<(v1f64 (opnode (v1i64 FPR64:$Rn), (i32 shr_imm64:$Imm))),
+ (INST FPR64:$Rn, imm:$Imm)>;
+
+class Neon_ScalarShiftImm_fcvt_v1f64_pattern<SDPatternOperator opnode,
+ Instruction INST>
+ : Pat<(v1i64 (opnode (v1f64 FPR64:$Rn), (i32 shr_imm64:$Imm))),
+ (INST FPR64:$Rn, imm:$Imm)>;
+
+def : Neon_ScalarShiftImm_cvtf_v1f64_pattern<int_arm_neon_vcvtfxs2fp,
+ SCVTF_Nddi>;
+
+def : Neon_ScalarShiftImm_cvtf_v1f64_pattern<int_arm_neon_vcvtfxu2fp,
+ UCVTF_Nddi>;
+
+def : Neon_ScalarShiftImm_fcvt_v1f64_pattern<int_arm_neon_vcvtfp2fxs,
+ FCVTZS_Nddi>;
+
+def : Neon_ScalarShiftImm_fcvt_v1f64_pattern<int_arm_neon_vcvtfp2fxu,
+ FCVTZU_Nddi>;
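+
+// For illustration: these reuse the ARM fixed-point conversion intrinsics
+// for the v1i64/v1f64 forms, so (with overload suffixes assumed) IR like
+//   %f = call <1 x double> @llvm.arm.neon.vcvtfxs2fp(<1 x i64> %v, i32 6)
+// is expected to select to "scvtf d0, d1, #6".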
// Scalar Integer Add
let isCommutable = 1 in {
@@ -1461,9 +4791,15 @@ def ADDddd : NeonI_Scalar3Same_D_size<0b0, 0b10000, "add">;
// Scalar Integer Sub
def SUBddd : NeonI_Scalar3Same_D_size<0b1, 0b10000, "sub">;
-// Pattern for Scalar Integer Add and Sub with D register
-def : Neon_Scalar_D_size_patterns<add, ADDddd>;
-def : Neon_Scalar_D_size_patterns<sub, SUBddd>;
+// Patterns for Scalar Integer Add and Sub with D register only
+defm : Neon_Scalar3Same_D_size_patterns<add, ADDddd>;
+defm : Neon_Scalar3Same_D_size_patterns<sub, SUBddd>;
+
+// Patterns to match llvm.aarch64.* intrinsic for Scalar Add, Sub
+defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vaddds, ADDddd>;
+defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vadddu, ADDddd>;
+defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vsubds, SUBddd>;
+defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vsubdu, SUBddd>;
// Scalar Integer Saturating Add (Signed, Unsigned)
defm SQADD : NeonI_Scalar3Same_BHSD_sizes<0b0, 0b00001, "sqadd", 1>;
@@ -1473,41 +4809,1212 @@ defm UQADD : NeonI_Scalar3Same_BHSD_sizes<0b1, 0b00001, "uqadd", 1>;
defm SQSUB : NeonI_Scalar3Same_BHSD_sizes<0b0, 0b00101, "sqsub", 0>;
defm UQSUB : NeonI_Scalar3Same_BHSD_sizes<0b1, 0b00101, "uqsub", 0>;
-// Patterns for Scalar Integer Saturating Add, Sub with D register only
-def : Neon_Scalar_D_size_patterns<int_arm_neon_vqadds, SQADDddd>;
-def : Neon_Scalar_D_size_patterns<int_arm_neon_vqaddu, UQADDddd>;
-def : Neon_Scalar_D_size_patterns<int_arm_neon_vqsubs, SQSUBddd>;
-def : Neon_Scalar_D_size_patterns<int_arm_neon_vqsubu, UQSUBddd>;
+
+// Patterns to match llvm.aarch64.* intrinsic for
+// Scalar Integer Saturating Add, Sub (Signed, Unsigned)
+defm : Neon_Scalar3Same_BHSD_size_patterns<int_arm_neon_vqadds, SQADDbbb,
+ SQADDhhh, SQADDsss, SQADDddd>;
+defm : Neon_Scalar3Same_BHSD_size_patterns<int_arm_neon_vqaddu, UQADDbbb,
+ UQADDhhh, UQADDsss, UQADDddd>;
+defm : Neon_Scalar3Same_BHSD_size_patterns<int_arm_neon_vqsubs, SQSUBbbb,
+ SQSUBhhh, SQSUBsss, SQSUBddd>;
+defm : Neon_Scalar3Same_BHSD_size_patterns<int_arm_neon_vqsubu, UQSUBbbb,
+ UQSUBhhh, UQSUBsss, UQSUBddd>;
+
+// Scalar Integer Saturating Doubling Multiply Half High
+defm SQDMULH : NeonI_Scalar3Same_HS_sizes<0b0, 0b10110, "sqdmulh", 1>;
+
+// Scalar Integer Saturating Rounding Doubling Multiply Half High
+defm SQRDMULH : NeonI_Scalar3Same_HS_sizes<0b1, 0b10110, "sqrdmulh", 1>;
+
+// Patterns to match llvm.arm.* intrinsic for
+// Scalar Integer Saturating Doubling Multiply Half High and
+// Scalar Integer Saturating Rounding Doubling Multiply Half High
+defm : Neon_Scalar3Same_HS_size_patterns<int_arm_neon_vqdmulh, SQDMULHhhh,
+ SQDMULHsss>;
+defm : Neon_Scalar3Same_HS_size_patterns<int_arm_neon_vqrdmulh, SQRDMULHhhh,
+ SQRDMULHsss>;
+
+// Scalar Floating-point Multiply Extended
+defm FMULX : NeonI_Scalar3Same_SD_sizes<0b0, 0b0, 0b11011, "fmulx", 1>;
+
+// Scalar Floating-point Reciprocal Step
+defm FRECPS : NeonI_Scalar3Same_SD_sizes<0b0, 0b0, 0b11111, "frecps", 0>;
+
+// Scalar Floating-point Reciprocal Square Root Step
+defm FRSQRTS : NeonI_Scalar3Same_SD_sizes<0b0, 0b1, 0b11111, "frsqrts", 0>;
+
+// Patterns to match llvm.arm.* intrinsic for
+// Scalar Floating-point Reciprocal Step and
+// Scalar Floating-point Reciprocal Square Root Step
+defm : Neon_Scalar3Same_SD_size_patterns<int_arm_neon_vrecps, FRECPSsss,
+ FRECPSddd>;
+defm : Neon_Scalar3Same_SD_size_patterns<int_arm_neon_vrsqrts, FRSQRTSsss,
+ FRSQRTSddd>;
+
+def : Pat<(v1f64 (fsqrt (v1f64 FPR64:$Rn))), (FSQRTdd FPR64:$Rn)>;
+
+// Patterns to match llvm.aarch64.* intrinsic for
+// Scalar Floating-point Multiply Extended
+multiclass Neon_Scalar3Same_MULX_SD_size_patterns<SDPatternOperator opnode,
+ Instruction INSTS,
+ Instruction INSTD> {
+ def : Pat<(f32 (opnode (f32 FPR32:$Rn), (f32 FPR32:$Rm))),
+ (INSTS FPR32:$Rn, FPR32:$Rm)>;
+ def : Pat<(f64 (opnode (f64 FPR64:$Rn), (f64 FPR64:$Rm))),
+ (INSTD FPR64:$Rn, FPR64:$Rm)>;
+}
+
+defm : Neon_Scalar3Same_MULX_SD_size_patterns<int_aarch64_neon_vmulx,
+ FMULXsss,FMULXddd>;
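+
+// For illustration: with the multiclass above, scalar IR such as
+//   %r = call float @llvm.aarch64.neon.vmulx.f32(float %a, float %b)
+// (exact intrinsic suffixing assumed) selects to "fmulx s0, s1, s2".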
// Scalar Integer Shift Left (Signed, Unsigned)
def SSHLddd : NeonI_Scalar3Same_D_size<0b0, 0b01000, "sshl">;
def USHLddd : NeonI_Scalar3Same_D_size<0b1, 0b01000, "ushl">;
+// Patterns to match llvm.arm.* intrinsic for
+// Scalar Integer Shift Left (Signed, Unsigned)
+defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vshifts, SSHLddd>;
+defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vshiftu, USHLddd>;
+
+// Patterns to match llvm.aarch64.* intrinsic for
+// Scalar Integer Shift Left (Signed, Unsigned)
+defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vshlds, SSHLddd>;
+defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vshldu, USHLddd>;
+
// Scalar Integer Saturating Shift Left (Signed, Unsigned)
defm SQSHL: NeonI_Scalar3Same_BHSD_sizes<0b0, 0b01001, "sqshl", 0>;
defm UQSHL: NeonI_Scalar3Same_BHSD_sizes<0b1, 0b01001, "uqshl", 0>;
-// Scalar Integer Rouding Shift Left (Signed, Unsigned)
+// Patterns to match llvm.aarch64.* intrinsic for
+// Scalar Integer Saturating Shift Left (Signed, Unsigned)
+defm : Neon_Scalar3Same_BHSD_size_patterns<int_aarch64_neon_vqshls, SQSHLbbb,
+ SQSHLhhh, SQSHLsss, SQSHLddd>;
+defm : Neon_Scalar3Same_BHSD_size_patterns<int_aarch64_neon_vqshlu, UQSHLbbb,
+ UQSHLhhh, UQSHLsss, UQSHLddd>;
+
+// Patterns to match llvm.arm.* intrinsic for
+// Scalar Integer Saturating Shift Left (Signed, Unsigned)
+defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vqshifts, SQSHLddd>;
+defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vqshiftu, UQSHLddd>;
+
+// Scalar Integer Rounding Shift Left (Signed, Unsigned)
def SRSHLddd: NeonI_Scalar3Same_D_size<0b0, 0b01010, "srshl">;
def URSHLddd: NeonI_Scalar3Same_D_size<0b1, 0b01010, "urshl">;
+// Patterns to match llvm.aarch64.* intrinsic for
+// Scalar Integer Rounding Shift Left (Signed, Unsigned)
+defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vrshlds, SRSHLddd>;
+defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vrshldu, URSHLddd>;
+
+// Patterns to match llvm.arm.* intrinsic for
+// Scalar Integer Rounding Shift Left (Signed, Unsigned)
+defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vrshifts, SRSHLddd>;
+defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vrshiftu, URSHLddd>;
+
// Scalar Integer Saturating Rounding Shift Left (Signed, Unsigned)
defm SQRSHL: NeonI_Scalar3Same_BHSD_sizes<0b0, 0b01011, "sqrshl", 0>;
defm UQRSHL: NeonI_Scalar3Same_BHSD_sizes<0b1, 0b01011, "uqrshl", 0>;
-// Patterns for Scalar Integer Shift Lef, Saturating Shift Left,
-// Rounding Shift Left, Rounding Saturating Shift Left with D register only
-def : Neon_Scalar_D_size_patterns<int_arm_neon_vshifts, SSHLddd>;
-def : Neon_Scalar_D_size_patterns<int_arm_neon_vshiftu, USHLddd>;
-def : Neon_Scalar_D_size_patterns<shl, SSHLddd>;
-def : Neon_Scalar_D_size_patterns<shl, USHLddd>;
-def : Neon_Scalar_D_size_patterns<int_arm_neon_vqshifts, SQSHLddd>;
-def : Neon_Scalar_D_size_patterns<int_arm_neon_vqshiftu, UQSHLddd>;
-def : Neon_Scalar_D_size_patterns<int_arm_neon_vrshifts, SRSHLddd>;
-def : Neon_Scalar_D_size_patterns<int_arm_neon_vrshiftu, URSHLddd>;
-def : Neon_Scalar_D_size_patterns<int_arm_neon_vqrshifts, SQRSHLddd>;
-def : Neon_Scalar_D_size_patterns<int_arm_neon_vqrshiftu, UQRSHLddd>;
+// Patterns to match llvm.aarch64.* intrinsic for
+// Scalar Integer Saturating Rounding Shift Left (Signed, Unsigned)
+defm : Neon_Scalar3Same_BHSD_size_patterns<int_aarch64_neon_vqrshls, SQRSHLbbb,
+ SQRSHLhhh, SQRSHLsss, SQRSHLddd>;
+defm : Neon_Scalar3Same_BHSD_size_patterns<int_aarch64_neon_vqrshlu, UQRSHLbbb,
+ UQRSHLhhh, UQRSHLsss, UQRSHLddd>;
+// Patterns to match llvm.arm.* intrinsic for
+// Scalar Integer Saturating Rounding Shift Left (Signed, Unsigned)
+defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vqrshifts, SQRSHLddd>;
+defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vqrshiftu, UQRSHLddd>;
+
+// Signed Saturating Doubling Multiply-Add Long
+defm SQDMLAL : NeonI_Scalar3Diff_ml_HS_size<0b0, 0b1001, "sqdmlal">;
+defm : Neon_Scalar3Diff_ml_HS_size_patterns<int_aarch64_neon_vqdmlal,
+ SQDMLALshh, SQDMLALdss>;
+
+// Signed Saturating Doubling Multiply-Subtract Long
+defm SQDMLSL : NeonI_Scalar3Diff_ml_HS_size<0b0, 0b1011, "sqdmlsl">;
+defm : Neon_Scalar3Diff_ml_HS_size_patterns<int_aarch64_neon_vqdmlsl,
+ SQDMLSLshh, SQDMLSLdss>;
+
+// Signed Saturating Doubling Multiply Long
+defm SQDMULL : NeonI_Scalar3Diff_HS_size<0b0, 0b1101, "sqdmull">;
+defm : Neon_Scalar3Diff_HS_size_patterns<int_arm_neon_vqdmull,
+ SQDMULLshh, SQDMULLdss>;
+
+// Scalar Signed Integer Convert To Floating-point
+defm SCVTF : NeonI_Scalar2SameMisc_SD_size<0b0, 0b0, 0b11101, "scvtf">;
+defm : Neon_Scalar2SameMisc_cvt_SD_size_patterns<int_aarch64_neon_vcvtf32_s32,
+ int_aarch64_neon_vcvtf64_s64,
+ SCVTFss, SCVTFdd>;
+
+// Scalar Unsigned Integer Convert To Floating-point
+defm UCVTF : NeonI_Scalar2SameMisc_SD_size<0b1, 0b0, 0b11101, "ucvtf">;
+defm : Neon_Scalar2SameMisc_cvt_SD_size_patterns<int_aarch64_neon_vcvtf32_u32,
+ int_aarch64_neon_vcvtf64_u64,
+ UCVTFss, UCVTFdd>;
+
+// Scalar Floating-point Converts
+def FCVTXN : NeonI_Scalar2SameMisc_fcvtxn_D_size<0b1, 0b10110, "fcvtxn">;
+def : Neon_Scalar2SameMisc_fcvtxn_D_size_patterns<int_aarch64_neon_fcvtxn,
+ FCVTXN>;
+
+defm FCVTNS : NeonI_Scalar2SameMisc_SD_size<0b0, 0b0, 0b11010, "fcvtns">;
+defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtns,
+ FCVTNSss, FCVTNSdd>;
+
+defm FCVTNU : NeonI_Scalar2SameMisc_SD_size<0b1, 0b0, 0b11010, "fcvtnu">;
+defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtnu,
+ FCVTNUss, FCVTNUdd>;
+
+defm FCVTMS : NeonI_Scalar2SameMisc_SD_size<0b0, 0b0, 0b11011, "fcvtms">;
+defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtms,
+ FCVTMSss, FCVTMSdd>;
+
+defm FCVTMU : NeonI_Scalar2SameMisc_SD_size<0b1, 0b0, 0b11011, "fcvtmu">;
+defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtmu,
+ FCVTMUss, FCVTMUdd>;
+
+defm FCVTAS : NeonI_Scalar2SameMisc_SD_size<0b0, 0b0, 0b11100, "fcvtas">;
+defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtas,
+ FCVTASss, FCVTASdd>;
+
+defm FCVTAU : NeonI_Scalar2SameMisc_SD_size<0b1, 0b0, 0b11100, "fcvtau">;
+defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtau,
+ FCVTAUss, FCVTAUdd>;
+
+defm FCVTPS : NeonI_Scalar2SameMisc_SD_size<0b0, 0b1, 0b11010, "fcvtps">;
+defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtps,
+ FCVTPSss, FCVTPSdd>;
+
+defm FCVTPU : NeonI_Scalar2SameMisc_SD_size<0b1, 0b1, 0b11010, "fcvtpu">;
+defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtpu,
+ FCVTPUss, FCVTPUdd>;
+
+defm FCVTZS : NeonI_Scalar2SameMisc_SD_size<0b0, 0b1, 0b11011, "fcvtzs">;
+defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtzs,
+ FCVTZSss, FCVTZSdd>;
+
+defm FCVTZU : NeonI_Scalar2SameMisc_SD_size<0b1, 0b1, 0b11011, "fcvtzu">;
+defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtzu,
+ FCVTZUss, FCVTZUdd>;
+
+// Patterns For Convert Instructions Between v1f64 and v1i64
+class Neon_Scalar2SameMisc_cvtf_v1f64_pattern<SDPatternOperator opnode,
+ Instruction INST>
+ : Pat<(v1f64 (opnode (v1i64 FPR64:$Rn))), (INST FPR64:$Rn)>;
+
+class Neon_Scalar2SameMisc_fcvt_v1f64_pattern<SDPatternOperator opnode,
+ Instruction INST>
+ : Pat<(v1i64 (opnode (v1f64 FPR64:$Rn))), (INST FPR64:$Rn)>;
+
+def : Neon_Scalar2SameMisc_cvtf_v1f64_pattern<sint_to_fp, SCVTFdd>;
+def : Neon_Scalar2SameMisc_cvtf_v1f64_pattern<uint_to_fp, UCVTFdd>;
+
+def : Neon_Scalar2SameMisc_fcvt_v1f64_pattern<fp_to_sint, FCVTZSdd>;
+def : Neon_Scalar2SameMisc_fcvt_v1f64_pattern<fp_to_uint, FCVTZUdd>;
+
+// Scalar Floating-point Reciprocal Estimate
+defm FRECPE : NeonI_Scalar2SameMisc_SD_size<0b0, 0b1, 0b11101, "frecpe">;
+defm : Neon_Scalar2SameMisc_SD_size_patterns<int_arm_neon_vrecpe,
+ FRECPEss, FRECPEdd>;
+
+// Scalar Floating-point Reciprocal Exponent
+defm FRECPX : NeonI_Scalar2SameMisc_SD_size<0b0, 0b1, 0b11111, "frecpx">;
+defm : Neon_Scalar2SameMisc_SD_size_patterns<int_aarch64_neon_vrecpx,
+ FRECPXss, FRECPXdd>;
+
+// Scalar Floating-point Reciprocal Square Root Estimate
+defm FRSQRTE: NeonI_Scalar2SameMisc_SD_size<0b1, 0b1, 0b11101, "frsqrte">;
+defm : Neon_Scalar2SameMisc_SD_size_patterns<int_arm_neon_vrsqrte,
+ FRSQRTEss, FRSQRTEdd>;
+
+// Scalar Floating-point Round
+class Neon_ScalarFloatRound_pattern<SDPatternOperator opnode, Instruction INST>
+ : Pat<(v1f64 (opnode (v1f64 FPR64:$Rn))), (INST FPR64:$Rn)>;
+
+def : Neon_ScalarFloatRound_pattern<fceil, FRINTPdd>;
+def : Neon_ScalarFloatRound_pattern<ffloor, FRINTMdd>;
+def : Neon_ScalarFloatRound_pattern<ftrunc, FRINTZdd>;
+def : Neon_ScalarFloatRound_pattern<frint, FRINTXdd>;
+def : Neon_ScalarFloatRound_pattern<fnearbyint, FRINTIdd>;
+def : Neon_ScalarFloatRound_pattern<frnd, FRINTAdd>;
+def : Neon_ScalarFloatRound_pattern<int_aarch64_neon_frintn, FRINTNdd>;
+
+// Scalar Integer Compare
+
+// Scalar Compare Bitwise Equal
+def CMEQddd: NeonI_Scalar3Same_D_size<0b1, 0b10001, "cmeq">;
+def : Neon_Scalar3Same_cmp_D_size_patterns<int_aarch64_neon_vceq, CMEQddd>;
+
+class Neon_Scalar3Same_cmp_D_size_v1_patterns<SDPatternOperator opnode,
+ Instruction INSTD,
+ CondCode CC>
+ : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn), (v1i64 FPR64:$Rm), CC)),
+ (INSTD FPR64:$Rn, FPR64:$Rm)>;
+
+def : Neon_Scalar3Same_cmp_D_size_v1_patterns<Neon_cmp, CMEQddd, SETEQ>;
+
+// Scalar Compare Signed Greater Than Or Equal
+def CMGEddd: NeonI_Scalar3Same_D_size<0b0, 0b00111, "cmge">;
+def : Neon_Scalar3Same_cmp_D_size_patterns<int_aarch64_neon_vcge, CMGEddd>;
+def : Neon_Scalar3Same_cmp_D_size_v1_patterns<Neon_cmp, CMGEddd, SETGE>;
+
+// Scalar Compare Unsigned Higher Or Same
+def CMHSddd: NeonI_Scalar3Same_D_size<0b1, 0b00111, "cmhs">;
+def : Neon_Scalar3Same_cmp_D_size_patterns<int_aarch64_neon_vchs, CMHSddd>;
+def : Neon_Scalar3Same_cmp_D_size_v1_patterns<Neon_cmp, CMHSddd, SETUGE>;
+
+// Scalar Compare Unsigned Higher
+def CMHIddd: NeonI_Scalar3Same_D_size<0b1, 0b00110, "cmhi">;
+def : Neon_Scalar3Same_cmp_D_size_patterns<int_aarch64_neon_vchi, CMHIddd>;
+def : Neon_Scalar3Same_cmp_D_size_v1_patterns<Neon_cmp, CMHIddd, SETUGT>;
+
+// Scalar Compare Signed Greater Than
+def CMGTddd: NeonI_Scalar3Same_D_size<0b0, 0b00110, "cmgt">;
+def : Neon_Scalar3Same_cmp_D_size_patterns<int_aarch64_neon_vcgt, CMGTddd>;
+def : Neon_Scalar3Same_cmp_D_size_v1_patterns<Neon_cmp, CMGTddd, SETGT>;
+
+// Scalar Compare Bitwise Test Bits
+def CMTSTddd: NeonI_Scalar3Same_D_size<0b0, 0b10001, "cmtst">;
+def : Neon_Scalar3Same_cmp_D_size_patterns<int_aarch64_neon_vtstd, CMTSTddd>;
+def : Neon_Scalar3Same_cmp_D_size_patterns<Neon_tst, CMTSTddd>;
+
+// Scalar Compare Bitwise Equal To Zero
+def CMEQddi: NeonI_Scalar2SameMisc_cmpz_D_size<0b0, 0b01001, "cmeq">;
+def : Neon_Scalar2SameMisc_cmpz_D_size_patterns<int_aarch64_neon_vceq,
+ CMEQddi>;
+def : Neon_Scalar2SameMisc_cmpz_D_V1_size_patterns<SETEQ, CMEQddi>;
+
+// Scalar Compare Signed Greater Than Or Equal To Zero
+def CMGEddi: NeonI_Scalar2SameMisc_cmpz_D_size<0b1, 0b01000, "cmge">;
+def : Neon_Scalar2SameMisc_cmpz_D_size_patterns<int_aarch64_neon_vcge,
+ CMGEddi>;
+def : Neon_Scalar2SameMisc_cmpz_D_V1_size_patterns<SETGE, CMGEddi>;
+
+// Scalar Compare Signed Greater Than Zero
+def CMGTddi: NeonI_Scalar2SameMisc_cmpz_D_size<0b0, 0b01000, "cmgt">;
+def : Neon_Scalar2SameMisc_cmpz_D_size_patterns<int_aarch64_neon_vcgt,
+ CMGTddi>;
+def : Neon_Scalar2SameMisc_cmpz_D_V1_size_patterns<SETGT, CMGTddi>;
+
+// Scalar Compare Signed Less Than Or Equal To Zero
+def CMLEddi: NeonI_Scalar2SameMisc_cmpz_D_size<0b1, 0b01001, "cmle">;
+def : Neon_Scalar2SameMisc_cmpz_D_size_patterns<int_aarch64_neon_vclez,
+ CMLEddi>;
+def : Neon_Scalar2SameMisc_cmpz_D_V1_size_patterns<SETLE, CMLEddi>;
+
+// Scalar Compare Less Than Zero
+def CMLTddi: NeonI_Scalar2SameMisc_cmpz_D_size<0b0, 0b01010, "cmlt">;
+def : Neon_Scalar2SameMisc_cmpz_D_size_patterns<int_aarch64_neon_vcltz,
+ CMLTddi>;
+def : Neon_Scalar2SameMisc_cmpz_D_V1_size_patterns<SETLT, CMLTddi>;
+
+// Scalar Floating-point Compare
+
+// Scalar Floating-point Compare Mask Equal
+defm FCMEQ: NeonI_Scalar3Same_SD_sizes<0b0, 0b0, 0b11100, "fcmeq">;
+defm : Neon_Scalar3Same_cmp_SD_size_patterns<int_aarch64_neon_vceq,
+ FCMEQsss, FCMEQddd>;
+def : Neon_Scalar3Same_cmp_V1_D_size_patterns<SETEQ, FCMEQddd>;
+
+// Scalar Floating-point Compare Mask Equal To Zero
+defm FCMEQZ: NeonI_Scalar2SameMisc_cmpz_SD_size<0b0, 0b01101, "fcmeq">;
+defm : Neon_Scalar2SameMisc_cmpz_SD_size_patterns<int_aarch64_neon_vceq,
+ FCMEQZssi, FCMEQZddi>;
+def : Pat<(v1i64 (Neon_cmpz (v1f64 FPR64:$Rn), (f32 fpz32:$FPImm), SETEQ)),
+ (FCMEQZddi FPR64:$Rn, fpz32:$FPImm)>;
+
+// Scalar Floating-point Compare Mask Greater Than Or Equal
+defm FCMGE: NeonI_Scalar3Same_SD_sizes<0b1, 0b0, 0b11100, "fcmge">;
+defm : Neon_Scalar3Same_cmp_SD_size_patterns<int_aarch64_neon_vcge,
+ FCMGEsss, FCMGEddd>;
+def : Neon_Scalar3Same_cmp_V1_D_size_patterns<SETGE, FCMGEddd>;
+
+// Scalar Floating-point Compare Mask Greater Than Or Equal To Zero
+defm FCMGEZ: NeonI_Scalar2SameMisc_cmpz_SD_size<0b1, 0b01100, "fcmge">;
+defm : Neon_Scalar2SameMisc_cmpz_SD_size_patterns<int_aarch64_neon_vcge,
+ FCMGEZssi, FCMGEZddi>;
+
+// Scalar Floating-point Compare Mask Greater Than
+defm FCMGT: NeonI_Scalar3Same_SD_sizes<0b1, 0b1, 0b11100, "fcmgt">;
+defm : Neon_Scalar3Same_cmp_SD_size_patterns<int_aarch64_neon_vcgt,
+ FCMGTsss, FCMGTddd>;
+def : Neon_Scalar3Same_cmp_V1_D_size_patterns<SETGT, FCMGTddd>;
+
+// Scalar Floating-point Compare Mask Greater Than Zero
+defm FCMGTZ: NeonI_Scalar2SameMisc_cmpz_SD_size<0b0, 0b01100, "fcmgt">;
+defm : Neon_Scalar2SameMisc_cmpz_SD_size_patterns<int_aarch64_neon_vcgt,
+ FCMGTZssi, FCMGTZddi>;
+
+// Scalar Floating-point Compare Mask Less Than Or Equal To Zero
+defm FCMLEZ: NeonI_Scalar2SameMisc_cmpz_SD_size<0b1, 0b01101, "fcmle">;
+defm : Neon_Scalar2SameMisc_cmpz_SD_size_patterns<int_aarch64_neon_vclez,
+ FCMLEZssi, FCMLEZddi>;
+
+// Scalar Floating-point Compare Mask Less Than Zero
+defm FCMLTZ: NeonI_Scalar2SameMisc_cmpz_SD_size<0b0, 0b01110, "fcmlt">;
+defm : Neon_Scalar2SameMisc_cmpz_SD_size_patterns<int_aarch64_neon_vcltz,
+ FCMLTZssi, FCMLTZddi>;
+
+// Scalar Floating-point Absolute Compare Mask Greater Than Or Equal
+defm FACGE: NeonI_Scalar3Same_SD_sizes<0b1, 0b0, 0b11101, "facge">;
+defm : Neon_Scalar3Same_cmp_SD_size_patterns<int_aarch64_neon_vcage,
+ FACGEsss, FACGEddd>;
+
+// Scalar Floating-point Absolute Compare Mask Greater Than
+defm FACGT: NeonI_Scalar3Same_SD_sizes<0b1, 0b1, 0b11101, "facgt">;
+defm : Neon_Scalar3Same_cmp_SD_size_patterns<int_aarch64_neon_vcagt,
+ FACGTsss, FACGTddd>;
+
+// Scalar Floating-point Absolute Difference
+defm FABD: NeonI_Scalar3Same_SD_sizes<0b1, 0b1, 0b11010, "fabd">;
+defm : Neon_Scalar3Same_SD_size_patterns<int_aarch64_neon_vabd,
+ FABDsss, FABDddd>;
+
+// Scalar Absolute Value
+defm ABS : NeonI_Scalar2SameMisc_D_size<0b0, 0b01011, "abs">;
+defm : Neon_Scalar2SameMisc_D_size_patterns<int_aarch64_neon_vabs, ABSdd>;
+
+// Scalar Signed Saturating Absolute Value
+defm SQABS : NeonI_Scalar2SameMisc_BHSD_size<0b0, 0b00111, "sqabs">;
+defm : Neon_Scalar2SameMisc_BHSD_size_patterns<int_arm_neon_vqabs,
+ SQABSbb, SQABShh, SQABSss, SQABSdd>;
+
+// Scalar Negate
+defm NEG : NeonI_Scalar2SameMisc_D_size<0b1, 0b01011, "neg">;
+defm : Neon_Scalar2SameMisc_D_size_patterns<int_aarch64_neon_vneg, NEGdd>;
+
+// Scalar Signed Saturating Negate
+defm SQNEG : NeonI_Scalar2SameMisc_BHSD_size<0b1, 0b00111, "sqneg">;
+defm : Neon_Scalar2SameMisc_BHSD_size_patterns<int_arm_neon_vqneg,
+ SQNEGbb, SQNEGhh, SQNEGss, SQNEGdd>;
+
+// Scalar Signed Saturating Accumulated of Unsigned Value
+defm SUQADD : NeonI_Scalar2SameMisc_accum_BHSD_size<0b0, 0b00011, "suqadd">;
+defm : Neon_Scalar2SameMisc_accum_BHSD_size_patterns<int_aarch64_neon_vuqadd,
+ SUQADDbb, SUQADDhh,
+ SUQADDss, SUQADDdd>;
+
+// Scalar Unsigned Saturating Accumulated of Signed Value
+defm USQADD : NeonI_Scalar2SameMisc_accum_BHSD_size<0b1, 0b00011, "usqadd">;
+defm : Neon_Scalar2SameMisc_accum_BHSD_size_patterns<int_aarch64_neon_vsqadd,
+ USQADDbb, USQADDhh,
+ USQADDss, USQADDdd>;
+
+def : Pat<(v1i64 (int_aarch64_neon_suqadd (v1i64 FPR64:$Src),
+ (v1i64 FPR64:$Rn))),
+ (SUQADDdd FPR64:$Src, FPR64:$Rn)>;
+
+def : Pat<(v1i64 (int_aarch64_neon_usqadd (v1i64 FPR64:$Src),
+ (v1i64 FPR64:$Rn))),
+ (USQADDdd FPR64:$Src, FPR64:$Rn)>;
+
+def : Pat<(v1i64 (int_arm_neon_vabs (v1i64 FPR64:$Rn))),
+ (ABSdd FPR64:$Rn)>;
+
+def : Pat<(v1i64 (int_arm_neon_vqabs (v1i64 FPR64:$Rn))),
+ (SQABSdd FPR64:$Rn)>;
+
+def : Pat<(v1i64 (int_arm_neon_vqneg (v1i64 FPR64:$Rn))),
+ (SQNEGdd FPR64:$Rn)>;
+
+def : Pat<(v1i64 (sub (v1i64 (bitconvert (v8i8 Neon_AllZero))),
+ (v1i64 FPR64:$Rn))),
+ (NEGdd FPR64:$Rn)>;
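+
+// For illustration: the last pattern folds a generic subtract from a zero
+// vector, so computing (sub <1 x i64> zeroinitializer, %x) selects to
+// "neg d0, d1" rather than materializing the zero operand.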
+
+// Scalar Signed Saturating Extract Unsigned Narrow
+defm SQXTUN : NeonI_Scalar2SameMisc_narrow_HSD_size<0b1, 0b10010, "sqxtun">;
+defm : Neon_Scalar2SameMisc_narrow_HSD_size_patterns<int_arm_neon_vqmovnsu,
+ SQXTUNbh, SQXTUNhs,
+ SQXTUNsd>;
+
+// Scalar Signed Saturating Extract Narrow
+defm SQXTN : NeonI_Scalar2SameMisc_narrow_HSD_size<0b0, 0b10100, "sqxtn">;
+defm : Neon_Scalar2SameMisc_narrow_HSD_size_patterns<int_arm_neon_vqmovns,
+ SQXTNbh, SQXTNhs,
+ SQXTNsd>;
+
+// Scalar Unsigned Saturating Extract Narrow
+defm UQXTN : NeonI_Scalar2SameMisc_narrow_HSD_size<0b1, 0b10100, "uqxtn">;
+defm : Neon_Scalar2SameMisc_narrow_HSD_size_patterns<int_arm_neon_vqmovnu,
+ UQXTNbh, UQXTNhs,
+ UQXTNsd>;
+
+// Scalar Reduce Pairwise
+
+multiclass NeonI_ScalarPair_D_sizes<bit u, bit size, bits<5> opcode,
+ string asmop, bit Commutable = 0> {
+ let isCommutable = Commutable in {
+ def _D_2D : NeonI_ScalarPair<u, {size, 0b1}, opcode,
+ (outs FPR64:$Rd), (ins VPR128:$Rn),
+ !strconcat(asmop, "\t$Rd, $Rn.2d"),
+ [],
+ NoItinerary>;
+ }
+}
+
+multiclass NeonI_ScalarPair_SD_sizes<bit u, bit size, bits<5> opcode,
+ string asmop, bit Commutable = 0>
+ : NeonI_ScalarPair_D_sizes<u, size, opcode, asmop, Commutable> {
+ let isCommutable = Commutable in {
+ def _S_2S : NeonI_ScalarPair<u, {size, 0b0}, opcode,
+ (outs FPR32:$Rd), (ins VPR64:$Rn),
+ !strconcat(asmop, "\t$Rd, $Rn.2s"),
+ [],
+ NoItinerary>;
+ }
+}
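+
+// For illustration: the pairwise-reduce instructions defined from these
+// multiclasses read a whole vector and produce a scalar, e.g.
+// "addp d0, v1.2d" or "faddp s0, v1.2s".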
+
+// Scalar Reduce Addition Pairwise (Integer)
+defm ADDPvv : NeonI_ScalarPair_D_sizes<0b0, 0b1, 0b11011, "addp", 0>;
+
+// Pattern to match llvm.aarch64.* intrinsic for
+// Scalar Reduce Addition Pairwise (Integer)
+def : Pat<(v1i64 (int_aarch64_neon_vpadd (v2i64 VPR128:$Rn))),
+ (ADDPvv_D_2D VPR128:$Rn)>;
+def : Pat<(v1i64 (int_aarch64_neon_vaddv (v2i64 VPR128:$Rn))),
+ (ADDPvv_D_2D VPR128:$Rn)>;
+
+// Scalar Reduce Addition Pairwise (Floating Point)
+defm FADDPvv : NeonI_ScalarPair_SD_sizes<0b1, 0b0, 0b01101, "faddp", 0>;
+
+// Scalar Reduce Maximum Pairwise (Floating Point)
+defm FMAXPvv : NeonI_ScalarPair_SD_sizes<0b1, 0b0, 0b01111, "fmaxp", 0>;
+
+// Scalar Reduce Minimum Pairwise (Floating Point)
+defm FMINPvv : NeonI_ScalarPair_SD_sizes<0b1, 0b1, 0b01111, "fminp", 0>;
+
+// Scalar Reduce maxNum Pairwise (Floating Point)
+defm FMAXNMPvv : NeonI_ScalarPair_SD_sizes<0b1, 0b0, 0b01100, "fmaxnmp", 0>;
+
+// Scalar Reduce minNum Pairwise (Floating Point)
+defm FMINNMPvv : NeonI_ScalarPair_SD_sizes<0b1, 0b1, 0b01100, "fminnmp", 0>;
+
+multiclass Neon_ScalarPair_SD_size_patterns<SDPatternOperator opnodeS,
+ SDPatternOperator opnodeD,
+ Instruction INSTS,
+ Instruction INSTD> {
+ def : Pat<(v1f32 (opnodeS (v2f32 VPR64:$Rn))),
+ (INSTS VPR64:$Rn)>;
+ def : Pat<(v1f64 (opnodeD (v2f64 VPR128:$Rn))),
+ (INSTD VPR128:$Rn)>;
+}
+
+// Patterns to match llvm.aarch64.* intrinsic for
+// Scalar Reduce Add, Max, Min, MaxNum, MinNum Pairwise (Floating Point)
+defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vpfadd,
+ int_aarch64_neon_vpfaddq, FADDPvv_S_2S, FADDPvv_D_2D>;
+
+defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vpmax,
+ int_aarch64_neon_vpmaxq, FMAXPvv_S_2S, FMAXPvv_D_2D>;
+
+defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vpmin,
+ int_aarch64_neon_vpminq, FMINPvv_S_2S, FMINPvv_D_2D>;
+
+defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vpfmaxnm,
+ int_aarch64_neon_vpfmaxnmq, FMAXNMPvv_S_2S, FMAXNMPvv_D_2D>;
+
+defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vpfminnm,
+ int_aarch64_neon_vpfminnmq, FMINNMPvv_S_2S, FMINNMPvv_D_2D>;
+
+defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vaddv,
+ int_aarch64_neon_vaddv, FADDPvv_S_2S, FADDPvv_D_2D>;
+
+def : Pat<(v1f32 (int_aarch64_neon_vaddv (v4f32 VPR128:$Rn))),
+ (FADDPvv_S_2S (v2f32
+ (EXTRACT_SUBREG
+ (v4f32 (FADDP_4S (v4f32 VPR128:$Rn), (v4f32 VPR128:$Rn))),
+ sub_64)))>;
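+
+// For illustration: a 4-lane f32 sum reduction has no single scalar
+// instruction, so the pattern above lowers it in two steps: a vector
+// FADDP.4S halves the four lanes into two partial sums, then a scalar
+// faddp on the low 64 bits adds the remaining pair.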
+
+defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vmaxv,
+ int_aarch64_neon_vmaxv, FMAXPvv_S_2S, FMAXPvv_D_2D>;
+
+defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vminv,
+ int_aarch64_neon_vminv, FMINPvv_S_2S, FMINPvv_D_2D>;
+
+defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vmaxnmv,
+ int_aarch64_neon_vmaxnmv, FMAXNMPvv_S_2S, FMAXNMPvv_D_2D>;
+
+defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vminnmv,
+ int_aarch64_neon_vminnmv, FMINNMPvv_S_2S, FMINNMPvv_D_2D>;
+
+// Scalar by element Arithmetic
+
+class NeonI_ScalarXIndexedElemArith<string asmop, bits<4> opcode,
+ string rmlane, bit u, bit szhi, bit szlo,
+ RegisterClass ResFPR, RegisterClass OpFPR,
+ RegisterOperand OpVPR, Operand OpImm>
+ : NeonI_ScalarXIndexedElem<u, szhi, szlo, opcode,
+ (outs ResFPR:$Rd),
+ (ins OpFPR:$Rn, OpVPR:$MRm, OpImm:$Imm),
+ asmop # "\t$Rd, $Rn, $MRm" # rmlane # "[$Imm]",
+ [],
+ NoItinerary> {
+ bits<3> Imm;
+ bits<5> MRm;
+}
+
+class NeonI_ScalarXIndexedElemArith_Constraint_Impl<string asmop, bits<4> opcode,
+ string rmlane,
+ bit u, bit szhi, bit szlo,
+ RegisterClass ResFPR,
+ RegisterClass OpFPR,
+ RegisterOperand OpVPR,
+ Operand OpImm>
+ : NeonI_ScalarXIndexedElem<u, szhi, szlo, opcode,
+ (outs ResFPR:$Rd),
+ (ins ResFPR:$src, OpFPR:$Rn, OpVPR:$MRm, OpImm:$Imm),
+ asmop # "\t$Rd, $Rn, $MRm" # rmlane # "[$Imm]",
+ [],
+ NoItinerary> {
+ let Constraints = "$src = $Rd";
+ bits<3> Imm;
+ bits<5> MRm;
+}
+
+// Scalar Floating Point multiply (scalar, by element)
+def FMULssv_4S : NeonI_ScalarXIndexedElemArith<"fmul",
+ 0b1001, ".s", 0b0, 0b1, 0b0, FPR32, FPR32, VPR128, neon_uimm2_bare> {
+ let Inst{11} = Imm{1}; // h
+ let Inst{21} = Imm{0}; // l
+ let Inst{20-16} = MRm;
+}
+def FMULddv_2D : NeonI_ScalarXIndexedElemArith<"fmul",
+ 0b1001, ".d", 0b0, 0b1, 0b1, FPR64, FPR64, VPR128, neon_uimm1_bare> {
+ let Inst{11} = Imm{0}; // h
+ let Inst{21} = 0b0; // l
+ let Inst{20-16} = MRm;
+}
+
+// Scalar Floating Point multiply extended (scalar, by element)
+def FMULXssv_4S : NeonI_ScalarXIndexedElemArith<"fmulx",
+ 0b1001, ".s", 0b1, 0b1, 0b0, FPR32, FPR32, VPR128, neon_uimm2_bare> {
+ let Inst{11} = Imm{1}; // h
+ let Inst{21} = Imm{0}; // l
+ let Inst{20-16} = MRm;
+}
+def FMULXddv_2D : NeonI_ScalarXIndexedElemArith<"fmulx",
+ 0b1001, ".d", 0b1, 0b1, 0b1, FPR64, FPR64, VPR128, neon_uimm1_bare> {
+ let Inst{11} = Imm{0}; // h
+ let Inst{21} = 0b0; // l
+ let Inst{20-16} = MRm;
+}
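+
+// For illustration: in the ".s" forms the 2-bit lane index is split
+// across the h (Inst{11}) and l (Inst{21}) encoding bits, while the ".d"
+// forms need only h, so e.g. "fmul s0, s1, v2.s[3]" encodes h = l = 1.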
+
+multiclass Neon_ScalarXIndexedElem_MUL_MULX_Patterns<
+ SDPatternOperator opnode,
+ Instruction INST,
+ ValueType ResTy, RegisterClass FPRC, ValueType OpTy, Operand OpImm,
+ ValueType OpNTy, ValueType ExTy, Operand OpNImm> {
+
+ def : Pat<(ResTy (opnode (ResTy FPRC:$Rn),
+ (ResTy (vector_extract (OpTy VPR128:$MRm), OpImm:$Imm)))),
+ (ResTy (INST (ResTy FPRC:$Rn), (OpTy VPR128:$MRm), OpImm:$Imm))>;
+
+ def : Pat<(ResTy (opnode (ResTy FPRC:$Rn),
+ (ResTy (vector_extract (OpNTy VPR64:$MRm), OpNImm:$Imm)))),
+ (ResTy (INST (ResTy FPRC:$Rn),
+ (ExTy (SUBREG_TO_REG (i64 0), VPR64:$MRm, sub_64)),
+ OpNImm:$Imm))>;
+
+ // swapped operands
+ def : Pat<(ResTy (opnode
+ (ResTy (vector_extract (OpTy VPR128:$MRm), OpImm:$Imm)),
+ (ResTy FPRC:$Rn))),
+ (ResTy (INST (ResTy FPRC:$Rn), (OpTy VPR128:$MRm), OpImm:$Imm))>;
+
+ def : Pat<(ResTy (opnode
+ (ResTy (vector_extract (OpNTy VPR64:$MRm), OpNImm:$Imm)),
+ (ResTy FPRC:$Rn))),
+ (ResTy (INST (ResTy FPRC:$Rn),
+ (ExTy (SUBREG_TO_REG (i64 0), VPR64:$MRm, sub_64)),
+ OpNImm:$Imm))>;
+}
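+
+// For illustration: there is no separate by-element instruction for
+// 64-bit vector operands, so the second and fourth patterns retag the
+// VPR64 source as the low half of a VPR128 register via SUBREG_TO_REG
+// and reuse the 128-bit form.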
+
+// Patterns for Scalar Floating Point multiply (scalar, by element)
+defm : Neon_ScalarXIndexedElem_MUL_MULX_Patterns<fmul, FMULssv_4S,
+ f32, FPR32, v4f32, neon_uimm2_bare, v2f32, v4f32, neon_uimm1_bare>;
+defm : Neon_ScalarXIndexedElem_MUL_MULX_Patterns<fmul, FMULddv_2D,
+ f64, FPR64, v2f64, neon_uimm1_bare, v1f64, v2f64, neon_uimm0_bare>;
+
+// Patterns for Scalar Floating Point multiply extended (scalar, by element)
+defm : Neon_ScalarXIndexedElem_MUL_MULX_Patterns<int_aarch64_neon_vmulx,
+ FMULXssv_4S, f32, FPR32, v4f32, neon_uimm2_bare,
+ v2f32, v4f32, neon_uimm1_bare>;
+defm : Neon_ScalarXIndexedElem_MUL_MULX_Patterns<int_aarch64_neon_vmulx,
+ FMULXddv_2D, f64, FPR64, v2f64, neon_uimm1_bare,
+ v1f64, v2f64, neon_uimm0_bare>;
+
+// Scalar Floating Point fused multiply-add (scalar, by element)
+def FMLAssv_4S : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"fmla",
+ 0b0001, ".s", 0b0, 0b1, 0b0, FPR32, FPR32, VPR128, neon_uimm2_bare> {
+ let Inst{11} = Imm{1}; // h
+ let Inst{21} = Imm{0}; // l
+ let Inst{20-16} = MRm;
+}
+def FMLAddv_2D : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"fmla",
+ 0b0001, ".d", 0b0, 0b1, 0b1, FPR64, FPR64, VPR128, neon_uimm1_bare> {
+ let Inst{11} = Imm{0}; // h
+ let Inst{21} = 0b0; // l
+ let Inst{20-16} = MRm;
+}
+
+// Scalar Floating Point fused multiply-subtract (scalar, by element)
+def FMLSssv_4S : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"fmls",
+ 0b0101, ".s", 0b0, 0b1, 0b0, FPR32, FPR32, VPR128, neon_uimm2_bare> {
+ let Inst{11} = Imm{1}; // h
+ let Inst{21} = Imm{0}; // l
+ let Inst{20-16} = MRm;
+}
+def FMLSddv_2D : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"fmls",
+ 0b0101, ".d", 0b0, 0b1, 0b1, FPR64, FPR64, VPR128, neon_uimm1_bare> {
+ let Inst{11} = Imm{0}; // h
+ let Inst{21} = 0b0; // l
+ let Inst{20-16} = MRm;
+}
+// We are allowed to match the fma instruction regardless of compile options.
+multiclass Neon_ScalarXIndexedElem_FMA_Patterns<
+ Instruction FMLAI, Instruction FMLSI,
+ ValueType ResTy, RegisterClass FPRC, ValueType OpTy, Operand OpImm,
+ ValueType OpNTy, ValueType ExTy, Operand OpNImm> {
+ // fmla
+ def : Pat<(ResTy (fma (ResTy FPRC:$Rn),
+ (ResTy (vector_extract (OpTy VPR128:$MRm), OpImm:$Imm)),
+ (ResTy FPRC:$Ra))),
+ (ResTy (FMLAI (ResTy FPRC:$Ra),
+ (ResTy FPRC:$Rn), (OpTy VPR128:$MRm), OpImm:$Imm))>;
+
+ def : Pat<(ResTy (fma (ResTy FPRC:$Rn),
+ (ResTy (vector_extract (OpNTy VPR64:$MRm), OpNImm:$Imm)),
+ (ResTy FPRC:$Ra))),
+ (ResTy (FMLAI (ResTy FPRC:$Ra),
+ (ResTy FPRC:$Rn),
+ (ExTy (SUBREG_TO_REG (i64 0), VPR64:$MRm, sub_64)),
+ OpNImm:$Imm))>;
+
+ // swapped fmla operands
+ def : Pat<(ResTy (fma
+ (ResTy (vector_extract (OpTy VPR128:$MRm), OpImm:$Imm)),
+ (ResTy FPRC:$Rn),
+ (ResTy FPRC:$Ra))),
+ (ResTy (FMLAI (ResTy FPRC:$Ra),
+ (ResTy FPRC:$Rn), (OpTy VPR128:$MRm), OpImm:$Imm))>;
+
+ def : Pat<(ResTy (fma
+ (ResTy (vector_extract (OpNTy VPR64:$MRm), OpNImm:$Imm)),
+ (ResTy FPRC:$Rn),
+ (ResTy FPRC:$Ra))),
+ (ResTy (FMLAI (ResTy FPRC:$Ra),
+ (ResTy FPRC:$Rn),
+ (ExTy (SUBREG_TO_REG (i64 0), VPR64:$MRm, sub_64)),
+ OpNImm:$Imm))>;
+
+ // fmls
+ def : Pat<(ResTy (fma (ResTy FPRC:$Rn),
+ (fneg (ResTy (vector_extract (OpTy VPR128:$MRm), OpImm:$Imm))),
+ (ResTy FPRC:$Ra))),
+ (ResTy (FMLSI (ResTy FPRC:$Ra),
+ (ResTy FPRC:$Rn), (OpTy VPR128:$MRm), OpImm:$Imm))>;
+
+ def : Pat<(ResTy (fma (ResTy FPRC:$Rn),
+ (fneg (ResTy (vector_extract (OpNTy VPR64:$MRm), OpNImm:$Imm))),
+ (ResTy FPRC:$Ra))),
+ (ResTy (FMLSI (ResTy FPRC:$Ra),
+ (ResTy FPRC:$Rn),
+ (ExTy (SUBREG_TO_REG (i64 0), VPR64:$MRm, sub_64)),
+ OpNImm:$Imm))>;
+
+ // swapped fmls operands
+ def : Pat<(ResTy (fma
+ (fneg (ResTy (vector_extract (OpTy VPR128:$MRm), OpImm:$Imm))),
+ (ResTy FPRC:$Rn),
+ (ResTy FPRC:$Ra))),
+ (ResTy (FMLSI (ResTy FPRC:$Ra),
+ (ResTy FPRC:$Rn), (OpTy VPR128:$MRm), OpImm:$Imm))>;
+
+ def : Pat<(ResTy (fma
+ (fneg (ResTy (vector_extract (OpNTy VPR64:$MRm), OpNImm:$Imm))),
+ (ResTy FPRC:$Rn),
+ (ResTy FPRC:$Ra))),
+ (ResTy (FMLSI (ResTy FPRC:$Ra),
+ (ResTy FPRC:$Rn),
+ (ExTy (SUBREG_TO_REG (i64 0), VPR64:$MRm, sub_64)),
+ OpNImm:$Imm))>;
+}
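+
+// For illustration: because the generic fma node is matched, IR such as
+//   %r = call double @llvm.fma.f64(double %n, double %m, double %a)
+// where %m is extracted from lane 1 of a <2 x double> value selects to
+// "fmla d0, d1, v2.d[1]"; the fneg variants select fmls instead.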
+
+// Scalar Floating Point fused multiply-add and
+// multiply-subtract (scalar, by element)
+defm : Neon_ScalarXIndexedElem_FMA_Patterns<FMLAssv_4S, FMLSssv_4S,
+ f32, FPR32, v4f32, neon_uimm2_bare, v2f32, v4f32, neon_uimm1_bare>;
+defm : Neon_ScalarXIndexedElem_FMA_Patterns<FMLAddv_2D, FMLSddv_2D,
+ f64, FPR64, v2f64, neon_uimm1_bare, v1f64, v2f64, neon_uimm0_bare>;
+
+// Scalar Signed saturating doubling multiply long (scalar, by element)
+def SQDMULLshv_4H : NeonI_ScalarXIndexedElemArith<"sqdmull",
+ 0b1011, ".h", 0b0, 0b0, 0b1, FPR32, FPR16, VPR64Lo, neon_uimm2_bare> {
+ let Inst{11} = 0b0; // h
+ let Inst{21} = Imm{1}; // l
+ let Inst{20} = Imm{0}; // m
+ let Inst{19-16} = MRm{3-0};
+}
+def SQDMULLshv_8H : NeonI_ScalarXIndexedElemArith<"sqdmull",
+ 0b1011, ".h", 0b0, 0b0, 0b1, FPR32, FPR16, VPR128Lo, neon_uimm3_bare> {
+ let Inst{11} = Imm{2}; // h
+ let Inst{21} = Imm{1}; // l
+ let Inst{20} = Imm{0}; // m
+ let Inst{19-16} = MRm{3-0};
+}
+def SQDMULLdsv_2S : NeonI_ScalarXIndexedElemArith<"sqdmull",
+ 0b1011, ".s", 0b0, 0b1, 0b0, FPR64, FPR32, VPR64, neon_uimm1_bare> {
+ let Inst{11} = 0b0; // h
+ let Inst{21} = Imm{0}; // l
+ let Inst{20-16} = MRm;
+}
+def SQDMULLdsv_4S : NeonI_ScalarXIndexedElemArith<"sqdmull",
+ 0b1011, ".s", 0b0, 0b1, 0b0, FPR64, FPR32, VPR128, neon_uimm2_bare> {
+ let Inst{11} = Imm{1}; // h
+ let Inst{21} = Imm{0}; // l
+ let Inst{20-16} = MRm;
+}
+
+multiclass Neon_ScalarXIndexedElem_MUL_Patterns<
+ SDPatternOperator opnode,
+ Instruction INST,
+ ValueType ResTy, RegisterClass FPRC,
+ ValueType OpVTy, ValueType OpTy,
+ ValueType VecOpTy, ValueType ExTy, RegisterOperand VPRC, Operand OpImm> {
+
+ def : Pat<(ResTy (opnode (OpVTy FPRC:$Rn),
+ (OpVTy (scalar_to_vector
+ (ExTy (vector_extract (VecOpTy VPRC:$MRm), OpImm:$Imm)))))),
+ (ResTy (INST (OpVTy FPRC:$Rn), (VecOpTy VPRC:$MRm), OpImm:$Imm))>;
+
+  // swapped operands
+ def : Pat<(ResTy (opnode
+ (OpVTy (scalar_to_vector
+ (ExTy (vector_extract (VecOpTy VPRC:$MRm), OpImm:$Imm)))),
+ (OpVTy FPRC:$Rn))),
+ (ResTy (INST (OpVTy FPRC:$Rn), (VecOpTy VPRC:$MRm), OpImm:$Imm))>;
+}
+
+// Patterns for Scalar Signed saturating doubling
+// multiply long (scalar, by element)
+defm : Neon_ScalarXIndexedElem_MUL_Patterns<int_arm_neon_vqdmull,
+ SQDMULLshv_4H, v1i32, FPR16, v1i16, i16, v4i16,
+ i32, VPR64Lo, neon_uimm2_bare>;
+defm : Neon_ScalarXIndexedElem_MUL_Patterns<int_arm_neon_vqdmull,
+ SQDMULLshv_8H, v1i32, FPR16, v1i16, i16, v8i16,
+ i32, VPR128Lo, neon_uimm3_bare>;
+defm : Neon_ScalarXIndexedElem_MUL_Patterns<int_arm_neon_vqdmull,
+ SQDMULLdsv_2S, v1i64, FPR32, v1i32, i32, v2i32,
+ i32, VPR64Lo, neon_uimm1_bare>;
+defm : Neon_ScalarXIndexedElem_MUL_Patterns<int_arm_neon_vqdmull,
+ SQDMULLdsv_4S, v1i64, FPR32, v1i32, i32, v4i32,
+ i32, VPR128Lo, neon_uimm2_bare>;
+
+// Scalar Signed saturating doubling multiply-add long (scalar, by element)
+def SQDMLALshv_4H : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlal",
+ 0b0011, ".h", 0b0, 0b0, 0b1, FPR32, FPR16, VPR64Lo, neon_uimm2_bare> {
+ let Inst{11} = 0b0; // h
+ let Inst{21} = Imm{1}; // l
+ let Inst{20} = Imm{0}; // m
+ let Inst{19-16} = MRm{3-0};
+}
+def SQDMLALshv_8H : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlal",
+ 0b0011, ".h", 0b0, 0b0, 0b1, FPR32, FPR16, VPR128Lo, neon_uimm3_bare> {
+ let Inst{11} = Imm{2}; // h
+ let Inst{21} = Imm{1}; // l
+ let Inst{20} = Imm{0}; // m
+ let Inst{19-16} = MRm{3-0};
+}
+def SQDMLALdsv_2S : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlal",
+ 0b0011, ".s", 0b0, 0b1, 0b0, FPR64, FPR32, VPR64, neon_uimm1_bare> {
+ let Inst{11} = 0b0; // h
+ let Inst{21} = Imm{0}; // l
+ let Inst{20-16} = MRm;
+}
+def SQDMLALdsv_4S : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlal",
+ 0b0011, ".s", 0b0, 0b1, 0b0, FPR64, FPR32, VPR128, neon_uimm2_bare> {
+ let Inst{11} = Imm{1}; // h
+ let Inst{21} = Imm{0}; // l
+ let Inst{20-16} = MRm;
+}
+
+// Scalar Signed saturating doubling
+// multiply-subtract long (scalar, by element)
+def SQDMLSLshv_4H : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlsl",
+ 0b0111, ".h", 0b0, 0b0, 0b1, FPR32, FPR16, VPR64Lo, neon_uimm2_bare> {
+ let Inst{11} = 0b0; // h
+ let Inst{21} = Imm{1}; // l
+ let Inst{20} = Imm{0}; // m
+ let Inst{19-16} = MRm{3-0};
+}
+def SQDMLSLshv_8H : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlsl",
+ 0b0111, ".h", 0b0, 0b0, 0b1, FPR32, FPR16, VPR128Lo, neon_uimm3_bare> {
+ let Inst{11} = Imm{2}; // h
+ let Inst{21} = Imm{1}; // l
+ let Inst{20} = Imm{0}; // m
+ let Inst{19-16} = MRm{3-0};
+}
+def SQDMLSLdsv_2S : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlsl",
+ 0b0111, ".s", 0b0, 0b1, 0b0, FPR64, FPR32, VPR64, neon_uimm1_bare> {
+ let Inst{11} = 0b0; // h
+ let Inst{21} = Imm{0}; // l
+ let Inst{20-16} = MRm;
+}
+def SQDMLSLdsv_4S : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlsl",
+ 0b0111, ".s", 0b0, 0b1, 0b0, FPR64, FPR32, VPR128, neon_uimm2_bare> {
+ let Inst{11} = Imm{1}; // h
+ let Inst{21} = Imm{0}; // l
+ let Inst{20-16} = MRm;
+}
+
+multiclass Neon_ScalarXIndexedElem_MLAL_Patterns<
+ SDPatternOperator opnode,
+ SDPatternOperator coreopnode,
+ Instruction INST,
+ ValueType ResTy, RegisterClass ResFPRC, RegisterClass FPRC,
+ ValueType OpTy,
+ ValueType OpVTy, ValueType ExTy, RegisterOperand VPRC, Operand OpImm> {
+
+ def : Pat<(ResTy (opnode
+ (ResTy ResFPRC:$Ra),
+ (ResTy (coreopnode (OpTy FPRC:$Rn),
+ (OpTy (scalar_to_vector
+ (ExTy (vector_extract (OpVTy VPRC:$MRm), OpImm:$Imm)))))))),
+ (ResTy (INST (ResTy ResFPRC:$Ra),
+ (OpTy FPRC:$Rn), (OpVTy VPRC:$MRm), OpImm:$Imm))>;
+
+ // swapped operands
+ def : Pat<(ResTy (opnode
+ (ResTy ResFPRC:$Ra),
+ (ResTy (coreopnode
+ (OpTy (scalar_to_vector
+ (ExTy (vector_extract (OpVTy VPRC:$MRm), OpImm:$Imm)))),
+ (OpTy FPRC:$Rn))))),
+ (ResTy (INST (ResTy ResFPRC:$Ra),
+ (OpTy FPRC:$Rn), (OpVTy VPRC:$MRm), OpImm:$Imm))>;
+}
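+
+// For illustration: these patterns fold a saturating add or subtract of
+// an sqdmull-by-element result into the single accumulating instruction,
+// e.g. sqadd(acc, sqdmull(Rn, Rm[lane])) becomes
+// "sqdmlal s0, h1, v2.h[lane]".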
+
+// Patterns for Scalar Signed saturating
+// doubling multiply-add long (scalar, by element)
+defm : Neon_ScalarXIndexedElem_MLAL_Patterns<int_arm_neon_vqadds,
+ int_arm_neon_vqdmull, SQDMLALshv_4H, v1i32, FPR32, FPR16, v1i16, v4i16,
+ i32, VPR64Lo, neon_uimm2_bare>;
+defm : Neon_ScalarXIndexedElem_MLAL_Patterns<int_arm_neon_vqadds,
+ int_arm_neon_vqdmull, SQDMLALshv_8H, v1i32, FPR32, FPR16, v1i16, v8i16,
+ i32, VPR128Lo, neon_uimm3_bare>;
+defm : Neon_ScalarXIndexedElem_MLAL_Patterns<int_arm_neon_vqadds,
+ int_arm_neon_vqdmull, SQDMLALdsv_2S, v1i64, FPR64, FPR32, v1i32, v2i32,
+ i32, VPR64Lo, neon_uimm1_bare>;
+defm : Neon_ScalarXIndexedElem_MLAL_Patterns<int_arm_neon_vqadds,
+ int_arm_neon_vqdmull, SQDMLALdsv_4S, v1i64, FPR64, FPR32, v1i32, v4i32,
+ i32, VPR128Lo, neon_uimm2_bare>;
+
+// Patterns for Scalar Signed saturating
+// doubling multiply-sub long (scalar, by element)
+defm : Neon_ScalarXIndexedElem_MLAL_Patterns<int_arm_neon_vqsubs,
+ int_arm_neon_vqdmull, SQDMLSLshv_4H, v1i32, FPR32, FPR16, v1i16, v4i16,
+ i32, VPR64Lo, neon_uimm2_bare>;
+defm : Neon_ScalarXIndexedElem_MLAL_Patterns<int_arm_neon_vqsubs,
+ int_arm_neon_vqdmull, SQDMLSLshv_8H, v1i32, FPR32, FPR16, v1i16, v8i16,
+ i32, VPR128Lo, neon_uimm3_bare>;
+defm : Neon_ScalarXIndexedElem_MLAL_Patterns<int_arm_neon_vqsubs,
+ int_arm_neon_vqdmull, SQDMLSLdsv_2S, v1i64, FPR64, FPR32, v1i32, v2i32,
+ i32, VPR64Lo, neon_uimm1_bare>;
+defm : Neon_ScalarXIndexedElem_MLAL_Patterns<int_arm_neon_vqsubs,
+ int_arm_neon_vqdmull, SQDMLSLdsv_4S, v1i64, FPR64, FPR32, v1i32, v4i32,
+ i32, VPR128Lo, neon_uimm2_bare>;
+
+// Scalar general arithmetic operations
+class Neon_Scalar_GeneralMath2D_pattern<SDPatternOperator opnode,
+ Instruction INST>
+ : Pat<(v1f64 (opnode (v1f64 FPR64:$Rn))), (INST FPR64:$Rn)>;
+
+class Neon_Scalar_GeneralMath3D_pattern<SDPatternOperator opnode,
+ Instruction INST>
+ : Pat<(v1f64 (opnode (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
+ (INST FPR64:$Rn, FPR64:$Rm)>;
+
+class Neon_Scalar_GeneralMath4D_pattern<SDPatternOperator opnode,
+ Instruction INST>
+ : Pat<(v1f64 (opnode (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm),
+ (v1f64 FPR64:$Ra))),
+ (INST FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>;
+
+def : Neon_Scalar_GeneralMath3D_pattern<fadd, FADDddd>;
+def : Neon_Scalar_GeneralMath3D_pattern<fmul, FMULddd>;
+def : Neon_Scalar_GeneralMath3D_pattern<fsub, FSUBddd>;
+def : Neon_Scalar_GeneralMath3D_pattern<fdiv, FDIVddd>;
+def : Neon_Scalar_GeneralMath3D_pattern<int_arm_neon_vabds, FABDddd>;
+def : Neon_Scalar_GeneralMath3D_pattern<int_arm_neon_vmaxs, FMAXddd>;
+def : Neon_Scalar_GeneralMath3D_pattern<int_arm_neon_vmins, FMINddd>;
+def : Neon_Scalar_GeneralMath3D_pattern<int_aarch64_neon_vmaxnm, FMAXNMddd>;
+def : Neon_Scalar_GeneralMath3D_pattern<int_aarch64_neon_vminnm, FMINNMddd>;
+
+def : Neon_Scalar_GeneralMath2D_pattern<fabs, FABSdd>;
+def : Neon_Scalar_GeneralMath2D_pattern<fneg, FNEGdd>;
+
+def : Neon_Scalar_GeneralMath4D_pattern<fma, FMADDdddd>;
+def : Neon_Scalar_GeneralMath4D_pattern<fmsub, FMSUBdddd>;
+
+// Scalar Signed saturating doubling multiply returning
+// high half (scalar, by element)
+def SQDMULHhhv_4H : NeonI_ScalarXIndexedElemArith<"sqdmulh",
+ 0b1100, ".h", 0b0, 0b0, 0b1, FPR16, FPR16, VPR64Lo, neon_uimm2_bare> {
+ let Inst{11} = 0b0; // h
+ let Inst{21} = Imm{1}; // l
+ let Inst{20} = Imm{0}; // m
+ let Inst{19-16} = MRm{3-0};
+}
+def SQDMULHhhv_8H : NeonI_ScalarXIndexedElemArith<"sqdmulh",
+ 0b1100, ".h", 0b0, 0b0, 0b1, FPR16, FPR16, VPR128Lo, neon_uimm3_bare> {
+ let Inst{11} = Imm{2}; // h
+ let Inst{21} = Imm{1}; // l
+ let Inst{20} = Imm{0}; // m
+ let Inst{19-16} = MRm{3-0};
+}
+def SQDMULHssv_2S : NeonI_ScalarXIndexedElemArith<"sqdmulh",
+ 0b1100, ".s", 0b0, 0b1, 0b0, FPR32, FPR32, VPR64, neon_uimm1_bare> {
+ let Inst{11} = 0b0; // h
+ let Inst{21} = Imm{0}; // l
+ let Inst{20-16} = MRm;
+}
+def SQDMULHssv_4S : NeonI_ScalarXIndexedElemArith<"sqdmulh",
+ 0b1100, ".s", 0b0, 0b1, 0b0, FPR32, FPR32, VPR128, neon_uimm2_bare> {
+ let Inst{11} = Imm{1}; // h
+ let Inst{21} = Imm{0}; // l
+ let Inst{20-16} = MRm;
+}
+
+// Patterns for Scalar Signed saturating doubling multiply returning
+// high half (scalar, by element)
+defm : Neon_ScalarXIndexedElem_MUL_Patterns<int_arm_neon_vqdmulh,
+ SQDMULHhhv_4H, v1i16, FPR16, v1i16, i16, v4i16,
+ i32, VPR64Lo, neon_uimm2_bare>;
+defm : Neon_ScalarXIndexedElem_MUL_Patterns<int_arm_neon_vqdmulh,
+ SQDMULHhhv_8H, v1i16, FPR16, v1i16, i16, v8i16,
+ i32, VPR128Lo, neon_uimm3_bare>;
+defm : Neon_ScalarXIndexedElem_MUL_Patterns<int_arm_neon_vqdmulh,
+ SQDMULHssv_2S, v1i32, FPR32, v1i32, i32, v2i32,
+ i32, VPR64Lo, neon_uimm1_bare>;
+defm : Neon_ScalarXIndexedElem_MUL_Patterns<int_arm_neon_vqdmulh,
+ SQDMULHssv_4S, v1i32, FPR32, v1i32, i32, v4i32,
+ i32, VPR128Lo, neon_uimm2_bare>;
+
+// Scalar Signed saturating rounding doubling multiply
+// returning high half (scalar, by element)
+def SQRDMULHhhv_4H : NeonI_ScalarXIndexedElemArith<"sqrdmulh",
+ 0b1101, ".h", 0b0, 0b0, 0b1, FPR16, FPR16, VPR64Lo, neon_uimm2_bare> {
+ let Inst{11} = 0b0; // h
+ let Inst{21} = Imm{1}; // l
+ let Inst{20} = Imm{0}; // m
+ let Inst{19-16} = MRm{3-0};
+}
+def SQRDMULHhhv_8H : NeonI_ScalarXIndexedElemArith<"sqrdmulh",
+ 0b1101, ".h", 0b0, 0b0, 0b1, FPR16, FPR16, VPR128Lo, neon_uimm3_bare> {
+ let Inst{11} = Imm{2}; // h
+ let Inst{21} = Imm{1}; // l
+ let Inst{20} = Imm{0}; // m
+ let Inst{19-16} = MRm{3-0};
+}
+def SQRDMULHssv_2S : NeonI_ScalarXIndexedElemArith<"sqrdmulh",
+ 0b1101, ".s", 0b0, 0b1, 0b0, FPR32, FPR32, VPR64, neon_uimm1_bare> {
+ let Inst{11} = 0b0; // h
+ let Inst{21} = Imm{0}; // l
+ let Inst{20-16} = MRm;
+}
+def SQRDMULHssv_4S : NeonI_ScalarXIndexedElemArith<"sqrdmulh",
+ 0b1101, ".s", 0b0, 0b1, 0b0, FPR32, FPR32, VPR128, neon_uimm2_bare> {
+ let Inst{11} = Imm{1}; // h
+ let Inst{21} = Imm{0}; // l
+ let Inst{20-16} = MRm;
+}
+
+defm : Neon_ScalarXIndexedElem_MUL_Patterns<int_arm_neon_vqrdmulh,
+ SQRDMULHhhv_4H, v1i16, FPR16, v1i16, i16, v4i16, i32,
+ VPR64Lo, neon_uimm2_bare>;
+defm : Neon_ScalarXIndexedElem_MUL_Patterns<int_arm_neon_vqrdmulh,
+ SQRDMULHhhv_8H, v1i16, FPR16, v1i16, i16, v8i16, i32,
+ VPR128Lo, neon_uimm3_bare>;
+defm : Neon_ScalarXIndexedElem_MUL_Patterns<int_arm_neon_vqrdmulh,
+ SQRDMULHssv_2S, v1i32, FPR32, v1i32, i32, v2i32, i32,
+ VPR64Lo, neon_uimm1_bare>;
+defm : Neon_ScalarXIndexedElem_MUL_Patterns<int_arm_neon_vqrdmulh,
+ SQRDMULHssv_4S, v1i32, FPR32, v1i32, i32, v4i32, i32,
+ VPR128Lo, neon_uimm2_bare>;
+
+// Scalar Copy - DUP element to scalar
+class NeonI_Scalar_DUP<string asmop, string asmlane,
+ RegisterClass ResRC, RegisterOperand VPRC,
+ Operand OpImm>
+ : NeonI_ScalarCopy<(outs ResRC:$Rd), (ins VPRC:$Rn, OpImm:$Imm),
+ asmop # "\t$Rd, $Rn." # asmlane # "[$Imm]",
+ [],
+ NoItinerary> {
+ bits<4> Imm;
+}
+
+def DUPbv_B : NeonI_Scalar_DUP<"dup", "b", FPR8, VPR128, neon_uimm4_bare> {
+ let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1};
+}
+def DUPhv_H : NeonI_Scalar_DUP<"dup", "h", FPR16, VPR128, neon_uimm3_bare> {
+ let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0};
+}
+def DUPsv_S : NeonI_Scalar_DUP<"dup", "s", FPR32, VPR128, neon_uimm2_bare> {
+ let Inst{20-16} = {Imm{1}, Imm{0}, 0b1, 0b0, 0b0};
+}
+def DUPdv_D : NeonI_Scalar_DUP<"dup", "d", FPR64, VPR128, neon_uimm1_bare> {
+ let Inst{20-16} = {Imm, 0b1, 0b0, 0b0, 0b0};
+}
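+
+// For illustration: Inst{20-16} is the standard imm5 element encoding;
+// the position of the trailing 1 bit selects the element size (xxxx1 =
+// byte, xxx10 = half, xx100 = word, x1000 = doubleword) and the bits
+// above it hold the lane index.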
+
+multiclass NeonI_Scalar_DUP_Elt_pattern<Instruction DUPI, ValueType ResTy,
+ ValueType OpTy, Operand OpImm,
+ ValueType OpNTy, ValueType ExTy, Operand OpNImm> {
+ def : Pat<(ResTy (vector_extract (OpTy VPR128:$Rn), OpImm:$Imm)),
+ (ResTy (DUPI (OpTy VPR128:$Rn), OpImm:$Imm))>;
+
+ def : Pat<(ResTy (vector_extract (OpNTy VPR64:$Rn), OpNImm:$Imm)),
+ (ResTy (DUPI
+ (ExTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
+ OpNImm:$Imm))>;
+}
+
+// Patterns for vector extract of FP data using scalar DUP instructions
+defm : NeonI_Scalar_DUP_Elt_pattern<DUPsv_S, f32,
+ v4f32, neon_uimm2_bare, v2f32, v4f32, neon_uimm1_bare>;
+defm : NeonI_Scalar_DUP_Elt_pattern<DUPdv_D, f64,
+ v2f64, neon_uimm1_bare, v1f64, v2f64, neon_uimm0_bare>;
+
+multiclass NeonI_Scalar_DUP_Ext_Vec_pattern<Instruction DUPI,
+ ValueType ResTy, ValueType OpTy,Operand OpLImm,
+ ValueType NOpTy, ValueType ExTy, Operand OpNImm> {
+
+ def : Pat<(ResTy (extract_subvector (OpTy VPR128:$Rn), OpLImm:$Imm)),
+ (ResTy (DUPI VPR128:$Rn, OpLImm:$Imm))>;
+
+ def : Pat<(ResTy (extract_subvector (NOpTy VPR64:$Rn), OpNImm:$Imm)),
+ (ResTy (DUPI
+ (ExTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
+ OpNImm:$Imm))>;
+}
+
+// Patterns for extract subvectors of v1ix data using scalar DUP instructions.
+defm : NeonI_Scalar_DUP_Ext_Vec_pattern<DUPbv_B, v1i8, v16i8, neon_uimm4_bare,
+ v8i8, v16i8, neon_uimm3_bare>;
+defm : NeonI_Scalar_DUP_Ext_Vec_pattern<DUPhv_H, v1i16, v8i16, neon_uimm3_bare,
+ v4i16, v8i16, neon_uimm2_bare>;
+defm : NeonI_Scalar_DUP_Ext_Vec_pattern<DUPsv_S, v1i32, v4i32, neon_uimm2_bare,
+ v2i32, v4i32, neon_uimm1_bare>;
+
+multiclass NeonI_Scalar_DUP_Copy_pattern1<Instruction DUPI, ValueType ResTy,
+ ValueType OpTy, ValueType ElemTy,
+ Operand OpImm, ValueType OpNTy,
+ ValueType ExTy, Operand OpNImm> {
+
+ def : Pat<(ResTy (vector_insert (ResTy undef),
+ (ElemTy (vector_extract (OpTy VPR128:$Rn), OpImm:$Imm)),
+ (neon_uimm0_bare:$Imm))),
+ (ResTy (DUPI (OpTy VPR128:$Rn), OpImm:$Imm))>;
+
+ def : Pat<(ResTy (vector_insert (ResTy undef),
+ (ElemTy (vector_extract (OpNTy VPR64:$Rn), OpNImm:$Imm)),
+ (OpNImm:$Imm))),
+ (ResTy (DUPI
+ (ExTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
+ OpNImm:$Imm))>;
+}
+
+multiclass NeonI_Scalar_DUP_Copy_pattern2<Instruction DUPI, ValueType ResTy,
+ ValueType OpTy, ValueType ElemTy,
+ Operand OpImm, ValueType OpNTy,
+ ValueType ExTy, Operand OpNImm> {
+
+ def : Pat<(ResTy (scalar_to_vector
+ (ElemTy (vector_extract (OpTy VPR128:$Rn), OpImm:$Imm)))),
+ (ResTy (DUPI (OpTy VPR128:$Rn), OpImm:$Imm))>;
+
+ def : Pat<(ResTy (scalar_to_vector
+ (ElemTy (vector_extract (OpNTy VPR64:$Rn), OpNImm:$Imm)))),
+ (ResTy (DUPI
+ (ExTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
+ OpNImm:$Imm))>;
+}
+
+// Patterns for vector copy to v1ix and v1fx vectors using scalar DUP
+// instructions.
+defm : NeonI_Scalar_DUP_Copy_pattern1<DUPdv_D,
+ v1i64, v2i64, i64, neon_uimm1_bare,
+ v1i64, v2i64, neon_uimm0_bare>;
+defm : NeonI_Scalar_DUP_Copy_pattern1<DUPsv_S,
+ v1i32, v4i32, i32, neon_uimm2_bare,
+ v2i32, v4i32, neon_uimm1_bare>;
+defm : NeonI_Scalar_DUP_Copy_pattern1<DUPhv_H,
+ v1i16, v8i16, i32, neon_uimm3_bare,
+ v4i16, v8i16, neon_uimm2_bare>;
+defm : NeonI_Scalar_DUP_Copy_pattern1<DUPbv_B,
+ v1i8, v16i8, i32, neon_uimm4_bare,
+ v8i8, v16i8, neon_uimm3_bare>;
+defm : NeonI_Scalar_DUP_Copy_pattern1<DUPdv_D,
+ v1f64, v2f64, f64, neon_uimm1_bare,
+ v1f64, v2f64, neon_uimm0_bare>;
+defm : NeonI_Scalar_DUP_Copy_pattern1<DUPsv_S,
+ v1f32, v4f32, f32, neon_uimm2_bare,
+ v2f32, v4f32, neon_uimm1_bare>;
+defm : NeonI_Scalar_DUP_Copy_pattern2<DUPdv_D,
+ v1i64, v2i64, i64, neon_uimm1_bare,
+ v1i64, v2i64, neon_uimm0_bare>;
+defm : NeonI_Scalar_DUP_Copy_pattern2<DUPsv_S,
+ v1i32, v4i32, i32, neon_uimm2_bare,
+ v2i32, v4i32, neon_uimm1_bare>;
+defm : NeonI_Scalar_DUP_Copy_pattern2<DUPhv_H,
+ v1i16, v8i16, i32, neon_uimm3_bare,
+ v4i16, v8i16, neon_uimm2_bare>;
+defm : NeonI_Scalar_DUP_Copy_pattern2<DUPbv_B,
+ v1i8, v16i8, i32, neon_uimm4_bare,
+ v8i8, v16i8, neon_uimm3_bare>;
+defm : NeonI_Scalar_DUP_Copy_pattern2<DUPdv_D,
+ v1f64, v2f64, f64, neon_uimm1_bare,
+ v1f64, v2f64, neon_uimm0_bare>;
+defm : NeonI_Scalar_DUP_Copy_pattern2<DUPsv_S,
+ v1f32, v4f32, f32, neon_uimm2_bare,
+ v2f32, v4f32, neon_uimm1_bare>;
+
+multiclass NeonI_Scalar_DUP_alias<string asmop, string asmlane,
+ Instruction DUPI, Operand OpImm,
+ RegisterClass ResRC> {
+ def : NeonInstAlias<!strconcat(asmop, "$Rd, $Rn" # asmlane # "[$Imm]"),
+ (DUPI ResRC:$Rd, VPR128:$Rn, OpImm:$Imm), 0b0>;
+}
+
+// Aliases for Scalar copy - DUP element (scalar)
+// FIXME: This is actually the preferred syntax but TableGen can't deal with
+// custom printing of aliases.
+defm : NeonI_Scalar_DUP_alias<"mov", ".b", DUPbv_B, neon_uimm4_bare, FPR8>;
+defm : NeonI_Scalar_DUP_alias<"mov", ".h", DUPhv_H, neon_uimm3_bare, FPR16>;
+defm : NeonI_Scalar_DUP_alias<"mov", ".s", DUPsv_S, neon_uimm2_bare, FPR32>;
+defm : NeonI_Scalar_DUP_alias<"mov", ".d", DUPdv_D, neon_uimm1_bare, FPR64>;
+
+multiclass NeonI_SDUP<PatFrag GetLow, PatFrag GetHigh, ValueType ResTy,
+ ValueType OpTy> {
+ def : Pat<(ResTy (GetLow VPR128:$Rn)),
+ (ResTy (EXTRACT_SUBREG (OpTy VPR128:$Rn), sub_64))>;
+ def : Pat<(ResTy (GetHigh VPR128:$Rn)),
+ (ResTy (DUPdv_D (OpTy VPR128:$Rn), 1))>;
+}
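+
+// For illustration: taking the low half of a Q register is free (a plain
+// sub-register extract), while the high half must be materialized with
+// "dup d0, v1.d[1]"; hence the asymmetry between the two patterns above.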
+
+defm : NeonI_SDUP<Neon_Low16B, Neon_High16B, v8i8, v16i8>;
+defm : NeonI_SDUP<Neon_Low8H, Neon_High8H, v4i16, v8i16>;
+defm : NeonI_SDUP<Neon_Low4S, Neon_High4S, v2i32, v4i32>;
+defm : NeonI_SDUP<Neon_Low2D, Neon_High2D, v1i64, v2i64>;
+defm : NeonI_SDUP<Neon_Low4float, Neon_High4float, v2f32, v4f32>;
+defm : NeonI_SDUP<Neon_Low2double, Neon_High2double, v1f64, v2f64>;
//===----------------------------------------------------------------------===//
// Non-Instruction Patterns
@@ -1578,57 +6085,2587 @@ def : Pat<(v4i32 (bitconvert (v2f64 VPR128:$src))), (v4i32 VPR128:$src)>;
def : Pat<(v8i16 (bitconvert (v2f64 VPR128:$src))), (v8i16 VPR128:$src)>;
def : Pat<(v16i8 (bitconvert (v2f64 VPR128:$src))), (v16i8 VPR128:$src)>;
-
// ...and scalar bitcasts...
+def : Pat<(f16 (bitconvert (v1i16 FPR16:$src))), (f16 FPR16:$src)>;
+def : Pat<(f32 (bitconvert (v1i32 FPR32:$src))), (f32 FPR32:$src)>;
+def : Pat<(f64 (bitconvert (v1i64 FPR64:$src))), (f64 FPR64:$src)>;
+def : Pat<(f32 (bitconvert (v1f32 FPR32:$src))), (f32 FPR32:$src)>;
+def : Pat<(f64 (bitconvert (v1f64 FPR64:$src))), (f64 FPR64:$src)>;
+
+def : Pat<(i64 (bitconvert (v1i64 FPR64:$src))), (FMOVxd $src)>;
+def : Pat<(i64 (bitconvert (v1f64 FPR64:$src))), (FMOVxd $src)>;
+def : Pat<(i64 (bitconvert (v2i32 FPR64:$src))), (FMOVxd $src)>;
+def : Pat<(i64 (bitconvert (v2f32 FPR64:$src))), (FMOVxd $src)>;
+def : Pat<(i64 (bitconvert (v4i16 FPR64:$src))), (FMOVxd $src)>;
+def : Pat<(i64 (bitconvert (v8i8 FPR64:$src))), (FMOVxd $src)>;
+
+def : Pat<(i32 (bitconvert (v1i32 FPR32:$src))), (FMOVws $src)>;
+
+def : Pat<(v8i8 (bitconvert (v1i64 VPR64:$src))), (v8i8 VPR64:$src)>;
+def : Pat<(v4i16 (bitconvert (v1i64 VPR64:$src))), (v4i16 VPR64:$src)>;
+def : Pat<(v2i32 (bitconvert (v1i64 VPR64:$src))), (v2i32 VPR64:$src)>;
+
+def : Pat<(f64 (bitconvert (v8i8 VPR64:$src))), (f64 VPR64:$src)>;
+def : Pat<(f64 (bitconvert (v4i16 VPR64:$src))), (f64 VPR64:$src)>;
+def : Pat<(f64 (bitconvert (v2i32 VPR64:$src))), (f64 VPR64:$src)>;
+def : Pat<(f64 (bitconvert (v2f32 VPR64:$src))), (f64 VPR64:$src)>;
+def : Pat<(f64 (bitconvert (v1i64 VPR64:$src))), (f64 VPR64:$src)>;
+
+def : Pat<(f128 (bitconvert (v16i8 VPR128:$src))), (f128 VPR128:$src)>;
+def : Pat<(f128 (bitconvert (v8i16 VPR128:$src))), (f128 VPR128:$src)>;
+def : Pat<(f128 (bitconvert (v4i32 VPR128:$src))), (f128 VPR128:$src)>;
+def : Pat<(f128 (bitconvert (v2i64 VPR128:$src))), (f128 VPR128:$src)>;
+def : Pat<(f128 (bitconvert (v4f32 VPR128:$src))), (f128 VPR128:$src)>;
+def : Pat<(f128 (bitconvert (v2f64 VPR128:$src))), (f128 VPR128:$src)>;
+
+def : Pat<(v1i16 (bitconvert (f16 FPR16:$src))), (v1i16 FPR16:$src)>;
+def : Pat<(v1i32 (bitconvert (f32 FPR32:$src))), (v1i32 FPR32:$src)>;
+def : Pat<(v1i64 (bitconvert (f64 FPR64:$src))), (v1i64 FPR64:$src)>;
+def : Pat<(v1f32 (bitconvert (f32 FPR32:$src))), (v1f32 FPR32:$src)>;
+def : Pat<(v1f64 (bitconvert (f64 FPR64:$src))), (v1f64 FPR64:$src)>;
+
+def : Pat<(v1i64 (bitconvert (i64 GPR64:$src))), (FMOVdx $src)>;
+def : Pat<(v1f64 (bitconvert (i64 GPR64:$src))), (FMOVdx $src)>;
+def : Pat<(v2i32 (bitconvert (i64 GPR64:$src))), (FMOVdx $src)>;
+def : Pat<(v2f32 (bitconvert (i64 GPR64:$src))), (FMOVdx $src)>;
+def : Pat<(v4i16 (bitconvert (i64 GPR64:$src))), (FMOVdx $src)>;
+def : Pat<(v8i8 (bitconvert (i64 GPR64:$src))), (FMOVdx $src)>;
+
+def : Pat<(v1i32 (bitconvert (i32 GPR32:$src))), (FMOVsw $src)>;
+
+def : Pat<(v8i8 (bitconvert (f64 FPR64:$src))), (v8i8 FPR64:$src)>;
+def : Pat<(v4i16 (bitconvert (f64 FPR64:$src))), (v4i16 FPR64:$src)>;
+def : Pat<(v2i32 (bitconvert (f64 FPR64:$src))), (v2i32 FPR64:$src)>;
+def : Pat<(v2f32 (bitconvert (f64 FPR64:$src))), (v2f32 FPR64:$src)>;
+def : Pat<(v1i64 (bitconvert (f64 FPR64:$src))), (v1i64 FPR64:$src)>;
+
+def : Pat<(v16i8 (bitconvert (f128 FPR128:$src))), (v16i8 FPR128:$src)>;
+def : Pat<(v8i16 (bitconvert (f128 FPR128:$src))), (v8i16 FPR128:$src)>;
+def : Pat<(v4i32 (bitconvert (f128 FPR128:$src))), (v4i32 FPR128:$src)>;
+def : Pat<(v2i64 (bitconvert (f128 FPR128:$src))), (v2i64 FPR128:$src)>;
+def : Pat<(v4f32 (bitconvert (f128 FPR128:$src))), (v4f32 FPR128:$src)>;
+def : Pat<(v2f64 (bitconvert (f128 FPR128:$src))), (v2f64 FPR128:$src)>;
+
+// Scalar Three Same
+
+def neon_uimm3 : Operand<i64>,
+ ImmLeaf<i64, [{return Imm < 8;}]> {
+ let ParserMatchClass = uimm3_asmoperand;
+ let PrintMethod = "printUImmHexOperand";
+}
+
+def neon_uimm4 : Operand<i64>,
+ ImmLeaf<i64, [{return Imm < 16;}]> {
+ let ParserMatchClass = uimm4_asmoperand;
+ let PrintMethod = "printUImmHexOperand";
+}
+
+// Bitwise Extract
+class NeonI_Extract<bit q, bits<2> op2, string asmop,
+ string OpS, RegisterOperand OpVPR, Operand OpImm>
+ : NeonI_BitExtract<q, op2, (outs OpVPR:$Rd),
+ (ins OpVPR:$Rn, OpVPR:$Rm, OpImm:$Index),
+ asmop # "\t$Rd." # OpS # ", $Rn." # OpS #
+ ", $Rm." # OpS # ", $Index",
+ [],
+ NoItinerary> {
+ bits<4> Index;
+}
+
+def EXTvvvi_8b : NeonI_Extract<0b0, 0b00, "ext", "8b",
+ VPR64, neon_uimm3> {
+ let Inst{14-11} = {0b0, Index{2}, Index{1}, Index{0}};
+}
+
+def EXTvvvi_16b : NeonI_Extract<0b1, 0b00, "ext", "16b",
+ VPR128, neon_uimm4> {
+ let Inst{14-11} = Index;
+}
+
+class NI_Extract<ValueType OpTy, RegisterOperand OpVPR, Instruction INST,
+ Operand OpImm>
+ : Pat<(OpTy (Neon_vextract (OpTy OpVPR:$Rn), (OpTy OpVPR:$Rm),
+ (i64 OpImm:$Imm))),
+ (INST OpVPR:$Rn, OpVPR:$Rm, OpImm:$Imm)>;
+
+def : NI_Extract<v8i8, VPR64, EXTvvvi_8b, neon_uimm3>;
+def : NI_Extract<v4i16, VPR64, EXTvvvi_8b, neon_uimm3>;
+def : NI_Extract<v2i32, VPR64, EXTvvvi_8b, neon_uimm3>;
+def : NI_Extract<v1i64, VPR64, EXTvvvi_8b, neon_uimm3>;
+def : NI_Extract<v2f32, VPR64, EXTvvvi_8b, neon_uimm3>;
+def : NI_Extract<v1f64, VPR64, EXTvvvi_8b, neon_uimm3>;
+def : NI_Extract<v16i8, VPR128, EXTvvvi_16b, neon_uimm4>;
+def : NI_Extract<v8i16, VPR128, EXTvvvi_16b, neon_uimm4>;
+def : NI_Extract<v4i32, VPR128, EXTvvvi_16b, neon_uimm4>;
+def : NI_Extract<v2i64, VPR128, EXTvvvi_16b, neon_uimm4>;
+def : NI_Extract<v4f32, VPR128, EXTvvvi_16b, neon_uimm4>;
+def : NI_Extract<v2f64, VPR128, EXTvvvi_16b, neon_uimm4>;
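+
+// Illustrative example (assumed assembly, not from this patch): with
+// Index = 3, EXTvvvi_8b prints as "ext v0.8b, v1.8b, v2.8b, #3", extracting
+// eight consecutive bytes from the pair Rn:Rm starting at byte 3 of Rn.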
+
+// Table lookup
+class NI_TBL<bit q, bits<2> op2, bits<2> len, bit op,
+ string asmop, string OpS, RegisterOperand OpVPR,
+ RegisterOperand VecList>
+ : NeonI_TBL<q, op2, len, op,
+ (outs OpVPR:$Rd), (ins VecList:$Rn, OpVPR:$Rm),
+ asmop # "\t$Rd." # OpS # ", $Rn, $Rm." # OpS,
+ [],
+ NoItinerary>;
+
+// The vectors in the lookup table are always 16b
+multiclass NI_TBL_pat<bits<2> len, bit op, string asmop, string List> {
+ def _8b : NI_TBL<0, 0b00, len, op, asmop, "8b", VPR64,
+ !cast<RegisterOperand>(List # "16B_operand")>;
+
+ def _16b : NI_TBL<1, 0b00, len, op, asmop, "16b", VPR128,
+ !cast<RegisterOperand>(List # "16B_operand")>;
+}
+
+defm TBL1 : NI_TBL_pat<0b00, 0b0, "tbl", "VOne">;
+defm TBL2 : NI_TBL_pat<0b01, 0b0, "tbl", "VPair">;
+defm TBL3 : NI_TBL_pat<0b10, 0b0, "tbl", "VTriple">;
+defm TBL4 : NI_TBL_pat<0b11, 0b0, "tbl", "VQuad">;
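+
+// Illustrative example (assumed assembly): TBL2_8b prints as
+// "tbl v0.8b, { v1.16b, v2.16b }, v3.8b"; out-of-range indices in $Rm yield
+// zero bytes in the result.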
+
+// Table lookup extension
+class NI_TBX<bit q, bits<2> op2, bits<2> len, bit op,
+ string asmop, string OpS, RegisterOperand OpVPR,
+ RegisterOperand VecList>
+ : NeonI_TBL<q, op2, len, op,
+ (outs OpVPR:$Rd), (ins OpVPR:$src, VecList:$Rn, OpVPR:$Rm),
+ asmop # "\t$Rd." # OpS # ", $Rn, $Rm." # OpS,
+ [],
+ NoItinerary> {
+ let Constraints = "$src = $Rd";
+}
+
+// The vectors in the lookup table are always 16b
+multiclass NI_TBX_pat<bits<2> len, bit op, string asmop, string List> {
+ def _8b : NI_TBX<0, 0b00, len, op, asmop, "8b", VPR64,
+ !cast<RegisterOperand>(List # "16B_operand")>;
+
+ def _16b : NI_TBX<1, 0b00, len, op, asmop, "16b", VPR128,
+ !cast<RegisterOperand>(List # "16B_operand")>;
+}
+
+defm TBX1 : NI_TBX_pat<0b00, 0b1, "tbx", "VOne">;
+defm TBX2 : NI_TBX_pat<0b01, 0b1, "tbx", "VPair">;
+defm TBX3 : NI_TBX_pat<0b10, 0b1, "tbx", "VTriple">;
+defm TBX4 : NI_TBX_pat<0b11, 0b1, "tbx", "VQuad">;
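+
+// Illustrative example (assumed assembly): TBX1_16b prints as
+// "tbx v0.16b, { v1.16b }, v2.16b"; unlike tbl, out-of-range indices leave
+// the destination bytes unchanged, hence the "$src = $Rd" tie above.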
+
+class NeonI_INS_main<string asmop, string Res, ValueType ResTy,
+ RegisterClass OpGPR, ValueType OpTy, Operand OpImm>
+ : NeonI_copy<0b1, 0b0, 0b0011,
+ (outs VPR128:$Rd), (ins VPR128:$src, OpGPR:$Rn, OpImm:$Imm),
+ asmop # "\t$Rd." # Res # "[$Imm], $Rn",
+ [(set (ResTy VPR128:$Rd),
+ (ResTy (vector_insert
+ (ResTy VPR128:$src),
+ (OpTy OpGPR:$Rn),
+ (OpImm:$Imm))))],
+ NoItinerary> {
+ bits<4> Imm;
+ let Constraints = "$src = $Rd";
+}
+
+// Insert element (vector, from main)
+def INSbw : NeonI_INS_main<"ins", "b", v16i8, GPR32, i32,
+ neon_uimm4_bare> {
+ let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1};
+}
+def INShw : NeonI_INS_main<"ins", "h", v8i16, GPR32, i32,
+ neon_uimm3_bare> {
+ let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0};
+}
+def INSsw : NeonI_INS_main<"ins", "s", v4i32, GPR32, i32,
+ neon_uimm2_bare> {
+ let Inst{20-16} = {Imm{1}, Imm{0}, 0b1, 0b0, 0b0};
+}
+def INSdx : NeonI_INS_main<"ins", "d", v2i64, GPR64, i64,
+ neon_uimm1_bare> {
+ let Inst{20-16} = {Imm, 0b1, 0b0, 0b0, 0b0};
+}
+
+def : NeonInstAlias<"mov $Rd.b[$Imm], $Rn",
+ (INSbw VPR128:$Rd, GPR32:$Rn, neon_uimm4_bare:$Imm), 0>;
+def : NeonInstAlias<"mov $Rd.h[$Imm], $Rn",
+ (INShw VPR128:$Rd, GPR32:$Rn, neon_uimm3_bare:$Imm), 0>;
+def : NeonInstAlias<"mov $Rd.s[$Imm], $Rn",
+ (INSsw VPR128:$Rd, GPR32:$Rn, neon_uimm2_bare:$Imm), 0>;
+def : NeonInstAlias<"mov $Rd.d[$Imm], $Rn",
+ (INSdx VPR128:$Rd, GPR64:$Rn, neon_uimm1_bare:$Imm), 0>;
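+
+// Illustrative example (assumed assembly): "ins v0.s[1], w2" writes W2 into
+// lane 1 of V0, and the aliases above let it be written "mov v0.s[1], w2".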
+
+class Neon_INS_main_pattern <ValueType ResTy,ValueType ExtResTy,
+ RegisterClass OpGPR, ValueType OpTy,
+ Operand OpImm, Instruction INS>
+ : Pat<(ResTy (vector_insert
+ (ResTy VPR64:$src),
+ (OpTy OpGPR:$Rn),
+ (OpImm:$Imm))),
+ (ResTy (EXTRACT_SUBREG
+ (ExtResTy (INS (ExtResTy (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64)),
+ OpGPR:$Rn, OpImm:$Imm)), sub_64))>;
+
+def INSbw_pattern : Neon_INS_main_pattern<v8i8, v16i8, GPR32, i32,
+ neon_uimm3_bare, INSbw>;
+def INShw_pattern : Neon_INS_main_pattern<v4i16, v8i16, GPR32, i32,
+ neon_uimm2_bare, INShw>;
+def INSsw_pattern : Neon_INS_main_pattern<v2i32, v4i32, GPR32, i32,
+ neon_uimm1_bare, INSsw>;
+def INSdx_pattern : Neon_INS_main_pattern<v1i64, v2i64, GPR64, i64,
+ neon_uimm0_bare, INSdx>;
+
+class NeonI_INS_element<string asmop, string Res, Operand ResImm>
+ : NeonI_insert<0b1, 0b1,
+ (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn,
+ ResImm:$Immd, ResImm:$Immn),
+ asmop # "\t$Rd." # Res # "[$Immd], $Rn." # Res # "[$Immn]",
+ [],
+ NoItinerary> {
+ let Constraints = "$src = $Rd";
+ bits<4> Immd;
+ bits<4> Immn;
+}
+
+// Insert element (vector, from element)
+def INSELb : NeonI_INS_element<"ins", "b", neon_uimm4_bare> {
+ let Inst{20-16} = {Immd{3}, Immd{2}, Immd{1}, Immd{0}, 0b1};
+ let Inst{14-11} = {Immn{3}, Immn{2}, Immn{1}, Immn{0}};
+}
+def INSELh : NeonI_INS_element<"ins", "h", neon_uimm3_bare> {
+ let Inst{20-16} = {Immd{2}, Immd{1}, Immd{0}, 0b1, 0b0};
+ let Inst{14-11} = {Immn{2}, Immn{1}, Immn{0}, 0b0};
+ // bit 11 is unspecified, but should be set to zero.
+}
+def INSELs : NeonI_INS_element<"ins", "s", neon_uimm2_bare> {
+ let Inst{20-16} = {Immd{1}, Immd{0}, 0b1, 0b0, 0b0};
+ let Inst{14-11} = {Immn{1}, Immn{0}, 0b0, 0b0};
+ // bits 11-12 are unspecified, but should be set to zero.
+}
+def INSELd : NeonI_INS_element<"ins", "d", neon_uimm1_bare> {
+ let Inst{20-16} = {Immd, 0b1, 0b0, 0b0, 0b0};
+ let Inst{14-11} = {Immn{0}, 0b0, 0b0, 0b0};
+ // bits 11-13 are unspecified, but should be set to zero.
+}
+
+def : NeonInstAlias<"mov $Rd.b[$Immd], $Rn.b[$Immn]",
+ (INSELb VPR128:$Rd, VPR128:$Rn,
+ neon_uimm4_bare:$Immd, neon_uimm4_bare:$Immn), 0>;
+def : NeonInstAlias<"mov $Rd.h[$Immd], $Rn.h[$Immn]",
+ (INSELh VPR128:$Rd, VPR128:$Rn,
+ neon_uimm3_bare:$Immd, neon_uimm3_bare:$Immn), 0>;
+def : NeonInstAlias<"mov $Rd.s[$Immd], $Rn.s[$Immn]",
+ (INSELs VPR128:$Rd, VPR128:$Rn,
+ neon_uimm2_bare:$Immd, neon_uimm2_bare:$Immn), 0>;
+def : NeonInstAlias<"mov $Rd.d[$Immd], $Rn.d[$Immn]",
+ (INSELd VPR128:$Rd, VPR128:$Rn,
+ neon_uimm1_bare:$Immd, neon_uimm1_bare:$Immn), 0>;
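+
+// Illustrative example (assumed assembly): "ins v0.b[3], v1.b[7]" copies
+// byte lane 7 of V1 into byte lane 3 of V0; the aliases above accept the
+// equivalent "mov v0.b[3], v1.b[7]" spelling.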
+
+multiclass Neon_INS_elt_pattern<ValueType ResTy, ValueType NaTy,
+ ValueType MidTy, Operand StImm, Operand NaImm,
+ Instruction INS> {
+def : Pat<(ResTy (vector_insert
+ (ResTy VPR128:$src),
+ (MidTy (vector_extract
+ (ResTy VPR128:$Rn),
+ (StImm:$Immn))),
+ (StImm:$Immd))),
+ (INS (ResTy VPR128:$src), (ResTy VPR128:$Rn),
+ StImm:$Immd, StImm:$Immn)>;
+
+def : Pat <(ResTy (vector_insert
+ (ResTy VPR128:$src),
+ (MidTy (vector_extract
+ (NaTy VPR64:$Rn),
+ (NaImm:$Immn))),
+ (StImm:$Immd))),
+ (INS (ResTy VPR128:$src),
+ (ResTy (SUBREG_TO_REG (i64 0), (NaTy VPR64:$Rn), sub_64)),
+ StImm:$Immd, NaImm:$Immn)>;
+
+def : Pat <(NaTy (vector_insert
+ (NaTy VPR64:$src),
+ (MidTy (vector_extract
+ (ResTy VPR128:$Rn),
+ (StImm:$Immn))),
+ (NaImm:$Immd))),
+ (NaTy (EXTRACT_SUBREG
+ (ResTy (INS
+ (ResTy (SUBREG_TO_REG (i64 0), (NaTy VPR64:$src), sub_64)),
+ (ResTy VPR128:$Rn),
+ NaImm:$Immd, StImm:$Immn)),
+ sub_64))>;
+
+def : Pat <(NaTy (vector_insert
+ (NaTy VPR64:$src),
+ (MidTy (vector_extract
+ (NaTy VPR64:$Rn),
+ (NaImm:$Immn))),
+ (NaImm:$Immd))),
+ (NaTy (EXTRACT_SUBREG
+ (ResTy (INS
+ (ResTy (SUBREG_TO_REG (i64 0), (NaTy VPR64:$src), sub_64)),
+ (ResTy (SUBREG_TO_REG (i64 0), (NaTy VPR64:$Rn), sub_64)),
+ NaImm:$Immd, NaImm:$Immn)),
+ sub_64))>;
+}
+
+defm : Neon_INS_elt_pattern<v4f32, v2f32, f32, neon_uimm2_bare,
+ neon_uimm1_bare, INSELs>;
+defm : Neon_INS_elt_pattern<v2f64, v1f64, f64, neon_uimm1_bare,
+ neon_uimm0_bare, INSELd>;
+defm : Neon_INS_elt_pattern<v16i8, v8i8, i32, neon_uimm4_bare,
+ neon_uimm3_bare, INSELb>;
+defm : Neon_INS_elt_pattern<v8i16, v4i16, i32, neon_uimm3_bare,
+ neon_uimm2_bare, INSELh>;
+defm : Neon_INS_elt_pattern<v4i32, v2i32, i32, neon_uimm2_bare,
+ neon_uimm1_bare, INSELs>;
+defm : Neon_INS_elt_pattern<v2i64, v1i64, i64, neon_uimm1_bare,
+ neon_uimm0_bare, INSELd>;
+
+multiclass Neon_INS_elt_float_pattern<ValueType ResTy, ValueType NaTy,
+ ValueType MidTy,
+ RegisterClass OpFPR, Operand ResImm,
+ SubRegIndex SubIndex, Instruction INS> {
+def : Pat <(ResTy (vector_insert
+ (ResTy VPR128:$src),
+ (MidTy OpFPR:$Rn),
+ (ResImm:$Imm))),
+ (INS (ResTy VPR128:$src),
+ (ResTy (SUBREG_TO_REG (i64 0), OpFPR:$Rn, SubIndex)),
+ ResImm:$Imm,
+ (i64 0))>;
+
+def : Pat <(NaTy (vector_insert
+ (NaTy VPR64:$src),
+ (MidTy OpFPR:$Rn),
+ (ResImm:$Imm))),
+ (NaTy (EXTRACT_SUBREG
+ (ResTy (INS
+ (ResTy (SUBREG_TO_REG (i64 0), (NaTy VPR64:$src), sub_64)),
+ (ResTy (SUBREG_TO_REG (i64 0), (MidTy OpFPR:$Rn), SubIndex)),
+ ResImm:$Imm,
+ (i64 0))),
+ sub_64))>;
+}
+
+defm : Neon_INS_elt_float_pattern<v4f32, v2f32, f32, FPR32, neon_uimm2_bare,
+ sub_32, INSELs>;
+defm : Neon_INS_elt_float_pattern<v2f64, v1f64, f64, FPR64, neon_uimm1_bare,
+ sub_64, INSELd>;
+
+class NeonI_SMOV<string asmop, string Res, bit Q,
+ ValueType OpTy, ValueType eleTy,
+ Operand OpImm, RegisterClass ResGPR, ValueType ResTy>
+ : NeonI_copy<Q, 0b0, 0b0101,
+ (outs ResGPR:$Rd), (ins VPR128:$Rn, OpImm:$Imm),
+ asmop # "\t$Rd, $Rn." # Res # "[$Imm]",
+ [(set (ResTy ResGPR:$Rd),
+ (ResTy (sext_inreg
+ (ResTy (vector_extract
+ (OpTy VPR128:$Rn), (OpImm:$Imm))),
+ eleTy)))],
+ NoItinerary> {
+ bits<4> Imm;
+}
+
+// Signed integer move (main, from element)
+def SMOVwb : NeonI_SMOV<"smov", "b", 0b0, v16i8, i8, neon_uimm4_bare,
+ GPR32, i32> {
+ let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1};
+}
+def SMOVwh : NeonI_SMOV<"smov", "h", 0b0, v8i16, i16, neon_uimm3_bare,
+ GPR32, i32> {
+ let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0};
+}
+def SMOVxb : NeonI_SMOV<"smov", "b", 0b1, v16i8, i8, neon_uimm4_bare,
+ GPR64, i64> {
+ let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1};
+}
+def SMOVxh : NeonI_SMOV<"smov", "h", 0b1, v8i16, i16, neon_uimm3_bare,
+ GPR64, i64> {
+ let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0};
+}
+def SMOVxs : NeonI_SMOV<"smov", "s", 0b1, v4i32, i32, neon_uimm2_bare,
+ GPR64, i64> {
+ let Inst{20-16} = {Imm{1}, Imm{0}, 0b1, 0b0, 0b0};
+}
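+
+// Illustrative example (assumed assembly): "smov w0, v1.b[2]" sign-extends
+// byte lane 2 of V1 into W0, which is what the sext_inreg/vector_extract
+// patterns above select.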
+
+multiclass Neon_SMOVx_pattern <ValueType StTy, ValueType NaTy,
+ ValueType eleTy, Operand StImm, Operand NaImm,
+ Instruction SMOVI> {
+ def : Pat<(i64 (sext_inreg
+ (i64 (anyext
+ (i32 (vector_extract
+ (StTy VPR128:$Rn), (StImm:$Imm))))),
+ eleTy)),
+ (SMOVI VPR128:$Rn, StImm:$Imm)>;
+
+ def : Pat<(i64 (sext
+ (i32 (vector_extract
+ (StTy VPR128:$Rn), (StImm:$Imm))))),
+ (SMOVI VPR128:$Rn, StImm:$Imm)>;
+
+ def : Pat<(i64 (sext_inreg
+ (i64 (vector_extract
+ (NaTy VPR64:$Rn), (NaImm:$Imm))),
+ eleTy)),
+ (SMOVI (StTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
+ NaImm:$Imm)>;
+
+ def : Pat<(i64 (sext_inreg
+ (i64 (anyext
+ (i32 (vector_extract
+ (NaTy VPR64:$Rn), (NaImm:$Imm))))),
+ eleTy)),
+ (SMOVI (StTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
+ NaImm:$Imm)>;
+
+ def : Pat<(i64 (sext
+ (i32 (vector_extract
+ (NaTy VPR64:$Rn), (NaImm:$Imm))))),
+ (SMOVI (StTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
+ NaImm:$Imm)>;
+}
+
+defm : Neon_SMOVx_pattern<v16i8, v8i8, i8, neon_uimm4_bare,
+ neon_uimm3_bare, SMOVxb>;
+defm : Neon_SMOVx_pattern<v8i16, v4i16, i16, neon_uimm3_bare,
+ neon_uimm2_bare, SMOVxh>;
+defm : Neon_SMOVx_pattern<v4i32, v2i32, i32, neon_uimm2_bare,
+ neon_uimm1_bare, SMOVxs>;
+
+class Neon_SMOVw_pattern <ValueType StTy, ValueType NaTy,
+ ValueType eleTy, Operand StImm, Operand NaImm,
+ Instruction SMOVI>
+ : Pat<(i32 (sext_inreg
+ (i32 (vector_extract
+ (NaTy VPR64:$Rn), (NaImm:$Imm))),
+ eleTy)),
+ (SMOVI (StTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
+ NaImm:$Imm)>;
+
+def : Neon_SMOVw_pattern<v16i8, v8i8, i8, neon_uimm4_bare,
+ neon_uimm3_bare, SMOVwb>;
+def : Neon_SMOVw_pattern<v8i16, v4i16, i16, neon_uimm3_bare,
+ neon_uimm2_bare, SMOVwh>;
+
+class NeonI_UMOV<string asmop, string Res, bit Q,
+ ValueType OpTy, Operand OpImm,
+ RegisterClass ResGPR, ValueType ResTy>
+ : NeonI_copy<Q, 0b0, 0b0111,
+ (outs ResGPR:$Rd), (ins VPR128:$Rn, OpImm:$Imm),
+ asmop # "\t$Rd, $Rn." # Res # "[$Imm]",
+ [(set (ResTy ResGPR:$Rd),
+ (ResTy (vector_extract
+ (OpTy VPR128:$Rn), (OpImm:$Imm))))],
+ NoItinerary> {
+ bits<4> Imm;
+}
+
+// Unsigned integer move (main, from element)
+def UMOVwb : NeonI_UMOV<"umov", "b", 0b0, v16i8, neon_uimm4_bare,
+ GPR32, i32> {
+ let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1};
+}
+def UMOVwh : NeonI_UMOV<"umov", "h", 0b0, v8i16, neon_uimm3_bare,
+ GPR32, i32> {
+ let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0};
+}
+def UMOVws : NeonI_UMOV<"umov", "s", 0b0, v4i32, neon_uimm2_bare,
+ GPR32, i32> {
+ let Inst{20-16} = {Imm{1}, Imm{0}, 0b1, 0b0, 0b0};
+}
+def UMOVxd : NeonI_UMOV<"umov", "d", 0b1, v2i64, neon_uimm1_bare,
+ GPR64, i64> {
+ let Inst{20-16} = {Imm, 0b1, 0b0, 0b0, 0b0};
+}
+
+def : NeonInstAlias<"mov $Rd, $Rn.s[$Imm]",
+ (UMOVws GPR32:$Rd, VPR128:$Rn, neon_uimm2_bare:$Imm), 0>;
+def : NeonInstAlias<"mov $Rd, $Rn.d[$Imm]",
+ (UMOVxd GPR64:$Rd, VPR128:$Rn, neon_uimm1_bare:$Imm), 0>;
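+
+// Illustrative example (assumed assembly): "umov w0, v1.h[5]" zero-extends
+// halfword lane 5 of V1 into W0; for s and d lanes the "mov" aliases above
+// apply, e.g. "mov x0, v1.d[1]".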
+
+class Neon_UMOV_pattern <ValueType StTy, ValueType NaTy, ValueType ResTy,
+ Operand StImm, Operand NaImm,
+ Instruction UMOVI>
+ : Pat<(ResTy (vector_extract
+ (NaTy VPR64:$Rn), NaImm:$Imm)),
+ (UMOVI (StTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
+ NaImm:$Imm)>;
+
+def : Neon_UMOV_pattern<v16i8, v8i8, i32, neon_uimm4_bare,
+ neon_uimm3_bare, UMOVwb>;
+def : Neon_UMOV_pattern<v8i16, v4i16, i32, neon_uimm3_bare,
+ neon_uimm2_bare, UMOVwh>;
+def : Neon_UMOV_pattern<v4i32, v2i32, i32, neon_uimm2_bare,
+ neon_uimm1_bare, UMOVws>;
+
+def : Pat<(i32 (and
+ (i32 (vector_extract
+ (v16i8 VPR128:$Rn), (neon_uimm4_bare:$Imm))),
+ 255)),
+ (UMOVwb VPR128:$Rn, neon_uimm4_bare:$Imm)>;
+
+def : Pat<(i32 (and
+ (i32 (vector_extract
+ (v8i16 VPR128:$Rn), (neon_uimm3_bare:$Imm))),
+ 65535)),
+ (UMOVwh VPR128:$Rn, neon_uimm3_bare:$Imm)>;
+
+def : Pat<(i64 (zext
+ (i32 (vector_extract
+ (v2i64 VPR128:$Rn), (neon_uimm1_bare:$Imm))))),
+ (UMOVxd VPR128:$Rn, neon_uimm1_bare:$Imm)>;
+
+def : Pat<(i32 (and
+ (i32 (vector_extract
+ (v8i8 VPR64:$Rn), (neon_uimm3_bare:$Imm))),
+ 255)),
+ (UMOVwb (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64),
+ neon_uimm3_bare:$Imm)>;
+
+def : Pat<(i32 (and
+ (i32 (vector_extract
+ (v4i16 VPR64:$Rn), (neon_uimm2_bare:$Imm))),
+ 65535)),
+ (UMOVwh (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64),
+ neon_uimm2_bare:$Imm)>;
+
+def : Pat<(i64 (zext
+ (i32 (vector_extract
+ (v1i64 VPR64:$Rn), (neon_uimm0_bare:$Imm))))),
+ (UMOVxd (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64),
+ neon_uimm0_bare:$Imm)>;
+
+// Additional copy patterns for scalar types
+def : Pat<(i32 (vector_extract (v1i8 FPR8:$Rn), (i64 0))),
+ (UMOVwb (v16i8
+ (SUBREG_TO_REG (i64 0), FPR8:$Rn, sub_8)), (i64 0))>;
+
+def : Pat<(i32 (vector_extract (v1i16 FPR16:$Rn), (i64 0))),
+ (UMOVwh (v8i16
+ (SUBREG_TO_REG (i64 0), FPR16:$Rn, sub_16)), (i64 0))>;
+
+def : Pat<(i32 (vector_extract (v1i32 FPR32:$Rn), (i64 0))),
+ (FMOVws FPR32:$Rn)>;
+
+def : Pat<(i64 (vector_extract (v1i64 FPR64:$Rn), (i64 0))),
+ (FMOVxd FPR64:$Rn)>;
+
+def : Pat<(f64 (vector_extract (v1f64 FPR64:$Rn), (i64 0))),
+ (f64 FPR64:$Rn)>;
+
+def : Pat<(f32 (vector_extract (v1f32 FPR32:$Rn), (i64 0))),
+ (f32 FPR32:$Rn)>;
+
+def : Pat<(v1i8 (scalar_to_vector GPR32:$Rn)),
+ (v1i8 (EXTRACT_SUBREG (v16i8
+ (INSbw (v16i8 (IMPLICIT_DEF)), $Rn, (i64 0))),
+ sub_8))>;
+
+def : Pat<(v1i16 (scalar_to_vector GPR32:$Rn)),
+ (v1i16 (EXTRACT_SUBREG (v8i16
+ (INShw (v8i16 (IMPLICIT_DEF)), $Rn, (i64 0))),
+ sub_16))>;
+
+def : Pat<(v1i32 (scalar_to_vector GPR32:$src)),
+ (FMOVsw $src)>;
+
+def : Pat<(v1i64 (scalar_to_vector GPR64:$src)),
+ (FMOVdx $src)>;
+
+def : Pat<(v1f32 (scalar_to_vector (f32 FPR32:$Rn))),
+ (v1f32 FPR32:$Rn)>;
+def : Pat<(v1f64 (scalar_to_vector (f64 FPR64:$Rn))),
+ (v1f64 FPR64:$Rn)>;
+
+def : Pat<(v1f64 (scalar_to_vector (f64 FPR64:$src))),
+ (FMOVdd $src)>;
+
+def : Pat<(v2f64 (scalar_to_vector (f64 FPR64:$src))),
+ (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)),
+ (f64 FPR64:$src), sub_64)>;
+
+class NeonI_DUP_Elt<bit Q, string asmop, string rdlane, string rnlane,
+ RegisterOperand ResVPR, Operand OpImm>
+ : NeonI_copy<Q, 0b0, 0b0000, (outs ResVPR:$Rd),
+ (ins VPR128:$Rn, OpImm:$Imm),
+ asmop # "\t$Rd" # rdlane # ", $Rn" # rnlane # "[$Imm]",
+ [],
+ NoItinerary> {
+ bits<4> Imm;
+}
+
+def DUPELT16b : NeonI_DUP_Elt<0b1, "dup", ".16b", ".b", VPR128,
+ neon_uimm4_bare> {
+ let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1};
+}
+
+def DUPELT8h : NeonI_DUP_Elt<0b1, "dup", ".8h", ".h", VPR128,
+ neon_uimm3_bare> {
+ let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0};
+}
+
+def DUPELT4s : NeonI_DUP_Elt<0b1, "dup", ".4s", ".s", VPR128,
+ neon_uimm2_bare> {
+ let Inst{20-16} = {Imm{1}, Imm{0}, 0b1, 0b0, 0b0};
+}
+
+def DUPELT2d : NeonI_DUP_Elt<0b1, "dup", ".2d", ".d", VPR128,
+ neon_uimm1_bare> {
+ let Inst{20-16} = {Imm, 0b1, 0b0, 0b0, 0b0};
+}
+
+def DUPELT8b : NeonI_DUP_Elt<0b0, "dup", ".8b", ".b", VPR64,
+ neon_uimm4_bare> {
+ let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1};
+}
+
+def DUPELT4h : NeonI_DUP_Elt<0b0, "dup", ".4h", ".h", VPR64,
+ neon_uimm3_bare> {
+ let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0};
+}
+
+def DUPELT2s : NeonI_DUP_Elt<0b0, "dup", ".2s", ".s", VPR64,
+ neon_uimm2_bare> {
+ let Inst{20-16} = {Imm{1}, Imm{0}, 0b1, 0b0, 0b0};
+}
+
+multiclass NeonI_DUP_Elt_pattern<Instruction DUPELT, ValueType ResTy,
+ ValueType OpTy, ValueType NaTy,
+ ValueType ExTy, Operand OpLImm,
+ Operand OpNImm> {
+def : Pat<(ResTy (Neon_vduplane (OpTy VPR128:$Rn), OpLImm:$Imm)),
+ (ResTy (DUPELT (OpTy VPR128:$Rn), OpLImm:$Imm))>;
+
+def : Pat<(ResTy (Neon_vduplane
+ (NaTy VPR64:$Rn), OpNImm:$Imm)),
+ (ResTy (DUPELT
+ (ExTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)), OpNImm:$Imm))>;
+}
+defm : NeonI_DUP_Elt_pattern<DUPELT16b, v16i8, v16i8, v8i8, v16i8,
+ neon_uimm4_bare, neon_uimm3_bare>;
+defm : NeonI_DUP_Elt_pattern<DUPELT8b, v8i8, v16i8, v8i8, v16i8,
+ neon_uimm4_bare, neon_uimm3_bare>;
+defm : NeonI_DUP_Elt_pattern<DUPELT8h, v8i16, v8i16, v4i16, v8i16,
+ neon_uimm3_bare, neon_uimm2_bare>;
+defm : NeonI_DUP_Elt_pattern<DUPELT4h, v4i16, v8i16, v4i16, v8i16,
+ neon_uimm3_bare, neon_uimm2_bare>;
+defm : NeonI_DUP_Elt_pattern<DUPELT4s, v4i32, v4i32, v2i32, v4i32,
+ neon_uimm2_bare, neon_uimm1_bare>;
+defm : NeonI_DUP_Elt_pattern<DUPELT2s, v2i32, v4i32, v2i32, v4i32,
+ neon_uimm2_bare, neon_uimm1_bare>;
+defm : NeonI_DUP_Elt_pattern<DUPELT2d, v2i64, v2i64, v1i64, v2i64,
+ neon_uimm1_bare, neon_uimm0_bare>;
+defm : NeonI_DUP_Elt_pattern<DUPELT4s, v4f32, v4f32, v2f32, v4f32,
+ neon_uimm2_bare, neon_uimm1_bare>;
+defm : NeonI_DUP_Elt_pattern<DUPELT2s, v2f32, v4f32, v2f32, v4f32,
+ neon_uimm2_bare, neon_uimm1_bare>;
+defm : NeonI_DUP_Elt_pattern<DUPELT2d, v2f64, v2f64, v1f64, v2f64,
+ neon_uimm1_bare, neon_uimm0_bare>;
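+
+// Illustrative example (assumed assembly): a Neon_vduplane of lane 1 of a
+// 2s vector selects "dup v0.2s, v1.s[1]", widening the 64-bit source with
+// SUBREG_TO_REG first.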
+
+def : Pat<(v2f32 (Neon_vdup (f32 FPR32:$Rn))),
+ (v2f32 (DUPELT2s
+ (SUBREG_TO_REG (i64 0), FPR32:$Rn, sub_32),
+ (i64 0)))>;
+def : Pat<(v4f32 (Neon_vdup (f32 FPR32:$Rn))),
+ (v4f32 (DUPELT4s
+ (SUBREG_TO_REG (i64 0), FPR32:$Rn, sub_32),
+ (i64 0)))>;
+def : Pat<(v2f64 (Neon_vdup (f64 FPR64:$Rn))),
+ (v2f64 (DUPELT2d
+ (SUBREG_TO_REG (i64 0), FPR64:$Rn, sub_64),
+ (i64 0)))>;
+
+class NeonI_DUP<bit Q, string asmop, string rdlane,
+ RegisterOperand ResVPR, ValueType ResTy,
+ RegisterClass OpGPR, ValueType OpTy>
+ : NeonI_copy<Q, 0b0, 0b0001, (outs ResVPR:$Rd), (ins OpGPR:$Rn),
+ asmop # "\t$Rd" # rdlane # ", $Rn",
+ [(set (ResTy ResVPR:$Rd),
+ (ResTy (Neon_vdup (OpTy OpGPR:$Rn))))],
+ NoItinerary>;
+
+def DUP16b : NeonI_DUP<0b1, "dup", ".16b", VPR128, v16i8, GPR32, i32> {
+ let Inst{20-16} = 0b00001;
+ // bits 17-20 are unspecified, but should be set to zero.
+}
+
+def DUP8h : NeonI_DUP<0b1, "dup", ".8h", VPR128, v8i16, GPR32, i32> {
+ let Inst{20-16} = 0b00010;
+ // bits 18-20 are unspecified, but should be set to zero.
+}
+
+def DUP4s : NeonI_DUP<0b1, "dup", ".4s", VPR128, v4i32, GPR32, i32> {
+ let Inst{20-16} = 0b00100;
+ // bits 19-20 are unspecified, but should be set to zero.
+}
+
+def DUP2d : NeonI_DUP<0b1, "dup", ".2d", VPR128, v2i64, GPR64, i64> {
+ let Inst{20-16} = 0b01000;
+ // bit 20 is unspecified, but should be set to zero.
+}
+
+def DUP8b : NeonI_DUP<0b0, "dup", ".8b", VPR64, v8i8, GPR32, i32> {
+ let Inst{20-16} = 0b00001;
+ // bits 17-20 are unspecified, but should be set to zero.
+}
+
+def DUP4h : NeonI_DUP<0b0, "dup", ".4h", VPR64, v4i16, GPR32, i32> {
+ let Inst{20-16} = 0b00010;
+ // bits 18-20 are unspecified, but should be set to zero.
+}
+
+def DUP2s : NeonI_DUP<0b0, "dup", ".2s", VPR64, v2i32, GPR32, i32> {
+ let Inst{20-16} = 0b00100;
+ // bits 19-20 are unspecified, but should be set to zero.
+}
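+
+// Illustrative example (assumed assembly): Neon_vdup of a GPR selects the
+// general-register form, e.g. "dup v0.8h, w1" replicates the low halfword
+// of W1 into all eight lanes.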
+
+// Patterns for CONCAT_VECTORS
+multiclass Concat_Vector_Pattern<ValueType ResTy, ValueType OpTy> {
+def : Pat<(ResTy (concat_vectors (OpTy VPR64:$Rn), undef)),
+ (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)>;
+def : Pat<(ResTy (concat_vectors (OpTy VPR64:$Rn), (OpTy VPR64:$Rm))),
+ (INSELd
+ (v2i64 (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
+ (v2i64 (SUBREG_TO_REG (i64 0), VPR64:$Rm, sub_64)),
+ (i64 1),
+ (i64 0))>;
+def : Pat<(ResTy (concat_vectors (OpTy VPR64:$Rn), (OpTy VPR64:$Rn))),
+ (DUPELT2d
+ (v2i64 (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
+ (i64 0))>;
+}
+
+defm : Concat_Vector_Pattern<v16i8, v8i8>;
+defm : Concat_Vector_Pattern<v8i16, v4i16>;
+defm : Concat_Vector_Pattern<v4i32, v2i32>;
+defm : Concat_Vector_Pattern<v2i64, v1i64>;
+defm : Concat_Vector_Pattern<v4f32, v2f32>;
+defm : Concat_Vector_Pattern<v2f64, v1f64>;
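+
+// Note (illustrative): the third pattern above selects the concatenation of
+// a 64-bit vector with itself as a single "dup v0.2d, v1.d[0]" (DUPELT2d).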
+
+// Patterns for EXTRACT_SUBVECTOR
+def : Pat<(v8i8 (extract_subvector (v16i8 VPR128:$Rn), (i64 0))),
+ (v8i8 (EXTRACT_SUBREG VPR128:$Rn, sub_64))>;
+def : Pat<(v4i16 (extract_subvector (v8i16 VPR128:$Rn), (i64 0))),
+ (v4i16 (EXTRACT_SUBREG VPR128:$Rn, sub_64))>;
+def : Pat<(v2i32 (extract_subvector (v4i32 VPR128:$Rn), (i64 0))),
+ (v2i32 (EXTRACT_SUBREG VPR128:$Rn, sub_64))>;
+def : Pat<(v1i64 (extract_subvector (v2i64 VPR128:$Rn), (i64 0))),
+ (v1i64 (EXTRACT_SUBREG VPR128:$Rn, sub_64))>;
+def : Pat<(v2f32 (extract_subvector (v4f32 VPR128:$Rn), (i64 0))),
+ (v2f32 (EXTRACT_SUBREG VPR128:$Rn, sub_64))>;
+def : Pat<(v1f64 (extract_subvector (v2f64 VPR128:$Rn), (i64 0))),
+ (v1f64 (EXTRACT_SUBREG VPR128:$Rn, sub_64))>;
+
+// The following definitions are for instruction class (3V Elem)
+
+// Variant 1
+
+class NI_2VE<bit q, bit u, bits<2> size, bits<4> opcode,
+ string asmop, string ResS, string OpS, string EleOpS,
+ Operand OpImm, RegisterOperand ResVPR,
+ RegisterOperand OpVPR, RegisterOperand EleOpVPR>
+ : NeonI_2VElem<q, u, size, opcode,
+ (outs ResVPR:$Rd), (ins ResVPR:$src, OpVPR:$Rn,
+ EleOpVPR:$Re, OpImm:$Index),
+ asmop # "\t$Rd." # ResS # ", $Rn." # OpS #
+ ", $Re." # EleOpS # "[$Index]",
+ [],
+ NoItinerary> {
+ bits<3> Index;
+ bits<5> Re;
+
+ let Constraints = "$src = $Rd";
+}
+
+multiclass NI_2VE_v1<bit u, bits<4> opcode, string asmop> {
+ // The vector register class for the element is always 128-bit to cover the max index
+ def _2s4s : NI_2VE<0b0, u, 0b10, opcode, asmop, "2s", "2s", "s",
+ neon_uimm2_bare, VPR64, VPR64, VPR128> {
+ let Inst{11} = {Index{1}};
+ let Inst{21} = {Index{0}};
+ let Inst{20-16} = Re;
+ }
+
+ def _4s4s : NI_2VE<0b1, u, 0b10, opcode, asmop, "4s", "4s", "s",
+ neon_uimm2_bare, VPR128, VPR128, VPR128> {
+ let Inst{11} = {Index{1}};
+ let Inst{21} = {Index{0}};
+ let Inst{20-16} = Re;
+ }
+
+ // Index operations on 16-bit (H) elements are restricted to using v0-v15.
+ def _4h8h : NI_2VE<0b0, u, 0b01, opcode, asmop, "4h", "4h", "h",
+ neon_uimm3_bare, VPR64, VPR64, VPR128Lo> {
+ let Inst{11} = {Index{2}};
+ let Inst{21} = {Index{1}};
+ let Inst{20} = {Index{0}};
+ let Inst{19-16} = Re{3-0};
+ }
+
+ def _8h8h : NI_2VE<0b1, u, 0b01, opcode, asmop, "8h", "8h", "h",
+ neon_uimm3_bare, VPR128, VPR128, VPR128Lo> {
+ let Inst{11} = {Index{2}};
+ let Inst{21} = {Index{1}};
+ let Inst{20} = {Index{0}};
+ let Inst{19-16} = Re{3-0};
+ }
+}
+
+defm MLAvve : NI_2VE_v1<0b1, 0b0000, "mla">;
+defm MLSvve : NI_2VE_v1<0b1, 0b0100, "mls">;
+
+// Pattern for lane in 128-bit vector
+class NI_2VE_laneq<Instruction INST, Operand OpImm, SDPatternOperator op,
+ RegisterOperand ResVPR, RegisterOperand OpVPR,
+ RegisterOperand EleOpVPR, ValueType ResTy, ValueType OpTy,
+ ValueType EleOpTy>
+ : Pat<(ResTy (op (ResTy ResVPR:$src), (OpTy OpVPR:$Rn),
+ (OpTy (Neon_vduplane (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))),
+ (INST ResVPR:$src, OpVPR:$Rn, EleOpVPR:$Re, OpImm:$Index)>;
+
+// Pattern for lane in 64-bit vector
+class NI_2VE_lane<Instruction INST, Operand OpImm, SDPatternOperator op,
+ RegisterOperand ResVPR, RegisterOperand OpVPR,
+ RegisterOperand EleOpVPR, ValueType ResTy, ValueType OpTy,
+ ValueType EleOpTy>
+ : Pat<(ResTy (op (ResTy ResVPR:$src), (OpTy OpVPR:$Rn),
+ (OpTy (Neon_vduplane (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))),
+ (INST ResVPR:$src, OpVPR:$Rn,
+ (SUBREG_TO_REG (i64 0), EleOpVPR:$Re, sub_64), OpImm:$Index)>;
+
+multiclass NI_2VE_v1_pat<string subop, SDPatternOperator op> {
+ def : NI_2VE_laneq<!cast<Instruction>(subop # "_2s4s"), neon_uimm2_bare,
+ op, VPR64, VPR64, VPR128, v2i32, v2i32, v4i32>;
+
+ def : NI_2VE_laneq<!cast<Instruction>(subop # "_4s4s"), neon_uimm2_bare,
+ op, VPR128, VPR128, VPR128, v4i32, v4i32, v4i32>;
+
+ def : NI_2VE_laneq<!cast<Instruction>(subop # "_4h8h"), neon_uimm3_bare,
+ op, VPR64, VPR64, VPR128Lo, v4i16, v4i16, v8i16>;
+
+ def : NI_2VE_laneq<!cast<Instruction>(subop # "_8h8h"), neon_uimm3_bare,
+ op, VPR128, VPR128, VPR128Lo, v8i16, v8i16, v8i16>;
+
+ // The index can only be half of the max value for a lane in a 64-bit vector
+
+ def : NI_2VE_lane<!cast<Instruction>(subop # "_2s4s"), neon_uimm1_bare,
+ op, VPR64, VPR64, VPR64, v2i32, v2i32, v2i32>;
+
+ def : NI_2VE_lane<!cast<Instruction>(subop # "_4h8h"), neon_uimm2_bare,
+ op, VPR64, VPR64, VPR64Lo, v4i16, v4i16, v4i16>;
+}
+
+defm MLA_lane_v1 : NI_2VE_v1_pat<"MLAvve", Neon_mla>;
+defm MLS_lane_v1 : NI_2VE_v1_pat<"MLSvve", Neon_mls>;
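+
+// Illustrative example (assumed assembly): the _4h8h variant matches
+// "mla v0.4h, v1.4h, v7.h[3]"; the element register is constrained to
+// v0-v15 for h lanes via VPR128Lo/VPR64Lo.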
+
+class NI_2VE_2op<bit q, bit u, bits<2> size, bits<4> opcode,
+ string asmop, string ResS, string OpS, string EleOpS,
+ Operand OpImm, RegisterOperand ResVPR,
+ RegisterOperand OpVPR, RegisterOperand EleOpVPR>
+ : NeonI_2VElem<q, u, size, opcode,
+ (outs ResVPR:$Rd), (ins OpVPR:$Rn,
+ EleOpVPR:$Re, OpImm:$Index),
+ asmop # "\t$Rd." # ResS # ", $Rn." # OpS #
+ ", $Re." # EleOpS # "[$Index]",
+ [],
+ NoItinerary> {
+ bits<3> Index;
+ bits<5> Re;
+}
+
+multiclass NI_2VE_v1_2op<bit u, bits<4> opcode, string asmop> {
+ // The vector register class for the element is always 128-bit to cover the max index
+ def _2s4s : NI_2VE_2op<0b0, u, 0b10, opcode, asmop, "2s", "2s", "s",
+ neon_uimm2_bare, VPR64, VPR64, VPR128> {
+ let Inst{11} = {Index{1}};
+ let Inst{21} = {Index{0}};
+ let Inst{20-16} = Re;
+ }
+
+ def _4s4s : NI_2VE_2op<0b1, u, 0b10, opcode, asmop, "4s", "4s", "s",
+ neon_uimm2_bare, VPR128, VPR128, VPR128> {
+ let Inst{11} = {Index{1}};
+ let Inst{21} = {Index{0}};
+ let Inst{20-16} = Re;
+ }
+
+ // Index operations on 16-bit (H) elements are restricted to using v0-v15.
+ def _4h8h : NI_2VE_2op<0b0, u, 0b01, opcode, asmop, "4h", "4h", "h",
+ neon_uimm3_bare, VPR64, VPR64, VPR128Lo> {
+ let Inst{11} = {Index{2}};
+ let Inst{21} = {Index{1}};
+ let Inst{20} = {Index{0}};
+ let Inst{19-16} = Re{3-0};
+ }
+
+ def _8h8h : NI_2VE_2op<0b1, u, 0b01, opcode, asmop, "8h", "8h", "h",
+ neon_uimm3_bare, VPR128, VPR128, VPR128Lo> {
+ let Inst{11} = {Index{2}};
+ let Inst{21} = {Index{1}};
+ let Inst{20} = {Index{0}};
+ let Inst{19-16} = Re{3-0};
+ }
+}
+
+defm MULve : NI_2VE_v1_2op<0b0, 0b1000, "mul">;
+defm SQDMULHve : NI_2VE_v1_2op<0b0, 0b1100, "sqdmulh">;
+defm SQRDMULHve : NI_2VE_v1_2op<0b0, 0b1101, "sqrdmulh">;
+
+// Pattern for lane in 128-bit vector
+class NI_2VE_mul_laneq<Instruction INST, Operand OpImm, SDPatternOperator op,
+ RegisterOperand OpVPR, RegisterOperand EleOpVPR,
+ ValueType ResTy, ValueType OpTy, ValueType EleOpTy>
+ : Pat<(ResTy (op (OpTy OpVPR:$Rn),
+ (OpTy (Neon_vduplane (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))),
+ (INST OpVPR:$Rn, EleOpVPR:$Re, OpImm:$Index)>;
+
+// Pattern for lane in 64-bit vector
+class NI_2VE_mul_lane<Instruction INST, Operand OpImm, SDPatternOperator op,
+ RegisterOperand OpVPR, RegisterOperand EleOpVPR,
+ ValueType ResTy, ValueType OpTy, ValueType EleOpTy>
+ : Pat<(ResTy (op (OpTy OpVPR:$Rn),
+ (OpTy (Neon_vduplane (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))),
+ (INST OpVPR:$Rn,
+ (SUBREG_TO_REG (i64 0), EleOpVPR:$Re, sub_64), OpImm:$Index)>;
+
+multiclass NI_2VE_mul_v1_pat<string subop, SDPatternOperator op> {
+ def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_2s4s"), neon_uimm2_bare,
+ op, VPR64, VPR128, v2i32, v2i32, v4i32>;
+
+ def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_4s4s"), neon_uimm2_bare,
+ op, VPR128, VPR128, v4i32, v4i32, v4i32>;
+
+ def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_4h8h"), neon_uimm3_bare,
+ op, VPR64, VPR128Lo, v4i16, v4i16, v8i16>;
+
+ def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_8h8h"), neon_uimm3_bare,
+ op, VPR128, VPR128Lo, v8i16, v8i16, v8i16>;
+
+ // The index can only be half of the max value for a lane in a 64-bit vector
+
+ def : NI_2VE_mul_lane<!cast<Instruction>(subop # "_2s4s"), neon_uimm1_bare,
+ op, VPR64, VPR64, v2i32, v2i32, v2i32>;
+
+ def : NI_2VE_mul_lane<!cast<Instruction>(subop # "_4h8h"), neon_uimm2_bare,
+ op, VPR64, VPR64Lo, v4i16, v4i16, v4i16>;
+}
+
+defm MUL_lane_v1 : NI_2VE_mul_v1_pat<"MULve", mul>;
+defm SQDMULH_lane_v1 : NI_2VE_mul_v1_pat<"SQDMULHve", int_arm_neon_vqdmulh>;
+defm SQRDMULH_lane_v1 : NI_2VE_mul_v1_pat<"SQRDMULHve", int_arm_neon_vqrdmulh>;
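+
+// Illustrative example (assumed assembly): "mul v0.2s, v1.2s, v2.s[1]" is
+// selected by folding the Neon_vduplane multiplicand into the lane operand
+// of MULve_2s4s.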
+
+// Variant 2
+
+multiclass NI_2VE_v2_2op<bit u, bits<4> opcode, string asmop> {
+ // The vector register class for the element is always 128-bit to cover the max index
+ def _2s4s : NI_2VE_2op<0b0, u, 0b10, opcode, asmop, "2s", "2s", "s",
+ neon_uimm2_bare, VPR64, VPR64, VPR128> {
+ let Inst{11} = {Index{1}};
+ let Inst{21} = {Index{0}};
+ let Inst{20-16} = Re;
+ }
+
+ def _4s4s : NI_2VE_2op<0b1, u, 0b10, opcode, asmop, "4s", "4s", "s",
+ neon_uimm2_bare, VPR128, VPR128, VPR128> {
+ let Inst{11} = {Index{1}};
+ let Inst{21} = {Index{0}};
+ let Inst{20-16} = Re;
+ }
+
+ // _1d2d doesn't exist!
+
+ def _2d2d : NI_2VE_2op<0b1, u, 0b11, opcode, asmop, "2d", "2d", "d",
+ neon_uimm1_bare, VPR128, VPR128, VPR128> {
+ let Inst{11} = {Index{0}};
+ let Inst{21} = 0b0;
+ let Inst{20-16} = Re;
+ }
+}
+
+defm FMULve : NI_2VE_v2_2op<0b0, 0b1001, "fmul">;
+defm FMULXve : NI_2VE_v2_2op<0b1, 0b1001, "fmulx">;
+
+class NI_2VE_mul_lane_2d<Instruction INST, Operand OpImm, SDPatternOperator op,
+ RegisterOperand OpVPR, RegisterOperand EleOpVPR,
+ ValueType ResTy, ValueType OpTy, ValueType EleOpTy,
+ SDPatternOperator coreop>
+ : Pat<(ResTy (op (OpTy OpVPR:$Rn),
+ (OpTy (coreop (EleOpTy EleOpVPR:$Re), (EleOpTy EleOpVPR:$Re))))),
+ (INST OpVPR:$Rn,
+ (SUBREG_TO_REG (i64 0), EleOpVPR:$Re, sub_64), 0)>;
+
+multiclass NI_2VE_mul_v2_pat<string subop, SDPatternOperator op> {
+ def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_2s4s"), neon_uimm2_bare,
+ op, VPR64, VPR128, v2f32, v2f32, v4f32>;
+
+ def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_4s4s"), neon_uimm2_bare,
+ op, VPR128, VPR128, v4f32, v4f32, v4f32>;
+
+ def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_2d2d"), neon_uimm1_bare,
+ op, VPR128, VPR128, v2f64, v2f64, v2f64>;
+
+ // The index can only be half of the max value for a lane in a 64-bit vector
+
+ def : NI_2VE_mul_lane<!cast<Instruction>(subop # "_2s4s"), neon_uimm1_bare,
+ op, VPR64, VPR64, v2f32, v2f32, v2f32>;
+
+ def : NI_2VE_mul_lane_2d<!cast<Instruction>(subop # "_2d2d"), neon_uimm1_bare,
+ op, VPR128, VPR64, v2f64, v2f64, v1f64,
+ BinOpFrag<(Neon_combine_2d node:$LHS, node:$RHS)>>;
+}
+
+defm FMUL_lane_v2 : NI_2VE_mul_v2_pat<"FMULve", fmul>;
+defm FMULX_lane_v2 : NI_2VE_mul_v2_pat<"FMULXve", int_aarch64_neon_vmulx>;
+
+def : Pat<(v2f32 (fmul (v2f32 (Neon_vdup (f32 FPR32:$Re))),
+ (v2f32 VPR64:$Rn))),
+ (FMULve_2s4s VPR64:$Rn, (SUBREG_TO_REG (i32 0), $Re, sub_32), 0)>;
+
+def : Pat<(v4f32 (fmul (v4f32 (Neon_vdup (f32 FPR32:$Re))),
+ (v4f32 VPR128:$Rn))),
+ (FMULve_4s4s VPR128:$Rn, (SUBREG_TO_REG (i32 0), $Re, sub_32), 0)>;
+
+def : Pat<(v2f64 (fmul (v2f64 (Neon_vdup (f64 FPR64:$Re))),
+ (v2f64 VPR128:$Rn))),
+ (FMULve_2d2d VPR128:$Rn, (SUBREG_TO_REG (i64 0), $Re, sub_64), 0)>;
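+
+// Note (illustrative): the three patterns above catch an fmul whose operand
+// was splatted from a scalar with Neon_vdup; the scalar is widened with
+// SUBREG_TO_REG and read back as lane 0, e.g. "fmul v0.4s, v1.4s, v2.s[0]".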
+
+// The following patterns use fma.
+// -ffp-contract=fast generates fma.
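+// A minimal sketch (assumed IR, not from this patch): with contraction
+// enabled, a fused multiply-add over a lane splat reaches ISel as
+//   %r = call <2 x float> @llvm.fma.v2f32(<2 x float> %dup, <2 x float> %n,
+//                                         <2 x float> %acc)
+// which the NI_2VEswap classes below select as
+// "fmla v0.2s, v1.2s, v2.s[Index]".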
+
+multiclass NI_2VE_v2<bit u, bits<4> opcode, string asmop> {
+ // The vector register class for the element is always 128-bit to cover the max index
+ def _2s4s : NI_2VE<0b0, u, 0b10, opcode, asmop, "2s", "2s", "s",
+ neon_uimm2_bare, VPR64, VPR64, VPR128> {
+ let Inst{11} = {Index{1}};
+ let Inst{21} = {Index{0}};
+ let Inst{20-16} = Re;
+ }
+
+ def _4s4s : NI_2VE<0b1, u, 0b10, opcode, asmop, "4s", "4s", "s",
+ neon_uimm2_bare, VPR128, VPR128, VPR128> {
+ let Inst{11} = {Index{1}};
+ let Inst{21} = {Index{0}};
+ let Inst{20-16} = Re;
+ }
+
+ // _1d2d doesn't exist!
+
+ def _2d2d : NI_2VE<0b1, u, 0b11, opcode, asmop, "2d", "2d", "d",
+ neon_uimm1_bare, VPR128, VPR128, VPR128> {
+ let Inst{11} = {Index{0}};
+ let Inst{21} = 0b0;
+ let Inst{20-16} = Re;
+ }
+}
+
+defm FMLAvve : NI_2VE_v2<0b0, 0b0001, "fmla">;
+defm FMLSvve : NI_2VE_v2<0b0, 0b0101, "fmls">;
+
+// Pattern for lane in 128-bit vector
+class NI_2VEswap_laneq<Instruction INST, Operand OpImm, SDPatternOperator op,
+ RegisterOperand ResVPR, RegisterOperand OpVPR,
+ ValueType ResTy, ValueType OpTy,
+ SDPatternOperator coreop>
+ : Pat<(ResTy (op (ResTy (coreop (OpTy OpVPR:$Re), (i64 OpImm:$Index))),
+ (ResTy ResVPR:$Rn), (ResTy ResVPR:$src))),
+ (INST ResVPR:$src, ResVPR:$Rn, OpVPR:$Re, OpImm:$Index)>;
+
+// Pattern for lane 0
+class NI_2VEfma_lane0<Instruction INST, SDPatternOperator op,
+ RegisterOperand ResVPR, ValueType ResTy>
+ : Pat<(ResTy (op (ResTy ResVPR:$Rn),
+ (ResTy (Neon_vdup (f32 FPR32:$Re))),
+ (ResTy ResVPR:$src))),
+ (INST ResVPR:$src, ResVPR:$Rn,
+ (SUBREG_TO_REG (i32 0), $Re, sub_32), 0)>;
+
+// Pattern for lane in 64-bit vector
+class NI_2VEswap_lane<Instruction INST, Operand OpImm, SDPatternOperator op,
+ RegisterOperand ResVPR, RegisterOperand OpVPR,
+ ValueType ResTy, ValueType OpTy,
+ SDPatternOperator coreop>
+ : Pat<(ResTy (op (ResTy (coreop (OpTy OpVPR:$Re), (i64 OpImm:$Index))),
+ (ResTy ResVPR:$Rn), (ResTy ResVPR:$src))),
+ (INST ResVPR:$src, ResVPR:$Rn,
+ (SUBREG_TO_REG (i64 0), OpVPR:$Re, sub_64), OpImm:$Index)>;
+
+// Pattern for lane in 64-bit vector
+class NI_2VEswap_lane_2d2d<Instruction INST, Operand OpImm,
+ SDPatternOperator op,
+ RegisterOperand ResVPR, RegisterOperand OpVPR,
+ ValueType ResTy, ValueType OpTy,
+ SDPatternOperator coreop>
+ : Pat<(ResTy (op (ResTy (coreop (OpTy OpVPR:$Re), (OpTy OpVPR:$Re))),
+ (ResTy ResVPR:$Rn), (ResTy ResVPR:$src))),
+ (INST ResVPR:$src, ResVPR:$Rn,
+ (SUBREG_TO_REG (i64 0), OpVPR:$Re, sub_64), 0)>;
+
+multiclass NI_2VE_fma_v2_pat<string subop, SDPatternOperator op> {
+ def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_2s4s"),
+ neon_uimm2_bare, op, VPR64, VPR128, v2f32, v4f32,
+ BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
+
+ def : NI_2VEfma_lane0<!cast<Instruction>(subop # "_2s4s"),
+ op, VPR64, v2f32>;
+
+ def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_4s4s"),
+ neon_uimm2_bare, op, VPR128, VPR128, v4f32, v4f32,
+ BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
+
+ def : NI_2VEfma_lane0<!cast<Instruction>(subop # "_4s4s"),
+ op, VPR128, v4f32>;
+
+ def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_2d2d"),
+ neon_uimm1_bare, op, VPR128, VPR128, v2f64, v2f64,
+ BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
+
+ // The index can only be half of the max value for a lane in a 64-bit vector
+
+ def : NI_2VEswap_lane<!cast<Instruction>(subop # "_2s4s"),
+ neon_uimm1_bare, op, VPR64, VPR64, v2f32, v2f32,
+ BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
+
+ def : NI_2VEswap_lane_2d2d<!cast<Instruction>(subop # "_2d2d"),
+ neon_uimm1_bare, op, VPR128, VPR64, v2f64, v1f64,
+ BinOpFrag<(Neon_combine_2d node:$LHS, node:$RHS)>>;
+}
+
+defm FMLA_lane_v2_s : NI_2VE_fma_v2_pat<"FMLAvve", fma>;
+
+// Pattern for lane 0
+class NI_2VEfms_lane0<Instruction INST, SDPatternOperator op,
+ RegisterOperand ResVPR, ValueType ResTy>
+ : Pat<(ResTy (op (ResTy (fneg ResVPR:$Rn)),
+ (ResTy (Neon_vdup (f32 FPR32:$Re))),
+ (ResTy ResVPR:$src))),
+ (INST ResVPR:$src, ResVPR:$Rn,
+ (SUBREG_TO_REG (i32 0), $Re, sub_32), 0)>;
+
+multiclass NI_2VE_fms_v2_pat<string subop, SDPatternOperator op> {
+ def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_2s4s"),
+ neon_uimm2_bare, op, VPR64, VPR128, v2f32, v4f32,
+ BinOpFrag<(fneg (Neon_vduplane node:$LHS, node:$RHS))>>;
+
+ def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_2s4s"),
+ neon_uimm2_bare, op, VPR64, VPR128, v2f32, v4f32,
+ BinOpFrag<(Neon_vduplane
+ (fneg node:$LHS), node:$RHS)>>;
+
+ def : NI_2VEfms_lane0<!cast<Instruction>(subop # "_2s4s"),
+ op, VPR64, v2f32>;
+
+ def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_4s4s"),
+ neon_uimm2_bare, op, VPR128, VPR128, v4f32, v4f32,
+ BinOpFrag<(fneg (Neon_vduplane
+ node:$LHS, node:$RHS))>>;
+
+ def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_4s4s"),
+ neon_uimm2_bare, op, VPR128, VPR128, v4f32, v4f32,
+ BinOpFrag<(Neon_vduplane
+ (fneg node:$LHS), node:$RHS)>>;
+
+ def : NI_2VEfms_lane0<!cast<Instruction>(subop # "_4s4s"),
+ op, VPR128, v4f32>;
+
+ def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_2d2d"),
+ neon_uimm1_bare, op, VPR128, VPR128, v2f64, v2f64,
+ BinOpFrag<(fneg (Neon_vduplane
+ node:$LHS, node:$RHS))>>;
+
+ def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_2d2d"),
+ neon_uimm1_bare, op, VPR128, VPR128, v2f64, v2f64,
+ BinOpFrag<(Neon_vduplane
+ (fneg node:$LHS), node:$RHS)>>;
+
+ // The index can only be half of the max value for a lane in a 64-bit vector
+
+ def : NI_2VEswap_lane<!cast<Instruction>(subop # "_2s4s"),
+ neon_uimm1_bare, op, VPR64, VPR64, v2f32, v2f32,
+ BinOpFrag<(fneg (Neon_vduplane
+ node:$LHS, node:$RHS))>>;
+
+ def : NI_2VEswap_lane<!cast<Instruction>(subop # "_2s4s"),
+ neon_uimm1_bare, op, VPR64, VPR64, v2f32, v2f32,
+ BinOpFrag<(Neon_vduplane
+ (fneg node:$LHS), node:$RHS)>>;
+
+ def : NI_2VEswap_lane<!cast<Instruction>(subop # "_4s4s"),
+ neon_uimm1_bare, op, VPR128, VPR64, v4f32, v2f32,
+ BinOpFrag<(fneg (Neon_vduplane node:$LHS, node:$RHS))>>;
+
+ def : NI_2VEswap_lane<!cast<Instruction>(subop # "_4s4s"),
+ neon_uimm1_bare, op, VPR128, VPR64, v4f32, v2f32,
+ BinOpFrag<(Neon_vduplane (fneg node:$LHS), node:$RHS)>>;
+
+ def : NI_2VEswap_lane_2d2d<!cast<Instruction>(subop # "_2d2d"),
+ neon_uimm1_bare, op, VPR128, VPR64, v2f64, v1f64,
+ BinOpFrag<(fneg (Neon_combine_2d
+ node:$LHS, node:$RHS))>>;
+
+ def : NI_2VEswap_lane_2d2d<!cast<Instruction>(subop # "_2d2d"),
+ neon_uimm1_bare, op, VPR128, VPR64, v2f64, v1f64,
+ BinOpFrag<(Neon_combine_2d
+ (fneg node:$LHS), (fneg node:$RHS))>>;
+}
+
+defm FMLS_lane_v2_s : NI_2VE_fms_v2_pat<"FMLSvve", fma>;
+
+// Variant 3: Long type
+// E.g. SMLAL : 4S/4H/H (v0-v15), 2D/2S/S
+// SMLAL2: 4S/8H/H (v0-v15), 2D/4S/S
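+// Illustrative example (assumed assembly): SMLALvve_4s4h matches
+// "smlal v0.4s, v1.4h, v2.h[3]", and the "2" forms read the high half of a
+// 128-bit source, e.g. "smlal2 v0.4s, v1.8h, v2.h[3]".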
+
+multiclass NI_2VE_v3<bit u, bits<4> opcode, string asmop> {
+ // The vector register class for the element is always 128-bit to cover the max index
+ def _2d2s : NI_2VE<0b0, u, 0b10, opcode, asmop, "2d", "2s", "s",
+ neon_uimm2_bare, VPR128, VPR64, VPR128> {
+ let Inst{11} = {Index{1}};
+ let Inst{21} = {Index{0}};
+ let Inst{20-16} = Re;
+ }
+
+ def _2d4s : NI_2VE<0b1, u, 0b10, opcode, asmop # "2", "2d", "4s", "s",
+ neon_uimm2_bare, VPR128, VPR128, VPR128> {
+ let Inst{11} = {Index{1}};
+ let Inst{21} = {Index{0}};
+ let Inst{20-16} = Re;
+ }
+
+ // Index operations on 16-bit (H) elements are restricted to using v0-v15.
+ def _4s8h : NI_2VE<0b1, u, 0b01, opcode, asmop # "2", "4s", "8h", "h",
+ neon_uimm3_bare, VPR128, VPR128, VPR128Lo> {
+ let Inst{11} = {Index{2}};
+ let Inst{21} = {Index{1}};
+ let Inst{20} = {Index{0}};
+ let Inst{19-16} = Re{3-0};
+ }
+
+ def _4s4h : NI_2VE<0b0, u, 0b01, opcode, asmop, "4s", "4h", "h",
+ neon_uimm3_bare, VPR128, VPR64, VPR128Lo> {
+ let Inst{11} = {Index{2}};
+ let Inst{21} = {Index{1}};
+ let Inst{20} = {Index{0}};
+ let Inst{19-16} = Re{3-0};
+ }
+}
+
+defm SMLALvve : NI_2VE_v3<0b0, 0b0010, "smlal">;
+defm UMLALvve : NI_2VE_v3<0b1, 0b0010, "umlal">;
+defm SMLSLvve : NI_2VE_v3<0b0, 0b0110, "smlsl">;
+defm UMLSLvve : NI_2VE_v3<0b1, 0b0110, "umlsl">;
+defm SQDMLALvve : NI_2VE_v3<0b0, 0b0011, "sqdmlal">;
+defm SQDMLSLvve : NI_2VE_v3<0b0, 0b0111, "sqdmlsl">;
+
+multiclass NI_2VE_v3_2op<bit u, bits<4> opcode, string asmop> {
+ // The vector register class for the element is always 128-bit to cover the max index
+ def _2d2s : NI_2VE_2op<0b0, u, 0b10, opcode, asmop, "2d", "2s", "s",
+ neon_uimm2_bare, VPR128, VPR64, VPR128> {
+ let Inst{11} = {Index{1}};
+ let Inst{21} = {Index{0}};
+ let Inst{20-16} = Re;
+ }
+
+ def _2d4s : NI_2VE_2op<0b1, u, 0b10, opcode, asmop # "2", "2d", "4s", "s",
+ neon_uimm2_bare, VPR128, VPR128, VPR128> {
+ let Inst{11} = {Index{1}};
+ let Inst{21} = {Index{0}};
+ let Inst{20-16} = Re;
+ }
+
+ // Index operations on 16-bit (H) elements are restricted to using v0-v15.
+ def _4s8h : NI_2VE_2op<0b1, u, 0b01, opcode, asmop # "2", "4s", "8h", "h",
+ neon_uimm3_bare, VPR128, VPR128, VPR128Lo> {
+ let Inst{11} = {Index{2}};
+ let Inst{21} = {Index{1}};
+ let Inst{20} = {Index{0}};
+ let Inst{19-16} = Re{3-0};
+ }
+
+ def _4s4h : NI_2VE_2op<0b0, u, 0b01, opcode, asmop, "4s", "4h", "h",
+ neon_uimm3_bare, VPR128, VPR64, VPR128Lo> {
+ let Inst{11} = {Index{2}};
+ let Inst{21} = {Index{1}};
+ let Inst{20} = {Index{0}};
+ let Inst{19-16} = Re{3-0};
+ }
+}
+
+defm SMULLve : NI_2VE_v3_2op<0b0, 0b1010, "smull">;
+defm UMULLve : NI_2VE_v3_2op<0b1, 0b1010, "umull">;
+defm SQDMULLve : NI_2VE_v3_2op<0b0, 0b1011, "sqdmull">;
+
+def : Pat<(v1f64 (scalar_to_vector (f64 FPR64:$src))),
+ (FMOVdd $src)>;
+def : Pat<(v1f32 (scalar_to_vector (f32 FPR32:$src))),
+ (FMOVss $src)>;
+
+// Pattern for lane in 128-bit vector
+class NI_2VEL2_laneq<Instruction INST, Operand OpImm, SDPatternOperator op,
+ RegisterOperand EleOpVPR, ValueType ResTy,
+ ValueType OpTy, ValueType EleOpTy, ValueType HalfOpTy,
+ SDPatternOperator hiop>
+ : Pat<(ResTy (op (ResTy VPR128:$src),
+ (HalfOpTy (hiop (OpTy VPR128:$Rn))),
+ (HalfOpTy (Neon_vduplane
+ (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))),
+ (INST VPR128:$src, VPR128:$Rn, EleOpVPR:$Re, OpImm:$Index)>;
+
+// Pattern for lane in 64-bit vector
+class NI_2VEL2_lane<Instruction INST, Operand OpImm, SDPatternOperator op,
+ RegisterOperand EleOpVPR, ValueType ResTy,
+ ValueType OpTy, ValueType EleOpTy, ValueType HalfOpTy,
+ SDPatternOperator hiop>
+ : Pat<(ResTy (op (ResTy VPR128:$src),
+ (HalfOpTy (hiop (OpTy VPR128:$Rn))),
+ (HalfOpTy (Neon_vduplane
+ (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))),
+ (INST VPR128:$src, VPR128:$Rn,
+ (SUBREG_TO_REG (i64 0), EleOpVPR:$Re, sub_64), OpImm:$Index)>;
+
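+// Pattern for fixed lane 0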
+class NI_2VEL2_lane0<Instruction INST, SDPatternOperator op,
+ ValueType ResTy, ValueType OpTy, ValueType HalfOpTy,
+ SDPatternOperator hiop, Instruction DupInst>
+ : Pat<(ResTy (op (ResTy VPR128:$src),
+ (HalfOpTy (hiop (OpTy VPR128:$Rn))),
+ (HalfOpTy (Neon_vdup (i32 GPR32:$Re))))),
+ (INST VPR128:$src, VPR128:$Rn, (DupInst $Re), 0)>;
+
+multiclass NI_2VEL_v3_pat<string subop, SDPatternOperator op> {
+ def : NI_2VE_laneq<!cast<Instruction>(subop # "_4s4h"), neon_uimm3_bare,
+ op, VPR128, VPR64, VPR128Lo, v4i32, v4i16, v8i16>;
+
+ def : NI_2VE_laneq<!cast<Instruction>(subop # "_2d2s"), neon_uimm2_bare,
+ op, VPR128, VPR64, VPR128, v2i64, v2i32, v4i32>;
+
+ def : NI_2VEL2_laneq<!cast<Instruction>(subop # "_4s8h"), neon_uimm3_bare,
+ op, VPR128Lo, v4i32, v8i16, v8i16, v4i16, Neon_High8H>;
+
+ def : NI_2VEL2_laneq<!cast<Instruction>(subop # "_2d4s"), neon_uimm2_bare,
+ op, VPR128, v2i64, v4i32, v4i32, v2i32, Neon_High4S>;
+
+ def : NI_2VEL2_lane0<!cast<Instruction>(subop # "_4s8h"),
+ op, v4i32, v8i16, v4i16, Neon_High8H, DUP8h>;
+
+ def : NI_2VEL2_lane0<!cast<Instruction>(subop # "_2d4s"),
+ op, v2i64, v4i32, v2i32, Neon_High4S, DUP4s>;
+
+ // The index can only be half of the max value for a lane in a 64-bit vector
+
+ def : NI_2VE_lane<!cast<Instruction>(subop # "_4s4h"), neon_uimm2_bare,
+ op, VPR128, VPR64, VPR64Lo, v4i32, v4i16, v4i16>;
+
+ def : NI_2VE_lane<!cast<Instruction>(subop # "_2d2s"), neon_uimm1_bare,
+ op, VPR128, VPR64, VPR64, v2i64, v2i32, v2i32>;
+
+ def : NI_2VEL2_lane<!cast<Instruction>(subop # "_4s8h"), neon_uimm2_bare,
+ op, VPR64Lo, v4i32, v8i16, v4i16, v4i16, Neon_High8H>;
+
+ def : NI_2VEL2_lane<!cast<Instruction>(subop # "_2d4s"), neon_uimm1_bare,
+ op, VPR64, v2i64, v4i32, v2i32, v2i32, Neon_High4S>;
+}
+
+defm SMLAL_lane_v3 : NI_2VEL_v3_pat<"SMLALvve", Neon_smlal>;
+defm UMLAL_lane_v3 : NI_2VEL_v3_pat<"UMLALvve", Neon_umlal>;
+defm SMLSL_lane_v3 : NI_2VEL_v3_pat<"SMLSLvve", Neon_smlsl>;
+defm UMLSL_lane_v3 : NI_2VEL_v3_pat<"UMLSLvve", Neon_umlsl>;
+
+// Pattern for lane in 128-bit vector
+class NI_2VEL2_mul_laneq<Instruction INST, Operand OpImm, SDPatternOperator op,
+ RegisterOperand EleOpVPR, ValueType ResTy,
+ ValueType OpTy, ValueType EleOpTy, ValueType HalfOpTy,
+ SDPatternOperator hiop>
+ : Pat<(ResTy (op
+ (HalfOpTy (hiop (OpTy VPR128:$Rn))),
+ (HalfOpTy (Neon_vduplane
+ (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))),
+ (INST VPR128:$Rn, EleOpVPR:$Re, OpImm:$Index)>;
+
+// Pattern for lane in 64-bit vector
+class NI_2VEL2_mul_lane<Instruction INST, Operand OpImm, SDPatternOperator op,
+ RegisterOperand EleOpVPR, ValueType ResTy,
+ ValueType OpTy, ValueType EleOpTy, ValueType HalfOpTy,
+ SDPatternOperator hiop>
+ : Pat<(ResTy (op
+ (HalfOpTy (hiop (OpTy VPR128:$Rn))),
+ (HalfOpTy (Neon_vduplane
+ (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))),
+ (INST VPR128:$Rn,
+ (SUBREG_TO_REG (i64 0), EleOpVPR:$Re, sub_64), OpImm:$Index)>;
+
+// Pattern for fixed lane 0
+class NI_2VEL2_mul_lane0<Instruction INST, SDPatternOperator op,
+ ValueType ResTy, ValueType OpTy, ValueType HalfOpTy,
+ SDPatternOperator hiop, Instruction DupInst>
+ : Pat<(ResTy (op
+ (HalfOpTy (hiop (OpTy VPR128:$Rn))),
+ (HalfOpTy (Neon_vdup (i32 GPR32:$Re))))),
+ (INST VPR128:$Rn, (DupInst $Re), 0)>;
+
+multiclass NI_2VEL_mul_v3_pat<string subop, SDPatternOperator op> {
+ def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_4s4h"), neon_uimm3_bare,
+ op, VPR64, VPR128Lo, v4i32, v4i16, v8i16>;
+
+ def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_2d2s"), neon_uimm2_bare,
+ op, VPR64, VPR128, v2i64, v2i32, v4i32>;
+
+ def : NI_2VEL2_mul_laneq<!cast<Instruction>(subop # "_4s8h"), neon_uimm3_bare,
+ op, VPR128Lo, v4i32, v8i16, v8i16, v4i16, Neon_High8H>;
+
+ def : NI_2VEL2_mul_laneq<!cast<Instruction>(subop # "_2d4s"), neon_uimm2_bare,
+ op, VPR128, v2i64, v4i32, v4i32, v2i32, Neon_High4S>;
+
+ def : NI_2VEL2_mul_lane0<!cast<Instruction>(subop # "_4s8h"),
+ op, v4i32, v8i16, v4i16, Neon_High8H, DUP8h>;
+
+ def : NI_2VEL2_mul_lane0<!cast<Instruction>(subop # "_2d4s"),
+ op, v2i64, v4i32, v2i32, Neon_High4S, DUP4s>;
+
+ // The index can only be half of the max value for a lane in a 64-bit vector
+
+ def : NI_2VE_mul_lane<!cast<Instruction>(subop # "_4s4h"), neon_uimm2_bare,
+ op, VPR64, VPR64Lo, v4i32, v4i16, v4i16>;
+
+ def : NI_2VE_mul_lane<!cast<Instruction>(subop # "_2d2s"), neon_uimm1_bare,
+ op, VPR64, VPR64, v2i64, v2i32, v2i32>;
+
+ def : NI_2VEL2_mul_lane<!cast<Instruction>(subop # "_4s8h"), neon_uimm2_bare,
+ op, VPR64Lo, v4i32, v8i16, v4i16, v4i16, Neon_High8H>;
+
+ def : NI_2VEL2_mul_lane<!cast<Instruction>(subop # "_2d4s"), neon_uimm1_bare,
+ op, VPR64, v2i64, v4i32, v2i32, v2i32, Neon_High4S>;
+}
+
+defm SMULL_lane_v3 : NI_2VEL_mul_v3_pat<"SMULLve", int_arm_neon_vmulls>;
+defm UMULL_lane_v3 : NI_2VEL_mul_v3_pat<"UMULLve", int_arm_neon_vmullu>;
+defm SQDMULL_lane_v3 : NI_2VEL_mul_v3_pat<"SQDMULLve", int_arm_neon_vqdmull>;
+
+multiclass NI_qdma<SDPatternOperator op> {
+ def _4s : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm),
+ (op node:$Ra,
+ (v4i32 (int_arm_neon_vqdmull node:$Rn, node:$Rm)))>;
+
+ def _2d : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm),
+ (op node:$Ra,
+ (v2i64 (int_arm_neon_vqdmull node:$Rn, node:$Rm)))>;
+}
+
+defm Neon_qdmlal : NI_qdma<int_arm_neon_vqadds>;
+defm Neon_qdmlsl : NI_qdma<int_arm_neon_vqsubs>;
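+
+// Note (illustrative): these PatFrags compose sqdmlal/sqdmlsl as a
+// saturating add/sub of an sqdmull result, so e.g.
+// "sqdmlal v0.4s, v1.4h, v2.h[1]" needs no dedicated intrinsic here.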
+
+multiclass NI_2VEL_v3_qdma_pat<string subop, string op> {
+ def : NI_2VE_laneq<!cast<Instruction>(subop # "_4s4h"), neon_uimm3_bare,
+ !cast<PatFrag>(op # "_4s"), VPR128, VPR64, VPR128Lo,
+ v4i32, v4i16, v8i16>;
+
+ def : NI_2VE_laneq<!cast<Instruction>(subop # "_2d2s"), neon_uimm2_bare,
+ !cast<PatFrag>(op # "_2d"), VPR128, VPR64, VPR128,
+ v2i64, v2i32, v4i32>;
+
+ def : NI_2VEL2_laneq<!cast<Instruction>(subop # "_4s8h"), neon_uimm3_bare,
+ !cast<PatFrag>(op # "_4s"), VPR128Lo,
+ v4i32, v8i16, v8i16, v4i16, Neon_High8H>;
+
+ def : NI_2VEL2_laneq<!cast<Instruction>(subop # "_2d4s"), neon_uimm2_bare,
+ !cast<PatFrag>(op # "_2d"), VPR128,
+ v2i64, v4i32, v4i32, v2i32, Neon_High4S>;
+
+ def : NI_2VEL2_lane0<!cast<Instruction>(subop # "_4s8h"),
+ !cast<PatFrag>(op # "_4s"),
+ v4i32, v8i16, v4i16, Neon_High8H, DUP8h>;
+
+ def : NI_2VEL2_lane0<!cast<Instruction>(subop # "_2d4s"),
+ !cast<PatFrag>(op # "_2d"),
+ v2i64, v4i32, v2i32, Neon_High4S, DUP4s>;
+
+ // The index can only be half of the max value for a lane in a 64-bit vector
+
+ def : NI_2VE_lane<!cast<Instruction>(subop # "_4s4h"), neon_uimm2_bare,
+ !cast<PatFrag>(op # "_4s"), VPR128, VPR64, VPR64Lo,
+ v4i32, v4i16, v4i16>;
+
+ def : NI_2VE_lane<!cast<Instruction>(subop # "_2d2s"), neon_uimm1_bare,
+ !cast<PatFrag>(op # "_2d"), VPR128, VPR64, VPR64,
+ v2i64, v2i32, v2i32>;
+
+ def : NI_2VEL2_lane<!cast<Instruction>(subop # "_4s8h"), neon_uimm2_bare,
+ !cast<PatFrag>(op # "_4s"), VPR64Lo,
+ v4i32, v8i16, v4i16, v4i16, Neon_High8H>;
+
+ def : NI_2VEL2_lane<!cast<Instruction>(subop # "_2d4s"), neon_uimm1_bare,
+ !cast<PatFrag>(op # "_2d"), VPR64,
+ v2i64, v4i32, v2i32, v2i32, Neon_High4S>;
+}
+
+defm SQDMLAL_lane_v3 : NI_2VEL_v3_qdma_pat<"SQDMLALvve", "Neon_qdmlal">;
+defm SQDMLSL_lane_v3 : NI_2VEL_v3_qdma_pat<"SQDMLSLvve", "Neon_qdmlsl">;
+
+// End of implementation for instruction class (3V Elem)
+
+class NeonI_REV<string asmop, string Res, bits<2> size, bit Q, bit U,
+ bits<5> opcode, RegisterOperand ResVPR, ValueType ResTy,
+ SDPatternOperator Neon_Rev>
+ : NeonI_2VMisc<Q, U, size, opcode,
+ (outs ResVPR:$Rd), (ins ResVPR:$Rn),
+ asmop # "\t$Rd." # Res # ", $Rn." # Res,
+ [(set (ResTy ResVPR:$Rd),
+ (ResTy (Neon_Rev (ResTy ResVPR:$Rn))))],
+ NoItinerary>;
+
+def REV64_16b : NeonI_REV<"rev64", "16b", 0b00, 0b1, 0b0, 0b00000, VPR128,
+ v16i8, Neon_rev64>;
+def REV64_8h : NeonI_REV<"rev64", "8h", 0b01, 0b1, 0b0, 0b00000, VPR128,
+ v8i16, Neon_rev64>;
+def REV64_4s : NeonI_REV<"rev64", "4s", 0b10, 0b1, 0b0, 0b00000, VPR128,
+ v4i32, Neon_rev64>;
+def REV64_8b : NeonI_REV<"rev64", "8b", 0b00, 0b0, 0b0, 0b00000, VPR64,
+ v8i8, Neon_rev64>;
+def REV64_4h : NeonI_REV<"rev64", "4h", 0b01, 0b0, 0b0, 0b00000, VPR64,
+ v4i16, Neon_rev64>;
+def REV64_2s : NeonI_REV<"rev64", "2s", 0b10, 0b0, 0b0, 0b00000, VPR64,
+ v2i32, Neon_rev64>;
+
+def : Pat<(v4f32 (Neon_rev64 (v4f32 VPR128:$Rn))), (REV64_4s VPR128:$Rn)>;
+def : Pat<(v2f32 (Neon_rev64 (v2f32 VPR64:$Rn))), (REV64_2s VPR64:$Rn)>;
+
+def REV32_16b : NeonI_REV<"rev32", "16b", 0b00, 0b1, 0b1, 0b00000, VPR128,
+ v16i8, Neon_rev32>;
+def REV32_8h : NeonI_REV<"rev32", "8h", 0b01, 0b1, 0b1, 0b00000, VPR128,
+ v8i16, Neon_rev32>;
+def REV32_8b : NeonI_REV<"rev32", "8b", 0b00, 0b0, 0b1, 0b00000, VPR64,
+ v8i8, Neon_rev32>;
+def REV32_4h : NeonI_REV<"rev32", "4h", 0b01, 0b0, 0b1, 0b00000, VPR64,
+ v4i16, Neon_rev32>;
+
+def REV16_16b : NeonI_REV<"rev16", "16b", 0b00, 0b1, 0b0, 0b00001, VPR128,
+ v16i8, Neon_rev16>;
+def REV16_8b : NeonI_REV<"rev16", "8b", 0b00, 0b0, 0b0, 0b00001, VPR64,
+ v8i8, Neon_rev16>;
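+
+// Illustrative example (assumed assembly): "rev64 v0.4s, v1.4s" reverses
+// the order of the 32-bit elements within each 64-bit doubleword of V1.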
+
+multiclass NeonI_PairwiseAdd<string asmop, bit U, bits<5> opcode,
+ SDPatternOperator Neon_Padd> {
+ def 16b8h : NeonI_2VMisc<0b1, U, 0b00, opcode,
+ (outs VPR128:$Rd), (ins VPR128:$Rn),
+ asmop # "\t$Rd.8h, $Rn.16b",
+ [(set (v8i16 VPR128:$Rd),
+ (v8i16 (Neon_Padd (v16i8 VPR128:$Rn))))],
+ NoItinerary>;
+
+ def 8b4h : NeonI_2VMisc<0b0, U, 0b00, opcode,
+ (outs VPR64:$Rd), (ins VPR64:$Rn),
+ asmop # "\t$Rd.4h, $Rn.8b",
+ [(set (v4i16 VPR64:$Rd),
+ (v4i16 (Neon_Padd (v8i8 VPR64:$Rn))))],
+ NoItinerary>;
+
+ def 8h4s : NeonI_2VMisc<0b1, U, 0b01, opcode,
+ (outs VPR128:$Rd), (ins VPR128:$Rn),
+ asmop # "\t$Rd.4s, $Rn.8h",
+ [(set (v4i32 VPR128:$Rd),
+ (v4i32 (Neon_Padd (v8i16 VPR128:$Rn))))],
+ NoItinerary>;
+
+ def 4h2s : NeonI_2VMisc<0b0, U, 0b01, opcode,
+ (outs VPR64:$Rd), (ins VPR64:$Rn),
+ asmop # "\t$Rd.2s, $Rn.4h",
+ [(set (v2i32 VPR64:$Rd),
+ (v2i32 (Neon_Padd (v4i16 VPR64:$Rn))))],
+ NoItinerary>;
+
+ def 4s2d : NeonI_2VMisc<0b1, U, 0b10, opcode,
+ (outs VPR128:$Rd), (ins VPR128:$Rn),
+ asmop # "\t$Rd.2d, $Rn.4s",
+ [(set (v2i64 VPR128:$Rd),
+ (v2i64 (Neon_Padd (v4i32 VPR128:$Rn))))],
+ NoItinerary>;
+
+ def 2s1d : NeonI_2VMisc<0b0, U, 0b10, opcode,
+ (outs VPR64:$Rd), (ins VPR64:$Rn),
+ asmop # "\t$Rd.1d, $Rn.2s",
+ [(set (v1i64 VPR64:$Rd),
+ (v1i64 (Neon_Padd (v2i32 VPR64:$Rn))))],
+ NoItinerary>;
+}
+
+defm SADDLP : NeonI_PairwiseAdd<"saddlp", 0b0, 0b00010,
+ int_arm_neon_vpaddls>;
+defm UADDLP : NeonI_PairwiseAdd<"uaddlp", 0b1, 0b00010,
+ int_arm_neon_vpaddlu>;
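+
+// Illustrative example (assumed assembly): "saddlp v0.4s, v1.8h" adds
+// adjacent pairs of signed halfwords into four 32-bit sums.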
+
+multiclass NeonI_PairwiseAddAcc<string asmop, bit U, bits<5> opcode,
+ SDPatternOperator Neon_Padd> {
+ let Constraints = "$src = $Rd" in {
+ def 16b8h : NeonI_2VMisc<0b1, U, 0b00, opcode,
+ (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
+ asmop # "\t$Rd.8h, $Rn.16b",
+ [(set (v8i16 VPR128:$Rd),
+ (v8i16 (Neon_Padd
+ (v8i16 VPR128:$src), (v16i8 VPR128:$Rn))))],
+ NoItinerary>;
+
+ def 8b4h : NeonI_2VMisc<0b0, U, 0b00, opcode,
+ (outs VPR64:$Rd), (ins VPR64:$src, VPR64:$Rn),
+ asmop # "\t$Rd.4h, $Rn.8b",
+ [(set (v4i16 VPR64:$Rd),
+ (v4i16 (Neon_Padd
+ (v4i16 VPR64:$src), (v8i8 VPR64:$Rn))))],
+ NoItinerary>;
+
+ def 8h4s : NeonI_2VMisc<0b1, U, 0b01, opcode,
+ (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
+ asmop # "\t$Rd.4s, $Rn.8h",
+ [(set (v4i32 VPR128:$Rd),
+ (v4i32 (Neon_Padd
+ (v4i32 VPR128:$src), (v8i16 VPR128:$Rn))))],
+ NoItinerary>;
+
+ def 4h2s : NeonI_2VMisc<0b0, U, 0b01, opcode,
+ (outs VPR64:$Rd), (ins VPR64:$src, VPR64:$Rn),
+ asmop # "\t$Rd.2s, $Rn.4h",
+ [(set (v2i32 VPR64:$Rd),
+ (v2i32 (Neon_Padd
+ (v2i32 VPR64:$src), (v4i16 VPR64:$Rn))))],
+ NoItinerary>;
+
+ def 4s2d : NeonI_2VMisc<0b1, U, 0b10, opcode,
+ (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
+ asmop # "\t$Rd.2d, $Rn.4s",
+ [(set (v2i64 VPR128:$Rd),
+ (v2i64 (Neon_Padd
+ (v2i64 VPR128:$src), (v4i32 VPR128:$Rn))))],
+ NoItinerary>;
+
+ def 2s1d : NeonI_2VMisc<0b0, U, 0b10, opcode,
+ (outs VPR64:$Rd), (ins VPR64:$src, VPR64:$Rn),
+ asmop # "\t$Rd.1d, $Rn.2s",
+ [(set (v1i64 VPR64:$Rd),
+ (v1i64 (Neon_Padd
+ (v1i64 VPR64:$src), (v2i32 VPR64:$Rn))))],
+ NoItinerary>;
+ }
+}
+
+defm SADALP : NeonI_PairwiseAddAcc<"sadalp", 0b0, 0b00110,
+ int_arm_neon_vpadals>;
+defm UADALP : NeonI_PairwiseAddAcc<"uadalp", 0b1, 0b00110,
+ int_arm_neon_vpadalu>;
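
SADALP/UADALP are the accumulating variants: the pairwise sums are added into the destination, which is why the multiclass above ties $src to $Rd. A scalar sketch of the read-modify-write behaviour, with illustrative values:

#include <cstdint>
#include <cstdio>

int main() {
  int8_t rn[8] = {1, 1, 2, 2, 3, 3, 4, 4};
  int16_t rd[4] = {10, 20, 30, 40}; // accumulator, read and written
  for (int i = 0; i < 4; ++i)
    rd[i] += (int16_t)rn[2 * i] + (int16_t)rn[2 * i + 1];
  for (int i = 0; i < 4; ++i)
    printf("%d ", rd[i]);           // prints: 12 24 36 48
  return 0;
}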
+
+multiclass NeonI_2VMisc_BHSDsize_1Arg<string asmop, bit U, bits<5> opcode> {
+ def 16b : NeonI_2VMisc<0b1, U, 0b00, opcode,
+ (outs VPR128:$Rd), (ins VPR128:$Rn),
+ asmop # "\t$Rd.16b, $Rn.16b",
+ [], NoItinerary>;
+
+ def 8h : NeonI_2VMisc<0b1, U, 0b01, opcode,
+ (outs VPR128:$Rd), (ins VPR128:$Rn),
+ asmop # "\t$Rd.8h, $Rn.8h",
+ [], NoItinerary>;
+
+ def 4s : NeonI_2VMisc<0b1, U, 0b10, opcode,
+ (outs VPR128:$Rd), (ins VPR128:$Rn),
+ asmop # "\t$Rd.4s, $Rn.4s",
+ [], NoItinerary>;
+
+ def 2d : NeonI_2VMisc<0b1, U, 0b11, opcode,
+ (outs VPR128:$Rd), (ins VPR128:$Rn),
+ asmop # "\t$Rd.2d, $Rn.2d",
+ [], NoItinerary>;
+
+ def 8b : NeonI_2VMisc<0b0, U, 0b00, opcode,
+ (outs VPR64:$Rd), (ins VPR64:$Rn),
+ asmop # "\t$Rd.8b, $Rn.8b",
+ [], NoItinerary>;
+
+ def 4h : NeonI_2VMisc<0b0, U, 0b01, opcode,
+ (outs VPR64:$Rd), (ins VPR64:$Rn),
+ asmop # "\t$Rd.4h, $Rn.4h",
+ [], NoItinerary>;
+
+ def 2s : NeonI_2VMisc<0b0, U, 0b10, opcode,
+ (outs VPR64:$Rd), (ins VPR64:$Rn),
+ asmop # "\t$Rd.2s, $Rn.2s",
+ [], NoItinerary>;
+}
+
+defm SQABS : NeonI_2VMisc_BHSDsize_1Arg<"sqabs", 0b0, 0b00111>;
+defm SQNEG : NeonI_2VMisc_BHSDsize_1Arg<"sqneg", 0b1, 0b00111>;
+defm ABS : NeonI_2VMisc_BHSDsize_1Arg<"abs", 0b0, 0b01011>;
+defm NEG : NeonI_2VMisc_BHSDsize_1Arg<"neg", 0b1, 0b01011>;
+
+multiclass NeonI_2VMisc_BHSD_1Arg_Pattern<string Prefix,
+ SDPatternOperator Neon_Op> {
+ def : Pat<(v16i8 (Neon_Op (v16i8 VPR128:$Rn))),
+ (v16i8 (!cast<Instruction>(Prefix # 16b) (v16i8 VPR128:$Rn)))>;
+
+ def : Pat<(v8i16 (Neon_Op (v8i16 VPR128:$Rn))),
+ (v8i16 (!cast<Instruction>(Prefix # 8h) (v8i16 VPR128:$Rn)))>;
+
+ def : Pat<(v4i32 (Neon_Op (v4i32 VPR128:$Rn))),
+ (v4i32 (!cast<Instruction>(Prefix # 4s) (v4i32 VPR128:$Rn)))>;
+
+ def : Pat<(v2i64 (Neon_Op (v2i64 VPR128:$Rn))),
+ (v2i64 (!cast<Instruction>(Prefix # 2d) (v2i64 VPR128:$Rn)))>;
+
+ def : Pat<(v8i8 (Neon_Op (v8i8 VPR64:$Rn))),
+ (v8i8 (!cast<Instruction>(Prefix # 8b) (v8i8 VPR64:$Rn)))>;
+
+ def : Pat<(v4i16 (Neon_Op (v4i16 VPR64:$Rn))),
+ (v4i16 (!cast<Instruction>(Prefix # 4h) (v4i16 VPR64:$Rn)))>;
+
+ def : Pat<(v2i32 (Neon_Op (v2i32 VPR64:$Rn))),
+ (v2i32 (!cast<Instruction>(Prefix # 2s) (v2i32 VPR64:$Rn)))>;
+}
+
+defm : NeonI_2VMisc_BHSD_1Arg_Pattern<"SQABS", int_arm_neon_vqabs>;
+defm : NeonI_2VMisc_BHSD_1Arg_Pattern<"SQNEG", int_arm_neon_vqneg>;
+defm : NeonI_2VMisc_BHSD_1Arg_Pattern<"ABS", int_arm_neon_vabs>;
+
+def : Pat<(v16i8 (sub
+ (v16i8 Neon_AllZero),
+ (v16i8 VPR128:$Rn))),
+ (v16i8 (NEG16b (v16i8 VPR128:$Rn)))>;
+def : Pat<(v8i8 (sub
+ (v8i8 Neon_AllZero),
+ (v8i8 VPR64:$Rn))),
+ (v8i8 (NEG8b (v8i8 VPR64:$Rn)))>;
+def : Pat<(v8i16 (sub
+ (v8i16 (bitconvert (v16i8 Neon_AllZero))),
+ (v8i16 VPR128:$Rn))),
+ (v8i16 (NEG8h (v8i16 VPR128:$Rn)))>;
+def : Pat<(v4i16 (sub
+ (v4i16 (bitconvert (v8i8 Neon_AllZero))),
+ (v4i16 VPR64:$Rn))),
+ (v4i16 (NEG4h (v4i16 VPR64:$Rn)))>;
+def : Pat<(v4i32 (sub
+ (v4i32 (bitconvert (v16i8 Neon_AllZero))),
+ (v4i32 VPR128:$Rn))),
+ (v4i32 (NEG4s (v4i32 VPR128:$Rn)))>;
+def : Pat<(v2i32 (sub
+ (v2i32 (bitconvert (v8i8 Neon_AllZero))),
+ (v2i32 VPR64:$Rn))),
+ (v2i32 (NEG2s (v2i32 VPR64:$Rn)))>;
+def : Pat<(v2i64 (sub
+ (v2i64 (bitconvert (v16i8 Neon_AllZero))),
+ (v2i64 VPR128:$Rn))),
+ (v2i64 (NEG2d (v2i64 VPR128:$Rn)))>;
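
These sub-from-zero patterns exist because plain NEG wraps on the most negative value, whereas SQNEG (selected through the vqneg intrinsic above) saturates. A scalar i8 sketch of the difference, assuming the usual two's-complement narrowing conversion:

#include <cstdint>
#include <cstdio>

int8_t neg8(int8_t x) { return (int8_t)(0 - (int)x); }                    // wraps: neg(-128) == -128
int8_t sqneg8(int8_t x) { return x == INT8_MIN ? INT8_MAX : (int8_t)-x; } // saturates to 127

int main() {
  printf("%d %d\n", neg8(INT8_MIN), sqneg8(INT8_MIN)); // prints: -128 127
  return 0;
}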
+
+multiclass NeonI_2VMisc_BHSDsize_2Args<string asmop, bit U, bits<5> opcode> {
+ let Constraints = "$src = $Rd" in {
+ def 16b : NeonI_2VMisc<0b1, U, 0b00, opcode,
+ (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
+ asmop # "\t$Rd.16b, $Rn.16b",
+ [], NoItinerary>;
+
+ def 8h : NeonI_2VMisc<0b1, U, 0b01, opcode,
+ (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
+ asmop # "\t$Rd.8h, $Rn.8h",
+ [], NoItinerary>;
+
+ def 4s : NeonI_2VMisc<0b1, U, 0b10, opcode,
+ (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
+ asmop # "\t$Rd.4s, $Rn.4s",
+ [], NoItinerary>;
+
+ def 2d : NeonI_2VMisc<0b1, U, 0b11, opcode,
+ (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
+ asmop # "\t$Rd.2d, $Rn.2d",
+ [], NoItinerary>;
+
+ def 8b : NeonI_2VMisc<0b0, U, 0b00, opcode,
+ (outs VPR64:$Rd), (ins VPR64:$src, VPR64:$Rn),
+ asmop # "\t$Rd.8b, $Rn.8b",
+ [], NoItinerary>;
+
+ def 4h : NeonI_2VMisc<0b0, U, 0b01, opcode,
+ (outs VPR64:$Rd), (ins VPR64:$src, VPR64:$Rn),
+ asmop # "\t$Rd.4h, $Rn.4h",
+ [], NoItinerary>;
+
+ def 2s : NeonI_2VMisc<0b0, U, 0b10, opcode,
+ (outs VPR64:$Rd), (ins VPR64:$src, VPR64:$Rn),
+ asmop # "\t$Rd.2s, $Rn.2s",
+ [], NoItinerary>;
+ }
+}
+
+defm SUQADD : NeonI_2VMisc_BHSDsize_2Args<"suqadd", 0b0, 0b00011>;
+defm USQADD : NeonI_2VMisc_BHSDsize_2Args<"usqadd", 0b1, 0b00011>;
+
+multiclass NeonI_2VMisc_BHSD_2Args_Pattern<string Prefix,
+ SDPatternOperator Neon_Op> {
+ def : Pat<(v16i8 (Neon_Op (v16i8 VPR128:$src), (v16i8 VPR128:$Rn))),
+ (v16i8 (!cast<Instruction>(Prefix # 16b)
+ (v16i8 VPR128:$src), (v16i8 VPR128:$Rn)))>;
+
+ def : Pat<(v8i16 (Neon_Op (v8i16 VPR128:$src), (v8i16 VPR128:$Rn))),
+ (v8i16 (!cast<Instruction>(Prefix # 8h)
+ (v8i16 VPR128:$src), (v8i16 VPR128:$Rn)))>;
+
+ def : Pat<(v4i32 (Neon_Op (v4i32 VPR128:$src), (v4i32 VPR128:$Rn))),
+ (v4i32 (!cast<Instruction>(Prefix # 4s)
+ (v4i32 VPR128:$src), (v4i32 VPR128:$Rn)))>;
+
+ def : Pat<(v2i64 (Neon_Op (v2i64 VPR128:$src), (v2i64 VPR128:$Rn))),
+ (v2i64 (!cast<Instruction>(Prefix # 2d)
+ (v2i64 VPR128:$src), (v2i64 VPR128:$Rn)))>;
+
+ def : Pat<(v8i8 (Neon_Op (v8i8 VPR64:$src), (v8i8 VPR64:$Rn))),
+ (v8i8 (!cast<Instruction>(Prefix # 8b)
+ (v8i8 VPR64:$src), (v8i8 VPR64:$Rn)))>;
+
+ def : Pat<(v4i16 (Neon_Op (v4i16 VPR64:$src), (v4i16 VPR64:$Rn))),
+ (v4i16 (!cast<Instruction>(Prefix # 4h)
+ (v4i16 VPR64:$src), (v4i16 VPR64:$Rn)))>;
+
+ def : Pat<(v2i32 (Neon_Op (v2i32 VPR64:$src), (v2i32 VPR64:$Rn))),
+ (v2i32 (!cast<Instruction>(Prefix # 2s)
+ (v2i32 VPR64:$src), (v2i32 VPR64:$Rn)))>;
+}
+
+defm : NeonI_2VMisc_BHSD_2Args_Pattern<"SUQADD", int_aarch64_neon_suqadd>;
+defm : NeonI_2VMisc_BHSD_2Args_Pattern<"USQADD", int_aarch64_neon_usqadd>;
+
+multiclass NeonI_2VMisc_BHSsizes<string asmop, bit U,
+ SDPatternOperator Neon_Op> {
+ def 16b : NeonI_2VMisc<0b1, U, 0b00, 0b00100,
+ (outs VPR128:$Rd), (ins VPR128:$Rn),
+ asmop # "\t$Rd.16b, $Rn.16b",
+ [(set (v16i8 VPR128:$Rd),
+ (v16i8 (Neon_Op (v16i8 VPR128:$Rn))))],
+ NoItinerary>;
+
+ def 8h : NeonI_2VMisc<0b1, U, 0b01, 0b00100,
+ (outs VPR128:$Rd), (ins VPR128:$Rn),
+ asmop # "\t$Rd.8h, $Rn.8h",
+ [(set (v8i16 VPR128:$Rd),
+ (v8i16 (Neon_Op (v8i16 VPR128:$Rn))))],
+ NoItinerary>;
+
+ def 4s : NeonI_2VMisc<0b1, U, 0b10, 0b00100,
+ (outs VPR128:$Rd), (ins VPR128:$Rn),
+ asmop # "\t$Rd.4s, $Rn.4s",
+ [(set (v4i32 VPR128:$Rd),
+ (v4i32 (Neon_Op (v4i32 VPR128:$Rn))))],
+ NoItinerary>;
+
+ def 8b : NeonI_2VMisc<0b0, U, 0b00, 0b00100,
+ (outs VPR64:$Rd), (ins VPR64:$Rn),
+ asmop # "\t$Rd.8b, $Rn.8b",
+ [(set (v8i8 VPR64:$Rd),
+ (v8i8 (Neon_Op (v8i8 VPR64:$Rn))))],
+ NoItinerary>;
+
+ def 4h : NeonI_2VMisc<0b0, U, 0b01, 0b00100,
+ (outs VPR64:$Rd), (ins VPR64:$Rn),
+ asmop # "\t$Rd.4h, $Rn.4h",
+ [(set (v4i16 VPR64:$Rd),
+ (v4i16 (Neon_Op (v4i16 VPR64:$Rn))))],
+ NoItinerary>;
+
+ def 2s : NeonI_2VMisc<0b0, U, 0b10, 0b00100,
+ (outs VPR64:$Rd), (ins VPR64:$Rn),
+ asmop # "\t$Rd.2s, $Rn.2s",
+ [(set (v2i32 VPR64:$Rd),
+ (v2i32 (Neon_Op (v2i32 VPR64:$Rn))))],
+ NoItinerary>;
+}
+
+defm CLS : NeonI_2VMisc_BHSsizes<"cls", 0b0, int_arm_neon_vcls>;
+defm CLZ : NeonI_2VMisc_BHSsizes<"clz", 0b1, ctlz>;
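
CLS counts the bits following the sign bit that are equal to it, and CLZ counts leading zeros — hence the ctlz selection above. A host sketch using the matching GCC/Clang builtins, which mirror the semantics but are of course not the pattern-matched path:

#include <cstdio>

int main() {
  int x = 0x00000F00;
  printf("clz = %d\n", __builtin_clz(x));   // 20 leading zeros
  printf("cls = %d\n", __builtin_clrsb(x)); // 19 redundant sign bits
  return 0;
}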
+
+multiclass NeonI_2VMisc_Bsize<string asmop, bit U, bits<2> size,
+ bits<5> Opcode> {
+ def 16b : NeonI_2VMisc<0b1, U, size, Opcode,
+ (outs VPR128:$Rd), (ins VPR128:$Rn),
+ asmop # "\t$Rd.16b, $Rn.16b",
+ [], NoItinerary>;
+
+ def 8b : NeonI_2VMisc<0b0, U, size, Opcode,
+ (outs VPR64:$Rd), (ins VPR64:$Rn),
+ asmop # "\t$Rd.8b, $Rn.8b",
+ [], NoItinerary>;
+}
+
+defm CNT : NeonI_2VMisc_Bsize<"cnt", 0b0, 0b00, 0b00101>;
+defm NOT : NeonI_2VMisc_Bsize<"not", 0b1, 0b00, 0b00101>;
+defm RBIT : NeonI_2VMisc_Bsize<"rbit", 0b1, 0b01, 0b00101>;
+
+def : NeonInstAlias<"mvn $Rd.16b, $Rn.16b",
+ (NOT16b VPR128:$Rd, VPR128:$Rn), 0>;
+def : NeonInstAlias<"mvn $Rd.8b, $Rn.8b",
+ (NOT8b VPR64:$Rd, VPR64:$Rn), 0>;
+
+def : Pat<(v16i8 (ctpop (v16i8 VPR128:$Rn))),
+ (v16i8 (CNT16b (v16i8 VPR128:$Rn)))>;
+def : Pat<(v8i8 (ctpop (v8i8 VPR64:$Rn))),
+ (v8i8 (CNT8b (v8i8 VPR64:$Rn)))>;
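
CNT is a per-byte population count, which is why it is selected from the generic ctpop node. A one-line host sketch of the per-lane arithmetic:

#include <cstdint>
#include <cstdio>

int main() {
  uint8_t v[4] = {0x00, 0x0F, 0xF0, 0xFF};
  for (uint8_t b : v)
    printf("%d ", __builtin_popcount(b)); // prints: 0 4 4 8
  return 0;
}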
+
+def : Pat<(v16i8 (xor
+ (v16i8 VPR128:$Rn),
+ (v16i8 Neon_AllOne))),
+ (v16i8 (NOT16b (v16i8 VPR128:$Rn)))>;
+def : Pat<(v8i8 (xor
+ (v8i8 VPR64:$Rn),
+ (v8i8 Neon_AllOne))),
+ (v8i8 (NOT8b (v8i8 VPR64:$Rn)))>;
+def : Pat<(v8i16 (xor
+ (v8i16 VPR128:$Rn),
+ (v8i16 (bitconvert (v16i8 Neon_AllOne))))),
+ (NOT16b VPR128:$Rn)>;
+def : Pat<(v4i16 (xor
+ (v4i16 VPR64:$Rn),
+ (v4i16 (bitconvert (v8i8 Neon_AllOne))))),
+ (NOT8b VPR64:$Rn)>;
+def : Pat<(v4i32 (xor
+ (v4i32 VPR128:$Rn),
+ (v4i32 (bitconvert (v16i8 Neon_AllOne))))),
+ (NOT16b VPR128:$Rn)>;
+def : Pat<(v2i32 (xor
+ (v2i32 VPR64:$Rn),
+ (v2i32 (bitconvert (v8i8 Neon_AllOne))))),
+ (NOT8b VPR64:$Rn)>;
+def : Pat<(v2i64 (xor
+ (v2i64 VPR128:$Rn),
+ (v2i64 (bitconvert (v16i8 Neon_AllOne))))),
+ (NOT16b VPR128:$Rn)>;
+
+def : Pat<(v16i8 (int_aarch64_neon_rbit (v16i8 VPR128:$Rn))),
+ (v16i8 (RBIT16b (v16i8 VPR128:$Rn)))>;
+def : Pat<(v8i8 (int_aarch64_neon_rbit (v8i8 VPR64:$Rn))),
+ (v8i8 (RBIT8b (v8i8 VPR64:$Rn)))>;
+
+multiclass NeonI_2VMisc_SDsizes<string asmop, bit U, bits<5> opcode,
+ SDPatternOperator Neon_Op> {
+ def 4s : NeonI_2VMisc<0b1, U, 0b10, opcode,
+ (outs VPR128:$Rd), (ins VPR128:$Rn),
+ asmop # "\t$Rd.4s, $Rn.4s",
+ [(set (v4f32 VPR128:$Rd),
+ (v4f32 (Neon_Op (v4f32 VPR128:$Rn))))],
+ NoItinerary>;
+
+ def 2d : NeonI_2VMisc<0b1, U, 0b11, opcode,
+ (outs VPR128:$Rd), (ins VPR128:$Rn),
+ asmop # "\t$Rd.2d, $Rn.2d",
+ [(set (v2f64 VPR128:$Rd),
+ (v2f64 (Neon_Op (v2f64 VPR128:$Rn))))],
+ NoItinerary>;
+
+ def 2s : NeonI_2VMisc<0b0, U, 0b10, opcode,
+ (outs VPR64:$Rd), (ins VPR64:$Rn),
+ asmop # "\t$Rd.2s, $Rn.2s",
+ [(set (v2f32 VPR64:$Rd),
+ (v2f32 (Neon_Op (v2f32 VPR64:$Rn))))],
+ NoItinerary>;
+}
+
+defm FABS : NeonI_2VMisc_SDsizes<"fabs", 0b0, 0b01111, fabs>;
+defm FNEG : NeonI_2VMisc_SDsizes<"fneg", 0b1, 0b01111, fneg>;
+
+multiclass NeonI_2VMisc_HSD_Narrow<string asmop, bit U, bits<5> opcode> {
+ def 8h8b : NeonI_2VMisc<0b0, U, 0b00, opcode,
+ (outs VPR64:$Rd), (ins VPR128:$Rn),
+ asmop # "\t$Rd.8b, $Rn.8h",
+ [], NoItinerary>;
+
+ def 4s4h : NeonI_2VMisc<0b0, U, 0b01, opcode,
+ (outs VPR64:$Rd), (ins VPR128:$Rn),
+ asmop # "\t$Rd.4h, $Rn.4s",
+ [], NoItinerary>;
+
+ def 2d2s : NeonI_2VMisc<0b0, U, 0b10, opcode,
+ (outs VPR64:$Rd), (ins VPR128:$Rn),
+ asmop # "\t$Rd.2s, $Rn.2d",
+ [], NoItinerary>;
+
+ let Constraints = "$Rd = $src" in {
+ def 8h16b : NeonI_2VMisc<0b1, U, 0b00, opcode,
+ (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
+ asmop # "2\t$Rd.16b, $Rn.8h",
+ [], NoItinerary>;
+
+ def 4s8h : NeonI_2VMisc<0b1, U, 0b01, opcode,
+ (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
+ asmop # "2\t$Rd.8h, $Rn.4s",
+ [], NoItinerary>;
+
+ def 2d4s : NeonI_2VMisc<0b1, U, 0b10, opcode,
+ (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
+ asmop # "2\t$Rd.4s, $Rn.2d",
+ [], NoItinerary>;
+ }
+}
+
+defm XTN : NeonI_2VMisc_HSD_Narrow<"xtn", 0b0, 0b10010>;
+defm SQXTUN : NeonI_2VMisc_HSD_Narrow<"sqxtun", 0b1, 0b10010>;
+defm SQXTN : NeonI_2VMisc_HSD_Narrow<"sqxtn", 0b0, 0b10100>;
+defm UQXTN : NeonI_2VMisc_HSD_Narrow<"uqxtn", 0b1, 0b10100>;
+
+multiclass NeonI_2VMisc_Narrow_Patterns<string Prefix,
+ SDPatternOperator Neon_Op> {
+ def : Pat<(v8i8 (Neon_Op (v8i16 VPR128:$Rn))),
+ (v8i8 (!cast<Instruction>(Prefix # 8h8b) (v8i16 VPR128:$Rn)))>;
+
+ def : Pat<(v4i16 (Neon_Op (v4i32 VPR128:$Rn))),
+ (v4i16 (!cast<Instruction>(Prefix # 4s4h) (v4i32 VPR128:$Rn)))>;
+
+ def : Pat<(v2i32 (Neon_Op (v2i64 VPR128:$Rn))),
+ (v2i32 (!cast<Instruction>(Prefix # 2d2s) (v2i64 VPR128:$Rn)))>;
+
+ def : Pat<(v16i8 (concat_vectors
+ (v8i8 VPR64:$src),
+ (v8i8 (Neon_Op (v8i16 VPR128:$Rn))))),
+ (!cast<Instruction>(Prefix # 8h16b)
+ (SUBREG_TO_REG (i32 0), VPR64:$src, sub_64),
+ VPR128:$Rn)>;
+
+ def : Pat<(v8i16 (concat_vectors
+ (v4i16 VPR64:$src),
+ (v4i16 (Neon_Op (v4i32 VPR128:$Rn))))),
+ (!cast<Instruction>(Prefix # 4s8h)
+ (SUBREG_TO_REG (i32 0), VPR64:$src, sub_64),
+ VPR128:$Rn)>;
+
+ def : Pat<(v4i32 (concat_vectors
+ (v2i32 VPR64:$src),
+ (v2i32 (Neon_Op (v2i64 VPR128:$Rn))))),
+ (!cast<Instruction>(Prefix # 2d4s)
+ (SUBREG_TO_REG (i32 0), VPR64:$src, sub_64),
+ VPR128:$Rn)>;
+}
+
+defm : NeonI_2VMisc_Narrow_Patterns<"XTN", trunc>;
+defm : NeonI_2VMisc_Narrow_Patterns<"SQXTUN", int_arm_neon_vqmovnsu>;
+defm : NeonI_2VMisc_Narrow_Patterns<"SQXTN", int_arm_neon_vqmovns>;
+defm : NeonI_2VMisc_Narrow_Patterns<"UQXTN", int_arm_neon_vqmovnu>;
+
+multiclass NeonI_2VMisc_SHIFT<string asmop, bit U, bits<5> opcode> {
+ let DecoderMethod = "DecodeSHLLInstruction" in {
+ def 8b8h : NeonI_2VMisc<0b0, U, 0b00, opcode,
+ (outs VPR128:$Rd),
+ (ins VPR64:$Rn, uimm_exact8:$Imm),
+ asmop # "\t$Rd.8h, $Rn.8b, $Imm",
+ [], NoItinerary>;
+
+ def 4h4s : NeonI_2VMisc<0b0, U, 0b01, opcode,
+ (outs VPR128:$Rd),
+ (ins VPR64:$Rn, uimm_exact16:$Imm),
+ asmop # "\t$Rd.4s, $Rn.4h, $Imm",
+ [], NoItinerary>;
+
+ def 2s2d : NeonI_2VMisc<0b0, U, 0b10, opcode,
+ (outs VPR128:$Rd),
+ (ins VPR64:$Rn, uimm_exact32:$Imm),
+ asmop # "\t$Rd.2d, $Rn.2s, $Imm",
+ [], NoItinerary>;
+
+ def 16b8h : NeonI_2VMisc<0b1, U, 0b00, opcode,
+ (outs VPR128:$Rd),
+ (ins VPR128:$Rn, uimm_exact8:$Imm),
+ asmop # "2\t$Rd.8h, $Rn.16b, $Imm",
+ [], NoItinerary>;
+
+ def 8h4s : NeonI_2VMisc<0b1, U, 0b01, opcode,
+ (outs VPR128:$Rd),
+ (ins VPR128:$Rn, uimm_exact16:$Imm),
+ asmop # "2\t$Rd.4s, $Rn.8h, $Imm",
+ [], NoItinerary>;
+
+ def 4s2d : NeonI_2VMisc<0b1, U, 0b10, opcode,
+ (outs VPR128:$Rd),
+ (ins VPR128:$Rn, uimm_exact32:$Imm),
+ asmop # "2\t$Rd.2d, $Rn.4s, $Imm",
+ [], NoItinerary>;
+ }
+}
+
+defm SHLL : NeonI_2VMisc_SHIFT<"shll", 0b1, 0b10011>;
+
+class NeonI_SHLL_Patterns<ValueType OpTy, ValueType DesTy,
+ SDPatternOperator ExtOp, Operand Neon_Imm,
+ string suffix>
+ : Pat<(DesTy (shl
+ (DesTy (ExtOp (OpTy VPR64:$Rn))),
+ (DesTy (Neon_vdup
+ (i32 Neon_Imm:$Imm))))),
+ (!cast<Instruction>("SHLL" # suffix) VPR64:$Rn, Neon_Imm:$Imm)>;
+
+class NeonI_SHLL_High_Patterns<ValueType OpTy, ValueType DesTy,
+ SDPatternOperator ExtOp, Operand Neon_Imm,
+ string suffix, PatFrag GetHigh>
+ : Pat<(DesTy (shl
+ (DesTy (ExtOp
+ (OpTy (GetHigh VPR128:$Rn)))),
+ (DesTy (Neon_vdup
+ (i32 Neon_Imm:$Imm))))),
+ (!cast<Instruction>("SHLL" # suffix) VPR128:$Rn, Neon_Imm:$Imm)>;
+
+def : NeonI_SHLL_Patterns<v8i8, v8i16, zext, uimm_exact8, "8b8h">;
+def : NeonI_SHLL_Patterns<v8i8, v8i16, sext, uimm_exact8, "8b8h">;
+def : NeonI_SHLL_Patterns<v4i16, v4i32, zext, uimm_exact16, "4h4s">;
+def : NeonI_SHLL_Patterns<v4i16, v4i32, sext, uimm_exact16, "4h4s">;
+def : NeonI_SHLL_Patterns<v2i32, v2i64, zext, uimm_exact32, "2s2d">;
+def : NeonI_SHLL_Patterns<v2i32, v2i64, sext, uimm_exact32, "2s2d">;
+def : NeonI_SHLL_High_Patterns<v8i8, v8i16, zext, uimm_exact8, "16b8h",
+ Neon_High16B>;
+def : NeonI_SHLL_High_Patterns<v8i8, v8i16, sext, uimm_exact8, "16b8h",
+ Neon_High16B>;
+def : NeonI_SHLL_High_Patterns<v4i16, v4i32, zext, uimm_exact16, "8h4s",
+ Neon_High8H>;
+def : NeonI_SHLL_High_Patterns<v4i16, v4i32, sext, uimm_exact16, "8h4s",
+ Neon_High8H>;
+def : NeonI_SHLL_High_Patterns<v2i32, v2i64, zext, uimm_exact32, "4s2d",
+ Neon_High4S>;
+def : NeonI_SHLL_High_Patterns<v2i32, v2i64, sext, uimm_exact32, "4s2d",
+ Neon_High4S>;
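
SHLL's immediate must equal the source element width (the uimm_exact8/16/32 operands), so the zext/sext-plus-shl patterns above are exact-match rewrites. A scalar u8-to-u16 sketch of what one lane computes:

#include <cstdint>
#include <cstdio>

int main() {
  uint8_t in[4] = {1, 2, 0x80, 0xFF};
  uint16_t out[4];
  for (int i = 0; i < 4; ++i)
    out[i] = (uint16_t)in[i] << 8; // zero-extend, then shift by the lane width
  for (int i = 0; i < 4; ++i)
    printf("0x%04x ", out[i]);     // prints: 0x0100 0x0200 0x8000 0xff00
  return 0;
}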
+
+multiclass NeonI_2VMisc_SD_Narrow<string asmop, bit U, bits<5> opcode> {
+ def 4s4h : NeonI_2VMisc<0b0, U, 0b00, opcode,
+ (outs VPR64:$Rd), (ins VPR128:$Rn),
+ asmop # "\t$Rd.4h, $Rn.4s",
+ [], NoItinerary>;
+
+ def 2d2s : NeonI_2VMisc<0b0, U, 0b01, opcode,
+ (outs VPR64:$Rd), (ins VPR128:$Rn),
+ asmop # "\t$Rd.2s, $Rn.2d",
+ [], NoItinerary>;
+
+ let Constraints = "$src = $Rd" in {
+ def 4s8h : NeonI_2VMisc<0b1, U, 0b00, opcode,
+ (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
+ asmop # "2\t$Rd.8h, $Rn.4s",
+ [], NoItinerary>;
+
+ def 2d4s : NeonI_2VMisc<0b1, U, 0b01, opcode,
+ (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
+ asmop # "2\t$Rd.4s, $Rn.2d",
+ [], NoItinerary>;
+ }
+}
+
+defm FCVTN : NeonI_2VMisc_SD_Narrow<"fcvtn", 0b0, 0b10110>;
+
+multiclass NeonI_2VMisc_Narrow_Pattern<string prefix,
+ SDPatternOperator f32_to_f16_Op,
+ SDPatternOperator f64_to_f32_Op> {
+
+ def : Pat<(v4i16 (f32_to_f16_Op (v4f32 VPR128:$Rn))),
+ (!cast<Instruction>(prefix # "4s4h") (v4f32 VPR128:$Rn))>;
+
+ def : Pat<(v8i16 (concat_vectors
+ (v4i16 VPR64:$src),
+ (v4i16 (f32_to_f16_Op (v4f32 VPR128:$Rn))))),
+ (!cast<Instruction>(prefix # "4s8h")
+ (v4f32 (SUBREG_TO_REG (i32 0), VPR64:$src, sub_64)),
+ (v4f32 VPR128:$Rn))>;
+
+ def : Pat<(v2f32 (f64_to_f32_Op (v2f64 VPR128:$Rn))),
+ (!cast<Instruction>(prefix # "2d2s") (v2f64 VPR128:$Rn))>;
+
+ def : Pat<(v4f32 (concat_vectors
+ (v2f32 VPR64:$src),
+ (v2f32 (f64_to_f32_Op (v2f64 VPR128:$Rn))))),
+ (!cast<Instruction>(prefix # "2d4s")
+ (v4f32 (SUBREG_TO_REG (i32 0), VPR64:$src, sub_64)),
+ (v2f64 VPR128:$Rn))>;
+}
+
+defm : NeonI_2VMisc_Narrow_Pattern<"FCVTN", int_arm_neon_vcvtfp2hf, fround>;
+
+multiclass NeonI_2VMisc_D_Narrow<string asmop, string prefix, bit U,
+ bits<5> opcode> {
+ def 2d2s : NeonI_2VMisc<0b0, U, 0b01, opcode,
+ (outs VPR64:$Rd), (ins VPR128:$Rn),
+ asmop # "\t$Rd.2s, $Rn.2d",
+ [], NoItinerary>;
+
+ def 2d4s : NeonI_2VMisc<0b1, U, 0b01, opcode,
+ (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
+ asmop # "2\t$Rd.4s, $Rn.2d",
+ [], NoItinerary> {
+ let Constraints = "$src = $Rd";
+ }
+
+ def : Pat<(v2f32 (int_aarch64_neon_fcvtxn (v2f64 VPR128:$Rn))),
+ (!cast<Instruction>(prefix # "2d2s") VPR128:$Rn)>;
+
+ def : Pat<(v4f32 (concat_vectors
+ (v2f32 VPR64:$src),
+ (v2f32 (int_aarch64_neon_fcvtxn (v2f64 VPR128:$Rn))))),
+ (!cast<Instruction>(prefix # "2d4s")
+ (v4f32 (SUBREG_TO_REG (i32 0), VPR64:$src, sub_64)),
+ VPR128:$Rn)>;
+}
+
+defm FCVTXN : NeonI_2VMisc_D_Narrow<"fcvtxn", "FCVTXN", 0b1, 0b10110>;
+
+def Neon_High4Float : PatFrag<(ops node:$in),
+ (extract_subvector (v4f32 node:$in), (iPTR 2))>;
+
+multiclass NeonI_2VMisc_HS_Extend<string asmop, bit U, bits<5> opcode> {
+ def 4h4s : NeonI_2VMisc<0b0, U, 0b00, opcode,
+ (outs VPR128:$Rd), (ins VPR64:$Rn),
+ asmop # "\t$Rd.4s, $Rn.4h",
+ [], NoItinerary>;
+
+ def 2s2d : NeonI_2VMisc<0b0, U, 0b01, opcode,
+ (outs VPR128:$Rd), (ins VPR64:$Rn),
+ asmop # "\t$Rd.2d, $Rn.2s",
+ [], NoItinerary>;
+
+ def 8h4s : NeonI_2VMisc<0b1, U, 0b00, opcode,
+ (outs VPR128:$Rd), (ins VPR128:$Rn),
+ asmop # "2\t$Rd.4s, $Rn.8h",
+ [], NoItinerary>;
+
+ def 4s2d : NeonI_2VMisc<0b1, U, 0b01, opcode,
+ (outs VPR128:$Rd), (ins VPR128:$Rn),
+ asmop # "2\t$Rd.2d, $Rn.4s",
+ [], NoItinerary>;
+}
+
+defm FCVTL : NeonI_2VMisc_HS_Extend<"fcvtl", 0b0, 0b10111>;
+
+multiclass NeonI_2VMisc_Extend_Pattern<string prefix> {
+ def : Pat<(v4f32 (int_arm_neon_vcvthf2fp (v4i16 VPR64:$Rn))),
+ (!cast<Instruction>(prefix # "4h4s") VPR64:$Rn)>;
+
+ def : Pat<(v4f32 (int_arm_neon_vcvthf2fp
+ (v4i16 (Neon_High8H
+ (v8i16 VPR128:$Rn))))),
+ (!cast<Instruction>(prefix # "8h4s") VPR128:$Rn)>;
+
+ def : Pat<(v2f64 (fextend (v2f32 VPR64:$Rn))),
+ (!cast<Instruction>(prefix # "2s2d") VPR64:$Rn)>;
+
+ def : Pat<(v2f64 (fextend
+ (v2f32 (Neon_High4Float
+ (v4f32 VPR128:$Rn))))),
+ (!cast<Instruction>(prefix # "4s2d") VPR128:$Rn)>;
+}
+
+defm : NeonI_2VMisc_Extend_Pattern<"FCVTL">;
+
+multiclass NeonI_2VMisc_SD_Conv<string asmop, bit Size, bit U, bits<5> opcode,
+ ValueType ResTy4s, ValueType OpTy4s,
+ ValueType ResTy2d, ValueType OpTy2d,
+ ValueType ResTy2s, ValueType OpTy2s,
+ SDPatternOperator Neon_Op> {
+
+ def 4s : NeonI_2VMisc<0b1, U, {Size, 0b0}, opcode,
+ (outs VPR128:$Rd), (ins VPR128:$Rn),
+ asmop # "\t$Rd.4s, $Rn.4s",
+ [(set (ResTy4s VPR128:$Rd),
+ (ResTy4s (Neon_Op (OpTy4s VPR128:$Rn))))],
+ NoItinerary>;
+
+ def 2d : NeonI_2VMisc<0b1, U, {Size, 0b1}, opcode,
+ (outs VPR128:$Rd), (ins VPR128:$Rn),
+ asmop # "\t$Rd.2d, $Rn.2d",
+ [(set (ResTy2d VPR128:$Rd),
+ (ResTy2d (Neon_Op (OpTy2d VPR128:$Rn))))],
+ NoItinerary>;
+
+ def 2s : NeonI_2VMisc<0b0, U, {Size, 0b0}, opcode,
+ (outs VPR64:$Rd), (ins VPR64:$Rn),
+ asmop # "\t$Rd.2s, $Rn.2s",
+ [(set (ResTy2s VPR64:$Rd),
+ (ResTy2s (Neon_Op (OpTy2s VPR64:$Rn))))],
+ NoItinerary>;
+}
+
+multiclass NeonI_2VMisc_fp_to_int<string asmop, bit Size, bit U,
+ bits<5> opcode, SDPatternOperator Neon_Op> {
+ defm _ : NeonI_2VMisc_SD_Conv<asmop, Size, U, opcode, v4i32, v4f32, v2i64,
+ v2f64, v2i32, v2f32, Neon_Op>;
+}
+
+defm FCVTNS : NeonI_2VMisc_fp_to_int<"fcvtns", 0b0, 0b0, 0b11010,
+ int_aarch64_neon_fcvtns>;
+defm FCVTNU : NeonI_2VMisc_fp_to_int<"fcvtnu", 0b0, 0b1, 0b11010,
+ int_aarch64_neon_fcvtnu>;
+defm FCVTPS : NeonI_2VMisc_fp_to_int<"fcvtps", 0b1, 0b0, 0b11010,
+ int_aarch64_neon_fcvtps>;
+defm FCVTPU : NeonI_2VMisc_fp_to_int<"fcvtpu", 0b1, 0b1, 0b11010,
+ int_aarch64_neon_fcvtpu>;
+defm FCVTMS : NeonI_2VMisc_fp_to_int<"fcvtms", 0b0, 0b0, 0b11011,
+ int_aarch64_neon_fcvtms>;
+defm FCVTMU : NeonI_2VMisc_fp_to_int<"fcvtmu", 0b0, 0b1, 0b11011,
+ int_aarch64_neon_fcvtmu>;
+defm FCVTZS : NeonI_2VMisc_fp_to_int<"fcvtzs", 0b1, 0b0, 0b11011, fp_to_sint>;
+defm FCVTZU : NeonI_2VMisc_fp_to_int<"fcvtzu", 0b1, 0b1, 0b11011, fp_to_uint>;
+defm FCVTAS : NeonI_2VMisc_fp_to_int<"fcvtas", 0b0, 0b0, 0b11100,
+ int_aarch64_neon_fcvtas>;
+defm FCVTAU : NeonI_2VMisc_fp_to_int<"fcvtau", 0b0, 0b1, 0b11100,
+ int_aarch64_neon_fcvtau>;
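
The FCVT[N/P/M/Z/A][S/U] variants differ only in the rounding applied before the FP-to-integer conversion: nearest-even, toward +infinity, toward -infinity, toward zero, and nearest with ties away. A host sketch of the analogous <cmath> roundings — an illustration of the semantics, not the lowering:

#include <cmath>
#include <cstdio>

int main() {
  double x = -2.5;
  std::printf("fcvtns ~ rint : %ld\n", (long)std::rint(x));  // -2 (nearest, ties to even)
  std::printf("fcvtps ~ ceil : %ld\n", (long)std::ceil(x));  // -2 (toward +inf)
  std::printf("fcvtms ~ floor: %ld\n", (long)std::floor(x)); // -3 (toward -inf)
  std::printf("fcvtzs ~ trunc: %ld\n", (long)std::trunc(x)); // -2 (toward zero)
  std::printf("fcvtas ~ round: %ld\n", (long)std::round(x)); // -3 (nearest, ties away)
  return 0;
}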
+
+multiclass NeonI_2VMisc_int_to_fp<string asmop, bit Size, bit U,
+ bits<5> opcode, SDPatternOperator Neon_Op> {
+ defm _ : NeonI_2VMisc_SD_Conv<asmop, Size, U, opcode, v4f32, v4i32, v2f64,
+ v2i64, v2f32, v2i32, Neon_Op>;
+}
+
+defm SCVTF : NeonI_2VMisc_int_to_fp<"scvtf", 0b0, 0b0, 0b11101, sint_to_fp>;
+defm UCVTF : NeonI_2VMisc_int_to_fp<"ucvtf", 0b0, 0b1, 0b11101, uint_to_fp>;
+
+multiclass NeonI_2VMisc_fp_to_fp<string asmop, bit Size, bit U,
+ bits<5> opcode, SDPatternOperator Neon_Op> {
+ defm _ : NeonI_2VMisc_SD_Conv<asmop, Size, U, opcode, v4f32, v4f32, v2f64,
+ v2f64, v2f32, v2f32, Neon_Op>;
+}
+
+defm FRINTN : NeonI_2VMisc_fp_to_fp<"frintn", 0b0, 0b0, 0b11000,
+ int_aarch64_neon_frintn>;
+defm FRINTA : NeonI_2VMisc_fp_to_fp<"frinta", 0b0, 0b1, 0b11000, frnd>;
+defm FRINTP : NeonI_2VMisc_fp_to_fp<"frintp", 0b1, 0b0, 0b11000, fceil>;
+defm FRINTM : NeonI_2VMisc_fp_to_fp<"frintm", 0b0, 0b0, 0b11001, ffloor>;
+defm FRINTX : NeonI_2VMisc_fp_to_fp<"frintx", 0b0, 0b1, 0b11001, frint>;
+defm FRINTZ : NeonI_2VMisc_fp_to_fp<"frintz", 0b1, 0b0, 0b11001, ftrunc>;
+defm FRINTI : NeonI_2VMisc_fp_to_fp<"frinti", 0b1, 0b1, 0b11001, fnearbyint>;
+defm FRECPE : NeonI_2VMisc_fp_to_fp<"frecpe", 0b1, 0b0, 0b11101,
+ int_arm_neon_vrecpe>;
+defm FRSQRTE : NeonI_2VMisc_fp_to_fp<"frsqrte", 0b1, 0b1, 0b11101,
+ int_arm_neon_vrsqrte>;
+defm FSQRT : NeonI_2VMisc_fp_to_fp<"fsqrt", 0b1, 0b1, 0b11111, fsqrt>;
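
The FRINT family is the round-to-integral-in-FP counterpart; the SDAG nodes above line up with the C rounding functions (frnd ~ round, fceil ~ ceil, ffloor ~ floor, ftrunc ~ trunc, fnearbyint ~ nearbyint, frint ~ rint). Ties are where they diverge, as this sketch shows under the default round-to-nearest-even mode:

#include <cmath>
#include <cstdio>

int main() {
  // round() takes ties away from zero; rint() honours ties-to-even.
  std::printf("round(0.5) = %.1f, rint(0.5) = %.1f\n",
              std::round(0.5), std::rint(0.5)); // 1.0 vs 0.0
  return 0;
}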
+
+multiclass NeonI_2VMisc_S_Conv<string asmop, bit Size, bit U,
+ bits<5> opcode, SDPatternOperator Neon_Op> {
+ def 4s : NeonI_2VMisc<0b1, U, {Size, 0b0}, opcode,
+ (outs VPR128:$Rd), (ins VPR128:$Rn),
+ asmop # "\t$Rd.4s, $Rn.4s",
+ [(set (v4i32 VPR128:$Rd),
+ (v4i32 (Neon_Op (v4i32 VPR128:$Rn))))],
+ NoItinerary>;
+
+ def 2s : NeonI_2VMisc<0b0, U, {Size, 0b0}, opcode,
+ (outs VPR64:$Rd), (ins VPR64:$Rn),
+ asmop # "\t$Rd.2s, $Rn.2s",
+ [(set (v2i32 VPR64:$Rd),
+ (v2i32 (Neon_Op (v2i32 VPR64:$Rn))))],
+ NoItinerary>;
+}
+
+defm URECPE : NeonI_2VMisc_S_Conv<"urecpe", 0b1, 0b0, 0b11100,
+ int_arm_neon_vrecpe>;
+defm URSQRTE : NeonI_2VMisc_S_Conv<"ursqrte", 0b1, 0b1, 0b11100,
+ int_arm_neon_vrsqrte>;
+
+// Crypto Class
+class NeonI_Cryptoaes_2v<bits<2> size, bits<5> opcode,
+ string asmop, SDPatternOperator opnode>
+ : NeonI_Crypto_AES<size, opcode,
+ (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
+ asmop # "\t$Rd.16b, $Rn.16b",
+ [(set (v16i8 VPR128:$Rd),
+ (v16i8 (opnode (v16i8 VPR128:$src),
+ (v16i8 VPR128:$Rn))))],
+ NoItinerary> {
+ let Constraints = "$src = $Rd";
+ let Predicates = [HasNEON, HasCrypto];
+}
+
+def AESE : NeonI_Cryptoaes_2v<0b00, 0b00100, "aese", int_arm_neon_aese>;
+def AESD : NeonI_Cryptoaes_2v<0b00, 0b00101, "aesd", int_arm_neon_aesd>;
+
+class NeonI_Cryptoaes<bits<2> size, bits<5> opcode,
+ string asmop, SDPatternOperator opnode>
+ : NeonI_Crypto_AES<size, opcode,
+ (outs VPR128:$Rd), (ins VPR128:$Rn),
+ asmop # "\t$Rd.16b, $Rn.16b",
+ [(set (v16i8 VPR128:$Rd),
+ (v16i8 (opnode (v16i8 VPR128:$Rn))))],
+ NoItinerary>;
+
+def AESMC : NeonI_Cryptoaes<0b00, 0b00110, "aesmc", int_arm_neon_aesmc>;
+def AESIMC : NeonI_Cryptoaes<0b00, 0b00111, "aesimc", int_arm_neon_aesimc>;
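
For context on how these AES definitions are reached from user code: the ACLE intrinsics in <arm_neon.h> lower to the int_arm_neon_aes* intrinsics matched above. A target-specific sketch — it assumes an AArch64 compiler with -march=armv8-a+crypto and will not build on other hosts:

#include <arm_neon.h>

// AESE performs AddRoundKey + SubBytes + ShiftRows; AESMC performs
// MixColumns, so the pair is one AES round minus the next key addition.
uint8x16_t one_aes_round(uint8x16_t state, uint8x16_t key) {
  return vaesmcq_u8(vaeseq_u8(state, key));
}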
+
+class NeonI_Cryptosha_vv<bits<2> size, bits<5> opcode,
+ string asmop, SDPatternOperator opnode>
+ : NeonI_Crypto_SHA<size, opcode,
+ (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
+ asmop # "\t$Rd.4s, $Rn.4s",
+ [(set (v4i32 VPR128:$Rd),
+ (v4i32 (opnode (v4i32 VPR128:$src),
+ (v4i32 VPR128:$Rn))))],
+ NoItinerary> {
+ let Constraints = "$src = $Rd";
+ let Predicates = [HasNEON, HasCrypto];
+}
+
+def SHA1SU1 : NeonI_Cryptosha_vv<0b00, 0b00001, "sha1su1",
+ int_arm_neon_sha1su1>;
+def SHA256SU0 : NeonI_Cryptosha_vv<0b00, 0b00010, "sha256su0",
+ int_arm_neon_sha256su0>;
+
+class NeonI_Cryptosha_ss<bits<2> size, bits<5> opcode,
+ string asmop, SDPatternOperator opnode>
+ : NeonI_Crypto_SHA<size, opcode,
+ (outs FPR32:$Rd), (ins FPR32:$Rn),
+ asmop # "\t$Rd, $Rn",
+ [(set (v1i32 FPR32:$Rd),
+ (v1i32 (opnode (v1i32 FPR32:$Rn))))],
+ NoItinerary> {
+ let Predicates = [HasNEON, HasCrypto];
+}
+
+def SHA1H : NeonI_Cryptosha_ss<0b00, 0b00000, "sha1h", int_arm_neon_sha1h>;
+
+class NeonI_Cryptosha3_vvv<bits<2> size, bits<3> opcode, string asmop,
+ SDPatternOperator opnode>
+ : NeonI_Crypto_3VSHA<size, opcode,
+ (outs VPR128:$Rd),
+ (ins VPR128:$src, VPR128:$Rn, VPR128:$Rm),
+ asmop # "\t$Rd.4s, $Rn.4s, $Rm.4s",
+ [(set (v4i32 VPR128:$Rd),
+ (v4i32 (opnode (v4i32 VPR128:$src),
+ (v4i32 VPR128:$Rn),
+ (v4i32 VPR128:$Rm))))],
+ NoItinerary> {
+ let Constraints = "$src = $Rd";
+ let Predicates = [HasNEON, HasCrypto];
+}
+
+def SHA1SU0 : NeonI_Cryptosha3_vvv<0b00, 0b011, "sha1su0",
+ int_arm_neon_sha1su0>;
+def SHA256SU1 : NeonI_Cryptosha3_vvv<0b00, 0b110, "sha256su1",
+ int_arm_neon_sha256su1>;
+
+class NeonI_Cryptosha3_qqv<bits<2> size, bits<3> opcode, string asmop,
+ SDPatternOperator opnode>
+ : NeonI_Crypto_3VSHA<size, opcode,
+ (outs FPR128:$Rd),
+ (ins FPR128:$src, FPR128:$Rn, VPR128:$Rm),
+ asmop # "\t$Rd, $Rn, $Rm.4s",
+ [(set (v4i32 FPR128:$Rd),
+ (v4i32 (opnode (v4i32 FPR128:$src),
+ (v4i32 FPR128:$Rn),
+ (v4i32 VPR128:$Rm))))],
+ NoItinerary> {
+ let Constraints = "$src = $Rd";
+ let Predicates = [HasNEON, HasCrypto];
+}
+
+def SHA256H : NeonI_Cryptosha3_qqv<0b00, 0b100, "sha256h",
+ int_arm_neon_sha256h>;
+def SHA256H2 : NeonI_Cryptosha3_qqv<0b00, 0b101, "sha256h2",
+ int_arm_neon_sha256h2>;
+
+class NeonI_Cryptosha3_qsv<bits<2> size, bits<3> opcode, string asmop,
+ SDPatternOperator opnode>
+ : NeonI_Crypto_3VSHA<size, opcode,
+ (outs FPR128:$Rd),
+ (ins FPR128:$src, FPR32:$Rn, VPR128:$Rm),
+ asmop # "\t$Rd, $Rn, $Rm.4s",
+ [(set (v4i32 FPR128:$Rd),
+ (v4i32 (opnode (v4i32 FPR128:$src),
+ (v1i32 FPR32:$Rn),
+ (v4i32 VPR128:$Rm))))],
+ NoItinerary> {
+ let Constraints = "$src = $Rd";
+ let Predicates = [HasNEON, HasCrypto];
+}
+
+def SHA1C : NeonI_Cryptosha3_qsv<0b00, 0b000, "sha1c", int_aarch64_neon_sha1c>;
+def SHA1P : NeonI_Cryptosha3_qsv<0b00, 0b001, "sha1p", int_aarch64_neon_sha1p>;
+def SHA1M : NeonI_Cryptosha3_qsv<0b00, 0b010, "sha1m", int_aarch64_neon_sha1m>;
+
+//
+// Patterns for handling half-precision values
+//
+
+// Convert an f16 value arriving in an i16 to f32
+def : Pat<(f32 (f16_to_f32 (i32 (and (i32 GPR32:$Rn), 65535)))),
+ (FCVTsh (EXTRACT_SUBREG (FMOVsw GPR32:$Rn), sub_16))>;
+def : Pat<(f32 (f16_to_f32 (i32 (assertzext GPR32:$Rn)))),
+ (FCVTsh (EXTRACT_SUBREG (FMOVsw GPR32:$Rn), sub_16))>;
+
+def : Pat<(f32 (f16_to_f32 (i32 (assertzext (i32 (
+ f32_to_f16 (f32 FPR32:$Rn))))))),
+ (f32 FPR32:$Rn)>;
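
The patterns above move a half-precision value held in the bottom 16 bits of a GPR into an FPR and widen it with FCVTsh. As a reference for what that widening computes, here is a host-side f16-to-f32 converter — a standard bit-manipulation sketch covering normals, subnormals, zeros, infinities and NaNs:

#include <cstdint>
#include <cstring>
#include <cstdio>

float half_to_float(uint16_t h) {
  uint32_t sign = (uint32_t)(h & 0x8000) << 16; // sign moves to bit 31
  uint32_t exp  = (h >> 10) & 0x1F;             // 5-bit exponent, bias 15
  uint32_t man  = h & 0x3FF;                    // 10-bit mantissa
  uint32_t bits;
  if (exp == 0x1F)                    // Inf/NaN: widen to the f32 max exponent
    bits = sign | 0x7F800000 | (man << 13);
  else if (exp != 0)                  // normal: rebias 15 -> 127 (+112)
    bits = sign | ((exp + 112) << 23) | (man << 13);
  else if (man == 0)                  // signed zero
    bits = sign;
  else {                              // f16 subnormal: renormalize for f32
    exp = 113;
    while (!(man & 0x400)) { man <<= 1; --exp; }
    bits = sign | (exp << 23) | ((man & 0x3FF) << 13);
  }
  float f;
  std::memcpy(&f, &bits, sizeof f);
  return f;
}

int main() {
  std::printf("%f\n", half_to_float(0x3C00)); // 1.000000
  std::printf("%f\n", half_to_float(0xC000)); // -2.000000
  return 0;
}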
+
+// Patterns for vector extract of half-precision FP value in i16 storage type
+def : Pat<(f32 (f16_to_f32 ( i32 (and (i32 (vector_extract
+ (v4i16 VPR64:$Rn), neon_uimm2_bare:$Imm)), 65535)))),
+ (FCVTsh (f16 (DUPhv_H
+ (v8i16 (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
+ neon_uimm2_bare:$Imm)))>;
+
+def : Pat<(f32 (f16_to_f32 ( i32 (and (i32 (vector_extract
+ (v8i16 VPR128:$Rn), neon_uimm3_bare:$Imm)), 65535)))),
+ (FCVTsh (f16 (DUPhv_H (v8i16 VPR128:$Rn), neon_uimm3_bare:$Imm)))>;
+
+// Patterns for vector insert of half-precision FP value 0 in i16 storage type
+def : Pat<(v8i16 (vector_insert (v8i16 VPR128:$Rn),
+ (i32 (assertsext (i32 (fp_to_sint(f32 (f16_to_f32 (i32 0))))))),
+ (neon_uimm3_bare:$Imm))),
+ (v8i16 (INSELh (v8i16 VPR128:$Rn),
+ (v8i16 (SUBREG_TO_REG (i64 0),
+ (f16 (EXTRACT_SUBREG (f32 (FMOVsw (i32 WZR))), sub_16)),
+ sub_16)),
+ neon_uimm3_bare:$Imm, 0))>;
+
+def : Pat<(v4i16 (vector_insert (v4i16 VPR64:$Rn),
+ (i32 (assertsext (i32 (fp_to_sint(f32 (f16_to_f32 (i32 0))))))),
+ (neon_uimm2_bare:$Imm))),
+ (v4i16 (EXTRACT_SUBREG
+ (v8i16 (INSELh
+ (v8i16 (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
+ (v8i16 (SUBREG_TO_REG (i64 0),
+ (f16 (EXTRACT_SUBREG (f32 (FMOVsw (i32 WZR))), sub_16)),
+ sub_16)),
+ neon_uimm2_bare:$Imm, 0)),
+ sub_64))>;
+
+// Patterns for vector insert of half-precision FP value in i16 storage type
+def : Pat<(v8i16 (vector_insert (v8i16 VPR128:$Rn),
+ (i32 (assertsext (i32 (fp_to_sint
+ (f32 (f16_to_f32 (i32 (and (i32 GPR32:$src), 65535)))))))),
+ (neon_uimm3_bare:$Imm))),
+ (v8i16 (INSELh (v8i16 VPR128:$Rn),
+ (v8i16 (SUBREG_TO_REG (i64 0),
+ (f16 (EXTRACT_SUBREG (f32 (FMOVsw (i32 GPR32:$src))), sub_16)),
+ sub_16)),
+ neon_uimm3_bare:$Imm, 0))>;
+
+def : Pat<(v4i16 (vector_insert (v4i16 VPR64:$Rn),
+ (i32 (assertsext (i32 (fp_to_sint
+ (f32 (f16_to_f32 (i32 (and (i32 GPR32:$src), 65535)))))))),
+ (neon_uimm2_bare:$Imm))),
+ (v4i16 (EXTRACT_SUBREG
+ (v8i16 (INSELh
+ (v8i16 (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
+ (v8i16 (SUBREG_TO_REG (i64 0),
+ (f16 (EXTRACT_SUBREG (f32 (FMOVsw (i32 GPR32:$src))), sub_16)),
+ sub_16)),
+ neon_uimm2_bare:$Imm, 0)),
+ sub_64))>;
+
+def : Pat<(v8i16 (vector_insert (v8i16 VPR128:$Rn),
+ (i32 (vector_extract (v8i16 VPR128:$src), neon_uimm3_bare:$Imm2)),
+ (neon_uimm3_bare:$Imm1))),
+ (v8i16 (INSELh (v8i16 VPR128:$Rn), (v8i16 VPR128:$src),
+ neon_uimm3_bare:$Imm1, neon_uimm3_bare:$Imm2))>;
+
+// Patterns for vector copy of half-precision FP value in i16 storage type
+def : Pat<(v8i16 (vector_insert (v8i16 VPR128:$Rn),
+ (i32 (assertsext (i32 (fp_to_sint(f32 (f16_to_f32 (i32 (and (i32
+ (vector_extract (v8i16 VPR128:$src), neon_uimm3_bare:$Imm2)),
+ 65535)))))))),
+ (neon_uimm3_bare:$Imm1))),
+ (v8i16 (INSELh (v8i16 VPR128:$Rn), (v8i16 VPR128:$src),
+ neon_uimm3_bare:$Imm1, neon_uimm3_bare:$Imm2))>;
+
+def : Pat<(v4i16 (vector_insert (v4i16 VPR64:$Rn),
+ (i32 (assertsext (i32 (fp_to_sint(f32 (f16_to_f32 (i32 (and (i32
+ (vector_extract (v4i16 VPR64:$src), neon_uimm3_bare:$Imm2)),
+ 65535)))))))),
+ (neon_uimm3_bare:$Imm1))),
+ (v4i16 (EXTRACT_SUBREG
+ (v8i16 (INSELh
+ (v8i16 (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
+ (v8i16 (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64)),
+ neon_uimm3_bare:$Imm1, neon_uimm3_bare:$Imm2)),
+ sub_64))>;
+
-def : Pat<(f64 (bitconvert (v8i8 VPR64:$src))),
- (f64 (EXTRACT_SUBREG (v8i8 VPR64:$src), sub_64))>;
-def : Pat<(f64 (bitconvert (v4i16 VPR64:$src))),
- (f64 (EXTRACT_SUBREG (v4i16 VPR64:$src), sub_64))>;
-def : Pat<(f64 (bitconvert (v2i32 VPR64:$src))),
- (f64 (EXTRACT_SUBREG (v2i32 VPR64:$src), sub_64))>;
-def : Pat<(f64 (bitconvert (v2f32 VPR64:$src))),
- (f64 (EXTRACT_SUBREG (v2f32 VPR64:$src), sub_64))>;
-def : Pat<(f64 (bitconvert (v1i64 VPR64:$src))),
- (f64 (EXTRACT_SUBREG (v1i64 VPR64:$src), sub_64))>;
-def : Pat<(f128 (bitconvert (v16i8 VPR128:$src))),
- (f128 (EXTRACT_SUBREG (v16i8 VPR128:$src), sub_alias))>;
-def : Pat<(f128 (bitconvert (v8i16 VPR128:$src))),
- (f128 (EXTRACT_SUBREG (v8i16 VPR128:$src), sub_alias))>;
-def : Pat<(f128 (bitconvert (v4i32 VPR128:$src))),
- (f128 (EXTRACT_SUBREG (v4i32 VPR128:$src), sub_alias))>;
-def : Pat<(f128 (bitconvert (v2i64 VPR128:$src))),
- (f128 (EXTRACT_SUBREG (v2i64 VPR128:$src), sub_alias))>;
-def : Pat<(f128 (bitconvert (v4f32 VPR128:$src))),
- (f128 (EXTRACT_SUBREG (v4f32 VPR128:$src), sub_alias))>;
-def : Pat<(f128 (bitconvert (v2f64 VPR128:$src))),
- (f128 (EXTRACT_SUBREG (v2f64 VPR128:$src), sub_alias))>;
-
-def : Pat<(v8i8 (bitconvert (f64 FPR64:$src))),
- (v8i8 (SUBREG_TO_REG (i64 0), (f64 FPR64:$src), sub_64))>;
-def : Pat<(v4i16 (bitconvert (f64 FPR64:$src))),
- (v4i16 (SUBREG_TO_REG (i64 0), (f64 FPR64:$src), sub_64))>;
-def : Pat<(v2i32 (bitconvert (f64 FPR64:$src))),
- (v2i32 (SUBREG_TO_REG (i64 0), (f64 FPR64:$src), sub_64))>;
-def : Pat<(v2f32 (bitconvert (f64 FPR64:$src))),
- (v2f32 (SUBREG_TO_REG (i64 0), (f64 FPR64:$src), sub_64))>;
-def : Pat<(v1i64 (bitconvert (f64 FPR64:$src))),
- (v1i64 (SUBREG_TO_REG (i64 0), (f64 FPR64:$src), sub_64))>;
-def : Pat<(v16i8 (bitconvert (f128 FPR128:$src))),
- (v16i8 (SUBREG_TO_REG (i128 0), (f128 FPR128:$src),
- sub_alias))>;
-def : Pat<(v8i16 (bitconvert (f128 FPR128:$src))),
- (v8i16 (SUBREG_TO_REG (i128 0), (f128 FPR128:$src),
- sub_alias))>;
-def : Pat<(v4i32 (bitconvert (f128 FPR128:$src))),
- (v4i32 (SUBREG_TO_REG (i128 0), (f128 FPR128:$src),
- sub_alias))>;
-def : Pat<(v2i64 (bitconvert (f128 FPR128:$src))),
- (v2i64 (SUBREG_TO_REG (i128 0), (f128 FPR128:$src),
- sub_alias))>;
-def : Pat<(v4f32 (bitconvert (f128 FPR128:$src))),
- (v4f32 (SUBREG_TO_REG (i128 0), (f128 FPR128:$src),
- sub_alias))>;
-def : Pat<(v2f64 (bitconvert (f128 FPR128:$src))),
- (v2f64 (SUBREG_TO_REG (i128 0), (f128 FPR128:$src),
- sub_alias))>;
diff --git a/lib/Target/AArch64/AArch64MCInstLower.cpp b/lib/Target/AArch64/AArch64MCInstLower.cpp
index 7ce5ce3..8cfb968 100644
--- a/lib/Target/AArch64/AArch64MCInstLower.cpp
+++ b/lib/Target/AArch64/AArch64MCInstLower.cpp
@@ -121,7 +121,7 @@ bool AArch64AsmPrinter::lowerOperand(const MachineOperand &MO,
MCOp = lowerSymbolOperand(MO, GetExternalSymbolSymbol(MO.getSymbolName()));
break;
case MachineOperand::MO_GlobalAddress:
- MCOp = lowerSymbolOperand(MO, Mang->getSymbol(MO.getGlobal()));
+ MCOp = lowerSymbolOperand(MO, getSymbol(MO.getGlobal()));
break;
case MachineOperand::MO_MachineBasicBlock:
MCOp = MCOperand::CreateExpr(MCSymbolRefExpr::Create(
diff --git a/lib/Target/AArch64/AArch64RegisterInfo.td b/lib/Target/AArch64/AArch64RegisterInfo.td
index b3a81b1..4e2022c 100644
--- a/lib/Target/AArch64/AArch64RegisterInfo.td
+++ b/lib/Target/AArch64/AArch64RegisterInfo.td
@@ -18,9 +18,19 @@ def sub_32 : SubRegIndex<32>;
def sub_16 : SubRegIndex<16>;
def sub_8 : SubRegIndex<8>;
-// The VPR registers are handled as sub-registers of FPR equivalents, but
-// they're really the same thing. We give this concept a special index.
-def sub_alias : SubRegIndex<128>;
+// Note: Code depends on these having consecutive numbers.
+def qqsub : SubRegIndex<256, 256>;
+
+def qsub_0 : SubRegIndex<128>;
+def qsub_1 : SubRegIndex<128, 128>;
+def qsub_2 : ComposedSubRegIndex<qqsub, qsub_0>;
+def qsub_3 : ComposedSubRegIndex<qqsub, qsub_1>;
+
+def dsub_0 : SubRegIndex<64>;
+def dsub_1 : SubRegIndex<64, 64>;
+def dsub_2 : ComposedSubRegIndex<qsub_1, dsub_0>;
+def dsub_3 : ComposedSubRegIndex<qsub_1, dsub_1>;
+def dsub_4 : ComposedSubRegIndex<qsub_2, dsub_0>;
}
// Registers are identified with 5-bit ID numbers.
@@ -137,60 +147,51 @@ foreach Index = 0-31 in {
}
-def FPR8 : RegisterClass<"AArch64", [i8], 8,
+def FPR8 : RegisterClass<"AArch64", [i8, v1i8], 8,
(sequence "B%u", 0, 31)> {
}
-def FPR16 : RegisterClass<"AArch64", [f16], 16,
+def FPR16 : RegisterClass<"AArch64", [f16, v1i16], 16,
(sequence "H%u", 0, 31)> {
}
-def FPR32 : RegisterClass<"AArch64", [f32], 32,
+def FPR32 : RegisterClass<"AArch64", [f32, v1i32, v1f32], 32,
(sequence "S%u", 0, 31)> {
}
-def FPR64 : RegisterClass<"AArch64", [f64], 64,
- (sequence "D%u", 0, 31)> {
-}
+def FPR64 : RegisterClass<"AArch64",
+ [f64, v2f32, v2i32, v4i16, v8i8, v1i64, v1f64],
+ 64, (sequence "D%u", 0, 31)>;
-def FPR128 : RegisterClass<"AArch64", [f128], 128,
- (sequence "Q%u", 0, 31)> {
-}
+def FPR128 : RegisterClass<"AArch64",
+ [f128, v2f64, v2i64, v4f32, v4i32, v8i16, v16i8],
+ 128, (sequence "Q%u", 0, 31)>;
+
+def FPR64Lo : RegisterClass<"AArch64",
+ [f64, v2f32, v2i32, v4i16, v8i8, v1i64, v1f64],
+ 64, (sequence "D%u", 0, 15)>;
+def FPR128Lo : RegisterClass<"AArch64",
+ [f128, v2f64, v2i64, v4f32, v4i32, v8i16, v16i8],
+ 128, (sequence "Q%u", 0, 15)>;
//===----------------------------------------------------------------------===//
// Vector registers:
//===----------------------------------------------------------------------===//
-// NEON registers simply specify the overall vector, and it's expected that
-// Instructions will individually specify the acceptable data layout. In
-// principle this leaves two approaches open:
-// + An operand, giving a single ADDvvv instruction (for example). This turns
-// out to be unworkable in the assembly parser (without every Instruction
-// having a "cvt" function, at least) because the constraints can't be
-// properly enforced. It also complicates specifying patterns since each
-// instruction will accept many types.
-// + A bare token (e.g. ".2d"). This means the AsmParser has to know specific
-// details about NEON registers, but simplifies most other details.
-//
-// The second approach was taken.
-
-foreach Index = 0-31 in {
- def V # Index : AArch64RegWithSubs<Index, "v" # Index,
- [!cast<Register>("Q" # Index)],
- [sub_alias]>,
- DwarfRegNum<[!add(Index, 64)]>;
+def VPR64AsmOperand : AsmOperandClass {
+ let Name = "VPR";
+ let PredicateMethod = "isReg";
+ let RenderMethod = "addRegOperands";
}
-// These two classes contain the same registers, which should be reasonably
-// sensible for MC and allocation purposes, but allows them to be treated
-// separately for things like stack spilling.
-def VPR64 : RegisterClass<"AArch64", [v2f32, v2i32, v4i16, v8i8, v1i64], 64,
- (sequence "V%u", 0, 31)>;
+def VPR64 : RegisterOperand<FPR64, "printVPRRegister">;
-def VPR128 : RegisterClass<"AArch64",
- [v2f64, v2i64, v4f32, v4i32, v8i16, v16i8], 128,
- (sequence "V%u", 0, 31)>;
+def VPR128 : RegisterOperand<FPR128, "printVPRRegister">;
+
+def VPR64Lo : RegisterOperand<FPR64Lo, "printVPRRegister">;
+
+def VPR128Lo : RegisterOperand<FPR128Lo, "printVPRRegister">;
// Flags register
def NZCV : Register<"nzcv"> {
@@ -201,3 +202,90 @@ def FlagClass : RegisterClass<"AArch64", [i32], 32, (add NZCV)> {
let CopyCost = -1;
let isAllocatable = 0;
}
+
+//===----------------------------------------------------------------------===//
+// Consecutive vector registers
+//===----------------------------------------------------------------------===//
+// 2 Consecutive 64-bit registers: D0_D1, D1_D2, ..., D30_D31
+def Tuples2D : RegisterTuples<[dsub_0, dsub_1],
+ [(rotl FPR64, 0), (rotl FPR64, 1)]>;
+
+// 3 Consecutive 64-bit registers: D0_D1_D2, ..., D31_D0_D1
+def Tuples3D : RegisterTuples<[dsub_0, dsub_1, dsub_2],
+ [(rotl FPR64, 0), (rotl FPR64, 1),
+ (rotl FPR64, 2)]>;
+
+// 4 Consecutive 64-bit registers: D0_D1_D2_D3, ..., D31_D0_D1_D2
+def Tuples4D : RegisterTuples<[dsub_0, dsub_1, dsub_2, dsub_3],
+ [(rotl FPR64, 0), (rotl FPR64, 1),
+ (rotl FPR64, 2), (rotl FPR64, 3)]>;
+
+// 2 Consecutive 128-bit registers: Q0_Q1, Q1_Q2, ..., Q30_Q31
+def Tuples2Q : RegisterTuples<[qsub_0, qsub_1],
+ [(rotl FPR128, 0), (rotl FPR128, 1)]>;
+
+// 3 Consecutive 128-bit registers: Q0_Q1_Q2, ..., Q31_Q0_Q1
+def Tuples3Q : RegisterTuples<[qsub_0, qsub_1, qsub_2],
+ [(rotl FPR128, 0), (rotl FPR128, 1),
+ (rotl FPR128, 2)]>;
+
+// 4 Consecutive 128-bit registers: Q0_Q1_Q2_Q3, ..., Q31_Q0_Q1_Q2
+def Tuples4Q : RegisterTuples<[qsub_0, qsub_1, qsub_2, qsub_3],
+ [(rotl FPR128, 0), (rotl FPR128, 1),
+ (rotl FPR128, 2), (rotl FPR128, 3)]>;
+
+// The following are super-register classes modelling 2/3/4 consecutive
+// 64-bit/128-bit registers.
+
+def DPair : RegisterClass<"AArch64", [v2i64], 64, (add Tuples2D)>;
+
+def DTriple : RegisterClass<"AArch64", [untyped], 64, (add Tuples3D)> {
+ let Size = 192; // 3 x 64 bits; there is no predefined type of that size.
+}
+
+def DQuad : RegisterClass<"AArch64", [v4i64], 64, (add Tuples4D)>;
+
+def QPair : RegisterClass<"AArch64", [v4i64], 128, (add Tuples2Q)>;
+
+def QTriple : RegisterClass<"AArch64", [untyped], 128, (add Tuples3Q)> {
+ let Size = 384; // 3 x 128 bits; there is no predefined type of that size.
+}
+
+def QQuad : RegisterClass<"AArch64", [v8i64], 128, (add Tuples4Q)>;
+
+
+// The following are vector list operands.
+multiclass VectorList_operands<string PREFIX, string LAYOUT, int Count,
+ RegisterClass RegList> {
+ def _asmoperand : AsmOperandClass {
+ let Name = PREFIX # LAYOUT # Count;
+ let RenderMethod = "addVectorListOperands";
+ let PredicateMethod =
+ "isVectorList<A64Layout::VL_" # LAYOUT # ", " # Count # ">";
+ let ParserMethod = "ParseVectorList";
+ }
+
+ def _operand : RegisterOperand<RegList,
+ "printVectorList<A64Layout::VL_" # LAYOUT # ", " # Count # ">"> {
+ let ParserMatchClass =
+ !cast<AsmOperandClass>(PREFIX # LAYOUT # "_asmoperand");
+ }
+}
+
+multiclass VectorList_BHSD<string PREFIX, int Count, RegisterClass DRegList,
+ RegisterClass QRegList> {
+ defm 8B : VectorList_operands<PREFIX, "8B", Count, DRegList>;
+ defm 4H : VectorList_operands<PREFIX, "4H", Count, DRegList>;
+ defm 2S : VectorList_operands<PREFIX, "2S", Count, DRegList>;
+ defm 1D : VectorList_operands<PREFIX, "1D", Count, DRegList>;
+ defm 16B : VectorList_operands<PREFIX, "16B", Count, QRegList>;
+ defm 8H : VectorList_operands<PREFIX, "8H", Count, QRegList>;
+ defm 4S : VectorList_operands<PREFIX, "4S", Count, QRegList>;
+ defm 2D : VectorList_operands<PREFIX, "2D", Count, QRegList>;
+}
+
+// Vector list operands with 1/2/3/4 registers: VOne8B_operand, ..., VQuad2D_operand
+defm VOne : VectorList_BHSD<"VOne", 1, FPR64, FPR128>;
+defm VPair : VectorList_BHSD<"VPair", 2, DPair, QPair>;
+defm VTriple : VectorList_BHSD<"VTriple", 3, DTriple, QTriple>;
+defm VQuad : VectorList_BHSD<"VQuad", 4, DQuad, QQuad>;
\ No newline at end of file
diff --git a/lib/Target/AArch64/AArch64Subtarget.cpp b/lib/Target/AArch64/AArch64Subtarget.cpp
index d71bb4e..5c693c1 100644
--- a/lib/Target/AArch64/AArch64Subtarget.cpp
+++ b/lib/Target/AArch64/AArch64Subtarget.cpp
@@ -25,11 +25,31 @@
using namespace llvm;
+// Pin the vtable to this file.
+void AArch64Subtarget::anchor() {}
+
AArch64Subtarget::AArch64Subtarget(StringRef TT, StringRef CPU, StringRef FS)
- : AArch64GenSubtargetInfo(TT, CPU, FS), HasNEON(false), HasCrypto(false),
- TargetTriple(TT) {
+ : AArch64GenSubtargetInfo(TT, CPU, FS), HasFPARMv8(false), HasNEON(false),
+ HasCrypto(false), TargetTriple(TT), CPUString(CPU) {
+
+ initializeSubtargetFeatures(CPU, FS);
+}
+
+void AArch64Subtarget::initializeSubtargetFeatures(StringRef CPU,
+ StringRef FS) {
+ if (CPU.empty())
+ CPUString = "generic";
+
+ std::string FullFS = FS;
+ if (CPUString == "generic") {
+ // Enable FP by default.
+ if (FullFS.empty())
+ FullFS = "+fp-armv8";
+ else
+ FullFS = "+fp-armv8," + FullFS;
+ }
- ParseSubtargetFeatures(CPU, FS);
+ ParseSubtargetFeatures(CPU, FullFS);
}
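
The effect of the FullFS logic above: on the generic CPU, "+fp-armv8" is prepended so floating point is available by default, while an explicit CPU's feature string passes through untouched. A host sketch of just the string composition (the function name is illustrative, not part of the patch):

#include <string>
#include <cstdio>

std::string fullFS(const std::string &CPU, const std::string &FS) {
  if (!CPU.empty() && CPU != "generic")
    return FS;                                      // explicit CPU: unchanged
  return FS.empty() ? "+fp-armv8" : "+fp-armv8," + FS;
}

int main() {
  std::printf("%s\n", fullFS("generic", "").c_str());      // +fp-armv8
  std::printf("%s\n", fullFS("generic", "+neon").c_str()); // +fp-armv8,+neon
  return 0;
}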
bool AArch64Subtarget::GVIsIndirectSymbol(const GlobalValue *GV,
diff --git a/lib/Target/AArch64/AArch64Subtarget.h b/lib/Target/AArch64/AArch64Subtarget.h
index 35a7c8d..bbfd3bc 100644
--- a/lib/Target/AArch64/AArch64Subtarget.h
+++ b/lib/Target/AArch64/AArch64Subtarget.h
@@ -27,18 +27,31 @@ class StringRef;
class GlobalValue;
class AArch64Subtarget : public AArch64GenSubtargetInfo {
+ virtual void anchor();
protected:
+ bool HasFPARMv8;
bool HasNEON;
bool HasCrypto;
/// TargetTriple - What processor and OS we're targeting.
Triple TargetTriple;
+
+ /// CPUString - String name of used CPU.
+ std::string CPUString;
+
+private:
+ void initializeSubtargetFeatures(StringRef CPU, StringRef FS);
+
public:
/// This constructor initializes the data members to match that
/// of the specified triple.
///
AArch64Subtarget(StringRef TT, StringRef CPU, StringRef FS);
+ virtual bool enableMachineScheduler() const {
+ return true;
+ }
+
/// ParseSubtargetFeatures - Parses features string setting specified
/// subtarget options. Definition of function is auto generated by tblgen.
void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
@@ -46,11 +59,13 @@ public:
bool GVIsIndirectSymbol(const GlobalValue *GV, Reloc::Model RelocM) const;
bool isTargetELF() const { return TargetTriple.isOSBinFormatELF(); }
- bool isTargetLinux() const { return TargetTriple.getOS() == Triple::Linux; }
+ bool isTargetLinux() const { return TargetTriple.isOSLinux(); }
+ bool hasFPARMv8() const { return HasFPARMv8; }
bool hasNEON() const { return HasNEON; }
-
bool hasCrypto() const { return HasCrypto; }
+
+ const std::string & getCPUString() const { return CPUString; }
};
} // End llvm namespace
diff --git a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
index 43e91ac..fbbce11 100644
--- a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
+++ b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
@@ -54,8 +54,9 @@ public:
#include "AArch64GenAsmMatcher.inc"
};
- AArch64AsmParser(MCSubtargetInfo &_STI, MCAsmParser &_Parser)
- : MCTargetAsmParser(), STI(_STI), Parser(_Parser) {
+ AArch64AsmParser(MCSubtargetInfo &_STI, MCAsmParser &_Parser,
+ const MCInstrInfo &MII)
+ : MCTargetAsmParser(), STI(_STI), Parser(_Parser) {
MCAsmParserExtension::Initialize(_Parser);
// Initialize the set of available features.
@@ -126,6 +127,11 @@ public:
OperandMatchResultTy
ParseSysRegOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+ bool TryParseVector(uint32_t &RegNum, SMLoc &RegEndLoc, StringRef &Layout,
+ SMLoc &LayoutLoc);
+
+ OperandMatchResultTy ParseVectorList(SmallVectorImpl<MCParsedAsmOperand *> &);
+
bool validateInstruction(MCInst &Inst,
const SmallVectorImpl<MCParsedAsmOperand*> &Operands);
@@ -153,6 +159,7 @@ private:
k_Immediate, // Including expressions referencing symbols
k_Register,
k_ShiftExtend,
+ k_VectorList, // A sequential list of 1 to 4 registers.
k_SysReg, // The register operand of MRS and MSR instructions
k_Token, // The mnemonic; other raw tokens the auto-generated
k_WrappedRegister // Load/store exclusive permit a wrapped register.
@@ -188,6 +195,13 @@ private:
bool ImplicitAmount;
};
+ // A vector register list is a sequential list of 1 to 4 registers.
+ struct VectorListOp {
+ unsigned RegNum;
+ unsigned Count;
+ A64Layout::VectorLayout Layout;
+ };
+
struct SysRegOp {
const char *Data;
unsigned Length;
@@ -205,6 +219,7 @@ private:
struct ImmOp Imm;
struct RegOp Reg;
struct ShiftExtendOp ShiftExtend;
+ struct VectorListOp VectorList;
struct SysRegOp SysReg;
struct TokOp Tok;
};
@@ -664,6 +679,44 @@ public:
return !ShiftExtend.ImplicitAmount && ShiftExtend.Amount <= 4;
}
+ // Return true if 0 < value <= w.
+ bool isShrFixedWidth(int w) const {
+ if (!isImm())
+ return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE)
+ return false;
+ int64_t Value = CE->getValue();
+ return Value > 0 && Value <= w;
+ }
+
+ bool isShrImm8() const { return isShrFixedWidth(8); }
+
+ bool isShrImm16() const { return isShrFixedWidth(16); }
+
+ bool isShrImm32() const { return isShrFixedWidth(32); }
+
+ bool isShrImm64() const { return isShrFixedWidth(64); }
+
+ // Return true if 0 <= value < w.
+ bool isShlFixedWidth(int w) const {
+ if (!isImm())
+ return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE)
+ return false;
+ int64_t Value = CE->getValue();
+ return Value >= 0 && Value < w;
+ }
+
+ bool isShlImm8() const { return isShlFixedWidth(8); }
+
+ bool isShlImm16() const { return isShlFixedWidth(16); }
+
+ bool isShlImm32() const { return isShlFixedWidth(32); }
+
+ bool isShlImm64() const { return isShlFixedWidth(64); }
+
bool isNeonMovImmShiftLSL() const {
if (!isShiftOrExtend())
return false;
@@ -697,6 +750,12 @@ public:
return ShiftExtend.Amount == 8 || ShiftExtend.Amount == 16;
}
+ template <A64Layout::VectorLayout Layout, unsigned Count>
+ bool isVectorList() const {
+ return Kind == k_VectorList && VectorList.Layout == Layout &&
+ VectorList.Count == Count;
+ }
+
template <int MemSize> bool isSImm7Scaled() const {
if (!isImm())
return false;
@@ -756,6 +815,17 @@ public:
return true;
}
+ // Return true if the immediate value equals N.
+ template<int N>
+ bool isExactImm() const {
+ if (!isImm()) return false;
+
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+
+ return CE->getValue() == N;
+ }
+
static AArch64Operand *CreateImmWithLSL(const MCExpr *Val,
unsigned ShiftAmount,
bool ImplicitAmount,
@@ -817,6 +887,18 @@ public:
return Op;
}
+ static AArch64Operand *CreateVectorList(unsigned RegNum, unsigned Count,
+ A64Layout::VectorLayout Layout,
+ SMLoc S, SMLoc E) {
+ AArch64Operand *Op = new AArch64Operand(k_VectorList, S, E);
+ Op->VectorList.RegNum = RegNum;
+ Op->VectorList.Count = Count;
+ Op->VectorList.Layout = Layout;
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ return Op;
+ }
+
static AArch64Operand *CreateToken(StringRef Str, SMLoc S) {
AArch64Operand *Op = new AArch64Operand(k_Token, S, S);
Op->Tok.Data = Str.data();
@@ -1164,6 +1246,11 @@ public:
}
Inst.addOperand(MCOperand::CreateImm(Imm));
}
+
+ void addVectorListOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateReg(VectorList.RegNum));
+ }
};
} // end anonymous namespace.
@@ -1203,7 +1290,6 @@ AArch64AsmParser::ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
else
return MatchOperand_Success;
}
-
// ... or it might be a symbolish thing
}
// Fall through
@@ -1247,7 +1333,7 @@ AArch64AsmParser::ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
return ParseOperand(Operands, Mnemonic);
}
// The following will likely be useful later, but not in very early cases
- case AsmToken::LCurly: // Weird SIMD lists
+ case AsmToken::LCurly: // SIMD vector list is not parsed here
llvm_unreachable("Don't know how to deal with '{' in operand");
return MatchOperand_ParseFail;
}
@@ -1405,7 +1491,7 @@ AArch64AsmParser::ParseImmWithLSLOperand(
// The optional operand must be "lsl #N" where N is non-negative.
if (Parser.getTok().is(AsmToken::Identifier)
- && Parser.getTok().getIdentifier().lower() == "lsl") {
+ && Parser.getTok().getIdentifier().equals_lower("lsl")) {
Parser.Lex();
if (Parser.getTok().is(AsmToken::Hash)) {
@@ -1462,9 +1548,8 @@ AArch64AsmParser::ParseCRxOperand(
return MatchOperand_ParseFail;
}
- std::string LowerTok = Parser.getTok().getIdentifier().lower();
- StringRef Tok(LowerTok);
- if (Tok[0] != 'c') {
+ StringRef Tok = Parser.getTok().getIdentifier();
+ if (Tok[0] != 'c' && Tok[0] != 'C') {
Error(S, "Expected cN operand where 0 <= N <= 15");
return MatchOperand_ParseFail;
}
@@ -1536,22 +1621,11 @@ AArch64AsmParser::IdentifyRegister(unsigned &RegNum, SMLoc &RegEndLoc,
std::string LowerReg = Tok.getString().lower();
size_t DotPos = LowerReg.find('.');
- RegNum = MatchRegisterName(LowerReg.substr(0, DotPos));
- if (RegNum == AArch64::NoRegister) {
- RegNum = StringSwitch<unsigned>(LowerReg.substr(0, DotPos))
- .Case("ip0", AArch64::X16)
- .Case("ip1", AArch64::X17)
- .Case("fp", AArch64::X29)
- .Case("lr", AArch64::X30)
- .Default(AArch64::NoRegister);
- }
- if (RegNum == AArch64::NoRegister)
- return false;
-
+ bool IsVec128 = false;
SMLoc S = Tok.getLoc();
RegEndLoc = SMLoc::getFromPointer(S.getPointer() + DotPos);
- if (DotPos == StringRef::npos) {
+ if (DotPos == std::string::npos) {
Layout = StringRef();
} else {
// Everything afterwards needs to be a literal token, expected to be
@@ -1561,20 +1635,78 @@ AArch64AsmParser::IdentifyRegister(unsigned &RegNum, SMLoc &RegEndLoc,
// gives us a permanent string to use in the token (a pointer into LowerReg
// would go out of scope when we return).
LayoutLoc = SMLoc::getFromPointer(S.getPointer() + DotPos + 1);
- std::string LayoutText = LowerReg.substr(DotPos, StringRef::npos);
+ StringRef LayoutText = StringRef(LowerReg).substr(DotPos);
+
+ // See if it's a 128-bit layout first.
Layout = StringSwitch<const char *>(LayoutText)
- .Case(".d", ".d").Case(".1d", ".1d").Case(".2d", ".2d")
- .Case(".s", ".s").Case(".2s", ".2s").Case(".4s", ".4s")
- .Case(".h", ".h").Case(".4h", ".4h").Case(".8h", ".8h")
- .Case(".b", ".b").Case(".8b", ".8b").Case(".16b", ".16b")
+ .Case(".q", ".q").Case(".1q", ".1q")
+ .Case(".d", ".d").Case(".2d", ".2d")
+ .Case(".s", ".s").Case(".4s", ".4s")
+ .Case(".h", ".h").Case(".8h", ".8h")
+ .Case(".b", ".b").Case(".16b", ".16b")
.Default("");
+ if (Layout.size() != 0)
+ IsVec128 = true;
+ else {
+ Layout = StringSwitch<const char *>(LayoutText)
+ .Case(".1d", ".1d")
+ .Case(".2s", ".2s")
+ .Case(".4h", ".4h")
+ .Case(".8b", ".8b")
+ .Default("");
+ }
+
if (Layout.size() == 0) {
- // Malformed register
+ // If we've still not pinned it down, the register is malformed.
return false;
}
}
+ RegNum = MatchRegisterName(LowerReg.substr(0, DotPos));
+ if (RegNum == AArch64::NoRegister) {
+ RegNum = StringSwitch<unsigned>(LowerReg.substr(0, DotPos))
+ .Case("ip0", AArch64::X16)
+ .Case("ip1", AArch64::X17)
+ .Case("fp", AArch64::X29)
+ .Case("lr", AArch64::X30)
+ .Case("v0", IsVec128 ? AArch64::Q0 : AArch64::D0)
+ .Case("v1", IsVec128 ? AArch64::Q1 : AArch64::D1)
+ .Case("v2", IsVec128 ? AArch64::Q2 : AArch64::D2)
+ .Case("v3", IsVec128 ? AArch64::Q3 : AArch64::D3)
+ .Case("v4", IsVec128 ? AArch64::Q4 : AArch64::D4)
+ .Case("v5", IsVec128 ? AArch64::Q5 : AArch64::D5)
+ .Case("v6", IsVec128 ? AArch64::Q6 : AArch64::D6)
+ .Case("v7", IsVec128 ? AArch64::Q7 : AArch64::D7)
+ .Case("v8", IsVec128 ? AArch64::Q8 : AArch64::D8)
+ .Case("v9", IsVec128 ? AArch64::Q9 : AArch64::D9)
+ .Case("v10", IsVec128 ? AArch64::Q10 : AArch64::D10)
+ .Case("v11", IsVec128 ? AArch64::Q11 : AArch64::D11)
+ .Case("v12", IsVec128 ? AArch64::Q12 : AArch64::D12)
+ .Case("v13", IsVec128 ? AArch64::Q13 : AArch64::D13)
+ .Case("v14", IsVec128 ? AArch64::Q14 : AArch64::D14)
+ .Case("v15", IsVec128 ? AArch64::Q15 : AArch64::D15)
+ .Case("v16", IsVec128 ? AArch64::Q16 : AArch64::D16)
+ .Case("v17", IsVec128 ? AArch64::Q17 : AArch64::D17)
+ .Case("v18", IsVec128 ? AArch64::Q18 : AArch64::D18)
+ .Case("v19", IsVec128 ? AArch64::Q19 : AArch64::D19)
+ .Case("v20", IsVec128 ? AArch64::Q20 : AArch64::D20)
+ .Case("v21", IsVec128 ? AArch64::Q21 : AArch64::D21)
+ .Case("v22", IsVec128 ? AArch64::Q22 : AArch64::D22)
+ .Case("v23", IsVec128 ? AArch64::Q23 : AArch64::D23)
+ .Case("v24", IsVec128 ? AArch64::Q24 : AArch64::D24)
+ .Case("v25", IsVec128 ? AArch64::Q25 : AArch64::D25)
+ .Case("v26", IsVec128 ? AArch64::Q26 : AArch64::D26)
+ .Case("v27", IsVec128 ? AArch64::Q27 : AArch64::D27)
+ .Case("v28", IsVec128 ? AArch64::Q28 : AArch64::D28)
+ .Case("v29", IsVec128 ? AArch64::Q29 : AArch64::D29)
+ .Case("v30", IsVec128 ? AArch64::Q30 : AArch64::D30)
+ .Case("v31", IsVec128 ? AArch64::Q31 : AArch64::D31)
+ .Default(AArch64::NoRegister);
+ }
+ if (RegNum == AArch64::NoRegister)
+ return false;
+
return true;
}
@@ -1606,6 +1738,7 @@ AArch64AsmParser::ParseRegister(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
case 'h': NumLanes = 8; break;
case 's': NumLanes = 4; break;
case 'd': NumLanes = 2; break;
+ case 'q': NumLanes = 1; break;
}
}
@@ -1824,6 +1957,148 @@ AArch64AsmParser::ParseShiftExtend(
return MatchOperand_Success;
}
+/// Try to parse a vector register token. If it is a vector register, the
+/// token is eaten and true is returned; otherwise false is returned.
+bool AArch64AsmParser::TryParseVector(uint32_t &RegNum, SMLoc &RegEndLoc,
+ StringRef &Layout, SMLoc &LayoutLoc) {
+ bool IsVector = true;
+
+ if (!IdentifyRegister(RegNum, RegEndLoc, Layout, LayoutLoc))
+ IsVector = false;
+ else if (!AArch64MCRegisterClasses[AArch64::FPR64RegClassID]
+ .contains(RegNum) &&
+ !AArch64MCRegisterClasses[AArch64::FPR128RegClassID]
+ .contains(RegNum))
+ IsVector = false;
+ else if (Layout.size() == 0)
+ IsVector = false;
+
+ if (!IsVector)
+ Error(Parser.getTok().getLoc(), "expected vector type register");
+
+ Parser.Lex(); // Eat this token.
+ return IsVector;
+}
+
+
+// A vector list contains 1-4 consecutive registers.
+// When the list holds more than one vector, two syntaxes are accepted:
+// (1) {Vn.layout, Vn+1.layout, ... , Vm.layout}
+// (2) {Vn.layout - Vm.layout}
+// If the layout is .b/.h/.s/.d, a trailing lane index is also parsed.
+AArch64AsmParser::OperandMatchResultTy AArch64AsmParser::ParseVectorList(
+ SmallVectorImpl<MCParsedAsmOperand *> &Operands) {
+ if (Parser.getTok().isNot(AsmToken::LCurly)) {
+ Error(Parser.getTok().getLoc(), "'{' expected");
+ return MatchOperand_ParseFail;
+ }
+ SMLoc SLoc = Parser.getTok().getLoc();
+ Parser.Lex(); // Eat '{' token.
+
+ unsigned Reg, Count = 1;
+ StringRef LayoutStr;
+ SMLoc RegEndLoc, LayoutLoc;
+ if (!TryParseVector(Reg, RegEndLoc, LayoutStr, LayoutLoc))
+ return MatchOperand_ParseFail;
+
+ if (Parser.getTok().is(AsmToken::Minus)) {
+ Parser.Lex(); // Eat the minus.
+
+ unsigned Reg2;
+ StringRef LayoutStr2;
+ SMLoc RegEndLoc2, LayoutLoc2;
+ SMLoc RegLoc2 = Parser.getTok().getLoc();
+
+ if (!TryParseVector(Reg2, RegEndLoc2, LayoutStr2, LayoutLoc2))
+ return MatchOperand_ParseFail;
+ unsigned Space = (Reg < Reg2) ? (Reg2 - Reg) : (Reg2 + 32 - Reg);
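+    // Register numbers wrap modulo 32, so e.g. "{v31.4s - v2.4s}" yields
+    // Space == 3 and hence a four-register list.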
+
+ if (LayoutStr != LayoutStr2) {
+ Error(LayoutLoc2, "expected the same vector layout");
+ return MatchOperand_ParseFail;
+ }
+ if (Space == 0 || Space > 3) {
+ Error(RegLoc2, "invalid number of vectors");
+ return MatchOperand_ParseFail;
+ }
+
+ Count += Space;
+ } else {
+ unsigned LastReg = Reg;
+ while (Parser.getTok().is(AsmToken::Comma)) {
+ Parser.Lex(); // Eat the comma.
+ unsigned Reg2;
+ StringRef LayoutStr2;
+ SMLoc RegEndLoc2, LayoutLoc2;
+ SMLoc RegLoc2 = Parser.getTok().getLoc();
+
+ if (!TryParseVector(Reg2, RegEndLoc2, LayoutStr2, LayoutLoc2))
+ return MatchOperand_ParseFail;
+ unsigned Space = (LastReg < Reg2) ? (Reg2 - LastReg)
+ : (Reg2 + 32 - LastReg);
+ Count++;
+
+      // Consecutive vectors must be exactly one register apart and share the
+      // same layout, and the total count must not be greater than 4.
+ if (Space != 1) {
+ Error(RegLoc2, "invalid space between two vectors");
+ return MatchOperand_ParseFail;
+ }
+ if (LayoutStr != LayoutStr2) {
+ Error(LayoutLoc2, "expected the same vector layout");
+ return MatchOperand_ParseFail;
+ }
+ if (Count > 4) {
+ Error(RegLoc2, "invalid number of vectors");
+ return MatchOperand_ParseFail;
+ }
+
+ LastReg = Reg2;
+ }
+ }
+
+ if (Parser.getTok().isNot(AsmToken::RCurly)) {
+ Error(Parser.getTok().getLoc(), "'}' expected");
+ return MatchOperand_ParseFail;
+ }
+ SMLoc ELoc = Parser.getTok().getLoc();
+ Parser.Lex(); // Eat '}' token.
+
+ A64Layout::VectorLayout Layout = A64StringToVectorLayout(LayoutStr);
+ if (Count > 1) { // If count > 1, create vector list using super register.
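+    // The list is represented by the tuple super-register covering its
+    // elements, e.g. a two-vector 128-bit list starting at Q0 becomes the
+    // QPair register spanning Q0 and Q1.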
+ bool IsVec64 = (Layout < A64Layout::VL_16B);
+ static unsigned SupRegIDs[3][2] = {
+ { AArch64::QPairRegClassID, AArch64::DPairRegClassID },
+ { AArch64::QTripleRegClassID, AArch64::DTripleRegClassID },
+ { AArch64::QQuadRegClassID, AArch64::DQuadRegClassID }
+ };
+ unsigned SupRegID = SupRegIDs[Count - 2][static_cast<int>(IsVec64)];
+ unsigned Sub0 = IsVec64 ? AArch64::dsub_0 : AArch64::qsub_0;
+ const MCRegisterInfo *MRI = getContext().getRegisterInfo();
+ Reg = MRI->getMatchingSuperReg(Reg, Sub0,
+ &AArch64MCRegisterClasses[SupRegID]);
+ }
+ Operands.push_back(
+ AArch64Operand::CreateVectorList(Reg, Count, Layout, SLoc, ELoc));
+
+ if (Parser.getTok().is(AsmToken::LBrac)) {
+ uint32_t NumLanes = 0;
+ switch(Layout) {
+ case A64Layout::VL_B : NumLanes = 16; break;
+ case A64Layout::VL_H : NumLanes = 8; break;
+ case A64Layout::VL_S : NumLanes = 4; break;
+ case A64Layout::VL_D : NumLanes = 2; break;
+ default:
+ SMLoc Loc = getLexer().getLoc();
+ Error(Loc, "expected comma before next operand");
+ return MatchOperand_ParseFail;
+ }
+ return ParseNEONLane(Operands, NumLanes);
+ } else {
+ return MatchOperand_Success;
+ }
+}
+
// FIXME: We would really like to be able to tablegen'erate this.
bool AArch64AsmParser::
validateInstruction(MCInst &Inst,
@@ -2240,6 +2515,30 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
case Match_Width64:
return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
"expected integer in range [<lsb>, 63]");
+ case Match_ShrImm8:
+ return Error(((AArch64Operand *)Operands[ErrorInfo])->getStartLoc(),
+ "expected integer in range [1, 8]");
+ case Match_ShrImm16:
+ return Error(((AArch64Operand *)Operands[ErrorInfo])->getStartLoc(),
+ "expected integer in range [1, 16]");
+ case Match_ShrImm32:
+ return Error(((AArch64Operand *)Operands[ErrorInfo])->getStartLoc(),
+ "expected integer in range [1, 32]");
+ case Match_ShrImm64:
+ return Error(((AArch64Operand *)Operands[ErrorInfo])->getStartLoc(),
+ "expected integer in range [1, 64]");
+ case Match_ShlImm8:
+ return Error(((AArch64Operand *)Operands[ErrorInfo])->getStartLoc(),
+ "expected integer in range [0, 7]");
+ case Match_ShlImm16:
+ return Error(((AArch64Operand *)Operands[ErrorInfo])->getStartLoc(),
+ "expected integer in range [0, 15]");
+ case Match_ShlImm32:
+ return Error(((AArch64Operand *)Operands[ErrorInfo])->getStartLoc(),
+ "expected integer in range [0, 31]");
+ case Match_ShlImm64:
+ return Error(((AArch64Operand *)Operands[ErrorInfo])->getStartLoc(),
+ "expected integer in range [0, 63]");
}
llvm_unreachable("Implement any new match types added!");
diff --git a/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp b/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
index a88a8e8..be4d7f2 100644
--- a/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
+++ b/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
@@ -82,15 +82,38 @@ static DecodeStatus DecodeFPR32RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
uint64_t Address, const void *Decoder);
static DecodeStatus DecodeFPR64RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeFPR64LoRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder);
static DecodeStatus DecodeFPR128RegisterClass(llvm::MCInst &Inst,
unsigned RegNo, uint64_t Address,
const void *Decoder);
-static DecodeStatus DecodeVPR64RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+static DecodeStatus DecodeFPR128LoRegisterClass(llvm::MCInst &Inst,
+ unsigned RegNo, uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeGPR64noxzrRegisterClass(llvm::MCInst &Inst,
+ unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeDPairRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder);
+static DecodeStatus DecodeQPairRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder);
+static DecodeStatus DecodeDTripleRegisterClass(llvm::MCInst &Inst,
+ unsigned RegNo, uint64_t Address,
+ const void *Decoder);
+static DecodeStatus DecodeQTripleRegisterClass(llvm::MCInst &Inst,
+ unsigned RegNo, uint64_t Address,
+ const void *Decoder);
+static DecodeStatus DecodeDQuadRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder);
+static DecodeStatus DecodeQQuadRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
uint64_t Address,
const void *Decoder);
-static DecodeStatus DecodeVPR128RegisterClass(llvm::MCInst &Inst,
- unsigned RegNo, uint64_t Address,
- const void *Decoder);
static DecodeStatus DecodeAddrRegExtendOperand(llvm::MCInst &Inst,
unsigned OptionHiS,
@@ -113,6 +136,30 @@ static DecodeStatus DecodeFPZeroOperand(llvm::MCInst &Inst,
uint64_t Address,
const void *Decoder);
+static DecodeStatus DecodeShiftRightImm8(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeShiftRightImm16(MCInst &Inst, unsigned Val,
+ uint64_t Address,
+ const void *Decoder);
+static DecodeStatus DecodeShiftRightImm32(MCInst &Inst, unsigned Val,
+ uint64_t Address,
+ const void *Decoder);
+static DecodeStatus DecodeShiftRightImm64(MCInst &Inst, unsigned Val,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeShiftLeftImm8(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeShiftLeftImm16(MCInst &Inst, unsigned Val,
+ uint64_t Address,
+ const void *Decoder);
+static DecodeStatus DecodeShiftLeftImm32(MCInst &Inst, unsigned Val,
+ uint64_t Address,
+ const void *Decoder);
+static DecodeStatus DecodeShiftLeftImm64(MCInst &Inst, unsigned Val,
+ uint64_t Address,
+ const void *Decoder);
+
template<int RegWidth>
static DecodeStatus DecodeMoveWideImmOperand(llvm::MCInst &Inst,
unsigned FullImm,
@@ -183,6 +230,17 @@ static DecodeStatus DecodeSingleIndexedInstruction(llvm::MCInst &Inst,
uint64_t Address,
const void *Decoder);
+static DecodeStatus DecodeVLDSTPostInstruction(MCInst &Inst, unsigned Val,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeVLDSTLanePostInstruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeSHLLInstruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
static bool Check(DecodeStatus &Out, DecodeStatus In);
@@ -331,6 +389,14 @@ DecodeFPR64RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
return MCDisassembler::Success;
}
+static DecodeStatus
+DecodeFPR64LoRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder) {
+ if (RegNo > 15)
+ return MCDisassembler::Fail;
+
+ return DecodeFPR64RegisterClass(Inst, RegNo, Address, Decoder);
+}
static DecodeStatus
DecodeFPR128RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
@@ -343,28 +409,80 @@ DecodeFPR128RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
return MCDisassembler::Success;
}
-static DecodeStatus DecodeVPR64RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
- uint64_t Address,
- const void *Decoder) {
- if (RegNo > 31)
+static DecodeStatus
+DecodeFPR128LoRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder) {
+ if (RegNo > 15)
return MCDisassembler::Fail;
- uint16_t Register = getReg(Decoder, AArch64::VPR64RegClassID, RegNo);
+ return DecodeFPR128RegisterClass(Inst, RegNo, Address, Decoder);
+}
+
+static DecodeStatus DecodeGPR64noxzrRegisterClass(llvm::MCInst &Inst,
+ unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ if (RegNo > 30)
+ return MCDisassembler::Fail;
+
+ uint16_t Register = getReg(Decoder, AArch64::GPR64noxzrRegClassID, RegNo);
Inst.addOperand(MCOperand::CreateReg(Register));
return MCDisassembler::Success;
}
-static DecodeStatus
-DecodeVPR128RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
- uint64_t Address, const void *Decoder) {
+static DecodeStatus DecodeRegisterClassByID(llvm::MCInst &Inst, unsigned RegNo,
+ unsigned RegID,
+ const void *Decoder) {
if (RegNo > 31)
return MCDisassembler::Fail;
- uint16_t Register = getReg(Decoder, AArch64::VPR128RegClassID, RegNo);
+ uint16_t Register = getReg(Decoder, RegID, RegNo);
Inst.addOperand(MCOperand::CreateReg(Register));
return MCDisassembler::Success;
}
+static DecodeStatus DecodeDPairRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ return DecodeRegisterClassByID(Inst, RegNo, AArch64::DPairRegClassID,
+ Decoder);
+}
+
+static DecodeStatus DecodeQPairRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ return DecodeRegisterClassByID(Inst, RegNo, AArch64::QPairRegClassID,
+ Decoder);
+}
+
+static DecodeStatus DecodeDTripleRegisterClass(llvm::MCInst &Inst,
+ unsigned RegNo, uint64_t Address,
+ const void *Decoder) {
+ return DecodeRegisterClassByID(Inst, RegNo, AArch64::DTripleRegClassID,
+ Decoder);
+}
+
+static DecodeStatus DecodeQTripleRegisterClass(llvm::MCInst &Inst,
+ unsigned RegNo, uint64_t Address,
+ const void *Decoder) {
+ return DecodeRegisterClassByID(Inst, RegNo, AArch64::QTripleRegClassID,
+ Decoder);
+}
+
+static DecodeStatus DecodeDQuadRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ return DecodeRegisterClassByID(Inst, RegNo, AArch64::DQuadRegClassID,
+ Decoder);
+}
+
+static DecodeStatus DecodeQQuadRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ return DecodeRegisterClassByID(Inst, RegNo, AArch64::QQuadRegClassID,
+ Decoder);
+}
+
static DecodeStatus DecodeAddrRegExtendOperand(llvm::MCInst &Inst,
unsigned OptionHiS,
uint64_t Address,
@@ -413,7 +531,73 @@ static DecodeStatus DecodeFPZeroOperand(llvm::MCInst &Inst,
return MCDisassembler::Success;
}
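+// Vector shift-right amounts lie in [1, N] for N-bit elements and are stored
+// in the instruction as N - shift, so decoding computes shift = N - Val.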
+static DecodeStatus DecodeShiftRightImm8(MCInst &Inst, unsigned Val,
+ uint64_t Address,
+ const void *Decoder) {
+ Inst.addOperand(MCOperand::CreateImm(8 - Val));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeShiftRightImm16(MCInst &Inst, unsigned Val,
+ uint64_t Address,
+ const void *Decoder) {
+ Inst.addOperand(MCOperand::CreateImm(16 - Val));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeShiftRightImm32(MCInst &Inst, unsigned Val,
+ uint64_t Address,
+ const void *Decoder) {
+ Inst.addOperand(MCOperand::CreateImm(32 - Val));
+ return MCDisassembler::Success;
+}
+static DecodeStatus DecodeShiftRightImm64(MCInst &Inst, unsigned Val,
+ uint64_t Address,
+ const void *Decoder) {
+ Inst.addOperand(MCOperand::CreateImm(64 - Val));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeShiftLeftImm8(MCInst &Inst, unsigned Val,
+ uint64_t Address,
+ const void *Decoder) {
+ if (Val > 7)
+ return MCDisassembler::Fail;
+
+ Inst.addOperand(MCOperand::CreateImm(Val));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeShiftLeftImm16(MCInst &Inst, unsigned Val,
+ uint64_t Address,
+ const void *Decoder) {
+ if (Val > 15)
+ return MCDisassembler::Fail;
+
+ Inst.addOperand(MCOperand::CreateImm(Val));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeShiftLeftImm32(MCInst &Inst, unsigned Val,
+ uint64_t Address,
+ const void *Decoder) {
+ if (Val > 31)
+ return MCDisassembler::Fail;
+
+ Inst.addOperand(MCOperand::CreateImm(Val));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeShiftLeftImm64(MCInst &Inst, unsigned Val,
+ uint64_t Address,
+ const void *Decoder) {
+ if (Val > 63)
+ return MCDisassembler::Fail;
+
+ Inst.addOperand(MCOperand::CreateImm(Val));
+ return MCDisassembler::Success;
+}
template<int RegWidth>
static DecodeStatus DecodeMoveWideImmOperand(llvm::MCInst &Inst,
@@ -570,11 +754,11 @@ static DecodeStatus DecodeFMOVLaneInstruction(llvm::MCInst &Inst, unsigned Insn,
unsigned IsToVec = fieldFromInstruction(Insn, 16, 1);
if (IsToVec) {
- DecodeVPR128RegisterClass(Inst, Rd, Address, Decoder);
+ DecodeFPR128RegisterClass(Inst, Rd, Address, Decoder);
DecodeGPR64RegisterClass(Inst, Rn, Address, Decoder);
} else {
DecodeGPR64RegisterClass(Inst, Rd, Address, Decoder);
- DecodeVPR128RegisterClass(Inst, Rn, Address, Decoder);
+ DecodeFPR128RegisterClass(Inst, Rn, Address, Decoder);
}
// Add the lane
@@ -838,3 +1022,551 @@ DecodeNeonMovImmShiftOperand(llvm::MCInst &Inst, unsigned ShiftAmount,
Inst.addOperand(MCOperand::CreateImm(ShiftAmount));
return MCDisassembler::Success;
}
+
+// Decode post-indexed vector load/store instructions.
+// This is necessary because Rm needs custom handling: if Rm == 0b11111, the
+// last operand is an immediate equal to the length of the vector list in
+// bytes; otherwise Rm is decoded as a GPR64noxzr register.
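+// For example, "ld1 {v0.4s, v1.4s}, [x0], #32" encodes Rm == 0b11111 and the
+// decoded immediate is 2 * 16 == 32 bytes.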
+static DecodeStatus DecodeVLDSTPostInstruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address,
+ const void *Decoder) {
+ unsigned Rt = fieldFromInstruction(Insn, 0, 5);
+ unsigned Rn = fieldFromInstruction(Insn, 5, 5);
+ unsigned Rm = fieldFromInstruction(Insn, 16, 5);
+ unsigned Opcode = fieldFromInstruction(Insn, 12, 4);
+ unsigned IsLoad = fieldFromInstruction(Insn, 22, 1);
+  // 0 for a 64-bit vector list, 1 for a 128-bit vector list.
+ unsigned Is128BitVec = fieldFromInstruction(Insn, 30, 1);
+
+ unsigned NumVecs;
+ switch (Opcode) {
+ case 0: // ld4/st4
+ case 2: // ld1/st1 with 4 vectors
+ NumVecs = 4; break;
+ case 4: // ld3/st3
+ case 6: // ld1/st1 with 3 vectors
+ NumVecs = 3; break;
+ case 7: // ld1/st1 with 1 vector
+ NumVecs = 1; break;
+ case 8: // ld2/st2
+ case 10: // ld1/st1 with 2 vectors
+ NumVecs = 2; break;
+ default:
+ llvm_unreachable("Invalid opcode for post-index load/store instructions");
+ }
+
+ // Decode vector list of 1/2/3/4 vectors for load instructions.
+ if (IsLoad) {
+ switch (NumVecs) {
+ case 1:
+ Is128BitVec ? DecodeFPR128RegisterClass(Inst, Rt, Address, Decoder)
+ : DecodeFPR64RegisterClass(Inst, Rt, Address, Decoder);
+ break;
+ case 2:
+ Is128BitVec ? DecodeQPairRegisterClass(Inst, Rt, Address, Decoder)
+ : DecodeDPairRegisterClass(Inst, Rt, Address, Decoder);
+ break;
+ case 3:
+ Is128BitVec ? DecodeQTripleRegisterClass(Inst, Rt, Address, Decoder)
+ : DecodeDTripleRegisterClass(Inst, Rt, Address, Decoder);
+ break;
+ case 4:
+ Is128BitVec ? DecodeQQuadRegisterClass(Inst, Rt, Address, Decoder)
+ : DecodeDQuadRegisterClass(Inst, Rt, Address, Decoder);
+ break;
+ }
+ }
+
+ // Decode write back register, which is equal to Rn.
+ DecodeGPR64xspRegisterClass(Inst, Rn, Address, Decoder);
+ DecodeGPR64xspRegisterClass(Inst, Rn, Address, Decoder);
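+  // (Rn is decoded twice: once as the write-back result and once as the
+  // tied base-address operand.)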
+
+  if (Rm == 31) // If Rm is 0b11111, add the vector list length in bytes
+ Inst.addOperand(MCOperand::CreateImm(NumVecs * (Is128BitVec ? 16 : 8)));
+ else // Decode Rm
+ DecodeGPR64noxzrRegisterClass(Inst, Rm, Address, Decoder);
+
+  // Decode vector list of 1/2/3/4 vectors for store instructions.
+ if (!IsLoad) {
+ switch (NumVecs) {
+ case 1:
+ Is128BitVec ? DecodeFPR128RegisterClass(Inst, Rt, Address, Decoder)
+ : DecodeFPR64RegisterClass(Inst, Rt, Address, Decoder);
+ break;
+ case 2:
+ Is128BitVec ? DecodeQPairRegisterClass(Inst, Rt, Address, Decoder)
+ : DecodeDPairRegisterClass(Inst, Rt, Address, Decoder);
+ break;
+ case 3:
+ Is128BitVec ? DecodeQTripleRegisterClass(Inst, Rt, Address, Decoder)
+ : DecodeDTripleRegisterClass(Inst, Rt, Address, Decoder);
+ break;
+ case 4:
+ Is128BitVec ? DecodeQQuadRegisterClass(Inst, Rt, Address, Decoder)
+ : DecodeDQuadRegisterClass(Inst, Rt, Address, Decoder);
+ break;
+ }
+ }
+
+ return MCDisassembler::Success;
+}
+
+// Decode post-indexed vector load/store lane instructions.
+// This is necessary because Rm needs custom handling: if Rm == 0b11111, the
+// last operand is an immediate equal to the number of bytes transferred;
+// otherwise Rm is decoded as a GPR64noxzr register.
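+// For example, "ld2 {v0.s, v1.s}[1], [x0], #8" writes back Rn + 8, since two
+// 4-byte lanes are transferred.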
+static DecodeStatus DecodeVLDSTLanePostInstruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address,
+ const void *Decoder) {
+ bool Is64bitVec = false;
+ bool IsLoadDup = false;
+ bool IsLoad = false;
+ // The total number of bytes transferred.
+ // TransferBytes = NumVecs * OneLaneBytes
+ unsigned TransferBytes = 0;
+ unsigned NumVecs = 0;
+ unsigned Opc = Inst.getOpcode();
+ switch (Opc) {
+ case AArch64::LD1R_WB_8B_fixed: case AArch64::LD1R_WB_8B_register:
+ case AArch64::LD1R_WB_4H_fixed: case AArch64::LD1R_WB_4H_register:
+ case AArch64::LD1R_WB_2S_fixed: case AArch64::LD1R_WB_2S_register:
+ case AArch64::LD1R_WB_1D_fixed: case AArch64::LD1R_WB_1D_register: {
+ switch (Opc) {
+ case AArch64::LD1R_WB_8B_fixed: case AArch64::LD1R_WB_8B_register:
+ TransferBytes = 1; break;
+ case AArch64::LD1R_WB_4H_fixed: case AArch64::LD1R_WB_4H_register:
+ TransferBytes = 2; break;
+ case AArch64::LD1R_WB_2S_fixed: case AArch64::LD1R_WB_2S_register:
+ TransferBytes = 4; break;
+ case AArch64::LD1R_WB_1D_fixed: case AArch64::LD1R_WB_1D_register:
+ TransferBytes = 8; break;
+ }
+ Is64bitVec = true;
+ IsLoadDup = true;
+ NumVecs = 1;
+ break;
+ }
+
+ case AArch64::LD1R_WB_16B_fixed: case AArch64::LD1R_WB_16B_register:
+ case AArch64::LD1R_WB_8H_fixed: case AArch64::LD1R_WB_8H_register:
+ case AArch64::LD1R_WB_4S_fixed: case AArch64::LD1R_WB_4S_register:
+ case AArch64::LD1R_WB_2D_fixed: case AArch64::LD1R_WB_2D_register: {
+ switch (Opc) {
+ case AArch64::LD1R_WB_16B_fixed: case AArch64::LD1R_WB_16B_register:
+ TransferBytes = 1; break;
+ case AArch64::LD1R_WB_8H_fixed: case AArch64::LD1R_WB_8H_register:
+ TransferBytes = 2; break;
+ case AArch64::LD1R_WB_4S_fixed: case AArch64::LD1R_WB_4S_register:
+ TransferBytes = 4; break;
+ case AArch64::LD1R_WB_2D_fixed: case AArch64::LD1R_WB_2D_register:
+ TransferBytes = 8; break;
+ }
+ IsLoadDup = true;
+ NumVecs = 1;
+ break;
+ }
+
+ case AArch64::LD2R_WB_8B_fixed: case AArch64::LD2R_WB_8B_register:
+ case AArch64::LD2R_WB_4H_fixed: case AArch64::LD2R_WB_4H_register:
+ case AArch64::LD2R_WB_2S_fixed: case AArch64::LD2R_WB_2S_register:
+ case AArch64::LD2R_WB_1D_fixed: case AArch64::LD2R_WB_1D_register: {
+ switch (Opc) {
+ case AArch64::LD2R_WB_8B_fixed: case AArch64::LD2R_WB_8B_register:
+ TransferBytes = 2; break;
+ case AArch64::LD2R_WB_4H_fixed: case AArch64::LD2R_WB_4H_register:
+ TransferBytes = 4; break;
+ case AArch64::LD2R_WB_2S_fixed: case AArch64::LD2R_WB_2S_register:
+ TransferBytes = 8; break;
+ case AArch64::LD2R_WB_1D_fixed: case AArch64::LD2R_WB_1D_register:
+ TransferBytes = 16; break;
+ }
+ Is64bitVec = true;
+ IsLoadDup = true;
+ NumVecs = 2;
+ break;
+ }
+
+ case AArch64::LD2R_WB_16B_fixed: case AArch64::LD2R_WB_16B_register:
+ case AArch64::LD2R_WB_8H_fixed: case AArch64::LD2R_WB_8H_register:
+ case AArch64::LD2R_WB_4S_fixed: case AArch64::LD2R_WB_4S_register:
+ case AArch64::LD2R_WB_2D_fixed: case AArch64::LD2R_WB_2D_register: {
+ switch (Opc) {
+ case AArch64::LD2R_WB_16B_fixed: case AArch64::LD2R_WB_16B_register:
+ TransferBytes = 2; break;
+ case AArch64::LD2R_WB_8H_fixed: case AArch64::LD2R_WB_8H_register:
+ TransferBytes = 4; break;
+ case AArch64::LD2R_WB_4S_fixed: case AArch64::LD2R_WB_4S_register:
+ TransferBytes = 8; break;
+ case AArch64::LD2R_WB_2D_fixed: case AArch64::LD2R_WB_2D_register:
+ TransferBytes = 16; break;
+ }
+ IsLoadDup = true;
+ NumVecs = 2;
+ break;
+ }
+
+ case AArch64::LD3R_WB_8B_fixed: case AArch64::LD3R_WB_8B_register:
+ case AArch64::LD3R_WB_4H_fixed: case AArch64::LD3R_WB_4H_register:
+ case AArch64::LD3R_WB_2S_fixed: case AArch64::LD3R_WB_2S_register:
+ case AArch64::LD3R_WB_1D_fixed: case AArch64::LD3R_WB_1D_register: {
+ switch (Opc) {
+ case AArch64::LD3R_WB_8B_fixed: case AArch64::LD3R_WB_8B_register:
+ TransferBytes = 3; break;
+ case AArch64::LD3R_WB_4H_fixed: case AArch64::LD3R_WB_4H_register:
+ TransferBytes = 6; break;
+ case AArch64::LD3R_WB_2S_fixed: case AArch64::LD3R_WB_2S_register:
+ TransferBytes = 12; break;
+ case AArch64::LD3R_WB_1D_fixed: case AArch64::LD3R_WB_1D_register:
+ TransferBytes = 24; break;
+ }
+ Is64bitVec = true;
+ IsLoadDup = true;
+ NumVecs = 3;
+ break;
+ }
+
+ case AArch64::LD3R_WB_16B_fixed: case AArch64::LD3R_WB_16B_register:
+  case AArch64::LD3R_WB_8H_fixed: case AArch64::LD3R_WB_8H_register:
+  case AArch64::LD3R_WB_4S_fixed: case AArch64::LD3R_WB_4S_register:
+ case AArch64::LD3R_WB_2D_fixed: case AArch64::LD3R_WB_2D_register: {
+ switch (Opc) {
+ case AArch64::LD3R_WB_16B_fixed: case AArch64::LD3R_WB_16B_register:
+ TransferBytes = 3; break;
+ case AArch64::LD3R_WB_8H_fixed: case AArch64::LD3R_WB_8H_register:
+ TransferBytes = 6; break;
+ case AArch64::LD3R_WB_4S_fixed: case AArch64::LD3R_WB_4S_register:
+ TransferBytes = 12; break;
+ case AArch64::LD3R_WB_2D_fixed: case AArch64::LD3R_WB_2D_register:
+ TransferBytes = 24; break;
+ }
+ IsLoadDup = true;
+ NumVecs = 3;
+ break;
+ }
+
+ case AArch64::LD4R_WB_8B_fixed: case AArch64::LD4R_WB_8B_register:
+ case AArch64::LD4R_WB_4H_fixed: case AArch64::LD4R_WB_4H_register:
+ case AArch64::LD4R_WB_2S_fixed: case AArch64::LD4R_WB_2S_register:
+ case AArch64::LD4R_WB_1D_fixed: case AArch64::LD4R_WB_1D_register: {
+ switch (Opc) {
+ case AArch64::LD4R_WB_8B_fixed: case AArch64::LD4R_WB_8B_register:
+ TransferBytes = 4; break;
+ case AArch64::LD4R_WB_4H_fixed: case AArch64::LD4R_WB_4H_register:
+ TransferBytes = 8; break;
+ case AArch64::LD4R_WB_2S_fixed: case AArch64::LD4R_WB_2S_register:
+ TransferBytes = 16; break;
+ case AArch64::LD4R_WB_1D_fixed: case AArch64::LD4R_WB_1D_register:
+ TransferBytes = 32; break;
+ }
+ Is64bitVec = true;
+ IsLoadDup = true;
+ NumVecs = 4;
+ break;
+ }
+
+ case AArch64::LD4R_WB_16B_fixed: case AArch64::LD4R_WB_16B_register:
+  case AArch64::LD4R_WB_8H_fixed: case AArch64::LD4R_WB_8H_register:
+  case AArch64::LD4R_WB_4S_fixed: case AArch64::LD4R_WB_4S_register:
+ case AArch64::LD4R_WB_2D_fixed: case AArch64::LD4R_WB_2D_register: {
+ switch (Opc) {
+ case AArch64::LD4R_WB_16B_fixed: case AArch64::LD4R_WB_16B_register:
+ TransferBytes = 4; break;
+ case AArch64::LD4R_WB_8H_fixed: case AArch64::LD4R_WB_8H_register:
+ TransferBytes = 8; break;
+ case AArch64::LD4R_WB_4S_fixed: case AArch64::LD4R_WB_4S_register:
+ TransferBytes = 16; break;
+ case AArch64::LD4R_WB_2D_fixed: case AArch64::LD4R_WB_2D_register:
+ TransferBytes = 32; break;
+ }
+ IsLoadDup = true;
+ NumVecs = 4;
+ break;
+ }
+
+ case AArch64::LD1LN_WB_B_fixed: case AArch64::LD1LN_WB_B_register:
+ case AArch64::LD1LN_WB_H_fixed: case AArch64::LD1LN_WB_H_register:
+ case AArch64::LD1LN_WB_S_fixed: case AArch64::LD1LN_WB_S_register:
+ case AArch64::LD1LN_WB_D_fixed: case AArch64::LD1LN_WB_D_register: {
+ switch (Opc) {
+ case AArch64::LD1LN_WB_B_fixed: case AArch64::LD1LN_WB_B_register:
+ TransferBytes = 1; break;
+ case AArch64::LD1LN_WB_H_fixed: case AArch64::LD1LN_WB_H_register:
+ TransferBytes = 2; break;
+ case AArch64::LD1LN_WB_S_fixed: case AArch64::LD1LN_WB_S_register:
+ TransferBytes = 4; break;
+ case AArch64::LD1LN_WB_D_fixed: case AArch64::LD1LN_WB_D_register:
+ TransferBytes = 8; break;
+ }
+ IsLoad = true;
+ NumVecs = 1;
+ break;
+ }
+
+ case AArch64::LD2LN_WB_B_fixed: case AArch64::LD2LN_WB_B_register:
+ case AArch64::LD2LN_WB_H_fixed: case AArch64::LD2LN_WB_H_register:
+ case AArch64::LD2LN_WB_S_fixed: case AArch64::LD2LN_WB_S_register:
+ case AArch64::LD2LN_WB_D_fixed: case AArch64::LD2LN_WB_D_register: {
+ switch (Opc) {
+ case AArch64::LD2LN_WB_B_fixed: case AArch64::LD2LN_WB_B_register:
+ TransferBytes = 2; break;
+ case AArch64::LD2LN_WB_H_fixed: case AArch64::LD2LN_WB_H_register:
+ TransferBytes = 4; break;
+ case AArch64::LD2LN_WB_S_fixed: case AArch64::LD2LN_WB_S_register:
+ TransferBytes = 8; break;
+ case AArch64::LD2LN_WB_D_fixed: case AArch64::LD2LN_WB_D_register:
+ TransferBytes = 16; break;
+ }
+ IsLoad = true;
+ NumVecs = 2;
+ break;
+ }
+
+ case AArch64::LD3LN_WB_B_fixed: case AArch64::LD3LN_WB_B_register:
+ case AArch64::LD3LN_WB_H_fixed: case AArch64::LD3LN_WB_H_register:
+ case AArch64::LD3LN_WB_S_fixed: case AArch64::LD3LN_WB_S_register:
+ case AArch64::LD3LN_WB_D_fixed: case AArch64::LD3LN_WB_D_register: {
+ switch (Opc) {
+ case AArch64::LD3LN_WB_B_fixed: case AArch64::LD3LN_WB_B_register:
+ TransferBytes = 3; break;
+ case AArch64::LD3LN_WB_H_fixed: case AArch64::LD3LN_WB_H_register:
+ TransferBytes = 6; break;
+ case AArch64::LD3LN_WB_S_fixed: case AArch64::LD3LN_WB_S_register:
+ TransferBytes = 12; break;
+ case AArch64::LD3LN_WB_D_fixed: case AArch64::LD3LN_WB_D_register:
+ TransferBytes = 24; break;
+ }
+ IsLoad = true;
+ NumVecs = 3;
+ break;
+ }
+
+ case AArch64::LD4LN_WB_B_fixed: case AArch64::LD4LN_WB_B_register:
+ case AArch64::LD4LN_WB_H_fixed: case AArch64::LD4LN_WB_H_register:
+ case AArch64::LD4LN_WB_S_fixed: case AArch64::LD4LN_WB_S_register:
+ case AArch64::LD4LN_WB_D_fixed: case AArch64::LD4LN_WB_D_register: {
+ switch (Opc) {
+ case AArch64::LD4LN_WB_B_fixed: case AArch64::LD4LN_WB_B_register:
+ TransferBytes = 4; break;
+ case AArch64::LD4LN_WB_H_fixed: case AArch64::LD4LN_WB_H_register:
+ TransferBytes = 8; break;
+ case AArch64::LD4LN_WB_S_fixed: case AArch64::LD4LN_WB_S_register:
+ TransferBytes = 16; break;
+ case AArch64::LD4LN_WB_D_fixed: case AArch64::LD4LN_WB_D_register:
+ TransferBytes = 32; break;
+ }
+ IsLoad = true;
+ NumVecs = 4;
+ break;
+ }
+
+ case AArch64::ST1LN_WB_B_fixed: case AArch64::ST1LN_WB_B_register:
+ case AArch64::ST1LN_WB_H_fixed: case AArch64::ST1LN_WB_H_register:
+ case AArch64::ST1LN_WB_S_fixed: case AArch64::ST1LN_WB_S_register:
+ case AArch64::ST1LN_WB_D_fixed: case AArch64::ST1LN_WB_D_register: {
+ switch (Opc) {
+ case AArch64::ST1LN_WB_B_fixed: case AArch64::ST1LN_WB_B_register:
+ TransferBytes = 1; break;
+ case AArch64::ST1LN_WB_H_fixed: case AArch64::ST1LN_WB_H_register:
+ TransferBytes = 2; break;
+ case AArch64::ST1LN_WB_S_fixed: case AArch64::ST1LN_WB_S_register:
+ TransferBytes = 4; break;
+ case AArch64::ST1LN_WB_D_fixed: case AArch64::ST1LN_WB_D_register:
+ TransferBytes = 8; break;
+ }
+ NumVecs = 1;
+ break;
+ }
+
+ case AArch64::ST2LN_WB_B_fixed: case AArch64::ST2LN_WB_B_register:
+ case AArch64::ST2LN_WB_H_fixed: case AArch64::ST2LN_WB_H_register:
+ case AArch64::ST2LN_WB_S_fixed: case AArch64::ST2LN_WB_S_register:
+ case AArch64::ST2LN_WB_D_fixed: case AArch64::ST2LN_WB_D_register: {
+ switch (Opc) {
+ case AArch64::ST2LN_WB_B_fixed: case AArch64::ST2LN_WB_B_register:
+ TransferBytes = 2; break;
+ case AArch64::ST2LN_WB_H_fixed: case AArch64::ST2LN_WB_H_register:
+ TransferBytes = 4; break;
+ case AArch64::ST2LN_WB_S_fixed: case AArch64::ST2LN_WB_S_register:
+ TransferBytes = 8; break;
+ case AArch64::ST2LN_WB_D_fixed: case AArch64::ST2LN_WB_D_register:
+ TransferBytes = 16; break;
+ }
+ NumVecs = 2;
+ break;
+ }
+
+ case AArch64::ST3LN_WB_B_fixed: case AArch64::ST3LN_WB_B_register:
+ case AArch64::ST3LN_WB_H_fixed: case AArch64::ST3LN_WB_H_register:
+ case AArch64::ST3LN_WB_S_fixed: case AArch64::ST3LN_WB_S_register:
+ case AArch64::ST3LN_WB_D_fixed: case AArch64::ST3LN_WB_D_register: {
+ switch (Opc) {
+ case AArch64::ST3LN_WB_B_fixed: case AArch64::ST3LN_WB_B_register:
+ TransferBytes = 3; break;
+ case AArch64::ST3LN_WB_H_fixed: case AArch64::ST3LN_WB_H_register:
+ TransferBytes = 6; break;
+ case AArch64::ST3LN_WB_S_fixed: case AArch64::ST3LN_WB_S_register:
+ TransferBytes = 12; break;
+ case AArch64::ST3LN_WB_D_fixed: case AArch64::ST3LN_WB_D_register:
+ TransferBytes = 24; break;
+ }
+ NumVecs = 3;
+ break;
+ }
+
+ case AArch64::ST4LN_WB_B_fixed: case AArch64::ST4LN_WB_B_register:
+ case AArch64::ST4LN_WB_H_fixed: case AArch64::ST4LN_WB_H_register:
+ case AArch64::ST4LN_WB_S_fixed: case AArch64::ST4LN_WB_S_register:
+ case AArch64::ST4LN_WB_D_fixed: case AArch64::ST4LN_WB_D_register: {
+ switch (Opc) {
+ case AArch64::ST4LN_WB_B_fixed: case AArch64::ST4LN_WB_B_register:
+ TransferBytes = 4; break;
+ case AArch64::ST4LN_WB_H_fixed: case AArch64::ST4LN_WB_H_register:
+ TransferBytes = 8; break;
+ case AArch64::ST4LN_WB_S_fixed: case AArch64::ST4LN_WB_S_register:
+ TransferBytes = 16; break;
+ case AArch64::ST4LN_WB_D_fixed: case AArch64::ST4LN_WB_D_register:
+ TransferBytes = 32; break;
+ }
+ NumVecs = 4;
+ break;
+ }
+
+ default:
+ return MCDisassembler::Fail;
+ } // End of switch (Opc)
+
+ unsigned Rt = fieldFromInstruction(Insn, 0, 5);
+ unsigned Rn = fieldFromInstruction(Insn, 5, 5);
+ unsigned Rm = fieldFromInstruction(Insn, 16, 5);
+
+  // Decode the post-indexed load-duplicate (LDnR) instructions.
+ if (IsLoadDup) {
+ switch (NumVecs) {
+ case 1:
+ Is64bitVec ? DecodeFPR64RegisterClass(Inst, Rt, Address, Decoder)
+ : DecodeFPR128RegisterClass(Inst, Rt, Address, Decoder);
+ break;
+ case 2:
+ Is64bitVec ? DecodeDPairRegisterClass(Inst, Rt, Address, Decoder)
+ : DecodeQPairRegisterClass(Inst, Rt, Address, Decoder);
+ break;
+ case 3:
+ Is64bitVec ? DecodeDTripleRegisterClass(Inst, Rt, Address, Decoder)
+ : DecodeQTripleRegisterClass(Inst, Rt, Address, Decoder);
+ break;
+ case 4:
+ Is64bitVec ? DecodeDQuadRegisterClass(Inst, Rt, Address, Decoder)
+ : DecodeQQuadRegisterClass(Inst, Rt, Address, Decoder);
+ }
+
+ // Decode write back register, which is equal to Rn.
+ DecodeGPR64xspRegisterClass(Inst, Rn, Address, Decoder);
+ DecodeGPR64xspRegisterClass(Inst, Rn, Address, Decoder);
+
+  if (Rm == 31) // If Rm is 0b11111, add the number of transferred bytes
+ Inst.addOperand(MCOperand::CreateImm(TransferBytes));
+ else // Decode Rm
+ DecodeGPR64noxzrRegisterClass(Inst, Rm, Address, Decoder);
+
+ return MCDisassembler::Success;
+ }
+
+  // Decode the post-indexed load/store lane instructions.
+  // Loads have a vector list as their output operand.
+ if (IsLoad) {
+ switch (NumVecs) {
+ case 1:
+ DecodeFPR128RegisterClass(Inst, Rt, Address, Decoder);
+ break;
+ case 2:
+ DecodeQPairRegisterClass(Inst, Rt, Address, Decoder);
+ break;
+ case 3:
+ DecodeQTripleRegisterClass(Inst, Rt, Address, Decoder);
+ break;
+ case 4:
+ DecodeQQuadRegisterClass(Inst, Rt, Address, Decoder);
+ }
+ }
+
+ // Decode write back register, which is equal to Rn.
+ DecodeGPR64xspRegisterClass(Inst, Rn, Address, Decoder);
+ DecodeGPR64xspRegisterClass(Inst, Rn, Address, Decoder);
+
+  if (Rm == 31) // If Rm is 0b11111, add the number of transferred bytes
+ Inst.addOperand(MCOperand::CreateImm(TransferBytes));
+ else // Decode Rm
+ DecodeGPR64noxzrRegisterClass(Inst, Rm, Address, Decoder);
+
+ // Decode the source vector list.
+ switch (NumVecs) {
+ case 1:
+ DecodeFPR128RegisterClass(Inst, Rt, Address, Decoder);
+ break;
+ case 2:
+ DecodeQPairRegisterClass(Inst, Rt, Address, Decoder);
+ break;
+ case 3:
+ DecodeQTripleRegisterClass(Inst, Rt, Address, Decoder);
+ break;
+ case 4:
+ DecodeQQuadRegisterClass(Inst, Rt, Address, Decoder);
+ }
+
+ // Decode lane
+ unsigned Q = fieldFromInstruction(Insn, 30, 1);
+ unsigned S = fieldFromInstruction(Insn, 10, 3);
+ unsigned lane = 0;
+  // Calculate the number of lanes from the number of vectors and the
+  // transferred bytes: NumLanes = 16 bytes / bytes per lane.
+ unsigned NumLanes = 16 / (TransferBytes / NumVecs);
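+  // e.g. for 4-byte lanes (NumLanes == 4), lane 3 is encoded as Q == 1 and
+  // S == 0b100, giving (Q << 1) | (S >> 2) == 3.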
+ switch (NumLanes) {
+  case 16: // A vector has 16 lanes, each lane is 1 byte.
+ lane = (Q << 3) | S;
+ break;
+ case 8:
+ lane = (Q << 2) | (S >> 1);
+ break;
+ case 4:
+ lane = (Q << 1) | (S >> 2);
+ break;
+ case 2:
+ lane = Q;
+ break;
+ }
+ Inst.addOperand(MCOperand::CreateImm(lane));
+
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeSHLLInstruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address,
+ const void *Decoder) {
+ unsigned Rd = fieldFromInstruction(Insn, 0, 5);
+ unsigned Rn = fieldFromInstruction(Insn, 5, 5);
+ unsigned size = fieldFromInstruction(Insn, 22, 2);
+ unsigned Q = fieldFromInstruction(Insn, 30, 1);
+
+ DecodeFPR128RegisterClass(Inst, Rd, Address, Decoder);
+
+  if (Q)
+ DecodeFPR128RegisterClass(Inst, Rn, Address, Decoder);
+ else
+ DecodeFPR64RegisterClass(Inst, Rn, Address, Decoder);
+
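+  // SHLL shifts each element left by the element size, so the immediate
+  // operand is 8, 16 or 32 according to the size field.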
+ switch (size) {
+ case 0:
+ Inst.addOperand(MCOperand::CreateImm(8));
+ break;
+ case 1:
+ Inst.addOperand(MCOperand::CreateImm(16));
+ break;
+ case 2:
+ Inst.addOperand(MCOperand::CreateImm(32));
+ break;
+  default:
+ return MCDisassembler::Fail;
+ }
+ return MCDisassembler::Success;
+}
+
diff --git a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp
index b624331..0438de3 100644
--- a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp
+++ b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp
@@ -368,6 +368,14 @@ AArch64InstPrinter::printSImm7ScaledOperand(const MCInst *MI, unsigned OpNum,
O << "#" << (Imm * MemScale);
}
+void AArch64InstPrinter::printVPRRegister(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ unsigned Reg = MI->getOperand(OpNo).getReg();
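+  // Print the register with a "v" prefix: getRegisterName gives e.g. "q5",
+  // which is printed as "v5".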
+ std::string Name = getRegisterName(Reg);
+ Name[0] = 'v';
+ O << Name;
+}
+
void AArch64InstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
raw_ostream &O) {
const MCOperand &Op = MI->getOperand(OpNo);
@@ -454,8 +462,8 @@ void AArch64InstPrinter::printNeonUImm0Operand(const MCInst *MI, unsigned OpNum,
o << "#0x0";
}
-void AArch64InstPrinter::printNeonUImm8Operand(const MCInst *MI, unsigned OpNum,
- raw_ostream &O) {
+void AArch64InstPrinter::printUImmHexOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
const MCOperand &MOUImm = MI->getOperand(OpNum);
assert(MOUImm.isImm() &&
@@ -467,6 +475,18 @@ void AArch64InstPrinter::printNeonUImm8Operand(const MCInst *MI, unsigned OpNum,
O.write_hex(Imm);
}
+void AArch64InstPrinter::printUImmBareOperand(const MCInst *MI,
+ unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &MOUImm = MI->getOperand(OpNum);
+
+ assert(MOUImm.isImm()
+ && "Immediate operand required for Neon vector immediate inst.");
+
+ unsigned Imm = MOUImm.getImm();
+ O << Imm;
+}
+
void AArch64InstPrinter::printNeonUImm64MaskOperand(const MCInst *MI,
unsigned OpNum,
raw_ostream &O) {
@@ -487,3 +507,33 @@ void AArch64InstPrinter::printNeonUImm64MaskOperand(const MCInst *MI,
O << "#0x";
O.write_hex(Mask);
}
+
+// If Count > 1, there are two valid forms of vector list:
+// (1) {Vn.layout, Vn+1.layout, ... , Vm.layout}
+// (2) {Vn.layout - Vm.layout}
+// We always print the first form.
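+// For example, a two-register 128-bit list with layout .4s starting at Q0
+// prints as "{v0.4s, v1.4s}".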
+template <A64Layout::VectorLayout Layout, unsigned Count>
+void AArch64InstPrinter::printVectorList(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ assert(Count >= 1 && Count <= 4 && "Invalid Number of Vectors");
+
+ unsigned Reg = MI->getOperand(OpNum).getReg();
+ std::string LayoutStr = A64VectorLayoutToString(Layout);
+ O << "{";
+ if (Count > 1) { // Print sub registers separately
+ bool IsVec64 = (Layout < A64Layout::VL_16B);
+ unsigned SubRegIdx = IsVec64 ? AArch64::dsub_0 : AArch64::qsub_0;
+ for (unsigned I = 0; I < Count; I++) {
+ std::string Name = getRegisterName(MRI.getSubReg(Reg, SubRegIdx++));
+ Name[0] = 'v';
+ O << Name << LayoutStr;
+ if (I != Count - 1)
+ O << ", ";
+ }
+  } else { // Print the register directly when Count is 1.
+ std::string Name = getRegisterName(Reg);
+ Name[0] = 'v';
+ O << Name << LayoutStr;
+ }
+ O << "}";
+}
diff --git a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h
index f7439be..37b7273 100644
--- a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h
+++ b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h
@@ -157,6 +157,7 @@ public:
void printRegExtendOperand(const MCInst *MI, unsigned OpNum,
raw_ostream &O, A64SE::ShiftExtSpecifiers Ext);
+ void printVPRRegister(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot);
@@ -168,9 +169,13 @@ public:
void printNeonMovImmShiftOperand(const MCInst *MI, unsigned OpNum,
raw_ostream &O);
void printNeonUImm0Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printNeonUImm8Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printUImmHexOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printUImmBareOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printNeonUImm64MaskOperand(const MCInst *MI, unsigned OpNum,
raw_ostream &O);
+
+ template <A64Layout::VectorLayout Layout, unsigned Count>
+ void printVectorList(const MCInst *MI, unsigned OpNum, raw_ostream &O);
};
}
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
index a3373b1..8a9077c 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
@@ -578,8 +578,8 @@ static uint64_t adjustFixupValue(unsigned Kind, uint64_t Value) {
}
MCAsmBackend *
-llvm::createAArch64AsmBackend(const Target &T, StringRef TT, StringRef CPU) {
+llvm::createAArch64AsmBackend(const Target &T, const MCRegisterInfo &MRI,
+ StringRef TT, StringRef CPU) {
Triple TheTriple(TT);
-
return new ELFAArch64AsmBackend(T, TT, TheTriple.getOS());
}
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
index 104e4d2..a64c463 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
@@ -55,11 +55,10 @@ namespace {
/// by MachO. Beware!
class AArch64ELFStreamer : public MCELFStreamer {
public:
- AArch64ELFStreamer(MCContext &Context, MCAsmBackend &TAB,
- raw_ostream &OS, MCCodeEmitter *Emitter)
- : MCELFStreamer(Context, TAB, OS, Emitter),
- MappingSymbolCounter(0), LastEMS(EMS_None) {
- }
+ AArch64ELFStreamer(MCContext &Context, MCAsmBackend &TAB, raw_ostream &OS,
+ MCCodeEmitter *Emitter)
+ : MCELFStreamer(Context, 0, TAB, OS, Emitter), MappingSymbolCounter(0),
+ LastEMS(EMS_None) {}
~AArch64ELFStreamer() {}
@@ -129,7 +128,7 @@ private:
MCELF::SetType(SD, ELF::STT_NOTYPE);
MCELF::SetBinding(SD, ELF::STB_LOCAL);
SD.setExternal(false);
- Symbol->setSection(*getCurrentSection().first);
+ AssignSection(Symbol, getCurrentSection().first);
const MCExpr *Value = MCSymbolRefExpr::Create(Start, getContext());
Symbol->setVariableValue(Value);
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp
index 8ec8cbf..add874c 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp
@@ -31,11 +31,12 @@ AArch64ELFMCAsmInfo::AArch64ELFMCAsmInfo() {
UseDataRegionDirectives = true;
- WeakRefDirective = "\t.weak\t";
-
HasLEB128 = true;
SupportsDebugInformation = true;
// Exceptions handling
ExceptionsType = ExceptionHandling::DwarfCFI;
}
+
+// Pin the vtable to this file.
+void AArch64ELFMCAsmInfo::anchor() {}
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h b/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h
index a20bc47..d1dd285 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h
@@ -14,13 +14,15 @@
#ifndef LLVM_AARCH64TARGETASMINFO_H
#define LLVM_AARCH64TARGETASMINFO_H
-#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCAsmInfoELF.h"
namespace llvm {
- struct AArch64ELFMCAsmInfo : public MCAsmInfo {
- explicit AArch64ELFMCAsmInfo();
- };
+struct AArch64ELFMCAsmInfo : public MCAsmInfoELF {
+ explicit AArch64ELFMCAsmInfo();
+private:
+ virtual void anchor();
+};
} // namespace llvm
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp
index b9770b3..b41c566 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp
@@ -59,6 +59,23 @@ public:
unsigned getBitfield64LSLOpValue(const MCInst &MI, unsigned OpIdx,
SmallVectorImpl<MCFixup> &Fixups) const;
+ unsigned getShiftRightImm8(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+ unsigned getShiftRightImm16(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+ unsigned getShiftRightImm32(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+ unsigned getShiftRightImm64(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ unsigned getShiftLeftImm8(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+ unsigned getShiftLeftImm16(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+ unsigned getShiftLeftImm32(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+ unsigned getShiftLeftImm64(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups) const;
// Labels are handled mostly the same way: a symbol is needed, and
// just gets some fixup attached.
@@ -310,6 +327,45 @@ AArch64MCCodeEmitter::getBitfield64LSLOpValue(const MCInst &MI, unsigned OpIdx,
return ((64 - MO.getImm()) & 0x3f) | (63 - MO.getImm()) << 6;
}
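+// Vector shift immediates are not stored directly in the instruction; these
+// encoders convert the MCInst operand value into the encoded field value
+// (e.g. a right shift by "shift" on N-bit elements is encoded as N - shift).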
+unsigned AArch64MCCodeEmitter::getShiftRightImm8(
+ const MCInst &MI, unsigned Op, SmallVectorImpl<MCFixup> &Fixups) const {
+ return 8 - MI.getOperand(Op).getImm();
+}
+
+unsigned AArch64MCCodeEmitter::getShiftRightImm16(
+ const MCInst &MI, unsigned Op, SmallVectorImpl<MCFixup> &Fixups) const {
+ return 16 - MI.getOperand(Op).getImm();
+}
+
+unsigned AArch64MCCodeEmitter::getShiftRightImm32(
+ const MCInst &MI, unsigned Op, SmallVectorImpl<MCFixup> &Fixups) const {
+ return 32 - MI.getOperand(Op).getImm();
+}
+
+unsigned AArch64MCCodeEmitter::getShiftRightImm64(
+ const MCInst &MI, unsigned Op, SmallVectorImpl<MCFixup> &Fixups) const {
+ return 64 - MI.getOperand(Op).getImm();
+}
+
+unsigned AArch64MCCodeEmitter::getShiftLeftImm8(
+ const MCInst &MI, unsigned Op, SmallVectorImpl<MCFixup> &Fixups) const {
+ return MI.getOperand(Op).getImm() - 8;
+}
+
+unsigned AArch64MCCodeEmitter::getShiftLeftImm16(
+ const MCInst &MI, unsigned Op, SmallVectorImpl<MCFixup> &Fixups) const {
+ return MI.getOperand(Op).getImm() - 16;
+}
+
+unsigned AArch64MCCodeEmitter::getShiftLeftImm32(
+ const MCInst &MI, unsigned Op, SmallVectorImpl<MCFixup> &Fixups) const {
+ return MI.getOperand(Op).getImm() - 32;
+}
+
+unsigned AArch64MCCodeEmitter::getShiftLeftImm64(
+ const MCInst &MI, unsigned Op, SmallVectorImpl<MCFixup> &Fixups) const {
+ return MI.getOperand(Op).getImm() - 64;
+}
template<AArch64::Fixups fixupDesired> unsigned
AArch64MCCodeEmitter::getLabelOpValue(const MCInst &MI,
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h
index 3849fe3..670e657 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h
@@ -43,8 +43,9 @@ MCCodeEmitter *createAArch64MCCodeEmitter(const MCInstrInfo &MCII,
MCObjectWriter *createAArch64ELFObjectWriter(raw_ostream &OS,
uint8_t OSABI);
-MCAsmBackend *createAArch64AsmBackend(const Target &T, StringRef TT,
- StringRef CPU);
+MCAsmBackend *createAArch64AsmBackend(const Target &T,
+ const MCRegisterInfo &MRI,
+ StringRef TT, StringRef CPU);
} // End llvm namespace
diff --git a/lib/Target/AArch64/Utils/AArch64BaseInfo.h b/lib/Target/AArch64/Utils/AArch64BaseInfo.h
index e675efc..ce970b0 100644
--- a/lib/Target/AArch64/Utils/AArch64BaseInfo.h
+++ b/lib/Target/AArch64/Utils/AArch64BaseInfo.h
@@ -306,6 +306,65 @@ namespace A64SE {
};
}
+namespace A64Layout {
+ enum VectorLayout {
+ Invalid = -1,
+ VL_8B,
+ VL_4H,
+ VL_2S,
+ VL_1D,
+
+ VL_16B,
+ VL_8H,
+ VL_4S,
+ VL_2D,
+
+    // Bare layouts for the 128-bit vectors
+    // (printed as just ".b", ".h", ".s", ".d" with no element count)
+ VL_B,
+ VL_H,
+ VL_S,
+ VL_D
+ };
+}
+
+inline static const char *
+A64VectorLayoutToString(A64Layout::VectorLayout Layout) {
+ switch (Layout) {
+ case A64Layout::VL_8B: return ".8b";
+ case A64Layout::VL_4H: return ".4h";
+ case A64Layout::VL_2S: return ".2s";
+ case A64Layout::VL_1D: return ".1d";
+ case A64Layout::VL_16B: return ".16b";
+ case A64Layout::VL_8H: return ".8h";
+ case A64Layout::VL_4S: return ".4s";
+ case A64Layout::VL_2D: return ".2d";
+ case A64Layout::VL_B: return ".b";
+ case A64Layout::VL_H: return ".h";
+ case A64Layout::VL_S: return ".s";
+ case A64Layout::VL_D: return ".d";
+ default: llvm_unreachable("Unknown Vector Layout");
+ }
+}
+
+inline static A64Layout::VectorLayout
+A64StringToVectorLayout(StringRef LayoutStr) {
+ return StringSwitch<A64Layout::VectorLayout>(LayoutStr)
+ .Case(".8b", A64Layout::VL_8B)
+ .Case(".4h", A64Layout::VL_4H)
+ .Case(".2s", A64Layout::VL_2S)
+ .Case(".1d", A64Layout::VL_1D)
+ .Case(".16b", A64Layout::VL_16B)
+ .Case(".8h", A64Layout::VL_8H)
+ .Case(".4s", A64Layout::VL_4S)
+ .Case(".2d", A64Layout::VL_2D)
+ .Case(".b", A64Layout::VL_B)
+ .Case(".h", A64Layout::VL_H)
+ .Case(".s", A64Layout::VL_S)
+ .Case(".d", A64Layout::VL_D)
+ .Default(A64Layout::Invalid);
+}
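+
+// For valid layouts these helpers are inverses of each other, e.g.
+// A64StringToVectorLayout(".4s") == A64Layout::VL_4S.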
+
namespace A64SysReg {
enum SysRegROValues {
MDCCSR_EL0 = 0x9808, // 10 011 0000 0001 000
diff --git a/lib/Target/ARM/A15SDOptimizer.cpp b/lib/Target/ARM/A15SDOptimizer.cpp
index e8c2f7c..ff585b4 100644
--- a/lib/Target/ARM/A15SDOptimizer.cpp
+++ b/lib/Target/ARM/A15SDOptimizer.cpp
@@ -657,6 +657,13 @@ bool A15SDOptimizer::runOnInstruction(MachineInstr *MI) {
Modified = true;
for (SmallVectorImpl<MachineOperand *>::const_iterator I = Uses.begin(),
E = Uses.end(); I != E; ++I) {
+ // Make sure to constrain the register class of the new register to
+ // match what we're replacing. Otherwise we can optimize a DPR_VFP2
+ // reference into a plain DPR, and that will end poorly. NewReg is
+ // always virtual here, so there will always be a matching subclass
+ // to find.
+ MRI->constrainRegClass(NewReg, MRI->getRegClass((*I)->getReg()));
+
DEBUG(dbgs() << "Replacing operand "
<< **I << " with "
<< PrintReg(NewReg) << "\n");
diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td
index e5da3a5..36e5680 100644
--- a/lib/Target/ARM/ARM.td
+++ b/lib/Target/ARM/ARM.td
@@ -45,7 +45,7 @@ def FeatureFP16 : SubtargetFeature<"fp16", "HasFP16", "true",
def FeatureVFP4 : SubtargetFeature<"vfp4", "HasVFPv4", "true",
"Enable VFP4 instructions",
[FeatureVFP3, FeatureFP16]>;
-def FeatureV8FP : SubtargetFeature<"v8fp", "HasV8FP",
+def FeatureFPARMv8 : SubtargetFeature<"fp-armv8", "HasFPARMv8",
"true", "Enable ARMv8 FP",
[FeatureVFP4]>;
def FeatureD16 : SubtargetFeature<"d16", "HasD16", "true",
@@ -67,6 +67,11 @@ def FeaturePerfMon : SubtargetFeature<"perfmon", "HasPerfMon", "true",
"Enable support for Performance Monitor extensions">;
def FeatureTrustZone : SubtargetFeature<"trustzone", "HasTrustZone", "true",
"Enable support for TrustZone security extensions">;
+def FeatureCrypto : SubtargetFeature<"crypto", "HasCrypto", "true",
+ "Enable support for Cryptography extensions",
+ [FeatureNEON]>;
+def FeatureCRC : SubtargetFeature<"crc", "HasCRC", "true",
+ "Enable support for CRC instructions">;
// Some processors have FP multiply-accumulate instructions that don't
// play nicely with other VFP / NEON instructions, and it's generally better
@@ -114,10 +119,24 @@ def FeatureDSPThumb2 : SubtargetFeature<"t2dsp", "Thumb2DSP", "true",
def FeatureMP : SubtargetFeature<"mp", "HasMPExtension", "true",
"Supports Multiprocessing extension">;
-// M-series ISA?
-def FeatureMClass : SubtargetFeature<"mclass", "IsMClass", "true",
+// Virtualization extension - requires HW divide (ARMv7-AR ARMARM - 4.4.8).
+def FeatureVirtualization : SubtargetFeature<"virtualization",
+ "HasVirtualization", "true",
+ "Supports Virtualization extension",
+ [FeatureHWDiv, FeatureHWDivARM]>;
+
+// M-series ISA
+def FeatureMClass : SubtargetFeature<"mclass", "ARMProcClass", "MClass",
"Is microcontroller profile ('M' series)">;
+// R-series ISA
+def FeatureRClass : SubtargetFeature<"rclass", "ARMProcClass", "RClass",
+ "Is realtime profile ('R' series)">;
+
+// A-series ISA
+def FeatureAClass : SubtargetFeature<"aclass", "ARMProcClass", "AClass",
+ "Is application profile ('A' series)">;
+
// Special TRAP encoding for NaCl, which looks like a TRAP in Thumb too.
// See ARMInstrInfo.td for details.
def FeatureNaClTrap : SubtargetFeature<"nacl-trap", "UseNaClTrap", "true",
@@ -135,15 +154,19 @@ def HasV5TEOps : SubtargetFeature<"v5te", "HasV5TEOps", "true",
def HasV6Ops : SubtargetFeature<"v6", "HasV6Ops", "true",
"Support ARM v6 instructions",
[HasV5TEOps]>;
+def HasV6MOps : SubtargetFeature<"v6m", "HasV6MOps", "true",
+ "Support ARM v6M instructions",
+ [HasV6Ops]>;
def HasV6T2Ops : SubtargetFeature<"v6t2", "HasV6T2Ops", "true",
"Support ARM v6t2 instructions",
- [HasV6Ops, FeatureThumb2]>;
+ [HasV6MOps, FeatureThumb2]>;
def HasV7Ops : SubtargetFeature<"v7", "HasV7Ops", "true",
"Support ARM v7 instructions",
[HasV6T2Ops, FeaturePerfMon]>;
def HasV8Ops : SubtargetFeature<"v8", "HasV8Ops", "true",
"Support ARM v8 instructions",
- [HasV7Ops]>;
+ [HasV7Ops, FeatureVirtualization,
+ FeatureMP]>;
//===----------------------------------------------------------------------===//
// ARM Processors supported.
@@ -179,9 +202,23 @@ def ProcSwift : SubtargetFeature<"swift", "ARMProcFamily", "Swift",
// FIXME: It has not been determined if A15 has these features.
def ProcA15 : SubtargetFeature<"a15", "ARMProcFamily", "CortexA15",
"Cortex-A15 ARM processors",
- [FeatureT2XtPk, FeatureFP16, FeatureVFP4,
+ [FeatureT2XtPk, FeatureVFP4,
+ FeatureMP, FeatureHWDiv, FeatureHWDivARM,
FeatureAvoidPartialCPSR,
- FeatureTrustZone]>;
+ FeatureTrustZone, FeatureVirtualization]>;
+
+def ProcA53 : SubtargetFeature<"a53", "ARMProcFamily", "CortexA53",
+ "Cortex-A53 ARM processors",
+ [FeatureHWDiv, FeatureHWDivARM,
+ FeatureTrustZone, FeatureT2XtPk,
+ FeatureCrypto, FeatureCRC]>;
+
+def ProcA57 : SubtargetFeature<"a57", "ARMProcFamily", "CortexA57",
+ "Cortex-A57 ARM processors",
+ [FeatureHWDiv, FeatureHWDivARM,
+ FeatureTrustZone, FeatureT2XtPk,
+ FeatureCrypto, FeatureCRC]>;
+
def ProcR5 : SubtargetFeature<"r5", "ARMProcFamily", "CortexR5",
"Cortex-R5 ARM processors",
[FeatureSlowFPBrcc,
@@ -243,7 +280,7 @@ def : Processor<"mpcore", ARMV6Itineraries, [HasV6Ops, FeatureVFP2,
FeatureHasSlowFPVMLx]>;
// V6M Processors.
-def : Processor<"cortex-m0", ARMV6Itineraries, [HasV6Ops, FeatureNoARM,
+def : Processor<"cortex-m0", ARMV6Itineraries, [HasV6MOps, FeatureNoARM,
FeatureDB, FeatureMClass]>;
// V6T2 Processors.
@@ -258,26 +295,30 @@ def : Processor<"arm1156t2f-s", ARMV6Itineraries, [HasV6T2Ops, FeatureVFP2,
def : ProcessorModel<"cortex-a5", CortexA8Model,
[ProcA5, HasV7Ops, FeatureNEON, FeatureDB,
FeatureVFP4, FeatureDSPThumb2,
- FeatureHasRAS]>;
+ FeatureHasRAS, FeatureAClass]>;
def : ProcessorModel<"cortex-a8", CortexA8Model,
[ProcA8, HasV7Ops, FeatureNEON, FeatureDB,
- FeatureDSPThumb2, FeatureHasRAS]>;
+ FeatureDSPThumb2, FeatureHasRAS,
+ FeatureAClass]>;
def : ProcessorModel<"cortex-a9", CortexA9Model,
[ProcA9, HasV7Ops, FeatureNEON, FeatureDB,
- FeatureDSPThumb2, FeatureHasRAS]>;
+ FeatureDSPThumb2, FeatureHasRAS,
+ FeatureAClass]>;
def : ProcessorModel<"cortex-a9-mp", CortexA9Model,
[ProcA9, HasV7Ops, FeatureNEON, FeatureDB,
FeatureDSPThumb2, FeatureMP,
- FeatureHasRAS]>;
+ FeatureHasRAS, FeatureAClass]>;
// FIXME: A15 has currently the same ProcessorModel as A9.
def : ProcessorModel<"cortex-a15", CortexA9Model,
[ProcA15, HasV7Ops, FeatureNEON, FeatureDB,
- FeatureDSPThumb2, FeatureHasRAS]>;
+ FeatureDSPThumb2, FeatureHasRAS,
+ FeatureAClass]>;
// FIXME: R5 has currently the same ProcessorModel as A8.
def : ProcessorModel<"cortex-r5", CortexA8Model,
[ProcR5, HasV7Ops, FeatureDB,
FeatureVFP3, FeatureDSPThumb2,
- FeatureHasRAS]>;
+ FeatureHasRAS, FeatureVFPOnlySP,
+ FeatureD16, FeatureRClass]>;
// V7M Processors.
def : ProcNoItin<"cortex-m3", [HasV7Ops,
@@ -289,16 +330,22 @@ def : ProcNoItin<"cortex-m4", [HasV7Ops,
FeatureThumb2, FeatureNoARM, FeatureDB,
FeatureHWDiv, FeatureDSPThumb2,
FeatureT2XtPk, FeatureVFP4,
- FeatureVFPOnlySP, FeatureMClass]>;
+ FeatureVFPOnlySP, FeatureD16,
+ FeatureMClass]>;
// Swift uArch Processors.
def : ProcessorModel<"swift", SwiftModel,
[ProcSwift, HasV7Ops, FeatureNEON,
FeatureDB, FeatureDSPThumb2,
- FeatureHasRAS]>;
+ FeatureHasRAS, FeatureAClass]>;
// V8 Processors
-def : ProcNoItin<"cortex-a53", [HasV8Ops]>;
+def : ProcNoItin<"cortex-a53", [ProcA53, HasV8Ops, FeatureAClass,
+ FeatureDB, FeatureFPARMv8,
+ FeatureNEON, FeatureDSPThumb2]>;
+def : ProcNoItin<"cortex-a57", [ProcA57, HasV8Ops, FeatureAClass,
+ FeatureDB, FeatureFPARMv8,
+ FeatureNEON, FeatureDSPThumb2]>;
//===----------------------------------------------------------------------===//
// Register File Description
diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp
index 13a22b1..e79f88d 100644
--- a/lib/Target/ARM/ARMAsmPrinter.cpp
+++ b/lib/Target/ARM/ARMAsmPrinter.cpp
@@ -17,6 +17,7 @@
#include "ARM.h"
#include "ARMBuildAttrs.h"
#include "ARMConstantPoolValue.h"
+#include "ARMFPUName.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMTargetMachine.h"
#include "ARMTargetObjectFile.h"
@@ -55,164 +56,6 @@
#include <cctype>
using namespace llvm;
-namespace {
-
- // Per section and per symbol attributes are not supported.
- // To implement them we would need the ability to delay this emission
- // until the assembly file is fully parsed/generated as only then do we
- // know the symbol and section numbers.
- class AttributeEmitter {
- public:
- virtual void MaybeSwitchVendor(StringRef Vendor) = 0;
- virtual void EmitAttribute(unsigned Attribute, unsigned Value) = 0;
- virtual void EmitTextAttribute(unsigned Attribute, StringRef String) = 0;
- virtual void Finish() = 0;
- virtual ~AttributeEmitter() {}
- };
-
- class AsmAttributeEmitter : public AttributeEmitter {
- MCStreamer &Streamer;
-
- public:
- AsmAttributeEmitter(MCStreamer &Streamer_) : Streamer(Streamer_) {}
- void MaybeSwitchVendor(StringRef Vendor) { }
-
- void EmitAttribute(unsigned Attribute, unsigned Value) {
- Streamer.EmitRawText("\t.eabi_attribute " +
- Twine(Attribute) + ", " + Twine(Value));
- }
-
- void EmitTextAttribute(unsigned Attribute, StringRef String) {
- switch (Attribute) {
- default: llvm_unreachable("Unsupported Text attribute in ASM Mode");
- case ARMBuildAttrs::CPU_name:
- Streamer.EmitRawText(StringRef("\t.cpu ") + String.lower());
- break;
- /* GAS requires .fpu to be emitted regardless of EABI attribute */
- case ARMBuildAttrs::Advanced_SIMD_arch:
- case ARMBuildAttrs::VFP_arch:
- Streamer.EmitRawText(StringRef("\t.fpu ") + String.lower());
- break;
- }
- }
- void Finish() { }
- };
-
- class ObjectAttributeEmitter : public AttributeEmitter {
- // This structure holds all attributes, accounting for
- // their string/numeric value, so we can later emmit them
- // in declaration order, keeping all in the same vector
- struct AttributeItemType {
- enum {
- HiddenAttribute = 0,
- NumericAttribute,
- TextAttribute
- } Type;
- unsigned Tag;
- unsigned IntValue;
- StringRef StringValue;
- } AttributeItem;
-
- MCObjectStreamer &Streamer;
- StringRef CurrentVendor;
- SmallVector<AttributeItemType, 64> Contents;
-
- // Account for the ULEB/String size of each item,
- // not just the number of items
- size_t ContentsSize;
- // FIXME: this should be in a more generic place, but
- // getULEBSize() is in MCAsmInfo and will be moved to MCDwarf
- size_t getULEBSize(int Value) {
- size_t Size = 0;
- do {
- Value >>= 7;
- Size += sizeof(int8_t); // Is this really necessary?
- } while (Value);
- return Size;
- }
-
- public:
- ObjectAttributeEmitter(MCObjectStreamer &Streamer_) :
- Streamer(Streamer_), CurrentVendor(""), ContentsSize(0) { }
-
- void MaybeSwitchVendor(StringRef Vendor) {
- assert(!Vendor.empty() && "Vendor cannot be empty.");
-
- if (CurrentVendor.empty())
- CurrentVendor = Vendor;
- else if (CurrentVendor == Vendor)
- return;
- else
- Finish();
-
- CurrentVendor = Vendor;
-
- assert(Contents.size() == 0);
- }
-
- void EmitAttribute(unsigned Attribute, unsigned Value) {
- AttributeItemType attr = {
- AttributeItemType::NumericAttribute,
- Attribute,
- Value,
- StringRef("")
- };
- ContentsSize += getULEBSize(Attribute);
- ContentsSize += getULEBSize(Value);
- Contents.push_back(attr);
- }
-
- void EmitTextAttribute(unsigned Attribute, StringRef String) {
- AttributeItemType attr = {
- AttributeItemType::TextAttribute,
- Attribute,
- 0,
- String
- };
- ContentsSize += getULEBSize(Attribute);
- // String + \0
- ContentsSize += String.size()+1;
-
- Contents.push_back(attr);
- }
-
- void Finish() {
- // Vendor size + Vendor name + '\0'
- const size_t VendorHeaderSize = 4 + CurrentVendor.size() + 1;
-
- // Tag + Tag Size
- const size_t TagHeaderSize = 1 + 4;
-
- Streamer.EmitIntValue(VendorHeaderSize + TagHeaderSize + ContentsSize, 4);
- Streamer.EmitBytes(CurrentVendor);
- Streamer.EmitIntValue(0, 1); // '\0'
-
- Streamer.EmitIntValue(ARMBuildAttrs::File, 1);
- Streamer.EmitIntValue(TagHeaderSize + ContentsSize, 4);
-
- // Size should have been accounted for already, now
- // emit each field as its type (ULEB or String)
- for (unsigned int i=0; i<Contents.size(); ++i) {
- AttributeItemType item = Contents[i];
- Streamer.EmitULEB128IntValue(item.Tag);
- switch (item.Type) {
- default: llvm_unreachable("Invalid attribute type");
- case AttributeItemType::NumericAttribute:
- Streamer.EmitULEB128IntValue(item.IntValue);
- break;
- case AttributeItemType::TextAttribute:
- Streamer.EmitBytes(item.StringValue.upper());
- Streamer.EmitIntValue(0, 1); // '\0'
- break;
- }
- }
-
- Contents.clear();
- }
- };
-
-} // end of anonymous namespace
-
/// EmitDwarfRegOp - Emit dwarf register operation.
void ARMAsmPrinter::EmitDwarfRegOp(const MachineLocation &MLoc,
bool Indirect) const {
@@ -302,7 +145,7 @@ void ARMAsmPrinter::EmitXXStructor(const Constant *CV) {
const GlobalValue *GV = dyn_cast<GlobalValue>(CV->stripPointerCasts());
assert(GV && "C++ constructor pointer was not a GlobalValue!");
- const MCExpr *E = MCSymbolRefExpr::Create(Mang->getSymbol(GV),
+ const MCExpr *E = MCSymbolRefExpr::Create(getSymbol(GV),
(Subtarget->isTargetDarwin()
? MCSymbolRefExpr::VK_None
: MCSymbolRefExpr::VK_ARM_TARGET1),
@@ -363,7 +206,7 @@ void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum,
else if ((Modifier && strcmp(Modifier, "hi16") == 0) ||
(TF & ARMII::MO_HI16))
O << ":upper16:";
- O << *Mang->getSymbol(GV);
+ O << *getSymbol(GV);
printOffset(MO.getOffset(), O);
if (TF == ARMII::MO_PLT)
@@ -496,6 +339,23 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
if (!FlagsOP.isImm())
return true;
unsigned Flags = FlagsOP.getImm();
+
+ // This operand may not be the one that actually provides the register. If
+ // it's tied to a previous one then we should refer instead to that one
+ // for registers and their classes.
+ unsigned TiedIdx;
+ if (InlineAsm::isUseOperandTiedToDef(Flags, TiedIdx)) {
+ for (OpNum = InlineAsm::MIOp_FirstOperand; TiedIdx; --TiedIdx) {
+ unsigned OpFlags = MI->getOperand(OpNum).getImm();
+ OpNum += InlineAsm::getNumOperandRegisters(OpFlags) + 1;
+ }
+ Flags = MI->getOperand(OpNum).getImm();
+
+ // Later code expects OpNum to be pointing at the register rather than
+ // the flags.
+ OpNum += 1;
+ }
+
unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);
unsigned RC;
InlineAsm::hasRegClassConstraint(Flags, RC);
@@ -714,11 +574,6 @@ void ARMAsmPrinter::EmitEndOfAsmFile(Module &M) {
// generates code that does this, it is always safe to set.
OutStreamer.EmitAssemblerFlag(MCAF_SubsectionsViaSymbols);
}
- // FIXME: This should eventually end up somewhere else where more
- // intelligent flag decisions can be made. For now we are just maintaining
- // the status quo for ARM and setting EF_ARM_EABI_VER5 as the default.
- if (MCELFStreamer *MES = dyn_cast<MCELFStreamer>(&OutStreamer))
- MES->getAssembler().setELFHeaderEFlags(ELF::EF_ARM_EABI_VER5);
}
//===----------------------------------------------------------------------===//
@@ -728,150 +583,150 @@ void ARMAsmPrinter::EmitEndOfAsmFile(Module &M) {
// to appear in the .ARM.attributes section in ELF.
// Instead of subclassing the MCELFStreamer, we do the work here.
-void ARMAsmPrinter::emitAttributes() {
-
- emitARMAttributeSection();
-
- /* GAS expect .fpu to be emitted, regardless of VFP build attribute */
- bool emitFPU = false;
- AttributeEmitter *AttrEmitter;
- if (OutStreamer.hasRawTextSupport()) {
- AttrEmitter = new AsmAttributeEmitter(OutStreamer);
- emitFPU = true;
- } else {
- MCObjectStreamer &O = static_cast<MCObjectStreamer&>(OutStreamer);
- AttrEmitter = new ObjectAttributeEmitter(O);
- }
-
- AttrEmitter->MaybeSwitchVendor("aeabi");
-
- std::string CPUString = Subtarget->getCPUString();
+static ARMBuildAttrs::CPUArch getArchForCPU(StringRef CPU,
+ const ARMSubtarget *Subtarget) {
+ if (CPU == "xscale")
+ return ARMBuildAttrs::v5TEJ;
- if (CPUString == "cortex-a8" ||
- Subtarget->isCortexA8()) {
- AttrEmitter->EmitTextAttribute(ARMBuildAttrs::CPU_name, "cortex-a8");
- AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch, ARMBuildAttrs::v7);
- AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch_profile,
- ARMBuildAttrs::ApplicationProfile);
- AttrEmitter->EmitAttribute(ARMBuildAttrs::ARM_ISA_use,
- ARMBuildAttrs::Allowed);
- AttrEmitter->EmitAttribute(ARMBuildAttrs::THUMB_ISA_use,
- ARMBuildAttrs::AllowThumb32);
- // Fixme: figure out when this is emitted.
- //AttrEmitter->EmitAttribute(ARMBuildAttrs::WMMX_arch,
- // ARMBuildAttrs::AllowWMMXv1);
- //
-
- /// ADD additional Else-cases here!
- } else if (CPUString == "xscale") {
- AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch, ARMBuildAttrs::v5TEJ);
- AttrEmitter->EmitAttribute(ARMBuildAttrs::ARM_ISA_use,
- ARMBuildAttrs::Allowed);
- AttrEmitter->EmitAttribute(ARMBuildAttrs::THUMB_ISA_use,
- ARMBuildAttrs::Allowed);
- } else if (Subtarget->hasV8Ops())
- AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch, ARMBuildAttrs::v8);
+ if (Subtarget->hasV8Ops())
+ return ARMBuildAttrs::v8;
else if (Subtarget->hasV7Ops()) {
- AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch, ARMBuildAttrs::v7);
- AttrEmitter->EmitAttribute(ARMBuildAttrs::THUMB_ISA_use,
- ARMBuildAttrs::AllowThumb32);
+ if (Subtarget->isMClass() && Subtarget->hasThumb2DSP())
+ return ARMBuildAttrs::v7E_M;
+ return ARMBuildAttrs::v7;
} else if (Subtarget->hasV6T2Ops())
- AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch, ARMBuildAttrs::v6T2);
+ return ARMBuildAttrs::v6T2;
+ else if (Subtarget->hasV6MOps())
+ return ARMBuildAttrs::v6S_M;
else if (Subtarget->hasV6Ops())
- AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch, ARMBuildAttrs::v6);
+ return ARMBuildAttrs::v6;
else if (Subtarget->hasV5TEOps())
- AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch, ARMBuildAttrs::v5TE);
+ return ARMBuildAttrs::v5TE;
else if (Subtarget->hasV5TOps())
- AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch, ARMBuildAttrs::v5T);
+ return ARMBuildAttrs::v5T;
else if (Subtarget->hasV4TOps())
- AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch, ARMBuildAttrs::v4T);
+ return ARMBuildAttrs::v4T;
else
- AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch, ARMBuildAttrs::v4);
+ return ARMBuildAttrs::v4;
+}
- if (Subtarget->hasNEON() && emitFPU) {
- /* NEON is not exactly a VFP architecture, but GAS emit one of
- * neon/neon-vfpv4/vfpv3/vfpv2 for .fpu parameters */
- if (Subtarget->hasVFP4())
- AttrEmitter->EmitTextAttribute(ARMBuildAttrs::Advanced_SIMD_arch,
- "neon-vfpv4");
- else
- AttrEmitter->EmitTextAttribute(ARMBuildAttrs::Advanced_SIMD_arch, "neon");
- /* If emitted for NEON, omit from VFP below, since you can have both
- * NEON and VFP in build attributes but only one .fpu */
- emitFPU = false;
+void ARMAsmPrinter::emitAttributes() {
+ MCTargetStreamer &TS = OutStreamer.getTargetStreamer();
+ ARMTargetStreamer &ATS = static_cast<ARMTargetStreamer &>(TS);
+
+ ATS.switchVendor("aeabi");
+
+ std::string CPUString = Subtarget->getCPUString();
+
+ if (CPUString != "generic")
+ ATS.emitTextAttribute(ARMBuildAttrs::CPU_name, CPUString);
+
+ ATS.emitAttribute(ARMBuildAttrs::CPU_arch,
+ getArchForCPU(CPUString, Subtarget));
+
+ if (Subtarget->isAClass()) {
+ ATS.emitAttribute(ARMBuildAttrs::CPU_arch_profile,
+ ARMBuildAttrs::ApplicationProfile);
+ } else if (Subtarget->isRClass()) {
+ ATS.emitAttribute(ARMBuildAttrs::CPU_arch_profile,
+ ARMBuildAttrs::RealTimeProfile);
+  } else if (Subtarget->isMClass()) {
+ ATS.emitAttribute(ARMBuildAttrs::CPU_arch_profile,
+ ARMBuildAttrs::MicroControllerProfile);
}
- /* V8FP + .fpu */
- if (Subtarget->hasV8FP()) {
- AttrEmitter->EmitAttribute(ARMBuildAttrs::VFP_arch,
- ARMBuildAttrs::AllowV8FPA);
- if (emitFPU)
- AttrEmitter->EmitTextAttribute(ARMBuildAttrs::VFP_arch, "v8fp");
- /* VFPv4 + .fpu */
- } else if (Subtarget->hasVFP4()) {
- AttrEmitter->EmitAttribute(ARMBuildAttrs::VFP_arch,
- ARMBuildAttrs::AllowFPv4A);
- if (emitFPU)
- AttrEmitter->EmitTextAttribute(ARMBuildAttrs::VFP_arch, "vfpv4");
-
- /* VFPv3 + .fpu */
- } else if (Subtarget->hasVFP3()) {
- AttrEmitter->EmitAttribute(ARMBuildAttrs::VFP_arch,
- ARMBuildAttrs::AllowFPv3A);
- if (emitFPU)
- AttrEmitter->EmitTextAttribute(ARMBuildAttrs::VFP_arch, "vfpv3");
-
- /* VFPv2 + .fpu */
- } else if (Subtarget->hasVFP2()) {
- AttrEmitter->EmitAttribute(ARMBuildAttrs::VFP_arch,
- ARMBuildAttrs::AllowFPv2);
- if (emitFPU)
- AttrEmitter->EmitTextAttribute(ARMBuildAttrs::VFP_arch, "vfpv2");
+ ATS.emitAttribute(ARMBuildAttrs::ARM_ISA_use, Subtarget->hasARMOps() ?
+ ARMBuildAttrs::Allowed : ARMBuildAttrs::Not_Allowed);
+ if (Subtarget->isThumb1Only()) {
+ ATS.emitAttribute(ARMBuildAttrs::THUMB_ISA_use,
+ ARMBuildAttrs::Allowed);
+ } else if (Subtarget->hasThumb2()) {
+ ATS.emitAttribute(ARMBuildAttrs::THUMB_ISA_use,
+ ARMBuildAttrs::AllowThumb32);
}
- /* TODO: ARMBuildAttrs::Allowed is not completely accurate,
- * since NEON can have 1 (allowed) or 2 (MAC operations) */
if (Subtarget->hasNEON()) {
- if (Subtarget->hasV8Ops())
- AttrEmitter->EmitAttribute(ARMBuildAttrs::Advanced_SIMD_arch,
- ARMBuildAttrs::AllowedNeonV8);
+    /* NEON is not exactly a VFP architecture, but GAS emits one of
+     * neon/neon-fp-armv8/neon-vfpv4/vfpv3/vfpv2 for .fpu parameters */
+ if (Subtarget->hasFPARMv8()) {
+ if (Subtarget->hasCrypto())
+ ATS.emitFPU(ARM::CRYPTO_NEON_FP_ARMV8);
+ else
+ ATS.emitFPU(ARM::NEON_FP_ARMV8);
+      } else if (Subtarget->hasVFP4())
+ ATS.emitFPU(ARM::NEON_VFPV4);
else
- AttrEmitter->EmitAttribute(ARMBuildAttrs::Advanced_SIMD_arch,
- ARMBuildAttrs::Allowed);
+ ATS.emitFPU(ARM::NEON);
+ // Emit Tag_Advanced_SIMD_arch for ARMv8 architecture
+ if (Subtarget->hasV8Ops())
+ ATS.emitAttribute(ARMBuildAttrs::Advanced_SIMD_arch,
+ ARMBuildAttrs::AllowNeonARMv8);
+ } else {
+ if (Subtarget->hasFPARMv8())
+ ATS.emitFPU(ARM::FP_ARMV8);
+ else if (Subtarget->hasVFP4())
+ ATS.emitFPU(Subtarget->hasD16() ? ARM::VFPV4_D16 : ARM::VFPV4);
+ else if (Subtarget->hasVFP3())
+ ATS.emitFPU(Subtarget->hasD16() ? ARM::VFPV3_D16 : ARM::VFPV3);
+ else if (Subtarget->hasVFP2())
+ ATS.emitFPU(ARM::VFPV2);
}
// Signal various FP modes.
if (!TM.Options.UnsafeFPMath) {
- AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_FP_denormal,
- ARMBuildAttrs::Allowed);
- AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_FP_exceptions,
- ARMBuildAttrs::Allowed);
+ ATS.emitAttribute(ARMBuildAttrs::ABI_FP_denormal, ARMBuildAttrs::Allowed);
+ ATS.emitAttribute(ARMBuildAttrs::ABI_FP_exceptions,
+ ARMBuildAttrs::Allowed);
}
if (TM.Options.NoInfsFPMath && TM.Options.NoNaNsFPMath)
- AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_FP_number_model,
- ARMBuildAttrs::Allowed);
+ ATS.emitAttribute(ARMBuildAttrs::ABI_FP_number_model,
+ ARMBuildAttrs::Allowed);
else
- AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_FP_number_model,
- ARMBuildAttrs::AllowIEE754);
+ ATS.emitAttribute(ARMBuildAttrs::ABI_FP_number_model,
+ ARMBuildAttrs::AllowIEE754);
// FIXME: add more flags to ARMBuildAttrs.h
// 8-bytes alignment stuff.
- AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_align8_needed, 1);
- AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_align8_preserved, 1);
+ ATS.emitAttribute(ARMBuildAttrs::ABI_align8_needed, 1);
+ ATS.emitAttribute(ARMBuildAttrs::ABI_align8_preserved, 1);
+
+ // ABI_HardFP_use attribute to indicate single precision FP.
+ if (Subtarget->isFPOnlySP())
+ ATS.emitAttribute(ARMBuildAttrs::ABI_HardFP_use,
+ ARMBuildAttrs::HardFPSinglePrecision);
// Hard float. Use both S and D registers and conform to AAPCS-VFP.
- if (Subtarget->isAAPCS_ABI() && TM.Options.FloatABIType == FloatABI::Hard) {
- AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_HardFP_use, 3);
- AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_VFP_args, 1);
- }
+ if (Subtarget->isAAPCS_ABI() && TM.Options.FloatABIType == FloatABI::Hard)
+ ATS.emitAttribute(ARMBuildAttrs::ABI_VFP_args, ARMBuildAttrs::HardFPAAPCS);
+
// FIXME: Should we signal R9 usage?
- if (Subtarget->hasDivide())
- AttrEmitter->EmitAttribute(ARMBuildAttrs::DIV_use, 1);
+ if (Subtarget->hasFP16())
+ ATS.emitAttribute(ARMBuildAttrs::FP_HP_extension, ARMBuildAttrs::AllowHPFP);
+
+ if (Subtarget->hasMPExtension())
+ ATS.emitAttribute(ARMBuildAttrs::MPextension_use, ARMBuildAttrs::AllowMP);
+
+ if (Subtarget->hasDivide()) {
+    // Check whether hardware divide is available in ARM mode as well as Thumb2.
+ ATS.emitAttribute(ARMBuildAttrs::DIV_use,
+ Subtarget->hasDivideInARMMode() ? ARMBuildAttrs::AllowDIVExt :
+ ARMBuildAttrs::AllowDIVIfExists);
+ }
- AttrEmitter->Finish();
- delete AttrEmitter;
+ if (Subtarget->hasTrustZone() && Subtarget->hasVirtualization())
+ ATS.emitAttribute(ARMBuildAttrs::Virtualization_use,
+ ARMBuildAttrs::AllowTZVirtualization);
+ else if (Subtarget->hasTrustZone())
+ ATS.emitAttribute(ARMBuildAttrs::Virtualization_use,
+ ARMBuildAttrs::AllowTZ);
+ else if (Subtarget->hasVirtualization())
+ ATS.emitAttribute(ARMBuildAttrs::Virtualization_use,
+ ARMBuildAttrs::AllowVirtualization);
+
+ ATS.finishAttributeSection();
}
void ARMAsmPrinter::emitARMAttributeSection() {
@@ -923,7 +778,7 @@ MCSymbol *ARMAsmPrinter::GetARMGVSymbol(const GlobalValue *GV) {
bool isIndirect = Subtarget->isTargetDarwin() &&
Subtarget->GVIsIndirectSymbol(GV, TM.getRelocationModel());
if (!isIndirect)
- return Mang->getSymbol(GV);
+ return getSymbol(GV);
// FIXME: Remove this when Darwin transition to @GOT like syntax.
MCSymbol *MCSym = GetSymbolWithGlobalValueBase(GV, "$non_lazy_ptr");
@@ -934,7 +789,7 @@ MCSymbol *ARMAsmPrinter::GetARMGVSymbol(const GlobalValue *GV) {
MMIMachO.getGVStubEntry(MCSym);
if (StubSym.getPointer() == 0)
StubSym = MachineModuleInfoImpl::
- StubValueTy(Mang->getSymbol(GV), !GV->hasInternalLinkage());
+ StubValueTy(getSymbol(GV), !GV->hasInternalLinkage());
return MCSym;
}
@@ -1111,6 +966,8 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) {
assert(MI->getFlag(MachineInstr::FrameSetup) &&
"Only instruction which are involved into frame setup code are allowed");
+ MCTargetStreamer &TS = OutStreamer.getTargetStreamer();
+ ARMTargetStreamer &ATS = static_cast<ARMTargetStreamer &>(TS);
const MachineFunction &MF = *MI->getParent()->getParent();
const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo();
const ARMFunctionInfo &AFI = *MF.getInfo<ARMFunctionInfo>();
@@ -1173,7 +1030,7 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) {
RegList.push_back(SrcReg);
break;
}
- OutStreamer.EmitRegSave(RegList, Opc == ARM::VSTMDDB_UPD);
+ ATS.emitRegSave(RegList, Opc == ARM::VSTMDDB_UPD);
} else {
// Changes of stack / frame pointer.
if (SrcReg == ARM::SP) {
@@ -1221,11 +1078,11 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) {
if (DstReg == FramePtr && FramePtr != ARM::SP)
// Set-up of the frame pointer. Positive values correspond to "add"
// instruction.
- OutStreamer.EmitSetFP(FramePtr, ARM::SP, -Offset);
+ ATS.emitSetFP(FramePtr, ARM::SP, -Offset);
else if (DstReg == ARM::SP) {
// Change of SP by an offset. Positive values correspond to "sub"
// instruction.
- OutStreamer.EmitPad(Offset);
+ ATS.emitPad(Offset);
} else {
MI->dump();
llvm_unreachable("Unsupported opcode for unwinding information");
@@ -1366,7 +1223,7 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
.addReg(0));
const GlobalValue *GV = MI->getOperand(0).getGlobal();
- MCSymbol *GVSym = Mang->getSymbol(GV);
+ MCSymbol *GVSym = getSymbol(GV);
const MCExpr *GVSymExpr = MCSymbolRefExpr::Create(GVSym, OutContext);
OutStreamer.EmitInstruction(MCInstBuilder(ARM::Bcc)
.addExpr(GVSymExpr)
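The emitAttributes() rewrite is the heart of this file's change: all
formatting decisions move behind ARMTargetStreamer, so the asm printer no
longer branches on whether output is textual or an object file. A minimal
caller-side sketch using only the entry points visible above (the attribute
values are illustrative):

  MCTargetStreamer &TS = OutStreamer.getTargetStreamer();
  ARMTargetStreamer &ATS = static_cast<ARMTargetStreamer &>(TS);
  ATS.switchVendor("aeabi");                       // "aeabi" subsection
  ATS.emitTextAttribute(ARMBuildAttrs::CPU_name, "cortex-a15");
  ATS.emitAttribute(ARMBuildAttrs::CPU_arch, ARMBuildAttrs::v7);
  ATS.emitFPU(ARM::NEON_VFPV4);                    // .fpu / Tag_FP_arch
  ATS.finishAttributeSection();

In text mode this prints .cpu/.fpu/.eabi_attribute directives; in object mode
the streamer accumulates the tag/value pairs and lays out .ARM.attributes,
replacing the deleted AsmAttributeEmitter/ObjectAttributeEmitter pair.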
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 977d936..f835a4e 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -11,10 +11,11 @@
//
//===----------------------------------------------------------------------===//
-#include "ARMBaseInstrInfo.h"
#include "ARM.h"
+#include "ARMBaseInstrInfo.h"
#include "ARMBaseRegisterInfo.h"
#include "ARMConstantPoolValue.h"
+#include "ARMFeatures.h"
#include "ARMHazardRecognizer.h"
#include "ARMMachineFunctionInfo.h"
#include "MCTargetDesc/ARMAddressingModes.h"
@@ -36,7 +37,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
-#define GET_INSTRINFO_CTOR
+#define GET_INSTRINFO_CTOR_DTOR
#include "ARMGenInstrInfo.inc"
using namespace llvm;
@@ -520,11 +521,17 @@ bool ARMBaseInstrInfo::isPredicable(MachineInstr *MI) const {
if (!MI->isPredicable())
return false;
- if ((MI->getDesc().TSFlags & ARMII::DomainMask) == ARMII::DomainNEON) {
- ARMFunctionInfo *AFI =
- MI->getParent()->getParent()->getInfo<ARMFunctionInfo>();
- return AFI->isThumb2Function();
+ ARMFunctionInfo *AFI =
+ MI->getParent()->getParent()->getInfo<ARMFunctionInfo>();
+
+ if (AFI->isThumb2Function()) {
+ if (getSubtarget().restrictIT())
+ return isV8EligibleForIT(MI);
+ } else { // non-Thumb
+ if ((MI->getDesc().TSFlags & ARMII::DomainMask) == ARMII::DomainNEON)
+ return false;
}
+
return true;
}
@@ -645,16 +652,16 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
unsigned DestReg, unsigned SrcReg,
bool KillSrc) const {
bool GPRDest = ARM::GPRRegClass.contains(DestReg);
- bool GPRSrc = ARM::GPRRegClass.contains(SrcReg);
+ bool GPRSrc = ARM::GPRRegClass.contains(SrcReg);
if (GPRDest && GPRSrc) {
AddDefaultCC(AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::MOVr), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc))));
+ .addReg(SrcReg, getKillRegState(KillSrc))));
return;
}
bool SPRDest = ARM::SPRRegClass.contains(DestReg);
- bool SPRSrc = ARM::SPRRegClass.contains(SrcReg);
+ bool SPRSrc = ARM::SPRRegClass.contains(SrcReg);
unsigned Opc = 0;
if (SPRDest && SPRSrc)
@@ -683,26 +690,47 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
int Spacing = 1;
// Use VORRq when possible.
- if (ARM::QQPRRegClass.contains(DestReg, SrcReg))
- Opc = ARM::VORRq, BeginIdx = ARM::qsub_0, SubRegs = 2;
- else if (ARM::QQQQPRRegClass.contains(DestReg, SrcReg))
- Opc = ARM::VORRq, BeginIdx = ARM::qsub_0, SubRegs = 4;
+ if (ARM::QQPRRegClass.contains(DestReg, SrcReg)) {
+ Opc = ARM::VORRq;
+ BeginIdx = ARM::qsub_0;
+ SubRegs = 2;
+ } else if (ARM::QQQQPRRegClass.contains(DestReg, SrcReg)) {
+ Opc = ARM::VORRq;
+ BeginIdx = ARM::qsub_0;
+ SubRegs = 4;
// Fall back to VMOVD.
- else if (ARM::DPairRegClass.contains(DestReg, SrcReg))
- Opc = ARM::VMOVD, BeginIdx = ARM::dsub_0, SubRegs = 2;
- else if (ARM::DTripleRegClass.contains(DestReg, SrcReg))
- Opc = ARM::VMOVD, BeginIdx = ARM::dsub_0, SubRegs = 3;
- else if (ARM::DQuadRegClass.contains(DestReg, SrcReg))
- Opc = ARM::VMOVD, BeginIdx = ARM::dsub_0, SubRegs = 4;
- else if (ARM::GPRPairRegClass.contains(DestReg, SrcReg))
- Opc = ARM::MOVr, BeginIdx = ARM::gsub_0, SubRegs = 2;
-
- else if (ARM::DPairSpcRegClass.contains(DestReg, SrcReg))
- Opc = ARM::VMOVD, BeginIdx = ARM::dsub_0, SubRegs = 2, Spacing = 2;
- else if (ARM::DTripleSpcRegClass.contains(DestReg, SrcReg))
- Opc = ARM::VMOVD, BeginIdx = ARM::dsub_0, SubRegs = 3, Spacing = 2;
- else if (ARM::DQuadSpcRegClass.contains(DestReg, SrcReg))
- Opc = ARM::VMOVD, BeginIdx = ARM::dsub_0, SubRegs = 4, Spacing = 2;
+ } else if (ARM::DPairRegClass.contains(DestReg, SrcReg)) {
+ Opc = ARM::VMOVD;
+ BeginIdx = ARM::dsub_0;
+ SubRegs = 2;
+ } else if (ARM::DTripleRegClass.contains(DestReg, SrcReg)) {
+ Opc = ARM::VMOVD;
+ BeginIdx = ARM::dsub_0;
+ SubRegs = 3;
+ } else if (ARM::DQuadRegClass.contains(DestReg, SrcReg)) {
+ Opc = ARM::VMOVD;
+ BeginIdx = ARM::dsub_0;
+ SubRegs = 4;
+ } else if (ARM::GPRPairRegClass.contains(DestReg, SrcReg)) {
+ Opc = Subtarget.isThumb2() ? ARM::tMOVr : ARM::MOVr;
+ BeginIdx = ARM::gsub_0;
+ SubRegs = 2;
+ } else if (ARM::DPairSpcRegClass.contains(DestReg, SrcReg)) {
+ Opc = ARM::VMOVD;
+ BeginIdx = ARM::dsub_0;
+ SubRegs = 2;
+ Spacing = 2;
+ } else if (ARM::DTripleSpcRegClass.contains(DestReg, SrcReg)) {
+ Opc = ARM::VMOVD;
+ BeginIdx = ARM::dsub_0;
+ SubRegs = 3;
+ Spacing = 2;
+ } else if (ARM::DQuadSpcRegClass.contains(DestReg, SrcReg)) {
+ Opc = ARM::VMOVD;
+ BeginIdx = ARM::dsub_0;
+ SubRegs = 4;
+ Spacing = 2;
+ }
assert(Opc && "Impossible reg-to-reg copy");
@@ -711,22 +739,21 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
// Copy register tuples backward when the first Dest reg overlaps with SrcReg.
if (TRI->regsOverlap(SrcReg, TRI->getSubReg(DestReg, BeginIdx))) {
- BeginIdx = BeginIdx + ((SubRegs-1)*Spacing);
+ BeginIdx = BeginIdx + ((SubRegs - 1) * Spacing);
Spacing = -Spacing;
}
#ifndef NDEBUG
SmallSet<unsigned, 4> DstRegs;
#endif
for (unsigned i = 0; i != SubRegs; ++i) {
- unsigned Dst = TRI->getSubReg(DestReg, BeginIdx + i*Spacing);
- unsigned Src = TRI->getSubReg(SrcReg, BeginIdx + i*Spacing);
+ unsigned Dst = TRI->getSubReg(DestReg, BeginIdx + i * Spacing);
+ unsigned Src = TRI->getSubReg(SrcReg, BeginIdx + i * Spacing);
assert(Dst && Src && "Bad sub-register");
#ifndef NDEBUG
assert(!DstRegs.count(Src) && "destructive vector copy");
DstRegs.insert(Dst);
#endif
- Mov = BuildMI(MBB, I, I->getDebugLoc(), get(Opc), Dst)
- .addReg(Src);
+ Mov = BuildMI(MBB, I, I->getDebugLoc(), get(Opc), Dst).addReg(Src);
// VORR takes two source operands.
if (Opc == ARM::VORRq)
Mov.addReg(Src);
@@ -1404,9 +1431,11 @@ bool ARMBaseInstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
case ARM::VLDRD:
case ARM::VLDRS:
case ARM::t2LDRi8:
+ case ARM::t2LDRBi8:
case ARM::t2LDRDi8:
case ARM::t2LDRSHi8:
case ARM::t2LDRi12:
+ case ARM::t2LDRBi12:
case ARM::t2LDRSHi12:
break;
}
@@ -1423,8 +1452,10 @@ bool ARMBaseInstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
case ARM::VLDRD:
case ARM::VLDRS:
case ARM::t2LDRi8:
+ case ARM::t2LDRBi8:
case ARM::t2LDRSHi8:
case ARM::t2LDRi12:
+ case ARM::t2LDRBi12:
case ARM::t2LDRSHi12:
break;
}
@@ -1471,7 +1502,16 @@ bool ARMBaseInstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
if ((Offset2 - Offset1) / 8 > 64)
return false;
- if (Load1->getMachineOpcode() != Load2->getMachineOpcode())
+  // If the machine opcodes differ, we treat the loads as having different
+  // base addresses, EXCEPT for Thumb2 byte loads where one is t2LDRBi8 and
+  // the other t2LDRBi12. Those count as the same because they are just
+  // different encoding forms of the same basic instruction.
+ if ((Load1->getMachineOpcode() != Load2->getMachineOpcode()) &&
+ !((Load1->getMachineOpcode() == ARM::t2LDRBi8 &&
+ Load2->getMachineOpcode() == ARM::t2LDRBi12) ||
+ (Load1->getMachineOpcode() == ARM::t2LDRBi12 &&
+ Load2->getMachineOpcode() == ARM::t2LDRBi8)))
return false; // FIXME: overly conservative?
// Four loads in a row should be sufficient.
@@ -1686,7 +1726,7 @@ MachineInstr *ARMBaseInstrInfo::optimizeSelect(MachineInstr *MI,
bool PreferFalse) const {
assert((MI->getOpcode() == ARM::MOVCCr || MI->getOpcode() == ARM::t2MOVCCr) &&
"Unknown select instruction");
- const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
+ MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
MachineInstr *DefMI = canFoldIntoMOVCC(MI->getOperand(2).getReg(), MRI, this);
bool Invert = !DefMI;
if (!DefMI)
@@ -1694,11 +1734,17 @@ MachineInstr *ARMBaseInstrInfo::optimizeSelect(MachineInstr *MI,
if (!DefMI)
return 0;
+ // Find new register class to use.
+ MachineOperand FalseReg = MI->getOperand(Invert ? 2 : 1);
+ unsigned DestReg = MI->getOperand(0).getReg();
+ const TargetRegisterClass *PreviousClass = MRI.getRegClass(FalseReg.getReg());
+ if (!MRI.constrainRegClass(DestReg, PreviousClass))
+ return 0;
+
// Create a new predicated version of DefMI.
// Rfalse is the first use.
MachineInstrBuilder NewMI = BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
- DefMI->getDesc(),
- MI->getOperand(0).getReg());
+ DefMI->getDesc(), DestReg);
// Copy all the DefMI operands, excluding its (null) predicate.
const MCInstrDesc &DefDesc = DefMI->getDesc();
@@ -1721,7 +1767,6 @@ MachineInstr *ARMBaseInstrInfo::optimizeSelect(MachineInstr *MI,
// register operand tied to the first def.
// The tie makes the register allocator ensure the FalseReg is allocated the
// same register as operand 0.
- MachineOperand FalseReg = MI->getOperand(Invert ? 2 : 1);
FalseReg.setImplicit();
NewMI.addOperand(FalseReg);
NewMI->tieOperands(0, NewMI->getNumOperands() - 1);
@@ -1781,6 +1826,14 @@ void llvm::emitARMRegPlusImmediate(MachineBasicBlock &MBB,
unsigned DestReg, unsigned BaseReg, int NumBytes,
ARMCC::CondCodes Pred, unsigned PredReg,
const ARMBaseInstrInfo &TII, unsigned MIFlags) {
+ if (NumBytes == 0 && DestReg != BaseReg) {
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), DestReg)
+ .addReg(BaseReg, RegState::Kill)
+ .addImm((unsigned)Pred).addReg(PredReg).addReg(0)
+ .setMIFlags(MIFlags);
+ return;
+ }
+
bool isSub = NumBytes < 0;
if (isSub) NumBytes = -NumBytes;
@@ -1804,6 +1857,115 @@ void llvm::emitARMRegPlusImmediate(MachineBasicBlock &MBB,
}
}
+bool llvm::tryFoldSPUpdateIntoPushPop(MachineFunction &MF,
+ MachineInstr *MI,
+ unsigned NumBytes) {
+ // This optimisation potentially adds lots of load and store
+  // micro-operations; it is really only a benefit to code-size.
+ if (!MF.getFunction()->hasFnAttribute(Attribute::MinSize))
+ return false;
+
+ // If only one register is pushed/popped, LLVM can use an LDR/STR
+ // instead. We can't modify those so make sure we're dealing with an
+ // instruction we understand.
+ bool IsPop = isPopOpcode(MI->getOpcode());
+ bool IsPush = isPushOpcode(MI->getOpcode());
+ if (!IsPush && !IsPop)
+ return false;
+
+ bool IsVFPPushPop = MI->getOpcode() == ARM::VSTMDDB_UPD ||
+ MI->getOpcode() == ARM::VLDMDIA_UPD;
+ bool IsT1PushPop = MI->getOpcode() == ARM::tPUSH ||
+ MI->getOpcode() == ARM::tPOP ||
+ MI->getOpcode() == ARM::tPOP_RET;
+
+ assert((IsT1PushPop || (MI->getOperand(0).getReg() == ARM::SP &&
+ MI->getOperand(1).getReg() == ARM::SP)) &&
+ "trying to fold sp update into non-sp-updating push/pop");
+
+  // The VFP push & pop act on D-registers, so we can only correctly fold in
+  // an adjustment that is a multiple of 8 bytes. Similarly, each rN register
+  // is 4 bytes. Don't try if this is violated.
+ if (NumBytes % (IsVFPPushPop ? 8 : 4) != 0)
+ return false;
+
+  // ARM and Thumb2 push/pop insts have explicit "sp, sp" operands (plus
+  // predicate) so the register list starts at operand 4. Thumb1's starts
+  // just after the predicate.
+ int RegListIdx = IsT1PushPop ? 2 : 4;
+
+ // Calculate the space we'll need in terms of registers.
+ unsigned FirstReg = MI->getOperand(RegListIdx).getReg();
+ unsigned RD0Reg, RegsNeeded;
+ if (IsVFPPushPop) {
+ RD0Reg = ARM::D0;
+ RegsNeeded = NumBytes / 8;
+ } else {
+ RD0Reg = ARM::R0;
+ RegsNeeded = NumBytes / 4;
+ }
+
+ // We're going to have to strip all list operands off before
+ // re-adding them since the order matters, so save the existing ones
+ // for later.
+ SmallVector<MachineOperand, 4> RegList;
+ for (int i = MI->getNumOperands() - 1; i >= RegListIdx; --i)
+ RegList.push_back(MI->getOperand(i));
+
+ MachineBasicBlock *MBB = MI->getParent();
+ const TargetRegisterInfo *TRI = MF.getRegInfo().getTargetRegisterInfo();
+ const MCPhysReg *CSRegs = TRI->getCalleeSavedRegs(&MF);
+
+ // Now try to find enough space in the reglist to allocate NumBytes.
+ for (unsigned CurReg = FirstReg - 1; CurReg >= RD0Reg && RegsNeeded;
+ --CurReg) {
+ if (!IsPop) {
+      // Pushing any register is completely harmless; mark the register
+      // involved as undef since we don't care about its value in the
+      // slightest.
+ RegList.push_back(MachineOperand::CreateReg(CurReg, false, false,
+ false, false, true));
+ --RegsNeeded;
+ continue;
+ }
+
+ // However, we can only pop an extra register if it's not live. For
+ // registers live within the function we might clobber a return value
+ // register; the other way a register can be live here is if it's
+ // callee-saved.
+ if (isCalleeSavedRegister(CurReg, CSRegs) ||
+ MBB->computeRegisterLiveness(TRI, CurReg, MI) !=
+ MachineBasicBlock::LQR_Dead) {
+ // VFP pops don't allow holes in the register list, so any skip is fatal
+ // for our transformation. GPR pops do, so we should just keep looking.
+ if (IsVFPPushPop)
+ return false;
+ else
+ continue;
+ }
+
+ // Mark the unimportant registers as <def,dead> in the POP.
+ RegList.push_back(MachineOperand::CreateReg(CurReg, true, false, false,
+ true));
+ --RegsNeeded;
+ }
+
+ if (RegsNeeded > 0)
+ return false;
+
+ // Finally we know we can profitably perform the optimisation so go
+ // ahead: strip all existing registers off and add them back again
+ // in the right order.
+ for (int i = MI->getNumOperands() - 1; i >= RegListIdx; --i)
+ MI->RemoveOperand(i);
+
+ // Add the complete list back in.
+ MachineInstrBuilder MIB(MF, &*MI);
+ for (int i = RegList.size() - 1; i >= 0; --i)
+ MIB.addOperand(RegList[i]);
+
+ return true;
+}
+
bool llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
unsigned FrameReg, int &Offset,
const ARMBaseInstrInfo &TII) {
@@ -2210,8 +2372,32 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2,
isSafe = true;
break;
}
- // Condition code is after the operand before CPSR.
- ARMCC::CondCodes CC = (ARMCC::CondCodes)Instr.getOperand(IO-1).getImm();
+ // Condition code is after the operand before CPSR except for VSELs.
+ ARMCC::CondCodes CC;
+ bool IsInstrVSel = true;
+ switch (Instr.getOpcode()) {
+ default:
+ IsInstrVSel = false;
+ CC = (ARMCC::CondCodes)Instr.getOperand(IO - 1).getImm();
+ break;
+ case ARM::VSELEQD:
+ case ARM::VSELEQS:
+ CC = ARMCC::EQ;
+ break;
+ case ARM::VSELGTD:
+ case ARM::VSELGTS:
+ CC = ARMCC::GT;
+ break;
+ case ARM::VSELGED:
+ case ARM::VSELGES:
+ CC = ARMCC::GE;
+ break;
+ case ARM::VSELVSS:
+ case ARM::VSELVSD:
+ CC = ARMCC::VS;
+ break;
+ }
+
if (Sub) {
ARMCC::CondCodes NewCC = getSwappedCondition(CC);
if (NewCC == ARMCC::AL)
@@ -2222,11 +2408,14 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2,
// If it is safe to remove CmpInstr, the condition code of these
// operands will be modified.
if (SrcReg2 != 0 && Sub->getOperand(1).getReg() == SrcReg2 &&
- Sub->getOperand(2).getReg() == SrcReg)
- OperandsToUpdate.push_back(std::make_pair(&((*I).getOperand(IO-1)),
- NewCC));
- }
- else
+ Sub->getOperand(2).getReg() == SrcReg) {
+ // VSel doesn't support condition code update.
+ if (IsInstrVSel)
+ return false;
+ OperandsToUpdate.push_back(
+ std::make_pair(&((*I).getOperand(IO - 1)), NewCC));
+ }
+ } else
switch (CC) {
default:
// CPSR can be used multiple times, we should continue.
@@ -3582,6 +3771,24 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
return Latency;
}
+unsigned ARMBaseInstrInfo::getPredicationCost(const MachineInstr *MI) const {
+ if (MI->isCopyLike() || MI->isInsertSubreg() ||
+ MI->isRegSequence() || MI->isImplicitDef())
+ return 0;
+
+ if (MI->isBundle())
+ return 0;
+
+ const MCInstrDesc &MCID = MI->getDesc();
+
+ if (MCID.isCall() || MCID.hasImplicitDefOfPhysReg(ARM::CPSR)) {
+ // When predicated, CPSR is an additional source operand for CPSR updating
+    // instructions; this apparently increases their latencies.
+ return 1;
+ }
+ return 0;
+}
+
unsigned ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
const MachineInstr *MI,
unsigned *PredCost) const {
@@ -4114,7 +4321,7 @@ breakPartialRegDependency(MachineBasicBlock::iterator MI,
// FIXME: In some cases, VLDRS can be changed to a VLD1DUPd32 which defines
// the full D-register by loading the same value to both lanes. The
// instruction is micro-coded with 2 uops, so don't do this until we can
- // properly schedule micro-coded instuctions. The dispatcher stalls cause
+ // properly schedule micro-coded instructions. The dispatcher stalls cause
// too big regressions.
// Insert the dependency-breaking FCONSTD before MI.
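To make tryFoldSPUpdateIntoPushPop concrete, consider a minsize epilogue
(hypothetical registers; the fold only fires when the extra registers are
dead, or on a push, where clobbering them is harmless):

  add  sp, sp, #8        @ deallocate 8 bytes of locals
  pop  {r4, pc}

becomes

  pop  {r2, r3, r4, pc}  @ r2/r3 popped as <def,dead> scratch

The two extra 4-byte pops absorb the adjustment, saving the separate add.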
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h
index 96f8637..93e5964 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -264,6 +264,8 @@ private:
const MCInstrDesc &UseMCID,
unsigned UseIdx, unsigned UseAlign) const;
+ unsigned getPredicationCost(const MachineInstr *MI) const;
+
unsigned getInstrLatency(const InstrItineraryData *ItinData,
const MachineInstr *MI,
unsigned *PredCost = 0) const;
@@ -360,6 +362,17 @@ bool isIndirectBranchOpcode(int Opc) {
return Opc == ARM::BX || Opc == ARM::MOVPCRX || Opc == ARM::tBRIND;
}
+static inline bool isPopOpcode(int Opc) {
+ return Opc == ARM::tPOP_RET || Opc == ARM::LDMIA_RET ||
+ Opc == ARM::t2LDMIA_RET || Opc == ARM::tPOP || Opc == ARM::LDMIA_UPD ||
+ Opc == ARM::t2LDMIA_UPD || Opc == ARM::VLDMDIA_UPD;
+}
+
+static inline bool isPushOpcode(int Opc) {
+ return Opc == ARM::tPUSH || Opc == ARM::t2STMDB_UPD ||
+ Opc == ARM::STMDB_UPD || Opc == ARM::VSTMDDB_UPD;
+}
+
/// getInstrPredicate - If instruction is predicated, returns its predicate
/// condition, otherwise returns AL. It also returns the condition code
/// register by reference.
@@ -399,6 +412,13 @@ void emitThumbRegPlusImmediate(MachineBasicBlock &MBB,
const ARMBaseRegisterInfo& MRI,
unsigned MIFlags = 0);
+/// Tries to add registers to the reglist of a given base-updating
+/// push/pop instruction to adjust the stack by an additional
+/// NumBytes. This can save a few bytes per function in code-size, but
+/// obviously generates more memory traffic. As such, it only takes
+/// effect in functions being optimised for size.
+bool tryFoldSPUpdateIntoPushPop(MachineFunction &MF, MachineInstr *MI,
+ unsigned NumBytes);
/// rewriteARMFrameIndex / rewriteT2FrameIndex -
/// Rewrite MI to access 'Offset' bytes from the FP. Return false if the
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index 58c06e3..8717dc0 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -51,20 +51,34 @@ ARMBaseRegisterInfo::ARMBaseRegisterInfo(const ARMSubtarget &sti)
const uint16_t*
ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
- bool ghcCall = false;
-
- if (MF) {
- const Function *F = MF->getFunction();
- ghcCall = (F ? F->getCallingConv() == CallingConv::GHC : false);
- }
-
- if (ghcCall)
+ const uint16_t *RegList = (STI.isTargetIOS() && !STI.isAAPCS_ABI())
+ ? CSR_iOS_SaveList
+ : CSR_AAPCS_SaveList;
+
+ if (!MF) return RegList;
+
+ const Function *F = MF->getFunction();
+ if (F->getCallingConv() == CallingConv::GHC) {
// GHC set of callee saved regs is empty as all those regs are
// used for passing STG regs around
return CSR_NoRegs_SaveList;
- else
- return (STI.isTargetIOS() && !STI.isAAPCS_ABI())
- ? CSR_iOS_SaveList : CSR_AAPCS_SaveList;
+ } else if (F->hasFnAttribute("interrupt")) {
+ if (STI.isMClass()) {
+      // M-class CPUs have hardware which saves the registers needed to allow
+      // an AAPCS-conforming function to act as a handler.
+ return CSR_AAPCS_SaveList;
+ } else if (F->getFnAttribute("interrupt").getValueAsString() == "FIQ") {
+      // Fast interrupt mode gives the handler a private copy of R8-R14, so
+      // fewer registers need to be saved to restore user-mode state.
+ return CSR_FIQ_SaveList;
+ } else {
+ // Generally only R13-R14 (i.e. SP, LR) are automatically preserved by
+ // exception handling.
+ return CSR_GenericInt_SaveList;
+ }
+ }
+
+ return RegList;
}
const uint32_t*
@@ -371,14 +385,6 @@ ARMBaseRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
return ARM::SP;
}
-unsigned ARMBaseRegisterInfo::getEHExceptionRegister() const {
- llvm_unreachable("What is the exception register");
-}
-
-unsigned ARMBaseRegisterInfo::getEHHandlerRegister() const {
- llvm_unreachable("What is the exception handler register");
-}
-
/// emitLoadConstPool - Emits a load from constpool to materialize the
/// specified immediate.
void ARMBaseRegisterInfo::
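For reference, the "interrupt" function attribute tested above is what Clang
(with the matching ARM support) attaches for handlers declared like this
hypothetical C; the string value picks the save list (M-class always uses the
AAPCS list, "FIQ" selects CSR_FIQ_SaveList, anything else the generic one):

  __attribute__((interrupt("FIQ")))
  void fiq_handler(void) {
    /* R8-R14 are banked in FIQ mode, so only R0-R7 (plus R11 and LR, see
       CSR_FIQ in ARMCallingConv.td below) need saving here. */
  }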
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.h b/lib/Target/ARM/ARMBaseRegisterInfo.h
index cdaad05..e28fff6 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.h
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.h
@@ -72,6 +72,14 @@ static inline bool isARMArea3Register(unsigned Reg, bool isIOS) {
}
}
+static inline bool isCalleeSavedRegister(unsigned Reg,
+ const MCPhysReg *CSRegs) {
+ for (unsigned i = 0; CSRegs[i]; ++i)
+ if (Reg == CSRegs[i])
+ return true;
+ return false;
+}
+
class ARMBaseRegisterInfo : public ARMGenRegisterInfo {
protected:
const ARMSubtarget &STI;
@@ -149,10 +157,6 @@ public:
unsigned getFrameRegister(const MachineFunction &MF) const;
unsigned getBaseRegister() const { return BasePtr; }
- // Exception handling queries.
- unsigned getEHExceptionRegister() const;
- unsigned getEHHandlerRegister() const;
-
bool isLowRegister(unsigned Reg) const;
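The new isCalleeSavedRegister helper just scans the null-terminated array
that getCalleeSavedRegs returns; tryFoldSPUpdateIntoPushPop above is its
first user, along these lines:

  const MCPhysReg *CSRegs = TRI->getCalleeSavedRegs(&MF);
  bool MustPreserve = isCalleeSavedRegister(CurReg, CSRegs);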
diff --git a/lib/Target/ARM/ARMBuildAttrs.h b/lib/Target/ARM/ARMBuildAttrs.h
index f614dca..b16d4ef 100644
--- a/lib/Target/ARM/ARMBuildAttrs.h
+++ b/lib/Target/ARM/ARMBuildAttrs.h
@@ -15,11 +15,13 @@
#ifndef __TARGET_ARMBUILDATTRS_H__
#define __TARGET_ARMBUILDATTRS_H__
+namespace llvm {
namespace ARMBuildAttrs {
+
enum SpecialAttr {
// This is for the .cpu asm attr. It translates into one or more
// AttrType (below) entries in the .ARM.attributes section in the ELF.
- SEL_CPU
+ SEL_CPU
};
enum AttrType {
@@ -57,7 +59,7 @@ namespace ARMBuildAttrs {
ABI_FP_optimization_goals = 31,
compatibility = 32,
CPU_unaligned_access = 34,
- VFP_HP_extension = 36,
+ FP_HP_extension = 36,
ABI_FP_16bit_format = 38,
MPextension_use = 42, // was 70, 2.08 ABI
DIV_use = 44,
@@ -93,7 +95,7 @@ namespace ARMBuildAttrs {
v8 = 14 // v8, AArch32
};
- enum CPUArchProfile { // (=7), uleb128
+ enum CPUArchProfile { // (=7), uleb128
Not_Applicable = 0, // pre v7, or cross-profile code
ApplicationProfile = (0x41), // 'A' (e.g. for Cortex A8)
RealTimeProfile = (0x52), // 'R' (e.g. for Cortex R4)
@@ -102,34 +104,67 @@ namespace ARMBuildAttrs {
};
// The following have a lot of common use cases
- enum {
- //ARMISAUse (=8), uleb128 and THUMBISAUse (=9), uleb128
+ enum {
Not_Allowed = 0,
Allowed = 1,
- AllowedNeonV8 = 3,
- // FP_arch (=10), uleb128 (formerly Tag_VFP_arch = 10)
+ // Tag_ARM_ISA_use (=8), uleb128
+
+ // Tag_THUMB_ISA_use, (=9), uleb128
+ AllowThumb32 = 2, // 32-bit Thumb (implies 16-bit instructions)
+
+ // Tag_FP_arch (=10), uleb128 (formerly Tag_VFP_arch = 10)
AllowFPv2 = 2, // v2 FP ISA permitted (implies use of the v1 FP ISA)
AllowFPv3A = 3, // v3 FP ISA permitted (implies use of the v2 FP ISA)
- AllowFPv3B = 4, // v3 FP ISA permitted, but only D0-D15, S0-S31
- AllowFPv4A = 5, // v4 FP ISA permitted (implies use of v3 FP ISA)
+ AllowFPv3B = 4, // v3 FP ISA permitted, but only D0-D15, S0-S31
+ AllowFPv4A = 5, // v4 FP ISA permitted (implies use of v3 FP ISA)
AllowFPv4B = 6, // v4 FP ISA was permitted, but only D0-D15, S0-S31
- AllowV8FPA = 7, // Use of the ARM v8-A FP ISA was permitted
- AllowV8FPB = 8, // Use of the ARM v8-A FP ISA was permitted, but only D0-D15, S0-S31
+ AllowFPARMv8A = 7, // Use of the ARM v8-A FP ISA was permitted
+ AllowFPARMv8B = 8, // Use of the ARM v8-A FP ISA was permitted, but only D0-D15, S0-S31
// Tag_WMMX_arch, (=11), uleb128
- AllowThumb32 = 2, // 32-bit Thumb (implies 16-bit instructions)
-
- // Tag_WMMX_arch, (=11), uleb128
- AllowWMMXv1 = 2, // The user permitted this entity to use WMMX v2
+ AllowWMMXv1 = 1, // The user permitted this entity to use WMMX v1
+ AllowWMMXv2 = 2, // The user permitted this entity to use WMMX v2
+
+ // Tag_Advanced_SIMD_arch, (=12), uleb128
+ AllowNeon = 1, // SIMDv1 was permitted
+ AllowNeon2 = 2, // SIMDv2 was permitted (Half-precision FP, MAC operations)
+ AllowNeonARMv8 = 3, // ARM v8-A SIMD was permitted
- // Tag_ABI_FP_denormal, (=20), uleb128
+ // Tag_ABI_FP_denormal, (=20), uleb128
PreserveFPSign = 2, // sign when flushed-to-zero is preserved
// Tag_ABI_FP_number_model, (=23), uleb128
AllowRTABI = 2, // numbers, infinities, and one quiet NaN (see [RTABI])
- AllowIEE754 = 3 // this code to use all the IEEE 754-defined FP encodings
+ AllowIEE754 = 3, // this code to use all the IEEE 754-defined FP encodings
+
+ // Tag_ABI_HardFP_use, (=27), uleb128
+ HardFPImplied = 0, // FP use should be implied by Tag_FP_arch
+ HardFPSinglePrecision = 1, // Single-precision only
+
+ // Tag_ABI_VFP_args, (=28), uleb128
+ BaseAAPCS = 0,
+ HardFPAAPCS = 1,
+
+ // Tag_FP_HP_extension, (=36), uleb128
+ AllowHPFP = 1, // Allow use of Half Precision FP
+
+ // Tag_MPextension_use, (=42), uleb128
+ AllowMP = 1, // Allow use of MP extensions
+
+ // Tag_DIV_use, (=44), uleb128
+ AllowDIVIfExists = 0, // Allow hardware divide if available in arch, or no info exists.
+ DisallowDIV = 1, // Hardware divide explicitly disallowed
+ AllowDIVExt = 2, // Allow hardware divide as optional architecture extension above
+ // the base arch specified by Tag_CPU_arch and Tag_CPU_arch_profile.
+
+ // Tag_Virtualization_use, (=68), uleb128
+ AllowTZ = 1,
+ AllowVirtualization = 2,
+ AllowTZVirtualization = 3
};
-}
+
+} // namespace ARMBuildAttrs
+} // namespace llvm
#endif // __TARGET_ARMBUILDATTRS_H__
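Putting the tags and values together, a v7-A hard-float NEON build ends up
with assembly along these lines (a sketch; exact tags depend on the
subtarget features):

  .cpu    cortex-a15
  .eabi_attribute 6, 10    @ Tag_CPU_arch = v7
  .eabi_attribute 7, 65    @ Tag_CPU_arch_profile = 'A' (ApplicationProfile)
  .eabi_attribute 8, 1     @ Tag_ARM_ISA_use = Allowed
  .eabi_attribute 9, 2     @ Tag_THUMB_ISA_use = AllowThumb32
  .fpu    neon-vfpv4
  .eabi_attribute 28, 1    @ Tag_ABI_VFP_args = HardFPAAPCS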
diff --git a/lib/Target/ARM/ARMCallingConv.td b/lib/Target/ARM/ARMCallingConv.td
index 89c5223..9bea4b2 100644
--- a/lib/Target/ARM/ARMCallingConv.td
+++ b/lib/Target/ARM/ARMCallingConv.td
@@ -207,4 +207,24 @@ def CSR_AAPCS_ThisReturn : CalleeSavedRegs<(add LR, R11, R10, R9, R8, R7, R6,
def CSR_iOS : CalleeSavedRegs<(add LR, R7, R6, R5, R4, (sub CSR_AAPCS, R9))>;
def CSR_iOS_ThisReturn : CalleeSavedRegs<(add LR, R7, R6, R5, R4,
- (sub CSR_AAPCS_ThisReturn, R9))>;
+ (sub CSR_AAPCS_ThisReturn, R9))>;
+
+// The "interrupt" attribute is used to generate code that is acceptable in
+// exception-handlers of various kinds. It makes us use a different return
+// instruction (handled elsewhere) and affects which registers we must return to
+// our "caller" in the same state as we receive them.
+
+// For most interrupts, all registers except SP and LR are shared with
+// user-space. We mark LR to be saved anyway, since this is what the ARM backend
+// generally does rather than tracking its liveness as a normal register.
+def CSR_GenericInt : CalleeSavedRegs<(add LR, (sequence "R%u", 12, 0))>;
+
+// The fast interrupt handlers have more private state and get their own copies
+// of R8-R12, in addition to SP and LR. As before, mark LR for saving too.
+
+// FIXME: we mark R11 as callee-saved since it's often the frame-pointer, and
+// current frame lowering expects to encounter it while processing callee-saved
+// registers.
+def CSR_FIQ : CalleeSavedRegs<(add LR, R11, (sequence "R%u", 7, 0))>;
+
+
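For the record, (sequence "R%u", 12, 0) expands in decreasing order, so the
two save lists above flatten to:

  CSR_GenericInt: LR, R12, R11, R10, R9, R8, R7, R6, R5, R4, R3, R2, R1, R0
  CSR_FIQ:        LR, R11, R7, R6, R5, R4, R3, R2, R1, R0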
diff --git a/lib/Target/ARM/ARMConstantPoolValue.cpp b/lib/Target/ARM/ARMConstantPoolValue.cpp
index 4e703ec..7d41c69 100644
--- a/lib/Target/ARM/ARMConstantPoolValue.cpp
+++ b/lib/Target/ARM/ARMConstantPoolValue.cpp
@@ -163,21 +163,7 @@ const BlockAddress *ARMConstantPoolConstant::getBlockAddress() const {
int ARMConstantPoolConstant::getExistingMachineCPValue(MachineConstantPool *CP,
unsigned Alignment) {
- unsigned AlignMask = Alignment - 1;
- const std::vector<MachineConstantPoolEntry> Constants = CP->getConstants();
- for (unsigned i = 0, e = Constants.size(); i != e; ++i) {
- if (Constants[i].isMachineConstantPoolEntry() &&
- (Constants[i].getAlignment() & AlignMask) == 0) {
- ARMConstantPoolValue *CPV =
- (ARMConstantPoolValue *)Constants[i].Val.MachineCPVal;
- ARMConstantPoolConstant *APC = dyn_cast<ARMConstantPoolConstant>(CPV);
- if (!APC) continue;
- if (APC->CVal == CVal && equals(APC))
- return i;
- }
- }
-
- return -1;
+ return getExistingMachineCPValueImpl<ARMConstantPoolConstant>(CP, Alignment);
}
bool ARMConstantPoolConstant::hasSameValue(ARMConstantPoolValue *ACPV) {
@@ -216,22 +202,7 @@ ARMConstantPoolSymbol::Create(LLVMContext &C, const char *s,
int ARMConstantPoolSymbol::getExistingMachineCPValue(MachineConstantPool *CP,
unsigned Alignment) {
- unsigned AlignMask = Alignment - 1;
- const std::vector<MachineConstantPoolEntry> Constants = CP->getConstants();
- for (unsigned i = 0, e = Constants.size(); i != e; ++i) {
- if (Constants[i].isMachineConstantPoolEntry() &&
- (Constants[i].getAlignment() & AlignMask) == 0) {
- ARMConstantPoolValue *CPV =
- (ARMConstantPoolValue *)Constants[i].Val.MachineCPVal;
- ARMConstantPoolSymbol *APS = dyn_cast<ARMConstantPoolSymbol>(CPV);
- if (!APS) continue;
-
- if (APS->S == S && equals(APS))
- return i;
- }
- }
-
- return -1;
+ return getExistingMachineCPValueImpl<ARMConstantPoolSymbol>(CP, Alignment);
}
bool ARMConstantPoolSymbol::hasSameValue(ARMConstantPoolValue *ACPV) {
@@ -271,22 +242,7 @@ ARMConstantPoolMBB *ARMConstantPoolMBB::Create(LLVMContext &C,
int ARMConstantPoolMBB::getExistingMachineCPValue(MachineConstantPool *CP,
unsigned Alignment) {
- unsigned AlignMask = Alignment - 1;
- const std::vector<MachineConstantPoolEntry> Constants = CP->getConstants();
- for (unsigned i = 0, e = Constants.size(); i != e; ++i) {
- if (Constants[i].isMachineConstantPoolEntry() &&
- (Constants[i].getAlignment() & AlignMask) == 0) {
- ARMConstantPoolValue *CPV =
- (ARMConstantPoolValue *)Constants[i].Val.MachineCPVal;
- ARMConstantPoolMBB *APMBB = dyn_cast<ARMConstantPoolMBB>(CPV);
- if (!APMBB) continue;
-
- if (APMBB->MBB == MBB && equals(APMBB))
- return i;
- }
- }
-
- return -1;
+ return getExistingMachineCPValueImpl<ARMConstantPoolMBB>(CP, Alignment);
}
bool ARMConstantPoolMBB::hasSameValue(ARMConstantPoolValue *ACPV) {
diff --git a/lib/Target/ARM/ARMConstantPoolValue.h b/lib/Target/ARM/ARMConstantPoolValue.h
index 93812fe..7ae7bf4 100644
--- a/lib/Target/ARM/ARMConstantPoolValue.h
+++ b/lib/Target/ARM/ARMConstantPoolValue.h
@@ -15,6 +15,7 @@
#define LLVM_TARGET_ARM_CONSTANTPOOLVALUE_H
#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
#include <cstddef>
@@ -64,6 +65,26 @@ protected:
ARMConstantPoolValue(LLVMContext &C, unsigned id, ARMCP::ARMCPKind Kind,
unsigned char PCAdj, ARMCP::ARMCPModifier Modifier,
bool AddCurrentAddress);
+
+ template <typename Derived>
+ int getExistingMachineCPValueImpl(MachineConstantPool *CP,
+ unsigned Alignment) {
+ unsigned AlignMask = Alignment - 1;
+ const std::vector<MachineConstantPoolEntry> &Constants = CP->getConstants();
+ for (unsigned i = 0, e = Constants.size(); i != e; ++i) {
+ if (Constants[i].isMachineConstantPoolEntry() &&
+ (Constants[i].getAlignment() & AlignMask) == 0) {
+ ARMConstantPoolValue *CPV =
+ (ARMConstantPoolValue *)Constants[i].Val.MachineCPVal;
+ if (Derived *APC = dyn_cast<Derived>(CPV))
+ if (cast<Derived>(this)->equals(APC))
+ return i;
+ }
+ }
+
+ return -1;
+ }
+
public:
virtual ~ARMConstantPoolValue();
@@ -156,6 +177,10 @@ public:
static bool classof(const ARMConstantPoolValue *APV) {
return APV->isGlobalValue() || APV->isBlockAddress() || APV->isLSDA();
}
+
+ bool equals(const ARMConstantPoolConstant *A) const {
+ return CVal == A->CVal && ARMConstantPoolValue::equals(A);
+ }
};
/// ARMConstantPoolSymbol - ARM-specific constantpool values for external
@@ -187,6 +212,10 @@ public:
static bool classof(const ARMConstantPoolValue *ACPV) {
return ACPV->isExtSymbol();
}
+
+ bool equals(const ARMConstantPoolSymbol *A) const {
+ return S == A->S && ARMConstantPoolValue::equals(A);
+ }
};
/// ARMConstantPoolMBB - ARM-specific constantpool value of a machine basic
@@ -219,6 +248,10 @@ public:
static bool classof(const ARMConstantPoolValue *ACPV) {
return ACPV->isMachineBasicBlock();
}
+
+ bool equals(const ARMConstantPoolMBB *A) const {
+ return MBB == A->MBB && ARMConstantPoolValue::equals(A);
+ }
};
} // End llvm namespace
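getExistingMachineCPValueImpl is a small CRTP-style helper: parameterising
the base's search on the concrete subclass lets it filter constant-pool
entries by type and call the subclass's non-virtual equals(). A
self-contained sketch of the shape (plain dynamic_cast standing in for
LLVM's dyn_cast; the names are hypothetical):

  #include <vector>

  struct PoolValue {
    virtual ~PoolValue() {}
  protected:
    template <typename Derived>
    int findExistingImpl(const std::vector<PoolValue *> &Pool) {
      for (unsigned i = 0, e = Pool.size(); i != e; ++i)
        if (Derived *D = dynamic_cast<Derived *>(Pool[i])) // type filter
          if (static_cast<Derived *>(this)->equals(D))     // static dispatch
            return int(i);
      return -1;
    }
  };

  struct SymbolValue : PoolValue {
    const char *S;
    bool equals(const SymbolValue *A) const { return S == A->S; }
    int findExisting(const std::vector<PoolValue *> &Pool) {
      return findExistingImpl<SymbolValue>(Pool);
    }
  };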
diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
index beb843c..e6f7f86 100644
--- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp
+++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -692,10 +692,9 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
unsigned newOpc = Opcode == ARM::VMOVScc ? ARM::VMOVS : ARM::VMOVD;
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(newOpc),
MI.getOperand(1).getReg())
- .addReg(MI.getOperand(2).getReg(),
- getKillRegState(MI.getOperand(2).isKill()))
+ .addOperand(MI.getOperand(2))
.addImm(MI.getOperand(3).getImm()) // 'pred'
- .addReg(MI.getOperand(4).getReg());
+ .addOperand(MI.getOperand(4));
MI.eraseFromParent();
return true;
@@ -705,10 +704,9 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
unsigned Opc = AFI->isThumbFunction() ? ARM::t2MOVr : ARM::MOVr;
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc),
MI.getOperand(1).getReg())
- .addReg(MI.getOperand(2).getReg(),
- getKillRegState(MI.getOperand(2).isKill()))
+ .addOperand(MI.getOperand(2))
.addImm(MI.getOperand(3).getImm()) // 'pred'
- .addReg(MI.getOperand(4).getReg())
+ .addOperand(MI.getOperand(4))
.addReg(0); // 's' bit
MI.eraseFromParent();
@@ -717,39 +715,36 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
case ARM::MOVCCsi: {
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVsi),
(MI.getOperand(1).getReg()))
- .addReg(MI.getOperand(2).getReg(),
- getKillRegState(MI.getOperand(2).isKill()))
+ .addOperand(MI.getOperand(2))
.addImm(MI.getOperand(3).getImm())
.addImm(MI.getOperand(4).getImm()) // 'pred'
- .addReg(MI.getOperand(5).getReg())
+ .addOperand(MI.getOperand(5))
.addReg(0); // 's' bit
MI.eraseFromParent();
return true;
}
-
case ARM::MOVCCsr: {
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVsr),
(MI.getOperand(1).getReg()))
- .addReg(MI.getOperand(2).getReg(),
- getKillRegState(MI.getOperand(2).isKill()))
- .addReg(MI.getOperand(3).getReg(),
- getKillRegState(MI.getOperand(3).isKill()))
+ .addOperand(MI.getOperand(2))
+ .addOperand(MI.getOperand(3))
.addImm(MI.getOperand(4).getImm())
.addImm(MI.getOperand(5).getImm()) // 'pred'
- .addReg(MI.getOperand(6).getReg())
+ .addOperand(MI.getOperand(6))
.addReg(0); // 's' bit
MI.eraseFromParent();
return true;
}
+ case ARM::t2MOVCCi16:
case ARM::MOVCCi16: {
- BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVi16),
+ unsigned NewOpc = AFI->isThumbFunction() ? ARM::t2MOVi16 : ARM::MOVi16;
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc),
MI.getOperand(1).getReg())
.addImm(MI.getOperand(2).getImm())
.addImm(MI.getOperand(3).getImm()) // 'pred'
- .addReg(MI.getOperand(4).getReg());
-
+ .addOperand(MI.getOperand(4));
MI.eraseFromParent();
return true;
}
@@ -760,23 +755,47 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
MI.getOperand(1).getReg())
.addImm(MI.getOperand(2).getImm())
.addImm(MI.getOperand(3).getImm()) // 'pred'
- .addReg(MI.getOperand(4).getReg())
+ .addOperand(MI.getOperand(4))
.addReg(0); // 's' bit
MI.eraseFromParent();
return true;
}
+ case ARM::t2MVNCCi:
case ARM::MVNCCi: {
- BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MVNi),
+ unsigned Opc = AFI->isThumbFunction() ? ARM::t2MVNi : ARM::MVNi;
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc),
MI.getOperand(1).getReg())
.addImm(MI.getOperand(2).getImm())
.addImm(MI.getOperand(3).getImm()) // 'pred'
- .addReg(MI.getOperand(4).getReg())
+ .addOperand(MI.getOperand(4))
.addReg(0); // 's' bit
MI.eraseFromParent();
return true;
}
+ case ARM::t2MOVCClsl:
+ case ARM::t2MOVCClsr:
+ case ARM::t2MOVCCasr:
+ case ARM::t2MOVCCror: {
+ unsigned NewOpc;
+ switch (Opcode) {
+ case ARM::t2MOVCClsl: NewOpc = ARM::t2LSLri; break;
+ case ARM::t2MOVCClsr: NewOpc = ARM::t2LSRri; break;
+ case ARM::t2MOVCCasr: NewOpc = ARM::t2ASRri; break;
+ case ARM::t2MOVCCror: NewOpc = ARM::t2RORri; break;
+ default: llvm_unreachable("unexpeced conditional move");
+ }
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc),
+ MI.getOperand(1).getReg())
+ .addOperand(MI.getOperand(2))
+ .addImm(MI.getOperand(3).getImm())
+ .addImm(MI.getOperand(4).getImm()) // 'pred'
+ .addOperand(MI.getOperand(5))
+ .addReg(0); // 's' bit
+ MI.eraseFromParent();
+ return true;
+ }
case ARM::Int_eh_sjlj_dispatchsetup: {
MachineFunction &MF = *MI.getParent()->getParent();
const ARMBaseInstrInfo *AII =
@@ -823,7 +842,7 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
case ARM::MOVsrl_flag:
case ARM::MOVsra_flag: {
- // These are just fancy MOVs insructions.
+ // These are just fancy MOVs instructions.
AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVsi),
MI.getOperand(0).getReg())
.addOperand(MI.getOperand(1))
@@ -938,6 +957,18 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
ExpandMOV32BitImm(MBB, MBBI);
return true;
+ case ARM::SUBS_PC_LR: {
+ MachineInstrBuilder MIB =
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::SUBri), ARM::PC)
+ .addReg(ARM::LR)
+ .addOperand(MI.getOperand(0))
+ .addOperand(MI.getOperand(1))
+ .addOperand(MI.getOperand(2))
+ .addReg(ARM::CPSR, RegState::Undef);
+ TransferImpOps(MI, MIB, MIB);
+ MI.eraseFromParent();
+ return true;
+ }
case ARM::VLDMQIA: {
unsigned NewOpc = ARM::VLDMDIA;
MachineInstrBuilder MIB =
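The practical effect of the new t2MOVCCi16/t2MVNCCi/t2MOVCC-shift cases is
that Thumb2 functions keep these conditional moves as single predicated
Thumb2 instructions rather than borrowing ARM-mode encodings. For example
(hypothetical registers), a t2MOVCCi16 for r0 = EQ ? 42 : r0 expands to a
predicated t2MOVi16, which, once wrapped by the Thumb2 IT-block pass,
prints as:

  it     eq
  movweq r0, #42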
diff --git a/lib/Target/ARM/ARMFPUName.def b/lib/Target/ARM/ARMFPUName.def
new file mode 100644
index 0000000..9a1bbe7
--- /dev/null
+++ b/lib/Target/ARM/ARMFPUName.def
@@ -0,0 +1,32 @@
+//===-- ARMFPUName.def - List of the ARM FPU names --------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the list of the supported ARM FPU names.
+//
+//===----------------------------------------------------------------------===//
+
+// NOTE: NO INCLUDE GUARD DESIRED!
+
+#ifndef ARM_FPU_NAME
+#error "You must define ARM_FPU_NAME(NAME, ID) before including ARMFPUName.h"
+#endif
+
+ARM_FPU_NAME("vfp", VFP)
+ARM_FPU_NAME("vfpv2", VFPV2)
+ARM_FPU_NAME("vfpv3", VFPV3)
+ARM_FPU_NAME("vfpv3-d16", VFPV3_D16)
+ARM_FPU_NAME("vfpv4", VFPV4)
+ARM_FPU_NAME("vfpv4-d16", VFPV4_D16)
+ARM_FPU_NAME("fp-armv8", FP_ARMV8)
+ARM_FPU_NAME("neon", NEON)
+ARM_FPU_NAME("neon-vfpv4", NEON_VFPV4)
+ARM_FPU_NAME("neon-fp-armv8", NEON_FP_ARMV8)
+ARM_FPU_NAME("crypto-neon-fp-armv8", CRYPTO_NEON_FP_ARMV8)
+
+#undef ARM_FPU_NAME
diff --git a/lib/Target/ARM/ARMFPUName.h b/lib/Target/ARM/ARMFPUName.h
new file mode 100644
index 0000000..2a64cce
--- /dev/null
+++ b/lib/Target/ARM/ARMFPUName.h
@@ -0,0 +1,26 @@
+//===-- ARMFPUName.h - List of the ARM FPU names ----------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMFPUNAME_H
+#define ARMFPUNAME_H
+
+namespace llvm {
+namespace ARM {
+
+enum FPUKind {
+ INVALID_FPU = 0
+
+#define ARM_FPU_NAME(NAME, ID) , ID
+#include "ARMFPUName.def"
+};
+
+} // namespace ARM
+} // namespace llvm
+
+#endif // ARMFPUNAME_H
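ARMFPUName.def is a classic X-macro list: a consumer defines ARM_FPU_NAME,
includes the .def, and the preprocessor stamps out one entry per FPU. A
hypothetical consumer mapping IDs back to their names (the real consumers
are the target streamer and the asm parser):

  #include "ARMFPUName.h"

  static const char *getFPUName(unsigned Kind) {
    switch (Kind) {
    default: return 0;
  #define ARM_FPU_NAME(NAME, ID) case llvm::ARM::ID: return NAME;
  #include "ARMFPUName.def"   // also re-undefs ARM_FPU_NAME for us
    }
  }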
diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp
index ed054aa..a4004f3 100644
--- a/lib/Target/ARM/ARMFastISel.cpp
+++ b/lib/Target/ARM/ARMFastISel.cpp
@@ -176,6 +176,8 @@ class ARMFastISel : public FastISel {
// Utility routines.
private:
+    unsigned constrainOperandRegClass(const MCInstrDesc &II, unsigned Op,
+                                      unsigned OpNum);
bool isTypeLegal(Type *Ty, MVT &VT);
bool isLoadTypeLegal(Type *Ty, MVT &VT);
bool ARMEmitCmp(const Value *Src1Value, const Value *Src2Value,
@@ -252,10 +254,10 @@ bool ARMFastISel::DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR) {
bool ARMFastISel::isARMNEONPred(const MachineInstr *MI) {
const MCInstrDesc &MCID = MI->getDesc();
- // If we're a thumb2 or not NEON function we were handled via isPredicable.
+ // If this is a Thumb2 function or not a NEON instruction, it will be handled via isPredicable.
if ((MCID.TSFlags & ARMII::DomainMask) != ARMII::DomainNEON ||
AFI->isThumb2Function())
- return false;
+ return MI->isPredicable();
for (unsigned i = 0, e = MCID.getNumOperands(); i != e; ++i)
if (MCID.OpInfo[i].isPredicate())
@@ -276,7 +278,7 @@ ARMFastISel::AddOptionalDefs(const MachineInstrBuilder &MIB) {
// Do we use a predicate? or...
// Are we NEON in ARM mode and have a predicate operand? If so, I know
// we're not predicable but add it anyway.
- if (TII.isPredicable(MI) || isARMNEONPred(MI))
+ if (isARMNEONPred(MI))
AddDefaultPred(MIB);
// Do we optionally set a predicate? Preds is size > 0 iff the predicate
@@ -291,6 +293,23 @@ ARMFastISel::AddOptionalDefs(const MachineInstrBuilder &MIB) {
return MIB;
}
+unsigned ARMFastISel::constrainOperandRegClass(const MCInstrDesc &II,
+ unsigned Op, unsigned OpNum) {
+ if (TargetRegisterInfo::isVirtualRegister(Op)) {
+ const TargetRegisterClass *RegClass =
+ TII.getRegClass(II, OpNum, &TRI, *FuncInfo.MF);
+ if (!MRI.constrainRegClass(Op, RegClass)) {
+ // If it's not legal to COPY between the register classes, something
+ // has gone very wrong before we got here.
+ unsigned NewOp = createResultReg(RegClass);
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(TargetOpcode::COPY), NewOp).addReg(Op));
+ return NewOp;
+ }
+ }
+ return Op;
+}
+
unsigned ARMFastISel::FastEmitInst_(unsigned MachineInstOpcode,
const TargetRegisterClass* RC) {
unsigned ResultReg = createResultReg(RC);
@@ -306,6 +325,9 @@ unsigned ARMFastISel::FastEmitInst_r(unsigned MachineInstOpcode,
unsigned ResultReg = createResultReg(RC);
const MCInstrDesc &II = TII.get(MachineInstOpcode);
+ // Make sure the input operand is sufficiently constrained to be legal
+ // for this instruction.
+ Op0 = constrainOperandRegClass(II, Op0, 1);
if (II.getNumDefs() >= 1) {
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
.addReg(Op0, Op0IsKill * RegState::Kill));
@@ -326,6 +348,11 @@ unsigned ARMFastISel::FastEmitInst_rr(unsigned MachineInstOpcode,
unsigned ResultReg = createResultReg(RC);
const MCInstrDesc &II = TII.get(MachineInstOpcode);
+ // Make sure the input operands are sufficiently constrained to be legal
+ // for this instruction.
+ Op0 = constrainOperandRegClass(II, Op0, 1);
+ Op1 = constrainOperandRegClass(II, Op1, 2);
+
if (II.getNumDefs() >= 1) {
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
.addReg(Op0, Op0IsKill * RegState::Kill)
@@ -349,6 +376,12 @@ unsigned ARMFastISel::FastEmitInst_rrr(unsigned MachineInstOpcode,
unsigned ResultReg = createResultReg(RC);
const MCInstrDesc &II = TII.get(MachineInstOpcode);
+ // Make sure the input operands are sufficiently constrained to be legal
+ // for this instruction.
+ Op0 = constrainOperandRegClass(II, Op0, 1);
+ Op1 = constrainOperandRegClass(II, Op1, 2);
+ Op2 = constrainOperandRegClass(II, Op2, 3);
+
if (II.getNumDefs() >= 1) {
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
.addReg(Op0, Op0IsKill * RegState::Kill)
@@ -373,6 +406,9 @@ unsigned ARMFastISel::FastEmitInst_ri(unsigned MachineInstOpcode,
unsigned ResultReg = createResultReg(RC);
const MCInstrDesc &II = TII.get(MachineInstOpcode);
+ // Make sure the input operand is sufficiently constrained to be legal
+ // for this instruction.
+ Op0 = constrainOperandRegClass(II, Op0, 1);
if (II.getNumDefs() >= 1) {
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
.addReg(Op0, Op0IsKill * RegState::Kill)
@@ -395,6 +431,9 @@ unsigned ARMFastISel::FastEmitInst_rf(unsigned MachineInstOpcode,
unsigned ResultReg = createResultReg(RC);
const MCInstrDesc &II = TII.get(MachineInstOpcode);
+ // Make sure the input operand is sufficiently constrained to be legal
+ // for this instruction.
+ Op0 = constrainOperandRegClass(II, Op0, 1);
if (II.getNumDefs() >= 1) {
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
.addReg(Op0, Op0IsKill * RegState::Kill)
@@ -418,6 +457,10 @@ unsigned ARMFastISel::FastEmitInst_rri(unsigned MachineInstOpcode,
unsigned ResultReg = createResultReg(RC);
const MCInstrDesc &II = TII.get(MachineInstOpcode);
+ // Make sure the input operands are sufficiently constrained to be legal
+ // for this instruction.
+ Op0 = constrainOperandRegClass(II, Op0, 1);
+ Op1 = constrainOperandRegClass(II, Op1, 2);
if (II.getNumDefs() >= 1) {
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
.addReg(Op0, Op0IsKill * RegState::Kill)
@@ -610,6 +653,7 @@ unsigned ARMFastISel::ARMMaterializeInt(const Constant *C, MVT VT) {
.addConstantPoolIndex(Idx));
else
// The extra immediate is for addrmode2.
+ DestReg = constrainOperandRegClass(TII.get(ARM::LDRcp), DestReg, 0);
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(ARM::LDRcp), DestReg)
.addConstantPoolIndex(Idx)
@@ -685,6 +729,7 @@ unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, MVT VT) {
AddOptionalDefs(MIB);
} else {
// The extra immediate is for addrmode2.
+ DestReg = constrainOperandRegClass(TII.get(ARM::LDRcp), DestReg, 0);
MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(ARM::LDRcp),
DestReg)
.addConstantPoolIndex(Idx)
@@ -855,13 +900,8 @@ bool ARMFastISel::ARMComputeAddress(const Value *Obj, Address &Addr) {
TmpOffset += CI->getSExtValue() * S;
break;
}
- if (isa<AddOperator>(Op) &&
- (!isa<Instruction>(Op) ||
- FuncInfo.MBBMap[cast<Instruction>(Op)->getParent()]
- == FuncInfo.MBB) &&
- isa<ConstantInt>(cast<AddOperator>(Op)->getOperand(1))) {
- // An add (in the same block) with a constant operand. Fold the
- // constant.
+ if (canFoldAddIntoGEP(U, Op)) {
+ // A compatible add with a constant operand. Fold the constant.
ConstantInt *CI =
cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
TmpOffset += CI->getSExtValue() * S;
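At the source level, the folding this hunk generalizes corresponds to patterns like the following (illustrative C, not the actual FastISel input):

    int load_elt(int *p, int i) {
      // The "+ 4" is an add feeding the GEP; canFoldAddIntoGEP lets FastISel
      // fold its constant part (4 * sizeof(int) = 16 bytes) into the address
      // offset instead of materializing a separate add instruction.
      return p[i + 4];
    }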
@@ -1139,6 +1179,7 @@ bool ARMFastISel::ARMEmitStore(MVT VT, unsigned SrcReg, Address &Addr,
(const TargetRegisterClass*)&ARM::tGPRRegClass :
(const TargetRegisterClass*)&ARM::GPRRegClass);
unsigned Opc = isThumb2 ? ARM::t2ANDri : ARM::ANDri;
+ SrcReg = constrainOperandRegClass(TII.get(Opc), SrcReg, 1);
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(Opc), Res)
.addReg(SrcReg).addImm(1));
@@ -1210,6 +1251,7 @@ bool ARMFastISel::ARMEmitStore(MVT VT, unsigned SrcReg, Address &Addr,
ARMSimplifyAddress(Addr, VT, useAM3);
// Create the base instruction, then add the operands.
+ SrcReg = constrainOperandRegClass(TII.get(StrOpc), SrcReg, 0);
MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(StrOpc))
.addReg(SrcReg);
@@ -1333,6 +1375,7 @@ bool ARMFastISel::SelectBranch(const Instruction *I) {
(isLoadTypeLegal(TI->getOperand(0)->getType(), SourceVT))) {
unsigned TstOpc = isThumb2 ? ARM::t2TSTri : ARM::TSTri;
unsigned OpReg = getRegForValue(TI->getOperand(0));
+ OpReg = constrainOperandRegClass(TII.get(TstOpc), OpReg, 0);
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(TstOpc))
.addReg(OpReg).addImm(1));
@@ -1370,6 +1413,7 @@ bool ARMFastISel::SelectBranch(const Instruction *I) {
// and it left a value for us in a virtual register. Ergo, we test
// the one-bit value left in the virtual register.
unsigned TstOpc = isThumb2 ? ARM::t2TSTri : ARM::TSTri;
+ CmpReg = constrainOperandRegClass(TII.get(TstOpc), CmpReg, 0);
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TstOpc))
.addReg(CmpReg).addImm(1));
@@ -1494,13 +1538,15 @@ bool ARMFastISel::ARMEmitCmp(const Value *Src1Value, const Value *Src2Value,
}
}
+ const MCInstrDesc &II = TII.get(CmpOpc);
+ SrcReg1 = constrainOperandRegClass(II, SrcReg1, 0);
if (!UseImm) {
- AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
- TII.get(CmpOpc))
+ SrcReg2 = constrainOperandRegClass(II, SrcReg2, 1);
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
.addReg(SrcReg1).addReg(SrcReg2));
} else {
MachineInstrBuilder MIB;
- MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CmpOpc))
+ MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
.addReg(SrcReg1);
// Only add immediate for icmp as the immediate for fcmp is an implicit 0.0.
@@ -1699,6 +1745,7 @@ bool ARMFastISel::SelectSelect(const Instruction *I) {
}
unsigned CmpOpc = isThumb2 ? ARM::t2CMPri : ARM::CMPri;
+ CondReg = constrainOperandRegClass(TII.get(CmpOpc), CondReg, 0);
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CmpOpc))
.addReg(CondReg).addImm(0));
@@ -1715,12 +1762,16 @@ bool ARMFastISel::SelectSelect(const Instruction *I) {
MovCCOpc = isThumb2 ? ARM::t2MVNCCi : ARM::MVNCCi;
}
unsigned ResultReg = createResultReg(RC);
- if (!UseImm)
+ if (!UseImm) {
+ Op2Reg = constrainOperandRegClass(TII.get(MovCCOpc), Op2Reg, 1);
+ Op1Reg = constrainOperandRegClass(TII.get(MovCCOpc), Op1Reg, 2);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(MovCCOpc), ResultReg)
.addReg(Op2Reg).addReg(Op1Reg).addImm(ARMCC::NE).addReg(ARM::CPSR);
- else
+ } else {
+ Op1Reg = constrainOperandRegClass(TII.get(MovCCOpc), Op1Reg, 1);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(MovCCOpc), ResultReg)
.addReg(Op1Reg).addImm(Imm).addImm(ARMCC::EQ).addReg(ARM::CPSR);
+ }
UpdateValueMap(I, ResultReg);
return true;
}
@@ -1806,6 +1857,8 @@ bool ARMFastISel::SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode) {
if (SrcReg2 == 0) return false;
unsigned ResultReg = createResultReg(&ARM::GPRnopcRegClass);
+ SrcReg1 = constrainOperandRegClass(TII.get(Opc), SrcReg1, 1);
+ SrcReg2 = constrainOperandRegClass(TII.get(Opc), SrcReg2, 2);
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(Opc), ResultReg)
.addReg(SrcReg1).addReg(SrcReg2));
@@ -1933,7 +1986,7 @@ bool ARMFastISel::ProcessCallArgs(SmallVectorImpl<Value*> &Args,
!VA.isRegLoc() || !ArgLocs[++i].isRegLoc())
return false;
} else {
- switch (static_cast<EVT>(ArgVT).getSimpleVT().SimpleTy) {
+ switch (ArgVT.SimpleTy) {
default:
return false;
case MVT::i1:
@@ -2410,15 +2463,22 @@ bool ARMFastISel::SelectCall(const Instruction *I,
MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
DL, TII.get(CallOpc));
+ unsigned char OpFlags = 0;
+
+ // Add MO_PLT for global address or external symbol in the PIC relocation
+ // model.
+ if (Subtarget->isTargetELF() && TM.getRelocationModel() == Reloc::PIC_)
+ OpFlags = ARMII::MO_PLT;
+
// ARM calls don't take a predicate, but tBL / tBLX do.
if (isThumb2)
AddDefaultPred(MIB);
if (UseReg)
MIB.addReg(CalleeReg);
else if (!IntrMemName)
- MIB.addGlobalAddress(GV, 0, 0);
+ MIB.addGlobalAddress(GV, 0, OpFlags);
else
- MIB.addExternalSymbol(IntrMemName, 0);
+ MIB.addExternalSymbol(IntrMemName, OpFlags);
// Add implicit physical register uses to the call.
for (unsigned i = 0, e = RegArgs.size(); i != e; ++i)
@@ -2731,6 +2791,7 @@ unsigned ARMFastISel::ARMEmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opcode), ResultReg);
if (setsCPSR)
MIB.addReg(ARM::CPSR, RegState::Define);
+ SrcReg = constrainOperandRegClass(TII.get(Opcode), SrcReg, 1 + setsCPSR);
AddDefaultPred(MIB.addReg(SrcReg, isKill * RegState::Kill).addImm(ImmEnc));
if (hasS)
AddDefaultCC(MIB);
@@ -2965,12 +3026,14 @@ unsigned ARMFastISel::ARMLowerPICELF(const GlobalValue *GV,
unsigned DestReg1 = createResultReg(TLI.getRegClassFor(VT));
// Load value.
if (isThumb2) {
+ DestReg1 = constrainOperandRegClass(TII.get(ARM::t2LDRpci), DestReg1, 0);
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(ARM::t2LDRpci), DestReg1)
.addConstantPoolIndex(Idx));
Opc = UseGOTOFF ? ARM::t2ADDrr : ARM::t2LDRs;
} else {
// The extra immediate is for addrmode2.
+ DestReg1 = constrainOperandRegClass(TII.get(ARM::LDRcp), DestReg1, 0);
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
DL, TII.get(ARM::LDRcp), DestReg1)
.addConstantPoolIndex(Idx).addImm(0));
@@ -2984,6 +3047,9 @@ unsigned ARMFastISel::ARMLowerPICELF(const GlobalValue *GV,
}
unsigned DestReg2 = createResultReg(TLI.getRegClassFor(VT));
+ DestReg2 = constrainOperandRegClass(TII.get(Opc), DestReg2, 0);
+ DestReg1 = constrainOperandRegClass(TII.get(Opc), DestReg1, 1);
+ GlobalBaseReg = constrainOperandRegClass(TII.get(Opc), GlobalBaseReg, 2);
MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
DL, TII.get(Opc), DestReg2)
.addReg(DestReg1)
@@ -3049,7 +3115,7 @@ bool ARMFastISel::FastLowerArguments() {
ARM::R0, ARM::R1, ARM::R2, ARM::R3
};
- const TargetRegisterClass *RC = TLI.getRegClassFor(MVT::i32);
+ const TargetRegisterClass *RC = &ARM::rGPRRegClass;
Idx = 0;
for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
I != E; ++I, ++Idx) {
diff --git a/lib/Target/ARM/ARMFeatures.h b/lib/Target/ARM/ARMFeatures.h
new file mode 100644
index 0000000..dafc4b3
--- /dev/null
+++ b/lib/Target/ARM/ARMFeatures.h
@@ -0,0 +1,93 @@
+//===-- ARMFeatures.h - Checks for ARM instruction features ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the code shared between ARM CodeGen and ARM MC.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef TARGET_ARM_FEATURES_H
+#define TARGET_ARM_FEATURES_H
+
+#include "ARM.h"
+
+namespace llvm {
+
+template<typename InstrType> // could be MachineInstr or MCInst
+inline bool isV8EligibleForIT(InstrType *Instr, int BLXOperandIndex = 0) {
+ switch (Instr->getOpcode()) {
+ default:
+ return false;
+ case ARM::tADC:
+ case ARM::tADDi3:
+ case ARM::tADDi8:
+ case ARM::tADDrSPi:
+ case ARM::tADDrr:
+ case ARM::tAND:
+ case ARM::tASRri:
+ case ARM::tASRrr:
+ case ARM::tBIC:
+ case ARM::tCMNz:
+ case ARM::tCMPi8:
+ case ARM::tCMPr:
+ case ARM::tEOR:
+ case ARM::tLDRBi:
+ case ARM::tLDRBr:
+ case ARM::tLDRHi:
+ case ARM::tLDRHr:
+ case ARM::tLDRSB:
+ case ARM::tLDRSH:
+ case ARM::tLDRi:
+ case ARM::tLDRr:
+ case ARM::tLDRspi:
+ case ARM::tLSLri:
+ case ARM::tLSLrr:
+ case ARM::tLSRri:
+ case ARM::tLSRrr:
+ case ARM::tMOVi8:
+ case ARM::tMUL:
+ case ARM::tMVN:
+ case ARM::tORR:
+ case ARM::tROR:
+ case ARM::tRSB:
+ case ARM::tSBC:
+ case ARM::tSTRBi:
+ case ARM::tSTRBr:
+ case ARM::tSTRHi:
+ case ARM::tSTRHr:
+ case ARM::tSTRi:
+ case ARM::tSTRr:
+ case ARM::tSTRspi:
+ case ARM::tSUBi3:
+ case ARM::tSUBi8:
+ case ARM::tSUBrr:
+ case ARM::tTST:
+ return true;
+ // There are some "conditionally deprecated" opcodes.
+ case ARM::tADDspr:
+ return Instr->getOperand(2).getReg() != ARM::PC;
+ // ADD PC, SP and BLX PC were always unpredictable; now, on top of that,
+ // they are deprecated.
+ case ARM::tADDrSP:
+ case ARM::tBX:
+ return Instr->getOperand(0).getReg() != ARM::PC;
+ case ARM::tBLXr:
+ return Instr->getOperand(BLXOperandIndex).getReg() != ARM::PC;
+ case ARM::tADDhirr:
+ return Instr->getOperand(0).getReg() != ARM::PC &&
+ Instr->getOperand(2).getReg() != ARM::PC;
+ case ARM::tCMPhir:
+ case ARM::tMOVr:
+ return Instr->getOperand(0).getReg() != ARM::PC &&
+ Instr->getOperand(1).getReg() != ARM::PC;
+ }
+}
+
+} // namespace llvm
+
+#endif // TARGET_ARM_FEATURES_H
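To make the eligibility rules above concrete, an illustrative fragment (hand-written assembly, not from this patch): on ARMv8, an IT block around a PC-involving move is rejected, while the plain register form remains eligible:

    it    eq
    moveq pc, lr   @ tMOVr with PC as an operand: isV8EligibleForIT -> false
    @ "moveq r0, r1" instead would keep the block v8-eligible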
diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp
index c8637be..d32bdbc 100644
--- a/lib/Target/ARM/ARMFrameLowering.cpp
+++ b/lib/Target/ARM/ARMFrameLowering.cpp
@@ -82,22 +82,11 @@ ARMFrameLowering::canSimplifyCallFramePseudos(const MachineFunction &MF) const {
return hasReservedCallFrame(MF) || MF.getFrameInfo()->hasVarSizedObjects();
}
-static bool isCalleeSavedRegister(unsigned Reg, const uint16_t *CSRegs) {
- for (unsigned i = 0; CSRegs[i]; ++i)
- if (Reg == CSRegs[i])
- return true;
- return false;
-}
-
static bool isCSRestore(MachineInstr *MI,
const ARMBaseInstrInfo &TII,
const uint16_t *CSRegs) {
// Integer spill area is handled with "pop".
- if (MI->getOpcode() == ARM::LDMIA_RET ||
- MI->getOpcode() == ARM::t2LDMIA_RET ||
- MI->getOpcode() == ARM::LDMIA_UPD ||
- MI->getOpcode() == ARM::t2LDMIA_UPD ||
- MI->getOpcode() == ARM::VLDMDIA_UPD) {
+ if (isPopOpcode(MI->getOpcode())) {
// The first two operands are predicates. The last two are
// imp-def and imp-use of SP. Check everything in between.
for (int i = 5, e = MI->getNumOperands(); i != e; ++i)
@@ -115,20 +104,31 @@ static bool isCSRestore(MachineInstr *MI,
return false;
}
-static void
-emitSPUpdate(bool isARM,
- MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
- DebugLoc dl, const ARMBaseInstrInfo &TII,
- int NumBytes, unsigned MIFlags = MachineInstr::NoFlags,
- ARMCC::CondCodes Pred = ARMCC::AL, unsigned PredReg = 0) {
+static void emitRegPlusImmediate(bool isARM, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI, DebugLoc dl,
+ const ARMBaseInstrInfo &TII, unsigned DestReg,
+ unsigned SrcReg, int NumBytes,
+ unsigned MIFlags = MachineInstr::NoFlags,
+ ARMCC::CondCodes Pred = ARMCC::AL,
+ unsigned PredReg = 0) {
if (isARM)
- emitARMRegPlusImmediate(MBB, MBBI, dl, ARM::SP, ARM::SP, NumBytes,
+ emitARMRegPlusImmediate(MBB, MBBI, dl, DestReg, SrcReg, NumBytes,
Pred, PredReg, TII, MIFlags);
else
- emitT2RegPlusImmediate(MBB, MBBI, dl, ARM::SP, ARM::SP, NumBytes,
+ emitT2RegPlusImmediate(MBB, MBBI, dl, DestReg, SrcReg, NumBytes,
Pred, PredReg, TII, MIFlags);
}
+static void emitSPUpdate(bool isARM, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI, DebugLoc dl,
+ const ARMBaseInstrInfo &TII, int NumBytes,
+ unsigned MIFlags = MachineInstr::NoFlags,
+ ARMCC::CondCodes Pred = ARMCC::AL,
+ unsigned PredReg = 0) {
+ emitRegPlusImmediate(isARM, MBB, MBBI, dl, TII, ARM::SP, ARM::SP, NumBytes,
+ MIFlags, Pred, PredReg);
+}
+
void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
MachineBasicBlock &MBB = MF.front();
MachineBasicBlock::iterator MBBI = MBB.begin();
@@ -175,6 +175,10 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
unsigned Reg = CSI[i].getReg();
int FI = CSI[i].getFrameIdx();
switch (Reg) {
+ case ARM::R0:
+ case ARM::R1:
+ case ARM::R2:
+ case ARM::R3:
case ARM::R4:
case ARM::R5:
case ARM::R6:
@@ -182,73 +186,61 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
case ARM::LR:
if (Reg == FramePtr)
FramePtrSpillFI = FI;
- AFI->addGPRCalleeSavedArea1Frame(FI);
GPRCS1Size += 4;
break;
case ARM::R8:
case ARM::R9:
case ARM::R10:
case ARM::R11:
+ case ARM::R12:
if (Reg == FramePtr)
FramePtrSpillFI = FI;
- if (STI.isTargetIOS()) {
- AFI->addGPRCalleeSavedArea2Frame(FI);
+ if (STI.isTargetIOS())
GPRCS2Size += 4;
- } else {
- AFI->addGPRCalleeSavedArea1Frame(FI);
+ else
GPRCS1Size += 4;
- }
break;
default:
// This is a DPR. Exclude the aligned DPRCS2 spills.
if (Reg == ARM::D8)
D8SpillFI = FI;
- if (Reg < ARM::D8 || Reg >= ARM::D8 + AFI->getNumAlignedDPRCS2Regs()) {
- AFI->addDPRCalleeSavedAreaFrame(FI);
+ if (Reg < ARM::D8 || Reg >= ARM::D8 + AFI->getNumAlignedDPRCS2Regs())
DPRCSSize += 8;
- }
}
}
// Move past area 1.
- if (GPRCS1Size > 0) MBBI++;
-
- // Set FP to point to the stack slot that contains the previous FP.
- // For iOS, FP is R7, which has now been stored in spill area 1.
- // Otherwise, if this is not iOS, all the callee-saved registers go
- // into spill area 1, including the FP in R11. In either case, it is
- // now safe to emit this assignment.
- bool HasFP = hasFP(MF);
- if (HasFP) {
- unsigned ADDriOpc = !AFI->isThumbFunction() ? ARM::ADDri : ARM::t2ADDri;
- MachineInstrBuilder MIB =
- BuildMI(MBB, MBBI, dl, TII.get(ADDriOpc), FramePtr)
- .addFrameIndex(FramePtrSpillFI).addImm(0)
- .setMIFlag(MachineInstr::FrameSetup);
- AddDefaultCC(AddDefaultPred(MIB));
- }
-
- // Move past area 2.
- if (GPRCS2Size > 0) MBBI++;
+ MachineBasicBlock::iterator LastPush = MBB.end(), FramePtrPush;
+ if (GPRCS1Size > 0)
+ FramePtrPush = LastPush = MBBI++;
// Determine starting offsets of spill areas.
+ bool HasFP = hasFP(MF);
unsigned DPRCSOffset = NumBytes - (GPRCS1Size + GPRCS2Size + DPRCSSize);
unsigned GPRCS2Offset = DPRCSOffset + DPRCSSize;
unsigned GPRCS1Offset = GPRCS2Offset + GPRCS2Size;
- if (HasFP)
+ int FramePtrOffsetInPush = 0;
+ if (HasFP) {
+ FramePtrOffsetInPush = MFI->getObjectOffset(FramePtrSpillFI) + GPRCS1Size;
AFI->setFramePtrSpillOffset(MFI->getObjectOffset(FramePtrSpillFI) +
NumBytes);
+ }
AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset);
AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset);
AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset);
+ // Move past area 2.
+ if (GPRCS2Size > 0) {
+ LastPush = MBBI++;
+ }
+
// Move past area 3.
if (DPRCSSize > 0) {
- MBBI++;
+ LastPush = MBBI++;
// Since vpush register list cannot have gaps, there may be multiple vpush
// instructions in the prologue.
while (MBBI->getOpcode() == ARM::VSTMDDB_UPD)
- MBBI++;
+ LastPush = MBBI++;
}
// Move past the aligned DPRCS2 area.
@@ -264,8 +256,13 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
if (NumBytes) {
// Adjust SP after all the callee-save spills.
- emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes,
- MachineInstr::FrameSetup);
+ if (tryFoldSPUpdateIntoPushPop(MF, LastPush, NumBytes)) {
+ if (LastPush == FramePtrPush)
+ FramePtrOffsetInPush += NumBytes;
+ } else
+ emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes,
+ MachineInstr::FrameSetup);
+
if (HasFP && isARM)
// Restore from fp only in ARM mode: e.g. sub sp, r7, #24
// Note it's not safe to do this in Thumb2 mode because it would have
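The new folding is easiest to see on a small frame (illustrative; assumes two low registers are free to act as scratch):

    @ before folding               @ after tryFoldSPUpdateIntoPushPop
    push {r4, r7, lr}              push {r2, r3, r4, r7, lr}
    sub  sp, sp, #8                @ the 8-byte adjustment is absorbed by
                                   @ pushing two extra, otherwise-dead registers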
@@ -278,6 +275,18 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
AFI->setShouldRestoreSPFromFP(true);
}
+ // Set FP to point to the stack slot that contains the previous FP.
+ // For iOS, FP is R7, which has now been stored in spill area 1.
+ // Otherwise, if this is not iOS, all the callee-saved registers go
+ // into spill area 1, including the FP in R11. In either case, it
+ // is in area one and the adjustment needs to take place just after
+ // that push.
+ if (HasFP)
+ emitRegPlusImmediate(!AFI->isThumbFunction(), MBB, ++FramePtrPush, dl, TII,
+ FramePtr, ARM::SP, FramePtrOffsetInPush,
+ MachineInstr::FrameSetup);
+
if (STI.isTargetELF() && hasFP(MF))
MFI->setOffsetAdjustment(MFI->getOffsetAdjustment() -
AFI->getFramePtrSpillOffset());
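A worked instance of the deferred FP set-up (illustrative iOS-style frame with FP = r7): after "push {r4, r5, r7, lr}", r7's spill slot sits 8 bytes above the new SP, since FramePtrOffsetInPush = ObjectOffset(r7) + GPRCS1Size = -8 + 16 = 8, so the emitted adjustment is:

    add r7, sp, #8   @ FP now points at its own spill slot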
@@ -373,11 +382,11 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes);
} else {
// Unwind MBBI to point to first LDR / VLDRD.
- const uint16_t *CSRegs = RegInfo->getCalleeSavedRegs();
+ const uint16_t *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
if (MBBI != MBB.begin()) {
- do
+ do {
--MBBI;
- while (MBBI != MBB.begin() && isCSRestore(MBBI, TII, CSRegs));
+ } while (MBBI != MBB.begin() && isCSRestore(MBBI, TII, CSRegs));
if (!isCSRestore(MBBI, TII, CSRegs))
++MBBI;
}
@@ -421,8 +430,8 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
ARM::SP)
.addReg(FramePtr));
}
- } else if (NumBytes)
- emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes);
+ } else if (NumBytes && !tryFoldSPUpdateIntoPushPop(MF, MBBI, NumBytes))
+ emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes);
// Increment past our save areas.
if (AFI->getDPRCalleeSavedAreaSize()) {
@@ -501,12 +510,6 @@ ARMFrameLowering::ResolveFrameIndexReference(const MachineFunction &MF,
FrameReg = ARM::SP;
Offset += SPAdj;
- if (AFI->isGPRCalleeSavedArea1Frame(FI))
- return Offset - AFI->getGPRCalleeSavedArea1Offset();
- else if (AFI->isGPRCalleeSavedArea2Frame(FI))
- return Offset - AFI->getGPRCalleeSavedArea2Offset();
- else if (AFI->isDPRCalleeSavedAreaFrame(FI))
- return Offset - AFI->getDPRCalleeSavedAreaOffset();
// SP can move around if there are allocas. We may also lose track of SP
// when emergency spilling inside a non-reserved call frame setup.
@@ -658,6 +661,8 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
unsigned RetOpcode = MI->getOpcode();
bool isTailCall = (RetOpcode == ARM::TCRETURNdi ||
RetOpcode == ARM::TCRETURNri);
+ bool isInterrupt =
+ RetOpcode == ARM::SUBS_PC_LR || RetOpcode == ARM::t2SUBS_PC_LR;
SmallVector<unsigned, 4> Regs;
unsigned i = CSI.size();
@@ -672,7 +677,8 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs)
continue;
- if (Reg == ARM::LR && !isTailCall && !isVarArg && STI.hasV5TOps()) {
+ if (Reg == ARM::LR && !isTailCall && !isVarArg && !isInterrupt &&
+ STI.hasV5TOps()) {
Reg = ARM::PC;
LdmOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_RET : ARM::LDMIA_RET;
// Fold the return instruction into the LDM.
@@ -1199,7 +1205,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
// Don't spill FP if the frame can be eliminated. This is determined
// by scanning the callee-save registers to see if any is used.
- const uint16_t *CSRegs = RegInfo->getCalleeSavedRegs();
+ const uint16_t *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
for (unsigned i = 0; CSRegs[i]; ++i) {
unsigned Reg = CSRegs[i];
bool Spilled = false;
@@ -1226,6 +1232,8 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
case ARM::LR:
LRSpilled = true;
// Fallthrough
+ case ARM::R0: case ARM::R1:
+ case ARM::R2: case ARM::R3:
case ARM::R4: case ARM::R5:
case ARM::R6: case ARM::R7:
CS1Spilled = true;
@@ -1240,6 +1248,8 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
}
switch (Reg) {
+ case ARM::R0: case ARM::R1:
+ case ARM::R2: case ARM::R3:
case ARM::R4: case ARM::R5:
case ARM::R6: case ARM::R7:
case ARM::LR:
@@ -1295,8 +1305,12 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
if (!LRSpilled && CS1Spilled) {
MRI.setPhysRegUsed(ARM::LR);
NumGPRSpills++;
- UnspilledCS1GPRs.erase(std::find(UnspilledCS1GPRs.begin(),
- UnspilledCS1GPRs.end(), (unsigned)ARM::LR));
+ SmallVectorImpl<unsigned>::iterator LRPos;
+ LRPos = std::find(UnspilledCS1GPRs.begin(), UnspilledCS1GPRs.end(),
+ (unsigned)ARM::LR);
+ if (LRPos != UnspilledCS1GPRs.end())
+ UnspilledCS1GPRs.erase(LRPos);
+
ForceLRSpill = false;
ExtraCSSpill = true;
}
diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp
index 4ca3af6..87d1522 100644
--- a/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -130,6 +130,13 @@ public:
return true;
}
+ bool SelectCMOVPred(SDValue N, SDValue &Pred, SDValue &Reg) {
+ const ConstantSDNode *CN = cast<ConstantSDNode>(N);
+ Pred = CurDAG->getTargetConstant(CN->getZExtValue(), MVT::i32);
+ Reg = CurDAG->getRegister(ARM::CPSR, MVT::i32);
+ return true;
+ }
+
bool SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
SDValue &Offset, SDValue &Opc);
bool SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
@@ -239,21 +246,6 @@ private:
/// SelectV6T2BitfieldExtractOp - Select SBFX/UBFX instructions for ARM.
SDNode *SelectV6T2BitfieldExtractOp(SDNode *N, bool isSigned);
- /// SelectCMOVOp - Select CMOV instructions for ARM.
- SDNode *SelectCMOVOp(SDNode *N);
- SDNode *SelectT2CMOVShiftOp(SDNode *N, SDValue FalseVal, SDValue TrueVal,
- ARMCC::CondCodes CCVal, SDValue CCR,
- SDValue InFlag);
- SDNode *SelectARMCMOVShiftOp(SDNode *N, SDValue FalseVal, SDValue TrueVal,
- ARMCC::CondCodes CCVal, SDValue CCR,
- SDValue InFlag);
- SDNode *SelectT2CMOVImmOp(SDNode *N, SDValue FalseVal, SDValue TrueVal,
- ARMCC::CondCodes CCVal, SDValue CCR,
- SDValue InFlag);
- SDNode *SelectARMCMOVImmOp(SDNode *N, SDValue FalseVal, SDValue TrueVal,
- ARMCC::CondCodes CCVal, SDValue CCR,
- SDValue InFlag);
-
// Select special operations if node forms integer ABS pattern
SDNode *SelectABSOp(SDNode *N);
@@ -261,7 +253,7 @@ private:
SDNode *SelectConcatVector(SDNode *N);
- SDNode *SelectAtomic64(SDNode *Node, unsigned Opc);
+ SDNode *SelectAtomic(SDNode *N, unsigned Op8, unsigned Op16, unsigned Op32,
+ unsigned Op64);
/// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
/// inline asm expressions.
@@ -2321,204 +2313,6 @@ SDNode *ARMDAGToDAGISel::SelectV6T2BitfieldExtractOp(SDNode *N,
return NULL;
}
-SDNode *ARMDAGToDAGISel::
-SelectT2CMOVShiftOp(SDNode *N, SDValue FalseVal, SDValue TrueVal,
- ARMCC::CondCodes CCVal, SDValue CCR, SDValue InFlag) {
- SDValue CPTmp0;
- SDValue CPTmp1;
- if (SelectT2ShifterOperandReg(TrueVal, CPTmp0, CPTmp1)) {
- unsigned SOVal = cast<ConstantSDNode>(CPTmp1)->getZExtValue();
- unsigned SOShOp = ARM_AM::getSORegShOp(SOVal);
- unsigned Opc = 0;
- switch (SOShOp) {
- case ARM_AM::lsl: Opc = ARM::t2MOVCClsl; break;
- case ARM_AM::lsr: Opc = ARM::t2MOVCClsr; break;
- case ARM_AM::asr: Opc = ARM::t2MOVCCasr; break;
- case ARM_AM::ror: Opc = ARM::t2MOVCCror; break;
- default:
- llvm_unreachable("Unknown so_reg opcode!");
- }
- SDValue SOShImm =
- CurDAG->getTargetConstant(ARM_AM::getSORegOffset(SOVal), MVT::i32);
- SDValue CC = CurDAG->getTargetConstant(CCVal, MVT::i32);
- SDValue Ops[] = { FalseVal, CPTmp0, SOShImm, CC, CCR, InFlag };
- return CurDAG->SelectNodeTo(N, Opc, MVT::i32,Ops, 6);
- }
- return 0;
-}
-
-SDNode *ARMDAGToDAGISel::
-SelectARMCMOVShiftOp(SDNode *N, SDValue FalseVal, SDValue TrueVal,
- ARMCC::CondCodes CCVal, SDValue CCR, SDValue InFlag) {
- SDValue CPTmp0;
- SDValue CPTmp1;
- SDValue CPTmp2;
- if (SelectImmShifterOperand(TrueVal, CPTmp0, CPTmp2)) {
- SDValue CC = CurDAG->getTargetConstant(CCVal, MVT::i32);
- SDValue Ops[] = { FalseVal, CPTmp0, CPTmp2, CC, CCR, InFlag };
- return CurDAG->SelectNodeTo(N, ARM::MOVCCsi, MVT::i32, Ops, 6);
- }
-
- if (SelectRegShifterOperand(TrueVal, CPTmp0, CPTmp1, CPTmp2)) {
- SDValue CC = CurDAG->getTargetConstant(CCVal, MVT::i32);
- SDValue Ops[] = { FalseVal, CPTmp0, CPTmp1, CPTmp2, CC, CCR, InFlag };
- return CurDAG->SelectNodeTo(N, ARM::MOVCCsr, MVT::i32, Ops, 7);
- }
- return 0;
-}
-
-SDNode *ARMDAGToDAGISel::
-SelectT2CMOVImmOp(SDNode *N, SDValue FalseVal, SDValue TrueVal,
- ARMCC::CondCodes CCVal, SDValue CCR, SDValue InFlag) {
- ConstantSDNode *T = dyn_cast<ConstantSDNode>(TrueVal);
- if (!T)
- return 0;
-
- unsigned Opc = 0;
- unsigned TrueImm = T->getZExtValue();
- if (is_t2_so_imm(TrueImm)) {
- Opc = ARM::t2MOVCCi;
- } else if (TrueImm <= 0xffff) {
- Opc = ARM::t2MOVCCi16;
- } else if (is_t2_so_imm_not(TrueImm)) {
- TrueImm = ~TrueImm;
- Opc = ARM::t2MVNCCi;
- } else if (TrueVal.getNode()->hasOneUse() && Subtarget->hasV6T2Ops()) {
- // Large immediate.
- Opc = ARM::t2MOVCCi32imm;
- }
-
- if (Opc) {
- SDValue True = CurDAG->getTargetConstant(TrueImm, MVT::i32);
- SDValue CC = CurDAG->getTargetConstant(CCVal, MVT::i32);
- SDValue Ops[] = { FalseVal, True, CC, CCR, InFlag };
- return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 5);
- }
-
- return 0;
-}
-
-SDNode *ARMDAGToDAGISel::
-SelectARMCMOVImmOp(SDNode *N, SDValue FalseVal, SDValue TrueVal,
- ARMCC::CondCodes CCVal, SDValue CCR, SDValue InFlag) {
- ConstantSDNode *T = dyn_cast<ConstantSDNode>(TrueVal);
- if (!T)
- return 0;
-
- unsigned Opc = 0;
- unsigned TrueImm = T->getZExtValue();
- bool isSoImm = is_so_imm(TrueImm);
- if (isSoImm) {
- Opc = ARM::MOVCCi;
- } else if (Subtarget->hasV6T2Ops() && TrueImm <= 0xffff) {
- Opc = ARM::MOVCCi16;
- } else if (is_so_imm_not(TrueImm)) {
- TrueImm = ~TrueImm;
- Opc = ARM::MVNCCi;
- } else if (TrueVal.getNode()->hasOneUse() &&
- (Subtarget->hasV6T2Ops() || ARM_AM::isSOImmTwoPartVal(TrueImm))) {
- // Large immediate.
- Opc = ARM::MOVCCi32imm;
- }
-
- if (Opc) {
- SDValue True = CurDAG->getTargetConstant(TrueImm, MVT::i32);
- SDValue CC = CurDAG->getTargetConstant(CCVal, MVT::i32);
- SDValue Ops[] = { FalseVal, True, CC, CCR, InFlag };
- return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 5);
- }
-
- return 0;
-}
-
-SDNode *ARMDAGToDAGISel::SelectCMOVOp(SDNode *N) {
- EVT VT = N->getValueType(0);
- SDValue FalseVal = N->getOperand(0);
- SDValue TrueVal = N->getOperand(1);
- SDValue CC = N->getOperand(2);
- SDValue CCR = N->getOperand(3);
- SDValue InFlag = N->getOperand(4);
- assert(CC.getOpcode() == ISD::Constant);
- assert(CCR.getOpcode() == ISD::Register);
- ARMCC::CondCodes CCVal =
- (ARMCC::CondCodes)cast<ConstantSDNode>(CC)->getZExtValue();
-
- if (!Subtarget->isThumb1Only() && VT == MVT::i32) {
- // Pattern: (ARMcmov:i32 GPR:i32:$false, so_reg:i32:$true, (imm:i32):$cc)
- // Emits: (MOVCCs:i32 GPR:i32:$false, so_reg:i32:$true, (imm:i32):$cc)
- // Pattern complexity = 18 cost = 1 size = 0
- if (Subtarget->isThumb()) {
- SDNode *Res = SelectT2CMOVShiftOp(N, FalseVal, TrueVal,
- CCVal, CCR, InFlag);
- if (!Res)
- Res = SelectT2CMOVShiftOp(N, TrueVal, FalseVal,
- ARMCC::getOppositeCondition(CCVal), CCR, InFlag);
- if (Res)
- return Res;
- } else {
- SDNode *Res = SelectARMCMOVShiftOp(N, FalseVal, TrueVal,
- CCVal, CCR, InFlag);
- if (!Res)
- Res = SelectARMCMOVShiftOp(N, TrueVal, FalseVal,
- ARMCC::getOppositeCondition(CCVal), CCR, InFlag);
- if (Res)
- return Res;
- }
-
- // Pattern: (ARMcmov:i32 GPR:i32:$false,
- // (imm:i32)<<P:Pred_so_imm>>:$true,
- // (imm:i32):$cc)
- // Emits: (MOVCCi:i32 GPR:i32:$false,
- // (so_imm:i32 (imm:i32):$true), (imm:i32):$cc)
- // Pattern complexity = 10 cost = 1 size = 0
- if (Subtarget->isThumb()) {
- SDNode *Res = SelectT2CMOVImmOp(N, FalseVal, TrueVal,
- CCVal, CCR, InFlag);
- if (!Res)
- Res = SelectT2CMOVImmOp(N, TrueVal, FalseVal,
- ARMCC::getOppositeCondition(CCVal), CCR, InFlag);
- if (Res)
- return Res;
- } else {
- SDNode *Res = SelectARMCMOVImmOp(N, FalseVal, TrueVal,
- CCVal, CCR, InFlag);
- if (!Res)
- Res = SelectARMCMOVImmOp(N, TrueVal, FalseVal,
- ARMCC::getOppositeCondition(CCVal), CCR, InFlag);
- if (Res)
- return Res;
- }
- }
-
- // Pattern: (ARMcmov:i32 GPR:i32:$false, GPR:i32:$true, (imm:i32):$cc)
- // Emits: (MOVCCr:i32 GPR:i32:$false, GPR:i32:$true, (imm:i32):$cc)
- // Pattern complexity = 6 cost = 1 size = 0
- //
- // Pattern: (ARMcmov:i32 GPR:i32:$false, GPR:i32:$true, (imm:i32):$cc)
- // Emits: (tMOVCCr:i32 GPR:i32:$false, GPR:i32:$true, (imm:i32):$cc)
- // Pattern complexity = 6 cost = 11 size = 0
- //
- // Also VMOVScc and VMOVDcc.
- SDValue Tmp2 = CurDAG->getTargetConstant(CCVal, MVT::i32);
- SDValue Ops[] = { FalseVal, TrueVal, Tmp2, CCR, InFlag };
- unsigned Opc = 0;
- switch (VT.getSimpleVT().SimpleTy) {
- default: llvm_unreachable("Illegal conditional move type!");
- case MVT::i32:
- Opc = Subtarget->isThumb()
- ? (Subtarget->hasThumb2() ? ARM::t2MOVCCr : ARM::tMOVCCr_pseudo)
- : ARM::MOVCCr;
- break;
- case MVT::f32:
- Opc = ARM::VMOVScc;
- break;
- case MVT::f64:
- Opc = ARM::VMOVDcc;
- break;
- }
- return CurDAG->SelectNodeTo(N, Opc, VT, Ops, 5);
-}
-
/// Target-specific DAG combining for ISD::XOR.
/// Target-independent combining lowers SELECT_CC nodes of the form
/// select_cc setg[ge] X, 0, X, -X
@@ -2567,30 +2361,45 @@ SDNode *ARMDAGToDAGISel::SelectConcatVector(SDNode *N) {
return createDRegPairNode(VT, N->getOperand(0), N->getOperand(1));
}
-SDNode *ARMDAGToDAGISel::SelectAtomic64(SDNode *Node, unsigned Opc) {
+SDNode *ARMDAGToDAGISel::SelectAtomic(SDNode *Node, unsigned Op8,
+ unsigned Op16, unsigned Op32,
+ unsigned Op64) {
+ // Mostly direct translation to the given operations, except that we preserve
+ // the AtomicOrdering for use later on.
+ AtomicSDNode *AN = cast<AtomicSDNode>(Node);
+ EVT VT = AN->getMemoryVT();
+
+ unsigned Op;
+ SDVTList VTs = CurDAG->getVTList(AN->getValueType(0), MVT::Other);
+ if (VT == MVT::i8)
+ Op = Op8;
+ else if (VT == MVT::i16)
+ Op = Op16;
+ else if (VT == MVT::i32)
+ Op = Op32;
+ else if (VT == MVT::i64) {
+ Op = Op64;
+ VTs = CurDAG->getVTList(MVT::i32, MVT::i32, MVT::Other);
+ } else
+ llvm_unreachable("Unexpected atomic operation");
+
SmallVector<SDValue, 6> Ops;
- Ops.push_back(Node->getOperand(1)); // Ptr
- Ops.push_back(Node->getOperand(2)); // Low part of Val1
- Ops.push_back(Node->getOperand(3)); // High part of Val1
- if (Opc == ARM::ATOMCMPXCHG6432) {
- Ops.push_back(Node->getOperand(4)); // Low part of Val2
- Ops.push_back(Node->getOperand(5)); // High part of Val2
- }
- Ops.push_back(Node->getOperand(0)); // Chain
- MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
- MemOp[0] = cast<MemSDNode>(Node)->getMemOperand();
- SDNode *ResNode = CurDAG->getMachineNode(Opc, SDLoc(Node),
- MVT::i32, MVT::i32, MVT::Other,
- Ops);
- cast<MachineSDNode>(ResNode)->setMemRefs(MemOp, MemOp + 1);
- return ResNode;
+ for (unsigned i = 1; i < AN->getNumOperands(); ++i)
+ Ops.push_back(AN->getOperand(i));
+
+ Ops.push_back(CurDAG->getTargetConstant(AN->getOrdering(), MVT::i32));
+ Ops.push_back(AN->getOperand(0)); // Chain moves to the end
+
+ return CurDAG->SelectNodeTo(Node, Op, VTs, &Ops[0], Ops.size());
}
SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
SDLoc dl(N);
- if (N->isMachineOpcode())
+ if (N->isMachineOpcode()) {
+ N->setNodeId(-1);
return NULL; // Already selected.
+ }
switch (N->getOpcode()) {
default: break;
@@ -2882,8 +2691,6 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
SDValue(Chain.getNode(), Chain.getResNo()));
return NULL;
}
- case ARMISD::CMOV:
- return SelectCMOVOp(N);
case ARMISD::VZIP: {
unsigned Opc = 0;
EVT VT = N->getValueType(0);
@@ -3457,31 +3264,90 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
case ISD::CONCAT_VECTORS:
return SelectConcatVector(N);
- case ARMISD::ATOMOR64_DAG:
- return SelectAtomic64(N, ARM::ATOMOR6432);
- case ARMISD::ATOMXOR64_DAG:
- return SelectAtomic64(N, ARM::ATOMXOR6432);
- case ARMISD::ATOMADD64_DAG:
- return SelectAtomic64(N, ARM::ATOMADD6432);
- case ARMISD::ATOMSUB64_DAG:
- return SelectAtomic64(N, ARM::ATOMSUB6432);
- case ARMISD::ATOMNAND64_DAG:
- return SelectAtomic64(N, ARM::ATOMNAND6432);
- case ARMISD::ATOMAND64_DAG:
- return SelectAtomic64(N, ARM::ATOMAND6432);
- case ARMISD::ATOMSWAP64_DAG:
- return SelectAtomic64(N, ARM::ATOMSWAP6432);
- case ARMISD::ATOMCMPXCHG64_DAG:
- return SelectAtomic64(N, ARM::ATOMCMPXCHG6432);
-
- case ARMISD::ATOMMIN64_DAG:
- return SelectAtomic64(N, ARM::ATOMMIN6432);
- case ARMISD::ATOMUMIN64_DAG:
- return SelectAtomic64(N, ARM::ATOMUMIN6432);
- case ARMISD::ATOMMAX64_DAG:
- return SelectAtomic64(N, ARM::ATOMMAX6432);
- case ARMISD::ATOMUMAX64_DAG:
- return SelectAtomic64(N, ARM::ATOMUMAX6432);
+ case ISD::ATOMIC_LOAD:
+ if (cast<AtomicSDNode>(N)->getMemoryVT() == MVT::i64)
+ return SelectAtomic(N, 0, 0, 0, ARM::ATOMIC_LOAD_I64);
+ else
+ break;
+
+ case ISD::ATOMIC_STORE:
+ if (cast<AtomicSDNode>(N)->getMemoryVT() == MVT::i64)
+ return SelectAtomic(N, 0, 0, 0, ARM::ATOMIC_STORE_I64);
+ else
+ break;
+
+ case ISD::ATOMIC_LOAD_ADD:
+ return SelectAtomic(N,
+ ARM::ATOMIC_LOAD_ADD_I8,
+ ARM::ATOMIC_LOAD_ADD_I16,
+ ARM::ATOMIC_LOAD_ADD_I32,
+ ARM::ATOMIC_LOAD_ADD_I64);
+ case ISD::ATOMIC_LOAD_SUB:
+ return SelectAtomic(N,
+ ARM::ATOMIC_LOAD_SUB_I8,
+ ARM::ATOMIC_LOAD_SUB_I16,
+ ARM::ATOMIC_LOAD_SUB_I32,
+ ARM::ATOMIC_LOAD_SUB_I64);
+ case ISD::ATOMIC_LOAD_AND:
+ return SelectAtomic(N,
+ ARM::ATOMIC_LOAD_AND_I8,
+ ARM::ATOMIC_LOAD_AND_I16,
+ ARM::ATOMIC_LOAD_AND_I32,
+ ARM::ATOMIC_LOAD_AND_I64);
+ case ISD::ATOMIC_LOAD_OR:
+ return SelectAtomic(N,
+ ARM::ATOMIC_LOAD_OR_I8,
+ ARM::ATOMIC_LOAD_OR_I16,
+ ARM::ATOMIC_LOAD_OR_I32,
+ ARM::ATOMIC_LOAD_OR_I64);
+ case ISD::ATOMIC_LOAD_XOR:
+ return SelectAtomic(N,
+ ARM::ATOMIC_LOAD_XOR_I8,
+ ARM::ATOMIC_LOAD_XOR_I16,
+ ARM::ATOMIC_LOAD_XOR_I32,
+ ARM::ATOMIC_LOAD_XOR_I64);
+ case ISD::ATOMIC_LOAD_NAND:
+ return SelectAtomic(N,
+ ARM::ATOMIC_LOAD_NAND_I8,
+ ARM::ATOMIC_LOAD_NAND_I16,
+ ARM::ATOMIC_LOAD_NAND_I32,
+ ARM::ATOMIC_LOAD_NAND_I64);
+ case ISD::ATOMIC_LOAD_MIN:
+ return SelectAtomic(N,
+ ARM::ATOMIC_LOAD_MIN_I8,
+ ARM::ATOMIC_LOAD_MIN_I16,
+ ARM::ATOMIC_LOAD_MIN_I32,
+ ARM::ATOMIC_LOAD_MIN_I64);
+ case ISD::ATOMIC_LOAD_MAX:
+ return SelectAtomic(N,
+ ARM::ATOMIC_LOAD_MAX_I8,
+ ARM::ATOMIC_LOAD_MAX_I16,
+ ARM::ATOMIC_LOAD_MAX_I32,
+ ARM::ATOMIC_LOAD_MAX_I64);
+ case ISD::ATOMIC_LOAD_UMIN:
+ return SelectAtomic(N,
+ ARM::ATOMIC_LOAD_UMIN_I8,
+ ARM::ATOMIC_LOAD_UMIN_I16,
+ ARM::ATOMIC_LOAD_UMIN_I32,
+ ARM::ATOMIC_LOAD_UMIN_I64);
+ case ISD::ATOMIC_LOAD_UMAX:
+ return SelectAtomic(N,
+ ARM::ATOMIC_LOAD_UMAX_I8,
+ ARM::ATOMIC_LOAD_UMAX_I16,
+ ARM::ATOMIC_LOAD_UMAX_I32,
+ ARM::ATOMIC_LOAD_UMAX_I64);
+ case ISD::ATOMIC_SWAP:
+ return SelectAtomic(N,
+ ARM::ATOMIC_SWAP_I8,
+ ARM::ATOMIC_SWAP_I16,
+ ARM::ATOMIC_SWAP_I32,
+ ARM::ATOMIC_SWAP_I64);
+ case ISD::ATOMIC_CMP_SWAP:
+ return SelectAtomic(N,
+ ARM::ATOMIC_CMP_SWAP_I8,
+ ARM::ATOMIC_CMP_SWAP_I16,
+ ARM::ATOMIC_CMP_SWAP_I32,
+ ARM::ATOMIC_CMP_SWAP_I64);
}
return SelectCode(N);
@@ -3614,7 +3480,10 @@ SDNode *ARMDAGToDAGISel::SelectInlineAsm(SDNode *N){
if (PairedReg.getNode()) {
OpChanged[OpChanged.size() - 1] = true;
Flag = InlineAsm::getFlagWord(Kind, 1 /* RegNum*/);
- Flag = InlineAsm::getFlagWordForRegClass(Flag, ARM::GPRPairRegClassID);
+ if (IsTiedToChangedOp)
+ Flag = InlineAsm::getFlagWordForMatchingOp(Flag, DefIdx);
+ else
+ Flag = InlineAsm::getFlagWordForRegClass(Flag, ARM::GPRPairRegClassID);
// Replace the current flag.
AsmNodeOperands[AsmNodeOperands.size() - 1] = CurDAG->getTargetConstant(
Flag, MVT::i32);
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index caec11e..76a0a83 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -48,6 +48,7 @@
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetOptions.h"
+#include <utility>
using namespace llvm;
STATISTIC(NumTailCalls, "Number of tail calls");
@@ -174,9 +175,10 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
- if (Subtarget->isTargetDarwin()) {
+ if (Subtarget->isTargetIOS()) {
// Uses VFP for Thumb libfuncs if available.
- if (Subtarget->isThumb() && Subtarget->hasVFP2()) {
+ if (Subtarget->isThumb() && Subtarget->hasVFP2() &&
+ Subtarget->hasARMOps()) {
// Single-precision floating-point arithmetic.
setLibcallName(RTLIB::ADD_F32, "__addsf3vfp");
setLibcallName(RTLIB::SUB_F32, "__subsf3vfp");
@@ -421,7 +423,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
}
// Use divmod compiler-rt calls for iOS 5.0 and later.
- if (Subtarget->getTargetTriple().getOS() == Triple::IOS &&
+ if (Subtarget->getTargetTriple().isiOS() &&
!Subtarget->getTargetTriple().isOSVersionLT(5, 0)) {
setLibcallName(RTLIB::SDIVREM_I32, "__divmodsi4");
setLibcallName(RTLIB::UDIVREM_I32, "__udivmodsi4");
@@ -452,6 +454,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
}
setOperationAction(ISD::ConstantFP, MVT::f32, Custom);
+ setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
if (Subtarget->hasNEON()) {
addDRTypeForNEON(MVT::v2f32);
@@ -564,16 +567,6 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setOperationAction(ISD::FP_ROUND, MVT::v2f32, Expand);
setOperationAction(ISD::FP_EXTEND, MVT::v2f64, Expand);
- // Custom expand long extensions to vectors.
- setOperationAction(ISD::SIGN_EXTEND, MVT::v8i32, Custom);
- setOperationAction(ISD::ZERO_EXTEND, MVT::v8i32, Custom);
- setOperationAction(ISD::SIGN_EXTEND, MVT::v4i64, Custom);
- setOperationAction(ISD::ZERO_EXTEND, MVT::v4i64, Custom);
- setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom);
- setOperationAction(ISD::ZERO_EXTEND, MVT::v16i32, Custom);
- setOperationAction(ISD::SIGN_EXTEND, MVT::v8i64, Custom);
- setOperationAction(ISD::ZERO_EXTEND, MVT::v8i64, Custom);
-
// NEON does not have single instruction CTPOP for vectors with element
// types wider than 8-bits. However, custom lowering can leverage the
// v8i8/v16i8 vcnt instruction.
@@ -750,12 +743,10 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
// ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use
// the default expansion.
- // FIXME: This should be checking for v6k, not just v6.
- if (Subtarget->hasDataBarrier() ||
- (Subtarget->hasV6Ops() && !Subtarget->isThumb())) {
- // membarrier needs custom lowering; the rest are legal and handled
- // normally.
- setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
+ if (Subtarget->hasAnyDataBarrier() && !Subtarget->isThumb1Only()) {
+ // ATOMIC_FENCE needs custom lowering; the other 32-bit ones are legal and
+ // handled normally.
+ setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
// Custom lowering for 64-bit ops
setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i64, Custom);
setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i64, Custom);
@@ -768,11 +759,20 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i64, Custom);
setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i64, Custom);
setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i64, Custom);
- // Automatically insert fences (dmb ist) around ATOMIC_SWAP etc.
- setInsertFencesForAtomic(true);
+ // On v8, we have particularly efficient implementations of atomic fences
+ // if they can be combined with nearby atomic loads and stores.
+ if (!Subtarget->hasV8Ops()) {
+ // Automatically insert fences (dmb ist) around ATOMIC_SWAP etc.
+ setInsertFencesForAtomic(true);
+ }
+ setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Custom);
} else {
+ // If there's anything we can use as a barrier, go through custom lowering
+ // for ATOMIC_FENCE.
+ setOperationAction(ISD::ATOMIC_FENCE, MVT::Other,
+ Subtarget->hasAnyDataBarrier() ? Custom : Expand);
+
// Set them all for expansion, which will force libcalls.
- setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Expand);
setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Expand);
setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Expand);
setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, Expand);
@@ -869,6 +869,18 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setOperationAction(ISD::FP32_TO_FP16, MVT::i32, Expand);
}
}
+
+ // Combine sin / cos into one node or libcall if possible.
+ if (Subtarget->hasSinCos()) {
+ setLibcallName(RTLIB::SINCOS_F32, "sincosf");
+ setLibcallName(RTLIB::SINCOS_F64, "sincos");
+ if (Subtarget->getTargetTriple().getOS() == Triple::IOS) {
+ // For iOS, we don't want the normal expansion of a libcall to
+ // sincos. We want to issue a libcall to __sincos_stret.
+ setOperationAction(ISD::FSINCOS, MVT::f64, Custom);
+ setOperationAction(ISD::FSINCOS, MVT::f32, Custom);
+ }
+ }
// We have target-specific dag combine patterns for the following nodes:
// ARMISD::VMOVRRD - No need to call setTargetDAGCombine
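The sincos combining pays off for source like this (illustrative C; on iOS the pair becomes a single __sincos_stret call instead of two libcalls):

    #include <math.h>

    void polar_step(double t, double *s, double *c) {
      *s = sin(t);  // with hasSinCos(), these two calls are
      *c = cos(t);  // merged into one sincos computation
    }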
@@ -908,6 +920,44 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setMinFunctionAlignment(Subtarget->isThumb() ? 1 : 2);
}
+static void getExclusiveOperation(unsigned Size, AtomicOrdering Ord,
+ bool isThumb2, unsigned &LdrOpc,
+ unsigned &StrOpc) {
+ static const unsigned LoadBares[4][2] = {{ARM::LDREXB, ARM::t2LDREXB},
+ {ARM::LDREXH, ARM::t2LDREXH},
+ {ARM::LDREX, ARM::t2LDREX},
+ {ARM::LDREXD, ARM::t2LDREXD}};
+ static const unsigned LoadAcqs[4][2] = {{ARM::LDAEXB, ARM::t2LDAEXB},
+ {ARM::LDAEXH, ARM::t2LDAEXH},
+ {ARM::LDAEX, ARM::t2LDAEX},
+ {ARM::LDAEXD, ARM::t2LDAEXD}};
+ static const unsigned StoreBares[4][2] = {{ARM::STREXB, ARM::t2STREXB},
+ {ARM::STREXH, ARM::t2STREXH},
+ {ARM::STREX, ARM::t2STREX},
+ {ARM::STREXD, ARM::t2STREXD}};
+ static const unsigned StoreRels[4][2] = {{ARM::STLEXB, ARM::t2STLEXB},
+ {ARM::STLEXH, ARM::t2STLEXH},
+ {ARM::STLEX, ARM::t2STLEX},
+ {ARM::STLEXD, ARM::t2STLEXD}};
+
+ const unsigned (*LoadOps)[2], (*StoreOps)[2];
+ if (Ord == Acquire || Ord == AcquireRelease || Ord == SequentiallyConsistent)
+ LoadOps = LoadAcqs;
+ else
+ LoadOps = LoadBares;
+
+ if (Ord == Release || Ord == AcquireRelease || Ord == SequentiallyConsistent)
+ StoreOps = StoreRels;
+ else
+ StoreOps = StoreBares;
+
+ assert(isPowerOf2_32(Size) && Size <= 8 &&
+ "unsupported size for atomic binary op!");
+
+ LdrOpc = LoadOps[Log2_32(Size)][isThumb2];
+ StrOpc = StoreOps[Log2_32(Size)][isThumb2];
+}
+
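At the C++ level, the opcode tables above serve ordinary atomics (a hedged sketch; the exact sequence depends on subtarget and ordering):

    #include <atomic>

    int fetch_add_seq_cst(std::atomic<int> &v) {
      // Sequentially consistent 32-bit RMW: on ARMv8 (AArch32) this can lower
      // to a LDAEX/STLEX retry loop with no separate DMB fences.
      return v.fetch_add(1, std::memory_order_seq_cst);
    }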
// FIXME: It might make sense to define the representative register class as the
// nearest super-register that has a non-null superset. For example, DPR_VFP2 is
// a super-register of SPR, and DPR is a superset of DPR_VFP2. Consequently,
@@ -970,6 +1020,7 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
case ARMISD::BR_JT: return "ARMISD::BR_JT";
case ARMISD::BR2_JT: return "ARMISD::BR2_JT";
case ARMISD::RET_FLAG: return "ARMISD::RET_FLAG";
+ case ARMISD::INTRET_FLAG: return "ARMISD::INTRET_FLAG";
case ARMISD::PIC_ADD: return "ARMISD::PIC_ADD";
case ARMISD::CMP: return "ARMISD::CMP";
case ARMISD::CMN: return "ARMISD::CMN";
@@ -1009,7 +1060,6 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
case ARMISD::DYN_ALLOC: return "ARMISD::DYN_ALLOC";
- case ARMISD::MEMBARRIER: return "ARMISD::MEMBARRIER";
case ARMISD::MEMBARRIER_MCR: return "ARMISD::MEMBARRIER_MCR";
case ARMISD::PRELOAD: return "ARMISD::PRELOAD";
@@ -1068,6 +1118,8 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
case ARMISD::BUILD_VECTOR: return "ARMISD::BUILD_VECTOR";
case ARMISD::FMAX: return "ARMISD::FMAX";
case ARMISD::FMIN: return "ARMISD::FMIN";
+ case ARMISD::VMAXNM: return "ARMISD::VMAXNM";
+ case ARMISD::VMINNM: return "ARMISD::VMINNM";
case ARMISD::BFI: return "ARMISD::BFI";
case ARMISD::VORRIMM: return "ARMISD::VORRIMM";
case ARMISD::VBICIMM: return "ARMISD::VBICIMM";
@@ -1092,19 +1144,6 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
case ARMISD::VST2LN_UPD: return "ARMISD::VST2LN_UPD";
case ARMISD::VST3LN_UPD: return "ARMISD::VST3LN_UPD";
case ARMISD::VST4LN_UPD: return "ARMISD::VST4LN_UPD";
-
- case ARMISD::ATOMADD64_DAG: return "ATOMADD64_DAG";
- case ARMISD::ATOMSUB64_DAG: return "ATOMSUB64_DAG";
- case ARMISD::ATOMOR64_DAG: return "ATOMOR64_DAG";
- case ARMISD::ATOMXOR64_DAG: return "ATOMXOR64_DAG";
- case ARMISD::ATOMAND64_DAG: return "ATOMAND64_DAG";
- case ARMISD::ATOMNAND64_DAG: return "ATOMNAND64_DAG";
- case ARMISD::ATOMSWAP64_DAG: return "ATOMSWAP64_DAG";
- case ARMISD::ATOMCMPXCHG64_DAG: return "ATOMCMPXCHG64_DAG";
- case ARMISD::ATOMMIN64_DAG: return "ATOMMIN64_DAG";
- case ARMISD::ATOMUMIN64_DAG: return "ATOMUMIN64_DAG";
- case ARMISD::ATOMMAX64_DAG: return "ATOMMAX64_DAG";
- case ARMISD::ATOMUMAX64_DAG: return "ATOMUMAX64_DAG";
}
}
@@ -1536,7 +1575,8 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg,
MachinePointerInfo(),
- false, false, false, 0);
+ false, false, false,
+ DAG.InferPtrAlignment(AddArg));
MemOpChains.push_back(Load.getValue(1));
RegsToPass.push_back(std::make_pair(j, Load));
}
@@ -1745,24 +1785,26 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
RegsToPass[i].second.getValueType()));
// Add a register mask operand representing the call-preserved registers.
- const uint32_t *Mask;
- const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
- const ARMBaseRegisterInfo *ARI = static_cast<const ARMBaseRegisterInfo*>(TRI);
- if (isThisReturn) {
- // For 'this' returns, use the R0-preserving mask if applicable
- Mask = ARI->getThisReturnPreservedMask(CallConv);
- if (!Mask) {
- // Set isThisReturn to false if the calling convention is not one that
- // allows 'returned' to be modeled in this way, so LowerCallResult does
- // not try to pass 'this' straight through
- isThisReturn = false;
+ if (!isTailCall) {
+ const uint32_t *Mask;
+ const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
+ const ARMBaseRegisterInfo *ARI = static_cast<const ARMBaseRegisterInfo*>(TRI);
+ if (isThisReturn) {
+ // For 'this' returns, use the R0-preserving mask if applicable
+ Mask = ARI->getThisReturnPreservedMask(CallConv);
+ if (!Mask) {
+ // Set isThisReturn to false if the calling convention is not one that
+ // allows 'returned' to be modeled in this way, so LowerCallResult does
+ // not try to pass 'this' straight through
+ isThisReturn = false;
+ Mask = ARI->getCallPreservedMask(CallConv);
+ }
+ } else
Mask = ARI->getCallPreservedMask(CallConv);
- }
- } else
- Mask = ARI->getCallPreservedMask(CallConv);
- assert(Mask && "Missing call preserved mask for calling convention");
- Ops.push_back(DAG.getRegisterMask(Mask));
+ assert(Mask && "Missing call preserved mask for calling convention");
+ Ops.push_back(DAG.getRegisterMask(Mask));
+ }
if (InFlag.getNode())
Ops.push_back(InFlag);
@@ -1933,6 +1975,12 @@ ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
if (isVarArg && !Outs.empty())
return false;
+ // Exception-handling functions need a special set of instructions to indicate
+ // a return to the hardware. Tail-calling another function would probably
+ // break this.
+ if (CallerF->hasFnAttribute("interrupt"))
+ return false;
+
// Also avoid sibcall optimization if either caller or callee uses struct
// return semantics.
if (isCalleeStructRet || isCallerStructRet)
@@ -2061,6 +2109,39 @@ ARMTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
isVarArg));
}
+static SDValue LowerInterruptReturn(SmallVectorImpl<SDValue> &RetOps,
+ SDLoc DL, SelectionDAG &DAG) {
+ const MachineFunction &MF = DAG.getMachineFunction();
+ const Function *F = MF.getFunction();
+
+ StringRef IntKind = F->getFnAttribute("interrupt").getValueAsString();
+
+ // See ARM ARM v7 B1.8.3. On exception entry LR is set to a possibly offset
+ // version of the "preferred return address". These offsets affect the return
+ // instruction if this is a return from PL1 without hypervisor extensions.
+ // IRQ/FIQ: +4 "subs pc, lr, #4"
+ // SWI: 0 "subs pc, lr, #0"
+ // ABORT: +4 "subs pc, lr, #4"
+ // UNDEF: +4/+2 "subs pc, lr, #0"
+ // UNDEF varies depending on whether the exception came from ARM or Thumb
+ // mode. Alongside GCC, we throw our hands up in disgust and pretend it's 0.
+
+ int64_t LROffset;
+ if (IntKind == "" || IntKind == "IRQ" || IntKind == "FIQ" ||
+ IntKind == "ABORT")
+ LROffset = 4;
+ else if (IntKind == "SWI" || IntKind == "UNDEF")
+ LROffset = 0;
+ else
+ report_fatal_error("Unsupported interrupt attribute. If present, value "
+ "must be one of: IRQ, FIQ, SWI, ABORT or UNDEF");
+
+ RetOps.insert(RetOps.begin() + 1, DAG.getConstant(LROffset, MVT::i32, false));
+
+ return DAG.getNode(ARMISD::INTRET_FLAG, DL, MVT::Other,
+ RetOps.data(), RetOps.size());
+}
+
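For reference, the source construct that reaches LowerInterruptReturn (GCC/Clang attribute syntax; the handler name is illustrative):

    __attribute__((interrupt("IRQ")))
    void irq_handler(void) {
      /* service the device */
    }  /* per the table above, the return is emitted as "subs pc, lr, #4" */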
SDValue
ARMTargetLowering::LowerReturn(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
@@ -2146,6 +2227,19 @@ ARMTargetLowering::LowerReturn(SDValue Chain,
if (Flag.getNode())
RetOps.push_back(Flag);
+ // CPUs which aren't M-class use a special sequence to return from
+ // exceptions (roughly, any instruction setting pc and cpsr simultaneously,
+ // though we use "subs pc, lr, #N").
+ //
+ // M-class CPUs actually use a normal return sequence with a special
+ // (hardware-provided) value in LR, so the normal code path works.
+ if (DAG.getMachineFunction().getFunction()->hasFnAttribute("interrupt") &&
+ !Subtarget->isMClass()) {
+ if (Subtarget->isThumb1Only())
+ report_fatal_error("interrupt attribute is not supported in Thumb1");
+ return LowerInterruptReturn(RetOps, dl, DAG);
+ }
+
return DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other,
RetOps.data(), RetOps.size());
}
@@ -2202,7 +2296,8 @@ bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
bool HasRet = false;
for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end();
UI != UE; ++UI) {
- if (UI->getOpcode() != ARMISD::RET_FLAG)
+ if (UI->getOpcode() != ARMISD::RET_FLAG &&
+ UI->getOpcode() != ARMISD::INTRET_FLAG)
return false;
HasRet = true;
}
@@ -2589,7 +2684,7 @@ static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG,
// Thumb1 and pre-v6 ARM mode use a libcall instead and should never get
// here.
assert(Subtarget->hasV6Ops() && !Subtarget->isThumb() &&
- "Unexpected ISD::MEMBARRIER encountered. Should be libcall!");
+ "Unexpected ISD::ATOMIC_FENCE encountered. Should be libcall!");
return DAG.getNode(ARMISD::MEMBARRIER_MCR, dl, MVT::Other, Op.getOperand(0),
DAG.getConstant(0, MVT::i32));
}
@@ -2597,14 +2692,18 @@ static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG,
ConstantSDNode *OrdN = cast<ConstantSDNode>(Op.getOperand(1));
AtomicOrdering Ord = static_cast<AtomicOrdering>(OrdN->getZExtValue());
unsigned Domain = ARM_MB::ISH;
- if (Subtarget->isSwift() && Ord == Release) {
+ if (Subtarget->isMClass()) {
+ // Only a full system barrier exists in the M-class architectures.
+ Domain = ARM_MB::SY;
+ } else if (Subtarget->isSwift() && Ord == Release) {
// Swift happens to implement ISHST barriers in a way that's compatible with
// Release semantics but weaker than ISH so we'd be fools not to use
// it. Beware: other processors probably don't!
Domain = ARM_MB::ISHST;
}
- return DAG.getNode(ARMISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0),
+ return DAG.getNode(ISD::INTRINSIC_VOID, dl, MVT::Other, Op.getOperand(0),
+ DAG.getConstant(Intrinsic::arm_dmb, MVT::i32),
DAG.getConstant(Domain, MVT::i32));
}
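In source terms, the barrier lowered here is the one behind a standalone fence (a sketch; the exact barrier domain follows the rules above):

    #include <atomic>

    void publish(int *data, std::atomic<int> &flag) {
      *data = 42;
      std::atomic_thread_fence(std::memory_order_release);  // -> "dmb ish"
      flag.store(1, std::memory_order_relaxed);             // (ISHST on Swift)
    }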
@@ -3177,6 +3276,61 @@ SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
SelectTrue, SelectFalse, ISD::SETNE);
}
+static ISD::CondCode getInverseCCForVSEL(ISD::CondCode CC) {
+ if (CC == ISD::SETNE)
+ return ISD::SETEQ;
+ // We need the logical inverse here (e.g. SETGE for SETLT): merely swapping
+ // the operands (SETGT for SETLT) would give the wrong result on equality.
+ return ISD::getSetCCInverse(CC, /*isInteger=*/true);
+}
+
+static void checkVSELConstraints(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
+ bool &swpCmpOps, bool &swpVselOps) {
+ // Start by selecting the GE condition code for opcodes that return true for
+ // 'equality'
+ if (CC == ISD::SETUGE || CC == ISD::SETOGE || CC == ISD::SETOLE ||
+ CC == ISD::SETULE)
+ CondCode = ARMCC::GE;
+
+ // and GT for opcodes that return false for 'equality'.
+ else if (CC == ISD::SETUGT || CC == ISD::SETOGT || CC == ISD::SETOLT ||
+ CC == ISD::SETULT)
+ CondCode = ARMCC::GT;
+
+ // Since we are constrained to GE/GT, if the opcode contains 'less', we need
+ // to swap the compare operands.
+ if (CC == ISD::SETOLE || CC == ISD::SETULE || CC == ISD::SETOLT ||
+ CC == ISD::SETULT)
+ swpCmpOps = true;
+
+ // Both GT and GE are ordered comparisons, and return false for 'unordered'.
+ // If we have an unordered opcode, we need to swap the operands to the VSEL
+ // instruction (effectively negating the condition).
+ //
+ // This also has the effect of swapping which one of 'less' or 'greater'
+ // returns true, so we also swap the compare operands. It also switches
+ // whether we return true for 'equality', so we compensate by picking the
+ // opposite condition code to our original choice.
+ if (CC == ISD::SETULE || CC == ISD::SETULT || CC == ISD::SETUGE ||
+ CC == ISD::SETUGT) {
+ swpCmpOps = !swpCmpOps;
+ swpVselOps = !swpVselOps;
+ CondCode = CondCode == ARMCC::GT ? ARMCC::GE : ARMCC::GT;
+ }
+
+ // 'ordered' is 'anything but unordered', so use the VS condition code and
+ // swap the VSEL operands.
+ if (CC == ISD::SETO) {
+ CondCode = ARMCC::VS;
+ swpVselOps = true;
+ }
+
+ // 'unordered or not equal' is 'anything but equal', so use the EQ condition
+ // code and swap the VSEL operands.
+ if (CC == ISD::SETUNE) {
+ CondCode = ARMCC::EQ;
+ swpVselOps = true;
+ }
+}
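+
+// Worked example (illustrative, not from the original change): for
+// CC == ISD::SETULT the routine above first picks GT and requests a compare
+// operand swap; the unordered clause then undoes that swap, swaps the VSEL
+// operands instead, and relaxes GT to GE. The net effect is a GE-predicated
+// VSEL on the original compare operands with the select operands exchanged.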
+
SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
SDValue LHS = Op.getOperand(0);
@@ -3187,15 +3341,66 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
SDLoc dl(Op);
if (LHS.getValueType() == MVT::i32) {
+ // Try to generate VSEL on ARMv8.
+ // The VSEL instruction can't use all the usual ARM condition
+ // codes: it only has two bits to select the condition code, so it's
+ // constrained to use only GE, GT, VS and EQ.
+ //
+ // To implement all the various ISD::SETXXX opcodes, we sometimes need to
+ // swap the operands of the previous compare instruction (effectively
+ // inverting the compare condition, swapping 'less' and 'greater') and
+ // sometimes need to swap the operands to the VSEL (which inverts the
+ // condition in the sense of firing whenever the previous condition didn't).
+ if (getSubtarget()->hasFPARMv8() && (TrueVal.getValueType() == MVT::f32 ||
+ TrueVal.getValueType() == MVT::f64)) {
+ ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
+ if (CondCode == ARMCC::LT || CondCode == ARMCC::LE ||
+ CondCode == ARMCC::VC || CondCode == ARMCC::NE) {
+ CC = getInverseCCForVSEL(CC);
+ std::swap(TrueVal, FalseVal);
+ }
+ }
+
SDValue ARMcc;
SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
- return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, CCR,Cmp);
+ return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, CCR,
+ Cmp);
}
ARMCC::CondCodes CondCode, CondCode2;
FPCCToARMCC(CC, CondCode, CondCode2);
+ // Try to generate VSEL on ARMv8.
+ if (getSubtarget()->hasFPARMv8() && (TrueVal.getValueType() == MVT::f32 ||
+ TrueVal.getValueType() == MVT::f64)) {
+ // We can select VMAXNM/VMINNM from a compare followed by a select with the
+ // same operands, as follows:
+ // c = fcmp [ogt, olt, ugt, ult] a, b
+ // select c, a, b
+ // We only do this in unsafe-fp-math mode, because signed zeros and NaNs
+ // are handled differently from the original code sequence.
+ if (getTargetMachine().Options.UnsafeFPMath && LHS == TrueVal &&
+ RHS == FalseVal) {
+ if (CC == ISD::SETOGT || CC == ISD::SETUGT)
+ return DAG.getNode(ARMISD::VMAXNM, dl, VT, TrueVal, FalseVal);
+ if (CC == ISD::SETOLT || CC == ISD::SETULT)
+ return DAG.getNode(ARMISD::VMINNM, dl, VT, TrueVal, FalseVal);
+ }
+
+ bool swpCmpOps = false;
+ bool swpVselOps = false;
+ checkVSELConstraints(CC, CondCode, swpCmpOps, swpVselOps);
+
+ if (CondCode == ARMCC::GT || CondCode == ARMCC::GE ||
+ CondCode == ARMCC::VS || CondCode == ARMCC::EQ) {
+ if (swpCmpOps)
+ std::swap(LHS, RHS);
+ if (swpVselOps)
+ std::swap(TrueVal, FalseVal);
+ }
+ }
+
SDValue ARMcc = DAG.getConstant(CondCode, MVT::i32);
SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
@@ -3627,47 +3832,6 @@ SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
return FrameAddr;
}
-/// Custom Expand long vector extensions, where size(DestVec) > 2*size(SrcVec),
-/// and size(DestVec) > 128-bits.
-/// This is achieved by doing the one extension from the SrcVec, splitting the
-/// result, extending these parts, and then concatenating these into the
-/// destination.
-static SDValue ExpandVectorExtension(SDNode *N, SelectionDAG &DAG) {
- SDValue Op = N->getOperand(0);
- EVT SrcVT = Op.getValueType();
- EVT DestVT = N->getValueType(0);
-
- assert(DestVT.getSizeInBits() > 128 &&
- "Custom sext/zext expansion needs >128-bit vector.");
- // If this is a normal length extension, use the default expansion.
- if (SrcVT.getSizeInBits()*4 != DestVT.getSizeInBits() &&
- SrcVT.getSizeInBits()*8 != DestVT.getSizeInBits())
- return SDValue();
-
- SDLoc dl(N);
- unsigned SrcEltSize = SrcVT.getVectorElementType().getSizeInBits();
- unsigned DestEltSize = DestVT.getVectorElementType().getSizeInBits();
- unsigned NumElts = SrcVT.getVectorNumElements();
- LLVMContext &Ctx = *DAG.getContext();
- SDValue Mid, SplitLo, SplitHi, ExtLo, ExtHi;
-
- EVT MidVT = EVT::getVectorVT(Ctx, EVT::getIntegerVT(Ctx, SrcEltSize*2),
- NumElts);
- EVT SplitVT = EVT::getVectorVT(Ctx, EVT::getIntegerVT(Ctx, SrcEltSize*2),
- NumElts/2);
- EVT ExtVT = EVT::getVectorVT(Ctx, EVT::getIntegerVT(Ctx, DestEltSize),
- NumElts/2);
-
- Mid = DAG.getNode(N->getOpcode(), dl, MidVT, Op);
- SplitLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SplitVT, Mid,
- DAG.getIntPtrConstant(0));
- SplitHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SplitVT, Mid,
- DAG.getIntPtrConstant(NumElts/2));
- ExtLo = DAG.getNode(N->getOpcode(), dl, ExtVT, SplitLo);
- ExtHi = DAG.getNode(N->getOpcode(), dl, ExtVT, SplitHi);
- return DAG.getNode(ISD::CONCAT_VECTORS, dl, DestVT, ExtLo, ExtHi);
-}
-
/// ExpandBITCAST - If the target supports VFP, this function is called to
/// expand a bit convert where either the source or destination type is i64 to
/// use a VMOVDRR or VMOVRRD node. This should not be done when the non-i64
@@ -4271,17 +4435,25 @@ static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef,
SDValue ARMTargetLowering::LowerConstantFP(SDValue Op, SelectionDAG &DAG,
const ARMSubtarget *ST) const {
- if (!ST->useNEONForSinglePrecisionFP() || !ST->hasVFP3() || ST->hasD16())
+ if (!ST->hasVFP3())
return SDValue();
+ bool IsDouble = Op.getValueType() == MVT::f64;
ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Op);
- assert(Op.getValueType() == MVT::f32 &&
- "ConstantFP custom lowering should only occur for f32.");
// Try splatting with a VMOV.f32...
APFloat FPVal = CFP->getValueAPF();
- int ImmVal = ARM_AM::getFP32Imm(FPVal);
+ int ImmVal = IsDouble ? ARM_AM::getFP64Imm(FPVal) : ARM_AM::getFP32Imm(FPVal);
+
if (ImmVal != -1) {
+ if (IsDouble || !ST->useNEONForSinglePrecisionFP()) {
+ // We have code in place to select a valid ConstantFP already, no need to
+ // do any mangling.
+ return Op;
+ }
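+ // (Illustrative: values such as 1.0 and -2.0 fit the 8-bit VFP immediate
+ // encoding, so getFP32Imm/getFP64Imm succeed above and a plain
+ // vmov.f32/.f64 materializes them directly.)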
+
+ // It's a float and we are trying to use NEON operations where
+ // possible. Lower it to a splat followed by an extract.
SDLoc DL(Op);
SDValue NewVal = DAG.getTargetConstant(ImmVal, MVT::i32);
SDValue VecConstant = DAG.getNode(ARMISD::VMOVFPIMM, DL, MVT::v2f32,
@@ -4290,15 +4462,31 @@ SDValue ARMTargetLowering::LowerConstantFP(SDValue Op, SelectionDAG &DAG,
DAG.getConstant(0, MVT::i32));
}
- // If that fails, try a VMOV.i32
+ // The rest of our options are NEON-only; make sure that's allowed before
+ // proceeding.
+ if (!ST->hasNEON() || (!IsDouble && !ST->useNEONForSinglePrecisionFP()))
+ return SDValue();
+
EVT VMovVT;
- unsigned iVal = FPVal.bitcastToAPInt().getZExtValue();
- SDValue NewVal = isNEONModifiedImm(iVal, 0, 32, DAG, VMovVT, false,
- VMOVModImm);
+ uint64_t iVal = FPVal.bitcastToAPInt().getZExtValue();
+
+ // It wouldn't really be worth bothering for doubles except for one very
+ // important value, which does happen to match: 0.0. So make sure we don't do
+ // anything stupid.
+ if (IsDouble && (iVal & 0xffffffff) != (iVal >> 32))
+ return SDValue();
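+ // (E.g. +0.0 is 0x0000000000000000, whose 32-bit halves match, so a
+ // replicated VMOV.i32 can reproduce it; 1.0 = 0x3FF0000000000000 has
+ // differing halves and is rejected above. Illustrative values only.)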
+
+ // Try a VMOV.i32 (FIXME: i8, i16, or i64 could work too).
+ SDValue NewVal = isNEONModifiedImm(iVal & 0xffffffffU, 0, 32, DAG, VMovVT,
+ false, VMOVModImm);
if (NewVal != SDValue()) {
SDLoc DL(Op);
SDValue VecConstant = DAG.getNode(ARMISD::VMOVIMM, DL, VMovVT,
NewVal);
+ if (IsDouble)
+ return DAG.getNode(ISD::BITCAST, DL, MVT::f64, VecConstant);
+
+ // It's a float: cast and extract a vector element.
SDValue VecFConstant = DAG.getNode(ISD::BITCAST, DL, MVT::v2f32,
VecConstant);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecFConstant,
@@ -4306,11 +4494,16 @@ SDValue ARMTargetLowering::LowerConstantFP(SDValue Op, SelectionDAG &DAG,
}
// Finally, try a VMVN.i32
- NewVal = isNEONModifiedImm(~iVal & 0xffffffff, 0, 32, DAG, VMovVT, false,
- VMVNModImm);
+ NewVal = isNEONModifiedImm(~iVal & 0xffffffffU, 0, 32, DAG, VMovVT,
+ false, VMVNModImm);
if (NewVal != SDValue()) {
SDLoc DL(Op);
SDValue VecConstant = DAG.getNode(ARMISD::VMVNIMM, DL, VMovVT, NewVal);
+
+ if (IsDouble)
+ return DAG.getNode(ISD::BITCAST, DL, MVT::f64, VecConstant);
+
+ // It's a float: cast and extract a vector element.
SDValue VecFConstant = DAG.getNode(ISD::BITCAST, DL, MVT::v2f32,
VecConstant);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecFConstant,
@@ -5769,6 +5962,70 @@ static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) {
Op.getOperand(1), Op.getOperand(2));
}
+SDValue ARMTargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const {
+ assert(Subtarget->isTargetDarwin());
+
+ // For iOS, we want to call an alternative entry point: __sincos_stret;
+ // the return values are passed via sret.
+ SDLoc dl(Op);
+ SDValue Arg = Op.getOperand(0);
+ EVT ArgVT = Arg.getValueType();
+ Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
+
+ MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+ // Pair of floats / doubles used to pass the result.
+ StructType *RetTy = StructType::get(ArgTy, ArgTy, NULL);
+
+ // Create stack object for sret.
+ const uint64_t ByteSize = TLI.getDataLayout()->getTypeAllocSize(RetTy);
+ const unsigned StackAlign = TLI.getDataLayout()->getPrefTypeAlignment(RetTy);
+ int FrameIdx = FrameInfo->CreateStackObject(ByteSize, StackAlign, false);
+ SDValue SRet = DAG.getFrameIndex(FrameIdx, TLI.getPointerTy());
+
+ ArgListTy Args;
+ ArgListEntry Entry;
+
+ Entry.Node = SRet;
+ Entry.Ty = RetTy->getPointerTo();
+ Entry.isSExt = false;
+ Entry.isZExt = false;
+ Entry.isSRet = true;
+ Args.push_back(Entry);
+
+ Entry.Node = Arg;
+ Entry.Ty = ArgTy;
+ Entry.isSExt = false;
+ Entry.isZExt = false;
+ Args.push_back(Entry);
+
+ const char *LibcallName = (ArgVT == MVT::f64)
+ ? "__sincos_stret" : "__sincosf_stret";
+ SDValue Callee = DAG.getExternalSymbol(LibcallName, getPointerTy());
+
+ TargetLowering::
+ CallLoweringInfo CLI(DAG.getEntryNode(), Type::getVoidTy(*DAG.getContext()),
+ false, false, false, false, 0,
+ CallingConv::C, /*isTailCall=*/false,
+ /*doesNotRet=*/false, /*isReturnValueUsed=*/false,
+ Callee, Args, DAG, dl);
+ std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
+
+ SDValue LoadSin = DAG.getLoad(ArgVT, dl, CallResult.second, SRet,
+ MachinePointerInfo(), false, false, false, 0);
+
+ // Address of cos field.
+ SDValue Add = DAG.getNode(ISD::ADD, dl, getPointerTy(), SRet,
+ DAG.getIntPtrConstant(ArgVT.getStoreSize()));
+ SDValue LoadCos = DAG.getLoad(ArgVT, dl, LoadSin.getValue(1), Add,
+ MachinePointerInfo(), false, false, false, 0);
+
+ SDVTList Tys = DAG.getVTList(ArgVT, ArgVT);
+ return DAG.getNode(ISD::MERGE_VALUES, dl, Tys,
+ LoadSin.getValue(0), LoadCos.getValue(0));
+}
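+
+// Illustrative sketch of the net effect (not part of the original change):
+//   struct { double s, c; } ret;  // the stack slot created above
+//   __sincos_stret(&ret, x);      // sret pointer is the first argument
+//   use(ret.s, ret.c);            // read back as the LoadSin/LoadCos nodes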
+
static SDValue LowerAtomicLoadStore(SDValue Op, SelectionDAG &DAG) {
// Monotonic load/store is legal for all targets
if (cast<AtomicSDNode>(Op)->getOrdering() <= Monotonic)
@@ -5781,32 +6038,28 @@ static SDValue LowerAtomicLoadStore(SDValue Op, SelectionDAG &DAG) {
static void
ReplaceATOMIC_OP_64(SDNode *Node, SmallVectorImpl<SDValue>& Results,
- SelectionDAG &DAG, unsigned NewOp) {
+ SelectionDAG &DAG) {
SDLoc dl(Node);
assert (Node->getValueType(0) == MVT::i64 &&
"Only know how to expand i64 atomics");
+ AtomicSDNode *AN = cast<AtomicSDNode>(Node);
SmallVector<SDValue, 6> Ops;
Ops.push_back(Node->getOperand(0)); // Chain
Ops.push_back(Node->getOperand(1)); // Ptr
- // Low part of Val1
- Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
- Node->getOperand(2), DAG.getIntPtrConstant(0)));
- // High part of Val1
- Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
- Node->getOperand(2), DAG.getIntPtrConstant(1)));
- if (NewOp == ARMISD::ATOMCMPXCHG64_DAG) {
- // High part of Val1
+ for (unsigned i = 2; i < Node->getNumOperands(); i++) {
+ // Low part
Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
- Node->getOperand(3), DAG.getIntPtrConstant(0)));
- // High part of Val2
+ Node->getOperand(i), DAG.getIntPtrConstant(0)));
+ // High part
Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
- Node->getOperand(3), DAG.getIntPtrConstant(1)));
+ Node->getOperand(i), DAG.getIntPtrConstant(1)));
}
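+ // (Illustrative: an i64 cmpxchg thus passes Chain, Ptr, and the lo/hi
+ // halves of both the expected and the new value; the two i32 results are
+ // recombined with BUILD_PAIR below.)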
SDVTList Tys = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
SDValue Result =
- DAG.getMemIntrinsicNode(NewOp, dl, Tys, Ops.data(), Ops.size(), MVT::i64,
- cast<MemSDNode>(Node)->getMemOperand());
+ DAG.getAtomic(Node->getOpcode(), dl, MVT::i64, Tys, Ops.data(), Ops.size(),
+ cast<MemSDNode>(Node)->getMemOperand(), AN->getOrdering(),
+ AN->getSynchScope());
SDValue OpsF[] = { Result.getValue(0), Result.getValue(1) };
Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, OpsF, 2));
Results.push_back(Result.getValue(2));
@@ -5904,6 +6157,7 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::SUBE: return LowerADDC_ADDE_SUBC_SUBE(Op, DAG);
case ISD::ATOMIC_LOAD:
case ISD::ATOMIC_STORE: return LowerAtomicLoadStore(Op, DAG);
+ case ISD::FSINCOS: return LowerFSINCOS(Op, DAG);
case ISD::SDIVREM:
case ISD::UDIVREM: return LowerDivRem(Op, DAG);
}
@@ -5921,10 +6175,6 @@ void ARMTargetLowering::ReplaceNodeResults(SDNode *N,
case ISD::BITCAST:
Res = ExpandBITCAST(N, DAG);
break;
- case ISD::SIGN_EXTEND:
- case ISD::ZERO_EXTEND:
- Res = ExpandVectorExtension(N, DAG);
- break;
case ISD::SRL:
case ISD::SRA:
Res = Expand64BitShift(N, DAG, Subtarget);
@@ -5932,41 +6182,21 @@ void ARMTargetLowering::ReplaceNodeResults(SDNode *N,
case ISD::READCYCLECOUNTER:
ReplaceREADCYCLECOUNTER(N, Results, DAG, Subtarget);
return;
+ case ISD::ATOMIC_STORE:
+ case ISD::ATOMIC_LOAD:
case ISD::ATOMIC_LOAD_ADD:
- ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMADD64_DAG);
- return;
case ISD::ATOMIC_LOAD_AND:
- ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMAND64_DAG);
- return;
case ISD::ATOMIC_LOAD_NAND:
- ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMNAND64_DAG);
- return;
case ISD::ATOMIC_LOAD_OR:
- ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMOR64_DAG);
- return;
case ISD::ATOMIC_LOAD_SUB:
- ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMSUB64_DAG);
- return;
case ISD::ATOMIC_LOAD_XOR:
- ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMXOR64_DAG);
- return;
case ISD::ATOMIC_SWAP:
- ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMSWAP64_DAG);
- return;
case ISD::ATOMIC_CMP_SWAP:
- ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMCMPXCHG64_DAG);
- return;
case ISD::ATOMIC_LOAD_MIN:
- ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMMIN64_DAG);
- return;
case ISD::ATOMIC_LOAD_UMIN:
- ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMUMIN64_DAG);
- return;
case ISD::ATOMIC_LOAD_MAX:
- ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMMAX64_DAG);
- return;
case ISD::ATOMIC_LOAD_UMAX:
- ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMUMAX64_DAG);
+ ReplaceATOMIC_OP_64(N, Results, DAG);
return;
}
if (Res.getNode())
@@ -5986,6 +6216,7 @@ ARMTargetLowering::EmitAtomicCmpSwap(MachineInstr *MI,
unsigned oldval = MI->getOperand(2).getReg();
unsigned newval = MI->getOperand(3).getReg();
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ AtomicOrdering Ord = static_cast<AtomicOrdering>(MI->getOperand(4).getImm());
DebugLoc dl = MI->getDebugLoc();
bool isThumb2 = Subtarget->isThumb2();
@@ -6001,21 +6232,7 @@ ARMTargetLowering::EmitAtomicCmpSwap(MachineInstr *MI,
}
unsigned ldrOpc, strOpc;
- switch (Size) {
- default: llvm_unreachable("unsupported size for AtomicCmpSwap!");
- case 1:
- ldrOpc = isThumb2 ? ARM::t2LDREXB : ARM::LDREXB;
- strOpc = isThumb2 ? ARM::t2STREXB : ARM::STREXB;
- break;
- case 2:
- ldrOpc = isThumb2 ? ARM::t2LDREXH : ARM::LDREXH;
- strOpc = isThumb2 ? ARM::t2STREXH : ARM::STREXH;
- break;
- case 4:
- ldrOpc = isThumb2 ? ARM::t2LDREX : ARM::LDREX;
- strOpc = isThumb2 ? ARM::t2STREX : ARM::STREX;
- break;
- }
+ getExclusiveOperation(Size, Ord, isThumb2, ldrOpc, strOpc);
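+ // (getExclusiveOperation is defined elsewhere in this change; judging by
+ // its uses here, it picks the right-width LDREX/STREX pair, or the v8
+ // acquire/release LDAEX/STLEX variants when Ord requires them.)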
MachineFunction *MF = BB->getParent();
const BasicBlock *LLVM_BB = BB->getBasicBlock();
@@ -6095,6 +6312,7 @@ ARMTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
unsigned dest = MI->getOperand(0).getReg();
unsigned ptr = MI->getOperand(1).getReg();
unsigned incr = MI->getOperand(2).getReg();
+ AtomicOrdering Ord = static_cast<AtomicOrdering>(MI->getOperand(3).getImm());
DebugLoc dl = MI->getDebugLoc();
bool isThumb2 = Subtarget->isThumb2();
@@ -6102,24 +6320,11 @@ ARMTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
if (isThumb2) {
MRI.constrainRegClass(dest, &ARM::rGPRRegClass);
MRI.constrainRegClass(ptr, &ARM::rGPRRegClass);
+ MRI.constrainRegClass(incr, &ARM::rGPRRegClass);
}
unsigned ldrOpc, strOpc;
- switch (Size) {
- default: llvm_unreachable("unsupported size for AtomicCmpSwap!");
- case 1:
- ldrOpc = isThumb2 ? ARM::t2LDREXB : ARM::LDREXB;
- strOpc = isThumb2 ? ARM::t2STREXB : ARM::STREXB;
- break;
- case 2:
- ldrOpc = isThumb2 ? ARM::t2LDREXH : ARM::LDREXH;
- strOpc = isThumb2 ? ARM::t2STREXH : ARM::STREXH;
- break;
- case 4:
- ldrOpc = isThumb2 ? ARM::t2LDREX : ARM::LDREX;
- strOpc = isThumb2 ? ARM::t2STREX : ARM::STREX;
- break;
- }
+ getExclusiveOperation(Size, Ord, isThumb2, ldrOpc, strOpc);
MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
@@ -6203,6 +6408,7 @@ ARMTargetLowering::EmitAtomicBinaryMinMax(MachineInstr *MI,
unsigned ptr = MI->getOperand(1).getReg();
unsigned incr = MI->getOperand(2).getReg();
unsigned oldval = dest;
+ AtomicOrdering Ord = static_cast<AtomicOrdering>(MI->getOperand(3).getImm());
DebugLoc dl = MI->getDebugLoc();
bool isThumb2 = Subtarget->isThumb2();
@@ -6210,24 +6416,20 @@ ARMTargetLowering::EmitAtomicBinaryMinMax(MachineInstr *MI,
if (isThumb2) {
MRI.constrainRegClass(dest, &ARM::rGPRRegClass);
MRI.constrainRegClass(ptr, &ARM::rGPRRegClass);
+ MRI.constrainRegClass(incr, &ARM::rGPRRegClass);
}
unsigned ldrOpc, strOpc, extendOpc;
+ getExclusiveOperation(Size, Ord, isThumb2, ldrOpc, strOpc);
switch (Size) {
- default: llvm_unreachable("unsupported size for AtomicCmpSwap!");
+ default: llvm_unreachable("unsupported size for AtomicBinaryMinMax!");
case 1:
- ldrOpc = isThumb2 ? ARM::t2LDREXB : ARM::LDREXB;
- strOpc = isThumb2 ? ARM::t2STREXB : ARM::STREXB;
extendOpc = isThumb2 ? ARM::t2SXTB : ARM::SXTB;
break;
case 2:
- ldrOpc = isThumb2 ? ARM::t2LDREXH : ARM::LDREXH;
- strOpc = isThumb2 ? ARM::t2STREXH : ARM::STREXH;
extendOpc = isThumb2 ? ARM::t2SXTH : ARM::SXTH;
break;
case 4:
- ldrOpc = isThumb2 ? ARM::t2LDREX : ARM::LDREX;
- strOpc = isThumb2 ? ARM::t2STREX : ARM::STREX;
extendOpc = 0;
break;
}
@@ -6271,7 +6473,10 @@ ARMTargetLowering::EmitAtomicBinaryMinMax(MachineInstr *MI,
// Sign extend the value, if necessary.
if (signExtend && extendOpc) {
- oldval = MRI.createVirtualRegister(&ARM::GPRRegClass);
+ oldval = MRI.createVirtualRegister(isThumb2 ? &ARM::rGPRRegClass
+ : &ARM::GPRnopcRegClass);
+ if (!isThumb2)
+ MRI.constrainRegClass(dest, &ARM::GPRnopcRegClass);
AddDefaultPred(BuildMI(BB, dl, TII->get(extendOpc), oldval)
.addReg(dest)
.addImm(0));
@@ -6309,7 +6514,7 @@ ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB,
unsigned Op1, unsigned Op2,
bool NeedsCarry, bool IsCmpxchg,
bool IsMinMax, ARMCC::CondCodes CC) const {
- // This also handles ATOMIC_SWAP, indicated by Op1==0.
+ // This also handles ATOMIC_SWAP and ATOMIC_STORE, indicated by Op1==0.
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
const BasicBlock *LLVM_BB = BB->getBasicBlock();
@@ -6317,11 +6522,15 @@ ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB,
MachineFunction::iterator It = BB;
++It;
+ bool isStore = (MI->getOpcode() == ARM::ATOMIC_STORE_I64);
+ unsigned offset = (isStore ? -2 : 0);
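+ // (A store pseudo has no dest registers, so the operand indices below
+ // shift down by two; the unsigned wrap-around is deliberate and cancels
+ // in the "offset+2" arithmetic.)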
unsigned destlo = MI->getOperand(0).getReg();
unsigned desthi = MI->getOperand(1).getReg();
- unsigned ptr = MI->getOperand(2).getReg();
- unsigned vallo = MI->getOperand(3).getReg();
- unsigned valhi = MI->getOperand(4).getReg();
+ unsigned ptr = MI->getOperand(offset+2).getReg();
+ unsigned vallo = MI->getOperand(offset+3).getReg();
+ unsigned valhi = MI->getOperand(offset+4).getReg();
+ unsigned OrdIdx = offset + (IsCmpxchg ? 7 : 5);
+ AtomicOrdering Ord = static_cast<AtomicOrdering>(MI->getOperand(OrdIdx).getImm());
DebugLoc dl = MI->getDebugLoc();
bool isThumb2 = Subtarget->isThumb2();
@@ -6330,8 +6539,13 @@ ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB,
MRI.constrainRegClass(destlo, &ARM::rGPRRegClass);
MRI.constrainRegClass(desthi, &ARM::rGPRRegClass);
MRI.constrainRegClass(ptr, &ARM::rGPRRegClass);
+ MRI.constrainRegClass(vallo, &ARM::rGPRRegClass);
+ MRI.constrainRegClass(valhi, &ARM::rGPRRegClass);
}
+ unsigned ldrOpc, strOpc;
+ getExclusiveOperation(8, Ord, isThumb2, ldrOpc, strOpc);
+
MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *contBB = 0, *cont2BB = 0;
if (IsCmpxchg || IsMinMax)
@@ -6371,21 +6585,23 @@ ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB,
// fallthrough --> exitMBB
BB = loopMBB;
- // Load
- if (isThumb2) {
- AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2LDREXD))
- .addReg(destlo, RegState::Define)
- .addReg(desthi, RegState::Define)
- .addReg(ptr));
- } else {
- unsigned GPRPair0 = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
- AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::LDREXD))
- .addReg(GPRPair0, RegState::Define).addReg(ptr));
- // Copy r2/r3 into dest. (This copy will normally be coalesced.)
- BuildMI(BB, dl, TII->get(TargetOpcode::COPY), destlo)
- .addReg(GPRPair0, 0, ARM::gsub_0);
- BuildMI(BB, dl, TII->get(TargetOpcode::COPY), desthi)
- .addReg(GPRPair0, 0, ARM::gsub_1);
+ if (!isStore) {
+ // Load
+ if (isThumb2) {
+ AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc))
+ .addReg(destlo, RegState::Define)
+ .addReg(desthi, RegState::Define)
+ .addReg(ptr));
+ } else {
+ unsigned GPRPair0 = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
+ AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc))
+ .addReg(GPRPair0, RegState::Define).addReg(ptr));
+ // Copy r2/r3 into dest. (This copy will normally be coalesced.)
+ BuildMI(BB, dl, TII->get(TargetOpcode::COPY), destlo)
+ .addReg(GPRPair0, 0, ARM::gsub_0);
+ BuildMI(BB, dl, TII->get(TargetOpcode::COPY), desthi)
+ .addReg(GPRPair0, 0, ARM::gsub_1);
+ }
}
unsigned StoreLo, StoreHi;
@@ -6437,7 +6653,9 @@ ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB,
// Store
if (isThumb2) {
- AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2STREXD), storesuccess)
+ MRI.constrainRegClass(StoreLo, &ARM::rGPRRegClass);
+ MRI.constrainRegClass(StoreHi, &ARM::rGPRRegClass);
+ AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), storesuccess)
.addReg(StoreLo).addReg(StoreHi).addReg(ptr));
} else {
// Marshal a pair...
@@ -6455,7 +6673,7 @@ ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB,
.addImm(ARM::gsub_1);
// ...and store it
- AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::STREXD), storesuccess)
+ AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), storesuccess)
.addReg(StorePair).addReg(ptr));
}
// Cmp+jump
@@ -6476,6 +6694,51 @@ ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB,
return BB;
}
+MachineBasicBlock *
+ARMTargetLowering::EmitAtomicLoad64(MachineInstr *MI,
+                                    MachineBasicBlock *BB) const {
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+
+ unsigned destlo = MI->getOperand(0).getReg();
+ unsigned desthi = MI->getOperand(1).getReg();
+ unsigned ptr = MI->getOperand(2).getReg();
+ AtomicOrdering Ord = static_cast<AtomicOrdering>(MI->getOperand(3).getImm());
+ DebugLoc dl = MI->getDebugLoc();
+ bool isThumb2 = Subtarget->isThumb2();
+
+ MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
+ if (isThumb2) {
+ MRI.constrainRegClass(destlo, &ARM::rGPRRegClass);
+ MRI.constrainRegClass(desthi, &ARM::rGPRRegClass);
+ MRI.constrainRegClass(ptr, &ARM::rGPRRegClass);
+ }
+ unsigned ldrOpc, strOpc;
+ getExclusiveOperation(8, Ord, isThumb2, ldrOpc, strOpc);
+
+ MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(ldrOpc));
+
+ if (isThumb2) {
+ MIB.addReg(destlo, RegState::Define)
+ .addReg(desthi, RegState::Define)
+ .addReg(ptr);
+
+ } else {
+ unsigned GPRPair0 = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
+ MIB.addReg(GPRPair0, RegState::Define).addReg(ptr);
+
+ // Copy GPRPair0 into dest. (This copy will normally be coalesced.)
+ BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), destlo)
+ .addReg(GPRPair0, 0, ARM::gsub_0);
+ BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), desthi)
+ .addReg(GPRPair0, 0, ARM::gsub_1);
+ }
+ AddDefaultPred(MIB);
+
+ MI->eraseFromParent(); // The instruction is gone now.
+
+ return BB;
+}
+
/// SetupEntryBlockForSjLj - Insert code into the entry block that creates and
/// registers the function context.
void ARMTargetLowering::
@@ -7007,8 +7270,109 @@ MachineBasicBlock *OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ) {
llvm_unreachable("Expecting a BB with two successors!");
}
-MachineBasicBlock *ARMTargetLowering::
-EmitStructByval(MachineInstr *MI, MachineBasicBlock *BB) const {
+/// Return the load opcode for a given load size. If the load size is >= 8, a
+/// NEON opcode will be returned.
+static unsigned getLdOpcode(unsigned LdSize, bool IsThumb1, bool IsThumb2) {
+ if (LdSize >= 8)
+ return LdSize == 16 ? ARM::VLD1q32wb_fixed
+ : LdSize == 8 ? ARM::VLD1d32wb_fixed : 0;
+ if (IsThumb1)
+ return LdSize == 4 ? ARM::tLDRi
+ : LdSize == 2 ? ARM::tLDRHi
+ : LdSize == 1 ? ARM::tLDRBi : 0;
+ if (IsThumb2)
+ return LdSize == 4 ? ARM::t2LDR_POST
+ : LdSize == 2 ? ARM::t2LDRH_POST
+ : LdSize == 1 ? ARM::t2LDRB_POST : 0;
+ return LdSize == 4 ? ARM::LDR_POST_IMM
+ : LdSize == 2 ? ARM::LDRH_POST
+ : LdSize == 1 ? ARM::LDRB_POST_IMM : 0;
+}
+
+/// Return the store opcode for a given store size. If the store size is >= 8,
+/// a NEON opcode will be returned.
+static unsigned getStOpcode(unsigned StSize, bool IsThumb1, bool IsThumb2) {
+ if (StSize >= 8)
+ return StSize == 16 ? ARM::VST1q32wb_fixed
+ : StSize == 8 ? ARM::VST1d32wb_fixed : 0;
+ if (IsThumb1)
+ return StSize == 4 ? ARM::tSTRi
+ : StSize == 2 ? ARM::tSTRHi
+ : StSize == 1 ? ARM::tSTRBi : 0;
+ if (IsThumb2)
+ return StSize == 4 ? ARM::t2STR_POST
+ : StSize == 2 ? ARM::t2STRH_POST
+ : StSize == 1 ? ARM::t2STRB_POST : 0;
+ return StSize == 4 ? ARM::STR_POST_IMM
+ : StSize == 2 ? ARM::STRH_POST
+ : StSize == 1 ? ARM::STRB_POST_IMM : 0;
+}
+
+/// Emit a post-increment load operation with the given size. The instructions
+/// will be added to BB at Pos.
+static void emitPostLd(MachineBasicBlock *BB, MachineInstr *Pos,
+ const TargetInstrInfo *TII, DebugLoc dl,
+ unsigned LdSize, unsigned Data, unsigned AddrIn,
+ unsigned AddrOut, bool IsThumb1, bool IsThumb2) {
+ unsigned LdOpc = getLdOpcode(LdSize, IsThumb1, IsThumb2);
+ assert(LdOpc != 0 && "Should have a load opcode");
+ if (LdSize >= 8) {
+ AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
+ .addReg(AddrOut, RegState::Define).addReg(AddrIn)
+ .addImm(0));
+ } else if (IsThumb1) {
+ // load + update AddrIn
+ AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
+ .addReg(AddrIn).addImm(0));
+ MachineInstrBuilder MIB =
+ BuildMI(*BB, Pos, dl, TII->get(ARM::tADDi8), AddrOut);
+ MIB = AddDefaultT1CC(MIB);
+ MIB.addReg(AddrIn).addImm(LdSize);
+ AddDefaultPred(MIB);
+ } else if (IsThumb2) {
+ AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
+ .addReg(AddrOut, RegState::Define).addReg(AddrIn)
+ .addImm(LdSize));
+ } else { // arm
+ AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
+ .addReg(AddrOut, RegState::Define).addReg(AddrIn)
+ .addReg(0).addImm(LdSize));
+ }
+}
+
+/// Emit a post-increment store operation with the given size. The
+/// instructions will be added to BB at Pos.
+static void emitPostSt(MachineBasicBlock *BB, MachineInstr *Pos,
+ const TargetInstrInfo *TII, DebugLoc dl,
+ unsigned StSize, unsigned Data, unsigned AddrIn,
+ unsigned AddrOut, bool IsThumb1, bool IsThumb2) {
+ unsigned StOpc = getStOpcode(StSize, IsThumb1, IsThumb2);
+ assert(StOpc != 0 && "Should have a store opcode");
+ if (StSize >= 8) {
+ AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut)
+ .addReg(AddrIn).addImm(0).addReg(Data));
+ } else if (IsThumb1) {
+ // store + update AddrIn
+ AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(StOpc)).addReg(Data)
+ .addReg(AddrIn).addImm(0));
+ MachineInstrBuilder MIB =
+ BuildMI(*BB, Pos, dl, TII->get(ARM::tADDi8), AddrOut);
+ MIB = AddDefaultT1CC(MIB);
+ MIB.addReg(AddrIn).addImm(StSize);
+ AddDefaultPred(MIB);
+ } else if (IsThumb2) {
+ AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut)
+ .addReg(Data).addReg(AddrIn).addImm(StSize));
+ } else { // arm
+ AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut)
+ .addReg(Data).addReg(AddrIn).addReg(0)
+ .addImm(StSize));
+ }
+}
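+
+// For example (illustrative): a 4-byte unit in ARM mode expands through
+// emitPostLd/emitPostSt above to the post-indexed pair
+//   ldr rScratch, [rSrc], #4
+//   str rScratch, [rDst], #4
+// with the updated address registers feeding the next copy.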
+
+MachineBasicBlock *
+ARMTargetLowering::EmitStructByval(MachineInstr *MI,
+ MachineBasicBlock *BB) const {
// This pseudo instruction has 3 operands: dst, src, size
// We expand it to a loop if size > Subtarget->getMaxInlineSizeThreshold().
// Otherwise, we will generate unrolled scalar copies.
@@ -7023,23 +7387,18 @@ EmitStructByval(MachineInstr *MI, MachineBasicBlock *BB) const {
unsigned Align = MI->getOperand(3).getImm();
DebugLoc dl = MI->getDebugLoc();
- bool isThumb2 = Subtarget->isThumb2();
MachineFunction *MF = BB->getParent();
MachineRegisterInfo &MRI = MF->getRegInfo();
- unsigned ldrOpc, strOpc, UnitSize = 0;
+ unsigned UnitSize = 0;
+ const TargetRegisterClass *TRC = 0;
+ const TargetRegisterClass *VecTRC = 0;
- const TargetRegisterClass *TRC = isThumb2 ?
- (const TargetRegisterClass*)&ARM::tGPRRegClass :
- (const TargetRegisterClass*)&ARM::GPRRegClass;
- const TargetRegisterClass *TRC_Vec = 0;
+ bool IsThumb1 = Subtarget->isThumb1Only();
+ bool IsThumb2 = Subtarget->isThumb2();
if (Align & 1) {
- ldrOpc = isThumb2 ? ARM::t2LDRB_POST : ARM::LDRB_POST_IMM;
- strOpc = isThumb2 ? ARM::t2STRB_POST : ARM::STRB_POST_IMM;
UnitSize = 1;
} else if (Align & 2) {
- ldrOpc = isThumb2 ? ARM::t2LDRH_POST : ARM::LDRH_POST;
- strOpc = isThumb2 ? ARM::t2STRH_POST : ARM::STRH_POST;
UnitSize = 2;
} else {
// Check whether we can use NEON instructions.
@@ -7047,27 +7406,27 @@ EmitStructByval(MachineInstr *MI, MachineBasicBlock *BB) const {
hasAttribute(AttributeSet::FunctionIndex,
Attribute::NoImplicitFloat) &&
Subtarget->hasNEON()) {
- if ((Align % 16 == 0) && SizeVal >= 16) {
- ldrOpc = ARM::VLD1q32wb_fixed;
- strOpc = ARM::VST1q32wb_fixed;
+ if ((Align % 16 == 0) && SizeVal >= 16)
UnitSize = 16;
- TRC_Vec = (const TargetRegisterClass*)&ARM::DPairRegClass;
- }
- else if ((Align % 8 == 0) && SizeVal >= 8) {
- ldrOpc = ARM::VLD1d32wb_fixed;
- strOpc = ARM::VST1d32wb_fixed;
+ else if ((Align % 8 == 0) && SizeVal >= 8)
UnitSize = 8;
- TRC_Vec = (const TargetRegisterClass*)&ARM::DPRRegClass;
- }
}
// Can't use NEON instructions.
- if (UnitSize == 0) {
- ldrOpc = isThumb2 ? ARM::t2LDR_POST : ARM::LDR_POST_IMM;
- strOpc = isThumb2 ? ARM::t2STR_POST : ARM::STR_POST_IMM;
+ if (UnitSize == 0)
UnitSize = 4;
- }
}
+ // Select the correct opcode and register class for the unit-size
+ // loads/stores.
+ bool IsNeon = UnitSize >= 8;
+ TRC = (IsThumb1 || IsThumb2) ? (const TargetRegisterClass *)&ARM::tGPRRegClass
+ : (const TargetRegisterClass *)&ARM::GPRRegClass;
+ if (IsNeon)
+ VecTRC = UnitSize == 16
+ ? (const TargetRegisterClass *)&ARM::DPairRegClass
+ : UnitSize == 8
+ ? (const TargetRegisterClass *)&ARM::DPRRegClass
+ : 0;
+
unsigned BytesLeft = SizeVal % UnitSize;
unsigned LoopSize = SizeVal - BytesLeft;
@@ -7078,34 +7437,13 @@ EmitStructByval(MachineInstr *MI, MachineBasicBlock *BB) const {
unsigned srcIn = src;
unsigned destIn = dest;
for (unsigned i = 0; i < LoopSize; i+=UnitSize) {
- unsigned scratch = MRI.createVirtualRegister(UnitSize >= 8 ? TRC_Vec:TRC);
unsigned srcOut = MRI.createVirtualRegister(TRC);
unsigned destOut = MRI.createVirtualRegister(TRC);
- if (UnitSize >= 8) {
- AddDefaultPred(BuildMI(*BB, MI, dl,
- TII->get(ldrOpc), scratch)
- .addReg(srcOut, RegState::Define).addReg(srcIn).addImm(0));
-
- AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(strOpc), destOut)
- .addReg(destIn).addImm(0).addReg(scratch));
- } else if (isThumb2) {
- AddDefaultPred(BuildMI(*BB, MI, dl,
- TII->get(ldrOpc), scratch)
- .addReg(srcOut, RegState::Define).addReg(srcIn).addImm(UnitSize));
-
- AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(strOpc), destOut)
- .addReg(scratch).addReg(destIn)
- .addImm(UnitSize));
- } else {
- AddDefaultPred(BuildMI(*BB, MI, dl,
- TII->get(ldrOpc), scratch)
- .addReg(srcOut, RegState::Define).addReg(srcIn).addReg(0)
- .addImm(UnitSize));
-
- AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(strOpc), destOut)
- .addReg(scratch).addReg(destIn)
- .addReg(0).addImm(UnitSize));
- }
+ unsigned scratch = MRI.createVirtualRegister(IsNeon ? VecTRC : TRC);
+ emitPostLd(BB, MI, TII, dl, UnitSize, scratch, srcIn, srcOut,
+ IsThumb1, IsThumb2);
+ emitPostSt(BB, MI, TII, dl, UnitSize, scratch, destIn, destOut,
+ IsThumb1, IsThumb2);
srcIn = srcOut;
destIn = destOut;
}
@@ -7113,30 +7451,14 @@ EmitStructByval(MachineInstr *MI, MachineBasicBlock *BB) const {
// Handle the leftover bytes with LDRB and STRB.
// [scratch, srcOut] = LDRB_POST(srcIn, 1)
// [destOut] = STRB_POST(scratch, destIn, 1)
- ldrOpc = isThumb2 ? ARM::t2LDRB_POST : ARM::LDRB_POST_IMM;
- strOpc = isThumb2 ? ARM::t2STRB_POST : ARM::STRB_POST_IMM;
for (unsigned i = 0; i < BytesLeft; i++) {
- unsigned scratch = MRI.createVirtualRegister(TRC);
unsigned srcOut = MRI.createVirtualRegister(TRC);
unsigned destOut = MRI.createVirtualRegister(TRC);
- if (isThumb2) {
- AddDefaultPred(BuildMI(*BB, MI, dl,
- TII->get(ldrOpc),scratch)
- .addReg(srcOut, RegState::Define).addReg(srcIn).addImm(1));
-
- AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(strOpc), destOut)
- .addReg(scratch).addReg(destIn)
- .addReg(0).addImm(1));
- } else {
- AddDefaultPred(BuildMI(*BB, MI, dl,
- TII->get(ldrOpc),scratch)
- .addReg(srcOut, RegState::Define).addReg(srcIn)
- .addReg(0).addImm(1));
-
- AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(strOpc), destOut)
- .addReg(scratch).addReg(destIn)
- .addReg(0).addImm(1));
- }
+ unsigned scratch = MRI.createVirtualRegister(TRC);
+ emitPostLd(BB, MI, TII, dl, 1, scratch, srcIn, srcOut,
+ IsThumb1, IsThumb2);
+ emitPostSt(BB, MI, TII, dl, 1, scratch, destIn, destOut,
+ IsThumb1, IsThumb2);
srcIn = srcOut;
destIn = destOut;
}
@@ -7177,17 +7499,16 @@ EmitStructByval(MachineInstr *MI, MachineBasicBlock *BB) const {
// Load an immediate to varEnd.
unsigned varEnd = MRI.createVirtualRegister(TRC);
- if (isThumb2) {
- unsigned VReg1 = varEnd;
+ if (IsThumb2) {
+ unsigned Vtmp = varEnd;
if ((LoopSize & 0xFFFF0000) != 0)
- VReg1 = MRI.createVirtualRegister(TRC);
- AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2MOVi16), VReg1)
- .addImm(LoopSize & 0xFFFF));
+ Vtmp = MRI.createVirtualRegister(TRC);
+ AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2MOVi16), Vtmp)
+ .addImm(LoopSize & 0xFFFF));
if ((LoopSize & 0xFFFF0000) != 0)
AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2MOVTi16), varEnd)
- .addReg(VReg1)
- .addImm(LoopSize >> 16));
+ .addReg(Vtmp).addImm(LoopSize >> 16));
} else {
MachineConstantPool *ConstantPool = MF->getConstantPool();
Type *Int32Ty = Type::getInt32Ty(MF->getFunction()->getContext());
@@ -7199,10 +7520,12 @@ EmitStructByval(MachineInstr *MI, MachineBasicBlock *BB) const {
Align = getDataLayout()->getTypeAllocSize(C->getType());
unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align);
- AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::LDRcp))
- .addReg(varEnd, RegState::Define)
- .addConstantPoolIndex(Idx)
- .addImm(0));
+ if (IsThumb1)
+ AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(ARM::tLDRpci)).addReg(
+ varEnd, RegState::Define).addConstantPoolIndex(Idx));
+ else
+ AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(ARM::LDRcp)).addReg(
+ varEnd, RegState::Define).addConstantPoolIndex(Idx).addImm(0));
}
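+ // (E.g., illustratively, LoopSize 0x12345 becomes "movw Vtmp, #0x2345"
+ // plus "movt varEnd, #0x1" on Thumb2, while ARM and Thumb1 load the
+ // constant from the constant pool instead.)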
BB->addSuccessor(loopMBB);
@@ -7231,39 +7554,30 @@ EmitStructByval(MachineInstr *MI, MachineBasicBlock *BB) const {
// [scratch, srcLoop] = LDR_POST(srcPhi, UnitSize)
// [destLoop] = STR_POST(scratch, destPhi, UnitSize)
- unsigned scratch = MRI.createVirtualRegister(UnitSize >= 8 ? TRC_Vec:TRC);
- if (UnitSize >= 8) {
- AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), scratch)
- .addReg(srcLoop, RegState::Define).addReg(srcPhi).addImm(0));
-
- AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), destLoop)
- .addReg(destPhi).addImm(0).addReg(scratch));
- } else if (isThumb2) {
- AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), scratch)
- .addReg(srcLoop, RegState::Define).addReg(srcPhi).addImm(UnitSize));
-
- AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), destLoop)
- .addReg(scratch).addReg(destPhi)
- .addImm(UnitSize));
- } else {
- AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), scratch)
- .addReg(srcLoop, RegState::Define).addReg(srcPhi).addReg(0)
- .addImm(UnitSize));
-
- AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), destLoop)
- .addReg(scratch).addReg(destPhi)
- .addReg(0).addImm(UnitSize));
- }
+ unsigned scratch = MRI.createVirtualRegister(IsNeon ? VecTRC : TRC);
+ emitPostLd(BB, BB->end(), TII, dl, UnitSize, scratch, srcPhi, srcLoop,
+ IsThumb1, IsThumb2);
+ emitPostSt(BB, BB->end(), TII, dl, UnitSize, scratch, destPhi, destLoop,
+ IsThumb1, IsThumb2);
// Decrement loop variable by UnitSize.
- MachineInstrBuilder MIB = BuildMI(BB, dl,
- TII->get(isThumb2 ? ARM::t2SUBri : ARM::SUBri), varLoop);
- AddDefaultCC(AddDefaultPred(MIB.addReg(varPhi).addImm(UnitSize)));
- MIB->getOperand(5).setReg(ARM::CPSR);
- MIB->getOperand(5).setIsDef(true);
-
- BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
- .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
+ if (IsThumb1) {
+ MachineInstrBuilder MIB =
+ BuildMI(*BB, BB->end(), dl, TII->get(ARM::tSUBi8), varLoop);
+ MIB = AddDefaultT1CC(MIB);
+ MIB.addReg(varPhi).addImm(UnitSize);
+ AddDefaultPred(MIB);
+ } else {
+ MachineInstrBuilder MIB =
+ BuildMI(*BB, BB->end(), dl,
+ TII->get(IsThumb2 ? ARM::t2SUBri : ARM::SUBri), varLoop);
+ AddDefaultCC(AddDefaultPred(MIB.addReg(varPhi).addImm(UnitSize)));
+ MIB->getOperand(5).setReg(ARM::CPSR);
+ MIB->getOperand(5).setIsDef(true);
+ }
+ BuildMI(*BB, BB->end(), dl,
+ TII->get(IsThumb1 ? ARM::tBcc : IsThumb2 ? ARM::t2Bcc : ARM::Bcc))
+ .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
// loopMBB can loop back to loopMBB or fall through to exitMBB.
BB->addSuccessor(loopMBB);
@@ -7272,34 +7586,19 @@ EmitStructByval(MachineInstr *MI, MachineBasicBlock *BB) const {
// Add epilogue to handle BytesLeft.
BB = exitMBB;
MachineInstr *StartOfExit = exitMBB->begin();
- ldrOpc = isThumb2 ? ARM::t2LDRB_POST : ARM::LDRB_POST_IMM;
- strOpc = isThumb2 ? ARM::t2STRB_POST : ARM::STRB_POST_IMM;
// [scratch, srcOut] = LDRB_POST(srcLoop, 1)
// [destOut] = STRB_POST(scratch, destLoop, 1)
unsigned srcIn = srcLoop;
unsigned destIn = destLoop;
for (unsigned i = 0; i < BytesLeft; i++) {
- unsigned scratch = MRI.createVirtualRegister(TRC);
unsigned srcOut = MRI.createVirtualRegister(TRC);
unsigned destOut = MRI.createVirtualRegister(TRC);
- if (isThumb2) {
- AddDefaultPred(BuildMI(*BB, StartOfExit, dl,
- TII->get(ldrOpc),scratch)
- .addReg(srcOut, RegState::Define).addReg(srcIn).addImm(1));
-
- AddDefaultPred(BuildMI(*BB, StartOfExit, dl, TII->get(strOpc), destOut)
- .addReg(scratch).addReg(destIn)
- .addImm(1));
- } else {
- AddDefaultPred(BuildMI(*BB, StartOfExit, dl,
- TII->get(ldrOpc),scratch)
- .addReg(srcOut, RegState::Define).addReg(srcIn).addReg(0).addImm(1));
-
- AddDefaultPred(BuildMI(*BB, StartOfExit, dl, TII->get(strOpc), destOut)
- .addReg(scratch).addReg(destIn)
- .addReg(0).addImm(1));
- }
+ unsigned scratch = MRI.createVirtualRegister(TRC);
+ emitPostLd(BB, StartOfExit, TII, dl, 1, scratch, srcIn, srcOut,
+ IsThumb1, IsThumb2);
+ emitPostSt(BB, StartOfExit, TII, dl, 1, scratch, destIn, destOut,
+ IsThumb1, IsThumb2);
srcIn = srcOut;
destIn = destOut;
}
@@ -7449,46 +7748,49 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
case ARM::ATOMIC_CMP_SWAP_I16: return EmitAtomicCmpSwap(MI, BB, 2);
case ARM::ATOMIC_CMP_SWAP_I32: return EmitAtomicCmpSwap(MI, BB, 4);
+ case ARM::ATOMIC_LOAD_I64:
+ return EmitAtomicLoad64(MI, BB);
- case ARM::ATOMADD6432:
+ case ARM::ATOMIC_LOAD_ADD_I64:
return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr,
isThumb2 ? ARM::t2ADCrr : ARM::ADCrr,
/*NeedsCarry*/ true);
- case ARM::ATOMSUB6432:
+ case ARM::ATOMIC_LOAD_SUB_I64:
return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr,
isThumb2 ? ARM::t2SBCrr : ARM::SBCrr,
/*NeedsCarry*/ true);
- case ARM::ATOMOR6432:
+ case ARM::ATOMIC_LOAD_OR_I64:
return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr,
isThumb2 ? ARM::t2ORRrr : ARM::ORRrr);
- case ARM::ATOMXOR6432:
+ case ARM::ATOMIC_LOAD_XOR_I64:
return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2EORrr : ARM::EORrr,
isThumb2 ? ARM::t2EORrr : ARM::EORrr);
- case ARM::ATOMAND6432:
+ case ARM::ATOMIC_LOAD_AND_I64:
return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr,
isThumb2 ? ARM::t2ANDrr : ARM::ANDrr);
- case ARM::ATOMSWAP6432:
+ case ARM::ATOMIC_STORE_I64:
+ case ARM::ATOMIC_SWAP_I64:
return EmitAtomicBinary64(MI, BB, 0, 0, false);
- case ARM::ATOMCMPXCHG6432:
+ case ARM::ATOMIC_CMP_SWAP_I64:
return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr,
isThumb2 ? ARM::t2SBCrr : ARM::SBCrr,
/*NeedsCarry*/ false, /*IsCmpxchg*/true);
- case ARM::ATOMMIN6432:
+ case ARM::ATOMIC_LOAD_MIN_I64:
return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr,
isThumb2 ? ARM::t2SBCrr : ARM::SBCrr,
/*NeedsCarry*/ true, /*IsCmpxchg*/false,
/*IsMinMax*/ true, ARMCC::LT);
- case ARM::ATOMMAX6432:
+ case ARM::ATOMIC_LOAD_MAX_I64:
return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr,
isThumb2 ? ARM::t2SBCrr : ARM::SBCrr,
/*NeedsCarry*/ true, /*IsCmpxchg*/false,
/*IsMinMax*/ true, ARMCC::GE);
- case ARM::ATOMUMIN6432:
+ case ARM::ATOMIC_LOAD_UMIN_I64:
return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr,
isThumb2 ? ARM::t2SBCrr : ARM::SBCrr,
/*NeedsCarry*/ true, /*IsCmpxchg*/false,
/*IsMinMax*/ true, ARMCC::LO);
- case ARM::ATOMUMAX6432:
+ case ARM::ATOMIC_LOAD_UMAX_I64:
return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr,
isThumb2 ? ARM::t2SBCrr : ARM::SBCrr,
/*NeedsCarry*/ true, /*IsCmpxchg*/false,
@@ -8197,6 +8499,13 @@ static SDValue PerformSUBCombine(SDNode *N,
/// is faster than
/// vadd d3, d0, d1
/// vmul d3, d3, d2
+/// However, for (A + B) * (A + B),
+/// vadd d2, d0, d1
+/// vmul d3, d0, d2
+/// vmla d3, d1, d2
+/// is slower than
+/// vadd d2, d0, d1
+/// vmul d3, d2, d2
static SDValue PerformVMULCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
const ARMSubtarget *Subtarget) {
@@ -8216,6 +8525,9 @@ static SDValue PerformVMULCombine(SDNode *N,
std::swap(N0, N1);
}
+ if (N0 == N1)
+ return SDValue();
+
EVT VT = N->getValueType(0);
SDLoc DL(N);
SDValue N00 = N0->getOperand(0);
@@ -10548,6 +10860,8 @@ ARMTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
case 'r':
return RCPair(0U, &ARM::GPRRegClass);
case 'w':
+ if (VT == MVT::Other)
+ break;
if (VT == MVT::f32)
return RCPair(0U, &ARM::SPRRegClass);
if (VT.getSizeInBits() == 64)
@@ -10556,6 +10870,8 @@ ARMTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
return RCPair(0U, &ARM::QPRRegClass);
break;
case 'x':
+ if (VT == MVT::Other)
+ break;
if (VT == MVT::f32)
return RCPair(0U, &ARM::SPR_8RegClass);
if (VT.getSizeInBits() == 64)
diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h
index 44c769f..90facdd 100644
--- a/lib/Target/ARM/ARMISelLowering.h
+++ b/lib/Target/ARM/ARMISelLowering.h
@@ -52,6 +52,7 @@ namespace llvm {
BR_JT, // Jumptable branch.
BR2_JT, // Jumptable branch (2 level - jumptable entry is a jump).
RET_FLAG, // Return with a flag operand.
+ INTRET_FLAG, // Interrupt return with an LR-offset and a flag operand.
PIC_ADD, // Add with a PC operand and a PIC label.
@@ -94,7 +95,6 @@ namespace llvm {
DYN_ALLOC, // Dynamic allocation on the stack.
- MEMBARRIER, // Memory barrier (DMB)
MEMBARRIER_MCR, // Memory barrier (MCR)
PRELOAD, // Preload
@@ -186,6 +186,8 @@ namespace llvm {
// Floating-point max and min:
FMAX,
FMIN,
+ VMAXNM,
+ VMINNM,
// Bit-field insert
BFI,
@@ -222,21 +224,7 @@ namespace llvm {
VST4_UPD,
VST2LN_UPD,
VST3LN_UPD,
- VST4LN_UPD,
-
- // 64-bit atomic ops (value split into two registers)
- ATOMADD64_DAG,
- ATOMSUB64_DAG,
- ATOMOR64_DAG,
- ATOMXOR64_DAG,
- ATOMAND64_DAG,
- ATOMNAND64_DAG,
- ATOMSWAP64_DAG,
- ATOMCMPXCHG64_DAG,
- ATOMMIN64_DAG,
- ATOMUMIN64_DAG,
- ATOMMAX64_DAG,
- ATOMUMAX64_DAG
+ VST4LN_UPD
};
}
@@ -375,6 +363,12 @@ namespace llvm {
/// be used for loads / stores from the global.
virtual unsigned getMaximalGlobalOffset() const;
+ /// Returns true if a cast between SrcAS and DestAS is a noop.
+ virtual bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const {
+ // Addrspacecasts are always noops.
+ return true;
+ }
+
/// createFastISel - This method returns a target specific FastISel object,
/// or null if the target does not support "fast" ISel.
virtual FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
@@ -460,6 +454,7 @@ namespace llvm {
const ARMSubtarget *ST) const;
SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
const ARMSubtarget *ST) const;
+ SDValue LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerDivRem(SDValue Op, SelectionDAG &DAG) const;
/// isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster
@@ -573,6 +568,8 @@ namespace llvm {
unsigned Size,
bool signExtend,
ARMCC::CondCodes Cond) const;
+ MachineBasicBlock *EmitAtomicLoad64(MachineInstr *MI,
+ MachineBasicBlock *BB) const;
void SetupEntryBlockForSjLj(MachineInstr *MI,
MachineBasicBlock *MBB,
diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td
index 1349476..f93504f 100644
--- a/lib/Target/ARM/ARMInstrFormats.td
+++ b/lib/Target/ARM/ARMInstrFormats.td
@@ -155,6 +155,16 @@ def pred : PredicateOperand<OtherVT, (ops i32imm, i32imm),
let DecoderMethod = "DecodePredicateOperand";
}
+// Selectable predicate operand for CMOV instructions. We can't use a normal
+// predicate because the default values interfere with instruction selection. In
+// all other respects it is identical though: pseudo-instruction expansion
+// relies on the MachineOperands being compatible.
+def cmovpred : Operand<i32>, PredicateOp,
+ ComplexPattern<i32, 2, "SelectCMOVPred"> {
+ let MIOperandInfo = (ops i32imm, i32imm);
+ let PrintMethod = "printPredicateOperand";
+}
+
// Conditional code result for instructions whose 's' bit is set, e.g. subs.
def CCOutOperand : AsmOperandClass { let Name = "CCOut"; }
def cc_out : OptionalDefOperand<OtherVT, (ops CCR), (ops (i32 zero_reg))> {
@@ -237,6 +247,8 @@ class t2InstAlias<string Asm, dag Result, bit Emit = 0b1>
: InstAlias<Asm, Result, Emit>, Requires<[IsThumb2]>;
class VFP2InstAlias<string Asm, dag Result, bit Emit = 0b1>
: InstAlias<Asm, Result, Emit>, Requires<[HasVFP2]>;
+class VFP2DPInstAlias<string Asm, dag Result, bit Emit = 0b1>
+ : InstAlias<Asm, Result, Emit>, Requires<[HasVFP2,HasDPVFP]>;
class VFP3InstAlias<string Asm, dag Result, bit Emit = 0b1>
: InstAlias<Asm, Result, Emit>, Requires<[HasVFP3]>;
class NEONInstAlias<string Asm, dag Result, bit Emit = 0b1>
@@ -490,8 +502,7 @@ class JTI<dag oops, dag iops, InstrItinClass itin,
: XI<oops, iops, AddrModeNone, 0, IndexModeNone, BrMiscFrm, itin,
asm, "", pattern>;
-// Atomic load/store instructions
-class AIldrex<bits<2> opcod, dag oops, dag iops, InstrItinClass itin,
+class AIldr_ex_or_acq<bits<2> opcod, bits<2> opcod2, dag oops, dag iops,
+                      InstrItinClass itin,
string opc, string asm, list<dag> pattern>
: I<oops, iops, AddrModeNone, 4, IndexModeNone, LdStExFrm, itin,
opc, asm, "", pattern> {
@@ -502,23 +513,52 @@ class AIldrex<bits<2> opcod, dag oops, dag iops, InstrItinClass itin,
let Inst{20} = 1;
let Inst{19-16} = addr;
let Inst{15-12} = Rt;
- let Inst{11-0} = 0b111110011111;
+ let Inst{11-10} = 0b11;
+ let Inst{9-8} = opcod2;
+ let Inst{7-0} = 0b10011111;
}
-class AIstrex<bits<2> opcod, dag oops, dag iops, InstrItinClass itin,
+class AIstr_ex_or_rel<bits<2> opcod, bits<2> opcod2, dag oops, dag iops,
+                      InstrItinClass itin,
string opc, string asm, list<dag> pattern>
: I<oops, iops, AddrModeNone, 4, IndexModeNone, LdStExFrm, itin,
opc, asm, "", pattern> {
- bits<4> Rd;
bits<4> Rt;
bits<4> addr;
let Inst{27-23} = 0b00011;
let Inst{22-21} = opcod;
let Inst{20} = 0;
let Inst{19-16} = addr;
- let Inst{15-12} = Rd;
- let Inst{11-4} = 0b11111001;
+ let Inst{11-10} = 0b11;
+ let Inst{9-8} = opcod2;
+ let Inst{7-4} = 0b1001;
let Inst{3-0} = Rt;
}
+// Atomic load/store instructions
+class AIldrex<bits<2> opcod, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : AIldr_ex_or_acq<opcod, 0b11, oops, iops, itin, opc, asm, pattern>;
+
+class AIstrex<bits<2> opcod, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : AIstr_ex_or_rel<opcod, 0b11, oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rd;
+ let Inst{15-12} = Rd;
+}
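+
+// (Sanity check, illustrative: with opcod2 = 0b11, AIldrex reproduces the old
+// fixed field Inst{11-0} = 0b111110011111; the v8 variants below substitute
+// 0b10 or 0b00 in bits 9-8.)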
+
+// Exclusive load/store instructions
+
+class AIldaex<bits<2> opcod, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : AIldr_ex_or_acq<opcod, 0b10, oops, iops, itin, opc, asm, pattern>,
+ Requires<[IsARM, HasV8]>;
+
+class AIstlex<bits<2> opcod, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : AIstr_ex_or_rel<opcod, 0b10, oops, iops, itin, opc, asm, pattern>,
+ Requires<[IsARM, HasV8]> {
+ bits<4> Rd;
+ let Inst{15-12} = Rd;
+}
+
class AIswp<bit b, dag oops, dag iops, string opc, list<dag> pattern>
: AI<oops, iops, MiscFrm, NoItinerary, opc, "\t$Rt, $Rt2, $addr", pattern> {
bits<4> Rt;
@@ -535,6 +575,18 @@ class AIswp<bit b, dag oops, dag iops, string opc, list<dag> pattern>
let Unpredictable{11-8} = 0b1111;
let DecoderMethod = "DecodeSwap";
}
+// Acquire/Release load/store instructions
+class AIldracq<bits<2> opcod, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : AIldr_ex_or_acq<opcod, 0b00, oops, iops, itin, opc, asm, pattern>,
+ Requires<[IsARM, HasV8]>;
+
+class AIstrrel<bits<2> opcod, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : AIstr_ex_or_rel<opcod, 0b00, oops, iops, itin, opc, asm, pattern>,
+ Requires<[IsARM, HasV8]> {
+ let Inst{15-12} = 0b1111;
+}
// addrmode1 instructions
class AI1<bits<4> opcod, dag oops, dag iops, Format f, InstrItinClass itin,
@@ -1520,6 +1572,8 @@ class ADuI<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, bits<2> opcod4,
let Inst{8} = 1; // Double precision
let Inst{7-6} = opcod4;
let Inst{4} = opcod5;
+
+ let Predicates = [HasVFP2, HasDPVFP];
}
// Double precision, unary, not-predicated
@@ -1572,6 +1626,8 @@ class ADbI<bits<5> opcod1, bits<2> opcod2, bit op6, bit op4, dag oops,
let Inst{8} = 1; // Double precision
let Inst{6} = op6;
let Inst{4} = op4;
+
+ let Predicates = [HasVFP2, HasDPVFP];
}
// FP, binary, not predicated
@@ -1601,6 +1657,8 @@ class ADbInp<bits<5> opcod1, bits<2> opcod2, bit opcod3, dag oops, dag iops,
let Inst{8} = 1; // double precision
let Inst{6} = opcod3;
let Inst{4} = 0;
+
+ let Predicates = [HasVFP2, HasDPVFP];
}
// Single precision, unary, predicated
@@ -1965,7 +2023,7 @@ class N2V<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16,
}
// Same as N2V but not predicated.
-class N2Vnp<bits<2> op17_16, bits<3> op10_8, bit op7, bit op6,
+class N2Vnp<bits<2> op19_18, bits<2> op17_16, bits<3> op10_8, bit op7, bit op6,
dag oops, dag iops, InstrItinClass itin, string OpcodeStr,
string Dt, ValueType ResTy, ValueType OpTy, list<dag> pattern>
: NeonInp<oops, iops, AddrModeNone, IndexModeNone, N2RegFrm, itin,
@@ -1982,7 +2040,7 @@ class N2Vnp<bits<2> op17_16, bits<3> op10_8, bit op7, bit op6,
// Encode constant bits
let Inst{27-23} = 0b00111;
let Inst{21-20} = 0b11;
- let Inst{19-18} = 0b10;
+ let Inst{19-18} = op19_18;
let Inst{17-16} = op17_16;
let Inst{11} = 0;
let Inst{10-8} = op10_8;
diff --git a/lib/Target/ARM/ARMInstrInfo.cpp b/lib/Target/ARM/ARMInstrInfo.cpp
index 8cdb853..df867b4 100644
--- a/lib/Target/ARM/ARMInstrInfo.cpp
+++ b/lib/Target/ARM/ARMInstrInfo.cpp
@@ -22,6 +22,7 @@
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/MC/MCAsmInfo.h"
@@ -119,17 +120,29 @@ namespace {
MachineBasicBlock &FirstMBB = MF.front();
MachineBasicBlock::iterator MBBI = FirstMBB.begin();
DebugLoc DL = FirstMBB.findDebugLoc(MBBI);
- unsigned GlobalBaseReg = AFI->getGlobalBaseReg();
+ unsigned TempReg =
+ MF.getRegInfo().createVirtualRegister(&ARM::rGPRRegClass);
unsigned Opc = TM->getSubtarget<ARMSubtarget>().isThumb2() ?
ARM::t2LDRpci : ARM::LDRcp;
const TargetInstrInfo &TII = *TM->getInstrInfo();
MachineInstrBuilder MIB = BuildMI(FirstMBB, MBBI, DL,
- TII.get(Opc), GlobalBaseReg)
+ TII.get(Opc), TempReg)
.addConstantPoolIndex(Idx);
if (Opc == ARM::LDRcp)
MIB.addImm(0);
AddDefaultPred(MIB);
+ // Fix the GOT address by adding pc.
+ unsigned GlobalBaseReg = AFI->getGlobalBaseReg();
+ Opc = TM->getSubtarget<ARMSubtarget>().isThumb2() ? ARM::tPICADD
+ : ARM::PICADD;
+ MIB = BuildMI(FirstMBB, MBBI, DL, TII.get(Opc), GlobalBaseReg)
+ .addReg(TempReg)
+ .addImm(ARMPCLabelIndex);
+ if (Opc == ARM::PICADD)
+ AddDefaultPred(MIB);
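+ // (Illustratively, for PIC this now materializes the global base as
+ //   ldr rTmp, <cp: _GLOBAL_OFFSET_TABLE_ offset>
+ //   add rBase, pc, rTmp
+ // rather than leaving the raw constant-pool value in the base register.)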
+
return true;
}
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index c243402..2042c04 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -71,6 +71,9 @@ def SDT_ARMTCRET : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
def SDT_ARMBFI : SDTypeProfile<1, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>,
SDTCisVT<2, i32>, SDTCisVT<3, i32>]>;
+def SDT_ARMVMAXNM : SDTypeProfile<1, 2, [SDTCisFP<0>, SDTCisFP<1>, SDTCisFP<2>]>;
+def SDT_ARMVMINNM : SDTypeProfile<1, 2, [SDTCisFP<0>, SDTCisFP<1>, SDTCisFP<2>]>;
+
def SDTBinaryArithWithFlags : SDTypeProfile<2, 2,
[SDTCisSameAs<0, 2>,
SDTCisSameAs<0, 3>,
@@ -118,7 +121,8 @@ def ARMcall_nolink : SDNode<"ARMISD::CALL_NOLINK", SDT_ARMcall,
def ARMretflag : SDNode<"ARMISD::RET_FLAG", SDTNone,
[SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
-
+def ARMintretflag : SDNode<"ARMISD::INTRET_FLAG", SDT_ARMcall,
+ [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
def ARMcmov : SDNode<"ARMISD::CMOV", SDT_ARMCMov,
[SDNPInGlue]>;
@@ -162,8 +166,6 @@ def ARMeh_sjlj_longjmp: SDNode<"ARMISD::EH_SJLJ_LONGJMP",
SDT_ARMEH_SJLJ_Longjmp,
[SDNPHasChain, SDNPSideEffect]>;
-def ARMMemBarrier : SDNode<"ARMISD::MEMBARRIER", SDT_ARMMEMBARRIER,
- [SDNPHasChain, SDNPSideEffect]>;
def ARMMemBarrierMCR : SDNode<"ARMISD::MEMBARRIER_MCR", SDT_ARMMEMBARRIER,
[SDNPHasChain, SDNPSideEffect]>;
def ARMPreload : SDNode<"ARMISD::PRELOAD", SDT_ARMPREFETCH,
@@ -174,9 +176,11 @@ def ARMrbit : SDNode<"ARMISD::RBIT", SDTIntUnaryOp>;
def ARMtcret : SDNode<"ARMISD::TC_RETURN", SDT_ARMTCRET,
[SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
-
def ARMbfi : SDNode<"ARMISD::BFI", SDT_ARMBFI>;
+def ARMvmaxnm : SDNode<"ARMISD::VMAXNM", SDT_ARMVMAXNM, []>;
+def ARMvminnm : SDNode<"ARMISD::VMINNM", SDT_ARMVMINNM, []>;
+
//===----------------------------------------------------------------------===//
// ARM Instruction Predicate Definitions.
//
@@ -189,6 +193,9 @@ def HasV5TE : Predicate<"Subtarget->hasV5TEOps()">,
def HasV6 : Predicate<"Subtarget->hasV6Ops()">,
AssemblerPredicate<"HasV6Ops", "armv6">;
def NoV6 : Predicate<"!Subtarget->hasV6Ops()">;
+def HasV6M : Predicate<"Subtarget->hasV6MOps()">,
+ AssemblerPredicate<"HasV6MOps",
+ "armv6m or armv6t2">;
def HasV6T2 : Predicate<"Subtarget->hasV6T2Ops()">,
AssemblerPredicate<"HasV6T2Ops", "armv6t2">;
def NoV6T2 : Predicate<"!Subtarget->hasV6T2Ops()">;
@@ -196,6 +203,8 @@ def HasV7 : Predicate<"Subtarget->hasV7Ops()">,
AssemblerPredicate<"HasV7Ops", "armv7">;
def HasV8 : Predicate<"Subtarget->hasV8Ops()">,
AssemblerPredicate<"HasV8Ops", "armv8">;
+def PreV8 : Predicate<"!Subtarget->hasV8Ops()">,
+ AssemblerPredicate<"!HasV8Ops", "armv7 or earlier">;
def NoVFP : Predicate<"!Subtarget->hasVFP2()">;
def HasVFP2 : Predicate<"Subtarget->hasVFP2()">,
AssemblerPredicate<"FeatureVFP2", "VFP2">;
@@ -203,16 +212,23 @@ def HasVFP3 : Predicate<"Subtarget->hasVFP3()">,
AssemblerPredicate<"FeatureVFP3", "VFP3">;
def HasVFP4 : Predicate<"Subtarget->hasVFP4()">,
AssemblerPredicate<"FeatureVFP4", "VFP4">;
-def HasV8FP : Predicate<"Subtarget->hasV8FP()">,
- AssemblerPredicate<"FeatureV8FP", "V8FP">;
+def HasDPVFP : Predicate<"!Subtarget->isFPOnlySP()">,
+ AssemblerPredicate<"!FeatureVFPOnlySP",
+ "double precision VFP">;
+def HasFPARMv8 : Predicate<"Subtarget->hasFPARMv8()">,
+ AssemblerPredicate<"FeatureFPARMv8", "FPARMv8">;
def HasNEON : Predicate<"Subtarget->hasNEON()">,
AssemblerPredicate<"FeatureNEON", "NEON">;
+def HasCrypto : Predicate<"Subtarget->hasCrypto()">,
+ AssemblerPredicate<"FeatureCrypto", "crypto">;
+def HasCRC : Predicate<"Subtarget->hasCRC()">,
+ AssemblerPredicate<"FeatureCRC", "crc">;
def HasFP16 : Predicate<"Subtarget->hasFP16()">,
AssemblerPredicate<"FeatureFP16","half-float">;
def HasDivide : Predicate<"Subtarget->hasDivide()">,
- AssemblerPredicate<"FeatureHWDiv", "divide">;
+ AssemblerPredicate<"FeatureHWDiv", "divide in THUMB">;
def HasDivideInARM : Predicate<"Subtarget->hasDivideInARMMode()">,
- AssemblerPredicate<"FeatureHWDivARM">;
+ AssemblerPredicate<"FeatureHWDivARM", "divide in ARM">;
def HasT2ExtractPack : Predicate<"Subtarget->hasT2ExtractPack()">,
AssemblerPredicate<"FeatureT2XtPk",
"pack/extract">;
@@ -237,10 +253,10 @@ def IsThumb2 : Predicate<"Subtarget->isThumb2()">,
AssemblerPredicate<"ModeThumb,FeatureThumb2",
"thumb2">;
def IsMClass : Predicate<"Subtarget->isMClass()">,
- AssemblerPredicate<"FeatureMClass", "armv7m">;
-def IsARClass : Predicate<"!Subtarget->isMClass()">,
+ AssemblerPredicate<"FeatureMClass", "armv*m">;
+def IsNotMClass : Predicate<"!Subtarget->isMClass()">,
AssemblerPredicate<"!FeatureMClass",
- "armv7a/r">;
+ "!armv*m">;
def IsARM : Predicate<"!Subtarget->isThumb()">,
AssemblerPredicate<"!ModeThumb", "arm-mode">;
def IsIOS : Predicate<"Subtarget->isTargetIOS()">;
@@ -587,17 +603,6 @@ def imm0_1 : Operand<i32> { let ParserMatchClass = Imm0_1AsmOperand; }
def Imm0_3AsmOperand: ImmAsmOperand { let Name = "Imm0_3"; }
def imm0_3 : Operand<i32> { let ParserMatchClass = Imm0_3AsmOperand; }
-/// imm0_4 predicate - Immediate in the range [0,4].
-def Imm0_4AsmOperand : ImmAsmOperand
-{
- let Name = "Imm0_4";
- let DiagnosticType = "ImmRange0_4";
-}
-def imm0_4 : Operand<i32>, ImmLeaf<i32, [{ return Imm >= 0 && Imm < 5; }]> {
- let ParserMatchClass = Imm0_4AsmOperand;
- let DecoderMethod = "DecodeImm0_4";
-}
-
/// imm0_7 predicate - Immediate in the range [0,7].
def Imm0_7AsmOperand: ImmAsmOperand { let Name = "Imm0_7"; }
def imm0_7 : Operand<i32>, ImmLeaf<i32, [{
@@ -677,6 +682,15 @@ def imm0_63 : Operand<i32>, ImmLeaf<i32, [{
let ParserMatchClass = Imm0_63AsmOperand;
}
+/// imm0_239 predicate - Immediate in the range [0,239].
+def Imm0_239AsmOperand : ImmAsmOperand {
+ let Name = "Imm0_239";
+ let DiagnosticType = "ImmRange0_239";
+}
+def imm0_239 : Operand<i32>, ImmLeaf<i32, [{ return Imm >= 0 && Imm < 240; }]> {
+ let ParserMatchClass = Imm0_239AsmOperand;
+}
+
/// imm0_255 predicate - Immediate in the range [0,255].
def Imm0_255AsmOperand : ImmAsmOperand { let Name = "Imm0_255"; }
def imm0_255 : Operand<i32>, ImmLeaf<i32, [{ return Imm >= 0 && Imm < 256; }]> {
@@ -708,6 +722,11 @@ def imm0_65535_expr : Operand<i32> {
let ParserMatchClass = Imm0_65535ExprAsmOperand;
}
+def Imm256_65535ExprAsmOperand: ImmAsmOperand { let Name = "Imm256_65535Expr"; }
+def imm256_65535_expr : Operand<i32> {
+ let ParserMatchClass = Imm256_65535ExprAsmOperand;
+}
+
/// imm24b - True if the 32-bit immediate is encodable in 24 bits.
def Imm24bitAsmOperand: ImmAsmOperand { let Name = "Imm24bit"; }
def imm24b : Operand<i32>, ImmLeaf<i32, [{
@@ -1665,53 +1684,11 @@ PseudoInst<(outs), (ins i32imm:$amt, pred:$p), NoItinerary,
[(ARMcallseq_start timm:$amt)]>;
}
-// Atomic pseudo-insts which will be lowered to ldrexd/strexd loops.
-// (These pseudos use a hand-written selection code).
-let usesCustomInserter = 1, Defs = [CPSR], mayLoad = 1, mayStore = 1 in {
-def ATOMOR6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2),
- (ins GPR:$addr, GPR:$src1, GPR:$src2),
- NoItinerary, []>;
-def ATOMXOR6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2),
- (ins GPR:$addr, GPR:$src1, GPR:$src2),
- NoItinerary, []>;
-def ATOMADD6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2),
- (ins GPR:$addr, GPR:$src1, GPR:$src2),
- NoItinerary, []>;
-def ATOMSUB6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2),
- (ins GPR:$addr, GPR:$src1, GPR:$src2),
- NoItinerary, []>;
-def ATOMNAND6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2),
- (ins GPR:$addr, GPR:$src1, GPR:$src2),
- NoItinerary, []>;
-def ATOMAND6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2),
- (ins GPR:$addr, GPR:$src1, GPR:$src2),
- NoItinerary, []>;
-def ATOMSWAP6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2),
- (ins GPR:$addr, GPR:$src1, GPR:$src2),
- NoItinerary, []>;
-def ATOMCMPXCHG6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2),
- (ins GPR:$addr, GPR:$cmp1, GPR:$cmp2,
- GPR:$set1, GPR:$set2),
- NoItinerary, []>;
-def ATOMMIN6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2),
- (ins GPR:$addr, GPR:$src1, GPR:$src2),
- NoItinerary, []>;
-def ATOMUMIN6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2),
- (ins GPR:$addr, GPR:$src1, GPR:$src2),
- NoItinerary, []>;
-def ATOMMAX6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2),
- (ins GPR:$addr, GPR:$src1, GPR:$src2),
- NoItinerary, []>;
-def ATOMUMAX6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2),
- (ins GPR:$addr, GPR:$src1, GPR:$src2),
- NoItinerary, []>;
-}
-
-def HINT : AI<(outs), (ins imm0_4:$imm), MiscFrm, NoItinerary,
+def HINT : AI<(outs), (ins imm0_239:$imm), MiscFrm, NoItinerary,
"hint", "\t$imm", []>, Requires<[IsARM, HasV6]> {
- bits<3> imm;
- let Inst{27-3} = 0b0011001000001111000000000;
- let Inst{2-0} = imm;
+ bits<8> imm;
+ let Inst{27-8} = 0b00110010000011110000;
+ let Inst{7-0} = imm;
}
def : InstAlias<"nop$p", (HINT 0, pred:$p)>, Requires<[IsARM, HasV6T2]>;
@@ -1719,6 +1696,9 @@ def : InstAlias<"yield$p", (HINT 1, pred:$p)>, Requires<[IsARM, HasV6T2]>;
def : InstAlias<"wfe$p", (HINT 2, pred:$p)>, Requires<[IsARM, HasV6T2]>;
def : InstAlias<"wfi$p", (HINT 3, pred:$p)>, Requires<[IsARM, HasV6T2]>;
def : InstAlias<"sev$p", (HINT 4, pred:$p)>, Requires<[IsARM, HasV6T2]>;
+def : InstAlias<"sevl$p", (HINT 5, pred:$p)>, Requires<[IsARM, HasV8]>;
+
+def : Pat<(int_arm_sevl), (HINT 5)>;
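
NOTE: the new alias and pattern make the v8-only hint reachable from IR. A minimal sketch (the intrinsic name is taken from the Pat above; SEVL requires ARMv8):

    declare void @llvm.arm.sevl()

    define void @send_event_locally() {
      call void @llvm.arm.sevl()    ; matches the Pat above and selects HINT 5
      ret void
    }
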
def SEL : AI<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), DPFrm, NoItinerary, "sel",
"\t$Rd, $Rn, $Rm", []>, Requires<[IsARM, HasV6]> {
@@ -1746,6 +1726,16 @@ def BKPT : AInoP<(outs), (ins imm0_65535:$val), MiscFrm, NoItinerary,
let Inst{7-4} = 0b0111;
}
+def HLT : AInoP<(outs), (ins imm0_65535:$val), MiscFrm, NoItinerary,
+ "hlt", "\t$val", []>, Requires<[IsARM, HasV8]> {
+ bits<16> val;
+ let Inst{3-0} = val{3-0};
+ let Inst{19-8} = val{15-4};
+ let Inst{27-20} = 0b00010000;
+ let Inst{31-28} = 0xe; // AL
+ let Inst{7-4} = 0b0111;
+}
+
// Change Processor State
// FIXME: We should use InstAlias to handle the optional operands.
class CPS<dag iops, string asm_ops>
@@ -1820,7 +1810,7 @@ defm PLDW : APreLoad<0, 1, "pldw">, Requires<[IsARM,HasV7,HasMP]>;
defm PLI : APreLoad<1, 0, "pli">, Requires<[IsARM,HasV7]>;
def SETEND : AXI<(outs), (ins setend_op:$end), MiscFrm, NoItinerary,
- "setend\t$end", []>, Requires<[IsARM]> {
+ "setend\t$end", []>, Requires<[IsARM]>, Deprecated<HasV8Ops> {
bits<1> end;
let Inst{31-10} = 0b1111000100000001000000;
let Inst{9} = end;
@@ -1953,6 +1943,12 @@ let isReturn = 1, isTerminator = 1, isBarrier = 1 in {
Requires<[IsARM, NoV4T]>, Sched<[WriteBr]> {
let Inst{27-0} = 0b0001101000001111000000001110;
}
+
+  // Exception return: N.B. this doesn't set CPSR as far as we're concerned
+  // (it sets the user-space one).
+ def SUBS_PC_LR : ARMPseudoInst<(outs), (ins i32imm:$offset, pred:$p),
+ 4, IIC_Br,
+ [(ARMintretflag imm:$offset)]>;
}
// Indirect branches
@@ -2283,6 +2279,13 @@ def LDRD : AI3ld<0b1101, 0, (outs GPR:$Rd, GPR:$dst2),
[]>, Requires<[IsARM, HasV5TE]>;
}
+def LDA : AIldracq<0b00, (outs GPR:$Rt), (ins addr_offset_none:$addr),
+ NoItinerary, "lda", "\t$Rt, $addr", []>;
+def LDAB : AIldracq<0b10, (outs GPR:$Rt), (ins addr_offset_none:$addr),
+ NoItinerary, "ldab", "\t$Rt, $addr", []>;
+def LDAH : AIldracq<0b11, (outs GPR:$Rt), (ins addr_offset_none:$addr),
+ NoItinerary, "ldah", "\t$Rt, $addr", []>;
+
// Indexed loads
multiclass AI2_ldridx<bit isByte, string opc,
InstrItinClass iii, InstrItinClass iir> {
@@ -2825,6 +2828,12 @@ multiclass AI3strT<bits<4> op, string opc> {
defm STRHT : AI3strT<0b1011, "strht">;
+def STL : AIstrrel<0b00, (outs), (ins GPR:$Rt, addr_offset_none:$addr),
+ NoItinerary, "stl", "\t$Rt, $addr", []>;
+def STLB : AIstrrel<0b10, (outs), (ins GPR:$Rt, addr_offset_none:$addr),
+ NoItinerary, "stlb", "\t$Rt, $addr", []>;
+def STLH : AIstrrel<0b11, (outs), (ins GPR:$Rt, addr_offset_none:$addr),
+ NoItinerary, "stlh", "\t$Rt, $addr", []>;
//===----------------------------------------------------------------------===//
// Load / store multiple Instructions.
@@ -4014,6 +4023,45 @@ def : ARMV6Pat<(or (and GPRnopc:$src1, 0xFFFF0000),
(PKHTB GPRnopc:$src1, GPRnopc:$src2, imm1_15:$sh)>;
//===----------------------------------------------------------------------===//
+// CRC Instructions
+//
+// Polynomials:
+// + CRC32{B,H,W} 0x04C11DB7
+// + CRC32C{B,H,W} 0x1EDC6F41
+//
+
+class AI_crc32<bit C, bits<2> sz, string suffix, SDPatternOperator builtin>
+ : AInoP<(outs GPRnopc:$Rd), (ins GPRnopc:$Rn, GPRnopc:$Rm), MiscFrm, NoItinerary,
+ !strconcat("crc32", suffix), "\t$Rd, $Rn, $Rm",
+ [(set GPRnopc:$Rd, (builtin GPRnopc:$Rn, GPRnopc:$Rm))]>,
+ Requires<[IsARM, HasV8, HasCRC]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<4> Rm;
+
+ let Inst{31-28} = 0b1110;
+ let Inst{27-23} = 0b00010;
+ let Inst{22-21} = sz;
+ let Inst{20} = 0;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = Rd;
+ let Inst{11-10} = 0b00;
+ let Inst{9} = C;
+ let Inst{8} = 0;
+ let Inst{7-4} = 0b0100;
+ let Inst{3-0} = Rm;
+
+ let Unpredictable{11-8} = 0b1101;
+}
+
+def CRC32B : AI_crc32<0, 0b00, "b", int_arm_crc32b>;
+def CRC32CB : AI_crc32<1, 0b00, "cb", int_arm_crc32cb>;
+def CRC32H : AI_crc32<0, 0b01, "h", int_arm_crc32h>;
+def CRC32CH : AI_crc32<1, 0b01, "ch", int_arm_crc32ch>;
+def CRC32W : AI_crc32<0, 0b10, "w", int_arm_crc32w>;
+def CRC32CW : AI_crc32<1, 0b10, "cw", int_arm_crc32cw>;
+
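NOTE: for illustration, the intrinsic forms these definitions match (names derived from the int_arm_crc32* operators used above; ARMv8 with the crc extension assumed):

    declare i32 @llvm.arm.crc32w(i32, i32)
    declare i32 @llvm.arm.crc32cw(i32, i32)

    define i32 @crc_word(i32 %acc, i32 %data) {
      %r = call i32 @llvm.arm.crc32w(i32 %acc, i32 %data)  ; -> crc32w rd, rn, rm
      ret i32 %r
    }
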
+//===----------------------------------------------------------------------===//
// Comparison Instructions...
//
@@ -4139,56 +4187,65 @@ def BCCZi64 : PseudoInst<(outs),
// Conditional moves
-// FIXME: should be able to write a pattern for ARMcmov, but can't use
-// a two-value operand where a dag node expects two operands. :(
let neverHasSideEffects = 1 in {
let isCommutable = 1, isSelect = 1 in
-def MOVCCr : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$false, GPR:$Rm, pred:$p),
+def MOVCCr : ARMPseudoInst<(outs GPR:$Rd),
+ (ins GPR:$false, GPR:$Rm, cmovpred:$p),
4, IIC_iCMOVr,
- [/*(set GPR:$Rd, (ARMcmov GPR:$false, GPR:$Rm, imm:$cc, CCR:$ccr))*/]>,
- RegConstraint<"$false = $Rd">, Sched<[WriteALU]>;
+ [(set GPR:$Rd, (ARMcmov GPR:$false, GPR:$Rm,
+ cmovpred:$p))]>,
+ RegConstraint<"$false = $Rd">, Sched<[WriteALU]>;
def MOVCCsi : ARMPseudoInst<(outs GPR:$Rd),
- (ins GPR:$false, so_reg_imm:$shift, pred:$p),
- 4, IIC_iCMOVsr,
- [/*(set GPR:$Rd, (ARMcmov GPR:$false, so_reg_imm:$shift,
- imm:$cc, CCR:$ccr))*/]>,
+ (ins GPR:$false, so_reg_imm:$shift, cmovpred:$p),
+ 4, IIC_iCMOVsr,
+ [(set GPR:$Rd,
+ (ARMcmov GPR:$false, so_reg_imm:$shift,
+ cmovpred:$p))]>,
RegConstraint<"$false = $Rd">, Sched<[WriteALU]>;
def MOVCCsr : ARMPseudoInst<(outs GPR:$Rd),
- (ins GPR:$false, so_reg_reg:$shift, pred:$p),
+ (ins GPR:$false, so_reg_reg:$shift, cmovpred:$p),
4, IIC_iCMOVsr,
- [/*(set GPR:$Rd, (ARMcmov GPR:$false, so_reg_reg:$shift,
- imm:$cc, CCR:$ccr))*/]>,
+ [(set GPR:$Rd, (ARMcmov GPR:$false, so_reg_reg:$shift,
+ cmovpred:$p))]>,
RegConstraint<"$false = $Rd">, Sched<[WriteALU]>;
let isMoveImm = 1 in
-def MOVCCi16 : ARMPseudoInst<(outs GPR:$Rd),
- (ins GPR:$false, imm0_65535_expr:$imm, pred:$p),
- 4, IIC_iMOVi,
- []>,
+def MOVCCi16
+ : ARMPseudoInst<(outs GPR:$Rd),
+ (ins GPR:$false, imm0_65535_expr:$imm, cmovpred:$p),
+ 4, IIC_iMOVi,
+ [(set GPR:$Rd, (ARMcmov GPR:$false, imm0_65535:$imm,
+ cmovpred:$p))]>,
RegConstraint<"$false = $Rd">, Requires<[IsARM, HasV6T2]>,
Sched<[WriteALU]>;
let isMoveImm = 1 in
def MOVCCi : ARMPseudoInst<(outs GPR:$Rd),
- (ins GPR:$false, so_imm:$imm, pred:$p),
+ (ins GPR:$false, so_imm:$imm, cmovpred:$p),
4, IIC_iCMOVi,
- [/*(set GPR:$Rd, (ARMcmov GPR:$false, so_imm:$imm, imm:$cc, CCR:$ccr))*/]>,
+ [(set GPR:$Rd, (ARMcmov GPR:$false, so_imm:$imm,
+ cmovpred:$p))]>,
RegConstraint<"$false = $Rd">, Sched<[WriteALU]>;
// Two instruction predicate mov immediate.
let isMoveImm = 1 in
-def MOVCCi32imm : ARMPseudoInst<(outs GPR:$Rd),
- (ins GPR:$false, i32imm:$src, pred:$p),
- 8, IIC_iCMOVix2, []>, RegConstraint<"$false = $Rd">;
+def MOVCCi32imm
+ : ARMPseudoInst<(outs GPR:$Rd),
+ (ins GPR:$false, i32imm:$src, cmovpred:$p),
+ 8, IIC_iCMOVix2,
+ [(set GPR:$Rd, (ARMcmov GPR:$false, imm:$src,
+ cmovpred:$p))]>,
+ RegConstraint<"$false = $Rd">, Requires<[IsARM, HasV6T2]>;
let isMoveImm = 1 in
def MVNCCi : ARMPseudoInst<(outs GPR:$Rd),
- (ins GPR:$false, so_imm:$imm, pred:$p),
+ (ins GPR:$false, so_imm:$imm, cmovpred:$p),
4, IIC_iCMOVi,
- [/*(set GPR:$Rd, (ARMcmov GPR:$false, so_imm_not:$imm, imm:$cc, CCR:$ccr))*/]>,
+ [(set GPR:$Rd, (ARMcmov GPR:$false, so_imm_not:$imm,
+ cmovpred:$p))]>,
RegConstraint<"$false = $Rd">, Sched<[WriteALU]>;
} // neverHasSideEffects
@@ -4221,7 +4278,7 @@ def instsyncb_opt : Operand<i32> {
// memory barriers protect the atomic sequences
let hasSideEffects = 1 in {
def DMB : AInoP<(outs), (ins memb_opt:$opt), MiscFrm, NoItinerary,
- "dmb", "\t$opt", [(ARMMemBarrier (i32 imm:$opt))]>,
+ "dmb", "\t$opt", [(int_arm_dmb (i32 imm0_15:$opt))]>,
Requires<[IsARM, HasDB]> {
bits<4> opt;
let Inst{31-4} = 0xf57ff05;
@@ -4230,7 +4287,7 @@ def DMB : AInoP<(outs), (ins memb_opt:$opt), MiscFrm, NoItinerary,
}
def DSB : AInoP<(outs), (ins memb_opt:$opt), MiscFrm, NoItinerary,
- "dsb", "\t$opt", []>,
+ "dsb", "\t$opt", [(int_arm_dsb (i32 imm0_15:$opt))]>,
Requires<[IsARM, HasDB]> {
bits<4> opt;
let Inst{31-4} = 0xf57ff04;
@@ -4246,124 +4303,219 @@ def ISB : AInoP<(outs), (ins instsyncb_opt:$opt), MiscFrm, NoItinerary,
let Inst{3-0} = opt;
}
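
NOTE: with DMB/DSB now matching the intrinsics directly, a barrier is reachable from IR without going through a MEMBARRIER node. A minimal sketch (option 15 is SY, the full-system barrier):

    declare void @llvm.arm.dmb(i32)

    define void @full_barrier() {
      call void @llvm.arm.dmb(i32 15)   ; -> "dmb sy"
      ret void
    }
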
+let usesCustomInserter = 1, Defs = [CPSR] in {
+
// Pseudo instruction that combines movs + predicated rsbmi
// to implement integer ABS
-let usesCustomInserter = 1, Defs = [CPSR] in
-def ABS : ARMPseudoInst<(outs GPR:$dst), (ins GPR:$src), 8, NoItinerary, []>;
+ def ABS : ARMPseudoInst<(outs GPR:$dst), (ins GPR:$src), 8, NoItinerary, []>;
-let usesCustomInserter = 1 in {
- let Defs = [CPSR] in {
+// Atomic pseudo-insts which will be lowered to ldrex/strex loops.
+// (64-bit pseudos use hand-written selection code).
+ let mayLoad = 1, mayStore = 1 in {
def ATOMIC_LOAD_ADD_I8 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
- [(set GPR:$dst, (atomic_load_add_8 GPR:$ptr, GPR:$incr))]>;
+ (outs GPR:$dst),
+ (ins GPR:$ptr, GPR:$incr, i32imm:$ordering),
+ NoItinerary, []>;
def ATOMIC_LOAD_SUB_I8 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
- [(set GPR:$dst, (atomic_load_sub_8 GPR:$ptr, GPR:$incr))]>;
+ (outs GPR:$dst),
+ (ins GPR:$ptr, GPR:$incr, i32imm:$ordering),
+ NoItinerary, []>;
def ATOMIC_LOAD_AND_I8 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
- [(set GPR:$dst, (atomic_load_and_8 GPR:$ptr, GPR:$incr))]>;
+ (outs GPR:$dst),
+ (ins GPR:$ptr, GPR:$incr, i32imm:$ordering),
+ NoItinerary, []>;
def ATOMIC_LOAD_OR_I8 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
- [(set GPR:$dst, (atomic_load_or_8 GPR:$ptr, GPR:$incr))]>;
+ (outs GPR:$dst),
+ (ins GPR:$ptr, GPR:$incr, i32imm:$ordering),
+ NoItinerary, []>;
def ATOMIC_LOAD_XOR_I8 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
- [(set GPR:$dst, (atomic_load_xor_8 GPR:$ptr, GPR:$incr))]>;
+ (outs GPR:$dst),
+ (ins GPR:$ptr, GPR:$incr, i32imm:$ordering),
+ NoItinerary, []>;
def ATOMIC_LOAD_NAND_I8 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
- [(set GPR:$dst, (atomic_load_nand_8 GPR:$ptr, GPR:$incr))]>;
+ (outs GPR:$dst),
+ (ins GPR:$ptr, GPR:$incr, i32imm:$ordering),
+ NoItinerary, []>;
def ATOMIC_LOAD_MIN_I8 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
- [(set GPR:$dst, (atomic_load_min_8 GPR:$ptr, GPR:$val))]>;
+ (outs GPR:$dst),
+ (ins GPR:$ptr, GPR:$val, i32imm:$ordering),
+ NoItinerary, []>;
def ATOMIC_LOAD_MAX_I8 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
- [(set GPR:$dst, (atomic_load_max_8 GPR:$ptr, GPR:$val))]>;
+ (outs GPR:$dst),
+ (ins GPR:$ptr, GPR:$val, i32imm:$ordering),
+ NoItinerary, []>;
def ATOMIC_LOAD_UMIN_I8 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
- [(set GPR:$dst, (atomic_load_umin_8 GPR:$ptr, GPR:$val))]>;
+ (outs GPR:$dst),
+ (ins GPR:$ptr, GPR:$val, i32imm:$ordering),
+ NoItinerary, []>;
def ATOMIC_LOAD_UMAX_I8 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
- [(set GPR:$dst, (atomic_load_umax_8 GPR:$ptr, GPR:$val))]>;
+ (outs GPR:$dst),
+ (ins GPR:$ptr, GPR:$val, i32imm:$ordering),
+ NoItinerary, []>;
+ def ATOMIC_SWAP_I8 : PseudoInst<
+ (outs GPR:$dst),
+ (ins GPR:$ptr, GPR:$new, i32imm:$ordering),
+ NoItinerary, []>;
+ def ATOMIC_CMP_SWAP_I8 : PseudoInst<
+ (outs GPR:$dst),
+ (ins GPR:$ptr, GPR:$old, GPR:$new, i32imm:$ordering),
+ NoItinerary, []>;
def ATOMIC_LOAD_ADD_I16 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
- [(set GPR:$dst, (atomic_load_add_16 GPR:$ptr, GPR:$incr))]>;
+ (outs GPR:$dst),
+ (ins GPR:$ptr, GPR:$incr, i32imm:$ordering),
+ NoItinerary, []>;
def ATOMIC_LOAD_SUB_I16 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
- [(set GPR:$dst, (atomic_load_sub_16 GPR:$ptr, GPR:$incr))]>;
+ (outs GPR:$dst),
+ (ins GPR:$ptr, GPR:$incr, i32imm:$ordering),
+ NoItinerary, []>;
def ATOMIC_LOAD_AND_I16 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
- [(set GPR:$dst, (atomic_load_and_16 GPR:$ptr, GPR:$incr))]>;
+ (outs GPR:$dst),
+ (ins GPR:$ptr, GPR:$incr, i32imm:$ordering),
+ NoItinerary, []>;
def ATOMIC_LOAD_OR_I16 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
- [(set GPR:$dst, (atomic_load_or_16 GPR:$ptr, GPR:$incr))]>;
+ (outs GPR:$dst),
+ (ins GPR:$ptr, GPR:$incr, i32imm:$ordering),
+ NoItinerary, []>;
def ATOMIC_LOAD_XOR_I16 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
- [(set GPR:$dst, (atomic_load_xor_16 GPR:$ptr, GPR:$incr))]>;
+ (outs GPR:$dst),
+ (ins GPR:$ptr, GPR:$incr, i32imm:$ordering),
+ NoItinerary, []>;
def ATOMIC_LOAD_NAND_I16 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
- [(set GPR:$dst, (atomic_load_nand_16 GPR:$ptr, GPR:$incr))]>;
+ (outs GPR:$dst),
+ (ins GPR:$ptr, GPR:$incr, i32imm:$ordering),
+ NoItinerary, []>;
def ATOMIC_LOAD_MIN_I16 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
- [(set GPR:$dst, (atomic_load_min_16 GPR:$ptr, GPR:$val))]>;
+ (outs GPR:$dst),
+ (ins GPR:$ptr, GPR:$val, i32imm:$ordering),
+ NoItinerary, []>;
def ATOMIC_LOAD_MAX_I16 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
- [(set GPR:$dst, (atomic_load_max_16 GPR:$ptr, GPR:$val))]>;
+ (outs GPR:$dst),
+ (ins GPR:$ptr, GPR:$val, i32imm:$ordering),
+ NoItinerary, []>;
def ATOMIC_LOAD_UMIN_I16 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
- [(set GPR:$dst, (atomic_load_umin_16 GPR:$ptr, GPR:$val))]>;
+ (outs GPR:$dst),
+ (ins GPR:$ptr, GPR:$val, i32imm:$ordering),
+ NoItinerary, []>;
def ATOMIC_LOAD_UMAX_I16 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
- [(set GPR:$dst, (atomic_load_umax_16 GPR:$ptr, GPR:$val))]>;
+ (outs GPR:$dst),
+ (ins GPR:$ptr, GPR:$val, i32imm:$ordering),
+ NoItinerary, []>;
+ def ATOMIC_SWAP_I16 : PseudoInst<
+ (outs GPR:$dst),
+ (ins GPR:$ptr, GPR:$new, i32imm:$ordering),
+ NoItinerary, []>;
+ def ATOMIC_CMP_SWAP_I16 : PseudoInst<
+ (outs GPR:$dst),
+ (ins GPR:$ptr, GPR:$old, GPR:$new, i32imm:$ordering),
+ NoItinerary, []>;
def ATOMIC_LOAD_ADD_I32 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
- [(set GPR:$dst, (atomic_load_add_32 GPR:$ptr, GPR:$incr))]>;
+ (outs GPR:$dst),
+ (ins GPR:$ptr, GPR:$incr, i32imm:$ordering),
+ NoItinerary, []>;
def ATOMIC_LOAD_SUB_I32 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
- [(set GPR:$dst, (atomic_load_sub_32 GPR:$ptr, GPR:$incr))]>;
+ (outs GPR:$dst),
+ (ins GPR:$ptr, GPR:$incr, i32imm:$ordering),
+ NoItinerary, []>;
def ATOMIC_LOAD_AND_I32 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
- [(set GPR:$dst, (atomic_load_and_32 GPR:$ptr, GPR:$incr))]>;
+ (outs GPR:$dst),
+ (ins GPR:$ptr, GPR:$incr, i32imm:$ordering),
+ NoItinerary, []>;
def ATOMIC_LOAD_OR_I32 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
- [(set GPR:$dst, (atomic_load_or_32 GPR:$ptr, GPR:$incr))]>;
+ (outs GPR:$dst),
+ (ins GPR:$ptr, GPR:$incr, i32imm:$ordering),
+ NoItinerary, []>;
def ATOMIC_LOAD_XOR_I32 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
- [(set GPR:$dst, (atomic_load_xor_32 GPR:$ptr, GPR:$incr))]>;
+ (outs GPR:$dst),
+ (ins GPR:$ptr, GPR:$incr, i32imm:$ordering),
+ NoItinerary, []>;
def ATOMIC_LOAD_NAND_I32 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
- [(set GPR:$dst, (atomic_load_nand_32 GPR:$ptr, GPR:$incr))]>;
+ (outs GPR:$dst),
+ (ins GPR:$ptr, GPR:$incr, i32imm:$ordering),
+ NoItinerary, []>;
def ATOMIC_LOAD_MIN_I32 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
- [(set GPR:$dst, (atomic_load_min_32 GPR:$ptr, GPR:$val))]>;
+ (outs GPR:$dst),
+ (ins GPR:$ptr, GPR:$val, i32imm:$ordering),
+ NoItinerary, []>;
def ATOMIC_LOAD_MAX_I32 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
- [(set GPR:$dst, (atomic_load_max_32 GPR:$ptr, GPR:$val))]>;
+ (outs GPR:$dst),
+ (ins GPR:$ptr, GPR:$val, i32imm:$ordering),
+ NoItinerary, []>;
def ATOMIC_LOAD_UMIN_I32 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
- [(set GPR:$dst, (atomic_load_umin_32 GPR:$ptr, GPR:$val))]>;
+ (outs GPR:$dst),
+ (ins GPR:$ptr, GPR:$val, i32imm:$ordering),
+ NoItinerary, []>;
def ATOMIC_LOAD_UMAX_I32 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
- [(set GPR:$dst, (atomic_load_umax_32 GPR:$ptr, GPR:$val))]>;
-
- def ATOMIC_SWAP_I8 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$new), NoItinerary,
- [(set GPR:$dst, (atomic_swap_8 GPR:$ptr, GPR:$new))]>;
- def ATOMIC_SWAP_I16 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$new), NoItinerary,
- [(set GPR:$dst, (atomic_swap_16 GPR:$ptr, GPR:$new))]>;
+ (outs GPR:$dst),
+ (ins GPR:$ptr, GPR:$val, i32imm:$ordering),
+ NoItinerary, []>;
def ATOMIC_SWAP_I32 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$new), NoItinerary,
- [(set GPR:$dst, (atomic_swap_32 GPR:$ptr, GPR:$new))]>;
-
- def ATOMIC_CMP_SWAP_I8 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$old, GPR:$new), NoItinerary,
- [(set GPR:$dst, (atomic_cmp_swap_8 GPR:$ptr, GPR:$old, GPR:$new))]>;
- def ATOMIC_CMP_SWAP_I16 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$old, GPR:$new), NoItinerary,
- [(set GPR:$dst, (atomic_cmp_swap_16 GPR:$ptr, GPR:$old, GPR:$new))]>;
+ (outs GPR:$dst),
+ (ins GPR:$ptr, GPR:$new, i32imm:$ordering),
+ NoItinerary, []>;
def ATOMIC_CMP_SWAP_I32 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$old, GPR:$new), NoItinerary,
- [(set GPR:$dst, (atomic_cmp_swap_32 GPR:$ptr, GPR:$old, GPR:$new))]>;
-}
+ (outs GPR:$dst),
+ (ins GPR:$ptr, GPR:$old, GPR:$new, i32imm:$ordering),
+ NoItinerary, []>;
+ def ATOMIC_LOAD_ADD_I64 : PseudoInst<
+ (outs GPR:$dst1, GPR:$dst2),
+ (ins GPR:$addr, GPR:$src1, GPR:$src2, i32imm:$ordering),
+ NoItinerary, []>;
+ def ATOMIC_LOAD_SUB_I64 : PseudoInst<
+ (outs GPR:$dst1, GPR:$dst2),
+ (ins GPR:$addr, GPR:$src1, GPR:$src2, i32imm:$ordering),
+ NoItinerary, []>;
+ def ATOMIC_LOAD_AND_I64 : PseudoInst<
+ (outs GPR:$dst1, GPR:$dst2),
+ (ins GPR:$addr, GPR:$src1, GPR:$src2, i32imm:$ordering),
+ NoItinerary, []>;
+ def ATOMIC_LOAD_OR_I64 : PseudoInst<
+ (outs GPR:$dst1, GPR:$dst2),
+ (ins GPR:$addr, GPR:$src1, GPR:$src2, i32imm:$ordering),
+ NoItinerary, []>;
+ def ATOMIC_LOAD_XOR_I64 : PseudoInst<
+ (outs GPR:$dst1, GPR:$dst2),
+ (ins GPR:$addr, GPR:$src1, GPR:$src2, i32imm:$ordering),
+ NoItinerary, []>;
+ def ATOMIC_LOAD_NAND_I64 : PseudoInst<
+ (outs GPR:$dst1, GPR:$dst2),
+ (ins GPR:$addr, GPR:$src1, GPR:$src2, i32imm:$ordering),
+ NoItinerary, []>;
+ def ATOMIC_LOAD_MIN_I64 : PseudoInst<
+ (outs GPR:$dst1, GPR:$dst2),
+ (ins GPR:$addr, GPR:$src1, GPR:$src2, i32imm:$ordering),
+ NoItinerary, []>;
+ def ATOMIC_LOAD_MAX_I64 : PseudoInst<
+ (outs GPR:$dst1, GPR:$dst2),
+ (ins GPR:$addr, GPR:$src1, GPR:$src2, i32imm:$ordering),
+ NoItinerary, []>;
+ def ATOMIC_LOAD_UMIN_I64 : PseudoInst<
+ (outs GPR:$dst1, GPR:$dst2),
+ (ins GPR:$addr, GPR:$src1, GPR:$src2, i32imm:$ordering),
+ NoItinerary, []>;
+ def ATOMIC_LOAD_UMAX_I64 : PseudoInst<
+ (outs GPR:$dst1, GPR:$dst2),
+ (ins GPR:$addr, GPR:$src1, GPR:$src2, i32imm:$ordering),
+ NoItinerary, []>;
+ def ATOMIC_SWAP_I64 : PseudoInst<
+ (outs GPR:$dst1, GPR:$dst2),
+ (ins GPR:$addr, GPR:$src1, GPR:$src2, i32imm:$ordering),
+ NoItinerary, []>;
+ def ATOMIC_CMP_SWAP_I64 : PseudoInst<
+ (outs GPR:$dst1, GPR:$dst2),
+ (ins GPR:$addr, GPR:$cmp1, GPR:$cmp2,
+ GPR:$set1, GPR:$set2, i32imm:$ordering),
+ NoItinerary, []>;
+ }
+ let mayLoad = 1 in
+ def ATOMIC_LOAD_I64 : PseudoInst<
+ (outs GPR:$dst1, GPR:$dst2),
+ (ins GPR:$addr, i32imm:$ordering),
+ NoItinerary, []>;
+ let mayStore = 1 in
+ def ATOMIC_STORE_I64 : PseudoInst<
+ (outs GPR:$dst1, GPR:$dst2),
+ (ins GPR:$addr, GPR:$src1, GPR:$src2, i32imm:$ordering),
+ NoItinerary, []>;
}
let usesCustomInserter = 1 in {
@@ -4402,8 +4554,7 @@ def strex_4 : PatFrag<(ops node:$val, node:$ptr),
let mayLoad = 1 in {
def LDREXB : AIldrex<0b10, (outs GPR:$Rt), (ins addr_offset_none:$addr),
- NoItinerary,
- "ldrexb", "\t$Rt, $addr",
+ NoItinerary, "ldrexb", "\t$Rt, $addr",
[(set GPR:$Rt, (ldrex_1 addr_offset_none:$addr))]>;
def LDREXH : AIldrex<0b11, (outs GPR:$Rt), (ins addr_offset_none:$addr),
NoItinerary, "ldrexh", "\t$Rt, $addr",
@@ -4412,10 +4563,22 @@ def LDREX : AIldrex<0b00, (outs GPR:$Rt), (ins addr_offset_none:$addr),
NoItinerary, "ldrex", "\t$Rt, $addr",
[(set GPR:$Rt, (ldrex_4 addr_offset_none:$addr))]>;
let hasExtraDefRegAllocReq = 1 in
-def LDREXD: AIldrex<0b01, (outs GPRPairOp:$Rt),(ins addr_offset_none:$addr),
+def LDREXD : AIldrex<0b01, (outs GPRPairOp:$Rt),(ins addr_offset_none:$addr),
NoItinerary, "ldrexd", "\t$Rt, $addr", []> {
let DecoderMethod = "DecodeDoubleRegLoad";
}
+
+def LDAEXB : AIldaex<0b10, (outs GPR:$Rt), (ins addr_offset_none:$addr),
+ NoItinerary, "ldaexb", "\t$Rt, $addr", []>;
+def LDAEXH : AIldaex<0b11, (outs GPR:$Rt), (ins addr_offset_none:$addr),
+ NoItinerary, "ldaexh", "\t$Rt, $addr", []>;
+def LDAEX : AIldaex<0b00, (outs GPR:$Rt), (ins addr_offset_none:$addr),
+ NoItinerary, "ldaex", "\t$Rt, $addr", []>;
+let hasExtraDefRegAllocReq = 1 in
+def LDAEXD : AIldaex<0b01, (outs GPRPairOp:$Rt),(ins addr_offset_none:$addr),
+ NoItinerary, "ldaexd", "\t$Rt, $addr", []> {
+ let DecoderMethod = "DecodeDoubleRegLoad";
+}
}
let mayStore = 1, Constraints = "@earlyclobber $Rd" in {
@@ -4434,8 +4597,22 @@ def STREXD : AIstrex<0b01, (outs GPR:$Rd),
NoItinerary, "strexd", "\t$Rd, $Rt, $addr", []> {
let DecoderMethod = "DecodeDoubleRegStore";
}
+def STLEXB: AIstlex<0b10, (outs GPR:$Rd), (ins GPR:$Rt, addr_offset_none:$addr),
+ NoItinerary, "stlexb", "\t$Rd, $Rt, $addr",
+ []>;
+def STLEXH: AIstlex<0b11, (outs GPR:$Rd), (ins GPR:$Rt, addr_offset_none:$addr),
+ NoItinerary, "stlexh", "\t$Rd, $Rt, $addr",
+ []>;
+def STLEX : AIstlex<0b00, (outs GPR:$Rd), (ins GPR:$Rt, addr_offset_none:$addr),
+ NoItinerary, "stlex", "\t$Rd, $Rt, $addr",
+ []>;
+let hasExtraSrcRegAllocReq = 1 in
+def STLEXD : AIstlex<0b01, (outs GPR:$Rd),
+ (ins GPRPairOp:$Rt, addr_offset_none:$addr),
+ NoItinerary, "stlexd", "\t$Rd, $Rt, $addr", []> {
+ let DecoderMethod = "DecodeDoubleRegStore";
+}
}
-
def CLREX : AXI<(outs), (ins), MiscFrm, NoItinerary, "clrex",
[(int_arm_clrex)]>,
@@ -4452,12 +4629,43 @@ def : ARMPat<(strex_1 (and GPR:$Rt, 0xff), addr_offset_none:$addr),
def : ARMPat<(strex_2 (and GPR:$Rt, 0xffff), addr_offset_none:$addr),
(STREXH GPR:$Rt, addr_offset_none:$addr)>;
+class acquiring_load<PatFrag base>
+ : PatFrag<(ops node:$ptr), (base node:$ptr), [{
+ AtomicOrdering Ordering = cast<AtomicSDNode>(N)->getOrdering();
+ return Ordering == Acquire || Ordering == SequentiallyConsistent;
+}]>;
+
+def atomic_load_acquire_8 : acquiring_load<atomic_load_8>;
+def atomic_load_acquire_16 : acquiring_load<atomic_load_16>;
+def atomic_load_acquire_32 : acquiring_load<atomic_load_32>;
+
+class releasing_store<PatFrag base>
+ : PatFrag<(ops node:$ptr, node:$val), (base node:$ptr, node:$val), [{
+ AtomicOrdering Ordering = cast<AtomicSDNode>(N)->getOrdering();
+ return Ordering == Release || Ordering == SequentiallyConsistent;
+}]>;
+
+def atomic_store_release_8 : releasing_store<atomic_store_8>;
+def atomic_store_release_16 : releasing_store<atomic_store_16>;
+def atomic_store_release_32 : releasing_store<atomic_store_32>;
+
+let AddedComplexity = 8 in {
+ def : ARMPat<(atomic_load_acquire_8 addr_offset_none:$addr), (LDAB addr_offset_none:$addr)>;
+ def : ARMPat<(atomic_load_acquire_16 addr_offset_none:$addr), (LDAH addr_offset_none:$addr)>;
+ def : ARMPat<(atomic_load_acquire_32 addr_offset_none:$addr), (LDA addr_offset_none:$addr)>;
+ def : ARMPat<(atomic_store_release_8 addr_offset_none:$addr, GPR:$val), (STLB GPR:$val, addr_offset_none:$addr)>;
+ def : ARMPat<(atomic_store_release_16 addr_offset_none:$addr, GPR:$val), (STLH GPR:$val, addr_offset_none:$addr)>;
+ def : ARMPat<(atomic_store_release_32 addr_offset_none:$addr, GPR:$val), (STL GPR:$val, addr_offset_none:$addr)>;
+}
+
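NOTE: a sketch of IR these new patterns should catch (LLVM 3.4 syntax; monotonic loads keep using plain LDR, since only acquire and seq_cst match):

    define i32 @load_acquire(i32* %p) {
      %v = load atomic i32* %p acquire, align 4   ; -> lda on ARMv8 targets
      ret i32 %v
    }
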
// SWP/SWPB are deprecated in V6/V7.
let mayLoad = 1, mayStore = 1 in {
def SWP : AIswp<0, (outs GPRnopc:$Rt),
- (ins GPRnopc:$Rt2, addr_offset_none:$addr), "swp", []>;
+ (ins GPRnopc:$Rt2, addr_offset_none:$addr), "swp", []>,
+ Requires<[PreV8]>;
def SWPB: AIswp<1, (outs GPRnopc:$Rt),
- (ins GPRnopc:$Rt2, addr_offset_none:$addr), "swpb", []>;
+ (ins GPRnopc:$Rt2, addr_offset_none:$addr), "swpb", []>,
+ Requires<[PreV8]>;
}
//===----------------------------------------------------------------------===//
@@ -4468,7 +4676,8 @@ def CDP : ABI<0b1110, (outs), (ins p_imm:$cop, imm0_15:$opc1,
c_imm:$CRd, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2),
NoItinerary, "cdp", "\t$cop, $opc1, $CRd, $CRn, $CRm, $opc2",
[(int_arm_cdp imm:$cop, imm:$opc1, imm:$CRd, imm:$CRn,
- imm:$CRm, imm:$opc2)]> {
+ imm:$CRm, imm:$opc2)]>,
+ Requires<[PreV8]> {
bits<4> opc1;
bits<4> CRn;
bits<4> CRd;
@@ -4489,7 +4698,8 @@ def CDP2 : ABXI<0b1110, (outs), (ins p_imm:$cop, imm0_15:$opc1,
c_imm:$CRd, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2),
NoItinerary, "cdp2\t$cop, $opc1, $CRd, $CRn, $CRm, $opc2",
[(int_arm_cdp2 imm:$cop, imm:$opc1, imm:$CRd, imm:$CRn,
- imm:$CRm, imm:$opc2)]> {
+ imm:$CRm, imm:$opc2)]>,
+ Requires<[PreV8]> {
let Inst{31-28} = 0b1111;
bits<4> opc1;
bits<4> CRn;
@@ -4667,10 +4877,10 @@ defm LDC : LdStCop <1, 0, "ldc">;
defm LDCL : LdStCop <1, 1, "ldcl">;
defm STC : LdStCop <0, 0, "stc">;
defm STCL : LdStCop <0, 1, "stcl">;
-defm LDC2 : LdSt2Cop<1, 0, "ldc2">;
-defm LDC2L : LdSt2Cop<1, 1, "ldc2l">;
-defm STC2 : LdSt2Cop<0, 0, "stc2">;
-defm STC2L : LdSt2Cop<0, 1, "stc2l">;
+defm LDC2 : LdSt2Cop<1, 0, "ldc2">, Requires<[PreV8]>;
+defm LDC2L : LdSt2Cop<1, 1, "ldc2l">, Requires<[PreV8]>;
+defm STC2 : LdSt2Cop<0, 0, "stc2">, Requires<[PreV8]>;
+defm STC2L : LdSt2Cop<0, 1, "stc2l">, Requires<[PreV8]>;
//===----------------------------------------------------------------------===//
// Move between coprocessor and ARM core register.
@@ -4703,7 +4913,8 @@ def MCR : MovRCopro<"mcr", 0 /* from ARM core register to coprocessor */,
(ins p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn,
c_imm:$CRm, imm0_7:$opc2),
[(int_arm_mcr imm:$cop, imm:$opc1, GPR:$Rt, imm:$CRn,
- imm:$CRm, imm:$opc2)]>;
+ imm:$CRm, imm:$opc2)]>,
+ ComplexDeprecationPredicate<"MCR">;
def : ARMInstAlias<"mcr${p} $cop, $opc1, $Rt, $CRn, $CRm",
(MCR p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn,
c_imm:$CRm, 0, pred:$p)>;
@@ -4746,14 +4957,16 @@ def MCR2 : MovRCopro2<"mcr2", 0 /* from ARM core register to coprocessor */,
(ins p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn,
c_imm:$CRm, imm0_7:$opc2),
[(int_arm_mcr2 imm:$cop, imm:$opc1, GPR:$Rt, imm:$CRn,
- imm:$CRm, imm:$opc2)]>;
+ imm:$CRm, imm:$opc2)]>,
+ Requires<[PreV8]>;
def : ARMInstAlias<"mcr2$ $cop, $opc1, $Rt, $CRn, $CRm",
(MCR2 p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn,
c_imm:$CRm, 0)>;
def MRC2 : MovRCopro2<"mrc2", 1 /* from coprocessor to ARM core register */,
(outs GPRwithAPSR:$Rt),
(ins p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, c_imm:$CRm,
- imm0_7:$opc2), []>;
+ imm0_7:$opc2), []>,
+ Requires<[PreV8]>;
def : ARMInstAlias<"mrc2$ $cop, $opc1, $Rt, $CRn, $CRm",
(MRC2 GPRwithAPSR:$Rt, p_imm:$cop, imm0_7:$opc1, c_imm:$CRn,
c_imm:$CRm, 0)>;
@@ -4790,7 +5003,8 @@ def MRRC : MovRRCopro<"mrrc", 1 /* from coprocessor to ARM core register */>;
class MovRRCopro2<string opc, bit direction, list<dag> pattern = []>
: ABXI<0b1100, (outs), (ins p_imm:$cop, imm0_15:$opc1,
GPRnopc:$Rt, GPRnopc:$Rt2, c_imm:$CRm), NoItinerary,
- !strconcat(opc, "\t$cop, $opc1, $Rt, $Rt2, $CRm"), pattern> {
+ !strconcat(opc, "\t$cop, $opc1, $Rt, $Rt2, $CRm"), pattern>,
+ Requires<[PreV8]> {
let Inst{31-28} = 0b1111;
let Inst{23-21} = 0b010;
let Inst{20} = direction;
@@ -5341,4 +5555,5 @@ def : InstAlias<"umull${s}${p} $RdLo, $RdHi, $Rn, $Rm",
// 'it' blocks in ARM mode just validate the predicates. The IT itself
// is discarded.
-def ITasm : ARMAsmPseudo<"it$mask $cc", (ins it_pred:$cc, it_mask:$mask)>;
+def ITasm : ARMAsmPseudo<"it$mask $cc", (ins it_pred:$cc, it_mask:$mask)>,
+ ComplexDeprecationPredicate<"IT">;
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td
index af4f4d1..43bd4c2 100644
--- a/lib/Target/ARM/ARMInstrNEON.td
+++ b/lib/Target/ARM/ARMInstrNEON.td
@@ -2355,17 +2355,36 @@ class N2VQInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
class N2VDIntnp<bits<2> op17_16, bits<3> op10_8, bit op7,
InstrItinClass itin, string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
- : N2Vnp<op17_16, op10_8, op7, 0, (outs DPR:$Vd), (ins DPR:$Vm),
+ : N2Vnp<0b10, op17_16, op10_8, op7, 0, (outs DPR:$Vd), (ins DPR:$Vm),
itin, OpcodeStr, Dt, ResTy, OpTy,
[(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm))))]>;
class N2VQIntnp<bits<2> op17_16, bits<3> op10_8, bit op7,
InstrItinClass itin, string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
- : N2Vnp<op17_16, op10_8, op7, 1, (outs QPR:$Vd), (ins QPR:$Vm),
+ : N2Vnp<0b10, op17_16, op10_8, op7, 1, (outs QPR:$Vd), (ins QPR:$Vm),
itin, OpcodeStr, Dt, ResTy, OpTy,
[(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;
+// Similar to N2VQIntnp with some more encoding bits exposed (crypto).
+class N2VQIntXnp<bits<2> op19_18, bits<2> op17_16, bits<3> op10_8, bit op6,
+ bit op7, InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
+ : N2Vnp<op19_18, op17_16, op10_8, op7, op6, (outs QPR:$Vd), (ins QPR:$Vm),
+ itin, OpcodeStr, Dt, ResTy, OpTy,
+ [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;
+
+// Same as N2VQIntXnp but with Vd as a src register.
+class N2VQIntX2np<bits<2> op19_18, bits<2> op17_16, bits<3> op10_8, bit op6,
+ bit op7, InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
+ : N2Vnp<op19_18, op17_16, op10_8, op7, op6,
+ (outs QPR:$Vd), (ins QPR:$src, QPR:$Vm),
+ itin, OpcodeStr, Dt, ResTy, OpTy,
+ [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$src), (OpTy QPR:$Vm))))]> {
+ let Constraints = "$src = $Vd";
+}
+
// Narrow 2-register operations.
class N2VN<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
@@ -2534,7 +2553,7 @@ class N3VDIntnp<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6,
string Dt, ValueType ResTy, ValueType OpTy,
SDPatternOperator IntOp, bit Commutable>
: N3Vnp<op27_23, op21_20, op11_8, op6, op4,
- (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), f, itin, OpcodeStr, Dt,
+ (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin, OpcodeStr, Dt,
ResTy, OpTy, IntOp, Commutable,
[(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>;
@@ -2592,6 +2611,19 @@ class N3VQIntnp<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6,
ResTy, OpTy, IntOp, Commutable,
[(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]>;
+// Same as N3VQIntnp but with Vd as a src register.
+class N3VQInt3np<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6,
+ bit op4, Format f, InstrItinClass itin, string OpcodeStr,
+ string Dt, ValueType ResTy, ValueType OpTy,
+ SDPatternOperator IntOp, bit Commutable>
+ : N3Vnp<op27_23, op21_20, op11_8, op6, op4,
+ (outs QPR:$Vd), (ins QPR:$src, QPR:$Vn, QPR:$Vm), f, itin, OpcodeStr,
+ Dt, ResTy, OpTy, IntOp, Commutable,
+ [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$src), (OpTy QPR:$Vn),
+ (OpTy QPR:$Vm))))]> {
+ let Constraints = "$src = $Vd";
+}
+
class N3VQIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
@@ -2842,6 +2874,7 @@ class N3VL<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
[(set QPR:$Vd, (TyQ (OpNode (TyD DPR:$Vn), (TyD DPR:$Vm))))]> {
let isCommutable = Commutable;
}
+
class N3VLSL<bit op24, bits<2> op21_20, bits<4> op11_8,
InstrItinClass itin, string OpcodeStr, string Dt,
ValueType TyQ, ValueType TyD, SDNode OpNode>
@@ -2897,6 +2930,17 @@ class N3VLInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
[(set QPR:$Vd, (TyQ (IntOp (TyD DPR:$Vn), (TyD DPR:$Vm))))]> {
let isCommutable = Commutable;
}
+
+// Same as above, but not predicated.
+class N3VLIntnp<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6,
+ bit op4, InstrItinClass itin, string OpcodeStr,
+ string Dt, ValueType ResTy, ValueType OpTy,
+ SDPatternOperator IntOp, bit Commutable>
+ : N3Vnp<op27_23, op21_20, op11_8, op6, op4,
+ (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin, OpcodeStr, Dt,
+ ResTy, OpTy, IntOp, Commutable,
+ [(set QPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>;
+
class N3VLIntSL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
@@ -3973,12 +4017,18 @@ defm VQADDu : N3VInt_QHSD<1, 0, 0b0000, 1, N3RegFrm,
IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
"vqadd", "u", int_arm_neon_vqaddu, 1>;
// VADDHN : Vector Add and Narrow Returning High Half (D = Q + Q)
-defm VADDHN : N3VNInt_HSD<0,1,0b0100,0, "vaddhn", "i",
- int_arm_neon_vaddhn, 1>;
+defm VADDHN : N3VNInt_HSD<0,1,0b0100,0, "vaddhn", "i", null_frag, 1>;
// VRADDHN : Vector Rounding Add and Narrow Returning High Half (D = Q + Q)
defm VRADDHN : N3VNInt_HSD<1,1,0b0100,0, "vraddhn", "i",
int_arm_neon_vraddhn, 1>;
+def : Pat<(v8i8 (trunc (NEONvshru (add (v8i16 QPR:$Vn), QPR:$Vm), 8))),
+ (VADDHNv8i8 QPR:$Vn, QPR:$Vm)>;
+def : Pat<(v4i16 (trunc (NEONvshru (add (v4i32 QPR:$Vn), QPR:$Vm), 16))),
+ (VADDHNv4i16 QPR:$Vn, QPR:$Vm)>;
+def : Pat<(v2i32 (trunc (NEONvshru (add (v2i64 QPR:$Vn), QPR:$Vm), 32))),
+ (VADDHNv2i32 QPR:$Vn, QPR:$Vm)>;
+
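NOTE: with vaddhn moved off the intrinsic, the instruction is now selected from generic nodes. IR along these lines should produce it (a sketch, assuming the usual vector-shift-to-NEONvshru combine fires):

    define <8 x i8> @addhn(<8 x i16> %a, <8 x i16> %b) {
      %sum  = add <8 x i16> %a, %b
      %high = lshr <8 x i16> %sum, <i16 8, i16 8, i16 8, i16 8,
                                    i16 8, i16 8, i16 8, i16 8>
      %res  = trunc <8 x i16> %high to <8 x i8>
      ret <8 x i8> %res               ; -> vaddhn.i16 d0, q0, q1
    }
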
// Vector Multiply Operations.
// VMUL : Vector Multiply (integer, polynomial and floating-point)
@@ -4016,6 +4066,17 @@ def : Pat<(v4f32 (fmul (v4f32 QPR:$src1),
(DSubReg_i32_reg imm:$lane))),
(SubReg_i32_lane imm:$lane)))>;
+
+def : Pat<(v2f32 (fmul DPR:$Rn, (NEONvdup (f32 SPR:$Rm)))),
+ (VMULslfd DPR:$Rn,
+ (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$Rm, ssub_0),
+ (i32 0))>;
+def : Pat<(v4f32 (fmul QPR:$Rn, (NEONvdup (f32 SPR:$Rm)))),
+ (VMULslfq QPR:$Rn,
+ (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$Rm, ssub_0),
+ (i32 0))>;
+
// VQDMULH : Vector Saturating Doubling Multiply Returning High Half
defm VQDMULH : N3VInt_HS<0, 0, 0b1011, 0, N3RegFrm, IIC_VMULi16D, IIC_VMULi32D,
IIC_VMULi16Q, IIC_VMULi32Q,
@@ -4061,12 +4122,18 @@ def : Pat<(v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$src1),
(SubReg_i32_lane imm:$lane)))>;
// VMULL : Vector Multiply Long (integer and polynomial) (Q = D * D)
-defm VMULLs : N3VL_QHS<0,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D,
- "vmull", "s", NEONvmulls, 1>;
-defm VMULLu : N3VL_QHS<1,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D,
- "vmull", "u", NEONvmullu, 1>;
-def VMULLp : N3VLInt<0, 1, 0b00, 0b1110, 0, IIC_VMULi16D, "vmull", "p8",
- v8i16, v8i8, int_arm_neon_vmullp, 1>;
+let PostEncoderMethod = "NEONThumb2DataIPostEncoder",
+ DecoderNamespace = "NEONData" in {
+ defm VMULLs : N3VL_QHS<0,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D,
+ "vmull", "s", NEONvmulls, 1>;
+ defm VMULLu : N3VL_QHS<1,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D,
+ "vmull", "u", NEONvmullu, 1>;
+ def VMULLp8 : N3VLInt<0, 1, 0b00, 0b1110, 0, IIC_VMULi16D, "vmull", "p8",
+ v8i16, v8i8, int_arm_neon_vmullp, 1>;
+ def VMULLp64 : N3VLIntnp<0b00101, 0b10, 0b1110, 0, 0, NoItinerary,
+ "vmull", "p64", v2i64, v1i64, int_arm_neon_vmullp, 1>,
+ Requires<[HasV8, HasCrypto]>;
+}
defm VMULLsls : N3VLSL_HS<0, 0b1010, IIC_VMULi16D, "vmull", "s", NEONvmulls>;
defm VMULLslu : N3VLSL_HS<1, 0b1010, IIC_VMULi16D, "vmull", "u", NEONvmullu>;
@@ -4133,8 +4200,27 @@ defm VMLALslu : N3VLMulOpSL_HS<1, 0b0010, "vmlal", "u", NEONvmullu, add>;
// VQDMLAL : Vector Saturating Doubling Multiply Accumulate Long (Q += D * D)
defm VQDMLAL : N3VLInt3_HS<0, 1, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
- "vqdmlal", "s", int_arm_neon_vqdmlal>;
-defm VQDMLALsl: N3VLInt3SL_HS<0, 0b0011, "vqdmlal", "s", int_arm_neon_vqdmlal>;
+ "vqdmlal", "s", null_frag>;
+defm VQDMLALsl: N3VLInt3SL_HS<0, 0b0011, "vqdmlal", "s", null_frag>;
+
+def : Pat<(v4i32 (int_arm_neon_vqadds (v4i32 QPR:$src1),
+ (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn),
+ (v4i16 DPR:$Vm))))),
+ (VQDMLALv4i32 QPR:$src1, DPR:$Vn, DPR:$Vm)>;
+def : Pat<(v2i64 (int_arm_neon_vqadds (v2i64 QPR:$src1),
+ (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn),
+ (v2i32 DPR:$Vm))))),
+ (VQDMLALv2i64 QPR:$src1, DPR:$Vn, DPR:$Vm)>;
+def : Pat<(v4i32 (int_arm_neon_vqadds (v4i32 QPR:$src1),
+ (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn),
+ (v4i16 (NEONvduplane (v4i16 DPR_8:$Vm),
+ imm:$lane)))))),
+ (VQDMLALslv4i16 QPR:$src1, DPR:$Vn, DPR_8:$Vm, imm:$lane)>;
+def : Pat<(v2i64 (int_arm_neon_vqadds (v2i64 QPR:$src1),
+ (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn),
+ (v2i32 (NEONvduplane (v2i32 DPR_VFP2:$Vm),
+ imm:$lane)))))),
+ (VQDMLALslv2i32 QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, imm:$lane)>;
// VMLS : Vector Multiply Subtract (integer and floating-point)
defm VMLS : N3VMulOp_QHS<1, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
@@ -4190,25 +4276,44 @@ defm VMLSLslu : N3VLMulOpSL_HS<1, 0b0110, "vmlsl", "u", NEONvmullu, sub>;
// VQDMLSL : Vector Saturating Doubling Multiply Subtract Long (Q -= D * D)
defm VQDMLSL : N3VLInt3_HS<0, 1, 0b1011, 0, IIC_VMACi16D, IIC_VMACi32D,
- "vqdmlsl", "s", int_arm_neon_vqdmlsl>;
-defm VQDMLSLsl: N3VLInt3SL_HS<0, 0b111, "vqdmlsl", "s", int_arm_neon_vqdmlsl>;
+ "vqdmlsl", "s", null_frag>;
+defm VQDMLSLsl: N3VLInt3SL_HS<0, 0b111, "vqdmlsl", "s", null_frag>;
+
+def : Pat<(v4i32 (int_arm_neon_vqsubs (v4i32 QPR:$src1),
+ (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn),
+ (v4i16 DPR:$Vm))))),
+ (VQDMLSLv4i32 QPR:$src1, DPR:$Vn, DPR:$Vm)>;
+def : Pat<(v2i64 (int_arm_neon_vqsubs (v2i64 QPR:$src1),
+ (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn),
+ (v2i32 DPR:$Vm))))),
+ (VQDMLSLv2i64 QPR:$src1, DPR:$Vn, DPR:$Vm)>;
+def : Pat<(v4i32 (int_arm_neon_vqsubs (v4i32 QPR:$src1),
+ (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn),
+ (v4i16 (NEONvduplane (v4i16 DPR_8:$Vm),
+ imm:$lane)))))),
+ (VQDMLSLslv4i16 QPR:$src1, DPR:$Vn, DPR_8:$Vm, imm:$lane)>;
+def : Pat<(v2i64 (int_arm_neon_vqsubs (v2i64 QPR:$src1),
+ (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn),
+ (v2i32 (NEONvduplane (v2i32 DPR_VFP2:$Vm),
+ imm:$lane)))))),
+ (VQDMLSLslv2i32 QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, imm:$lane)>;
// Fused Vector Multiply-Accumulate and Fused Multiply-Subtract Operations.
def VFMAfd : N3VDMulOp<0, 0, 0b00, 0b1100, 1, IIC_VFMACD, "vfma", "f32",
v2f32, fmul_su, fadd_mlx>,
- Requires<[HasVFP4,UseFusedMAC]>;
+ Requires<[HasNEON,HasVFP4,UseFusedMAC]>;
def VFMAfq : N3VQMulOp<0, 0, 0b00, 0b1100, 1, IIC_VFMACQ, "vfma", "f32",
v4f32, fmul_su, fadd_mlx>,
- Requires<[HasVFP4,UseFusedMAC]>;
+ Requires<[HasNEON,HasVFP4,UseFusedMAC]>;
// Fused Vector Multiply Subtract (floating-point)
def VFMSfd : N3VDMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACD, "vfms", "f32",
v2f32, fmul_su, fsub_mlx>,
- Requires<[HasVFP4,UseFusedMAC]>;
+ Requires<[HasNEON,HasVFP4,UseFusedMAC]>;
def VFMSfq : N3VQMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACQ, "vfms", "f32",
v4f32, fmul_su, fsub_mlx>,
- Requires<[HasVFP4,UseFusedMAC]>;
+ Requires<[HasNEON,HasVFP4,UseFusedMAC]>;
// Match @llvm.fma.* intrinsics
def : Pat<(v2f32 (fma DPR:$Vn, DPR:$Vm, DPR:$src1)),
@@ -4256,12 +4361,18 @@ defm VQSUBu : N3VInt_QHSD<1, 0, 0b0010, 1, N3RegFrm,
IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
"vqsub", "u", int_arm_neon_vqsubu, 0>;
// VSUBHN : Vector Subtract and Narrow Returning High Half (D = Q - Q)
-defm VSUBHN : N3VNInt_HSD<0,1,0b0110,0, "vsubhn", "i",
- int_arm_neon_vsubhn, 0>;
+defm VSUBHN : N3VNInt_HSD<0,1,0b0110,0, "vsubhn", "i", null_frag, 0>;
// VRSUBHN : Vector Rounding Subtract and Narrow Returning High Half (D=Q-Q)
defm VRSUBHN : N3VNInt_HSD<1,1,0b0110,0, "vrsubhn", "i",
int_arm_neon_vrsubhn, 0>;
+def : Pat<(v8i8 (trunc (NEONvshru (sub (v8i16 QPR:$Vn), QPR:$Vm), 8))),
+ (VSUBHNv8i8 QPR:$Vn, QPR:$Vm)>;
+def : Pat<(v4i16 (trunc (NEONvshru (sub (v4i32 QPR:$Vn), QPR:$Vm), 16))),
+ (VSUBHNv4i16 QPR:$Vn, QPR:$Vm)>;
+def : Pat<(v2i32 (trunc (NEONvshru (sub (v2i64 QPR:$Vn), QPR:$Vm), 32))),
+ (VSUBHNv2i32 QPR:$Vn, QPR:$Vm)>;
+
// Vector Comparisons.
// VCEQ : Vector Compare Equal
@@ -5047,10 +5158,10 @@ def VSWPq : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 1, 0,
// Vector Move Operations.
// VMOV : Vector Move (Register)
-def : InstAlias<"vmov${p} $Vd, $Vm",
- (VORRd DPR:$Vd, DPR:$Vm, DPR:$Vm, pred:$p)>;
-def : InstAlias<"vmov${p} $Vd, $Vm",
- (VORRq QPR:$Vd, QPR:$Vm, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vmov${p} $Vd, $Vm",
+ (VORRd DPR:$Vd, DPR:$Vm, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vmov${p} $Vd, $Vm",
+ (VORRq QPR:$Vd, QPR:$Vm, QPR:$Vm, pred:$p)>;
// VMOV : Vector Move (Immediate)
@@ -5461,6 +5572,25 @@ def VCVTxu2fq : N2VCvtQ<1, 1, 0b1110, 0, 1, "vcvt", "f32.u32",
v4f32, v4i32, int_arm_neon_vcvtfxu2fp>;
}
+def : NEONInstAlias<"vcvt${p}.s32.f32 $Dd, $Dm, #0",
+ (VCVTf2sd DPR:$Dd, DPR:$Dm, pred:$p)>;
+def : NEONInstAlias<"vcvt${p}.u32.f32 $Dd, $Dm, #0",
+ (VCVTf2ud DPR:$Dd, DPR:$Dm, pred:$p)>;
+def : NEONInstAlias<"vcvt${p}.f32.s32 $Dd, $Dm, #0",
+ (VCVTs2fd DPR:$Dd, DPR:$Dm, pred:$p)>;
+def : NEONInstAlias<"vcvt${p}.f32.u32 $Dd, $Dm, #0",
+ (VCVTu2fd DPR:$Dd, DPR:$Dm, pred:$p)>;
+
+def : NEONInstAlias<"vcvt${p}.s32.f32 $Qd, $Qm, #0",
+ (VCVTf2sq QPR:$Qd, QPR:$Qm, pred:$p)>;
+def : NEONInstAlias<"vcvt${p}.u32.f32 $Qd, $Qm, #0",
+ (VCVTf2uq QPR:$Qd, QPR:$Qm, pred:$p)>;
+def : NEONInstAlias<"vcvt${p}.f32.s32 $Qd, $Qm, #0",
+ (VCVTs2fq QPR:$Qd, QPR:$Qm, pred:$p)>;
+def : NEONInstAlias<"vcvt${p}.f32.u32 $Qd, $Qm, #0",
+ (VCVTu2fq QPR:$Qd, QPR:$Qm, pred:$p)>;
+
// VCVT : Vector Convert Between Half-Precision and Single-Precision.
def VCVTf2h : N2VNInt<0b11, 0b11, 0b01, 0b10, 0b01100, 0, 0,
IIC_VUNAQ, "vcvt", "f16.f32",
@@ -5725,9 +5855,9 @@ multiclass VRINT_FPI<string op, bits<3> op9_7, SDPatternOperator Int> {
}
}
- def : InstAlias<!strconcat("vrint", op, ".f32.f32\t$Dd, $Dm"),
+ def : NEONInstAlias<!strconcat("vrint", op, ".f32.f32\t$Dd, $Dm"),
(!cast<Instruction>(NAME#"D") DPR:$Dd, DPR:$Dm)>;
- def : InstAlias<!strconcat("vrint", op, ".f32.f32\t$Qd, $Qm"),
+ def : NEONInstAlias<!strconcat("vrint", op, ".f32.f32\t$Qd, $Qm"),
(!cast<Instruction>(NAME#"Q") QPR:$Qd, QPR:$Qm)>;
}
@@ -5738,6 +5868,49 @@ defm VRINTZN : VRINT_FPI<"z", 0b011, int_arm_neon_vrintz>;
defm VRINTMN : VRINT_FPI<"m", 0b101, int_arm_neon_vrintm>;
defm VRINTPN : VRINT_FPI<"p", 0b111, int_arm_neon_vrintp>;
+// Cryptography instructions
+let PostEncoderMethod = "NEONThumb2DataIPostEncoder",
+ DecoderNamespace = "v8Crypto" in {
+ class AES<string op, bit op7, bit op6, SDPatternOperator Int>
+ : N2VQIntXnp<0b00, 0b00, 0b011, op6, op7, NoItinerary,
+ !strconcat("aes", op), "8", v16i8, v16i8, Int>,
+ Requires<[HasV8, HasCrypto]>;
+ class AES2Op<string op, bit op7, bit op6, SDPatternOperator Int>
+ : N2VQIntX2np<0b00, 0b00, 0b011, op6, op7, NoItinerary,
+ !strconcat("aes", op), "8", v16i8, v16i8, Int>,
+ Requires<[HasV8, HasCrypto]>;
+ class N2SHA<string op, bits<2> op17_16, bits<3> op10_8, bit op7, bit op6,
+ SDPatternOperator Int>
+ : N2VQIntXnp<0b10, op17_16, op10_8, op6, op7, NoItinerary,
+ !strconcat("sha", op), "32", v4i32, v4i32, Int>,
+ Requires<[HasV8, HasCrypto]>;
+ class N2SHA2Op<string op, bits<2> op17_16, bits<3> op10_8, bit op7, bit op6,
+ SDPatternOperator Int>
+ : N2VQIntX2np<0b10, op17_16, op10_8, op6, op7, NoItinerary,
+ !strconcat("sha", op), "32", v4i32, v4i32, Int>,
+ Requires<[HasV8, HasCrypto]>;
+ class N3SHA3Op<string op, bits<5> op27_23, bits<2> op21_20, SDPatternOperator Int>
+ : N3VQInt3np<op27_23, op21_20, 0b1100, 1, 0, N3RegFrm, NoItinerary,
+ !strconcat("sha", op), "32", v4i32, v4i32, Int, 0>,
+ Requires<[HasV8, HasCrypto]>;
+}
+
+def AESD : AES2Op<"d", 0, 1, int_arm_neon_aesd>;
+def AESE : AES2Op<"e", 0, 0, int_arm_neon_aese>;
+def AESIMC : AES<"imc", 1, 1, int_arm_neon_aesimc>;
+def AESMC : AES<"mc", 1, 0, int_arm_neon_aesmc>;
+
+def SHA1H : N2SHA<"1h", 0b01, 0b010, 1, 1, int_arm_neon_sha1h>;
+def SHA1SU1 : N2SHA2Op<"1su1", 0b10, 0b011, 1, 0, int_arm_neon_sha1su1>;
+def SHA256SU0 : N2SHA2Op<"256su0", 0b10, 0b011, 1, 1, int_arm_neon_sha256su0>;
+def SHA1C : N3SHA3Op<"1c", 0b00100, 0b00, int_arm_neon_sha1c>;
+def SHA1M : N3SHA3Op<"1m", 0b00100, 0b10, int_arm_neon_sha1m>;
+def SHA1P : N3SHA3Op<"1p", 0b00100, 0b01, int_arm_neon_sha1p>;
+def SHA1SU0 : N3SHA3Op<"1su0", 0b00100, 0b11, int_arm_neon_sha1su0>;
+def SHA256H : N3SHA3Op<"256h", 0b00110, 0b00, int_arm_neon_sha256h>;
+def SHA256H2 : N3SHA3Op<"256h2", 0b00110, 0b01, int_arm_neon_sha256h2>;
+def SHA256SU1 : N3SHA3Op<"256su1", 0b00110, 0b10, int_arm_neon_sha256su1>;
+
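NOTE: the crypto defs map straight onto intrinsics. One AES round step as a sketch (intrinsic name and signature follow the int_arm_neon_aese operator above; ARMv8 + crypto assumed):

    declare <16 x i8> @llvm.arm.neon.aese(<16 x i8>, <16 x i8>)

    define <16 x i8> @aes_round(<16 x i8> %state, <16 x i8> %key) {
      %r = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %state, <16 x i8> %key)
      ret <16 x i8> %r                ; -> aese.8 qd, qm, with qd tied to %state
    }
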
//===----------------------------------------------------------------------===//
// NEON instructions for single-precision FP math
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td
index e7218c6..af5ef53 100644
--- a/lib/Target/ARM/ARMInstrThumb.td
+++ b/lib/Target/ARM/ARMInstrThumb.td
@@ -269,25 +269,26 @@ class T1SystemEncoding<bits<8> opc>
let Inst{7-0} = opc;
}
-def tNOP : T1pI<(outs), (ins), NoItinerary, "nop", "", []>,
- T1SystemEncoding<0x00>, // A8.6.110
- Requires<[IsThumb2]>;
-
-def tYIELD : T1pI<(outs), (ins), NoItinerary, "yield", "", []>,
- T1SystemEncoding<0x10>, // A8.6.410
- Requires<[IsThumb2]>;
-
-def tWFE : T1pI<(outs), (ins), NoItinerary, "wfe", "", []>,
- T1SystemEncoding<0x20>, // A8.6.408
- Requires<[IsThumb2]>;
+def tHINT : T1pI<(outs), (ins imm0_15:$imm), NoItinerary, "hint", "\t$imm", []>,
+ T1SystemEncoding<0x00>,
+ Requires<[IsThumb, HasV6M]> {
+ bits<4> imm;
+ let Inst{7-4} = imm;
+}
-def tWFI : T1pI<(outs), (ins), NoItinerary, "wfi", "", []>,
- T1SystemEncoding<0x30>, // A8.6.409
- Requires<[IsThumb2]>;
+class tHintAlias<string Asm, dag Result> : tInstAlias<Asm, Result> {
+ let Predicates = [IsThumb, HasV6M];
+}
-def tSEV : T1pI<(outs), (ins), NoItinerary, "sev", "", []>,
- T1SystemEncoding<0x40>, // A8.6.157
- Requires<[IsThumb2]>;
+def : tHintAlias<"nop$p", (tHINT 0, pred:$p)>; // A8.6.110
+def : tHintAlias<"yield$p", (tHINT 1, pred:$p)>; // A8.6.410
+def : tHintAlias<"wfe$p", (tHINT 2, pred:$p)>; // A8.6.408
+def : tHintAlias<"wfi$p", (tHINT 3, pred:$p)>; // A8.6.409
+def : tHintAlias<"sev$p", (tHINT 4, pred:$p)>; // A8.6.157
+def : tInstAlias<"sevl$p", (tHINT 5, pred:$p)> {
+ let Predicates = [IsThumb2, HasV8];
+}
+def : T2Pat<(int_arm_sevl), (tHINT 5)>;
// The imm operand $val can be used by a debugger to store more information
// about the breakpoint.
@@ -300,8 +301,15 @@ def tBKPT : T1I<(outs), (ins imm0_255:$val), NoItinerary, "bkpt\t$val",
let Inst{7-0} = val;
}
+def tHLT : T1I<(outs), (ins imm0_63:$val), NoItinerary, "hlt\t$val",
+ []>, T1Encoding<0b101110>, Requires<[IsThumb, HasV8]> {
+ let Inst{9-6} = 0b1010;
+ bits<6> val;
+ let Inst{5-0} = val;
+}
+
def tSETEND : T1I<(outs), (ins setend_op:$end), NoItinerary, "setend\t$end",
- []>, T1Encoding<0b101101> {
+ []>, T1Encoding<0b101101>, Deprecated<HasV8Ops> {
bits<1> end;
// A8.6.156
let Inst{9-5} = 0b10010;
@@ -491,7 +499,8 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1 in {
T1Encoding<{1,1,1,0,0,?}>, Sched<[WriteBr]> {
bits<11> target;
let Inst{10-0} = target;
- }
+ let AsmMatchConverter = "cvtThumbBranches";
+ }
// Far jump
// Just a pseudo for a tBL instruction. Needed to let regalloc know about
@@ -521,6 +530,7 @@ let isBranch = 1, isTerminator = 1 in
bits<8> target;
let Inst{11-8} = p;
let Inst{7-0} = target;
+ let AsmMatchConverter = "cvtThumbBranches";
}
@@ -660,9 +670,6 @@ def tLDRpci : T1pIs<(outs tGPR:$Rt), (ins t_addrmode_pc:$addr), IIC_iLoad_i,
let Inst{7-0} = addr;
}
-def : tInstAlias<"ldr${p}.n $Rt, $addr",
- (tLDRpci tGPR:$Rt, t_addrmode_pc:$addr, pred:$p), 0>;
-
// A8.6.194 & A8.6.192
defm tSTR : thumb_st_rr_ri_enc<0b000, 0b0110, t_addrmode_rrs4,
t_addrmode_is4, AddrModeT1_4,
@@ -1205,9 +1212,9 @@ def tUXTH : // A8.6.264
// Expanded after instruction selection into a branch sequence.
let usesCustomInserter = 1 in // Expanded after instruction selection.
def tMOVCCr_pseudo :
- PseudoInst<(outs tGPR:$dst), (ins tGPR:$false, tGPR:$true, pred:$cc),
- NoItinerary,
- [/*(set tGPR:$dst, (ARMcmov tGPR:$false, tGPR:$true, imm:$cc))*/]>;
+ PseudoInst<(outs tGPR:$dst), (ins tGPR:$false, tGPR:$true, cmovpred:$p),
+ NoItinerary,
+ [(set tGPR:$dst, (ARMcmov tGPR:$false, tGPR:$true, cmovpred:$p))]>;
// tLEApcrel - Load a pc-relative address into a register without offending the
// assembler.
diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td
index 84086a5..48acffd 100644
--- a/lib/Target/ARM/ARMInstrThumb2.td
+++ b/lib/Target/ARM/ARMInstrThumb2.td
@@ -465,6 +465,18 @@ class T2ThreeReg<dag oops, dag iops, InstrItinClass itin,
let Inst{3-0} = Rm;
}
+class T2ThreeRegNoP<dag oops, dag iops, InstrItinClass itin,
+ string asm, list<dag> pattern>
+ : T2XI<oops, iops, itin, asm, pattern> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<4> Rm;
+
+ let Inst{11-8} = Rd;
+ let Inst{19-16} = Rn;
+ let Inst{3-0} = Rm;
+}
+
class T2sThreeReg<dag oops, dag iops, InstrItinClass itin,
string opc, string asm, list<dag> pattern>
: T2sI<oops, iops, itin, opc, asm, pattern> {
@@ -1396,6 +1408,32 @@ def t2LDRHT : T2IldT<0, 0b01, "ldrht", IIC_iLoad_bh_i>;
def t2LDRSBT : T2IldT<1, 0b00, "ldrsbt", IIC_iLoad_bh_i>;
def t2LDRSHT : T2IldT<1, 0b01, "ldrsht", IIC_iLoad_bh_i>;
+class T2Ildacq<bits<4> bits23_20, bits<2> bit54, dag oops, dag iops,
+ string opc, string asm, list<dag> pattern>
+ : Thumb2I<oops, iops, AddrModeNone, 4, NoItinerary,
+ opc, asm, "", pattern>, Requires<[IsThumb, HasV8]> {
+ bits<4> Rt;
+ bits<4> addr;
+
+ let Inst{31-27} = 0b11101;
+ let Inst{26-24} = 0b000;
+ let Inst{23-20} = bits23_20;
+ let Inst{11-6} = 0b111110;
+ let Inst{5-4} = bit54;
+ let Inst{3-0} = 0b1111;
+
+ // Encode instruction operands
+ let Inst{19-16} = addr;
+ let Inst{15-12} = Rt;
+}
+
+def t2LDA : T2Ildacq<0b1101, 0b10, (outs rGPR:$Rt),
+ (ins addr_offset_none:$addr), "lda", "\t$Rt, $addr", []>;
+def t2LDAB : T2Ildacq<0b1101, 0b00, (outs rGPR:$Rt),
+ (ins addr_offset_none:$addr), "ldab", "\t$Rt, $addr", []>;
+def t2LDAH : T2Ildacq<0b1101, 0b01, (outs rGPR:$Rt),
+ (ins addr_offset_none:$addr), "ldah", "\t$Rt, $addr", []>;
+
// Store
defm t2STR :T2I_st<0b10,"str", IIC_iStore_i, IIC_iStore_si, GPR,
BinOpFrag<(store node:$LHS, node:$RHS)>>;
@@ -1539,6 +1577,31 @@ def t2STRD_POST : T2Ii8s4post<0, 1, 0, (outs GPR:$wb),
IIC_iStore_d_ru, "strd", "\t$Rt, $Rt2, $addr$imm",
"$addr.base = $wb", []>;
+class T2Istrrel<bits<2> bit54, dag oops, dag iops,
+ string opc, string asm, list<dag> pattern>
+ : Thumb2I<oops, iops, AddrModeNone, 4, NoItinerary, opc,
+ asm, "", pattern>, Requires<[IsThumb, HasV8]> {
+ bits<4> Rt;
+ bits<4> addr;
+
+ let Inst{31-27} = 0b11101;
+ let Inst{26-20} = 0b0001100;
+ let Inst{11-6} = 0b111110;
+ let Inst{5-4} = bit54;
+ let Inst{3-0} = 0b1111;
+
+ // Encode instruction operands
+ let Inst{19-16} = addr;
+ let Inst{15-12} = Rt;
+}
+
+def t2STL : T2Istrrel<0b10, (outs), (ins rGPR:$Rt, addr_offset_none:$addr),
+ "stl", "\t$Rt, $addr", []>;
+def t2STLB : T2Istrrel<0b00, (outs), (ins rGPR:$Rt, addr_offset_none:$addr),
+ "stlb", "\t$Rt, $addr", []>;
+def t2STLH : T2Istrrel<0b01, (outs), (ins rGPR:$Rt, addr_offset_none:$addr),
+ "stlh", "\t$Rt, $addr", []>;
+
// T2Ipl (Preload Data/Instruction) signals the memory system about possible
// future data/instruction accesses.
// instr_write is inverted for Thumb mode: (prefetch 3) -> (preload 0),
@@ -1855,6 +1918,9 @@ def t2MOVi16 : T2I<(outs rGPR:$Rd), (ins imm0_65535_expr:$imm), IIC_iMOVi,
let DecoderMethod = "DecodeT2MOVTWInstruction";
}
+def : t2InstAlias<"mov${p} $Rd, $imm",
+ (t2MOVi16 rGPR:$Rd, imm256_65535_expr:$imm, pred:$p)>;
+
def t2MOVi16_ga_pcrel : PseudoInst<(outs rGPR:$Rd),
(ins i32imm:$addr, pclabel:$id), IIC_iMOVi, []>;
@@ -2950,6 +3016,34 @@ def : T2Pat<(or (and rGPR:$src1, 0xFFFF0000),
Requires<[HasT2ExtractPack, IsThumb2]>;
//===----------------------------------------------------------------------===//
+// CRC32 Instructions
+//
+// Polynomials:
+// + CRC32{B,H,W} 0x04C11DB7
+// + CRC32C{B,H,W} 0x1EDC6F41
+//
+
+class T2I_crc32<bit C, bits<2> sz, string suffix, SDPatternOperator builtin>
+ : T2ThreeRegNoP<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), NoItinerary,
+ !strconcat("crc32", suffix, "\t$Rd, $Rn, $Rm"),
+ [(set rGPR:$Rd, (builtin rGPR:$Rn, rGPR:$Rm))]>,
+ Requires<[IsThumb2, HasV8, HasCRC]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-21} = 0b010110;
+ let Inst{20} = C;
+ let Inst{15-12} = 0b1111;
+ let Inst{7-6} = 0b10;
+ let Inst{5-4} = sz;
+}
+
+def t2CRC32B : T2I_crc32<0, 0b00, "b", int_arm_crc32b>;
+def t2CRC32CB : T2I_crc32<1, 0b00, "cb", int_arm_crc32cb>;
+def t2CRC32H : T2I_crc32<0, 0b01, "h", int_arm_crc32h>;
+def t2CRC32CH : T2I_crc32<1, 0b01, "ch", int_arm_crc32ch>;
+def t2CRC32W : T2I_crc32<0, 0b10, "w", int_arm_crc32w>;
+def t2CRC32CW : T2I_crc32<1, 0b10, "cw", int_arm_crc32cw>;
+
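The six definitions above only wire the intrinsics to the Thumb-2 encodings;
the arithmetic itself is the usual bit-reflected CRC. As a minimal software
model of one CRC32B step, assuming the standard reflected form 0xEDB88320 of
the 0x04C11DB7 polynomial quoted above (a sketch for orientation, not LLVM
code; crc32b_step is a hypothetical name):

#include <cstdint>

// One CRC32B step: fold the byte into the accumulator, then run eight
// shift-and-conditional-xor rounds with the reflected polynomial.
static uint32_t crc32b_step(uint32_t acc, uint8_t data) {
  acc ^= data;
  for (int i = 0; i < 8; ++i)
    acc = (acc >> 1) ^ ((acc & 1) ? 0xEDB88320u : 0u);
  return acc;
}

CRC32C{B,H,W} follow the same recurrence with 0x82F63B78, the reflected form
of 0x1EDC6F41.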
+//===----------------------------------------------------------------------===//
// Comparison Instructions...
//
defm t2CMP : T2I_cmp_irs<0b1101, "cmp",
@@ -3029,93 +3123,67 @@ defm t2TEQ : T2I_cmp_irs<0b0100, "teq",
BinOpFrag<(ARMcmpZ (xor_su node:$LHS, node:$RHS), 0)>>;
// Conditional moves
-// FIXME: should be able to write a pattern for ARMcmov, but can't use
-// a two-value operand where a dag node expects two operands. :(
let neverHasSideEffects = 1 in {
let isCommutable = 1, isSelect = 1 in
def t2MOVCCr : t2PseudoInst<(outs rGPR:$Rd),
- (ins rGPR:$false, rGPR:$Rm, pred:$p),
+ (ins rGPR:$false, rGPR:$Rm, cmovpred:$p),
4, IIC_iCMOVr,
- [/*(set rGPR:$Rd, (ARMcmov rGPR:$false, rGPR:$Rm, imm:$cc, CCR:$ccr))*/]>,
- RegConstraint<"$false = $Rd">,
- Sched<[WriteALU]>;
+ [(set rGPR:$Rd, (ARMcmov rGPR:$false, rGPR:$Rm,
+ cmovpred:$p))]>,
+ RegConstraint<"$false = $Rd">, Sched<[WriteALU]>;
let isMoveImm = 1 in
-def t2MOVCCi : t2PseudoInst<(outs rGPR:$Rd),
- (ins rGPR:$false, t2_so_imm:$imm, pred:$p),
+def t2MOVCCi
+ : t2PseudoInst<(outs rGPR:$Rd),
+ (ins rGPR:$false, t2_so_imm:$imm, cmovpred:$p),
4, IIC_iCMOVi,
-[/*(set rGPR:$Rd,(ARMcmov rGPR:$false,t2_so_imm:$imm, imm:$cc, CCR:$ccr))*/]>,
- RegConstraint<"$false = $Rd">, Sched<[WriteALU]>;
+ [(set rGPR:$Rd, (ARMcmov rGPR:$false,t2_so_imm:$imm,
+ cmovpred:$p))]>,
+ RegConstraint<"$false = $Rd">, Sched<[WriteALU]>;
-// FIXME: Pseudo-ize these. For now, just mark codegen only.
let isCodeGenOnly = 1 in {
let isMoveImm = 1 in
-def t2MOVCCi16 : T2I<(outs rGPR:$Rd), (ins rGPR:$false, imm0_65535_expr:$imm),
- IIC_iCMOVi,
- "movw", "\t$Rd, $imm", []>,
- RegConstraint<"$false = $Rd">, Sched<[WriteALU]> {
- let Inst{31-27} = 0b11110;
- let Inst{25} = 1;
- let Inst{24-21} = 0b0010;
- let Inst{20} = 0; // The S bit.
- let Inst{15} = 0;
-
- bits<4> Rd;
- bits<16> imm;
-
- let Inst{11-8} = Rd;
- let Inst{19-16} = imm{15-12};
- let Inst{26} = imm{11};
- let Inst{14-12} = imm{10-8};
- let Inst{7-0} = imm{7-0};
-}
+def t2MOVCCi16
+ : t2PseudoInst<(outs rGPR:$Rd),
+ (ins rGPR:$false, imm0_65535_expr:$imm, cmovpred:$p),
+ 4, IIC_iCMOVi,
+ [(set rGPR:$Rd, (ARMcmov rGPR:$false, imm0_65535:$imm,
+ cmovpred:$p))]>,
+ RegConstraint<"$false = $Rd">, Sched<[WriteALU]>;
let isMoveImm = 1 in
-def t2MOVCCi32imm : PseudoInst<(outs rGPR:$dst),
- (ins rGPR:$false, i32imm:$src, pred:$p),
- IIC_iCMOVix2, []>, RegConstraint<"$false = $dst">;
+def t2MVNCCi
+ : t2PseudoInst<(outs rGPR:$Rd),
+ (ins rGPR:$false, t2_so_imm:$imm, cmovpred:$p),
+ 4, IIC_iCMOVi,
+ [(set rGPR:$Rd,
+ (ARMcmov rGPR:$false, t2_so_imm_not:$imm,
+ cmovpred:$p))]>,
+ RegConstraint<"$false = $Rd">, Sched<[WriteALU]>;
+
+class MOVCCShPseudo<SDPatternOperator opnode, Operand ty>
+ : t2PseudoInst<(outs rGPR:$Rd),
+ (ins rGPR:$false, rGPR:$Rm, i32imm:$imm, cmovpred:$p),
+ 4, IIC_iCMOVsi,
+ [(set rGPR:$Rd, (ARMcmov rGPR:$false,
+ (opnode rGPR:$Rm, (i32 ty:$imm)),
+ cmovpred:$p))]>,
+ RegConstraint<"$false = $Rd">, Sched<[WriteALU]>;
+
+def t2MOVCClsl : MOVCCShPseudo<shl, imm0_31>;
+def t2MOVCClsr : MOVCCShPseudo<srl, imm_sr>;
+def t2MOVCCasr : MOVCCShPseudo<sra, imm_sr>;
+def t2MOVCCror : MOVCCShPseudo<rotr, imm0_31>;
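These pseudos fold an immediate shift of the chosen operand into the
predicated move, so instruction selection can match
ARMcmov(false, shl(Rm, imm), pred) and friends as a single node. A
hypothetical C++ source shape that can produce that pattern (whether it
actually lowers this way depends on subtarget and optimization level):

// A select whose chosen operand is an immediate shift is exactly the
// shape MOVCCShPseudo is written to match (here the lsl variant).
int select_shifted(bool c, int x, int y) {
  return c ? (x << 3) : y;
}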
let isMoveImm = 1 in
-def t2MVNCCi : T2OneRegImm<(outs rGPR:$Rd), (ins rGPR:$false, t2_so_imm:$imm),
- IIC_iCMOVi, "mvn", "\t$Rd, $imm",
-[/*(set rGPR:$Rd,(ARMcmov rGPR:$false,t2_so_imm_not:$imm,
- imm:$cc, CCR:$ccr))*/]>,
- RegConstraint<"$false = $Rd">, Sched<[WriteALU]> {
- let Inst{31-27} = 0b11110;
- let Inst{25} = 0;
- let Inst{24-21} = 0b0011;
- let Inst{20} = 0; // The S bit.
- let Inst{19-16} = 0b1111; // Rn
- let Inst{15} = 0;
-}
-
-class T2I_movcc_sh<bits<2> opcod, dag oops, dag iops, InstrItinClass itin,
- string opc, string asm, list<dag> pattern>
- : T2TwoRegShiftImm<oops, iops, itin, opc, asm, pattern>, Sched<[WriteALU]> {
- let Inst{31-27} = 0b11101;
- let Inst{26-25} = 0b01;
- let Inst{24-21} = 0b0010;
- let Inst{20} = 0; // The S bit.
- let Inst{19-16} = 0b1111; // Rn
- let Inst{5-4} = opcod; // Shift type.
-}
-def t2MOVCClsl : T2I_movcc_sh<0b00, (outs rGPR:$Rd),
- (ins rGPR:$false, rGPR:$Rm, i32imm:$imm),
- IIC_iCMOVsi, "lsl", ".w\t$Rd, $Rm, $imm", []>,
- RegConstraint<"$false = $Rd">;
-def t2MOVCClsr : T2I_movcc_sh<0b01, (outs rGPR:$Rd),
- (ins rGPR:$false, rGPR:$Rm, i32imm:$imm),
- IIC_iCMOVsi, "lsr", ".w\t$Rd, $Rm, $imm", []>,
- RegConstraint<"$false = $Rd">;
-def t2MOVCCasr : T2I_movcc_sh<0b10, (outs rGPR:$Rd),
- (ins rGPR:$false, rGPR:$Rm, i32imm:$imm),
- IIC_iCMOVsi, "asr", ".w\t$Rd, $Rm, $imm", []>,
- RegConstraint<"$false = $Rd">;
-def t2MOVCCror : T2I_movcc_sh<0b11, (outs rGPR:$Rd),
- (ins rGPR:$false, rGPR:$Rm, i32imm:$imm),
- IIC_iCMOVsi, "ror", ".w\t$Rd, $Rm, $imm", []>,
- RegConstraint<"$false = $Rd">;
+def t2MOVCCi32imm
+ : t2PseudoInst<(outs rGPR:$dst),
+ (ins rGPR:$false, i32imm:$src, cmovpred:$p),
+ 8, IIC_iCMOVix2,
+ [(set rGPR:$dst, (ARMcmov rGPR:$false, imm:$src,
+ cmovpred:$p))]>,
+ RegConstraint<"$false = $dst">;
} // isCodeGenOnly = 1
} // neverHasSideEffects
@@ -3127,7 +3195,7 @@ def t2MOVCCror : T2I_movcc_sh<0b11, (outs rGPR:$Rd),
// memory barriers protect the atomic sequences
let hasSideEffects = 1 in {
def t2DMB : T2I<(outs), (ins memb_opt:$opt), NoItinerary,
- "dmb", "\t$opt", [(ARMMemBarrier (i32 imm:$opt))]>,
+ "dmb", "\t$opt", [(int_arm_dmb (i32 imm0_15:$opt))]>,
Requires<[HasDB]> {
bits<4> opt;
let Inst{31-4} = 0xf3bf8f5;
@@ -3136,7 +3204,8 @@ def t2DMB : T2I<(outs), (ins memb_opt:$opt), NoItinerary,
}
def t2DSB : T2I<(outs), (ins memb_opt:$opt), NoItinerary,
- "dsb", "\t$opt", []>, Requires<[HasDB]> {
+ "dsb", "\t$opt", [(int_arm_dsb (i32 imm0_15:$opt))]>,
+ Requires<[HasDB]> {
bits<4> opt;
let Inst{31-4} = 0xf3bf8f4;
let Inst{3-0} = opt;
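With this change the t2DMB pattern moves from the generic ARMMemBarrier node
to the ACLE intrinsic, t2DSB gains a pattern for the first time, and the
4-bit option field is exposed directly. Assuming clang's ACLE barrier
builtins, a sketch of how source code reaches these patterns (option 0xF is
the full-system "sy" barrier that the aliases further down default to):

// __builtin_arm_dmb(0xF) becomes int_arm_dmb 15, which the t2DMB
// pattern above selects as "dmb sy".
void publish(volatile int *flag) {
  __builtin_arm_dmb(0xF);
  *flag = 1;
}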
@@ -3149,15 +3218,14 @@ def t2ISB : T2I<(outs), (ins instsyncb_opt:$opt), NoItinerary,
let Inst{3-0} = opt;
}
-class T2I_ldrex<bits<2> opcod, dag oops, dag iops, AddrMode am, int sz,
+class T2I_ldrex<bits<4> opcod, dag oops, dag iops, AddrMode am, int sz,
InstrItinClass itin, string opc, string asm, string cstr,
list<dag> pattern, bits<4> rt2 = 0b1111>
: Thumb2I<oops, iops, am, sz, itin, opc, asm, cstr, pattern> {
let Inst{31-27} = 0b11101;
let Inst{26-20} = 0b0001101;
let Inst{11-8} = rt2;
- let Inst{7-6} = 0b01;
- let Inst{5-4} = opcod;
+ let Inst{7-4} = opcod;
let Inst{3-0} = 0b1111;
bits<4> addr;
@@ -3165,15 +3233,14 @@ class T2I_ldrex<bits<2> opcod, dag oops, dag iops, AddrMode am, int sz,
let Inst{19-16} = addr;
let Inst{15-12} = Rt;
}
-class T2I_strex<bits<2> opcod, dag oops, dag iops, AddrMode am, int sz,
+class T2I_strex<bits<4> opcod, dag oops, dag iops, AddrMode am, int sz,
InstrItinClass itin, string opc, string asm, string cstr,
list<dag> pattern, bits<4> rt2 = 0b1111>
: Thumb2I<oops, iops, am, sz, itin, opc, asm, cstr, pattern> {
let Inst{31-27} = 0b11101;
let Inst{26-20} = 0b0001100;
let Inst{11-8} = rt2;
- let Inst{7-6} = 0b01;
- let Inst{5-4} = opcod;
+ let Inst{7-4} = opcod;
bits<4> Rd;
bits<4> addr;
@@ -3184,11 +3251,11 @@ class T2I_strex<bits<2> opcod, dag oops, dag iops, AddrMode am, int sz,
}
let mayLoad = 1 in {
-def t2LDREXB : T2I_ldrex<0b00, (outs rGPR:$Rt), (ins addr_offset_none:$addr),
+def t2LDREXB : T2I_ldrex<0b0100, (outs rGPR:$Rt), (ins addr_offset_none:$addr),
AddrModeNone, 4, NoItinerary,
"ldrexb", "\t$Rt, $addr", "",
[(set rGPR:$Rt, (ldrex_1 addr_offset_none:$addr))]>;
-def t2LDREXH : T2I_ldrex<0b01, (outs rGPR:$Rt), (ins addr_offset_none:$addr),
+def t2LDREXH : T2I_ldrex<0b0101, (outs rGPR:$Rt), (ins addr_offset_none:$addr),
AddrModeNone, 4, NoItinerary,
"ldrexh", "\t$Rt, $addr", "",
[(set rGPR:$Rt, (ldrex_2 addr_offset_none:$addr))]>;
@@ -3206,7 +3273,7 @@ def t2LDREX : Thumb2I<(outs rGPR:$Rt), (ins t2addrmode_imm0_1020s4:$addr),
let Inst{7-0} = addr{7-0};
}
let hasExtraDefRegAllocReq = 1 in
-def t2LDREXD : T2I_ldrex<0b11, (outs rGPR:$Rt, rGPR:$Rt2),
+def t2LDREXD : T2I_ldrex<0b0111, (outs rGPR:$Rt, rGPR:$Rt2),
(ins addr_offset_none:$addr),
AddrModeNone, 4, NoItinerary,
"ldrexd", "\t$Rt, $Rt2, $addr", "",
@@ -3214,16 +3281,48 @@ def t2LDREXD : T2I_ldrex<0b11, (outs rGPR:$Rt, rGPR:$Rt2),
bits<4> Rt2;
let Inst{11-8} = Rt2;
}
+def t2LDAEXB : T2I_ldrex<0b1100, (outs rGPR:$Rt), (ins addr_offset_none:$addr),
+ AddrModeNone, 4, NoItinerary,
+ "ldaexb", "\t$Rt, $addr", "",
+ []>, Requires<[IsThumb, HasV8]>;
+def t2LDAEXH : T2I_ldrex<0b1101, (outs rGPR:$Rt), (ins addr_offset_none:$addr),
+ AddrModeNone, 4, NoItinerary,
+ "ldaexh", "\t$Rt, $addr", "",
+ []>, Requires<[IsThumb, HasV8]>;
+def t2LDAEX : Thumb2I<(outs rGPR:$Rt), (ins addr_offset_none:$addr),
+ AddrModeNone, 4, NoItinerary,
+ "ldaex", "\t$Rt, $addr", "",
+ []>, Requires<[IsThumb, HasV8]> {
+ bits<4> Rt;
+ bits<4> addr;
+ let Inst{31-27} = 0b11101;
+ let Inst{26-20} = 0b0001101;
+ let Inst{19-16} = addr;
+ let Inst{15-12} = Rt;
+ let Inst{11-8} = 0b1111;
+ let Inst{7-0} = 0b11101111;
+}
+let hasExtraDefRegAllocReq = 1 in
+def t2LDAEXD : T2I_ldrex<0b1111, (outs rGPR:$Rt, rGPR:$Rt2),
+ (ins addr_offset_none:$addr),
+ AddrModeNone, 4, NoItinerary,
+ "ldaexd", "\t$Rt, $Rt2, $addr", "",
+ [], {?, ?, ?, ?}>, Requires<[IsThumb, HasV8]> {
+ bits<4> Rt2;
+ let Inst{11-8} = Rt2;
+
+ let Inst{7} = 1;
+}
}
let mayStore = 1, Constraints = "@earlyclobber $Rd" in {
-def t2STREXB : T2I_strex<0b00, (outs rGPR:$Rd),
+def t2STREXB : T2I_strex<0b0100, (outs rGPR:$Rd),
(ins rGPR:$Rt, addr_offset_none:$addr),
AddrModeNone, 4, NoItinerary,
"strexb", "\t$Rd, $Rt, $addr", "",
[(set rGPR:$Rd, (strex_1 rGPR:$Rt,
addr_offset_none:$addr))]>;
-def t2STREXH : T2I_strex<0b01, (outs rGPR:$Rd),
+def t2STREXH : T2I_strex<0b0101, (outs rGPR:$Rd),
(ins rGPR:$Rt, addr_offset_none:$addr),
AddrModeNone, 4, NoItinerary,
"strexh", "\t$Rd, $Rt, $addr", "",
@@ -3247,7 +3346,7 @@ def t2STREX : Thumb2I<(outs rGPR:$Rd), (ins rGPR:$Rt,
let Inst{7-0} = addr{7-0};
}
let hasExtraSrcRegAllocReq = 1 in
-def t2STREXD : T2I_strex<0b11, (outs rGPR:$Rd),
+def t2STREXD : T2I_strex<0b0111, (outs rGPR:$Rd),
(ins rGPR:$Rt, rGPR:$Rt2, addr_offset_none:$addr),
AddrModeNone, 4, NoItinerary,
"strexd", "\t$Rd, $Rt, $Rt2, $addr", "", [],
@@ -3255,6 +3354,42 @@ def t2STREXD : T2I_strex<0b11, (outs rGPR:$Rd),
bits<4> Rt2;
let Inst{11-8} = Rt2;
}
+def t2STLEXB : T2I_strex<0b1100, (outs rGPR:$Rd),
+ (ins rGPR:$Rt, addr_offset_none:$addr),
+ AddrModeNone, 4, NoItinerary,
+ "stlexb", "\t$Rd, $Rt, $addr", "",
+ []>, Requires<[IsThumb, HasV8]>;
+
+def t2STLEXH : T2I_strex<0b1101, (outs rGPR:$Rd),
+ (ins rGPR:$Rt, addr_offset_none:$addr),
+ AddrModeNone, 4, NoItinerary,
+ "stlexh", "\t$Rd, $Rt, $addr", "",
+ []>, Requires<[IsThumb, HasV8]>;
+
+def t2STLEX : Thumb2I<(outs rGPR:$Rd), (ins rGPR:$Rt,
+ addr_offset_none:$addr),
+ AddrModeNone, 4, NoItinerary,
+ "stlex", "\t$Rd, $Rt, $addr", "",
+ []>, Requires<[IsThumb, HasV8]> {
+ bits<4> Rd;
+ bits<4> Rt;
+ bits<4> addr;
+ let Inst{31-27} = 0b11101;
+ let Inst{26-20} = 0b0001100;
+ let Inst{19-16} = addr;
+ let Inst{15-12} = Rt;
+ let Inst{11-4} = 0b11111110;
+ let Inst{3-0} = Rd;
+}
+let hasExtraSrcRegAllocReq = 1 in
+def t2STLEXD : T2I_strex<0b1111, (outs rGPR:$Rd),
+ (ins rGPR:$Rt, rGPR:$Rt2, addr_offset_none:$addr),
+ AddrModeNone, 4, NoItinerary,
+ "stlexd", "\t$Rd, $Rt, $Rt2, $addr", "", [],
+ {?, ?, ?, ?}>, Requires<[IsThumb, HasV8]> {
+ bits<4> Rt2;
+ let Inst{11-8} = Rt2;
+}
}
def t2CLREX : T2I<(outs), (ins), NoItinerary, "clrex", "", [(int_arm_clrex)]>,
@@ -3336,13 +3471,14 @@ def t2B : T2I<(outs), (ins uncondbrtarget:$target), IIC_Br,
let Inst{12} = 1;
bits<24> target;
- let Inst{26} = target{19};
- let Inst{11} = target{18};
- let Inst{13} = target{17};
+ let Inst{26} = target{23};
+ let Inst{13} = target{22};
+ let Inst{11} = target{21};
let Inst{25-16} = target{20-11};
let Inst{10-0} = target{10-0};
let DecoderMethod = "DecodeT2BInstruction";
-}
+ let AsmMatchConverter = "cvtThumbBranches";
+}
let isNotDuplicable = 1, isIndirectBranch = 1 in {
def t2BR_JT : t2PseudoInst<(outs),
@@ -3410,6 +3546,7 @@ def t2Bcc : T2I<(outs), (ins brtarget:$target), IIC_Br,
let Inst{10-0} = target{11-1};
let DecoderMethod = "DecodeThumb2BCCInstruction";
+ let AsmMatchConverter = "cvtThumbBranches";
}
// Tail calls. The iOS version of Thumb tail calls uses a t2 branch, so
@@ -3428,7 +3565,8 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in {
let Defs = [ITSTATE] in
def t2IT : Thumb2XI<(outs), (ins it_pred:$cc, it_mask:$mask),
AddrModeNone, 2, IIC_iALUx,
- "it$mask\t$cc", "", []> {
+ "it$mask\t$cc", "", []>,
+ ComplexDeprecationPredicate<"IT"> {
// 16-bit instruction.
let Inst{31-16} = 0x0000;
let Inst{15-8} = 0b10111111;
@@ -3502,27 +3640,34 @@ class t2CPS<dag iops, string asm_op> : T2XI<(outs), iops, NoItinerary,
let M = 1 in
def t2CPS3p : t2CPS<(ins imod_op:$imod, iflags_op:$iflags, i32imm:$mode),
- "$imod.w\t$iflags, $mode">;
+ "$imod\t$iflags, $mode">;
let mode = 0, M = 0 in
def t2CPS2p : t2CPS<(ins imod_op:$imod, iflags_op:$iflags),
"$imod.w\t$iflags">;
let imod = 0, iflags = 0, M = 1 in
def t2CPS1p : t2CPS<(ins imm0_31:$mode), "\t$mode">;
+def : t2InstAlias<"cps$imod.w $iflags, $mode",
+ (t2CPS3p imod_op:$imod, iflags_op:$iflags, i32imm:$mode), 0>;
+def : t2InstAlias<"cps.w $mode", (t2CPS1p imm0_31:$mode), 0>;
+
// A6.3.4 Branches and miscellaneous control
// Table A6-14 Change Processor State, and hint instructions
-def t2HINT : T2I<(outs), (ins imm0_4:$imm), NoItinerary, "hint", "\t$imm",[]> {
- bits<3> imm;
+def t2HINT : T2I<(outs), (ins imm0_239:$imm), NoItinerary, "hint", ".w\t$imm", []> {
+ bits<8> imm;
let Inst{31-3} = 0b11110011101011111000000000000;
- let Inst{2-0} = imm;
+ let Inst{7-0} = imm;
}
-def : t2InstAlias<"hint$p.w $imm", (t2HINT imm0_4:$imm, pred:$p)>;
+def : t2InstAlias<"hint$p $imm", (t2HINT imm0_239:$imm, pred:$p)>;
def : t2InstAlias<"nop$p.w", (t2HINT 0, pred:$p)>;
def : t2InstAlias<"yield$p.w", (t2HINT 1, pred:$p)>;
def : t2InstAlias<"wfe$p.w", (t2HINT 2, pred:$p)>;
def : t2InstAlias<"wfi$p.w", (t2HINT 3, pred:$p)>;
def : t2InstAlias<"sev$p.w", (t2HINT 4, pred:$p)>;
+def : t2InstAlias<"sevl$p.w", (t2HINT 5, pred:$p)> {
+ let Predicates = [IsThumb2, HasV8];
+}
def t2DBG : T2I<(outs), (ins imm0_15:$opt), NoItinerary, "dbg", "\t$opt", []> {
bits<4> opt;
@@ -3545,6 +3690,20 @@ def t2SMC : T2I<(outs), (ins imm0_15:$opt), NoItinerary, "smc", "\t$opt",
let Inst{19-16} = opt;
}
+class T2DCPS<bits<2> opt, string opc>
+ : T2I<(outs), (ins), NoItinerary, opc, "", []>, Requires<[IsThumb2, HasV8]> {
+ let Inst{31-27} = 0b11110;
+ let Inst{26-20} = 0b1111000;
+ let Inst{19-16} = 0b1111;
+ let Inst{15-12} = 0b1000;
+ let Inst{11-2} = 0b0000000000;
+ let Inst{1-0} = opt;
+}
+
+def t2DCPS1 : T2DCPS<0b01, "dcps1">;
+def t2DCPS2 : T2DCPS<0b10, "dcps2">;
+def t2DCPS3 : T2DCPS<0b11, "dcps3">;
+
class T2SRS<bits<2> Op, bit W, dag oops, dag iops, InstrItinClass itin,
string opc, string asm, list<dag> pattern>
: T2I<oops, iops, itin, opc, asm, pattern> {
@@ -3600,9 +3759,12 @@ def t2RFEIA : T2RFE<0b111010011001,
[/* For disassembly only; pattern left blank */]>;
// B9.3.19 SUBS PC, LR, #imm (Thumb2) system instruction.
-let Defs = [PC], Uses = [LR] in
+// Exception return instruction is "subs pc, lr, #imm".
+let isReturn = 1, isBarrier = 1, isTerminator = 1, Defs = [PC] in
def t2SUBS_PC_LR : T2I <(outs), (ins imm0_255:$imm), NoItinerary,
- "subs", "\tpc, lr, $imm", []>, Requires<[IsThumb2]> {
+ "subs", "\tpc, lr, $imm",
+ [(ARMintretflag imm0_255:$imm)]>,
+ Requires<[IsThumb2]> {
let Inst{31-8} = 0b111100111101111010001111;
bits<8> imm;
@@ -3752,10 +3914,10 @@ defm t2LDC : t2LdStCop<0b1110, 1, 0, "ldc">;
defm t2LDCL : t2LdStCop<0b1110, 1, 1, "ldcl">;
defm t2STC : t2LdStCop<0b1110, 0, 0, "stc">;
defm t2STCL : t2LdStCop<0b1110, 0, 1, "stcl">;
-defm t2LDC2 : t2LdStCop<0b1111, 1, 0, "ldc2">;
-defm t2LDC2L : t2LdStCop<0b1111, 1, 1, "ldc2l">;
-defm t2STC2 : t2LdStCop<0b1111, 0, 0, "stc2">;
-defm t2STC2L : t2LdStCop<0b1111, 0, 1, "stc2l">;
+defm t2LDC2 : t2LdStCop<0b1111, 1, 0, "ldc2">, Requires<[PreV8]>;
+defm t2LDC2L : t2LdStCop<0b1111, 1, 1, "ldc2l">, Requires<[PreV8]>;
+defm t2STC2 : t2LdStCop<0b1111, 0, 0, "stc2">, Requires<[PreV8]>;
+defm t2STC2L : t2LdStCop<0b1111, 0, 1, "stc2l">, Requires<[PreV8]>;
//===----------------------------------------------------------------------===//
@@ -3767,7 +3929,7 @@ defm t2STC2L : t2LdStCop<0b1111, 0, 1, "stc2l">;
//
// A/R class can only move from CPSR or SPSR.
def t2MRS_AR : T2I<(outs GPR:$Rd), (ins), NoItinerary, "mrs", "\t$Rd, apsr",
- []>, Requires<[IsThumb2,IsARClass]> {
+ []>, Requires<[IsThumb2,IsNotMClass]> {
bits<4> Rd;
let Inst{31-12} = 0b11110011111011111000;
let Inst{11-8} = Rd;
@@ -3777,7 +3939,7 @@ def t2MRS_AR : T2I<(outs GPR:$Rd), (ins), NoItinerary, "mrs", "\t$Rd, apsr",
def : t2InstAlias<"mrs${p} $Rd, cpsr", (t2MRS_AR GPR:$Rd, pred:$p)>;
def t2MRSsys_AR: T2I<(outs GPR:$Rd), (ins), NoItinerary, "mrs", "\t$Rd, spsr",
- []>, Requires<[IsThumb2,IsARClass]> {
+ []>, Requires<[IsThumb2,IsNotMClass]> {
bits<4> Rd;
let Inst{31-12} = 0b11110011111111111000;
let Inst{11-8} = Rd;
@@ -3810,7 +3972,7 @@ def t2MRS_M : T2I<(outs rGPR:$Rd), (ins msr_mask:$mask), NoItinerary,
// the mask with the fields to be accessed in the special register.
def t2MSR_AR : T2I<(outs), (ins msr_mask:$mask, rGPR:$Rn),
NoItinerary, "msr", "\t$mask, $Rn", []>,
- Requires<[IsThumb2,IsARClass]> {
+ Requires<[IsThumb2,IsNotMClass]> {
bits<5> mask;
bits<4> Rn;
let Inst{31-21} = 0b11110011100;
@@ -3892,7 +4054,8 @@ def t2MCR : t2MovRCopro<0b1110, "mcr", 0,
(ins p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn,
c_imm:$CRm, imm0_7:$opc2),
[(int_arm_mcr imm:$cop, imm:$opc1, GPR:$Rt, imm:$CRn,
- imm:$CRm, imm:$opc2)]>;
+ imm:$CRm, imm:$opc2)]>,
+ ComplexDeprecationPredicate<"MCR">;
def : t2InstAlias<"mcr${p} $cop, $opc1, $Rt, $CRn, $CRm",
(t2MCR p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn,
c_imm:$CRm, 0, pred:$p)>;
@@ -3900,7 +4063,9 @@ def t2MCR2 : t2MovRCopro<0b1111, "mcr2", 0,
(outs), (ins p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn,
c_imm:$CRm, imm0_7:$opc2),
[(int_arm_mcr2 imm:$cop, imm:$opc1, GPR:$Rt, imm:$CRn,
- imm:$CRm, imm:$opc2)]>;
+ imm:$CRm, imm:$opc2)]> {
+ let Predicates = [IsThumb2, PreV8];
+}
def : t2InstAlias<"mcr2${p} $cop, $opc1, $Rt, $CRn, $CRm",
(t2MCR2 p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn,
c_imm:$CRm, 0, pred:$p)>;
@@ -3915,7 +4080,9 @@ def : t2InstAlias<"mrc${p} $cop, $opc1, $Rt, $CRn, $CRm",
def t2MRC2 : t2MovRCopro<0b1111, "mrc2", 1,
(outs GPRwithAPSR:$Rt), (ins p_imm:$cop, imm0_7:$opc1, c_imm:$CRn,
- c_imm:$CRm, imm0_7:$opc2), []>;
+ c_imm:$CRm, imm0_7:$opc2), []> {
+ let Predicates = [IsThumb2, PreV8];
+}
def : t2InstAlias<"mrc2${p} $cop, $opc1, $Rt, $CRn, $CRm",
(t2MRC2 GPRwithAPSR:$Rt, p_imm:$cop, imm0_7:$opc1, c_imm:$CRn,
c_imm:$CRm, 0, pred:$p)>;
@@ -3933,17 +4100,22 @@ def t2MCRR : t2MovRRCopro<0b1110, "mcrr", 0,
imm:$CRm)]>;
def t2MCRR2 : t2MovRRCopro<0b1111, "mcrr2", 0,
[(int_arm_mcrr2 imm:$cop, imm:$opc1, GPR:$Rt,
- GPR:$Rt2, imm:$CRm)]>;
+ GPR:$Rt2, imm:$CRm)]> {
+ let Predicates = [IsThumb2, PreV8];
+}
+
/* from coprocessor to ARM core register */
def t2MRRC : t2MovRRCopro<0b1110, "mrrc", 1>;
-def t2MRRC2 : t2MovRRCopro<0b1111, "mrrc2", 1>;
+def t2MRRC2 : t2MovRRCopro<0b1111, "mrrc2", 1> {
+ let Predicates = [IsThumb2, PreV8];
+}
//===----------------------------------------------------------------------===//
// Other Coprocessor Instructions.
//
-def tCDP : T2Cop<0b1110, (outs), (ins p_imm:$cop, imm0_15:$opc1,
+def t2CDP : T2Cop<0b1110, (outs), (ins p_imm:$cop, imm0_15:$opc1,
c_imm:$CRd, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2),
"cdp", "\t$cop, $opc1, $CRd, $CRn, $CRm, $opc2",
[(int_arm_cdp imm:$cop, imm:$opc1, imm:$CRd, imm:$CRn,
@@ -3964,6 +4136,8 @@ def tCDP : T2Cop<0b1110, (outs), (ins p_imm:$cop, imm0_15:$opc1,
let Inst{15-12} = CRd;
let Inst{19-16} = CRn;
let Inst{23-20} = opc1;
+
+ let Predicates = [IsThumb2, PreV8];
}
def t2CDP2 : T2Cop<0b1111, (outs), (ins p_imm:$cop, imm0_15:$opc1,
@@ -3987,6 +4161,8 @@ def t2CDP2 : T2Cop<0b1111, (outs), (ins p_imm:$cop, imm0_15:$opc1,
let Inst{15-12} = CRd;
let Inst{19-16} = CRn;
let Inst{23-20} = opc1;
+
+ let Predicates = [IsThumb2, PreV8];
}
@@ -4060,6 +4236,15 @@ def : T2Pat<(atomic_store_32 t2addrmode_negimm8:$addr, GPR:$val),
def : T2Pat<(atomic_store_32 t2addrmode_so_reg:$addr, GPR:$val),
(t2STRs GPR:$val, t2addrmode_so_reg:$addr)>;
+let AddedComplexity = 8 in {
+ def : T2Pat<(atomic_load_acquire_8 addr_offset_none:$addr), (t2LDAB addr_offset_none:$addr)>;
+ def : T2Pat<(atomic_load_acquire_16 addr_offset_none:$addr), (t2LDAH addr_offset_none:$addr)>;
+ def : T2Pat<(atomic_load_acquire_32 addr_offset_none:$addr), (t2LDA addr_offset_none:$addr)>;
+ def : T2Pat<(atomic_store_release_8 addr_offset_none:$addr, GPR:$val), (t2STLB GPR:$val, addr_offset_none:$addr)>;
+ def : T2Pat<(atomic_store_release_16 addr_offset_none:$addr, GPR:$val), (t2STLH GPR:$val, addr_offset_none:$addr)>;
+ def : T2Pat<(atomic_store_release_32 addr_offset_none:$addr, GPR:$val), (t2STL GPR:$val, addr_offset_none:$addr)>;
+}
+
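These patterns let ordinary C++11 acquire loads and release stores select
the new v8 acquire/release instructions directly, instead of a plain
load/store bracketed by dmb. A sketch of the intent (the exact lowering
still depends on subtarget and flags):

#include <atomic>

// On an ARMv8 Thumb-2 target the acquire load below can match the
// atomic_load_acquire_32 pattern and select t2LDA ("lda rT, [rN]").
int consume(std::atomic<int> &flag) {
  return flag.load(std::memory_order_acquire);
}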
//===----------------------------------------------------------------------===//
// Assembler aliases
@@ -4081,7 +4266,8 @@ def : t2InstAlias<"sbc${s}${p} $Rd, $Rn, $ShiftedRm",
// Aliases for ADD without the ".w" optional width specifier.
def : t2InstAlias<"add${s}${p} $Rd, $Rn, $imm",
- (t2ADDri rGPR:$Rd, GPRnopc:$Rn, t2_so_imm:$imm, pred:$p, cc_out:$s)>;
+ (t2ADDri GPRnopc:$Rd, GPRnopc:$Rn, t2_so_imm:$imm, pred:$p,
+ cc_out:$s)>;
def : t2InstAlias<"add${p} $Rd, $Rn, $imm",
(t2ADDri12 GPRnopc:$Rd, GPR:$Rn, imm0_4095:$imm, pred:$p)>;
def : t2InstAlias<"add${s}${p} $Rd, $Rn, $Rm",
@@ -4156,9 +4342,9 @@ def : t2InstAlias<"tst${p} $Rn, $Rm",
(t2TSTrr GPRnopc:$Rn, rGPR:$Rm, pred:$p)>;
// Memory barriers
-def : InstAlias<"dmb${p}", (t2DMB 0xf, pred:$p)>, Requires<[IsThumb2, HasDB]>;
-def : InstAlias<"dsb${p}", (t2DSB 0xf, pred:$p)>, Requires<[IsThumb2, HasDB]>;
-def : InstAlias<"isb${p}", (t2ISB 0xf, pred:$p)>, Requires<[IsThumb2, HasDB]>;
+def : InstAlias<"dmb${p}", (t2DMB 0xf, pred:$p)>, Requires<[HasDB]>;
+def : InstAlias<"dsb${p}", (t2DSB 0xf, pred:$p)>, Requires<[HasDB]>;
+def : InstAlias<"isb${p}", (t2ISB 0xf, pred:$p)>, Requires<[HasDB]>;
// Alias for LDR, LDRB, LDRH, LDRSB, and LDRSH without the ".w" optional
// width specifier.
@@ -4185,7 +4371,7 @@ def : t2InstAlias<"ldrsh${p} $Rt, $addr",
(t2LDRSHs rGPR:$Rt, t2addrmode_so_reg:$addr, pred:$p)>;
def : t2InstAlias<"ldr${p} $Rt, $addr",
- (t2LDRpci GPR:$Rt, t2ldrlabel:$addr, pred:$p)>;
+ (t2LDRpci rGPR:$Rt, t2ldrlabel:$addr, pred:$p)>;
def : t2InstAlias<"ldrb${p} $Rt, $addr",
(t2LDRBpci rGPR:$Rt, t2ldrlabel:$addr, pred:$p)>;
def : t2InstAlias<"ldrh${p} $Rt, $addr",
@@ -4347,16 +4533,16 @@ def : t2InstAlias<"mvn${p} $Rd, $imm",
(t2MOVi rGPR:$Rd, t2_so_imm_not:$imm, pred:$p, zero_reg)>;
// Same for AND <--> BIC
def : t2InstAlias<"bic${s}${p} $Rd, $Rn, $imm",
- (t2ANDri rGPR:$Rd, rGPR:$Rn, so_imm_not:$imm,
+ (t2ANDri rGPR:$Rd, rGPR:$Rn, t2_so_imm_not:$imm,
pred:$p, cc_out:$s)>;
def : t2InstAlias<"bic${s}${p} $Rdn, $imm",
- (t2ANDri rGPR:$Rdn, rGPR:$Rdn, so_imm_not:$imm,
+ (t2ANDri rGPR:$Rdn, rGPR:$Rdn, t2_so_imm_not:$imm,
pred:$p, cc_out:$s)>;
def : t2InstAlias<"and${s}${p} $Rd, $Rn, $imm",
- (t2BICri rGPR:$Rd, rGPR:$Rn, so_imm_not:$imm,
+ (t2BICri rGPR:$Rd, rGPR:$Rn, t2_so_imm_not:$imm,
pred:$p, cc_out:$s)>;
def : t2InstAlias<"and${s}${p} $Rdn, $imm",
- (t2BICri rGPR:$Rdn, rGPR:$Rdn, so_imm_not:$imm,
+ (t2BICri rGPR:$Rdn, rGPR:$Rdn, t2_so_imm_not:$imm,
pred:$p, cc_out:$s)>;
// Likewise, "add Rd, t2_so_imm_neg" -> sub
def : t2InstAlias<"add${s}${p} $Rd, $Rn, $imm",
@@ -4398,7 +4584,7 @@ def : t2InstAlias<"adr${p} $Rd, $addr",
// LDR(literal) w/ alternate [pc, #imm] syntax.
def t2LDRpcrel : t2AsmPseudo<"ldr${p} $Rt, $addr",
- (ins GPRnopc:$Rt, t2ldr_pcrel_imm12:$addr, pred:$p)>;
+ (ins GPR:$Rt, t2ldr_pcrel_imm12:$addr, pred:$p)>;
def t2LDRBpcrel : t2AsmPseudo<"ldrb${p} $Rt, $addr",
(ins GPRnopc:$Rt, t2ldr_pcrel_imm12:$addr, pred:$p)>;
def t2LDRHpcrel : t2AsmPseudo<"ldrh${p} $Rt, $addr",
@@ -4409,7 +4595,7 @@ def t2LDRSHpcrel : t2AsmPseudo<"ldrsh${p} $Rt, $addr",
(ins GPRnopc:$Rt, t2ldr_pcrel_imm12:$addr, pred:$p)>;
// Version w/ the .w suffix.
def : t2InstAlias<"ldr${p}.w $Rt, $addr",
- (t2LDRpcrel GPRnopc:$Rt, t2ldr_pcrel_imm12:$addr, pred:$p), 0>;
+ (t2LDRpcrel GPR:$Rt, t2ldr_pcrel_imm12:$addr, pred:$p), 0>;
def : t2InstAlias<"ldrb${p}.w $Rt, $addr",
(t2LDRBpcrel GPRnopc:$Rt, t2ldr_pcrel_imm12:$addr, pred:$p)>;
def : t2InstAlias<"ldrh${p}.w $Rt, $addr",
diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td
index f9cfa15..a8cdc5c 100644
--- a/lib/Target/ARM/ARMInstrVFP.td
+++ b/lib/Target/ARM/ARMInstrVFP.td
@@ -333,45 +333,52 @@ def VNMULS : ASbI<0b11100, 0b10, 1, 0,
let D = VFPNeonA8Domain;
}
-multiclass vsel_inst<string op, bits<2> opc> {
- let DecoderNamespace = "VFPV8", PostEncoderMethod = "" in {
+multiclass vsel_inst<string op, bits<2> opc, int CC> {
+ let DecoderNamespace = "VFPV8", PostEncoderMethod = "",
+ Uses = [CPSR], AddedComplexity = 4 in {
def S : ASbInp<0b11100, opc, 0,
(outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
NoItinerary, !strconcat("vsel", op, ".f32\t$Sd, $Sn, $Sm"),
- []>, Requires<[HasV8FP]>;
+ [(set SPR:$Sd, (ARMcmov SPR:$Sm, SPR:$Sn, CC))]>,
+ Requires<[HasFPARMv8]>;
def D : ADbInp<0b11100, opc, 0,
(outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm),
NoItinerary, !strconcat("vsel", op, ".f64\t$Dd, $Dn, $Dm"),
- []>, Requires<[HasV8FP]>;
+ [(set DPR:$Dd, (ARMcmov (f64 DPR:$Dm), (f64 DPR:$Dn), CC))]>,
+ Requires<[HasFPARMv8, HasDPVFP]>;
}
}
-defm VSELGT : vsel_inst<"gt", 0b11>;
-defm VSELGE : vsel_inst<"ge", 0b10>;
-defm VSELEQ : vsel_inst<"eq", 0b00>;
-defm VSELVS : vsel_inst<"vs", 0b01>;
+// The CC constants here match ARMCC::CondCodes.
+defm VSELGT : vsel_inst<"gt", 0b11, 12>;
+defm VSELGE : vsel_inst<"ge", 0b10, 10>;
+defm VSELEQ : vsel_inst<"eq", 0b00, 0>;
+defm VSELVS : vsel_inst<"vs", 0b01, 6>;
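The bare CC arguments are ARMCC::CondCodes enumerator values. Abridged for
reference, assuming the standard ARM condition-code numbering:

// EQ = 0, VS = 6, GE = 10, GT = 12: the four values passed to
// vsel_inst above, in the usual ARM condition-code order.
enum CondCodes { EQ, NE, HS, LO, MI, PL, VS, VC, HI, LS, GE, LT, GT, LE, AL };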
-multiclass vmaxmin_inst<string op, bit opc> {
+multiclass vmaxmin_inst<string op, bit opc, SDNode SD> {
let DecoderNamespace = "VFPV8", PostEncoderMethod = "" in {
def S : ASbInp<0b11101, 0b00, opc,
(outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
NoItinerary, !strconcat(op, ".f32\t$Sd, $Sn, $Sm"),
- []>, Requires<[HasV8FP]>;
+ [(set SPR:$Sd, (SD SPR:$Sn, SPR:$Sm))]>,
+ Requires<[HasFPARMv8]>;
def D : ADbInp<0b11101, 0b00, opc,
(outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm),
NoItinerary, !strconcat(op, ".f64\t$Dd, $Dn, $Dm"),
- []>, Requires<[HasV8FP]>;
+ [(set DPR:$Dd, (f64 (SD (f64 DPR:$Dn), (f64 DPR:$Dm))))]>,
+ Requires<[HasFPARMv8, HasDPVFP]>;
}
}
-defm VMAXNM : vmaxmin_inst<"vmaxnm", 0>;
-defm VMINNM : vmaxmin_inst<"vminnm", 1>;
+defm VMAXNM : vmaxmin_inst<"vmaxnm", 0, ARMvmaxnm>;
+defm VMINNM : vmaxmin_inst<"vminnm", 1, ARMvminnm>;
// Match reassociated forms only if not sign dependent rounding.
def : Pat<(fmul (fneg DPR:$a), (f64 DPR:$b)),
- (VNMULD DPR:$a, DPR:$b)>, Requires<[NoHonorSignDependentRounding]>;
+ (VNMULD DPR:$a, DPR:$b)>,
+ Requires<[NoHonorSignDependentRounding,HasDPVFP]>;
def : Pat<(fmul (fneg SPR:$a), SPR:$b),
(VNMULS SPR:$a, SPR:$b)>, Requires<[NoHonorSignDependentRounding]>;
@@ -502,6 +509,8 @@ def VCVTSD : VFPAI<(outs SPR:$Sd), (ins DPR:$Dm), VFPUnaryFrm,
let Inst{11-8} = 0b1011;
let Inst{7-6} = 0b11;
let Inst{4} = 0;
+
+ let Predicates = [HasVFP2, HasDPVFP];
}
// Between half, single and double-precision. For disassembly only.
@@ -532,7 +541,7 @@ def VCVTTSH: ASuI<0b11101, 0b11, 0b0011, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm),
def VCVTBHD : ADuI<0b11101, 0b11, 0b0010, 0b01, 0,
(outs DPR:$Dd), (ins SPR:$Sm),
NoItinerary, "vcvtb", ".f64.f16\t$Dd, $Sm",
- []>, Requires<[HasV8FP]> {
+ []>, Requires<[HasFPARMv8, HasDPVFP]> {
// Instruction operands.
bits<5> Sm;
@@ -544,7 +553,7 @@ def VCVTBHD : ADuI<0b11101, 0b11, 0b0010, 0b01, 0,
def VCVTBDH : ADuI<0b11101, 0b11, 0b0011, 0b01, 0,
(outs SPR:$Sd), (ins DPR:$Dm),
NoItinerary, "vcvtb", ".f16.f64\t$Sd, $Dm",
- []>, Requires<[HasV8FP]> {
+ []>, Requires<[HasFPARMv8, HasDPVFP]> {
// Instruction operands.
bits<5> Sd;
bits<5> Dm;
@@ -559,7 +568,7 @@ def VCVTBDH : ADuI<0b11101, 0b11, 0b0011, 0b01, 0,
def VCVTTHD : ADuI<0b11101, 0b11, 0b0010, 0b11, 0,
(outs DPR:$Dd), (ins SPR:$Sm),
NoItinerary, "vcvtt", ".f64.f16\t$Dd, $Sm",
- []>, Requires<[HasV8FP]> {
+ []>, Requires<[HasFPARMv8, HasDPVFP]> {
// Instruction operands.
bits<5> Sm;
@@ -571,7 +580,7 @@ def VCVTTHD : ADuI<0b11101, 0b11, 0b0010, 0b11, 0,
def VCVTTDH : ADuI<0b11101, 0b11, 0b0011, 0b11, 0,
(outs SPR:$Sd), (ins DPR:$Dm),
NoItinerary, "vcvtt", ".f16.f64\t$Sd, $Dm",
- []>, Requires<[HasV8FP]> {
+ []>, Requires<[HasFPARMv8, HasDPVFP]> {
// Instruction operands.
bits<5> Sd;
bits<5> Dm;
@@ -588,21 +597,21 @@ multiclass vcvt_inst<string opc, bits<2> rm> {
def SS : ASuInp<0b11101, 0b11, 0b1100, 0b11, 0,
(outs SPR:$Sd), (ins SPR:$Sm),
NoItinerary, !strconcat("vcvt", opc, ".s32.f32\t$Sd, $Sm"),
- []>, Requires<[HasV8FP]> {
+ []>, Requires<[HasFPARMv8]> {
let Inst{17-16} = rm;
}
def US : ASuInp<0b11101, 0b11, 0b1100, 0b01, 0,
(outs SPR:$Sd), (ins SPR:$Sm),
NoItinerary, !strconcat("vcvt", opc, ".u32.f32\t$Sd, $Sm"),
- []>, Requires<[HasV8FP]> {
+ []>, Requires<[HasFPARMv8]> {
let Inst{17-16} = rm;
}
def SD : ASuInp<0b11101, 0b11, 0b1100, 0b11, 0,
(outs SPR:$Sd), (ins DPR:$Dm),
NoItinerary, !strconcat("vcvt", opc, ".s32.f64\t$Sd, $Dm"),
- []>, Requires<[HasV8FP]> {
+ []>, Requires<[HasFPARMv8, HasDPVFP]> {
bits<5> Dm;
let Inst{17-16} = rm;
@@ -616,7 +625,7 @@ multiclass vcvt_inst<string opc, bits<2> rm> {
def UD : ASuInp<0b11101, 0b11, 0b1100, 0b01, 0,
(outs SPR:$Sd), (ins DPR:$Dm),
NoItinerary, !strconcat("vcvt", opc, ".u32.f64\t$Sd, $Dm"),
- []>, Requires<[HasV8FP]> {
+ []>, Requires<[HasFPARMv8, HasDPVFP]> {
bits<5> Dm;
let Inst{17-16} = rm;
@@ -652,17 +661,24 @@ multiclass vrint_inst_zrx<string opc, bit op, bit op2> {
def S : ASuI<0b11101, 0b11, 0b0110, 0b11, 0,
(outs SPR:$Sd), (ins SPR:$Sm),
NoItinerary, !strconcat("vrint", opc), ".f32\t$Sd, $Sm",
- []>, Requires<[HasV8FP]> {
+ []>, Requires<[HasFPARMv8]> {
let Inst{7} = op2;
let Inst{16} = op;
}
def D : ADuI<0b11101, 0b11, 0b0110, 0b11, 0,
(outs DPR:$Dd), (ins DPR:$Dm),
NoItinerary, !strconcat("vrint", opc), ".f64\t$Dd, $Dm",
- []>, Requires<[HasV8FP]> {
+ []>, Requires<[HasFPARMv8, HasDPVFP]> {
let Inst{7} = op2;
let Inst{16} = op;
}
+
+ def : InstAlias<!strconcat("vrint", opc, "$p.f32.f32\t$Sd, $Sm"),
+ (!cast<Instruction>(NAME#"S") SPR:$Sd, SPR:$Sm, pred:$p)>,
+ Requires<[HasFPARMv8]>;
+ def : InstAlias<!strconcat("vrint", opc, "$p.f64.f64\t$Dd, $Dm"),
+ (!cast<Instruction>(NAME#"D") DPR:$Dd, DPR:$Dm, pred:$p)>,
+ Requires<[HasFPARMv8,HasDPVFP]>;
}
defm VRINTZ : vrint_inst_zrx<"z", 0, 1>;
@@ -674,21 +690,23 @@ multiclass vrint_inst_anpm<string opc, bits<2> rm> {
def S : ASuInp<0b11101, 0b11, 0b1000, 0b01, 0,
(outs SPR:$Sd), (ins SPR:$Sm),
NoItinerary, !strconcat("vrint", opc, ".f32\t$Sd, $Sm"),
- []>, Requires<[HasV8FP]> {
+ []>, Requires<[HasFPARMv8]> {
let Inst{17-16} = rm;
}
def D : ADuInp<0b11101, 0b11, 0b1000, 0b01, 0,
(outs DPR:$Dd), (ins DPR:$Dm),
NoItinerary, !strconcat("vrint", opc, ".f64\t$Dd, $Dm"),
- []>, Requires<[HasV8FP]> {
+ []>, Requires<[HasFPARMv8, HasDPVFP]> {
let Inst{17-16} = rm;
}
}
def : InstAlias<!strconcat("vrint", opc, ".f32.f32\t$Sd, $Sm"),
- (!cast<Instruction>(NAME#"S") SPR:$Sd, SPR:$Sm)>;
+ (!cast<Instruction>(NAME#"S") SPR:$Sd, SPR:$Sm)>,
+ Requires<[HasFPARMv8]>;
def : InstAlias<!strconcat("vrint", opc, ".f64.f64\t$Dd, $Dm"),
- (!cast<Instruction>(NAME#"D") DPR:$Dd, DPR:$Dm)>;
+ (!cast<Instruction>(NAME#"D") DPR:$Dd, DPR:$Dm)>,
+ Requires<[HasFPARMv8,HasDPVFP]>;
}
defm VRINTA : vrint_inst_anpm<"a", 0b00>;
@@ -885,6 +903,8 @@ class AVConv1IDs_Encode<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3,
let Inst{5} = Sm{0};
let Inst{15-12} = Dd{3-0};
let Inst{22} = Dd{4};
+
+ let Predicates = [HasVFP2, HasDPVFP];
}
class AVConv1InSs_Encode<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3,
@@ -956,6 +976,8 @@ class AVConv1IsD_Encode<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3,
let Inst{5} = Dm{4};
let Inst{15-12} = Sd{4-1};
let Inst{22} = Sd{0};
+
+ let Predicates = [HasVFP2, HasDPVFP];
}
class AVConv1InsS_Encode<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3,
@@ -1077,6 +1099,8 @@ class AVConv1XInsD_Encode<bits<5> op1, bits<2> op2, bits<4> op3, bits<4> op4,
// if dp_operation then UInt(D:Vd) else UInt(Vd:D);
let Inst{22} = dst{4};
let Inst{15-12} = dst{3-0};
+
+ let Predicates = [HasVFP2, HasDPVFP];
}
def VTOSHS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1110, 0b1010, 0,
@@ -1189,7 +1213,7 @@ def VMLAD : ADbI<0b11100, 0b00, 0, 0,
[(set DPR:$Dd, (fadd_mlx (fmul_su DPR:$Dn, DPR:$Dm),
(f64 DPR:$Ddin)))]>,
RegConstraint<"$Ddin = $Dd">,
- Requires<[HasVFP2,UseFPVMLx,DontUseFusedMAC]>;
+ Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>;
def VMLAS : ASbIn<0b11100, 0b00, 0, 0,
(outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
@@ -1205,7 +1229,7 @@ def VMLAS : ASbIn<0b11100, 0b00, 0, 0,
def : Pat<(fadd_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))),
(VMLAD DPR:$dstin, DPR:$a, DPR:$b)>,
- Requires<[HasVFP2,UseFPVMLx,DontUseFusedMAC]>;
+ Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>;
def : Pat<(fadd_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)),
(VMLAS SPR:$dstin, SPR:$a, SPR:$b)>,
Requires<[HasVFP2,DontUseNEONForFP, UseFPVMLx,DontUseFusedMAC]>;
@@ -1216,7 +1240,7 @@ def VMLSD : ADbI<0b11100, 0b00, 1, 0,
[(set DPR:$Dd, (fadd_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)),
(f64 DPR:$Ddin)))]>,
RegConstraint<"$Ddin = $Dd">,
- Requires<[HasVFP2,UseFPVMLx,DontUseFusedMAC]>;
+ Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>;
def VMLSS : ASbIn<0b11100, 0b00, 1, 0,
(outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
@@ -1232,7 +1256,7 @@ def VMLSS : ASbIn<0b11100, 0b00, 1, 0,
def : Pat<(fsub_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))),
(VMLSD DPR:$dstin, DPR:$a, DPR:$b)>,
- Requires<[HasVFP2,UseFPVMLx,DontUseFusedMAC]>;
+ Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>;
def : Pat<(fsub_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)),
(VMLSS SPR:$dstin, SPR:$a, SPR:$b)>,
Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]>;
@@ -1243,7 +1267,7 @@ def VNMLAD : ADbI<0b11100, 0b01, 1, 0,
[(set DPR:$Dd,(fsub_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)),
(f64 DPR:$Ddin)))]>,
RegConstraint<"$Ddin = $Dd">,
- Requires<[HasVFP2,UseFPVMLx,DontUseFusedMAC]>;
+ Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>;
def VNMLAS : ASbI<0b11100, 0b01, 1, 0,
(outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
@@ -1259,7 +1283,7 @@ def VNMLAS : ASbI<0b11100, 0b01, 1, 0,
def : Pat<(fsub_mlx (fneg (fmul_su DPR:$a, (f64 DPR:$b))), DPR:$dstin),
(VNMLAD DPR:$dstin, DPR:$a, DPR:$b)>,
- Requires<[HasVFP2,UseFPVMLx,DontUseFusedMAC]>;
+ Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>;
def : Pat<(fsub_mlx (fneg (fmul_su SPR:$a, SPR:$b)), SPR:$dstin),
(VNMLAS SPR:$dstin, SPR:$a, SPR:$b)>,
Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]>;
@@ -1270,7 +1294,7 @@ def VNMLSD : ADbI<0b11100, 0b01, 0, 0,
[(set DPR:$Dd, (fsub_mlx (fmul_su DPR:$Dn, DPR:$Dm),
(f64 DPR:$Ddin)))]>,
RegConstraint<"$Ddin = $Dd">,
- Requires<[HasVFP2,UseFPVMLx,DontUseFusedMAC]>;
+ Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>;
def VNMLSS : ASbI<0b11100, 0b01, 0, 0,
(outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
@@ -1285,7 +1309,7 @@ def VNMLSS : ASbI<0b11100, 0b01, 0, 0,
def : Pat<(fsub_mlx (fmul_su DPR:$a, (f64 DPR:$b)), DPR:$dstin),
(VNMLSD DPR:$dstin, DPR:$a, DPR:$b)>,
- Requires<[HasVFP2,UseFPVMLx,DontUseFusedMAC]>;
+ Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>;
def : Pat<(fsub_mlx (fmul_su SPR:$a, SPR:$b), SPR:$dstin),
(VNMLSS SPR:$dstin, SPR:$a, SPR:$b)>,
Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]>;
@@ -1299,7 +1323,7 @@ def VFMAD : ADbI<0b11101, 0b10, 0, 0,
[(set DPR:$Dd, (fadd_mlx (fmul_su DPR:$Dn, DPR:$Dm),
(f64 DPR:$Ddin)))]>,
RegConstraint<"$Ddin = $Dd">,
- Requires<[HasVFP4,UseFusedMAC]>;
+ Requires<[HasVFP4,HasDPVFP,UseFusedMAC]>;
def VFMAS : ASbIn<0b11101, 0b10, 0, 0,
(outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
@@ -1314,7 +1338,7 @@ def VFMAS : ASbIn<0b11101, 0b10, 0, 0,
def : Pat<(fadd_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))),
(VFMAD DPR:$dstin, DPR:$a, DPR:$b)>,
- Requires<[HasVFP4,UseFusedMAC]>;
+ Requires<[HasVFP4,HasDPVFP,UseFusedMAC]>;
def : Pat<(fadd_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)),
(VFMAS SPR:$dstin, SPR:$a, SPR:$b)>,
Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]>;
@@ -1323,7 +1347,7 @@ def : Pat<(fadd_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)),
// (fma x, y, z) -> (vfms z, x, y)
def : Pat<(f64 (fma DPR:$Dn, DPR:$Dm, DPR:$Ddin)),
(VFMAD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>,
- Requires<[HasVFP4]>;
+ Requires<[HasVFP4,HasDPVFP]>;
def : Pat<(f32 (fma SPR:$Sn, SPR:$Sm, SPR:$Sdin)),
(VFMAS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
Requires<[HasVFP4]>;
@@ -1334,7 +1358,7 @@ def VFMSD : ADbI<0b11101, 0b10, 1, 0,
[(set DPR:$Dd, (fadd_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)),
(f64 DPR:$Ddin)))]>,
RegConstraint<"$Ddin = $Dd">,
- Requires<[HasVFP4,UseFusedMAC]>;
+ Requires<[HasVFP4,HasDPVFP,UseFusedMAC]>;
def VFMSS : ASbIn<0b11101, 0b10, 1, 0,
(outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
@@ -1349,7 +1373,7 @@ def VFMSS : ASbIn<0b11101, 0b10, 1, 0,
def : Pat<(fsub_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))),
(VFMSD DPR:$dstin, DPR:$a, DPR:$b)>,
- Requires<[HasVFP4,UseFusedMAC]>;
+ Requires<[HasVFP4,HasDPVFP,UseFusedMAC]>;
def : Pat<(fsub_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)),
(VFMSS SPR:$dstin, SPR:$a, SPR:$b)>,
Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]>;
@@ -1358,14 +1382,14 @@ def : Pat<(fsub_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)),
// (fma (fneg x), y, z) -> (vfms z, x, y)
def : Pat<(f64 (fma (fneg DPR:$Dn), DPR:$Dm, DPR:$Ddin)),
(VFMSD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>,
- Requires<[HasVFP4]>;
+ Requires<[HasVFP4,HasDPVFP]>;
def : Pat<(f32 (fma (fneg SPR:$Sn), SPR:$Sm, SPR:$Sdin)),
(VFMSS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
Requires<[HasVFP4]>;
// (fma x, (fneg y), z) -> (vfms z, x, y)
def : Pat<(f64 (fma DPR:$Dn, (fneg DPR:$Dm), DPR:$Ddin)),
(VFMSD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>,
- Requires<[HasVFP4]>;
+ Requires<[HasVFP4,HasDPVFP]>;
def : Pat<(f32 (fma SPR:$Sn, (fneg SPR:$Sm), SPR:$Sdin)),
(VFMSS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
Requires<[HasVFP4]>;
@@ -1376,7 +1400,7 @@ def VFNMAD : ADbI<0b11101, 0b01, 1, 0,
[(set DPR:$Dd,(fsub_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)),
(f64 DPR:$Ddin)))]>,
RegConstraint<"$Ddin = $Dd">,
- Requires<[HasVFP4,UseFusedMAC]>;
+ Requires<[HasVFP4,HasDPVFP,UseFusedMAC]>;
def VFNMAS : ASbI<0b11101, 0b01, 1, 0,
(outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
@@ -1391,7 +1415,7 @@ def VFNMAS : ASbI<0b11101, 0b01, 1, 0,
def : Pat<(fsub_mlx (fneg (fmul_su DPR:$a, (f64 DPR:$b))), DPR:$dstin),
(VFNMAD DPR:$dstin, DPR:$a, DPR:$b)>,
- Requires<[HasVFP4,UseFusedMAC]>;
+ Requires<[HasVFP4,HasDPVFP,UseFusedMAC]>;
def : Pat<(fsub_mlx (fneg (fmul_su SPR:$a, SPR:$b)), SPR:$dstin),
(VFNMAS SPR:$dstin, SPR:$a, SPR:$b)>,
Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]>;
@@ -1400,14 +1424,14 @@ def : Pat<(fsub_mlx (fneg (fmul_su SPR:$a, SPR:$b)), SPR:$dstin),
// (fneg (fma x, y, z)) -> (vfnma z, x, y)
def : Pat<(fneg (fma (f64 DPR:$Dn), (f64 DPR:$Dm), (f64 DPR:$Ddin))),
(VFNMAD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>,
- Requires<[HasVFP4]>;
+ Requires<[HasVFP4,HasDPVFP]>;
def : Pat<(fneg (fma (f32 SPR:$Sn), (f32 SPR:$Sm), (f32 SPR:$Sdin))),
(VFNMAS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
Requires<[HasVFP4]>;
// (fma (fneg x), y, (fneg z)) -> (vfnma z, x, y)
def : Pat<(f64 (fma (fneg DPR:$Dn), DPR:$Dm, (fneg DPR:$Ddin))),
(VFNMAD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>,
- Requires<[HasVFP4]>;
+ Requires<[HasVFP4,HasDPVFP]>;
def : Pat<(f32 (fma (fneg SPR:$Sn), SPR:$Sm, (fneg SPR:$Sdin))),
(VFNMAS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
Requires<[HasVFP4]>;
@@ -1418,7 +1442,7 @@ def VFNMSD : ADbI<0b11101, 0b01, 0, 0,
[(set DPR:$Dd, (fsub_mlx (fmul_su DPR:$Dn, DPR:$Dm),
(f64 DPR:$Ddin)))]>,
RegConstraint<"$Ddin = $Dd">,
- Requires<[HasVFP4,UseFusedMAC]>;
+ Requires<[HasVFP4,HasDPVFP,UseFusedMAC]>;
def VFNMSS : ASbI<0b11101, 0b01, 0, 0,
(outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
@@ -1432,7 +1456,7 @@ def VFNMSS : ASbI<0b11101, 0b01, 0, 0,
def : Pat<(fsub_mlx (fmul_su DPR:$a, (f64 DPR:$b)), DPR:$dstin),
(VFNMSD DPR:$dstin, DPR:$a, DPR:$b)>,
- Requires<[HasVFP4,UseFusedMAC]>;
+ Requires<[HasVFP4,HasDPVFP,UseFusedMAC]>;
def : Pat<(fsub_mlx (fmul_su SPR:$a, SPR:$b), SPR:$dstin),
(VFNMSS SPR:$dstin, SPR:$a, SPR:$b)>,
Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]>;
@@ -1442,21 +1466,21 @@ def : Pat<(fsub_mlx (fmul_su SPR:$a, SPR:$b), SPR:$dstin),
// (fma x, y, (fneg z)) -> (vfnms z, x, y))
def : Pat<(f64 (fma DPR:$Dn, DPR:$Dm, (fneg DPR:$Ddin))),
(VFNMSD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>,
- Requires<[HasVFP4]>;
+ Requires<[HasVFP4,HasDPVFP]>;
def : Pat<(f32 (fma SPR:$Sn, SPR:$Sm, (fneg SPR:$Sdin))),
(VFNMSS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
Requires<[HasVFP4]>;
// (fneg (fma (fneg x), y, z)) -> (vfnms z, x, y)
def : Pat<(fneg (f64 (fma (fneg DPR:$Dn), DPR:$Dm, DPR:$Ddin))),
(VFNMSD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>,
- Requires<[HasVFP4]>;
+ Requires<[HasVFP4,HasDPVFP]>;
def : Pat<(fneg (f32 (fma (fneg SPR:$Sn), SPR:$Sm, SPR:$Sdin))),
(VFNMSS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
Requires<[HasVFP4]>;
// (fneg (fma x, (fneg y), z) -> (vfnms z, x, y)
def : Pat<(fneg (f64 (fma DPR:$Dn, (fneg DPR:$Dm), DPR:$Ddin))),
(VFNMSD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>,
- Requires<[HasVFP4]>;
+ Requires<[HasVFP4,HasDPVFP]>;
def : Pat<(fneg (f32 (fma SPR:$Sn, (fneg SPR:$Sm), SPR:$Sdin))),
(VFNMSS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
Requires<[HasVFP4]>;
@@ -1466,15 +1490,17 @@ def : Pat<(fneg (f32 (fma SPR:$Sn, (fneg SPR:$Sm), SPR:$Sdin))),
//
let neverHasSideEffects = 1 in {
-def VMOVDcc : ARMPseudoInst<(outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm, pred:$p),
- 4, IIC_fpUNA64,
- [/*(set DPR:$Dd, (ARMcmov DPR:$Dn, DPR:$Dm, imm:$cc))*/]>,
- RegConstraint<"$Dn = $Dd">;
-
-def VMOVScc : ARMPseudoInst<(outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm, pred:$p),
- 4, IIC_fpUNA32,
- [/*(set SPR:$Sd, (ARMcmov SPR:$Sn, SPR:$Sm, imm:$cc))*/]>,
- RegConstraint<"$Sn = $Sd">;
+def VMOVDcc : PseudoInst<(outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm, cmovpred:$p),
+ IIC_fpUNA64,
+ [(set (f64 DPR:$Dd),
+ (ARMcmov DPR:$Dn, DPR:$Dm, cmovpred:$p))]>,
+ RegConstraint<"$Dn = $Dd">, Requires<[HasVFP2,HasDPVFP]>;
+
+def VMOVScc : PseudoInst<(outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm, cmovpred:$p),
+ IIC_fpUNA32,
+ [(set (f32 SPR:$Sd),
+ (ARMcmov SPR:$Sn, SPR:$Sm, cmovpred:$p))]>,
+ RegConstraint<"$Sn = $Sd">, Requires<[HasVFP2]>;
} // neverHasSideEffects
//===----------------------------------------------------------------------===//
@@ -1520,6 +1546,8 @@ let Uses = [FPSCR] in {
"vmrs", "\t$Rt, mvfr0", []>;
def VMRS_MVFR1 : MovFromVFP<0b0110 /* mvfr1 */, (outs GPR:$Rt), (ins),
"vmrs", "\t$Rt, mvfr1", []>;
+ def VMRS_MVFR2 : MovFromVFP<0b0101 /* mvfr2 */, (outs GPR:$Rt), (ins),
+ "vmrs", "\t$Rt, mvfr2", []>, Requires<[HasFPARMv8]>;
def VMRS_FPINST : MovFromVFP<0b1001 /* fpinst */, (outs GPR:$Rt), (ins),
"vmrs", "\t$Rt, fpinst", []>;
def VMRS_FPINST2 : MovFromVFP<0b1010 /* fpinst2 */, (outs GPR:$Rt), (ins),
@@ -1573,7 +1601,8 @@ let isReMaterializable = 1 in {
def FCONSTD : VFPAI<(outs DPR:$Dd), (ins vfp_f64imm:$imm),
VFPMiscFrm, IIC_fpUNA64,
"vmov", ".f64\t$Dd, $imm",
- [(set DPR:$Dd, vfp_f64imm:$imm)]>, Requires<[HasVFP3]> {
+ [(set DPR:$Dd, vfp_f64imm:$imm)]>,
+ Requires<[HasVFP3,HasDPVFP]> {
bits<5> Dd;
bits<8> imm;
@@ -1655,23 +1684,23 @@ def : VFP2MnemonicAlias<"fmrx", "vmrs">;
def : VFP2MnemonicAlias<"fmxr", "vmsr">;
// Be friendly and accept the old form of zero-compare
-def : VFP2InstAlias<"fcmpzd${p} $val", (VCMPZD DPR:$val, pred:$p)>;
+def : VFP2DPInstAlias<"fcmpzd${p} $val", (VCMPZD DPR:$val, pred:$p)>;
def : VFP2InstAlias<"fcmpzs${p} $val", (VCMPZS SPR:$val, pred:$p)>;
def : VFP2InstAlias<"fmstat${p}", (FMSTAT pred:$p)>;
def : VFP2InstAlias<"fadds${p} $Sd, $Sn, $Sm",
(VADDS SPR:$Sd, SPR:$Sn, SPR:$Sm, pred:$p)>;
-def : VFP2InstAlias<"faddd${p} $Dd, $Dn, $Dm",
- (VADDD DPR:$Dd, DPR:$Dn, DPR:$Dm, pred:$p)>;
+def : VFP2DPInstAlias<"faddd${p} $Dd, $Dn, $Dm",
+ (VADDD DPR:$Dd, DPR:$Dn, DPR:$Dm, pred:$p)>;
def : VFP2InstAlias<"fsubs${p} $Sd, $Sn, $Sm",
(VSUBS SPR:$Sd, SPR:$Sn, SPR:$Sm, pred:$p)>;
-def : VFP2InstAlias<"fsubd${p} $Dd, $Dn, $Dm",
- (VSUBD DPR:$Dd, DPR:$Dn, DPR:$Dm, pred:$p)>;
+def : VFP2DPInstAlias<"fsubd${p} $Dd, $Dn, $Dm",
+ (VSUBD DPR:$Dd, DPR:$Dn, DPR:$Dm, pred:$p)>;
// No need for the size suffix on VSQRT. It's implied by the register classes.
def : VFP2InstAlias<"vsqrt${p} $Sd, $Sm", (VSQRTS SPR:$Sd, SPR:$Sm, pred:$p)>;
-def : VFP2InstAlias<"vsqrt${p} $Dd, $Dm", (VSQRTD DPR:$Dd, DPR:$Dm, pred:$p)>;
+def : VFP2DPInstAlias<"vsqrt${p} $Dd, $Dm", (VSQRTD DPR:$Dd, DPR:$Dm, pred:$p)>;
// VLDR/VSTR accept an optional type suffix.
def : VFP2InstAlias<"vldr${p}.32 $Sd, $addr",
diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
index 1803a8a..61596d5 100644
--- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -90,6 +90,10 @@ namespace {
typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
typedef MemOpQueue::iterator MemOpQueueIter;
+ void findUsesOfImpDef(SmallVectorImpl<MachineOperand *> &UsesOfImpDefs,
+ const MemOpQueue &MemOps, unsigned DefReg,
+ unsigned RangeBegin, unsigned RangeEnd);
+
bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
int Offset, unsigned Base, bool BaseKill, int Opcode,
ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
@@ -360,6 +364,62 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
return true;
}
+/// \brief Find all instructions using a given imp-def within a range.
+///
+/// We are trying to combine a range of instructions, one of which (located at
+/// position RangeBegin) implicitly defines a register. The final LDM/STM will
+/// be placed at RangeEnd, and so any uses of this definition between RangeBegin
+/// and RangeEnd must be modified to use an undefined value.
+///
+/// The live range continues until we find a second definition or one of the
+/// uses we find is a kill. Unfortunately MemOps is not sorted by Position, so
+/// we must consider all uses and decide which are relevant in a second pass.
+void ARMLoadStoreOpt::findUsesOfImpDef(
+ SmallVectorImpl<MachineOperand *> &UsesOfImpDefs, const MemOpQueue &MemOps,
+ unsigned DefReg, unsigned RangeBegin, unsigned RangeEnd) {
+ std::map<unsigned, MachineOperand *> Uses;
+ unsigned LastLivePos = RangeEnd;
+
+ // First we find all uses of this register with Position between RangeBegin
+ // and RangeEnd; any or all of these could be uses of a definition at
+ // RangeBegin. We also record the latest position a definition at RangeBegin
+ // would be considered live.
+ for (unsigned i = 0; i < MemOps.size(); ++i) {
+ MachineInstr &MI = *MemOps[i].MBBI;
+ unsigned MIPosition = MemOps[i].Position;
+ if (MIPosition <= RangeBegin || MIPosition > RangeEnd)
+ continue;
+
+ // If this instruction defines the register, then any later use will be of
+ // that definition rather than ours.
+ if (MI.definesRegister(DefReg))
+ LastLivePos = std::min(LastLivePos, MIPosition);
+
+ MachineOperand *UseOp = MI.findRegisterUseOperand(DefReg);
+ if (!UseOp)
+ continue;
+
+ // If this instruction kills the register then (assuming liveness is
+ // correct when we start) we don't need to think about anything after here.
+ if (UseOp->isKill())
+ LastLivePos = std::min(LastLivePos, MIPosition);
+
+ Uses[MIPosition] = UseOp;
+ }
+
+ // Now we traverse the list of all uses, and append the ones that actually use
+ // our definition to the requested list.
+ for (std::map<unsigned, MachineOperand *>::iterator I = Uses.begin(),
+ E = Uses.end();
+ I != E; ++I) {
+ // List is sorted by position so once we've found one out of range there
+ // will be no more to consider.
+ if (I->first > LastLivePos)
+ break;
+ UsesOfImpDefs.push_back(I->second);
+ }
+}
+
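Because MemOps is keyed by queue order rather than Position, the routine
needs the two passes above. A self-contained model of the same logic on
plain integers, with hypothetical event codes ('u' plain use, 'k' killing
use, 'd' redefinition), purely for intuition:

#include <algorithm>
#include <map>
#include <vector>

// Pass 1 clamps the last live position at any redefinition or kill;
// pass 2 keeps only the uses the original definition still reaches.
// std::map stands in for the position-sorted Uses map above.
std::vector<int> usesOfDef(const std::map<int, char> &events, int rangeEnd) {
  int lastLive = rangeEnd;
  for (const auto &e : events)
    if (e.second == 'd' || e.second == 'k')
      lastLive = std::min(lastLive, e.first);
  std::vector<int> uses;
  for (const auto &e : events)
    if ((e.second == 'u' || e.second == 'k') && e.first <= lastLive)
      uses.push_back(e.first);
  return uses;
}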
// MergeOpsUpdate - call MergeOps and update MemOps and merges accordingly on
// success.
void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
@@ -392,6 +452,7 @@ void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
SmallVector<std::pair<unsigned, bool>, 8> Regs;
SmallVector<unsigned, 8> ImpDefs;
+ SmallVector<MachineOperand *, 8> UsesOfImpDefs;
for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
unsigned Reg = memOps[i].Reg;
// If we are inserting the merged operation after an operation that
@@ -406,6 +467,12 @@ void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
unsigned DefReg = MO->getReg();
if (std::find(ImpDefs.begin(), ImpDefs.end(), DefReg) == ImpDefs.end())
ImpDefs.push_back(DefReg);
+
+ // There may be other uses of the definition between this instruction and
+ // the eventual LDM/STM position. These should be marked undef if the
+ // merge takes place.
+ findUsesOfImpDef(UsesOfImpDefs, memOps, DefReg, memOps[i].Position,
+ insertPos);
}
}
@@ -418,6 +485,16 @@ void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
// Merge succeeded, update records.
Merges.push_back(prior(Loc));
+
+ // In gathering loads together, we may have moved the imp-def of a register
+ // past one of its uses. This is safe, since we know better than the rest of
+ // LLVM what is legal for ARM loads and stores, but we still have to adjust
+ // the affected uses.
+ for (SmallVectorImpl<MachineOperand *>::iterator I = UsesOfImpDefs.begin(),
+ E = UsesOfImpDefs.end();
+ I != E; ++I)
+ (*I)->setIsUndef();
+
for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
// Remove kill flags from any memops that come before insertPos.
if (Regs[i-memOpsBegin].second) {
@@ -489,7 +566,10 @@ ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
if (Reg != ARM::SP &&
NewOffset == Offset + (int)Size &&
((isNotVFP && RegNum > PRegNum) ||
- ((Count < Limit) && RegNum == PRegNum+1))) {
+ ((Count < Limit) && RegNum == PRegNum+1)) &&
+ // On Swift we don't want vldm/vstm to start with a odd register num
+ // because Q register unaligned vldm/vstm need more uops.
+ (!STI->isSwift() || isNotVFP || Count != 1 || !(PRegNum & 0x1))) {
Offset += Size;
PRegNum = RegNum;
++Count;
diff --git a/lib/Target/ARM/ARMMCInstLower.cpp b/lib/Target/ARM/ARMMCInstLower.cpp
index b641483..e12c9c6 100644
--- a/lib/Target/ARM/ARMMCInstLower.cpp
+++ b/lib/Target/ARM/ARMMCInstLower.cpp
@@ -82,7 +82,7 @@ bool ARMAsmPrinter::lowerOperand(const MachineOperand &MO,
MO.getMBB()->getSymbol(), OutContext));
break;
case MachineOperand::MO_GlobalAddress:
- MCOp = GetSymbolRef(MO, Mang->getSymbol(MO.getGlobal()));
+ MCOp = GetSymbolRef(MO, getSymbol(MO.getGlobal()));
break;
case MachineOperand::MO_ExternalSymbol:
MCOp = GetSymbolRef(MO,
diff --git a/lib/Target/ARM/ARMMachineFunctionInfo.h b/lib/Target/ARM/ARMMachineFunctionInfo.h
index d9ec4fd..010edf3 100644
--- a/lib/Target/ARM/ARMMachineFunctionInfo.h
+++ b/lib/Target/ARM/ARMMachineFunctionInfo.h
@@ -84,12 +84,6 @@ class ARMFunctionInfo : public MachineFunctionInfo {
unsigned GPRCS2Size;
unsigned DPRCSSize;
- /// GPRCS1Frames, GPRCS2Frames, DPRCSFrames - Keeps track of frame indices
- /// which belong to these spill areas.
- BitVector GPRCS1Frames;
- BitVector GPRCS2Frames;
- BitVector DPRCSFrames;
-
/// NumAlignedDPRCS2Regs - The number of callee-saved DPRs that are saved in
/// the aligned portion of the stack frame. This is always a contiguous
/// sequence of D-registers starting from d8.
@@ -128,7 +122,6 @@ public:
LRSpilledForFarJump(false),
FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0),
GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0),
- GPRCS1Frames(0), GPRCS2Frames(0), DPRCSFrames(0),
NumAlignedDPRCS2Regs(0),
JumpTableUId(0), PICLabelUId(0),
VarArgsFrameIndex(0), HasITBlocks(false), GlobalBaseReg(0) {}
@@ -141,7 +134,6 @@ public:
LRSpilledForFarJump(false),
FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0),
GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0),
- GPRCS1Frames(32), GPRCS2Frames(32), DPRCSFrames(32),
JumpTableUId(0), PICLabelUId(0),
VarArgsFrameIndex(0), HasITBlocks(false), GlobalBaseReg(0) {}
@@ -190,59 +182,6 @@ public:
void setGPRCalleeSavedArea2Size(unsigned s) { GPRCS2Size = s; }
void setDPRCalleeSavedAreaSize(unsigned s) { DPRCSSize = s; }
- bool isGPRCalleeSavedArea1Frame(int fi) const {
- if (fi < 0 || fi >= (int)GPRCS1Frames.size())
- return false;
- return GPRCS1Frames[fi];
- }
- bool isGPRCalleeSavedArea2Frame(int fi) const {
- if (fi < 0 || fi >= (int)GPRCS2Frames.size())
- return false;
- return GPRCS2Frames[fi];
- }
- bool isDPRCalleeSavedAreaFrame(int fi) const {
- if (fi < 0 || fi >= (int)DPRCSFrames.size())
- return false;
- return DPRCSFrames[fi];
- }
-
- void addGPRCalleeSavedArea1Frame(int fi) {
- if (fi >= 0) {
- int Size = GPRCS1Frames.size();
- if (fi >= Size) {
- Size *= 2;
- if (fi >= Size)
- Size = fi+1;
- GPRCS1Frames.resize(Size);
- }
- GPRCS1Frames[fi] = true;
- }
- }
- void addGPRCalleeSavedArea2Frame(int fi) {
- if (fi >= 0) {
- int Size = GPRCS2Frames.size();
- if (fi >= Size) {
- Size *= 2;
- if (fi >= Size)
- Size = fi+1;
- GPRCS2Frames.resize(Size);
- }
- GPRCS2Frames[fi] = true;
- }
- }
- void addDPRCalleeSavedAreaFrame(int fi) {
- if (fi >= 0) {
- int Size = DPRCSFrames.size();
- if (fi >= Size) {
- Size *= 2;
- if (fi >= Size)
- Size = fi+1;
- DPRCSFrames.resize(Size);
- }
- DPRCSFrames[fi] = true;
- }
- }
-
unsigned createJumpTableUId() {
return JumpTableUId++;
}
diff --git a/lib/Target/ARM/ARMRegisterInfo.td b/lib/Target/ARM/ARMRegisterInfo.td
index bb7d358..d045761 100644
--- a/lib/Target/ARM/ARMRegisterInfo.td
+++ b/lib/Target/ARM/ARMRegisterInfo.td
@@ -172,6 +172,7 @@ def ITSTATE : ARMReg<4, "itstate">;
// Special Registers - only available in privileged mode.
def FPSID : ARMReg<0, "fpsid">;
+def MVFR2 : ARMReg<5, "mvfr2">;
def MVFR1 : ARMReg<6, "mvfr1">;
def MVFR0 : ARMReg<7, "mvfr0">;
def FPEXC : ARMReg<8, "fpexc">;
@@ -251,7 +252,7 @@ def hGPR : RegisterClass<"ARM", [i32], 32, (sub GPR, tGPR)>;
// to the saved value before the tail call, which would clobber a call address.
// Note, getMinimalPhysRegClass(R0) returns tGPR because of the names of
// this class and the preceding one(!) This is what we want.
-def tcGPR : RegisterClass<"ARM", [i32], 32, (add R0, R1, R2, R3, R9, R12)> {
+def tcGPR : RegisterClass<"ARM", [i32], 32, (add R0, R1, R2, R3, R12)> {
let AltOrders = [(and tcGPR, tGPR)];
let AltOrderSelect = [{
return MF.getTarget().getSubtarget<ARMSubtarget>().isThumb1Only();
diff --git a/lib/Target/ARM/ARMScheduleA9.td b/lib/Target/ARM/ARMScheduleA9.td
index 74ee50b..603e775 100644
--- a/lib/Target/ARM/ARMScheduleA9.td
+++ b/lib/Target/ARM/ARMScheduleA9.td
@@ -1879,6 +1879,10 @@ def CortexA9Itineraries : ProcessorItineraries<
// The following definitions describe the simpler per-operand machine model.
// This works with MachineScheduler and will eventually replace itineraries.
+class A9WriteLMOpsListType<list<WriteSequence> writes> {
+ list <WriteSequence> Writes = writes;
+ SchedMachineModel SchedModel = ?;
+}
// Cortex-A9 machine model for scheduling and other instruction cost heuristics.
def CortexA9Model : SchedMachineModel {
@@ -2011,7 +2015,7 @@ def A9WriteAdr#NumAddr : WriteSequence<[A9WriteAdr], NumAddr>;
// Define a predicate to select the LDM based on number of memory addresses.
def A9LMAdr#NumAddr#Pred :
- SchedPredicate<"TII->getNumLDMAddresses(MI) == "#NumAddr>;
+ SchedPredicate<"(TII->getNumLDMAddresses(MI)+1)/2 == "#NumAddr>;
} // foreach NumAddr
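
[Editor's note] The revised predicate buckets LDM register counts in pairs, which fits the comment further down that each A9WriteLM variant expands into a pair of writes per 64-bit value loaded. The arithmetic, demonstrated standalone:

    #include <cstdio>

    int main() {
      // (N + 1) / 2 rounds up to the number of 64-bit chunks, so register
      // counts {1,2} -> bucket 1, {3,4} -> bucket 2, ... {15,16} -> bucket 8.
      for (unsigned N = 1; N <= 16; ++N)
        std::printf("%2u regs -> A9LMAdr%uPred\n", N, (N + 1) / 2);
      return 0;
    }
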
@@ -2054,48 +2058,30 @@ def A9WriteL#NumAddr#Hi : WriteSequence<
//===----------------------------------------------------------------------===//
// LDM: Load multiple into 32-bit integer registers.
+def A9WriteLMOpsList : A9WriteLMOpsListType<
+ [A9WriteL1, A9WriteL1Hi,
+ A9WriteL2, A9WriteL2Hi,
+ A9WriteL3, A9WriteL3Hi,
+ A9WriteL4, A9WriteL4Hi,
+ A9WriteL5, A9WriteL5Hi,
+ A9WriteL6, A9WriteL6Hi,
+ A9WriteL7, A9WriteL7Hi,
+ A9WriteL8, A9WriteL8Hi]>;
+
// A9WriteLM variants expand into a pair of writes for each 64-bit
// value loaded. When the number of registers is odd, the last
// A9WriteLnHi is naturally ignored because the instruction has no
// following def operands. These variants take no issue resource, so
// they may need to be part of a WriteSequence that includes A9WriteIssue.
def A9WriteLM : SchedWriteVariant<[
- SchedVar<A9LMAdr1Pred, [A9WriteL1, A9WriteL1Hi]>,
- SchedVar<A9LMAdr2Pred, [A9WriteL1, A9WriteL1Hi,
- A9WriteL2, A9WriteL2Hi]>,
- SchedVar<A9LMAdr3Pred, [A9WriteL1, A9WriteL1Hi,
- A9WriteL2, A9WriteL2Hi,
- A9WriteL3, A9WriteL3Hi]>,
- SchedVar<A9LMAdr4Pred, [A9WriteL1, A9WriteL1Hi,
- A9WriteL2, A9WriteL2Hi,
- A9WriteL3, A9WriteL3Hi,
- A9WriteL4, A9WriteL4Hi]>,
- SchedVar<A9LMAdr5Pred, [A9WriteL1, A9WriteL1Hi,
- A9WriteL2, A9WriteL2Hi,
- A9WriteL3, A9WriteL3Hi,
- A9WriteL4, A9WriteL4Hi,
- A9WriteL5, A9WriteL5Hi]>,
- SchedVar<A9LMAdr6Pred, [A9WriteL1, A9WriteL1Hi,
- A9WriteL2, A9WriteL2Hi,
- A9WriteL3, A9WriteL3Hi,
- A9WriteL4, A9WriteL4Hi,
- A9WriteL5, A9WriteL5Hi,
- A9WriteL6, A9WriteL6Hi]>,
- SchedVar<A9LMAdr7Pred, [A9WriteL1, A9WriteL1Hi,
- A9WriteL2, A9WriteL2Hi,
- A9WriteL3, A9WriteL3Hi,
- A9WriteL4, A9WriteL4Hi,
- A9WriteL5, A9WriteL5Hi,
- A9WriteL6, A9WriteL6Hi,
- A9WriteL7, A9WriteL7Hi]>,
- SchedVar<A9LMAdr8Pred, [A9WriteL1, A9WriteL1Hi,
- A9WriteL2, A9WriteL2Hi,
- A9WriteL3, A9WriteL3Hi,
- A9WriteL4, A9WriteL4Hi,
- A9WriteL5, A9WriteL5Hi,
- A9WriteL6, A9WriteL6Hi,
- A9WriteL7, A9WriteL7Hi,
- A9WriteL8, A9WriteL8Hi]>,
+ SchedVar<A9LMAdr1Pred, A9WriteLMOpsList.Writes[0-1]>,
+ SchedVar<A9LMAdr2Pred, A9WriteLMOpsList.Writes[0-3]>,
+ SchedVar<A9LMAdr3Pred, A9WriteLMOpsList.Writes[0-5]>,
+ SchedVar<A9LMAdr4Pred, A9WriteLMOpsList.Writes[0-7]>,
+ SchedVar<A9LMAdr5Pred, A9WriteLMOpsList.Writes[0-9]>,
+ SchedVar<A9LMAdr6Pred, A9WriteLMOpsList.Writes[0-11]>,
+ SchedVar<A9LMAdr7Pred, A9WriteLMOpsList.Writes[0-13]>,
+ SchedVar<A9LMAdr8Pred, A9WriteLMOpsList.Writes[0-15]>,
// For unknown LDMs, define the maximum number of writes, but only
// make the first two consume resources.
SchedVar<A9LMUnknownPred, [A9WriteL1, A9WriteL1Hi,
@@ -2177,49 +2163,39 @@ def A9WriteLMfp#NumAddr#Hi : WriteSequence<
// pair of writes for each 64-bit data loaded. When the number of
// registers is odd, the last WriteLMfpnHi is naturally ignored because
// the instruction has no following def operands.
+
+def A9WriteLMfpPostRAOpsList : A9WriteLMOpsListType<
+ [A9WriteLMfp1, A9WriteLMfp2, // 0-1
+ A9WriteLMfp3, A9WriteLMfp4, // 2-3
+ A9WriteLMfp5, A9WriteLMfp6, // 4-5
+ A9WriteLMfp7, A9WriteLMfp8, // 6-7
+ A9WriteLMfp1Hi, // 8-8
+ A9WriteLMfp2Hi, A9WriteLMfp2Hi, // 9-10
+ A9WriteLMfp3Hi, A9WriteLMfp3Hi, // 11-12
+ A9WriteLMfp4Hi, A9WriteLMfp4Hi, // 13-14
+ A9WriteLMfp5Hi, A9WriteLMfp5Hi, // 15-16
+ A9WriteLMfp6Hi, A9WriteLMfp6Hi, // 17-18
+ A9WriteLMfp7Hi, A9WriteLMfp7Hi, // 19-20
+ A9WriteLMfp8Hi, A9WriteLMfp8Hi]>; // 21-22
+
def A9WriteLMfpPostRA : SchedWriteVariant<[
- SchedVar<A9LMAdr1Pred, [A9WriteLMfp1, A9WriteLMfp1Hi]>,
- SchedVar<A9LMAdr2Pred, [A9WriteLMfp1, A9WriteLMfp1Hi,
- A9WriteLMfp2, A9WriteLMfp2Hi]>,
- SchedVar<A9LMAdr3Pred, [A9WriteLMfp1, A9WriteLMfp1Hi,
- A9WriteLMfp2, A9WriteLMfp2Hi,
- A9WriteLMfp3, A9WriteLMfp3Hi]>,
- SchedVar<A9LMAdr4Pred, [A9WriteLMfp1, A9WriteLMfp1Hi,
- A9WriteLMfp2, A9WriteLMfp2Hi,
- A9WriteLMfp3, A9WriteLMfp3Hi,
- A9WriteLMfp4, A9WriteLMfp4Hi]>,
- SchedVar<A9LMAdr5Pred, [A9WriteLMfp1, A9WriteLMfp1Hi,
- A9WriteLMfp2, A9WriteLMfp2Hi,
- A9WriteLMfp3, A9WriteLMfp3Hi,
- A9WriteLMfp4, A9WriteLMfp4Hi,
- A9WriteLMfp5, A9WriteLMfp5Hi]>,
- SchedVar<A9LMAdr6Pred, [A9WriteLMfp1, A9WriteLMfp1Hi,
- A9WriteLMfp2, A9WriteLMfp2Hi,
- A9WriteLMfp3, A9WriteLMfp3Hi,
- A9WriteLMfp4, A9WriteLMfp4Hi,
- A9WriteLMfp5, A9WriteLMfp5Hi,
- A9WriteLMfp6, A9WriteLMfp6Hi]>,
- SchedVar<A9LMAdr7Pred, [A9WriteLMfp1, A9WriteLMfp1Hi,
- A9WriteLMfp2, A9WriteLMfp2Hi,
- A9WriteLMfp3, A9WriteLMfp3Hi,
- A9WriteLMfp4, A9WriteLMfp4Hi,
- A9WriteLMfp5, A9WriteLMfp5Hi,
- A9WriteLMfp6, A9WriteLMfp6Hi,
- A9WriteLMfp7, A9WriteLMfp7Hi]>,
- SchedVar<A9LMAdr8Pred, [A9WriteLMfp1, A9WriteLMfp1Hi,
- A9WriteLMfp2, A9WriteLMfp2Hi,
- A9WriteLMfp3, A9WriteLMfp3Hi,
- A9WriteLMfp4, A9WriteLMfp4Hi,
- A9WriteLMfp5, A9WriteLMfp5Hi,
- A9WriteLMfp6, A9WriteLMfp6Hi,
- A9WriteLMfp7, A9WriteLMfp7Hi,
- A9WriteLMfp8, A9WriteLMfp8Hi]>,
+ SchedVar<A9LMAdr1Pred, A9WriteLMfpPostRAOpsList.Writes[0-0, 8-8]>,
+ SchedVar<A9LMAdr2Pred, A9WriteLMfpPostRAOpsList.Writes[0-1, 9-10]>,
+ SchedVar<A9LMAdr3Pred, A9WriteLMfpPostRAOpsList.Writes[0-2, 10-12]>,
+ SchedVar<A9LMAdr4Pred, A9WriteLMfpPostRAOpsList.Writes[0-3, 11-14]>,
+ SchedVar<A9LMAdr5Pred, A9WriteLMfpPostRAOpsList.Writes[0-4, 12-16]>,
+ SchedVar<A9LMAdr6Pred, A9WriteLMfpPostRAOpsList.Writes[0-5, 13-18]>,
+ SchedVar<A9LMAdr7Pred, A9WriteLMfpPostRAOpsList.Writes[0-6, 14-20]>,
+ SchedVar<A9LMAdr8Pred, A9WriteLMfpPostRAOpsList.Writes[0-7, 15-22]>,
// For unknown LDMs, define the maximum number of writes, but only
- // make the first two consume resources.
- SchedVar<A9LMUnknownPred, [A9WriteLMfp1, A9WriteLMfp1Hi,
- A9WriteLMfp2, A9WriteLMfp2Hi,
- A9WriteLMfp3Hi, A9WriteLMfp3Hi,
- A9WriteLMfp4Hi, A9WriteLMfp4Hi,
+ // make the first two consume resources. We are optimizing for the case
+ // where the operands are DPRs, and this determines the first eight
+ // types. The remaining eight types are filled to cover the case
+ // where the operands are SPRs.
+ SchedVar<A9LMUnknownPred, [A9WriteLMfp1, A9WriteLMfp2,
+ A9WriteLMfp3Hi, A9WriteLMfp4Hi,
+ A9WriteLMfp5Hi, A9WriteLMfp6Hi,
+ A9WriteLMfp7Hi, A9WriteLMfp8Hi,
A9WriteLMfp5Hi, A9WriteLMfp5Hi,
A9WriteLMfp6Hi, A9WriteLMfp6Hi,
A9WriteLMfp7Hi, A9WriteLMfp7Hi,
diff --git a/lib/Target/ARM/ARMScheduleSwift.td b/lib/Target/ARM/ARMScheduleSwift.td
index 2a41616..8d7dbc2 100644
--- a/lib/Target/ARM/ARMScheduleSwift.td
+++ b/lib/Target/ARM/ARMScheduleSwift.td
@@ -1345,20 +1345,25 @@ let SchedModel = SwiftModel in {
// 4.2.20 Integer Load Signextended
def SwiftWriteP2P01ThreeCycle : SchedWriteRes<[SwiftUnitP2, SwiftUnitP01]> {
let Latency = 3;
+ let NumMicroOps = 2;
}
def SwiftWriteP2P01FourCyle : SchedWriteRes<[SwiftUnitP2, SwiftUnitP01]> {
let Latency = 4;
+ let NumMicroOps = 2;
}
def SwiftWriteP2P01P01FourCycle : SchedWriteRes<[SwiftUnitP2, SwiftUnitP01,
SwiftUnitP01]> {
let Latency = 4;
+ let NumMicroOps = 3;
}
def SwiftWriteP2P2ThreeCycle : SchedWriteRes<[SwiftUnitP2, SwiftUnitP2]> {
let Latency = 3;
+ let NumMicroOps = 2;
}
def SwiftWriteP2P2P01ThreeCycle : SchedWriteRes<[SwiftUnitP2, SwiftUnitP2,
- SwiftUnitP01]> {
+ SwiftUnitP01]> {
let Latency = 3;
+ let NumMicroOps = 3;
}
def SwiftWrBackOne : SchedWriteRes<[]> {
let Latency = 1;
@@ -1399,7 +1404,10 @@ let SchedModel = SwiftModel in {
def SwiftWriteLM#Lat#Cy : SchedWriteRes<[SwiftUnitP2]> {
let Latency = Lat;
}
- def SwiftWriteLM#Lat#CyNo : SchedWriteRes<[]> { let Latency = Lat; }
+ def SwiftWriteLM#Lat#CyNo : SchedWriteRes<[]> {
+ let Latency = Lat;
+ let NumMicroOps = 0;
+ }
}
// Predicate.
foreach NumAddr = 1-16 in {
@@ -1520,6 +1528,7 @@ let SchedModel = SwiftModel in {
// 4.2.25 Integer Store, Multiple
def SwiftWriteStIncAddr : SchedWriteRes<[SwiftUnitP2, SwiftUnitP01]> {
let Latency = 0;
+ let NumMicroOps = 2;
}
foreach NumAddr = 1-16 in {
def SwiftWriteSTM#NumAddr : WriteSequence<[SwiftWriteStIncAddr], NumAddr>;
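
[Editor's note] The NumMicroOps additions matter because the per-operand machine model charges micro-ops against the core's issue width when estimating throughput. A toy sketch of that accounting (an illustration only, not Swift's actual scheduling model):

    #include <vector>

    struct SchedWrite {
      unsigned Latency;
      unsigned NumMicroOps;
    };

    // Total micro-ops divided by issue width, rounded up, bounds how many
    // cycles a group of writes needs just to issue.
    unsigned minIssueCycles(unsigned IssueWidth,
                            const std::vector<SchedWrite> &Writes) {
      unsigned Ops = 0;
      for (const SchedWrite &W : Writes)
        Ops += W.NumMicroOps;
      return (Ops + IssueWidth - 1) / IssueWidth;
    }
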
diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp
index 0a0f30c..8351c63 100644
--- a/lib/Target/ARM/ARMSubtarget.cpp
+++ b/lib/Target/ARM/ARMSubtarget.cpp
@@ -32,7 +32,7 @@ ReserveR9("arm-reserve-r9", cl::Hidden,
cl::desc("Reserve R9, making it unavailable as GPR"));
static cl::opt<bool>
-DarwinUseMOVT("arm-darwin-use-movt", cl::init(true), cl::Hidden);
+ArmUseMOVT("arm-use-movt", cl::init(true), cl::Hidden);
static cl::opt<bool>
UseFusedMulOps("arm-use-mulops",
@@ -57,10 +57,28 @@ Align(cl::desc("Load/store alignment support"),
"Allow unaligned memory accesses"),
clEnumValEnd));
+enum ITMode {
+ DefaultIT,
+ RestrictedIT,
+ NoRestrictedIT
+};
+
+static cl::opt<ITMode>
+IT(cl::desc("IT block support"), cl::Hidden, cl::init(DefaultIT),
+ cl::ZeroOrMore,
+ cl::values(clEnumValN(DefaultIT, "arm-default-it",
+ "Generate IT block based on arch"),
+ clEnumValN(RestrictedIT, "arm-restrict-it",
+ "Disallow deprecated IT based on ARMv8"),
+ clEnumValN(NoRestrictedIT, "arm-no-restrict-it",
+ "Allow IT blocks based on ARMv7"),
+ clEnumValEnd));
+
ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &CPU,
const std::string &FS, const TargetOptions &Options)
: ARMGenSubtargetInfo(TT, CPU, FS)
, ARMProcFamily(Others)
+ , ARMProcClass(None)
, stackAlignment(4)
, CPUString(CPU)
, TargetTriple(TT)
@@ -75,13 +93,14 @@ void ARMSubtarget::initializeEnvironment() {
HasV5TOps = false;
HasV5TEOps = false;
HasV6Ops = false;
+ HasV6MOps = false;
HasV6T2Ops = false;
HasV7Ops = false;
HasV8Ops = false;
HasVFPv2 = false;
HasVFPv3 = false;
HasVFPv4 = false;
- HasV8FP = false;
+ HasFPARMv8 = false;
HasNEON = false;
UseNEONForSinglePrecisionFP = false;
UseMulOps = UseFusedMulOps;
@@ -90,7 +109,6 @@ void ARMSubtarget::initializeEnvironment() {
SlowFPBrcc = false;
InThumbMode = false;
HasThumb2 = false;
- IsMClass = false;
NoARM = false;
PostRAScheduler = false;
IsR9Reserved = ReserveR9;
@@ -107,9 +125,12 @@ void ARMSubtarget::initializeEnvironment() {
AvoidMOVsShifterOperand = false;
HasRAS = false;
HasMPExtension = false;
+ HasVirtualization = false;
FPOnlySP = false;
HasPerfMon = false;
HasTrustZone = false;
+ HasCrypto = false;
+ HasCRC = false;
AllowsUnalignedMem = false;
Thumb2DSP = false;
UseNaClTrap = false;
@@ -133,8 +154,13 @@ void ARMSubtarget::resetSubtargetFeatures(const MachineFunction *MF) {
}
void ARMSubtarget::resetSubtargetFeatures(StringRef CPU, StringRef FS) {
- if (CPUString.empty())
- CPUString = "generic";
+ if (CPUString.empty()) {
+ if (isTargetIOS() && TargetTriple.getArchName().endswith("v7s"))
+ // Default to the Swift CPU when targeting armv7s/thumbv7s.
+ CPUString = "swift";
+ else
+ CPUString = "generic";
+ }
// Insert the architecture feature derived from the target triple into the
// feature string. This is important for setting features that are implied
@@ -152,7 +178,7 @@ void ARMSubtarget::resetSubtargetFeatures(StringRef CPU, StringRef FS) {
// Thumb2 implies at least V6T2. FIXME: Fix tests to explicitly specify a
// ARM version or CPU and then remove this.
if (!HasV6T2Ops && hasThumb2())
- HasV4TOps = HasV5TOps = HasV5TEOps = HasV6Ops = HasV6T2Ops = true;
+ HasV4TOps = HasV5TOps = HasV5TEOps = HasV6Ops = HasV6MOps = HasV6T2Ops = true;
// Keep a pointer to static instruction cost data for the specified CPU.
SchedModel = getSchedModelForCPU(CPUString);
@@ -169,11 +195,12 @@ void ARMSubtarget::resetSubtargetFeatures(StringRef CPU, StringRef FS) {
if (isAAPCS_ABI())
stackAlignment = 8;
- if (!isTargetIOS())
- UseMovt = hasV6T2Ops();
- else {
+ UseMovt = hasV6T2Ops() && ArmUseMOVT;
+
+ if (!isTargetIOS()) {
+ IsR9Reserved = ReserveR9;
+ } else {
IsR9Reserved = ReserveR9 | !HasV6Ops;
- UseMovt = DarwinUseMOVT && hasV6T2Ops();
SupportsTailCall = !getTargetTriple().isOSVersionLT(5, 0);
}
@@ -207,6 +234,18 @@ void ARMSubtarget::resetSubtargetFeatures(StringRef CPU, StringRef FS) {
break;
}
+ switch (IT) {
+ case DefaultIT:
+ RestrictIT = hasV8Ops() ? true : false;
+ break;
+ case RestrictedIT:
+ RestrictIT = true;
+ break;
+ case NoRestrictedIT:
+ RestrictIT = false;
+ break;
+ }
+
// NEON f32 ops are non-IEEE 754 compliant. Darwin is ok with it by default.
uint64_t Bits = getFeatureBits();
if ((Bits & ARM::ProcA5 || Bits & ARM::ProcA8) && // Where this matters
@@ -271,12 +310,15 @@ unsigned ARMSubtarget::getMispredictionPenalty() const {
return SchedModel->MispredictPenalty;
}
+bool ARMSubtarget::hasSinCos() const {
+ return getTargetTriple().getOS() == Triple::IOS &&
+ !getTargetTriple().isOSVersionLT(7, 0);
+}
+
bool ARMSubtarget::enablePostRAScheduler(
CodeGenOpt::Level OptLevel,
TargetSubtargetInfo::AntiDepBreakMode& Mode,
RegClassVector& CriticalPathRCs) const {
- Mode = TargetSubtargetInfo::ANTIDEP_CRITICAL;
- CriticalPathRCs.clear();
- CriticalPathRCs.push_back(&ARM::GPRRegClass);
+ Mode = TargetSubtargetInfo::ANTIDEP_NONE;
return PostRAScheduler && OptLevel >= CodeGenOpt::Default;
}
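
[Editor's note] hasSinCos() reports a combined sin+cos entry point in the target's runtime (here, iOS 7 and later). The payoff is that code computing both values of the same angle can be served by one runtime call. A motivating example (illustration only; the actual fusion happens in the target's call lowering, not in user code):

    #include <cmath>

    // When ARMSubtarget::hasSinCos() is true, the two libcalls below are
    // candidates for fusion into a single combined sin+cos runtime call.
    void polarToCartesian(float R, float Theta, float &X, float &Y) {
      X = R * std::cos(Theta);
      Y = R * std::sin(Theta);
    }
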
diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h
index ad7f1b3..5276901 100644
--- a/lib/Target/ARM/ARMSubtarget.h
+++ b/lib/Target/ARM/ARMSubtarget.h
@@ -31,29 +31,36 @@ class TargetOptions;
class ARMSubtarget : public ARMGenSubtargetInfo {
protected:
enum ARMProcFamilyEnum {
- Others, CortexA5, CortexA8, CortexA9, CortexA15, CortexR5, Swift
+ Others, CortexA5, CortexA8, CortexA9, CortexA15, CortexR5, Swift, CortexA53, CortexA57
+ };
+ enum ARMProcClassEnum {
+ None, AClass, RClass, MClass
};
/// ARMProcFamily - ARM processor family: Cortex-A8, Cortex-A9, and others.
ARMProcFamilyEnum ARMProcFamily;
+ /// ARMProcClass - ARM processor class: None, AClass, RClass or MClass.
+ ARMProcClassEnum ARMProcClass;
+
/// HasV4TOps, HasV5TOps, HasV5TEOps,
- /// HasV6Ops, HasV6T2Ops, HasV7Ops, HasV8Ops -
+ /// HasV6Ops, HasV6MOps, HasV6T2Ops, HasV7Ops, HasV8Ops -
/// Specify whether target support specific ARM ISA variants.
bool HasV4TOps;
bool HasV5TOps;
bool HasV5TEOps;
bool HasV6Ops;
+ bool HasV6MOps;
bool HasV6T2Ops;
bool HasV7Ops;
bool HasV8Ops;
- /// HasVFPv2, HasVFPv3, HasVFPv4, HasV8FP, HasNEON - Specify what
+ /// HasVFPv2, HasVFPv3, HasVFPv4, HasFPARMv8, HasNEON - Specify what
/// floating point ISAs are supported.
bool HasVFPv2;
bool HasVFPv3;
bool HasVFPv4;
- bool HasV8FP;
+ bool HasFPARMv8;
bool HasNEON;
/// UseNEONForSinglePrecisionFP - if the NEONFP attribute has been
@@ -82,10 +89,6 @@ protected:
/// HasThumb2 - True if Thumb2 instructions are supported.
bool HasThumb2;
- /// IsMClass - True if the subtarget belongs to the 'M' profile of CPUs -
- /// v6m, v7m for example.
- bool IsMClass;
-
/// NoARM - True if subtarget does not support ARM mode execution.
bool NoARM;
@@ -147,6 +150,10 @@ protected:
/// extension (ARMv7 only).
bool HasMPExtension;
+ /// HasVirtualization - True if the subtarget supports the Virtualization
+ /// extension.
+ bool HasVirtualization;
+
/// FPOnlySP - If true, the floating point unit only supports single
/// precision.
bool FPOnlySP;
@@ -159,11 +166,21 @@ protected:
/// HasTrustZone - if true, processor supports TrustZone security extensions
bool HasTrustZone;
+ /// HasCrypto - if true, processor supports Cryptography extensions
+ bool HasCrypto;
+
+ /// HasCRC - if true, processor supports CRC instructions
+ bool HasCRC;
+
/// AllowsUnalignedMem - If true, the subtarget allows unaligned memory
/// accesses for some types. For details, see
/// ARMTargetLowering::allowsUnalignedMemoryAccesses().
bool AllowsUnalignedMem;
+ /// RestrictIT - If true, the subtarget disallows generation of deprecated IT
+ /// blocks to conform to ARMv8 rule.
+ bool RestrictIT;
+
/// Thumb2DSP - If true, the subtarget supports the v7 DSP (saturating arith
/// and such) instructions in Thumb2 code.
bool Thumb2DSP;
@@ -228,6 +245,7 @@ public:
bool hasV5TOps() const { return HasV5TOps; }
bool hasV5TEOps() const { return HasV5TEOps; }
bool hasV6Ops() const { return HasV6Ops; }
+ bool hasV6MOps() const { return HasV6MOps; }
bool hasV6T2Ops() const { return HasV6T2Ops; }
bool hasV7Ops() const { return HasV7Ops; }
bool hasV8Ops() const { return HasV8Ops; }
@@ -246,8 +264,11 @@ public:
bool hasVFP2() const { return HasVFPv2; }
bool hasVFP3() const { return HasVFPv3; }
bool hasVFP4() const { return HasVFPv4; }
- bool hasV8FP() const { return HasV8FP; }
+ bool hasFPARMv8() const { return HasFPARMv8; }
bool hasNEON() const { return HasNEON; }
+ bool hasCrypto() const { return HasCrypto; }
+ bool hasCRC() const { return HasCRC; }
+ bool hasVirtualization() const { return HasVirtualization; }
bool useNEONForSinglePrecisionFP() const {
return hasNEON() && UseNEONForSinglePrecisionFP; }
@@ -255,6 +276,9 @@ public:
bool hasDivideInARMMode() const { return HasHardwareDivideInARM; }
bool hasT2ExtractPack() const { return HasT2ExtractPack; }
bool hasDataBarrier() const { return HasDataBarrier; }
+ bool hasAnyDataBarrier() const {
+ return HasDataBarrier || (hasV6Ops() && !isThumb());
+ }
bool useMulOps() const { return UseMulOps; }
bool useFPVMLx() const { return !SlowFPVMLx; }
bool hasVMLxForwarding() const { return HasVMLxForwarding; }
@@ -275,10 +299,10 @@ public:
const Triple &getTargetTriple() const { return TargetTriple; }
- bool isTargetIOS() const { return TargetTriple.getOS() == Triple::IOS; }
+ bool isTargetIOS() const { return TargetTriple.isiOS(); }
bool isTargetDarwin() const { return TargetTriple.isOSDarwin(); }
- bool isTargetNaCl() const { return TargetTriple.getOS() == Triple::NaCl; }
- bool isTargetLinux() const { return TargetTriple.getOS() == Triple::Linux; }
+ bool isTargetNaCl() const { return TargetTriple.isOSNaCl(); }
+ bool isTargetLinux() const { return TargetTriple.isOSLinux(); }
bool isTargetELF() const { return !isTargetDarwin(); }
// ARM EABI is the bare-metal EABI described in ARM ABI documents and
// can be accessed via -target arm-none-eabi. This is NOT GNUEABI.
@@ -296,8 +320,9 @@ public:
bool isThumb1Only() const { return InThumbMode && !HasThumb2; }
bool isThumb2() const { return InThumbMode && HasThumb2; }
bool hasThumb2() const { return HasThumb2; }
- bool isMClass() const { return IsMClass; }
- bool isARClass() const { return !IsMClass; }
+ bool isMClass() const { return ARMProcClass == MClass; }
+ bool isRClass() const { return ARMProcClass == RClass; }
+ bool isAClass() const { return ARMProcClass == AClass; }
bool isR9Reserved() const { return IsR9Reserved; }
@@ -306,9 +331,15 @@ public:
bool allowsUnalignedMem() const { return AllowsUnalignedMem; }
+ bool restrictIT() const { return RestrictIT; }
+
const std::string & getCPUString() const { return CPUString; }
unsigned getMispredictionPenalty() const;
+
+ /// This function returns true if the target has sincos() routine in its
+ /// compiler runtime or math libraries.
+ bool hasSinCos() const;
/// enablePostRAScheduler - True at 'More' optimization.
bool enablePostRAScheduler(CodeGenOpt::Level OptLevel,
diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp
index e6dbcb6..be84bf6 100644
--- a/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/lib/Target/ARM/ARMTargetMachine.cpp
@@ -196,8 +196,13 @@ bool ARMPassConfig::addPreSched2() {
addPass(createARMExpandPseudoPass());
if (getOptLevel() != CodeGenOpt::None) {
- if (!getARMSubtarget().isThumb1Only())
+ if (!getARMSubtarget().isThumb1Only()) {
+ // in v8, IfConversion depends on Thumb instruction widths
+ if (getARMSubtarget().restrictIT() &&
+ !getARMSubtarget().prefers32BitThumb())
+ addPass(createThumb2SizeReductionPass());
addPass(&IfConverterID);
+ }
}
if (getARMSubtarget().isThumb2())
addPass(createThumb2ITBlockPass());
diff --git a/lib/Target/ARM/ARMTargetObjectFile.cpp b/lib/Target/ARM/ARMTargetObjectFile.cpp
index dfdf6ab..7ec71b2 100644
--- a/lib/Target/ARM/ARMTargetObjectFile.cpp
+++ b/lib/Target/ARM/ARMTargetObjectFile.cpp
@@ -47,7 +47,7 @@ getTTypeGlobalReference(const GlobalValue *GV, Mangler *Mang,
MCStreamer &Streamer) const {
assert(Encoding == DW_EH_PE_absptr && "Can handle absptr encoding only");
- return MCSymbolRefExpr::Create(Mang->getSymbol(GV),
+ return MCSymbolRefExpr::Create(getSymbol(*Mang, GV),
MCSymbolRefExpr::VK_ARM_TARGET2,
getContext());
}
diff --git a/lib/Target/ARM/ARMTargetTransformInfo.cpp b/lib/Target/ARM/ARMTargetTransformInfo.cpp
index 34576ba..6bbb38f 100644
--- a/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -129,6 +129,9 @@ public:
unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty,
OperandValueKind Op1Info = OK_AnyValue,
OperandValueKind Op2Info = OK_AnyValue) const;
+
+ unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
+ unsigned AddressSpace) const;
/// @}
};
@@ -182,7 +185,7 @@ unsigned ARMTTI::getCastInstrCost(unsigned Opcode, Type *Dst,
assert(ISD && "Invalid opcode");
// Single to/from double precision conversions.
- static const CostTblEntry<MVT> NEONFltDblTbl[] = {
+ static const CostTblEntry<MVT::SimpleValueType> NEONFltDblTbl[] = {
// Vector fptrunc/fpext conversions.
{ ISD::FP_ROUND, MVT::v2f64, 2 },
{ ISD::FP_EXTEND, MVT::v2f32, 2 },
@@ -192,8 +195,7 @@ unsigned ARMTTI::getCastInstrCost(unsigned Opcode, Type *Dst,
if (Src->isVectorTy() && ST->hasNEON() && (ISD == ISD::FP_ROUND ||
ISD == ISD::FP_EXTEND)) {
std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Src);
- int Idx = CostTableLookup<MVT>(NEONFltDblTbl, array_lengthof(NEONFltDblTbl),
- ISD, LT.second);
+ int Idx = CostTableLookup(NEONFltDblTbl, ISD, LT.second);
if (Idx != -1)
return LT.first * NEONFltDblTbl[Idx].Cost;
}
@@ -207,7 +209,8 @@ unsigned ARMTTI::getCastInstrCost(unsigned Opcode, Type *Dst,
// Some arithmetic, load and store operations have specific instructions
// to cast up/down their types automatically at no extra cost.
// TODO: Get these tables to know at least what the related operations are.
- static const TypeConversionCostTblEntry<MVT> NEONVectorConversionTbl[] = {
+ static const TypeConversionCostTblEntry<MVT::SimpleValueType>
+ NEONVectorConversionTbl[] = {
{ ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i16, 0 },
{ ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i16, 0 },
{ ISD::SIGN_EXTEND, MVT::v2i64, MVT::v2i32, 1 },
@@ -283,15 +286,15 @@ unsigned ARMTTI::getCastInstrCost(unsigned Opcode, Type *Dst,
};
if (SrcTy.isVector() && ST->hasNEON()) {
- int Idx = ConvertCostTableLookup<MVT>(NEONVectorConversionTbl,
- array_lengthof(NEONVectorConversionTbl),
- ISD, DstTy.getSimpleVT(), SrcTy.getSimpleVT());
+ int Idx = ConvertCostTableLookup(NEONVectorConversionTbl, ISD,
+ DstTy.getSimpleVT(), SrcTy.getSimpleVT());
if (Idx != -1)
return NEONVectorConversionTbl[Idx].Cost;
}
// Scalar float to integer conversions.
- static const TypeConversionCostTblEntry<MVT> NEONFloatConversionTbl[] = {
+ static const TypeConversionCostTblEntry<MVT::SimpleValueType>
+ NEONFloatConversionTbl[] = {
{ ISD::FP_TO_SINT, MVT::i1, MVT::f32, 2 },
{ ISD::FP_TO_UINT, MVT::i1, MVT::f32, 2 },
{ ISD::FP_TO_SINT, MVT::i1, MVT::f64, 2 },
@@ -314,16 +317,15 @@ unsigned ARMTTI::getCastInstrCost(unsigned Opcode, Type *Dst,
{ ISD::FP_TO_UINT, MVT::i64, MVT::f64, 10 }
};
if (SrcTy.isFloatingPoint() && ST->hasNEON()) {
- int Idx = ConvertCostTableLookup<MVT>(NEONFloatConversionTbl,
- array_lengthof(NEONFloatConversionTbl),
- ISD, DstTy.getSimpleVT(),
- SrcTy.getSimpleVT());
+ int Idx = ConvertCostTableLookup(NEONFloatConversionTbl, ISD,
+ DstTy.getSimpleVT(), SrcTy.getSimpleVT());
if (Idx != -1)
return NEONFloatConversionTbl[Idx].Cost;
}
// Scalar integer to float conversions.
- static const TypeConversionCostTblEntry<MVT> NEONIntegerConversionTbl[] = {
+ static const TypeConversionCostTblEntry<MVT::SimpleValueType>
+ NEONIntegerConversionTbl[] = {
{ ISD::SINT_TO_FP, MVT::f32, MVT::i1, 2 },
{ ISD::UINT_TO_FP, MVT::f32, MVT::i1, 2 },
{ ISD::SINT_TO_FP, MVT::f64, MVT::i1, 2 },
@@ -347,16 +349,15 @@ unsigned ARMTTI::getCastInstrCost(unsigned Opcode, Type *Dst,
};
if (SrcTy.isInteger() && ST->hasNEON()) {
- int Idx = ConvertCostTableLookup<MVT>(NEONIntegerConversionTbl,
- array_lengthof(NEONIntegerConversionTbl),
- ISD, DstTy.getSimpleVT(),
- SrcTy.getSimpleVT());
+ int Idx = ConvertCostTableLookup(NEONIntegerConversionTbl, ISD,
+ DstTy.getSimpleVT(), SrcTy.getSimpleVT());
if (Idx != -1)
return NEONIntegerConversionTbl[Idx].Cost;
}
// Scalar integer conversion costs.
- static const TypeConversionCostTblEntry<MVT> ARMIntegerConversionTbl[] = {
+ static const TypeConversionCostTblEntry<MVT::SimpleValueType>
+ ARMIntegerConversionTbl[] = {
// i16 -> i64 requires two dependent operations.
{ ISD::SIGN_EXTEND, MVT::i64, MVT::i16, 2 },
@@ -368,11 +369,8 @@ unsigned ARMTTI::getCastInstrCost(unsigned Opcode, Type *Dst,
};
if (SrcTy.isInteger()) {
- int Idx =
- ConvertCostTableLookup<MVT>(ARMIntegerConversionTbl,
- array_lengthof(ARMIntegerConversionTbl),
- ISD, DstTy.getSimpleVT(),
- SrcTy.getSimpleVT());
+ int Idx = ConvertCostTableLookup(ARMIntegerConversionTbl, ISD,
+ DstTy.getSimpleVT(), SrcTy.getSimpleVT());
if (Idx != -1)
return ARMIntegerConversionTbl[Idx].Cost;
}
@@ -400,7 +398,8 @@ unsigned ARMTTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
// On NEON a vector select gets lowered to vbsl.
if (ST->hasNEON() && ValTy->isVectorTy() && ISD == ISD::SELECT) {
// Lowering of some vector selects is currently far from perfect.
- static const TypeConversionCostTblEntry<MVT> NEONVectorSelectTbl[] = {
+ static const TypeConversionCostTblEntry<MVT::SimpleValueType>
+ NEONVectorSelectTbl[] = {
{ ISD::SELECT, MVT::v16i1, MVT::v16i16, 2*16 + 1 + 3*1 + 4*1 },
{ ISD::SELECT, MVT::v8i1, MVT::v8i32, 4*8 + 1*3 + 1*4 + 1*2 },
{ ISD::SELECT, MVT::v16i1, MVT::v16i32, 4*16 + 1*6 + 1*8 + 1*4 },
@@ -412,10 +411,9 @@ unsigned ARMTTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
EVT SelCondTy = TLI->getValueType(CondTy);
EVT SelValTy = TLI->getValueType(ValTy);
if (SelCondTy.isSimple() && SelValTy.isSimple()) {
- int Idx = ConvertCostTableLookup<MVT>(NEONVectorSelectTbl,
- array_lengthof(NEONVectorSelectTbl),
- ISD, SelCondTy.getSimpleVT(),
- SelValTy.getSimpleVT());
+ int Idx = ConvertCostTableLookup(NEONVectorSelectTbl, ISD,
+ SelCondTy.getSimpleVT(),
+ SelValTy.getSimpleVT());
if (Idx != -1)
return NEONVectorSelectTbl[Idx].Cost;
}
@@ -448,7 +446,7 @@ unsigned ARMTTI::getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
if (Kind != SK_Reverse)
return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp);
- static const CostTblEntry<MVT> NEONShuffleTbl[] = {
+ static const CostTblEntry<MVT::SimpleValueType> NEONShuffleTbl[] = {
// Reverse shuffle cost one instruction if we are shuffling within a double
// word (vrev) or two if we shuffle a quad word (vrev, vext).
{ ISD::VECTOR_SHUFFLE, MVT::v2i32, 1 },
@@ -464,8 +462,7 @@ unsigned ARMTTI::getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Tp);
- int Idx = CostTableLookup<MVT>(NEONShuffleTbl, array_lengthof(NEONShuffleTbl),
- ISD::VECTOR_SHUFFLE, LT.second);
+ int Idx = CostTableLookup(NEONShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second);
if (Idx == -1)
return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp);
@@ -480,7 +477,7 @@ unsigned ARMTTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueK
const unsigned FunctionCallDivCost = 20;
const unsigned ReciprocalDivCost = 10;
- static const CostTblEntry<MVT> CostTbl[] = {
+ static const CostTblEntry<MVT::SimpleValueType> CostTbl[] = {
// Division.
// These costs are somewhat random. Choose a cost of 20 to indicate that
// vectorizing division (added function call) is going to be very expensive.
@@ -524,14 +521,37 @@ unsigned ARMTTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueK
int Idx = -1;
if (ST->hasNEON())
- Idx = CostTableLookup<MVT>(CostTbl, array_lengthof(CostTbl), ISDOpcode,
- LT.second);
+ Idx = CostTableLookup(CostTbl, ISDOpcode, LT.second);
if (Idx != -1)
return LT.first * CostTbl[Idx].Cost;
-
- return TargetTransformInfo::getArithmeticInstrCost(Opcode, Ty, Op1Info,
- Op2Info);
+ unsigned Cost =
+ TargetTransformInfo::getArithmeticInstrCost(Opcode, Ty, Op1Info, Op2Info);
+
+ // This is somewhat of a hack. The problem that we are facing is that SROA
+ // creates a sequence of shift, and, or instructions to construct values.
+ // These sequences are recognized by the ISel and have zero-cost. Not so for
+ // the vectorized code. Because we have support for v2i64 but not i64 those
+ // sequences look particularly beneficial to vectorize.
+ // To work around this we increase the cost of v2i64 operations to make them
+ // seem less beneficial.
+ if (LT.second == MVT::v2i64 &&
+ Op2Info == TargetTransformInfo::OK_UniformConstantValue)
+ Cost += 4;
+
+ return Cost;
}
+unsigned ARMTTI::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
+ unsigned AddressSpace) const {
+ std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Src);
+
+ if (Src->isVectorTy() && Alignment != 16 &&
+ Src->getVectorElementType()->isDoubleTy()) {
+ // Unaligned loads/stores are extremely inefficient.
+ // We need 4 uops for vst.1/vld.1 vs 1 uop for vldr/vstr.
+ return LT.first * 4;
+ }
+ return LT.first;
+}
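
[Editor's note] The new getMemoryOpCost heuristic is small enough to restate standalone; a sketch with the legalized-cost factor supplied by the caller (LT.first in the hunk above):

    // Unaligned vector-of-double accesses pay a 4x penalty (~4 uops for
    // vld1/vst1 versus 1 for vldr/vstr); everything else costs the plain
    // legalization factor.
    unsigned memoryOpCost(bool IsVectorOfDouble, unsigned Alignment,
                          unsigned LegalizedCost) {
      if (IsVectorOfDouble && Alignment != 16)
        return LegalizedCost * 4;
      return LegalizedCost;
    }
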
diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index 80e5c6e..e3f9e0d 100644
--- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -7,6 +7,9 @@
//
//===----------------------------------------------------------------------===//
+#include "ARMBuildAttrs.h"
+#include "ARMFPUName.h"
+#include "ARMFeatures.h"
#include "llvm/MC/MCTargetAsmParser.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "MCTargetDesc/ARMBaseInfo.h"
@@ -24,6 +27,7 @@
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
@@ -47,8 +51,14 @@ enum VectorLaneTy { NoLanes, AllLanes, IndexedLane };
class ARMAsmParser : public MCTargetAsmParser {
MCSubtargetInfo &STI;
MCAsmParser &Parser;
+ const MCInstrInfo &MII;
const MCRegisterInfo *MRI;
+ ARMTargetStreamer &getTargetStreamer() {
+ MCTargetStreamer &TS = getParser().getStreamer().getTargetStreamer();
+ return static_cast<ARMTargetStreamer &>(TS);
+ }
+
// Unwind directives state
SMLoc FnStartLoc;
SMLoc CantUnwindLoc;
@@ -66,6 +76,8 @@ class ARMAsmParser : public MCTargetAsmParser {
// Map of register aliases registers via the .req directive.
StringMap<unsigned> RegisterReqs;
+ bool NextSymbolIsThumb;
+
struct {
ARMCC::CondCodes Cond; // Condition for IT block.
unsigned Mask:4; // Condition mask for instructions.
@@ -127,6 +139,8 @@ class ARMAsmParser : public MCTargetAsmParser {
bool parseDirectiveUnreq(SMLoc L);
bool parseDirectiveArch(SMLoc L);
bool parseDirectiveEabiAttr(SMLoc L);
+ bool parseDirectiveCPU(SMLoc L);
+ bool parseDirectiveFPU(SMLoc L);
bool parseDirectiveFnStart(SMLoc L);
bool parseDirectiveFnEnd(SMLoc L);
bool parseDirectiveCantUnwind(SMLoc L);
@@ -139,7 +153,8 @@ class ARMAsmParser : public MCTargetAsmParser {
StringRef splitMnemonic(StringRef Mnemonic, unsigned &PredicationCode,
bool &CarrySetting, unsigned &ProcessorIMod,
StringRef &ITMask);
- void getMnemonicAcceptInfo(StringRef Mnemonic, bool &CanAcceptCarrySet,
+ void getMnemonicAcceptInfo(StringRef Mnemonic, StringRef FullInst,
+ bool &CanAcceptCarrySet,
bool &CanAcceptPredicationCode);
bool isThumb() const {
@@ -158,6 +173,9 @@ class ARMAsmParser : public MCTargetAsmParser {
bool hasV6Ops() const {
return STI.getFeatureBits() & ARM::HasV6Ops;
}
+ bool hasV6MOps() const {
+ return STI.getFeatureBits() & ARM::HasV6MOps;
+ }
bool hasV7Ops() const {
return STI.getFeatureBits() & ARM::HasV7Ops;
}
@@ -221,6 +239,9 @@ class ARMAsmParser : public MCTargetAsmParser {
// Asm Match Converter Methods
void cvtThumbMultiply(MCInst &Inst,
const SmallVectorImpl<MCParsedAsmOperand*> &);
+ void cvtThumbBranches(MCInst &Inst,
+ const SmallVectorImpl<MCParsedAsmOperand*> &);
+
bool validateInstruction(MCInst &Inst,
const SmallVectorImpl<MCParsedAsmOperand*> &Ops);
bool processInstruction(MCInst &Inst,
@@ -229,8 +250,6 @@ class ARMAsmParser : public MCTargetAsmParser {
SmallVectorImpl<MCParsedAsmOperand*> &Operands);
bool shouldOmitPredicateOperand(StringRef Mnemonic,
SmallVectorImpl<MCParsedAsmOperand*> &Operands);
- bool isDeprecated(MCInst &Inst, StringRef &Info);
-
public:
enum ARMMatchResultTy {
Match_RequiresITBlock = FIRST_TARGET_MATCH_RESULT_TY,
@@ -242,8 +261,9 @@ public:
};
- ARMAsmParser(MCSubtargetInfo &_STI, MCAsmParser &_Parser)
- : MCTargetAsmParser(), STI(_STI), Parser(_Parser), FPReg(-1) {
+ ARMAsmParser(MCSubtargetInfo &_STI, MCAsmParser &_Parser,
+ const MCInstrInfo &MII)
+ : MCTargetAsmParser(), STI(_STI), Parser(_Parser), MII(MII), FPReg(-1) {
MCAsmParserExtension::Initialize(_Parser);
// Cache the MCRegisterInfo.
@@ -255,12 +275,7 @@ public:
// Not in an ITBlock to start with.
ITState.CurPosition = ~0U;
- // Set ELF header flags.
- // FIXME: This should eventually end up somewhere else where more
- // intelligent flag decisions can be made. For now we are just maintaining
- // the status quo for ARM and setting EF_ARM_EABI_VER5 as the default.
- if (MCELFStreamer *MES = dyn_cast<MCELFStreamer>(&Parser.getStreamer()))
- MES->getAssembler().setELFHeaderEFlags(ELF::EF_ARM_EABI_VER5);
+ NextSymbolIsThumb = false;
}
// Implementation of the MCTargetAsmParser interface:
@@ -277,6 +292,8 @@ public:
SmallVectorImpl<MCParsedAsmOperand*> &Operands,
MCStreamer &Out, unsigned &ErrorInfo,
bool MatchingInlineAsm);
+ void onLabelParsed(MCSymbol *Symbol);
+
};
} // end anonymous namespace
@@ -601,7 +618,7 @@ public:
template<unsigned width, unsigned scale>
bool isUnsignedOffset() const {
if (!isImm()) return false;
- if (dyn_cast<MCSymbolRefExpr>(Imm.Val)) return true;
+ if (isa<MCSymbolRefExpr>(Imm.Val)) return true;
if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Imm.Val)) {
int64_t Val = CE->getValue();
int64_t Align = 1LL << scale;
@@ -610,6 +627,22 @@ public:
}
return false;
}
+ // checks whether this operand is a signed offset which fits in a field
+ // of specified width and scaled by a specific number of bits
+ template<unsigned width, unsigned scale>
+ bool isSignedOffset() const {
+ if (!isImm()) return false;
+ if (isa<MCSymbolRefExpr>(Imm.Val)) return true;
+ if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Imm.Val)) {
+ int64_t Val = CE->getValue();
+ int64_t Align = 1LL << scale;
+ int64_t Max = Align * ((1LL << (width-1)) - 1);
+ int64_t Min = -Align * (1LL << (width-1));
+ return ((Val % Align) == 0) && (Val >= Min) && (Val <= Max);
+ }
+ return false;
+ }
+
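
[Editor's note] Worked numbers for the template just added, taking the Thumb tB branch operand as the example (width = 11, scale = 1):

    // isSignedOffset<11, 1>: Align = 1 << 1 = 2, so the accepted range is
    // Min = -2 * (1 << 10) = -2048 up to Max = 2 * ((1 << 10) - 1) = 2046,
    // in steps of 2 -- exactly the reach of the 16-bit Thumb branch.
    static_assert(2 * ((1 << 10) - 1) == 2046, "upper bound");
    static_assert(-2 * (1 << 10) == -2048, "lower bound");
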
// checks whether this operand is a memory operand computed as an offset
// applied to PC. the offset may have 8 bits of magnitude and is represented
// with two bits of shift. textually it may be either [pc, #imm], #imm or
@@ -628,7 +661,7 @@ public:
Val = Memory.OffsetImm->getValue();
}
else return false;
- return ((Val % 4) == 0) && (Val >= -1020) && (Val <= 1020);
+ return ((Val % 4) == 0) && (Val >= 0) && (Val <= 1020);
}
bool isFPImm() const {
if (!isImm()) return false;
@@ -658,13 +691,6 @@ public:
int64_t Value = CE->getValue();
return ((Value & 3) == 0) && Value >= -1020 && Value <= 1020;
}
- bool isImm0_4() const {
- if (!isImm()) return false;
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
- if (!CE) return false;
- int64_t Value = CE->getValue();
- return Value >= 0 && Value < 5;
- }
bool isImm0_1020s4() const {
if (!isImm()) return false;
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
@@ -687,6 +713,13 @@ public:
// explicitly exclude zero. we want that to use the normal 0_508 version.
return ((Value & 3) == 0) && Value > 0 && Value <= 508;
}
+ bool isImm0_239() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value >= 0 && Value < 240;
+ }
bool isImm0_255() const {
if (!isImm()) return false;
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
@@ -848,6 +881,15 @@ public:
int64_t Value = CE->getValue();
return Value >= 0 && Value < 65536;
}
+ bool isImm256_65535Expr() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ // If it's not a constant expression, it'll generate a fixup and be
+ // handled later.
+ if (!CE) return true;
+ int64_t Value = CE->getValue();
+ return Value >= 256 && Value < 65536;
+ }
bool isImm0_65535Expr() const {
if (!isImm()) return false;
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
@@ -927,7 +969,8 @@ public:
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
if (!CE) return false;
int64_t Value = CE->getValue();
- return ARM_AM::getT2SOImmVal(~Value) != -1;
+ return ARM_AM::getT2SOImmVal(Value) == -1 &&
+ ARM_AM::getT2SOImmVal(~Value) != -1;
}
bool isT2SOImmNeg() const {
if (!isImm()) return false;
@@ -1773,8 +1816,6 @@ public:
void addMemPCRelImm12Operands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
int32_t Imm = Memory.OffsetImm->getValue();
- // FIXME: Handle #-0
- if (Imm == INT32_MIN) Imm = 0;
Inst.addOperand(MCOperand::CreateImm(Imm));
}
@@ -2494,7 +2535,7 @@ void ARMOperand::print(raw_ostream &OS) const {
getImm()->print(OS);
break;
case k_MemBarrierOpt:
- OS << "<ARM_MB::" << MemBOptToString(getMemBarrierOpt()) << ">";
+ OS << "<ARM_MB::" << MemBOptToString(getMemBarrierOpt(), false) << ">";
break;
case k_InstSyncBarrierOpt:
OS << "<ARM_ISB::" << InstSyncBOptToString(getInstSyncBarrierOpt()) << ">";
@@ -2831,8 +2872,9 @@ static int MatchCoprocessorOperandName(StringRef Name, char CoprocOp) {
return -1;
switch (Name[2]) {
default: return -1;
- case '0': return 10;
- case '1': return 11;
+ // p10 and p11 are invalid for coproc instructions (reserved for FP/NEON)
+ case '0': return CoprocOp == 'p'? -1: 10;
+ case '1': return CoprocOp == 'p'? -1: 11;
case '2': return 12;
case '3': return 13;
case '4': return 14;
@@ -3439,18 +3481,27 @@ parseMemBarrierOptOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
Opt = StringSwitch<unsigned>(OptStr.slice(0, OptStr.size()).lower())
.Case("sy", ARM_MB::SY)
.Case("st", ARM_MB::ST)
+ .Case("ld", ARM_MB::LD)
.Case("sh", ARM_MB::ISH)
.Case("ish", ARM_MB::ISH)
.Case("shst", ARM_MB::ISHST)
.Case("ishst", ARM_MB::ISHST)
+ .Case("ishld", ARM_MB::ISHLD)
.Case("nsh", ARM_MB::NSH)
.Case("un", ARM_MB::NSH)
.Case("nshst", ARM_MB::NSHST)
+ .Case("nshld", ARM_MB::NSHLD)
.Case("unst", ARM_MB::NSHST)
.Case("osh", ARM_MB::OSH)
.Case("oshst", ARM_MB::OSHST)
+ .Case("oshld", ARM_MB::OSHLD)
.Default(~0U);
+ // ishld, oshld, nshld and ld are only available from ARMv8.
+ if (!hasV8Ops() && (Opt == ARM_MB::ISHLD || Opt == ARM_MB::OSHLD ||
+ Opt == ARM_MB::NSHLD || Opt == ARM_MB::LD))
+ Opt = ~0U;
+
if (Opt == ~0U)
return MatchOperand_NoMatch;
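
[Editor's note] The availability rule added above is worth isolating: the load-only barrier domains are ARMv8 additions, so pre-v8 parses must reject them. A standalone restatement, with the enumerators stubbed for the sketch:

    enum BarrierOpt { SY, ST, LD, ISH, ISHST, ISHLD, NSH, NSHST, NSHLD,
                      OSH, OSHST, OSHLD };

    // ishld, oshld, nshld and ld are only available from ARMv8.
    bool barrierOptionAvailable(BarrierOpt Opt, bool HasV8Ops) {
      bool LoadOnly = Opt == ISHLD || Opt == OSHLD || Opt == NSHLD || Opt == LD;
      return !LoadOnly || HasV8Ops;
    }
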
@@ -3498,7 +3549,7 @@ parseInstSyncBarrierOptOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
if (Tok.is(AsmToken::Identifier)) {
StringRef OptStr = Tok.getString();
- if (OptStr.lower() == "sy")
+ if (OptStr.equals_lower("sy"))
Opt = ARM_ISB::SY;
else
return MatchOperand_NoMatch;
@@ -4102,6 +4153,65 @@ cvtThumbMultiply(MCInst &Inst,
((ARMOperand*)Operands[2])->addCondCodeOperands(Inst, 2);
}
+void ARMAsmParser::
+cvtThumbBranches(MCInst &Inst,
+ const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ int CondOp = -1, ImmOp = -1;
+ switch(Inst.getOpcode()) {
+ case ARM::tB:
+ case ARM::tBcc: CondOp = 1; ImmOp = 2; break;
+
+ case ARM::t2B:
+ case ARM::t2Bcc: CondOp = 1; ImmOp = 3; break;
+
+ default: llvm_unreachable("Unexpected instruction in cvtThumbBranches");
+ }
+ // first decide whether the branch should be conditional
+ // by looking at its location relative to an IT block
+ if(inITBlock()) {
+ // inside an IT block we cannot have any conditional branches. any
+ // such instruction needs to be converted to unconditional form
+ switch(Inst.getOpcode()) {
+ case ARM::tBcc: Inst.setOpcode(ARM::tB); break;
+ case ARM::t2Bcc: Inst.setOpcode(ARM::t2B); break;
+ }
+ } else {
+ // outside IT blocks we can only have unconditional branches with AL
+ // condition code or conditional branches with non-AL condition code
+ unsigned Cond = static_cast<ARMOperand*>(Operands[CondOp])->getCondCode();
+ switch(Inst.getOpcode()) {
+ case ARM::tB:
+ case ARM::tBcc:
+ Inst.setOpcode(Cond == ARMCC::AL ? ARM::tB : ARM::tBcc);
+ break;
+ case ARM::t2B:
+ case ARM::t2Bcc:
+ Inst.setOpcode(Cond == ARMCC::AL ? ARM::t2B : ARM::t2Bcc);
+ break;
+ }
+ }
+
+ // now decide on encoding size based on branch target range
+ switch(Inst.getOpcode()) {
+ // classify tB as either t2B or t1B based on range of immediate operand
+ case ARM::tB: {
+ ARMOperand* op = static_cast<ARMOperand*>(Operands[ImmOp]);
+ if(!op->isSignedOffset<11, 1>() && isThumbTwo())
+ Inst.setOpcode(ARM::t2B);
+ break;
+ }
+ // classify tBcc as either t2Bcc or t1Bcc based on range of immediate operand
+ case ARM::tBcc: {
+ ARMOperand* op = static_cast<ARMOperand*>(Operands[ImmOp]);
+ if(!op->isSignedOffset<8, 1>() && isThumbTwo())
+ Inst.setOpcode(ARM::t2Bcc);
+ break;
+ }
+ }
+ ((ARMOperand*)Operands[ImmOp])->addImmOperands(Inst, 1);
+ ((ARMOperand*)Operands[CondOp])->addCondCodeOperands(Inst, 2);
+}
+
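
[Editor's note] Condensed, cvtThumbBranches makes two independent decisions: conditionality (from the IT-block state) and encoding width (from the operand range). A compact sketch of that decision table, with the opcodes stubbed and assuming a Thumb2-capable target for widening:

    enum ThumbBranch { tB, tBcc, t2B, t2Bcc };

    ThumbBranch pickThumbBranch(bool InITBlock, bool CondIsAL,
                                bool FitsNarrow, bool HasThumb2) {
      // 1. Conditionality: inside an IT block only unconditional forms are
      //    legal; outside, an AL condition also selects the unconditional form.
      ThumbBranch BR = (InITBlock || CondIsAL) ? tB : tBcc;
      // 2. Width: widen to the 32-bit encoding when the narrow range
      //    (11 bits for tB, 8 for tBcc) cannot reach the target.
      if (!FitsNarrow && HasThumb2)
        BR = (BR == tB) ? t2B : t2Bcc;
      return BR;
    }
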
/// Parse an ARM memory expression, return false if successful else return true
/// or an error. The first token must be a '[' when called.
bool ARMAsmParser::
@@ -4601,7 +4711,7 @@ StringRef ARMAsmParser::splitMnemonic(StringRef Mnemonic,
Mnemonic == "mls" || Mnemonic == "smmls" || Mnemonic == "vcls" ||
Mnemonic == "vmls" || Mnemonic == "vnmls" || Mnemonic == "vacge" ||
Mnemonic == "vcge" || Mnemonic == "vclt" || Mnemonic == "vacgt" ||
- Mnemonic == "vaclt" || Mnemonic == "vacle" ||
+ Mnemonic == "vaclt" || Mnemonic == "vacle" || Mnemonic == "hlt" ||
Mnemonic == "vcgt" || Mnemonic == "vcle" || Mnemonic == "smlal" ||
Mnemonic == "umaal" || Mnemonic == "umlal" || Mnemonic == "vabal" ||
Mnemonic == "vmlal" || Mnemonic == "vpadal" || Mnemonic == "vqdmlal" ||
@@ -4688,8 +4798,8 @@ StringRef ARMAsmParser::splitMnemonic(StringRef Mnemonic,
//
// FIXME: It would be nice to autogen this.
void ARMAsmParser::
-getMnemonicAcceptInfo(StringRef Mnemonic, bool &CanAcceptCarrySet,
- bool &CanAcceptPredicationCode) {
+getMnemonicAcceptInfo(StringRef Mnemonic, StringRef FullInst,
+ bool &CanAcceptCarrySet, bool &CanAcceptPredicationCode) {
if (Mnemonic == "and" || Mnemonic == "lsl" || Mnemonic == "lsr" ||
Mnemonic == "rrx" || Mnemonic == "ror" || Mnemonic == "sub" ||
Mnemonic == "add" || Mnemonic == "adc" ||
@@ -4707,12 +4817,14 @@ getMnemonicAcceptInfo(StringRef Mnemonic, bool &CanAcceptCarrySet,
if (Mnemonic == "bkpt" || Mnemonic == "cbnz" || Mnemonic == "setend" ||
Mnemonic == "cps" || Mnemonic == "it" || Mnemonic == "cbz" ||
- Mnemonic == "trap" || Mnemonic == "setend" ||
+ Mnemonic == "trap" || Mnemonic == "hlt" || Mnemonic.startswith("crc32") ||
Mnemonic.startswith("cps") || Mnemonic.startswith("vsel") ||
Mnemonic == "vmaxnm" || Mnemonic == "vminnm" || Mnemonic == "vcvta" ||
Mnemonic == "vcvtn" || Mnemonic == "vcvtp" || Mnemonic == "vcvtm" ||
Mnemonic == "vrinta" || Mnemonic == "vrintn" || Mnemonic == "vrintp" ||
- Mnemonic == "vrintm") {
+ Mnemonic == "vrintm" || Mnemonic.startswith("aes") ||
+ Mnemonic.startswith("sha1") || Mnemonic.startswith("sha256") ||
+ (FullInst.startswith("vmull") && FullInst.endswith(".p64"))) {
// These mnemonics are never predicable
CanAcceptPredicationCode = false;
} else if (!isThumb()) {
@@ -4726,7 +4838,10 @@ getMnemonicAcceptInfo(StringRef Mnemonic, bool &CanAcceptCarrySet,
Mnemonic != "stc2" && Mnemonic != "stc2l" &&
!Mnemonic.startswith("rfe") && !Mnemonic.startswith("srs");
} else if (isThumbOne()) {
- CanAcceptPredicationCode = Mnemonic != "nop" && Mnemonic != "movs";
+ if (hasV6MOps())
+ CanAcceptPredicationCode = Mnemonic != "movs";
+ else
+ CanAcceptPredicationCode = Mnemonic != "nop" && Mnemonic != "movs";
} else
CanAcceptPredicationCode = true;
}
@@ -4877,14 +4992,6 @@ bool ARMAsmParser::shouldOmitPredicateOperand(
return false;
}
-bool ARMAsmParser::isDeprecated(MCInst &Inst, StringRef &Info) {
- if (hasV8Ops() && Inst.getOpcode() == ARM::SETEND) {
- Info = "armv8";
- return true;
- }
- return false;
-}
-
static bool isDataTypeToken(StringRef Tok) {
return Tok == ".8" || Tok == ".16" || Tok == ".32" || Tok == ".64" ||
Tok == ".i8" || Tok == ".i16" || Tok == ".i32" || Tok == ".i64" ||
@@ -4980,7 +5087,7 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
// the matcher deal with finding the right instruction or generating an
// appropriate error.
bool CanAcceptCarrySet, CanAcceptPredicationCode;
- getMnemonicAcceptInfo(Mnemonic, CanAcceptCarrySet, CanAcceptPredicationCode);
+ getMnemonicAcceptInfo(Mnemonic, Name, CanAcceptCarrySet, CanAcceptPredicationCode);
// If we had a carry-set on an instruction that can't do that, issue an
// error.
@@ -5115,8 +5222,9 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
// expressed as a GPRPair, so we have to manually merge them.
// FIXME: We would really like to be able to tablegen'erate this.
if (!isThumb() && Operands.size() > 4 &&
- (Mnemonic == "ldrexd" || Mnemonic == "strexd")) {
- bool isLoad = (Mnemonic == "ldrexd");
+ (Mnemonic == "ldrexd" || Mnemonic == "strexd" || Mnemonic == "ldaexd" ||
+ Mnemonic == "stlexd")) {
+ bool isLoad = (Mnemonic == "ldrexd" || Mnemonic == "ldaexd");
unsigned Idx = isLoad ? 2 : 3;
ARMOperand* Op1 = static_cast<ARMOperand*>(Operands[Idx]);
ARMOperand* Op2 = static_cast<ARMOperand*>(Operands[Idx+1]);
@@ -5200,45 +5308,44 @@ static bool listContainsReg(MCInst &Inst, unsigned OpNo, unsigned Reg) {
return false;
}
-// FIXME: We would really prefer to have MCInstrInfo (the wrapper around
-// the ARMInsts array) instead. Getting that here requires awkward
-// API changes, though. Better way?
-namespace llvm {
-extern const MCInstrDesc ARMInsts[];
-}
-static const MCInstrDesc &getInstDesc(unsigned Opcode) {
- return ARMInsts[Opcode];
+// Return true if instruction has the interesting property of being
+// allowed in IT blocks, but not being predicable.
+static bool instIsBreakpoint(const MCInst &Inst) {
+ return Inst.getOpcode() == ARM::tBKPT ||
+ Inst.getOpcode() == ARM::BKPT ||
+ Inst.getOpcode() == ARM::tHLT ||
+ Inst.getOpcode() == ARM::HLT;
+
}
// FIXME: We would really like to be able to tablegen'erate this.
bool ARMAsmParser::
validateInstruction(MCInst &Inst,
const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
- const MCInstrDesc &MCID = getInstDesc(Inst.getOpcode());
+ const MCInstrDesc &MCID = MII.get(Inst.getOpcode());
SMLoc Loc = Operands[0]->getStartLoc();
+
// Check the IT block state first.
- // NOTE: BKPT instruction has the interesting property of being
- // allowed in IT blocks, but not being predicable. It just always
- // executes.
- if (inITBlock() && Inst.getOpcode() != ARM::tBKPT &&
- Inst.getOpcode() != ARM::BKPT) {
- unsigned bit = 1;
+ // NOTE: BKPT and HLT instructions have the interesting property of being
+ // allowed in IT blocks, but not being predicable. They just always execute.
+ if (inITBlock() && !instIsBreakpoint(Inst)) {
+ unsigned Bit = 1;
if (ITState.FirstCond)
ITState.FirstCond = false;
else
- bit = (ITState.Mask >> (5 - ITState.CurPosition)) & 1;
+ Bit = (ITState.Mask >> (5 - ITState.CurPosition)) & 1;
// The instruction must be predicable.
if (!MCID.isPredicable())
return Error(Loc, "instructions in IT block must be predicable");
unsigned Cond = Inst.getOperand(MCID.findFirstPredOperandIdx()).getImm();
- unsigned ITCond = bit ? ITState.Cond :
+ unsigned ITCond = Bit ? ITState.Cond :
ARMCC::getOppositeCondition(ITState.Cond);
if (Cond != ITCond) {
// Find the condition code Operand to get its SMLoc information.
SMLoc CondLoc;
- for (unsigned i = 1; i < Operands.size(); ++i)
- if (static_cast<ARMOperand*>(Operands[i])->isCondCode())
- CondLoc = Operands[i]->getStartLoc();
+ for (unsigned I = 1; I < Operands.size(); ++I)
+ if (static_cast<ARMOperand*>(Operands[I])->isCondCode())
+ CondLoc = Operands[I]->getStartLoc();
return Error(CondLoc, "incorrect condition in IT block; got '" +
StringRef(ARMCondCodeToString(ARMCC::CondCodes(Cond))) +
"', but expected '" +
@@ -5247,20 +5354,55 @@ validateInstruction(MCInst &Inst,
// Check for non-'al' condition codes outside of the IT block.
} else if (isThumbTwo() && MCID.isPredicable() &&
Inst.getOperand(MCID.findFirstPredOperandIdx()).getImm() !=
- ARMCC::AL && Inst.getOpcode() != ARM::tB &&
- Inst.getOpcode() != ARM::t2B)
+ ARMCC::AL && Inst.getOpcode() != ARM::tBcc &&
+ Inst.getOpcode() != ARM::t2Bcc)
return Error(Loc, "predicated instructions must be in IT block");
- switch (Inst.getOpcode()) {
+ const unsigned Opcode = Inst.getOpcode();
+ switch (Opcode) {
case ARM::LDRD:
case ARM::LDRD_PRE:
case ARM::LDRD_POST: {
+ const unsigned RtReg = Inst.getOperand(0).getReg();
+
+ // Rt can't be R14.
+ if (RtReg == ARM::LR)
+ return Error(Operands[3]->getStartLoc(),
+ "Rt can't be R14");
+
+ const unsigned Rt = MRI->getEncodingValue(RtReg);
+ // Rt must be even-numbered.
+ if ((Rt & 1) == 1)
+ return Error(Operands[3]->getStartLoc(),
+ "Rt must be even-numbered");
+
// Rt2 must be Rt + 1.
- unsigned Rt = MRI->getEncodingValue(Inst.getOperand(0).getReg());
- unsigned Rt2 = MRI->getEncodingValue(Inst.getOperand(1).getReg());
+ const unsigned Rt2 = MRI->getEncodingValue(Inst.getOperand(1).getReg());
if (Rt2 != Rt + 1)
return Error(Operands[3]->getStartLoc(),
"destination operands must be sequential");
+
+ if (Opcode == ARM::LDRD_PRE || Opcode == ARM::LDRD_POST) {
+ const unsigned Rn = MRI->getEncodingValue(Inst.getOperand(3).getReg());
+ // For addressing modes with writeback, the base register needs to be
+ // different from the destination registers.
+ if (Rn == Rt || Rn == Rt2)
+ return Error(Operands[3]->getStartLoc(),
+ "base register needs to be different from destination "
+ "registers");
+ }
+
+ return false;
+ }
+ case ARM::t2LDRDi8:
+ case ARM::t2LDRD_PRE:
+ case ARM::t2LDRD_POST: {
+ // Rt2 must be different from Rt.
+ unsigned Rt = MRI->getEncodingValue(Inst.getOperand(0).getReg());
+ unsigned Rt2 = MRI->getEncodingValue(Inst.getOperand(1).getReg());
+ if (Rt2 == Rt)
+ return Error(Operands[3]->getStartLoc(),
+ "destination operands can't be identical");
return false;
}
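
[Editor's note] The ARM-mode LDRD constraints enforced above collapse to a short predicate; restated standalone (register numbers here are encoding values, so LR is 14):

    // Sketch of the LDRD operand rules checked above.
    bool ldrdOperandsValid(unsigned Rt, unsigned Rt2, unsigned Rn,
                           bool HasWriteback) {
      if (Rt == 14)                 // Rt can't be R14 (LR)
        return false;
      if (Rt & 1)                   // Rt must be even-numbered
        return false;
      if (Rt2 != Rt + 1)            // destination operands must be sequential
        return false;
      if (HasWriteback && (Rn == Rt || Rn == Rt2))
        return false;               // writeback base must differ from Rt/Rt2
      return true;
    }
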
case ARM::STRD: {
@@ -5284,50 +5426,77 @@ validateInstruction(MCInst &Inst,
}
case ARM::SBFX:
case ARM::UBFX: {
- // width must be in range [1, 32-lsb]
- unsigned lsb = Inst.getOperand(2).getImm();
- unsigned widthm1 = Inst.getOperand(3).getImm();
- if (widthm1 >= 32 - lsb)
+ // Width must be in range [1, 32-lsb].
+ unsigned LSB = Inst.getOperand(2).getImm();
+ unsigned Widthm1 = Inst.getOperand(3).getImm();
+ if (Widthm1 >= 32 - LSB)
return Error(Operands[5]->getStartLoc(),
"bitfield width must be in range [1,32-lsb]");
return false;
}
+ // Notionally handles ARM::tLDMIA_UPD too.
case ARM::tLDMIA: {
// If we're parsing Thumb2, the .w variant is available and handles
- // most cases that are normally illegal for a Thumb1 LDM
- // instruction. We'll make the transformation in processInstruction()
- // if necessary.
+ // most cases that are normally illegal for a Thumb1 LDM instruction.
+ // We'll make the transformation in processInstruction() if necessary.
//
// Thumb LDM instructions are writeback iff the base register is not
// in the register list.
unsigned Rn = Inst.getOperand(0).getReg();
- bool hasWritebackToken =
+ bool HasWritebackToken =
(static_cast<ARMOperand*>(Operands[3])->isToken() &&
static_cast<ARMOperand*>(Operands[3])->getToken() == "!");
- bool listContainsBase;
- if (checkLowRegisterList(Inst, 3, Rn, 0, listContainsBase) && !isThumbTwo())
- return Error(Operands[3 + hasWritebackToken]->getStartLoc(),
+ bool ListContainsBase;
+ if (checkLowRegisterList(Inst, 3, Rn, 0, ListContainsBase) && !isThumbTwo())
+ return Error(Operands[3 + HasWritebackToken]->getStartLoc(),
"registers must be in range r0-r7");
// If we should have writeback, then there should be a '!' token.
- if (!listContainsBase && !hasWritebackToken && !isThumbTwo())
+ if (!ListContainsBase && !HasWritebackToken && !isThumbTwo())
return Error(Operands[2]->getStartLoc(),
"writeback operator '!' expected");
// If we should not have writeback, there must not be a '!'. This is
// true even for the 32-bit wide encodings.
- if (listContainsBase && hasWritebackToken)
+ if (ListContainsBase && HasWritebackToken)
return Error(Operands[3]->getStartLoc(),
"writeback operator '!' not allowed when base register "
"in register list");
break;
}
- case ARM::t2LDMIA_UPD: {
+ case ARM::LDMIA_UPD:
+ case ARM::LDMDB_UPD:
+ case ARM::LDMIB_UPD:
+ case ARM::LDMDA_UPD:
+ // ARM variants loading and updating the same register are only officially
+ // UNPREDICTABLE on v7 upwards. Goodness knows what they did before.
+ if (!hasV7Ops())
+ break;
+ // Fallthrough
+ case ARM::t2LDMIA_UPD:
+ case ARM::t2LDMDB_UPD:
+ case ARM::t2STMIA_UPD:
+ case ARM::t2STMDB_UPD: {
if (listContainsReg(Inst, 3, Inst.getOperand(0).getReg()))
- return Error(Operands[4]->getStartLoc(),
- "writeback operator '!' not allowed when base register "
- "in register list");
+ return Error(Operands.back()->getStartLoc(),
+ "writeback register not allowed in register list");
break;
}
+ case ARM::sysLDMIA_UPD:
+ case ARM::sysLDMDA_UPD:
+ case ARM::sysLDMDB_UPD:
+ case ARM::sysLDMIB_UPD:
+ if (!listContainsReg(Inst, 3, ARM::PC))
+ return Error(Operands[4]->getStartLoc(),
+ "writeback register only allowed on system LDM "
+ "if PC in register-list");
+ break;
+ case ARM::sysSTMIA_UPD:
+ case ARM::sysSTMDA_UPD:
+ case ARM::sysSTMDB_UPD:
+ case ARM::sysSTMIB_UPD:
+ return Error(Operands[2]->getStartLoc(),
+ "system STM cannot have writeback register");
+ break;
case ARM::tMUL: {
// The second source operand must be the same register as the destination
// operand.
@@ -5351,26 +5520,35 @@ validateInstruction(MCInst &Inst,
// so only issue a diagnostic for thumb1. The instructions will be
// switched to the t2 encodings in processInstruction() if necessary.
case ARM::tPOP: {
- bool listContainsBase;
- if (checkLowRegisterList(Inst, 2, 0, ARM::PC, listContainsBase) &&
+ bool ListContainsBase;
+ if (checkLowRegisterList(Inst, 2, 0, ARM::PC, ListContainsBase) &&
!isThumbTwo())
return Error(Operands[2]->getStartLoc(),
"registers must be in range r0-r7 or pc");
break;
}
case ARM::tPUSH: {
- bool listContainsBase;
- if (checkLowRegisterList(Inst, 2, 0, ARM::LR, listContainsBase) &&
+ bool ListContainsBase;
+ if (checkLowRegisterList(Inst, 2, 0, ARM::LR, ListContainsBase) &&
!isThumbTwo())
return Error(Operands[2]->getStartLoc(),
"registers must be in range r0-r7 or lr");
break;
}
case ARM::tSTMIA_UPD: {
- bool listContainsBase;
- if (checkLowRegisterList(Inst, 4, 0, 0, listContainsBase) && !isThumbTwo())
+ bool ListContainsBase, InvalidLowList;
+ InvalidLowList = checkLowRegisterList(Inst, 4, Inst.getOperand(0).getReg(),
+ 0, ListContainsBase);
+ if (InvalidLowList && !isThumbTwo())
return Error(Operands[4]->getStartLoc(),
"registers must be in range r0-r7");
+
+ // This would be converted to a 32-bit stm, but that's not valid if the
+ // writeback register is in the list.
+ if (InvalidLowList && ListContainsBase)
+ return Error(Operands[4]->getStartLoc(),
+ "writeback operator '!' not allowed when base register "
+ "in register list");
break;
}
case ARM::tADDrSP: {
@@ -5383,11 +5561,29 @@ validateInstruction(MCInst &Inst,
}
break;
}
+ // Final range checking for Thumb unconditional branch instructions.
+ case ARM::tB:
+ if (!(static_cast<ARMOperand*>(Operands[2]))->isSignedOffset<11, 1>())
+ return Error(Operands[2]->getStartLoc(), "branch target out of range");
+ break;
+ case ARM::t2B: {
+    int Op = (Operands[2]->isImm()) ? 2 : 3;
+    if (!(static_cast<ARMOperand*>(Operands[Op]))->isSignedOffset<24, 1>())
+      return Error(Operands[Op]->getStartLoc(), "branch target out of range");
+ break;
+ }
+ // Final range checking for Thumb conditional branch instructions.
+ case ARM::tBcc:
+ if (!(static_cast<ARMOperand*>(Operands[2]))->isSignedOffset<8, 1>())
+ return Error(Operands[2]->getStartLoc(), "branch target out of range");
+ break;
+ case ARM::t2Bcc: {
+ int Op = (Operands[2]->isImm()) ? 2 : 3;
+ if (!(static_cast<ARMOperand*>(Operands[Op]))->isSignedOffset<20, 1>())
+ return Error(Operands[Op]->getStartLoc(), "branch target out of range");
+ break;
+ }
}
-
- StringRef DepInfo;
- if (isDeprecated(Inst, DepInfo))
- Warning(Loc, "deprecated on " + DepInfo);
return false;
}
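// Presumed semantics of ARMOperand::isSignedOffset<width, scale> used by the
// branch-range checks above (a sketch under those assumptions): the offset
// must be a multiple of 2^scale and fit in a signed width-bit field once
// scaled. For example, tB uses <11, 1>: even offsets in [-2048, 2046].
#include <cstdint>
template <unsigned Width, unsigned Scale>
static bool fitsSignedOffsetSketch(int64_t Val) {
  int64_t Align = 1LL << Scale;
  int64_t Min = -Align * (1LL << (Width - 1));
  int64_t Max = Align * ((1LL << (Width - 1)) - 1);
  return (Val % Align) == 0 && Val >= Min && Val <= Max;
}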
@@ -7425,7 +7621,7 @@ unsigned ARMAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
// 16-bit thumb arithmetic instructions either require or preclude the 'S'
// suffix depending on whether they're in an IT block or not.
unsigned Opc = Inst.getOpcode();
- const MCInstrDesc &MCID = getInstDesc(Opc);
+ const MCInstrDesc &MCID = MII.get(Opc);
if (MCID.TSFlags & ARMII::ThumbArithFlagSetting) {
assert(MCID.hasOptionalDef() &&
"optionally flag setting instruction missing optional def operand");
@@ -7486,12 +7682,22 @@ MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
return true;
}
- // Some instructions need post-processing to, for example, tweak which
- // encoding is selected. Loop on it while changes happen so the
- // individual transformations can chain off each other. E.g.,
- // tPOP(r8)->t2LDMIA_UPD(sp,r8)->t2STR_POST(sp,r8)
- while (processInstruction(Inst, Operands))
- ;
+  { // processInstruction() updates the inITBlock state, so save it away
+ bool wasInITBlock = inITBlock();
+
+ // Some instructions need post-processing to, for example, tweak which
+ // encoding is selected. Loop on it while changes happen so the
+ // individual transformations can chain off each other. E.g.,
+ // tPOP(r8)->t2LDMIA_UPD(sp,r8)->t2STR_POST(sp,r8)
+ while (processInstruction(Inst, Operands))
+ ;
+
+    // Only after the instruction is fully processed can we validate it
+ if (wasInITBlock && hasV8Ops() && isThumb() &&
+ !isV8EligibleForIT(&Inst, 2)) {
+ Warning(IDLoc, "deprecated instruction in IT block");
+ }
+ }
// Only move forward at the very end so that everything in validate
// and process gets a consistent answer about whether we're in an IT
@@ -7544,15 +7750,15 @@ MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
return Error(IDLoc, "instruction variant requires ARMv6 or later");
case Match_RequiresThumb2:
return Error(IDLoc, "instruction variant requires Thumb2");
- case Match_ImmRange0_4: {
+ case Match_ImmRange0_15: {
SMLoc ErrorLoc = ((ARMOperand*)Operands[ErrorInfo])->getStartLoc();
if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc;
- return Error(ErrorLoc, "immediate operand must be in the range [0,4]");
+ return Error(ErrorLoc, "immediate operand must be in the range [0,15]");
}
- case Match_ImmRange0_15: {
+ case Match_ImmRange0_239: {
SMLoc ErrorLoc = ((ARMOperand*)Operands[ErrorInfo])->getStartLoc();
if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc;
- return Error(ErrorLoc, "immediate operand must be in the range [0,15]");
+ return Error(ErrorLoc, "immediate operand must be in the range [0,239]");
}
}
@@ -7580,6 +7786,10 @@ bool ARMAsmParser::ParseDirective(AsmToken DirectiveID) {
return parseDirectiveArch(DirectiveID.getLoc());
else if (IDVal == ".eabi_attribute")
return parseDirectiveEabiAttr(DirectiveID.getLoc());
+ else if (IDVal == ".cpu")
+ return parseDirectiveCPU(DirectiveID.getLoc());
+ else if (IDVal == ".fpu")
+ return parseDirectiveFPU(DirectiveID.getLoc());
else if (IDVal == ".fnstart")
return parseDirectiveFnStart(DirectiveID.getLoc());
else if (IDVal == ".fnend")
@@ -7658,13 +7868,18 @@ bool ARMAsmParser::parseDirectiveARM(SMLoc L) {
return false;
}
+void ARMAsmParser::onLabelParsed(MCSymbol *Symbol) {
+ if (NextSymbolIsThumb) {
+ getParser().getStreamer().EmitThumbFunc(Symbol);
+ NextSymbolIsThumb = false;
+ }
+}
+
/// parseDirectiveThumbFunc
/// ::= .thumbfunc symbol_name
bool ARMAsmParser::parseDirectiveThumbFunc(SMLoc L) {
const MCAsmInfo *MAI = getParser().getStreamer().getContext().getAsmInfo();
bool isMachO = MAI->hasSubsectionsViaSymbols();
- StringRef Name;
- bool needFuncName = true;
  // Darwin asm optionally has the function name after the .thumb_func
  // directive; ELF doesn't.
@@ -7673,29 +7888,19 @@ bool ARMAsmParser::parseDirectiveThumbFunc(SMLoc L) {
if (Tok.isNot(AsmToken::EndOfStatement)) {
if (Tok.isNot(AsmToken::Identifier) && Tok.isNot(AsmToken::String))
return Error(L, "unexpected token in .thumb_func directive");
- Name = Tok.getIdentifier();
+ MCSymbol *Func =
+ getParser().getContext().GetOrCreateSymbol(Tok.getIdentifier());
+ getParser().getStreamer().EmitThumbFunc(Func);
Parser.Lex(); // Consume the identifier token.
- needFuncName = false;
+ return false;
}
}
if (getLexer().isNot(AsmToken::EndOfStatement))
return Error(L, "unexpected token in directive");
- // Eat the end of statement and any blank lines that follow.
- while (getLexer().is(AsmToken::EndOfStatement))
- Parser.Lex();
+ NextSymbolIsThumb = true;
- // FIXME: assuming function name will be the line following .thumb_func
- // We really should be checking the next symbol definition even if there's
- // stuff in between.
- if (needFuncName) {
- Name = Parser.getTok().getIdentifier();
- }
-
- // Mark symbol as a thumb symbol.
- MCSymbol *Func = getParser().getContext().GetOrCreateSymbol(Name);
- getParser().getStreamer().EmitThumbFunc(Func);
return false;
}
@@ -7807,7 +8012,48 @@ bool ARMAsmParser::parseDirectiveArch(SMLoc L) {
/// parseDirectiveEabiAttr
/// ::= .eabi_attribute int, int
bool ARMAsmParser::parseDirectiveEabiAttr(SMLoc L) {
- return true;
+ if (Parser.getTok().isNot(AsmToken::Integer))
+ return Error(L, "integer expected");
+ int64_t Tag = Parser.getTok().getIntVal();
+ Parser.Lex(); // eat tag integer
+
+ if (Parser.getTok().isNot(AsmToken::Comma))
+ return Error(L, "comma expected");
+ Parser.Lex(); // skip comma
+
+ L = Parser.getTok().getLoc();
+ if (Parser.getTok().isNot(AsmToken::Integer))
+ return Error(L, "integer expected");
+ int64_t Value = Parser.getTok().getIntVal();
+ Parser.Lex(); // eat value integer
+
+ getTargetStreamer().emitAttribute(Tag, Value);
+ return false;
+}
+
+/// parseDirectiveCPU
+/// ::= .cpu str
+bool ARMAsmParser::parseDirectiveCPU(SMLoc L) {
+ StringRef CPU = getParser().parseStringToEndOfStatement().trim();
+ getTargetStreamer().emitTextAttribute(ARMBuildAttrs::CPU_name, CPU);
+ return false;
+}
+
+/// parseDirectiveFPU
+/// ::= .fpu str
+bool ARMAsmParser::parseDirectiveFPU(SMLoc L) {
+ StringRef FPU = getParser().parseStringToEndOfStatement().trim();
+
+ unsigned ID = StringSwitch<unsigned>(FPU)
+#define ARM_FPU_NAME(NAME, ID) .Case(NAME, ARM::ID)
+#include "ARMFPUName.def"
+ .Default(ARM::INVALID_FPU);
+
+ if (ID == ARM::INVALID_FPU)
+ return Error(L, "Unknown FPU name");
+
+ getTargetStreamer().emitFPU(ID);
+ return false;
}
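// The .fpu handler above leans on the X-macro idiom: ARMFPUName.def expands
// ARM_FPU_NAME(NAME, ID) once per known FPU, so the name table lives in one
// place. A self-contained miniature of the same idiom (made-up list, not the
// real .def contents):
#include <cstring>
#define EXAMPLE_FPU_NAMES \
  ARM_FPU_NAME("vfpv3", EX_VFPV3) \
  ARM_FPU_NAME("neon", EX_NEON)
enum ExampleFPU { EX_INVALID, EX_VFPV3, EX_NEON };
static ExampleFPU lookupFPUSketch(const char *Name) {
#define ARM_FPU_NAME(NAME, ID) if (!std::strcmp(Name, NAME)) return ID;
  EXAMPLE_FPU_NAMES
#undef ARM_FPU_NAME
  return EX_INVALID;
}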
/// parseDirectiveFnStart
@@ -7820,7 +8066,7 @@ bool ARMAsmParser::parseDirectiveFnStart(SMLoc L) {
}
FnStartLoc = L;
- getParser().getStreamer().EmitFnStart();
+ getTargetStreamer().emitFnStart();
return false;
}
@@ -7833,8 +8079,7 @@ bool ARMAsmParser::parseDirectiveFnEnd(SMLoc L) {
// Reset the unwind directives parser state
resetUnwindDirectiveParserState();
-
- getParser().getStreamer().EmitFnEnd();
+ getTargetStreamer().emitFnEnd();
return false;
}
@@ -7856,7 +8101,7 @@ bool ARMAsmParser::parseDirectiveCantUnwind(SMLoc L) {
return true;
}
- getParser().getStreamer().EmitCantUnwind();
+ getTargetStreamer().emitCantUnwind();
return false;
}
@@ -7887,7 +8132,7 @@ bool ARMAsmParser::parseDirectivePersonality(SMLoc L) {
Parser.Lex();
MCSymbol *PR = getParser().getContext().GetOrCreateSymbol(Name);
- getParser().getStreamer().EmitPersonality(PR);
+ getTargetStreamer().emitPersonality(PR);
return false;
}
@@ -7904,7 +8149,7 @@ bool ARMAsmParser::parseDirectiveHandlerData(SMLoc L) {
return true;
}
- getParser().getStreamer().EmitHandlerData();
+ getTargetStreamer().emitHandlerData();
return false;
}
@@ -7964,9 +8209,8 @@ bool ARMAsmParser::parseDirectiveSetFP(SMLoc L) {
Offset = CE->getValue();
}
- getParser().getStreamer().EmitSetFP(static_cast<unsigned>(NewFPReg),
- static_cast<unsigned>(NewSPReg),
- Offset);
+ getTargetStreamer().emitSetFP(static_cast<unsigned>(NewFPReg),
+ static_cast<unsigned>(NewSPReg), Offset);
return false;
}
@@ -7995,7 +8239,7 @@ bool ARMAsmParser::parseDirectivePad(SMLoc L) {
if (!CE)
return Error(ExLoc, "pad offset must be an immediate");
- getParser().getStreamer().EmitPad(CE->getValue());
+ getTargetStreamer().emitPad(CE->getValue());
return false;
}
@@ -8027,7 +8271,7 @@ bool ARMAsmParser::parseDirectiveRegSave(SMLoc L, bool IsVector) {
if (IsVector && !Op->isDPRRegList())
return Error(L, ".vsave expects DPR registers");
- getParser().getStreamer().EmitRegSave(Op->getRegList(), IsVector);
+ getTargetStreamer().emitRegSave(Op->getRegList(), IsVector);
return false;
}
diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
index 8a06664..9c7988f 100644
--- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
+++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
@@ -323,8 +323,6 @@ static DecodeStatus DecodeVCVTD(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
static DecodeStatus DecodeVCVTQ(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeImm0_4(MCInst &Inst, unsigned Insn, uint64_t Address,
- const void *Decoder);
static DecodeStatus DecodeThumbAddSpecialReg(MCInst &Inst, uint16_t Insn,
@@ -507,6 +505,14 @@ DecodeStatus ARMDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
}
MI.clear();
+ result = decodeInstruction(DecoderTablev8Crypto32, MI, insn, Address,
+ this, STI);
+ if (result != MCDisassembler::Fail) {
+ Size = 4;
+ return result;
+ }
+
+ MI.clear();
Size = 0;
return MCDisassembler::Fail;
}
@@ -823,16 +829,28 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
Check(result, AddThumbPredicate(MI));
return result;
}
- }
- MI.clear();
- uint32_t NEONv8Insn = insn32;
- NEONv8Insn &= 0xF3FFFFFF; // Clear bits 27-26
- result = decodeInstruction(DecoderTablev8NEON32, MI, NEONv8Insn, Address,
- this, STI);
- if (result != MCDisassembler::Fail) {
- Size = 4;
- return result;
+ MI.clear();
+ uint32_t NEONCryptoInsn = insn32;
+ NEONCryptoInsn &= 0xF0FFFFFF; // Clear bits 27-24
+ NEONCryptoInsn |= (NEONCryptoInsn & 0x10000000) >> 4; // Move bit 28 to bit 24
+ NEONCryptoInsn |= 0x12000000; // Set bits 28 and 25
+ result = decodeInstruction(DecoderTablev8Crypto32, MI, NEONCryptoInsn,
+ Address, this, STI);
+ if (result != MCDisassembler::Fail) {
+ Size = 4;
+ return result;
+ }
+
+ MI.clear();
+ uint32_t NEONv8Insn = insn32;
+ NEONv8Insn &= 0xF3FFFFFF; // Clear bits 27-26
+ result = decodeInstruction(DecoderTablev8NEON32, MI, NEONv8Insn, Address,
+ this, STI);
+ if (result != MCDisassembler::Fail) {
+ Size = 4;
+ return result;
+ }
}
MI.clear();
@@ -1185,20 +1203,22 @@ static DecodeStatus DecodeRegListOperand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
- bool writebackLoad = false;
- unsigned writebackReg = 0;
+ bool NeedDisjointWriteback = false;
+ unsigned WritebackReg = 0;
switch (Inst.getOpcode()) {
- default:
- break;
- case ARM::LDMIA_UPD:
- case ARM::LDMDB_UPD:
- case ARM::LDMIB_UPD:
- case ARM::LDMDA_UPD:
- case ARM::t2LDMIA_UPD:
- case ARM::t2LDMDB_UPD:
- writebackLoad = true;
- writebackReg = Inst.getOperand(0).getReg();
- break;
+ default:
+ break;
+ case ARM::LDMIA_UPD:
+ case ARM::LDMDB_UPD:
+ case ARM::LDMIB_UPD:
+ case ARM::LDMDA_UPD:
+ case ARM::t2LDMIA_UPD:
+ case ARM::t2LDMDB_UPD:
+ case ARM::t2STMIA_UPD:
+ case ARM::t2STMDB_UPD:
+ NeedDisjointWriteback = true;
+ WritebackReg = Inst.getOperand(0).getReg();
+ break;
}
// Empty register lists are not allowed.
@@ -1208,7 +1228,7 @@ static DecodeStatus DecodeRegListOperand(MCInst &Inst, unsigned Val,
if (!Check(S, DecodeGPRRegisterClass(Inst, i, Address, Decoder)))
return MCDisassembler::Fail;
// Writeback not allowed if Rn is in the target list.
- if (writebackLoad && writebackReg == Inst.end()[-1].getReg())
+ if (NeedDisjointWriteback && WritebackReg == Inst.end()[-1].getReg())
Check(S, MCDisassembler::SoftFail);
}
}
@@ -1343,6 +1363,11 @@ static DecodeStatus DecodeCopMemInstruction(MCInst &Inst, unsigned Insn,
break;
}
+ uint64_t featureBits = ((const MCDisassembler*)Decoder)->getSubtargetInfo()
+ .getFeatureBits();
+ if ((featureBits & ARM::HasV8Ops) && (coproc != 14))
+ return MCDisassembler::Fail;
+
Inst.addOperand(MCOperand::CreateImm(coproc));
Inst.addOperand(MCOperand::CreateImm(CRd));
if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
@@ -3794,6 +3819,11 @@ static DecodeStatus DecodeCoprocessor(MCInst &Inst, unsigned Val,
if (Val == 0xA || Val == 0xB)
return MCDisassembler::Fail;
+ uint64_t featureBits = ((const MCDisassembler*)Decoder)->getSubtargetInfo()
+ .getFeatureBits();
+ if ((featureBits & ARM::HasV8Ops) && !(Val == 14 || Val == 15))
+ return MCDisassembler::Fail;
+
Inst.addOperand(MCOperand::CreateImm(Val));
return MCDisassembler::Success;
}
@@ -4901,15 +4931,6 @@ static DecodeStatus DecodeVCVTQ(MCInst &Inst, unsigned Insn,
return S;
}
-static DecodeStatus DecodeImm0_4(MCInst &Inst, unsigned Insn, uint64_t Address,
- const void *Decoder)
-{
- unsigned Imm = fieldFromInstruction(Insn, 0, 3);
- if (Imm > 4) return MCDisassembler::Fail;
- Inst.addOperand(MCOperand::CreateImm(Imm));
- return MCDisassembler::Success;
-}
-
static DecodeStatus DecodeLDR(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
index 97da232..f897028 100644
--- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
+++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
@@ -76,14 +76,23 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
StringRef Annot) {
unsigned Opcode = MI->getOpcode();
+  switch (Opcode) {
// Check for HINT instructions w/ canonical names.
- if (Opcode == ARM::HINT || Opcode == ARM::t2HINT) {
+ case ARM::HINT:
+ case ARM::tHINT:
+ case ARM::t2HINT:
switch (MI->getOperand(0).getImm()) {
case 0: O << "\tnop"; break;
case 1: O << "\tyield"; break;
case 2: O << "\twfe"; break;
case 3: O << "\twfi"; break;
case 4: O << "\tsev"; break;
+ case 5:
+ if ((getAvailableFeatures() & ARM::HasV8Ops)) {
+ O << "\tsevl";
+ break;
+ } // Fallthrough for non-v8
default:
// Anything else should just print normally.
printInstruction(MI, O);
@@ -95,10 +104,9 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
O << ".w";
printAnnotation(O, Annot);
return;
- }
// Check for MOVs and print canonical forms, instead.
- if (Opcode == ARM::MOVsr) {
+ case ARM::MOVsr: {
// FIXME: Thumb variants?
const MCOperand &Dst = MI->getOperand(0);
const MCOperand &MO1 = MI->getOperand(1);
@@ -121,7 +129,7 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
return;
}
- if (Opcode == ARM::MOVsi) {
+ case ARM::MOVsi: {
// FIXME: Thumb variants?
const MCOperand &Dst = MI->getOperand(0);
const MCOperand &MO1 = MI->getOperand(1);
@@ -149,81 +157,91 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
return;
}
-
// A8.6.123 PUSH
- if ((Opcode == ARM::STMDB_UPD || Opcode == ARM::t2STMDB_UPD) &&
- MI->getOperand(0).getReg() == ARM::SP &&
- MI->getNumOperands() > 5) {
- // Should only print PUSH if there are at least two registers in the list.
- O << '\t' << "push";
- printPredicateOperand(MI, 2, O);
- if (Opcode == ARM::t2STMDB_UPD)
- O << ".w";
- O << '\t';
- printRegisterList(MI, 4, O);
- printAnnotation(O, Annot);
- return;
- }
- if (Opcode == ARM::STR_PRE_IMM && MI->getOperand(2).getReg() == ARM::SP &&
- MI->getOperand(3).getImm() == -4) {
- O << '\t' << "push";
- printPredicateOperand(MI, 4, O);
- O << "\t{";
- printRegName(O, MI->getOperand(1).getReg());
- O << "}";
- printAnnotation(O, Annot);
- return;
- }
+ case ARM::STMDB_UPD:
+ case ARM::t2STMDB_UPD:
+ if (MI->getOperand(0).getReg() == ARM::SP && MI->getNumOperands() > 5) {
+ // Should only print PUSH if there are at least two registers in the list.
+ O << '\t' << "push";
+ printPredicateOperand(MI, 2, O);
+ if (Opcode == ARM::t2STMDB_UPD)
+ O << ".w";
+ O << '\t';
+ printRegisterList(MI, 4, O);
+ printAnnotation(O, Annot);
+ return;
+ } else
+ break;
+
+ case ARM::STR_PRE_IMM:
+ if (MI->getOperand(2).getReg() == ARM::SP &&
+ MI->getOperand(3).getImm() == -4) {
+ O << '\t' << "push";
+ printPredicateOperand(MI, 4, O);
+ O << "\t{";
+ printRegName(O, MI->getOperand(1).getReg());
+ O << "}";
+ printAnnotation(O, Annot);
+ return;
+ } else
+ break;
// A8.6.122 POP
- if ((Opcode == ARM::LDMIA_UPD || Opcode == ARM::t2LDMIA_UPD) &&
- MI->getOperand(0).getReg() == ARM::SP &&
- MI->getNumOperands() > 5) {
- // Should only print POP if there are at least two registers in the list.
- O << '\t' << "pop";
- printPredicateOperand(MI, 2, O);
- if (Opcode == ARM::t2LDMIA_UPD)
- O << ".w";
- O << '\t';
- printRegisterList(MI, 4, O);
- printAnnotation(O, Annot);
- return;
- }
- if (Opcode == ARM::LDR_POST_IMM && MI->getOperand(2).getReg() == ARM::SP &&
- MI->getOperand(4).getImm() == 4) {
- O << '\t' << "pop";
- printPredicateOperand(MI, 5, O);
- O << "\t{";
- printRegName(O, MI->getOperand(0).getReg());
- O << "}";
- printAnnotation(O, Annot);
- return;
- }
-
+ case ARM::LDMIA_UPD:
+ case ARM::t2LDMIA_UPD:
+ if (MI->getOperand(0).getReg() == ARM::SP && MI->getNumOperands() > 5) {
+ // Should only print POP if there are at least two registers in the list.
+ O << '\t' << "pop";
+ printPredicateOperand(MI, 2, O);
+ if (Opcode == ARM::t2LDMIA_UPD)
+ O << ".w";
+ O << '\t';
+ printRegisterList(MI, 4, O);
+ printAnnotation(O, Annot);
+ return;
+ } else
+ break;
+
+ case ARM::LDR_POST_IMM:
+ if (MI->getOperand(2).getReg() == ARM::SP &&
+ MI->getOperand(4).getImm() == 4) {
+ O << '\t' << "pop";
+ printPredicateOperand(MI, 5, O);
+ O << "\t{";
+ printRegName(O, MI->getOperand(0).getReg());
+ O << "}";
+ printAnnotation(O, Annot);
+ return;
+ } else
+ break;
// A8.6.355 VPUSH
- if ((Opcode == ARM::VSTMSDB_UPD || Opcode == ARM::VSTMDDB_UPD) &&
- MI->getOperand(0).getReg() == ARM::SP) {
- O << '\t' << "vpush";
- printPredicateOperand(MI, 2, O);
- O << '\t';
- printRegisterList(MI, 4, O);
- printAnnotation(O, Annot);
- return;
- }
+ case ARM::VSTMSDB_UPD:
+ case ARM::VSTMDDB_UPD:
+ if (MI->getOperand(0).getReg() == ARM::SP) {
+ O << '\t' << "vpush";
+ printPredicateOperand(MI, 2, O);
+ O << '\t';
+ printRegisterList(MI, 4, O);
+ printAnnotation(O, Annot);
+ return;
+ } else
+ break;
// A8.6.354 VPOP
- if ((Opcode == ARM::VLDMSIA_UPD || Opcode == ARM::VLDMDIA_UPD) &&
- MI->getOperand(0).getReg() == ARM::SP) {
- O << '\t' << "vpop";
- printPredicateOperand(MI, 2, O);
- O << '\t';
- printRegisterList(MI, 4, O);
- printAnnotation(O, Annot);
- return;
- }
+ case ARM::VLDMSIA_UPD:
+ case ARM::VLDMDIA_UPD:
+ if (MI->getOperand(0).getReg() == ARM::SP) {
+ O << '\t' << "vpop";
+ printPredicateOperand(MI, 2, O);
+ O << '\t';
+ printRegisterList(MI, 4, O);
+ printAnnotation(O, Annot);
+ return;
+ } else
+ break;
- if (Opcode == ARM::tLDMIA) {
+ case ARM::tLDMIA: {
bool Writeback = true;
unsigned BaseReg = MI->getOperand(0).getReg();
for (unsigned i = 3; i < MI->getNumOperands(); ++i) {
@@ -249,9 +267,10 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
// GPRs. However, when decoding them, the two GPRs cannot be automatically
// expressed as a GPRPair, so we have to manually merge them.
// FIXME: We would really like to be able to tablegen'erate this.
- if (Opcode == ARM::LDREXD || Opcode == ARM::STREXD) {
+ case ARM::LDREXD: case ARM::STREXD:
+ case ARM::LDAEXD: case ARM::STLEXD:
const MCRegisterClass& MRC = MRI.getRegClass(ARM::GPRRegClassID);
- bool isStore = Opcode == ARM::STREXD;
+ bool isStore = Opcode == ARM::STREXD || Opcode == ARM::STLEXD;
unsigned Reg = MI->getOperand(isStore ? 1 : 0).getReg();
if (MRC.contains(Reg)) {
MCInst NewMI;
@@ -676,7 +695,7 @@ void ARMInstPrinter::printBitfieldInvMaskImmOperand(const MCInst *MI,
void ARMInstPrinter::printMemBOption(const MCInst *MI, unsigned OpNum,
raw_ostream &O) {
unsigned val = MI->getOperand(OpNum).getImm();
- O << ARM_MB::MemBOptToString(val);
+ O << ARM_MB::MemBOptToString(val, (getAvailableFeatures() & ARM::HasV8Ops));
}
void ARMInstPrinter::printInstSyncBOption(const MCInst *MI, unsigned OpNum,
diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
index b1e25d8..5615b80 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
@@ -25,9 +25,9 @@
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCValue.h"
-#include "llvm/Object/MachOFormat.h"
#include "llvm/Support/ELF.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MachO.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -640,16 +640,16 @@ public:
// FIXME: This should be in a separate file.
class DarwinARMAsmBackend : public ARMAsmBackend {
public:
- const object::mach::CPUSubtypeARM Subtype;
+ const MachO::CPUSubTypeARM Subtype;
DarwinARMAsmBackend(const Target &T, const StringRef TT,
- object::mach::CPUSubtypeARM st)
+ MachO::CPUSubTypeARM st)
: ARMAsmBackend(T, TT), Subtype(st) {
HasDataInCodeSupport = true;
}
MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
return createARMMachObjectWriter(OS, /*Is64Bit=*/false,
- object::mach::CTM_ARM,
+ MachO::CPU_TYPE_ARM,
Subtype);
}
@@ -660,22 +660,24 @@ public:
} // end anonymous namespace
-MCAsmBackend *llvm::createARMAsmBackend(const Target &T, StringRef TT, StringRef CPU) {
+MCAsmBackend *llvm::createARMAsmBackend(const Target &T,
+ const MCRegisterInfo &MRI,
+ StringRef TT, StringRef CPU) {
Triple TheTriple(TT);
if (TheTriple.isOSDarwin()) {
- object::mach::CPUSubtypeARM CS =
- StringSwitch<object::mach::CPUSubtypeARM>(TheTriple.getArchName())
- .Cases("armv4t", "thumbv4t", object::mach::CSARM_V4T)
- .Cases("armv5e", "thumbv5e",object::mach::CSARM_V5TEJ)
- .Cases("armv6", "thumbv6", object::mach::CSARM_V6)
- .Cases("armv6m", "thumbv6m", object::mach::CSARM_V6M)
- .Cases("armv7em", "thumbv7em", object::mach::CSARM_V7EM)
- .Cases("armv7f", "thumbv7f", object::mach::CSARM_V7F)
- .Cases("armv7k", "thumbv7k", object::mach::CSARM_V7K)
- .Cases("armv7m", "thumbv7m", object::mach::CSARM_V7M)
- .Cases("armv7s", "thumbv7s", object::mach::CSARM_V7S)
- .Default(object::mach::CSARM_V7);
+ MachO::CPUSubTypeARM CS =
+ StringSwitch<MachO::CPUSubTypeARM>(TheTriple.getArchName())
+ .Cases("armv4t", "thumbv4t", MachO::CPU_SUBTYPE_ARM_V4T)
+ .Cases("armv5e", "thumbv5e", MachO::CPU_SUBTYPE_ARM_V5TEJ)
+ .Cases("armv6", "thumbv6", MachO::CPU_SUBTYPE_ARM_V6)
+ .Cases("armv6m", "thumbv6m", MachO::CPU_SUBTYPE_ARM_V6M)
+ .Cases("armv7em", "thumbv7em", MachO::CPU_SUBTYPE_ARM_V7EM)
+ .Cases("armv7f", "thumbv7f", MachO::CPU_SUBTYPE_ARM_V7F)
+ .Cases("armv7k", "thumbv7k", MachO::CPU_SUBTYPE_ARM_V7K)
+ .Cases("armv7m", "thumbv7m", MachO::CPU_SUBTYPE_ARM_V7M)
+ .Cases("armv7s", "thumbv7s", MachO::CPU_SUBTYPE_ARM_V7S)
+ .Default(MachO::CPU_SUBTYPE_ARM_V7);
return new DarwinARMAsmBackend(T, TT, CS);
}
diff --git a/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h b/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
index ff9917d..af939fc 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
@@ -121,41 +121,41 @@ namespace ARM_MB {
// the option field for memory barrier operations.
enum MemBOpt {
RESERVED_0 = 0,
- RESERVED_1 = 1,
+ OSHLD = 1,
OSHST = 2,
OSH = 3,
RESERVED_4 = 4,
- RESERVED_5 = 5,
+ NSHLD = 5,
NSHST = 6,
NSH = 7,
RESERVED_8 = 8,
- RESERVED_9 = 9,
+ ISHLD = 9,
ISHST = 10,
ISH = 11,
RESERVED_12 = 12,
- RESERVED_13 = 13,
+ LD = 13,
ST = 14,
SY = 15
};
- inline static const char *MemBOptToString(unsigned val) {
+ inline static const char *MemBOptToString(unsigned val, bool HasV8) {
switch (val) {
default: llvm_unreachable("Unknown memory operation");
case SY: return "sy";
case ST: return "st";
- case RESERVED_13: return "#0xd";
+ case LD: return HasV8 ? "ld" : "#0xd";
case RESERVED_12: return "#0xc";
case ISH: return "ish";
case ISHST: return "ishst";
- case RESERVED_9: return "#0x9";
+ case ISHLD: return HasV8 ? "ishld" : "#0x9";
case RESERVED_8: return "#0x8";
case NSH: return "nsh";
case NSHST: return "nshst";
- case RESERVED_5: return "#0x5";
+ case NSHLD: return HasV8 ? "nshld" : "#0x5";
case RESERVED_4: return "#0x4";
case OSH: return "osh";
case OSHST: return "oshst";
- case RESERVED_1: return "#0x1";
+ case OSHLD: return HasV8 ? "oshld" : "#0x1";
case RESERVED_0: return "#0x0";
}
}
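// Usage sketch for the updated helper above: the same 4-bit barrier option
// prints differently depending on whether the v8 load-barrier names exist.
static void memBOptExamplesSketch() {
  using namespace llvm::ARM_MB;
  const char *A = MemBOptToString(LD, /*HasV8=*/true);  // "ld"
  const char *B = MemBOptToString(LD, /*HasV8=*/false); // "#0xd"
  const char *C = MemBOptToString(SY, /*HasV8=*/false); // "sy"
  (void)A; (void)B; (void)C;
}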
diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
index 6b98205..471897d 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
@@ -13,6 +13,8 @@
//
//===----------------------------------------------------------------------===//
+#include "ARMBuildAttrs.h"
+#include "ARMFPUName.h"
#include "ARMRegisterInfo.h"
#include "ARMUnwindOp.h"
#include "ARMUnwindOpAsm.h"
@@ -27,6 +29,7 @@
#include "llvm/MC/MCELFSymbolFlags.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstPrinter.h"
#include "llvm/MC/MCObjectStreamer.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSection.h"
@@ -36,7 +39,9 @@
#include "llvm/MC/MCValue.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ELF.h"
+#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
using namespace llvm;
@@ -45,8 +50,218 @@ static std::string GetAEABIUnwindPersonalityName(unsigned Index) {
return (Twine("__aeabi_unwind_cpp_pr") + Twine(Index)).str();
}
+static const char *GetFPUName(unsigned ID) {
+ switch (ID) {
+ default:
+ llvm_unreachable("Unknown FPU kind");
+ break;
+#define ARM_FPU_NAME(NAME, ID) case ARM::ID: return NAME;
+#include "ARMFPUName.def"
+ }
+ return NULL;
+}
+
namespace {
+class ARMELFStreamer;
+
+class ARMTargetAsmStreamer : public ARMTargetStreamer {
+ formatted_raw_ostream &OS;
+ MCInstPrinter &InstPrinter;
+
+ virtual void emitFnStart();
+ virtual void emitFnEnd();
+ virtual void emitCantUnwind();
+ virtual void emitPersonality(const MCSymbol *Personality);
+ virtual void emitHandlerData();
+ virtual void emitSetFP(unsigned FpReg, unsigned SpReg, int64_t Offset = 0);
+ virtual void emitPad(int64_t Offset);
+ virtual void emitRegSave(const SmallVectorImpl<unsigned> &RegList,
+ bool isVector);
+
+ virtual void switchVendor(StringRef Vendor);
+ virtual void emitAttribute(unsigned Attribute, unsigned Value);
+ virtual void emitTextAttribute(unsigned Attribute, StringRef String);
+ virtual void emitFPU(unsigned FPU);
+ virtual void finishAttributeSection();
+
+public:
+ ARMTargetAsmStreamer(formatted_raw_ostream &OS, MCInstPrinter &InstPrinter);
+};
+
+ARMTargetAsmStreamer::ARMTargetAsmStreamer(formatted_raw_ostream &OS,
+ MCInstPrinter &InstPrinter)
+ : OS(OS), InstPrinter(InstPrinter) {}
+void ARMTargetAsmStreamer::emitFnStart() { OS << "\t.fnstart\n"; }
+void ARMTargetAsmStreamer::emitFnEnd() { OS << "\t.fnend\n"; }
+void ARMTargetAsmStreamer::emitCantUnwind() { OS << "\t.cantunwind\n"; }
+void ARMTargetAsmStreamer::emitPersonality(const MCSymbol *Personality) {
+ OS << "\t.personality " << Personality->getName() << '\n';
+}
+void ARMTargetAsmStreamer::emitHandlerData() { OS << "\t.handlerdata\n"; }
+void ARMTargetAsmStreamer::emitSetFP(unsigned FpReg, unsigned SpReg,
+ int64_t Offset) {
+ OS << "\t.setfp\t";
+ InstPrinter.printRegName(OS, FpReg);
+ OS << ", ";
+ InstPrinter.printRegName(OS, SpReg);
+ if (Offset)
+ OS << ", #" << Offset;
+ OS << '\n';
+}
+void ARMTargetAsmStreamer::emitPad(int64_t Offset) {
+ OS << "\t.pad\t#" << Offset << '\n';
+}
+void ARMTargetAsmStreamer::emitRegSave(const SmallVectorImpl<unsigned> &RegList,
+ bool isVector) {
+ assert(RegList.size() && "RegList should not be empty");
+ if (isVector)
+ OS << "\t.vsave\t{";
+ else
+ OS << "\t.save\t{";
+
+ InstPrinter.printRegName(OS, RegList[0]);
+
+ for (unsigned i = 1, e = RegList.size(); i != e; ++i) {
+ OS << ", ";
+ InstPrinter.printRegName(OS, RegList[i]);
+ }
+
+ OS << "}\n";
+}
+void ARMTargetAsmStreamer::switchVendor(StringRef Vendor) {
+}
+void ARMTargetAsmStreamer::emitAttribute(unsigned Attribute, unsigned Value) {
+ OS << "\t.eabi_attribute\t" << Attribute << ", " << Twine(Value) << "\n";
+}
+void ARMTargetAsmStreamer::emitTextAttribute(unsigned Attribute,
+ StringRef String) {
+ switch (Attribute) {
+ default: llvm_unreachable("Unsupported Text attribute in ASM Mode");
+ case ARMBuildAttrs::CPU_name:
+ OS << "\t.cpu\t" << String.lower() << "\n";
+ break;
+ }
+}
+void ARMTargetAsmStreamer::emitFPU(unsigned FPU) {
+ OS << "\t.fpu\t" << GetFPUName(FPU) << "\n";
+}
+void ARMTargetAsmStreamer::finishAttributeSection() {
+}
+
+class ARMTargetELFStreamer : public ARMTargetStreamer {
+private:
+ // This structure holds all attributes, accounting for
+ // their string/numeric value, so we can later emmit them
+ // in declaration order, keeping all in the same vector
+ struct AttributeItem {
+ enum {
+ HiddenAttribute = 0,
+ NumericAttribute,
+ TextAttribute
+ } Type;
+ unsigned Tag;
+ unsigned IntValue;
+ StringRef StringValue;
+
+ static bool LessTag(const AttributeItem &LHS, const AttributeItem &RHS) {
+ return (LHS.Tag < RHS.Tag);
+ }
+ };
+
+ StringRef CurrentVendor;
+ unsigned FPU;
+ SmallVector<AttributeItem, 64> Contents;
+
+ const MCSection *AttributeSection;
+
+ // FIXME: this should be in a more generic place, but
+ // getULEBSize() is in MCAsmInfo and will be moved to MCDwarf
+ static size_t getULEBSize(int Value) {
+ size_t Size = 0;
+ do {
+ Value >>= 7;
+ Size += sizeof(int8_t); // Is this really necessary?
+ } while (Value);
+ return Size;
+ }
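// Sketch of the ULEB128 encoding that getULEBSize() above is sizing: each
// 7-bit group becomes one byte, with the high bit set on every byte except
// the last, so the size is simply the number of 7-bit groups.
#include <cstdint>
#include <vector>
static std::vector<uint8_t> encodeULEB128Sketch(uint64_t Value) {
  std::vector<uint8_t> Bytes;
  do {
    uint8_t Byte = Value & 0x7f;
    Value >>= 7;
    if (Value)
      Byte |= 0x80; // more groups follow
    Bytes.push_back(Byte);
  } while (Value);
  return Bytes; // Bytes.size() matches getULEBSize() for non-negative input
}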
+
+ AttributeItem *getAttributeItem(unsigned Attribute) {
+ for (size_t i = 0; i < Contents.size(); ++i)
+ if (Contents[i].Tag == Attribute)
+ return &Contents[i];
+ return 0;
+ }
+
+ void setAttributeItem(unsigned Attribute, unsigned Value,
+ bool OverwriteExisting) {
+ // Look for existing attribute item
+ if (AttributeItem *Item = getAttributeItem(Attribute)) {
+ if (!OverwriteExisting)
+ return;
+ Item->IntValue = Value;
+ return;
+ }
+
+ // Create new attribute item
+ AttributeItem Item = {
+ AttributeItem::NumericAttribute,
+ Attribute,
+ Value,
+ StringRef("")
+ };
+ Contents.push_back(Item);
+ }
+
+ void setAttributeItem(unsigned Attribute, StringRef Value,
+ bool OverwriteExisting) {
+ // Look for existing attribute item
+ if (AttributeItem *Item = getAttributeItem(Attribute)) {
+ if (!OverwriteExisting)
+ return;
+ Item->StringValue = Value;
+ return;
+ }
+
+ // Create new attribute item
+ AttributeItem Item = {
+ AttributeItem::TextAttribute,
+ Attribute,
+ 0,
+ Value
+ };
+ Contents.push_back(Item);
+ }
+
+ void emitFPUDefaultAttributes();
+
+ ARMELFStreamer &getStreamer();
+
+ virtual void emitFnStart();
+ virtual void emitFnEnd();
+ virtual void emitCantUnwind();
+ virtual void emitPersonality(const MCSymbol *Personality);
+ virtual void emitHandlerData();
+ virtual void emitSetFP(unsigned FpReg, unsigned SpReg, int64_t Offset = 0);
+ virtual void emitPad(int64_t Offset);
+ virtual void emitRegSave(const SmallVectorImpl<unsigned> &RegList,
+ bool isVector);
+
+ virtual void switchVendor(StringRef Vendor);
+ virtual void emitAttribute(unsigned Attribute, unsigned Value);
+ virtual void emitTextAttribute(unsigned Attribute, StringRef String);
+ virtual void emitFPU(unsigned FPU);
+ virtual void finishAttributeSection();
+
+ size_t calculateContentSize() const;
+
+public:
+ ARMTargetELFStreamer()
+ : ARMTargetStreamer(), CurrentVendor("aeabi"), FPU(ARM::INVALID_FPU),
+ AttributeSection(0) {
+ }
+};
+
/// Extend the generic ELFStreamer class so that it can emit mapping symbols at
/// the appropriate points in the object files. These symbols are defined in the
/// ARM ELF ABI: infocenter.arm.com/help/topic/com.arm.../IHI0044D_aaelf.pdf.
@@ -61,27 +276,29 @@ namespace {
/// by MachO. Beware!
class ARMELFStreamer : public MCELFStreamer {
public:
- ARMELFStreamer(MCContext &Context, MCAsmBackend &TAB, raw_ostream &OS,
- MCCodeEmitter *Emitter, bool IsThumb)
- : MCELFStreamer(SK_ARMELFStreamer, Context, TAB, OS, Emitter),
+ friend class ARMTargetELFStreamer;
+
+ ARMELFStreamer(MCContext &Context, MCTargetStreamer *TargetStreamer,
+ MCAsmBackend &TAB, raw_ostream &OS, MCCodeEmitter *Emitter,
+ bool IsThumb)
+ : MCELFStreamer(Context, TargetStreamer, TAB, OS, Emitter),
IsThumb(IsThumb), MappingSymbolCounter(0), LastEMS(EMS_None) {
Reset();
}
~ARMELFStreamer() {}
+ virtual void FinishImpl();
+
// ARM exception handling directives
- virtual void EmitFnStart();
- virtual void EmitFnEnd();
- virtual void EmitCantUnwind();
- virtual void EmitPersonality(const MCSymbol *Per);
- virtual void EmitHandlerData();
- virtual void EmitSetFP(unsigned NewFpReg,
- unsigned NewSpReg,
- int64_t Offset = 0);
- virtual void EmitPad(int64_t Offset);
- virtual void EmitRegSave(const SmallVectorImpl<unsigned> &RegList,
- bool isVector);
+ void emitFnStart();
+ void emitFnEnd();
+ void emitCantUnwind();
+ void emitPersonality(const MCSymbol *Per);
+ void emitHandlerData();
+ void emitSetFP(unsigned NewFpReg, unsigned NewSpReg, int64_t Offset = 0);
+ void emitPad(int64_t Offset);
+ void emitRegSave(const SmallVectorImpl<unsigned> &RegList, bool isVector);
virtual void ChangeSection(const MCSection *Section,
const MCExpr *Subsection) {
@@ -141,10 +358,6 @@ public:
}
}
- static bool classof(const MCStreamer *S) {
- return S->getKind() == SK_ARMELFStreamer;
- }
-
private:
enum ElfMappingSymbol {
EMS_None,
@@ -183,7 +396,7 @@ private:
MCELF::SetType(SD, ELF::STT_NOTYPE);
MCELF::SetBinding(SD, ELF::STB_LOCAL);
SD.setExternal(false);
- Symbol->setSection(*getCurrentSection().first);
+ AssignSection(Symbol, getCurrentSection().first);
const MCExpr *Value = MCSymbolRefExpr::Create(Start, getContext());
Symbol->setVariableValue(Value);
@@ -232,6 +445,224 @@ private:
};
} // end anonymous namespace
+ARMELFStreamer &ARMTargetELFStreamer::getStreamer() {
+ ARMELFStreamer *S = static_cast<ARMELFStreamer *>(Streamer);
+ return *S;
+}
+
+void ARMTargetELFStreamer::emitFnStart() { getStreamer().emitFnStart(); }
+void ARMTargetELFStreamer::emitFnEnd() { getStreamer().emitFnEnd(); }
+void ARMTargetELFStreamer::emitCantUnwind() { getStreamer().emitCantUnwind(); }
+void ARMTargetELFStreamer::emitPersonality(const MCSymbol *Personality) {
+ getStreamer().emitPersonality(Personality);
+}
+void ARMTargetELFStreamer::emitHandlerData() {
+ getStreamer().emitHandlerData();
+}
+void ARMTargetELFStreamer::emitSetFP(unsigned FpReg, unsigned SpReg,
+ int64_t Offset) {
+ getStreamer().emitSetFP(FpReg, SpReg, Offset);
+}
+void ARMTargetELFStreamer::emitPad(int64_t Offset) {
+ getStreamer().emitPad(Offset);
+}
+void ARMTargetELFStreamer::emitRegSave(const SmallVectorImpl<unsigned> &RegList,
+ bool isVector) {
+ getStreamer().emitRegSave(RegList, isVector);
+}
+void ARMTargetELFStreamer::switchVendor(StringRef Vendor) {
+ assert(!Vendor.empty() && "Vendor cannot be empty.");
+
+ if (CurrentVendor == Vendor)
+ return;
+
+ if (!CurrentVendor.empty())
+ finishAttributeSection();
+
+ assert(Contents.empty() &&
+ ".ARM.attributes should be flushed before changing vendor");
+ CurrentVendor = Vendor;
+}
+void ARMTargetELFStreamer::emitAttribute(unsigned Attribute, unsigned Value) {
+ setAttributeItem(Attribute, Value, /* OverwriteExisting= */ true);
+}
+void ARMTargetELFStreamer::emitTextAttribute(unsigned Attribute,
+ StringRef Value) {
+ setAttributeItem(Attribute, Value, /* OverwriteExisting= */ true);
+}
+void ARMTargetELFStreamer::emitFPU(unsigned Value) {
+ FPU = Value;
+}
+void ARMTargetELFStreamer::emitFPUDefaultAttributes() {
+ switch (FPU) {
+ case ARM::VFP:
+ case ARM::VFPV2:
+ setAttributeItem(ARMBuildAttrs::VFP_arch,
+ ARMBuildAttrs::AllowFPv2,
+ /* OverwriteExisting= */ false);
+ break;
+
+ case ARM::VFPV3:
+ setAttributeItem(ARMBuildAttrs::VFP_arch,
+ ARMBuildAttrs::AllowFPv3A,
+ /* OverwriteExisting= */ false);
+ break;
+
+ case ARM::VFPV3_D16:
+ setAttributeItem(ARMBuildAttrs::VFP_arch,
+ ARMBuildAttrs::AllowFPv3B,
+ /* OverwriteExisting= */ false);
+ break;
+
+ case ARM::VFPV4:
+ setAttributeItem(ARMBuildAttrs::VFP_arch,
+ ARMBuildAttrs::AllowFPv4A,
+ /* OverwriteExisting= */ false);
+ break;
+
+ case ARM::VFPV4_D16:
+ setAttributeItem(ARMBuildAttrs::VFP_arch,
+ ARMBuildAttrs::AllowFPv4B,
+ /* OverwriteExisting= */ false);
+ break;
+
+ case ARM::FP_ARMV8:
+ setAttributeItem(ARMBuildAttrs::VFP_arch,
+ ARMBuildAttrs::AllowFPARMv8A,
+ /* OverwriteExisting= */ false);
+ break;
+
+ case ARM::NEON:
+ setAttributeItem(ARMBuildAttrs::VFP_arch,
+ ARMBuildAttrs::AllowFPv3A,
+ /* OverwriteExisting= */ false);
+ setAttributeItem(ARMBuildAttrs::Advanced_SIMD_arch,
+ ARMBuildAttrs::AllowNeon,
+ /* OverwriteExisting= */ false);
+ break;
+
+ case ARM::NEON_VFPV4:
+ setAttributeItem(ARMBuildAttrs::VFP_arch,
+ ARMBuildAttrs::AllowFPv4A,
+ /* OverwriteExisting= */ false);
+ setAttributeItem(ARMBuildAttrs::Advanced_SIMD_arch,
+ ARMBuildAttrs::AllowNeon2,
+ /* OverwriteExisting= */ false);
+ break;
+
+ case ARM::NEON_FP_ARMV8:
+ case ARM::CRYPTO_NEON_FP_ARMV8:
+ setAttributeItem(ARMBuildAttrs::VFP_arch,
+ ARMBuildAttrs::AllowFPARMv8A,
+ /* OverwriteExisting= */ false);
+ setAttributeItem(ARMBuildAttrs::Advanced_SIMD_arch,
+ ARMBuildAttrs::AllowNeonARMv8,
+ /* OverwriteExisting= */ false);
+ break;
+
+ default:
+ report_fatal_error("Unknown FPU: " + Twine(FPU));
+ break;
+ }
+}
+size_t ARMTargetELFStreamer::calculateContentSize() const {
+ size_t Result = 0;
+ for (size_t i = 0; i < Contents.size(); ++i) {
+ AttributeItem item = Contents[i];
+ switch (item.Type) {
+ case AttributeItem::HiddenAttribute:
+ break;
+ case AttributeItem::NumericAttribute:
+ Result += getULEBSize(item.Tag);
+ Result += getULEBSize(item.IntValue);
+ break;
+ case AttributeItem::TextAttribute:
+ Result += getULEBSize(item.Tag);
+ Result += item.StringValue.size() + 1; // string + '\0'
+ break;
+ }
+ }
+ return Result;
+}
+void ARMTargetELFStreamer::finishAttributeSection() {
+ // <format-version>
+ // [ <section-length> "vendor-name"
+ // [ <file-tag> <size> <attribute>*
+ // | <section-tag> <size> <section-number>* 0 <attribute>*
+ // | <symbol-tag> <size> <symbol-number>* 0 <attribute>*
+ // ]+
+ // ]*
+
+ if (FPU != ARM::INVALID_FPU)
+ emitFPUDefaultAttributes();
+
+ if (Contents.empty())
+ return;
+
+ std::sort(Contents.begin(), Contents.end(), AttributeItem::LessTag);
+
+ ARMELFStreamer &Streamer = getStreamer();
+
+ // Switch to .ARM.attributes section
+ if (AttributeSection) {
+ Streamer.SwitchSection(AttributeSection);
+ } else {
+ AttributeSection =
+ Streamer.getContext().getELFSection(".ARM.attributes",
+ ELF::SHT_ARM_ATTRIBUTES,
+ 0,
+ SectionKind::getMetadata());
+ Streamer.SwitchSection(AttributeSection);
+
+ // Format version
+ Streamer.EmitIntValue(0x41, 1);
+ }
+
+ // Vendor size + Vendor name + '\0'
+ const size_t VendorHeaderSize = 4 + CurrentVendor.size() + 1;
+
+ // Tag + Tag Size
+ const size_t TagHeaderSize = 1 + 4;
+
+ const size_t ContentsSize = calculateContentSize();
+
+ Streamer.EmitIntValue(VendorHeaderSize + TagHeaderSize + ContentsSize, 4);
+ Streamer.EmitBytes(CurrentVendor);
+ Streamer.EmitIntValue(0, 1); // '\0'
+
+ Streamer.EmitIntValue(ARMBuildAttrs::File, 1);
+ Streamer.EmitIntValue(TagHeaderSize + ContentsSize, 4);
+
+  // Size should have been accounted for already; now
+ // emit each field as its type (ULEB or String)
+ for (size_t i = 0; i < Contents.size(); ++i) {
+ AttributeItem item = Contents[i];
+ Streamer.EmitULEB128IntValue(item.Tag);
+ switch (item.Type) {
+ default: llvm_unreachable("Invalid attribute type");
+ case AttributeItem::NumericAttribute:
+ Streamer.EmitULEB128IntValue(item.IntValue);
+ break;
+ case AttributeItem::TextAttribute:
+ Streamer.EmitBytes(item.StringValue.upper());
+ Streamer.EmitIntValue(0, 1); // '\0'
+ break;
+ }
+ }
+
+ Contents.clear();
+ FPU = ARM::INVALID_FPU;
+}
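// Worked example of the byte layout emitted above, assuming vendor "aeabi"
// and a single numeric attribute (Tag 6 = Tag_CPU_arch, value 10 = v7),
// each of which ULEB-encodes to one byte (lengths little-endian):
//   0x41                 format version 'A'
//   11 00 00 00          section length = 10 (vendor hdr) + 5 + 2 = 17
//   "aeabi" 00           vendor name + NUL
//   01                   ARMBuildAttrs::File tag
//   07 00 00 00          tag length = 5 (tag hdr) + 2 (contents)
//   06 0a                Tag_CPU_arch = v7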
+
+void ARMELFStreamer::FinishImpl() {
+ MCTargetStreamer &TS = getTargetStreamer();
+ ARMTargetStreamer &ATS = static_cast<ARMTargetStreamer &>(TS);
+ ATS.finishAttributeSection();
+
+ MCELFStreamer::FinishImpl();
+}
+
inline void ARMELFStreamer::SwitchToEHSection(const char *Prefix,
unsigned Type,
unsigned Flags,
@@ -295,29 +726,13 @@ void ARMELFStreamer::Reset() {
UnwindOpAsm.Reset();
}
-// Add the R_ARM_NONE fixup at the same position
-void ARMELFStreamer::EmitPersonalityFixup(StringRef Name) {
- const MCSymbol *PersonalitySym = getContext().GetOrCreateSymbol(Name);
-
- const MCSymbolRefExpr *PersonalityRef =
- MCSymbolRefExpr::Create(PersonalitySym,
- MCSymbolRefExpr::VK_ARM_NONE,
- getContext());
-
- AddValueSymbols(PersonalityRef);
- MCDataFragment *DF = getOrCreateDataFragment();
- DF->getFixups().push_back(
- MCFixup::Create(DF->getContents().size(), PersonalityRef,
- MCFixup::getKindForSize(4, false)));
-}
-
-void ARMELFStreamer::EmitFnStart() {
+void ARMELFStreamer::emitFnStart() {
assert(FnStart == 0);
FnStart = getContext().CreateTempSymbol();
EmitLabel(FnStart);
}
-void ARMELFStreamer::EmitFnEnd() {
+void ARMELFStreamer::emitFnEnd() {
  assert(FnStart && ".fnstart must precede .fnend");
// Emit unwind opcodes if there is no .handlerdata directive
@@ -365,8 +780,20 @@ void ARMELFStreamer::EmitFnEnd() {
Reset();
}
-void ARMELFStreamer::EmitCantUnwind() {
- CantUnwind = true;
+void ARMELFStreamer::emitCantUnwind() { CantUnwind = true; }
+
+// Add the R_ARM_NONE fixup at the same position
+void ARMELFStreamer::EmitPersonalityFixup(StringRef Name) {
+ const MCSymbol *PersonalitySym = getContext().GetOrCreateSymbol(Name);
+
+ const MCSymbolRefExpr *PersonalityRef = MCSymbolRefExpr::Create(
+ PersonalitySym, MCSymbolRefExpr::VK_ARM_NONE, getContext());
+
+ AddValueSymbols(PersonalityRef);
+ MCDataFragment *DF = getOrCreateDataFragment();
+ DF->getFixups().push_back(MCFixup::Create(DF->getContents().size(),
+ PersonalityRef,
+ MCFixup::getKindForSize(4, false)));
}
void ARMELFStreamer::FlushPendingOffset() {
@@ -429,17 +856,14 @@ void ARMELFStreamer::FlushUnwindOpcodes(bool NoHandlerData) {
EmitIntValue(0, 4);
}
-void ARMELFStreamer::EmitHandlerData() {
- FlushUnwindOpcodes(false);
-}
+void ARMELFStreamer::emitHandlerData() { FlushUnwindOpcodes(false); }
-void ARMELFStreamer::EmitPersonality(const MCSymbol *Per) {
+void ARMELFStreamer::emitPersonality(const MCSymbol *Per) {
Personality = Per;
UnwindOpAsm.setPersonality(Per);
}
-void ARMELFStreamer::EmitSetFP(unsigned NewFPReg,
- unsigned NewSPReg,
+void ARMELFStreamer::emitSetFP(unsigned NewFPReg, unsigned NewSPReg,
int64_t Offset) {
assert((NewSPReg == ARM::SP || NewSPReg == FPReg) &&
"the operand of .setfp directive should be either $sp or $fp");
@@ -453,7 +877,7 @@ void ARMELFStreamer::EmitSetFP(unsigned NewFPReg,
FPOffset += Offset;
}
-void ARMELFStreamer::EmitPad(int64_t Offset) {
+void ARMELFStreamer::emitPad(int64_t Offset) {
// Track the change of the $sp offset
SPOffset -= Offset;
@@ -462,7 +886,7 @@ void ARMELFStreamer::EmitPad(int64_t Offset) {
PendingOffset -= Offset;
}
-void ARMELFStreamer::EmitRegSave(const SmallVectorImpl<unsigned> &RegList,
+void ARMELFStreamer::emitRegSave(const SmallVectorImpl<unsigned> &RegList,
bool IsVector) {
// Collect the registers in the register list
unsigned Count = 0;
@@ -493,11 +917,31 @@ void ARMELFStreamer::EmitRegSave(const SmallVectorImpl<unsigned> &RegList,
}
namespace llvm {
+
+MCStreamer *createMCAsmStreamer(MCContext &Ctx, formatted_raw_ostream &OS,
+ bool isVerboseAsm, bool useLoc, bool useCFI,
+ bool useDwarfDirectory,
+ MCInstPrinter *InstPrint, MCCodeEmitter *CE,
+ MCAsmBackend *TAB, bool ShowInst) {
+ ARMTargetAsmStreamer *S = new ARMTargetAsmStreamer(OS, *InstPrint);
+
+ return llvm::createAsmStreamer(Ctx, S, OS, isVerboseAsm, useLoc, useCFI,
+ useDwarfDirectory, InstPrint, CE, TAB,
+ ShowInst);
+}
+
MCELFStreamer* createARMELFStreamer(MCContext &Context, MCAsmBackend &TAB,
raw_ostream &OS, MCCodeEmitter *Emitter,
bool RelaxAll, bool NoExecStack,
bool IsThumb) {
- ARMELFStreamer *S = new ARMELFStreamer(Context, TAB, OS, Emitter, IsThumb);
+ ARMTargetELFStreamer *TS = new ARMTargetELFStreamer();
+ ARMELFStreamer *S =
+ new ARMELFStreamer(Context, TS, TAB, OS, Emitter, IsThumb);
+ // FIXME: This should eventually end up somewhere else where more
+ // intelligent flag decisions can be made. For now we are just maintaining
+ // the status quo for ARM and setting EF_ARM_EABI_VER5 as the default.
+ S->getAssembler().setELFHeaderEFlags(ELF::EF_ARM_EABI_VER5);
+
if (RelaxAll)
S->getAssembler().setRelaxAll(true);
if (NoExecStack)
diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.h b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.h
deleted file mode 100644
index 77ae5d2..0000000
--- a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.h
+++ /dev/null
@@ -1,27 +0,0 @@
-//===-- ARMELFStreamer.h - ELF Streamer for ARM ------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements ELF streamer information for the ARM backend.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef ARM_ELF_STREAMER_H
-#define ARM_ELF_STREAMER_H
-
-#include "llvm/MC/MCELFStreamer.h"
-
-namespace llvm {
-
- MCELFStreamer* createARMELFStreamer(MCContext &Context, MCAsmBackend &TAB,
- raw_ostream &OS, MCCodeEmitter *Emitter,
- bool RelaxAll, bool NoExecStack,
- bool IsThumb);
-}
-
-#endif // ARM_ELF_STREAMER_H
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp
index c1aab9c..ad796e6 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp
@@ -49,8 +49,6 @@ ARMELFMCAsmInfo::ARMELFMCAsmInfo() {
Code16Directive = ".code\t16";
Code32Directive = ".code\t32";
- WeakRefDirective = "\t.weak\t";
-
HasLEB128 = true;
SupportsDebugInformation = true;
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h b/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h
index f0b289c..e1f716d 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h
@@ -15,6 +15,7 @@
#define LLVM_ARMTARGETASMINFO_H
#include "llvm/MC/MCAsmInfoDarwin.h"
+#include "llvm/MC/MCAsmInfoELF.h"
namespace llvm {
@@ -24,7 +25,7 @@ namespace llvm {
explicit ARMMCAsmInfoDarwin();
};
- class ARMELFMCAsmInfo : public MCAsmInfo {
+ class ARMELFMCAsmInfo : public MCAsmInfoELF {
virtual void anchor();
public:
explicit ARMELFMCAsmInfo();
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
index a18d465..4382d0d 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
@@ -58,8 +58,7 @@ public:
}
bool isTargetDarwin() const {
Triple TT(STI.getTargetTriple());
- Triple::OSType OS = TT.getOS();
- return OS == Triple::Darwin || OS == Triple::MacOSX || OS == Triple::IOS;
+ return TT.isOSDarwin();
}
unsigned getMachineSoImmOpValue(unsigned SoImm) const;
@@ -638,8 +637,14 @@ getARMBLXTargetOpValue(const MCInst &MI, unsigned OpIdx,
uint32_t ARMMCCodeEmitter::
getUnconditionalBranchTargetOpValue(const MCInst &MI, unsigned OpIdx,
SmallVectorImpl<MCFixup> &Fixups) const {
- unsigned Val =
- ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_t2_uncondbranch, Fixups);
+ unsigned Val = 0;
+ const MCOperand MO = MI.getOperand(OpIdx);
+
+  if (MO.isExpr())
+    return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_t2_uncondbranch,
+                                    Fixups);
+ else
+ Val = MO.getImm() >> 1;
+
bool I = (Val & 0x800000);
bool J1 = (Val & 0x400000);
bool J2 = (Val & 0x200000);
@@ -665,7 +670,7 @@ getAdrLabelOpValue(const MCInst &MI, unsigned OpIdx,
if (MO.isExpr())
return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_adr_pcrel_12,
Fixups);
- int32_t offset = MO.getImm();
+ int64_t offset = MO.getImm();
uint32_t Val = 0x2000;
int SoImmVal;
@@ -772,8 +777,10 @@ getAddrModeImm12OpValue(const MCInst &MI, unsigned OpIdx,
} else {
Reg = ARM::PC;
int32_t Offset = MO.getImm();
- // FIXME: Handle #-0.
- if (Offset < 0) {
+ if (Offset == INT32_MIN) {
+ Offset = 0;
+ isAdd = false;
+ } else if (Offset < 0) {
Offset *= -1;
isAdd = false;
}
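// Sketch of the rule above: addrmode_imm12 carries a 12-bit magnitude plus
// an add/subtract (U) bit, so "#-0" needs its own representation; INT32_MIN
// is assumed here to be the sentinel the operand uses for it.
#include <cstdint>
static uint32_t encodeImm12Sketch(int32_t Offset) {
  bool IsAdd = true;
  if (Offset == INT32_MIN) { // the #-0 sentinel
    Offset = 0;
    IsAdd = false;
  } else if (Offset < 0) {
    Offset = -Offset;
    IsAdd = false;
  }
  return (uint32_t(IsAdd) << 12) | (uint32_t(Offset) & 0xfff); // U + imm12
}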
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
index caa1949..a99de0e 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
@@ -12,30 +12,73 @@
//===----------------------------------------------------------------------===//
#include "ARMBaseInfo.h"
-#include "ARMELFStreamer.h"
#include "ARMMCAsmInfo.h"
#include "ARMMCTargetDesc.h"
#include "InstPrinter/ARMInstPrinter.h"
#include "llvm/ADT/Triple.h"
#include "llvm/MC/MCCodeGenInfo.h"
+#include "llvm/MC/MCELFStreamer.h"
#include "llvm/MC/MCInstrAnalysis.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
-#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetRegistry.h"
+using namespace llvm;
+
#define GET_REGINFO_MC_DESC
#include "ARMGenRegisterInfo.inc"
+static bool getMCRDeprecationInfo(MCInst &MI, MCSubtargetInfo &STI,
+ std::string &Info) {
+ if (STI.getFeatureBits() & llvm::ARM::HasV7Ops &&
+ (MI.getOperand(0).isImm() && MI.getOperand(0).getImm() == 15) &&
+ (MI.getOperand(1).isImm() && MI.getOperand(1).getImm() == 0) &&
+ // Checks for the deprecated CP15ISB encoding:
+ // mcr p15, #0, rX, c7, c5, #4
+ (MI.getOperand(3).isImm() && MI.getOperand(3).getImm() == 7)) {
+ if ((MI.getOperand(5).isImm() && MI.getOperand(5).getImm() == 4)) {
+ if (MI.getOperand(4).isImm() && MI.getOperand(4).getImm() == 5) {
+ Info = "deprecated since v7, use 'isb'";
+ return true;
+ }
+
+ // Checks for the deprecated CP15DSB encoding:
+ // mcr p15, #0, rX, c7, c10, #4
+ if (MI.getOperand(4).isImm() && MI.getOperand(4).getImm() == 10) {
+ Info = "deprecated since v7, use 'dsb'";
+ return true;
+ }
+ }
+ // Checks for the deprecated CP15DMB encoding:
+ // mcr p15, #0, rX, c7, c10, #5
+ if (MI.getOperand(4).isImm() && MI.getOperand(4).getImm() == 10 &&
+ (MI.getOperand(5).isImm() && MI.getOperand(5).getImm() == 5)) {
+ Info = "deprecated since v7, use 'dmb'";
+ return true;
+ }
+ }
+ return false;
+}
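// Condensed view of the deprecated CP15 barrier encodings diagnosed above,
// all of the form "mcr p15, #0, rX, c7, <CRm>, <opc2>" (sketch table only):
static const char *cp15BarrierReplacementSketch(unsigned CRm, unsigned Opc2) {
  if (CRm == 5 && Opc2 == 4)
    return "isb"; // CP15ISB
  if (CRm == 10 && Opc2 == 4)
    return "dsb"; // CP15DSB
  if (CRm == 10 && Opc2 == 5)
    return "dmb"; // CP15DMB
  return 0; // not one of the deprecated barrier encodings
}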
+
+static bool getITDeprecationInfo(MCInst &MI, MCSubtargetInfo &STI,
+ std::string &Info) {
+ if (STI.getFeatureBits() & llvm::ARM::HasV8Ops &&
+ MI.getOperand(1).isImm() && MI.getOperand(1).getImm() != 8) {
+ Info = "applying IT instruction to more than one subsequent instruction is deprecated";
+ return true;
+ }
+
+ return false;
+}
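// Why "!= 8" above means a multi-instruction IT block: the 4-bit IT mask
// ends in a single sentinel 1 bit, and every bit above the sentinel adds one
// more predicated instruction, so 0b1000 (8) is exactly one. Sketch under
// that encoding assumption:
static unsigned itBlockLengthSketch(unsigned Mask) {
  unsigned Len = 4;
  while (Len && (Mask & 1) == 0) { // strip zeros below the sentinel bit
    Mask >>= 1;
    --Len;
  }
  return Len; // 0b1000 -> 1, 0b0100 -> 2, 0b0010 -> 3, 0b0001 -> 4
}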
+
#define GET_INSTRINFO_MC_DESC
#include "ARMGenInstrInfo.inc"
#define GET_SUBTARGETINFO_MC_DESC
#include "ARMGenSubtargetInfo.inc"
-using namespace llvm;
std::string ARM_MC::ParseARMTriple(StringRef TT, StringRef CPU) {
Triple triple(TT);
@@ -60,8 +103,13 @@ std::string ARM_MC::ParseARMTriple(StringRef TT, StringRef CPU) {
if (Idx) {
unsigned SubVer = TT[Idx];
if (SubVer == '8') {
- // FIXME: Parse v8 features
- ARMArchFeature = "+v8";
+ if (NoCPU)
+ // v8a: FeatureDB, FeatureFPARMv8, FeatureNEON, FeatureDSPThumb2, FeatureMP,
+ // FeatureHWDiv, FeatureHWDivARM, FeatureTrustZone, FeatureT2XtPk, FeatureCrypto, FeatureCRC
+ ARMArchFeature = "+v8,+db,+fp-armv8,+neon,+t2dsp,+mp,+hwdiv,+hwdiv-arm,+trustzone,+t2xtpk,+crypto,+crc";
+ else
+ // Use CPU to figure out the exact features
+ ARMArchFeature = "+v8";
} else if (SubVer == '7') {
if (Len >= Idx+2 && TT[Idx+1] == 'm') {
isThumb = true;
@@ -106,7 +154,7 @@ std::string ARM_MC::ParseARMTriple(StringRef TT, StringRef CPU) {
isThumb = true;
if (NoCPU)
// v6m: FeatureNoARM, FeatureMClass
- ARMArchFeature = "+v6,+noarm,+mclass";
+ ARMArchFeature = "+v6m,+noarm,+mclass";
else
ARMArchFeature = "+v6";
} else
@@ -307,6 +355,10 @@ extern "C" void LLVMInitializeARMTargetMC() {
TargetRegistry::RegisterMCObjectStreamer(TheARMTarget, createMCStreamer);
TargetRegistry::RegisterMCObjectStreamer(TheThumbTarget, createMCStreamer);
+ // Register the asm streamer.
+ TargetRegistry::RegisterAsmStreamer(TheARMTarget, createMCAsmStreamer);
+ TargetRegistry::RegisterAsmStreamer(TheThumbTarget, createMCAsmStreamer);
+
// Register the MCInstPrinter.
TargetRegistry::RegisterMCInstPrinter(TheARMTarget, createARMMCInstPrinter);
TargetRegistry::RegisterMCInstPrinter(TheThumbTarget, createARMMCInstPrinter);
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h
index 4e94c53..959be8b 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h
@@ -18,13 +18,16 @@
#include <string>
namespace llvm {
+class formatted_raw_ostream;
class MCAsmBackend;
class MCCodeEmitter;
class MCContext;
class MCInstrInfo;
+class MCInstPrinter;
class MCObjectWriter;
class MCRegisterInfo;
class MCSubtargetInfo;
+class MCStreamer;
class MCRelocationInfo;
class StringRef;
class Target;
@@ -42,12 +45,19 @@ namespace ARM_MC {
StringRef FS);
}
+MCStreamer *createMCAsmStreamer(MCContext &Ctx, formatted_raw_ostream &OS,
+ bool isVerboseAsm, bool useLoc, bool useCFI,
+ bool useDwarfDirectory,
+ MCInstPrinter *InstPrint, MCCodeEmitter *CE,
+ MCAsmBackend *TAB, bool ShowInst);
+
MCCodeEmitter *createARMMCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
const MCSubtargetInfo &STI,
MCContext &Ctx);
-MCAsmBackend *createARMAsmBackend(const Target &T, StringRef TT, StringRef CPU);
+MCAsmBackend *createARMAsmBackend(const Target &T, const MCRegisterInfo &MRI,
+ StringRef TT, StringRef CPU);
/// createARMELFObjectWriter - Construct an ARM ELF object writer.
MCObjectWriter *createARMELFObjectWriter(raw_ostream &OS,
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
index b9efe74..1f681ba 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
@@ -20,10 +20,9 @@
#include "llvm/MC/MCMachOSymbolFlags.h"
#include "llvm/MC/MCMachObjectWriter.h"
#include "llvm/MC/MCValue.h"
-#include "llvm/Object/MachOFormat.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MachO.h"
using namespace llvm;
-using namespace llvm::object;
namespace {
class ARMMachObjectWriter : public MCMachObjectTargetWriter {
@@ -63,7 +62,7 @@ public:
static bool getARMFixupKindMachOInfo(unsigned Kind, unsigned &RelocType,
unsigned &Log2Size) {
- RelocType = unsigned(macho::RIT_Vanilla);
+ RelocType = unsigned(MachO::ARM_RELOC_VANILLA);
Log2Size = ~0U;
switch (Kind) {
@@ -92,21 +91,21 @@ static bool getARMFixupKindMachOInfo(unsigned Kind, unsigned &RelocType,
case ARM::fixup_arm_uncondbl:
case ARM::fixup_arm_condbl:
case ARM::fixup_arm_blx:
- RelocType = unsigned(macho::RIT_ARM_Branch24Bit);
+ RelocType = unsigned(MachO::ARM_RELOC_BR24);
// Report as 'long', even though that is not quite accurate.
Log2Size = llvm::Log2_32(4);
return true;
// Handle Thumb branches.
case ARM::fixup_arm_thumb_br:
- RelocType = unsigned(macho::RIT_ARM_ThumbBranch22Bit);
+ RelocType = unsigned(MachO::ARM_THUMB_RELOC_BR22);
Log2Size = llvm::Log2_32(2);
return true;
case ARM::fixup_t2_uncondbranch:
case ARM::fixup_arm_thumb_bl:
case ARM::fixup_arm_thumb_blx:
- RelocType = unsigned(macho::RIT_ARM_ThumbBranch22Bit);
+ RelocType = unsigned(MachO::ARM_THUMB_RELOC_BR22);
Log2Size = llvm::Log2_32(4);
return true;
@@ -121,23 +120,23 @@ static bool getARMFixupKindMachOInfo(unsigned Kind, unsigned &RelocType,
// 1 - thumb instructions
case ARM::fixup_arm_movt_hi16:
case ARM::fixup_arm_movt_hi16_pcrel:
- RelocType = unsigned(macho::RIT_ARM_Half);
+ RelocType = unsigned(MachO::ARM_RELOC_HALF);
Log2Size = 1;
return true;
case ARM::fixup_t2_movt_hi16:
case ARM::fixup_t2_movt_hi16_pcrel:
- RelocType = unsigned(macho::RIT_ARM_Half);
+ RelocType = unsigned(MachO::ARM_RELOC_HALF);
Log2Size = 3;
return true;
case ARM::fixup_arm_movw_lo16:
case ARM::fixup_arm_movw_lo16_pcrel:
- RelocType = unsigned(macho::RIT_ARM_Half);
+ RelocType = unsigned(MachO::ARM_RELOC_HALF);
Log2Size = 0;
return true;
case ARM::fixup_t2_movw_lo16:
case ARM::fixup_t2_movw_lo16_pcrel:
- RelocType = unsigned(macho::RIT_ARM_Half);
+ RelocType = unsigned(MachO::ARM_RELOC_HALF);
Log2Size = 2;
return true;
}
@@ -153,7 +152,7 @@ RecordARMScatteredHalfRelocation(MachObjectWriter *Writer,
uint64_t &FixedValue) {
uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset();
unsigned IsPCRel = Writer->isFixupKindPCRel(Asm, Fixup.getKind());
- unsigned Type = macho::RIT_ARM_Half;
+ unsigned Type = MachO::ARM_RELOC_HALF;
// See <reloc.h>.
const MCSymbol *A = &Target.getSymA()->getSymbol();
@@ -179,7 +178,7 @@ RecordARMScatteredHalfRelocation(MachObjectWriter *Writer,
"' can not be undefined in a subtraction expression");
// Select the appropriate difference relocation type.
- Type = macho::RIT_ARM_HalfDifference;
+ Type = MachO::ARM_RELOC_HALF_SECTDIFF;
Value2 = Writer->getSymbolAddress(B_SD, Layout);
FixedValue -= Writer->getSectionAddress(B_SD->getFragment()->getParent());
}
@@ -223,29 +222,29 @@ RecordARMScatteredHalfRelocation(MachObjectWriter *Writer,
break;
}
- if (Type == macho::RIT_ARM_HalfDifference) {
+ if (Type == MachO::ARM_RELOC_HALF_SECTDIFF) {
uint32_t OtherHalf = MovtBit
? (FixedValue & 0xffff) : ((FixedValue & 0xffff0000) >> 16);
- macho::RelocationEntry MRE;
- MRE.Word0 = ((OtherHalf << 0) |
- (macho::RIT_Pair << 24) |
- (MovtBit << 28) |
- (ThumbBit << 29) |
- (IsPCRel << 30) |
- macho::RF_Scattered);
- MRE.Word1 = Value2;
+ MachO::any_relocation_info MRE;
+ MRE.r_word0 = ((OtherHalf << 0) |
+ (MachO::ARM_RELOC_PAIR << 24) |
+ (MovtBit << 28) |
+ (ThumbBit << 29) |
+ (IsPCRel << 30) |
+ MachO::R_SCATTERED);
+ MRE.r_word1 = Value2;
Writer->addRelocation(Fragment->getParent(), MRE);
}
- macho::RelocationEntry MRE;
- MRE.Word0 = ((FixupOffset << 0) |
- (Type << 24) |
- (MovtBit << 28) |
- (ThumbBit << 29) |
- (IsPCRel << 30) |
- macho::RF_Scattered);
- MRE.Word1 = Value;
+ MachO::any_relocation_info MRE;
+ MRE.r_word0 = ((FixupOffset << 0) |
+ (Type << 24) |
+ (MovtBit << 28) |
+ (ThumbBit << 29) |
+ (IsPCRel << 30) |
+ MachO::R_SCATTERED);
+ MRE.r_word1 = Value;
Writer->addRelocation(Fragment->getParent(), MRE);
}
@@ -259,7 +258,7 @@ void ARMMachObjectWriter::RecordARMScatteredRelocation(MachObjectWriter *Writer,
uint64_t &FixedValue) {
uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset();
unsigned IsPCRel = Writer->isFixupKindPCRel(Asm, Fixup.getKind());
- unsigned Type = macho::RIT_Vanilla;
+ unsigned Type = MachO::ARM_RELOC_VANILLA;
// See <reloc.h>.
const MCSymbol *A = &Target.getSymA()->getSymbol();
@@ -284,31 +283,31 @@ void ARMMachObjectWriter::RecordARMScatteredRelocation(MachObjectWriter *Writer,
"' can not be undefined in a subtraction expression");
// Select the appropriate difference relocation type.
- Type = macho::RIT_Difference;
+ Type = MachO::ARM_RELOC_SECTDIFF;
Value2 = Writer->getSymbolAddress(B_SD, Layout);
FixedValue -= Writer->getSectionAddress(B_SD->getFragment()->getParent());
}
// Relocations are written out in reverse order, so the PAIR comes first.
- if (Type == macho::RIT_Difference ||
- Type == macho::RIT_Generic_LocalDifference) {
- macho::RelocationEntry MRE;
- MRE.Word0 = ((0 << 0) |
- (macho::RIT_Pair << 24) |
- (Log2Size << 28) |
- (IsPCRel << 30) |
- macho::RF_Scattered);
- MRE.Word1 = Value2;
+ if (Type == MachO::ARM_RELOC_SECTDIFF ||
+ Type == MachO::ARM_RELOC_LOCAL_SECTDIFF) {
+ MachO::any_relocation_info MRE;
+ MRE.r_word0 = ((0 << 0) |
+ (MachO::ARM_RELOC_PAIR << 24) |
+ (Log2Size << 28) |
+ (IsPCRel << 30) |
+ MachO::R_SCATTERED);
+ MRE.r_word1 = Value2;
Writer->addRelocation(Fragment->getParent(), MRE);
}
- macho::RelocationEntry MRE;
- MRE.Word0 = ((FixupOffset << 0) |
- (Type << 24) |
- (Log2Size << 28) |
- (IsPCRel << 30) |
- macho::RF_Scattered);
- MRE.Word1 = Value;
+ MachO::any_relocation_info MRE;
+ MRE.r_word0 = ((FixupOffset << 0) |
+ (Type << 24) |
+ (Log2Size << 28) |
+ (IsPCRel << 30) |
+ MachO::R_SCATTERED);
+ MRE.r_word1 = Value;
Writer->addRelocation(Fragment->getParent(), MRE);
}
@@ -326,13 +325,13 @@ bool ARMMachObjectWriter::requiresExternRelocation(MachObjectWriter *Writer,
switch (RelocType) {
default:
return false;
- case macho::RIT_ARM_Branch24Bit:
+ case MachO::ARM_RELOC_BR24:
// PC pre-adjustment of 8 for these instructions.
Value -= 8;
// ARM BL/BLX has a 25-bit offset.
Range = 0x1ffffff;
break;
- case macho::RIT_ARM_ThumbBranch22Bit:
+ case MachO::ARM_THUMB_RELOC_BR22:
// PC pre-adjustment of 4 for these instructions.
Value -= 4;
// Thumb BL/BLX has a 24-bit offset.
@@ -361,7 +360,7 @@ void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer,
uint64_t &FixedValue) {
unsigned IsPCRel = Writer->isFixupKindPCRel(Asm, Fixup.getKind());
unsigned Log2Size;
- unsigned RelocType = macho::RIT_Vanilla;
+ unsigned RelocType = MachO::ARM_RELOC_VANILLA;
if (!getARMFixupKindMachOInfo(Fixup.getKind(), RelocType, Log2Size))
// If we failed to get fixup kind info, it's because there's no legal
// relocation type for the fixup kind. This happens when it's a fixup that's
@@ -374,7 +373,7 @@ void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer,
// scattered relocation entry. Differences always require scattered
// relocations.
if (Target.getSymB()) {
- if (RelocType == macho::RIT_ARM_Half)
+ if (RelocType == MachO::ARM_RELOC_HALF)
return RecordARMScatteredHalfRelocation(Writer, Asm, Layout, Fragment,
Fixup, Target, FixedValue);
return RecordARMScatteredRelocation(Writer, Asm, Layout, Fragment, Fixup,
@@ -392,7 +391,7 @@ void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer,
//
// Is this right for ARM?
uint32_t Offset = Target.getConstant();
- if (IsPCRel && RelocType == macho::RIT_Vanilla)
+ if (IsPCRel && RelocType == MachO::ARM_RELOC_VANILLA)
Offset += 1 << Log2Size;
if (Offset && SD && !Writer->doesSymbolRequireExternRelocation(SD))
return RecordARMScatteredRelocation(Writer, Asm, Layout, Fragment, Fixup,
@@ -445,17 +444,17 @@ void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer,
}
// struct relocation_info (8 bytes)
- macho::RelocationEntry MRE;
- MRE.Word0 = FixupOffset;
- MRE.Word1 = ((Index << 0) |
- (IsPCRel << 24) |
- (Log2Size << 25) |
- (IsExtern << 27) |
- (Type << 28));
+ MachO::any_relocation_info MRE;
+ MRE.r_word0 = FixupOffset;
+ MRE.r_word1 = ((Index << 0) |
+ (IsPCRel << 24) |
+ (Log2Size << 25) |
+ (IsExtern << 27) |
+ (Type << 28));
// Even when it's not a scattered relocation, movw/movt always uses
// a PAIR relocation.
- if (Type == macho::RIT_ARM_Half) {
+ if (Type == MachO::ARM_RELOC_HALF) {
// The other-half value only gets populated for the movt and movw
// relocation entries.
uint32_t Value = 0;
@@ -474,11 +473,11 @@ void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer,
Value = FixedValue & 0xffff;
break;
}
- macho::RelocationEntry MREPair;
- MREPair.Word0 = Value;
- MREPair.Word1 = ((0xffffff) |
- (Log2Size << 25) |
- (macho::RIT_Pair << 28));
+ MachO::any_relocation_info MREPair;
+ MREPair.r_word0 = Value;
+ MREPair.r_word1 = ((0xffffff << 0) |
+ (Log2Size << 25) |
+ (MachO::ARM_RELOC_PAIR << 28));
Writer->addRelocation(Fragment->getParent(), MREPair);
}
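For reference, the MachO::any_relocation_info packing used throughout this file follows the <mach-o/reloc.h> scattered-entry layout; a minimal standalone sketch (helper name hypothetical, fields exactly as in the hunks above):

  static MachO::any_relocation_info
  packScattered(uint32_t FixupOffset, unsigned Type, unsigned Log2Size,
                unsigned IsPCRel, uint32_t Value) {
    MachO::any_relocation_info MRE;
    MRE.r_word0 = ((FixupOffset << 0) |   // 24-bit address of the fixup
                   (Type << 24) |         // ARM_RELOC_* kind
                   (Log2Size << 28) |     // log2 of the access size
                   (IsPCRel << 30) |      // pc-relative bit
                   MachO::R_SCATTERED);   // high bit marks a scattered entry
    MRE.r_word1 = Value;                  // address of the referenced symbol
    return MRE;
  }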
diff --git a/lib/Target/ARM/Thumb1FrameLowering.cpp b/lib/Target/ARM/Thumb1FrameLowering.cpp
index db49db8..cfb33f5 100644
--- a/lib/Target/ARM/Thumb1FrameLowering.cpp
+++ b/lib/Target/ARM/Thumb1FrameLowering.cpp
@@ -127,7 +127,6 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const {
case ARM::LR:
if (Reg == FramePtr)
FramePtrSpillFI = FI;
- AFI->addGPRCalleeSavedArea1Frame(FI);
GPRCS1Size += 4;
break;
case ARM::R8:
@@ -136,16 +135,12 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const {
case ARM::R11:
if (Reg == FramePtr)
FramePtrSpillFI = FI;
- if (STI.isTargetIOS()) {
- AFI->addGPRCalleeSavedArea2Frame(FI);
+ if (STI.isTargetIOS())
GPRCS2Size += 4;
- } else {
- AFI->addGPRCalleeSavedArea1Frame(FI);
+ else
GPRCS1Size += 4;
- }
break;
default:
- AFI->addDPRCalleeSavedAreaFrame(FI);
DPRCSSize += 8;
}
}
@@ -169,10 +164,17 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const {
AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset);
NumBytes = DPRCSOffset;
+ int FramePtrOffsetInBlock = 0;
+ if (tryFoldSPUpdateIntoPushPop(MF, prior(MBBI), NumBytes)) {
+ FramePtrOffsetInBlock = NumBytes;
+ NumBytes = 0;
+ }
+
// Adjust FP so it points to the stack slot that contains the previous FP.
if (HasFP) {
+ FramePtrOffsetInBlock += MFI->getObjectOffset(FramePtrSpillFI) + GPRCS1Size;
AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tADDrSPi), FramePtr)
- .addFrameIndex(FramePtrSpillFI).addImm(0)
+ .addReg(ARM::SP).addImm(FramePtrOffsetInBlock / 4)
.setMIFlags(MachineInstr::FrameSetup));
if (NumBytes > 508)
// If offset is > 508 then sp cannot be adjusted in a single instruction,
@@ -213,13 +215,6 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const {
AFI->setShouldRestoreSPFromFP(true);
}
-static bool isCalleeSavedRegister(unsigned Reg, const uint16_t *CSRegs) {
- for (unsigned i = 0; CSRegs[i]; ++i)
- if (Reg == CSRegs[i])
- return true;
- return false;
-}
-
static bool isCSRestore(MachineInstr *MI, const uint16_t *CSRegs) {
if (MI->getOpcode() == ARM::tLDRspi &&
MI->getOperand(1).isFI() &&
@@ -296,8 +291,9 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF,
&MBB.front() != MBBI &&
prior(MBBI)->getOpcode() == ARM::tPOP) {
MachineBasicBlock::iterator PMBBI = prior(MBBI);
- emitSPUpdate(MBB, PMBBI, TII, dl, *RegInfo, NumBytes);
- } else
+ if (!tryFoldSPUpdateIntoPushPop(MF, PMBBI, NumBytes))
+ emitSPUpdate(MBB, PMBBI, TII, dl, *RegInfo, NumBytes);
+ } else if (!tryFoldSPUpdateIntoPushPop(MF, MBBI, NumBytes))
emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, NumBytes);
}
}
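For reference, tADDrSPi encodes its immediate in words, which is why the prologue above divides FramePtrOffsetInBlock by 4; a worked example with hypothetical values:

  // push folded an SP drop of 24 bytes, FP spill slot at object offset -8,
  // GPRCS1 area of 16 bytes:
  //   FramePtrOffsetInBlock = 24 + (-8) + 16 = 32 bytes
  //   BuildMI(..., TII.get(ARM::tADDrSPi), FramePtr)
  //       .addReg(ARM::SP).addImm(32 / 4)   // immediate operand is in words
  // i.e. FP = SP + 32, printed as "add r7, sp, #32".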
diff --git a/lib/Target/ARM/Thumb1RegisterInfo.cpp b/lib/Target/ARM/Thumb1RegisterInfo.cpp
index 6722614..65a7221 100644
--- a/lib/Target/ARM/Thumb1RegisterInfo.cpp
+++ b/lib/Target/ARM/Thumb1RegisterInfo.cpp
@@ -426,7 +426,7 @@ rewriteFrameIndex(MachineBasicBlock::iterator II, unsigned FrameRegIdx,
*this);
} else {
// Translate r0 = add sp, -imm to
- // r0 = -imm (this is then translated into a series of instructons)
+ // r0 = -imm (this is then translated into a series of instructions)
// r0 = add r0, sp
emitThumbConstant(MBB, II, DestReg, Offset, TII, *this, dl);
@@ -573,11 +573,7 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex) +
MF.getFrameInfo()->getStackSize() + SPAdj;
- if (AFI->isGPRCalleeSavedArea1Frame(FrameIndex))
- Offset -= AFI->getGPRCalleeSavedArea1Offset();
- else if (AFI->isGPRCalleeSavedArea2Frame(FrameIndex))
- Offset -= AFI->getGPRCalleeSavedArea2Offset();
- else if (MF.getFrameInfo()->hasVarSizedObjects()) {
+ if (MF.getFrameInfo()->hasVarSizedObjects()) {
assert(SPAdj == 0 && MF.getTarget().getFrameLowering()->hasFP(MF) &&
"Unexpected");
// There are alloca()'s in this function, must reference off the frame
diff --git a/lib/Target/ARM/Thumb2ITBlockPass.cpp b/lib/Target/ARM/Thumb2ITBlockPass.cpp
index d8596d7..0b7d3bb 100644
--- a/lib/Target/ARM/Thumb2ITBlockPass.cpp
+++ b/lib/Target/ARM/Thumb2ITBlockPass.cpp
@@ -28,6 +28,7 @@ namespace {
static char ID;
Thumb2ITBlockPass() : MachineFunctionPass(ID) {}
+ bool restrictIT;
const Thumb2InstrInfo *TII;
const TargetRegisterInfo *TRI;
ARMFunctionInfo *AFI;
@@ -192,37 +193,42 @@ bool Thumb2ITBlockPass::InsertITInstructions(MachineBasicBlock &MBB) {
// Form IT block.
ARMCC::CondCodes OCC = ARMCC::getOppositeCondition(CC);
unsigned Mask = 0, Pos = 3;
- // Branches, including tricky ones like LDM_RET, need to end an IT
- // block so check the instruction we just put in the block.
- for (; MBBI != E && Pos &&
- (!MI->isBranch() && !MI->isReturn()) ; ++MBBI) {
- if (MBBI->isDebugValue())
- continue;
-
- MachineInstr *NMI = &*MBBI;
- MI = NMI;
-
- unsigned NPredReg = 0;
- ARMCC::CondCodes NCC = getITInstrPredicate(NMI, NPredReg);
- if (NCC == CC || NCC == OCC) {
- Mask |= (NCC & 1) << Pos;
- // Add implicit use of ITSTATE.
- NMI->addOperand(MachineOperand::CreateReg(ARM::ITSTATE, false/*isDef*/,
- true/*isImp*/, false/*isKill*/));
- LastITMI = NMI;
- } else {
- if (NCC == ARMCC::AL &&
- MoveCopyOutOfITBlock(NMI, CC, OCC, Defs, Uses)) {
- --MBBI;
- MBB.remove(NMI);
- MBB.insert(InsertPos, NMI);
- ++NumMovedInsts;
+
+ // v8 IT blocks are limited to one conditional op unless -arm-no-restrict-it
+ // is set; when restricted, skip the block-forming loop below
+ if (!restrictIT) {
+ // Branches, including tricky ones like LDM_RET, need to end an IT
+ // block so check the instruction we just put in the block.
+ for (; MBBI != E && Pos &&
+ (!MI->isBranch() && !MI->isReturn()) ; ++MBBI) {
+ if (MBBI->isDebugValue())
continue;
+
+ MachineInstr *NMI = &*MBBI;
+ MI = NMI;
+
+ unsigned NPredReg = 0;
+ ARMCC::CondCodes NCC = getITInstrPredicate(NMI, NPredReg);
+ if (NCC == CC || NCC == OCC) {
+ Mask |= (NCC & 1) << Pos;
+ // Add implicit use of ITSTATE.
+ NMI->addOperand(MachineOperand::CreateReg(ARM::ITSTATE, false/*isDef*/,
+ true/*isImp*/, false/*isKill*/));
+ LastITMI = NMI;
+ } else {
+ if (NCC == ARMCC::AL &&
+ MoveCopyOutOfITBlock(NMI, CC, OCC, Defs, Uses)) {
+ --MBBI;
+ MBB.remove(NMI);
+ MBB.insert(InsertPos, NMI);
+ ++NumMovedInsts;
+ continue;
+ }
+ break;
}
- break;
+ TrackDefUses(NMI, Defs, Uses, TRI);
+ --Pos;
}
- TrackDefUses(NMI, Defs, Uses, TRI);
- --Pos;
}
// Finalize IT mask.
@@ -250,6 +256,7 @@ bool Thumb2ITBlockPass::runOnMachineFunction(MachineFunction &Fn) {
AFI = Fn.getInfo<ARMFunctionInfo>();
TII = static_cast<const Thumb2InstrInfo*>(TM.getInstrInfo());
TRI = TM.getRegisterInfo();
+ restrictIT = TM.getSubtarget<ARMSubtarget>().restrictIT();
if (!AFI->isThumbFunction())
return false;
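For reference, the loop being guarded builds the IT instruction's then/else mask one bit at a time; a worked sketch of the v7 behaviour (restrictIT off, conditions hypothetical):

  // IT head condition CC = EQ; the block absorbs one same-condition and
  // one opposite-condition instruction:
  //   Pos = 3:  Mask |= (EQ & 1) << 3;   // 'T' slot (same condition)
  //   Pos = 2:  Mask |= (NE & 1) << 2;   // 'E' slot (opposite condition)
  // The "Finalize IT mask" step below then ORs a stop bit in at the
  // current Pos, yielding the ITTE-style mask the IT instruction carries.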
diff --git a/lib/Target/ARM/Thumb2InstrInfo.cpp b/lib/Target/ARM/Thumb2InstrInfo.cpp
index 286eaa0..91788ac 100644
--- a/lib/Target/ARM/Thumb2InstrInfo.cpp
+++ b/lib/Target/ARM/Thumb2InstrInfo.cpp
@@ -36,7 +36,8 @@ Thumb2InstrInfo::Thumb2InstrInfo(const ARMSubtarget &STI)
/// getNoopForMachoTarget - Return the noop instruction to use for a noop.
void Thumb2InstrInfo::getNoopForMachoTarget(MCInst &NopInst) const {
- NopInst.setOpcode(ARM::tNOP);
+ NopInst.setOpcode(ARM::tHINT);
+ NopInst.addOperand(MCOperand::CreateImm(0));
NopInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
NopInst.addOperand(MCOperand::CreateReg(0));
}
@@ -214,6 +215,13 @@ void llvm::emitT2RegPlusImmediate(MachineBasicBlock &MBB,
unsigned DestReg, unsigned BaseReg, int NumBytes,
ARMCC::CondCodes Pred, unsigned PredReg,
const ARMBaseInstrInfo &TII, unsigned MIFlags) {
+ if (NumBytes == 0 && DestReg != BaseReg) {
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), DestReg)
+ .addReg(BaseReg, RegState::Kill)
+ .addImm((unsigned)Pred).addReg(PredReg).setMIFlags(MIFlags);
+ return;
+ }
+
bool isSub = NumBytes < 0;
if (isSub) NumBytes = -NumBytes;
@@ -334,6 +342,7 @@ negativeOffsetOpcode(unsigned opcode)
case ARM::t2STRi12: return ARM::t2STRi8;
case ARM::t2STRBi12: return ARM::t2STRBi8;
case ARM::t2STRHi12: return ARM::t2STRHi8;
+ case ARM::t2PLDi12: return ARM::t2PLDi8;
case ARM::t2LDRi8:
case ARM::t2LDRHi8:
@@ -343,6 +352,7 @@ negativeOffsetOpcode(unsigned opcode)
case ARM::t2STRi8:
case ARM::t2STRBi8:
case ARM::t2STRHi8:
+ case ARM::t2PLDi8:
return opcode;
default:
@@ -364,6 +374,7 @@ positiveOffsetOpcode(unsigned opcode)
case ARM::t2STRi8: return ARM::t2STRi12;
case ARM::t2STRBi8: return ARM::t2STRBi12;
case ARM::t2STRHi8: return ARM::t2STRHi12;
+ case ARM::t2PLDi8: return ARM::t2PLDi12;
case ARM::t2LDRi12:
case ARM::t2LDRHi12:
@@ -373,6 +384,7 @@ positiveOffsetOpcode(unsigned opcode)
case ARM::t2STRi12:
case ARM::t2STRBi12:
case ARM::t2STRHi12:
+ case ARM::t2PLDi12:
return opcode;
default:
@@ -394,6 +406,7 @@ immediateOffsetOpcode(unsigned opcode)
case ARM::t2STRs: return ARM::t2STRi12;
case ARM::t2STRBs: return ARM::t2STRBi12;
case ARM::t2STRHs: return ARM::t2STRHi12;
+ case ARM::t2PLDs: return ARM::t2PLDi12;
case ARM::t2LDRi12:
case ARM::t2LDRHi12:
@@ -403,6 +416,7 @@ immediateOffsetOpcode(unsigned opcode)
case ARM::t2STRi12:
case ARM::t2STRBi12:
case ARM::t2STRHi12:
+ case ARM::t2PLDi12:
case ARM::t2LDRi8:
case ARM::t2LDRHi8:
case ARM::t2LDRBi8:
@@ -411,6 +425,7 @@ immediateOffsetOpcode(unsigned opcode)
case ARM::t2STRi8:
case ARM::t2STRBi8:
case ARM::t2STRHi8:
+ case ARM::t2PLDi8:
return opcode;
default:
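Taken together, the three opcode tables above let the frame-index rewriter flip a t2 memory operation between its i12 (positive-offset) and i8 (negative-offset) forms; a minimal sketch of how they combine (helper name hypothetical):

  static unsigned selectOffsetOpcode(unsigned Opc, int Offset) {
    return Offset >= 0 ? positiveOffsetOpcode(Opc)   // i8  form -> i12 form
                       : negativeOffsetOpcode(Opc);  // i12 form -> i8  form
  }
  // e.g. t2PLDi12 with Offset = -4 now maps to t2PLDi8 via the t2PLD
  // entries this hunk adds, so preloads survive the rewrite too.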
diff --git a/lib/Target/CppBackend/CPPBackend.cpp b/lib/Target/CppBackend/CPPBackend.cpp
index 0ddcad2..ddc7a66 100644
--- a/lib/Target/CppBackend/CPPBackend.cpp
+++ b/lib/Target/CppBackend/CPPBackend.cpp
@@ -33,6 +33,7 @@
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/TargetRegistry.h"
#include <algorithm>
+#include <cctype>
#include <cstdio>
#include <map>
#include <set>
@@ -291,8 +292,6 @@ void CppWriter::printLinkageType(GlobalValue::LinkageTypes LT) {
Out << "GlobalValue::LinkOnceAnyLinkage "; break;
case GlobalValue::LinkOnceODRLinkage:
Out << "GlobalValue::LinkOnceODRLinkage "; break;
- case GlobalValue::LinkOnceODRAutoHideLinkage:
- Out << "GlobalValue::LinkOnceODRAutoHideLinkage"; break;
case GlobalValue::WeakAnyLinkage:
Out << "GlobalValue::WeakAnyLinkage"; break;
case GlobalValue::WeakODRLinkage:
@@ -497,6 +496,7 @@ void CppWriter::printAttributes(const AttributeSet &PAL,
HANDLE_ATTR(ReadOnly);
HANDLE_ATTR(NoInline);
HANDLE_ATTR(AlwaysInline);
+ HANDLE_ATTR(OptimizeNone);
HANDLE_ATTR(OptimizeForSize);
HANDLE_ATTR(StackProtect);
HANDLE_ATTR(StackProtectReq);
@@ -1139,7 +1139,7 @@ void CppWriter::printInstruction(const Instruction *I,
nl(Out);
for (SwitchInst::ConstCaseIt i = SI->case_begin(), e = SI->case_end();
i != e; ++i) {
- const IntegersSubset CaseVal = i.getCaseValueEx();
+ const ConstantInt* CaseVal = i.getCaseValue();
const BasicBlock *BB = i.getCaseSuccessor();
Out << iName << "->addCase("
<< getOpName(CaseVal) << ", "
@@ -1160,8 +1160,7 @@ void CppWriter::printInstruction(const Instruction *I,
break;
}
case Instruction::Resume: {
- Out << "ResumeInst::Create(mod->getContext(), " << opNames[0]
- << ", " << bbname << ");";
+ Out << "ResumeInst::Create(" << opNames[0] << ", " << bbname << ");";
break;
}
case Instruction::Invoke: {
@@ -1175,7 +1174,7 @@ void CppWriter::printInstruction(const Instruction *I,
}
// FIXME: This shouldn't use magic numbers -3, -2, and -1.
Out << "InvokeInst *" << iName << " = InvokeInst::Create("
- << getOpName(inv->getCalledFunction()) << ", "
+ << getOpName(inv->getCalledValue()) << ", "
<< getOpName(inv->getNormalDest()) << ", "
<< getOpName(inv->getUnwindDest()) << ", "
<< iName << "_params, \"";
@@ -1589,6 +1588,20 @@ void CppWriter::printInstruction(const Instruction *I,
Out << "\");";
break;
}
+ case Instruction::LandingPad: {
+ const LandingPadInst *lpi = cast<LandingPadInst>(I);
+ Out << "LandingPadInst* " << iName << " = LandingPadInst::Create(";
+ printCppName(lpi->getType());
+ Out << ", " << opNames[0] << ", " << lpi->getNumClauses() << ", \"";
+ printEscapedString(lpi->getName());
+ Out << "\", " << bbname << ");";
+ nl(Out) << iName << "->setCleanup("
+ << (lpi->isCleanup() ? "true" : "false")
+ << ");";
+ for (unsigned i = 0, e = lpi->getNumClauses(); i != e; ++i)
+ nl(Out) << iName << "->addClause(" << opNames[i+1] << ");";
+ break;
+ }
}
DefinedValues.insert(I);
nl(Out);
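For reference, the SwitchInst case API this file now targets (the IntegersSubset representation is gone in 3.4); a minimal standalone sketch:

  #include "llvm/IR/Instructions.h"
  using namespace llvm;

  static void visitCases(const SwitchInst *SI) {
    for (SwitchInst::ConstCaseIt i = SI->case_begin(), e = SI->case_end();
         i != e; ++i) {
      const ConstantInt *CaseVal = i.getCaseValue();  // plain ConstantInt again
      const BasicBlock *Dest = i.getCaseSuccessor();  // block the case jumps to
      (void)CaseVal; (void)Dest;
    }
  }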
diff --git a/lib/Target/Hexagon/CMakeLists.txt b/lib/Target/Hexagon/CMakeLists.txt
index 2b79791..ae3c9eb 100644
--- a/lib/Target/Hexagon/CMakeLists.txt
+++ b/lib/Target/Hexagon/CMakeLists.txt
@@ -17,6 +17,7 @@ add_llvm_target(HexagonCodeGen
HexagonFrameLowering.cpp
HexagonHardwareLoops.cpp
HexagonFixupHwLoops.cpp
+ HexagonMachineFunctionInfo.cpp
HexagonMachineScheduler.cpp
HexagonMCInstLower.cpp
HexagonInstrInfo.cpp
diff --git a/lib/Target/Hexagon/HexagonAsmPrinter.cpp b/lib/Target/Hexagon/HexagonAsmPrinter.cpp
index 88cd3fb..a2e04ba 100644
--- a/lib/Target/Hexagon/HexagonAsmPrinter.cpp
+++ b/lib/Target/Hexagon/HexagonAsmPrinter.cpp
@@ -99,7 +99,7 @@ void HexagonAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
return;
case MachineOperand::MO_GlobalAddress:
// Computing the address of a global symbol, not calling it.
- O << *Mang->getSymbol(MO.getGlobal());
+ O << *getSymbol(MO.getGlobal());
printOffset(MO.getOffset(), O);
return;
}
@@ -267,7 +267,7 @@ void HexagonAsmPrinter::printGlobalOperand(const MachineInstr *MI, int OpNo,
assert( (MO.getType() == MachineOperand::MO_GlobalAddress) &&
"Expecting global address");
- O << *Mang->getSymbol(MO.getGlobal());
+ O << *getSymbol(MO.getGlobal());
if (MO.getOffset() != 0) {
O << " + ";
O << MO.getOffset();
diff --git a/lib/Target/Hexagon/HexagonHardwareLoops.cpp b/lib/Target/Hexagon/HexagonHardwareLoops.cpp
index 3c4ca0f..52d5ab2 100644
--- a/lib/Target/Hexagon/HexagonHardwareLoops.cpp
+++ b/lib/Target/Hexagon/HexagonHardwareLoops.cpp
@@ -871,7 +871,7 @@ bool HexagonHardwareLoops::isInvalidLoopOperation(
/// \brief - Return true if the loop contains an instruction that inhibits
/// the use of the hardware loop function.
bool HexagonHardwareLoops::containsInvalidInstruction(MachineLoop *L) const {
- const std::vector<MachineBasicBlock*> Blocks = L->getBlocks();
+ const std::vector<MachineBasicBlock *> &Blocks = L->getBlocks();
for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
MachineBasicBlock *MBB = Blocks[i];
for (MachineBasicBlock::iterator
diff --git a/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp b/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
index 9e78e51..5ae9328 100644
--- a/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
+++ b/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
@@ -1344,8 +1344,10 @@ SDNode *HexagonDAGToDAGISel::SelectAdd(SDNode *N) {
SDNode *HexagonDAGToDAGISel::Select(SDNode *N) {
- if (N->isMachineOpcode())
+ if (N->isMachineOpcode()) {
+ N->setNodeId(-1);
return NULL; // Already selected.
+ }
switch (N->getOpcode()) {
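The setNodeId(-1) guard added here (and to MSP430ISelDAGToDAG.cpp further down) marks an already-selected machine node so the selection loop will not revisit it; the general shape, for a hypothetical target:

  SDNode *FooDAGToDAGISel::Select(SDNode *N) {
    if (N->isMachineOpcode()) {
      N->setNodeId(-1);  // sentinel id: node is done, skip on later passes
      return NULL;       // Already selected.
    }
    // ... target-specific patterns, then the tablegen matcher:
    return SelectCode(N);
  }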
diff --git a/lib/Target/Hexagon/HexagonISelLowering.cpp b/lib/Target/Hexagon/HexagonISelLowering.cpp
index 567faca..1374179 100644
--- a/lib/Target/Hexagon/HexagonISelLowering.cpp
+++ b/lib/Target/Hexagon/HexagonISelLowering.cpp
@@ -39,13 +39,24 @@
using namespace llvm;
-const unsigned Hexagon_MAX_RET_SIZE = 64;
-
static cl::opt<bool>
EmitJumpTables("hexagon-emit-jump-tables", cl::init(true), cl::Hidden,
cl::desc("Control jump table emission on Hexagon target"));
-int NumNamedVarArgParams = -1;
+namespace {
+class HexagonCCState : public CCState {
+ int NumNamedVarArgParams;
+
+public:
+ HexagonCCState(CallingConv::ID CC, bool isVarArg, MachineFunction &MF,
+ const TargetMachine &TM, SmallVectorImpl<CCValAssign> &locs,
+ LLVMContext &C, int NumNamedVarArgParams)
+ : CCState(CC, isVarArg, MF, TM, locs, C),
+ NumNamedVarArgParams(NumNamedVarArgParams) {}
+
+ int getNumNamedVarArgParams() const { return NumNamedVarArgParams; }
+};
+}
// Implement calling convention for Hexagon.
static bool
@@ -82,12 +93,13 @@ static bool
CC_Hexagon_VarArg (unsigned ValNo, MVT ValVT,
MVT LocVT, CCValAssign::LocInfo LocInfo,
ISD::ArgFlagsTy ArgFlags, CCState &State) {
+ HexagonCCState &HState = static_cast<HexagonCCState &>(State);
// NumNamedVarArgParams can not be zero for a VarArg function.
- assert ( (NumNamedVarArgParams > 0) &&
- "NumNamedVarArgParams is not bigger than zero.");
+ assert((HState.getNumNamedVarArgParams() > 0) &&
+ "NumNamedVarArgParams is not bigger than zero.");
- if ( (int)ValNo < NumNamedVarArgParams ) {
+ if ((int)ValNo < HState.getNumNamedVarArgParams()) {
// Deal with named arguments.
return CC_Hexagon(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State);
}
@@ -394,13 +406,8 @@ HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
bool IsStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet();
- // Analyze operands of the call, assigning locations to each operand.
- SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), ArgLocs, *DAG.getContext());
-
// Check for varargs.
- NumNamedVarArgParams = -1;
+ int NumNamedVarArgParams = -1;
if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Callee))
{
const Function* CalleeFn = NULL;
@@ -417,6 +424,12 @@ HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
}
}
+ // Analyze operands of the call, assigning locations to each operand.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ HexagonCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), ArgLocs, *DAG.getContext(),
+ NumNamedVarArgParams);
+
if (NumNamedVarArgParams > 0)
CCInfo.AnalyzeCallOperands(Outs, CC_Hexagon_VarArg);
else
diff --git a/lib/Target/Hexagon/HexagonISelLowering.h b/lib/Target/Hexagon/HexagonISelLowering.h
index 4fe0107..73da226 100644
--- a/lib/Target/Hexagon/HexagonISelLowering.h
+++ b/lib/Target/Hexagon/HexagonISelLowering.h
@@ -141,8 +141,11 @@ namespace llvm {
SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
- virtual EVT getSetCCResultType(LLVMContext &, EVT) const {
- return MVT::i1;
+ virtual EVT getSetCCResultType(LLVMContext &C, EVT VT) const {
+ if (!VT.isVector())
+ return MVT::i1;
+ else
+ return EVT::getVectorVT(C, MVT::i1, VT.getVectorNumElements());
}
virtual bool getPostIndexedAddressParts(SDNode *N, SDNode *Op,
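Effect of the getSetCCResultType change above, in isolation: a setcc on a vector type now yields a vector of i1 rather than a scalar, e.g.

  // VT = v4i32  ->  EVT::getVectorVT(C, MVT::i1, 4), i.e. v4i1
  // VT = i32    ->  MVT::i1 (scalar case unchanged)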
diff --git a/lib/Target/Hexagon/HexagonInstrFormats.td b/lib/Target/Hexagon/HexagonInstrFormats.td
index e71386a..d25bfa8 100644
--- a/lib/Target/Hexagon/HexagonInstrFormats.td
+++ b/lib/Target/Hexagon/HexagonInstrFormats.td
@@ -63,7 +63,7 @@ class MemAccessSize<bits<3> value> {
def NoMemAccess : MemAccessSize<0>;// Not a memory access instruction.
def ByteAccess : MemAccessSize<1>;// Byte access instruction (memb).
def HalfWordAccess : MemAccessSize<2>;// Half word access instruction (memh).
-def WordAccess : MemAccessSize<3>;// Word access instrution (memw).
+def WordAccess : MemAccessSize<3>;// Word access instruction (memw).
def DoubleWordAccess : MemAccessSize<4>;// Double word access instruction (memd)
diff --git a/lib/Target/Hexagon/HexagonInstrInfo.cpp b/lib/Target/Hexagon/HexagonInstrInfo.cpp
index 5af645c..6b97609 100644
--- a/lib/Target/Hexagon/HexagonInstrInfo.cpp
+++ b/lib/Target/Hexagon/HexagonInstrInfo.cpp
@@ -26,7 +26,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
-#define GET_INSTRINFO_CTOR
+#define GET_INSTRINFO_CTOR_DTOR
#define GET_INSTRMAP_INFO
#include "HexagonGenInstrInfo.inc"
#include "HexagonGenDFAPacketizer.inc"
@@ -55,6 +55,8 @@ const int Hexagon_MEMH_AUTOINC_MIN = -16;
const int Hexagon_MEMB_AUTOINC_MAX = 7;
const int Hexagon_MEMB_AUTOINC_MIN = -8;
+// Pin the vtable to this file.
+void HexagonInstrInfo::anchor() {}
HexagonInstrInfo::HexagonInstrInfo(HexagonSubtarget &ST)
: HexagonGenInstrInfo(Hexagon::ADJCALLSTACKDOWN, Hexagon::ADJCALLSTACKUP),
diff --git a/lib/Target/Hexagon/HexagonInstrInfo.h b/lib/Target/Hexagon/HexagonInstrInfo.h
index 3c28df4..3f45b8b 100644
--- a/lib/Target/Hexagon/HexagonInstrInfo.h
+++ b/lib/Target/Hexagon/HexagonInstrInfo.h
@@ -26,6 +26,7 @@
namespace llvm {
class HexagonInstrInfo : public HexagonGenInstrInfo {
+ virtual void anchor();
const HexagonRegisterInfo RI;
const HexagonSubtarget &Subtarget;
typedef unsigned Opcode_t;
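The anchor() declared above is the vtable-anchoring idiom this patch applies across several targets; a self-contained sketch (class name hypothetical):

  class Foo {
    virtual void anchor();  // one out-of-line virtual method...
  public:
    virtual ~Foo() {}
  };
  void Foo::anchor() {}     // ...defined in exactly one .cpp file

  // The compiler then emits Foo's vtable (and RTTI) in that single object
  // file instead of as a weak copy in every TU that includes the header.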
diff --git a/lib/Target/Hexagon/HexagonInstrInfoV4.td b/lib/Target/Hexagon/HexagonInstrInfoV4.td
index fee83fb..475c23d 100644
--- a/lib/Target/Hexagon/HexagonInstrInfoV4.td
+++ b/lib/Target/Hexagon/HexagonInstrInfoV4.td
@@ -3390,4 +3390,3 @@ def : Pat<(i32 (load FoldGlobalAddrGP:$addr)),
def : Pat<(atomic_load_32 FoldGlobalAddrGP:$addr),
(i32 (LDriw_abs_V4 FoldGlobalAddrGP:$addr))>,
Requires<[HasV4T]>;
-
diff --git a/lib/Target/Hexagon/HexagonMCInstLower.cpp b/lib/Target/Hexagon/HexagonMCInstLower.cpp
index f011d51..bbb2fa4 100644
--- a/lib/Target/Hexagon/HexagonMCInstLower.cpp
+++ b/lib/Target/Hexagon/HexagonMCInstLower.cpp
@@ -73,7 +73,7 @@ void llvm::HexagonLowerToMC(const MachineInstr* MI, HexagonMCInst& MCI,
AP.OutContext));
break;
case MachineOperand::MO_GlobalAddress:
- MCO = GetSymbolRef(MO, AP.Mang->getSymbol(MO.getGlobal()), AP);
+ MCO = GetSymbolRef(MO, AP.getSymbol(MO.getGlobal()), AP);
break;
case MachineOperand::MO_ExternalSymbol:
MCO = GetSymbolRef(MO, AP.GetExternalSymbolSymbol(MO.getSymbolName()),
diff --git a/lib/Target/Hexagon/HexagonMachineFunctionInfo.cpp b/lib/Target/Hexagon/HexagonMachineFunctionInfo.cpp
new file mode 100644
index 0000000..9579c8b
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonMachineFunctionInfo.cpp
@@ -0,0 +1,16 @@
+//= HexagonMachineFunctionInfo.cpp - Hexagon machine function info -*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "HexagonMachineFunctionInfo.h"
+
+using namespace llvm;
+
+// Pin the vtable to this file.
+void HexagonMachineFunctionInfo::anchor() {}
+
diff --git a/lib/Target/Hexagon/HexagonMachineFunctionInfo.h b/lib/Target/Hexagon/HexagonMachineFunctionInfo.h
index bd7b26a..a59c8c9 100644
--- a/lib/Target/Hexagon/HexagonMachineFunctionInfo.h
+++ b/lib/Target/Hexagon/HexagonMachineFunctionInfo.h
@@ -1,4 +1,4 @@
-//=- HexagonMachineFuctionInfo.h - Hexagon machine function info --*- C++ -*-=//
+//=- HexagonMachineFunctionInfo.h - Hexagon machine function info -*- C++ -*-=//
//
// The LLVM Compiler Infrastructure
//
@@ -10,6 +10,7 @@
#ifndef HexagonMACHINEFUNCTIONINFO_H
#define HexagonMACHINEFUNCTIONINFO_H
+#include <map>
#include "llvm/CodeGen/MachineFunction.h"
namespace llvm {
@@ -30,9 +31,8 @@ class HexagonMachineFunctionInfo : public MachineFunctionInfo {
int VarArgsFrameIndex;
bool HasClobberLR;
bool HasEHReturn;
-
std::map<const MachineInstr*, unsigned> PacketInfo;
-
+ virtual void anchor();
public:
HexagonMachineFunctionInfo() : SRetReturnReg(0), HasClobberLR(0),
diff --git a/lib/Target/Hexagon/HexagonMachineScheduler.cpp b/lib/Target/Hexagon/HexagonMachineScheduler.cpp
index 10bb3e9..c94f081 100644
--- a/lib/Target/Hexagon/HexagonMachineScheduler.cpp
+++ b/lib/Target/Hexagon/HexagonMachineScheduler.cpp
@@ -407,11 +407,11 @@ SUnit *ConvergingVLIWScheduler::SchedBoundary::pickOnlyChoice() {
#ifndef NDEBUG
void ConvergingVLIWScheduler::traceCandidate(const char *Label,
const ReadyQueue &Q,
- SUnit *SU, PressureElement P) {
+ SUnit *SU, PressureChange P) {
dbgs() << Label << " " << Q.getName() << " ";
if (P.isValid())
- dbgs() << DAG->TRI->getRegPressureSetName(P.PSetID) << ":" << P.UnitIncrease
- << " ";
+ dbgs() << DAG->TRI->getRegPressureSetName(P.getPSet()) << ":"
+ << P.getUnitInc() << " ";
else
dbgs() << " ";
SU->dump(DAG);
@@ -457,9 +457,7 @@ static SUnit *getSingleUnscheduledSucc(SUnit *SU) {
// Constants used to denote relative importance of
// heuristic components for cost computation.
static const unsigned PriorityOne = 200;
-static const unsigned PriorityTwo = 100;
-static const unsigned PriorityThree = 50;
-static const unsigned PriorityFour = 20;
+static const unsigned PriorityTwo = 50;
static const unsigned ScaleTwo = 10;
static const unsigned FactorOne = 2;
@@ -517,8 +515,8 @@ int ConvergingVLIWScheduler::SchedulingCost(ReadyQueue &Q, SUnit *SU,
ResCount += (NumNodesBlocking * ScaleTwo);
// Factor in reg pressure as a heuristic.
- ResCount -= (Delta.Excess.UnitIncrease*PriorityThree);
- ResCount -= (Delta.CriticalMax.UnitIncrease*PriorityThree);
+ ResCount -= (Delta.Excess.getUnitInc()*PriorityTwo);
+ ResCount -= (Delta.CriticalMax.getUnitInc()*PriorityTwo);
DEBUG(if (verbose) dbgs() << " Total(" << ResCount << ")");
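For reference, the RegisterPressure API rename this hunk tracks, side by side:

  // old: PressureElement P;   P.PSetID        P.UnitIncrease
  // new: PressureChange  P;   P.getPSet()     P.getUnitInc()
  // Validity is still queried with P.isValid() in both.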
diff --git a/lib/Target/Hexagon/HexagonMachineScheduler.h b/lib/Target/Hexagon/HexagonMachineScheduler.h
index 171193e..8ac333f 100644
--- a/lib/Target/Hexagon/HexagonMachineScheduler.h
+++ b/lib/Target/Hexagon/HexagonMachineScheduler.h
@@ -233,7 +233,7 @@ protected:
SchedCandidate &Candidate);
#ifndef NDEBUG
void traceCandidate(const char *Label, const ReadyQueue &Q, SUnit *SU,
- PressureElement P = PressureElement());
+ PressureChange P = PressureChange());
#endif
};
diff --git a/lib/Target/Hexagon/HexagonPeephole.cpp b/lib/Target/Hexagon/HexagonPeephole.cpp
index 89e3406..5490ecd 100644
--- a/lib/Target/Hexagon/HexagonPeephole.cpp
+++ b/lib/Target/Hexagon/HexagonPeephole.cpp
@@ -29,7 +29,7 @@
//
// Note: The peephole pass makes the instructions like
// %vreg170<def> = SXTW %vreg166 or %vreg16<def> = NOT_p %vreg15<kill>
-// redundant and relies on some form of dead removal instrucions, like
+// redundant and relies on some form of dead removal instructions, like
// DCE or DIE to actually eliminate them.
diff --git a/lib/Target/Hexagon/HexagonRegisterInfo.cpp b/lib/Target/Hexagon/HexagonRegisterInfo.cpp
index d5ca4d7..1786e9d 100644
--- a/lib/Target/Hexagon/HexagonRegisterInfo.cpp
+++ b/lib/Target/Hexagon/HexagonRegisterInfo.cpp
@@ -295,13 +295,5 @@ unsigned HexagonRegisterInfo::getStackRegister() const {
return Hexagon::R29;
}
-unsigned HexagonRegisterInfo::getEHExceptionRegister() const {
- llvm_unreachable("What is the exception register");
-}
-
-unsigned HexagonRegisterInfo::getEHHandlerRegister() const {
- llvm_unreachable("What is the exception handler register");
-}
-
#define GET_REGINFO_TARGET_DESC
#include "HexagonGenRegisterInfo.inc"
diff --git a/lib/Target/Hexagon/HexagonRegisterInfo.h b/lib/Target/Hexagon/HexagonRegisterInfo.h
index fec86df..89af7c3 100644
--- a/lib/Target/Hexagon/HexagonRegisterInfo.h
+++ b/lib/Target/Hexagon/HexagonRegisterInfo.h
@@ -78,10 +78,6 @@ struct HexagonRegisterInfo : public HexagonGenRegisterInfo {
unsigned getFrameRegister(const MachineFunction &MF) const;
unsigned getFrameRegister() const;
unsigned getStackRegister() const;
-
- // Exception handling queries.
- unsigned getEHExceptionRegister() const;
- unsigned getEHHandlerRegister() const;
};
} // end namespace llvm
diff --git a/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp b/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp
index 3bf2f20..5166f8e 100644
--- a/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp
+++ b/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp
@@ -16,33 +16,31 @@
// scheduled after register allocation.
//
//===----------------------------------------------------------------------===//
+
#define DEBUG_TYPE "xfer"
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/CodeGen/ScheduleDAGInstrs.h"
+
+#include "HexagonTargetMachine.h"
+#include "HexagonSubtarget.h"
+#include "HexagonMachineFunctionInfo.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LatencyPriorityQueue.h"
-#include "llvm/CodeGen/SchedulerRegistry.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/ScheduleDAGInstrs.h"
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/CodeGen/SchedulerRegistry.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
-#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/MathExtras.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "HexagonTargetMachine.h"
-#include "HexagonSubtarget.h"
-#include "HexagonMachineFunctionInfo.h"
#include <map>
-#include <iostream>
-
-#include "llvm/Support/CommandLine.h"
-#define DEBUG_TYPE "xfer"
-
using namespace llvm;
diff --git a/lib/Target/Hexagon/HexagonSubtarget.cpp b/lib/Target/Hexagon/HexagonSubtarget.cpp
index 07d5ce1..fca6707 100644
--- a/lib/Target/Hexagon/HexagonSubtarget.cpp
+++ b/lib/Target/Hexagon/HexagonSubtarget.cpp
@@ -86,3 +86,5 @@ HexagonSubtarget::HexagonSubtarget(StringRef TT, StringRef CPU, StringRef FS):
ModeIEEERndNear = false;
}
+// Pin the vtable to this file.
+void HexagonSubtarget::anchor() {}
diff --git a/lib/Target/Hexagon/HexagonSubtarget.h b/lib/Target/Hexagon/HexagonSubtarget.h
index 76a8fba..690bef0 100644
--- a/lib/Target/Hexagon/HexagonSubtarget.h
+++ b/lib/Target/Hexagon/HexagonSubtarget.h
@@ -27,7 +27,7 @@
namespace llvm {
class HexagonSubtarget : public HexagonGenSubtargetInfo {
-
+ virtual void anchor();
bool UseMemOps;
bool ModeIEEERndNear;
diff --git a/lib/Target/Hexagon/HexagonTargetMachine.cpp b/lib/Target/Hexagon/HexagonTargetMachine.cpp
index cd96b58..bb950a0 100644
--- a/lib/Target/Hexagon/HexagonTargetMachine.cpp
+++ b/lib/Target/Hexagon/HexagonTargetMachine.cpp
@@ -102,17 +102,25 @@ class HexagonPassConfig : public TargetPassConfig {
public:
HexagonPassConfig(HexagonTargetMachine *TM, PassManagerBase &PM)
: TargetPassConfig(TM, PM) {
- // Enable MI scheduler.
- if (!DisableHexagonMISched) {
+ // FIXME: Rather than calling enablePass(&MachineSchedulerID) below, define
+ // HexagonSubtarget::enableMachineScheduler() { return true; }.
+ // That will bypass the SelectionDAG VLIW scheduler, which is probably just
+ // hurting compile time and will be removed eventually anyway.
+ if (DisableHexagonMISched)
+ disablePass(&MachineSchedulerID);
+ else
enablePass(&MachineSchedulerID);
- MachineSchedRegistry::setDefault(createVLIWMachineSched);
- }
}
HexagonTargetMachine &getHexagonTargetMachine() const {
return getTM<HexagonTargetMachine>();
}
+ virtual ScheduleDAGInstrs *
+ createMachineScheduler(MachineSchedContext *C) const {
+ return createVLIWMachineSched(C);
+ }
+
virtual bool addInstSelector();
virtual bool addPreRegAlloc();
virtual bool addPostRegAlloc();
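The createMachineScheduler override above replaces the old process-wide MachineSchedRegistry::setDefault call: the MI scheduler now asks each target's pass config for an instance, so Hexagon's VLIW choice no longer leaks into other targets. A hypothetical target following the same pattern (assuming the 3.4-era ScheduleDAGMI constructor):

  virtual ScheduleDAGInstrs *
  createMachineScheduler(MachineSchedContext *C) const {
    return new ScheduleDAGMI(C, new FooSchedStrategy());  // FooSchedStrategy
  }                                                       // is hypothetical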
diff --git a/lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.cpp b/lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.cpp
index 2ea0d2e..7c41507 100644
--- a/lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.cpp
+++ b/lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.cpp
@@ -21,7 +21,6 @@
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/Support/raw_ostream.h"
-#include <cstdio>
using namespace llvm;
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h b/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h
index e0f5a27..8519cf3 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h
@@ -73,7 +73,7 @@ namespace HexagonII {
NoMemAccess = 0, // Not a memory access instruction.
ByteAccess = 1, // Byte access instruction (memb).
HalfWordAccess = 2, // Half word access instruction (memh).
- WordAccess = 3, // Word access instrution (memw).
+ WordAccess = 3, // Word access instruction (memw).
DoubleWordAccess = 4 // Double word access instruction (memd)
};
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp
index 495dbb9..3f9415b 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp
@@ -15,6 +15,9 @@
using namespace llvm;
+// Pin the vtable to this file.
+void HexagonMCAsmInfo::anchor() {}
+
HexagonMCAsmInfo::HexagonMCAsmInfo(StringRef TT) {
Data16bitsDirective = "\t.half\t";
Data32bitsDirective = "\t.word\t";
@@ -29,7 +32,6 @@ HexagonMCAsmInfo::HexagonMCAsmInfo(StringRef TT) {
InlineAsmEnd = "# InlineAsm End";
ZeroDirective = "\t.space\t";
AscizDirective = "\t.string\t";
- WeakRefDirective = "\t.weak\t";
SupportsDebugInformation = true;
UsesELFSectionDirectiveForBSS = true;
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.h b/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.h
index 0b94d21..bd8cb76 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.h
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.h
@@ -15,10 +15,11 @@
#define HexagonMCASMINFO_H
#include "llvm/ADT/StringRef.h"
-#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCAsmInfoELF.h"
namespace llvm {
- class HexagonMCAsmInfo : public MCAsmInfo {
+ class HexagonMCAsmInfo : public MCAsmInfoELF {
+ virtual void anchor();
public:
explicit HexagonMCAsmInfo(StringRef TT);
};
diff --git a/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.cpp b/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.cpp
index d213a45..acf2ab8 100644
--- a/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.cpp
+++ b/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.cpp
@@ -21,11 +21,8 @@ MSP430MCAsmInfo::MSP430MCAsmInfo(StringRef TT) {
PointerSize = CalleeSaveStackSlotSize = 2;
PrivateGlobalPrefix = ".L";
- WeakRefDirective ="\t.weak\t";
- PCSymbol=".";
CommentString = ";";
AlignmentIsInBytes = false;
- AllowNameToStartWithDigit = true;
UsesELFSectionDirectiveForBSS = true;
}
diff --git a/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.h b/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.h
index feb040d..a7e0e58 100644
--- a/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.h
+++ b/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.h
@@ -14,12 +14,12 @@
#ifndef MSP430TARGETASMINFO_H
#define MSP430TARGETASMINFO_H
-#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCAsmInfoELF.h"
namespace llvm {
class StringRef;
- class MSP430MCAsmInfo : public MCAsmInfo {
+ class MSP430MCAsmInfo : public MCAsmInfoELF {
virtual void anchor();
public:
explicit MSP430MCAsmInfo(StringRef TT);
diff --git a/lib/Target/MSP430/MSP430AsmPrinter.cpp b/lib/Target/MSP430/MSP430AsmPrinter.cpp
index 0a04e5d..18311c3 100644
--- a/lib/Target/MSP430/MSP430AsmPrinter.cpp
+++ b/lib/Target/MSP430/MSP430AsmPrinter.cpp
@@ -92,7 +92,7 @@ void MSP430AsmPrinter::printOperand(const MachineInstr *MI, int OpNum,
if (Offset)
O << '(' << Offset << '+';
- O << *Mang->getSymbol(MO.getGlobal());
+ O << *getSymbol(MO.getGlobal());
if (Offset)
O << ')';
diff --git a/lib/Target/MSP430/MSP430CallingConv.td b/lib/Target/MSP430/MSP430CallingConv.td
index b448cc4..8a69d1e 100644
--- a/lib/Target/MSP430/MSP430CallingConv.td
+++ b/lib/Target/MSP430/MSP430CallingConv.td
@@ -23,18 +23,15 @@ def RetCC_MSP430 : CallingConv<[
//===----------------------------------------------------------------------===//
// MSP430 Argument Calling Conventions
//===----------------------------------------------------------------------===//
-def CC_MSP430 : CallingConv<[
+def CC_MSP430_AssignStack : CallingConv<[
// Pass by value if the byval attribute is given
CCIfByVal<CCPassByVal<2, 2>>,
// Promote i8 arguments to i16.
CCIfType<[i8], CCPromoteToType<i16>>,
- // The first 4 integer arguments of non-varargs functions are passed in
- // integer registers.
- CCIfNotVarArg<CCIfType<[i16], CCAssignToReg<[R15W, R14W, R13W, R12W]>>>,
-
// Integer values get stored in stack slots that are 2 bytes in
// size and 2-byte aligned.
CCIfType<[i16], CCAssignToStack<2, 2>>
]>;
+
diff --git a/lib/Target/MSP430/MSP430FrameLowering.h b/lib/Target/MSP430/MSP430FrameLowering.h
index c673f59..8370714 100644
--- a/lib/Target/MSP430/MSP430FrameLowering.h
+++ b/lib/Target/MSP430/MSP430FrameLowering.h
@@ -27,8 +27,8 @@ protected:
public:
explicit MSP430FrameLowering(const MSP430Subtarget &sti)
- : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 2, -2), STI(sti) {
- }
+ : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 2, -2, 2),
+ STI(sti) {}
/// emitProlog/emitEpilog - These methods insert prolog and epilog code into
/// the function.
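For reference, the constructor call above gains a fourth argument; the base signature it feeds (as best read from this tree, so treat as a sketch):

  // TargetFrameLowering(StackDirection D, unsigned StackAlignment,
  //                     int LocalAreaOffset, unsigned TransientStackAlign = 1)
  // The new trailing 2 sets MSP430's transient stack alignment explicitly
  // instead of leaving the default of 1.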
diff --git a/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp b/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
index 543f54c..4152829 100644
--- a/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
+++ b/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
@@ -395,6 +395,7 @@ SDNode *MSP430DAGToDAGISel::Select(SDNode *Node) {
DEBUG(errs() << "== ";
Node->dump(CurDAG);
errs() << "\n");
+ Node->setNodeId(-1);
return NULL;
}
diff --git a/lib/Target/MSP430/MSP430ISelLowering.cpp b/lib/Target/MSP430/MSP430ISelLowering.cpp
index 803e899..745cdf5 100644
--- a/lib/Target/MSP430/MSP430ISelLowering.cpp
+++ b/lib/Target/MSP430/MSP430ISelLowering.cpp
@@ -45,7 +45,7 @@ typedef enum {
} HWMultUseMode;
static cl::opt<HWMultUseMode>
-HWMultMode("msp430-hwmult-mode",
+HWMultMode("msp430-hwmult-mode", cl::Hidden,
cl::desc("Hardware multiplier use mode"),
cl::init(HWMultNoIntr),
cl::values(
@@ -250,6 +250,123 @@ getRegForInlineAsmConstraint(const std::string &Constraint,
#include "MSP430GenCallingConv.inc"
+/// For each argument in a function, store the number of pieces it is
+/// composed of.
+template<typename ArgT>
+static void ParseFunctionArgs(const SmallVectorImpl<ArgT> &Args,
+ SmallVectorImpl<unsigned> &Out) {
+ unsigned CurrentArgIndex = ~0U;
+ for (unsigned i = 0, e = Args.size(); i != e; i++) {
+ if (CurrentArgIndex == Args[i].OrigArgIndex) {
+ Out.back()++;
+ } else {
+ Out.push_back(1);
+ CurrentArgIndex++;
+ }
+ }
+}
+
+static void AnalyzeVarArgs(CCState &State,
+ const SmallVectorImpl<ISD::OutputArg> &Outs) {
+ State.AnalyzeCallOperands(Outs, CC_MSP430_AssignStack);
+}
+
+static void AnalyzeVarArgs(CCState &State,
+ const SmallVectorImpl<ISD::InputArg> &Ins) {
+ State.AnalyzeFormalArguments(Ins, CC_MSP430_AssignStack);
+}
+
+/// Analyze incoming and outgoing function arguments. We need custom C++ code
+/// to handle special constraints in the ABI like reversing the order of the
+/// pieces of split arguments. In addition, all pieces of a certain argument
+/// have to be passed either using registers or the stack but never mixing both.
+template<typename ArgT>
+static void AnalyzeArguments(CCState &State,
+ SmallVectorImpl<CCValAssign> &ArgLocs,
+ const SmallVectorImpl<ArgT> &Args) {
+ static const uint16_t RegList[] = {
+ MSP430::R15W, MSP430::R14W, MSP430::R13W, MSP430::R12W
+ };
+ static const unsigned NbRegs = array_lengthof(RegList);
+
+ if (State.isVarArg()) {
+ AnalyzeVarArgs(State, Args);
+ return;
+ }
+
+ SmallVector<unsigned, 4> ArgsParts;
+ ParseFunctionArgs(Args, ArgsParts);
+
+ unsigned RegsLeft = NbRegs;
+ bool UseStack = false;
+ unsigned ValNo = 0;
+
+ for (unsigned i = 0, e = ArgsParts.size(); i != e; i++) {
+ MVT ArgVT = Args[ValNo].VT;
+ ISD::ArgFlagsTy ArgFlags = Args[ValNo].Flags;
+ MVT LocVT = ArgVT;
+ CCValAssign::LocInfo LocInfo = CCValAssign::Full;
+
+ // Promote i8 to i16
+ if (LocVT == MVT::i8) {
+ LocVT = MVT::i16;
+ if (ArgFlags.isSExt())
+ LocInfo = CCValAssign::SExt;
+ else if (ArgFlags.isZExt())
+ LocInfo = CCValAssign::ZExt;
+ else
+ LocInfo = CCValAssign::AExt;
+ }
+
+ // Handle byval arguments
+ if (ArgFlags.isByVal()) {
+ State.HandleByVal(ValNo++, ArgVT, LocVT, LocInfo, 2, 2, ArgFlags);
+ continue;
+ }
+
+ unsigned Parts = ArgsParts[i];
+
+ if (!UseStack && Parts <= RegsLeft) {
+ unsigned FirstVal = ValNo;
+ for (unsigned j = 0; j < Parts; j++) {
+ unsigned Reg = State.AllocateReg(RegList, NbRegs);
+ State.addLoc(CCValAssign::getReg(ValNo++, ArgVT, Reg, LocVT, LocInfo));
+ RegsLeft--;
+ }
+
+ // Reverse the order of the pieces to agree with the "big endian" format
+ // required in the calling convention ABI.
+ SmallVectorImpl<CCValAssign>::iterator B = ArgLocs.begin() + FirstVal;
+ std::reverse(B, B + Parts);
+ } else {
+ UseStack = true;
+ for (unsigned j = 0; j < Parts; j++)
+ CC_MSP430_AssignStack(ValNo++, ArgVT, LocVT, LocInfo, ArgFlags, State);
+ }
+ }
+}
+
+static void AnalyzeRetResult(CCState &State,
+ const SmallVectorImpl<ISD::InputArg> &Ins) {
+ State.AnalyzeCallResult(Ins, RetCC_MSP430);
+}
+
+static void AnalyzeRetResult(CCState &State,
+ const SmallVectorImpl<ISD::OutputArg> &Outs) {
+ State.AnalyzeReturn(Outs, RetCC_MSP430);
+}
+
+template<typename ArgT>
+static void AnalyzeReturnValues(CCState &State,
+ SmallVectorImpl<CCValAssign> &RVLocs,
+ const SmallVectorImpl<ArgT> &Args) {
+ AnalyzeRetResult(State, Args);
+
+ // Reverse split return values to get the "big endian" format required
+ // to agree with the calling convention ABI.
+ std::reverse(RVLocs.begin(), RVLocs.end());
+}
+
SDValue
MSP430TargetLowering::LowerFormalArguments(SDValue Chain,
CallingConv::ID CallConv,
@@ -325,7 +442,7 @@ MSP430TargetLowering::LowerCCCArguments(SDValue Chain,
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
getTargetMachine(), ArgLocs, *DAG.getContext());
- CCInfo.AnalyzeFormalArguments(Ins, CC_MSP430);
+ AnalyzeArguments(CCInfo, ArgLocs, Ins);
// Create frame index for the start of the first vararg value
if (isVarArg) {
@@ -423,7 +540,7 @@ MSP430TargetLowering::LowerReturn(SDValue Chain,
getTargetMachine(), RVLocs, *DAG.getContext());
// Analyze return values.
- CCInfo.AnalyzeReturn(Outs, RetCC_MSP430);
+ AnalyzeReturnValues(CCInfo, RVLocs, Outs);
SDValue Flag;
SmallVector<SDValue, 4> RetOps(1, Chain);
@@ -471,8 +588,7 @@ MSP430TargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee,
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
getTargetMachine(), ArgLocs, *DAG.getContext());
-
- CCInfo.AnalyzeCallOperands(Outs, CC_MSP430);
+ AnalyzeArguments(CCInfo, ArgLocs, Outs);
// Get a count of how many bytes are to be pushed on the stack.
unsigned NumBytes = CCInfo.getNextStackOffset();
@@ -610,7 +726,7 @@ MSP430TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
getTargetMachine(), RVLocs, *DAG.getContext());
- CCInfo.AnalyzeCallResult(Ins, RetCC_MSP430);
+ AnalyzeReturnValues(CCInfo, RVLocs, Ins);
// Copy all of the result registers out of their specified physreg.
for (unsigned i = 0; i != RVLocs.size(); ++i) {
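A worked example for the argument analysis above: MSP430 splits an i32 into two i16 pieces, so for f(i32 a, i16 b) the flattened argument list is {a.lo, a.hi, b} with OrigArgIndex = {0, 0, 1}:

  // ParseFunctionArgs  ->  ArgsParts = {2, 1}
  // Both pieces of 'a' are assigned registers together (or both spill to
  // the stack), and their two CCValAssign entries are reversed to produce
  // the "big endian" piece order the ABI expects; 'b' is handled next.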
diff --git a/lib/Target/MSP430/MSP430InstrInfo.cpp b/lib/Target/MSP430/MSP430InstrInfo.cpp
index c850594..7a0b00a 100644
--- a/lib/Target/MSP430/MSP430InstrInfo.cpp
+++ b/lib/Target/MSP430/MSP430InstrInfo.cpp
@@ -22,11 +22,14 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetRegistry.h"
-#define GET_INSTRINFO_CTOR
+#define GET_INSTRINFO_CTOR_DTOR
#include "MSP430GenInstrInfo.inc"
using namespace llvm;
+// Pin the vtable to this file.
+void MSP430InstrInfo::anchor() {}
+
MSP430InstrInfo::MSP430InstrInfo(MSP430TargetMachine &tm)
: MSP430GenInstrInfo(MSP430::ADJCALLSTACKDOWN, MSP430::ADJCALLSTACKUP),
RI(tm) {}
diff --git a/lib/Target/MSP430/MSP430InstrInfo.h b/lib/Target/MSP430/MSP430InstrInfo.h
index d79f992..ad2b8cc 100644
--- a/lib/Target/MSP430/MSP430InstrInfo.h
+++ b/lib/Target/MSP430/MSP430InstrInfo.h
@@ -42,6 +42,7 @@ namespace MSP430II {
class MSP430InstrInfo : public MSP430GenInstrInfo {
const MSP430RegisterInfo RI;
+ virtual void anchor();
public:
explicit MSP430InstrInfo(MSP430TargetMachine &TM);
diff --git a/lib/Target/MSP430/MSP430MCInstLower.cpp b/lib/Target/MSP430/MSP430MCInstLower.cpp
index 043e5be..52f9ee5 100644
--- a/lib/Target/MSP430/MSP430MCInstLower.cpp
+++ b/lib/Target/MSP430/MSP430MCInstLower.cpp
@@ -33,7 +33,7 @@ GetGlobalAddressSymbol(const MachineOperand &MO) const {
case 0: break;
}
- return Printer.Mang->getSymbol(MO.getGlobal());
+ return Printer.getSymbol(MO.getGlobal());
}
MCSymbol *MSP430MCInstLower::
diff --git a/lib/Target/Mangler.cpp b/lib/Target/Mangler.cpp
index f86428c..38be25c 100644
--- a/lib/Target/Mangler.cpp
+++ b/lib/Target/Mangler.cpp
@@ -23,84 +23,6 @@
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
-static bool isAcceptableChar(char C, bool AllowPeriod, bool AllowUTF8) {
- if ((C < 'a' || C > 'z') &&
- (C < 'A' || C > 'Z') &&
- (C < '0' || C > '9') &&
- C != '_' && C != '$' && C != '@' &&
- !(AllowPeriod && C == '.') &&
- !(AllowUTF8 && (C & 0x80)))
- return false;
- return true;
-}
-
-static char HexDigit(int V) {
- return V < 10 ? V+'0' : V+'A'-10;
-}
-
-static void MangleLetter(SmallVectorImpl<char> &OutName, unsigned char C) {
- OutName.push_back('_');
- OutName.push_back(HexDigit(C >> 4));
- OutName.push_back(HexDigit(C & 15));
- OutName.push_back('_');
-}
-
-/// NameNeedsEscaping - Return true if the identifier \p Str needs quotes
-/// for this assembler.
-static bool NameNeedsEscaping(StringRef Str, const MCAsmInfo *MAI) {
- assert(!Str.empty() && "Cannot create an empty MCSymbol");
-
- // If the first character is a number and the target does not allow this, we
- // need quotes.
- if (!MAI->doesAllowNameToStartWithDigit() && Str[0] >= '0' && Str[0] <= '9')
- return true;
-
- // If any of the characters in the string is an unacceptable character, force
- // quotes.
- bool AllowPeriod = MAI->doesAllowPeriodsInName();
- bool AllowUTF8 = MAI->doesAllowUTF8();
- for (unsigned i = 0, e = Str.size(); i != e; ++i)
- if (!isAcceptableChar(Str[i], AllowPeriod, AllowUTF8))
- return true;
- return false;
-}
-
-/// appendMangledName - Add the specified string in mangled form if it uses
-/// any unusual characters.
-static void appendMangledName(SmallVectorImpl<char> &OutName, StringRef Str,
- const MCAsmInfo *MAI) {
- // The first character is not allowed to be a number unless the target
- // explicitly allows it.
- if (!MAI->doesAllowNameToStartWithDigit() && Str[0] >= '0' && Str[0] <= '9') {
- MangleLetter(OutName, Str[0]);
- Str = Str.substr(1);
- }
-
- bool AllowPeriod = MAI->doesAllowPeriodsInName();
- bool AllowUTF8 = MAI->doesAllowUTF8();
- for (unsigned i = 0, e = Str.size(); i != e; ++i) {
- if (!isAcceptableChar(Str[i], AllowPeriod, AllowUTF8))
- MangleLetter(OutName, Str[i]);
- else
- OutName.push_back(Str[i]);
- }
-}
-
-
-/// appendMangledQuotedName - On systems that support quoted symbols, we still
-/// have to escape some (obscure) characters like " and \n which would break the
-/// assembler's lexing.
-static void appendMangledQuotedName(SmallVectorImpl<char> &OutName,
- StringRef Str) {
- for (unsigned i = 0, e = Str.size(); i != e; ++i) {
- if (Str[i] == '"' || Str[i] == '\n')
- MangleLetter(OutName, Str[i]);
- else
- OutName.push_back(Str[i]);
- }
-}
-
-
/// getNameWithPrefix - Fill OutName with the name of the appropriate prefix
/// and the specified name as the global variable name. GVName must not be
/// empty.
@@ -111,7 +33,7 @@ void Mangler::getNameWithPrefix(SmallVectorImpl<char> &OutName,
StringRef Name = GVName.toStringRef(TmpData);
assert(!Name.empty() && "getNameWithPrefix requires non-empty name");
- const MCAsmInfo *MAI = Context.getAsmInfo();
+ const MCAsmInfo *MAI = TM->getMCAsmInfo();
// If the global name is not led with \1, add the appropriate prefixes.
if (Name[0] == '\1') {
@@ -136,26 +58,9 @@ void Mangler::getNameWithPrefix(SmallVectorImpl<char> &OutName,
OutName.append(Prefix, Prefix+strlen(Prefix));
}
}
-
+
// If this is a simple string that doesn't need escaping, just append it.
- if (!NameNeedsEscaping(Name, MAI) ||
- // If quotes are supported, they can be used unless the string contains
- // a quote or newline.
- (MAI->doesAllowQuotesInName() &&
- Name.find_first_of("\n\"") == StringRef::npos)) {
- OutName.append(Name.begin(), Name.end());
- return;
- }
-
- // On systems that do not allow quoted names, we need to mangle most
- // strange characters.
- if (!MAI->doesAllowQuotesInName())
- return appendMangledName(OutName, Name, MAI);
-
- // Okay, the system allows quoted strings. We can quote most anything, the
- // only characters that need escaping are " and \n.
- assert(Name.find_first_of("\n\"") != StringRef::npos);
- return appendMangledQuotedName(OutName, Name);
+ OutName.append(Name.begin(), Name.end());
}
/// AddFastCallStdCallSuffix - Microsoft fastcall and stdcall functions require
@@ -212,7 +117,7 @@ void Mangler::getNameWithPrefix(SmallVectorImpl<char> &OutName,
// If we are supposed to add a microsoft-style suffix for stdcall/fastcall,
// add it.
- if (Context.getAsmInfo()->hasMicrosoftFastStdCallMangling()) {
+ if (TM->getMCAsmInfo()->hasMicrosoftFastStdCallMangling()) {
if (const Function *F = dyn_cast<Function>(GV)) {
CallingConv::ID CC = F->getCallingConv();
@@ -236,13 +141,3 @@ void Mangler::getNameWithPrefix(SmallVectorImpl<char> &OutName,
}
}
}
-
-/// getSymbol - Return the MCSymbol for the specified global value. This
-/// symbol is the main label that is the address of the global.
-MCSymbol *Mangler::getSymbol(const GlobalValue *GV) {
- SmallString<60> NameStr;
- getNameWithPrefix(NameStr, GV, false);
- return Context.GetOrCreateSymbol(NameStr.str());
-}
-
-
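Call-site effect of dropping Mangler::getSymbol (visible in the Hexagon and MSP430 printer hunks above): symbol lookup now goes through the AsmPrinter, which owns the mangler and the MC context:

  // before:  MCSymbol *Sym = Mang->getSymbol(GV);
  // after:   MCSymbol *Sym = getSymbol(GV);   // AsmPrinter::getSymbol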
diff --git a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
index 3dd6562..cdae6c2 100644
--- a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
+++ b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
@@ -9,6 +9,7 @@
#include "MCTargetDesc/MipsMCTargetDesc.h"
#include "MipsRegisterInfo.h"
+#include "MipsTargetStreamer.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
@@ -24,23 +25,25 @@
using namespace llvm;
+namespace llvm {
+class MCInstrInfo;
+}
+
namespace {
class MipsAssemblerOptions {
public:
- MipsAssemblerOptions():
- aTReg(1), reorder(true), macro(true) {
- }
+ MipsAssemblerOptions() : aTReg(1), reorder(true), macro(true) {}
- unsigned getATRegNum() {return aTReg;}
+ unsigned getATRegNum() { return aTReg; }
bool setATReg(unsigned Reg);
- bool isReorder() {return reorder;}
- void setReorder() {reorder = true;}
- void setNoreorder() {reorder = false;}
+ bool isReorder() { return reorder; }
+ void setReorder() { reorder = true; }
+ void setNoreorder() { reorder = false; }
- bool isMacro() {return macro;}
- void setMacro() {macro = true;}
- void setNomacro() {macro = false;}
+ bool isMacro() { return macro; }
+ void setMacro() { macro = true; }
+ void setNomacro() { macro = false; }
private:
unsigned aTReg;
@@ -52,23 +55,21 @@ private:
namespace {
class MipsAsmParser : public MCTargetAsmParser {
- enum FpFormatTy {
- FP_FORMAT_NONE = -1,
- FP_FORMAT_S,
- FP_FORMAT_D,
- FP_FORMAT_L,
- FP_FORMAT_W
- } FpFormat;
+ MipsTargetStreamer &getTargetStreamer() {
+ MCTargetStreamer &TS = Parser.getStreamer().getTargetStreamer();
+ return static_cast<MipsTargetStreamer &>(TS);
+ }
MCSubtargetInfo &STI;
MCAsmParser &Parser;
MipsAssemblerOptions Options;
+ bool hasConsumedDollar;
#define GET_ASSEMBLER_HEADER
#include "MipsGenAsmMatcher.inc"
bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
- SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+ SmallVectorImpl<MCParsedAsmOperand *> &Operands,
MCStreamer &Out, unsigned &ErrorInfo,
bool MatchingInlineAsm);
@@ -76,56 +77,98 @@ class MipsAsmParser : public MCTargetAsmParser {
bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
SMLoc NameLoc,
- SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+ SmallVectorImpl<MCParsedAsmOperand *> &Operands);
bool ParseDirective(AsmToken DirectiveID);
MipsAsmParser::OperandMatchResultTy
- parseRegs(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
- int RegKind);
+ parseRegs(SmallVectorImpl<MCParsedAsmOperand *> &Operands, int RegKind);
MipsAsmParser::OperandMatchResultTy
- parseMemOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+ parseMSARegs(SmallVectorImpl<MCParsedAsmOperand *> &Operands, int RegKind);
MipsAsmParser::OperandMatchResultTy
- parseGPR32(SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+ parseMSACtrlRegs(SmallVectorImpl<MCParsedAsmOperand *> &Operands,
+ int RegKind);
MipsAsmParser::OperandMatchResultTy
- parseGPR64(SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+ parseMemOperand(SmallVectorImpl<MCParsedAsmOperand *> &Operands);
+
+ bool parsePtrReg(SmallVectorImpl<MCParsedAsmOperand *> &Operands,
+ int RegKind);
MipsAsmParser::OperandMatchResultTy
- parseHWRegs(SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+ parsePtrReg(SmallVectorImpl<MCParsedAsmOperand *> &Operands);
MipsAsmParser::OperandMatchResultTy
- parseHW64Regs(SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+ parseGPR32(SmallVectorImpl<MCParsedAsmOperand *> &Operands);
MipsAsmParser::OperandMatchResultTy
- parseCCRRegs(SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+ parseGPR64(SmallVectorImpl<MCParsedAsmOperand *> &Operands);
MipsAsmParser::OperandMatchResultTy
- parseAFGR64Regs(SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+ parseHWRegs(SmallVectorImpl<MCParsedAsmOperand *> &Operands);
MipsAsmParser::OperandMatchResultTy
- parseFGR64Regs(SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+ parseCCRRegs(SmallVectorImpl<MCParsedAsmOperand *> &Operands);
MipsAsmParser::OperandMatchResultTy
- parseFGR32Regs(SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+ parseAFGR64Regs(SmallVectorImpl<MCParsedAsmOperand *> &Operands);
MipsAsmParser::OperandMatchResultTy
- parseFCCRegs(SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+ parseFGR64Regs(SmallVectorImpl<MCParsedAsmOperand *> &Operands);
MipsAsmParser::OperandMatchResultTy
- parseACRegsDSP(SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+ parseFGR32Regs(SmallVectorImpl<MCParsedAsmOperand *> &Operands);
+
+ MipsAsmParser::OperandMatchResultTy
+ parseFGRH32Regs(SmallVectorImpl<MCParsedAsmOperand *> &Operands);
+
+ MipsAsmParser::OperandMatchResultTy
+ parseFCCRegs(SmallVectorImpl<MCParsedAsmOperand *> &Operands);
+
+ MipsAsmParser::OperandMatchResultTy
+ parseACC64DSP(SmallVectorImpl<MCParsedAsmOperand *> &Operands);
+
+ MipsAsmParser::OperandMatchResultTy
+ parseLO32DSP(SmallVectorImpl<MCParsedAsmOperand *> &Operands);
+
+ MipsAsmParser::OperandMatchResultTy
+ parseHI32DSP(SmallVectorImpl<MCParsedAsmOperand *> &Operands);
+
+ MipsAsmParser::OperandMatchResultTy
+ parseCOP2(SmallVectorImpl<MCParsedAsmOperand *> &Operands);
+
+ MipsAsmParser::OperandMatchResultTy
+ parseMSA128BRegs(SmallVectorImpl<MCParsedAsmOperand *> &Operands);
+
+ MipsAsmParser::OperandMatchResultTy
+ parseMSA128HRegs(SmallVectorImpl<MCParsedAsmOperand *> &Operands);
+
+ MipsAsmParser::OperandMatchResultTy
+ parseMSA128WRegs(SmallVectorImpl<MCParsedAsmOperand *> &Operands);
+
+ MipsAsmParser::OperandMatchResultTy
+ parseMSA128DRegs(SmallVectorImpl<MCParsedAsmOperand *> &Operands);
+
+ MipsAsmParser::OperandMatchResultTy
+ parseMSA128CtrlRegs(SmallVectorImpl<MCParsedAsmOperand *> &Operands);
+
+ MipsAsmParser::OperandMatchResultTy
+ parseInvNum(SmallVectorImpl<MCParsedAsmOperand *> &Operands);
+
+ MipsAsmParser::OperandMatchResultTy
+ parseLSAImm(SmallVectorImpl<MCParsedAsmOperand *> &Operands);
bool searchSymbolAlias(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
unsigned RegKind);
- bool ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &,
+ bool ParseOperand(SmallVectorImpl<MCParsedAsmOperand *> &,
StringRef Mnemonic);
int tryParseRegister(bool is64BitReg);
- bool tryParseRegisterOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+ bool tryParseRegisterOperand(SmallVectorImpl<MCParsedAsmOperand *> &Operands,
bool is64BitReg);
bool needsExpansion(MCInst &Inst);
@@ -139,17 +182,19 @@ class MipsAsmParser : public MCTargetAsmParser {
void expandLoadAddressReg(MCInst &Inst, SMLoc IDLoc,
SmallVectorImpl<MCInst> &Instructions);
void expandMemInst(MCInst &Inst, SMLoc IDLoc,
- SmallVectorImpl<MCInst> &Instructions,
- bool isLoad,bool isImmOpnd);
+ SmallVectorImpl<MCInst> &Instructions, bool isLoad,
+ bool isImmOpnd);
bool reportParseError(StringRef ErrorMsg);
bool parseMemOffset(const MCExpr *&Res, bool isParenExpr);
bool parseRelocOperand(const MCExpr *&Res);
- const MCExpr* evaluateRelocExpr(const MCExpr *Expr, StringRef RelocStr);
+ const MCExpr *evaluateRelocExpr(const MCExpr *Expr, StringRef RelocStr);
bool isEvaluated(const MCExpr *Expr);
bool parseDirectiveSet();
+ bool parseDirectiveMipsHackStocg();
+ bool parseDirectiveMipsHackELFFlags();
bool parseSetAtDirective();
bool parseSetNoAtDirective();
@@ -161,6 +206,7 @@ class MipsAsmParser : public MCTargetAsmParser {
bool parseSetAssignment();
bool parseDirectiveWord(unsigned Size, SMLoc L);
+ bool parseDirectiveGpWord();
MCSymbolRefExpr::VariantKind getVariantKind(StringRef Symbol);
@@ -172,40 +218,49 @@ class MipsAsmParser : public MCTargetAsmParser {
return (STI.getFeatureBits() & Mips::FeatureFP64Bit) != 0;
}
+ bool isN64() const { return STI.getFeatureBits() & Mips::FeatureN64; }
+
int matchRegisterName(StringRef Symbol, bool is64BitReg);
int matchCPURegisterName(StringRef Symbol);
int matchRegisterByNumber(unsigned RegNum, unsigned RegClass);
- int matchFPURegisterName(StringRef Name, FpFormatTy Format);
+ int matchFPURegisterName(StringRef Name);
- void setFpFormat(FpFormatTy Format) {
- FpFormat = Format;
- }
+ int matchFCCRegisterName(StringRef Name);
- void setDefaultFpFormat();
+ int matchACRegisterName(StringRef Name);
- void setFpFormat(StringRef Format);
+ int matchMSA128RegisterName(StringRef Name);
- FpFormatTy getFpFormat() {return FpFormat;}
+ int matchMSA128CtrlRegisterName(StringRef Name);
+
+ int regKindToRegClass(int RegKind);
unsigned getReg(int RC, int RegNo);
int getATReg();
bool processInstruction(MCInst &Inst, SMLoc IDLoc,
- SmallVectorImpl<MCInst> &Instructions);
+ SmallVectorImpl<MCInst> &Instructions);
+
+ // Helper function that checks whether the value of a vector index is
+ // within the range accepted for each RegisterKind.
+ // Example: INSERT.B $w0[n], $1 requires 0 <= n < 16.
+ bool validateMSAIndex(int Val, int RegKind);
+
public:
- MipsAsmParser(MCSubtargetInfo &sti, MCAsmParser &parser)
- : MCTargetAsmParser(), STI(sti), Parser(parser) {
+ MipsAsmParser(MCSubtargetInfo &sti, MCAsmParser &parser,
+ const MCInstrInfo &MII)
+ : MCTargetAsmParser(), STI(sti), Parser(parser),
+ hasConsumedDollar(false) {
// Initialize the set of available features.
setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
}
MCAsmParser &getParser() const { return Parser; }
MCAsmLexer &getLexer() const { return Parser.getLexer(); }
-
};
}
@@ -221,13 +276,21 @@ public:
Kind_GPR32,
Kind_GPR64,
Kind_HWRegs,
- Kind_HW64Regs,
Kind_FGR32Regs,
+ Kind_FGRH32Regs,
Kind_FGR64Regs,
Kind_AFGR64Regs,
Kind_CCRRegs,
Kind_FCCRegs,
- Kind_ACRegsDSP
+ Kind_ACC64DSP,
+ Kind_LO32DSP,
+ Kind_HI32DSP,
+ Kind_COP2,
+ Kind_MSA128BRegs,
+ Kind_MSA128HRegs,
+ Kind_MSA128WRegs,
+ Kind_MSA128DRegs,
+ Kind_MSA128CtrlRegs
};
private:
@@ -238,7 +301,9 @@ private:
k_Memory,
k_PostIndexRegister,
k_Register,
- k_Token
+ k_PtrReg,
+ k_Token,
+ k_LSAImm
} Kind;
MipsOperand(KindTy K) : MCParsedAsmOperand(), Kind(K) {}
@@ -277,7 +342,12 @@ public:
Inst.addOperand(MCOperand::CreateReg(getReg()));
}
- void addExpr(MCInst &Inst, const MCExpr *Expr) const{
+ void addPtrRegOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateReg(getPtrReg()));
+ }
+
+ void addExpr(MCInst &Inst, const MCExpr *Expr) const {
// Add as immediate when possible. Null MCExpr = 0.
if (Expr == 0)
Inst.addOperand(MCOperand::CreateImm(0));
@@ -306,6 +376,9 @@ public:
bool isImm() const { return Kind == k_Immediate; }
bool isToken() const { return Kind == k_Token; }
bool isMem() const { return Kind == k_Memory; }
+ bool isPtrReg() const { return Kind == k_PtrReg; }
+ bool isInvNum() const { return Kind == k_Immediate; }
+ bool isLSAImm() const { return Kind == k_LSAImm; }
StringRef getToken() const {
assert(Kind == k_Token && "Invalid access!");
@@ -317,13 +390,18 @@ public:
return Reg.RegNum;
}
+ unsigned getPtrReg() const {
+ assert((Kind == k_PtrReg) && "Invalid access!");
+ return Reg.RegNum;
+ }
+
void setRegKind(RegisterKind RegKind) {
- assert((Kind == k_Register) && "Invalid access!");
+ assert((Kind == k_Register || Kind == k_PtrReg) && "Invalid access!");
Reg.Kind = RegKind;
}
const MCExpr *getImm() const {
- assert((Kind == k_Immediate) && "Invalid access!");
+ assert((Kind == k_Immediate || Kind == k_LSAImm) && "Invalid access!");
return Imm.Val;
}
@@ -354,6 +432,14 @@ public:
return Op;
}
+ static MipsOperand *CreatePtrReg(unsigned RegNum, SMLoc S, SMLoc E) {
+ MipsOperand *Op = new MipsOperand(k_PtrReg);
+ Op->Reg.RegNum = RegNum;
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ return Op;
+ }
+
static MipsOperand *CreateImm(const MCExpr *Val, SMLoc S, SMLoc E) {
MipsOperand *Op = new MipsOperand(k_Immediate);
Op->Imm.Val = Val;
@@ -362,8 +448,16 @@ public:
return Op;
}
+ static MipsOperand *CreateLSAImm(const MCExpr *Val, SMLoc S, SMLoc E) {
+ MipsOperand *Op = new MipsOperand(k_LSAImm);
+ Op->Imm.Val = Val;
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ return Op;
+ }
+
static MipsOperand *CreateMem(unsigned Base, const MCExpr *Off,
- SMLoc S, SMLoc E) {
+ SMLoc S, SMLoc E) {
MipsOperand *Op = new MipsOperand(k_Memory);
Op->Mem.Base = Base;
Op->Mem.Off = Off;
@@ -388,17 +482,12 @@ public:
return Reg.Kind == Kind_HWRegs;
}
- bool isHW64RegsAsm() const {
- assert((Kind == k_Register) && "Invalid access!");
- return Reg.Kind == Kind_HW64Regs;
- }
-
bool isCCRAsm() const {
assert((Kind == k_Register) && "Invalid access!");
return Reg.Kind == Kind_CCRRegs;
}
- bool isAFGR64Asm() const {
+ bool isAFGR64Asm() const {
return Kind == k_Register && Reg.Kind == Kind_AFGR64Regs;
}
@@ -410,28 +499,58 @@ public:
return (Kind == k_Register) && Reg.Kind == Kind_FGR32Regs;
}
+ bool isFGRH32Asm() const {
+ return (Kind == k_Register) && Reg.Kind == Kind_FGRH32Regs;
+ }
+
bool isFCCRegsAsm() const {
return (Kind == k_Register) && Reg.Kind == Kind_FCCRegs;
}
- bool isACRegsDSPAsm() const {
- return Kind == k_Register && Reg.Kind == Kind_ACRegsDSP;
+ bool isACC64DSPAsm() const {
+ return Kind == k_Register && Reg.Kind == Kind_ACC64DSP;
}
- /// getStartLoc - Get the location of the first token of this operand.
- SMLoc getStartLoc() const {
- return StartLoc;
+ bool isLO32DSPAsm() const {
+ return Kind == k_Register && Reg.Kind == Kind_LO32DSP;
}
- /// getEndLoc - Get the location of the last token of this operand.
- SMLoc getEndLoc() const {
- return EndLoc;
+
+ bool isHI32DSPAsm() const {
+ return Kind == k_Register && Reg.Kind == Kind_HI32DSP;
+ }
+
+ bool isCOP2Asm() const { return Kind == k_Register && Reg.Kind == Kind_COP2; }
+
+ bool isMSA128BAsm() const {
+ return Kind == k_Register && Reg.Kind == Kind_MSA128BRegs;
+ }
+
+ bool isMSA128HAsm() const {
+ return Kind == k_Register && Reg.Kind == Kind_MSA128HRegs;
+ }
+
+ bool isMSA128WAsm() const {
+ return Kind == k_Register && Reg.Kind == Kind_MSA128WRegs;
+ }
+
+ bool isMSA128DAsm() const {
+ return Kind == k_Register && Reg.Kind == Kind_MSA128DRegs;
}
+ bool isMSA128CRAsm() const {
+ return Kind == k_Register && Reg.Kind == Kind_MSA128CtrlRegs;
+ }
+
+ /// getStartLoc - Get the location of the first token of this operand.
+ SMLoc getStartLoc() const { return StartLoc; }
+ /// getEndLoc - Get the location of the last token of this operand.
+ SMLoc getEndLoc() const { return EndLoc; }
+
virtual void print(raw_ostream &OS) const {
llvm_unreachable("unimplemented!");
}
}; // class MipsOperand
-} // namespace
+} // namespace
namespace llvm {
extern const MCInstrDesc MipsInsts[];
@@ -462,8 +581,8 @@ bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc,
// reference or immediate we may have to expand instructions.
for (unsigned i = 0; i < MCID.getNumOperands(); i++) {
const MCOperandInfo &OpInfo = MCID.OpInfo[i];
- if ((OpInfo.OperandType == MCOI::OPERAND_MEMORY)
- || (OpInfo.OperandType == MCOI::OPERAND_UNKNOWN)) {
+ if ((OpInfo.OperandType == MCOI::OPERAND_MEMORY) ||
+ (OpInfo.OperandType == MCOI::OPERAND_UNKNOWN)) {
MCOperand &Op = Inst.getOperand(i);
if (Op.isImm()) {
int MemOffset = Op.getImm();
@@ -476,7 +595,7 @@ bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc,
const MCExpr *Expr = Op.getExpr();
if (Expr->getKind() == MCExpr::SymbolRef) {
const MCSymbolRefExpr *SR =
- static_cast<const MCSymbolRefExpr*>(Expr);
+ static_cast<const MCSymbolRefExpr *>(Expr);
if (SR->getKind() == MCSymbolRefExpr::VK_None) {
// Expand symbol.
expandMemInst(Inst, IDLoc, Instructions, MCID.mayLoad(), false);
@@ -489,7 +608,7 @@ bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc,
}
}
} // for
- } // if load/store
+ } // if load/store
if (needsExpansion(Inst))
expandInstruction(Inst, IDLoc, Instructions);
@@ -512,7 +631,7 @@ bool MipsAsmParser::needsExpansion(MCInst &Inst) {
}
void MipsAsmParser::expandInstruction(MCInst &Inst, SMLoc IDLoc,
- SmallVectorImpl<MCInst> &Instructions) {
+ SmallVectorImpl<MCInst> &Instructions) {
switch (Inst.getOpcode()) {
case Mips::LoadImm32Reg:
return expandLoadImm(Inst, IDLoc, Instructions);
@@ -567,8 +686,9 @@ void MipsAsmParser::expandLoadImm(MCInst &Inst, SMLoc IDLoc,
}
}
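For context, expandLoadImm splits a 32-bit immediate load into a lui/ori pair when the value does not fit in 16 bits; a rough sketch of the emitted sequence (register and value chosen purely for illustration):

    // li $4, 0x12345678 expands roughly to:
    //   lui $4, 0x1234        // upper 16 bits
    //   ori $4, $4, 0x5678    // lower 16 bits
    // A value that fits in 16 bits stays a single instruction.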
-void MipsAsmParser::expandLoadAddressReg(MCInst &Inst, SMLoc IDLoc,
- SmallVectorImpl<MCInst> &Instructions) {
+void
+MipsAsmParser::expandLoadAddressReg(MCInst &Inst, SMLoc IDLoc,
+ SmallVectorImpl<MCInst> &Instructions) {
MCInst tmpInst;
const MCOperand &ImmOp = Inst.getOperand(2);
assert(ImmOp.isImm() && "expected immediate operand kind");
@@ -609,8 +729,9 @@ void MipsAsmParser::expandLoadAddressReg(MCInst &Inst, SMLoc IDLoc,
}
}
-void MipsAsmParser::expandLoadAddressImm(MCInst &Inst, SMLoc IDLoc,
- SmallVectorImpl<MCInst> &Instructions) {
+void
+MipsAsmParser::expandLoadAddressImm(MCInst &Inst, SMLoc IDLoc,
+ SmallVectorImpl<MCInst> &Instructions) {
MCInst tmpInst;
const MCOperand &ImmOp = Inst.getOperand(1);
assert(ImmOp.isImm() && "expected immediate operand kind");
@@ -643,14 +764,15 @@ void MipsAsmParser::expandLoadAddressImm(MCInst &Inst, SMLoc IDLoc,
}
void MipsAsmParser::expandMemInst(MCInst &Inst, SMLoc IDLoc,
- SmallVectorImpl<MCInst> &Instructions, bool isLoad, bool isImmOpnd) {
+ SmallVectorImpl<MCInst> &Instructions,
+ bool isLoad, bool isImmOpnd) {
const MCSymbolRefExpr *SR;
MCInst TempInst;
unsigned ImmOffset, HiOffset, LoOffset;
const MCExpr *ExprOffset;
unsigned TmpRegNum;
- unsigned AtRegNum = getReg((isMips64()) ? Mips::GPR64RegClassID
- : Mips::GPR32RegClassID, getATReg());
+ unsigned AtRegNum = getReg(
+ (isMips64()) ? Mips::GPR64RegClassID : Mips::GPR32RegClassID, getATReg());
// 1st operand is either the source or destination register.
assert(Inst.getOperand(0).isReg() && "expected register operand kind");
unsigned RegOpNum = Inst.getOperand(0).getReg();
@@ -680,7 +802,7 @@ void MipsAsmParser::expandMemInst(MCInst &Inst, SMLoc IDLoc,
TempInst.addOperand(MCOperand::CreateImm(HiOffset));
else {
if (ExprOffset->getKind() == MCExpr::SymbolRef) {
- SR = static_cast<const MCSymbolRefExpr*>(ExprOffset);
+ SR = static_cast<const MCSymbolRefExpr *>(ExprOffset);
const MCSymbolRefExpr *HiExpr = MCSymbolRefExpr::Create(
SR->getSymbol().getName(), MCSymbolRefExpr::VK_Mips_ABS_HI,
getContext());
@@ -723,15 +845,14 @@ void MipsAsmParser::expandMemInst(MCInst &Inst, SMLoc IDLoc,
TempInst.clear();
}
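The same hi/lo splitting drives expandMemInst when a load or store references a symbol: the %hi half is materialized through the assembler temporary obtained from getATReg(), and the %lo half becomes the memory offset. A hedged sketch of the result:

    // lw $2, sym expands roughly to:
    //   lui $at, %hi(sym)
    //   lw  $2, %lo(sym)($at)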
-bool MipsAsmParser::
-MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
- SmallVectorImpl<MCParsedAsmOperand*> &Operands,
- MCStreamer &Out, unsigned &ErrorInfo,
- bool MatchingInlineAsm) {
+bool MipsAsmParser::MatchAndEmitInstruction(
+ SMLoc IDLoc, unsigned &Opcode,
+ SmallVectorImpl<MCParsedAsmOperand *> &Operands, MCStreamer &Out,
+ unsigned &ErrorInfo, bool MatchingInlineAsm) {
MCInst Inst;
SmallVector<MCInst, 8> Instructions;
- unsigned MatchResult = MatchInstructionImpl(Operands, Inst, ErrorInfo,
- MatchingInlineAsm);
+ unsigned MatchResult =
+ MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm);
switch (MatchResult) {
default:
@@ -752,7 +873,7 @@ MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
if (ErrorInfo >= Operands.size())
return Error(IDLoc, "too few operands for instruction");
- ErrorLoc = ((MipsOperand*) Operands[ErrorInfo])->getStartLoc();
+ ErrorLoc = ((MipsOperand *)Operands[ErrorInfo])->getStartLoc();
if (ErrorLoc == SMLoc())
ErrorLoc = IDLoc;
}
@@ -766,44 +887,44 @@ MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
}
int MipsAsmParser::matchCPURegisterName(StringRef Name) {
- int CC;
+ int CC;
if (Name == "at")
return getATReg();
- CC = StringSwitch<unsigned>(Name)
- .Case("zero", 0)
- .Case("a0", 4)
- .Case("a1", 5)
- .Case("a2", 6)
- .Case("a3", 7)
- .Case("v0", 2)
- .Case("v1", 3)
- .Case("s0", 16)
- .Case("s1", 17)
- .Case("s2", 18)
- .Case("s3", 19)
- .Case("s4", 20)
- .Case("s5", 21)
- .Case("s6", 22)
- .Case("s7", 23)
- .Case("k0", 26)
- .Case("k1", 27)
- .Case("sp", 29)
- .Case("fp", 30)
- .Case("gp", 28)
- .Case("ra", 31)
- .Case("t0", 8)
- .Case("t1", 9)
- .Case("t2", 10)
- .Case("t3", 11)
- .Case("t4", 12)
- .Case("t5", 13)
- .Case("t6", 14)
- .Case("t7", 15)
- .Case("t8", 24)
- .Case("t9", 25)
- .Default(-1);
+ CC = StringSwitch<unsigned>(Name)
+ .Case("zero", 0)
+ .Case("a0", 4)
+ .Case("a1", 5)
+ .Case("a2", 6)
+ .Case("a3", 7)
+ .Case("v0", 2)
+ .Case("v1", 3)
+ .Case("s0", 16)
+ .Case("s1", 17)
+ .Case("s2", 18)
+ .Case("s3", 19)
+ .Case("s4", 20)
+ .Case("s5", 21)
+ .Case("s6", 22)
+ .Case("s7", 23)
+ .Case("k0", 26)
+ .Case("k1", 27)
+ .Case("sp", 29)
+ .Case("fp", 30)
+ .Case("gp", 28)
+ .Case("ra", 31)
+ .Case("t0", 8)
+ .Case("t1", 9)
+ .Case("t2", 10)
+ .Case("t3", 11)
+ .Case("t4", 12)
+ .Case("t5", 13)
+ .Case("t6", 14)
+ .Case("t7", 15)
+ .Case("t8", 24)
+ .Case("t9", 25)
+ .Default(-1);
// Although SGI documentation just cuts out t0-t3 for n32/n64,
// GNU pushes the values of t0-t3 to override the o32/o64 values for t4-t7
@@ -813,72 +934,140 @@ int MipsAsmParser::matchCPURegisterName(StringRef Name) {
if (CC == -1 && isMips64())
CC = StringSwitch<unsigned>(Name)
- .Case("a4", 8)
- .Case("a5", 9)
- .Case("a6", 10)
- .Case("a7", 11)
- .Case("kt0", 26)
- .Case("kt1", 27)
- .Case("s8", 30)
- .Default(-1);
+ .Case("a4", 8)
+ .Case("a5", 9)
+ .Case("a6", 10)
+ .Case("a7", 11)
+ .Case("kt0", 26)
+ .Case("kt1", 27)
+ .Case("s8", 30)
+ .Default(-1);
return CC;
}
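The second table matters because the same register numbers carry different conventional names across ABIs; reading the two StringSwitch chains above together:

    // o32: $8..$15 are t0..t7; $30 is fp.
    // n64: $8..$11 gain the names a4..a7 (t0..t3 then name $12..$15),
    //      kt0/kt1 alias $26/$27, and s8 becomes another name for $30.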
-int MipsAsmParser::matchFPURegisterName(StringRef Name, FpFormatTy Format) {
+int MipsAsmParser::matchFPURegisterName(StringRef Name) {
if (Name[0] == 'f') {
StringRef NumString = Name.substr(1);
unsigned IntVal;
if (NumString.getAsInteger(10, IntVal))
- return -1; // This is not an integer.
- if (IntVal > 31)
+ return -1; // This is not an integer.
+ if (IntVal > 31) // Maximum index for fpu register.
return -1;
+ return IntVal;
+ }
+ return -1;
+}
- if (Format == FP_FORMAT_S || Format == FP_FORMAT_W)
- return getReg(Mips::FGR32RegClassID, IntVal);
- if (Format == FP_FORMAT_D) {
- if (isFP64()) {
- return getReg(Mips::FGR64RegClassID, IntVal);
- }
- // Only even numbers available as register pairs.
- if ((IntVal > 31) || (IntVal % 2 != 0))
- return -1;
- return getReg(Mips::AFGR64RegClassID, IntVal / 2);
- }
+int MipsAsmParser::matchFCCRegisterName(StringRef Name) {
+
+ if (Name.startswith("fcc")) {
+ StringRef NumString = Name.substr(3);
+ unsigned IntVal;
+ if (NumString.getAsInteger(10, IntVal))
+ return -1; // This is not an integer.
+ if (IntVal > 7) // There are only 8 fcc registers.
+ return -1;
+ return IntVal;
}
return -1;
}
-int MipsAsmParser::matchRegisterName(StringRef Name, bool is64BitReg) {
+int MipsAsmParser::matchACRegisterName(StringRef Name) {
+
+ if (Name.startswith("ac")) {
+ StringRef NumString = Name.substr(2);
+ unsigned IntVal;
+ if (NumString.getAsInteger(10, IntVal))
+ return -1; // This is not an integer.
+ if (IntVal > 3) // There are only 4 acc registers (ac0-ac3).
+ return -1;
+ return IntVal;
+ }
+ return -1;
+}
- if (Name.equals("fcc0"))
- return Mips::FCC0;
+int MipsAsmParser::matchMSA128RegisterName(StringRef Name) {
+ unsigned IntVal;
+
+ if (Name.front() != 'w' || Name.drop_front(1).getAsInteger(10, IntVal))
+ return -1;
+
+ if (IntVal > 31)
+ return -1;
+
+ return IntVal;
+}
+
+int MipsAsmParser::matchMSA128CtrlRegisterName(StringRef Name) {
+ int CC;
+
+ CC = StringSwitch<unsigned>(Name)
+ .Case("msair", 0)
+ .Case("msacsr", 1)
+ .Case("msaaccess", 2)
+ .Case("msasave", 3)
+ .Case("msamodify", 4)
+ .Case("msarequest", 5)
+ .Case("msamap", 6)
+ .Case("msaunmap", 7)
+ .Default(-1);
+
+ return CC;
+}
+
+int MipsAsmParser::matchRegisterName(StringRef Name, bool is64BitReg) {
int CC;
CC = matchCPURegisterName(Name);
if (CC != -1)
return matchRegisterByNumber(CC, is64BitReg ? Mips::GPR64RegClassID
: Mips::GPR32RegClassID);
- return matchFPURegisterName(Name, getFpFormat());
-}
-
-void MipsAsmParser::setDefaultFpFormat() {
-
- if (isMips64() || isFP64())
- FpFormat = FP_FORMAT_D;
- else
- FpFormat = FP_FORMAT_S;
+ CC = matchFPURegisterName(Name);
+ // TODO: decide about fpu register class
+ if (CC != -1)
+ return matchRegisterByNumber(CC, isFP64() ? Mips::FGR64RegClassID
+ : Mips::FGR32RegClassID);
+ return matchMSA128RegisterName(Name);
}
-void MipsAsmParser::setFpFormat(StringRef Format) {
-
- FpFormat = StringSwitch<FpFormatTy>(Format.lower())
- .Case(".s", FP_FORMAT_S)
- .Case(".d", FP_FORMAT_D)
- .Case(".l", FP_FORMAT_L)
- .Case(".w", FP_FORMAT_W)
- .Default(FP_FORMAT_NONE);
+int MipsAsmParser::regKindToRegClass(int RegKind) {
+
+ switch (RegKind) {
+ case MipsOperand::Kind_GPR32:
+ return Mips::GPR32RegClassID;
+ case MipsOperand::Kind_GPR64:
+ return Mips::GPR64RegClassID;
+ case MipsOperand::Kind_HWRegs:
+ return Mips::HWRegsRegClassID;
+ case MipsOperand::Kind_FGR32Regs:
+ return Mips::FGR32RegClassID;
+ case MipsOperand::Kind_FGRH32Regs:
+ return Mips::FGRH32RegClassID;
+ case MipsOperand::Kind_FGR64Regs:
+ return Mips::FGR64RegClassID;
+ case MipsOperand::Kind_AFGR64Regs:
+ return Mips::AFGR64RegClassID;
+ case MipsOperand::Kind_CCRRegs:
+ return Mips::CCRRegClassID;
+ case MipsOperand::Kind_ACC64DSP:
+ return Mips::ACC64DSPRegClassID;
+ case MipsOperand::Kind_FCCRegs:
+ return Mips::FCCRegClassID;
+ case MipsOperand::Kind_MSA128BRegs:
+ return Mips::MSA128BRegClassID;
+ case MipsOperand::Kind_MSA128HRegs:
+ return Mips::MSA128HRegClassID;
+ case MipsOperand::Kind_MSA128WRegs:
+ return Mips::MSA128WRegClassID;
+ case MipsOperand::Kind_MSA128DRegs:
+ return Mips::MSA128DRegClassID;
+ case MipsOperand::Kind_MSA128CtrlRegs:
+ return Mips::MSACtrlRegClassID;
+ default:
+ return -1;
+ }
}
bool MipsAssemblerOptions::setATReg(unsigned Reg) {
@@ -889,17 +1078,15 @@ bool MipsAssemblerOptions::setATReg(unsigned Reg) {
return true;
}
-int MipsAsmParser::getATReg() {
- return Options.getATRegNum();
-}
+int MipsAsmParser::getATReg() { return Options.getATRegNum(); }
unsigned MipsAsmParser::getReg(int RC, int RegNo) {
return *(getContext().getRegisterInfo()->getRegClass(RC).begin() + RegNo);
}
int MipsAsmParser::matchRegisterByNumber(unsigned RegNum, unsigned RegClass) {
-
- if (RegNum > 31)
+ if (RegNum >
+ getContext().getRegisterInfo()->getRegClass(RegClass).getNumRegs())
return -1;
return getReg(RegClass, RegNum);
@@ -914,12 +1101,13 @@ int MipsAsmParser::tryParseRegister(bool is64BitReg) {
RegNum = matchRegisterName(lowerCase, is64BitReg);
} else if (Tok.is(AsmToken::Integer))
RegNum = matchRegisterByNumber(static_cast<unsigned>(Tok.getIntVal()),
- is64BitReg ? Mips::GPR64RegClassID : Mips::GPR32RegClassID);
+ is64BitReg ? Mips::GPR64RegClassID
+ : Mips::GPR32RegClassID);
return RegNum;
}
bool MipsAsmParser::tryParseRegisterOperand(
- SmallVectorImpl<MCParsedAsmOperand*> &Operands, bool is64BitReg) {
+ SmallVectorImpl<MCParsedAsmOperand *> &Operands, bool is64BitReg) {
SMLoc S = Parser.getTok().getLoc();
int RegNo = -1;
@@ -928,14 +1116,15 @@ bool MipsAsmParser::tryParseRegisterOperand(
if (RegNo == -1)
return true;
- Operands.push_back(MipsOperand::CreateReg(RegNo, S,
- Parser.getTok().getLoc()));
+ Operands.push_back(
+ MipsOperand::CreateReg(RegNo, S, Parser.getTok().getLoc()));
Parser.Lex(); // Eat register token.
return false;
}
-bool MipsAsmParser::ParseOperand(SmallVectorImpl<MCParsedAsmOperand*>&Operands,
- StringRef Mnemonic) {
+bool
+MipsAsmParser::ParseOperand(SmallVectorImpl<MCParsedAsmOperand *> &Operands,
+ StringRef Mnemonic) {
// Check if the current operand has a custom associated parser, if so, try to
// custom parse the operand, or fallback to the general approach.
OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
@@ -983,22 +1172,39 @@ bool MipsAsmParser::ParseOperand(SmallVectorImpl<MCParsedAsmOperand*>&Operands,
return true;
SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
-
MCSymbol *Sym = getContext().GetOrCreateSymbol("$" + Identifier);
-
// Otherwise create a symbol reference.
- const MCExpr *Res = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_None,
- getContext());
+ const MCExpr *Res =
+ MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_None, getContext());
Operands.push_back(MipsOperand::CreateImm(Res, S, E));
return false;
}
case AsmToken::Identifier:
+ // For instruction aliases like "bc1f $Label" the dedicated parser will
+ // eat the '$' sign before failing. So in order to look for the appropriate
+ // label we must first check whether we have already consumed the '$'.
+ if (hasConsumedDollar) {
+ hasConsumedDollar = false;
+ SMLoc S = Parser.getTok().getLoc();
+ StringRef Identifier;
+ if (Parser.parseIdentifier(Identifier))
+ return true;
+ SMLoc E =
+ SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
+ MCSymbol *Sym = getContext().GetOrCreateSymbol("$" + Identifier);
+ // Create a symbol reference.
+ const MCExpr *Res =
+ MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_None, getContext());
+
+ Operands.push_back(MipsOperand::CreateImm(Res, S, E));
+ return false;
+ }
// Look for the existing symbol; we should check if
// we need to assign the proper RegisterKind.
if (searchSymbolAlias(Operands, MipsOperand::Kind_None))
return false;
- // Else drop to expression parsing.
+ // Else drop to expression parsing.
case AsmToken::LParen:
case AsmToken::Minus:
case AsmToken::Plus:
@@ -1029,7 +1235,7 @@ bool MipsAsmParser::ParseOperand(SmallVectorImpl<MCParsedAsmOperand*>&Operands,
return true;
}
-const MCExpr* MipsAsmParser::evaluateRelocExpr(const MCExpr *Expr,
+const MCExpr *MipsAsmParser::evaluateRelocExpr(const MCExpr *Expr,
StringRef RelocStr) {
const MCExpr *Res;
// Check the type of the expression.
@@ -1099,7 +1305,7 @@ bool MipsAsmParser::isEvaluated(const MCExpr *Expr) {
}
bool MipsAsmParser::parseRelocOperand(const MCExpr *&Res) {
- Parser.Lex(); // Eat the % token.
+ Parser.Lex(); // Eat the % token.
const AsmToken &Tok = Parser.getTok(); // Get next token, operation.
if (Tok.isNot(AsmToken::Identifier))
return true;
@@ -1145,7 +1351,7 @@ bool MipsAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
StartLoc = Parser.getTok().getLoc();
RegNo = tryParseRegister(isMips64());
EndLoc = Parser.getTok().getLoc();
- return (RegNo == (unsigned) -1);
+ return (RegNo == (unsigned)-1);
}
bool MipsAsmParser::parseMemOffset(const MCExpr *&Res, bool isParenExpr) {
@@ -1177,11 +1383,12 @@ bool MipsAsmParser::parseMemOffset(const MCExpr *&Res, bool isParenExpr) {
}
MipsAsmParser::OperandMatchResultTy MipsAsmParser::parseMemOperand(
- SmallVectorImpl<MCParsedAsmOperand*>&Operands) {
+ SmallVectorImpl<MCParsedAsmOperand *> &Operands) {
const MCExpr *IdVal = 0;
SMLoc S;
bool isParenExpr = false;
+ MipsAsmParser::OperandMatchResultTy Res = MatchOperand_NoMatch;
// First operand is the offset.
S = Parser.getTok().getLoc();
@@ -1196,21 +1403,20 @@ MipsAsmParser::OperandMatchResultTy MipsAsmParser::parseMemOperand(
const AsmToken &Tok = Parser.getTok(); // Get the next token.
if (Tok.isNot(AsmToken::LParen)) {
- MipsOperand *Mnemonic = static_cast<MipsOperand*>(Operands[0]);
+ MipsOperand *Mnemonic = static_cast<MipsOperand *>(Operands[0]);
if (Mnemonic->getToken() == "la") {
- SMLoc E = SMLoc::getFromPointer(
- Parser.getTok().getLoc().getPointer() - 1);
+ SMLoc E =
+ SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
Operands.push_back(MipsOperand::CreateImm(IdVal, S, E));
return MatchOperand_Success;
}
if (Tok.is(AsmToken::EndOfStatement)) {
- SMLoc E = SMLoc::getFromPointer(
- Parser.getTok().getLoc().getPointer() - 1);
+ SMLoc E =
+ SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
// Zero register assumed, add a memory operand with ZERO as its base.
- Operands.push_back(MipsOperand::CreateMem(isMips64() ? Mips::ZERO_64
- : Mips::ZERO,
- IdVal, S, E));
+ Operands.push_back(MipsOperand::CreateMem(
+ isMips64() ? Mips::ZERO_64 : Mips::ZERO, IdVal, S, E));
return MatchOperand_Success;
}
Error(Parser.getTok().getLoc(), "'(' expected");
@@ -1220,21 +1426,12 @@ MipsAsmParser::OperandMatchResultTy MipsAsmParser::parseMemOperand(
Parser.Lex(); // Eat the '(' token.
}
- const AsmToken &Tok1 = Parser.getTok(); // Get next token
- if (Tok1.is(AsmToken::Dollar)) {
- Parser.Lex(); // Eat the '$' token.
- if (tryParseRegisterOperand(Operands, isMips64())) {
- Error(Parser.getTok().getLoc(), "unexpected token in operand");
- return MatchOperand_ParseFail;
- }
-
- } else {
- Error(Parser.getTok().getLoc(), "unexpected token in operand");
- return MatchOperand_ParseFail;
- }
+ Res = parseRegs(Operands, isMips64() ? (int)MipsOperand::Kind_GPR64
+ : (int)MipsOperand::Kind_GPR32);
+ if (Res != MatchOperand_Success)
+ return Res;
- const AsmToken &Tok2 = Parser.getTok(); // Get next token.
- if (Tok2.isNot(AsmToken::RParen)) {
+ if (Parser.getTok().isNot(AsmToken::RParen)) {
Error(Parser.getTok().getLoc(), "')' expected");
return MatchOperand_ParseFail;
}
@@ -1247,7 +1444,7 @@ MipsAsmParser::OperandMatchResultTy MipsAsmParser::parseMemOperand(
IdVal = MCConstantExpr::Create(0, getContext());
// Replace the register operand with the memory operand.
- MipsOperand* op = static_cast<MipsOperand*>(Operands.back());
+ MipsOperand *op = static_cast<MipsOperand *>(Operands.back());
int RegNo = op->getReg();
// Remove the register from the operands.
Operands.pop_back();
@@ -1266,31 +1463,150 @@ MipsAsmParser::OperandMatchResultTy MipsAsmParser::parseMemOperand(
return MatchOperand_Success;
}
+bool MipsAsmParser::parsePtrReg(SmallVectorImpl<MCParsedAsmOperand *> &Operands,
+ int RegKind) {
+ // If the first token is not '$' we have an error.
+ if (Parser.getTok().isNot(AsmToken::Dollar))
+ return false;
+
+ SMLoc S = Parser.getTok().getLoc();
+ Parser.Lex();
+ AsmToken::TokenKind TkKind = getLexer().getKind();
+ int Reg;
+
+ if (TkKind == AsmToken::Integer) {
+ Reg = matchRegisterByNumber(Parser.getTok().getIntVal(),
+ regKindToRegClass(RegKind));
+ if (Reg == -1)
+ return false;
+ } else if (TkKind == AsmToken::Identifier) {
+ if ((Reg = matchCPURegisterName(Parser.getTok().getString().lower())) == -1)
+ return false;
+ Reg = getReg(regKindToRegClass(RegKind), Reg);
+ } else {
+ return false;
+ }
+
+ MipsOperand *Op = MipsOperand::CreatePtrReg(Reg, S, Parser.getTok().getLoc());
+ Op->setRegKind((MipsOperand::RegisterKind)RegKind);
+ Operands.push_back(Op);
+ Parser.Lex();
+ return true;
+}
+
+MipsAsmParser::OperandMatchResultTy
+MipsAsmParser::parsePtrReg(SmallVectorImpl<MCParsedAsmOperand *> &Operands) {
+ MipsOperand::RegisterKind RegKind =
+ isN64() ? MipsOperand::Kind_GPR64 : MipsOperand::Kind_GPR32;
+
+ // Parse index register.
+ if (!parsePtrReg(Operands, RegKind))
+ return MatchOperand_NoMatch;
+
+ // Parse '('.
+ if (Parser.getTok().isNot(AsmToken::LParen))
+ return MatchOperand_NoMatch;
+
+ Operands.push_back(MipsOperand::CreateToken("(", getLexer().getLoc()));
+ Parser.Lex();
+
+ // Parse base register.
+ if (!parsePtrReg(Operands, RegKind))
+ return MatchOperand_NoMatch;
+
+ // Parse ')'.
+ if (Parser.getTok().isNot(AsmToken::RParen))
+ return MatchOperand_NoMatch;
+
+ Operands.push_back(MipsOperand::CreateToken(")", getLexer().getLoc()));
+ Parser.Lex();
+
+ return MatchOperand_Success;
+}
+
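parsePtrReg recognizes the index(base) operand form; for an indexed load such as lwx (an example, not taken from this hunk) the parser consumes the pieces like this:

    // lwx $4, $5($6)
    //   "$5" -> index register (GPR64 under N64, GPR32 otherwise)
    //   "("  -> literal token
    //   "$6" -> base register
    //   ")"  -> literal token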
MipsAsmParser::OperandMatchResultTy
-MipsAsmParser::parseRegs(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+MipsAsmParser::parseRegs(SmallVectorImpl<MCParsedAsmOperand *> &Operands,
int RegKind) {
MipsOperand::RegisterKind Kind = (MipsOperand::RegisterKind)RegKind;
- if (getLexer().getKind() == AsmToken::Identifier) {
+ if (getLexer().getKind() == AsmToken::Identifier && !hasConsumedDollar) {
if (searchSymbolAlias(Operands, Kind))
return MatchOperand_Success;
return MatchOperand_NoMatch;
}
+ SMLoc S = Parser.getTok().getLoc();
// If the first token is not '$', we have an error.
- if (Parser.getTok().isNot(AsmToken::Dollar))
+ if (Parser.getTok().isNot(AsmToken::Dollar) && !hasConsumedDollar)
return MatchOperand_NoMatch;
-
- Parser.Lex(); // Eat $
- if (!tryParseRegisterOperand(Operands,
- RegKind == MipsOperand::Kind_GPR64)) {
- // Set the proper register kind.
- MipsOperand* op = static_cast<MipsOperand*>(Operands.back());
- op->setRegKind(Kind);
- if ((Kind == MipsOperand::Kind_GPR32)
- && (getLexer().is(AsmToken::LParen))) {
+ if (!hasConsumedDollar) {
+ Parser.Lex(); // Eat the '$'
+ hasConsumedDollar = true;
+ }
+ if (getLexer().getKind() == AsmToken::Identifier) {
+ int RegNum = -1;
+ std::string RegName = Parser.getTok().getString().lower();
+ // Match register by name
+ switch (RegKind) {
+ case MipsOperand::Kind_GPR32:
+ case MipsOperand::Kind_GPR64:
+ RegNum = matchCPURegisterName(RegName);
+ break;
+ case MipsOperand::Kind_AFGR64Regs:
+ case MipsOperand::Kind_FGR64Regs:
+ case MipsOperand::Kind_FGR32Regs:
+ case MipsOperand::Kind_FGRH32Regs:
+ RegNum = matchFPURegisterName(RegName);
+ if (RegKind == MipsOperand::Kind_AFGR64Regs)
+ RegNum /= 2;
+ else if (RegKind == MipsOperand::Kind_FGRH32Regs && !isFP64())
+ if (RegNum != -1 && RegNum % 2 != 0)
+ Warning(S, "Float register should be even.");
+ break;
+ case MipsOperand::Kind_FCCRegs:
+ RegNum = matchFCCRegisterName(RegName);
+ break;
+ case MipsOperand::Kind_ACC64DSP:
+ RegNum = matchACRegisterName(RegName);
+ break;
+ default:
+ break; // No match, value is set to -1.
+ }
+ // No match found, return _NoMatch to give the next round a chance.
+ if (RegNum < 0)
+ return MatchOperand_NoMatch;
+
+ int RegVal = getReg(regKindToRegClass(Kind), RegNum);
+ if (RegVal == -1)
+ return MatchOperand_NoMatch;
+
+ MipsOperand *Op =
+ MipsOperand::CreateReg(RegVal, S, Parser.getTok().getLoc());
+ Op->setRegKind(Kind);
+ Operands.push_back(Op);
+ hasConsumedDollar = false;
+ Parser.Lex(); // Eat the register name.
+ return MatchOperand_Success;
+ } else if (getLexer().getKind() == AsmToken::Integer) {
+ unsigned RegNum = Parser.getTok().getIntVal();
+ if (Kind == MipsOperand::Kind_HWRegs) {
+ if (RegNum != 29)
+ return MatchOperand_NoMatch;
+ // Only hwreg 29 is supported, found at index 0.
+ RegNum = 0;
+ }
+ int Reg = matchRegisterByNumber(RegNum, regKindToRegClass(Kind));
+ if (Reg == -1)
+ return MatchOperand_NoMatch;
+ MipsOperand *Op = MipsOperand::CreateReg(Reg, S, Parser.getTok().getLoc());
+ Op->setRegKind(Kind);
+ Operands.push_back(Op);
+ hasConsumedDollar = false;
+ Parser.Lex(); // Eat the register number.
+ if ((RegKind == MipsOperand::Kind_GPR32) &&
+ (getLexer().is(AsmToken::LParen))) {
// Check if it is indexed addressing operand.
Operands.push_back(MipsOperand::CreateToken("(", getLexer().getLoc()));
Parser.Lex(); // Eat the parenthesis.
- if (parseRegs(Operands,RegKind) != MatchOperand_Success)
+ if (parseRegs(Operands, RegKind) != MatchOperand_Success)
return MatchOperand_NoMatch;
if (getLexer().isNot(AsmToken::RParen))
return MatchOperand_NoMatch;
@@ -1302,49 +1618,254 @@ MipsAsmParser::parseRegs(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
return MatchOperand_NoMatch;
}
+bool MipsAsmParser::validateMSAIndex(int Val, int RegKind) {
+ MipsOperand::RegisterKind Kind = (MipsOperand::RegisterKind)RegKind;
+
+ if (Val < 0)
+ return false;
+
+ switch (Kind) {
+ default:
+ return false;
+ case MipsOperand::Kind_MSA128BRegs:
+ return Val < 16;
+ case MipsOperand::Kind_MSA128HRegs:
+ return Val < 8;
+ case MipsOperand::Kind_MSA128WRegs:
+ return Val < 4;
+ case MipsOperand::Kind_MSA128DRegs:
+ return Val < 2;
+ }
+}
+
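The bounds follow from the 128-bit vector width: 16 byte lanes, 8 halfword lanes, 4 word lanes, 2 doubleword lanes. For instance:

    // validateMSAIndex(15, Kind_MSA128BRegs) -> true   (lanes 0..15)
    // validateMSAIndex(8,  Kind_MSA128HRegs) -> false  (lanes 0..7)
    // so "copy_s.b $29, $w0[15]" is accepted while "$w0[16]" is rejected
    // with "invalid immediate value".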
MipsAsmParser::OperandMatchResultTy
-MipsAsmParser::parseGPR64(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+MipsAsmParser::parseMSARegs(SmallVectorImpl<MCParsedAsmOperand *> &Operands,
+ int RegKind) {
+ MipsOperand::RegisterKind Kind = (MipsOperand::RegisterKind)RegKind;
+ SMLoc S = Parser.getTok().getLoc();
+ std::string RegName;
- if (!isMips64())
+ if (Parser.getTok().isNot(AsmToken::Dollar))
+ return MatchOperand_NoMatch;
+
+ switch (RegKind) {
+ default:
+ return MatchOperand_ParseFail;
+ case MipsOperand::Kind_MSA128BRegs:
+ case MipsOperand::Kind_MSA128HRegs:
+ case MipsOperand::Kind_MSA128WRegs:
+ case MipsOperand::Kind_MSA128DRegs:
+ break;
+ }
+
+ Parser.Lex(); // Eat the '$'.
+ if (getLexer().getKind() == AsmToken::Identifier)
+ RegName = Parser.getTok().getString().lower();
+ else
+ return MatchOperand_ParseFail;
+
+ int RegNum = matchMSA128RegisterName(RegName);
+
+ if (RegNum < 0 || RegNum > 31)
+ return MatchOperand_ParseFail;
+
+ int RegVal = getReg(regKindToRegClass(Kind), RegNum);
+ if (RegVal == -1)
+ return MatchOperand_ParseFail;
+
+ MipsOperand *Op = MipsOperand::CreateReg(RegVal, S, Parser.getTok().getLoc());
+ Op->setRegKind(Kind);
+ Operands.push_back(Op);
+
+ Parser.Lex(); // Eat the register identifier.
+
+ // MSA registers may be suffixed with an index in the form of:
+ // 1) Immediate expression.
+ // 2) General Purpose Register.
+ // Examples:
+ // 1) copy_s.b $29,$w0[0]
+ // 2) sld.b $w0,$w1[$1]
+
+ if (Parser.getTok().isNot(AsmToken::LBrac))
+ return MatchOperand_Success;
+
+ MipsOperand *Mnemonic = static_cast<MipsOperand *>(Operands[0]);
+
+ Operands.push_back(MipsOperand::CreateToken("[", Parser.getTok().getLoc()));
+ Parser.Lex(); // Parse the '[' token.
+
+ if (Parser.getTok().is(AsmToken::Dollar)) {
+ // This must be a GPR.
+ MipsOperand *RegOp;
+ SMLoc VIdx = Parser.getTok().getLoc();
+ Parser.Lex(); // Parse the '$' token.
+
+ // GPRs have aliases and we must account for that. Example: $30 == $fp
+ if (getLexer().getKind() == AsmToken::Integer) {
+ unsigned RegNum = Parser.getTok().getIntVal();
+ int Reg = matchRegisterByNumber(
+ RegNum, regKindToRegClass(MipsOperand::Kind_GPR32));
+ if (Reg == -1) {
+ Error(VIdx, "invalid general purpose register");
+ return MatchOperand_ParseFail;
+ }
+
+ RegOp = MipsOperand::CreateReg(Reg, VIdx, Parser.getTok().getLoc());
+ } else if (getLexer().getKind() == AsmToken::Identifier) {
+ int RegNum = -1;
+ std::string RegName = Parser.getTok().getString().lower();
+
+ RegNum = matchCPURegisterName(RegName);
+ if (RegNum == -1) {
+ Error(VIdx, "general purpose register expected");
+ return MatchOperand_ParseFail;
+ }
+ RegNum = getReg(regKindToRegClass(MipsOperand::Kind_GPR32), RegNum);
+ RegOp = MipsOperand::CreateReg(RegNum, VIdx, Parser.getTok().getLoc());
+ } else
+ return MatchOperand_ParseFail;
+
+ RegOp->setRegKind(MipsOperand::Kind_GPR32);
+ Operands.push_back(RegOp);
+ Parser.Lex(); // Eat the register identifier.
+
+ if (Parser.getTok().isNot(AsmToken::RBrac))
+ return MatchOperand_ParseFail;
+
+ Operands.push_back(MipsOperand::CreateToken("]", Parser.getTok().getLoc()));
+ Parser.Lex(); // Parse the ']' token.
+
+ return MatchOperand_Success;
+ }
+
+ // The index must be a constant expression then.
+ SMLoc VIdx = Parser.getTok().getLoc();
+ const MCExpr *ImmVal;
+
+ if (getParser().parseExpression(ImmVal))
+ return MatchOperand_ParseFail;
+
+ const MCConstantExpr *expr = dyn_cast<MCConstantExpr>(ImmVal);
+ if (!expr || !validateMSAIndex((int)expr->getValue(), Kind)) {
+ Error(VIdx, "invalid immediate value");
+ return MatchOperand_ParseFail;
+ }
+
+ SMLoc E = Parser.getTok().getEndLoc();
+
+ if (Parser.getTok().isNot(AsmToken::RBrac))
+ return MatchOperand_ParseFail;
+
+ bool insve =
+ Mnemonic->getToken() == "insve.b" || Mnemonic->getToken() == "insve.h" ||
+ Mnemonic->getToken() == "insve.w" || Mnemonic->getToken() == "insve.d";
+
+ // The second vector index of insve instructions is always 0.
+ if (insve && Operands.size() > 6) {
+ if (expr->getValue() != 0) {
+ Error(VIdx, "immediate value must be 0");
+ return MatchOperand_ParseFail;
+ }
+ Operands.push_back(MipsOperand::CreateToken("0", VIdx));
+ } else
+ Operands.push_back(MipsOperand::CreateImm(expr, VIdx, E));
+
+ Operands.push_back(MipsOperand::CreateToken("]", Parser.getTok().getLoc()));
+
+ Parser.Lex(); // Parse the ']' token.
+
+ return MatchOperand_Success;
+}
+
+MipsAsmParser::OperandMatchResultTy
+MipsAsmParser::parseMSACtrlRegs(SmallVectorImpl<MCParsedAsmOperand *> &Operands,
+ int RegKind) {
+ MipsOperand::RegisterKind Kind = (MipsOperand::RegisterKind)RegKind;
+
+ if (Kind != MipsOperand::Kind_MSA128CtrlRegs)
return MatchOperand_NoMatch;
- return parseRegs(Operands, (int) MipsOperand::Kind_GPR64);
+
+ if (Parser.getTok().isNot(AsmToken::Dollar))
+ return MatchOperand_ParseFail;
+
+ SMLoc S = Parser.getTok().getLoc();
+
+ Parser.Lex(); // Eat the '$' symbol.
+
+ int RegNum = -1;
+ if (getLexer().getKind() == AsmToken::Identifier)
+ RegNum = matchMSA128CtrlRegisterName(Parser.getTok().getString().lower());
+ else if (getLexer().getKind() == AsmToken::Integer)
+ RegNum = Parser.getTok().getIntVal();
+ else
+ return MatchOperand_ParseFail;
+
+ if (RegNum < 0 || RegNum > 7)
+ return MatchOperand_ParseFail;
+
+ int RegVal = getReg(regKindToRegClass(Kind), RegNum);
+ if (RegVal == -1)
+ return MatchOperand_ParseFail;
+
+ MipsOperand *RegOp =
+ MipsOperand::CreateReg(RegVal, S, Parser.getTok().getLoc());
+ RegOp->setRegKind(MipsOperand::Kind_MSA128CtrlRegs);
+ Operands.push_back(RegOp);
+ Parser.Lex(); // Eat the register identifier.
+
+ return MatchOperand_Success;
}
MipsAsmParser::OperandMatchResultTy
-MipsAsmParser::parseGPR32(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
- return parseRegs(Operands, (int) MipsOperand::Kind_GPR32);
+MipsAsmParser::parseGPR64(SmallVectorImpl<MCParsedAsmOperand *> &Operands) {
+
+ if (!isMips64())
+ return MatchOperand_NoMatch;
+ return parseRegs(Operands, (int)MipsOperand::Kind_GPR64);
}
MipsAsmParser::OperandMatchResultTy
-MipsAsmParser::parseAFGR64Regs(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+MipsAsmParser::parseGPR32(SmallVectorImpl<MCParsedAsmOperand *> &Operands) {
+ return parseRegs(Operands, (int)MipsOperand::Kind_GPR32);
+}
+
+MipsAsmParser::OperandMatchResultTy MipsAsmParser::parseAFGR64Regs(
+ SmallVectorImpl<MCParsedAsmOperand *> &Operands) {
if (isFP64())
return MatchOperand_NoMatch;
- // Double operand is expected, set appropriate format
- setFpFormat(FP_FORMAT_D);
-
- return parseRegs(Operands, (int) MipsOperand::Kind_AFGR64Regs);
+ return parseRegs(Operands, (int)MipsOperand::Kind_AFGR64Regs);
}
MipsAsmParser::OperandMatchResultTy
-MipsAsmParser::parseFGR64Regs(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+MipsAsmParser::parseFGR64Regs(SmallVectorImpl<MCParsedAsmOperand *> &Operands) {
if (!isFP64())
return MatchOperand_NoMatch;
- // Double operand is expected, set appropriate format
- setFpFormat(FP_FORMAT_D);
+ return parseRegs(Operands, (int)MipsOperand::Kind_FGR64Regs);
+}
+
+MipsAsmParser::OperandMatchResultTy
+MipsAsmParser::parseFGR32Regs(SmallVectorImpl<MCParsedAsmOperand *> &Operands) {
+ return parseRegs(Operands, (int)MipsOperand::Kind_FGR32Regs);
+}
- return parseRegs(Operands, (int) MipsOperand::Kind_FGR64Regs);
+MipsAsmParser::OperandMatchResultTy MipsAsmParser::parseFGRH32Regs(
+ SmallVectorImpl<MCParsedAsmOperand *> &Operands) {
+ return parseRegs(Operands, (int)MipsOperand::Kind_FGRH32Regs);
}
MipsAsmParser::OperandMatchResultTy
-MipsAsmParser::parseFGR32Regs(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
- // Single operand is expected, set appropriate format
- setFpFormat(FP_FORMAT_S);
- return parseRegs(Operands, (int) MipsOperand::Kind_FGR32Regs);
+MipsAsmParser::parseFCCRegs(SmallVectorImpl<MCParsedAsmOperand *> &Operands) {
+ return parseRegs(Operands, (int)MipsOperand::Kind_FCCRegs);
}
MipsAsmParser::OperandMatchResultTy
-MipsAsmParser::parseFCCRegs(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+MipsAsmParser::parseACC64DSP(SmallVectorImpl<MCParsedAsmOperand *> &Operands) {
+ return parseRegs(Operands, (int)MipsOperand::Kind_ACC64DSP);
+}
+
+MipsAsmParser::OperandMatchResultTy
+MipsAsmParser::parseLO32DSP(SmallVectorImpl<MCParsedAsmOperand *> &Operands) {
// If the first token is not '$' we have an error.
if (Parser.getTok().isNot(AsmToken::Dollar))
return MatchOperand_NoMatch;
@@ -1357,19 +1878,19 @@ MipsAsmParser::parseFCCRegs(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
if (Tok.isNot(AsmToken::Identifier))
return MatchOperand_NoMatch;
- if (!Tok.getIdentifier().startswith("fcc"))
+ if (!Tok.getIdentifier().startswith("ac"))
return MatchOperand_NoMatch;
- StringRef NumString = Tok.getIdentifier().substr(3);
+ StringRef NumString = Tok.getIdentifier().substr(2);
unsigned IntVal;
if (NumString.getAsInteger(10, IntVal))
return MatchOperand_NoMatch;
- unsigned Reg = matchRegisterByNumber(IntVal, Mips::FCCRegClassID);
+ unsigned Reg = matchRegisterByNumber(IntVal, Mips::LO32DSPRegClassID);
MipsOperand *Op = MipsOperand::CreateReg(Reg, S, Parser.getTok().getLoc());
- Op->setRegKind(MipsOperand::Kind_FCCRegs);
+ Op->setRegKind(MipsOperand::Kind_LO32DSP);
Operands.push_back(Op);
Parser.Lex(); // Eat the register number.
@@ -1377,7 +1898,7 @@ MipsAsmParser::parseFCCRegs(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
}
MipsAsmParser::OperandMatchResultTy
-MipsAsmParser::parseACRegsDSP(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+MipsAsmParser::parseHI32DSP(SmallVectorImpl<MCParsedAsmOperand *> &Operands) {
// If the first token is not '$' we have an error.
if (Parser.getTok().isNot(AsmToken::Dollar))
return MatchOperand_NoMatch;
@@ -1390,27 +1911,78 @@ MipsAsmParser::parseACRegsDSP(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
if (Tok.isNot(AsmToken::Identifier))
return MatchOperand_NoMatch;
- if (!Tok.getIdentifier().startswith("acc"))
+ if (!Tok.getIdentifier().startswith("ac"))
return MatchOperand_NoMatch;
- StringRef NumString = Tok.getIdentifier().substr(3);
+ StringRef NumString = Tok.getIdentifier().substr(2);
unsigned IntVal;
if (NumString.getAsInteger(10, IntVal))
return MatchOperand_NoMatch;
- unsigned Reg = matchRegisterByNumber(IntVal, Mips::ACRegsDSPRegClassID);
+ unsigned Reg = matchRegisterByNumber(IntVal, Mips::HI32DSPRegClassID);
MipsOperand *Op = MipsOperand::CreateReg(Reg, S, Parser.getTok().getLoc());
- Op->setRegKind(MipsOperand::Kind_ACRegsDSP);
+ Op->setRegKind(MipsOperand::Kind_HI32DSP);
Operands.push_back(Op);
Parser.Lex(); // Eat the register number.
return MatchOperand_Success;
}
+MipsAsmParser::OperandMatchResultTy
+MipsAsmParser::parseCOP2(SmallVectorImpl<MCParsedAsmOperand *> &Operands) {
+ // If the first token is not '$' we have an error.
+ if (Parser.getTok().isNot(AsmToken::Dollar))
+ return MatchOperand_NoMatch;
+
+ SMLoc S = Parser.getTok().getLoc();
+ Parser.Lex(); // Eat the '$'
+
+ const AsmToken &Tok = Parser.getTok(); // Get next token.
+
+ if (Tok.isNot(AsmToken::Integer))
+ return MatchOperand_NoMatch;
+
+ unsigned IntVal = Tok.getIntVal();
+
+ unsigned Reg = matchRegisterByNumber(IntVal, Mips::COP2RegClassID);
+
+ MipsOperand *Op = MipsOperand::CreateReg(Reg, S, Parser.getTok().getLoc());
+ Op->setRegKind(MipsOperand::Kind_COP2);
+ Operands.push_back(Op);
+
+ Parser.Lex(); // Eat the register number.
+ return MatchOperand_Success;
+}
+
+MipsAsmParser::OperandMatchResultTy MipsAsmParser::parseMSA128BRegs(
+ SmallVectorImpl<MCParsedAsmOperand *> &Operands) {
+ return parseMSARegs(Operands, (int)MipsOperand::Kind_MSA128BRegs);
+}
+
+MipsAsmParser::OperandMatchResultTy MipsAsmParser::parseMSA128HRegs(
+ SmallVectorImpl<MCParsedAsmOperand *> &Operands) {
+ return parseMSARegs(Operands, (int)MipsOperand::Kind_MSA128HRegs);
+}
+
+MipsAsmParser::OperandMatchResultTy MipsAsmParser::parseMSA128WRegs(
+ SmallVectorImpl<MCParsedAsmOperand *> &Operands) {
+ return parseMSARegs(Operands, (int)MipsOperand::Kind_MSA128WRegs);
+}
+
+MipsAsmParser::OperandMatchResultTy MipsAsmParser::parseMSA128DRegs(
+ SmallVectorImpl<MCParsedAsmOperand *> &Operands) {
+ return parseMSARegs(Operands, (int)MipsOperand::Kind_MSA128DRegs);
+}
+
+MipsAsmParser::OperandMatchResultTy MipsAsmParser::parseMSA128CtrlRegs(
+ SmallVectorImpl<MCParsedAsmOperand *> &Operands) {
+ return parseMSACtrlRegs(Operands, (int)MipsOperand::Kind_MSA128CtrlRegs);
+}
+
bool MipsAsmParser::searchSymbolAlias(
- SmallVectorImpl<MCParsedAsmOperand*> &Operands, unsigned RegKind) {
+ SmallVectorImpl<MCParsedAsmOperand *> &Operands, unsigned RegKind) {
MCSymbol *Sym = getContext().LookupSymbol(Parser.getTok().getIdentifier());
if (Sym) {
@@ -1421,38 +1993,39 @@ bool MipsAsmParser::searchSymbolAlias(
else
return false;
if (Expr->getKind() == MCExpr::SymbolRef) {
- MipsOperand::RegisterKind Kind = (MipsOperand::RegisterKind) RegKind;
- const MCSymbolRefExpr *Ref = static_cast<const MCSymbolRefExpr*>(Expr);
+ MipsOperand::RegisterKind Kind = (MipsOperand::RegisterKind)RegKind;
+ const MCSymbolRefExpr *Ref = static_cast<const MCSymbolRefExpr *>(Expr);
const StringRef DefSymbol = Ref->getSymbol().getName();
if (DefSymbol.startswith("$")) {
int RegNum = -1;
APInt IntVal(32, -1);
if (!DefSymbol.substr(1).getAsInteger(10, IntVal))
RegNum = matchRegisterByNumber(IntVal.getZExtValue(),
- isMips64()
- ? Mips::GPR64RegClassID
- : Mips::GPR32RegClassID);
+ isMips64() ? Mips::GPR64RegClassID
+ : Mips::GPR32RegClassID);
else {
// Lookup for the register with the corresponding name.
switch (Kind) {
case MipsOperand::Kind_AFGR64Regs:
case MipsOperand::Kind_FGR64Regs:
- RegNum = matchFPURegisterName(DefSymbol.substr(1), FP_FORMAT_D);
+ RegNum = matchFPURegisterName(DefSymbol.substr(1));
break;
case MipsOperand::Kind_FGR32Regs:
- RegNum = matchFPURegisterName(DefSymbol.substr(1), FP_FORMAT_S);
+ RegNum = matchFPURegisterName(DefSymbol.substr(1));
break;
case MipsOperand::Kind_GPR64:
case MipsOperand::Kind_GPR32:
default:
- RegNum = matchRegisterName(DefSymbol.substr(1), isMips64());
+ RegNum = matchCPURegisterName(DefSymbol.substr(1));
break;
}
+ if (RegNum > -1)
+ RegNum = getReg(regKindToRegClass(Kind), RegNum);
}
if (RegNum > -1) {
Parser.Lex();
- MipsOperand *op = MipsOperand::CreateReg(RegNum, S,
- Parser.getTok().getLoc());
+ MipsOperand *op =
+ MipsOperand::CreateReg(RegNum, S, Parser.getTok().getLoc());
op->setRegKind(Kind);
Operands.push_back(op);
return true;
@@ -1460,9 +2033,9 @@ bool MipsAsmParser::searchSymbolAlias(
}
} else if (Expr->getKind() == MCExpr::Constant) {
Parser.Lex();
- const MCConstantExpr *Const = static_cast<const MCConstantExpr*>(Expr);
- MipsOperand *op = MipsOperand::CreateImm(Const, S,
- Parser.getTok().getLoc());
+ const MCConstantExpr *Const = static_cast<const MCConstantExpr *>(Expr);
+ MipsOperand *op =
+ MipsOperand::CreateImm(Const, S, Parser.getTok().getLoc());
Operands.push_back(op);
return true;
}
@@ -1471,115 +2044,101 @@ bool MipsAsmParser::searchSymbolAlias(
}
MipsAsmParser::OperandMatchResultTy
-MipsAsmParser::parseHWRegs(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
-
- // If the first token is not '$' we have error.
- if (Parser.getTok().isNot(AsmToken::Dollar))
- return MatchOperand_NoMatch;
- SMLoc S = Parser.getTok().getLoc();
- Parser.Lex(); // Eat the '$'.
-
- const AsmToken &Tok = Parser.getTok(); // Get the next token.
- if (Tok.isNot(AsmToken::Integer))
- return MatchOperand_NoMatch;
-
- unsigned RegNum = Tok.getIntVal();
- // At the moment only hwreg29 is supported.
- if (RegNum != 29)
- return MatchOperand_ParseFail;
-
- MipsOperand *op = MipsOperand::CreateReg(Mips::HWR29, S,
- Parser.getTok().getLoc());
- op->setRegKind(MipsOperand::Kind_HWRegs);
- Operands.push_back(op);
-
- Parser.Lex(); // Eat the register number.
- return MatchOperand_Success;
+MipsAsmParser::parseHWRegs(SmallVectorImpl<MCParsedAsmOperand *> &Operands) {
+ return parseRegs(Operands, (int)MipsOperand::Kind_HWRegs);
}
MipsAsmParser::OperandMatchResultTy
-MipsAsmParser::parseHW64Regs(
- SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+MipsAsmParser::parseCCRRegs(SmallVectorImpl<MCParsedAsmOperand *> &Operands) {
+ return parseRegs(Operands, (int)MipsOperand::Kind_CCRRegs);
+}
- if (!isMips64())
- return MatchOperand_NoMatch;
- // If the first token is not '$' we have an error.
- if (Parser.getTok().isNot(AsmToken::Dollar))
+MipsAsmParser::OperandMatchResultTy
+MipsAsmParser::parseInvNum(SmallVectorImpl<MCParsedAsmOperand *> &Operands) {
+ const MCExpr *IdVal;
+ // If the first token is '$' we may have a register operand.
+ if (Parser.getTok().is(AsmToken::Dollar))
return MatchOperand_NoMatch;
SMLoc S = Parser.getTok().getLoc();
- Parser.Lex(); // Eat $
-
- const AsmToken &Tok = Parser.getTok(); // Get the next token.
- if (Tok.isNot(AsmToken::Integer))
- return MatchOperand_NoMatch;
-
- unsigned RegNum = Tok.getIntVal();
- // At the moment only hwreg29 is supported.
- if (RegNum != 29)
+ if (getParser().parseExpression(IdVal))
return MatchOperand_ParseFail;
-
- MipsOperand *op = MipsOperand::CreateReg(Mips::HWR29_64, S,
- Parser.getTok().getLoc());
- op->setRegKind(MipsOperand::Kind_HW64Regs);
- Operands.push_back(op);
-
- Parser.Lex(); // Eat the register number.
+ const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(IdVal);
+ assert(MCE && "Unexpected MCExpr type.");
+ int64_t Val = MCE->getValue();
+ SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
+ Operands.push_back(MipsOperand::CreateImm(
+ MCConstantExpr::Create(0 - Val, getContext()), S, E));
return MatchOperand_Success;
}
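parseInvNum pushes the negated value so that an alias spelled as a subtraction can reuse an adding encoding; a hypothetical illustration of the mechanics above:

    // If the source operand parses to 5, the operand pushed is
    // MCConstantExpr::Create(0 - 5), i.e. -5; an alias such as a
    // subtract-immediate (name hypothetical) can then match an
    // add-immediate instruction unchanged.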
MipsAsmParser::OperandMatchResultTy
-MipsAsmParser::parseCCRRegs(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
- // If the first token is not '$' we have an error.
- if (Parser.getTok().isNot(AsmToken::Dollar))
+MipsAsmParser::parseLSAImm(SmallVectorImpl<MCParsedAsmOperand *> &Operands) {
+ switch (getLexer().getKind()) {
+ default:
return MatchOperand_NoMatch;
+ case AsmToken::LParen:
+ case AsmToken::Plus:
+ case AsmToken::Minus:
+ case AsmToken::Integer:
+ break;
+ }
+ const MCExpr *Expr;
SMLoc S = Parser.getTok().getLoc();
- Parser.Lex(); // Eat the '$'
-
- const AsmToken &Tok = Parser.getTok(); // Get next token.
- if (Tok.isNot(AsmToken::Integer))
- return MatchOperand_NoMatch;
+ if (getParser().parseExpression(Expr))
+ return MatchOperand_ParseFail;
- unsigned Reg = matchRegisterByNumber(Tok.getIntVal(), Mips::CCRRegClassID);
+ int64_t Val;
+ if (!Expr->EvaluateAsAbsolute(Val)) {
+ Error(S, "expected immediate value");
+ return MatchOperand_ParseFail;
+ }
- MipsOperand *Op = MipsOperand::CreateReg(Reg, S, Parser.getTok().getLoc());
- Op->setRegKind(MipsOperand::Kind_CCRRegs);
- Operands.push_back(Op);
+ // The LSA instruction takes a 2-bit unsigned immediate. Because the CPU
+ // always adds one to the encoded field, the accepted assembly range is
+ // 1..4. We only check the range here; the addition/subtraction is dealt
+ // with when the instruction is actually encoded or decoded.
+ if (Val < 1 || Val > 4) {
+ Error(S, "immediate not in range (1..4)");
+ return MatchOperand_ParseFail;
+ }
- Parser.Lex(); // Eat the register number.
+ Operands.push_back(MipsOperand::CreateLSAImm(Expr, S,
+ Parser.getTok().getLoc()));
return MatchOperand_Success;
}
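
The range check above is one half of an off-by-one scheme: the assembler later stores 'imm - 1' in the 2-bit field (getLSAImmEncoding in MipsMCCodeEmitter.cpp further down in this patch) and the disassembler adds the one back (DecodeLSAImm). A minimal standalone sketch of that round trip, in plain C++ with illustrative names rather than the LLVM interfaces:

#include <cassert>
#include <cstdint>

// Assembler side: accept 1..4 and store 'imm - 1' in the 2-bit field
// (mirrors getLSAImmEncoding further down in this patch).
static uint32_t encodeLSAImm(int64_t Imm) {
  assert(Imm >= 1 && Imm <= 4 && "immediate not in range (1..4)");
  return static_cast<uint32_t>(Imm - 1) & 0x3;
}

// Disassembler side: add the one back (mirrors DecodeLSAImm).
static int64_t decodeLSAImm(uint32_t Field) {
  return static_cast<int64_t>(Field) + 1;
}

int main() {
  for (int64_t Imm = 1; Imm <= 4; ++Imm)
    assert(decodeLSAImm(encodeLSAImm(Imm)) == Imm); // exact round trip
  return 0;
}
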
MCSymbolRefExpr::VariantKind MipsAsmParser::getVariantKind(StringRef Symbol) {
- MCSymbolRefExpr::VariantKind VK
- = StringSwitch<MCSymbolRefExpr::VariantKind>(Symbol)
- .Case("hi", MCSymbolRefExpr::VK_Mips_ABS_HI)
- .Case("lo", MCSymbolRefExpr::VK_Mips_ABS_LO)
- .Case("gp_rel", MCSymbolRefExpr::VK_Mips_GPREL)
- .Case("call16", MCSymbolRefExpr::VK_Mips_GOT_CALL)
- .Case("got", MCSymbolRefExpr::VK_Mips_GOT)
- .Case("tlsgd", MCSymbolRefExpr::VK_Mips_TLSGD)
- .Case("tlsldm", MCSymbolRefExpr::VK_Mips_TLSLDM)
- .Case("dtprel_hi", MCSymbolRefExpr::VK_Mips_DTPREL_HI)
- .Case("dtprel_lo", MCSymbolRefExpr::VK_Mips_DTPREL_LO)
- .Case("gottprel", MCSymbolRefExpr::VK_Mips_GOTTPREL)
- .Case("tprel_hi", MCSymbolRefExpr::VK_Mips_TPREL_HI)
- .Case("tprel_lo", MCSymbolRefExpr::VK_Mips_TPREL_LO)
- .Case("got_disp", MCSymbolRefExpr::VK_Mips_GOT_DISP)
- .Case("got_page", MCSymbolRefExpr::VK_Mips_GOT_PAGE)
- .Case("got_ofst", MCSymbolRefExpr::VK_Mips_GOT_OFST)
- .Case("hi(%neg(%gp_rel", MCSymbolRefExpr::VK_Mips_GPOFF_HI)
- .Case("lo(%neg(%gp_rel", MCSymbolRefExpr::VK_Mips_GPOFF_LO)
- .Default(MCSymbolRefExpr::VK_None);
+ MCSymbolRefExpr::VariantKind VK =
+ StringSwitch<MCSymbolRefExpr::VariantKind>(Symbol)
+ .Case("hi", MCSymbolRefExpr::VK_Mips_ABS_HI)
+ .Case("lo", MCSymbolRefExpr::VK_Mips_ABS_LO)
+ .Case("gp_rel", MCSymbolRefExpr::VK_Mips_GPREL)
+ .Case("call16", MCSymbolRefExpr::VK_Mips_GOT_CALL)
+ .Case("got", MCSymbolRefExpr::VK_Mips_GOT)
+ .Case("tlsgd", MCSymbolRefExpr::VK_Mips_TLSGD)
+ .Case("tlsldm", MCSymbolRefExpr::VK_Mips_TLSLDM)
+ .Case("dtprel_hi", MCSymbolRefExpr::VK_Mips_DTPREL_HI)
+ .Case("dtprel_lo", MCSymbolRefExpr::VK_Mips_DTPREL_LO)
+ .Case("gottprel", MCSymbolRefExpr::VK_Mips_GOTTPREL)
+ .Case("tprel_hi", MCSymbolRefExpr::VK_Mips_TPREL_HI)
+ .Case("tprel_lo", MCSymbolRefExpr::VK_Mips_TPREL_LO)
+ .Case("got_disp", MCSymbolRefExpr::VK_Mips_GOT_DISP)
+ .Case("got_page", MCSymbolRefExpr::VK_Mips_GOT_PAGE)
+ .Case("got_ofst", MCSymbolRefExpr::VK_Mips_GOT_OFST)
+ .Case("hi(%neg(%gp_rel", MCSymbolRefExpr::VK_Mips_GPOFF_HI)
+ .Case("lo(%neg(%gp_rel", MCSymbolRefExpr::VK_Mips_GPOFF_LO)
+ .Default(MCSymbolRefExpr::VK_None);
return VK;
}
-bool MipsAsmParser::
-ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc,
- SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+bool MipsAsmParser::ParseInstruction(
+ ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc,
+ SmallVectorImpl<MCParsedAsmOperand *> &Operands) {
  // Check if we have a valid mnemonic.
if (!mnemonicIsValid(Name, 0)) {
Parser.eatToEndOfStatement();
@@ -1757,12 +2316,13 @@ bool MipsAsmParser::parseSetAssignment() {
// We have a '$' followed by something, make sure they are adjacent.
if (DollarLoc.getPointer() + 1 != getTok().getLoc().getPointer())
return true;
- StringRef Res = StringRef(DollarLoc.getPointer(),
- getTok().getEndLoc().getPointer() - DollarLoc.getPointer());
+ StringRef Res =
+ StringRef(DollarLoc.getPointer(),
+ getTok().getEndLoc().getPointer() - DollarLoc.getPointer());
Symbol = getContext().GetOrCreateSymbol(Res);
Parser.Lex();
- Value = MCSymbolRefExpr::Create(Symbol, MCSymbolRefExpr::VK_None,
- getContext());
+ Value =
+ MCSymbolRefExpr::Create(Symbol, MCSymbolRefExpr::VK_None, getContext());
} else if (Parser.parseExpression(Value))
return reportParseError("expected valid expression after comma");
@@ -1810,6 +2370,34 @@ bool MipsAsmParser::parseDirectiveSet() {
return true;
}
+bool MipsAsmParser::parseDirectiveMipsHackStocg() {
+ MCAsmParser &Parser = getParser();
+ StringRef Name;
+ if (Parser.parseIdentifier(Name))
+    return reportParseError("expected identifier");
+
+ MCSymbol *Sym = getContext().GetOrCreateSymbol(Name);
+ if (getLexer().isNot(AsmToken::Comma))
+ return TokError("unexpected token");
+ Lex();
+
+ int64_t Flags = 0;
+ if (Parser.parseAbsoluteExpression(Flags))
+ return TokError("unexpected token");
+
+ getTargetStreamer().emitMipsHackSTOCG(Sym, Flags);
+ return false;
+}
+
+bool MipsAsmParser::parseDirectiveMipsHackELFFlags() {
+ int64_t Flags = 0;
+ if (Parser.parseAbsoluteExpression(Flags))
+ return TokError("unexpected token");
+
+ getTargetStreamer().emitMipsHackELFFlags(Flags);
+ return false;
+}
+
/// parseDirectiveWord
/// ::= .word [ expression (, expression)* ]
bool MipsAsmParser::parseDirectiveWord(unsigned Size, SMLoc L) {
@@ -1835,6 +2423,22 @@ bool MipsAsmParser::parseDirectiveWord(unsigned Size, SMLoc L) {
return false;
}
+/// parseDirectiveGpWord
+/// ::= .gpword local_sym
+bool MipsAsmParser::parseDirectiveGpWord() {
+ const MCExpr *Value;
+  // EmitGPRel32Value requires an expression, so we use the base class
+  // method to evaluate it.
+ if (getParser().parseExpression(Value))
+ return true;
+ getParser().getStreamer().EmitGPRel32Value(Value);
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return Error(getLexer().getLoc(), "unexpected token in directive");
+ Parser.Lex(); // Eat EndOfStatement token.
+ return false;
+}
+
bool MipsAsmParser::ParseDirective(AsmToken DirectiveID) {
StringRef IDVal = DirectiveID.getString();
@@ -1875,7 +2479,7 @@ bool MipsAsmParser::ParseDirective(AsmToken DirectiveID) {
if (IDVal == ".gpword") {
// Ignore this directive for now.
- Parser.eatToEndOfStatement();
+ parseDirectiveGpWord();
return false;
}
@@ -1884,6 +2488,12 @@ bool MipsAsmParser::ParseDirective(AsmToken DirectiveID) {
return false;
}
+ if (IDVal == ".mips_hack_stocg")
+ return parseDirectiveMipsHackStocg();
+
+ if (IDVal == ".mips_hack_elf_flags")
+ return parseDirectiveMipsHackELFFlags();
+
return true;
}
diff --git a/lib/Target/Mips/CMakeLists.txt b/lib/Target/Mips/CMakeLists.txt
index aedb78b..6acc9a8 100644
--- a/lib/Target/Mips/CMakeLists.txt
+++ b/lib/Target/Mips/CMakeLists.txt
@@ -35,7 +35,6 @@ add_llvm_target(MipsCodeGen
MipsMachineFunction.cpp
MipsModuleISelDAGToDAG.cpp
MipsOs16.cpp
- MipsOptimizeMathLibCalls.cpp
MipsRegisterInfo.cpp
MipsSEFrameLowering.cpp
MipsSEInstrInfo.cpp
diff --git a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
index d99df4d..60508a8 100644
--- a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
+++ b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
@@ -35,26 +35,33 @@ public:
///
MipsDisassemblerBase(const MCSubtargetInfo &STI, const MCRegisterInfo *Info,
bool bigEndian) :
- MCDisassembler(STI), RegInfo(Info), isBigEndian(bigEndian) {}
+ MCDisassembler(STI), RegInfo(Info),
+ IsN64(STI.getFeatureBits() & Mips::FeatureN64), isBigEndian(bigEndian) {}
virtual ~MipsDisassemblerBase() {}
const MCRegisterInfo *getRegInfo() const { return RegInfo.get(); }
+ bool isN64() const { return IsN64; }
+
private:
OwningPtr<const MCRegisterInfo> RegInfo;
+ bool IsN64;
protected:
bool isBigEndian;
};
/// MipsDisassembler - a disassembler class for Mips32.
class MipsDisassembler : public MipsDisassemblerBase {
+ bool IsMicroMips;
public:
/// Constructor - Initializes the disassembler.
///
MipsDisassembler(const MCSubtargetInfo &STI, const MCRegisterInfo *Info,
bool bigEndian) :
- MipsDisassemblerBase(STI, Info, bigEndian) {}
+ MipsDisassemblerBase(STI, Info, bigEndian) {
+ IsMicroMips = STI.getFeatureBits() & Mips::FeatureMicroMips;
+ }
/// getInstruction - See MCDisassembler.
virtual DecodeStatus getInstruction(MCInst &instr,
@@ -103,10 +110,15 @@ static DecodeStatus DecodeGPR32RegisterClass(MCInst &Inst,
uint64_t Address,
const void *Decoder);
-static DecodeStatus DecodeDSPRegsRegisterClass(MCInst &Inst,
- unsigned RegNo,
- uint64_t Address,
- const void *Decoder);
+static DecodeStatus DecodePtrRegisterClass(MCInst &Inst,
+                                           unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeDSPRRegisterClass(MCInst &Inst,
+ unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder);
static DecodeStatus DecodeFGR64RegisterClass(MCInst &Inst,
unsigned RegNo,
@@ -118,6 +130,11 @@ static DecodeStatus DecodeFGR32RegisterClass(MCInst &Inst,
uint64_t Address,
const void *Decoder);
+static DecodeStatus DecodeFGRH32RegisterClass(MCInst &Inst,
+ unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder);
+
static DecodeStatus DecodeCCRRegisterClass(MCInst &Inst,
unsigned RegNo,
uint64_t Address,
@@ -138,20 +155,45 @@ static DecodeStatus DecodeAFGR64RegisterClass(MCInst &Inst,
uint64_t Address,
const void *Decoder);
-static DecodeStatus DecodeACRegsDSPRegisterClass(MCInst &Inst,
- unsigned RegNo,
- uint64_t Address,
- const void *Decoder);
+static DecodeStatus DecodeACC64DSPRegisterClass(MCInst &Inst,
+ unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder);
-static DecodeStatus DecodeHIRegsDSPRegisterClass(MCInst &Inst,
- unsigned RegNo,
- uint64_t Address,
- const void *Decoder);
+static DecodeStatus DecodeHI32DSPRegisterClass(MCInst &Inst,
+ unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder);
-static DecodeStatus DecodeLORegsDSPRegisterClass(MCInst &Inst,
- unsigned RegNo,
- uint64_t Address,
- const void *Decoder);
+static DecodeStatus DecodeLO32DSPRegisterClass(MCInst &Inst,
+ unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeMSA128BRegisterClass(MCInst &Inst,
+ unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeMSA128HRegisterClass(MCInst &Inst,
+ unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeMSA128WRegisterClass(MCInst &Inst,
+ unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeMSA128DRegisterClass(MCInst &Inst,
+ unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeMSACtrlRegisterClass(MCInst &Inst,
+ unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder);
static DecodeStatus DecodeBranchTarget(MCInst &Inst,
unsigned Offset,
@@ -163,11 +205,38 @@ static DecodeStatus DecodeJumpTarget(MCInst &Inst,
uint64_t Address,
const void *Decoder);
+// DecodeBranchTargetMM - Decode microMIPS branch offset, which is
+// shifted left by 1 bit.
+static DecodeStatus DecodeBranchTargetMM(MCInst &Inst,
+ unsigned Offset,
+ uint64_t Address,
+ const void *Decoder);
+
+// DecodeJumpTargetMM - Decode microMIPS jump target, which is
+// shifted left by 1 bit.
+static DecodeStatus DecodeJumpTargetMM(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+
static DecodeStatus DecodeMem(MCInst &Inst,
unsigned Insn,
uint64_t Address,
const void *Decoder);
+static DecodeStatus DecodeMSA128Mem(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+
+static DecodeStatus DecodeMemMMImm12(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeMemMMImm16(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+
static DecodeStatus DecodeFMem(MCInst &Inst, unsigned Insn,
uint64_t Address,
const void *Decoder);
@@ -177,6 +246,13 @@ static DecodeStatus DecodeSimm16(MCInst &Inst,
uint64_t Address,
const void *Decoder);
+// Decode the immediate field of an LSA instruction which
+// is off by one.
+static DecodeStatus DecodeLSAImm(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+
static DecodeStatus DecodeInsSize(MCInst &Inst,
unsigned Insn,
uint64_t Address,
@@ -237,7 +313,8 @@ static DecodeStatus readInstruction32(const MemoryObject &region,
uint64_t address,
uint64_t &size,
uint32_t &insn,
- bool isBigEndian) {
+ bool isBigEndian,
+ bool IsMicroMips) {
uint8_t Bytes[4];
// We want to read exactly 4 Bytes of data.
@@ -255,10 +332,20 @@ static DecodeStatus readInstruction32(const MemoryObject &region,
}
else {
    // Encoded as a little-endian 32-bit word in the stream.
- insn = (Bytes[0] << 0) |
- (Bytes[1] << 8) |
- (Bytes[2] << 16) |
- (Bytes[3] << 24);
+ // Little-endian byte ordering:
+ // mips32r2: 4 | 3 | 2 | 1
+ // microMIPS: 2 | 1 | 4 | 3
+ if (IsMicroMips) {
+ insn = (Bytes[2] << 0) |
+ (Bytes[3] << 8) |
+ (Bytes[0] << 16) |
+ (Bytes[1] << 24);
+ } else {
+ insn = (Bytes[0] << 0) |
+ (Bytes[1] << 8) |
+ (Bytes[2] << 16) |
+ (Bytes[3] << 24);
+ }
}
return MCDisassembler::Success;
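
The halfword-swapped layout described in the comment above can be exercised on its own. A self-contained sketch (plain C++; read32 is an illustrative stand-in for readInstruction32, not the MC API):

#include <cassert>
#include <cstdint>

// Rebuild a 32-bit instruction from a little-endian byte stream. For
// microMIPS the two halfwords arrive swapped ("2 | 1 | 4 | 3"); for
// mips32r2 they do not ("4 | 3 | 2 | 1").
static uint32_t read32(const uint8_t Bytes[4], bool IsMicroMips) {
  if (IsMicroMips)
    return (uint32_t)Bytes[2] | ((uint32_t)Bytes[3] << 8) |
           ((uint32_t)Bytes[0] << 16) | ((uint32_t)Bytes[1] << 24);
  return (uint32_t)Bytes[0] | ((uint32_t)Bytes[1] << 8) |
         ((uint32_t)Bytes[2] << 16) | ((uint32_t)Bytes[3] << 24);
}

int main() {
  // The same four stream bytes decode to different words per mode.
  const uint8_t Stream[4] = {0x11, 0x22, 0x33, 0x44};
  assert(read32(Stream, /*IsMicroMips=*/false) == 0x44332211);
  assert(read32(Stream, /*IsMicroMips=*/true) == 0x22114433);
  return 0;
}
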
@@ -274,10 +361,21 @@ MipsDisassembler::getInstruction(MCInst &instr,
uint32_t Insn;
DecodeStatus Result = readInstruction32(Region, Address, Size,
- Insn, isBigEndian);
+ Insn, isBigEndian, IsMicroMips);
if (Result == MCDisassembler::Fail)
return MCDisassembler::Fail;
+ if (IsMicroMips) {
+ // Calling the auto-generated decoder function.
+ Result = decodeInstruction(DecoderTableMicroMips32, instr, Insn, Address,
+ this, STI);
+ if (Result != MCDisassembler::Fail) {
+ Size = 4;
+ return Result;
+ }
+ return MCDisassembler::Fail;
+ }
+
// Calling the auto-generated decoder function.
Result = decodeInstruction(DecoderTableMips32, instr, Insn, Address,
this, STI);
@@ -299,7 +397,7 @@ Mips64Disassembler::getInstruction(MCInst &instr,
uint32_t Insn;
DecodeStatus Result = readInstruction32(Region, Address, Size,
- Insn, isBigEndian);
+ Insn, isBigEndian, false);
if (Result == MCDisassembler::Fail)
return MCDisassembler::Fail;
@@ -359,10 +457,20 @@ static DecodeStatus DecodeGPR32RegisterClass(MCInst &Inst,
return MCDisassembler::Success;
}
-static DecodeStatus DecodeDSPRegsRegisterClass(MCInst &Inst,
- unsigned RegNo,
- uint64_t Address,
- const void *Decoder) {
+static DecodeStatus DecodePtrRegisterClass(MCInst &Inst,
+ unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ if (static_cast<const MipsDisassembler *>(Decoder)->isN64())
+ return DecodeGPR64RegisterClass(Inst, RegNo, Address, Decoder);
+
+ return DecodeGPR32RegisterClass(Inst, RegNo, Address, Decoder);
+}
+
+static DecodeStatus DecodeDSPRRegisterClass(MCInst &Inst,
+ unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder) {
return DecodeGPR32RegisterClass(Inst, RegNo, Address, Decoder);
}
@@ -390,6 +498,18 @@ static DecodeStatus DecodeFGR32RegisterClass(MCInst &Inst,
return MCDisassembler::Success;
}
+static DecodeStatus DecodeFGRH32RegisterClass(MCInst &Inst,
+ unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ if (RegNo > 31)
+ return MCDisassembler::Fail;
+
+ unsigned Reg = getReg(Decoder, Mips::FGRH32RegClassID, RegNo);
+ Inst.addOperand(MCOperand::CreateReg(Reg));
+ return MCDisassembler::Success;
+}
+
static DecodeStatus DecodeCCRRegisterClass(MCInst &Inst,
unsigned RegNo,
uint64_t Address,
@@ -434,6 +554,58 @@ static DecodeStatus DecodeMem(MCInst &Inst,
return MCDisassembler::Success;
}
+static DecodeStatus DecodeMSA128Mem(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ int Offset = SignExtend32<10>(fieldFromInstruction(Insn, 16, 10));
+ unsigned Reg = fieldFromInstruction(Insn, 6, 5);
+ unsigned Base = fieldFromInstruction(Insn, 11, 5);
+
+ Reg = getReg(Decoder, Mips::MSA128BRegClassID, Reg);
+ Base = getReg(Decoder, Mips::GPR32RegClassID, Base);
+
+ Inst.addOperand(MCOperand::CreateReg(Reg));
+ Inst.addOperand(MCOperand::CreateReg(Base));
+ Inst.addOperand(MCOperand::CreateImm(Offset));
+
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeMemMMImm12(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder) {
+ int Offset = SignExtend32<12>(Insn & 0x0fff);
+ unsigned Reg = fieldFromInstruction(Insn, 21, 5);
+ unsigned Base = fieldFromInstruction(Insn, 16, 5);
+
+ Reg = getReg(Decoder, Mips::GPR32RegClassID, Reg);
+ Base = getReg(Decoder, Mips::GPR32RegClassID, Base);
+
+ Inst.addOperand(MCOperand::CreateReg(Reg));
+ Inst.addOperand(MCOperand::CreateReg(Base));
+ Inst.addOperand(MCOperand::CreateImm(Offset));
+
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeMemMMImm16(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder) {
+ int Offset = SignExtend32<16>(Insn & 0xffff);
+ unsigned Reg = fieldFromInstruction(Insn, 21, 5);
+ unsigned Base = fieldFromInstruction(Insn, 16, 5);
+
+ Reg = getReg(Decoder, Mips::GPR32RegClassID, Reg);
+ Base = getReg(Decoder, Mips::GPR32RegClassID, Base);
+
+ Inst.addOperand(MCOperand::CreateReg(Reg));
+ Inst.addOperand(MCOperand::CreateReg(Base));
+ Inst.addOperand(MCOperand::CreateImm(Offset));
+
+ return MCDisassembler::Success;
+}
+
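
The memory decoders above all carve fields out of the raw word with llvm::fieldFromInstruction. A small stand-in showing the same bit arithmetic (illustrative only; fieldFrom is not the LLVM helper itself):

#include <cassert>
#include <cstdint>

// Illustrative stand-in for llvm::fieldFromInstruction: extract NumBits
// bits of Insn starting at bit Start.
static uint32_t fieldFrom(uint32_t Insn, unsigned Start, unsigned NumBits) {
  return (Insn >> Start) & ((1u << NumBits) - 1);
}

int main() {
  // Slice 0xDEADBEEF the way DecodeMemMMImm12 above does: reg in bits
  // 25-21, base in bits 20-16, 12-bit offset in bits 11-0.
  uint32_t Insn = 0xDEADBEEF;
  assert(fieldFrom(Insn, 21, 5) == 21);    // 0b10101
  assert(fieldFrom(Insn, 16, 5) == 13);    // 0b01101
  assert(fieldFrom(Insn, 0, 12) == 0xEEF); // low twelve bits
  return 0;
}
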
static DecodeStatus DecodeFMem(MCInst &Inst,
unsigned Insn,
uint64_t Address,
@@ -477,38 +649,98 @@ static DecodeStatus DecodeAFGR64RegisterClass(MCInst &Inst,
return MCDisassembler::Success;
}
-static DecodeStatus DecodeACRegsDSPRegisterClass(MCInst &Inst,
- unsigned RegNo,
- uint64_t Address,
- const void *Decoder) {
+static DecodeStatus DecodeACC64DSPRegisterClass(MCInst &Inst,
+ unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder) {
if (RegNo >= 4)
return MCDisassembler::Fail;
- unsigned Reg = getReg(Decoder, Mips::ACRegsDSPRegClassID, RegNo);
+ unsigned Reg = getReg(Decoder, Mips::ACC64DSPRegClassID, RegNo);
Inst.addOperand(MCOperand::CreateReg(Reg));
return MCDisassembler::Success;
}
-static DecodeStatus DecodeHIRegsDSPRegisterClass(MCInst &Inst,
- unsigned RegNo,
- uint64_t Address,
- const void *Decoder) {
+static DecodeStatus DecodeHI32DSPRegisterClass(MCInst &Inst,
+ unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder) {
if (RegNo >= 4)
return MCDisassembler::Fail;
- unsigned Reg = getReg(Decoder, Mips::HIRegsDSPRegClassID, RegNo);
+ unsigned Reg = getReg(Decoder, Mips::HI32DSPRegClassID, RegNo);
Inst.addOperand(MCOperand::CreateReg(Reg));
return MCDisassembler::Success;
}
-static DecodeStatus DecodeLORegsDSPRegisterClass(MCInst &Inst,
- unsigned RegNo,
- uint64_t Address,
- const void *Decoder) {
+static DecodeStatus DecodeLO32DSPRegisterClass(MCInst &Inst,
+ unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder) {
if (RegNo >= 4)
return MCDisassembler::Fail;
- unsigned Reg = getReg(Decoder, Mips::LORegsDSPRegClassID, RegNo);
+ unsigned Reg = getReg(Decoder, Mips::LO32DSPRegClassID, RegNo);
+ Inst.addOperand(MCOperand::CreateReg(Reg));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeMSA128BRegisterClass(MCInst &Inst,
+ unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ if (RegNo > 31)
+ return MCDisassembler::Fail;
+
+ unsigned Reg = getReg(Decoder, Mips::MSA128BRegClassID, RegNo);
+ Inst.addOperand(MCOperand::CreateReg(Reg));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeMSA128HRegisterClass(MCInst &Inst,
+ unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ if (RegNo > 31)
+ return MCDisassembler::Fail;
+
+ unsigned Reg = getReg(Decoder, Mips::MSA128HRegClassID, RegNo);
+ Inst.addOperand(MCOperand::CreateReg(Reg));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeMSA128WRegisterClass(MCInst &Inst,
+ unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ if (RegNo > 31)
+ return MCDisassembler::Fail;
+
+ unsigned Reg = getReg(Decoder, Mips::MSA128WRegClassID, RegNo);
+ Inst.addOperand(MCOperand::CreateReg(Reg));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeMSA128DRegisterClass(MCInst &Inst,
+ unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ if (RegNo > 31)
+ return MCDisassembler::Fail;
+
+ unsigned Reg = getReg(Decoder, Mips::MSA128DRegClassID, RegNo);
+ Inst.addOperand(MCOperand::CreateReg(Reg));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeMSACtrlRegisterClass(MCInst &Inst,
+ unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ if (RegNo > 7)
+ return MCDisassembler::Fail;
+
+ unsigned Reg = getReg(Decoder, Mips::MSACtrlRegClassID, RegNo);
Inst.addOperand(MCOperand::CreateReg(Reg));
return MCDisassembler::Success;
}
@@ -533,6 +765,24 @@ static DecodeStatus DecodeJumpTarget(MCInst &Inst,
return MCDisassembler::Success;
}
+static DecodeStatus DecodeBranchTargetMM(MCInst &Inst,
+ unsigned Offset,
+ uint64_t Address,
+ const void *Decoder) {
+ unsigned BranchOffset = Offset & 0xffff;
+ BranchOffset = SignExtend32<18>(BranchOffset << 1);
+ Inst.addOperand(MCOperand::CreateImm(BranchOffset));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeJumpTargetMM(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder) {
+ unsigned JumpOffset = fieldFromInstruction(Insn, 0, 26) << 1;
+ Inst.addOperand(MCOperand::CreateImm(JumpOffset));
+ return MCDisassembler::Success;
+}
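
Both decoders above, and DecodeSimm16 just below, rely on llvm::SignExtend32<N>. For reference, an equivalent one-liner with a couple of sanity checks (a plain C++ stand-in, not the LLVM header):

#include <cassert>
#include <cstdint>

// Stand-in for llvm::SignExtend32<N>: treat the low N bits of X as a
// signed N-bit value and widen it to 32 bits.
template <unsigned N> int32_t signExtend32(uint32_t X) {
  return static_cast<int32_t>(X << (32 - N)) >> (32 - N);
}

int main() {
  assert(signExtend32<18>(0x00008) == 8);  // bit 17 clear: unchanged
  assert(signExtend32<18>(0x3FFFE) == -2); // bit 17 set: negative
  assert(signExtend32<16>(0xFFFF) == -1);  // as in DecodeSimm16 below
  return 0;
}
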
static DecodeStatus DecodeSimm16(MCInst &Inst,
unsigned Insn,
@@ -542,6 +792,15 @@ static DecodeStatus DecodeSimm16(MCInst &Inst,
return MCDisassembler::Success;
}
+static DecodeStatus DecodeLSAImm(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder) {
+ // We add one to the immediate field as it was encoded as 'imm - 1'.
+ Inst.addOperand(MCOperand::CreateImm(Insn + 1));
+ return MCDisassembler::Success;
+}
+
static DecodeStatus DecodeInsSize(MCInst &Inst,
unsigned Insn,
uint64_t Address,
diff --git a/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp b/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp
index 369fece..7884589 100644
--- a/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp
+++ b/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp
@@ -184,6 +184,15 @@ void MipsInstPrinter::printUnsignedImm(const MCInst *MI, int opNum,
printOperand(MI, opNum, O);
}
+void MipsInstPrinter::printUnsignedImm8(const MCInst *MI, int opNum,
+ raw_ostream &O) {
+ const MCOperand &MO = MI->getOperand(opNum);
+ if (MO.isImm())
+ O << (unsigned short int)(unsigned char)MO.getImm();
+ else
+ printOperand(MI, opNum, O);
+}
+
void MipsInstPrinter::
printMemOperand(const MCInst *MI, int opNum, raw_ostream &O) {
// Load/Store memory operands -- imm($reg)
@@ -211,6 +220,11 @@ printFCCOperand(const MCInst *MI, int opNum, raw_ostream &O) {
O << MipsFCCToString((Mips::CondCode)MO.getImm());
}
+void MipsInstPrinter::
+printSHFMask(const MCInst *MI, int opNum, raw_ostream &O) {
+ llvm_unreachable("TODO");
+}
+
bool MipsInstPrinter::printAlias(const char *Str, const MCInst &MI,
unsigned OpNo, raw_ostream &OS) {
OS << "\t" << Str << "\t";
@@ -230,8 +244,11 @@ bool MipsInstPrinter::printAlias(const char *Str, const MCInst &MI,
bool MipsInstPrinter::printAlias(const MCInst &MI, raw_ostream &OS) {
switch (MI.getOpcode()) {
case Mips::BEQ:
+ // beq $zero, $zero, $L2 => b $L2
// beq $r0, $zero, $L2 => beqz $r0, $L2
- return isReg<Mips::ZERO>(MI, 1) && printAlias("beqz", MI, 0, 2, OS);
+ return (isReg<Mips::ZERO>(MI, 0) && isReg<Mips::ZERO>(MI, 1) &&
+ printAlias("b", MI, 2, OS)) ||
+ (isReg<Mips::ZERO>(MI, 1) && printAlias("beqz", MI, 0, 2, OS));
case Mips::BEQ64:
// beq $r0, $zero, $L2 => beqz $r0, $L2
return isReg<Mips::ZERO_64>(MI, 1) && printAlias("beqz", MI, 0, 2, OS);
@@ -257,6 +274,7 @@ bool MipsInstPrinter::printAlias(const MCInst &MI, raw_ostream &OS) {
// jalr $ra, $r1 => jalr $r1
return isReg<Mips::RA_64>(MI, 0) && printAlias("jalr", MI, 1, OS);
case Mips::NOR:
+ case Mips::NOR_MM:
// nor $r0, $r1, $zero => not $r0, $r1
return isReg<Mips::ZERO>(MI, 2) && printAlias("not", MI, 0, 1, OS);
case Mips::NOR64:
diff --git a/lib/Target/Mips/InstPrinter/MipsInstPrinter.h b/lib/Target/Mips/InstPrinter/MipsInstPrinter.h
index 1253ab0..f75ae24 100644
--- a/lib/Target/Mips/InstPrinter/MipsInstPrinter.h
+++ b/lib/Target/Mips/InstPrinter/MipsInstPrinter.h
@@ -93,9 +93,11 @@ public:
private:
void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printUnsignedImm(const MCInst *MI, int opNum, raw_ostream &O);
+ void printUnsignedImm8(const MCInst *MI, int opNum, raw_ostream &O);
void printMemOperand(const MCInst *MI, int opNum, raw_ostream &O);
void printMemOperandEA(const MCInst *MI, int opNum, raw_ostream &O);
void printFCCOperand(const MCInst *MI, int opNum, raw_ostream &O);
+ void printSHFMask(const MCInst *MI, int opNum, raw_ostream &O);
bool printAlias(const char *Str, const MCInst &MI, unsigned OpNo,
raw_ostream &OS);
diff --git a/lib/Target/Mips/MCTargetDesc/CMakeLists.txt b/lib/Target/Mips/MCTargetDesc/CMakeLists.txt
index 1f08789..9116748 100644
--- a/lib/Target/Mips/MCTargetDesc/CMakeLists.txt
+++ b/lib/Target/Mips/MCTargetDesc/CMakeLists.txt
@@ -5,7 +5,7 @@ add_llvm_library(LLVMMipsDesc
MipsMCTargetDesc.cpp
MipsELFObjectWriter.cpp
MipsReginfo.cpp
- MipsELFStreamer.cpp
+ MipsTargetStreamer.cpp
)
add_dependencies(LLVMMipsDesc MipsCommonTableGen)
diff --git a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
index 0b13607..3e70b23 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
@@ -45,6 +45,10 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) {
case Mips::fixup_Mips_GOT_DISP:
case Mips::fixup_Mips_GOT_LO16:
case Mips::fixup_Mips_CALL_LO16:
+ case Mips::fixup_MICROMIPS_LO16:
+ case Mips::fixup_MICROMIPS_GOT_PAGE:
+ case Mips::fixup_MICROMIPS_GOT_OFST:
+ case Mips::fixup_MICROMIPS_GOT_DISP:
break;
case Mips::fixup_Mips_PC16:
// So far we are only using this type for branches.
@@ -65,6 +69,7 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) {
case Mips::fixup_Mips_GOT_Local:
case Mips::fixup_Mips_GOT_HI16:
case Mips::fixup_Mips_CALL_HI16:
+ case Mips::fixup_MICROMIPS_HI16:
// Get the 2nd 16-bits. Also add 1 if bit 15 is 1.
Value = ((Value + 0x8000) >> 16) & 0xffff;
break;
@@ -76,6 +81,13 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) {
// Get the 4th 16-bits.
Value = ((Value + 0x800080008000LL) >> 48) & 0xffff;
break;
+ case Mips::fixup_MICROMIPS_26_S1:
+ Value >>= 1;
+ break;
+ case Mips::fixup_MICROMIPS_PC16_S1:
+ Value -= 4;
+ Value >>= 1;
+ break;
}
return Value;
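
The two new microMIPS cases reduce byte distances to the halfword units the instruction fields store. A minimal sketch of just that arithmetic (illustrative helpers, not the backend API):

#include <cassert>
#include <cstdint>

// The arithmetic of the two new cases above, in isolation.
static uint64_t adjust26S1(uint64_t Value) { return Value >> 1; }
static uint64_t adjustPC16S1(uint64_t Value) { return (Value - 4) >> 1; }

int main() {
  // A 0x40-byte jump distance stores 0x20 halfwords in the field.
  assert(adjust26S1(0x40) == 0x20);
  // A PC-relative branch first drops the 4-byte instruction bias, then
  // converts bytes to halfwords: (0x104 - 4) / 2 == 0x80.
  assert(adjustPC16S1(0x104) == 0x80);
  return 0;
}
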
@@ -188,7 +200,20 @@ public:
{ "fixup_Mips_GOT_HI16", 0, 16, 0 },
{ "fixup_Mips_GOT_LO16", 0, 16, 0 },
{ "fixup_Mips_CALL_HI16", 0, 16, 0 },
- { "fixup_Mips_CALL_LO16", 0, 16, 0 }
+ { "fixup_Mips_CALL_LO16", 0, 16, 0 },
+ { "fixup_MICROMIPS_26_S1", 0, 26, 0 },
+ { "fixup_MICROMIPS_HI16", 0, 16, 0 },
+ { "fixup_MICROMIPS_LO16", 0, 16, 0 },
+ { "fixup_MICROMIPS_GOT16", 0, 16, 0 },
+ { "fixup_MICROMIPS_PC16_S1", 0, 16, MCFixupKindInfo::FKF_IsPCRel },
+ { "fixup_MICROMIPS_CALL16", 0, 16, 0 },
+ { "fixup_MICROMIPS_GOT_DISP", 0, 16, 0 },
+ { "fixup_MICROMIPS_GOT_PAGE", 0, 16, 0 },
+ { "fixup_MICROMIPS_GOT_OFST", 0, 16, 0 },
+ { "fixup_MICROMIPS_TLS_DTPREL_HI16", 0, 16, 0 },
+ { "fixup_MICROMIPS_TLS_DTPREL_LO16", 0, 16, 0 },
+ { "fixup_MICROMIPS_TLS_TPREL_HI16", 0, 16, 0 },
+ { "fixup_MICROMIPS_TLS_TPREL_LO16", 0, 16, 0 }
};
if (Kind < FirstTargetFixupKind)
@@ -253,25 +278,33 @@ public:
} // namespace
// MCAsmBackend
-MCAsmBackend *llvm::createMipsAsmBackendEL32(const Target &T, StringRef TT,
+MCAsmBackend *llvm::createMipsAsmBackendEL32(const Target &T,
+ const MCRegisterInfo &MRI,
+ StringRef TT,
StringRef CPU) {
return new MipsAsmBackend(T, Triple(TT).getOS(),
/*IsLittle*/true, /*Is64Bit*/false);
}
-MCAsmBackend *llvm::createMipsAsmBackendEB32(const Target &T, StringRef TT,
+MCAsmBackend *llvm::createMipsAsmBackendEB32(const Target &T,
+ const MCRegisterInfo &MRI,
+ StringRef TT,
StringRef CPU) {
return new MipsAsmBackend(T, Triple(TT).getOS(),
/*IsLittle*/false, /*Is64Bit*/false);
}
-MCAsmBackend *llvm::createMipsAsmBackendEL64(const Target &T, StringRef TT,
+MCAsmBackend *llvm::createMipsAsmBackendEL64(const Target &T,
+ const MCRegisterInfo &MRI,
+ StringRef TT,
StringRef CPU) {
return new MipsAsmBackend(T, Triple(TT).getOS(),
/*IsLittle*/true, /*Is64Bit*/true);
}
-MCAsmBackend *llvm::createMipsAsmBackendEB64(const Target &T, StringRef TT,
+MCAsmBackend *llvm::createMipsAsmBackendEB64(const Target &T,
+ const MCRegisterInfo &MRI,
+ StringRef TT,
StringRef CPU) {
return new MipsAsmBackend(T, Triple(TT).getOS(),
/*IsLittle*/false, /*Is64Bit*/true);
diff --git a/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp b/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp
index 6471b51..83c7d4b 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp
@@ -183,6 +183,45 @@ unsigned MipsELFObjectWriter::GetRelocType(const MCValue &Target,
case Mips::fixup_Mips_CALL_LO16:
Type = ELF::R_MIPS_CALL_LO16;
break;
+ case Mips::fixup_MICROMIPS_26_S1:
+ Type = ELF::R_MICROMIPS_26_S1;
+ break;
+ case Mips::fixup_MICROMIPS_HI16:
+ Type = ELF::R_MICROMIPS_HI16;
+ break;
+ case Mips::fixup_MICROMIPS_LO16:
+ Type = ELF::R_MICROMIPS_LO16;
+ break;
+ case Mips::fixup_MICROMIPS_GOT16:
+ Type = ELF::R_MICROMIPS_GOT16;
+ break;
+ case Mips::fixup_MICROMIPS_PC16_S1:
+ Type = ELF::R_MICROMIPS_PC16_S1;
+ break;
+ case Mips::fixup_MICROMIPS_CALL16:
+ Type = ELF::R_MICROMIPS_CALL16;
+ break;
+ case Mips::fixup_MICROMIPS_GOT_DISP:
+ Type = ELF::R_MICROMIPS_GOT_DISP;
+ break;
+ case Mips::fixup_MICROMIPS_GOT_PAGE:
+ Type = ELF::R_MICROMIPS_GOT_PAGE;
+ break;
+ case Mips::fixup_MICROMIPS_GOT_OFST:
+ Type = ELF::R_MICROMIPS_GOT_OFST;
+ break;
+ case Mips::fixup_MICROMIPS_TLS_DTPREL_HI16:
+ Type = ELF::R_MICROMIPS_TLS_DTPREL_HI16;
+ break;
+ case Mips::fixup_MICROMIPS_TLS_DTPREL_LO16:
+ Type = ELF::R_MICROMIPS_TLS_DTPREL_LO16;
+ break;
+ case Mips::fixup_MICROMIPS_TLS_TPREL_HI16:
+ Type = ELF::R_MICROMIPS_TLS_TPREL_HI16;
+ break;
+ case Mips::fixup_MICROMIPS_TLS_TPREL_LO16:
+ Type = ELF::R_MICROMIPS_TLS_TPREL_LO16;
+ break;
}
return Type;
}
diff --git a/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp b/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp
deleted file mode 100644
index cfcb877..0000000
--- a/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp
+++ /dev/null
@@ -1,93 +0,0 @@
-//===-- MipsELFStreamer.cpp - MipsELFStreamer ---------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===-------------------------------------------------------------------===//
-#include "MCTargetDesc/MipsELFStreamer.h"
-#include "MipsSubtarget.h"
-#include "llvm/MC/MCAssembler.h"
-#include "llvm/MC/MCELF.h"
-#include "llvm/MC/MCELFSymbolFlags.h"
-#include "llvm/MC/MCSymbol.h"
-#include "llvm/Support/ELF.h"
-#include "llvm/Support/ErrorHandling.h"
-
-namespace llvm {
-
- MCELFStreamer* createMipsELFStreamer(MCContext &Context, MCAsmBackend &TAB,
- raw_ostream &OS, MCCodeEmitter *Emitter,
- bool RelaxAll, bool NoExecStack) {
- MipsELFStreamer *S = new MipsELFStreamer(Context, TAB, OS, Emitter,
- RelaxAll, NoExecStack);
- return S;
- }
-
- // For llc. Set a group of ELF header flags
- void
- MipsELFStreamer::emitELFHeaderFlagsCG(const MipsSubtarget &Subtarget) {
-
- if (hasRawTextSupport())
- return;
-
- // Update e_header flags
- MCAssembler& MCA = getAssembler();
- unsigned EFlags = MCA.getELFHeaderEFlags();
-
- // TODO: Need to add -mabicalls and -mno-abicalls flags.
- // Currently we assume that -mabicalls is the default.
- EFlags |= ELF::EF_MIPS_CPIC;
-
- if (Subtarget.inMips16Mode())
- EFlags |= ELF::EF_MIPS_ARCH_ASE_M16;
- else
- EFlags |= ELF::EF_MIPS_NOREORDER;
-
- // Architecture
- if (Subtarget.hasMips64r2())
- EFlags |= ELF::EF_MIPS_ARCH_64R2;
- else if (Subtarget.hasMips64())
- EFlags |= ELF::EF_MIPS_ARCH_64;
- else if (Subtarget.hasMips32r2())
- EFlags |= ELF::EF_MIPS_ARCH_32R2;
- else
- EFlags |= ELF::EF_MIPS_ARCH_32;
-
- if (Subtarget.inMicroMipsMode())
- EFlags |= ELF::EF_MIPS_MICROMIPS;
-
- // ABI
- if (Subtarget.isABI_O32())
- EFlags |= ELF::EF_MIPS_ABI_O32;
-
- // Relocation Model
- Reloc::Model RM = Subtarget.getRelocationModel();
- if (RM == Reloc::PIC_ || RM == Reloc::Default)
- EFlags |= ELF::EF_MIPS_PIC;
- else if (RM == Reloc::Static)
- ; // Do nothing for Reloc::Static
- else
- llvm_unreachable("Unsupported relocation model for e_flags");
-
- MCA.setELFHeaderEFlags(EFlags);
- }
-
- // For llc. Set a symbol's STO flags
- void
- MipsELFStreamer::emitMipsSTOCG(const MipsSubtarget &Subtarget,
- MCSymbol *Sym,
- unsigned Val) {
-
- if (hasRawTextSupport())
- return;
-
- MCSymbolData &Data = getOrCreateSymbolData(Sym);
- // The "other" values are stored in the last 6 bits of the second byte
- // The traditional defines for STO values assume the full byte and thus
- // the shift to pack it.
- MCELF::setOther(Data, Val >> 2);
- }
-
-} // namespace llvm
diff --git a/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h b/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h
deleted file mode 100644
index b10ccc7..0000000
--- a/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h
+++ /dev/null
@@ -1,43 +0,0 @@
-//=== MipsELFStreamer.h - MipsELFStreamer ------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENCE.TXT for details.
-//
-//===-------------------------------------------------------------------===//
-#ifndef MIPSELFSTREAMER_H_
-#define MIPSELFSTREAMER_H_
-
-#include "llvm/MC/MCELFStreamer.h"
-
-namespace llvm {
-class MipsAsmPrinter;
-class MipsSubtarget;
-class MCSymbol;
-
-class MipsELFStreamer : public MCELFStreamer {
-public:
- MipsELFStreamer(MCContext &Context, MCAsmBackend &TAB,
- raw_ostream &OS, MCCodeEmitter *Emitter,
- bool RelaxAll, bool NoExecStack)
- : MCELFStreamer(SK_MipsELFStreamer, Context, TAB, OS, Emitter) {
- }
-
- ~MipsELFStreamer() {}
- void emitELFHeaderFlagsCG(const MipsSubtarget &Subtarget);
- void emitMipsSTOCG(const MipsSubtarget &Subtarget,
- MCSymbol *Sym,
- unsigned Val);
-
- static bool classof(const MCStreamer *S) {
- return S->getKind() == SK_MipsELFStreamer;
- }
-};
-
- MCELFStreamer* createMipsELFStreamer(MCContext &Context, MCAsmBackend &TAB,
- raw_ostream &OS, MCCodeEmitter *Emitter,
- bool RelaxAll, bool NoExecStack);
-}
-
-#endif /* MIPSELFSTREAMER_H_ */
diff --git a/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h b/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h
index f963900..6ed44b7 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h
+++ b/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h
@@ -128,6 +128,45 @@ namespace Mips {
// resulting in - R_MIPS_CALL_LO16
fixup_Mips_CALL_LO16,
+ // resulting in - R_MICROMIPS_26_S1
+ fixup_MICROMIPS_26_S1,
+
+ // resulting in - R_MICROMIPS_HI16
+ fixup_MICROMIPS_HI16,
+
+ // resulting in - R_MICROMIPS_LO16
+ fixup_MICROMIPS_LO16,
+
+ // resulting in - R_MICROMIPS_GOT16
+ fixup_MICROMIPS_GOT16,
+
+ // resulting in - R_MICROMIPS_PC16_S1
+ fixup_MICROMIPS_PC16_S1,
+
+ // resulting in - R_MICROMIPS_CALL16
+ fixup_MICROMIPS_CALL16,
+
+ // resulting in - R_MICROMIPS_GOT_DISP
+ fixup_MICROMIPS_GOT_DISP,
+
+ // resulting in - R_MICROMIPS_GOT_PAGE
+ fixup_MICROMIPS_GOT_PAGE,
+
+ // resulting in - R_MICROMIPS_GOT_OFST
+ fixup_MICROMIPS_GOT_OFST,
+
+ // resulting in - R_MICROMIPS_TLS_DTPREL_HI16
+ fixup_MICROMIPS_TLS_DTPREL_HI16,
+
+ // resulting in - R_MICROMIPS_TLS_DTPREL_LO16
+ fixup_MICROMIPS_TLS_DTPREL_LO16,
+
+ // resulting in - R_MICROMIPS_TLS_TPREL_HI16
+ fixup_MICROMIPS_TLS_TPREL_HI16,
+
+ // resulting in - R_MICROMIPS_TLS_TPREL_LO16
+ fixup_MICROMIPS_TLS_TPREL_LO16,
+
// Marker
LastTargetFixupKind,
NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp
index 33f6f96..6aa3c76 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp
@@ -38,7 +38,6 @@ MipsMCAsmInfo::MipsMCAsmInfo(StringRef TT) {
ZeroDirective = "\t.space\t";
GPRel32Directive = "\t.gpword\t";
GPRel64Directive = "\t.gpdword\t";
- WeakRefDirective = "\t.weak\t";
DebugLabelSuffix = "=.";
SupportsDebugInformation = true;
ExceptionsType = ExceptionHandling::DwarfCFI;
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.h b/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.h
index 772234e..1000113 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.h
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.h
@@ -14,12 +14,12 @@
#ifndef MIPSTARGETASMINFO_H
#define MIPSTARGETASMINFO_H
-#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCAsmInfoELF.h"
namespace llvm {
class StringRef;
- class MipsMCAsmInfo : public MCAsmInfo {
+ class MipsMCAsmInfo : public MCAsmInfoELF {
virtual void anchor();
public:
explicit MipsMCAsmInfo(StringRef TT);
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
index 4dc6917..66428bd 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
@@ -39,11 +39,14 @@ class MipsMCCodeEmitter : public MCCodeEmitter {
MCContext &Ctx;
const MCSubtargetInfo &STI;
bool IsLittleEndian;
+ bool IsMicroMips;
public:
MipsMCCodeEmitter(const MCInstrInfo &mcii, MCContext &Ctx_,
const MCSubtargetInfo &sti, bool IsLittle) :
- MCII(mcii), Ctx(Ctx_), STI (sti), IsLittleEndian(IsLittle) {}
+ MCII(mcii), Ctx(Ctx_), STI (sti), IsLittleEndian(IsLittle) {
+ IsMicroMips = STI.getFeatureBits() & Mips::FeatureMicroMips;
+ }
~MipsMCCodeEmitter() {}
@@ -53,9 +56,17 @@ public:
void EmitInstruction(uint64_t Val, unsigned Size, raw_ostream &OS) const {
// Output the instruction encoding in little endian byte order.
- for (unsigned i = 0; i < Size; ++i) {
- unsigned Shift = IsLittleEndian ? i * 8 : (Size - 1 - i) * 8;
- EmitByte((Val >> Shift) & 0xff, OS);
+ // Little-endian byte ordering:
+ // mips32r2: 4 | 3 | 2 | 1
+ // microMIPS: 2 | 1 | 4 | 3
+ if (IsLittleEndian && Size == 4 && IsMicroMips) {
+ EmitInstruction(Val>>16, 2, OS);
+ EmitInstruction(Val, 2, OS);
+ } else {
+ for (unsigned i = 0; i < Size; ++i) {
+ unsigned Shift = IsLittleEndian ? i * 8 : (Size - 1 - i) * 8;
+ EmitByte((Val >> Shift) & 0xff, OS);
+ }
}
}
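
The swapped emission order is the exact inverse of the readInstruction32 change in the disassembler above. A self-contained sketch covering only the little-endian path (emit is an illustrative stand-in; the big-endian path is unchanged and omitted):

#include <cassert>
#include <cstdint>
#include <vector>

// Emit 'Size' bytes of 'Val' little-endian; for a 4-byte microMIPS word,
// emit the high halfword first (mirrors EmitInstruction above).
static void emit(uint64_t Val, unsigned Size, bool IsMicroMips,
                 std::vector<uint8_t> &OS) {
  if (IsMicroMips && Size == 4) {
    emit(Val >> 16, 2, false, OS);
    emit(Val, 2, false, OS);
    return;
  }
  for (unsigned i = 0; i < Size; ++i)
    OS.push_back((Val >> (i * 8)) & 0xff);
}

int main() {
  std::vector<uint8_t> MM, Plain;
  emit(0x11223344, 4, true, MM);
  emit(0x11223344, 4, false, Plain);
  assert((MM == std::vector<uint8_t>{0x22, 0x11, 0x44, 0x33}));    // 2|1|4|3
  assert((Plain == std::vector<uint8_t>{0x44, 0x33, 0x22, 0x11})); // 4|3|2|1
  return 0;
}
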
@@ -73,12 +84,24 @@ public:
unsigned getJumpTargetOpValue(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups) const;
+  // getJumpTargetOpValueMM - Return binary encoding of the microMIPS jump
+ // target operand. If the machine operand requires relocation,
+ // record the relocation and return zero.
+ unsigned getJumpTargetOpValueMM(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
// getBranchTargetOpValue - Return binary encoding of the branch
// target operand. If the machine operand requires relocation,
// record the relocation and return zero.
unsigned getBranchTargetOpValue(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups) const;
+  // getBranchTargetOpValueMM - Return binary encoding of the microMIPS branch
+ // target operand. If the machine operand requires relocation,
+ // record the relocation and return zero.
+ unsigned getBranchTargetOpValueMM(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
  // getMachineOpValue - Return binary encoding of operand. If the machine
// operand requires relocation, record the relocation and return zero.
unsigned getMachineOpValue(const MCInst &MI,const MCOperand &MO,
@@ -86,11 +109,17 @@ public:
unsigned getMemEncoding(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups) const;
+ unsigned getMemEncodingMMImm12(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups) const;
unsigned getSizeExtEncoding(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups) const;
unsigned getSizeInsEncoding(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups) const;
+ // getLSAImmEncoding - Return binary encoding of LSA immediate.
+ unsigned getLSAImmEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
unsigned
getExprOpValue(const MCExpr *Expr,SmallVectorImpl<MCFixup> &Fixups) const;
@@ -142,6 +171,9 @@ static void LowerLargeShift(MCInst& Inst) {
case Mips::DSRA:
Inst.setOpcode(Mips::DSRA32);
return;
+ case Mips::DROTR:
+ Inst.setOpcode(Mips::DROTR32);
+ return;
}
}
@@ -177,7 +209,7 @@ static void LowerDextDins(MCInst& InstIn) {
}
/// EncodeInstruction - Emit the instruction.
-/// Size the instruction (currently only 4 bytes
+/// The instruction size is given by Desc.getSize().
void MipsMCCodeEmitter::
EncodeInstruction(const MCInst &MI, raw_ostream &OS,
SmallVectorImpl<MCFixup> &Fixups) const
@@ -193,6 +225,7 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
case Mips::DSLL:
case Mips::DSRL:
case Mips::DSRA:
+ case Mips::DROTR:
LowerLargeShift(TmpInst);
break;
// Double extract instruction is chosen by pos and size operands
@@ -201,6 +234,7 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
LowerDextDins(TmpInst);
}
+ unsigned long N = Fixups.size();
uint32_t Binary = getBinaryCodeForInstr(TmpInst, Fixups);
// Check for unimplemented opcodes.
@@ -213,6 +247,8 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
if (STI.getFeatureBits() & Mips::FeatureMicroMips) {
int NewOpcode = Mips::Std2MicroMips (Opcode, Mips::Arch_micromips);
if (NewOpcode != -1) {
+ if (Fixups.size() > N)
+ Fixups.pop_back();
Opcode = NewOpcode;
TmpInst.setOpcode (NewOpcode);
Binary = getBinaryCodeForInstr(TmpInst, Fixups);
@@ -250,6 +286,28 @@ getBranchTargetOpValue(const MCInst &MI, unsigned OpNo,
return 0;
}
+/// getBranchTargetOpValueMM - Return binary encoding of the microMIPS branch
+/// target operand. If the machine operand requires relocation,
+/// record the relocation and return zero.
+unsigned MipsMCCodeEmitter::
+getBranchTargetOpValueMM(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+
+ const MCOperand &MO = MI.getOperand(OpNo);
+
+ // If the destination is an immediate, divide by 2.
+ if (MO.isImm()) return MO.getImm() >> 1;
+
+ assert(MO.isExpr() &&
+ "getBranchTargetOpValueMM expects only expressions or immediates");
+
+ const MCExpr *Expr = MO.getExpr();
+ Fixups.push_back(MCFixup::Create(0, Expr,
+ MCFixupKind(Mips::
+ fixup_MICROMIPS_PC16_S1)));
+ return 0;
+}
+
/// getJumpTargetOpValue - Return binary encoding of the jump
/// target operand. If the machine operand requires relocation,
/// record the relocation and return zero.
@@ -271,6 +329,23 @@ getJumpTargetOpValue(const MCInst &MI, unsigned OpNo,
}
unsigned MipsMCCodeEmitter::
+getJumpTargetOpValueMM(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+
+ const MCOperand &MO = MI.getOperand(OpNo);
+ // If the destination is an immediate, divide by 2.
+ if (MO.isImm()) return MO.getImm() >> 1;
+
+ assert(MO.isExpr() &&
+ "getJumpTargetOpValueMM expects only expressions or an immediate");
+
+ const MCExpr *Expr = MO.getExpr();
+ Fixups.push_back(MCFixup::Create(0, Expr,
+ MCFixupKind(Mips::fixup_MICROMIPS_26_S1)));
+ return 0;
+}
+
+unsigned MipsMCCodeEmitter::
getExprOpValue(const MCExpr *Expr,SmallVectorImpl<MCFixup> &Fixups) const {
int64_t Res;
@@ -300,31 +375,39 @@ getExprOpValue(const MCExpr *Expr,SmallVectorImpl<MCFixup> &Fixups) const {
FixupKind = Mips::fixup_Mips_GPOFF_LO;
break;
case MCSymbolRefExpr::VK_Mips_GOT_PAGE :
- FixupKind = Mips::fixup_Mips_GOT_PAGE;
+ FixupKind = IsMicroMips ? Mips::fixup_MICROMIPS_GOT_PAGE
+ : Mips::fixup_Mips_GOT_PAGE;
break;
case MCSymbolRefExpr::VK_Mips_GOT_OFST :
- FixupKind = Mips::fixup_Mips_GOT_OFST;
+ FixupKind = IsMicroMips ? Mips::fixup_MICROMIPS_GOT_OFST
+ : Mips::fixup_Mips_GOT_OFST;
break;
case MCSymbolRefExpr::VK_Mips_GOT_DISP :
- FixupKind = Mips::fixup_Mips_GOT_DISP;
+ FixupKind = IsMicroMips ? Mips::fixup_MICROMIPS_GOT_DISP
+ : Mips::fixup_Mips_GOT_DISP;
break;
case MCSymbolRefExpr::VK_Mips_GPREL:
FixupKind = Mips::fixup_Mips_GPREL16;
break;
case MCSymbolRefExpr::VK_Mips_GOT_CALL:
- FixupKind = Mips::fixup_Mips_CALL16;
+ FixupKind = IsMicroMips ? Mips::fixup_MICROMIPS_CALL16
+ : Mips::fixup_Mips_CALL16;
break;
case MCSymbolRefExpr::VK_Mips_GOT16:
- FixupKind = Mips::fixup_Mips_GOT_Global;
+ FixupKind = IsMicroMips ? Mips::fixup_MICROMIPS_GOT16
+ : Mips::fixup_Mips_GOT_Global;
break;
case MCSymbolRefExpr::VK_Mips_GOT:
- FixupKind = Mips::fixup_Mips_GOT_Local;
+ FixupKind = IsMicroMips ? Mips::fixup_MICROMIPS_GOT16
+ : Mips::fixup_Mips_GOT_Local;
break;
case MCSymbolRefExpr::VK_Mips_ABS_HI:
- FixupKind = Mips::fixup_Mips_HI16;
+ FixupKind = IsMicroMips ? Mips::fixup_MICROMIPS_HI16
+ : Mips::fixup_Mips_HI16;
break;
case MCSymbolRefExpr::VK_Mips_ABS_LO:
- FixupKind = Mips::fixup_Mips_LO16;
+ FixupKind = IsMicroMips ? Mips::fixup_MICROMIPS_LO16
+ : Mips::fixup_Mips_LO16;
break;
case MCSymbolRefExpr::VK_Mips_TLSGD:
FixupKind = Mips::fixup_Mips_TLSGD;
@@ -333,19 +416,23 @@ getExprOpValue(const MCExpr *Expr,SmallVectorImpl<MCFixup> &Fixups) const {
FixupKind = Mips::fixup_Mips_TLSLDM;
break;
case MCSymbolRefExpr::VK_Mips_DTPREL_HI:
- FixupKind = Mips::fixup_Mips_DTPREL_HI;
+ FixupKind = IsMicroMips ? Mips::fixup_MICROMIPS_TLS_DTPREL_HI16
+ : Mips::fixup_Mips_DTPREL_HI;
break;
case MCSymbolRefExpr::VK_Mips_DTPREL_LO:
- FixupKind = Mips::fixup_Mips_DTPREL_LO;
+ FixupKind = IsMicroMips ? Mips::fixup_MICROMIPS_TLS_DTPREL_LO16
+ : Mips::fixup_Mips_DTPREL_LO;
break;
case MCSymbolRefExpr::VK_Mips_GOTTPREL:
FixupKind = Mips::fixup_Mips_GOTTPREL;
break;
case MCSymbolRefExpr::VK_Mips_TPREL_HI:
- FixupKind = Mips::fixup_Mips_TPREL_HI;
+ FixupKind = IsMicroMips ? Mips::fixup_MICROMIPS_TLS_TPREL_HI16
+ : Mips::fixup_Mips_TPREL_HI;
break;
case MCSymbolRefExpr::VK_Mips_TPREL_LO:
- FixupKind = Mips::fixup_Mips_TPREL_LO;
+ FixupKind = IsMicroMips ? Mips::fixup_MICROMIPS_TLS_TPREL_LO16
+ : Mips::fixup_Mips_TPREL_LO;
break;
case MCSymbolRefExpr::VK_Mips_HIGHER:
FixupKind = Mips::fixup_Mips_HIGHER;
@@ -406,6 +493,17 @@ MipsMCCodeEmitter::getMemEncoding(const MCInst &MI, unsigned OpNo,
return (OffBits & 0xFFFF) | RegBits;
}
+unsigned MipsMCCodeEmitter::
+getMemEncodingMMImm12(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ // Base register is encoded in bits 20-16, offset is encoded in bits 11-0.
+ assert(MI.getOperand(OpNo).isReg());
+ unsigned RegBits = getMachineOpValue(MI, MI.getOperand(OpNo), Fixups) << 16;
+ unsigned OffBits = getMachineOpValue(MI, MI.getOperand(OpNo+1), Fixups);
+
+ return (OffBits & 0x0FFF) | RegBits;
+}
+
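
A compact illustration of the bit layout getMemEncodingMMImm12 produces, with plain register numbers standing in for the encoded operand values (illustrative only):

#include <cassert>
#include <cstdint>

// Pack a microMIPS mem(base + offset) operand: base register number in
// bits 20-16, signed 12-bit offset in bits 11-0.
static uint32_t encodeMemMMImm12(unsigned BaseReg, int32_t Offset) {
  return (static_cast<uint32_t>(BaseReg) << 16) |
         (static_cast<uint32_t>(Offset) & 0x0FFF);
}

int main() {
  // base = $29 (stack pointer), offset = -8 -> 0xFF8 in the 12-bit field.
  assert(encodeMemMMImm12(29, -8) == ((29u << 16) | 0xFF8u));
  return 0;
}
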
unsigned
MipsMCCodeEmitter::getSizeExtEncoding(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups) const {
@@ -427,5 +525,13 @@ MipsMCCodeEmitter::getSizeInsEncoding(const MCInst &MI, unsigned OpNo,
return Position + Size - 1;
}
+unsigned
+MipsMCCodeEmitter::getLSAImmEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ assert(MI.getOperand(OpNo).isImm());
+ // The immediate is encoded as 'immediate - 1'.
+ return getMachineOpValue(MI, MI.getOperand(OpNo), Fixups) - 1;
+}
+
#include "MipsGenMCCodeEmitter.inc"
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp
index 837fabe..5548aaa 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp
@@ -11,17 +11,21 @@
//
//===----------------------------------------------------------------------===//
-#include "MCTargetDesc/MipsELFStreamer.h"
#include "MipsMCTargetDesc.h"
#include "InstPrinter/MipsInstPrinter.h"
#include "MipsMCAsmInfo.h"
+#include "MipsTargetStreamer.h"
#include "llvm/MC/MCCodeGenInfo.h"
+#include "llvm/MC/MCELF.h"
+#include "llvm/MC/MCELFStreamer.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
-#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MachineLocation.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/TargetRegistry.h"
#define GET_INSTRINFO_MC_DESC
@@ -125,14 +129,23 @@ static MCInstPrinter *createMipsMCInstPrinter(const Target &T,
}
static MCStreamer *createMCStreamer(const Target &T, StringRef TT,
- MCContext &Ctx, MCAsmBackend &MAB,
- raw_ostream &_OS,
- MCCodeEmitter *_Emitter,
- bool RelaxAll,
- bool NoExecStack) {
- Triple TheTriple(TT);
-
- return createMipsELFStreamer(Ctx, MAB, _OS, _Emitter, RelaxAll, NoExecStack);
+ MCContext &Context, MCAsmBackend &MAB,
+ raw_ostream &OS, MCCodeEmitter *Emitter,
+ bool RelaxAll, bool NoExecStack) {
+ MipsTargetELFStreamer *S = new MipsTargetELFStreamer();
+ return createELFStreamer(Context, S, MAB, OS, Emitter, RelaxAll, NoExecStack);
+}
+
+static MCStreamer *
+createMCAsmStreamer(MCContext &Ctx, formatted_raw_ostream &OS,
+ bool isVerboseAsm, bool useLoc, bool useCFI,
+ bool useDwarfDirectory, MCInstPrinter *InstPrint,
+ MCCodeEmitter *CE, MCAsmBackend *TAB, bool ShowInst) {
+ MipsTargetAsmStreamer *S = new MipsTargetAsmStreamer(OS);
+
+ return llvm::createAsmStreamer(Ctx, S, OS, isVerboseAsm, useLoc, useCFI,
+ useDwarfDirectory, InstPrint, CE, TAB,
+ ShowInst);
}
extern "C" void LLVMInitializeMipsTargetMC() {
@@ -183,6 +196,12 @@ extern "C" void LLVMInitializeMipsTargetMC() {
TargetRegistry::RegisterMCObjectStreamer(TheMips64elTarget,
createMCStreamer);
+ // Register the asm streamer.
+ TargetRegistry::RegisterAsmStreamer(TheMipsTarget, createMCAsmStreamer);
+ TargetRegistry::RegisterAsmStreamer(TheMipselTarget, createMCAsmStreamer);
+ TargetRegistry::RegisterAsmStreamer(TheMips64Target, createMCAsmStreamer);
+ TargetRegistry::RegisterAsmStreamer(TheMips64elTarget, createMCAsmStreamer);
+
// Register the asm backend.
TargetRegistry::RegisterMCAsmBackend(TheMipsTarget,
createMipsAsmBackendEB32);
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h
index 71954a4..eabebfe 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h
@@ -42,14 +42,14 @@ MCCodeEmitter *createMipsMCCodeEmitterEL(const MCInstrInfo &MCII,
const MCSubtargetInfo &STI,
MCContext &Ctx);
-MCAsmBackend *createMipsAsmBackendEB32(const Target &T, StringRef TT,
- StringRef CPU);
-MCAsmBackend *createMipsAsmBackendEL32(const Target &T, StringRef TT,
- StringRef CPU);
-MCAsmBackend *createMipsAsmBackendEB64(const Target &T, StringRef TT,
- StringRef CPU);
-MCAsmBackend *createMipsAsmBackendEL64(const Target &T, StringRef TT,
- StringRef CPU);
+MCAsmBackend *createMipsAsmBackendEB32(const Target &T, const MCRegisterInfo &MRI,
+ StringRef TT, StringRef CPU);
+MCAsmBackend *createMipsAsmBackendEL32(const Target &T, const MCRegisterInfo &MRI,
+ StringRef TT, StringRef CPU);
+MCAsmBackend *createMipsAsmBackendEB64(const Target &T, const MCRegisterInfo &MRI,
+ StringRef TT, StringRef CPU);
+MCAsmBackend *createMipsAsmBackendEL64(const Target &T, const MCRegisterInfo &MRI,
+ StringRef TT, StringRef CPU);
MCObjectWriter *createMipsELFObjectWriter(raw_ostream &OS,
uint8_t OSABI,
diff --git a/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp b/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp
new file mode 100644
index 0000000..5e90bbc
--- /dev/null
+++ b/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp
@@ -0,0 +1,67 @@
+//===-- MipsTargetStreamer.cpp - Mips Target Streamer Methods -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides Mips specific target streamer methods.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MipsTargetStreamer.h"
+#include "llvm/MC/MCELF.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FormattedStream.h"
+
+using namespace llvm;
+
+static cl::opt<bool> PrintHackDirectives("print-hack-directives",
+ cl::init(false), cl::Hidden);
+
+// Pin the vtable to this file.
+void MipsTargetStreamer::anchor() {}
+
+MipsTargetAsmStreamer::MipsTargetAsmStreamer(formatted_raw_ostream &OS)
+ : OS(OS) {}
+
+void MipsTargetAsmStreamer::emitMipsHackELFFlags(unsigned Flags) {
+ if (!PrintHackDirectives)
+ return;
+
+ OS << "\t.mips_hack_elf_flags 0x";
+ OS.write_hex(Flags);
+ OS << '\n';
+}
+void MipsTargetAsmStreamer::emitMipsHackSTOCG(MCSymbol *Sym, unsigned Val) {
+ if (!PrintHackDirectives)
+ return;
+
+ OS << "\t.mips_hack_stocg ";
+ OS << Sym->getName();
+ OS << ", ";
+ OS << Val;
+ OS << '\n';
+}
+
+MCELFStreamer &MipsTargetELFStreamer::getStreamer() {
+ return static_cast<MCELFStreamer &>(*Streamer);
+}
+
+void MipsTargetELFStreamer::emitMipsHackELFFlags(unsigned Flags) {
+ MCAssembler &MCA = getStreamer().getAssembler();
+ MCA.setELFHeaderEFlags(Flags);
+}
+
+// Set a symbol's STO flags
+void MipsTargetELFStreamer::emitMipsHackSTOCG(MCSymbol *Sym, unsigned Val) {
+ MCSymbolData &Data = getStreamer().getOrCreateSymbolData(Sym);
+  // The "other" values are stored in the last 6 bits of the second byte.
+ // The traditional defines for STO values assume the full byte and thus
+ // the shift to pack it.
+ MCELF::setOther(Data, Val >> 2);
+}
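
The shift by two exists because MCELF::setOther() takes the six-bit 'other' field directly, while the ELF STO_* constants are defined against the full st_other byte. A tiny worked example (STO_MIPS_MICROMIPS is the standard ELF value 0x80):

#include <cassert>

int main() {
  // Defined against the whole st_other byte ...
  const unsigned STO_MIPS_MICROMIPS = 0x80;
  // ... but setOther() receives only the top six bits, pre-shifted.
  assert((STO_MIPS_MICROMIPS >> 2) == 0x20);
  return 0;
}
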
diff --git a/lib/Target/Mips/MSA.txt b/lib/Target/Mips/MSA.txt
new file mode 100644
index 0000000..d1c4193
--- /dev/null
+++ b/lib/Target/Mips/MSA.txt
@@ -0,0 +1,78 @@
+Code Generation Notes for MSA
+=============================
+
+Intrinsics are lowered to SelectionDAG nodes where possible in order to enable
+optimisation, reduce the size of the ISel matcher, and reduce repetition in
+the implementation. In a small number of cases, this can cause different
+(semantically equivalent) instructions to be used in place of the requested
+instruction, even when no optimisation has taken place.
+
+Instructions
+============
+
+This section describes any quirks of instruction selection for MSA. For
+example, two instructions might be equally valid for some given IR and one is
+chosen in preference to the other.
+
+bclri.b:
+ It is not possible to emit bclri.b since andi.b covers exactly the
+ same cases. andi.b should use fractionally less power than bclri.b in
+ most hardware implementations so it is used in preference to bclri.b.
+
+vshf.w:
+ It is not possible to emit vshf.w when the shuffle description is
+ constant since shf.w covers exactly the same cases. shf.w is used
+ instead. It is also impossible for the shuffle description to be
+ unknown at compile-time due to the definition of shufflevector in
+ LLVM IR.
+
+vshf.[bhwd]
+ When the shuffle description describes a splat operation, splat.[bhwd]
+ instructions will be selected instead of vshf.[bhwd]. Unlike the ilv*,
+ and pck* instructions, this is matched from MipsISD::VSHF instead of
+ a special-case MipsISD node.
+
+ilvl.d, pckev.d:
+	It is not possible to emit ilvl.d or pckev.d since ilvev.d covers the
+ same shuffle. ilvev.d will be emitted instead.
+
+ilvr.d, ilvod.d, pckod.d:
+	It is not possible to emit ilvr.d or pckod.d since ilvod.d covers the
+ same shuffle. ilvod.d will be emitted instead.
+
+splat.[bhwd]
+ The intrinsic will work as expected. However, unlike other intrinsics
+ it lowers directly to MipsISD::VSHF instead of using common IR.
+
+splati.w:
+ It is not possible to emit splati.w since shf.w covers the same cases.
+ shf.w will be emitted instead.
+
+copy_s.w:
+	On MIPS32, the copy_u.w intrinsic will emit this instruction instead of
+ copy_u.w. This is semantically equivalent since the general-purpose
+ register file is 32-bits wide.
+
+binsri.[bhwd], binsli.[bhwd]:
+ These two operations are equivalent to each other with the operands
+ swapped and condition inverted. The compiler may use either one as
+ appropriate.
+ Furthermore, the compiler may use bsel.[bhwd] for some masks that do
+ not survive the legalization process (this is a bug and will be fixed).
+
+bmnz.v, bmz.v, bsel.v:
+ These three operations differ only in the operand that is tied to the
+ result.
+ It is (currently) not possible to emit bmz.v, or bsel.v since bmnz.v is
+ the same operation and will be emitted instead.
+ In future, the compiler may choose between these three instructions
+ according to register allocation.
+
+bmnzi.b, bmzi.b:
+	Like their non-immediate counterparts, bmnzi.b and bmzi.b are the same
+	operation with the operands swapped. bmnzi.b will (currently) be emitted
+ for both cases.
+
+bseli.v:
+ Unlike the non-immediate versions, bseli.v is distinguishable from
+ bmnzi.b and bmzi.b and can be emitted.
diff --git a/lib/Target/Mips/MicroMipsInstrFormats.td b/lib/Target/Mips/MicroMipsInstrFormats.td
index 665b4d2..c12a32e 100644
--- a/lib/Target/Mips/MicroMipsInstrFormats.td
+++ b/lib/Target/Mips/MicroMipsInstrFormats.td
@@ -39,8 +39,8 @@ class SLTI_FM_MM<bits<6> op> : MMArch {
bits<32> Inst;
let Inst{31-26} = op;
- let Inst{25-21} = rs;
- let Inst{20-16} = rt;
+ let Inst{25-21} = rt;
+ let Inst{20-16} = rs;
let Inst{15-0} = imm16;
}
@@ -110,3 +110,195 @@ class LW_FM_MM<bits<6> op> : MMArch {
let Inst{20-16} = addr{20-16};
let Inst{15-0} = addr{15-0};
}
+
+class LWL_FM_MM<bits<4> funct> {
+ bits<5> rt;
+ bits<21> addr;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0x18;
+ let Inst{25-21} = rt;
+ let Inst{20-16} = addr{20-16};
+ let Inst{15-12} = funct;
+ let Inst{11-0} = addr{11-0};
+}
+
+class CMov_F_I_FM_MM<bits<7> func> : MMArch {
+ bits<5> rd;
+ bits<5> rs;
+ bits<3> fcc;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0x15;
+ let Inst{25-21} = rd;
+ let Inst{20-16} = rs;
+ let Inst{15-13} = fcc;
+ let Inst{12-6} = func;
+ let Inst{5-0} = 0x3b;
+}
+
+class MTLO_FM_MM<bits<10> funct> : MMArch {
+ bits<5> rs;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0x00;
+ let Inst{25-21} = 0x00;
+ let Inst{20-16} = rs;
+ let Inst{15-6} = funct;
+ let Inst{5-0} = 0x3c;
+}
+
+class MFLO_FM_MM<bits<10> funct> : MMArch {
+ bits<5> rd;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0x00;
+ let Inst{25-21} = 0x00;
+ let Inst{20-16} = rd;
+ let Inst{15-6} = funct;
+ let Inst{5-0} = 0x3c;
+}
+
+class CLO_FM_MM<bits<10> funct> : MMArch {
+ bits<5> rd;
+ bits<5> rs;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0x00;
+ let Inst{25-21} = rd;
+ let Inst{20-16} = rs;
+ let Inst{15-6} = funct;
+ let Inst{5-0} = 0x3c;
+}
+
+class SEB_FM_MM<bits<10> funct> : MMArch {
+ bits<5> rd;
+ bits<5> rt;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0x00;
+ let Inst{25-21} = rd;
+ let Inst{20-16} = rt;
+ let Inst{15-6} = funct;
+ let Inst{5-0} = 0x3c;
+}
+
+class EXT_FM_MM<bits<6> funct> : MMArch {
+ bits<5> rt;
+ bits<5> rs;
+ bits<5> pos;
+ bits<5> size;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0x00;
+ let Inst{25-21} = rt;
+ let Inst{20-16} = rs;
+ let Inst{15-11} = size;
+ let Inst{10-6} = pos;
+ let Inst{5-0} = funct;
+}
+
+class J_FM_MM<bits<6> op> : MMArch {
+ bits<26> target;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = op;
+ let Inst{25-0} = target;
+}
+
+class JR_FM_MM<bits<8> funct> : MMArch {
+ bits<5> rs;
+
+ bits<32> Inst;
+
+ let Inst{31-21} = 0x00;
+ let Inst{20-16} = rs;
+ let Inst{15-14} = 0x0;
+ let Inst{13-6} = funct;
+ let Inst{5-0} = 0x3c;
+}
+
+class JALR_FM_MM<bits<10> funct> : MMArch {
+ bits<5> rs;
+ bits<5> rd;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0x00;
+ let Inst{25-21} = rd;
+ let Inst{20-16} = rs;
+ let Inst{15-6} = funct;
+ let Inst{5-0} = 0x3c;
+}
+
+class BEQ_FM_MM<bits<6> op> : MMArch {
+ bits<5> rs;
+ bits<5> rt;
+ bits<16> offset;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = op;
+ let Inst{25-21} = rt;
+ let Inst{20-16} = rs;
+ let Inst{15-0} = offset;
+}
+
+class BGEZ_FM_MM<bits<5> funct> : MMArch {
+ bits<5> rs;
+ bits<16> offset;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0x10;
+ let Inst{25-21} = funct;
+ let Inst{20-16} = rs;
+ let Inst{15-0} = offset;
+}
+
+class BGEZAL_FM_MM<bits<5> funct> : MMArch {
+ bits<5> rs;
+ bits<16> offset;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0x10;
+ let Inst{25-21} = funct;
+ let Inst{20-16} = rs;
+ let Inst{15-0} = offset;
+}
+
+class TEQ_FM_MM<bits<6> funct> : MMArch {
+ bits<5> rs;
+ bits<5> rt;
+ bits<4> code_;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0x00;
+ let Inst{25-21} = rt;
+ let Inst{20-16} = rs;
+ let Inst{15-12} = code_;
+ let Inst{11-6} = funct;
+ let Inst{5-0} = 0x3c;
+}
+
+class TEQI_FM_MM<bits<5> funct> : MMArch {
+ bits<5> rs;
+ bits<16> imm16;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0x10;
+ let Inst{25-21} = funct;
+ let Inst{20-16} = rs;
+ let Inst{15-0} = imm16;
+}
diff --git a/lib/Target/Mips/MicroMipsInstrInfo.td b/lib/Target/Mips/MicroMipsInstrInfo.td
index 249d712..d9507fa 100644
--- a/lib/Target/Mips/MicroMipsInstrInfo.td
+++ b/lib/Target/Mips/MicroMipsInstrInfo.td
@@ -1,4 +1,51 @@
-let isCodeGenOnly = 1 in {
+def addrimm12 : ComplexPattern<iPTR, 2, "selectIntAddrMM", [frameindex]>;
+
+def simm12 : Operand<i32> {
+ let DecoderMethod = "DecodeSimm12";
+}
+
+def mem_mm_12 : Operand<i32> {
+ let PrintMethod = "printMemOperand";
+ let MIOperandInfo = (ops GPR32, simm12);
+ let EncoderMethod = "getMemEncodingMMImm12";
+ let ParserMatchClass = MipsMemAsmOperand;
+ let OperandType = "OPERAND_MEMORY";
+}
+
+def jmptarget_mm : Operand<OtherVT> {
+ let EncoderMethod = "getJumpTargetOpValueMM";
+}
+
+def calltarget_mm : Operand<iPTR> {
+ let EncoderMethod = "getJumpTargetOpValueMM";
+}
+
+def brtarget_mm : Operand<OtherVT> {
+ let EncoderMethod = "getBranchTargetOpValueMM";
+ let OperandType = "OPERAND_PCREL";
+ let DecoderMethod = "DecodeBranchTargetMM";
+}
+
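+// lwl/lwr only update part of the destination register, so the load class
+// below ties the incoming value ($src) to the result ($rt); the bytes it
+// does not touch must flow through unchanged.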
+let canFoldAsLoad = 1 in
+class LoadLeftRightMM<string opstr, SDNode OpNode, RegisterOperand RO,
+ Operand MemOpnd> :
+ InstSE<(outs RO:$rt), (ins MemOpnd:$addr, RO:$src),
+ !strconcat(opstr, "\t$rt, $addr"),
+ [(set RO:$rt, (OpNode addrimm12:$addr, RO:$src))],
+ NoItinerary, FrmI> {
+ let DecoderMethod = "DecodeMemMMImm12";
+ string Constraints = "$src = $rt";
+}
+
+class StoreLeftRightMM<string opstr, SDNode OpNode, RegisterOperand RO,
+ Operand MemOpnd>:
+ InstSE<(outs), (ins RO:$rt, MemOpnd:$addr),
+ !strconcat(opstr, "\t$rt, $addr"),
+ [(OpNode RO:$rt, addrimm12:$addr)], NoItinerary, FrmI> {
+ let DecoderMethod = "DecodeMemMMImm12";
+}
+
+let DecoderNamespace = "MicroMips", Predicates = [InMicroMips] in {
/// Arithmetic Instructions (ALU Immediate)
def ADDiu_MM : MMRel, ArithLogicI<"addiu", simm16, GPR32Opnd>,
ADDI_FM_MM<0xc>;
@@ -32,17 +79,21 @@ let isCodeGenOnly = 1 in {
def XOR_MM : MMRel, ArithLogicR<"xor", GPR32Opnd, 1, IIAlu, xor>,
ADD_FM_MM<0, 0x310>;
def NOR_MM : MMRel, LogicNOR<"nor", GPR32Opnd>, ADD_FM_MM<0, 0x2d0>;
- def MULT_MM : MMRel, Mult<"mult", IIImul, GPR32Opnd, [HI, LO]>,
+ def MULT_MM : MMRel, Mult<"mult", IIImul, GPR32Opnd, [HI0, LO0]>,
MULT_FM_MM<0x22c>;
- def MULTu_MM : MMRel, Mult<"multu", IIImul, GPR32Opnd, [HI, LO]>,
+ def MULTu_MM : MMRel, Mult<"multu", IIImul, GPR32Opnd, [HI0, LO0]>,
MULT_FM_MM<0x26c>;
+ def SDIV_MM : MMRel, Div<"div", IIIdiv, GPR32Opnd, [HI0, LO0]>,
+ MULT_FM_MM<0x2ac>;
+ def UDIV_MM : MMRel, Div<"divu", IIIdiv, GPR32Opnd, [HI0, LO0]>,
+ MULT_FM_MM<0x2ec>;
/// Shift Instructions
- def SLL_MM : MMRel, shift_rotate_imm<"sll", shamt, GPR32Opnd>,
+ def SLL_MM : MMRel, shift_rotate_imm<"sll", uimm5, GPR32Opnd>,
SRA_FM_MM<0, 0>;
- def SRL_MM : MMRel, shift_rotate_imm<"srl", shamt, GPR32Opnd>,
+ def SRL_MM : MMRel, shift_rotate_imm<"srl", uimm5, GPR32Opnd>,
SRA_FM_MM<0x40, 0>;
- def SRA_MM : MMRel, shift_rotate_imm<"sra", shamt, GPR32Opnd>,
+ def SRA_MM : MMRel, shift_rotate_imm<"sra", uimm5, GPR32Opnd>,
SRA_FM_MM<0x80, 0>;
def SLLV_MM : MMRel, shift_rotate_reg<"sllv", GPR32Opnd>,
SRLV_FM_MM<0x10, 0>;
@@ -50,18 +101,119 @@ let isCodeGenOnly = 1 in {
SRLV_FM_MM<0x50, 0>;
def SRAV_MM : MMRel, shift_rotate_reg<"srav", GPR32Opnd>,
SRLV_FM_MM<0x90, 0>;
- def ROTR_MM : MMRel, shift_rotate_imm<"rotr", shamt, GPR32Opnd>,
+ def ROTR_MM : MMRel, shift_rotate_imm<"rotr", uimm5, GPR32Opnd>,
SRA_FM_MM<0xc0, 0>;
def ROTRV_MM : MMRel, shift_rotate_reg<"rotrv", GPR32Opnd>,
SRLV_FM_MM<0xd0, 0>;
/// Load and Store Instructions - aligned
- defm LB_MM : LoadM<"lb", GPR32Opnd, sextloadi8>, MMRel, LW_FM_MM<0x7>;
- defm LBu_MM : LoadM<"lbu", GPR32Opnd, zextloadi8>, MMRel, LW_FM_MM<0x5>;
- defm LH_MM : LoadM<"lh", GPR32Opnd, sextloadi16>, MMRel, LW_FM_MM<0xf>;
- defm LHu_MM : LoadM<"lhu", GPR32Opnd, zextloadi16>, MMRel, LW_FM_MM<0xd>;
- defm LW_MM : LoadM<"lw", GPR32Opnd>, MMRel, LW_FM_MM<0x3f>;
- defm SB_MM : StoreM<"sb", GPR32Opnd, truncstorei8>, MMRel, LW_FM_MM<0x6>;
- defm SH_MM : StoreM<"sh", GPR32Opnd, truncstorei16>, MMRel, LW_FM_MM<0xe>;
- defm SW_MM : StoreM<"sw", GPR32Opnd>, MMRel, LW_FM_MM<0x3e>;
+ let DecoderMethod = "DecodeMemMMImm16" in {
+ def LB_MM : Load<"lb", GPR32Opnd>, MMRel, LW_FM_MM<0x7>;
+ def LBu_MM : Load<"lbu", GPR32Opnd>, MMRel, LW_FM_MM<0x5>;
+ def LH_MM : Load<"lh", GPR32Opnd>, MMRel, LW_FM_MM<0xf>;
+ def LHu_MM : Load<"lhu", GPR32Opnd>, MMRel, LW_FM_MM<0xd>;
+ def LW_MM : Load<"lw", GPR32Opnd>, MMRel, LW_FM_MM<0x3f>;
+ def SB_MM : Store<"sb", GPR32Opnd>, MMRel, LW_FM_MM<0x6>;
+ def SH_MM : Store<"sh", GPR32Opnd>, MMRel, LW_FM_MM<0xe>;
+ def SW_MM : Store<"sw", GPR32Opnd>, MMRel, LW_FM_MM<0x3e>;
+ }
+
+ /// Load and Store Instructions - unaligned
+ def LWL_MM : LoadLeftRightMM<"lwl", MipsLWL, GPR32Opnd, mem_mm_12>,
+ LWL_FM_MM<0x0>;
+ def LWR_MM : LoadLeftRightMM<"lwr", MipsLWR, GPR32Opnd, mem_mm_12>,
+ LWL_FM_MM<0x1>;
+ def SWL_MM : StoreLeftRightMM<"swl", MipsSWL, GPR32Opnd, mem_mm_12>,
+ LWL_FM_MM<0x8>;
+ def SWR_MM : StoreLeftRightMM<"swr", MipsSWR, GPR32Opnd, mem_mm_12>,
+ LWL_FM_MM<0x9>;
+
+ /// Move Conditional
+ def MOVZ_I_MM : MMRel, CMov_I_I_FT<"movz", GPR32Opnd, GPR32Opnd,
+ NoItinerary>, ADD_FM_MM<0, 0x58>;
+ def MOVN_I_MM : MMRel, CMov_I_I_FT<"movn", GPR32Opnd, GPR32Opnd,
+ NoItinerary>, ADD_FM_MM<0, 0x18>;
+ def MOVT_I_MM : MMRel, CMov_F_I_FT<"movt", GPR32Opnd, IIAlu>,
+ CMov_F_I_FM_MM<0x25>;
+ def MOVF_I_MM : MMRel, CMov_F_I_FT<"movf", GPR32Opnd, IIAlu>,
+ CMov_F_I_FM_MM<0x5>;
+
+ /// Move to/from HI/LO
+ def MTHI_MM : MMRel, MoveToLOHI<"mthi", GPR32Opnd, [HI0]>,
+ MTLO_FM_MM<0x0b5>;
+ def MTLO_MM : MMRel, MoveToLOHI<"mtlo", GPR32Opnd, [LO0]>,
+ MTLO_FM_MM<0x0f5>;
+ def MFHI_MM : MMRel, MoveFromLOHI<"mfhi", GPR32Opnd, AC0>,
+ MFLO_FM_MM<0x035>;
+ def MFLO_MM : MMRel, MoveFromLOHI<"mflo", GPR32Opnd, AC0>,
+ MFLO_FM_MM<0x075>;
+
+ /// Multiply Add/Sub Instructions
+ def MADD_MM : MMRel, MArithR<"madd", 1>, MULT_FM_MM<0x32c>;
+ def MADDU_MM : MMRel, MArithR<"maddu", 1>, MULT_FM_MM<0x36c>;
+ def MSUB_MM : MMRel, MArithR<"msub">, MULT_FM_MM<0x3ac>;
+ def MSUBU_MM : MMRel, MArithR<"msubu">, MULT_FM_MM<0x3ec>;
+
+ /// Count Leading
+ def CLZ_MM : MMRel, CountLeading0<"clz", GPR32Opnd>, CLO_FM_MM<0x16c>;
+ def CLO_MM : MMRel, CountLeading1<"clo", GPR32Opnd>, CLO_FM_MM<0x12c>;
+
+ /// Sign Ext In Register Instructions.
+ def SEB_MM : MMRel, SignExtInReg<"seb", i8, GPR32Opnd>, SEB_FM_MM<0x0ac>;
+ def SEH_MM : MMRel, SignExtInReg<"seh", i16, GPR32Opnd>, SEB_FM_MM<0x0ec>;
+
+ /// Word Swap Bytes Within Halfwords
+ def WSBH_MM : MMRel, SubwordSwap<"wsbh", GPR32Opnd>, SEB_FM_MM<0x1ec>;
+
+ def EXT_MM : MMRel, ExtBase<"ext", GPR32Opnd, uimm5, MipsExt>,
+ EXT_FM_MM<0x2c>;
+ def INS_MM : MMRel, InsBase<"ins", GPR32Opnd, uimm5, MipsIns>,
+ EXT_FM_MM<0x0c>;
+
+ /// Jump Instructions
+ let DecoderMethod = "DecodeJumpTargetMM" in {
+ def J_MM : MMRel, JumpFJ<jmptarget_mm, "j", br, bb, "j">,
+ J_FM_MM<0x35>;
+ def JAL_MM : MMRel, JumpLink<"jal", calltarget_mm>, J_FM_MM<0x3d>;
+ def TAILCALL_MM : MMRel, JumpFJ<calltarget_mm, "j", MipsTailCall, imm,
+ "tcall">, J_FM_MM<0x3d>, IsTailCall;
+ }
+ def JR_MM : MMRel, IndirectBranch<"jr", GPR32Opnd>, JR_FM_MM<0x3c>;
+ def JALR_MM : MMRel, JumpLinkReg<"jalr", GPR32Opnd>, JALR_FM_MM<0x03c>;
+ def TAILCALL_R_MM : MMRel, JumpFR<"tcallr", GPR32Opnd, MipsTailCall>,
+ JR_FM_MM<0x3c>, IsTailCall;
+ def RET_MM : MMRel, RetBase<"ret", GPR32Opnd>, JR_FM_MM<0x3c>;
+
+ /// Branch Instructions
+ def BEQ_MM : MMRel, CBranch<"beq", brtarget_mm, seteq, GPR32Opnd>,
+ BEQ_FM_MM<0x25>;
+ def BNE_MM : MMRel, CBranch<"bne", brtarget_mm, setne, GPR32Opnd>,
+ BEQ_FM_MM<0x2d>;
+ def BGEZ_MM : MMRel, CBranchZero<"bgez", brtarget_mm, setge, GPR32Opnd>,
+ BGEZ_FM_MM<0x2>;
+ def BGTZ_MM : MMRel, CBranchZero<"bgtz", brtarget_mm, setgt, GPR32Opnd>,
+ BGEZ_FM_MM<0x6>;
+ def BLEZ_MM : MMRel, CBranchZero<"blez", brtarget_mm, setle, GPR32Opnd>,
+ BGEZ_FM_MM<0x4>;
+ def BLTZ_MM : MMRel, CBranchZero<"bltz", brtarget_mm, setlt, GPR32Opnd>,
+ BGEZ_FM_MM<0x0>;
+ def BGEZAL_MM : MMRel, BGEZAL_FT<"bgezal", brtarget_mm, GPR32Opnd>,
+ BGEZAL_FM_MM<0x03>;
+ def BLTZAL_MM : MMRel, BGEZAL_FT<"bltzal", brtarget_mm, GPR32Opnd>,
+ BGEZAL_FM_MM<0x01>;
+
+ /// Trap Instructions
+ def TEQ_MM : MMRel, TEQ_FT<"teq", GPR32Opnd>, TEQ_FM_MM<0x0>;
+ def TGE_MM : MMRel, TEQ_FT<"tge", GPR32Opnd>, TEQ_FM_MM<0x08>;
+ def TGEU_MM : MMRel, TEQ_FT<"tgeu", GPR32Opnd>, TEQ_FM_MM<0x10>;
+ def TLT_MM : MMRel, TEQ_FT<"tlt", GPR32Opnd>, TEQ_FM_MM<0x20>;
+ def TLTU_MM : MMRel, TEQ_FT<"tltu", GPR32Opnd>, TEQ_FM_MM<0x28>;
+ def TNE_MM : MMRel, TEQ_FT<"tne", GPR32Opnd>, TEQ_FM_MM<0x30>;
+
+ def TEQI_MM : MMRel, TEQI_FT<"teqi", GPR32Opnd>, TEQI_FM_MM<0x0e>;
+ def TGEI_MM : MMRel, TEQI_FT<"tgei", GPR32Opnd>, TEQI_FM_MM<0x09>;
+ def TGEIU_MM : MMRel, TEQI_FT<"tgeiu", GPR32Opnd>, TEQI_FM_MM<0x0b>;
+ def TLTI_MM : MMRel, TEQI_FT<"tlti", GPR32Opnd>, TEQI_FM_MM<0x08>;
+ def TLTIU_MM : MMRel, TEQI_FT<"tltiu", GPR32Opnd>, TEQI_FM_MM<0x0a>;
+ def TNEI_MM : MMRel, TEQI_FT<"tnei", GPR32Opnd>, TEQI_FM_MM<0x0c>;
}
diff --git a/lib/Target/Mips/Mips.h b/lib/Target/Mips/Mips.h
index b88c0d2..e796deb 100644
--- a/lib/Target/Mips/Mips.h
+++ b/lib/Target/Mips/Mips.h
@@ -28,7 +28,6 @@ namespace llvm {
FunctionPass *createMipsJITCodeEmitterPass(MipsTargetMachine &TM,
JITCodeEmitter &JCE);
FunctionPass *createMipsConstantIslandPass(MipsTargetMachine &tm);
- FunctionPass *createMipsOptimizeMathLibCalls(MipsTargetMachine &TM);
} // end namespace llvm;
#endif
diff --git a/lib/Target/Mips/Mips.td b/lib/Target/Mips/Mips.td
index 2595e41..b8e3f39 100644
--- a/lib/Target/Mips/Mips.td
+++ b/lib/Target/Mips/Mips.td
@@ -78,6 +78,8 @@ def FeatureDSP : SubtargetFeature<"dsp", "HasDSP", "true", "Mips DSP ASE">;
def FeatureDSPR2 : SubtargetFeature<"dspr2", "HasDSPR2", "true",
"Mips DSP-R2 ASE", [FeatureDSP]>;
+def FeatureMSA : SubtargetFeature<"msa", "HasMSA", "true", "Mips MSA ASE">;
+
def FeatureMicroMips : SubtargetFeature<"micromips", "InMicroMipsMode", "true",
"microMips mode">;
diff --git a/lib/Target/Mips/Mips16FrameLowering.h b/lib/Target/Mips/Mips16FrameLowering.h
index 54fdb78..8ce2ced 100644
--- a/lib/Target/Mips/Mips16FrameLowering.h
+++ b/lib/Target/Mips/Mips16FrameLowering.h
@@ -20,7 +20,7 @@ namespace llvm {
class Mips16FrameLowering : public MipsFrameLowering {
public:
explicit Mips16FrameLowering(const MipsSubtarget &STI)
- : MipsFrameLowering(STI, 8) {}
+ : MipsFrameLowering(STI, STI.stackAlignment()) {}
/// emitProlog/emitEpilog - These methods insert prolog and epilog code into
/// the function.
diff --git a/lib/Target/Mips/Mips16HardFloat.cpp b/lib/Target/Mips/Mips16HardFloat.cpp
index 7e456aa..81bf18c 100644
--- a/lib/Target/Mips/Mips16HardFloat.cpp
+++ b/lib/Target/Mips/Mips16HardFloat.cpp
@@ -16,6 +16,7 @@
#include "llvm/IR/Module.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
#include <string>
static void inlineAsmOut
@@ -321,6 +322,37 @@ static void assureFPCallStub(Function &F, Module *M,
}
//
+// Functions that are LLVM intrinsics and don't need helpers.
+//
+static const char *IntrinsicInline[] =
+ {"fabs",
+ "fabsf",
+ "llvm.ceil.f32", "llvm.ceil.f64",
+ "llvm.copysign.f32", "llvm.copysign.f64",
+ "llvm.cos.f32", "llvm.cos.f64",
+ "llvm.exp.f32", "llvm.exp.f64",
+ "llvm.exp2.f32", "llvm.exp2.f64",
+ "llvm.fabs.f32", "llvm.fabs.f64",
+ "llvm.floor.f32", "llvm.floor.f64",
+ "llvm.fma.f32", "llvm.fma.f64",
+ "llvm.log.f32", "llvm.log.f64",
+ "llvm.log10.f32", "llvm.log10.f64",
+ "llvm.nearbyint.f32", "llvm.nearbyint.f64",
+ "llvm.pow.f32", "llvm.pow.f64",
+ "llvm.powi.f32", "llvm.powi.f64",
+ "llvm.rint.f32", "llvm.rint.f64",
+ "llvm.round.f32", "llvm.round.f64",
+ "llvm.sin.f32", "llvm.sin.f64",
+ "llvm.sqrt.f32", "llvm.sqrt.f64",
+ "llvm.trunc.f32", "llvm.trunc.f64",
+ };
+
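+// Note: this table must stay lexicographically sorted, since
+// isIntrinsicInline below relies on std::binary_search.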
+static bool isIntrinsicInline(Function *F) {
+ return std::binary_search(
+ IntrinsicInline, array_endof(IntrinsicInline),
+ F->getName());
+}
+//
// Returns of float, double and complex need to be handled with a helper
// function.
//
@@ -372,7 +404,7 @@ static bool fixupFPReturnAndCall
// helper functions
if (Subtarget.getRelocationModel() != Reloc::PIC_ ) {
Function *F_ = CI->getCalledFunction();
- if (F_ && needsFPHelperFromSig(*F_)) {
+ if (F_ && !isIntrinsicInline(F_) && needsFPHelperFromSig(*F_)) {
assureFPCallStub(*F_, M, Subtarget);
Modified=true;
}
@@ -390,7 +422,7 @@ static void createFPFnStub(Function *F, Module *M, FPParamVariant PV,
std::string Name = F->getName();
std::string SectionName = ".mips16.fn." + Name;
std::string StubName = "__fn_stub_" + Name;
- std::string LocalName = "__fn_local_" + Name;
+ std::string LocalName = "$$__fn_local_" + Name;
Function *FStub = Function::Create
(F->getFunctionType(),
Function::InternalLinkage, StubName, M);
@@ -405,19 +437,36 @@ static void createFPFnStub(Function *F, Module *M, FPParamVariant PV,
IAH.Out(" .set macro");
if (PicMode) {
IAH.Out(".set noreorder");
- IAH.Out(".cpload $$2");
+ IAH.Out(".cpload $$25");
IAH.Out(".set reorder");
IAH.Out(".reloc 0,R_MIPS_NONE," + Name);
IAH.Out("la $$25," + LocalName);
}
- else
- IAH.Out("la $$25, " + Name);
+ else {
+ IAH.Out(".set reorder");
+ IAH.Out("la $$25," + Name);
+ }
swapFPIntParams(PV, M, IAH, LE, false);
IAH.Out("jr $$25");
IAH.Out(LocalName + " = " + Name);
new UnreachableInst(FStub->getContext(), BB);
}
+//
+// Remove the use-soft-float attribute.
+//
+static void removeUseSoftFloat(Function &F) {
+ AttributeSet A;
+ DEBUG(errs() << "removing -use-soft-float\n");
+ A = A.addAttribute(F.getContext(), AttributeSet::FunctionIndex,
+ "use-soft-float", "false");
+ F.removeAttributes(AttributeSet::FunctionIndex, A);
+ if (F.hasFnAttribute("use-soft-float")) {
+ DEBUG(errs() << "still has -use-soft-float\n");
+ }
+ F.addAttributes(AttributeSet::FunctionIndex, A);
+}
+
namespace llvm {
//
@@ -441,6 +490,11 @@ bool Mips16HardFloat::runOnModule(Module &M) {
DEBUG(errs() << "Run on Module Mips16HardFloat\n");
bool Modified = false;
for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
+ if (F->hasFnAttribute("nomips16") &&
+ F->hasFnAttribute("use-soft-float")) {
+ removeUseSoftFloat(*F);
+ continue;
+ }
if (F->isDeclaration() || F->hasFnAttribute("mips16_fp_stub") ||
F->hasFnAttribute("nomips16")) continue;
Modified |= fixupFPReturnAndCall(*F, &M, Subtarget);
diff --git a/lib/Target/Mips/Mips16ISelDAGToDAG.cpp b/lib/Target/Mips/Mips16ISelDAGToDAG.cpp
index 0caa277..4948f40 100644
--- a/lib/Target/Mips/Mips16ISelDAGToDAG.cpp
+++ b/lib/Target/Mips/Mips16ISelDAGToDAG.cpp
@@ -80,10 +80,11 @@ void Mips16DAGToDAGISel::initGlobalBaseReg(MachineFunction &MF) {
V1 = RegInfo.createVirtualRegister(RC);
V2 = RegInfo.createVirtualRegister(RC);
- BuildMI(MBB, I, DL, TII.get(Mips::LiRxImmX16), V0)
- .addExternalSymbol("_gp_disp", MipsII::MO_ABS_HI);
- BuildMI(MBB, I, DL, TII.get(Mips::AddiuRxPcImmX16), V1)
- .addExternalSymbol("_gp_disp", MipsII::MO_ABS_LO);
+ BuildMI(MBB, I, DL, TII.get(Mips::GotPrologue16), V0).
+ addReg(V1, RegState::Define).
+ addExternalSymbol("_gp_disp", MipsII::MO_ABS_HI).
+ addExternalSymbol("_gp_disp", MipsII::MO_ABS_LO);
+
BuildMI(MBB, I, DL, TII.get(Mips::SllX16), V2).addReg(V0).addImm(16);
BuildMI(MBB, I, DL, TII.get(Mips::AdduRxRyRz16), GlobalBaseReg)
.addReg(V1).addReg(V2);
diff --git a/lib/Target/Mips/Mips16ISelLowering.cpp b/lib/Target/Mips/Mips16ISelLowering.cpp
index 6ed1d9e..61d8bb8 100644
--- a/lib/Target/Mips/Mips16ISelLowering.cpp
+++ b/lib/Target/Mips/Mips16ISelLowering.cpp
@@ -90,6 +90,7 @@ static const Mips16Libcall HardFloatLibCalls[] = {
};
static const Mips16IntrinsicHelperType Mips16IntrinsicHelper[] = {
+ {"__fixunsdfsi", "__mips16_call_stub_2" },
{"ceil", "__mips16_call_stub_df_2"},
{"ceilf", "__mips16_call_stub_sf_1"},
{"copysign", "__mips16_call_stub_df_10"},
@@ -144,6 +145,11 @@ Mips16TargetLowering::Mips16TargetLowering(MipsTargetMachine &TM)
setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Expand);
setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Expand);
+ setOperationAction(ISD::ROTR, MVT::i32, Expand);
+ setOperationAction(ISD::ROTR, MVT::i64, Expand);
+ setOperationAction(ISD::BSWAP, MVT::i32, Expand);
+ setOperationAction(ISD::BSWAP, MVT::i64, Expand);
+
computeRegisterProperties();
}
@@ -168,57 +174,57 @@ Mips16TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
case Mips::SelBneZ:
return emitSel16(Mips::BnezRxImm16, MI, BB);
case Mips::SelTBteqZCmpi:
- return emitSeliT16(Mips::BteqzX16, Mips::CmpiRxImmX16, MI, BB);
+ return emitSeliT16(Mips::Bteqz16, Mips::CmpiRxImmX16, MI, BB);
case Mips::SelTBteqZSlti:
- return emitSeliT16(Mips::BteqzX16, Mips::SltiRxImmX16, MI, BB);
+ return emitSeliT16(Mips::Bteqz16, Mips::SltiRxImmX16, MI, BB);
case Mips::SelTBteqZSltiu:
- return emitSeliT16(Mips::BteqzX16, Mips::SltiuRxImmX16, MI, BB);
+ return emitSeliT16(Mips::Bteqz16, Mips::SltiuRxImmX16, MI, BB);
case Mips::SelTBtneZCmpi:
- return emitSeliT16(Mips::BtnezX16, Mips::CmpiRxImmX16, MI, BB);
+ return emitSeliT16(Mips::Btnez16, Mips::CmpiRxImmX16, MI, BB);
case Mips::SelTBtneZSlti:
- return emitSeliT16(Mips::BtnezX16, Mips::SltiRxImmX16, MI, BB);
+ return emitSeliT16(Mips::Btnez16, Mips::SltiRxImmX16, MI, BB);
case Mips::SelTBtneZSltiu:
- return emitSeliT16(Mips::BtnezX16, Mips::SltiuRxImmX16, MI, BB);
+ return emitSeliT16(Mips::Btnez16, Mips::SltiuRxImmX16, MI, BB);
case Mips::SelTBteqZCmp:
- return emitSelT16(Mips::BteqzX16, Mips::CmpRxRy16, MI, BB);
+ return emitSelT16(Mips::Bteqz16, Mips::CmpRxRy16, MI, BB);
case Mips::SelTBteqZSlt:
- return emitSelT16(Mips::BteqzX16, Mips::SltRxRy16, MI, BB);
+ return emitSelT16(Mips::Bteqz16, Mips::SltRxRy16, MI, BB);
case Mips::SelTBteqZSltu:
- return emitSelT16(Mips::BteqzX16, Mips::SltuRxRy16, MI, BB);
+ return emitSelT16(Mips::Bteqz16, Mips::SltuRxRy16, MI, BB);
case Mips::SelTBtneZCmp:
- return emitSelT16(Mips::BtnezX16, Mips::CmpRxRy16, MI, BB);
+ return emitSelT16(Mips::Btnez16, Mips::CmpRxRy16, MI, BB);
case Mips::SelTBtneZSlt:
- return emitSelT16(Mips::BtnezX16, Mips::SltRxRy16, MI, BB);
+ return emitSelT16(Mips::Btnez16, Mips::SltRxRy16, MI, BB);
case Mips::SelTBtneZSltu:
- return emitSelT16(Mips::BtnezX16, Mips::SltuRxRy16, MI, BB);
+ return emitSelT16(Mips::Btnez16, Mips::SltuRxRy16, MI, BB);
case Mips::BteqzT8CmpX16:
- return emitFEXT_T8I816_ins(Mips::BteqzX16, Mips::CmpRxRy16, MI, BB);
+ return emitFEXT_T8I816_ins(Mips::Bteqz16, Mips::CmpRxRy16, MI, BB);
case Mips::BteqzT8SltX16:
- return emitFEXT_T8I816_ins(Mips::BteqzX16, Mips::SltRxRy16, MI, BB);
+ return emitFEXT_T8I816_ins(Mips::Bteqz16, Mips::SltRxRy16, MI, BB);
case Mips::BteqzT8SltuX16:
// TBD: figure out a way to get this or remove the instruction
// altogether.
- return emitFEXT_T8I816_ins(Mips::BteqzX16, Mips::SltuRxRy16, MI, BB);
+ return emitFEXT_T8I816_ins(Mips::Bteqz16, Mips::SltuRxRy16, MI, BB);
case Mips::BtnezT8CmpX16:
- return emitFEXT_T8I816_ins(Mips::BtnezX16, Mips::CmpRxRy16, MI, BB);
+ return emitFEXT_T8I816_ins(Mips::Btnez16, Mips::CmpRxRy16, MI, BB);
case Mips::BtnezT8SltX16:
- return emitFEXT_T8I816_ins(Mips::BtnezX16, Mips::SltRxRy16, MI, BB);
+ return emitFEXT_T8I816_ins(Mips::Btnez16, Mips::SltRxRy16, MI, BB);
case Mips::BtnezT8SltuX16:
// TBD: figure out a way to get this or remove the instruction
// altogether.
- return emitFEXT_T8I816_ins(Mips::BtnezX16, Mips::SltuRxRy16, MI, BB);
+ return emitFEXT_T8I816_ins(Mips::Btnez16, Mips::SltuRxRy16, MI, BB);
case Mips::BteqzT8CmpiX16: return emitFEXT_T8I8I16_ins(
- Mips::BteqzX16, Mips::CmpiRxImm16, Mips::CmpiRxImmX16, false, MI, BB);
+ Mips::Bteqz16, Mips::CmpiRxImm16, Mips::CmpiRxImmX16, false, MI, BB);
case Mips::BteqzT8SltiX16: return emitFEXT_T8I8I16_ins(
- Mips::BteqzX16, Mips::SltiRxImm16, Mips::SltiRxImmX16, true, MI, BB);
+ Mips::Bteqz16, Mips::SltiRxImm16, Mips::SltiRxImmX16, true, MI, BB);
case Mips::BteqzT8SltiuX16: return emitFEXT_T8I8I16_ins(
- Mips::BteqzX16, Mips::SltiuRxImm16, Mips::SltiuRxImmX16, false, MI, BB);
+ Mips::Bteqz16, Mips::SltiuRxImm16, Mips::SltiuRxImmX16, false, MI, BB);
case Mips::BtnezT8CmpiX16: return emitFEXT_T8I8I16_ins(
- Mips::BtnezX16, Mips::CmpiRxImm16, Mips::CmpiRxImmX16, false, MI, BB);
+ Mips::Btnez16, Mips::CmpiRxImm16, Mips::CmpiRxImmX16, false, MI, BB);
case Mips::BtnezT8SltiX16: return emitFEXT_T8I8I16_ins(
- Mips::BtnezX16, Mips::SltiRxImm16, Mips::SltiRxImmX16, true, MI, BB);
+ Mips::Btnez16, Mips::SltiRxImm16, Mips::SltiRxImmX16, true, MI, BB);
case Mips::BtnezT8SltiuX16: return emitFEXT_T8I8I16_ins(
- Mips::BtnezX16, Mips::SltiuRxImm16, Mips::SltiuRxImmX16, false, MI, BB);
+ Mips::Btnez16, Mips::SltiuRxImm16, Mips::SltiuRxImmX16, false, MI, BB);
break;
case Mips::SltCCRxRy16:
return emitFEXT_CCRX16_ins(Mips::SltRxRy16, MI, BB);
@@ -418,6 +424,8 @@ getOpndList(SmallVectorImpl<SDValue> &Ops,
bool IsPICCall, bool GlobalOrExternal, bool InternalLinkage,
CallLoweringInfo &CLI, SDValue Callee, SDValue Chain) const {
SelectionDAG &DAG = CLI.DAG;
+ MachineFunction &MF = DAG.getMachineFunction();
+ MipsFunctionInfo *FuncInfo = MF.getInfo<MipsFunctionInfo>();
const char* Mips16HelperFunction = 0;
bool NeedMips16Helper = false;
@@ -473,7 +481,10 @@ getOpndList(SmallVectorImpl<SDValue> &Ops,
if (NeedMips16Helper) {
RegsToPass.push_front(std::make_pair(V0Reg, Callee));
JumpTarget = DAG.getExternalSymbol(Mips16HelperFunction, getPointerTy());
- JumpTarget = getAddrGlobal(JumpTarget, DAG, MipsII::MO_GOT);
+ ExternalSymbolSDNode *S = cast<ExternalSymbolSDNode>(JumpTarget);
+ JumpTarget = getAddrGlobal(S, JumpTarget.getValueType(), DAG,
+ MipsII::MO_GOT, Chain,
+ FuncInfo->callPtrInfo(S->getSymbol()));
} else
RegsToPass.push_front(std::make_pair((unsigned)Mips::T9, Callee));
}
diff --git a/lib/Target/Mips/Mips16InstrInfo.cpp b/lib/Target/Mips/Mips16InstrInfo.cpp
index 05e70ab..000ea28 100644
--- a/lib/Target/Mips/Mips16InstrInfo.cpp
+++ b/lib/Target/Mips/Mips16InstrInfo.cpp
@@ -10,7 +10,6 @@
// This file contains the Mips16 implementation of the TargetInstrInfo class.
//
//===----------------------------------------------------------------------===//
-#include <stdio.h>
#include "Mips16InstrInfo.h"
#include "InstPrinter/MipsInstPrinter.h"
#include "MipsMachineFunction.h"
@@ -20,10 +19,12 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/MC/MCAsmInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetRegistry.h"
+#include <cctype>
using namespace llvm;
@@ -36,7 +37,7 @@ static cl::opt<bool> NeverUseSaveRestore(
Mips16InstrInfo::Mips16InstrInfo(MipsTargetMachine &tm)
- : MipsInstrInfo(tm, Mips::BimmX16),
+ : MipsInstrInfo(tm, Mips::Bimm16),
RI(*tm.getSubtargetImpl()) {}
const MipsRegisterInfo &Mips16InstrInfo::getRegisterInfo() const {
@@ -77,11 +78,11 @@ void Mips16InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
else if (Mips::GPR32RegClass.contains(DestReg) &&
Mips::CPU16RegsRegClass.contains(SrcReg))
Opc = Mips::Move32R16;
- else if ((SrcReg == Mips::HI) &&
+ else if ((SrcReg == Mips::HI0) &&
(Mips::CPU16RegsRegClass.contains(DestReg)))
Opc = Mips::Mfhi16, SrcReg = 0;
- else if ((SrcReg == Mips::LO) &&
+ else if ((SrcReg == Mips::LO0) &&
(Mips::CPU16RegsRegClass.contains(DestReg)))
Opc = Mips::Mflo16, SrcReg = 0;
@@ -151,13 +152,17 @@ unsigned Mips16InstrInfo::getOppositeBranchOpc(unsigned Opc) const {
default: llvm_unreachable("Illegal opcode!");
case Mips::BeqzRxImmX16: return Mips::BnezRxImmX16;
case Mips::BnezRxImmX16: return Mips::BeqzRxImmX16;
+ case Mips::BeqzRxImm16: return Mips::BnezRxImm16;
+ case Mips::BnezRxImm16: return Mips::BeqzRxImm16;
case Mips::BteqzT8CmpX16: return Mips::BtnezT8CmpX16;
case Mips::BteqzT8SltX16: return Mips::BtnezT8SltX16;
case Mips::BteqzT8SltiX16: return Mips::BtnezT8SltiX16;
+ case Mips::Btnez16: return Mips::Bteqz16;
case Mips::BtnezX16: return Mips::BteqzX16;
case Mips::BtnezT8CmpiX16: return Mips::BteqzT8CmpiX16;
case Mips::BtnezT8SltuX16: return Mips::BteqzT8SltuX16;
case Mips::BtnezT8SltiuX16: return Mips::BteqzT8SltiuX16;
+ case Mips::Bteqz16: return Mips::Btnez16;
case Mips::BteqzX16: return Mips::BtnezX16;
case Mips::BteqzT8CmpiX16: return Mips::BtnezT8CmpiX16;
case Mips::BteqzT8SltuX16: return Mips::BtnezT8SltuX16;
@@ -439,6 +444,9 @@ Mips16InstrInfo::basicLoadImmediate(
unsigned Mips16InstrInfo::getAnalyzableBrOpc(unsigned Opc) const {
return (Opc == Mips::BeqzRxImmX16 || Opc == Mips::BimmX16 ||
+ Opc == Mips::Bimm16 ||
+ Opc == Mips::Bteqz16 || Opc == Mips::Btnez16 ||
+ Opc == Mips::BeqzRxImm16 || Opc == Mips::BnezRxImm16 ||
Opc == Mips::BnezRxImmX16 || Opc == Mips::BteqzX16 ||
Opc == Mips::BteqzT8CmpX16 || Opc == Mips::BteqzT8CmpiX16 ||
Opc == Mips::BteqzT8SltX16 || Opc == Mips::BteqzT8SltuX16 ||
@@ -473,7 +481,6 @@ const MipsInstrInfo *llvm::createMips16InstrInfo(MipsTargetMachine &TM) {
return new Mips16InstrInfo(TM);
}
-#include <stdio.h>
bool Mips16InstrInfo::validImmediate(unsigned Opcode, unsigned Reg,
int64_t Amount) {
switch (Opcode) {
@@ -493,6 +500,49 @@ bool Mips16InstrInfo::validImmediate(unsigned Opcode, unsigned Reg,
return isInt<16>(Amount);
return isInt<15>(Amount);
}
- printf("Unexpected opcode %i \n", Opcode);
llvm_unreachable("unexpected Opcode in validImmediate");
}
+
+/// Measure the specified inline asm to determine an approximation of its
+/// length.
+/// Comments (which run till the next SeparatorString or newline) do not
+/// count as an instruction.
+/// Any other non-whitespace text is considered an instruction, with
+/// multiple instructions separated by SeparatorString or newlines.
+/// Variable-length instructions are not handled here; this function
+/// may be overridden in the target code to do that.
+/// We implement the special case of the .space directive, which takes
+/// a single integer argument giving the size in bytes. This is used to
+/// create inline code spacing for testing purposes via inline assembly.
+///
+unsigned Mips16InstrInfo::getInlineAsmLength(const char *Str,
+ const MCAsmInfo &MAI) const {
+
+ // Count the number of instructions in the asm.
+ bool atInsnStart = true;
+ unsigned Length = 0;
+ for (; *Str; ++Str) {
+ if (*Str == '\n' || strncmp(Str, MAI.getSeparatorString(),
+ strlen(MAI.getSeparatorString())) == 0)
+ atInsnStart = true;
+ if (atInsnStart && !std::isspace(static_cast<unsigned char>(*Str))) {
+ if (strncmp(Str, ".space", 6)==0) {
+ char *EStr; int Sz;
+ Sz = strtol(Str+6, &EStr, 10);
+ while (isspace(*EStr)) ++EStr;
+ if (*EStr=='\0') {
+ DEBUG(dbgs() << "parsed .space " << Sz << '\n');
+ return Sz;
+ }
+ }
+ Length += MAI.getMaxInstLength();
+ atInsnStart = false;
+ }
+ if (atInsnStart && strncmp(Str, MAI.getCommentString(),
+ strlen(MAI.getCommentString())) == 0)
+ atInsnStart = false;
+ }
+
+ return Length;
+}
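+
+// Illustrative usage note: given the special case above, inline assembly
+// such as
+//   asm volatile(".space 64");
+// is measured as exactly 64 bytes, while every other instruction is
+// counted at MAI.getMaxInstLength() bytes.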
diff --git a/lib/Target/Mips/Mips16InstrInfo.h b/lib/Target/Mips/Mips16InstrInfo.h
index 118d258..d9a594b 100644
--- a/lib/Target/Mips/Mips16InstrInfo.h
+++ b/lib/Target/Mips/Mips16InstrInfo.h
@@ -108,6 +108,8 @@ public:
void BuildAddiuSpImm
(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, int64_t Imm) const;
+ unsigned getInlineAsmLength(const char *Str,
+ const MCAsmInfo &MAI) const;
private:
virtual unsigned getAnalyzableBrOpc(unsigned Opc) const;
diff --git a/lib/Target/Mips/Mips16InstrInfo.td b/lib/Target/Mips/Mips16InstrInfo.td
index aef4e92..7441c78 100644
--- a/lib/Target/Mips/Mips16InstrInfo.td
+++ b/lib/Target/Mips/Mips16InstrInfo.td
@@ -32,6 +32,16 @@ def mem16_ea : Operand<i32> {
}
//
+// I-type instruction format
+//
+// This is only used by bimm. The actual assembly value is a 12-bit signed
+// number.
+//
+class FI16_ins<bits<5> op, string asmstr, InstrItinClass itin>:
+ FI16<op, (outs), (ins brtarget:$imm16),
+ !strconcat(asmstr, "\t$imm16 # 16 bit inst"), [], itin>;
+
+//
//
// I8 instruction format
//
@@ -41,7 +51,10 @@ class FI816_ins_base<bits<3> _func, string asmstr,
FI816<_func, (outs), (ins simm16:$imm), !strconcat(asmstr, asmstr2),
[], itin>;
-
+class FI816_ins<bits<3> _func, string asmstr,
+ InstrItinClass itin>:
+ FI816_ins_base<_func, asmstr, "\t$imm # 16 bit inst", itin>;
+
class FI816_SP_ins<bits<3> _func, string asmstr,
InstrItinClass itin>:
FI816_ins_base<_func, asmstr, "\t$$sp, $imm # 16 bit inst", itin>;
@@ -60,6 +73,11 @@ class FRI16_ins<bits<5> op, string asmstr,
InstrItinClass itin>:
FRI16_ins_base<op, asmstr, "\t$rx, $imm \t# 16 bit inst", itin>;
+class FRI16_TCP_ins<bits<5> _op, string asmstr,
+ InstrItinClass itin>:
+ FRI16<_op, (outs CPU16Regs:$rx), (ins pcrel16:$imm, i32imm:$size),
+ !strconcat(asmstr, "\t$rx, $imm\t# 16 bit inst"), [], itin>;
+
class FRI16R_ins_base<bits<5> op, string asmstr, string asmstr2,
InstrItinClass itin>:
FRI16<op, (outs), (ins CPU16Regs:$rx, simm16:$imm),
@@ -172,6 +190,11 @@ class FEXT_RI16_B_ins<bits<5> _op, string asmstr,
FEXT_RI16<_op, (outs), (ins CPU16Regs:$rx, brtarget:$imm),
!strconcat(asmstr, "\t$rx, $imm"), [], itin>;
+class FEXT_RI16_TCP_ins<bits<5> _op, string asmstr,
+ InstrItinClass itin>:
+ FEXT_RI16<_op, (outs CPU16Regs:$rx), (ins pcrel16:$imm, i32imm:$size),
+ !strconcat(asmstr, "\t$rx, $imm"), [], itin>;
+
class FEXT_2RI16_ins<bits<5> _op, string asmstr,
InstrItinClass itin>:
FEXT_RI16<_op, (outs CPU16Regs:$rx), (ins CPU16Regs:$rx_, simm16:$imm),
@@ -220,7 +243,7 @@ class FEXT_RRI_A16_mem_ins<bits<1> op, string asmstr, Operand MemOpnd,
// EXT-SHIFT instruction format
//
class FEXT_SHIFT16_ins<bits<2> _f, string asmstr, InstrItinClass itin>:
- FEXT_SHIFT16<_f, (outs CPU16Regs:$rx), (ins CPU16Regs:$ry, shamt:$sa),
+ FEXT_SHIFT16<_f, (outs CPU16Regs:$rx), (ins CPU16Regs:$ry, uimm5:$sa),
!strconcat(asmstr, "\t$rx, $ry, $sa"), [], itin>;
//
@@ -343,6 +366,14 @@ class FRR16_JALRC_ins<bits<1> nd, bits<1> l, bits<1> ra,
FRR16_JALRC<nd, l, ra, (outs), (ins CPU16Regs:$rx),
!strconcat(asmstr, "\t $rx"), [], itin> ;
+class FRR_SF16_ins
+ <bits<5> _funct, bits<3> _subfunc,
+ string asmstr, InstrItinClass itin>:
+ FRR_SF16<_funct, _subfunc, (outs CPU16Regs:$rx), (ins CPU16Regs:$rx_),
+ !strconcat(asmstr, "\t $rx"),
+ [], itin> {
+ let Constraints = "$rx_ = $rx";
+ }
//
// RRR-type instruction format
//
@@ -447,7 +478,7 @@ def Constant32:
MipsPseudo16<(outs), (ins imm32:$imm), "\t.word $imm", []>;
def LwConstant32:
- MipsPseudo16<(outs CPU16Regs:$rx), (ins imm32:$imm),
+ MipsPseudo16<(outs CPU16Regs:$rx), (ins imm32:$imm, imm32:$constid),
"lw\t$rx, 1f\n\tb\t2f\n\t.align\t2\n1: \t.word\t$imm\n2:", []>;
@@ -559,6 +590,14 @@ def BeqzRxImm16: FRI16_B_ins<0b00100, "beqz", IIAlu>, cbranch16;
//
def BeqzRxImmX16: FEXT_RI16_B_ins<0b00100, "beqz", IIAlu>, cbranch16;
+//
+// Format: B offset MIPS16e
+// Purpose: Unconditional Branch (Extended)
+// To do an unconditional PC-relative branch.
+//
+
+def Bimm16: FI16_ins<0b00010, "b", IIAlu>, branch16;
+
// Format: B offset MIPS16e
// Purpose: Unconditional Branch
// To do an unconditional PC-relative branch.
@@ -591,6 +630,10 @@ def Break16: FRRBreakNull16_ins<"break 0", NoItinerary>;
// Purpose: Branch on T Equal to Zero (Extended)
// To test special register T then do a PC-relative conditional branch.
//
+def Bteqz16: FI816_ins<0b000, "bteqz", IIAlu>, cbranch16 {
+ let Uses = [T8];
+}
+
def BteqzX16: FEXT_I816_ins<0b000, "bteqz", IIAlu>, cbranch16 {
let Uses = [T8];
}
@@ -614,6 +657,11 @@ def BteqzT8SltiuX16: FEXT_T8I8I16_ins<"bteqz", "sltiu">,
// Purpose: Branch on T Not Equal to Zero (Extended)
// To test special register T then do a PC-relative conditional branch.
//
+
+def Btnez16: FI816_ins<0b001, "btnez", IIAlu>, cbranch16 {
+ let Uses = [T8];
+}
+
def BtnezX16: FEXT_I816_ins<0b001, "btnez", IIAlu> ,cbranch16 {
let Uses = [T8];
}
@@ -665,7 +713,7 @@ def CmpiRxImmX16: FEXT_RI16R_ins<0b01110, "cmpi", IIAlu> {
// To divide 32-bit signed integers.
//
def DivRxRy16: FRR16_div_ins<0b11010, "div", IIAlu> {
- let Defs = [HI, LO];
+ let Defs = [HI0, LO0];
}
//
@@ -674,7 +722,7 @@ def DivRxRy16: FRR16_div_ins<0b11010, "div", IIAlu> {
// To divide 32-bit unsigned integers.
//
def DivuRxRy16: FRR16_div_ins<0b11011, "divu", IIAlu> {
- let Defs = [HI, LO];
+ let Defs = [HI0, LO0];
}
//
// Format: JAL target MIPS16e
@@ -684,10 +732,7 @@ def DivuRxRy16: FRR16_div_ins<0b11011, "divu", IIAlu> {
//
def Jal16 : FJAL16_ins<0b0, "jal", IIAlu> {
- let isBranch = 1;
let hasDelaySlot = 0; // not true, but we add the nop for now
- let isTerminator=1;
- let isBarrier=1;
let isCall=1;
}
@@ -771,6 +816,10 @@ def LiRxImm16: FRI16_ins<0b01101, "li", IIAlu>;
//
def LiRxImmX16: FEXT_RI16_ins<0b01101, "li", IIAlu>;
+def LiRxImmAlignX16: FEXT_RI16_ins<0b01101, ".align 2\n\tli", IIAlu> {
+ let isCodeGenOnly = 1;
+}
+
//
// Format: LW ry, offset(rx) MIPS16e
// Purpose: Load Word (Extended)
@@ -784,10 +833,13 @@ def LwRxRyOffMemX16: FEXT_RRI16_mem_ins<0b10011, "lw", mem16, IILoad>, MayLoad{
// Purpose: Load Word (SP-Relative, Extended)
// To load an SP-relative word from memory as a signed value.
//
-def LwRxSpImmX16: FEXT_RI16_SP_explicit_ins<0b10110, "lw", IILoad>, MayLoad{
+def LwRxSpImmX16: FEXT_RI16_SP_explicit_ins<0b10010, "lw", IILoad>, MayLoad{
let Uses = [SP];
}
+def LwRxPcTcp16: FRI16_TCP_ins<0b10110, "lw", IILoad>, MayLoad;
+
+def LwRxPcTcpX16: FEXT_RI16_TCP_ins<0b10110, "lw", IILoad>, MayLoad;
//
// Format: MOVE r32, rz MIPS16e
// Purpose: Move
@@ -808,7 +860,7 @@ def MoveR3216: FI8_MOVR3216_ins<"move", IIAlu>;
// To copy the special purpose HI register to a GPR.
//
def Mfhi16: FRR16_M_ins<0b10000, "mfhi", IIAlu> {
- let Uses = [HI];
+ let Uses = [HI0];
let neverHasSideEffects = 1;
}
@@ -818,7 +870,7 @@ def Mfhi16: FRR16_M_ins<0b10000, "mfhi", IIAlu> {
// To copy the special purpose LO register to a GPR.
//
def Mflo16: FRR16_M_ins<0b10010, "mflo", IIAlu> {
- let Uses = [LO];
+ let Uses = [LO0];
let neverHasSideEffects = 1;
}
@@ -828,13 +880,13 @@ def Mflo16: FRR16_M_ins<0b10010, "mflo", IIAlu> {
def MultRxRy16: FMULT16_ins<"mult", IIAlu> {
let isCommutable = 1;
let neverHasSideEffects = 1;
- let Defs = [HI, LO];
+ let Defs = [HI0, LO0];
}
def MultuRxRy16: FMULT16_ins<"multu", IIAlu> {
let isCommutable = 1;
let neverHasSideEffects = 1;
- let Defs = [HI, LO];
+ let Defs = [HI0, LO0];
}
//
@@ -845,7 +897,7 @@ def MultuRxRy16: FMULT16_ins<"multu", IIAlu> {
def MultRxRyRz16: FMULT16_LO_ins<"mult", IIAlu> {
let isCommutable = 1;
let neverHasSideEffects = 1;
- let Defs = [HI, LO];
+ let Defs = [HI0, LO0];
}
//
@@ -856,7 +908,7 @@ def MultRxRyRz16: FMULT16_LO_ins<"mult", IIAlu> {
def MultuRxRyRz16: FMULT16_LO_ins<"multu", IIAlu> {
let isCommutable = 1;
let neverHasSideEffects = 1;
- let Defs = [HI, LO];
+ let Defs = [HI0, LO0];
}
//
@@ -951,6 +1003,22 @@ def SbRxRyOffMemX16:
FEXT_RRI16_mem2_ins<0b11000, "sb", mem16, IIStore>, MayStore;
//
+// Format: SEB rx MIPS16e
+// Purpose: Sign-Extend Byte
+// Sign-extend least significant byte in register rx.
+//
+def SebRx16
+ : FRR_SF16_ins<0b10001, 0b100, "seb", IIAlu>;
+
+//
+// Format: SEH rx MIPS16e
+// Purpose: Sign-Extend Halfword
+// Sign-extend least significant halfword in register rx.
+//
+def SehRx16
+ : FRR_SF16_ins<0b10001, 0b101, "seh", IIAlu>;
+
+//
// The Sel(T) instructions are pseudos
// T means that they use T8 implicitly.
//
@@ -1075,7 +1143,7 @@ def ShRxRyOffMemX16:
//
// Format: SLL rx, ry, sa MIPS16e
// Purpose: Shift Word Left Logical (Extended)
-// To execute a left-shift of a word by a fixed number of bits—0 to 31 bits.
+// To execute a left-shift of a word by a fixed number of bits (0 to 31).
//
def SllX16: FEXT_SHIFT16_ins<0b00, "sll", IIAlu>;
@@ -1171,7 +1239,7 @@ def SravRxRy16: FRxRxRy16_ins<0b00111, "srav", IIAlu>;
// Format: SRA rx, ry, sa MIPS16e
// Purpose: Shift Word Right Arithmetic (Extended)
// To execute an arithmetic right-shift of a word by a fixed
-// number of bits—1 to 8 bits.
+// number of bits (1 to 8).
//
def SraX16: FEXT_SHIFT16_ins<0b11, "sra", IIAlu>;
@@ -1189,7 +1257,7 @@ def SrlvRxRy16: FRxRxRy16_ins<0b00110, "srlv", IIAlu>;
// Format: SRL rx, ry, sa MIPS16e
// Purpose: Shift Word Right Logical (Extended)
// To execute a logical right-shift of a word by a fixed
-// number of bits—1 to 31 bits.
+// number of bits (1 to 31).
//
def SrlX16: FEXT_SHIFT16_ins<0b10, "srl", IIAlu>;
@@ -1330,9 +1398,7 @@ def: Mips16Pat<(i32 addr16:$addr),
// Large (>16 bit) immediate loads
-def : Mips16Pat<(i32 imm:$imm),
- (OrRxRxRy16 (SllX16 (LiRxImmX16 (HI16 imm:$imm)), 16),
- (LiRxImmX16 (LO16 imm:$imm)))>;
+def : Mips16Pat<(i32 imm:$imm), (LwConstant32 imm:$imm, -1)>;
// Carry MipsPatterns
def : Mips16Pat<(subc CPU16Regs:$lhs, CPU16Regs:$rhs),
@@ -1373,7 +1439,7 @@ def: Mips16Pat
def: Mips16Pat
<(brcond (i32 (seteq CPU16Regs:$rx, 0)), bb:$targ16),
- (BeqzRxImmX16 CPU16Regs:$rx, bb:$targ16)
+ (BeqzRxImm16 CPU16Regs:$rx, bb:$targ16)
>;
//
@@ -1435,7 +1501,7 @@ def: Mips16Pat
def: Mips16Pat
<(brcond (i32 (setne CPU16Regs:$rx, 0)), bb:$targ16),
- (BnezRxImmX16 CPU16Regs:$rx, bb:$targ16)
+ (BnezRxImm16 CPU16Regs:$rx, bb:$targ16)
>;
//
@@ -1443,7 +1509,7 @@ def: Mips16Pat
//
def: Mips16Pat
<(brcond CPU16Regs:$rx, bb:$targ16),
- (BnezRxImmX16 CPU16Regs:$rx, bb:$targ16)
+ (BnezRxImm16 CPU16Regs:$rx, bb:$targ16)
>;
//
@@ -1473,7 +1539,7 @@ def: Mips16Pat
// (BtnezT8SltuX16 CPU16Regs:$rx, CPU16Regs:$ry, bb:$imm16)
// >;
-def: UncondBranch16_pat<br, BimmX16>;
+def: UncondBranch16_pat<br, Bimm16>;
// Small immediates
def: Mips16Pat<(i32 immSExt16:$in),
@@ -1787,7 +1853,8 @@ def: Mips16Pat<(add CPU16Regs:$hi, (MipsLo tglobaladdr:$lo)),
(AddiuRxRxImmX16 CPU16Regs:$hi, tglobaladdr:$lo)>;
// hi/lo relocs
-
+def : Mips16Pat<(MipsHi tblockaddress:$in),
+ (SllX16 (LiRxImmX16 tblockaddress:$in), 16)>;
def : Mips16Pat<(MipsHi tglobaladdr:$in),
(SllX16 (LiRxImmX16 tglobaladdr:$in), 16)>;
def : Mips16Pat<(MipsHi tjumptable:$in),
@@ -1795,6 +1862,8 @@ def : Mips16Pat<(MipsHi tjumptable:$in),
def : Mips16Pat<(MipsHi tglobaltlsaddr:$in),
(SllX16 (LiRxImmX16 tglobaltlsaddr:$in), 16)>;
+def : Mips16Pat<(MipsLo tblockaddress:$in), (LiRxImmX16 tblockaddress:$in)>;
+
// wrapper_pic
class Wrapper16Pat<SDNode node, Instruction ADDiuOp, RegisterClass RC>:
Mips16Pat<(MipsWrapper RC:$gp, node:$in),
@@ -1811,3 +1880,30 @@ def : Mips16Pat<(i32 (extloadi16 addr16:$src)),
def: Mips16Pat<(trap), (Break16)>;
+def : Mips16Pat<(sext_inreg CPU16Regs:$val, i8),
+ (SebRx16 CPU16Regs:$val)>;
+
+def : Mips16Pat<(sext_inreg CPU16Regs:$val, i16),
+ (SehRx16 CPU16Regs:$val)>;
+
+def GotPrologue16:
+ MipsPseudo16<
+ (outs CPU16Regs:$rh, CPU16Regs:$rl),
+ (ins simm16:$immHi, simm16:$immLo),
+ ".align 2\n\tli\t$rh, $immHi\n\taddiu\t$rl, $$pc, $immLo\n ",[]> ;
+
+// An operand for the CONSTPOOL_ENTRY pseudo-instruction.
+def cpinst_operand : Operand<i32> {
+ // let PrintMethod = "printCPInstOperand";
+}
+
+// CONSTPOOL_ENTRY - This instruction represents a floating-point constant
+// pool entry in the function. The first operand is the ID# for this
+// instruction, the second is the index into the MachineConstantPool that
+// this entry corresponds to, and the third is the size in bytes of the
+// entry.
+//
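+// For example (illustrative), an 8-byte entry could look like
+//   CONSTPOOL_ENTRY 0, 1, 8
+// i.e. instruction ID 0, constant pool index 1, size 8 bytes.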
+let neverHasSideEffects = 1, isNotDuplicable = 1 in
+def CONSTPOOL_ENTRY :
+MipsPseudo16<(outs), (ins cpinst_operand:$instid, cpinst_operand:$cpidx,
+ i32imm:$size), "foo", []>;
+
diff --git a/lib/Target/Mips/Mips64InstrInfo.td b/lib/Target/Mips/Mips64InstrInfo.td
index a752ab8..15ef654 100644
--- a/lib/Target/Mips/Mips64InstrInfo.td
+++ b/lib/Target/Mips/Mips64InstrInfo.td
@@ -15,9 +15,6 @@
// Mips Operand, Complex Patterns and Transformations Definitions.
//===----------------------------------------------------------------------===//
-// Instruction operand types
-def shamt_64 : Operand<i64>;
-
// Unsigned Operand
def uimm16_64 : Operand<i64> {
let PrintMethod = "printUnsignedImm";
@@ -34,36 +31,21 @@ def immZExt6 : ImmLeaf<i32, [{return Imm == (Imm & 0x3f);}]>;
//===----------------------------------------------------------------------===//
// Instructions specific format
//===----------------------------------------------------------------------===//
-let DecoderNamespace = "Mips64" in {
-
-multiclass Atomic2Ops64<PatFrag Op> {
- def NAME : Atomic2Ops<Op, GPR64, GPR32>, Requires<[NotN64, HasStdEnc]>;
- def _P8 : Atomic2Ops<Op, GPR64, GPR64>, Requires<[IsN64, HasStdEnc]>;
-}
-
-multiclass AtomicCmpSwap64<PatFrag Op> {
- def NAME : AtomicCmpSwap<Op, GPR64, GPR32>,
- Requires<[NotN64, HasStdEnc]>;
- def _P8 : AtomicCmpSwap<Op, GPR64, GPR64>,
- Requires<[IsN64, HasStdEnc]>;
-}
-}
-let usesCustomInserter = 1, Predicates = [HasStdEnc],
- DecoderNamespace = "Mips64" in {
- defm ATOMIC_LOAD_ADD_I64 : Atomic2Ops64<atomic_load_add_64>;
- defm ATOMIC_LOAD_SUB_I64 : Atomic2Ops64<atomic_load_sub_64>;
- defm ATOMIC_LOAD_AND_I64 : Atomic2Ops64<atomic_load_and_64>;
- defm ATOMIC_LOAD_OR_I64 : Atomic2Ops64<atomic_load_or_64>;
- defm ATOMIC_LOAD_XOR_I64 : Atomic2Ops64<atomic_load_xor_64>;
- defm ATOMIC_LOAD_NAND_I64 : Atomic2Ops64<atomic_load_nand_64>;
- defm ATOMIC_SWAP_I64 : Atomic2Ops64<atomic_swap_64>;
- defm ATOMIC_CMP_SWAP_I64 : AtomicCmpSwap64<atomic_cmp_swap_64>;
+let usesCustomInserter = 1 in {
+ def ATOMIC_LOAD_ADD_I64 : Atomic2Ops<atomic_load_add_64, GPR64>;
+ def ATOMIC_LOAD_SUB_I64 : Atomic2Ops<atomic_load_sub_64, GPR64>;
+ def ATOMIC_LOAD_AND_I64 : Atomic2Ops<atomic_load_and_64, GPR64>;
+ def ATOMIC_LOAD_OR_I64 : Atomic2Ops<atomic_load_or_64, GPR64>;
+ def ATOMIC_LOAD_XOR_I64 : Atomic2Ops<atomic_load_xor_64, GPR64>;
+ def ATOMIC_LOAD_NAND_I64 : Atomic2Ops<atomic_load_nand_64, GPR64>;
+ def ATOMIC_SWAP_I64 : Atomic2Ops<atomic_swap_64, GPR64>;
+ def ATOMIC_CMP_SWAP_I64 : AtomicCmpSwap<atomic_cmp_swap_64, GPR64>;
}
/// Pseudo instructions for loading and storing accumulator registers.
let isPseudo = 1, isCodeGenOnly = 1 in {
- defm LOAD_AC128 : LoadM<"", ACRegs128>;
- defm STORE_AC128 : StoreM<"", ACRegs128>;
+ def LOAD_ACC128 : Load<"", ACC128>;
+ def STORE_ACC128 : Store<"", ACC128>;
}
//===----------------------------------------------------------------------===//
@@ -110,103 +92,101 @@ def NOR64 : LogicNOR<"nor", GPR64Opnd>, ADD_FM<0, 0x27>;
}
/// Shift Instructions
-def DSLL : shift_rotate_imm<"dsll", shamt, GPR64Opnd, shl, immZExt6>,
+def DSLL : shift_rotate_imm<"dsll", uimm6, GPR64Opnd, shl, immZExt6>,
SRA_FM<0x38, 0>;
-def DSRL : shift_rotate_imm<"dsrl", shamt, GPR64Opnd, srl, immZExt6>,
+def DSRL : shift_rotate_imm<"dsrl", uimm6, GPR64Opnd, srl, immZExt6>,
SRA_FM<0x3a, 0>;
-def DSRA : shift_rotate_imm<"dsra", shamt, GPR64Opnd, sra, immZExt6>,
+def DSRA : shift_rotate_imm<"dsra", uimm6, GPR64Opnd, sra, immZExt6>,
SRA_FM<0x3b, 0>;
def DSLLV : shift_rotate_reg<"dsllv", GPR64Opnd, shl>, SRLV_FM<0x14, 0>;
def DSRLV : shift_rotate_reg<"dsrlv", GPR64Opnd, srl>, SRLV_FM<0x16, 0>;
def DSRAV : shift_rotate_reg<"dsrav", GPR64Opnd, sra>, SRLV_FM<0x17, 0>;
-def DSLL32 : shift_rotate_imm<"dsll32", shamt, GPR64Opnd>, SRA_FM<0x3c, 0>;
-def DSRL32 : shift_rotate_imm<"dsrl32", shamt, GPR64Opnd>, SRA_FM<0x3e, 0>;
-def DSRA32 : shift_rotate_imm<"dsra32", shamt, GPR64Opnd>, SRA_FM<0x3f, 0>;
+def DSLL32 : shift_rotate_imm<"dsll32", uimm5, GPR64Opnd>, SRA_FM<0x3c, 0>;
+def DSRL32 : shift_rotate_imm<"dsrl32", uimm5, GPR64Opnd>, SRA_FM<0x3e, 0>;
+def DSRA32 : shift_rotate_imm<"dsra32", uimm5, GPR64Opnd>, SRA_FM<0x3f, 0>;
// Rotate Instructions
let Predicates = [HasMips64r2, HasStdEnc] in {
- def DROTR : shift_rotate_imm<"drotr", shamt, GPR64Opnd, rotr, immZExt6>,
+ def DROTR : shift_rotate_imm<"drotr", uimm6, GPR64Opnd, rotr, immZExt6>,
SRA_FM<0x3a, 1>;
def DROTRV : shift_rotate_reg<"drotrv", GPR64Opnd, rotr>,
SRLV_FM<0x16, 1>;
+ def DROTR32 : shift_rotate_imm<"drotr32", uimm5, GPR64Opnd>, SRA_FM<0x3e, 1>;
}
/// Load and Store Instructions
/// aligned
let isCodeGenOnly = 1 in {
-defm LB64 : LoadM<"lb", GPR64Opnd, sextloadi8, IILoad>, LW_FM<0x20>;
-defm LBu64 : LoadM<"lbu", GPR64Opnd, zextloadi8, IILoad>, LW_FM<0x24>;
-defm LH64 : LoadM<"lh", GPR64Opnd, sextloadi16, IILoad>, LW_FM<0x21>;
-defm LHu64 : LoadM<"lhu", GPR64Opnd, zextloadi16, IILoad>, LW_FM<0x25>;
-defm LW64 : LoadM<"lw", GPR64Opnd, sextloadi32, IILoad>, LW_FM<0x23>;
-defm SB64 : StoreM<"sb", GPR64Opnd, truncstorei8, IIStore>, LW_FM<0x28>;
-defm SH64 : StoreM<"sh", GPR64Opnd, truncstorei16, IIStore>, LW_FM<0x29>;
-defm SW64 : StoreM<"sw", GPR64Opnd, truncstorei32, IIStore>, LW_FM<0x2b>;
+def LB64 : Load<"lb", GPR64Opnd, sextloadi8, IILoad>, LW_FM<0x20>;
+def LBu64 : Load<"lbu", GPR64Opnd, zextloadi8, IILoad>, LW_FM<0x24>;
+def LH64 : Load<"lh", GPR64Opnd, sextloadi16, IILoad>, LW_FM<0x21>;
+def LHu64 : Load<"lhu", GPR64Opnd, zextloadi16, IILoad>, LW_FM<0x25>;
+def LW64 : Load<"lw", GPR64Opnd, sextloadi32, IILoad>, LW_FM<0x23>;
+def SB64 : Store<"sb", GPR64Opnd, truncstorei8, IIStore>, LW_FM<0x28>;
+def SH64 : Store<"sh", GPR64Opnd, truncstorei16, IIStore>, LW_FM<0x29>;
+def SW64 : Store<"sw", GPR64Opnd, truncstorei32, IIStore>, LW_FM<0x2b>;
}
-defm LWu : LoadM<"lwu", GPR64Opnd, zextloadi32, IILoad>, LW_FM<0x27>;
-defm LD : LoadM<"ld", GPR64Opnd, load, IILoad>, LW_FM<0x37>;
-defm SD : StoreM<"sd", GPR64Opnd, store, IIStore>, LW_FM<0x3f>;
+def LWu : Load<"lwu", GPR64Opnd, zextloadi32, IILoad>, LW_FM<0x27>;
+def LD : Load<"ld", GPR64Opnd, load, IILoad>, LW_FM<0x37>;
+def SD : Store<"sd", GPR64Opnd, store, IIStore>, LW_FM<0x3f>;
/// load/store left/right
let isCodeGenOnly = 1 in {
-defm LWL64 : LoadLeftRightM<"lwl", MipsLWL, GPR64Opnd>, LW_FM<0x22>;
-defm LWR64 : LoadLeftRightM<"lwr", MipsLWR, GPR64Opnd>, LW_FM<0x26>;
-defm SWL64 : StoreLeftRightM<"swl", MipsSWL, GPR64Opnd>, LW_FM<0x2a>;
-defm SWR64 : StoreLeftRightM<"swr", MipsSWR, GPR64Opnd>, LW_FM<0x2e>;
+def LWL64 : LoadLeftRight<"lwl", MipsLWL, GPR64Opnd, IILoad>, LW_FM<0x22>;
+def LWR64 : LoadLeftRight<"lwr", MipsLWR, GPR64Opnd, IILoad>, LW_FM<0x26>;
+def SWL64 : StoreLeftRight<"swl", MipsSWL, GPR64Opnd, IIStore>, LW_FM<0x2a>;
+def SWR64 : StoreLeftRight<"swr", MipsSWR, GPR64Opnd, IIStore>, LW_FM<0x2e>;
}
-defm LDL : LoadLeftRightM<"ldl", MipsLDL, GPR64Opnd>, LW_FM<0x1a>;
-defm LDR : LoadLeftRightM<"ldr", MipsLDR, GPR64Opnd>, LW_FM<0x1b>;
-defm SDL : StoreLeftRightM<"sdl", MipsSDL, GPR64Opnd>, LW_FM<0x2c>;
-defm SDR : StoreLeftRightM<"sdr", MipsSDR, GPR64Opnd>, LW_FM<0x2d>;
+def LDL : LoadLeftRight<"ldl", MipsLDL, GPR64Opnd, IILoad>, LW_FM<0x1a>;
+def LDR : LoadLeftRight<"ldr", MipsLDR, GPR64Opnd, IILoad>, LW_FM<0x1b>;
+def SDL : StoreLeftRight<"sdl", MipsSDL, GPR64Opnd, IIStore>, LW_FM<0x2c>;
+def SDR : StoreLeftRight<"sdr", MipsSDR, GPR64Opnd, IIStore>, LW_FM<0x2d>;
/// Load-linked, Store-conditional
-let Predicates = [NotN64, HasStdEnc] in {
- def LLD : LLBase<"lld", GPR64Opnd, mem>, LW_FM<0x34>;
- def SCD : SCBase<"scd", GPR64Opnd, mem>, LW_FM<0x3c>;
-}
-
-let Predicates = [IsN64, HasStdEnc], isCodeGenOnly = 1 in {
- def LLD_P8 : LLBase<"lld", GPR64Opnd, mem64>, LW_FM<0x34>;
- def SCD_P8 : SCBase<"scd", GPR64Opnd, mem64>, LW_FM<0x3c>;
-}
+def LLD : LLBase<"lld", GPR64Opnd>, LW_FM<0x34>;
+def SCD : SCBase<"scd", GPR64Opnd>, LW_FM<0x3c>;
/// Jump and Branch Instructions
let isCodeGenOnly = 1 in {
-def JR64 : IndirectBranch<GPR64Opnd>, MTLO_FM<8>;
-def BEQ64 : CBranch<"beq", seteq, GPR64Opnd>, BEQ_FM<4>;
-def BNE64 : CBranch<"bne", setne, GPR64Opnd>, BEQ_FM<5>;
-def BGEZ64 : CBranchZero<"bgez", setge, GPR64Opnd>, BGEZ_FM<1, 1>;
-def BGTZ64 : CBranchZero<"bgtz", setgt, GPR64Opnd>, BGEZ_FM<7, 0>;
-def BLEZ64 : CBranchZero<"blez", setle, GPR64Opnd>, BGEZ_FM<6, 0>;
-def BLTZ64 : CBranchZero<"bltz", setlt, GPR64Opnd>, BGEZ_FM<1, 0>;
+def JR64 : IndirectBranch<"jr", GPR64Opnd>, MTLO_FM<8>;
+def BEQ64 : CBranch<"beq", brtarget, seteq, GPR64Opnd>, BEQ_FM<4>;
+def BNE64 : CBranch<"bne", brtarget, setne, GPR64Opnd>, BEQ_FM<5>;
+def BGEZ64 : CBranchZero<"bgez", brtarget, setge, GPR64Opnd>, BGEZ_FM<1, 1>;
+def BGTZ64 : CBranchZero<"bgtz", brtarget, setgt, GPR64Opnd>, BGEZ_FM<7, 0>;
+def BLEZ64 : CBranchZero<"blez", brtarget, setle, GPR64Opnd>, BGEZ_FM<6, 0>;
+def BLTZ64 : CBranchZero<"bltz", brtarget, setlt, GPR64Opnd>, BGEZ_FM<1, 0>;
def JALR64 : JumpLinkReg<"jalr", GPR64Opnd>, JALR_FM;
def JALR64Pseudo : JumpLinkRegPseudo<GPR64Opnd, JALR, RA, GPR32Opnd>;
-def TAILCALL64_R : JumpFR<GPR64Opnd, MipsTailCall>, MTLO_FM<8>, IsTailCall;
+def TAILCALL64_R : JumpFR<"tcallr", GPR64Opnd, MipsTailCall>,
+ MTLO_FM<8>, IsTailCall;
}
/// Multiply and Divide Instructions.
-def DMULT : Mult<"dmult", IIImult, GPR64Opnd, [HI64, LO64]>,
+def DMULT : Mult<"dmult", IIImult, GPR64Opnd, [HI0_64, LO0_64]>,
MULT_FM<0, 0x1c>;
-def DMULTu : Mult<"dmultu", IIImult, GPR64Opnd, [HI64, LO64]>,
+def DMULTu : Mult<"dmultu", IIImult, GPR64Opnd, [HI0_64, LO0_64]>,
MULT_FM<0, 0x1d>;
-def PseudoDMULT : MultDivPseudo<DMULT, ACRegs128, GPR64Opnd, MipsMult,
+def PseudoDMULT : MultDivPseudo<DMULT, ACC128, GPR64Opnd, MipsMult,
IIImult>;
-def PseudoDMULTu : MultDivPseudo<DMULTu, ACRegs128, GPR64Opnd, MipsMultu,
+def PseudoDMULTu : MultDivPseudo<DMULTu, ACC128, GPR64Opnd, MipsMultu,
IIImult>;
-def DSDIV : Div<"ddiv", IIIdiv, GPR64Opnd, [HI64, LO64]>, MULT_FM<0, 0x1e>;
-def DUDIV : Div<"ddivu", IIIdiv, GPR64Opnd, [HI64, LO64]>, MULT_FM<0, 0x1f>;
-def PseudoDSDIV : MultDivPseudo<DSDIV, ACRegs128, GPR64Opnd, MipsDivRem,
+def DSDIV : Div<"ddiv", IIIdiv, GPR64Opnd, [HI0_64, LO0_64]>, MULT_FM<0, 0x1e>;
+def DUDIV : Div<"ddivu", IIIdiv, GPR64Opnd, [HI0_64, LO0_64]>, MULT_FM<0, 0x1f>;
+def PseudoDSDIV : MultDivPseudo<DSDIV, ACC128, GPR64Opnd, MipsDivRem,
IIIdiv, 0, 1, 1>;
-def PseudoDUDIV : MultDivPseudo<DUDIV, ACRegs128, GPR64Opnd, MipsDivRemU,
+def PseudoDUDIV : MultDivPseudo<DUDIV, ACC128, GPR64Opnd, MipsDivRemU,
IIIdiv, 0, 1, 1>;
let isCodeGenOnly = 1 in {
-def MTHI64 : MoveToLOHI<"mthi", GPR64Opnd, [HI64]>, MTLO_FM<0x11>;
-def MTLO64 : MoveToLOHI<"mtlo", GPR64Opnd, [LO64]>, MTLO_FM<0x13>;
-def MFHI64 : MoveFromLOHI<"mfhi", GPR64Opnd, [HI64]>, MFLO_FM<0x10>;
-def MFLO64 : MoveFromLOHI<"mflo", GPR64Opnd, [LO64]>, MFLO_FM<0x12>;
+def MTHI64 : MoveToLOHI<"mthi", GPR64Opnd, [HI0_64]>, MTLO_FM<0x11>;
+def MTLO64 : MoveToLOHI<"mtlo", GPR64Opnd, [LO0_64]>, MTLO_FM<0x13>;
+def MFHI64 : MoveFromLOHI<"mfhi", GPR64Opnd, AC0_64>, MFLO_FM<0x10>;
+def MFLO64 : MoveFromLOHI<"mflo", GPR64Opnd, AC0_64>, MFLO_FM<0x12>;
+def PseudoMFHI64 : PseudoMFLOHI<GPR64, ACC128, MipsMFHI>;
+def PseudoMFLO64 : PseudoMFLOHI<GPR64, ACC128, MipsMFLO>;
+def PseudoMTLOHI64 : PseudoMTLOHI<ACC128, GPR64>;
/// Sign Ext In Register Instructions.
def SEB64 : SignExtInReg<"seb", i8, GPR64Opnd>, SEB_FM<0x10, 0x20>;
@@ -221,21 +201,18 @@ def DCLO : CountLeading1<"dclo", GPR64Opnd>, CLO_FM<0x25>;
def DSBH : SubwordSwap<"dsbh", GPR64Opnd>, SEB_FM<2, 0x24>;
def DSHD : SubwordSwap<"dshd", GPR64Opnd>, SEB_FM<5, 0x24>;
-def LEA_ADDiu64 : EffectiveAddress<"daddiu", GPR64Opnd, mem_ea_64>, LW_FM<0x19>;
+def LEA_ADDiu64 : EffectiveAddress<"daddiu", GPR64Opnd>, LW_FM<0x19>;
let isCodeGenOnly = 1 in
-def RDHWR64 : ReadHardware<GPR64Opnd, HW64RegsOpnd>, RDHWR_FM;
+def RDHWR64 : ReadHardware<GPR64Opnd, HWRegsOpnd>, RDHWR_FM;
-def DEXT : ExtBase<"dext", GPR64Opnd>, EXT_FM<3>;
-let Pattern = []<dag> in {
- def DEXTU : ExtBase<"dextu", GPR64Opnd>, EXT_FM<2>;
- def DEXTM : ExtBase<"dextm", GPR64Opnd>, EXT_FM<1>;
-}
-def DINS : InsBase<"dins", GPR64Opnd>, EXT_FM<7>;
-let Pattern = []<dag> in {
- def DINSU : InsBase<"dinsu", GPR64Opnd>, EXT_FM<6>;
- def DINSM : InsBase<"dinsm", GPR64Opnd>, EXT_FM<5>;
-}
+def DEXT : ExtBase<"dext", GPR64Opnd, uimm6, MipsExt>, EXT_FM<3>;
+def DEXTU : ExtBase<"dextu", GPR64Opnd, uimm6>, EXT_FM<2>;
+def DEXTM : ExtBase<"dextm", GPR64Opnd, uimm5>, EXT_FM<1>;
+
+def DINS : InsBase<"dins", GPR64Opnd, uimm6, MipsIns>, EXT_FM<7>;
+def DINSU : InsBase<"dinsu", GPR64Opnd, uimm6>, EXT_FM<6>;
+def DINSM : InsBase<"dinsm", GPR64Opnd, uimm5>, EXT_FM<5>;
let isCodeGenOnly = 1, rs = 0, shamt = 0 in {
def DSLL64_32 : FR<0x00, 0x3c, (outs GPR64:$rd), (ins GPR32:$rt),
@@ -251,18 +228,12 @@ let isCodeGenOnly = 1, rs = 0, shamt = 0 in {
//===----------------------------------------------------------------------===//
// extended loads
-let Predicates = [NotN64, HasStdEnc] in {
+let Predicates = [HasStdEnc] in {
def : MipsPat<(i64 (extloadi1 addr:$src)), (LB64 addr:$src)>;
def : MipsPat<(i64 (extloadi8 addr:$src)), (LB64 addr:$src)>;
def : MipsPat<(i64 (extloadi16 addr:$src)), (LH64 addr:$src)>;
def : MipsPat<(i64 (extloadi32 addr:$src)), (LW64 addr:$src)>;
}
-let Predicates = [IsN64, HasStdEnc] in {
- def : MipsPat<(i64 (extloadi1 addr:$src)), (LB64_P8 addr:$src)>;
- def : MipsPat<(i64 (extloadi8 addr:$src)), (LB64_P8 addr:$src)>;
- def : MipsPat<(i64 (extloadi16 addr:$src)), (LH64_P8 addr:$src)>;
- def : MipsPat<(i64 (extloadi32 addr:$src)), (LW64_P8 addr:$src)>;
-}
// hi/lo relocs
def : MipsPat<(MipsHi tglobaladdr:$in), (LUi64 tglobaladdr:$in)>;
@@ -316,7 +287,7 @@ defm : SetgeImmPats<GPR64, SLTi64, SLTiu64>;
// truncate
def : MipsPat<(i32 (trunc GPR64:$src)),
(SLL (EXTRACT_SUBREG GPR64:$src, sub_32), 0)>,
- Requires<[IsN64, HasStdEnc]>;
+ Requires<[HasStdEnc]>;
// 32-to-64-bit extension
def : MipsPat<(i64 (anyext GPR32:$src)), (SLL64_32 GPR32:$src)>;
@@ -330,10 +301,6 @@ def : MipsPat<(i64 (sext_inreg GPR64:$src, i32)),
// bswap MipsPattern
def : MipsPat<(bswap GPR64:$rt), (DSHD (DSBH GPR64:$rt))>;
-// mflo/hi patterns.
-def : MipsPat<(i64 (ExtractLOHI ACRegs128:$ac, imm:$lohi_idx)),
- (EXTRACT_SUBREG ACRegs128:$ac, imm:$lohi_idx)>;
-
//===----------------------------------------------------------------------===//
// Instruction aliases
//===----------------------------------------------------------------------===//
@@ -348,28 +315,16 @@ def : InstAlias<"dadd $rs, $rt, $imm",
0>;
/// Move between CPU and coprocessor registers
-let DecoderNamespace = "Mips64" in {
-def DMFC0_3OP64 : MFC3OP<(outs GPR64Opnd:$rt),
- (ins GPR64Opnd:$rd, uimm16:$sel),
- "dmfc0\t$rt, $rd, $sel">, MFC3OP_FM<0x10, 1>;
-def DMTC0_3OP64 : MFC3OP<(outs GPR64Opnd:$rd, uimm16:$sel),
- (ins GPR64Opnd:$rt),
- "dmtc0\t$rt, $rd, $sel">, MFC3OP_FM<0x10, 5>;
-def DMFC2_3OP64 : MFC3OP<(outs GPR64Opnd:$rt),
- (ins GPR64Opnd:$rd, uimm16:$sel),
- "dmfc2\t$rt, $rd, $sel">, MFC3OP_FM<0x12, 1>;
-def DMTC2_3OP64 : MFC3OP<(outs GPR64Opnd:$rd, uimm16:$sel),
- (ins GPR64Opnd:$rt),
- "dmtc2\t$rt, $rd, $sel">, MFC3OP_FM<0x12, 5>;
+let DecoderNamespace = "Mips64", Predicates = [HasMips64] in {
+def DMFC0 : MFC3OP<"dmfc0", GPR64Opnd>, MFC3OP_FM<0x10, 1>;
+def DMTC0 : MFC3OP<"dmtc0", GPR64Opnd>, MFC3OP_FM<0x10, 5>;
+def DMFC2 : MFC3OP<"dmfc2", GPR64Opnd>, MFC3OP_FM<0x12, 1>;
+def DMTC2 : MFC3OP<"dmtc2", GPR64Opnd>, MFC3OP_FM<0x12, 5>;
}
// Two operand (implicit 0 selector) versions:
-def : InstAlias<"dmfc0 $rt, $rd",
- (DMFC0_3OP64 GPR64Opnd:$rt, GPR64Opnd:$rd, 0), 0>;
-def : InstAlias<"dmtc0 $rt, $rd",
- (DMTC0_3OP64 GPR64Opnd:$rd, 0, GPR64Opnd:$rt), 0>;
-def : InstAlias<"dmfc2 $rt, $rd",
- (DMFC2_3OP64 GPR64Opnd:$rt, GPR64Opnd:$rd, 0), 0>;
-def : InstAlias<"dmtc2 $rt, $rd",
- (DMTC2_3OP64 GPR64Opnd:$rd, 0, GPR64Opnd:$rt), 0>;
+def : InstAlias<"dmfc0 $rt, $rd", (DMFC0 GPR64Opnd:$rt, GPR64Opnd:$rd, 0), 0>;
+def : InstAlias<"dmtc0 $rt, $rd", (DMTC0 GPR64Opnd:$rt, GPR64Opnd:$rd, 0), 0>;
+def : InstAlias<"dmfc2 $rt, $rd", (DMFC2 GPR64Opnd:$rt, GPR64Opnd:$rd, 0), 0>;
+def : InstAlias<"dmtc2 $rt, $rd", (DMTC2 GPR64Opnd:$rt, GPR64Opnd:$rd, 0), 0>;
diff --git a/lib/Target/Mips/MipsAnalyzeImmediate.h b/lib/Target/Mips/MipsAnalyzeImmediate.h
index a094dda..cc09034 100644
--- a/lib/Target/Mips/MipsAnalyzeImmediate.h
+++ b/lib/Target/Mips/MipsAnalyzeImmediate.h
@@ -22,7 +22,7 @@ namespace llvm {
};
typedef SmallVector<Inst, 7 > InstSeq;
- /// Analyze - Get an instrucion sequence to load immediate Imm. The last
+ /// Analyze - Get an instruction sequence to load immediate Imm. The last
/// instruction in the sequence must be an ADDiu if LastInstrIsADDiu is
/// true;
const InstSeq &Analyze(uint64_t Imm, unsigned Size, bool LastInstrIsADDiu);
@@ -32,19 +32,19 @@ namespace llvm {
/// AddInstr - Add I to all instruction sequences in SeqLs.
void AddInstr(InstSeqLs &SeqLs, const Inst &I);
- /// GetInstSeqLsADDiu - Get instrucion sequences which end with an ADDiu to
+ /// GetInstSeqLsADDiu - Get instruction sequences which end with an ADDiu to
/// load immediate Imm
void GetInstSeqLsADDiu(uint64_t Imm, unsigned RemSize, InstSeqLs &SeqLs);
- /// GetInstSeqLsORi - Get instrucion sequences which end with an ORi to
+ /// GetInstSeqLsORi - Get instruction sequences which end with an ORi to
/// load immediate Imm
void GetInstSeqLsORi(uint64_t Imm, unsigned RemSize, InstSeqLs &SeqLs);
- /// GetInstSeqLsSLL - Get instrucion sequences which end with a SLL to
+ /// GetInstSeqLsSLL - Get instruction sequences which end with a SLL to
/// load immediate Imm
void GetInstSeqLsSLL(uint64_t Imm, unsigned RemSize, InstSeqLs &SeqLs);
- /// GetInstSeqLs - Get instrucion sequences to load immediate Imm.
+ /// GetInstSeqLs - Get instruction sequences to load immediate Imm.
void GetInstSeqLs(uint64_t Imm, unsigned RemSize, InstSeqLs &SeqLs);
/// ReplaceADDiuSLLWithLUi - Replace an ADDiu & SLL pair with a LUi.
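For context on the interfaces above: the shortest interesting InstSeq is the classic LUi/ORi pair. A minimal sketch of that decomposition for a 32-bit immediate (illustrative only; it stands apart from the SeqLs machinery):

  #include <cassert>
  #include <cstdint>
  #include <cstdio>

  int main() {
    uint32_t Imm = 0xDEADBEEF;
    uint16_t Hi = Imm >> 16;    // operand of LUi
    uint16_t Lo = Imm & 0xFFFF; // operand of ORi
    assert((((uint32_t)Hi << 16) | Lo) == Imm);
    printf("lui $1, 0x%X\nori $1, $1, 0x%X\n", Hi, Lo);
    return 0;
  }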
diff --git a/lib/Target/Mips/MipsAsmPrinter.cpp b/lib/Target/Mips/MipsAsmPrinter.cpp
index 1dc3326..45c4398 100644
--- a/lib/Target/Mips/MipsAsmPrinter.cpp
+++ b/lib/Target/Mips/MipsAsmPrinter.cpp
@@ -15,11 +15,11 @@
#define DEBUG_TYPE "mips-asm-printer"
#include "InstPrinter/MipsInstPrinter.h"
#include "MCTargetDesc/MipsBaseInfo.h"
-#include "MCTargetDesc/MipsELFStreamer.h"
#include "Mips.h"
#include "MipsAsmPrinter.h"
#include "MipsInstrInfo.h"
#include "MipsMCInstLower.h"
+#include "MipsTargetStreamer.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Twine.h"
@@ -33,8 +33,8 @@
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/Instructions.h"
#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCELFStreamer.h"
#include "llvm/MC/MCInst.h"
-#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/ELF.h"
#include "llvm/Support/TargetRegistry.h"
@@ -45,12 +45,17 @@
using namespace llvm;
+MipsTargetStreamer &MipsAsmPrinter::getTargetStreamer() {
+ return static_cast<MipsTargetStreamer &>(OutStreamer.getTargetStreamer());
+}
+
bool MipsAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
// Initialize TargetLoweringObjectFile.
if (Subtarget->allowMixed16_32())
const_cast<TargetLoweringObjectFile&>(getObjFileLowering())
.Initialize(OutContext, TM);
MipsFI = MF.getInfo<MipsFunctionInfo>();
+ MCP = MF.getConstantPool();
AsmPrinter::runOnMachineFunction(MF);
return true;
}
@@ -71,6 +76,39 @@ void MipsAsmPrinter::EmitInstruction(const MachineInstr *MI) {
return;
}
+ // If we just ended a constant pool, mark it as such.
+ if (InConstantPool && MI->getOpcode() != Mips::CONSTPOOL_ENTRY) {
+ OutStreamer.EmitDataRegion(MCDR_DataRegionEnd);
+ InConstantPool = false;
+ }
+ if (MI->getOpcode() == Mips::CONSTPOOL_ENTRY) {
+ // CONSTPOOL_ENTRY - This instruction represents a floating
+ // constant pool entry in the function. The first operand is the ID#
+ // for this instruction, the second is the index into the
+ // MachineConstantPool that this entry refers to, and the third is
+ // the size in bytes of this constant pool entry.
+ // The required alignment is specified on the basic block holding this MI.
+ //
+ unsigned LabelId = (unsigned)MI->getOperand(0).getImm();
+ unsigned CPIdx = (unsigned)MI->getOperand(1).getIndex();
+
+ // If this is the first entry of the pool, mark it.
+ if (!InConstantPool) {
+ OutStreamer.EmitDataRegion(MCDR_DataRegion);
+ InConstantPool = true;
+ }
+
+ OutStreamer.EmitLabel(GetCPISymbol(LabelId));
+
+ const MachineConstantPoolEntry &MCPE = MCP->getConstants()[CPIdx];
+ if (MCPE.isMachineConstantPoolEntry())
+ EmitMachineConstantPoolValue(MCPE.Val.MachineCPVal);
+ else
+ EmitGlobalConstant(MCPE.Val.ConstVal);
+ return;
+ }
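The data-region bookkeeping above (and the matching cleanup in EmitFunctionBodyEnd later in this file) amounts to a two-state machine over the instruction stream. A minimal standalone sketch, with printf standing in for OutStreamer.EmitDataRegion:

  #include <cstdio>

  int main() {
    // true = CONSTPOOL_ENTRY, false = any other instruction.
    const bool IsEntry[] = {false, true, true, false, true, false};
    bool InConstantPool = false;
    for (bool E : IsEntry) {
      if (InConstantPool && !E) {  // just left a pool: close the region
        printf("MCDR_DataRegionEnd\n");
        InConstantPool = false;
      }
      if (E && !InConstantPool) {  // first entry of a pool: open a region
        printf("MCDR_DataRegion\n");
        InConstantPool = true;
      }
    }
    if (InConstantPool) // mirrors the end-of-function cleanup
      printf("MCDR_DataRegionEnd\n");
    return 0;
  }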
+
MachineBasicBlock::const_instr_iterator I = MI;
MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end();
@@ -238,16 +276,15 @@ void MipsAsmPrinter::EmitFunctionEntryLabel() {
}
if (Subtarget->inMicroMipsMode())
- if (MipsELFStreamer *MES = dyn_cast<MipsELFStreamer>(&OutStreamer))
- MES->emitMipsSTOCG(*Subtarget, CurrentFnSym,
- (unsigned)ELF::STO_MIPS_MICROMIPS);
+ getTargetStreamer().emitMipsHackSTOCG(CurrentFnSym,
+ (unsigned)ELF::STO_MIPS_MICROMIPS);
OutStreamer.EmitLabel(CurrentFnSym);
}
/// EmitFunctionBodyStart - Targets can override this to emit stuff before
/// the first basic block in the function.
void MipsAsmPrinter::EmitFunctionBodyStart() {
- MCInstLowering.Initialize(Mang, &MF->getContext());
+ MCInstLowering.Initialize(&MF->getContext());
bool IsNakedFunction =
MF->getFunction()->
@@ -284,6 +321,12 @@ void MipsAsmPrinter::EmitFunctionBodyEnd() {
}
OutStreamer.EmitRawText("\t.end\t" + Twine(CurrentFnSym->getName()));
}
+ // Make sure to terminate any constant pools that were at the end
+ // of the function.
+ if (!InConstantPool)
+ return;
+ InConstantPool = false;
+ OutStreamer.EmitDataRegion(MCDR_DataRegionEnd);
}
/// isBlockOnlyReachableByFallthrough - Return true if the basic block has
@@ -418,6 +461,11 @@ bool MipsAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
return false;
}
}
+ case 'w':
+ // Print MSA registers for the 'f' constraint
+ // In LLVM, the 'w' modifier doesn't need to do anything.
+ // We can just call printOperand as normal.
+ break;
}
}
@@ -485,7 +533,7 @@ void MipsAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
return;
case MachineOperand::MO_GlobalAddress:
- O << *Mang->getSymbol(MO.getGlobal());
+ O << *getSymbol(MO.getGlobal());
break;
case MachineOperand::MO_BlockAddress: {
@@ -526,6 +574,15 @@ void MipsAsmPrinter::printUnsignedImm(const MachineInstr *MI, int opNum,
printOperand(MI, opNum, O);
}
+void MipsAsmPrinter::printUnsignedImm8(const MachineInstr *MI, int opNum,
+ raw_ostream &O) {
+ const MachineOperand &MO = MI->getOperand(opNum);
+ if (MO.isImm())
+ O << (unsigned short int)(unsigned char)MO.getImm();
+ else
+ printOperand(MI, opNum, O);
+}
+
void MipsAsmPrinter::
printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &O) {
// Load/Store memory operands -- imm($reg)
@@ -587,15 +644,54 @@ void MipsAsmPrinter::EmitStartOfAsmFile(Module &M) {
}
-void MipsAsmPrinter::EmitEndOfAsmFile(Module &M) {
+static void emitELFHeaderFlagsCG(MipsTargetStreamer &TargetStreamer,
+ const MipsSubtarget &Subtarget) {
+ // Update e_header flags
+ unsigned EFlags = 0;
- if (OutStreamer.hasRawTextSupport()) return;
+ // TODO: Need to add -mabicalls and -mno-abicalls flags.
+ // Currently we assume that -mabicalls is the default.
+ EFlags |= ELF::EF_MIPS_CPIC;
+ if (Subtarget.inMips16Mode())
+ EFlags |= ELF::EF_MIPS_ARCH_ASE_M16;
+ else
+ EFlags |= ELF::EF_MIPS_NOREORDER;
+
+ // Architecture
+ if (Subtarget.hasMips64r2())
+ EFlags |= ELF::EF_MIPS_ARCH_64R2;
+ else if (Subtarget.hasMips64())
+ EFlags |= ELF::EF_MIPS_ARCH_64;
+ else if (Subtarget.hasMips32r2())
+ EFlags |= ELF::EF_MIPS_ARCH_32R2;
+ else
+ EFlags |= ELF::EF_MIPS_ARCH_32;
+
+ if (Subtarget.inMicroMipsMode())
+ EFlags |= ELF::EF_MIPS_MICROMIPS;
+
+ // ABI
+ if (Subtarget.isABI_O32())
+ EFlags |= ELF::EF_MIPS_ABI_O32;
+
+ // Relocation Model
+ Reloc::Model RM = Subtarget.getRelocationModel();
+ if (RM == Reloc::PIC_ || RM == Reloc::Default)
+ EFlags |= ELF::EF_MIPS_PIC;
+ else if (RM == Reloc::Static)
+ ; // Do nothing for Reloc::Static
+ else
+ llvm_unreachable("Unsupported relocation model for e_flags");
+
+ TargetStreamer.emitMipsHackELFFlags(EFlags);
+}
+
+void MipsAsmPrinter::EmitEndOfAsmFile(Module &M) {
// Emit Mips ELF register info
Subtarget->getMReginfo().emitMipsReginfoSectionCG(
OutStreamer, getObjFileLowering(), *Subtarget);
- if (MipsELFStreamer *MES = dyn_cast<MipsELFStreamer>(&OutStreamer))
- MES->emitELFHeaderFlagsCG(*Subtarget);
+ emitELFHeaderFlagsCG(getTargetStreamer(), *Subtarget);
}
void MipsAsmPrinter::PrintDebugValueComment(const MachineInstr *MI,
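A standalone sketch of the flag composition in emitELFHeaderFlagsCG above, with placeholder bit values (assumed for illustration; the real constants are the ELF::EF_MIPS_* values from llvm/Support/ELF.h):

  #include <cstdio>

  // Placeholder flag bits; not the real ELF::EF_MIPS_* values.
  enum : unsigned {
    EF_CPIC      = 1u << 0,
    EF_NOREORDER = 1u << 1,
    EF_ARCH_32   = 1u << 2,
    EF_ARCH_32R2 = 1u << 3,
    EF_PIC       = 1u << 4,
  };

  // Same decision order as above: base flags, architecture, reloc model.
  static unsigned computeFlags(bool Mips32r2, bool PIC) {
    unsigned EFlags = EF_CPIC | EF_NOREORDER;
    EFlags |= Mips32r2 ? EF_ARCH_32R2 : EF_ARCH_32;
    if (PIC)
      EFlags |= EF_PIC;
    return EFlags;
  }

  int main() {
    printf("e_flags = %#x\n", computeFlags(/*Mips32r2=*/true, /*PIC=*/true));
    return 0;
  }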
diff --git a/lib/Target/Mips/MipsAsmPrinter.h b/lib/Target/Mips/MipsAsmPrinter.h
index 4d1d624..11c6acd 100644
--- a/lib/Target/Mips/MipsAsmPrinter.h
+++ b/lib/Target/Mips/MipsAsmPrinter.h
@@ -25,10 +25,12 @@ namespace llvm {
class MCStreamer;
class MachineInstr;
class MachineBasicBlock;
+class MipsTargetStreamer;
class Module;
class raw_ostream;
class LLVM_LIBRARY_VISIBILITY MipsAsmPrinter : public AsmPrinter {
+ MipsTargetStreamer &getTargetStreamer();
void EmitInstrWithMacroNoAT(const MachineInstr *MI);
@@ -40,6 +42,16 @@ private:
// lowerOperand - Convert a MachineOperand into the equivalent MCOperand.
bool lowerOperand(const MachineOperand &MO, MCOperand &MCOp);
+ /// MCP - Keep a pointer to constantpool entries of the current
+ /// MachineFunction.
+ const MachineConstantPool *MCP;
+
+ /// InConstantPool - Maintain state when emitting a sequence of constant
+ /// pool entries so we can properly mark them as data regions.
+ bool InConstantPool;
+
+ bool UsingConstantPools;
+
public:
const MipsSubtarget *Subtarget;
@@ -47,8 +59,11 @@ public:
MipsMCInstLower MCInstLowering;
explicit MipsAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
- : AsmPrinter(TM, Streamer), MCInstLowering(*this) {
+ : AsmPrinter(TM, Streamer), MCP(0), InConstantPool(false),
+ MCInstLowering(*this) {
Subtarget = &TM.getSubtarget<MipsSubtarget>();
+ UsingConstantPools =
+ (Subtarget->inMips16Mode() && Subtarget->useConstantIslands());
}
virtual const char *getPassName() const {
@@ -57,6 +72,12 @@ public:
virtual bool runOnMachineFunction(MachineFunction &MF);
+ virtual void EmitConstantPool() LLVM_OVERRIDE {
+ if (!UsingConstantPools)
+ AsmPrinter::EmitConstantPool();
+ // When using constant islands, we emit the constant pools ourselves.
+ }
+
void EmitInstruction(const MachineInstr *MI);
void printSavedRegsBitmask(raw_ostream &O);
void printHex32(unsigned int Value, raw_ostream &O);
@@ -75,6 +96,7 @@ public:
raw_ostream &O);
void printOperand(const MachineInstr *MI, int opNum, raw_ostream &O);
void printUnsignedImm(const MachineInstr *MI, int opNum, raw_ostream &O);
+ void printUnsignedImm8(const MachineInstr *MI, int opNum, raw_ostream &O);
void printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &O);
void printMemOperandEA(const MachineInstr *MI, int opNum, raw_ostream &O);
void printFCCOperand(const MachineInstr *MI, int opNum, raw_ostream &O,
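One behavioral note on the new printUnsignedImm8 declared above: the double cast in its definition truncates to the low 8 bits before widening for printing. A two-line check:

  #include <cassert>

  int main() {
    long long Imm = 0x1FF; // an out-of-range immediate
    unsigned short Printed = (unsigned short)(unsigned char)Imm;
    assert(Printed == 0xFF); // only the low 8 bits survive
    return 0;
  }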
diff --git a/lib/Target/Mips/MipsCallingConv.td b/lib/Target/Mips/MipsCallingConv.td
index ac40b11..66391cb 100644
--- a/lib/Target/Mips/MipsCallingConv.td
+++ b/lib/Target/Mips/MipsCallingConv.td
@@ -26,8 +26,10 @@ def RetCC_MipsO32 : CallingConv<[
// f32 are returned in registers F0, F2
CCIfType<[f32], CCAssignToReg<[F0, F2]>>,
- // f64 are returned in register D0, D1
- CCIfType<[f64], CCIfSubtarget<"isNotSingleFloat()", CCAssignToReg<[D0, D1]>>>
+ // f64 arguments are returned in D0_64 and D1_64 in FP64bit mode or
+ // in D0 and D1 in FP32bit mode.
+ CCIfType<[f64], CCIfSubtarget<"isFP64bit()", CCAssignToReg<[D0_64, D1_64]>>>,
+ CCIfType<[f64], CCIfSubtarget<"isNotFP64bit()", CCAssignToReg<[D0, D1]>>>
]>;
//===----------------------------------------------------------------------===//
@@ -149,7 +151,16 @@ def RetCC_MipsEABI : CallingConv<[
//===----------------------------------------------------------------------===//
def CC_MipsO32_FastCC : CallingConv<[
// f64 arguments are passed in double-precision floating pointer registers.
- CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7, D8, D9]>>,
+ CCIfType<[f64], CCIfSubtarget<"isNotFP64bit()",
+ CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7,
+ D8, D9]>>>,
+ CCIfType<[f64], CCIfSubtarget<"isFP64bit()",
+ CCAssignToReg<[D0_64, D1_64, D2_64, D3_64,
+ D4_64, D5_64, D6_64, D7_64,
+ D8_64, D9_64, D10_64, D11_64,
+ D12_64, D13_64, D14_64, D15_64,
+ D16_64, D17_64, D18_64,
+ D19_64]>>>,
// Stack parameter slots for f64 are 64-bit doublewords and 8-byte aligned.
CCIfType<[f64], CCAssignToStack<8, 8>>
@@ -224,6 +235,9 @@ def CSR_SingleFloatOnly : CalleeSavedRegs<(add (sequence "F%u", 31, 20), RA, FP,
def CSR_O32 : CalleeSavedRegs<(add (sequence "D%u", 15, 10), RA, FP,
(sequence "S%u", 7, 0))>;
+def CSR_O32_FP64 : CalleeSavedRegs<(add (sequence "D%u_64", 31, 20), RA, FP,
+ (sequence "S%u", 7, 0))>;
+
def CSR_N32 : CalleeSavedRegs<(add D31_64, D29_64, D27_64, D25_64, D24_64,
D23_64, D22_64, D21_64, RA_64, FP_64, GP_64,
(sequence "S%u_64", 7, 0))>;
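A small sketch of the subtarget-dependent f64 return-register choice that the RetCC_MipsO32 hunk encodes (register names as plain strings; the FP64 predicate is checked first, matching the order of the two CCIfSubtarget arms):

  #include <cstdio>

  static const char *f64ReturnReg(bool IsFP64bit, unsigned ValNo) {
    static const char *const FP64[] = {"D0_64", "D1_64"};
    static const char *const FP32[] = {"D0", "D1"};
    return (IsFP64bit ? FP64 : FP32)[ValNo & 1];
  }

  int main() {
    printf("first f64 return reg, FP64 mode: %s\n", f64ReturnReg(true, 0));
    printf("first f64 return reg, FP32 mode: %s\n", f64ReturnReg(false, 0));
    return 0;
  }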
diff --git a/lib/Target/Mips/MipsCodeEmitter.cpp b/lib/Target/Mips/MipsCodeEmitter.cpp
index 813037e..ca4163d 100644
--- a/lib/Target/Mips/MipsCodeEmitter.cpp
+++ b/lib/Target/Mips/MipsCodeEmitter.cpp
@@ -105,11 +105,16 @@ private:
const MachineOperand &MO) const;
unsigned getJumpTargetOpValue(const MachineInstr &MI, unsigned OpNo) const;
+ unsigned getJumpTargetOpValueMM(const MachineInstr &MI, unsigned OpNo) const;
+ unsigned getBranchTargetOpValueMM(const MachineInstr &MI,
+ unsigned OpNo) const;
unsigned getBranchTargetOpValue(const MachineInstr &MI, unsigned OpNo) const;
unsigned getMemEncoding(const MachineInstr &MI, unsigned OpNo) const;
+ unsigned getMemEncodingMMImm12(const MachineInstr &MI, unsigned OpNo) const;
unsigned getSizeExtEncoding(const MachineInstr &MI, unsigned OpNo) const;
unsigned getSizeInsEncoding(const MachineInstr &MI, unsigned OpNo) const;
+ unsigned getLSAImmEncoding(const MachineInstr &MI, unsigned OpNo) const;
void emitGlobalAddressUnaligned(const GlobalValue *GV, unsigned Reloc,
int Offset) const;
@@ -186,6 +191,18 @@ unsigned MipsCodeEmitter::getJumpTargetOpValue(const MachineInstr &MI,
return 0;
}
+unsigned MipsCodeEmitter::getJumpTargetOpValueMM(const MachineInstr &MI,
+ unsigned OpNo) const {
+ llvm_unreachable("Unimplemented function.");
+ return 0;
+}
+
+unsigned MipsCodeEmitter::getBranchTargetOpValueMM(const MachineInstr &MI,
+ unsigned OpNo) const {
+ llvm_unreachable("Unimplemented function.");
+ return 0;
+}
+
unsigned MipsCodeEmitter::getBranchTargetOpValue(const MachineInstr &MI,
unsigned OpNo) const {
MachineOperand MO = MI.getOperand(OpNo);
@@ -201,6 +218,12 @@ unsigned MipsCodeEmitter::getMemEncoding(const MachineInstr &MI,
return (getMachineOpValue(MI, MI.getOperand(OpNo+1)) & 0xFFFF) | RegBits;
}
+unsigned MipsCodeEmitter::getMemEncodingMMImm12(const MachineInstr &MI,
+ unsigned OpNo) const {
+ llvm_unreachable("Unimplemented function.");
+ return 0;
+}
+
unsigned MipsCodeEmitter::getSizeExtEncoding(const MachineInstr &MI,
unsigned OpNo) const {
// size is encoded as size-1.
@@ -214,6 +237,12 @@ unsigned MipsCodeEmitter::getSizeInsEncoding(const MachineInstr &MI,
getMachineOpValue(MI, MI.getOperand(OpNo)) - 1;
}
+unsigned MipsCodeEmitter::getLSAImmEncoding(const MachineInstr &MI,
+ unsigned OpNo) const {
+ llvm_unreachable("Unimplemented function.");
+ return 0;
+}
+
/// getMachineOpValue - Return binary encoding of operand. If the machine
/// operand requires relocation, record the relocation and return zero.
unsigned MipsCodeEmitter::getMachineOpValue(const MachineInstr &MI,
@@ -316,6 +345,14 @@ bool MipsCodeEmitter::expandPseudos(MachineBasicBlock::instr_iterator &MI,
BuildMI(MBB, &*MI, MI->getDebugLoc(), II->get(Mips::SLL), Mips::ZERO)
.addReg(Mips::ZERO).addImm(0);
break;
+ case Mips::B:
+ BuildMI(MBB, &*MI, MI->getDebugLoc(), II->get(Mips::BEQ)).addReg(Mips::ZERO)
+ .addReg(Mips::ZERO).addOperand(MI->getOperand(0));
+ break;
+ case Mips::TRAP:
+ BuildMI(MBB, &*MI, MI->getDebugLoc(), II->get(Mips::BREAK)).addImm(0)
+ .addImm(0);
+ break;
case Mips::JALRPseudo:
BuildMI(MBB, &*MI, MI->getDebugLoc(), II->get(Mips::JALR), Mips::RA)
.addReg(MI->getOperand(0).getReg());
diff --git a/lib/Target/Mips/MipsCondMov.td b/lib/Target/Mips/MipsCondMov.td
index 39862b3..2de1430 100644
--- a/lib/Target/Mips/MipsCondMov.td
+++ b/lib/Target/Mips/MipsCondMov.td
@@ -19,7 +19,7 @@
class CMov_I_I_FT<string opstr, RegisterOperand CRC, RegisterOperand DRC,
InstrItinClass Itin> :
InstSE<(outs DRC:$rd), (ins DRC:$rs, CRC:$rt, DRC:$F),
- !strconcat(opstr, "\t$rd, $rs, $rt"), [], Itin, FrmFR> {
+ !strconcat(opstr, "\t$rd, $rs, $rt"), [], Itin, FrmFR, opstr> {
let Constraints = "$F = $rd";
}
@@ -37,7 +37,7 @@ class CMov_F_I_FT<string opstr, RegisterOperand RC, InstrItinClass Itin,
InstSE<(outs RC:$rd), (ins RC:$rs, FCCRegsOpnd:$fcc, RC:$F),
!strconcat(opstr, "\t$rd, $rs, $fcc"),
[(set RC:$rd, (OpNode RC:$rs, FCCRegsOpnd:$fcc, RC:$F))],
- Itin, FrmFR> {
+ Itin, FrmFR, opstr> {
let Constraints = "$F = $rd";
}
@@ -103,91 +103,94 @@ multiclass MovnPats<RegisterClass CRC, RegisterClass DRC, Instruction MOVNInst,
}
// Instantiation of instructions.
-def MOVZ_I_I : CMov_I_I_FT<"movz", GPR32Opnd, GPR32Opnd, NoItinerary>,
+def MOVZ_I_I : MMRel, CMov_I_I_FT<"movz", GPR32Opnd, GPR32Opnd, IIArith>,
ADD_FM<0, 0xa>;
let Predicates = [HasStdEnc], isCodeGenOnly = 1 in {
- def MOVZ_I_I64 : CMov_I_I_FT<"movz", GPR32Opnd, GPR64Opnd,
- NoItinerary>, ADD_FM<0, 0xa>;
- def MOVZ_I64_I : CMov_I_I_FT<"movz", GPR64Opnd, GPR32Opnd,
- NoItinerary>, ADD_FM<0, 0xa>;
- def MOVZ_I64_I64 : CMov_I_I_FT<"movz", GPR64Opnd, GPR64Opnd,
- NoItinerary>, ADD_FM<0, 0xa>;
+ def MOVZ_I_I64 : CMov_I_I_FT<"movz", GPR32Opnd, GPR64Opnd, IIArith>,
+ ADD_FM<0, 0xa>;
+ def MOVZ_I64_I : CMov_I_I_FT<"movz", GPR64Opnd, GPR32Opnd, IIArith>,
+ ADD_FM<0, 0xa>;
+ def MOVZ_I64_I64 : CMov_I_I_FT<"movz", GPR64Opnd, GPR64Opnd, IIArith>,
+ ADD_FM<0, 0xa>;
}
-def MOVN_I_I : CMov_I_I_FT<"movn", GPR32Opnd, GPR32Opnd,
- NoItinerary>, ADD_FM<0, 0xb>;
+def MOVN_I_I : MMRel, CMov_I_I_FT<"movn", GPR32Opnd, GPR32Opnd, IIArith>,
+ ADD_FM<0, 0xb>;
let Predicates = [HasStdEnc], isCodeGenOnly = 1 in {
- def MOVN_I_I64 : CMov_I_I_FT<"movn", GPR32Opnd, GPR64Opnd,
- NoItinerary>, ADD_FM<0, 0xb>;
- def MOVN_I64_I : CMov_I_I_FT<"movn", GPR64Opnd, GPR32Opnd,
- NoItinerary>, ADD_FM<0, 0xb>;
- def MOVN_I64_I64 : CMov_I_I_FT<"movn", GPR64Opnd, GPR64Opnd,
- NoItinerary>, ADD_FM<0, 0xb>;
+ def MOVN_I_I64 : CMov_I_I_FT<"movn", GPR32Opnd, GPR64Opnd, IIArith>,
+ ADD_FM<0, 0xb>;
+ def MOVN_I64_I : CMov_I_I_FT<"movn", GPR64Opnd, GPR32Opnd, IIArith>,
+ ADD_FM<0, 0xb>;
+ def MOVN_I64_I64 : CMov_I_I_FT<"movn", GPR64Opnd, GPR64Opnd, IIArith>,
+ ADD_FM<0, 0xb>;
}
-def MOVZ_I_S : CMov_I_F_FT<"movz.s", GPR32Opnd, FGR32RegsOpnd, IIFmove>,
+def MOVZ_I_S : CMov_I_F_FT<"movz.s", GPR32Opnd, FGR32Opnd, IIFmove>,
CMov_I_F_FM<18, 16>;
let isCodeGenOnly = 1 in
-def MOVZ_I64_S : CMov_I_F_FT<"movz.s", GPR64Opnd, FGR32RegsOpnd, IIFmove>,
+def MOVZ_I64_S : CMov_I_F_FT<"movz.s", GPR64Opnd, FGR32Opnd, IIFmove>,
CMov_I_F_FM<18, 16>, Requires<[HasMips64, HasStdEnc]>;
-def MOVN_I_S : CMov_I_F_FT<"movn.s", GPR32Opnd, FGR32RegsOpnd, IIFmove>,
+def MOVN_I_S : CMov_I_F_FT<"movn.s", GPR32Opnd, FGR32Opnd, IIFmove>,
CMov_I_F_FM<19, 16>;
let isCodeGenOnly = 1 in
-def MOVN_I64_S : CMov_I_F_FT<"movn.s", GPR64Opnd, FGR32RegsOpnd, IIFmove>,
+def MOVN_I64_S : CMov_I_F_FT<"movn.s", GPR64Opnd, FGR32Opnd, IIFmove>,
CMov_I_F_FM<19, 16>, Requires<[HasMips64, HasStdEnc]>;
let Predicates = [NotFP64bit, HasStdEnc] in {
- def MOVZ_I_D32 : CMov_I_F_FT<"movz.d", GPR32Opnd, AFGR64RegsOpnd, IIFmove>,
+ def MOVZ_I_D32 : CMov_I_F_FT<"movz.d", GPR32Opnd, AFGR64Opnd, IIFmove>,
CMov_I_F_FM<18, 17>;
- def MOVN_I_D32 : CMov_I_F_FT<"movn.d", GPR32Opnd, AFGR64RegsOpnd, IIFmove>,
+ def MOVN_I_D32 : CMov_I_F_FT<"movn.d", GPR32Opnd, AFGR64Opnd, IIFmove>,
CMov_I_F_FM<19, 17>;
}
-let Predicates = [IsFP64bit, HasStdEnc], isCodeGenOnly = 1 in {
- def MOVZ_I_D64 : CMov_I_F_FT<"movz.d", GPR32Opnd, FGR64RegsOpnd, IIFmove>,
+let Predicates = [IsFP64bit, HasStdEnc], DecoderNamespace = "Mips64" in {
+ def MOVZ_I_D64 : CMov_I_F_FT<"movz.d", GPR32Opnd, FGR64Opnd, IIFmove>,
CMov_I_F_FM<18, 17>;
- def MOVZ_I64_D64 : CMov_I_F_FT<"movz.d", GPR64Opnd, FGR64RegsOpnd,
- IIFmove>, CMov_I_F_FM<18, 17>;
- def MOVN_I_D64 : CMov_I_F_FT<"movn.d", GPR32Opnd, FGR64RegsOpnd, IIFmove>,
+ def MOVN_I_D64 : CMov_I_F_FT<"movn.d", GPR32Opnd, FGR64Opnd, IIFmove>,
CMov_I_F_FM<19, 17>;
- def MOVN_I64_D64 : CMov_I_F_FT<"movn.d", GPR64Opnd, FGR64RegsOpnd,
- IIFmove>, CMov_I_F_FM<19, 17>;
+ let isCodeGenOnly = 1 in {
+ def MOVZ_I64_D64 : CMov_I_F_FT<"movz.d", GPR64Opnd, FGR64Opnd,
+ IIFmove>, CMov_I_F_FM<18, 17>;
+ def MOVN_I64_D64 : CMov_I_F_FT<"movn.d", GPR64Opnd, FGR64Opnd,
+ IIFmove>, CMov_I_F_FM<19, 17>;
+ }
}
-def MOVT_I : CMov_F_I_FT<"movt", GPR32Opnd, IIArith, MipsCMovFP_T>,
+def MOVT_I : MMRel, CMov_F_I_FT<"movt", GPR32Opnd, IIArith, MipsCMovFP_T>,
CMov_F_I_FM<1>;
let isCodeGenOnly = 1 in
def MOVT_I64 : CMov_F_I_FT<"movt", GPR64Opnd, IIArith, MipsCMovFP_T>,
CMov_F_I_FM<1>, Requires<[HasMips64, HasStdEnc]>;
-def MOVF_I : CMov_F_I_FT<"movf", GPR32Opnd, IIArith, MipsCMovFP_F>,
+def MOVF_I : MMRel, CMov_F_I_FT<"movf", GPR32Opnd, IIArith, MipsCMovFP_F>,
CMov_F_I_FM<0>;
let isCodeGenOnly = 1 in
def MOVF_I64 : CMov_F_I_FT<"movf", GPR64Opnd, IIArith, MipsCMovFP_F>,
CMov_F_I_FM<0>, Requires<[HasMips64, HasStdEnc]>;
-def MOVT_S : CMov_F_F_FT<"movt.s", FGR32RegsOpnd, IIFmove, MipsCMovFP_T>,
+def MOVT_S : CMov_F_F_FT<"movt.s", FGR32Opnd, IIFmove, MipsCMovFP_T>,
CMov_F_F_FM<16, 1>;
-def MOVF_S : CMov_F_F_FT<"movf.s", FGR32RegsOpnd, IIFmove, MipsCMovFP_F>,
+def MOVF_S : CMov_F_F_FT<"movf.s", FGR32Opnd, IIFmove, MipsCMovFP_F>,
CMov_F_F_FM<16, 0>;
let Predicates = [NotFP64bit, HasStdEnc] in {
- def MOVT_D32 : CMov_F_F_FT<"movt.d", AFGR64RegsOpnd, IIFmove, MipsCMovFP_T>,
+ def MOVT_D32 : CMov_F_F_FT<"movt.d", AFGR64Opnd, IIFmove, MipsCMovFP_T>,
CMov_F_F_FM<17, 1>;
- def MOVF_D32 : CMov_F_F_FT<"movf.d", AFGR64RegsOpnd, IIFmove, MipsCMovFP_F>,
+ def MOVF_D32 : CMov_F_F_FT<"movf.d", AFGR64Opnd, IIFmove, MipsCMovFP_F>,
CMov_F_F_FM<17, 0>;
}
-let Predicates = [IsFP64bit, HasStdEnc], isCodeGenOnly = 1 in {
- def MOVT_D64 : CMov_F_F_FT<"movt.d", FGR64RegsOpnd, IIFmove, MipsCMovFP_T>,
+
+let Predicates = [IsFP64bit, HasStdEnc], DecoderNamespace = "Mips64" in {
+ def MOVT_D64 : CMov_F_F_FT<"movt.d", FGR64Opnd, IIFmove, MipsCMovFP_T>,
CMov_F_F_FM<17, 1>;
- def MOVF_D64 : CMov_F_F_FT<"movf.d", FGR64RegsOpnd, IIFmove, MipsCMovFP_F>,
+ def MOVF_D64 : CMov_F_F_FT<"movf.d", FGR64Opnd, IIFmove, MipsCMovFP_F>,
CMov_F_F_FM<17, 0>;
}
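For reviewers unfamiliar with the conditional moves being retyped in this file: the $F constraint ties the destination to its prior value, so movz/movn reduce to a select. A minimal sketch of the value semantics:

  #include <cassert>
  #include <cstdint>

  // movz rd, rs, rt : rd = (rt == 0) ? rs : rd  ($F is the old rd)
  static int32_t movz(int32_t F, int32_t rs, int32_t rt) {
    return rt == 0 ? rs : F;
  }

  // movn rd, rs, rt : rd = (rt != 0) ? rs : rd
  static int32_t movn(int32_t F, int32_t rs, int32_t rt) {
    return rt != 0 ? rs : F;
  }

  int main() {
    assert(movz(7, 42, 0) == 42 && movz(7, 42, 1) == 7);
    assert(movn(7, 42, 1) == 42 && movn(7, 42, 0) == 7);
    return 0;
  }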
diff --git a/lib/Target/Mips/MipsConstantIslandPass.cpp b/lib/Target/Mips/MipsConstantIslandPass.cpp
index bda0167..c46bbac 100644
--- a/lib/Target/Mips/MipsConstantIslandPass.cpp
+++ b/lib/Target/Mips/MipsConstantIslandPass.cpp
@@ -9,9 +9,7 @@
//
//
// This pass is used to make PC-relative loads of constants.
-// For now, only Mips16 will use this. While it has the same name and
-// uses many ideas from the LLVM ARM Constant Island Pass, it's not intended
-// to reuse any of the code from the ARM version.
+// For now, only Mips16 will use this.
//
// Loading constants inline is expensive on Mips16 and it's in general better
// to place the constant nearby in code space and then it can be loaded with a
@@ -27,31 +25,244 @@
#include "Mips.h"
#include "MCTargetDesc/MipsBaseInfo.h"
+#include "Mips16InstrInfo.h"
+#include "MipsMachineFunction.h"
#include "MipsTargetMachine.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/InstIterator.h"
#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/Format.h"
+#include <algorithm>
using namespace llvm;
+STATISTIC(NumCPEs, "Number of constpool entries");
+STATISTIC(NumSplit, "Number of uncond branches inserted");
+STATISTIC(NumCBrFixed, "Number of cond branches fixed");
+STATISTIC(NumUBrFixed, "Number of uncond branches fixed");
+
+// FIXME: This option should be removed once it has received sufficient testing.
+static cl::opt<bool>
+AlignConstantIslands("mips-align-constant-islands", cl::Hidden, cl::init(true),
+ cl::desc("Align constant islands in code"));
+
+
+// Rather than run "make check" tests with huge amounts of code, we force
+// the test to use this amount.
+//
+static cl::opt<int> ConstantIslandsSmallOffset(
+ "mips-constant-islands-small-offset",
+ cl::init(0),
+ cl::desc("Make small offsets be this amount for testing purposes"),
+ cl::Hidden);
+
+//
+// For testing purposes we tell it to not use relaxed load forms so that it
+// will split blocks.
+//
+static cl::opt<bool> NoLoadRelaxation(
+ "mips-constant-islands-no-load-relaxation",
+ cl::init(false),
+ cl::desc("Don't relax loads to long loads - for testing purposes"),
+ cl::Hidden);
+
+
namespace {
+
+
typedef MachineBasicBlock::iterator Iter;
typedef MachineBasicBlock::reverse_iterator ReverseIter;
+ /// MipsConstantIslands - Due to limited PC-relative displacements, Mips
+ /// requires constant pool entries to be scattered among the instructions
+ /// inside a function. To do this, it completely ignores the normal LLVM
+ /// constant pool; instead, it places constants wherever it feels like with
+ /// special instructions.
+ ///
+ /// The terminology used in this pass includes:
+ /// Islands - Clumps of constants placed in the function.
+ /// Water - Potential places where an island could be formed.
+ /// CPE - A constant pool entry that has been placed somewhere, which
+ /// tracks a list of users.
+
class MipsConstantIslands : public MachineFunctionPass {
+ /// BasicBlockInfo - Information about the offset and size of a single
+ /// basic block.
+ struct BasicBlockInfo {
+ /// Offset - Distance from the beginning of the function to the beginning
+ /// of this basic block.
+ ///
+ /// Offsets are computed assuming worst case padding before an aligned
+ /// block. This means that subtracting basic block offsets always gives a
+ /// conservative estimate of the real distance which may be smaller.
+ ///
+ /// Because worst case padding is used, the computed offset of an aligned
+ /// block may not actually be aligned.
+ unsigned Offset;
+
+ /// Size - Size of the basic block in bytes. If the block contains
+ /// inline assembly, this is a worst case estimate.
+ ///
+ /// The size does not include any alignment padding whether from the
+ /// beginning of the block, or from an aligned jump table at the end.
+ unsigned Size;
+
+ // FIXME: ignore LogAlign for this patch
+ //
+ unsigned postOffset(unsigned LogAlign = 0) const {
+ unsigned PO = Offset + Size;
+ return PO;
+ }
+
+ BasicBlockInfo() : Offset(0), Size(0) {}
+
+ };
+
+ std::vector<BasicBlockInfo> BBInfo;
+
+ /// WaterList - A sorted list of basic blocks where islands could be placed
+ /// (i.e. blocks that don't fall through to the following block, due
+ /// to a return, unreachable, or unconditional branch).
+ std::vector<MachineBasicBlock*> WaterList;
+
+ /// NewWaterList - The subset of WaterList that was created since the
+ /// previous iteration by inserting unconditional branches.
+ SmallSet<MachineBasicBlock*, 4> NewWaterList;
+
+ typedef std::vector<MachineBasicBlock*>::iterator water_iterator;
+
+ /// CPUser - One user of a constant pool, keeping the machine instruction
+ /// pointer, the constant pool being referenced, and the max displacement
+ /// allowed from the instruction to the CP. The HighWaterMark records the
+ /// highest basic block where a new CPEntry can be placed. To ensure this
+ /// pass terminates, the CP entries are initially placed at the end of the
+ /// function and then move monotonically to lower addresses. The
+ /// exception to this rule is when the current CP entry for a particular
+ /// CPUser is out of range, but there is another CP entry for the same
+ /// constant value in range. We want to use the existing in-range CP
+ /// entry, but if it later moves out of range, the search for new water
+ /// should resume where it left off. The HighWaterMark is used to record
+ /// that point.
+ struct CPUser {
+ MachineInstr *MI;
+ MachineInstr *CPEMI;
+ MachineBasicBlock *HighWaterMark;
+ private:
+ unsigned MaxDisp;
+ unsigned LongFormMaxDisp; // mips16 has 16/32 bit instructions
+ // with different displacements
+ unsigned LongFormOpcode;
+ public:
+ bool NegOk;
+ CPUser(MachineInstr *mi, MachineInstr *cpemi, unsigned maxdisp,
+ bool neg,
+ unsigned longformmaxdisp, unsigned longformopcode)
+ : MI(mi), CPEMI(cpemi), MaxDisp(maxdisp),
+ LongFormMaxDisp(longformmaxdisp), LongFormOpcode(longformopcode),
+ NegOk(neg){
+ HighWaterMark = CPEMI->getParent();
+ }
+ /// getMaxDisp - Returns the maximum displacement supported by MI.
+ unsigned getMaxDisp() const {
+ unsigned xMaxDisp = ConstantIslandsSmallOffset ?
+ ConstantIslandsSmallOffset : MaxDisp;
+ return xMaxDisp;
+ }
+ void setMaxDisp(unsigned val) {
+ MaxDisp = val;
+ }
+ unsigned getLongFormMaxDisp() const {
+ return LongFormMaxDisp;
+ }
+ unsigned getLongFormOpcode() const {
+ return LongFormOpcode;
+ }
+ };
+
+ /// CPUsers - Keep track of all of the machine instructions that use various
+ /// constant pools and their max displacement.
+ std::vector<CPUser> CPUsers;
+
+ /// CPEntry - One per constant pool entry, keeping the machine instruction
+ /// pointer, the constpool index, and the number of CPUser's which
+ /// reference this entry.
+ struct CPEntry {
+ MachineInstr *CPEMI;
+ unsigned CPI;
+ unsigned RefCount;
+ CPEntry(MachineInstr *cpemi, unsigned cpi, unsigned rc = 0)
+ : CPEMI(cpemi), CPI(cpi), RefCount(rc) {}
+ };
+
+ /// CPEntries - Keep track of all of the constant pool entry machine
+ /// instructions. For each original constpool index (i.e. those that
+ /// existed upon entry to this pass), it keeps a vector of entries.
+ /// Original elements are cloned as we go along; the clones are
+ /// put in the vector of the original element, but have distinct CPIs.
+ std::vector<std::vector<CPEntry> > CPEntries;
+
+ /// ImmBranch - One per immediate branch, keeping the machine instruction
+ /// pointer, conditional or unconditional, the max displacement,
+ /// and (if isCond is true) the corresponding unconditional branch
+ /// opcode.
+ struct ImmBranch {
+ MachineInstr *MI;
+ unsigned MaxDisp : 31;
+ bool isCond : 1;
+ int UncondBr;
+ ImmBranch(MachineInstr *mi, unsigned maxdisp, bool cond, int ubr)
+ : MI(mi), MaxDisp(maxdisp), isCond(cond), UncondBr(ubr) {}
+ };
+
+ /// ImmBranches - Keep track of all the immediate branch instructions.
+ ///
+ std::vector<ImmBranch> ImmBranches;
+
+ /// HasFarJump - True if any far jump instruction has been emitted during
+ /// the branch fix up pass.
+ bool HasFarJump;
+
+ const TargetMachine &TM;
+ bool IsPIC;
+ unsigned ABI;
+ const MipsSubtarget *STI;
+ const Mips16InstrInfo *TII;
+ MipsFunctionInfo *MFI;
+ MachineFunction *MF;
+ MachineConstantPool *MCP;
+
+ unsigned PICLabelUId;
+ bool PrescannedForConstants;
+
+ void initPICLabelUId(unsigned UId) {
+ PICLabelUId = UId;
+ }
+
+
+ unsigned createPICLabelUId() {
+ return PICLabelUId++;
+ }
+
public:
static char ID;
MipsConstantIslands(TargetMachine &tm)
: MachineFunctionPass(ID), TM(tm),
IsPIC(TM.getRelocationModel() == Reloc::PIC_),
- ABI(TM.getSubtarget<MipsSubtarget>().getTargetABI()) {}
+ ABI(TM.getSubtarget<MipsSubtarget>().getTargetABI()),
+ STI(&TM.getSubtarget<MipsSubtarget>()), MF(0), MCP(0),
+ PrescannedForConstants(false){}
virtual const char *getPassName() const {
return "Mips Constant Islands";
@@ -59,26 +270,1264 @@ namespace {
bool runOnMachineFunction(MachineFunction &F);
+ void doInitialPlacement(std::vector<MachineInstr*> &CPEMIs);
+ CPEntry *findConstPoolEntry(unsigned CPI, const MachineInstr *CPEMI);
+ unsigned getCPELogAlign(const MachineInstr *CPEMI);
+ void initializeFunctionInfo(const std::vector<MachineInstr*> &CPEMIs);
+ unsigned getOffsetOf(MachineInstr *MI) const;
+ unsigned getUserOffset(CPUser&) const;
+ void dumpBBs();
+ void verify();
+
+ bool isOffsetInRange(unsigned UserOffset, unsigned TrialOffset,
+ unsigned Disp, bool NegativeOK);
+ bool isOffsetInRange(unsigned UserOffset, unsigned TrialOffset,
+ const CPUser &U);
+
+ bool isLongFormOffsetInRange(unsigned UserOffset, unsigned TrialOffset,
+ const CPUser &U);
+
+ void computeBlockSize(MachineBasicBlock *MBB);
+ MachineBasicBlock *splitBlockBeforeInstr(MachineInstr *MI);
+ void updateForInsertedWaterBlock(MachineBasicBlock *NewBB);
+ void adjustBBOffsetsAfter(MachineBasicBlock *BB);
+ bool decrementCPEReferenceCount(unsigned CPI, MachineInstr* CPEMI);
+ int findInRangeCPEntry(CPUser& U, unsigned UserOffset);
+ int findLongFormInRangeCPEntry(CPUser& U, unsigned UserOffset);
+ bool findAvailableWater(CPUser&U, unsigned UserOffset,
+ water_iterator &WaterIter);
+ void createNewWater(unsigned CPUserIndex, unsigned UserOffset,
+ MachineBasicBlock *&NewMBB);
+ bool handleConstantPoolUser(unsigned CPUserIndex);
+ void removeDeadCPEMI(MachineInstr *CPEMI);
+ bool removeUnusedCPEntries();
+ bool isCPEntryInRange(MachineInstr *MI, unsigned UserOffset,
+ MachineInstr *CPEMI, unsigned Disp, bool NegOk,
+ bool DoDump = false);
+ bool isWaterInRange(unsigned UserOffset, MachineBasicBlock *Water,
+ CPUser &U, unsigned &Growth);
+ bool isBBInRange(MachineInstr *MI, MachineBasicBlock *BB, unsigned Disp);
+ bool fixupImmediateBr(ImmBranch &Br);
+ bool fixupConditionalBr(ImmBranch &Br);
+ bool fixupUnconditionalBr(ImmBranch &Br);
+
+ void prescanForConstants();
+
private:
- const TargetMachine &TM;
- bool IsPIC;
- unsigned ABI;
+
};
char MipsConstantIslands::ID = 0;
} // end of anonymous namespace
+
+bool MipsConstantIslands::isLongFormOffsetInRange
+ (unsigned UserOffset, unsigned TrialOffset,
+ const CPUser &U) {
+ return isOffsetInRange(UserOffset, TrialOffset,
+ U.getLongFormMaxDisp(), U.NegOk);
+}
+
+bool MipsConstantIslands::isOffsetInRange
+ (unsigned UserOffset, unsigned TrialOffset,
+ const CPUser &U) {
+ return isOffsetInRange(UserOffset, TrialOffset,
+ U.getMaxDisp(), U.NegOk);
+}
+/// Print block size and offset information - for debugging.
+void MipsConstantIslands::dumpBBs() {
+ DEBUG({
+ for (unsigned J = 0, E = BBInfo.size(); J !=E; ++J) {
+ const BasicBlockInfo &BBI = BBInfo[J];
+ dbgs() << format("%08x BB#%u\t", BBI.Offset, J)
+ << format(" size=%#x\n", BBInfo[J].Size);
+ }
+ });
+}
/// createMipsConstantIslandPass - Returns a pass that places constants
/// near their uses (constant islands) for PC-relative loads.
FunctionPass *llvm::createMipsConstantIslandPass(MipsTargetMachine &tm) {
return new MipsConstantIslands(tm);
}
-bool MipsConstantIslands::runOnMachineFunction(MachineFunction &F) {
+bool MipsConstantIslands::runOnMachineFunction(MachineFunction &mf) {
// The intention is for this to be a mips16 only pass for now
// FIXME:
- // if (!TM.getSubtarget<MipsSubtarget>().inMips16Mode())
- // return false;
+ MF = &mf;
+ MCP = mf.getConstantPool();
+ DEBUG(dbgs() << "constant island machine function " << "\n");
+ if (!TM.getSubtarget<MipsSubtarget>().inMips16Mode() ||
+ !MipsSubtarget::useConstantIslands()) {
+ return false;
+ }
+ TII = (const Mips16InstrInfo*)MF->getTarget().getInstrInfo();
+ MFI = MF->getInfo<MipsFunctionInfo>();
+ DEBUG(dbgs() << "constant island processing " << "\n");
+ //
+ // We will need to predetermine whether there are any constants we need to
+ // put in constant islands. TBD.
+ //
+ if (!PrescannedForConstants) prescanForConstants();
+
+ HasFarJump = false;
+ // This pass invalidates liveness information when it splits basic blocks.
+ MF->getRegInfo().invalidateLiveness();
+
+ // Renumber all of the machine basic blocks in the function, guaranteeing that
+ // the numbers agree with the position of the block in the function.
+ MF->RenumberBlocks();
+
+ bool MadeChange = false;
+
+ // Perform the initial placement of the constant pool entries. To start with,
+ // we put them all at the end of the function.
+ std::vector<MachineInstr*> CPEMIs;
+ if (!MCP->isEmpty())
+ doInitialPlacement(CPEMIs);
+
+ /// The next UID to take is the first unused one.
+ initPICLabelUId(CPEMIs.size());
+
+ // Do the initial scan of the function, building up information about the
+ // sizes of each block, the location of all the water, and finding all of the
+ // constant pool users.
+ initializeFunctionInfo(CPEMIs);
+ CPEMIs.clear();
+ DEBUG(dumpBBs());
+
+ /// Remove dead constant pool entries.
+ MadeChange |= removeUnusedCPEntries();
+
+ // Iteratively place constant pool entries and fix up branches until there
+ // is no change.
+ unsigned NoCPIters = 0, NoBRIters = 0;
+ (void)NoBRIters;
+ while (true) {
+ DEBUG(dbgs() << "Beginning CP iteration #" << NoCPIters << '\n');
+ bool CPChange = false;
+ for (unsigned i = 0, e = CPUsers.size(); i != e; ++i)
+ CPChange |= handleConstantPoolUser(i);
+ if (CPChange && ++NoCPIters > 30)
+ report_fatal_error("Constant Island pass failed to converge!");
+ DEBUG(dumpBBs());
+
+ // Clear NewWaterList now. If we split a block for branches, it should
+ // appear as "new water" for the next iteration of constant pool placement.
+ NewWaterList.clear();
+
+ DEBUG(dbgs() << "Beginning BR iteration #" << NoBRIters << '\n');
+ bool BRChange = false;
+ for (unsigned i = 0, e = ImmBranches.size(); i != e; ++i)
+ BRChange |= fixupImmediateBr(ImmBranches[i]);
+ if (BRChange && ++NoBRIters > 30)
+ report_fatal_error("Branch Fix Up pass failed to converge!");
+ DEBUG(dumpBBs());
+ if (!CPChange && !BRChange)
+ break;
+ MadeChange = true;
+ }
+
+ DEBUG(dbgs() << '\n'; dumpBBs());
+
+ BBInfo.clear();
+ WaterList.clear();
+ CPUsers.clear();
+ CPEntries.clear();
+ ImmBranches.clear();
+ return MadeChange;
+}
+
+/// doInitialPlacement - Perform the initial placement of the constant pool
+/// entries. To start with, we put them all at the end of the function.
+void
+MipsConstantIslands::doInitialPlacement(std::vector<MachineInstr*> &CPEMIs) {
+ // Create the basic block to hold the CPE's.
+ MachineBasicBlock *BB = MF->CreateMachineBasicBlock();
+ MF->push_back(BB);
+
+
+ // MachineConstantPool measures alignment in bytes. We measure in log2(bytes).
+ unsigned MaxAlign = Log2_32(MCP->getConstantPoolAlignment());
+
+ // Mark the basic block as required by the const-pool.
+ // If AlignConstantIslands isn't set, use 4-byte alignment for everything.
+ BB->setAlignment(AlignConstantIslands ? MaxAlign : 2);
+
+ // The function needs to be as aligned as the basic blocks. The linker may
+ // move functions around based on their alignment.
+ MF->ensureAlignment(BB->getAlignment());
+
+ // Order the entries in BB by descending alignment. That ensures correct
+ // alignment of all entries as long as BB is sufficiently aligned. Keep
+ // track of the insertion point for each alignment. We are going to bucket
+ // sort the entries as they are created.
+ SmallVector<MachineBasicBlock::iterator, 8> InsPoint(MaxAlign + 1, BB->end());
+
+ // Add all of the constants from the constant pool to the end block, use an
+ // identity mapping of CPI's to CPE's.
+ const std::vector<MachineConstantPoolEntry> &CPs = MCP->getConstants();
+
+ const DataLayout &TD = *MF->getTarget().getDataLayout();
+ for (unsigned i = 0, e = CPs.size(); i != e; ++i) {
+ unsigned Size = TD.getTypeAllocSize(CPs[i].getType());
+ assert(Size >= 4 && "Too small constant pool entry");
+ unsigned Align = CPs[i].getAlignment();
+ assert(isPowerOf2_32(Align) && "Invalid alignment");
+ // Verify that all constant pool entries are a multiple of their alignment.
+ // If not, we would have to pad them out so that instructions stay aligned.
+ assert((Size % Align) == 0 && "CP Entry not a multiple of its alignment!");
+
+ // Insert CONSTPOOL_ENTRY before entries with a smaller alignment.
+ unsigned LogAlign = Log2_32(Align);
+ MachineBasicBlock::iterator InsAt = InsPoint[LogAlign];
+
+ MachineInstr *CPEMI =
+ BuildMI(*BB, InsAt, DebugLoc(), TII->get(Mips::CONSTPOOL_ENTRY))
+ .addImm(i).addConstantPoolIndex(i).addImm(Size);
+
+ CPEMIs.push_back(CPEMI);
+
+ // Ensure that future entries with higher alignment get inserted before
+ // CPEMI. This is bucket sort with iterators.
+ for (unsigned a = LogAlign + 1; a <= MaxAlign; ++a)
+ if (InsPoint[a] == InsAt)
+ InsPoint[a] = CPEMI;
+ // Add a new CPEntry, but no corresponding CPUser yet.
+ std::vector<CPEntry> CPEs;
+ CPEs.push_back(CPEntry(CPEMI, i));
+ CPEntries.push_back(CPEs);
+ ++NumCPEs;
+ DEBUG(dbgs() << "Moved CPI#" << i << " to end of function, size = "
+ << Size << ", align = " << Align <<'\n');
+ }
+ DEBUG(BB->dump());
+}
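The InsPoint loop above is a bucket sort by log2(alignment). A standalone sketch with plain ints in place of CONSTPOOL_ENTRY instructions (MaxAlign assumed precomputed):

  #include <cstdio>
  #include <list>
  #include <vector>

  int main() {
    std::list<unsigned> Block; // stands in for the island block
    const unsigned MaxAlign = 3;
    std::vector<std::list<unsigned>::iterator> InsPoint(MaxAlign + 1,
                                                        Block.end());
    const unsigned LogAligns[] = {2, 3, 2, 1}; // per-CPI alignments
    for (unsigned CPI = 0; CPI != 4; ++CPI) {
      std::list<unsigned>::iterator InsAt = InsPoint[LogAligns[CPI]];
      std::list<unsigned>::iterator New = Block.insert(InsAt, CPI);
      // Future higher-aligned entries must land before this one.
      for (unsigned a = LogAligns[CPI] + 1; a <= MaxAlign; ++a)
        if (InsPoint[a] == InsAt)
          InsPoint[a] = New;
    }
    for (unsigned CPI : Block)
      printf("%u ", CPI); // prints "1 0 2 3": descending alignment
    printf("\n");
    return 0;
  }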
+
+/// BBHasFallthrough - Return true if the specified basic block can fallthrough
+/// into the block immediately after it.
+static bool BBHasFallthrough(MachineBasicBlock *MBB) {
+ // Get the next machine basic block in the function.
+ MachineFunction::iterator MBBI = MBB;
+ // Can't fall off end of function.
+ if (llvm::next(MBBI) == MBB->getParent()->end())
+ return false;
+
+ MachineBasicBlock *NextBB = llvm::next(MBBI);
+ for (MachineBasicBlock::succ_iterator I = MBB->succ_begin(),
+ E = MBB->succ_end(); I != E; ++I)
+ if (*I == NextBB)
+ return true;
+
return false;
}
+/// findConstPoolEntry - Given the constpool index and CONSTPOOL_ENTRY MI,
+/// look up the corresponding CPEntry.
+MipsConstantIslands::CPEntry
+*MipsConstantIslands::findConstPoolEntry(unsigned CPI,
+ const MachineInstr *CPEMI) {
+ std::vector<CPEntry> &CPEs = CPEntries[CPI];
+ // Number of entries per constpool index should be small, just do a
+ // linear search.
+ for (unsigned i = 0, e = CPEs.size(); i != e; ++i) {
+ if (CPEs[i].CPEMI == CPEMI)
+ return &CPEs[i];
+ }
+ return NULL;
+}
+
+/// getCPELogAlign - Returns the required alignment of the constant pool entry
+/// represented by CPEMI. Alignment is measured in log2(bytes) units.
+unsigned MipsConstantIslands::getCPELogAlign(const MachineInstr *CPEMI) {
+ assert(CPEMI && CPEMI->getOpcode() == Mips::CONSTPOOL_ENTRY);
+
+ // Everything is 4-byte aligned unless AlignConstantIslands is set.
+ if (!AlignConstantIslands)
+ return 2;
+
+ unsigned CPI = CPEMI->getOperand(1).getIndex();
+ assert(CPI < MCP->getConstants().size() && "Invalid constant pool index.");
+ unsigned Align = MCP->getConstants()[CPI].getAlignment();
+ assert(isPowerOf2_32(Align) && "Invalid CPE alignment");
+ return Log2_32(Align);
+}
+
+/// initializeFunctionInfo - Do the initial scan of the function, building up
+/// information about the sizes of each block, the location of all the water,
+/// and finding all of the constant pool users.
+void MipsConstantIslands::
+initializeFunctionInfo(const std::vector<MachineInstr*> &CPEMIs) {
+ BBInfo.clear();
+ BBInfo.resize(MF->getNumBlockIDs());
+
+ // First thing, compute the size of all basic blocks, and see if the function
+ // has any inline assembly in it. If so, we have to be conservative about
+ // alignment assumptions, as we don't know for sure the size of any
+ // instructions in the inline assembly.
+ for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E; ++I)
+ computeBlockSize(I);
+
+
+ // Compute block offsets.
+ adjustBBOffsetsAfter(MF->begin());
+
+ // Now go back through the instructions and build up our data structures.
+ for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end();
+ MBBI != E; ++MBBI) {
+ MachineBasicBlock &MBB = *MBBI;
+
+ // If this block doesn't fall through into the next MBB, then this is
+ // 'water' where a constant pool island could be placed.
+ if (!BBHasFallthrough(&MBB))
+ WaterList.push_back(&MBB);
+ for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
+ I != E; ++I) {
+ if (I->isDebugValue())
+ continue;
+
+ int Opc = I->getOpcode();
+ if (I->isBranch()) {
+ bool isCond = false;
+ unsigned Bits = 0;
+ unsigned Scale = 1;
+ int UOpc = Opc;
+ switch (Opc) {
+ default:
+ continue; // Ignore other branches for now
+ case Mips::Bimm16:
+ Bits = 11;
+ Scale = 2;
+ isCond = false;
+ break;
+ case Mips::BimmX16:
+ Bits = 16;
+ Scale = 2;
+ isCond = false;
+ }
+ // Record this immediate branch.
+ unsigned MaxOffs = ((1 << (Bits-1))-1) * Scale;
+ ImmBranches.push_back(ImmBranch(I, MaxOffs, isCond, UOpc));
+ }
+
+ if (Opc == Mips::CONSTPOOL_ENTRY)
+ continue;
+
+
+ // Scan the instructions for constant pool operands.
+ for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op)
+ if (I->getOperand(op).isCPI()) {
+
+ // We found one. The addressing mode tells us the max displacement
+ // from the PC that this instruction permits.
+
+ // Basic size info comes from the TSFlags field.
+ unsigned Bits = 0;
+ unsigned Scale = 1;
+ bool NegOk = false;
+ unsigned LongFormBits = 0;
+ unsigned LongFormScale = 0;
+ unsigned LongFormOpcode = 0;
+ switch (Opc) {
+ default:
+ llvm_unreachable("Unknown addressing mode for CP reference!");
+ case Mips::LwRxPcTcp16:
+ Bits = 8;
+ Scale = 4;
+ LongFormOpcode = Mips::LwRxPcTcpX16;
+ LongFormBits = 16;
+ LongFormScale = 1;
+ break;
+ case Mips::LwRxPcTcpX16:
+ Bits = 16;
+ Scale = 1;
+ NegOk = true;
+ break;
+ }
+ // Remember that this is a user of a CP entry.
+ unsigned CPI = I->getOperand(op).getIndex();
+ MachineInstr *CPEMI = CPEMIs[CPI];
+ unsigned MaxOffs = ((1 << Bits)-1) * Scale;
+ unsigned LongFormMaxOffs = ((1 << LongFormBits)-1) * LongFormScale;
+ CPUsers.push_back(CPUser(I, CPEMI, MaxOffs, NegOk,
+ LongFormMaxOffs, LongFormOpcode));
+
+ // Increment corresponding CPEntry reference count.
+ CPEntry *CPE = findConstPoolEntry(CPI, CPEMI);
+ assert(CPE && "Cannot find a corresponding CPEntry!");
+ CPE->RefCount++;
+
+ // Instructions can only use one CP entry, don't bother scanning the
+ // rest of the operands.
+ break;
+
+ }
+
+ }
+ }
+
+}
+
+/// computeBlockSize - Compute the size and some alignment information for MBB.
+/// This function updates BBInfo directly.
+void MipsConstantIslands::computeBlockSize(MachineBasicBlock *MBB) {
+ BasicBlockInfo &BBI = BBInfo[MBB->getNumber()];
+ BBI.Size = 0;
+
+ for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E;
+ ++I)
+ BBI.Size += TII->GetInstSizeInBytes(I);
+
+}
+
+/// getOffsetOf - Return the current offset of the specified machine instruction
+/// from the start of the function. This offset changes as stuff is moved
+/// around inside the function.
+unsigned MipsConstantIslands::getOffsetOf(MachineInstr *MI) const {
+ MachineBasicBlock *MBB = MI->getParent();
+
+ // The offset is composed of two things: the sum of the sizes of all MBB's
+ // before this instruction's block, and the offset from the start of the block
+ // it is in.
+ unsigned Offset = BBInfo[MBB->getNumber()].Offset;
+
+ // Sum instructions before MI in MBB.
+ for (MachineBasicBlock::iterator I = MBB->begin(); &*I != MI; ++I) {
+ assert(I != MBB->end() && "Didn't find MI in its own basic block?");
+ Offset += TII->GetInstSizeInBytes(I);
+ }
+ return Offset;
+}
+
+/// CompareMBBNumbers - Little predicate function to sort the WaterList by MBB
+/// ID.
+static bool CompareMBBNumbers(const MachineBasicBlock *LHS,
+ const MachineBasicBlock *RHS) {
+ return LHS->getNumber() < RHS->getNumber();
+}
+
+/// updateForInsertedWaterBlock - When a block is newly inserted into the
+/// machine function, it upsets all of the block numbers. Renumber the blocks
+/// and update the arrays that parallel this numbering.
+void MipsConstantIslands::updateForInsertedWaterBlock
+ (MachineBasicBlock *NewBB) {
+ // Renumber the MBB's to keep them consecutive.
+ NewBB->getParent()->RenumberBlocks(NewBB);
+
+ // Insert an entry into BBInfo to align it properly with the (newly
+ // renumbered) block numbers.
+ BBInfo.insert(BBInfo.begin() + NewBB->getNumber(), BasicBlockInfo());
+
+ // Next, update WaterList. Specifically, we need to add NewMBB as having
+ // available water after it.
+ water_iterator IP =
+ std::lower_bound(WaterList.begin(), WaterList.end(), NewBB,
+ CompareMBBNumbers);
+ WaterList.insert(IP, NewBB);
+}
+
+unsigned MipsConstantIslands::getUserOffset(CPUser &U) const {
+ return getOffsetOf(U.MI);
+}
+
+/// Split the basic block containing MI into two blocks, which are joined by
+/// an unconditional branch. Update data structures and renumber blocks to
+/// account for this change and returns the newly created block.
+MachineBasicBlock *MipsConstantIslands::splitBlockBeforeInstr
+ (MachineInstr *MI) {
+ MachineBasicBlock *OrigBB = MI->getParent();
+
+ // Create a new MBB for the code after the OrigBB.
+ MachineBasicBlock *NewBB =
+ MF->CreateMachineBasicBlock(OrigBB->getBasicBlock());
+ MachineFunction::iterator MBBI = OrigBB; ++MBBI;
+ MF->insert(MBBI, NewBB);
+
+ // Splice the instructions starting with MI over to NewBB.
+ NewBB->splice(NewBB->end(), OrigBB, MI, OrigBB->end());
+
+ // Add an unconditional branch from OrigBB to NewBB.
+ // Note the new unconditional branch is not being recorded.
+ // There doesn't seem to be meaningful DebugInfo available; this doesn't
+ // correspond to anything in the source.
+ BuildMI(OrigBB, DebugLoc(), TII->get(Mips::Bimm16)).addMBB(NewBB);
+ ++NumSplit;
+
+ // Update the CFG. All succs of OrigBB are now succs of NewBB.
+ NewBB->transferSuccessors(OrigBB);
+
+ // OrigBB branches to NewBB.
+ OrigBB->addSuccessor(NewBB);
+
+ // Update internal data structures to account for the newly inserted MBB.
+ // This is almost the same as updateForInsertedWaterBlock, except that
+ // the Water goes after OrigBB, not NewBB.
+ MF->RenumberBlocks(NewBB);
+
+ // Insert an entry into BBInfo to align it properly with the (newly
+ // renumbered) block numbers.
+ BBInfo.insert(BBInfo.begin() + NewBB->getNumber(), BasicBlockInfo());
+
+ // Next, update WaterList. Specifically, we need to add OrigMBB as having
+ // available water after it (but not if it's already there, which happens
+ // when splitting before a conditional branch that is followed by an
+ // unconditional branch - in that case we want to insert NewBB).
+ water_iterator IP =
+ std::lower_bound(WaterList.begin(), WaterList.end(), OrigBB,
+ CompareMBBNumbers);
+ MachineBasicBlock* WaterBB = *IP;
+ if (WaterBB == OrigBB)
+ WaterList.insert(llvm::next(IP), NewBB);
+ else
+ WaterList.insert(IP, OrigBB);
+ NewWaterList.insert(OrigBB);
+
+ // Figure out how large the OrigBB is. As the first half of the original
+ // block, it cannot contain a tablejump. The size includes
+ // the new jump we added. (It should be possible to do this without
+ // recounting everything, but it's very confusing, and this is rarely
+ // executed.)
+ computeBlockSize(OrigBB);
+
+ // Figure out how large the NewMBB is. As the second half of the original
+ // block, it may contain a tablejump.
+ computeBlockSize(NewBB);
+
+ // All BBOffsets following these blocks must be modified.
+ adjustBBOffsetsAfter(OrigBB);
+
+ return NewBB;
+}
+
+
+
+/// isOffsetInRange - Checks whether UserOffset (the location of a constant pool
+/// reference) is within MaxDisp of TrialOffset (a proposed location of a
+/// constant pool entry).
+bool MipsConstantIslands::isOffsetInRange(unsigned UserOffset,
+ unsigned TrialOffset, unsigned MaxDisp,
+ bool NegativeOK) {
+ if (UserOffset <= TrialOffset) {
+ // User before the Trial.
+ if (TrialOffset - UserOffset <= MaxDisp)
+ return true;
+ } else if (NegativeOK) {
+ if (UserOffset - TrialOffset <= MaxDisp)
+ return true;
+ }
+ return false;
+}
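A worked instance of the check above, extracted standalone (offsets chosen arbitrarily):

  #include <cassert>

  static bool isOffsetInRange(unsigned UserOffset, unsigned TrialOffset,
                              unsigned MaxDisp, bool NegativeOK) {
    if (UserOffset <= TrialOffset)
      return TrialOffset - UserOffset <= MaxDisp; // user before the trial
    return NegativeOK && UserOffset - TrialOffset <= MaxDisp;
  }

  int main() {
    assert(!isOffsetInRange(0x100, 0x180, 0x7C, false)); // 0x80 > 0x7C
    assert( isOffsetInRange(0x100, 0x170, 0x7C, false)); // 0x70 fits
    assert(!isOffsetInRange(0x180, 0x100, 0x7C, false)); // backwards, !NegOk
    assert( isOffsetInRange(0x180, 0x100, 0x7C, true));  // backwards, NegOk
    return 0;
  }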
+
+/// isWaterInRange - Returns true if a CPE placed after the specified
+/// Water (a basic block) will be in range for the specific MI.
+///
+/// Compute how much the function will grow by inserting a CPE after Water.
+bool MipsConstantIslands::isWaterInRange(unsigned UserOffset,
+ MachineBasicBlock* Water, CPUser &U,
+ unsigned &Growth) {
+ unsigned CPELogAlign = getCPELogAlign(U.CPEMI);
+ unsigned CPEOffset = BBInfo[Water->getNumber()].postOffset(CPELogAlign);
+ unsigned NextBlockOffset, NextBlockAlignment;
+ MachineFunction::const_iterator NextBlock = Water;
+ if (++NextBlock == MF->end()) {
+ NextBlockOffset = BBInfo[Water->getNumber()].postOffset();
+ NextBlockAlignment = 0;
+ } else {
+ NextBlockOffset = BBInfo[NextBlock->getNumber()].Offset;
+ NextBlockAlignment = NextBlock->getAlignment();
+ }
+ unsigned Size = U.CPEMI->getOperand(2).getImm();
+ unsigned CPEEnd = CPEOffset + Size;
+
+ // The CPE may be able to hide in the alignment padding before the next
+ // block. It may also cause more padding to be required if it is more aligned
+ // than the next block.
+ if (CPEEnd > NextBlockOffset) {
+ Growth = CPEEnd - NextBlockOffset;
+ // Compute the padding that would go at the end of the CPE to align the next
+ // block.
+ Growth += OffsetToAlignment(CPEEnd, 1u << NextBlockAlignment);
+
+ // If the CPE is to be inserted before the instruction, that will raise
+ // the offset of the instruction. Also account for unknown alignment padding
+ // in blocks between CPE and the user.
+ if (CPEOffset < UserOffset)
+ UserOffset += Growth;
+ } else
+ // CPE fits in existing padding.
+ Growth = 0;
+
+ return isOffsetInRange(UserOffset, CPEOffset, U);
+}
+
+/// isCPEntryInRange - Returns true if the distance between specific MI and
+/// specific ConstPool entry instruction can fit in MI's displacement field.
+bool MipsConstantIslands::isCPEntryInRange
+ (MachineInstr *MI, unsigned UserOffset,
+ MachineInstr *CPEMI, unsigned MaxDisp,
+ bool NegOk, bool DoDump) {
+ unsigned CPEOffset = getOffsetOf(CPEMI);
+
+ if (DoDump) {
+ DEBUG({
+ unsigned Block = MI->getParent()->getNumber();
+ const BasicBlockInfo &BBI = BBInfo[Block];
+ dbgs() << "User of CPE#" << CPEMI->getOperand(0).getImm()
+ << " max delta=" << MaxDisp
+ << format(" insn address=%#x", UserOffset)
+ << " in BB#" << Block << ": "
+ << format("%#x-%x\t", BBI.Offset, BBI.postOffset()) << *MI
+ << format("CPE address=%#x offset=%+d: ", CPEOffset,
+ int(CPEOffset-UserOffset));
+ });
+ }
+
+ return isOffsetInRange(UserOffset, CPEOffset, MaxDisp, NegOk);
+}
+
+#ifndef NDEBUG
+/// BBIsJumpedOver - Return true if the specified basic block's only predecessor
+/// unconditionally branches to its only successor.
+static bool BBIsJumpedOver(MachineBasicBlock *MBB) {
+ if (MBB->pred_size() != 1 || MBB->succ_size() != 1)
+ return false;
+ MachineBasicBlock *Succ = *MBB->succ_begin();
+ MachineBasicBlock *Pred = *MBB->pred_begin();
+ MachineInstr *PredMI = &Pred->back();
+ if (PredMI->getOpcode() == Mips::Bimm16)
+ return PredMI->getOperand(0).getMBB() == Succ;
+ return false;
+}
+#endif
+
+void MipsConstantIslands::adjustBBOffsetsAfter(MachineBasicBlock *BB) {
+ unsigned BBNum = BB->getNumber();
+ for(unsigned i = BBNum + 1, e = MF->getNumBlockIDs(); i < e; ++i) {
+ // Get the offset and known bits at the end of the layout predecessor.
+ // Include the alignment of the current block.
+ unsigned Offset = BBInfo[i - 1].Offset + BBInfo[i - 1].Size;
+ BBInfo[i].Offset = Offset;
+ }
+}
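adjustBBOffsetsAfter is a running prefix sum over block sizes; a standalone sketch:

  #include <cassert>
  #include <vector>

  struct BBInfoLite { unsigned Offset, Size; };

  // Every block's offset is the previous block's offset plus its size.
  static void adjustOffsetsAfter(std::vector<BBInfoLite> &BBInfo,
                                 unsigned BBNum) {
    for (unsigned i = BBNum + 1, e = BBInfo.size(); i < e; ++i)
      BBInfo[i].Offset = BBInfo[i - 1].Offset + BBInfo[i - 1].Size;
  }

  int main() {
    std::vector<BBInfoLite> BBInfo = {{0, 16}, {16, 8}, {24, 4}};
    BBInfo[0].Size = 20; // block 0 grew by 4 bytes
    adjustOffsetsAfter(BBInfo, 0);
    assert(BBInfo[1].Offset == 20 && BBInfo[2].Offset == 28);
    return 0;
  }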
+
+/// decrementCPEReferenceCount - find the constant pool entry with index CPI
+/// and instruction CPEMI, and decrement its refcount. If the refcount
+/// becomes 0 remove the entry and instruction. Returns true if we removed
+/// the entry, false if we didn't.
+
+bool MipsConstantIslands::decrementCPEReferenceCount(unsigned CPI,
+ MachineInstr *CPEMI) {
+ // Find the old entry. Eliminate it if it is no longer used.
+ CPEntry *CPE = findConstPoolEntry(CPI, CPEMI);
+ assert(CPE && "Unexpected!");
+ if (--CPE->RefCount == 0) {
+ removeDeadCPEMI(CPEMI);
+ CPE->CPEMI = NULL;
+ --NumCPEs;
+ return true;
+ }
+ return false;
+}
+
+/// findInRangeCPEntry - See if the currently referenced CPE is in range;
+/// if not, see if an in-range clone of the CPE is in range, and if so,
+/// change the data structures so the user references the clone. Returns:
+/// 0 = no existing entry found
+/// 1 = entry found, and there were no code insertions or deletions
+/// 2 = entry found, and there were code insertions or deletions
+int MipsConstantIslands::findInRangeCPEntry(CPUser &U, unsigned UserOffset) {
+ MachineInstr *UserMI = U.MI;
+ MachineInstr *CPEMI = U.CPEMI;
+
+ // Check to see if the CPE is already in-range.
+ if (isCPEntryInRange(UserMI, UserOffset, CPEMI, U.getMaxDisp(), U.NegOk,
+ true)) {
+ DEBUG(dbgs() << "In range\n");
+ return 1;
+ }
+
+ // No. Look for previously created clones of the CPE that are in range.
+ unsigned CPI = CPEMI->getOperand(1).getIndex();
+ std::vector<CPEntry> &CPEs = CPEntries[CPI];
+ for (unsigned i = 0, e = CPEs.size(); i != e; ++i) {
+ // We already tried this one
+ if (CPEs[i].CPEMI == CPEMI)
+ continue;
+ // Removing CPEs can leave empty entries, skip
+ if (CPEs[i].CPEMI == NULL)
+ continue;
+ if (isCPEntryInRange(UserMI, UserOffset, CPEs[i].CPEMI, U.getMaxDisp(),
+ U.NegOk)) {
+ DEBUG(dbgs() << "Replacing CPE#" << CPI << " with CPE#"
+ << CPEs[i].CPI << "\n");
+ // Point the CPUser node to the replacement
+ U.CPEMI = CPEs[i].CPEMI;
+ // Change the CPI in the instruction operand to refer to the clone.
+ for (unsigned j = 0, e = UserMI->getNumOperands(); j != e; ++j)
+ if (UserMI->getOperand(j).isCPI()) {
+ UserMI->getOperand(j).setIndex(CPEs[i].CPI);
+ break;
+ }
+ // Adjust the refcount of the clone...
+ CPEs[i].RefCount++;
+ // ...and the original. If we didn't remove the old entry, none of the
+ // addresses changed, so we don't need another pass.
+ return decrementCPEReferenceCount(CPI, CPEMI) ? 2 : 1;
+ }
+ }
+ return 0;
+}
+
+/// findLongFormInRangeCPEntry - like findInRangeCPEntry, but first checks
+/// whether the longer form of the user instruction can be used to satisfy
+/// the displacement. If not, see if an in-range clone of the CPE exists, and
+/// if so, change the data structures so the user references the clone. Returns:
+/// 0 = no existing entry found
+/// 1 = entry found, and there were no code insertions or deletions
+/// 2 = entry found, and there were code insertions or deletions
+int MipsConstantIslands::findLongFormInRangeCPEntry(CPUser &U,
+                                                    unsigned UserOffset) {
+ MachineInstr *UserMI = U.MI;
+ MachineInstr *CPEMI = U.CPEMI;
+
+ // Check to see if the CPE is already in-range.
+ if (isCPEntryInRange(UserMI, UserOffset, CPEMI,
+ U.getLongFormMaxDisp(), U.NegOk,
+ true)) {
+ DEBUG(dbgs() << "In range\n");
+ UserMI->setDesc(TII->get(U.getLongFormOpcode()));
+ U.setMaxDisp(U.getLongFormMaxDisp());
+    return 2; // The user instruction is now its longer form.
+ }
+
+ // No. Look for previously created clones of the CPE that are in range.
+ unsigned CPI = CPEMI->getOperand(1).getIndex();
+ std::vector<CPEntry> &CPEs = CPEntries[CPI];
+ for (unsigned i = 0, e = CPEs.size(); i != e; ++i) {
+ // We already tried this one
+ if (CPEs[i].CPEMI == CPEMI)
+ continue;
+ // Removing CPEs can leave empty entries, skip
+ if (CPEs[i].CPEMI == NULL)
+ continue;
+ if (isCPEntryInRange(UserMI, UserOffset, CPEs[i].CPEMI,
+ U.getLongFormMaxDisp(), U.NegOk)) {
+ DEBUG(dbgs() << "Replacing CPE#" << CPI << " with CPE#"
+ << CPEs[i].CPI << "\n");
+ // Point the CPUser node to the replacement
+ U.CPEMI = CPEs[i].CPEMI;
+ // Change the CPI in the instruction operand to refer to the clone.
+ for (unsigned j = 0, e = UserMI->getNumOperands(); j != e; ++j)
+ if (UserMI->getOperand(j).isCPI()) {
+ UserMI->getOperand(j).setIndex(CPEs[i].CPI);
+ break;
+ }
+ // Adjust the refcount of the clone...
+ CPEs[i].RefCount++;
+ // ...and the original. If we didn't remove the old entry, none of the
+ // addresses changed, so we don't need another pass.
+ return decrementCPEReferenceCount(CPI, CPEMI) ? 2 : 1;
+ }
+ }
+ return 0;
+}
+
+/// getUnconditionalBrDisp - Returns the maximum displacement that can fit in
+/// the specified unconditional branch instruction.
+static inline unsigned getUnconditionalBrDisp(int Opc) {
+ switch (Opc) {
+ case Mips::Bimm16:
+ return ((1<<10)-1)*2;
+ case Mips::BimmX16:
+ return ((1<<16)-1)*2;
+ default:
+ break;
+ }
+ return ((1<<16)-1)*2;
+}
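+
+// Note: both cases above scale an immediate by 2, presumably counting
+// half-words, so an N-bit field reaches ((1 << N) - 1) * 2 bytes: 2046 bytes
+// for Bimm16 and 131070 bytes for BimmX16.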
+
+/// findAvailableWater - Look for an existing entry in the WaterList in which
+/// we can place the CPE referenced from U so it's within range of U's MI.
+/// Returns true if found, false if not. If it returns true, WaterIter
+/// is set to the WaterList entry.
+/// To ensure that this pass terminates, the CPE location for a particular
+/// CPUser is only allowed to move to a lower address, so search backward from
+/// the end of the list and prefer the first water that is in range.
+bool MipsConstantIslands::findAvailableWater(CPUser &U, unsigned UserOffset,
+ water_iterator &WaterIter) {
+ if (WaterList.empty())
+ return false;
+
+ unsigned BestGrowth = ~0u;
+ for (water_iterator IP = prior(WaterList.end()), B = WaterList.begin();;
+ --IP) {
+ MachineBasicBlock* WaterBB = *IP;
+ // Check if water is in range and is either at a lower address than the
+ // current "high water mark" or a new water block that was created since
+ // the previous iteration by inserting an unconditional branch. In the
+ // latter case, we want to allow resetting the high water mark back to
+ // this new water since we haven't seen it before. Inserting branches
+ // should be relatively uncommon and when it does happen, we want to be
+ // sure to take advantage of it for all the CPEs near that block, so that
+ // we don't insert more branches than necessary.
+ unsigned Growth;
+ if (isWaterInRange(UserOffset, WaterBB, U, Growth) &&
+ (WaterBB->getNumber() < U.HighWaterMark->getNumber() ||
+ NewWaterList.count(WaterBB)) && Growth < BestGrowth) {
+ // This is the least amount of required padding seen so far.
+ BestGrowth = Growth;
+ WaterIter = IP;
+ DEBUG(dbgs() << "Found water after BB#" << WaterBB->getNumber()
+ << " Growth=" << Growth << '\n');
+
+ // Keep looking unless it is perfect.
+ if (BestGrowth == 0)
+ return true;
+ }
+ if (IP == B)
+ break;
+ }
+ return BestGrowth != ~0u;
+}
+
+/// createNewWater - No existing WaterList entry will work for
+/// CPUsers[CPUserIndex], so create a place to put the CPE. The end of the
+/// block is used if in range, and the conditional branch munged so control
+/// flow is correct. Otherwise the block is split to create a hole with an
+/// unconditional branch around it. In either case NewMBB is set to a
+/// block following which the new island can be inserted (the WaterList
+/// is not adjusted).
+void MipsConstantIslands::createNewWater(unsigned CPUserIndex,
+ unsigned UserOffset,
+ MachineBasicBlock *&NewMBB) {
+ CPUser &U = CPUsers[CPUserIndex];
+ MachineInstr *UserMI = U.MI;
+ MachineInstr *CPEMI = U.CPEMI;
+ unsigned CPELogAlign = getCPELogAlign(CPEMI);
+ MachineBasicBlock *UserMBB = UserMI->getParent();
+ const BasicBlockInfo &UserBBI = BBInfo[UserMBB->getNumber()];
+
+ // If the block does not end in an unconditional branch already, and if the
+ // end of the block is within range, make new water there.
+ if (BBHasFallthrough(UserMBB)) {
+ // Size of branch to insert.
+ unsigned Delta = 2;
+ // Compute the offset where the CPE will begin.
+ unsigned CPEOffset = UserBBI.postOffset(CPELogAlign) + Delta;
+
+ if (isOffsetInRange(UserOffset, CPEOffset, U)) {
+ DEBUG(dbgs() << "Split at end of BB#" << UserMBB->getNumber()
+ << format(", expected CPE offset %#x\n", CPEOffset));
+ NewMBB = llvm::next(MachineFunction::iterator(UserMBB));
+ // Add an unconditional branch from UserMBB to fallthrough block. Record
+ // it for branch lengthening; this new branch will not get out of range,
+ // but if the preceding conditional branch is out of range, the targets
+ // will be exchanged, and the altered branch may be out of range, so the
+ // machinery has to know about it.
+ int UncondBr = Mips::Bimm16;
+ BuildMI(UserMBB, DebugLoc(), TII->get(UncondBr)).addMBB(NewMBB);
+ unsigned MaxDisp = getUnconditionalBrDisp(UncondBr);
+ ImmBranches.push_back(ImmBranch(&UserMBB->back(),
+ MaxDisp, false, UncondBr));
+ BBInfo[UserMBB->getNumber()].Size += Delta;
+ adjustBBOffsetsAfter(UserMBB);
+ return;
+ }
+ }
+
+ // What a big block. Find a place within the block to split it.
+
+ // Try to split the block so it's fully aligned. Compute the latest split
+ // point where we can add a 4-byte branch instruction, and then align to
+ // LogAlign which is the largest possible alignment in the function.
+ unsigned LogAlign = MF->getAlignment();
+ assert(LogAlign >= CPELogAlign && "Over-aligned constant pool entry");
+ unsigned BaseInsertOffset = UserOffset + U.getMaxDisp();
+ DEBUG(dbgs() << format("Split in middle of big block before %#x",
+ BaseInsertOffset));
+
+  // The 4 below accounts for the unconditional branch we'll be inserting;
+  // alignment of the island is handled inside isOffsetInRange.
+ BaseInsertOffset -= 4;
+
+ DEBUG(dbgs() << format(", adjusted to %#x", BaseInsertOffset)
+ << " la=" << LogAlign << '\n');
+
+ // This could point off the end of the block if we've already got constant
+ // pool entries following this block; only the last one is in the water list.
+ // Back past any possible branches (allow for a conditional and a maximally
+ // long unconditional).
+ if (BaseInsertOffset + 8 >= UserBBI.postOffset()) {
+ BaseInsertOffset = UserBBI.postOffset() - 8;
+ DEBUG(dbgs() << format("Move inside block: %#x\n", BaseInsertOffset));
+ }
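+  // The new island will end at the 4-byte unconditional branch plus the CPE
+  // payload size (operand 2 of the CONSTPOOL_ENTRY).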
+ unsigned EndInsertOffset = BaseInsertOffset + 4 +
+ CPEMI->getOperand(2).getImm();
+ MachineBasicBlock::iterator MI = UserMI;
+ ++MI;
+ unsigned CPUIndex = CPUserIndex+1;
+ unsigned NumCPUsers = CPUsers.size();
+ for (unsigned Offset = UserOffset+TII->GetInstSizeInBytes(UserMI);
+ Offset < BaseInsertOffset;
+ Offset += TII->GetInstSizeInBytes(MI),
+ MI = llvm::next(MI)) {
+ assert(MI != UserMBB->end() && "Fell off end of block");
+ if (CPUIndex < NumCPUsers && CPUsers[CPUIndex].MI == MI) {
+ CPUser &U = CPUsers[CPUIndex];
+ if (!isOffsetInRange(Offset, EndInsertOffset, U)) {
+        // Shift the insertion point by one unit of alignment so it is
+        // within reach.
+ BaseInsertOffset -= 1u << LogAlign;
+ EndInsertOffset -= 1u << LogAlign;
+ }
+ // This is overly conservative, as we don't account for CPEMIs being
+ // reused within the block, but it doesn't matter much. Also assume CPEs
+ // are added in order with alignment padding. We may eventually be able
+ // to pack the aligned CPEs better.
+ EndInsertOffset += U.CPEMI->getOperand(2).getImm();
+ CPUIndex++;
+ }
+ }
+
+ --MI;
+ NewMBB = splitBlockBeforeInstr(MI);
+}
+
+/// handleConstantPoolUser - Analyze the specified user, checking to see if it
+/// is out-of-range. If so, pick up the constant pool value and move it some
+/// place in-range. Return true if we changed any addresses (thus must run
+/// another pass of branch lengthening), false otherwise.
+bool MipsConstantIslands::handleConstantPoolUser(unsigned CPUserIndex) {
+ CPUser &U = CPUsers[CPUserIndex];
+ MachineInstr *UserMI = U.MI;
+ MachineInstr *CPEMI = U.CPEMI;
+ unsigned CPI = CPEMI->getOperand(1).getIndex();
+ unsigned Size = CPEMI->getOperand(2).getImm();
+ // Compute this only once, it's expensive.
+ unsigned UserOffset = getUserOffset(U);
+
+ // See if the current entry is within range, or there is a clone of it
+ // in range.
+  int result = findInRangeCPEntry(U, UserOffset);
+  if (result == 1)
+    return false;
+  if (result == 2)
+    return true;
+
+ // Look for water where we can place this CPE.
+ MachineBasicBlock *NewIsland = MF->CreateMachineBasicBlock();
+ MachineBasicBlock *NewMBB;
+ water_iterator IP;
+ if (findAvailableWater(U, UserOffset, IP)) {
+ DEBUG(dbgs() << "Found water in range\n");
+ MachineBasicBlock *WaterBB = *IP;
+
+ // If the original WaterList entry was "new water" on this iteration,
+ // propagate that to the new island. This is just keeping NewWaterList
+ // updated to match the WaterList, which will be updated below.
+ if (NewWaterList.erase(WaterBB))
+ NewWaterList.insert(NewIsland);
+
+ // The new CPE goes before the following block (NewMBB).
+ NewMBB = llvm::next(MachineFunction::iterator(WaterBB));
+
+ } else {
+    // No water found.
+    // First check whether a longer form of the instruction could reach the
+    // constant; in that case we don't bother to split.
+ if (!NoLoadRelaxation) {
+ result = findLongFormInRangeCPEntry(U, UserOffset);
+ if (result != 0) return true;
+ }
+ DEBUG(dbgs() << "No water found\n");
+ createNewWater(CPUserIndex, UserOffset, NewMBB);
+
+ // splitBlockBeforeInstr adds to WaterList, which is important when it is
+ // called while handling branches so that the water will be seen on the
+ // next iteration for constant pools, but in this context, we don't want
+ // it. Check for this so it will be removed from the WaterList.
+ // Also remove any entry from NewWaterList.
+ MachineBasicBlock *WaterBB = prior(MachineFunction::iterator(NewMBB));
+ IP = std::find(WaterList.begin(), WaterList.end(), WaterBB);
+ if (IP != WaterList.end())
+ NewWaterList.erase(WaterBB);
+
+ // We are adding new water. Update NewWaterList.
+ NewWaterList.insert(NewIsland);
+ }
+
+ // Remove the original WaterList entry; we want subsequent insertions in
+ // this vicinity to go after the one we're about to insert. This
+ // considerably reduces the number of times we have to move the same CPE
+ // more than once and is also important to ensure the algorithm terminates.
+ if (IP != WaterList.end())
+ WaterList.erase(IP);
+
+ // Okay, we know we can put an island before NewMBB now, do it!
+ MF->insert(NewMBB, NewIsland);
+
+ // Update internal data structures to account for the newly inserted MBB.
+ updateForInsertedWaterBlock(NewIsland);
+
+ // Decrement the old entry, and remove it if refcount becomes 0.
+ decrementCPEReferenceCount(CPI, CPEMI);
+
+  // No existing clone of this CPE is within range.
+  // We will be generating a new clone. Get a UID for it.
+  unsigned ID = createPICLabelUId();
+
+  // Now that we have an island to add the CPE to, clone the original CPE and
+  // add it to the island.
+  U.HighWaterMark = NewIsland;
+  U.CPEMI = BuildMI(NewIsland, DebugLoc(), TII->get(Mips::CONSTPOOL_ENTRY))
+    .addImm(ID).addConstantPoolIndex(CPI).addImm(Size);
+  CPEntries[CPI].push_back(CPEntry(U.CPEMI, ID, 1));
+  ++NumCPEs;
+
+  // Mark the basic block as aligned as required by the const-pool entry.
+  NewIsland->setAlignment(getCPELogAlign(U.CPEMI));
+
+  // Increase the size of the island block to account for the new entry.
+  BBInfo[NewIsland->getNumber()].Size += Size;
+  adjustBBOffsetsAfter(llvm::prior(MachineFunction::iterator(NewIsland)));
+
+ // Finally, change the CPI in the instruction operand to be ID.
+ for (unsigned i = 0, e = UserMI->getNumOperands(); i != e; ++i)
+ if (UserMI->getOperand(i).isCPI()) {
+ UserMI->getOperand(i).setIndex(ID);
+ break;
+ }
+
+ DEBUG(dbgs() << " Moved CPE to #" << ID << " CPI=" << CPI
+ << format(" offset=%#x\n", BBInfo[NewIsland->getNumber()].Offset));
+
+ return true;
+}
+
+/// removeDeadCPEMI - Remove a dead constant pool entry instruction. Update
+/// sizes and offsets of impacted basic blocks.
+void MipsConstantIslands::removeDeadCPEMI(MachineInstr *CPEMI) {
+ MachineBasicBlock *CPEBB = CPEMI->getParent();
+ unsigned Size = CPEMI->getOperand(2).getImm();
+ CPEMI->eraseFromParent();
+ BBInfo[CPEBB->getNumber()].Size -= Size;
+ // All succeeding offsets have the current size value added in, fix this.
+ if (CPEBB->empty()) {
+ BBInfo[CPEBB->getNumber()].Size = 0;
+
+ // This block no longer needs to be aligned.
+ CPEBB->setAlignment(0);
+ } else
+ // Entries are sorted by descending alignment, so realign from the front.
+ CPEBB->setAlignment(getCPELogAlign(CPEBB->begin()));
+
+ adjustBBOffsetsAfter(CPEBB);
+ // An island has only one predecessor BB and one successor BB. Check if
+ // this BB's predecessor jumps directly to this BB's successor. This
+ // shouldn't happen currently.
+ assert(!BBIsJumpedOver(CPEBB) && "How did this happen?");
+ // FIXME: remove the empty blocks after all the work is done?
+}
+
+/// removeUnusedCPEntries - Remove constant pool entries whose refcounts
+/// are zero.
+bool MipsConstantIslands::removeUnusedCPEntries() {
+  bool MadeChange = false;
+ for (unsigned i = 0, e = CPEntries.size(); i != e; ++i) {
+ std::vector<CPEntry> &CPEs = CPEntries[i];
+ for (unsigned j = 0, ee = CPEs.size(); j != ee; ++j) {
+ if (CPEs[j].RefCount == 0 && CPEs[j].CPEMI) {
+ removeDeadCPEMI(CPEs[j].CPEMI);
+ CPEs[j].CPEMI = NULL;
+ MadeChange = true;
+ }
+ }
+ }
+ return MadeChange;
+}
+
+/// isBBInRange - Returns true if the distance between the specified MI and
+/// the specified BB can fit in MI's displacement field.
+bool MipsConstantIslands::isBBInRange(MachineInstr *MI,
+                                      MachineBasicBlock *DestBB,
+                                      unsigned MaxDisp) {
+  unsigned PCAdj = 4;
+
+ unsigned BrOffset = getOffsetOf(MI) + PCAdj;
+ unsigned DestOffset = BBInfo[DestBB->getNumber()].Offset;
+
+ DEBUG(dbgs() << "Branch of destination BB#" << DestBB->getNumber()
+ << " from BB#" << MI->getParent()->getNumber()
+ << " max delta=" << MaxDisp
+ << " from " << getOffsetOf(MI) << " to " << DestOffset
+ << " offset " << int(DestOffset-BrOffset) << "\t" << *MI);
+
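+  // Equivalently: the destination is reachable iff DestOffset lies in
+  // [BrOffset - MaxDisp, BrOffset + MaxDisp].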
+ if (BrOffset <= DestOffset) {
+ // Branch before the Dest.
+ if (DestOffset-BrOffset <= MaxDisp)
+ return true;
+ } else {
+ if (BrOffset-DestOffset <= MaxDisp)
+ return true;
+ }
+ return false;
+}
+
+/// fixupImmediateBr - Fix up an immediate branch whose destination is too far
+/// away to fit in its displacement field.
+bool MipsConstantIslands::fixupImmediateBr(ImmBranch &Br) {
+ MachineInstr *MI = Br.MI;
+ MachineBasicBlock *DestBB = MI->getOperand(0).getMBB();
+
+ // Check to see if the DestBB is already in-range.
+ if (isBBInRange(MI, DestBB, Br.MaxDisp))
+ return false;
+
+ if (!Br.isCond)
+ return fixupUnconditionalBr(Br);
+ return fixupConditionalBr(Br);
+}
+
+/// fixupUnconditionalBr - Fix up an unconditional branch whose destination is
+/// too far away to fit in its displacement field. Rewrite it as the long-form
+/// branch, which has a larger displacement range.
+bool
+MipsConstantIslands::fixupUnconditionalBr(ImmBranch &Br) {
+ MachineInstr *MI = Br.MI;
+ MachineBasicBlock *MBB = MI->getParent();
+  // Rewrite the branch as the long form to implement the far jump.
+ Br.MaxDisp = ((1 << 16)-1) * 2;
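+  // i.e. (2^16 - 1) * 2 = 131070 bytes of reach for the long form.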
+ MI->setDesc(TII->get(Mips::BimmX16));
+ BBInfo[MBB->getNumber()].Size += 2;
+ adjustBBOffsetsAfter(MBB);
+ HasFarJump = true;
+ ++NumUBrFixed;
+
+ DEBUG(dbgs() << " Changed B to long jump " << *MI);
+
+ return true;
+}
+
+/// fixupConditionalBr - Fix up a conditional branch whose destination is too
+/// far away to fit in its displacement field. It is converted to an inverse
+/// conditional branch + an unconditional branch to the destination.
+bool
+MipsConstantIslands::fixupConditionalBr(ImmBranch &Br) {
+ MachineInstr *MI = Br.MI;
+ MachineBasicBlock *DestBB = MI->getOperand(0).getMBB();
+
+ // Add an unconditional branch to the destination and invert the branch
+ // condition to jump over it:
+ // blt L1
+ // =>
+ // bge L2
+ // b L1
+ // L2:
+  unsigned CCReg = 0; // FIXME
+  unsigned CC = 0;    // FIXME
+
+ // If the branch is at the end of its MBB and that has a fall-through block,
+ // direct the updated conditional branch to the fall-through block. Otherwise,
+ // split the MBB before the next instruction.
+ MachineBasicBlock *MBB = MI->getParent();
+ MachineInstr *BMI = &MBB->back();
+ bool NeedSplit = (BMI != MI) || !BBHasFallthrough(MBB);
+
+ ++NumCBrFixed;
+ if (BMI != MI) {
+ if (llvm::next(MachineBasicBlock::iterator(MI)) == prior(MBB->end()) &&
+ BMI->getOpcode() == Br.UncondBr) {
+ // Last MI in the BB is an unconditional branch. Can we simply invert the
+ // condition and swap destinations:
+ // beq L1
+ // b L2
+ // =>
+ // bne L2
+ // b L1
+ MachineBasicBlock *NewDest = BMI->getOperand(0).getMBB();
+ if (isBBInRange(MI, NewDest, Br.MaxDisp)) {
+ DEBUG(dbgs() << " Invert Bcc condition and swap its destination with "
+ << *BMI);
+ BMI->getOperand(0).setMBB(DestBB);
+ MI->getOperand(0).setMBB(NewDest);
+ return true;
+ }
+ }
+ }
+
+ if (NeedSplit) {
+ splitBlockBeforeInstr(MI);
+ // No need for the branch to the next block. We're adding an unconditional
+ // branch to the destination.
+ int delta = TII->GetInstSizeInBytes(&MBB->back());
+ BBInfo[MBB->getNumber()].Size -= delta;
+ MBB->back().eraseFromParent();
+ // BBInfo[SplitBB].Offset is wrong temporarily, fixed below
+ }
+ MachineBasicBlock *NextBB = llvm::next(MachineFunction::iterator(MBB));
+
+ DEBUG(dbgs() << " Insert B to BB#" << DestBB->getNumber()
+ << " also invert condition and change dest. to BB#"
+ << NextBB->getNumber() << "\n");
+
+ // Insert a new conditional branch and a new unconditional branch.
+ // Also update the ImmBranch as well as adding a new entry for the new branch.
+ BuildMI(MBB, DebugLoc(), TII->get(MI->getOpcode()))
+ .addMBB(NextBB).addImm(CC).addReg(CCReg);
+ Br.MI = &MBB->back();
+ BBInfo[MBB->getNumber()].Size += TII->GetInstSizeInBytes(&MBB->back());
+ BuildMI(MBB, DebugLoc(), TII->get(Br.UncondBr)).addMBB(DestBB);
+ BBInfo[MBB->getNumber()].Size += TII->GetInstSizeInBytes(&MBB->back());
+ unsigned MaxDisp = getUnconditionalBrDisp(Br.UncondBr);
+ ImmBranches.push_back(ImmBranch(&MBB->back(), MaxDisp, false, Br.UncondBr));
+
+ // Remove the old conditional branch. It may or may not still be in MBB.
+ BBInfo[MI->getParent()->getNumber()].Size -= TII->GetInstSizeInBytes(MI);
+ MI->eraseFromParent();
+ adjustBBOffsetsAfter(MBB);
+ return true;
+}
+
+void MipsConstantIslands::prescanForConstants() {
+ unsigned J = 0;
+ (void)J;
+ PrescannedForConstants = true;
+  for (MachineFunction::iterator B = MF->begin(), E = MF->end(); B != E; ++B) {
+    for (MachineBasicBlock::instr_iterator I = B->instr_begin(),
+                                           EB = B->instr_end(); I != EB; ++I) {
+      switch (I->getDesc().getOpcode()) {
+ case Mips::LwConstant32: {
+ DEBUG(dbgs() << "constant island constant " << *I << "\n");
+ J = I->getNumOperands();
+ DEBUG(dbgs() << "num operands " << J << "\n");
+ MachineOperand& Literal = I->getOperand(1);
+ if (Literal.isImm()) {
+ int64_t V = Literal.getImm();
+ DEBUG(dbgs() << "literal " << V << "\n");
+ Type *Int32Ty =
+ Type::getInt32Ty(MF->getFunction()->getContext());
+ const Constant *C = ConstantInt::get(Int32Ty, V);
+ unsigned index = MCP->getConstantPoolIndex(C, 4);
+ I->getOperand(2).ChangeToImmediate(index);
+ DEBUG(dbgs() << "constant island constant " << *I << "\n");
+ I->setDesc(TII->get(Mips::LwRxPcTcp16));
+ I->RemoveOperand(1);
+ I->RemoveOperand(1);
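+            // Removing index 1 twice deletes both the literal and the old
+            // operand 2, since later operands shift down after each removal.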
+ I->addOperand(MachineOperand::CreateCPI(index, 0));
+ I->addOperand(MachineOperand::CreateImm(4));
+ }
+ break;
+ }
+ default:
+ break;
+ }
+ }
+ }
+}
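+
+// Note: the routines above are driven by a fixed-point loop elsewhere in this
+// pass (not part of this hunk); a minimal sketch of its likely shape, using
+// the functions defined above, is:
+//
+//   bool Changed = true;
+//   while (Changed) {
+//     Changed = false;
+//     for (unsigned i = 0, e = CPUsers.size(); i != e; ++i)
+//       Changed |= handleConstantPoolUser(i);
+//     for (unsigned i = 0, e = ImmBranches.size(); i != e; ++i)
+//       Changed |= fixupImmediateBr(ImmBranches[i]);
+//   }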
+
diff --git a/lib/Target/Mips/MipsDSPInstrInfo.td b/lib/Target/Mips/MipsDSPInstrInfo.td
index 526821a..d268384 100644
--- a/lib/Target/Mips/MipsDSPInstrInfo.td
+++ b/lib/Target/Mips/MipsDSPInstrInfo.td
@@ -256,236 +256,235 @@ class PREPEND_ENC : APPEND_FMT<0b00001>;
// Instruction desc.
class ADDU_QB_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
- InstrItinClass itin, RegisterClass RCD,
- RegisterClass RCS, RegisterClass RCT = RCS> {
- dag OutOperandList = (outs RCD:$rd);
- dag InOperandList = (ins RCS:$rs, RCT:$rt);
+ InstrItinClass itin, RegisterOperand ROD,
+ RegisterOperand ROS, RegisterOperand ROT = ROS> {
+ dag OutOperandList = (outs ROD:$rd);
+ dag InOperandList = (ins ROS:$rs, ROT:$rt);
string AsmString = !strconcat(instr_asm, "\t$rd, $rs, $rt");
- list<dag> Pattern = [(set RCD:$rd, (OpNode RCS:$rs, RCT:$rt))];
+ list<dag> Pattern = [(set ROD:$rd, (OpNode ROS:$rs, ROT:$rt))];
InstrItinClass Itinerary = itin;
}
class RADDU_W_QB_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
- InstrItinClass itin, RegisterClass RCD,
- RegisterClass RCS = RCD> {
- dag OutOperandList = (outs RCD:$rd);
- dag InOperandList = (ins RCS:$rs);
+ InstrItinClass itin, RegisterOperand ROD,
+ RegisterOperand ROS = ROD> {
+ dag OutOperandList = (outs ROD:$rd);
+ dag InOperandList = (ins ROS:$rs);
string AsmString = !strconcat(instr_asm, "\t$rd, $rs");
- list<dag> Pattern = [(set RCD:$rd, (OpNode RCS:$rs))];
+ list<dag> Pattern = [(set ROD:$rd, (OpNode ROS:$rs))];
InstrItinClass Itinerary = itin;
}
class CMP_EQ_QB_R2_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
- InstrItinClass itin, RegisterClass RCS,
- RegisterClass RCT = RCS> {
+ InstrItinClass itin, RegisterOperand ROS,
+ RegisterOperand ROT = ROS> {
dag OutOperandList = (outs);
- dag InOperandList = (ins RCS:$rs, RCT:$rt);
+ dag InOperandList = (ins ROS:$rs, ROT:$rt);
string AsmString = !strconcat(instr_asm, "\t$rs, $rt");
- list<dag> Pattern = [(OpNode RCS:$rs, RCT:$rt)];
+ list<dag> Pattern = [(OpNode ROS:$rs, ROT:$rt)];
InstrItinClass Itinerary = itin;
}
class CMP_EQ_QB_R3_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
- InstrItinClass itin, RegisterClass RCD,
- RegisterClass RCS, RegisterClass RCT = RCS> {
- dag OutOperandList = (outs RCD:$rd);
- dag InOperandList = (ins RCS:$rs, RCT:$rt);
+ InstrItinClass itin, RegisterOperand ROD,
+ RegisterOperand ROS, RegisterOperand ROT = ROS> {
+ dag OutOperandList = (outs ROD:$rd);
+ dag InOperandList = (ins ROS:$rs, ROT:$rt);
string AsmString = !strconcat(instr_asm, "\t$rd, $rs, $rt");
- list<dag> Pattern = [(set RCD:$rd, (OpNode RCS:$rs, RCT:$rt))];
+ list<dag> Pattern = [(set ROD:$rd, (OpNode ROS:$rs, ROT:$rt))];
InstrItinClass Itinerary = itin;
}
class PRECR_SRA_PH_W_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
- InstrItinClass itin, RegisterClass RCT,
- RegisterClass RCS = RCT> {
- dag OutOperandList = (outs RCT:$rt);
- dag InOperandList = (ins RCS:$rs, shamt:$sa, RCS:$src);
+ InstrItinClass itin, RegisterOperand ROT,
+ RegisterOperand ROS = ROT> {
+ dag OutOperandList = (outs ROT:$rt);
+ dag InOperandList = (ins ROS:$rs, uimm5:$sa, ROS:$src);
string AsmString = !strconcat(instr_asm, "\t$rt, $rs, $sa");
- list<dag> Pattern = [(set RCT:$rt, (OpNode RCS:$src, RCS:$rs, immZExt5:$sa))];
+ list<dag> Pattern = [(set ROT:$rt, (OpNode ROS:$src, ROS:$rs, immZExt5:$sa))];
InstrItinClass Itinerary = itin;
string Constraints = "$src = $rt";
}
class ABSQ_S_PH_R2_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
- InstrItinClass itin, RegisterClass RCD,
- RegisterClass RCT = RCD> {
- dag OutOperandList = (outs RCD:$rd);
- dag InOperandList = (ins RCT:$rt);
+ InstrItinClass itin, RegisterOperand ROD,
+ RegisterOperand ROT = ROD> {
+ dag OutOperandList = (outs ROD:$rd);
+ dag InOperandList = (ins ROT:$rt);
string AsmString = !strconcat(instr_asm, "\t$rd, $rt");
- list<dag> Pattern = [(set RCD:$rd, (OpNode RCT:$rt))];
+ list<dag> Pattern = [(set ROD:$rd, (OpNode ROT:$rt))];
InstrItinClass Itinerary = itin;
}
class REPL_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
- ImmLeaf immPat, InstrItinClass itin, RegisterClass RC> {
- dag OutOperandList = (outs RC:$rd);
+ ImmLeaf immPat, InstrItinClass itin, RegisterOperand RO> {
+ dag OutOperandList = (outs RO:$rd);
dag InOperandList = (ins uimm16:$imm);
string AsmString = !strconcat(instr_asm, "\t$rd, $imm");
- list<dag> Pattern = [(set RC:$rd, (OpNode immPat:$imm))];
+ list<dag> Pattern = [(set RO:$rd, (OpNode immPat:$imm))];
InstrItinClass Itinerary = itin;
}
class SHLL_QB_R3_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
- InstrItinClass itin, RegisterClass RC> {
- dag OutOperandList = (outs RC:$rd);
- dag InOperandList = (ins RC:$rt, GPR32:$rs_sa);
+ InstrItinClass itin, RegisterOperand RO> {
+ dag OutOperandList = (outs RO:$rd);
+ dag InOperandList = (ins RO:$rt, GPR32Opnd:$rs_sa);
string AsmString = !strconcat(instr_asm, "\t$rd, $rt, $rs_sa");
- list<dag> Pattern = [(set RC:$rd, (OpNode RC:$rt, GPR32:$rs_sa))];
+ list<dag> Pattern = [(set RO:$rd, (OpNode RO:$rt, GPR32Opnd:$rs_sa))];
InstrItinClass Itinerary = itin;
}
class SHLL_QB_R2_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
SDPatternOperator ImmPat, InstrItinClass itin,
- RegisterClass RC> {
- dag OutOperandList = (outs RC:$rd);
- dag InOperandList = (ins RC:$rt, uimm16:$rs_sa);
+ RegisterOperand RO> {
+ dag OutOperandList = (outs RO:$rd);
+ dag InOperandList = (ins RO:$rt, uimm16:$rs_sa);
string AsmString = !strconcat(instr_asm, "\t$rd, $rt, $rs_sa");
- list<dag> Pattern = [(set RC:$rd, (OpNode RC:$rt, ImmPat:$rs_sa))];
+ list<dag> Pattern = [(set RO:$rd, (OpNode RO:$rt, ImmPat:$rs_sa))];
InstrItinClass Itinerary = itin;
bit hasSideEffects = 1;
}
class LX_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
InstrItinClass itin> {
- dag OutOperandList = (outs GPR32:$rd);
- dag InOperandList = (ins GPR32:$base, GPR32:$index);
+ dag OutOperandList = (outs GPR32Opnd:$rd);
+ dag InOperandList = (ins PtrRC:$base, PtrRC:$index);
string AsmString = !strconcat(instr_asm, "\t$rd, ${index}(${base})");
- list<dag> Pattern = [(set GPR32:$rd,
- (OpNode GPR32:$base, GPR32:$index))];
+ list<dag> Pattern = [(set GPR32Opnd:$rd, (OpNode iPTR:$base, iPTR:$index))];
InstrItinClass Itinerary = itin;
bit mayLoad = 1;
}
class ADDUH_QB_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
- InstrItinClass itin, RegisterClass RCD,
- RegisterClass RCS = RCD, RegisterClass RCT = RCD> {
- dag OutOperandList = (outs RCD:$rd);
- dag InOperandList = (ins RCS:$rs, RCT:$rt);
+ InstrItinClass itin, RegisterOperand ROD,
+ RegisterOperand ROS = ROD, RegisterOperand ROT = ROD> {
+ dag OutOperandList = (outs ROD:$rd);
+ dag InOperandList = (ins ROS:$rs, ROT:$rt);
string AsmString = !strconcat(instr_asm, "\t$rd, $rs, $rt");
- list<dag> Pattern = [(set RCD:$rd, (OpNode RCS:$rs, RCT:$rt))];
+ list<dag> Pattern = [(set ROD:$rd, (OpNode ROS:$rs, ROT:$rt))];
InstrItinClass Itinerary = itin;
}
class APPEND_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
SDPatternOperator ImmOp, InstrItinClass itin> {
- dag OutOperandList = (outs GPR32:$rt);
- dag InOperandList = (ins GPR32:$rs, shamt:$sa, GPR32:$src);
+ dag OutOperandList = (outs GPR32Opnd:$rt);
+ dag InOperandList = (ins GPR32Opnd:$rs, uimm5:$sa, GPR32Opnd:$src);
string AsmString = !strconcat(instr_asm, "\t$rt, $rs, $sa");
- list<dag> Pattern = [(set GPR32:$rt,
- (OpNode GPR32:$src, GPR32:$rs, ImmOp:$sa))];
+ list<dag> Pattern = [(set GPR32Opnd:$rt,
+ (OpNode GPR32Opnd:$src, GPR32Opnd:$rs, ImmOp:$sa))];
InstrItinClass Itinerary = itin;
string Constraints = "$src = $rt";
}
class EXTR_W_TY1_R2_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
InstrItinClass itin> {
- dag OutOperandList = (outs GPR32:$rt);
- dag InOperandList = (ins ACRegsDSP:$ac, GPR32:$shift_rs);
+ dag OutOperandList = (outs GPR32Opnd:$rt);
+ dag InOperandList = (ins ACC64DSPOpnd:$ac, GPR32Opnd:$shift_rs);
string AsmString = !strconcat(instr_asm, "\t$rt, $ac, $shift_rs");
InstrItinClass Itinerary = itin;
}
class EXTR_W_TY1_R1_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
InstrItinClass itin> {
- dag OutOperandList = (outs GPR32:$rt);
- dag InOperandList = (ins ACRegsDSP:$ac, uimm16:$shift_rs);
+ dag OutOperandList = (outs GPR32Opnd:$rt);
+ dag InOperandList = (ins ACC64DSPOpnd:$ac, uimm16:$shift_rs);
string AsmString = !strconcat(instr_asm, "\t$rt, $ac, $shift_rs");
InstrItinClass Itinerary = itin;
}
class SHILO_R1_DESC_BASE<string instr_asm, SDPatternOperator OpNode> {
- dag OutOperandList = (outs ACRegsDSP:$ac);
- dag InOperandList = (ins simm16:$shift, ACRegsDSP:$acin);
+ dag OutOperandList = (outs ACC64DSPOpnd:$ac);
+ dag InOperandList = (ins simm16:$shift, ACC64DSPOpnd:$acin);
string AsmString = !strconcat(instr_asm, "\t$ac, $shift");
- list<dag> Pattern = [(set ACRegsDSP:$ac,
- (OpNode immSExt6:$shift, ACRegsDSP:$acin))];
+ list<dag> Pattern = [(set ACC64DSPOpnd:$ac,
+ (OpNode immSExt6:$shift, ACC64DSPOpnd:$acin))];
string Constraints = "$acin = $ac";
}
class SHILO_R2_DESC_BASE<string instr_asm, SDPatternOperator OpNode> {
- dag OutOperandList = (outs ACRegsDSP:$ac);
- dag InOperandList = (ins GPR32:$rs, ACRegsDSP:$acin);
+ dag OutOperandList = (outs ACC64DSPOpnd:$ac);
+ dag InOperandList = (ins GPR32Opnd:$rs, ACC64DSPOpnd:$acin);
string AsmString = !strconcat(instr_asm, "\t$ac, $rs");
- list<dag> Pattern = [(set ACRegsDSP:$ac,
- (OpNode GPR32:$rs, ACRegsDSP:$acin))];
+ list<dag> Pattern = [(set ACC64DSPOpnd:$ac,
+ (OpNode GPR32Opnd:$rs, ACC64DSPOpnd:$acin))];
string Constraints = "$acin = $ac";
}
class MTHLIP_DESC_BASE<string instr_asm, SDPatternOperator OpNode> {
- dag OutOperandList = (outs ACRegsDSP:$ac);
- dag InOperandList = (ins GPR32:$rs, ACRegsDSP:$acin);
+ dag OutOperandList = (outs ACC64DSPOpnd:$ac);
+ dag InOperandList = (ins GPR32Opnd:$rs, ACC64DSPOpnd:$acin);
string AsmString = !strconcat(instr_asm, "\t$rs, $ac");
- list<dag> Pattern = [(set ACRegsDSP:$ac,
- (OpNode GPR32:$rs, ACRegsDSP:$acin))];
+ list<dag> Pattern = [(set ACC64DSPOpnd:$ac,
+ (OpNode GPR32Opnd:$rs, ACC64DSPOpnd:$acin))];
string Constraints = "$acin = $ac";
}
class RDDSP_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
InstrItinClass itin> {
- dag OutOperandList = (outs GPR32:$rd);
+ dag OutOperandList = (outs GPR32Opnd:$rd);
dag InOperandList = (ins uimm16:$mask);
string AsmString = !strconcat(instr_asm, "\t$rd, $mask");
- list<dag> Pattern = [(set GPR32:$rd, (OpNode immZExt10:$mask))];
+ list<dag> Pattern = [(set GPR32Opnd:$rd, (OpNode immZExt10:$mask))];
InstrItinClass Itinerary = itin;
}
class WRDSP_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
InstrItinClass itin> {
dag OutOperandList = (outs);
- dag InOperandList = (ins GPR32:$rs, uimm16:$mask);
+ dag InOperandList = (ins GPR32Opnd:$rs, uimm16:$mask);
string AsmString = !strconcat(instr_asm, "\t$rs, $mask");
- list<dag> Pattern = [(OpNode GPR32:$rs, immZExt10:$mask)];
+ list<dag> Pattern = [(OpNode GPR32Opnd:$rs, immZExt10:$mask)];
InstrItinClass Itinerary = itin;
}
class DPA_W_PH_DESC_BASE<string instr_asm, SDPatternOperator OpNode> {
- dag OutOperandList = (outs ACRegsDSP:$ac);
- dag InOperandList = (ins GPR32:$rs, GPR32:$rt, ACRegsDSP:$acin);
+ dag OutOperandList = (outs ACC64DSPOpnd:$ac);
+ dag InOperandList = (ins GPR32Opnd:$rs, GPR32Opnd:$rt, ACC64DSPOpnd:$acin);
string AsmString = !strconcat(instr_asm, "\t$ac, $rs, $rt");
- list<dag> Pattern = [(set ACRegsDSP:$ac,
- (OpNode GPR32:$rs, GPR32:$rt, ACRegsDSP:$acin))];
+ list<dag> Pattern = [(set ACC64DSPOpnd:$ac,
+ (OpNode GPR32Opnd:$rs, GPR32Opnd:$rt, ACC64DSPOpnd:$acin))];
string Constraints = "$acin = $ac";
}
class MULT_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
InstrItinClass itin> {
- dag OutOperandList = (outs ACRegsDSP:$ac);
- dag InOperandList = (ins GPR32:$rs, GPR32:$rt);
+ dag OutOperandList = (outs ACC64DSPOpnd:$ac);
+ dag InOperandList = (ins GPR32Opnd:$rs, GPR32Opnd:$rt);
string AsmString = !strconcat(instr_asm, "\t$ac, $rs, $rt");
- list<dag> Pattern = [(set ACRegsDSP:$ac, (OpNode GPR32:$rs, GPR32:$rt))];
+ list<dag> Pattern = [(set ACC64DSPOpnd:$ac, (OpNode GPR32Opnd:$rs, GPR32Opnd:$rt))];
InstrItinClass Itinerary = itin;
- int AddedComplexity = 20;
bit isCommutable = 1;
}
class MADD_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
InstrItinClass itin> {
- dag OutOperandList = (outs ACRegsDSP:$ac);
- dag InOperandList = (ins GPR32:$rs, GPR32:$rt, ACRegsDSP:$acin);
+ dag OutOperandList = (outs ACC64DSPOpnd:$ac);
+ dag InOperandList = (ins GPR32Opnd:$rs, GPR32Opnd:$rt, ACC64DSPOpnd:$acin);
string AsmString = !strconcat(instr_asm, "\t$ac, $rs, $rt");
- list<dag> Pattern = [(set ACRegsDSP:$ac,
- (OpNode GPR32:$rs, GPR32:$rt, ACRegsDSP:$acin))];
+ list<dag> Pattern = [(set ACC64DSPOpnd:$ac,
+ (OpNode GPR32Opnd:$rs, GPR32Opnd:$rt, ACC64DSPOpnd:$acin))];
InstrItinClass Itinerary = itin;
- int AddedComplexity = 20;
string Constraints = "$acin = $ac";
}
-class MFHI_DESC_BASE<string instr_asm, RegisterClass RC, InstrItinClass itin> {
- dag OutOperandList = (outs GPR32:$rd);
- dag InOperandList = (ins RC:$ac);
+class MFHI_DESC_BASE<string instr_asm, RegisterOperand RO, SDNode OpNode,
+ InstrItinClass itin> {
+ dag OutOperandList = (outs GPR32Opnd:$rd);
+ dag InOperandList = (ins RO:$ac);
string AsmString = !strconcat(instr_asm, "\t$rd, $ac");
+ list<dag> Pattern = [(set GPR32Opnd:$rd, (OpNode RO:$ac))];
InstrItinClass Itinerary = itin;
}
-class MTHI_DESC_BASE<string instr_asm, RegisterClass RC, InstrItinClass itin> {
- dag OutOperandList = (outs RC:$ac);
- dag InOperandList = (ins GPR32:$rs);
+class MTHI_DESC_BASE<string instr_asm, RegisterOperand RO, InstrItinClass itin> {
+ dag OutOperandList = (outs RO:$ac);
+ dag InOperandList = (ins GPR32Opnd:$rs);
string AsmString = !strconcat(instr_asm, "\t$rs, $ac");
InstrItinClass Itinerary = itin;
}
class BPOSGE32_PSEUDO_DESC_BASE<SDPatternOperator OpNode, InstrItinClass itin> :
- MipsPseudo<(outs GPR32:$dst), (ins), [(set GPR32:$dst, (OpNode))]> {
+ MipsPseudo<(outs GPR32Opnd:$dst), (ins), [(set GPR32Opnd:$dst, (OpNode))]> {
bit usesCustomInserter = 1;
}
@@ -501,10 +500,10 @@ class BPOSGE32_DESC_BASE<string instr_asm, InstrItinClass itin> {
class INSV_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
InstrItinClass itin> {
- dag OutOperandList = (outs GPR32:$rt);
- dag InOperandList = (ins GPR32:$src, GPR32:$rs);
+ dag OutOperandList = (outs GPR32Opnd:$rt);
+ dag InOperandList = (ins GPR32Opnd:$src, GPR32Opnd:$rs);
string AsmString = !strconcat(instr_asm, "\t$rt, $rs");
- list<dag> Pattern = [(set GPR32:$rt, (OpNode GPR32:$src, GPR32:$rs))];
+ list<dag> Pattern = [(set GPR32Opnd:$rt, (OpNode GPR32Opnd:$src, GPR32Opnd:$rs))];
InstrItinClass Itinerary = itin;
string Constraints = "$src = $rt";
}
@@ -515,209 +514,209 @@ class INSV_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
// Addition/subtraction
class ADDU_QB_DESC : ADDU_QB_DESC_BASE<"addu.qb", null_frag, NoItinerary,
- DSPRegs, DSPRegs>, IsCommutable,
+ DSPROpnd, DSPROpnd>, IsCommutable,
Defs<[DSPOutFlag20]>;
class ADDU_S_QB_DESC : ADDU_QB_DESC_BASE<"addu_s.qb", int_mips_addu_s_qb,
- NoItinerary, DSPRegs, DSPRegs>,
+ NoItinerary, DSPROpnd, DSPROpnd>,
IsCommutable, Defs<[DSPOutFlag20]>;
class SUBU_QB_DESC : ADDU_QB_DESC_BASE<"subu.qb", null_frag, NoItinerary,
- DSPRegs, DSPRegs>,
+ DSPROpnd, DSPROpnd>,
Defs<[DSPOutFlag20]>;
class SUBU_S_QB_DESC : ADDU_QB_DESC_BASE<"subu_s.qb", int_mips_subu_s_qb,
- NoItinerary, DSPRegs, DSPRegs>,
+ NoItinerary, DSPROpnd, DSPROpnd>,
Defs<[DSPOutFlag20]>;
class ADDQ_PH_DESC : ADDU_QB_DESC_BASE<"addq.ph", null_frag, NoItinerary,
- DSPRegs, DSPRegs>, IsCommutable,
+ DSPROpnd, DSPROpnd>, IsCommutable,
Defs<[DSPOutFlag20]>;
class ADDQ_S_PH_DESC : ADDU_QB_DESC_BASE<"addq_s.ph", int_mips_addq_s_ph,
- NoItinerary, DSPRegs, DSPRegs>,
+ NoItinerary, DSPROpnd, DSPROpnd>,
IsCommutable, Defs<[DSPOutFlag20]>;
class SUBQ_PH_DESC : ADDU_QB_DESC_BASE<"subq.ph", null_frag, NoItinerary,
- DSPRegs, DSPRegs>,
+ DSPROpnd, DSPROpnd>,
Defs<[DSPOutFlag20]>;
class SUBQ_S_PH_DESC : ADDU_QB_DESC_BASE<"subq_s.ph", int_mips_subq_s_ph,
- NoItinerary, DSPRegs, DSPRegs>,
+ NoItinerary, DSPROpnd, DSPROpnd>,
Defs<[DSPOutFlag20]>;
class ADDQ_S_W_DESC : ADDU_QB_DESC_BASE<"addq_s.w", int_mips_addq_s_w,
- NoItinerary, GPR32, GPR32>,
+ NoItinerary, GPR32Opnd, GPR32Opnd>,
IsCommutable, Defs<[DSPOutFlag20]>;
class SUBQ_S_W_DESC : ADDU_QB_DESC_BASE<"subq_s.w", int_mips_subq_s_w,
- NoItinerary, GPR32, GPR32>,
+ NoItinerary, GPR32Opnd, GPR32Opnd>,
Defs<[DSPOutFlag20]>;
class ADDSC_DESC : ADDU_QB_DESC_BASE<"addsc", null_frag, NoItinerary,
- GPR32, GPR32>, IsCommutable,
+ GPR32Opnd, GPR32Opnd>, IsCommutable,
Defs<[DSPCarry]>;
class ADDWC_DESC : ADDU_QB_DESC_BASE<"addwc", null_frag, NoItinerary,
- GPR32, GPR32>,
+ GPR32Opnd, GPR32Opnd>,
IsCommutable, Uses<[DSPCarry]>, Defs<[DSPOutFlag20]>;
class MODSUB_DESC : ADDU_QB_DESC_BASE<"modsub", int_mips_modsub, NoItinerary,
- GPR32, GPR32>;
+ GPR32Opnd, GPR32Opnd>;
class RADDU_W_QB_DESC : RADDU_W_QB_DESC_BASE<"raddu.w.qb", int_mips_raddu_w_qb,
- NoItinerary, GPR32, DSPRegs>;
+ NoItinerary, GPR32Opnd, DSPROpnd>;
// Absolute value
class ABSQ_S_PH_DESC : ABSQ_S_PH_R2_DESC_BASE<"absq_s.ph", int_mips_absq_s_ph,
- NoItinerary, DSPRegs>,
+ NoItinerary, DSPROpnd>,
Defs<[DSPOutFlag20]>;
class ABSQ_S_W_DESC : ABSQ_S_PH_R2_DESC_BASE<"absq_s.w", int_mips_absq_s_w,
- NoItinerary, GPR32>,
+ NoItinerary, GPR32Opnd>,
Defs<[DSPOutFlag20]>;
// Precision reduce/expand
class PRECRQ_QB_PH_DESC : CMP_EQ_QB_R3_DESC_BASE<"precrq.qb.ph",
int_mips_precrq_qb_ph,
- NoItinerary, DSPRegs, DSPRegs>;
+ NoItinerary, DSPROpnd, DSPROpnd>;
class PRECRQ_PH_W_DESC : CMP_EQ_QB_R3_DESC_BASE<"precrq.ph.w",
int_mips_precrq_ph_w,
- NoItinerary, DSPRegs, GPR32>;
+ NoItinerary, DSPROpnd, GPR32Opnd>;
class PRECRQ_RS_PH_W_DESC : CMP_EQ_QB_R3_DESC_BASE<"precrq_rs.ph.w",
int_mips_precrq_rs_ph_w,
- NoItinerary, DSPRegs,
- GPR32>,
+ NoItinerary, DSPROpnd,
+ GPR32Opnd>,
Defs<[DSPOutFlag22]>;
class PRECRQU_S_QB_PH_DESC : CMP_EQ_QB_R3_DESC_BASE<"precrqu_s.qb.ph",
int_mips_precrqu_s_qb_ph,
- NoItinerary, DSPRegs,
- DSPRegs>,
+ NoItinerary, DSPROpnd,
+ DSPROpnd>,
Defs<[DSPOutFlag22]>;
class PRECEQ_W_PHL_DESC : ABSQ_S_PH_R2_DESC_BASE<"preceq.w.phl",
int_mips_preceq_w_phl,
- NoItinerary, GPR32, DSPRegs>;
+ NoItinerary, GPR32Opnd, DSPROpnd>;
class PRECEQ_W_PHR_DESC : ABSQ_S_PH_R2_DESC_BASE<"preceq.w.phr",
int_mips_preceq_w_phr,
- NoItinerary, GPR32, DSPRegs>;
+ NoItinerary, GPR32Opnd, DSPROpnd>;
class PRECEQU_PH_QBL_DESC : ABSQ_S_PH_R2_DESC_BASE<"precequ.ph.qbl",
int_mips_precequ_ph_qbl,
- NoItinerary, DSPRegs>;
+ NoItinerary, DSPROpnd>;
class PRECEQU_PH_QBR_DESC : ABSQ_S_PH_R2_DESC_BASE<"precequ.ph.qbr",
int_mips_precequ_ph_qbr,
- NoItinerary, DSPRegs>;
+ NoItinerary, DSPROpnd>;
class PRECEQU_PH_QBLA_DESC : ABSQ_S_PH_R2_DESC_BASE<"precequ.ph.qbla",
int_mips_precequ_ph_qbla,
- NoItinerary, DSPRegs>;
+ NoItinerary, DSPROpnd>;
class PRECEQU_PH_QBRA_DESC : ABSQ_S_PH_R2_DESC_BASE<"precequ.ph.qbra",
int_mips_precequ_ph_qbra,
- NoItinerary, DSPRegs>;
+ NoItinerary, DSPROpnd>;
class PRECEU_PH_QBL_DESC : ABSQ_S_PH_R2_DESC_BASE<"preceu.ph.qbl",
int_mips_preceu_ph_qbl,
- NoItinerary, DSPRegs>;
+ NoItinerary, DSPROpnd>;
class PRECEU_PH_QBR_DESC : ABSQ_S_PH_R2_DESC_BASE<"preceu.ph.qbr",
int_mips_preceu_ph_qbr,
- NoItinerary, DSPRegs>;
+ NoItinerary, DSPROpnd>;
class PRECEU_PH_QBLA_DESC : ABSQ_S_PH_R2_DESC_BASE<"preceu.ph.qbla",
int_mips_preceu_ph_qbla,
- NoItinerary, DSPRegs>;
+ NoItinerary, DSPROpnd>;
class PRECEU_PH_QBRA_DESC : ABSQ_S_PH_R2_DESC_BASE<"preceu.ph.qbra",
int_mips_preceu_ph_qbra,
- NoItinerary, DSPRegs>;
+ NoItinerary, DSPROpnd>;
// Shift
class SHLL_QB_DESC : SHLL_QB_R2_DESC_BASE<"shll.qb", null_frag, immZExt3,
- NoItinerary, DSPRegs>,
+ NoItinerary, DSPROpnd>,
Defs<[DSPOutFlag22]>;
class SHLLV_QB_DESC : SHLL_QB_R3_DESC_BASE<"shllv.qb", int_mips_shll_qb,
- NoItinerary, DSPRegs>,
+ NoItinerary, DSPROpnd>,
Defs<[DSPOutFlag22]>;
class SHRL_QB_DESC : SHLL_QB_R2_DESC_BASE<"shrl.qb", null_frag, immZExt3,
- NoItinerary, DSPRegs>;
+ NoItinerary, DSPROpnd>;
class SHRLV_QB_DESC : SHLL_QB_R3_DESC_BASE<"shrlv.qb", int_mips_shrl_qb,
- NoItinerary, DSPRegs>;
+ NoItinerary, DSPROpnd>;
class SHLL_PH_DESC : SHLL_QB_R2_DESC_BASE<"shll.ph", null_frag, immZExt4,
- NoItinerary, DSPRegs>,
+ NoItinerary, DSPROpnd>,
Defs<[DSPOutFlag22]>;
class SHLLV_PH_DESC : SHLL_QB_R3_DESC_BASE<"shllv.ph", int_mips_shll_ph,
- NoItinerary, DSPRegs>,
+ NoItinerary, DSPROpnd>,
Defs<[DSPOutFlag22]>;
class SHLL_S_PH_DESC : SHLL_QB_R2_DESC_BASE<"shll_s.ph", int_mips_shll_s_ph,
- immZExt4, NoItinerary, DSPRegs>,
+ immZExt4, NoItinerary, DSPROpnd>,
Defs<[DSPOutFlag22]>;
class SHLLV_S_PH_DESC : SHLL_QB_R3_DESC_BASE<"shllv_s.ph", int_mips_shll_s_ph,
- NoItinerary, DSPRegs>,
+ NoItinerary, DSPROpnd>,
Defs<[DSPOutFlag22]>;
class SHRA_PH_DESC : SHLL_QB_R2_DESC_BASE<"shra.ph", null_frag, immZExt4,
- NoItinerary, DSPRegs>;
+ NoItinerary, DSPROpnd>;
class SHRAV_PH_DESC : SHLL_QB_R3_DESC_BASE<"shrav.ph", int_mips_shra_ph,
- NoItinerary, DSPRegs>;
+ NoItinerary, DSPROpnd>;
class SHRA_R_PH_DESC : SHLL_QB_R2_DESC_BASE<"shra_r.ph", int_mips_shra_r_ph,
- immZExt4, NoItinerary, DSPRegs>;
+ immZExt4, NoItinerary, DSPROpnd>;
class SHRAV_R_PH_DESC : SHLL_QB_R3_DESC_BASE<"shrav_r.ph", int_mips_shra_r_ph,
- NoItinerary, DSPRegs>;
+ NoItinerary, DSPROpnd>;
class SHLL_S_W_DESC : SHLL_QB_R2_DESC_BASE<"shll_s.w", int_mips_shll_s_w,
- immZExt5, NoItinerary, GPR32>,
+ immZExt5, NoItinerary, GPR32Opnd>,
Defs<[DSPOutFlag22]>;
class SHLLV_S_W_DESC : SHLL_QB_R3_DESC_BASE<"shllv_s.w", int_mips_shll_s_w,
- NoItinerary, GPR32>,
+ NoItinerary, GPR32Opnd>,
Defs<[DSPOutFlag22]>;
class SHRA_R_W_DESC : SHLL_QB_R2_DESC_BASE<"shra_r.w", int_mips_shra_r_w,
- immZExt5, NoItinerary, GPR32>;
+ immZExt5, NoItinerary, GPR32Opnd>;
class SHRAV_R_W_DESC : SHLL_QB_R3_DESC_BASE<"shrav_r.w", int_mips_shra_r_w,
- NoItinerary, GPR32>;
+ NoItinerary, GPR32Opnd>;
// Multiplication
class MULEU_S_PH_QBL_DESC : ADDU_QB_DESC_BASE<"muleu_s.ph.qbl",
int_mips_muleu_s_ph_qbl,
- NoItinerary, DSPRegs, DSPRegs>,
+ NoItinerary, DSPROpnd, DSPROpnd>,
Defs<[DSPOutFlag21]>;
class MULEU_S_PH_QBR_DESC : ADDU_QB_DESC_BASE<"muleu_s.ph.qbr",
int_mips_muleu_s_ph_qbr,
- NoItinerary, DSPRegs, DSPRegs>,
+ NoItinerary, DSPROpnd, DSPROpnd>,
Defs<[DSPOutFlag21]>;
class MULEQ_S_W_PHL_DESC : ADDU_QB_DESC_BASE<"muleq_s.w.phl",
int_mips_muleq_s_w_phl,
- NoItinerary, GPR32, DSPRegs>,
+ NoItinerary, GPR32Opnd, DSPROpnd>,
IsCommutable, Defs<[DSPOutFlag21]>;
class MULEQ_S_W_PHR_DESC : ADDU_QB_DESC_BASE<"muleq_s.w.phr",
int_mips_muleq_s_w_phr,
- NoItinerary, GPR32, DSPRegs>,
+ NoItinerary, GPR32Opnd, DSPROpnd>,
IsCommutable, Defs<[DSPOutFlag21]>;
class MULQ_RS_PH_DESC : ADDU_QB_DESC_BASE<"mulq_rs.ph", int_mips_mulq_rs_ph,
- NoItinerary, DSPRegs, DSPRegs>,
+ NoItinerary, DSPROpnd, DSPROpnd>,
IsCommutable, Defs<[DSPOutFlag21]>;
class MULSAQ_S_W_PH_DESC : DPA_W_PH_DESC_BASE<"mulsaq_s.w.ph",
@@ -737,10 +736,10 @@ class MAQ_SA_W_PHR_DESC : DPA_W_PH_DESC_BASE<"maq_sa.w.phr", MipsMAQ_SA_W_PHR>,
Defs<[DSPOutFlag16_19]>;
// Move from/to hi/lo.
-class MFHI_DESC : MFHI_DESC_BASE<"mfhi", HIRegsDSP, NoItinerary>;
-class MFLO_DESC : MFHI_DESC_BASE<"mflo", LORegsDSP, NoItinerary>;
-class MTHI_DESC : MTHI_DESC_BASE<"mthi", HIRegsDSP, NoItinerary>;
-class MTLO_DESC : MTHI_DESC_BASE<"mtlo", LORegsDSP, NoItinerary>;
+class MFHI_DESC : MFHI_DESC_BASE<"mfhi", ACC64DSPOpnd, MipsMFHI, NoItinerary>;
+class MFLO_DESC : MFHI_DESC_BASE<"mflo", ACC64DSPOpnd, MipsMFLO, NoItinerary>;
+class MTHI_DESC : MTHI_DESC_BASE<"mthi", HI32DSPOpnd, NoItinerary>;
+class MTLO_DESC : MTHI_DESC_BASE<"mtlo", LO32DSPOpnd, NoItinerary>;
// Dot product with accumulate/subtract
class DPAU_H_QBL_DESC : DPA_W_PH_DESC_BASE<"dpau.h.qbl", MipsDPAU_H_QBL>;
@@ -773,67 +772,67 @@ class MSUBU_DSP_DESC : MADD_DESC_BASE<"msubu", MipsMSubu, NoItinerary>;
// Comparison
class CMPU_EQ_QB_DESC : CMP_EQ_QB_R2_DESC_BASE<"cmpu.eq.qb",
int_mips_cmpu_eq_qb, NoItinerary,
- DSPRegs>,
+ DSPROpnd>,
IsCommutable, Defs<[DSPCCond]>;
class CMPU_LT_QB_DESC : CMP_EQ_QB_R2_DESC_BASE<"cmpu.lt.qb",
int_mips_cmpu_lt_qb, NoItinerary,
- DSPRegs>, Defs<[DSPCCond]>;
+ DSPROpnd>, Defs<[DSPCCond]>;
class CMPU_LE_QB_DESC : CMP_EQ_QB_R2_DESC_BASE<"cmpu.le.qb",
int_mips_cmpu_le_qb, NoItinerary,
- DSPRegs>, Defs<[DSPCCond]>;
+ DSPROpnd>, Defs<[DSPCCond]>;
class CMPGU_EQ_QB_DESC : CMP_EQ_QB_R3_DESC_BASE<"cmpgu.eq.qb",
int_mips_cmpgu_eq_qb,
- NoItinerary, GPR32, DSPRegs>,
+ NoItinerary, GPR32Opnd, DSPROpnd>,
IsCommutable;
class CMPGU_LT_QB_DESC : CMP_EQ_QB_R3_DESC_BASE<"cmpgu.lt.qb",
int_mips_cmpgu_lt_qb,
- NoItinerary, GPR32, DSPRegs>;
+ NoItinerary, GPR32Opnd, DSPROpnd>;
class CMPGU_LE_QB_DESC : CMP_EQ_QB_R3_DESC_BASE<"cmpgu.le.qb",
int_mips_cmpgu_le_qb,
- NoItinerary, GPR32, DSPRegs>;
+ NoItinerary, GPR32Opnd, DSPROpnd>;
class CMP_EQ_PH_DESC : CMP_EQ_QB_R2_DESC_BASE<"cmp.eq.ph", int_mips_cmp_eq_ph,
- NoItinerary, DSPRegs>,
+ NoItinerary, DSPROpnd>,
IsCommutable, Defs<[DSPCCond]>;
class CMP_LT_PH_DESC : CMP_EQ_QB_R2_DESC_BASE<"cmp.lt.ph", int_mips_cmp_lt_ph,
- NoItinerary, DSPRegs>,
+ NoItinerary, DSPROpnd>,
Defs<[DSPCCond]>;
class CMP_LE_PH_DESC : CMP_EQ_QB_R2_DESC_BASE<"cmp.le.ph", int_mips_cmp_le_ph,
- NoItinerary, DSPRegs>,
+ NoItinerary, DSPROpnd>,
Defs<[DSPCCond]>;
// Misc
class BITREV_DESC : ABSQ_S_PH_R2_DESC_BASE<"bitrev", int_mips_bitrev,
- NoItinerary, GPR32>;
+ NoItinerary, GPR32Opnd>;
class PACKRL_PH_DESC : CMP_EQ_QB_R3_DESC_BASE<"packrl.ph", int_mips_packrl_ph,
- NoItinerary, DSPRegs, DSPRegs>;
+ NoItinerary, DSPROpnd, DSPROpnd>;
class REPL_QB_DESC : REPL_DESC_BASE<"repl.qb", int_mips_repl_qb, immZExt8,
- NoItinerary, DSPRegs>;
+ NoItinerary, DSPROpnd>;
class REPL_PH_DESC : REPL_DESC_BASE<"repl.ph", int_mips_repl_ph, immZExt10,
- NoItinerary, DSPRegs>;
+ NoItinerary, DSPROpnd>;
class REPLV_QB_DESC : ABSQ_S_PH_R2_DESC_BASE<"replv.qb", int_mips_repl_qb,
- NoItinerary, DSPRegs, GPR32>;
+ NoItinerary, DSPROpnd, GPR32Opnd>;
class REPLV_PH_DESC : ABSQ_S_PH_R2_DESC_BASE<"replv.ph", int_mips_repl_ph,
- NoItinerary, DSPRegs, GPR32>;
+ NoItinerary, DSPROpnd, GPR32Opnd>;
class PICK_QB_DESC : CMP_EQ_QB_R3_DESC_BASE<"pick.qb", int_mips_pick_qb,
- NoItinerary, DSPRegs, DSPRegs>,
+ NoItinerary, DSPROpnd, DSPROpnd>,
Uses<[DSPCCond]>;
class PICK_PH_DESC : CMP_EQ_QB_R3_DESC_BASE<"pick.ph", int_mips_pick_ph,
- NoItinerary, DSPRegs, DSPRegs>,
+ NoItinerary, DSPROpnd, DSPROpnd>,
Uses<[DSPCCond]>;
class LWX_DESC : LX_DESC_BASE<"lwx", int_mips_lwx, NoItinerary>;
@@ -905,97 +904,97 @@ class INSV_DESC : INSV_DESC_BASE<"insv", int_mips_insv, NoItinerary>,
// MIPS DSP Rev 2
// Addition/subtraction
class ADDU_PH_DESC : ADDU_QB_DESC_BASE<"addu.ph", int_mips_addu_ph, NoItinerary,
- DSPRegs, DSPRegs>, IsCommutable,
+ DSPROpnd, DSPROpnd>, IsCommutable,
Defs<[DSPOutFlag20]>;
class ADDU_S_PH_DESC : ADDU_QB_DESC_BASE<"addu_s.ph", int_mips_addu_s_ph,
- NoItinerary, DSPRegs, DSPRegs>,
+ NoItinerary, DSPROpnd, DSPROpnd>,
IsCommutable, Defs<[DSPOutFlag20]>;
class SUBU_PH_DESC : ADDU_QB_DESC_BASE<"subu.ph", int_mips_subu_ph, NoItinerary,
- DSPRegs, DSPRegs>,
+ DSPROpnd, DSPROpnd>,
Defs<[DSPOutFlag20]>;
class SUBU_S_PH_DESC : ADDU_QB_DESC_BASE<"subu_s.ph", int_mips_subu_s_ph,
- NoItinerary, DSPRegs, DSPRegs>,
+ NoItinerary, DSPROpnd, DSPROpnd>,
Defs<[DSPOutFlag20]>;
class ADDUH_QB_DESC : ADDUH_QB_DESC_BASE<"adduh.qb", int_mips_adduh_qb,
- NoItinerary, DSPRegs>, IsCommutable;
+ NoItinerary, DSPROpnd>, IsCommutable;
class ADDUH_R_QB_DESC : ADDUH_QB_DESC_BASE<"adduh_r.qb", int_mips_adduh_r_qb,
- NoItinerary, DSPRegs>, IsCommutable;
+ NoItinerary, DSPROpnd>, IsCommutable;
class SUBUH_QB_DESC : ADDUH_QB_DESC_BASE<"subuh.qb", int_mips_subuh_qb,
- NoItinerary, DSPRegs>;
+ NoItinerary, DSPROpnd>;
class SUBUH_R_QB_DESC : ADDUH_QB_DESC_BASE<"subuh_r.qb", int_mips_subuh_r_qb,
- NoItinerary, DSPRegs>;
+ NoItinerary, DSPROpnd>;
class ADDQH_PH_DESC : ADDUH_QB_DESC_BASE<"addqh.ph", int_mips_addqh_ph,
- NoItinerary, DSPRegs>, IsCommutable;
+ NoItinerary, DSPROpnd>, IsCommutable;
class ADDQH_R_PH_DESC : ADDUH_QB_DESC_BASE<"addqh_r.ph", int_mips_addqh_r_ph,
- NoItinerary, DSPRegs>, IsCommutable;
+ NoItinerary, DSPROpnd>, IsCommutable;
class SUBQH_PH_DESC : ADDUH_QB_DESC_BASE<"subqh.ph", int_mips_subqh_ph,
- NoItinerary, DSPRegs>;
+ NoItinerary, DSPROpnd>;
class SUBQH_R_PH_DESC : ADDUH_QB_DESC_BASE<"subqh_r.ph", int_mips_subqh_r_ph,
- NoItinerary, DSPRegs>;
+ NoItinerary, DSPROpnd>;
class ADDQH_W_DESC : ADDUH_QB_DESC_BASE<"addqh.w", int_mips_addqh_w,
- NoItinerary, GPR32>, IsCommutable;
+ NoItinerary, GPR32Opnd>, IsCommutable;
class ADDQH_R_W_DESC : ADDUH_QB_DESC_BASE<"addqh_r.w", int_mips_addqh_r_w,
- NoItinerary, GPR32>, IsCommutable;
+ NoItinerary, GPR32Opnd>, IsCommutable;
class SUBQH_W_DESC : ADDUH_QB_DESC_BASE<"subqh.w", int_mips_subqh_w,
- NoItinerary, GPR32>;
+ NoItinerary, GPR32Opnd>;
class SUBQH_R_W_DESC : ADDUH_QB_DESC_BASE<"subqh_r.w", int_mips_subqh_r_w,
- NoItinerary, GPR32>;
+ NoItinerary, GPR32Opnd>;
// Comparison
class CMPGDU_EQ_QB_DESC : CMP_EQ_QB_R3_DESC_BASE<"cmpgdu.eq.qb",
int_mips_cmpgdu_eq_qb,
- NoItinerary, GPR32, DSPRegs>,
+ NoItinerary, GPR32Opnd, DSPROpnd>,
IsCommutable, Defs<[DSPCCond]>;
class CMPGDU_LT_QB_DESC : CMP_EQ_QB_R3_DESC_BASE<"cmpgdu.lt.qb",
int_mips_cmpgdu_lt_qb,
- NoItinerary, GPR32, DSPRegs>,
+ NoItinerary, GPR32Opnd, DSPROpnd>,
Defs<[DSPCCond]>;
class CMPGDU_LE_QB_DESC : CMP_EQ_QB_R3_DESC_BASE<"cmpgdu.le.qb",
int_mips_cmpgdu_le_qb,
- NoItinerary, GPR32, DSPRegs>,
+ NoItinerary, GPR32Opnd, DSPROpnd>,
Defs<[DSPCCond]>;
// Absolute
class ABSQ_S_QB_DESC : ABSQ_S_PH_R2_DESC_BASE<"absq_s.qb", int_mips_absq_s_qb,
- NoItinerary, DSPRegs>,
+ NoItinerary, DSPROpnd>,
Defs<[DSPOutFlag20]>;
// Multiplication
class MUL_PH_DESC : ADDUH_QB_DESC_BASE<"mul.ph", null_frag, NoItinerary,
- DSPRegs>, IsCommutable,
+ DSPROpnd>, IsCommutable,
Defs<[DSPOutFlag21]>;
class MUL_S_PH_DESC : ADDUH_QB_DESC_BASE<"mul_s.ph", int_mips_mul_s_ph,
- NoItinerary, DSPRegs>, IsCommutable,
+ NoItinerary, DSPROpnd>, IsCommutable,
Defs<[DSPOutFlag21]>;
class MULQ_S_W_DESC : ADDUH_QB_DESC_BASE<"mulq_s.w", int_mips_mulq_s_w,
- NoItinerary, GPR32>, IsCommutable,
+ NoItinerary, GPR32Opnd>, IsCommutable,
Defs<[DSPOutFlag21]>;
class MULQ_RS_W_DESC : ADDUH_QB_DESC_BASE<"mulq_rs.w", int_mips_mulq_rs_w,
- NoItinerary, GPR32>, IsCommutable,
+ NoItinerary, GPR32Opnd>, IsCommutable,
Defs<[DSPOutFlag21]>;
class MULQ_S_PH_DESC : ADDU_QB_DESC_BASE<"mulq_s.ph", int_mips_mulq_s_ph,
- NoItinerary, DSPRegs, DSPRegs>,
+ NoItinerary, DSPROpnd, DSPROpnd>,
IsCommutable, Defs<[DSPOutFlag21]>;
// Dot product with accumulate/subtract
@@ -1026,36 +1025,36 @@ class MULSA_W_PH_DESC : DPA_W_PH_DESC_BASE<"mulsa.w.ph", MipsMULSA_W_PH>;
// Precision reduce/expand
class PRECR_QB_PH_DESC : CMP_EQ_QB_R3_DESC_BASE<"precr.qb.ph",
int_mips_precr_qb_ph,
- NoItinerary, DSPRegs, DSPRegs>;
+ NoItinerary, DSPROpnd, DSPROpnd>;
class PRECR_SRA_PH_W_DESC : PRECR_SRA_PH_W_DESC_BASE<"precr_sra.ph.w",
int_mips_precr_sra_ph_w,
- NoItinerary, DSPRegs,
- GPR32>;
+ NoItinerary, DSPROpnd,
+ GPR32Opnd>;
class PRECR_SRA_R_PH_W_DESC : PRECR_SRA_PH_W_DESC_BASE<"precr_sra_r.ph.w",
int_mips_precr_sra_r_ph_w,
- NoItinerary, DSPRegs,
- GPR32>;
+ NoItinerary, DSPROpnd,
+ GPR32Opnd>;
// Shift
class SHRA_QB_DESC : SHLL_QB_R2_DESC_BASE<"shra.qb", null_frag, immZExt3,
- NoItinerary, DSPRegs>;
+ NoItinerary, DSPROpnd>;
class SHRAV_QB_DESC : SHLL_QB_R3_DESC_BASE<"shrav.qb", int_mips_shra_qb,
- NoItinerary, DSPRegs>;
+ NoItinerary, DSPROpnd>;
class SHRA_R_QB_DESC : SHLL_QB_R2_DESC_BASE<"shra_r.qb", int_mips_shra_r_qb,
- immZExt3, NoItinerary, DSPRegs>;
+ immZExt3, NoItinerary, DSPROpnd>;
class SHRAV_R_QB_DESC : SHLL_QB_R3_DESC_BASE<"shrav_r.qb", int_mips_shra_r_qb,
- NoItinerary, DSPRegs>;
+ NoItinerary, DSPROpnd>;
class SHRL_PH_DESC : SHLL_QB_R2_DESC_BASE<"shrl.ph", null_frag, immZExt4,
- NoItinerary, DSPRegs>;
+ NoItinerary, DSPROpnd>;
class SHRLV_PH_DESC : SHLL_QB_R3_DESC_BASE<"shrlv.ph", int_mips_shrl_ph,
- NoItinerary, DSPRegs>;
+ NoItinerary, DSPROpnd>;
// Misc
class APPEND_DESC : APPEND_DESC_BASE<"append", int_mips_append, immZExt5,
@@ -1240,24 +1239,24 @@ def PREPEND : PREPEND_ENC, PREPEND_DESC;
}
// Pseudos.
-let isPseudo = 1 in {
+let isPseudo = 1, isCodeGenOnly = 1 in {
// Pseudo instructions for loading and storing accumulator registers.
- defm LOAD_AC_DSP : LoadM<"load_ac_dsp", ACRegsDSPOpnd>;
- defm STORE_AC_DSP : StoreM<"store_ac_dsp", ACRegsDSPOpnd>;
+ def LOAD_ACC64DSP : Load<"", ACC64DSPOpnd>;
+ def STORE_ACC64DSP : Store<"", ACC64DSPOpnd>;
// Pseudos for loading and storing ccond field of DSP control register.
- defm LOAD_CCOND_DSP : LoadM<"load_ccond_dsp", DSPCC>;
- defm STORE_CCOND_DSP : StoreM<"store_ccond_dsp", DSPCC>;
+ def LOAD_CCOND_DSP : Load<"load_ccond_dsp", DSPCC>;
+ def STORE_CCOND_DSP : Store<"store_ccond_dsp", DSPCC>;
}
// Pseudo CMP and PICK instructions.
class PseudoCMP<Instruction RealInst> :
- PseudoDSP<(outs DSPCC:$cmp), (ins DSPRegs:$rs, DSPRegs:$rt), []>,
- PseudoInstExpansion<(RealInst DSPRegs:$rs, DSPRegs:$rt)>, NeverHasSideEffects;
+ PseudoDSP<(outs DSPCC:$cmp), (ins DSPROpnd:$rs, DSPROpnd:$rt), []>,
+ PseudoInstExpansion<(RealInst DSPROpnd:$rs, DSPROpnd:$rt)>, NeverHasSideEffects;
class PseudoPICK<Instruction RealInst> :
- PseudoDSP<(outs DSPRegs:$rd), (ins DSPCC:$cmp, DSPRegs:$rs, DSPRegs:$rt), []>,
- PseudoInstExpansion<(RealInst DSPRegs:$rd, DSPRegs:$rs, DSPRegs:$rt)>,
+ PseudoDSP<(outs DSPROpnd:$rd), (ins DSPCC:$cmp, DSPROpnd:$rs, DSPROpnd:$rt), []>,
+ PseudoInstExpansion<(RealInst DSPROpnd:$rd, DSPROpnd:$rs, DSPROpnd:$rt)>,
NeverHasSideEffects;
def PseudoCMP_EQ_PH : PseudoCMP<CMP_EQ_PH>;
@@ -1270,6 +1269,8 @@ def PseudoCMPU_LE_QB : PseudoCMP<CMPU_LE_QB>;
def PseudoPICK_PH : PseudoPICK<PICK_PH>;
def PseudoPICK_QB : PseudoPICK<PICK_QB>;
+def PseudoMTLOHI_DSP : PseudoMTLOHI<ACC64DSP, GPR32>;
+
// Patterns.
class DSPPat<dag pattern, dag result, Predicate pred = HasDSP> :
Pat<pattern, result>, Requires<[pred]>;
@@ -1279,19 +1280,19 @@ class BitconvertPat<ValueType DstVT, ValueType SrcVT, RegisterClass DstRC,
DSPPat<(DstVT (bitconvert (SrcVT SrcRC:$src))),
(COPY_TO_REGCLASS SrcRC:$src, DstRC)>;
-def : BitconvertPat<i32, v2i16, GPR32, DSPRegs>;
-def : BitconvertPat<i32, v4i8, GPR32, DSPRegs>;
-def : BitconvertPat<v2i16, i32, DSPRegs, GPR32>;
-def : BitconvertPat<v4i8, i32, DSPRegs, GPR32>;
+def : BitconvertPat<i32, v2i16, GPR32, DSPR>;
+def : BitconvertPat<i32, v4i8, GPR32, DSPR>;
+def : BitconvertPat<v2i16, i32, DSPR, GPR32>;
+def : BitconvertPat<v4i8, i32, DSPR, GPR32>;
def : DSPPat<(v2i16 (load addr:$a)),
- (v2i16 (COPY_TO_REGCLASS (LW addr:$a), DSPRegs))>;
+ (v2i16 (COPY_TO_REGCLASS (LW addr:$a), DSPR))>;
def : DSPPat<(v4i8 (load addr:$a)),
- (v4i8 (COPY_TO_REGCLASS (LW addr:$a), DSPRegs))>;
-def : DSPPat<(store (v2i16 DSPRegs:$val), addr:$a),
- (SW (COPY_TO_REGCLASS DSPRegs:$val, GPR32), addr:$a)>;
-def : DSPPat<(store (v4i8 DSPRegs:$val), addr:$a),
- (SW (COPY_TO_REGCLASS DSPRegs:$val, GPR32), addr:$a)>;
+ (v4i8 (COPY_TO_REGCLASS (LW addr:$a), DSPR))>;
+def : DSPPat<(store (v2i16 DSPR:$val), addr:$a),
+ (SW (COPY_TO_REGCLASS DSPR:$val, GPR32), addr:$a)>;
+def : DSPPat<(store (v4i8 DSPR:$val), addr:$a),
+ (SW (COPY_TO_REGCLASS DSPR:$val, GPR32), addr:$a)>;
// Binary operations.
class DSPBinPat<Instruction Inst, ValueType ValTy, SDPatternOperator Node,
@@ -1336,7 +1337,7 @@ class DSPSetCCPat<Instruction Cmp, Instruction Pick, ValueType ValTy,
CondCode CC> :
DSPPat<(ValTy (MipsSETCC_DSP ValTy:$a, ValTy:$b, CC)),
(ValTy (Pick (ValTy (Cmp ValTy:$a, ValTy:$b)),
- (ValTy (COPY_TO_REGCLASS (ADDiu ZERO, -1), DSPRegs)),
+ (ValTy (COPY_TO_REGCLASS (ADDiu ZERO, -1), DSPR)),
(ValTy ZERO)))>;
class DSPSetCCPatInv<Instruction Cmp, Instruction Pick, ValueType ValTy,
@@ -1344,7 +1345,7 @@ class DSPSetCCPatInv<Instruction Cmp, Instruction Pick, ValueType ValTy,
DSPPat<(ValTy (MipsSETCC_DSP ValTy:$a, ValTy:$b, CC)),
(ValTy (Pick (ValTy (Cmp ValTy:$a, ValTy:$b)),
(ValTy ZERO),
- (ValTy (COPY_TO_REGCLASS (ADDiu ZERO, -1), DSPRegs))))>;
+ (ValTy (COPY_TO_REGCLASS (ADDiu ZERO, -1), DSPR))))>;
class DSPSelectCCPat<Instruction Cmp, Instruction Pick, ValueType ValTy,
CondCode CC> :
@@ -1384,12 +1385,12 @@ def : DSPSelectCCPatInv<PseudoCMPU_LE_QB, PseudoPICK_QB, v4i8, SETUGT>;
// Extr patterns.
class EXTR_W_TY1_R2_Pat<SDPatternOperator OpNode, Instruction Instr> :
- DSPPat<(i32 (OpNode GPR32:$rs, ACRegsDSP:$ac)),
- (Instr ACRegsDSP:$ac, GPR32:$rs)>;
+ DSPPat<(i32 (OpNode GPR32:$rs, ACC64DSP:$ac)),
+ (Instr ACC64DSP:$ac, GPR32:$rs)>;
class EXTR_W_TY1_R1_Pat<SDPatternOperator OpNode, Instruction Instr> :
- DSPPat<(i32 (OpNode immZExt5:$shift, ACRegsDSP:$ac)),
- (Instr ACRegsDSP:$ac, immZExt5:$shift)>;
+ DSPPat<(i32 (OpNode immZExt5:$shift, ACC64DSP:$ac)),
+ (Instr ACC64DSP:$ac, immZExt5:$shift)>;
def : EXTR_W_TY1_R1_Pat<MipsEXTP, EXTP>;
def : EXTR_W_TY1_R2_Pat<MipsEXTP, EXTPV>;
@@ -1404,11 +1405,6 @@ def : EXTR_W_TY1_R2_Pat<MipsEXTR_RS_W, EXTRV_RS_W>;
def : EXTR_W_TY1_R1_Pat<MipsEXTR_S_H, EXTR_S_H>;
def : EXTR_W_TY1_R2_Pat<MipsEXTR_S_H, EXTRV_S_H>;
-// mflo/hi patterns.
-let AddedComplexity = 20 in
-def : DSPPat<(i32 (ExtractLOHI ACRegsDSP:$ac, imm:$lohi_idx)),
- (EXTRACT_SUBREG ACRegsDSP:$ac, imm:$lohi_idx)>;
-
// Indexed load patterns.
class IndexedLoadPat<SDPatternOperator LoadNode, Instruction Instr> :
DSPPat<(i32 (LoadNode (add i32:$base, i32:$index))),
diff --git a/lib/Target/Mips/MipsDelaySlotFiller.cpp b/lib/Target/Mips/MipsDelaySlotFiller.cpp
index 545a38d..ffbd83b 100644
--- a/lib/Target/Mips/MipsDelaySlotFiller.cpp
+++ b/lib/Target/Mips/MipsDelaySlotFiller.cpp
@@ -421,8 +421,7 @@ bool LoadFromStackOrConst::hasHazard_(const MachineInstr &MI) {
return false;
if (const PseudoSourceValue *PSV = dyn_cast<const PseudoSourceValue>(V))
- return !PSV->PseudoSourceValue::isConstant(0) &&
- (V != PseudoSourceValue::getStack());
+ return !PSV->isConstant(0) && V != PseudoSourceValue::getStack();
return true;
}
@@ -563,14 +562,13 @@ bool Filler::searchBackward(MachineBasicBlock &MBB, Iter Slot) const {
RegDU.init(*Slot);
- if (searchRange(MBB, ReverseIter(Slot), MBB.rend(), RegDU, MemDU, Filler)) {
- MBB.splice(llvm::next(Slot), &MBB, llvm::next(Filler).base());
- MIBundleBuilder(MBB, Slot, llvm::next(llvm::next(Slot)));
- ++UsefulSlots;
- return true;
- }
+ if (!searchRange(MBB, ReverseIter(Slot), MBB.rend(), RegDU, MemDU, Filler))
+ return false;
- return false;
+ MBB.splice(llvm::next(Slot), &MBB, llvm::next(Filler).base());
+ MIBundleBuilder(MBB, Slot, llvm::next(llvm::next(Slot)));
+ ++UsefulSlots;
+ return true;
}
bool Filler::searchForward(MachineBasicBlock &MBB, Iter Slot) const {
@@ -584,14 +582,13 @@ bool Filler::searchForward(MachineBasicBlock &MBB, Iter Slot) const {
RegDU.setCallerSaved(*Slot);
- if (searchRange(MBB, llvm::next(Slot), MBB.end(), RegDU, NM, Filler)) {
- MBB.splice(llvm::next(Slot), &MBB, Filler);
- MIBundleBuilder(MBB, Slot, llvm::next(llvm::next(Slot)));
- ++UsefulSlots;
- return true;
- }
+ if (!searchRange(MBB, llvm::next(Slot), MBB.end(), RegDU, NM, Filler))
+ return false;
- return false;
+ MBB.splice(llvm::next(Slot), &MBB, Filler);
+ MIBundleBuilder(MBB, Slot, llvm::next(llvm::next(Slot)));
+ ++UsefulSlots;
+ return true;
}
bool Filler::searchSuccBBs(MachineBasicBlock &MBB, Iter Slot) const {
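Both searchBackward and searchForward now end on a single flat success path: splice the chosen instruction directly after the slot, then bundle the half-open range [Slot, Slot+2), i.e. the branch plus its new delay-slot filler. A hedged sketch of that shared idiom (LLVM 3.4-era API; `Filler` is the iterator produced by searchRange):

    // before: ... branch ... filler ...        (filler elsewhere in MBB)
    // after : ... {branch; filler} ...         (one bundle)
    MBB.splice(llvm::next(Slot), &MBB, Filler);               // move filler
    MIBundleBuilder(MBB, Slot, llvm::next(llvm::next(Slot))); // bundle pair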
diff --git a/lib/Target/Mips/MipsISelDAGToDAG.cpp b/lib/Target/Mips/MipsISelDAGToDAG.cpp
index 0002a5f..c417bd5 100644
--- a/lib/Target/Mips/MipsISelDAGToDAG.cpp
+++ b/lib/Target/Mips/MipsISelDAGToDAG.cpp
@@ -69,6 +69,12 @@ bool MipsDAGToDAGISel::selectAddrRegImm(SDValue Addr, SDValue &Base,
return false;
}
+bool MipsDAGToDAGISel::selectAddrRegReg(SDValue Addr, SDValue &Base,
+ SDValue &Offset) const {
+ llvm_unreachable("Unimplemented function.");
+ return false;
+}
+
bool MipsDAGToDAGISel::selectAddrDefault(SDValue Addr, SDValue &Base,
SDValue &Offset) const {
llvm_unreachable("Unimplemented function.");
@@ -81,12 +87,83 @@ bool MipsDAGToDAGISel::selectIntAddr(SDValue Addr, SDValue &Base,
return false;
}
+bool MipsDAGToDAGISel::selectIntAddrMM(SDValue Addr, SDValue &Base,
+ SDValue &Offset) const {
+ llvm_unreachable("Unimplemented function.");
+ return false;
+}
+
bool MipsDAGToDAGISel::selectAddr16(SDNode *Parent, SDValue N, SDValue &Base,
SDValue &Offset, SDValue &Alias) {
llvm_unreachable("Unimplemented function.");
return false;
}
+bool MipsDAGToDAGISel::selectVSplat(SDNode *N, APInt &Imm) const {
+ llvm_unreachable("Unimplemented function.");
+ return false;
+}
+
+bool MipsDAGToDAGISel::selectVSplatUimm1(SDValue N, SDValue &Imm) const {
+ llvm_unreachable("Unimplemented function.");
+ return false;
+}
+
+bool MipsDAGToDAGISel::selectVSplatUimm2(SDValue N, SDValue &Imm) const {
+ llvm_unreachable("Unimplemented function.");
+ return false;
+}
+
+bool MipsDAGToDAGISel::selectVSplatUimm3(SDValue N, SDValue &Imm) const {
+ llvm_unreachable("Unimplemented function.");
+ return false;
+}
+
+bool MipsDAGToDAGISel::selectVSplatUimm4(SDValue N, SDValue &Imm) const {
+ llvm_unreachable("Unimplemented function.");
+ return false;
+}
+
+bool MipsDAGToDAGISel::selectVSplatUimm5(SDValue N, SDValue &Imm) const {
+ llvm_unreachable("Unimplemented function.");
+ return false;
+}
+
+bool MipsDAGToDAGISel::selectVSplatUimm6(SDValue N, SDValue &Imm) const {
+ llvm_unreachable("Unimplemented function.");
+ return false;
+}
+
+bool MipsDAGToDAGISel::selectVSplatUimm8(SDValue N, SDValue &Imm) const {
+ llvm_unreachable("Unimplemented function.");
+ return false;
+}
+
+bool MipsDAGToDAGISel::selectVSplatSimm5(SDValue N, SDValue &Imm) const {
+ llvm_unreachable("Unimplemented function.");
+ return false;
+}
+
+bool MipsDAGToDAGISel::selectVSplatUimmPow2(SDValue N, SDValue &Imm) const {
+ llvm_unreachable("Unimplemented function.");
+ return false;
+}
+
+bool MipsDAGToDAGISel::selectVSplatUimmInvPow2(SDValue N, SDValue &Imm) const {
+ llvm_unreachable("Unimplemented function.");
+ return false;
+}
+
+bool MipsDAGToDAGISel::selectVSplatMaskL(SDValue N, SDValue &Imm) const {
+ llvm_unreachable("Unimplemented function.");
+ return false;
+}
+
+bool MipsDAGToDAGISel::selectVSplatMaskR(SDValue N, SDValue &Imm) const {
+ llvm_unreachable("Unimplemented function.");
+ return false;
+}
+
/// Select instructions not customized. Used for
/// expanded, promoted and normal instructions.
SDNode* MipsDAGToDAGISel::Select(SDNode *Node) {
@@ -98,6 +175,7 @@ SDNode* MipsDAGToDAGISel::Select(SDNode *Node) {
// If we have a custom node, we already have selected!
if (Node->isMachineOpcode()) {
DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n");
+ Node->setNodeId(-1);
return NULL;
}
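The one functional change in this hunk is the added Node->setNodeId(-1). A hedged note on why it matters: the selection loop numbers DAG nodes topologically and treats a negative NodeId as "already selected", so tagging a pre-existing machine node keeps later bookkeeping from revisiting it.

    // Context for the added line (assumed invariant, simplified):
    if (Node->isMachineOpcode()) {
      Node->setNodeId(-1); // negative id == "selected"; skip on later visits
      return NULL;         // nothing further to select
    }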
diff --git a/lib/Target/Mips/MipsISelDAGToDAG.h b/lib/Target/Mips/MipsISelDAGToDAG.h
index cf0f9c5..a4d9da5 100644
--- a/lib/Target/Mips/MipsISelDAGToDAG.h
+++ b/lib/Target/Mips/MipsISelDAGToDAG.h
@@ -57,6 +57,11 @@ private:
virtual bool selectAddrRegImm(SDValue Addr, SDValue &Base,
SDValue &Offset) const;
+ // Complex Pattern.
+ /// (reg + reg).
+ virtual bool selectAddrRegReg(SDValue Addr, SDValue &Base,
+ SDValue &Offset) const;
+
/// Fall back on this function if all else fails.
virtual bool selectAddrDefault(SDValue Addr, SDValue &Base,
SDValue &Offset) const;
@@ -65,9 +70,42 @@ private:
virtual bool selectIntAddr(SDValue Addr, SDValue &Base,
SDValue &Offset) const;
+ virtual bool selectIntAddrMM(SDValue Addr, SDValue &Base,
+ SDValue &Offset) const;
+
virtual bool selectAddr16(SDNode *Parent, SDValue N, SDValue &Base,
SDValue &Offset, SDValue &Alias);
+ /// \brief Select constant vector splats.
+ virtual bool selectVSplat(SDNode *N, APInt &Imm) const;
+ /// \brief Select constant vector splats whose value fits in a uimm1.
+ virtual bool selectVSplatUimm1(SDValue N, SDValue &Imm) const;
+ /// \brief Select constant vector splats whose value fits in a uimm2.
+ virtual bool selectVSplatUimm2(SDValue N, SDValue &Imm) const;
+ /// \brief Select constant vector splats whose value fits in a uimm3.
+ virtual bool selectVSplatUimm3(SDValue N, SDValue &Imm) const;
+ /// \brief Select constant vector splats whose value fits in a uimm4.
+ virtual bool selectVSplatUimm4(SDValue N, SDValue &Imm) const;
+ /// \brief Select constant vector splats whose value fits in a uimm5.
+ virtual bool selectVSplatUimm5(SDValue N, SDValue &Imm) const;
+ /// \brief Select constant vector splats whose value fits in a uimm6.
+ virtual bool selectVSplatUimm6(SDValue N, SDValue &Imm) const;
+ /// \brief Select constant vector splats whose value fits in a uimm8.
+ virtual bool selectVSplatUimm8(SDValue N, SDValue &Imm) const;
+ /// \brief Select constant vector splats whose value fits in a simm5.
+ virtual bool selectVSplatSimm5(SDValue N, SDValue &Imm) const;
+ /// \brief Select constant vector splats whose value is a power of 2.
+ virtual bool selectVSplatUimmPow2(SDValue N, SDValue &Imm) const;
+ /// \brief Select constant vector splats whose value is the inverse of a
+ /// power of 2.
+ virtual bool selectVSplatUimmInvPow2(SDValue N, SDValue &Imm) const;
+ /// \brief Select constant vector splats whose value is a run of set bits
+ /// ending at the most significant bit.
+ virtual bool selectVSplatMaskL(SDValue N, SDValue &Imm) const;
+ /// \brief Select constant vector splats whose value is a run of set bits
+ /// starting at bit zero.
+ virtual bool selectVSplatMaskR(SDValue N, SDValue &Imm) const;
+
virtual SDNode *Select(SDNode *N);
virtual std::pair<bool, SDNode*> selectNode(SDNode *Node) = 0;
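Every selectVSplat* hook declared here is deliberately unreachable in the base class; an MSA-aware subclass is expected to override them. A minimal sketch of the shape such an override takes — the helper name, the i32 immediate type, and the exact checks are assumptions for illustration, not part of this patch:

    // Hedged sketch: N must be a BUILD_VECTOR whose elements are one
    // constant that fits in the requested field width.
    bool selectVSplatCommon(SDValue N, SDValue &Imm, bool Signed,
                            unsigned Bits) const {
      BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N);
      APInt SplatValue, SplatUndef;
      unsigned SplatBitSize;
      bool HasAnyUndefs;
      if (!BV || !BV->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
                                      HasAnyUndefs))
        return false;
      if (!(Signed ? SplatValue.isSignedIntN(Bits) : SplatValue.isIntN(Bits)))
        return false;
      // Immediate operand type assumed to be i32 here.
      Imm = CurDAG->getTargetConstant(SplatValue.getZExtValue(), MVT::i32);
      return true;
    }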
diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp
index a62e84f..1e8250c 100644
--- a/lib/Target/Mips/MipsISelLowering.cpp
+++ b/lib/Target/Mips/MipsISelLowering.cpp
@@ -20,6 +20,7 @@
#include "MipsTargetMachine.h"
#include "MipsTargetObjectFile.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringSwitch.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -34,6 +35,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
+#include <cctype>
using namespace llvm;
@@ -67,7 +69,7 @@ static const uint16_t Mips64DPRegs[8] = {
// For example, if I is 0x003ff800, (Pos, Size) = (11, 11).
static bool isShiftedMask(uint64_t I, uint64_t &Pos, uint64_t &Size) {
if (!isShiftedMask_64(I))
- return false;
+ return false;
Size = CountPopulation_64(I);
Pos = countTrailingZeros(I);
@@ -79,72 +81,35 @@ SDValue MipsTargetLowering::getGlobalReg(SelectionDAG &DAG, EVT Ty) const {
return DAG.getRegister(FI->getGlobalBaseReg(), Ty);
}
-static SDValue getTargetNode(SDValue Op, SelectionDAG &DAG, unsigned Flag) {
- EVT Ty = Op.getValueType();
+SDValue MipsTargetLowering::getTargetNode(GlobalAddressSDNode *N, EVT Ty,
+ SelectionDAG &DAG,
+ unsigned Flag) const {
+ return DAG.getTargetGlobalAddress(N->getGlobal(), SDLoc(N), Ty, 0, Flag);
+}
- if (GlobalAddressSDNode *N = dyn_cast<GlobalAddressSDNode>(Op))
- return DAG.getTargetGlobalAddress(N->getGlobal(), SDLoc(Op), Ty, 0,
- Flag);
- if (ExternalSymbolSDNode *N = dyn_cast<ExternalSymbolSDNode>(Op))
- return DAG.getTargetExternalSymbol(N->getSymbol(), Ty, Flag);
- if (BlockAddressSDNode *N = dyn_cast<BlockAddressSDNode>(Op))
- return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, 0, Flag);
- if (JumpTableSDNode *N = dyn_cast<JumpTableSDNode>(Op))
- return DAG.getTargetJumpTable(N->getIndex(), Ty, Flag);
- if (ConstantPoolSDNode *N = dyn_cast<ConstantPoolSDNode>(Op))
- return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlignment(),
- N->getOffset(), Flag);
-
- llvm_unreachable("Unexpected node type.");
- return SDValue();
+SDValue MipsTargetLowering::getTargetNode(ExternalSymbolSDNode *N, EVT Ty,
+ SelectionDAG &DAG,
+ unsigned Flag) const {
+ return DAG.getTargetExternalSymbol(N->getSymbol(), Ty, Flag);
}
-static SDValue getAddrNonPIC(SDValue Op, SelectionDAG &DAG) {
- SDLoc DL(Op);
- EVT Ty = Op.getValueType();
- SDValue Hi = getTargetNode(Op, DAG, MipsII::MO_ABS_HI);
- SDValue Lo = getTargetNode(Op, DAG, MipsII::MO_ABS_LO);
- return DAG.getNode(ISD::ADD, DL, Ty,
- DAG.getNode(MipsISD::Hi, DL, Ty, Hi),
- DAG.getNode(MipsISD::Lo, DL, Ty, Lo));
+SDValue MipsTargetLowering::getTargetNode(BlockAddressSDNode *N, EVT Ty,
+ SelectionDAG &DAG,
+ unsigned Flag) const {
+ return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, 0, Flag);
}
-SDValue MipsTargetLowering::getAddrLocal(SDValue Op, SelectionDAG &DAG,
- bool HasMips64) const {
- SDLoc DL(Op);
- EVT Ty = Op.getValueType();
- unsigned GOTFlag = HasMips64 ? MipsII::MO_GOT_PAGE : MipsII::MO_GOT;
- SDValue GOT = DAG.getNode(MipsISD::Wrapper, DL, Ty, getGlobalReg(DAG, Ty),
- getTargetNode(Op, DAG, GOTFlag));
- SDValue Load = DAG.getLoad(Ty, DL, DAG.getEntryNode(), GOT,
- MachinePointerInfo::getGOT(), false, false, false,
- 0);
- unsigned LoFlag = HasMips64 ? MipsII::MO_GOT_OFST : MipsII::MO_ABS_LO;
- SDValue Lo = DAG.getNode(MipsISD::Lo, DL, Ty, getTargetNode(Op, DAG, LoFlag));
- return DAG.getNode(ISD::ADD, DL, Ty, Load, Lo);
-}
-
-SDValue MipsTargetLowering::getAddrGlobal(SDValue Op, SelectionDAG &DAG,
+SDValue MipsTargetLowering::getTargetNode(JumpTableSDNode *N, EVT Ty,
+ SelectionDAG &DAG,
unsigned Flag) const {
- SDLoc DL(Op);
- EVT Ty = Op.getValueType();
- SDValue Tgt = DAG.getNode(MipsISD::Wrapper, DL, Ty, getGlobalReg(DAG, Ty),
- getTargetNode(Op, DAG, Flag));
- return DAG.getLoad(Ty, DL, DAG.getEntryNode(), Tgt,
- MachinePointerInfo::getGOT(), false, false, false, 0);
+ return DAG.getTargetJumpTable(N->getIndex(), Ty, Flag);
}
-SDValue MipsTargetLowering::getAddrGlobalLargeGOT(SDValue Op, SelectionDAG &DAG,
- unsigned HiFlag,
- unsigned LoFlag) const {
- SDLoc DL(Op);
- EVT Ty = Op.getValueType();
- SDValue Hi = DAG.getNode(MipsISD::Hi, DL, Ty, getTargetNode(Op, DAG, HiFlag));
- Hi = DAG.getNode(ISD::ADD, DL, Ty, Hi, getGlobalReg(DAG, Ty));
- SDValue Wrapper = DAG.getNode(MipsISD::Wrapper, DL, Ty, Hi,
- getTargetNode(Op, DAG, LoFlag));
- return DAG.getLoad(Ty, DL, DAG.getEntryNode(), Wrapper,
- MachinePointerInfo::getGOT(), false, false, false, 0);
+SDValue MipsTargetLowering::getTargetNode(ConstantPoolSDNode *N, EVT Ty,
+ SelectionDAG &DAG,
+ unsigned Flag) const {
+ return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlignment(),
+ N->getOffset(), Flag);
}
const char *MipsTargetLowering::getTargetNodeName(unsigned Opcode) const {
@@ -162,8 +127,9 @@ const char *MipsTargetLowering::getTargetNodeName(unsigned Opcode) const {
case MipsISD::CMovFP_T: return "MipsISD::CMovFP_T";
case MipsISD::CMovFP_F: return "MipsISD::CMovFP_F";
case MipsISD::TruncIntFP: return "MipsISD::TruncIntFP";
- case MipsISD::ExtractLOHI: return "MipsISD::ExtractLOHI";
- case MipsISD::InsertLOHI: return "MipsISD::InsertLOHI";
+ case MipsISD::MFHI: return "MipsISD::MFHI";
+ case MipsISD::MFLO: return "MipsISD::MFLO";
+ case MipsISD::MTLOHI: return "MipsISD::MTLOHI";
case MipsISD::Mult: return "MipsISD::Mult";
case MipsISD::Multu: return "MipsISD::Multu";
case MipsISD::MAdd: return "MipsISD::MAdd";
@@ -207,6 +173,30 @@ const char *MipsTargetLowering::getTargetNodeName(unsigned Opcode) const {
case MipsISD::SHRL_DSP: return "MipsISD::SHRL_DSP";
case MipsISD::SETCC_DSP: return "MipsISD::SETCC_DSP";
case MipsISD::SELECT_CC_DSP: return "MipsISD::SELECT_CC_DSP";
+ case MipsISD::VALL_ZERO: return "MipsISD::VALL_ZERO";
+ case MipsISD::VANY_ZERO: return "MipsISD::VANY_ZERO";
+ case MipsISD::VALL_NONZERO: return "MipsISD::VALL_NONZERO";
+ case MipsISD::VANY_NONZERO: return "MipsISD::VANY_NONZERO";
+ case MipsISD::VCEQ: return "MipsISD::VCEQ";
+ case MipsISD::VCLE_S: return "MipsISD::VCLE_S";
+ case MipsISD::VCLE_U: return "MipsISD::VCLE_U";
+ case MipsISD::VCLT_S: return "MipsISD::VCLT_S";
+ case MipsISD::VCLT_U: return "MipsISD::VCLT_U";
+ case MipsISD::VSMAX: return "MipsISD::VSMAX";
+ case MipsISD::VSMIN: return "MipsISD::VSMIN";
+ case MipsISD::VUMAX: return "MipsISD::VUMAX";
+ case MipsISD::VUMIN: return "MipsISD::VUMIN";
+ case MipsISD::VEXTRACT_SEXT_ELT: return "MipsISD::VEXTRACT_SEXT_ELT";
+ case MipsISD::VEXTRACT_ZEXT_ELT: return "MipsISD::VEXTRACT_ZEXT_ELT";
+ case MipsISD::VNOR: return "MipsISD::VNOR";
+ case MipsISD::VSHF: return "MipsISD::VSHF";
+ case MipsISD::SHF: return "MipsISD::SHF";
+ case MipsISD::ILVEV: return "MipsISD::ILVEV";
+ case MipsISD::ILVOD: return "MipsISD::ILVOD";
+ case MipsISD::ILVL: return "MipsISD::ILVL";
+ case MipsISD::ILVR: return "MipsISD::ILVR";
+ case MipsISD::PCKEV: return "MipsISD::PCKEV";
+ case MipsISD::PCKOD: return "MipsISD::PCKOD";
default: return NULL;
}
}
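The rewrite above trades one getTargetNode() that dispatched through a dyn_cast chain at run time for a family of overloads chosen at compile time by the templated getAddr* helpers introduced later in this patch. A self-contained analogue of the mechanism:

    // Overload resolution stands in for the old dyn_cast chain.
    struct GlobalAddr {}; struct ExtSym {};  // stand-ins for the SDNode types
    static int getTargetNode(GlobalAddr *, int Flag) { return Flag + 1; }
    static int getTargetNode(ExtSym *,     int Flag) { return Flag + 2; }
    template <class NodeTy>
    static int getAddr(NodeTy *N, int Flag) {
      return getTargetNode(N, Flag); // resolved per template instantiation
    }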
@@ -424,8 +414,8 @@ static SDValue performDivRemCombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
EVT Ty = N->getValueType(0);
- unsigned LO = (Ty == MVT::i32) ? Mips::LO : Mips::LO64;
- unsigned HI = (Ty == MVT::i32) ? Mips::HI : Mips::HI64;
+ unsigned LO = (Ty == MVT::i32) ? Mips::LO0 : Mips::LO0_64;
+ unsigned HI = (Ty == MVT::i32) ? Mips::HI0 : Mips::HI0_64;
unsigned Opc = N->getOpcode() == ISD::SDIVREM ? MipsISD::DivRem16 :
MipsISD::DivRemU16;
SDLoc DL(N);
@@ -566,7 +556,7 @@ static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG,
// Pattern match EXT.
// $dst = and ((sra or srl) $src , pos), (2**size - 1)
// => ext $dst, $src, size, pos
- if (DCI.isBeforeLegalizeOps() || !Subtarget->hasMips32r2())
+ if (DCI.isBeforeLegalizeOps() || !Subtarget->hasExtractInsert())
return SDValue();
SDValue ShiftRight = N->getOperand(0), Mask = N->getOperand(1);
@@ -607,7 +597,7 @@ static SDValue performORCombine(SDNode *N, SelectionDAG &DAG,
// $dst = or (and $src1 , mask0), (and (shl $src, pos), mask1),
// where mask1 = (2**size - 1) << pos, mask0 = ~mask1
// => ins $dst, $src, size, pos, $src1
- if (DCI.isBeforeLegalizeOps() || !Subtarget->hasMips32r2())
+ if (DCI.isBeforeLegalizeOps() || !Subtarget->hasExtractInsert())
return SDValue();
SDValue And0 = N->getOperand(0), And1 = N->getOperand(1);
@@ -779,13 +769,17 @@ static MachineBasicBlock *expandPseudoDIV(MachineInstr *MI,
// Insert instruction "teq $divisor_reg, $zero, 7".
MachineBasicBlock::iterator I(MI);
MachineInstrBuilder MIB;
+ MachineOperand &Divisor = MI->getOperand(2);
MIB = BuildMI(MBB, llvm::next(I), MI->getDebugLoc(), TII.get(Mips::TEQ))
- .addOperand(MI->getOperand(2)).addReg(Mips::ZERO).addImm(7);
+ .addReg(Divisor.getReg(), getKillRegState(Divisor.isKill()))
+ .addReg(Mips::ZERO).addImm(7);
// Use the 32-bit sub-register if this is a 64-bit division.
if (Is64Bit)
MIB->getOperand(0).setSubReg(Mips::sub_32);
+ // Clear Divisor's kill flag.
+ Divisor.setIsKill(false);
return &MBB;
}
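The point of this hunk is subtle: the divisor register gains a second use — the TEQ divide-by-zero trap emitted after the divide — so the <kill> marker must sit on that later use and be cleared from the earlier one. In miniature, with hypothetical types:

    // A register may be marked <kill> only on its last use; hand the
    // marker from the earlier use to the later one.
    struct Use { unsigned Reg; bool Kill; };
    static void transferKill(Use &Earlier, Use &Later) {
      Later.Kill = Earlier.Kill;  // the TEQ inherits the kill state
      Earlier.Kill = false;       // the divide no longer kills the register
    }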
@@ -796,107 +790,75 @@ MipsTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
default:
llvm_unreachable("Unexpected instr type to insert");
case Mips::ATOMIC_LOAD_ADD_I8:
- case Mips::ATOMIC_LOAD_ADD_I8_P8:
return emitAtomicBinaryPartword(MI, BB, 1, Mips::ADDu);
case Mips::ATOMIC_LOAD_ADD_I16:
- case Mips::ATOMIC_LOAD_ADD_I16_P8:
return emitAtomicBinaryPartword(MI, BB, 2, Mips::ADDu);
case Mips::ATOMIC_LOAD_ADD_I32:
- case Mips::ATOMIC_LOAD_ADD_I32_P8:
return emitAtomicBinary(MI, BB, 4, Mips::ADDu);
case Mips::ATOMIC_LOAD_ADD_I64:
- case Mips::ATOMIC_LOAD_ADD_I64_P8:
return emitAtomicBinary(MI, BB, 8, Mips::DADDu);
case Mips::ATOMIC_LOAD_AND_I8:
- case Mips::ATOMIC_LOAD_AND_I8_P8:
return emitAtomicBinaryPartword(MI, BB, 1, Mips::AND);
case Mips::ATOMIC_LOAD_AND_I16:
- case Mips::ATOMIC_LOAD_AND_I16_P8:
return emitAtomicBinaryPartword(MI, BB, 2, Mips::AND);
case Mips::ATOMIC_LOAD_AND_I32:
- case Mips::ATOMIC_LOAD_AND_I32_P8:
return emitAtomicBinary(MI, BB, 4, Mips::AND);
case Mips::ATOMIC_LOAD_AND_I64:
- case Mips::ATOMIC_LOAD_AND_I64_P8:
return emitAtomicBinary(MI, BB, 8, Mips::AND64);
case Mips::ATOMIC_LOAD_OR_I8:
- case Mips::ATOMIC_LOAD_OR_I8_P8:
return emitAtomicBinaryPartword(MI, BB, 1, Mips::OR);
case Mips::ATOMIC_LOAD_OR_I16:
- case Mips::ATOMIC_LOAD_OR_I16_P8:
return emitAtomicBinaryPartword(MI, BB, 2, Mips::OR);
case Mips::ATOMIC_LOAD_OR_I32:
- case Mips::ATOMIC_LOAD_OR_I32_P8:
return emitAtomicBinary(MI, BB, 4, Mips::OR);
case Mips::ATOMIC_LOAD_OR_I64:
- case Mips::ATOMIC_LOAD_OR_I64_P8:
return emitAtomicBinary(MI, BB, 8, Mips::OR64);
case Mips::ATOMIC_LOAD_XOR_I8:
- case Mips::ATOMIC_LOAD_XOR_I8_P8:
return emitAtomicBinaryPartword(MI, BB, 1, Mips::XOR);
case Mips::ATOMIC_LOAD_XOR_I16:
- case Mips::ATOMIC_LOAD_XOR_I16_P8:
return emitAtomicBinaryPartword(MI, BB, 2, Mips::XOR);
case Mips::ATOMIC_LOAD_XOR_I32:
- case Mips::ATOMIC_LOAD_XOR_I32_P8:
return emitAtomicBinary(MI, BB, 4, Mips::XOR);
case Mips::ATOMIC_LOAD_XOR_I64:
- case Mips::ATOMIC_LOAD_XOR_I64_P8:
return emitAtomicBinary(MI, BB, 8, Mips::XOR64);
case Mips::ATOMIC_LOAD_NAND_I8:
- case Mips::ATOMIC_LOAD_NAND_I8_P8:
return emitAtomicBinaryPartword(MI, BB, 1, 0, true);
case Mips::ATOMIC_LOAD_NAND_I16:
- case Mips::ATOMIC_LOAD_NAND_I16_P8:
return emitAtomicBinaryPartword(MI, BB, 2, 0, true);
case Mips::ATOMIC_LOAD_NAND_I32:
- case Mips::ATOMIC_LOAD_NAND_I32_P8:
return emitAtomicBinary(MI, BB, 4, 0, true);
case Mips::ATOMIC_LOAD_NAND_I64:
- case Mips::ATOMIC_LOAD_NAND_I64_P8:
return emitAtomicBinary(MI, BB, 8, 0, true);
case Mips::ATOMIC_LOAD_SUB_I8:
- case Mips::ATOMIC_LOAD_SUB_I8_P8:
return emitAtomicBinaryPartword(MI, BB, 1, Mips::SUBu);
case Mips::ATOMIC_LOAD_SUB_I16:
- case Mips::ATOMIC_LOAD_SUB_I16_P8:
return emitAtomicBinaryPartword(MI, BB, 2, Mips::SUBu);
case Mips::ATOMIC_LOAD_SUB_I32:
- case Mips::ATOMIC_LOAD_SUB_I32_P8:
return emitAtomicBinary(MI, BB, 4, Mips::SUBu);
case Mips::ATOMIC_LOAD_SUB_I64:
- case Mips::ATOMIC_LOAD_SUB_I64_P8:
return emitAtomicBinary(MI, BB, 8, Mips::DSUBu);
case Mips::ATOMIC_SWAP_I8:
- case Mips::ATOMIC_SWAP_I8_P8:
return emitAtomicBinaryPartword(MI, BB, 1, 0);
case Mips::ATOMIC_SWAP_I16:
- case Mips::ATOMIC_SWAP_I16_P8:
return emitAtomicBinaryPartword(MI, BB, 2, 0);
case Mips::ATOMIC_SWAP_I32:
- case Mips::ATOMIC_SWAP_I32_P8:
return emitAtomicBinary(MI, BB, 4, 0);
case Mips::ATOMIC_SWAP_I64:
- case Mips::ATOMIC_SWAP_I64_P8:
return emitAtomicBinary(MI, BB, 8, 0);
case Mips::ATOMIC_CMP_SWAP_I8:
- case Mips::ATOMIC_CMP_SWAP_I8_P8:
return emitAtomicCmpSwapPartword(MI, BB, 1);
case Mips::ATOMIC_CMP_SWAP_I16:
- case Mips::ATOMIC_CMP_SWAP_I16_P8:
return emitAtomicCmpSwapPartword(MI, BB, 2);
case Mips::ATOMIC_CMP_SWAP_I32:
- case Mips::ATOMIC_CMP_SWAP_I32_P8:
return emitAtomicCmpSwap(MI, BB, 4);
case Mips::ATOMIC_CMP_SWAP_I64:
- case Mips::ATOMIC_CMP_SWAP_I64_P8:
return emitAtomicCmpSwap(MI, BB, 8);
case Mips::PseudoSDIV:
case Mips::PseudoUDIV:
@@ -923,16 +885,16 @@ MipsTargetLowering::emitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
unsigned LL, SC, AND, NOR, ZERO, BEQ;
if (Size == 4) {
- LL = IsN64 ? Mips::LL_P8 : Mips::LL;
- SC = IsN64 ? Mips::SC_P8 : Mips::SC;
+ LL = Mips::LL;
+ SC = Mips::SC;
AND = Mips::AND;
NOR = Mips::NOR;
ZERO = Mips::ZERO;
BEQ = Mips::BEQ;
}
else {
- LL = IsN64 ? Mips::LLD_P8 : Mips::LLD;
- SC = IsN64 ? Mips::SCD_P8 : Mips::SCD;
+ LL = Mips::LLD;
+ SC = Mips::SCD;
AND = Mips::AND64;
NOR = Mips::NOR64;
ZERO = Mips::ZERO_64;
@@ -958,8 +920,7 @@ MipsTargetLowering::emitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
// Transfer the remainder of BB and its successor edges to exitMBB.
exitMBB->splice(exitMBB->begin(), BB,
- llvm::next(MachineBasicBlock::iterator(MI)),
- BB->end());
+ llvm::next(MachineBasicBlock::iterator(MI)), BB->end());
exitMBB->transferSuccessorsAndUpdatePHIs(BB);
// thisMBB:
@@ -990,7 +951,7 @@ MipsTargetLowering::emitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
BuildMI(BB, DL, TII->get(SC), Success).addReg(StoreVal).addReg(Ptr).addImm(0);
BuildMI(BB, DL, TII->get(BEQ)).addReg(Success).addReg(ZERO).addMBB(loopMBB);
- MI->eraseFromParent(); // The instruction is gone now.
+ MI->eraseFromParent(); // The instruction is gone now.
return exitMBB;
}
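emitAtomicBinary expands the pseudo into the classic load-linked/store-conditional retry loop; as the hunk suggests, the separate _P8 opcodes disappear because a single LL/SC definition now serves both pointer sizes. What the loop computes, as a C-style sketch — ll() and sc() are hypothetical wrappers standing in for the MIPS instructions:

    #include <stdint.h>
    uint32_t ll(volatile uint32_t *p);        // load-linked (hypothetical)
    int sc(volatile uint32_t *p, uint32_t v); // store-conditional: 1 = ok
    uint32_t atomic_add_u32(volatile uint32_t *p, uint32_t incr) {
      uint32_t oldval;
      do {
        oldval = ll(p);                 // ll   oldval, 0(ptr)
      } while (!sc(p, oldval + incr));  // sc   storeval, 0(ptr)
                                        // beq  success, $zero, loopMBB
      return oldval;                    // the old value goes to Dest
    }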
@@ -1001,15 +962,13 @@ MipsTargetLowering::emitAtomicBinaryPartword(MachineInstr *MI,
unsigned Size, unsigned BinOpcode,
bool Nand) const {
assert((Size == 1 || Size == 2) &&
- "Unsupported size for EmitAtomicBinaryPartial.");
+ "Unsupported size for EmitAtomicBinaryPartial.");
MachineFunction *MF = BB->getParent();
MachineRegisterInfo &RegInfo = MF->getRegInfo();
const TargetRegisterClass *RC = getRegClassFor(MVT::i32);
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
DebugLoc DL = MI->getDebugLoc();
- unsigned LL = IsN64 ? Mips::LL_P8 : Mips::LL;
- unsigned SC = IsN64 ? Mips::SC_P8 : Mips::SC;
unsigned Dest = MI->getOperand(0).getReg();
unsigned Ptr = MI->getOperand(1).getReg();
@@ -1106,7 +1065,7 @@ MipsTargetLowering::emitAtomicBinaryPartword(MachineInstr *MI,
// beq success,$0,loopMBB
BB = loopMBB;
- BuildMI(BB, DL, TII->get(LL), OldVal).addReg(AlignedAddr).addImm(0);
+ BuildMI(BB, DL, TII->get(Mips::LL), OldVal).addReg(AlignedAddr).addImm(0);
if (Nand) {
// and andres, oldval, incr2
// nor binopres, $0, andres
@@ -1120,7 +1079,7 @@ MipsTargetLowering::emitAtomicBinaryPartword(MachineInstr *MI,
// and newval, binopres, mask
BuildMI(BB, DL, TII->get(BinOpcode), BinOpRes).addReg(OldVal).addReg(Incr2);
BuildMI(BB, DL, TII->get(Mips::AND), NewVal).addReg(BinOpRes).addReg(Mask);
- } else {// atomic.swap
+ } else { // atomic.swap
// and newval, incr2, mask
BuildMI(BB, DL, TII->get(Mips::AND), NewVal).addReg(Incr2).addReg(Mask);
}
@@ -1129,7 +1088,7 @@ MipsTargetLowering::emitAtomicBinaryPartword(MachineInstr *MI,
.addReg(OldVal).addReg(Mask2);
BuildMI(BB, DL, TII->get(Mips::OR), StoreVal)
.addReg(MaskedOldVal0).addReg(NewVal);
- BuildMI(BB, DL, TII->get(SC), Success)
+ BuildMI(BB, DL, TII->get(Mips::SC), Success)
.addReg(StoreVal).addReg(AlignedAddr).addImm(0);
BuildMI(BB, DL, TII->get(Mips::BEQ))
.addReg(Success).addReg(Mips::ZERO).addMBB(loopMBB);
@@ -1151,15 +1110,14 @@ MipsTargetLowering::emitAtomicBinaryPartword(MachineInstr *MI,
BuildMI(BB, DL, TII->get(Mips::SRA), Dest)
.addReg(SllRes).addImm(ShiftImm);
- MI->eraseFromParent(); // The instruction is gone now.
+ MI->eraseFromParent(); // The instruction is gone now.
return exitMBB;
}
-MachineBasicBlock *
-MipsTargetLowering::emitAtomicCmpSwap(MachineInstr *MI,
- MachineBasicBlock *BB,
- unsigned Size) const {
+MachineBasicBlock * MipsTargetLowering::emitAtomicCmpSwap(MachineInstr *MI,
+ MachineBasicBlock *BB,
+ unsigned Size) const {
assert((Size == 4 || Size == 8) && "Unsupported size for EmitAtomicCmpSwap.");
MachineFunction *MF = BB->getParent();
@@ -1170,15 +1128,14 @@ MipsTargetLowering::emitAtomicCmpSwap(MachineInstr *MI,
unsigned LL, SC, ZERO, BNE, BEQ;
if (Size == 4) {
- LL = IsN64 ? Mips::LL_P8 : Mips::LL;
- SC = IsN64 ? Mips::SC_P8 : Mips::SC;
+ LL = Mips::LL;
+ SC = Mips::SC;
ZERO = Mips::ZERO;
BNE = Mips::BNE;
BEQ = Mips::BEQ;
- }
- else {
- LL = IsN64 ? Mips::LLD_P8 : Mips::LLD;
- SC = IsN64 ? Mips::SCD_P8 : Mips::SCD;
+ } else {
+ LL = Mips::LLD;
+ SC = Mips::SCD;
ZERO = Mips::ZERO_64;
BNE = Mips::BNE64;
BEQ = Mips::BEQ64;
@@ -1233,7 +1190,7 @@ MipsTargetLowering::emitAtomicCmpSwap(MachineInstr *MI,
BuildMI(BB, DL, TII->get(BEQ))
.addReg(Success).addReg(ZERO).addMBB(loop1MBB);
- MI->eraseFromParent(); // The instruction is gone now.
+ MI->eraseFromParent(); // The instruction is gone now.
return exitMBB;
}
@@ -1250,8 +1207,6 @@ MipsTargetLowering::emitAtomicCmpSwapPartword(MachineInstr *MI,
const TargetRegisterClass *RC = getRegClassFor(MVT::i32);
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
DebugLoc DL = MI->getDebugLoc();
- unsigned LL = IsN64 ? Mips::LL_P8 : Mips::LL;
- unsigned SC = IsN64 ? Mips::SC_P8 : Mips::SC;
unsigned Dest = MI->getOperand(0).getReg();
unsigned Ptr = MI->getOperand(1).getReg();
@@ -1348,7 +1303,7 @@ MipsTargetLowering::emitAtomicCmpSwapPartword(MachineInstr *MI,
// and maskedoldval0,oldval,mask
// bne maskedoldval0,shiftedcmpval,sinkMBB
BB = loop1MBB;
- BuildMI(BB, DL, TII->get(LL), OldVal).addReg(AlignedAddr).addImm(0);
+ BuildMI(BB, DL, TII->get(Mips::LL), OldVal).addReg(AlignedAddr).addImm(0);
BuildMI(BB, DL, TII->get(Mips::AND), MaskedOldVal0)
.addReg(OldVal).addReg(Mask);
BuildMI(BB, DL, TII->get(Mips::BNE))
@@ -1364,7 +1319,7 @@ MipsTargetLowering::emitAtomicCmpSwapPartword(MachineInstr *MI,
.addReg(OldVal).addReg(Mask2);
BuildMI(BB, DL, TII->get(Mips::OR), StoreVal)
.addReg(MaskedOldVal1).addReg(ShiftedNewVal);
- BuildMI(BB, DL, TII->get(SC), Success)
+ BuildMI(BB, DL, TII->get(Mips::SC), Success)
.addReg(StoreVal).addReg(AlignedAddr).addImm(0);
BuildMI(BB, DL, TII->get(Mips::BEQ))
.addReg(Success).addReg(Mips::ZERO).addMBB(loop1MBB);
@@ -1421,9 +1376,7 @@ SDValue MipsTargetLowering::lowerBR_JT(SDValue Op, SelectionDAG &DAG) const {
return DAG.getNode(ISD::BRIND, DL, MVT::Other, Chain, Addr);
}
-SDValue MipsTargetLowering::
-lowerBRCOND(SDValue Op, SelectionDAG &DAG) const
-{
+SDValue MipsTargetLowering::lowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
// The first operand is the chain, the second is the condition, the third is
// the block to branch to if the condition is true.
SDValue Chain = Op.getOperand(0);
@@ -1489,7 +1442,9 @@ SDValue MipsTargetLowering::lowerGlobalAddress(SDValue Op,
SelectionDAG &DAG) const {
// FIXME there isn't actually debug info here
SDLoc DL(Op);
- const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
+ EVT Ty = Op.getValueType();
+ GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
+ const GlobalValue *GV = N->getGlobal();
if (getTargetMachine().getRelocationModel() != Reloc::PIC_ && !IsN64) {
const MipsTargetObjectFile &TLOF =
@@ -1506,26 +1461,31 @@ SDValue MipsTargetLowering::lowerGlobalAddress(SDValue Op,
}
// %hi/%lo relocation
- return getAddrNonPIC(Op, DAG);
+ return getAddrNonPIC(N, Ty, DAG);
}
if (GV->hasInternalLinkage() || (GV->hasLocalLinkage() && !isa<Function>(GV)))
- return getAddrLocal(Op, DAG, HasMips64);
+ return getAddrLocal(N, Ty, DAG, HasMips64);
if (LargeGOT)
- return getAddrGlobalLargeGOT(Op, DAG, MipsII::MO_GOT_HI16,
- MipsII::MO_GOT_LO16);
+ return getAddrGlobalLargeGOT(N, Ty, DAG, MipsII::MO_GOT_HI16,
+ MipsII::MO_GOT_LO16, DAG.getEntryNode(),
+ MachinePointerInfo::getGOT());
- return getAddrGlobal(Op, DAG,
- HasMips64 ? MipsII::MO_GOT_DISP : MipsII::MO_GOT16);
+ return getAddrGlobal(N, Ty, DAG,
+ HasMips64 ? MipsII::MO_GOT_DISP : MipsII::MO_GOT16,
+ DAG.getEntryNode(), MachinePointerInfo::getGOT());
}
SDValue MipsTargetLowering::lowerBlockAddress(SDValue Op,
SelectionDAG &DAG) const {
+ BlockAddressSDNode *N = cast<BlockAddressSDNode>(Op);
+ EVT Ty = Op.getValueType();
+
if (getTargetMachine().getRelocationModel() != Reloc::PIC_ && !IsN64)
- return getAddrNonPIC(Op, DAG);
+ return getAddrNonPIC(N, Ty, DAG);
- return getAddrLocal(Op, DAG, HasMips64);
+ return getAddrLocal(N, Ty, DAG, HasMips64);
}
SDValue MipsTargetLowering::
@@ -1612,10 +1572,13 @@ lowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const
SDValue MipsTargetLowering::
lowerJumpTable(SDValue Op, SelectionDAG &DAG) const
{
+ JumpTableSDNode *N = cast<JumpTableSDNode>(Op);
+ EVT Ty = Op.getValueType();
+
if (getTargetMachine().getRelocationModel() != Reloc::PIC_ && !IsN64)
- return getAddrNonPIC(Op, DAG);
+ return getAddrNonPIC(N, Ty, DAG);
- return getAddrLocal(Op, DAG, HasMips64);
+ return getAddrLocal(N, Ty, DAG, HasMips64);
}
SDValue MipsTargetLowering::
@@ -1630,11 +1593,13 @@ lowerConstantPool(SDValue Op, SelectionDAG &DAG) const
// SDValue GPRelNode = DAG.getNode(MipsISD::GPRel, MVT::i32, CP);
// SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(MVT::i32);
// ResNode = DAG.getNode(ISD::ADD, MVT::i32, GOT, GPRelNode);
+ ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op);
+ EVT Ty = Op.getValueType();
if (getTargetMachine().getRelocationModel() != Reloc::PIC_ && !IsN64)
- return getAddrNonPIC(Op, DAG);
+ return getAddrNonPIC(N, Ty, DAG);
- return getAddrLocal(Op, DAG, HasMips64);
+ return getAddrLocal(N, Ty, DAG, HasMips64);
}
SDValue MipsTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const {
@@ -1652,7 +1617,8 @@ SDValue MipsTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const {
MachinePointerInfo(SV), false, false, 0);
}
-static SDValue lowerFCOPYSIGN32(SDValue Op, SelectionDAG &DAG, bool HasR2) {
+static SDValue lowerFCOPYSIGN32(SDValue Op, SelectionDAG &DAG,
+ bool HasExtractInsert) {
EVT TyX = Op.getOperand(0).getValueType();
EVT TyY = Op.getOperand(1).getValueType();
SDValue Const1 = DAG.getConstant(1, MVT::i32);
@@ -1671,7 +1637,7 @@ static SDValue lowerFCOPYSIGN32(SDValue Op, SelectionDAG &DAG, bool HasR2) {
DAG.getNode(MipsISD::ExtractElementF64, DL, MVT::i32, Op.getOperand(1),
Const1);
- if (HasR2) {
+ if (HasExtractInsert) {
// ext E, Y, 31, 1 ; extract bit31 of Y
// ins X, E, 31, 1 ; insert extracted bit at bit31 of X
SDValue E = DAG.getNode(MipsISD::Ext, DL, MVT::i32, Y, Const31, Const1);
@@ -1697,7 +1663,8 @@ static SDValue lowerFCOPYSIGN32(SDValue Op, SelectionDAG &DAG, bool HasR2) {
return DAG.getNode(MipsISD::BuildPairF64, DL, MVT::f64, LowX, Res);
}
-static SDValue lowerFCOPYSIGN64(SDValue Op, SelectionDAG &DAG, bool HasR2) {
+static SDValue lowerFCOPYSIGN64(SDValue Op, SelectionDAG &DAG,
+ bool HasExtractInsert) {
unsigned WidthX = Op.getOperand(0).getValueSizeInBits();
unsigned WidthY = Op.getOperand(1).getValueSizeInBits();
EVT TyX = MVT::getIntegerVT(WidthX), TyY = MVT::getIntegerVT(WidthY);
@@ -1708,7 +1675,7 @@ static SDValue lowerFCOPYSIGN64(SDValue Op, SelectionDAG &DAG, bool HasR2) {
SDValue X = DAG.getNode(ISD::BITCAST, DL, TyX, Op.getOperand(0));
SDValue Y = DAG.getNode(ISD::BITCAST, DL, TyY, Op.getOperand(1));
- if (HasR2) {
+ if (HasExtractInsert) {
// ext E, Y, width(Y) - 1, 1 ; extract bit width(Y)-1 of Y
// ins X, E, width(X) - 1, 1 ; insert extracted bit at bit width(X)-1 of X
SDValue E = DAG.getNode(MipsISD::Ext, DL, TyY, Y,
@@ -1748,12 +1715,13 @@ static SDValue lowerFCOPYSIGN64(SDValue Op, SelectionDAG &DAG, bool HasR2) {
SDValue
MipsTargetLowering::lowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
if (Subtarget->hasMips64())
- return lowerFCOPYSIGN64(Op, DAG, Subtarget->hasMips32r2());
+ return lowerFCOPYSIGN64(Op, DAG, Subtarget->hasExtractInsert());
- return lowerFCOPYSIGN32(Op, DAG, Subtarget->hasMips32r2());
+ return lowerFCOPYSIGN32(Op, DAG, Subtarget->hasExtractInsert());
}
-static SDValue lowerFABS32(SDValue Op, SelectionDAG &DAG, bool HasR2) {
+static SDValue lowerFABS32(SDValue Op, SelectionDAG &DAG,
+ bool HasExtractInsert) {
SDValue Res, Const1 = DAG.getConstant(1, MVT::i32);
SDLoc DL(Op);
@@ -1765,7 +1733,7 @@ static SDValue lowerFABS32(SDValue Op, SelectionDAG &DAG, bool HasR2) {
Const1);
// Clear MSB.
- if (HasR2)
+ if (HasExtractInsert)
Res = DAG.getNode(MipsISD::Ins, DL, MVT::i32,
DAG.getRegister(Mips::ZERO, MVT::i32),
DAG.getConstant(31, MVT::i32), Const1, X);
@@ -1782,7 +1750,8 @@ static SDValue lowerFABS32(SDValue Op, SelectionDAG &DAG, bool HasR2) {
return DAG.getNode(MipsISD::BuildPairF64, DL, MVT::f64, LowX, Res);
}
-static SDValue lowerFABS64(SDValue Op, SelectionDAG &DAG, bool HasR2) {
+static SDValue lowerFABS64(SDValue Op, SelectionDAG &DAG,
+ bool HasExtractInsert) {
SDValue Res, Const1 = DAG.getConstant(1, MVT::i32);
SDLoc DL(Op);
@@ -1790,7 +1759,7 @@ static SDValue lowerFABS64(SDValue Op, SelectionDAG &DAG, bool HasR2) {
SDValue X = DAG.getNode(ISD::BITCAST, DL, MVT::i64, Op.getOperand(0));
// Clear MSB.
- if (HasR2)
+ if (HasExtractInsert)
Res = DAG.getNode(MipsISD::Ins, DL, MVT::i64,
DAG.getRegister(Mips::ZERO_64, MVT::i64),
DAG.getConstant(63, MVT::i32), Const1, X);
@@ -1805,9 +1774,9 @@ static SDValue lowerFABS64(SDValue Op, SelectionDAG &DAG, bool HasR2) {
SDValue
MipsTargetLowering::lowerFABS(SDValue Op, SelectionDAG &DAG) const {
if (Subtarget->hasMips64() && (Op.getValueType() == MVT::f64))
- return lowerFABS64(Op, DAG, Subtarget->hasMips32r2());
+ return lowerFABS64(Op, DAG, Subtarget->hasExtractInsert());
- return lowerFABS32(Op, DAG, Subtarget->hasMips32r2());
+ return lowerFABS32(Op, DAG, Subtarget->hasExtractInsert());
}
SDValue MipsTargetLowering::
@@ -2152,21 +2121,14 @@ SDValue MipsTargetLowering::lowerFP_TO_SINT(SDValue Op,
// For vararg functions, all arguments are passed in A0, A1, A2, A3 and stack.
//===----------------------------------------------------------------------===//
-static bool CC_MipsO32(unsigned ValNo, MVT ValVT,
- MVT LocVT, CCValAssign::LocInfo LocInfo,
- ISD::ArgFlagsTy ArgFlags, CCState &State) {
+static bool CC_MipsO32(unsigned ValNo, MVT ValVT, MVT LocVT,
+ CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
+ CCState &State, const uint16_t *F64Regs) {
- static const unsigned IntRegsSize=4, FloatRegsSize=2;
+ static const unsigned IntRegsSize = 4, FloatRegsSize = 2;
- static const uint16_t IntRegs[] = {
- Mips::A0, Mips::A1, Mips::A2, Mips::A3
- };
- static const uint16_t F32Regs[] = {
- Mips::F12, Mips::F14
- };
- static const uint16_t F64Regs[] = {
- Mips::D6, Mips::D7
- };
+ static const uint16_t IntRegs[] = { Mips::A0, Mips::A1, Mips::A2, Mips::A3 };
+ static const uint16_t F32Regs[] = { Mips::F12, Mips::F14 };
// Do not process byval args here.
if (ArgFlags.isByVal())
@@ -2235,14 +2197,28 @@ static bool CC_MipsO32(unsigned ValNo, MVT ValVT,
return false;
}
+static bool CC_MipsO32_FP32(unsigned ValNo, MVT ValVT,
+ MVT LocVT, CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags, CCState &State) {
+ static const uint16_t F64Regs[] = { Mips::D6, Mips::D7 };
+
+ return CC_MipsO32(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State, F64Regs);
+}
+
+static bool CC_MipsO32_FP64(unsigned ValNo, MVT ValVT,
+ MVT LocVT, CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags, CCState &State) {
+ static const uint16_t F64Regs[] = { Mips::D12_64, Mips::D14_64 };
+
+ return CC_MipsO32(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State, F64Regs);
+}
+
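The two thin wrappers exist because a CCAssignFn has a fixed signature with no room for extra state; baking the F64Regs table into each wrapper lets the FP32/FP64 decision be made once, where the MipsCC object is constructed. A hedged sketch of how such a function is consumed — the ternary mirrors fixedArgFn() later in this patch, the surrounding function is illustrative:

    // The chosen CCAssignFn runs once per formal argument.
    static void analyzeFormals(CCState &CCInfo,
                               const SmallVectorImpl<ISD::InputArg> &Ins,
                               bool IsO32, bool IsFP64) {
      CCAssignFn *Fn = IsO32 ? (IsFP64 ? CC_MipsO32_FP64 : CC_MipsO32_FP32)
                             : CC_MipsN;
      CCInfo.AnalyzeFormalArguments(Ins, Fn);
    }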
#include "MipsGenCallingConv.inc"
//===----------------------------------------------------------------------===//
// Call Calling Convention Implementation
//===----------------------------------------------------------------------===//
-static const unsigned O32IntRegsSize = 4;
-
// Return next O32 integer argument register.
static unsigned getNextIntArgReg(unsigned Reg) {
assert((Reg == Mips::A0) || (Reg == Mips::A2));
@@ -2339,6 +2315,7 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
const TargetFrameLowering *TFL = MF.getTarget().getFrameLowering();
+ MipsFunctionInfo *FuncInfo = MF.getInfo<MipsFunctionInfo>();
bool IsPIC = getTargetMachine().getRelocationModel() == Reloc::PIC_;
// Analyze operands of the call, assigning locations to each operand.
@@ -2347,10 +2324,11 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
getTargetMachine(), ArgLocs, *DAG.getContext());
MipsCC::SpecialCallingConvType SpecialCallingConv =
getSpecialCallingConv(Callee);
- MipsCC MipsCCInfo(CallConv, IsO32, CCInfo, SpecialCallingConv);
+ MipsCC MipsCCInfo(CallConv, IsO32, Subtarget->isFP64bit(), CCInfo,
+ SpecialCallingConv);
MipsCCInfo.analyzeCallOperands(Outs, IsVarArg,
- getTargetMachine().Options.UseSoftFloat,
+ Subtarget->mipsSEUsesSoftFloat(),
Callee.getNode(), CLI.Args);
// Get a count of how many bytes are to be pushed on the stack.
@@ -2467,32 +2445,40 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
bool IsPICCall = (IsN64 || IsPIC); // true if calls are translated to jalr $25
bool GlobalOrExternal = false, InternalLinkage = false;
SDValue CalleeLo;
+ EVT Ty = Callee.getValueType();
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
if (IsPICCall) {
- InternalLinkage = G->getGlobal()->hasInternalLinkage();
+ const GlobalValue *Val = G->getGlobal();
+ InternalLinkage = Val->hasInternalLinkage();
if (InternalLinkage)
- Callee = getAddrLocal(Callee, DAG, HasMips64);
+ Callee = getAddrLocal(G, Ty, DAG, HasMips64);
else if (LargeGOT)
- Callee = getAddrGlobalLargeGOT(Callee, DAG, MipsII::MO_CALL_HI16,
- MipsII::MO_CALL_LO16);
+ Callee = getAddrGlobalLargeGOT(G, Ty, DAG, MipsII::MO_CALL_HI16,
+ MipsII::MO_CALL_LO16, Chain,
+ FuncInfo->callPtrInfo(Val));
else
- Callee = getAddrGlobal(Callee, DAG, MipsII::MO_GOT_CALL);
+ Callee = getAddrGlobal(G, Ty, DAG, MipsII::MO_GOT_CALL, Chain,
+ FuncInfo->callPtrInfo(Val));
} else
Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, getPointerTy(), 0,
MipsII::MO_NO_FLAG);
GlobalOrExternal = true;
}
else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
+ const char *Sym = S->getSymbol();
+
if (!IsN64 && !IsPIC) // !N64 && static
- Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy(),
+ Callee = DAG.getTargetExternalSymbol(Sym, getPointerTy(),
MipsII::MO_NO_FLAG);
else if (LargeGOT)
- Callee = getAddrGlobalLargeGOT(Callee, DAG, MipsII::MO_CALL_HI16,
- MipsII::MO_CALL_LO16);
+ Callee = getAddrGlobalLargeGOT(S, Ty, DAG, MipsII::MO_CALL_HI16,
+ MipsII::MO_CALL_LO16, Chain,
+ FuncInfo->callPtrInfo(Sym));
else // N64 || PIC
- Callee = getAddrGlobal(Callee, DAG, MipsII::MO_GOT_CALL);
+ Callee = getAddrGlobal(S, Ty, DAG, MipsII::MO_GOT_CALL, Chain,
+ FuncInfo->callPtrInfo(Sym));
GlobalOrExternal = true;
}
@@ -2534,9 +2520,9 @@ MipsTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
SmallVector<CCValAssign, 16> RVLocs;
CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(),
getTargetMachine(), RVLocs, *DAG.getContext());
- MipsCC MipsCCInfo(CallConv, IsO32, CCInfo);
+ MipsCC MipsCCInfo(CallConv, IsO32, Subtarget->isFP64bit(), CCInfo);
- MipsCCInfo.analyzeCallResult(Ins, getTargetMachine().Options.UseSoftFloat,
+ MipsCCInfo.analyzeCallResult(Ins, Subtarget->mipsSEUsesSoftFloat(),
CallNode, RetTy);
// Copy all of the result registers out of their specified physreg.
@@ -2581,10 +2567,10 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain,
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(),
getTargetMachine(), ArgLocs, *DAG.getContext());
- MipsCC MipsCCInfo(CallConv, IsO32, CCInfo);
+ MipsCC MipsCCInfo(CallConv, IsO32, Subtarget->isFP64bit(), CCInfo);
Function::const_arg_iterator FuncArg =
DAG.getMachineFunction().getFunction()->arg_begin();
- bool UseSoftFloat = getTargetMachine().Options.UseSoftFloat;
+ bool UseSoftFloat = Subtarget->mipsSEUsesSoftFloat();
MipsCCInfo.analyzeFormalArguments(Ins, UseSoftFloat, FuncArg);
MipsFI->setFormalArgInfo(CCInfo.getNextStackOffset(),
@@ -2613,21 +2599,9 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain,
// Arguments stored on registers
if (IsRegLoc) {
- EVT RegVT = VA.getLocVT();
+ MVT RegVT = VA.getLocVT();
unsigned ArgReg = VA.getLocReg();
- const TargetRegisterClass *RC;
-
- if (RegVT == MVT::i32)
- RC = Subtarget->inMips16Mode()? &Mips::CPU16RegsRegClass :
- &Mips::GPR32RegClass;
- else if (RegVT == MVT::i64)
- RC = &Mips::GPR64RegClass;
- else if (RegVT == MVT::f32)
- RC = &Mips::FGR32RegClass;
- else if (RegVT == MVT::f64)
- RC = HasMips64 ? &Mips::FGR64RegClass : &Mips::AFGR64RegClass;
- else
- llvm_unreachable("RegVT not supported by FormalArguments Lowering");
+ const TargetRegisterClass *RC = getRegClassFor(RegVT);
// Transform the arguments stored on
// physical registers into virtual ones
@@ -2677,9 +2651,11 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain,
// Create load nodes to retrieve arguments from the stack
SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
- InVals.push_back(DAG.getLoad(ValVT, DL, Chain, FIN,
- MachinePointerInfo::getFixedStack(FI),
- false, false, false, 0));
+ SDValue Load = DAG.getLoad(ValVT, DL, Chain, FIN,
+ MachinePointerInfo::getFixedStack(FI),
+ false, false, false, 0);
+ InVals.push_back(Load);
+ OutChains.push_back(Load.getValue(1));
}
}
@@ -2740,10 +2716,10 @@ MipsTargetLowering::LowerReturn(SDValue Chain,
// CCState - Info about the registers and stack slot.
CCState CCInfo(CallConv, IsVarArg, MF, getTargetMachine(), RVLocs,
*DAG.getContext());
- MipsCC MipsCCInfo(CallConv, IsO32, CCInfo);
+ MipsCC MipsCCInfo(CallConv, IsO32, Subtarget->isFP64bit(), CCInfo);
// Analyze return values.
- MipsCCInfo.analyzeReturn(Outs, getTargetMachine().Options.UseSoftFloat,
+ MipsCCInfo.analyzeReturn(Outs, Subtarget->mipsSEUsesSoftFloat(),
MF.getFunction()->getReturnType());
SDValue Flag;
@@ -2802,7 +2778,7 @@ MipsTargetLowering::LowerReturn(SDValue Chain,
MipsTargetLowering::ConstraintType MipsTargetLowering::
getConstraintType(const std::string &Constraint) const
{
- // Mips specific constrainy
+ // Mips specific constraints
// GCC config/mips/constraints.md
//
// 'd' : An address register. Equivalent to r
@@ -2853,16 +2829,19 @@ MipsTargetLowering::getSingleConstraintMatchWeight(
if (type->isIntegerTy())
weight = CW_Register;
break;
- case 'f':
- if (type->isFloatTy())
+ case 'f': // FPU or MSA register
+ if (Subtarget->hasMSA() && type->isVectorTy() &&
+ cast<VectorType>(type)->getBitWidth() == 128)
+ weight = CW_Register;
+ else if (type->isFloatTy())
weight = CW_Register;
break;
case 'c': // $25 for indirect jumps
case 'l': // lo register
case 'x': // hilo register pair
- if (type->isIntegerTy())
+ if (type->isIntegerTy())
weight = CW_SpecificReg;
- break;
+ break;
case 'I': // signed 16 bit immediate
case 'J': // integer zero
case 'K': // unsigned 16 bit immediate
@@ -2880,6 +2859,104 @@ MipsTargetLowering::getSingleConstraintMatchWeight(
return weight;
}
+/// This is a helper function to parse a physical register string and split it
+/// into non-numeric and numeric parts (Prefix and Reg). The first boolean flag
+/// that is returned indicates whether parsing was successful. The second flag
+/// is true if the numeric part exists.
+static std::pair<bool, bool>
+parsePhysicalReg(const StringRef &C, std::string &Prefix,
+ unsigned long long &Reg) {
+ if (C.front() != '{' || C.back() != '}')
+ return std::make_pair(false, false);
+
+ // Search for the first numeric character.
+ StringRef::const_iterator I, B = C.begin() + 1, E = C.end() - 1;
+ I = std::find_if(B, E, std::ptr_fun(isdigit));
+
+ Prefix.assign(B, I - B);
+
+ // The second flag is set to false if no numeric characters were found.
+ if (I == E)
+ return std::make_pair(true, false);
+
+ // Parse the numeric characters.
+ return std::make_pair(!getAsUnsignedInteger(StringRef(I, E - I), 10, Reg),
+ true);
+}
+
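A worked example of the helper's contract: the braces are mandatory, the prefix is everything up to the first digit, and the numeric tail is optional. A hedged usage sketch:

    static void exampleParse() {
      std::string Prefix;
      unsigned long long Reg;
      // "{$f20}" -> (true, true): Prefix == "$f", Reg == 20
      parsePhysicalReg(StringRef("{$f20}"), Prefix, Reg);
      // "{hi}"   -> (true, false): prefix only, no numeric tail
      parsePhysicalReg(StringRef("{hi}"), Prefix, Reg);
      // "$f20"   -> (false, false): braces are mandatory
      parsePhysicalReg(StringRef("$f20"), Prefix, Reg);
    }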
+std::pair<unsigned, const TargetRegisterClass *> MipsTargetLowering::
+parseRegForInlineAsmConstraint(const StringRef &C, MVT VT) const {
+ const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
+ const TargetRegisterClass *RC;
+ std::string Prefix;
+ unsigned long long Reg;
+
+ std::pair<bool, bool> R = parsePhysicalReg(C, Prefix, Reg);
+
+ if (!R.first)
+ return std::make_pair((unsigned)0, (const TargetRegisterClass*)0);
+
+ if (Prefix == "hi" || Prefix == "lo") { // Parse hi/lo.
+ // No numeric characters follow "hi" or "lo".
+ if (R.second)
+ return std::make_pair((unsigned)0, (const TargetRegisterClass*)0);
+
+ RC = TRI->getRegClass(Prefix == "hi" ?
+ Mips::HI32RegClassID : Mips::LO32RegClassID);
+ return std::make_pair(*(RC->begin()), RC);
+ } else if (Prefix.compare(0, 4, "$msa") == 0) {
+ // Parse $msa(ir|csr|access|save|modify|request|map|unmap)
+
+ // No numeric characters follow the name.
+ if (R.second)
+ return std::make_pair((unsigned)0, (const TargetRegisterClass *)0);
+
+ Reg = StringSwitch<unsigned long long>(Prefix)
+ .Case("$msair", Mips::MSAIR)
+ .Case("$msacsr", Mips::MSACSR)
+ .Case("$msaaccess", Mips::MSAAccess)
+ .Case("$msasave", Mips::MSASave)
+ .Case("$msamodify", Mips::MSAModify)
+ .Case("$msarequest", Mips::MSARequest)
+ .Case("$msamap", Mips::MSAMap)
+ .Case("$msaunmap", Mips::MSAUnmap)
+ .Default(0);
+
+ if (!Reg)
+ return std::make_pair((unsigned)0, (const TargetRegisterClass *)0);
+
+ RC = TRI->getRegClass(Mips::MSACtrlRegClassID);
+ return std::make_pair(Reg, RC);
+ }
+
+ if (!R.second)
+ return std::make_pair((unsigned)0, (const TargetRegisterClass*)0);
+
+ if (Prefix == "$f") { // Parse $f0-$f31.
+ // If the size of FP registers is 64-bit or Reg is an even number, select
+ // the 64-bit register class. Otherwise, select the 32-bit register class.
+ if (VT == MVT::Other)
+ VT = (Subtarget->isFP64bit() || !(Reg % 2)) ? MVT::f64 : MVT::f32;
+
+ RC = getRegClassFor(VT);
+
+ if (RC == &Mips::AFGR64RegClass) {
+ assert(Reg % 2 == 0);
+ Reg >>= 1;
+ }
+ } else if (Prefix == "$fcc") // Parse $fcc0-$fcc7.
+ RC = TRI->getRegClass(Mips::FCCRegClassID);
+ else if (Prefix == "$w") { // Parse $w0-$w31.
+ RC = getRegClassFor((VT == MVT::Other) ? MVT::v16i8 : VT);
+ } else { // Parse $0-$31.
+ assert(Prefix == "$");
+ RC = getRegClassFor((VT == MVT::Other) ? MVT::i32 : VT);
+ }
+
+ assert(Reg < RC->getNumRegs());
+ return std::make_pair(*(RC->begin() + Reg), RC);
+}
+
/// Given a register class constraint, like 'r', if this corresponds directly
/// to an LLVM register class, return a register of 0 and the register class
/// pointer.
@@ -2902,10 +2979,18 @@ getRegForInlineAsmConstraint(const std::string &Constraint, MVT VT) const
return std::make_pair(0U, &Mips::GPR64RegClass);
// This will generate an error message
return std::make_pair(0u, static_cast<const TargetRegisterClass*>(0));
- case 'f':
- if (VT == MVT::f32)
+ case 'f': // FPU or MSA register
+ if (VT == MVT::v16i8)
+ return std::make_pair(0U, &Mips::MSA128BRegClass);
+ else if (VT == MVT::v8i16 || VT == MVT::v8f16)
+ return std::make_pair(0U, &Mips::MSA128HRegClass);
+ else if (VT == MVT::v4i32 || VT == MVT::v4f32)
+ return std::make_pair(0U, &Mips::MSA128WRegClass);
+ else if (VT == MVT::v2i64 || VT == MVT::v2f64)
+ return std::make_pair(0U, &Mips::MSA128DRegClass);
+ else if (VT == MVT::f32)
return std::make_pair(0U, &Mips::FGR32RegClass);
- if ((VT == MVT::f64) && (!Subtarget->isSingleFloat())) {
+ else if ((VT == MVT::f64) && (!Subtarget->isSingleFloat())) {
if (Subtarget->isFP64bit())
return std::make_pair(0U, &Mips::FGR64RegClass);
return std::make_pair(0U, &Mips::AFGR64RegClass);
@@ -2918,14 +3003,21 @@ getRegForInlineAsmConstraint(const std::string &Constraint, MVT VT) const
return std::make_pair((unsigned)Mips::T9_64, &Mips::GPR64RegClass);
case 'l': // register suitable for indirect jump
if (VT == MVT::i32)
- return std::make_pair((unsigned)Mips::LO, &Mips::LORegsRegClass);
- return std::make_pair((unsigned)Mips::LO64, &Mips::LORegs64RegClass);
+ return std::make_pair((unsigned)Mips::LO0, &Mips::LO32RegClass);
+ return std::make_pair((unsigned)Mips::LO0_64, &Mips::LO64RegClass);
case 'x': // register suitable for indirect jump
// FIXME: Not triggering the use of both hi and lo
// This will generate an error message
return std::make_pair(0u, static_cast<const TargetRegisterClass*>(0));
}
}
+
+ std::pair<unsigned, const TargetRegisterClass *> R;
+ R = parseRegForInlineAsmConstraint(Constraint, VT);
+
+ if (R.second)
+ return R;
+
return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
}
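With the 'f' constraint widened to MSA types, GCC-style inline assembly can pass 128-bit vectors through it. A hedged usage example — it assumes an MSA-enabled mips target, and the 'w' operand modifier that prints the $w register name is a GCC/Clang convention, not something introduced by this patch:

    typedef int v4i32 __attribute__((vector_size(16)));
    v4i32 add_v4i32(v4i32 a, v4i32 b) {
      v4i32 r;
      __asm__("addv.w %w0, %w1, %w2" : "=f"(r) : "f"(a), "f"(b));
      return r;
    }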
@@ -3024,8 +3116,8 @@ void MipsTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
}
-bool
-MipsTargetLowering::isLegalAddressingMode(const AddrMode &AM, Type *Ty) const {
+bool MipsTargetLowering::isLegalAddressingMode(const AddrMode &AM,
+ Type *Ty) const {
// No global is ever allowed as a base.
if (AM.BaseGV)
return false;
@@ -3089,13 +3181,13 @@ static bool isF128SoftLibCall(const char *CallSym) {
"log10l", "log2l", "logl", "nearbyintl", "powl", "rintl", "sinl", "sqrtl",
"truncl"};
- const char * const *End = LibCalls + array_lengthof(LibCalls);
+ const char *const *End = LibCalls + array_lengthof(LibCalls);
// Check that LibCalls is sorted alphabetically.
MipsTargetLowering::LTStr Comp;
#ifndef NDEBUG
- for (const char * const *I = LibCalls; I < End - 1; ++I)
+ for (const char *const *I = LibCalls; I < End - 1; ++I)
assert(Comp(*I, *(I + 1)));
#endif
@@ -3133,9 +3225,9 @@ MipsTargetLowering::MipsCC::SpecialCallingConvType
}
MipsTargetLowering::MipsCC::MipsCC(
- CallingConv::ID CC, bool IsO32_, CCState &Info,
- MipsCC::SpecialCallingConvType SpecialCallingConv_)
- : CCInfo(Info), CallConv(CC), IsO32(IsO32_),
+ CallingConv::ID CC, bool IsO32_, bool IsFP64_, CCState &Info,
+ MipsCC::SpecialCallingConvType SpecialCallingConv_)
+ : CCInfo(Info), CallConv(CC), IsO32(IsO32_), IsFP64(IsFP64_),
SpecialCallingConv(SpecialCallingConv_){
// Pre-allocate reserved argument area.
CCInfo.AllocateStack(reservedArgArea(), 1);
@@ -3249,11 +3341,10 @@ analyzeReturn(const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsSoftFloat,
analyzeReturn(Outs, IsSoftFloat, 0, RetTy);
}
-void
-MipsTargetLowering::MipsCC::handleByValArg(unsigned ValNo, MVT ValVT,
- MVT LocVT,
- CCValAssign::LocInfo LocInfo,
- ISD::ArgFlagsTy ArgFlags) {
+void MipsTargetLowering::MipsCC::handleByValArg(unsigned ValNo, MVT ValVT,
+ MVT LocVT,
+ CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags) {
assert(ArgFlags.getByValSize() && "Byval argument's size shouldn't be 0.");
struct ByValArgInfo ByVal;
@@ -3291,11 +3382,11 @@ llvm::CCAssignFn *MipsTargetLowering::MipsCC::fixedArgFn() const {
if (SpecialCallingConv == Mips16RetHelperConv)
return CC_Mips16RetHelper;
- return IsO32 ? CC_MipsO32 : CC_MipsN;
+ return IsO32 ? (IsFP64 ? CC_MipsO32_FP64 : CC_MipsO32_FP32) : CC_MipsN;
}
llvm::CCAssignFn *MipsTargetLowering::MipsCC::varArgFn() const {
- return IsO32 ? CC_MipsO32 : CC_MipsN_VarArg;
+ return IsO32 ? (IsFP64 ? CC_MipsO32_FP64 : CC_MipsO32_FP32) : CC_MipsN_VarArg;
}
const uint16_t *MipsTargetLowering::MipsCC::shadowRegs() const {
@@ -3473,17 +3564,15 @@ passByValArg(SDValue Chain, SDLoc DL,
DAG.getConstant(Offset, PtrTy));
SDValue Dst = DAG.getNode(ISD::ADD, DL, PtrTy, StackPtr,
DAG.getIntPtrConstant(ByVal.Address));
- Chain = DAG.getMemcpy(Chain, DL, Dst, Src,
- DAG.getConstant(MemCpySize, PtrTy), Alignment,
- /*isVolatile=*/false, /*AlwaysInline=*/false,
+ Chain = DAG.getMemcpy(Chain, DL, Dst, Src, DAG.getConstant(MemCpySize, PtrTy),
+ Alignment, /*isVolatile=*/false, /*AlwaysInline=*/false,
MachinePointerInfo(0), MachinePointerInfo(0));
MemOpChains.push_back(Chain);
}
-void
-MipsTargetLowering::writeVarArgRegs(std::vector<SDValue> &OutChains,
- const MipsCC &CC, SDValue Chain,
- SDLoc DL, SelectionDAG &DAG) const {
+void MipsTargetLowering::writeVarArgRegs(std::vector<SDValue> &OutChains,
+ const MipsCC &CC, SDValue Chain,
+ SDLoc DL, SelectionDAG &DAG) const {
unsigned NumRegs = CC.numIntArgRegs();
const uint16_t *ArgRegs = CC.intArgRegs();
const CCState &CCInfo = CC.getCCInfo();
@@ -3501,8 +3590,7 @@ MipsTargetLowering::writeVarArgRegs(std::vector<SDValue> &OutChains,
if (NumRegs == Idx)
VaArgOffset = RoundUpToAlignment(CCInfo.getNextStackOffset(), RegSize);
else
- VaArgOffset =
- (int)CC.reservedArgArea() - (int)(RegSize * (NumRegs - Idx));
+ VaArgOffset = (int)CC.reservedArgArea() - (int)(RegSize * (NumRegs - Idx));
// Record the frame index of the first variable argument
// which is a value necessary to VASTART.
diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h
index 123a2a6..65f68f0 100644
--- a/lib/Target/Mips/MipsISelLowering.h
+++ b/lib/Target/Mips/MipsISelLowering.h
@@ -17,6 +17,7 @@
#include "Mips.h"
#include "MipsSubtarget.h"
+#include "MCTargetDesc/MipsBaseInfo.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/IR/Function.h"
@@ -69,10 +70,11 @@ namespace llvm {
EH_RETURN,
// Nodes used to extract an integer from the accumulator.
- ExtractLOHI,
+ MFHI,
+ MFLO,
// Node used to insert integers into the accumulator.
- InsertLOHI,
+ MTLOHI,
// Mult nodes.
Mult,
@@ -152,6 +154,43 @@ namespace llvm {
SETCC_DSP,
SELECT_CC_DSP,
+ // Vector comparisons.
+ // These take a vector and return a boolean.
+ VALL_ZERO,
+ VANY_ZERO,
+ VALL_NONZERO,
+ VANY_NONZERO,
+
+ // These take a vector and return a vector bitmask.
+ VCEQ,
+ VCLE_S,
+ VCLE_U,
+ VCLT_S,
+ VCLT_U,
+
+ // Element-wise vector max/min.
+ VSMAX,
+ VSMIN,
+ VUMAX,
+ VUMIN,
+
+ // Vector Shuffle with mask as an operand
+ VSHF, // Generic shuffle
+ SHF, // 4-element set shuffle.
+ ILVEV, // Interleave even elements
+ ILVOD, // Interleave odd elements
+ ILVL, // Interleave left elements
+ ILVR, // Interleave right elements
+ PCKEV, // Pack even elements
+ PCKOD, // Pack odd elements
+
+ // Combined (XOR (OR $a, $b), -1)
+ VNOR,
+
+ // Extended vector element extraction
+ VEXTRACT_SEXT_ELT,
+ VEXTRACT_ZEXT_ELT,
+
// Load/Store Left/Right nodes.
LWL = ISD::FIRST_TARGET_MEMORY_OPCODE,
LWR,
@@ -211,12 +250,72 @@ namespace llvm {
protected:
SDValue getGlobalReg(SelectionDAG &DAG, EVT Ty) const;
- SDValue getAddrLocal(SDValue Op, SelectionDAG &DAG, bool HasMips64) const;
-
- SDValue getAddrGlobal(SDValue Op, SelectionDAG &DAG, unsigned Flag) const;
-
- SDValue getAddrGlobalLargeGOT(SDValue Op, SelectionDAG &DAG,
- unsigned HiFlag, unsigned LoFlag) const;
+ // This method creates the following nodes, which are necessary for
+ // computing a local symbol's address:
+ //
+ // (add (load (wrapper $gp, %got(sym))), %lo(sym))
+ template<class NodeTy>
+ SDValue getAddrLocal(NodeTy *N, EVT Ty, SelectionDAG &DAG,
+ bool HasMips64) const {
+ SDLoc DL(N);
+ unsigned GOTFlag = HasMips64 ? MipsII::MO_GOT_PAGE : MipsII::MO_GOT;
+ SDValue GOT = DAG.getNode(MipsISD::Wrapper, DL, Ty, getGlobalReg(DAG, Ty),
+ getTargetNode(N, Ty, DAG, GOTFlag));
+ SDValue Load = DAG.getLoad(Ty, DL, DAG.getEntryNode(), GOT,
+ MachinePointerInfo::getGOT(), false, false,
+ false, 0);
+ unsigned LoFlag = HasMips64 ? MipsII::MO_GOT_OFST : MipsII::MO_ABS_LO;
+ SDValue Lo = DAG.getNode(MipsISD::Lo, DL, Ty,
+ getTargetNode(N, Ty, DAG, LoFlag));
+ return DAG.getNode(ISD::ADD, DL, Ty, Load, Lo);
+ }
+
+ // This method creates the following nodes, which are necessary for
+ // computing a global symbol's address:
+ //
+ // (load (wrapper $gp, %got(sym)))
+ template<class NodeTy>
+ SDValue getAddrGlobal(NodeTy *N, EVT Ty, SelectionDAG &DAG,
+ unsigned Flag, SDValue Chain,
+ const MachinePointerInfo &PtrInfo) const {
+ SDLoc DL(N);
+ SDValue Tgt = DAG.getNode(MipsISD::Wrapper, DL, Ty, getGlobalReg(DAG, Ty),
+ getTargetNode(N, Ty, DAG, Flag));
+ return DAG.getLoad(Ty, DL, Chain, Tgt, PtrInfo, false, false, false, 0);
+ }
+
+ // This method creates the following nodes, which are necessary for
+ // computing a global symbol's address in large-GOT mode:
+ //
+ // (load (wrapper (add %hi(sym), $gp), %lo(sym)))
+ template<class NodeTy>
+ SDValue getAddrGlobalLargeGOT(NodeTy *N, EVT Ty, SelectionDAG &DAG,
+ unsigned HiFlag, unsigned LoFlag,
+ SDValue Chain,
+ const MachinePointerInfo &PtrInfo) const {
+ SDLoc DL(N);
+ SDValue Hi = DAG.getNode(MipsISD::Hi, DL, Ty,
+ getTargetNode(N, Ty, DAG, HiFlag));
+ Hi = DAG.getNode(ISD::ADD, DL, Ty, Hi, getGlobalReg(DAG, Ty));
+ SDValue Wrapper = DAG.getNode(MipsISD::Wrapper, DL, Ty, Hi,
+ getTargetNode(N, Ty, DAG, LoFlag));
+ return DAG.getLoad(Ty, DL, Chain, Wrapper, PtrInfo, false, false, false,
+ 0);
+ }
+
+ // This method creates the following nodes, which are necessary for
+ // computing a symbol's address in non-PIC mode:
+ //
+ // (add %hi(sym), %lo(sym))
+ template<class NodeTy>
+ SDValue getAddrNonPIC(NodeTy *N, EVT Ty, SelectionDAG &DAG) const {
+ SDLoc DL(N);
+ SDValue Hi = getTargetNode(N, Ty, DAG, MipsII::MO_ABS_HI);
+ SDValue Lo = getTargetNode(N, Ty, DAG, MipsII::MO_ABS_LO);
+ return DAG.getNode(ISD::ADD, DL, Ty,
+ DAG.getNode(MipsISD::Hi, DL, Ty, Hi),
+ DAG.getNode(MipsISD::Lo, DL, Ty, Lo));
+ }
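
Since all four helpers are templated over the node type and resolve symbol operands through getTargetNode overloads, a single lowering routine per symbol kind can choose among them. A hedged sketch of such a dispatcher, with the control flow assumed rather than taken from this patch:

    SDValue MipsTargetLowering::lowerGlobalAddress(SDValue Op,
                                                   SelectionDAG &DAG) const {
      GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
      EVT Ty = Op.getValueType();
      // Non-PIC: absolute %hi/%lo pair.
      if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
        return getAddrNonPIC(N, Ty, DAG);
      // PIC, symbol local to this module: GOT page plus %lo offset.
      if (N->getGlobal()->hasLocalLinkage())
        return getAddrLocal(N, Ty, DAG, HasMips64);
      // PIC, global symbol: plain GOT load.
      return getAddrGlobal(N, Ty, DAG, MipsII::MO_GOT, DAG.getEntryNode(),
                           MachinePointerInfo::getGOT());
    }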
/// This function fills Ops, which is the list of operands that will later
/// be used when a function call node is created. It also generates
@@ -244,9 +343,8 @@ namespace llvm {
Mips16RetHelperConv, NoSpecialCallingConv
};
- MipsCC(
- CallingConv::ID CallConv, bool IsO32, CCState &Info,
- SpecialCallingConvType SpecialCallingConv = NoSpecialCallingConv);
+ MipsCC(CallingConv::ID CallConv, bool IsO32, bool IsFP64, CCState &Info,
+ SpecialCallingConvType SpecialCallingConv = NoSpecialCallingConv);
void analyzeCallOperands(const SmallVectorImpl<ISD::OutputArg> &Outs,
@@ -319,17 +417,39 @@ namespace llvm {
CCState &CCInfo;
CallingConv::ID CallConv;
- bool IsO32;
+ bool IsO32, IsFP64;
SpecialCallingConvType SpecialCallingConv;
SmallVector<ByValArgInfo, 2> ByValArgs;
};
protected:
+ SDValue lowerLOAD(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerSTORE(SDValue Op, SelectionDAG &DAG) const;
+
// Subtarget Info
const MipsSubtarget *Subtarget;
bool HasMips64, IsN64, IsO32;
private:
+ // Create a TargetGlobalAddress node.
+ SDValue getTargetNode(GlobalAddressSDNode *N, EVT Ty, SelectionDAG &DAG,
+ unsigned Flag) const;
+
+ // Create a TargetExternalSymbol node.
+ SDValue getTargetNode(ExternalSymbolSDNode *N, EVT Ty, SelectionDAG &DAG,
+ unsigned Flag) const;
+
+ // Create a TargetBlockAddress node.
+ SDValue getTargetNode(BlockAddressSDNode *N, EVT Ty, SelectionDAG &DAG,
+ unsigned Flag) const;
+
+ // Create a TargetJumpTable node.
+ SDValue getTargetNode(JumpTableSDNode *N, EVT Ty, SelectionDAG &DAG,
+ unsigned Flag) const;
+
+ // Create a TargetConstantPool node.
+ SDValue getTargetNode(ConstantPoolSDNode *N, EVT Ty, SelectionDAG &DAG,
+ unsigned Flag) const;
MipsCC::SpecialCallingConvType getSpecialCallingConv(SDValue Callee) const;
// Lower Operand helpers
@@ -361,8 +481,6 @@ namespace llvm {
SDValue lowerShiftLeftParts(SDValue Op, SelectionDAG& DAG) const;
SDValue lowerShiftRightParts(SDValue Op, SelectionDAG& DAG,
bool IsSRA) const;
- SDValue lowerLOAD(SDValue Op, SelectionDAG &DAG) const;
- SDValue lowerSTORE(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerADD(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const;
@@ -433,6 +551,11 @@ namespace llvm {
ConstraintWeight getSingleConstraintMatchWeight(
AsmOperandInfo &info, const char *constraint) const;
+ /// This function parses registers that appear in inline-asm constraints.
+ /// It returns pair (0, 0) on failure.
+ std::pair<unsigned, const TargetRegisterClass *>
+ parseRegForInlineAsmConstraint(const StringRef &C, MVT VT) const;
+
std::pair<unsigned, const TargetRegisterClass*>
getRegForInlineAsmConstraint(const std::string &Constraint,
MVT VT) const;
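
A plausible way the two declarations cooperate (a sketch under that assumption, not the patch's actual body): the generic constraint handler tries the explicit-register parser first and falls back to the target-independent path when it gets the documented (0, 0) failure pair.

    std::pair<unsigned, const TargetRegisterClass *>
    MipsTargetLowering::getRegForInlineAsmConstraint(
        const std::string &Constraint, MVT VT) const {
      // Registers spelled out in the constraint string, e.g. "{$t0}".
      std::pair<unsigned, const TargetRegisterClass *> R =
          parseRegForInlineAsmConstraint(Constraint, VT);
      if (R.second)
        return R;
      // Fall back to the usual single-letter constraint handling.
      return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
    }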
diff --git a/lib/Target/Mips/MipsInstrFPU.td b/lib/Target/Mips/MipsInstrFPU.td
index b992e77..9f7ce9a 100644
--- a/lib/Target/Mips/MipsInstrFPU.td
+++ b/lib/Target/Mips/MipsInstrFPU.td
@@ -99,9 +99,9 @@ class ADDS_FT<string opstr, RegisterOperand RC, InstrItinClass Itin, bit IsComm,
multiclass ADDS_M<string opstr, InstrItinClass Itin, bit IsComm,
SDPatternOperator OpNode = null_frag> {
- def _D32 : ADDS_FT<opstr, AFGR64RegsOpnd, Itin, IsComm, OpNode>,
+ def _D32 : ADDS_FT<opstr, AFGR64Opnd, Itin, IsComm, OpNode>,
Requires<[NotFP64bit, HasStdEnc]>;
- def _D64 : ADDS_FT<opstr, FGR64RegsOpnd, Itin, IsComm, OpNode>,
+ def _D64 : ADDS_FT<opstr, FGR64Opnd, Itin, IsComm, OpNode>,
Requires<[IsFP64bit, HasStdEnc]> {
string DecoderNamespace = "Mips64";
}
@@ -115,18 +115,18 @@ class ABSS_FT<string opstr, RegisterOperand DstRC, RegisterOperand SrcRC,
multiclass ABSS_M<string opstr, InstrItinClass Itin,
SDPatternOperator OpNode= null_frag> {
- def _D32 : ABSS_FT<opstr, AFGR64RegsOpnd, AFGR64RegsOpnd, Itin, OpNode>,
+ def _D32 : ABSS_FT<opstr, AFGR64Opnd, AFGR64Opnd, Itin, OpNode>,
Requires<[NotFP64bit, HasStdEnc]>;
- def _D64 : ABSS_FT<opstr, FGR64RegsOpnd, FGR64RegsOpnd, Itin, OpNode>,
+ def _D64 : ABSS_FT<opstr, FGR64Opnd, FGR64Opnd, Itin, OpNode>,
Requires<[IsFP64bit, HasStdEnc]> {
string DecoderNamespace = "Mips64";
}
}
multiclass ROUND_M<string opstr, InstrItinClass Itin> {
- def _D32 : ABSS_FT<opstr, FGR32RegsOpnd, AFGR64RegsOpnd, Itin>,
+ def _D32 : ABSS_FT<opstr, FGR32Opnd, AFGR64Opnd, Itin>,
Requires<[NotFP64bit, HasStdEnc]>;
- def _D64 : ABSS_FT<opstr, FGR32RegsOpnd, FGR64RegsOpnd, Itin>,
+ def _D64 : ABSS_FT<opstr, FGR32Opnd, FGR64Opnd, Itin>,
Requires<[IsFP64bit, HasStdEnc]> {
let DecoderNamespace = "Mips64";
}
@@ -143,16 +143,16 @@ class MTC1_FT<string opstr, RegisterOperand DstRC, RegisterOperand SrcRC,
[(set DstRC:$fs, (OpNode SrcRC:$rt))], Itin, FrmFR>;
class LW_FT<string opstr, RegisterOperand RC, InstrItinClass Itin,
- Operand MemOpnd, SDPatternOperator OpNode= null_frag> :
- InstSE<(outs RC:$rt), (ins MemOpnd:$addr), !strconcat(opstr, "\t$rt, $addr"),
+ SDPatternOperator OpNode= null_frag> :
+ InstSE<(outs RC:$rt), (ins mem:$addr), !strconcat(opstr, "\t$rt, $addr"),
[(set RC:$rt, (OpNode addrDefault:$addr))], Itin, FrmFI> {
let DecoderMethod = "DecodeFMem";
let mayLoad = 1;
}
class SW_FT<string opstr, RegisterOperand RC, InstrItinClass Itin,
- Operand MemOpnd, SDPatternOperator OpNode= null_frag> :
- InstSE<(outs), (ins RC:$rt, MemOpnd:$addr), !strconcat(opstr, "\t$rt, $addr"),
+ SDPatternOperator OpNode= null_frag> :
+ InstSE<(outs), (ins RC:$rt, mem:$addr), !strconcat(opstr, "\t$rt, $addr"),
[(OpNode RC:$rt, addrDefault:$addr)], Itin, FrmFI> {
let DecoderMethod = "DecodeFMem";
let mayStore = 1;
@@ -171,19 +171,19 @@ class NMADDS_FT<string opstr, RegisterOperand RC, InstrItinClass Itin,
[(set RC:$fd, (fsub fpimm0, (OpNode (fmul RC:$fs, RC:$ft), RC:$fr)))],
Itin, FrmFR>;
-class LWXC1_FT<string opstr, RegisterOperand DRC, RegisterOperand PRC,
+class LWXC1_FT<string opstr, RegisterOperand DRC,
InstrItinClass Itin, SDPatternOperator OpNode = null_frag> :
- InstSE<(outs DRC:$fd), (ins PRC:$base, PRC:$index),
+ InstSE<(outs DRC:$fd), (ins PtrRC:$base, PtrRC:$index),
!strconcat(opstr, "\t$fd, ${index}(${base})"),
- [(set DRC:$fd, (OpNode (add PRC:$base, PRC:$index)))], Itin, FrmFI> {
+ [(set DRC:$fd, (OpNode (add iPTR:$base, iPTR:$index)))], Itin, FrmFI> {
let AddedComplexity = 20;
}
-class SWXC1_FT<string opstr, RegisterOperand DRC, RegisterOperand PRC,
+class SWXC1_FT<string opstr, RegisterOperand DRC,
InstrItinClass Itin, SDPatternOperator OpNode = null_frag> :
- InstSE<(outs), (ins DRC:$fs, PRC:$base, PRC:$index),
+ InstSE<(outs), (ins DRC:$fs, PtrRC:$base, PtrRC:$index),
!strconcat(opstr, "\t$fs, ${index}(${base})"),
- [(OpNode DRC:$fs, (add PRC:$base, PRC:$index))], Itin, FrmFI> {
+ [(OpNode DRC:$fs, (add iPTR:$base, iPTR:$index))], Itin, FrmFI> {
let AddedComplexity = 20;
}
@@ -231,24 +231,24 @@ multiclass C_COND_M<string TypeStr, RegisterOperand RC, bits<5> fmt> {
def C_NGT_#NAME : C_COND_FT<"ngt", TypeStr, RC>, C_COND_FM<fmt, 15>;
}
-defm S : C_COND_M<"s", FGR32RegsOpnd, 16>;
-defm D32 : C_COND_M<"d", AFGR64RegsOpnd, 17>,
+defm S : C_COND_M<"s", FGR32Opnd, 16>;
+defm D32 : C_COND_M<"d", AFGR64Opnd, 17>,
Requires<[NotFP64bit, HasStdEnc]>;
let DecoderNamespace = "Mips64" in
-defm D64 : C_COND_M<"d", FGR64RegsOpnd, 17>, Requires<[IsFP64bit, HasStdEnc]>;
+defm D64 : C_COND_M<"d", FGR64Opnd, 17>, Requires<[IsFP64bit, HasStdEnc]>;
//===----------------------------------------------------------------------===//
// Floating Point Instructions
//===----------------------------------------------------------------------===//
-def ROUND_W_S : ABSS_FT<"round.w.s", FGR32RegsOpnd, FGR32RegsOpnd, IIFcvt>,
+def ROUND_W_S : ABSS_FT<"round.w.s", FGR32Opnd, FGR32Opnd, IIFcvt>,
ABSS_FM<0xc, 16>;
-def TRUNC_W_S : ABSS_FT<"trunc.w.s", FGR32RegsOpnd, FGR32RegsOpnd, IIFcvt>,
+def TRUNC_W_S : ABSS_FT<"trunc.w.s", FGR32Opnd, FGR32Opnd, IIFcvt>,
ABSS_FM<0xd, 16>;
-def CEIL_W_S : ABSS_FT<"ceil.w.s", FGR32RegsOpnd, FGR32RegsOpnd, IIFcvt>,
+def CEIL_W_S : ABSS_FT<"ceil.w.s", FGR32Opnd, FGR32Opnd, IIFcvt>,
ABSS_FM<0xe, 16>;
-def FLOOR_W_S : ABSS_FT<"floor.w.s", FGR32RegsOpnd, FGR32RegsOpnd, IIFcvt>,
+def FLOOR_W_S : ABSS_FT<"floor.w.s", FGR32Opnd, FGR32Opnd, IIFcvt>,
ABSS_FM<0xf, 16>;
-def CVT_W_S : ABSS_FT<"cvt.w.s", FGR32RegsOpnd, FGR32RegsOpnd, IIFcvt>,
+def CVT_W_S : ABSS_FT<"cvt.w.s", FGR32Opnd, FGR32Opnd, IIFcvt>,
ABSS_FM<0x24, 16>;
defm ROUND_W : ROUND_M<"round.w.d", IIFcvt>, ABSS_FM<0xc, 17>;
@@ -258,71 +258,71 @@ defm FLOOR_W : ROUND_M<"floor.w.d", IIFcvt>, ABSS_FM<0xf, 17>;
defm CVT_W : ROUND_M<"cvt.w.d", IIFcvt>, ABSS_FM<0x24, 17>;
let Predicates = [IsFP64bit, HasStdEnc], DecoderNamespace = "Mips64" in {
- def ROUND_L_S : ABSS_FT<"round.l.s", FGR64RegsOpnd, FGR32RegsOpnd, IIFcvt>,
+ def ROUND_L_S : ABSS_FT<"round.l.s", FGR64Opnd, FGR32Opnd, IIFcvt>,
ABSS_FM<0x8, 16>;
- def ROUND_L_D64 : ABSS_FT<"round.l.d", FGR64RegsOpnd, FGR64RegsOpnd, IIFcvt>,
+ def ROUND_L_D64 : ABSS_FT<"round.l.d", FGR64Opnd, FGR64Opnd, IIFcvt>,
ABSS_FM<0x8, 17>;
- def TRUNC_L_S : ABSS_FT<"trunc.l.s", FGR64RegsOpnd, FGR32RegsOpnd, IIFcvt>,
+ def TRUNC_L_S : ABSS_FT<"trunc.l.s", FGR64Opnd, FGR32Opnd, IIFcvt>,
ABSS_FM<0x9, 16>;
- def TRUNC_L_D64 : ABSS_FT<"trunc.l.d", FGR64RegsOpnd, FGR64RegsOpnd, IIFcvt>,
+ def TRUNC_L_D64 : ABSS_FT<"trunc.l.d", FGR64Opnd, FGR64Opnd, IIFcvt>,
ABSS_FM<0x9, 17>;
- def CEIL_L_S : ABSS_FT<"ceil.l.s", FGR64RegsOpnd, FGR32RegsOpnd, IIFcvt>,
+ def CEIL_L_S : ABSS_FT<"ceil.l.s", FGR64Opnd, FGR32Opnd, IIFcvt>,
ABSS_FM<0xa, 16>;
- def CEIL_L_D64 : ABSS_FT<"ceil.l.d", FGR64RegsOpnd, FGR64RegsOpnd, IIFcvt>,
+ def CEIL_L_D64 : ABSS_FT<"ceil.l.d", FGR64Opnd, FGR64Opnd, IIFcvt>,
ABSS_FM<0xa, 17>;
- def FLOOR_L_S : ABSS_FT<"floor.l.s", FGR64RegsOpnd, FGR32RegsOpnd, IIFcvt>,
+ def FLOOR_L_S : ABSS_FT<"floor.l.s", FGR64Opnd, FGR32Opnd, IIFcvt>,
ABSS_FM<0xb, 16>;
- def FLOOR_L_D64 : ABSS_FT<"floor.l.d", FGR64RegsOpnd, FGR64RegsOpnd, IIFcvt>,
+ def FLOOR_L_D64 : ABSS_FT<"floor.l.d", FGR64Opnd, FGR64Opnd, IIFcvt>,
ABSS_FM<0xb, 17>;
}
-def CVT_S_W : ABSS_FT<"cvt.s.w", FGR32RegsOpnd, FGR32RegsOpnd, IIFcvt>,
+def CVT_S_W : ABSS_FT<"cvt.s.w", FGR32Opnd, FGR32Opnd, IIFcvt>,
ABSS_FM<0x20, 20>;
-def CVT_L_S : ABSS_FT<"cvt.l.s", FGR64RegsOpnd, FGR32RegsOpnd, IIFcvt>,
+def CVT_L_S : ABSS_FT<"cvt.l.s", FGR64Opnd, FGR32Opnd, IIFcvt>,
ABSS_FM<0x25, 16>;
-def CVT_L_D64: ABSS_FT<"cvt.l.d", FGR64RegsOpnd, FGR64RegsOpnd, IIFcvt>,
+def CVT_L_D64: ABSS_FT<"cvt.l.d", FGR64Opnd, FGR64Opnd, IIFcvt>,
ABSS_FM<0x25, 17>;
let Predicates = [NotFP64bit, HasStdEnc] in {
- def CVT_S_D32 : ABSS_FT<"cvt.s.d", FGR32RegsOpnd, AFGR64RegsOpnd, IIFcvt>,
+ def CVT_S_D32 : ABSS_FT<"cvt.s.d", FGR32Opnd, AFGR64Opnd, IIFcvt>,
ABSS_FM<0x20, 17>;
- def CVT_D32_W : ABSS_FT<"cvt.d.w", AFGR64RegsOpnd, FGR32RegsOpnd, IIFcvt>,
+ def CVT_D32_W : ABSS_FT<"cvt.d.w", AFGR64Opnd, FGR32Opnd, IIFcvt>,
ABSS_FM<0x21, 20>;
- def CVT_D32_S : ABSS_FT<"cvt.d.s", AFGR64RegsOpnd, FGR32RegsOpnd, IIFcvt>,
+ def CVT_D32_S : ABSS_FT<"cvt.d.s", AFGR64Opnd, FGR32Opnd, IIFcvt>,
ABSS_FM<0x21, 16>;
}
let Predicates = [IsFP64bit, HasStdEnc], DecoderNamespace = "Mips64" in {
- def CVT_S_D64 : ABSS_FT<"cvt.s.d", FGR32RegsOpnd, FGR64RegsOpnd, IIFcvt>,
+ def CVT_S_D64 : ABSS_FT<"cvt.s.d", FGR32Opnd, FGR64Opnd, IIFcvt>,
ABSS_FM<0x20, 17>;
- def CVT_S_L : ABSS_FT<"cvt.s.l", FGR32RegsOpnd, FGR64RegsOpnd, IIFcvt>,
+ def CVT_S_L : ABSS_FT<"cvt.s.l", FGR32Opnd, FGR64Opnd, IIFcvt>,
ABSS_FM<0x20, 21>;
- def CVT_D64_W : ABSS_FT<"cvt.d.w", FGR64RegsOpnd, FGR32RegsOpnd, IIFcvt>,
+ def CVT_D64_W : ABSS_FT<"cvt.d.w", FGR64Opnd, FGR32Opnd, IIFcvt>,
ABSS_FM<0x21, 20>;
- def CVT_D64_S : ABSS_FT<"cvt.d.s", FGR64RegsOpnd, FGR32RegsOpnd, IIFcvt>,
+ def CVT_D64_S : ABSS_FT<"cvt.d.s", FGR64Opnd, FGR32Opnd, IIFcvt>,
ABSS_FM<0x21, 16>;
- def CVT_D64_L : ABSS_FT<"cvt.d.l", FGR64RegsOpnd, FGR64RegsOpnd, IIFcvt>,
+ def CVT_D64_L : ABSS_FT<"cvt.d.l", FGR64Opnd, FGR64Opnd, IIFcvt>,
ABSS_FM<0x21, 21>;
}
let isPseudo = 1, isCodeGenOnly = 1 in {
- def PseudoCVT_S_W : ABSS_FT<"", FGR32RegsOpnd, GPR32Opnd, IIFcvt>;
- def PseudoCVT_D32_W : ABSS_FT<"", AFGR64RegsOpnd, GPR32Opnd, IIFcvt>;
- def PseudoCVT_S_L : ABSS_FT<"", FGR64RegsOpnd, GPR64Opnd, IIFcvt>;
- def PseudoCVT_D64_W : ABSS_FT<"", FGR64RegsOpnd, GPR32Opnd, IIFcvt>;
- def PseudoCVT_D64_L : ABSS_FT<"", FGR64RegsOpnd, GPR64Opnd, IIFcvt>;
+ def PseudoCVT_S_W : ABSS_FT<"", FGR32Opnd, GPR32Opnd, IIFcvt>;
+ def PseudoCVT_D32_W : ABSS_FT<"", AFGR64Opnd, GPR32Opnd, IIFcvt>;
+ def PseudoCVT_S_L : ABSS_FT<"", FGR64Opnd, GPR64Opnd, IIFcvt>;
+ def PseudoCVT_D64_W : ABSS_FT<"", FGR64Opnd, GPR32Opnd, IIFcvt>;
+ def PseudoCVT_D64_L : ABSS_FT<"", FGR64Opnd, GPR64Opnd, IIFcvt>;
}
let Predicates = [NoNaNsFPMath, HasStdEnc] in {
- def FABS_S : ABSS_FT<"abs.s", FGR32RegsOpnd, FGR32RegsOpnd, IIFcvt, fabs>,
+ def FABS_S : ABSS_FT<"abs.s", FGR32Opnd, FGR32Opnd, IIFcvt, fabs>,
ABSS_FM<0x5, 16>;
- def FNEG_S : ABSS_FT<"neg.s", FGR32RegsOpnd, FGR32RegsOpnd, IIFcvt, fneg>,
+ def FNEG_S : ABSS_FT<"neg.s", FGR32Opnd, FGR32Opnd, IIFcvt, fneg>,
ABSS_FM<0x7, 16>;
defm FABS : ABSS_M<"abs.d", IIFcvt, fabs>, ABSS_FM<0x5, 17>;
defm FNEG : ABSS_M<"neg.d", IIFcvt, fneg>, ABSS_FM<0x7, 17>;
}
-def FSQRT_S : ABSS_FT<"sqrt.s", FGR32RegsOpnd, FGR32RegsOpnd, IIFsqrtSingle,
+def FSQRT_S : ABSS_FT<"sqrt.s", FGR32Opnd, FGR32Opnd, IIFsqrtSingle,
fsqrt>, ABSS_FM<0x4, 16>;
defm FSQRT : ABSS_M<"sqrt.d", IIFsqrtDouble, fsqrt>, ABSS_FM<0x4, 17>;
@@ -334,164 +334,134 @@ defm FSQRT : ABSS_M<"sqrt.d", IIFsqrtDouble, fsqrt>, ABSS_FM<0x4, 17>;
/// Move Control Registers From/To CPU Registers
def CFC1 : MFC1_FT<"cfc1", GPR32Opnd, CCROpnd, IIFmove>, MFC1_FM<2>;
def CTC1 : MTC1_FT<"ctc1", CCROpnd, GPR32Opnd, IIFmove>, MFC1_FM<6>;
-def MFC1 : MFC1_FT<"mfc1", GPR32Opnd, FGR32RegsOpnd, IIFmoveC1, bitconvert>,
+def MFC1 : MFC1_FT<"mfc1", GPR32Opnd, FGR32Opnd, IIFmoveC1, bitconvert>,
MFC1_FM<0>;
-def MTC1 : MTC1_FT<"mtc1", FGR32RegsOpnd, GPR32Opnd, IIFmoveC1, bitconvert>,
+def MTC1 : MTC1_FT<"mtc1", FGR32Opnd, GPR32Opnd, IIFmoveC1, bitconvert>,
MFC1_FM<4>;
-def DMFC1 : MFC1_FT<"dmfc1", GPR64Opnd, FGR64RegsOpnd, IIFmoveC1,
+def MFHC1 : MFC1_FT<"mfhc1", GPR32Opnd, FGRH32Opnd, IIFmoveC1>,
+ MFC1_FM<3>;
+def MTHC1 : MTC1_FT<"mthc1", FGRH32Opnd, GPR32Opnd, IIFmoveC1>,
+ MFC1_FM<7>;
+def DMFC1 : MFC1_FT<"dmfc1", GPR64Opnd, FGR64Opnd, IIFmoveC1,
bitconvert>, MFC1_FM<1>;
-def DMTC1 : MTC1_FT<"dmtc1", FGR64RegsOpnd, GPR64Opnd, IIFmoveC1,
+def DMTC1 : MTC1_FT<"dmtc1", FGR64Opnd, GPR64Opnd, IIFmoveC1,
bitconvert>, MFC1_FM<5>;
-def FMOV_S : ABSS_FT<"mov.s", FGR32RegsOpnd, FGR32RegsOpnd, IIFmove>,
+def FMOV_S : ABSS_FT<"mov.s", FGR32Opnd, FGR32Opnd, IIFmove>,
ABSS_FM<0x6, 16>;
-def FMOV_D32 : ABSS_FT<"mov.d", AFGR64RegsOpnd, AFGR64RegsOpnd, IIFmove>,
+def FMOV_D32 : ABSS_FT<"mov.d", AFGR64Opnd, AFGR64Opnd, IIFmove>,
ABSS_FM<0x6, 17>, Requires<[NotFP64bit, HasStdEnc]>;
-def FMOV_D64 : ABSS_FT<"mov.d", FGR64RegsOpnd, FGR64RegsOpnd, IIFmove>,
+def FMOV_D64 : ABSS_FT<"mov.d", FGR64Opnd, FGR64Opnd, IIFmove>,
ABSS_FM<0x6, 17>, Requires<[IsFP64bit, HasStdEnc]> {
let DecoderNamespace = "Mips64";
}
/// Floating Point Memory Instructions
-let Predicates = [IsN64, HasStdEnc], DecoderNamespace = "Mips64" in {
- def LWC1_P8 : LW_FT<"lwc1", FGR32RegsOpnd, IIFLoad, mem64, load>,
- LW_FM<0x31>;
- def SWC1_P8 : SW_FT<"swc1", FGR32RegsOpnd, IIFStore, mem64, store>,
- LW_FM<0x39>;
- def LDC164_P8 : LW_FT<"ldc1", FGR64RegsOpnd, IIFLoad, mem64, load>,
- LW_FM<0x35> {
- let isCodeGenOnly =1;
- }
- def SDC164_P8 : SW_FT<"sdc1", FGR64RegsOpnd, IIFStore, mem64, store>,
- LW_FM<0x3d> {
- let isCodeGenOnly =1;
- }
+let Predicates = [HasStdEnc] in {
+ def LWC1 : LW_FT<"lwc1", FGR32Opnd, IIFLoad, load>, LW_FM<0x31>;
+ def SWC1 : SW_FT<"swc1", FGR32Opnd, IIFStore, store>, LW_FM<0x39>;
}
-let Predicates = [NotN64, HasStdEnc] in {
- def LWC1 : LW_FT<"lwc1", FGR32RegsOpnd, IIFLoad, mem, load>, LW_FM<0x31>;
- def SWC1 : SW_FT<"swc1", FGR32RegsOpnd, IIFStore, mem, store>, LW_FM<0x39>;
+let Predicates = [IsFP64bit, HasStdEnc], DecoderNamespace = "Mips64" in {
+ def LDC164 : LW_FT<"ldc1", FGR64Opnd, IIFLoad, load>, LW_FM<0x35>;
+ def SDC164 : SW_FT<"sdc1", FGR64Opnd, IIFStore, store>, LW_FM<0x3d>;
}
-let Predicates = [NotN64, HasMips64, HasStdEnc],
- DecoderNamespace = "Mips64" in {
- def LDC164 : LW_FT<"ldc1", FGR64RegsOpnd, IIFLoad, mem, load>, LW_FM<0x35>;
- def SDC164 : SW_FT<"sdc1", FGR64RegsOpnd, IIFStore, mem, store>, LW_FM<0x3d>;
+let Predicates = [NotFP64bit, HasStdEnc] in {
+ def LDC1 : LW_FT<"ldc1", AFGR64Opnd, IIFLoad, load>, LW_FM<0x35>;
+ def SDC1 : SW_FT<"sdc1", AFGR64Opnd, IIFStore, store>, LW_FM<0x3d>;
}
-let Predicates = [NotN64, NotMips64, HasStdEnc] in {
- let isPseudo = 1, isCodeGenOnly = 1 in {
- def PseudoLDC1 : LW_FT<"", AFGR64RegsOpnd, IIFLoad, mem, load>;
- def PseudoSDC1 : SW_FT<"", AFGR64RegsOpnd, IIFStore, mem, store>;
- }
- def LDC1 : LW_FT<"ldc1", AFGR64RegsOpnd, IIFLoad, mem>, LW_FM<0x35>;
- def SDC1 : SW_FT<"sdc1", AFGR64RegsOpnd, IIFStore, mem>, LW_FM<0x3d>;
+/// Cop2 Memory Instructions
+let Predicates = [HasStdEnc] in {
+ def LWC2 : LW_FT<"lwc2", COP2Opnd, NoItinerary, load>, LW_FM<0x32>;
+ def SWC2 : SW_FT<"swc2", COP2Opnd, NoItinerary, store>, LW_FM<0x3a>;
+ def LDC2 : LW_FT<"ldc2", COP2Opnd, NoItinerary, load>, LW_FM<0x36>;
+ def SDC2 : SW_FT<"sdc2", COP2Opnd, NoItinerary, store>, LW_FM<0x3e>;
}
// Indexed loads and stores.
let Predicates = [HasFPIdx, HasStdEnc] in {
- def LWXC1 : LWXC1_FT<"lwxc1", FGR32RegsOpnd, GPR32Opnd, IIFLoad, load>,
- LWXC1_FM<0>;
- def SWXC1 : SWXC1_FT<"swxc1", FGR32RegsOpnd, GPR32Opnd, IIFStore, store>,
- SWXC1_FM<8>;
-}
-
-let Predicates = [HasMips32r2, NotMips64, HasStdEnc] in {
- def LDXC1 : LWXC1_FT<"ldxc1", AFGR64RegsOpnd, GPR32Opnd, IIFLoad, load>,
- LWXC1_FM<1>;
- def SDXC1 : SWXC1_FT<"sdxc1", AFGR64RegsOpnd, GPR32Opnd, IIFStore, store>,
- SWXC1_FM<9>;
+ def LWXC1 : LWXC1_FT<"lwxc1", FGR32Opnd, IIFLoad, load>, LWXC1_FM<0>;
+ def SWXC1 : SWXC1_FT<"swxc1", FGR32Opnd, IIFStore, store>, SWXC1_FM<8>;
}
-let Predicates = [HasMips64, NotN64, HasStdEnc], DecoderNamespace="Mips64" in {
- def LDXC164 : LWXC1_FT<"ldxc1", FGR64RegsOpnd, GPR32Opnd, IIFLoad, load>,
- LWXC1_FM<1>;
- def SDXC164 : SWXC1_FT<"sdxc1", FGR64RegsOpnd, GPR32Opnd, IIFStore, store>,
- SWXC1_FM<9>;
+let Predicates = [HasFPIdx, NotFP64bit, HasStdEnc] in {
+ def LDXC1 : LWXC1_FT<"ldxc1", AFGR64Opnd, IIFLoad, load>, LWXC1_FM<1>;
+ def SDXC1 : SWXC1_FT<"sdxc1", AFGR64Opnd, IIFStore, store>, SWXC1_FM<9>;
}
-// n64
-let Predicates = [IsN64, HasStdEnc], isCodeGenOnly=1 in {
- def LWXC1_P8 : LWXC1_FT<"lwxc1", FGR32RegsOpnd, GPR64Opnd, IIFLoad, load>,
- LWXC1_FM<0>;
- def LDXC164_P8 : LWXC1_FT<"ldxc1", FGR64RegsOpnd, GPR64Opnd, IIFLoad,
- load>, LWXC1_FM<1>;
- def SWXC1_P8 : SWXC1_FT<"swxc1", FGR32RegsOpnd, GPR64Opnd, IIFStore,
- store>, SWXC1_FM<8>;
- def SDXC164_P8 : SWXC1_FT<"sdxc1", FGR64RegsOpnd, GPR64Opnd, IIFStore,
- store>, SWXC1_FM<9>;
+let Predicates = [HasFPIdx, IsFP64bit, HasStdEnc],
+ DecoderNamespace="Mips64" in {
+ def LDXC164 : LWXC1_FT<"ldxc1", FGR64Opnd, IIFLoad, load>, LWXC1_FM<1>;
+ def SDXC164 : SWXC1_FT<"sdxc1", FGR64Opnd, IIFStore, store>, SWXC1_FM<9>;
}
// Load/store doubleword indexed unaligned.
-let Predicates = [NotMips64, HasStdEnc] in {
- def LUXC1 : LWXC1_FT<"luxc1", AFGR64RegsOpnd, GPR32Opnd, IIFLoad>,
- LWXC1_FM<0x5>;
- def SUXC1 : SWXC1_FT<"suxc1", AFGR64RegsOpnd, GPR32Opnd, IIFStore>,
- SWXC1_FM<0xd>;
+let Predicates = [NotFP64bit, HasStdEnc] in {
+ def LUXC1 : LWXC1_FT<"luxc1", AFGR64Opnd, IIFLoad>, LWXC1_FM<0x5>;
+ def SUXC1 : SWXC1_FT<"suxc1", AFGR64Opnd, IIFStore>, SWXC1_FM<0xd>;
}
-let Predicates = [HasMips64, HasStdEnc],
- DecoderNamespace="Mips64" in {
- def LUXC164 : LWXC1_FT<"luxc1", FGR64RegsOpnd, GPR32Opnd, IIFLoad>,
- LWXC1_FM<0x5>;
- def SUXC164 : SWXC1_FT<"suxc1", FGR64RegsOpnd, GPR32Opnd, IIFStore>,
- SWXC1_FM<0xd>;
+let Predicates = [IsFP64bit, HasStdEnc], DecoderNamespace="Mips64" in {
+ def LUXC164 : LWXC1_FT<"luxc1", FGR64Opnd, IIFLoad>, LWXC1_FM<0x5>;
+ def SUXC164 : SWXC1_FT<"suxc1", FGR64Opnd, IIFStore>, SWXC1_FM<0xd>;
}
/// Floating-point Arithmetic
-def FADD_S : ADDS_FT<"add.s", FGR32RegsOpnd, IIFadd, 1, fadd>,
+def FADD_S : ADDS_FT<"add.s", FGR32Opnd, IIFadd, 1, fadd>,
ADDS_FM<0x00, 16>;
defm FADD : ADDS_M<"add.d", IIFadd, 1, fadd>, ADDS_FM<0x00, 17>;
-def FDIV_S : ADDS_FT<"div.s", FGR32RegsOpnd, IIFdivSingle, 0, fdiv>,
+def FDIV_S : ADDS_FT<"div.s", FGR32Opnd, IIFdivSingle, 0, fdiv>,
ADDS_FM<0x03, 16>;
defm FDIV : ADDS_M<"div.d", IIFdivDouble, 0, fdiv>, ADDS_FM<0x03, 17>;
-def FMUL_S : ADDS_FT<"mul.s", FGR32RegsOpnd, IIFmulSingle, 1, fmul>,
+def FMUL_S : ADDS_FT<"mul.s", FGR32Opnd, IIFmulSingle, 1, fmul>,
ADDS_FM<0x02, 16>;
defm FMUL : ADDS_M<"mul.d", IIFmulDouble, 1, fmul>, ADDS_FM<0x02, 17>;
-def FSUB_S : ADDS_FT<"sub.s", FGR32RegsOpnd, IIFadd, 0, fsub>,
+def FSUB_S : ADDS_FT<"sub.s", FGR32Opnd, IIFadd, 0, fsub>,
ADDS_FM<0x01, 16>;
defm FSUB : ADDS_M<"sub.d", IIFadd, 0, fsub>, ADDS_FM<0x01, 17>;
let Predicates = [HasMips32r2, HasStdEnc] in {
- def MADD_S : MADDS_FT<"madd.s", FGR32RegsOpnd, IIFmulSingle, fadd>,
+ def MADD_S : MADDS_FT<"madd.s", FGR32Opnd, IIFmulSingle, fadd>,
MADDS_FM<4, 0>;
- def MSUB_S : MADDS_FT<"msub.s", FGR32RegsOpnd, IIFmulSingle, fsub>,
+ def MSUB_S : MADDS_FT<"msub.s", FGR32Opnd, IIFmulSingle, fsub>,
MADDS_FM<5, 0>;
}
let Predicates = [HasMips32r2, NoNaNsFPMath, HasStdEnc] in {
- def NMADD_S : NMADDS_FT<"nmadd.s", FGR32RegsOpnd, IIFmulSingle, fadd>,
+ def NMADD_S : NMADDS_FT<"nmadd.s", FGR32Opnd, IIFmulSingle, fadd>,
MADDS_FM<6, 0>;
- def NMSUB_S : NMADDS_FT<"nmsub.s", FGR32RegsOpnd, IIFmulSingle, fsub>,
+ def NMSUB_S : NMADDS_FT<"nmsub.s", FGR32Opnd, IIFmulSingle, fsub>,
MADDS_FM<7, 0>;
}
let Predicates = [HasMips32r2, NotFP64bit, HasStdEnc] in {
- def MADD_D32 : MADDS_FT<"madd.d", AFGR64RegsOpnd, IIFmulDouble, fadd>,
+ def MADD_D32 : MADDS_FT<"madd.d", AFGR64Opnd, IIFmulDouble, fadd>,
MADDS_FM<4, 1>;
- def MSUB_D32 : MADDS_FT<"msub.d", AFGR64RegsOpnd, IIFmulDouble, fsub>,
+ def MSUB_D32 : MADDS_FT<"msub.d", AFGR64Opnd, IIFmulDouble, fsub>,
MADDS_FM<5, 1>;
}
let Predicates = [HasMips32r2, NotFP64bit, NoNaNsFPMath, HasStdEnc] in {
- def NMADD_D32 : NMADDS_FT<"nmadd.d", AFGR64RegsOpnd, IIFmulDouble, fadd>,
+ def NMADD_D32 : NMADDS_FT<"nmadd.d", AFGR64Opnd, IIFmulDouble, fadd>,
MADDS_FM<6, 1>;
- def NMSUB_D32 : NMADDS_FT<"nmsub.d", AFGR64RegsOpnd, IIFmulDouble, fsub>,
+ def NMSUB_D32 : NMADDS_FT<"nmsub.d", AFGR64Opnd, IIFmulDouble, fsub>,
MADDS_FM<7, 1>;
}
let Predicates = [HasMips32r2, IsFP64bit, HasStdEnc], isCodeGenOnly=1 in {
- def MADD_D64 : MADDS_FT<"madd.d", FGR64RegsOpnd, IIFmulDouble, fadd>,
+ def MADD_D64 : MADDS_FT<"madd.d", FGR64Opnd, IIFmulDouble, fadd>,
MADDS_FM<4, 1>;
- def MSUB_D64 : MADDS_FT<"msub.d", FGR64RegsOpnd, IIFmulDouble, fsub>,
+ def MSUB_D64 : MADDS_FT<"msub.d", FGR64Opnd, IIFmulDouble, fsub>,
MADDS_FM<5, 1>;
}
let Predicates = [HasMips32r2, IsFP64bit, NoNaNsFPMath, HasStdEnc],
isCodeGenOnly=1 in {
- def NMADD_D64 : NMADDS_FT<"nmadd.d", FGR64RegsOpnd, IIFmulDouble, fadd>,
+ def NMADD_D64 : NMADDS_FT<"nmadd.d", FGR64Opnd, IIFmulDouble, fadd>,
MADDS_FM<6, 1>;
- def NMSUB_D64 : NMADDS_FT<"nmsub.d", FGR64RegsOpnd, IIFmulDouble, fsub>,
+ def NMSUB_D64 : NMADDS_FT<"nmsub.d", FGR64Opnd, IIFmulDouble, fsub>,
MADDS_FM<7, 1>;
}
@@ -542,20 +512,27 @@ def FCMP_D64 : CEQS_FT<"d", FGR64, IIFcmp, MipsFPCmp>, CEQS_FM<17>,
// This pseudo instr gets expanded into 2 mtc1 instrs after register
// allocation.
-def BuildPairF64 :
- PseudoSE<(outs AFGR64RegsOpnd:$dst),
- (ins GPR32Opnd:$lo, GPR32Opnd:$hi),
- [(set AFGR64RegsOpnd:$dst,
- (MipsBuildPairF64 GPR32Opnd:$lo, GPR32Opnd:$hi))]>;
+class BuildPairF64Base<RegisterOperand RO> :
+ PseudoSE<(outs RO:$dst), (ins GPR32Opnd:$lo, GPR32Opnd:$hi),
+ [(set RO:$dst, (MipsBuildPairF64 GPR32Opnd:$lo, GPR32Opnd:$hi))]>;
+
+def BuildPairF64 : BuildPairF64Base<AFGR64Opnd>,
+ Requires<[NotFP64bit, HasStdEnc]>;
+def BuildPairF64_64 : BuildPairF64Base<FGR64Opnd>,
+ Requires<[IsFP64bit, HasStdEnc]>;
// This pseudo instr gets expanded into 2 mfc1 instrs after register
// allocation.
// If n is 0, the lower part of src is extracted.
// If n is 1, the higher part of src is extracted.
-def ExtractElementF64 :
- PseudoSE<(outs GPR32Opnd:$dst), (ins AFGR64RegsOpnd:$src, i32imm:$n),
- [(set GPR32Opnd:$dst,
- (MipsExtractElementF64 AFGR64RegsOpnd:$src, imm:$n))]>;
+class ExtractElementF64Base<RegisterOperand RO> :
+ PseudoSE<(outs GPR32Opnd:$dst), (ins RO:$src, i32imm:$n),
+ [(set GPR32Opnd:$dst, (MipsExtractElementF64 RO:$src, imm:$n))]>;
+
+def ExtractElementF64 : ExtractElementF64Base<AFGR64Opnd>,
+ Requires<[NotFP64bit, HasStdEnc]>;
+def ExtractElementF64_64 : ExtractElementF64Base<FGR64Opnd>,
+ Requires<[IsFP64bit, HasStdEnc]>;
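
A sketch of the post-RA expansions the comments above describe; the register variables and the sub_lo/sub_hi subregister indices are assumptions for illustration (sub_lo does appear elsewhere in this patch):

    // BuildPairF64 $fd, $lo, $hi  ==>  mtc1 $lo, lo($fd); mtc1 $hi, hi($fd)
    BuildMI(MBB, I, DL, get(Mips::MTC1), TRI.getSubReg(Fd, Mips::sub_lo))
        .addReg(Lo);
    BuildMI(MBB, I, DL, get(Mips::MTC1), TRI.getSubReg(Fd, Mips::sub_hi))
        .addReg(Hi);

    // ExtractElementF64 $rd, $fs, n  ==>  mfc1 $rd, (n ? hi : lo)($fs)
    unsigned SubIdx = N ? Mips::sub_hi : Mips::sub_lo;
    BuildMI(MBB, I, DL, get(Mips::MFC1), Rd)
        .addReg(TRI.getSubReg(Fs, SubIdx));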
//===----------------------------------------------------------------------===//
// InstAliases.
@@ -571,18 +548,18 @@ def : MipsPat<(f32 fpimm0neg), (FNEG_S (MTC1 ZERO))>;
def : MipsPat<(f32 (sint_to_fp GPR32Opnd:$src)),
(PseudoCVT_S_W GPR32Opnd:$src)>;
-def : MipsPat<(MipsTruncIntFP FGR32RegsOpnd:$src),
- (TRUNC_W_S FGR32RegsOpnd:$src)>;
+def : MipsPat<(MipsTruncIntFP FGR32Opnd:$src),
+ (TRUNC_W_S FGR32Opnd:$src)>;
let Predicates = [NotFP64bit, HasStdEnc] in {
def : MipsPat<(f64 (sint_to_fp GPR32Opnd:$src)),
(PseudoCVT_D32_W GPR32Opnd:$src)>;
- def : MipsPat<(MipsTruncIntFP AFGR64RegsOpnd:$src),
- (TRUNC_W_D32 AFGR64RegsOpnd:$src)>;
- def : MipsPat<(f32 (fround AFGR64RegsOpnd:$src)),
- (CVT_S_D32 AFGR64RegsOpnd:$src)>;
- def : MipsPat<(f64 (fextend FGR32RegsOpnd:$src)),
- (CVT_D32_S FGR32RegsOpnd:$src)>;
+ def : MipsPat<(MipsTruncIntFP AFGR64Opnd:$src),
+ (TRUNC_W_D32 AFGR64Opnd:$src)>;
+ def : MipsPat<(f32 (fround AFGR64Opnd:$src)),
+ (CVT_S_D32 AFGR64Opnd:$src)>;
+ def : MipsPat<(f64 (fextend FGR32Opnd:$src)),
+ (CVT_D32_S FGR32Opnd:$src)>;
}
let Predicates = [IsFP64bit, HasStdEnc] in {
@@ -592,44 +569,37 @@ let Predicates = [IsFP64bit, HasStdEnc] in {
def : MipsPat<(f64 (sint_to_fp GPR32Opnd:$src)),
(PseudoCVT_D64_W GPR32Opnd:$src)>;
def : MipsPat<(f32 (sint_to_fp GPR64Opnd:$src)),
- (EXTRACT_SUBREG (PseudoCVT_S_L GPR64Opnd:$src), sub_32)>;
+ (EXTRACT_SUBREG (PseudoCVT_S_L GPR64Opnd:$src), sub_lo)>;
def : MipsPat<(f64 (sint_to_fp GPR64Opnd:$src)),
(PseudoCVT_D64_L GPR64Opnd:$src)>;
- def : MipsPat<(MipsTruncIntFP FGR64RegsOpnd:$src),
- (TRUNC_W_D64 FGR64RegsOpnd:$src)>;
- def : MipsPat<(MipsTruncIntFP FGR32RegsOpnd:$src),
- (TRUNC_L_S FGR32RegsOpnd:$src)>;
- def : MipsPat<(MipsTruncIntFP FGR64RegsOpnd:$src),
- (TRUNC_L_D64 FGR64RegsOpnd:$src)>;
+ def : MipsPat<(MipsTruncIntFP FGR64Opnd:$src),
+ (TRUNC_W_D64 FGR64Opnd:$src)>;
+ def : MipsPat<(MipsTruncIntFP FGR32Opnd:$src),
+ (TRUNC_L_S FGR32Opnd:$src)>;
+ def : MipsPat<(MipsTruncIntFP FGR64Opnd:$src),
+ (TRUNC_L_D64 FGR64Opnd:$src)>;
- def : MipsPat<(f32 (fround FGR64RegsOpnd:$src)),
- (CVT_S_D64 FGR64RegsOpnd:$src)>;
- def : MipsPat<(f64 (fextend FGR32RegsOpnd:$src)),
- (CVT_D64_S FGR32RegsOpnd:$src)>;
+ def : MipsPat<(f32 (fround FGR64Opnd:$src)),
+ (CVT_S_D64 FGR64Opnd:$src)>;
+ def : MipsPat<(f64 (fextend FGR32Opnd:$src)),
+ (CVT_D64_S FGR32Opnd:$src)>;
}
// Patterns for loads/stores with a reg+imm operand.
let AddedComplexity = 40 in {
- let Predicates = [IsN64, HasStdEnc] in {
- def : LoadRegImmPat<LWC1_P8, f32, load>;
- def : StoreRegImmPat<SWC1_P8, f32>;
- def : LoadRegImmPat<LDC164_P8, f64, load>;
- def : StoreRegImmPat<SDC164_P8, f64>;
- }
-
- let Predicates = [NotN64, HasStdEnc] in {
+ let Predicates = [HasStdEnc] in {
def : LoadRegImmPat<LWC1, f32, load>;
def : StoreRegImmPat<SWC1, f32>;
}
- let Predicates = [NotN64, HasMips64, HasStdEnc] in {
+ let Predicates = [IsFP64bit, HasStdEnc] in {
def : LoadRegImmPat<LDC164, f64, load>;
def : StoreRegImmPat<SDC164, f64>;
}
- let Predicates = [NotN64, NotMips64, HasStdEnc] in {
- def : LoadRegImmPat<PseudoLDC1, f64, load>;
- def : StoreRegImmPat<PseudoSDC1, f64>;
+ let Predicates = [NotFP64bit, HasStdEnc] in {
+ def : LoadRegImmPat<LDC1, f64, load>;
+ def : StoreRegImmPat<SDC1, f64>;
}
}
diff --git a/lib/Target/Mips/MipsInstrFormats.td b/lib/Target/Mips/MipsInstrFormats.td
index 1322784..737a018 100644
--- a/lib/Target/Mips/MipsInstrFormats.td
+++ b/lib/Target/Mips/MipsInstrFormats.td
@@ -183,7 +183,7 @@ class BranchBase<bits<6> op, dag outs, dag ins, string asmstr,
// Format J instruction class in Mips : <|opcode|address|>
//===----------------------------------------------------------------------===//
-class FJ<bits<6> op>
+class FJ<bits<6> op> : StdArch
{
bits<26> target;
@@ -272,7 +272,7 @@ class SRLV_FM<bits<6> funct, bit rotate> : StdArch {
let Inst{5-0} = funct;
}
-class BEQ_FM<bits<6> op> {
+class BEQ_FM<bits<6> op> : StdArch {
bits<5> rs;
bits<5> rt;
bits<16> offset;
@@ -285,7 +285,7 @@ class BEQ_FM<bits<6> op> {
let Inst{15-0} = offset;
}
-class BGEZ_FM<bits<6> op, bits<5> funct> {
+class BGEZ_FM<bits<6> op, bits<5> funct> : StdArch {
bits<5> rs;
bits<16> offset;
@@ -297,17 +297,6 @@ class BGEZ_FM<bits<6> op, bits<5> funct> {
let Inst{15-0} = offset;
}
-class B_FM {
- bits<16> offset;
-
- bits<32> Inst;
-
- let Inst{31-26} = 4;
- let Inst{25-21} = 0;
- let Inst{20-16} = 0;
- let Inst{15-0} = offset;
-}
-
class SLTI_FM<bits<6> op> : StdArch {
bits<5> rt;
bits<5> rs;
@@ -321,7 +310,7 @@ class SLTI_FM<bits<6> op> : StdArch {
let Inst{15-0} = imm16;
}
-class MFLO_FM<bits<6> funct> {
+class MFLO_FM<bits<6> funct> : StdArch {
bits<5> rd;
bits<32> Inst;
@@ -333,7 +322,7 @@ class MFLO_FM<bits<6> funct> {
let Inst{5-0} = funct;
}
-class MTLO_FM<bits<6> funct> {
+class MTLO_FM<bits<6> funct> : StdArch {
bits<5> rs;
bits<32> Inst;
@@ -344,7 +333,7 @@ class MTLO_FM<bits<6> funct> {
let Inst{5-0} = funct;
}
-class SEB_FM<bits<5> funct, bits<6> funct2> {
+class SEB_FM<bits<5> funct, bits<6> funct2> : StdArch {
bits<5> rd;
bits<5> rt;
@@ -358,7 +347,7 @@ class SEB_FM<bits<5> funct, bits<6> funct2> {
let Inst{5-0} = funct2;
}
-class CLO_FM<bits<6> funct> {
+class CLO_FM<bits<6> funct> : StdArch {
bits<5> rd;
bits<5> rs;
bits<5> rt;
@@ -374,7 +363,7 @@ class CLO_FM<bits<6> funct> {
let rt = rd;
}
-class LUI_FM {
+class LUI_FM : StdArch {
bits<5> rt;
bits<16> imm16;
@@ -386,7 +375,7 @@ class LUI_FM {
let Inst{15-0} = imm16;
}
-class JALR_FM {
+class JALR_FM : StdArch {
bits<5> rd;
bits<5> rs;
@@ -400,7 +389,7 @@ class JALR_FM {
let Inst{5-0} = 9;
}
-class BGEZAL_FM<bits<5> funct> {
+class BGEZAL_FM<bits<5> funct> : StdArch {
bits<5> rs;
bits<16> offset;
@@ -435,7 +424,7 @@ class MULT_FM<bits<6> op, bits<6> funct> : StdArch {
let Inst{5-0} = funct;
}
-class EXT_FM<bits<6> funct> {
+class EXT_FM<bits<6> funct> : StdArch {
bits<5> rt;
bits<5> rs;
bits<5> pos;
@@ -465,7 +454,7 @@ class RDHWR_FM {
let Inst{5-0} = 0x3b;
}
-class TEQ_FM<bits<6> funct> {
+class TEQ_FM<bits<6> funct> : StdArch {
bits<5> rs;
bits<5> rt;
bits<10> code_;
@@ -479,6 +468,17 @@ class TEQ_FM<bits<6> funct> {
let Inst{5-0} = funct;
}
+class TEQI_FM<bits<5> funct> : StdArch {
+ bits<5> rs;
+ bits<16> imm16;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 1;
+ let Inst{25-21} = rs;
+ let Inst{20-16} = funct;
+ let Inst{15-0} = imm16;
+}
//===----------------------------------------------------------------------===//
// System calls format <op|code_|funct>
//===----------------------------------------------------------------------===//
@@ -520,6 +520,24 @@ class ER_FM<bits<6> funct>
let Inst{5-0} = funct;
}
+
+//===----------------------------------------------------------------------===//
+// Enable/disable interrupt instruction format <Cop0|MFMC0|rt|12|0|sc|0|0>
+//===----------------------------------------------------------------------===//
+
+class EI_FM<bits<1> sc>
+{
+ bits<32> Inst;
+ bits<5> rt;
+ let Inst{31-26} = 0x10;
+ let Inst{25-21} = 0xb;
+ let Inst{20-16} = rt;
+ let Inst{15-11} = 0xc;
+ let Inst{10-6} = 0;
+ let Inst{5} = sc;
+ let Inst{4-0} = 0;
+}
+
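
The field layout can be sanity-checked against the architecturally fixed words for ei/di. A minimal, self-contained check (the helper name is ours, not part of the patch):

    #include <cassert>
    #include <cstdint>

    // Assemble the EI_FM layout <Cop0|MFMC0|rt|12|0|sc|0|0> into a word.
    constexpr uint32_t encodeEI(uint32_t rt, uint32_t sc) {
      return (0x10u << 26) | (0xbu << 21) | (rt << 16) | (0xcu << 11) |
             (sc << 5);
    }

    int main() {
      assert(encodeEI(0, 1) == 0x41606020); // "ei"  (rt = $zero, sc = 1)
      assert(encodeEI(0, 0) == 0x41606000); // "di"  (rt = $zero, sc = 0)
      return 0;
    }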
//===----------------------------------------------------------------------===//
//
// FLOATING POINT INSTRUCTION FORMATS
@@ -701,7 +719,7 @@ class CMov_I_F_FM<bits<6> funct, bits<5> fmt> {
let Inst{5-0} = funct;
}
-class CMov_F_I_FM<bit tf> {
+class CMov_F_I_FM<bit tf> : StdArch {
bits<5> rd;
bits<5> rs;
bits<3> fcc;
diff --git a/lib/Target/Mips/MipsInstrInfo.cpp b/lib/Target/Mips/MipsInstrInfo.cpp
index eae05a3..0ebad05 100644
--- a/lib/Target/Mips/MipsInstrInfo.cpp
+++ b/lib/Target/Mips/MipsInstrInfo.cpp
@@ -22,11 +22,14 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetRegistry.h"
-#define GET_INSTRINFO_CTOR
+#define GET_INSTRINFO_CTOR_DTOR
#include "MipsGenInstrInfo.inc"
using namespace llvm;
+// Pin the vtable to this file.
+void MipsInstrInfo::anchor() {}
+
MipsInstrInfo::MipsInstrInfo(MipsTargetMachine &tm, unsigned UncondBr)
: MipsGenInstrInfo(Mips::ADJCALLSTACKDOWN, Mips::ADJCALLSTACKUP),
TM(tm), UncondBrOpc(UncondBr) {}
@@ -219,7 +222,7 @@ AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
// If there is only one terminator instruction, process it.
if (!SecondLastOpc) {
- // Unconditional branch
+ // Unconditional branch.
if (LastOpc == UncondBrOpc) {
TBB = LastInst->getOperand(0).getMBB();
return BT_Uncond;
@@ -271,6 +274,10 @@ unsigned MipsInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
const char *AsmStr = MI->getOperand(0).getSymbolName();
return getInlineAsmLength(AsmStr, *MF->getTarget().getMCAsmInfo());
}
+ case Mips::CONSTPOOL_ENTRY:
+ // If this machine instr is a constant pool entry, its size is recorded as
+ // operand #2.
+ return MI->getOperand(2).getImm();
}
}
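
For context, constant-pool entries of this shape are normally created by a constant-islands style pass; a sketch of the emission idiom assumed here (mirroring the ARM pass, not code from this patch) shows why the size lives in operand #2:

    // Operand 0: label ID, operand 1: constant-pool index,
    // operand 2: entry size in bytes (read back by GetInstSizeInBytes).
    BuildMI(BB, DebugLoc(), TII->get(Mips::CONSTPOOL_ENTRY))
        .addImm(ID).addConstantPoolIndex(Idx).addImm(Size);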
diff --git a/lib/Target/Mips/MipsInstrInfo.h b/lib/Target/Mips/MipsInstrInfo.h
index b6480ef..d9ac961 100644
--- a/lib/Target/Mips/MipsInstrInfo.h
+++ b/lib/Target/Mips/MipsInstrInfo.h
@@ -27,6 +27,7 @@
namespace llvm {
class MipsInstrInfo : public MipsGenInstrInfo {
+ virtual void anchor();
protected:
MipsTargetMachine &TM;
unsigned UncondBrOpc;
diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td
index b9e8895..ebdbaa4 100644
--- a/lib/Target/Mips/MipsInstrInfo.td
+++ b/lib/Target/Mips/MipsInstrInfo.td
@@ -23,11 +23,9 @@ def SDT_MipsCMov : SDTypeProfile<1, 4, [SDTCisSameAs<0, 1>,
SDTCisInt<4>]>;
def SDT_MipsCallSeqStart : SDCallSeqStart<[SDTCisVT<0, i32>]>;
def SDT_MipsCallSeqEnd : SDCallSeqEnd<[SDTCisVT<0, i32>, SDTCisVT<1, i32>]>;
-def SDT_ExtractLOHI : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisVT<1, untyped>,
- SDTCisVT<2, i32>]>;
-def SDT_InsertLOHI : SDTypeProfile<1, 2, [SDTCisVT<0, untyped>,
- SDTCisVT<1, i32>,
- SDTCisSameAs<1, 2>]>;
+def SDT_MFLOHI : SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisVT<1, untyped>]>;
+def SDT_MTLOHI : SDTypeProfile<1, 2, [SDTCisVT<0, untyped>,
+ SDTCisInt<1>, SDTCisSameAs<1, 2>]>;
def SDT_MipsMultDiv : SDTypeProfile<1, 2, [SDTCisVT<0, untyped>, SDTCisInt<1>,
SDTCisSameAs<1, 2>]>;
def SDT_MipsMAddMSub : SDTypeProfile<1, 3,
@@ -86,11 +84,12 @@ def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_MipsCallSeqEnd,
[SDNPHasChain, SDNPSideEffect,
SDNPOptInGlue, SDNPOutGlue]>;
-// Node used to extract integer from LO/HI register.
-def ExtractLOHI : SDNode<"MipsISD::ExtractLOHI", SDT_ExtractLOHI>;
+// Nodes used to extract LO/HI registers.
+def MipsMFHI : SDNode<"MipsISD::MFHI", SDT_MFLOHI>;
+def MipsMFLO : SDNode<"MipsISD::MFLO", SDT_MFLOHI>;
// Node used to insert 32-bit integers to LOHI register pair.
-def InsertLOHI : SDNode<"MipsISD::InsertLOHI", SDT_InsertLOHI>;
+def MipsMTLOHI : SDNode<"MipsISD::MTLOHI", SDT_MTLOHI>;
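
A hedged sketch of how a lowering would use the renamed nodes, with value types taken from the SDT profiles above (the accumulator is untyped; the moves produce plain integers):

    // Pack two 32-bit halves into an accumulator, then read HI/LO back out.
    SDValue Acc = DAG.getNode(MipsISD::MTLOHI, DL, MVT::Untyped, Lo, Hi);
    SDValue OutLo = DAG.getNode(MipsISD::MFLO, DL, MVT::i32, Acc);
    SDValue OutHi = DAG.getNode(MipsISD::MFHI, DL, MVT::i32, Acc);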
// Mult nodes.
def MipsMult : SDNode<"MipsISD::Mult", SDT_MipsMultDiv>;
@@ -115,7 +114,7 @@ def MipsDivRemU16 : SDNode<"MipsISD::DivRemU16", SDT_MipsDivRem16,
// Wrapper node patterns give the instruction selector a chance to replace
// target constant nodes that would otherwise remain unchanged with ADDiu
// nodes. Without these wrapper node patterns, the following conditional move
-// instrucion is emitted when function cmov2 in test/CodeGen/Mips/cmov.ll is
+// instruction is emitted when function cmov2 in test/CodeGen/Mips/cmov.ll is
// compiled:
// movn %got(d)($gp), %got(c)($gp), $4
// This instruction is illegal since movn can take only register operands.
@@ -182,6 +181,12 @@ def NoNaNsFPMath : Predicate<"TM.Options.NoNaNsFPMath">,
def HasStdEnc : Predicate<"Subtarget.hasStandardEncoding()">,
AssemblerPredicate<"!FeatureMips16">;
def NotDSP : Predicate<"!Subtarget.hasDSP()">;
+def InMicroMips : Predicate<"Subtarget.inMicroMipsMode()">,
+ AssemblerPredicate<"FeatureMicroMips">;
+def NotInMicroMips : Predicate<"!Subtarget.inMicroMipsMode()">,
+ AssemblerPredicate<"!FeatureMicroMips">;
+def IsLE : Predicate<"Subtarget.isLittle()">;
+def IsBE : Predicate<"!Subtarget.isLittle()">;
class MipsPat<dag pattern, dag result> : Pat<pattern, result> {
let Predicates = [HasStdEnc];
@@ -242,7 +247,7 @@ def brtarget : Operand<OtherVT> {
def calltarget : Operand<iPTR> {
let EncoderMethod = "getJumpTargetOpValue";
}
-def calltarget64: Operand<i64>;
+
def simm16 : Operand<i32> {
let DecoderMethod= "DecodeSimm16";
}
@@ -256,48 +261,67 @@ def uimm20 : Operand<i32> {
def uimm10 : Operand<i32> {
}
-def simm16_64 : Operand<i64>;
-def shamt : Operand<i32>;
+def simm16_64 : Operand<i64> {
+ let DecoderMethod = "DecodeSimm16";
+}
// Unsigned Operand
+def uimm5 : Operand<i32> {
+ let PrintMethod = "printUnsignedImm";
+}
+
+def uimm6 : Operand<i32> {
+ let PrintMethod = "printUnsignedImm";
+}
+
def uimm16 : Operand<i32> {
let PrintMethod = "printUnsignedImm";
}
+def pcrel16 : Operand<i32> {
+}
+
def MipsMemAsmOperand : AsmOperandClass {
let Name = "Mem";
let ParserMethod = "parseMemOperand";
}
-// Address operand
-def mem : Operand<i32> {
- let PrintMethod = "printMemOperand";
- let MIOperandInfo = (ops GPR32, simm16);
- let EncoderMethod = "getMemEncoding";
- let ParserMatchClass = MipsMemAsmOperand;
- let OperandType = "OPERAND_MEMORY";
+def MipsInvertedImmoperand : AsmOperandClass {
+ let Name = "InvNum";
+ let RenderMethod = "addImmOperands";
+ let ParserMethod = "parseInvNum";
+}
+
+def PtrRegAsmOperand : AsmOperandClass {
+ let Name = "PtrReg";
+ let ParserMethod = "parsePtrReg";
+}
+
+
+def InvertedImOperand : Operand<i32> {
+ let ParserMatchClass = MipsInvertedImmoperand;
}
-def mem64 : Operand<i64> {
+// Address operand
+def mem : Operand<iPTR> {
let PrintMethod = "printMemOperand";
- let MIOperandInfo = (ops GPR64, simm16_64);
+ let MIOperandInfo = (ops ptr_rc, simm16);
let EncoderMethod = "getMemEncoding";
let ParserMatchClass = MipsMemAsmOperand;
let OperandType = "OPERAND_MEMORY";
}
-def mem_ea : Operand<i32> {
+def mem_ea : Operand<iPTR> {
let PrintMethod = "printMemOperandEA";
- let MIOperandInfo = (ops GPR32, simm16);
+ let MIOperandInfo = (ops ptr_rc, simm16);
let EncoderMethod = "getMemEncoding";
let OperandType = "OPERAND_MEMORY";
}
-def mem_ea_64 : Operand<i64> {
- let PrintMethod = "printMemOperandEA";
- let MIOperandInfo = (ops GPR64, simm16_64);
- let EncoderMethod = "getMemEncoding";
- let OperandType = "OPERAND_MEMORY";
+def PtrRC : Operand<iPTR> {
+ let MIOperandInfo = (ops ptr_rc);
+ let DecoderMethod = "DecodePtrRegisterClass";
+ let ParserMatchClass = PtrRegAsmOperand;
}
// size operand of ext instruction
@@ -370,6 +394,9 @@ def addr :
def addrRegImm :
ComplexPattern<iPTR, 2, "selectAddrRegImm", [frameindex]>;
+def addrRegReg :
+ ComplexPattern<iPTR, 2, "selectAddrRegReg", [frameindex]>;
+
def addrDefault :
ComplexPattern<iPTR, 2, "selectAddrDefault", [frameindex]>;
@@ -404,9 +431,9 @@ class ArithLogicI<string opstr, Operand Od, RegisterOperand RO,
// Arithmetic Multiply ADD/SUB
class MArithR<string opstr, bit isComm = 0> :
InstSE<(outs), (ins GPR32Opnd:$rs, GPR32Opnd:$rt),
- !strconcat(opstr, "\t$rs, $rt"), [], IIImult, FrmR> {
- let Defs = [HI, LO];
- let Uses = [HI, LO];
+ !strconcat(opstr, "\t$rs, $rt"), [], IIImult, FrmR, opstr> {
+ let Defs = [HI0, LO0];
+ let Uses = [HI0, LO0];
let isCommutable = isComm;
}
@@ -435,121 +462,66 @@ class shift_rotate_reg<string opstr, RegisterOperand RO,
// Load Upper Immediate
class LoadUpper<string opstr, RegisterOperand RO, Operand Imm>:
InstSE<(outs RO:$rt), (ins Imm:$imm16), !strconcat(opstr, "\t$rt, $imm16"),
- [], IIArith, FrmI>, IsAsCheapAsAMove {
+ [], IIArith, FrmI, opstr>, IsAsCheapAsAMove {
let neverHasSideEffects = 1;
let isReMaterializable = 1;
}
-class FMem<bits<6> op, dag outs, dag ins, string asmstr, list<dag> pattern,
- InstrItinClass itin>: FFI<op, outs, ins, asmstr, pattern> {
- bits<21> addr;
- let Inst{25-21} = addr{20-16};
- let Inst{15-0} = addr{15-0};
- let DecoderMethod = "DecodeMem";
-}
-
// Memory Load/Store
-class Load<string opstr, SDPatternOperator OpNode, DAGOperand RO,
- InstrItinClass Itin, Operand MemOpnd, ComplexPattern Addr,
- string ofsuffix> :
- InstSE<(outs RO:$rt), (ins MemOpnd:$addr), !strconcat(opstr, "\t$rt, $addr"),
- [(set RO:$rt, (OpNode Addr:$addr))], NoItinerary, FrmI,
- !strconcat(opstr, ofsuffix)> {
+class Load<string opstr, DAGOperand RO, SDPatternOperator OpNode = null_frag,
+ InstrItinClass Itin = NoItinerary, ComplexPattern Addr = addr> :
+ InstSE<(outs RO:$rt), (ins mem:$addr), !strconcat(opstr, "\t$rt, $addr"),
+ [(set RO:$rt, (OpNode Addr:$addr))], Itin, FrmI, opstr> {
let DecoderMethod = "DecodeMem";
let canFoldAsLoad = 1;
let mayLoad = 1;
}
-class Store<string opstr, SDPatternOperator OpNode, DAGOperand RO,
- InstrItinClass Itin, Operand MemOpnd, ComplexPattern Addr,
- string ofsuffix> :
- InstSE<(outs), (ins RO:$rt, MemOpnd:$addr), !strconcat(opstr, "\t$rt, $addr"),
- [(OpNode RO:$rt, Addr:$addr)], NoItinerary, FrmI,
- !strconcat(opstr, ofsuffix)> {
+class Store<string opstr, DAGOperand RO, SDPatternOperator OpNode = null_frag,
+ InstrItinClass Itin = NoItinerary, ComplexPattern Addr = addr> :
+ InstSE<(outs), (ins RO:$rt, mem:$addr), !strconcat(opstr, "\t$rt, $addr"),
+ [(OpNode RO:$rt, Addr:$addr)], Itin, FrmI, opstr> {
let DecoderMethod = "DecodeMem";
let mayStore = 1;
}
-multiclass LoadM<string opstr, DAGOperand RO,
- SDPatternOperator OpNode = null_frag,
- InstrItinClass Itin = NoItinerary,
- ComplexPattern Addr = addr> {
- def NAME : Load<opstr, OpNode, RO, Itin, mem, Addr, "">,
- Requires<[NotN64, HasStdEnc]>;
- def _P8 : Load<opstr, OpNode, RO, Itin, mem64, Addr, "_p8">,
- Requires<[IsN64, HasStdEnc]> {
- let DecoderNamespace = "Mips64";
- let isCodeGenOnly = 1;
- }
-}
-
-multiclass StoreM<string opstr, DAGOperand RO,
- SDPatternOperator OpNode = null_frag,
- InstrItinClass Itin = NoItinerary,
- ComplexPattern Addr = addr> {
- def NAME : Store<opstr, OpNode, RO, Itin, mem, Addr, "">,
- Requires<[NotN64, HasStdEnc]>;
- def _P8 : Store<opstr, OpNode, RO, Itin, mem64, Addr, "_p8">,
- Requires<[IsN64, HasStdEnc]> {
- let DecoderNamespace = "Mips64";
- let isCodeGenOnly = 1;
- }
-}
-
// Load/Store Left/Right
let canFoldAsLoad = 1 in
class LoadLeftRight<string opstr, SDNode OpNode, RegisterOperand RO,
- Operand MemOpnd> :
- InstSE<(outs RO:$rt), (ins MemOpnd:$addr, RO:$src),
+ InstrItinClass Itin> :
+ InstSE<(outs RO:$rt), (ins mem:$addr, RO:$src),
!strconcat(opstr, "\t$rt, $addr"),
- [(set RO:$rt, (OpNode addr:$addr, RO:$src))], NoItinerary, FrmI> {
+ [(set RO:$rt, (OpNode addr:$addr, RO:$src))], Itin, FrmI> {
let DecoderMethod = "DecodeMem";
string Constraints = "$src = $rt";
}
class StoreLeftRight<string opstr, SDNode OpNode, RegisterOperand RO,
- Operand MemOpnd>:
- InstSE<(outs), (ins RO:$rt, MemOpnd:$addr), !strconcat(opstr, "\t$rt, $addr"),
- [(OpNode RO:$rt, addr:$addr)], NoItinerary, FrmI> {
+ InstrItinClass Itin> :
+ InstSE<(outs), (ins RO:$rt, mem:$addr), !strconcat(opstr, "\t$rt, $addr"),
+ [(OpNode RO:$rt, addr:$addr)], Itin, FrmI> {
let DecoderMethod = "DecodeMem";
}
-multiclass LoadLeftRightM<string opstr, SDNode OpNode, RegisterOperand RO> {
- def NAME : LoadLeftRight<opstr, OpNode, RO, mem>,
- Requires<[NotN64, HasStdEnc]>;
- def _P8 : LoadLeftRight<opstr, OpNode, RO, mem64>,
- Requires<[IsN64, HasStdEnc]> {
- let DecoderNamespace = "Mips64";
- let isCodeGenOnly = 1;
- }
-}
-
-multiclass StoreLeftRightM<string opstr, SDNode OpNode, RegisterOperand RO> {
- def NAME : StoreLeftRight<opstr, OpNode, RO, mem>,
- Requires<[NotN64, HasStdEnc]>;
- def _P8 : StoreLeftRight<opstr, OpNode, RO, mem64>,
- Requires<[IsN64, HasStdEnc]> {
- let DecoderNamespace = "Mips64";
- let isCodeGenOnly = 1;
- }
-}
-
// Conditional Branch
-class CBranch<string opstr, PatFrag cond_op, RegisterOperand RO> :
- InstSE<(outs), (ins RO:$rs, RO:$rt, brtarget:$offset),
+class CBranch<string opstr, DAGOperand opnd, PatFrag cond_op,
+ RegisterOperand RO> :
+ InstSE<(outs), (ins RO:$rs, RO:$rt, opnd:$offset),
!strconcat(opstr, "\t$rs, $rt, $offset"),
[(brcond (i32 (cond_op RO:$rs, RO:$rt)), bb:$offset)], IIBranch,
- FrmI> {
+ FrmI, opstr> {
let isBranch = 1;
let isTerminator = 1;
let hasDelaySlot = 1;
let Defs = [AT];
}
-class CBranchZero<string opstr, PatFrag cond_op, RegisterOperand RO> :
- InstSE<(outs), (ins RO:$rs, brtarget:$offset),
+class CBranchZero<string opstr, DAGOperand opnd, PatFrag cond_op,
+ RegisterOperand RO> :
+ InstSE<(outs), (ins RO:$rs, opnd:$offset),
!strconcat(opstr, "\t$rs, $offset"),
- [(brcond (i32 (cond_op RO:$rs, 0)), bb:$offset)], IIBranch, FrmI> {
+ [(brcond (i32 (cond_op RO:$rs, 0)), bb:$offset)], IIBranch,
+ FrmI, opstr> {
let isBranch = 1;
let isTerminator = 1;
let hasDelaySlot = 1;
@@ -572,9 +544,9 @@ class SetCC_I<string opstr, PatFrag cond_op, Operand Od, PatLeaf imm_type,
// Jump
class JumpFJ<DAGOperand opnd, string opstr, SDPatternOperator operator,
- SDPatternOperator targetoperator> :
+ SDPatternOperator targetoperator, string bopstr> :
InstSE<(outs), (ins opnd:$target), !strconcat(opstr, "\t$target"),
- [(operator targetoperator:$target)], IIBranch, FrmJ> {
+ [(operator targetoperator:$target)], IIBranch, FrmJ, bopstr> {
let isTerminator=1;
let isBarrier=1;
let hasDelaySlot = 1;
@@ -583,9 +555,9 @@ class JumpFJ<DAGOperand opnd, string opstr, SDPatternOperator operator,
}
// Unconditional branch
-class UncondBranch<string opstr> :
- InstSE<(outs), (ins brtarget:$offset), !strconcat(opstr, "\t$offset"),
- [(br bb:$offset)], IIBranch, FrmI> {
+class UncondBranch<Instruction BEQInst> :
+ PseudoSE<(outs), (ins brtarget:$offset), [(br bb:$offset)], IIBranch>,
+ PseudoInstExpansion<(BEQInst ZERO, ZERO, brtarget:$offset)> {
let isBranch = 1;
let isTerminator = 1;
let isBarrier = 1;
@@ -596,17 +568,20 @@ class UncondBranch<string opstr> :
// Base class for indirect branch and return instruction classes.
let isTerminator=1, isBarrier=1, hasDelaySlot = 1 in
-class JumpFR<RegisterOperand RO, SDPatternOperator operator = null_frag>:
- InstSE<(outs), (ins RO:$rs), "jr\t$rs", [(operator RO:$rs)], IIBranch, FrmR>;
+class JumpFR<string opstr, RegisterOperand RO,
+ SDPatternOperator operator = null_frag>:
+ InstSE<(outs), (ins RO:$rs), "jr\t$rs", [(operator RO:$rs)], IIBranch,
+ FrmR, opstr>;
// Indirect branch
-class IndirectBranch<RegisterOperand RO>: JumpFR<RO, brind> {
+class IndirectBranch<string opstr, RegisterOperand RO> :
+ JumpFR<opstr, RO, brind> {
let isBranch = 1;
let isIndirectBranch = 1;
}
// Return instruction
-class RetBase<RegisterOperand RO>: JumpFR<RO> {
+class RetBase<string opstr, RegisterOperand RO>: JumpFR<opstr, RO> {
let isReturn = 1;
let isCodeGenOnly = 1;
let hasCtrlDep = 1;
@@ -615,9 +590,9 @@ class RetBase<RegisterOperand RO>: JumpFR<RO> {
// Jump and Link (Call)
let isCall=1, hasDelaySlot=1, Defs = [RA] in {
- class JumpLink<string opstr> :
- InstSE<(outs), (ins calltarget:$target), !strconcat(opstr, "\t$target"),
- [(MipsJmpLink imm:$target)], IIBranch, FrmJ> {
+ class JumpLink<string opstr, DAGOperand opnd> :
+ InstSE<(outs), (ins opnd:$target), !strconcat(opstr, "\t$target"),
+ [(MipsJmpLink imm:$target)], IIBranch, FrmJ, opstr> {
let DecoderMethod = "DecodeJumpTarget";
}
@@ -628,11 +603,11 @@ let isCall=1, hasDelaySlot=1, Defs = [RA] in {
class JumpLinkReg<string opstr, RegisterOperand RO>:
InstSE<(outs RO:$rd), (ins RO:$rs), !strconcat(opstr, "\t$rd, $rs"),
- [], IIBranch, FrmR>;
+ [], IIBranch, FrmR, opstr>;
- class BGEZAL_FT<string opstr, RegisterOperand RO> :
- InstSE<(outs), (ins RO:$rs, brtarget:$offset),
- !strconcat(opstr, "\t$rs, $offset"), [], IIBranch, FrmI>;
+ class BGEZAL_FT<string opstr, DAGOperand opnd, RegisterOperand RO> :
+ InstSE<(outs), (ins RO:$rs, opnd:$offset),
+ !strconcat(opstr, "\t$rs, $offset"), [], IIBranch, FrmI, opstr>;
}
@@ -660,6 +635,20 @@ class ER_FT<string opstr> :
InstSE<(outs), (ins),
opstr, [], NoItinerary, FrmOther>;
+// Interrupts
+class DEI_FT<string opstr, RegisterOperand RO> :
+ InstSE<(outs RO:$rt), (ins),
+ !strconcat(opstr, "\t$rt"), [], NoItinerary, FrmOther>;
+
+// Wait
+class WAIT_FT<string opstr> :
+ InstSE<(outs), (ins), opstr, [], NoItinerary, FrmOther> {
+ let Inst{31-26} = 0x10;
+ let Inst{25} = 1;
+ let Inst{24-6} = 0;
+ let Inst{5-0} = 0x20;
+}
+
// Sync
let hasSideEffects = 1 in
class SYNC_FT :
@@ -669,8 +658,12 @@ class SYNC_FT :
let hasSideEffects = 1 in
class TEQ_FT<string opstr, RegisterOperand RO> :
InstSE<(outs), (ins RO:$rs, RO:$rt, uimm16:$code_),
- !strconcat(opstr, "\t$rs, $rt, $code_"), [], NoItinerary, FrmI>;
+ !strconcat(opstr, "\t$rs, $rt, $code_"), [], NoItinerary,
+ FrmI, opstr>;
+class TEQI_FT<string opstr, RegisterOperand RO> :
+ InstSE<(outs), (ins RO:$rs, uimm16:$imm16),
+ !strconcat(opstr, "\t$rs, $imm16"), [], NoItinerary, FrmOther, opstr>;
// Mul, Div
class Mult<string opstr, InstrItinClass itin, RegisterOperand RO,
list<Register> DefRegs> :
@@ -698,10 +691,10 @@ class MultDivPseudo<Instruction RealInst, RegisterClass R0, RegisterOperand R1,
// Pseudo multiply add/sub instruction with explicit accumulator register
// operands.
class MAddSubPseudo<Instruction RealInst, SDPatternOperator OpNode>
- : PseudoSE<(outs ACRegs:$ac),
- (ins GPR32Opnd:$rs, GPR32Opnd:$rt, ACRegs:$acin),
- [(set ACRegs:$ac,
- (OpNode GPR32Opnd:$rs, GPR32Opnd:$rt, ACRegs:$acin))],
+ : PseudoSE<(outs ACC64:$ac),
+ (ins GPR32Opnd:$rs, GPR32Opnd:$rt, ACC64:$acin),
+ [(set ACC64:$ac,
+ (OpNode GPR32Opnd:$rs, GPR32Opnd:$rt, ACC64:$acin))],
IIImult>,
PseudoInstExpansion<(RealInst GPR32Opnd:$rs, GPR32Opnd:$rt)> {
string Constraints = "$acin = $ac";
@@ -710,25 +703,35 @@ class MAddSubPseudo<Instruction RealInst, SDPatternOperator OpNode>
class Div<string opstr, InstrItinClass itin, RegisterOperand RO,
list<Register> DefRegs> :
InstSE<(outs), (ins RO:$rs, RO:$rt), !strconcat(opstr, "\t$$zero, $rs, $rt"),
- [], itin, FrmR> {
+ [], itin, FrmR, opstr> {
let Defs = DefRegs;
}
// Move from Hi/Lo
-class MoveFromLOHI<string opstr, RegisterOperand RO, list<Register> UseRegs>:
- InstSE<(outs RO:$rd), (ins), !strconcat(opstr, "\t$rd"), [], IIHiLo, FrmR> {
- let Uses = UseRegs;
+class PseudoMFLOHI<RegisterClass DstRC, RegisterClass SrcRC, SDNode OpNode>
+ : PseudoSE<(outs DstRC:$rd), (ins SrcRC:$hilo),
+ [(set DstRC:$rd, (OpNode SrcRC:$hilo))], IIHiLo>;
+
+class MoveFromLOHI<string opstr, RegisterOperand RO, Register UseReg>:
+ InstSE<(outs RO:$rd), (ins), !strconcat(opstr, "\t$rd"), [], IIHiLo, FrmR,
+ opstr> {
+ let Uses = [UseReg];
let neverHasSideEffects = 1;
}
+class PseudoMTLOHI<RegisterClass DstRC, RegisterClass SrcRC>
+ : PseudoSE<(outs DstRC:$lohi), (ins SrcRC:$lo, SrcRC:$hi),
+ [(set DstRC:$lohi, (MipsMTLOHI SrcRC:$lo, SrcRC:$hi))], IIHiLo>;
+
class MoveToLOHI<string opstr, RegisterOperand RO, list<Register> DefRegs>:
- InstSE<(outs), (ins RO:$rs), !strconcat(opstr, "\t$rs"), [], IIHiLo, FrmR> {
+ InstSE<(outs), (ins RO:$rs), !strconcat(opstr, "\t$rs"), [], IIHiLo,
+ FrmR, opstr> {
let Defs = DefRegs;
let neverHasSideEffects = 1;
}
-class EffectiveAddress<string opstr, RegisterOperand RO, Operand Mem> :
- InstSE<(outs RO:$rt), (ins Mem:$addr), !strconcat(opstr, "\t$rt, $addr"),
+class EffectiveAddress<string opstr, RegisterOperand RO> :
+ InstSE<(outs RO:$rt), (ins mem_ea:$addr), !strconcat(opstr, "\t$rt, $addr"),
[(set RO:$rt, addr:$addr)], NoItinerary, FrmI> {
let isCodeGenOnly = 1;
let DecoderMethod = "DecodeMem";
@@ -737,26 +740,26 @@ class EffectiveAddress<string opstr, RegisterOperand RO, Operand Mem> :
// Count Leading Ones/Zeros in Word
class CountLeading0<string opstr, RegisterOperand RO>:
InstSE<(outs RO:$rd), (ins RO:$rs), !strconcat(opstr, "\t$rd, $rs"),
- [(set RO:$rd, (ctlz RO:$rs))], IIArith, FrmR>,
+ [(set RO:$rd, (ctlz RO:$rs))], IIArith, FrmR, opstr>,
Requires<[HasBitCount, HasStdEnc]>;
class CountLeading1<string opstr, RegisterOperand RO>:
InstSE<(outs RO:$rd), (ins RO:$rs), !strconcat(opstr, "\t$rd, $rs"),
- [(set RO:$rd, (ctlz (not RO:$rs)))], IIArith, FrmR>,
+ [(set RO:$rd, (ctlz (not RO:$rs)))], IIArith, FrmR, opstr>,
Requires<[HasBitCount, HasStdEnc]>;
// Sign Extend in Register.
class SignExtInReg<string opstr, ValueType vt, RegisterOperand RO> :
InstSE<(outs RO:$rd), (ins RO:$rt), !strconcat(opstr, "\t$rd, $rt"),
- [(set RO:$rd, (sext_inreg RO:$rt, vt))], IIseb, FrmR> {
+ [(set RO:$rd, (sext_inreg RO:$rt, vt))], IIseb, FrmR, opstr> {
let Predicates = [HasSEInReg, HasStdEnc];
}
// Subword Swap
class SubwordSwap<string opstr, RegisterOperand RO>:
InstSE<(outs RO:$rd), (ins RO:$rt), !strconcat(opstr, "\t$rd, $rt"), [],
- NoItinerary, FrmR> {
+ NoItinerary, FrmR, opstr> {
let Predicates = [HasSwap, HasStdEnc];
let neverHasSideEffects = 1;
}
@@ -767,66 +770,60 @@ class ReadHardware<RegisterOperand CPURegOperand, RegisterOperand RO> :
IIArith, FrmR>;
// Ext and Ins
-class ExtBase<string opstr, RegisterOperand RO>:
- InstSE<(outs RO:$rt), (ins RO:$rs, uimm16:$pos, size_ext:$size),
+class ExtBase<string opstr, RegisterOperand RO, Operand PosOpnd,
+ SDPatternOperator Op = null_frag>:
+ InstSE<(outs RO:$rt), (ins RO:$rs, PosOpnd:$pos, size_ext:$size),
!strconcat(opstr, " $rt, $rs, $pos, $size"),
- [(set RO:$rt, (MipsExt RO:$rs, imm:$pos, imm:$size))], NoItinerary,
- FrmR> {
+ [(set RO:$rt, (Op RO:$rs, imm:$pos, imm:$size))], NoItinerary,
+ FrmR, opstr> {
let Predicates = [HasMips32r2, HasStdEnc];
}
-class InsBase<string opstr, RegisterOperand RO>:
- InstSE<(outs RO:$rt), (ins RO:$rs, uimm16:$pos, size_ins:$size, RO:$src),
+class InsBase<string opstr, RegisterOperand RO, Operand PosOpnd,
+ SDPatternOperator Op = null_frag>:
+ InstSE<(outs RO:$rt), (ins RO:$rs, PosOpnd:$pos, size_ins:$size, RO:$src),
!strconcat(opstr, " $rt, $rs, $pos, $size"),
- [(set RO:$rt, (MipsIns RO:$rs, imm:$pos, imm:$size, RO:$src))],
- NoItinerary, FrmR> {
+ [(set RO:$rt, (Op RO:$rs, imm:$pos, imm:$size, RO:$src))],
+ NoItinerary, FrmR, opstr> {
let Predicates = [HasMips32r2, HasStdEnc];
let Constraints = "$src = $rt";
}
// Atomic instructions with 2 source operands (ATOMIC_SWAP & ATOMIC_LOAD_*).
-class Atomic2Ops<PatFrag Op, RegisterClass DRC, RegisterClass PRC> :
- PseudoSE<(outs DRC:$dst), (ins PRC:$ptr, DRC:$incr),
- [(set DRC:$dst, (Op PRC:$ptr, DRC:$incr))]>;
-
-multiclass Atomic2Ops32<PatFrag Op> {
- def NAME : Atomic2Ops<Op, GPR32, GPR32>, Requires<[NotN64, HasStdEnc]>;
- def _P8 : Atomic2Ops<Op, GPR32, GPR64>, Requires<[IsN64, HasStdEnc]>;
-}
+class Atomic2Ops<PatFrag Op, RegisterClass DRC> :
+ PseudoSE<(outs DRC:$dst), (ins PtrRC:$ptr, DRC:$incr),
+ [(set DRC:$dst, (Op iPTR:$ptr, DRC:$incr))]>;
// Atomic Compare & Swap.
-class AtomicCmpSwap<PatFrag Op, RegisterClass DRC, RegisterClass PRC> :
- PseudoSE<(outs DRC:$dst), (ins PRC:$ptr, DRC:$cmp, DRC:$swap),
- [(set DRC:$dst, (Op PRC:$ptr, DRC:$cmp, DRC:$swap))]>;
-
-multiclass AtomicCmpSwap32<PatFrag Op> {
- def NAME : AtomicCmpSwap<Op, GPR32, GPR32>,
- Requires<[NotN64, HasStdEnc]>;
- def _P8 : AtomicCmpSwap<Op, GPR32, GPR64>,
- Requires<[IsN64, HasStdEnc]>;
-}
+class AtomicCmpSwap<PatFrag Op, RegisterClass DRC> :
+ PseudoSE<(outs DRC:$dst), (ins PtrRC:$ptr, DRC:$cmp, DRC:$swap),
+ [(set DRC:$dst, (Op iPTR:$ptr, DRC:$cmp, DRC:$swap))]>;
-class LLBase<string opstr, RegisterOperand RO, Operand Mem> :
- InstSE<(outs RO:$rt), (ins Mem:$addr), !strconcat(opstr, "\t$rt, $addr"),
+class LLBase<string opstr, RegisterOperand RO> :
+ InstSE<(outs RO:$rt), (ins mem:$addr), !strconcat(opstr, "\t$rt, $addr"),
[], NoItinerary, FrmI> {
let DecoderMethod = "DecodeMem";
let mayLoad = 1;
}
-class SCBase<string opstr, RegisterOperand RO, Operand Mem> :
- InstSE<(outs RO:$dst), (ins RO:$rt, Mem:$addr),
+class SCBase<string opstr, RegisterOperand RO> :
+ InstSE<(outs RO:$dst), (ins RO:$rt, mem:$addr),
!strconcat(opstr, "\t$rt, $addr"), [], NoItinerary, FrmI> {
let DecoderMethod = "DecodeMem";
let mayStore = 1;
let Constraints = "$rt = $dst";
}
-class MFC3OP<dag outs, dag ins, string asmstr> :
- InstSE<outs, ins, asmstr, [], NoItinerary, FrmFR>;
+class MFC3OP<string asmstr, RegisterOperand RO> :
+ InstSE<(outs RO:$rt, RO:$rd, uimm16:$sel), (ins),
+ !strconcat(asmstr, "\t$rt, $rd, $sel"), [], NoItinerary, FrmFR>;
-let isBarrier = 1, isTerminator = 1, isCodeGenOnly = 1 in
-def TRAP : InstSE<(outs), (ins), "break", [(trap)], NoItinerary, FrmOther> {
- let Inst = 0x0000000d;
+class TrapBase<Instruction RealInst>
+ : PseudoSE<(outs), (ins), [(trap)], NoItinerary>,
+ PseudoInstExpansion<(RealInst 0, 0)> {
+ let isBarrier = 1;
+ let isTerminator = 1;
+ let isCodeGenOnly = 1;
}
//===----------------------------------------------------------------------===//
@@ -845,38 +842,38 @@ def ADJCALLSTACKUP : MipsPseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2),
}
let usesCustomInserter = 1 in {
- defm ATOMIC_LOAD_ADD_I8 : Atomic2Ops32<atomic_load_add_8>;
- defm ATOMIC_LOAD_ADD_I16 : Atomic2Ops32<atomic_load_add_16>;
- defm ATOMIC_LOAD_ADD_I32 : Atomic2Ops32<atomic_load_add_32>;
- defm ATOMIC_LOAD_SUB_I8 : Atomic2Ops32<atomic_load_sub_8>;
- defm ATOMIC_LOAD_SUB_I16 : Atomic2Ops32<atomic_load_sub_16>;
- defm ATOMIC_LOAD_SUB_I32 : Atomic2Ops32<atomic_load_sub_32>;
- defm ATOMIC_LOAD_AND_I8 : Atomic2Ops32<atomic_load_and_8>;
- defm ATOMIC_LOAD_AND_I16 : Atomic2Ops32<atomic_load_and_16>;
- defm ATOMIC_LOAD_AND_I32 : Atomic2Ops32<atomic_load_and_32>;
- defm ATOMIC_LOAD_OR_I8 : Atomic2Ops32<atomic_load_or_8>;
- defm ATOMIC_LOAD_OR_I16 : Atomic2Ops32<atomic_load_or_16>;
- defm ATOMIC_LOAD_OR_I32 : Atomic2Ops32<atomic_load_or_32>;
- defm ATOMIC_LOAD_XOR_I8 : Atomic2Ops32<atomic_load_xor_8>;
- defm ATOMIC_LOAD_XOR_I16 : Atomic2Ops32<atomic_load_xor_16>;
- defm ATOMIC_LOAD_XOR_I32 : Atomic2Ops32<atomic_load_xor_32>;
- defm ATOMIC_LOAD_NAND_I8 : Atomic2Ops32<atomic_load_nand_8>;
- defm ATOMIC_LOAD_NAND_I16 : Atomic2Ops32<atomic_load_nand_16>;
- defm ATOMIC_LOAD_NAND_I32 : Atomic2Ops32<atomic_load_nand_32>;
-
- defm ATOMIC_SWAP_I8 : Atomic2Ops32<atomic_swap_8>;
- defm ATOMIC_SWAP_I16 : Atomic2Ops32<atomic_swap_16>;
- defm ATOMIC_SWAP_I32 : Atomic2Ops32<atomic_swap_32>;
-
- defm ATOMIC_CMP_SWAP_I8 : AtomicCmpSwap32<atomic_cmp_swap_8>;
- defm ATOMIC_CMP_SWAP_I16 : AtomicCmpSwap32<atomic_cmp_swap_16>;
- defm ATOMIC_CMP_SWAP_I32 : AtomicCmpSwap32<atomic_cmp_swap_32>;
+ def ATOMIC_LOAD_ADD_I8 : Atomic2Ops<atomic_load_add_8, GPR32>;
+ def ATOMIC_LOAD_ADD_I16 : Atomic2Ops<atomic_load_add_16, GPR32>;
+ def ATOMIC_LOAD_ADD_I32 : Atomic2Ops<atomic_load_add_32, GPR32>;
+ def ATOMIC_LOAD_SUB_I8 : Atomic2Ops<atomic_load_sub_8, GPR32>;
+ def ATOMIC_LOAD_SUB_I16 : Atomic2Ops<atomic_load_sub_16, GPR32>;
+ def ATOMIC_LOAD_SUB_I32 : Atomic2Ops<atomic_load_sub_32, GPR32>;
+ def ATOMIC_LOAD_AND_I8 : Atomic2Ops<atomic_load_and_8, GPR32>;
+ def ATOMIC_LOAD_AND_I16 : Atomic2Ops<atomic_load_and_16, GPR32>;
+ def ATOMIC_LOAD_AND_I32 : Atomic2Ops<atomic_load_and_32, GPR32>;
+ def ATOMIC_LOAD_OR_I8 : Atomic2Ops<atomic_load_or_8, GPR32>;
+ def ATOMIC_LOAD_OR_I16 : Atomic2Ops<atomic_load_or_16, GPR32>;
+ def ATOMIC_LOAD_OR_I32 : Atomic2Ops<atomic_load_or_32, GPR32>;
+ def ATOMIC_LOAD_XOR_I8 : Atomic2Ops<atomic_load_xor_8, GPR32>;
+ def ATOMIC_LOAD_XOR_I16 : Atomic2Ops<atomic_load_xor_16, GPR32>;
+ def ATOMIC_LOAD_XOR_I32 : Atomic2Ops<atomic_load_xor_32, GPR32>;
+ def ATOMIC_LOAD_NAND_I8 : Atomic2Ops<atomic_load_nand_8, GPR32>;
+ def ATOMIC_LOAD_NAND_I16 : Atomic2Ops<atomic_load_nand_16, GPR32>;
+ def ATOMIC_LOAD_NAND_I32 : Atomic2Ops<atomic_load_nand_32, GPR32>;
+
+ def ATOMIC_SWAP_I8 : Atomic2Ops<atomic_swap_8, GPR32>;
+ def ATOMIC_SWAP_I16 : Atomic2Ops<atomic_swap_16, GPR32>;
+ def ATOMIC_SWAP_I32 : Atomic2Ops<atomic_swap_32, GPR32>;
+
+ def ATOMIC_CMP_SWAP_I8 : AtomicCmpSwap<atomic_cmp_swap_8, GPR32>;
+ def ATOMIC_CMP_SWAP_I16 : AtomicCmpSwap<atomic_cmp_swap_16, GPR32>;
+ def ATOMIC_CMP_SWAP_I32 : AtomicCmpSwap<atomic_cmp_swap_32, GPR32>;
}
/// Pseudo instructions for loading and storing accumulator registers.
let isPseudo = 1, isCodeGenOnly = 1 in {
- defm LOAD_AC64 : LoadM<"", ACRegs>;
- defm STORE_AC64 : StoreM<"", ACRegs>;
+ def LOAD_ACC64 : Load<"", ACC64>;
+ def STORE_ACC64 : Store<"", ACC64>;
}
//===----------------------------------------------------------------------===//
@@ -911,6 +908,7 @@ def ADDu : MMRel, ArithLogicR<"addu", GPR32Opnd, 1, IIArith, add>,
ADD_FM<0, 0x21>;
def SUBu : MMRel, ArithLogicR<"subu", GPR32Opnd, 0, IIArith, sub>,
ADD_FM<0, 0x23>;
+let Defs = [HI0, LO0] in
def MUL : MMRel, ArithLogicR<"mul", GPR32Opnd, 1, IIImul, mul>,
ADD_FM<0x1c, 2>;
def ADD : MMRel, ArithLogicR<"add", GPR32Opnd>, ADD_FM<0, 0x20>;
@@ -926,11 +924,11 @@ def XOR : MMRel, ArithLogicR<"xor", GPR32Opnd, 1, IILogic, xor>,
def NOR : MMRel, LogicNOR<"nor", GPR32Opnd>, ADD_FM<0, 0x27>;
/// Shift Instructions
-def SLL : MMRel, shift_rotate_imm<"sll", shamt, GPR32Opnd, shl, immZExt5>,
+def SLL : MMRel, shift_rotate_imm<"sll", uimm5, GPR32Opnd, shl, immZExt5>,
SRA_FM<0, 0>;
-def SRL : MMRel, shift_rotate_imm<"srl", shamt, GPR32Opnd, srl, immZExt5>,
+def SRL : MMRel, shift_rotate_imm<"srl", uimm5, GPR32Opnd, srl, immZExt5>,
SRA_FM<2, 0>;
-def SRA : MMRel, shift_rotate_imm<"sra", shamt, GPR32Opnd, sra, immZExt5>,
+def SRA : MMRel, shift_rotate_imm<"sra", uimm5, GPR32Opnd, sra, immZExt5>,
SRA_FM<3, 0>;
def SLLV : MMRel, shift_rotate_reg<"sllv", GPR32Opnd, shl>, SRLV_FM<4, 0>;
def SRLV : MMRel, shift_rotate_reg<"srlv", GPR32Opnd, srl>, SRLV_FM<6, 0>;
@@ -938,7 +936,7 @@ def SRAV : MMRel, shift_rotate_reg<"srav", GPR32Opnd, sra>, SRLV_FM<7, 0>;
// Rotate Instructions
let Predicates = [HasMips32r2, HasStdEnc] in {
- def ROTR : MMRel, shift_rotate_imm<"rotr", shamt, GPR32Opnd, rotr,
+ def ROTR : MMRel, shift_rotate_imm<"rotr", uimm5, GPR32Opnd, rotr,
immZExt5>,
SRA_FM<2, 1>;
def ROTRV : MMRel, shift_rotate_reg<"rotrv", GPR32Opnd, rotr>,
@@ -947,65 +945,85 @@ let Predicates = [HasMips32r2, HasStdEnc] in {
/// Load and Store Instructions
/// aligned
-defm LB : LoadM<"lb", GPR32Opnd, sextloadi8, IILoad>, MMRel, LW_FM<0x20>;
-defm LBu : LoadM<"lbu", GPR32Opnd, zextloadi8, IILoad, addrDefault>, MMRel,
- LW_FM<0x24>;
-defm LH : LoadM<"lh", GPR32Opnd, sextloadi16, IILoad, addrDefault>, MMRel,
- LW_FM<0x21>;
-defm LHu : LoadM<"lhu", GPR32Opnd, zextloadi16, IILoad>, MMRel, LW_FM<0x25>;
-defm LW : LoadM<"lw", GPR32Opnd, load, IILoad, addrDefault>, MMRel, LW_FM<0x23>;
-defm SB : StoreM<"sb", GPR32Opnd, truncstorei8, IIStore>, MMRel, LW_FM<0x28>;
-defm SH : StoreM<"sh", GPR32Opnd, truncstorei16, IIStore>, MMRel, LW_FM<0x29>;
-defm SW : StoreM<"sw", GPR32Opnd, store, IIStore>, MMRel, LW_FM<0x2b>;
+def LB : Load<"lb", GPR32Opnd, sextloadi8, IILoad>, MMRel, LW_FM<0x20>;
+def LBu : Load<"lbu", GPR32Opnd, zextloadi8, IILoad, addrDefault>, MMRel,
+ LW_FM<0x24>;
+def LH : Load<"lh", GPR32Opnd, sextloadi16, IILoad, addrDefault>, MMRel,
+ LW_FM<0x21>;
+def LHu : Load<"lhu", GPR32Opnd, zextloadi16, IILoad>, MMRel, LW_FM<0x25>;
+def LW : Load<"lw", GPR32Opnd, load, IILoad, addrDefault>, MMRel,
+ LW_FM<0x23>;
+def SB : Store<"sb", GPR32Opnd, truncstorei8, IIStore>, MMRel, LW_FM<0x28>;
+def SH : Store<"sh", GPR32Opnd, truncstorei16, IIStore>, MMRel, LW_FM<0x29>;
+def SW : Store<"sw", GPR32Opnd, store, IIStore>, MMRel, LW_FM<0x2b>;
/// load/store left/right
-defm LWL : LoadLeftRightM<"lwl", MipsLWL, GPR32Opnd>, LW_FM<0x22>;
-defm LWR : LoadLeftRightM<"lwr", MipsLWR, GPR32Opnd>, LW_FM<0x26>;
-defm SWL : StoreLeftRightM<"swl", MipsSWL, GPR32Opnd>, LW_FM<0x2a>;
-defm SWR : StoreLeftRightM<"swr", MipsSWR, GPR32Opnd>, LW_FM<0x2e>;
+let Predicates = [NotInMicroMips] in {
+def LWL : LoadLeftRight<"lwl", MipsLWL, GPR32Opnd, IILoad>, LW_FM<0x22>;
+def LWR : LoadLeftRight<"lwr", MipsLWR, GPR32Opnd, IILoad>, LW_FM<0x26>;
+def SWL : StoreLeftRight<"swl", MipsSWL, GPR32Opnd, IIStore>, LW_FM<0x2a>;
+def SWR : StoreLeftRight<"swr", MipsSWR, GPR32Opnd, IIStore>, LW_FM<0x2e>;
+}
def SYNC : SYNC_FT, SYNC_FM;
-def TEQ : TEQ_FT<"teq", GPR32Opnd>, TEQ_FM<0x34>;
+def TEQ : MMRel, TEQ_FT<"teq", GPR32Opnd>, TEQ_FM<0x34>;
+def TGE : MMRel, TEQ_FT<"tge", GPR32Opnd>, TEQ_FM<0x30>;
+def TGEU : MMRel, TEQ_FT<"tgeu", GPR32Opnd>, TEQ_FM<0x31>;
+def TLT : MMRel, TEQ_FT<"tlt", GPR32Opnd>, TEQ_FM<0x32>;
+def TLTU : MMRel, TEQ_FT<"tltu", GPR32Opnd>, TEQ_FM<0x33>;
+def TNE : MMRel, TEQ_FT<"tne", GPR32Opnd>, TEQ_FM<0x36>;
+
+def TEQI : MMRel, TEQI_FT<"teqi", GPR32Opnd>, TEQI_FM<0xc>;
+def TGEI : MMRel, TEQI_FT<"tgei", GPR32Opnd>, TEQI_FM<0x8>;
+def TGEIU : MMRel, TEQI_FT<"tgeiu", GPR32Opnd>, TEQI_FM<0x9>;
+def TLTI : MMRel, TEQI_FT<"tlti", GPR32Opnd>, TEQI_FM<0xa>;
+def TLTIU : MMRel, TEQI_FT<"tltiu", GPR32Opnd>, TEQI_FM<0xb>;
+def TNEI : MMRel, TEQI_FT<"tnei", GPR32Opnd>, TEQI_FM<0xe>;
def BREAK : BRK_FT<"break">, BRK_FM<0xd>;
def SYSCALL : SYS_FT<"syscall">, SYS_FM<0xc>;
+def TRAP : TrapBase<BREAK>;
def ERET : ER_FT<"eret">, ER_FM<0x18>;
def DERET : ER_FT<"deret">, ER_FM<0x1f>;
-/// Load-linked, Store-conditional
-let Predicates = [NotN64, HasStdEnc] in {
- def LL : LLBase<"ll", GPR32Opnd, mem>, LW_FM<0x30>;
- def SC : SCBase<"sc", GPR32Opnd, mem>, LW_FM<0x38>;
-}
+def EI : DEI_FT<"ei", GPR32Opnd>, EI_FM<1>;
+def DI : DEI_FT<"di", GPR32Opnd>, EI_FM<0>;
-let Predicates = [IsN64, HasStdEnc], DecoderNamespace = "Mips64" in {
- def LL_P8 : LLBase<"ll", GPR32Opnd, mem64>, LW_FM<0x30>;
- def SC_P8 : SCBase<"sc", GPR32Opnd, mem64>, LW_FM<0x38>;
-}
+def WAIT : WAIT_FT<"wait">;
+
+/// Load-linked, Store-conditional
+def LL : LLBase<"ll", GPR32Opnd>, LW_FM<0x30>;
+def SC : SCBase<"sc", GPR32Opnd>, LW_FM<0x38>;
/// Jump and Branch Instructions
-def J : JumpFJ<jmptarget, "j", br, bb>, FJ<2>,
+def J : MMRel, JumpFJ<jmptarget, "j", br, bb, "j">, FJ<2>,
Requires<[RelocStatic, HasStdEnc]>, IsBranch;
-def JR : IndirectBranch<GPR32Opnd>, MTLO_FM<8>;
-def B : UncondBranch<"b">, B_FM;
-def BEQ : CBranch<"beq", seteq, GPR32Opnd>, BEQ_FM<4>;
-def BNE : CBranch<"bne", setne, GPR32Opnd>, BEQ_FM<5>;
-def BGEZ : CBranchZero<"bgez", setge, GPR32Opnd>, BGEZ_FM<1, 1>;
-def BGTZ : CBranchZero<"bgtz", setgt, GPR32Opnd>, BGEZ_FM<7, 0>;
-def BLEZ : CBranchZero<"blez", setle, GPR32Opnd>, BGEZ_FM<6, 0>;
-def BLTZ : CBranchZero<"bltz", setlt, GPR32Opnd>, BGEZ_FM<1, 0>;
-
-def JAL : JumpLink<"jal">, FJ<3>;
-def JALR : JumpLinkReg<"jalr", GPR32Opnd>, JALR_FM;
+def JR : MMRel, IndirectBranch<"jr", GPR32Opnd>, MTLO_FM<8>;
+def BEQ : MMRel, CBranch<"beq", brtarget, seteq, GPR32Opnd>, BEQ_FM<4>;
+def BNE : MMRel, CBranch<"bne", brtarget, setne, GPR32Opnd>, BEQ_FM<5>;
+def BGEZ : MMRel, CBranchZero<"bgez", brtarget, setge, GPR32Opnd>,
+ BGEZ_FM<1, 1>;
+def BGTZ : MMRel, CBranchZero<"bgtz", brtarget, setgt, GPR32Opnd>,
+ BGEZ_FM<7, 0>;
+def BLEZ : MMRel, CBranchZero<"blez", brtarget, setle, GPR32Opnd>,
+ BGEZ_FM<6, 0>;
+def BLTZ : MMRel, CBranchZero<"bltz", brtarget, setlt, GPR32Opnd>,
+ BGEZ_FM<1, 0>;
+def B : UncondBranch<BEQ>;
+
+def JAL : MMRel, JumpLink<"jal", calltarget>, FJ<3>;
+def JALR : MMRel, JumpLinkReg<"jalr", GPR32Opnd>, JALR_FM;
def JALRPseudo : JumpLinkRegPseudo<GPR32Opnd, JALR, RA>;
-def BGEZAL : BGEZAL_FT<"bgezal", GPR32Opnd>, BGEZAL_FM<0x11>;
-def BLTZAL : BGEZAL_FT<"bltzal", GPR32Opnd>, BGEZAL_FM<0x10>;
+def BGEZAL : MMRel, BGEZAL_FT<"bgezal", brtarget, GPR32Opnd>, BGEZAL_FM<0x11>;
+def BLTZAL : MMRel, BGEZAL_FT<"bltzal", brtarget, GPR32Opnd>, BGEZAL_FM<0x10>;
def BAL_BR : BAL_BR_Pseudo<BGEZAL>;
-def TAILCALL : JumpFJ<calltarget, "j", MipsTailCall, imm>, FJ<2>, IsTailCall;
-def TAILCALL_R : JumpFR<GPR32Opnd, MipsTailCall>, MTLO_FM<8>, IsTailCall;
+def TAILCALL : MMRel, JumpFJ<calltarget, "j", MipsTailCall, imm, "tcall">,
+ FJ<2>, IsTailCall;
+def TAILCALL_R : MMRel, JumpFR<"tcallr", GPR32Opnd, MipsTailCall>, MTLO_FM<8>,
+ IsTailCall;
-def RET : RetBase<GPR32Opnd>, MTLO_FM<8>;
+def RET : MMRel, RetBase<"ret", GPR32Opnd>, MTLO_FM<8>;
// Exception handling related node and instructions.
// The conversion sequence is:
@@ -1029,34 +1047,30 @@ let Uses = [V0, V1], isTerminator = 1, isReturn = 1, isBarrier = 1 in {
}
/// Multiply and Divide Instructions.
-def MULT : MMRel, Mult<"mult", IIImult, GPR32Opnd, [HI, LO]>,
+def MULT : MMRel, Mult<"mult", IIImult, GPR32Opnd, [HI0, LO0]>,
MULT_FM<0, 0x18>;
-def MULTu : MMRel, Mult<"multu", IIImult, GPR32Opnd, [HI, LO]>,
+def MULTu : MMRel, Mult<"multu", IIImult, GPR32Opnd, [HI0, LO0]>,
MULT_FM<0, 0x19>;
-def PseudoMULT : MultDivPseudo<MULT, ACRegs, GPR32Opnd, MipsMult, IIImult>;
-def PseudoMULTu : MultDivPseudo<MULTu, ACRegs, GPR32Opnd, MipsMultu, IIImult>;
-def SDIV : Div<"div", IIIdiv, GPR32Opnd, [HI, LO]>, MULT_FM<0, 0x1a>;
-def UDIV : Div<"divu", IIIdiv, GPR32Opnd, [HI, LO]>, MULT_FM<0, 0x1b>;
-def PseudoSDIV : MultDivPseudo<SDIV, ACRegs, GPR32Opnd, MipsDivRem, IIIdiv,
- 0, 1, 1>;
-def PseudoUDIV : MultDivPseudo<UDIV, ACRegs, GPR32Opnd, MipsDivRemU, IIIdiv,
- 0, 1, 1>;
+def SDIV : MMRel, Div<"div", IIIdiv, GPR32Opnd, [HI0, LO0]>,
+ MULT_FM<0, 0x1a>;
+def UDIV : MMRel, Div<"divu", IIIdiv, GPR32Opnd, [HI0, LO0]>,
+ MULT_FM<0, 0x1b>;
-def MTHI : MoveToLOHI<"mthi", GPR32Opnd, [HI]>, MTLO_FM<0x11>;
-def MTLO : MoveToLOHI<"mtlo", GPR32Opnd, [LO]>, MTLO_FM<0x13>;
-def MFHI : MoveFromLOHI<"mfhi", GPR32Opnd, [HI]>, MFLO_FM<0x10>;
-def MFLO : MoveFromLOHI<"mflo", GPR32Opnd, [LO]>, MFLO_FM<0x12>;
+def MTHI : MMRel, MoveToLOHI<"mthi", GPR32Opnd, [HI0]>, MTLO_FM<0x11>;
+def MTLO : MMRel, MoveToLOHI<"mtlo", GPR32Opnd, [LO0]>, MTLO_FM<0x13>;
+def MFHI : MMRel, MoveFromLOHI<"mfhi", GPR32Opnd, AC0>, MFLO_FM<0x10>;
+def MFLO : MMRel, MoveFromLOHI<"mflo", GPR32Opnd, AC0>, MFLO_FM<0x12>;
/// Sign Ext In Register Instructions.
-def SEB : SignExtInReg<"seb", i8, GPR32Opnd>, SEB_FM<0x10, 0x20>;
-def SEH : SignExtInReg<"seh", i16, GPR32Opnd>, SEB_FM<0x18, 0x20>;
+def SEB : MMRel, SignExtInReg<"seb", i8, GPR32Opnd>, SEB_FM<0x10, 0x20>;
+def SEH : MMRel, SignExtInReg<"seh", i16, GPR32Opnd>, SEB_FM<0x18, 0x20>;
/// Count Leading
-def CLZ : CountLeading0<"clz", GPR32Opnd>, CLO_FM<0x20>;
-def CLO : CountLeading1<"clo", GPR32Opnd>, CLO_FM<0x21>;
+def CLZ : MMRel, CountLeading0<"clz", GPR32Opnd>, CLO_FM<0x20>;
+def CLO : MMRel, CountLeading1<"clo", GPR32Opnd>, CLO_FM<0x21>;
/// Word Swap Bytes Within Halfwords
-def WSBH : SubwordSwap<"wsbh", GPR32Opnd>, SEB_FM<2, 0x20>;
+def WSBH : MMRel, SubwordSwap<"wsbh", GPR32Opnd>, SEB_FM<2, 0x20>;
/// No operation.
def NOP : PseudoSE<(outs), (ins), []>, PseudoInstExpansion<(SLL ZERO, ZERO, 0)>;
@@ -1065,39 +1079,41 @@ def NOP : PseudoSE<(outs), (ins), []>, PseudoInstExpansion<(SLL ZERO, ZERO, 0)>;
// instructions. The same does not happen for stack address copies, so an
// add op with mem ComplexPattern is used and the stack address copy
// can be matched. It's similar to Sparc LEA_ADDRi.
-def LEA_ADDiu : EffectiveAddress<"addiu", GPR32Opnd, mem_ea>, LW_FM<9>;
+def LEA_ADDiu : EffectiveAddress<"addiu", GPR32Opnd>, LW_FM<9>;
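+// For example, a copy of a stack slot address such as "addiu $4, $sp, 16"
+// is matched through this definition once frame indices have been lowered
+// (illustrative operands; the actual offset comes from frame lowering).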
// MADD*/MSUB*
-def MADD : MArithR<"madd", 1>, MULT_FM<0x1c, 0>;
-def MADDU : MArithR<"maddu", 1>, MULT_FM<0x1c, 1>;
-def MSUB : MArithR<"msub">, MULT_FM<0x1c, 4>;
-def MSUBU : MArithR<"msubu">, MULT_FM<0x1c, 5>;
+def MADD : MMRel, MArithR<"madd", 1>, MULT_FM<0x1c, 0>;
+def MADDU : MMRel, MArithR<"maddu", 1>, MULT_FM<0x1c, 1>;
+def MSUB : MMRel, MArithR<"msub">, MULT_FM<0x1c, 4>;
+def MSUBU : MMRel, MArithR<"msubu">, MULT_FM<0x1c, 5>;
+
+let Predicates = [HasStdEnc, NotDSP] in {
+def PseudoMULT : MultDivPseudo<MULT, ACC64, GPR32Opnd, MipsMult, IIImult>;
+def PseudoMULTu : MultDivPseudo<MULTu, ACC64, GPR32Opnd, MipsMultu, IIImult>;
+def PseudoMFHI : PseudoMFLOHI<GPR32, ACC64, MipsMFHI>;
+def PseudoMFLO : PseudoMFLOHI<GPR32, ACC64, MipsMFLO>;
+def PseudoMTLOHI : PseudoMTLOHI<ACC64, GPR32>;
def PseudoMADD : MAddSubPseudo<MADD, MipsMAdd>;
def PseudoMADDU : MAddSubPseudo<MADDU, MipsMAddu>;
def PseudoMSUB : MAddSubPseudo<MSUB, MipsMSub>;
def PseudoMSUBU : MAddSubPseudo<MSUBU, MipsMSubu>;
+}
+
+def PseudoSDIV : MultDivPseudo<SDIV, ACC64, GPR32Opnd, MipsDivRem, IIIdiv,
+ 0, 1, 1>;
+def PseudoUDIV : MultDivPseudo<UDIV, ACC64, GPR32Opnd, MipsDivRemU, IIIdiv,
+ 0, 1, 1>;
def RDHWR : ReadHardware<GPR32Opnd, HWRegsOpnd>, RDHWR_FM;
-def EXT : ExtBase<"ext", GPR32Opnd>, EXT_FM<0>;
-def INS : InsBase<"ins", GPR32Opnd>, EXT_FM<4>;
+def EXT : MMRel, ExtBase<"ext", GPR32Opnd, uimm5, MipsExt>, EXT_FM<0>;
+def INS : MMRel, InsBase<"ins", GPR32Opnd, uimm5, MipsIns>, EXT_FM<4>;
/// Move Control Registers From/To CPU Registers
-def MFC0_3OP : MFC3OP<(outs GPR32Opnd:$rt),
- (ins GPR32Opnd:$rd, uimm16:$sel),
- "mfc0\t$rt, $rd, $sel">, MFC3OP_FM<0x10, 0>;
-
-def MTC0_3OP : MFC3OP<(outs GPR32Opnd:$rd, uimm16:$sel),
- (ins GPR32Opnd:$rt),
- "mtc0\t$rt, $rd, $sel">, MFC3OP_FM<0x10, 4>;
-
-def MFC2_3OP : MFC3OP<(outs GPR32Opnd:$rt),
- (ins GPR32Opnd:$rd, uimm16:$sel),
- "mfc2\t$rt, $rd, $sel">, MFC3OP_FM<0x12, 0>;
-
-def MTC2_3OP : MFC3OP<(outs GPR32Opnd:$rd, uimm16:$sel),
- (ins GPR32Opnd:$rt),
- "mtc2\t$rt, $rd, $sel">, MFC3OP_FM<0x12, 4>;
+def MFC0 : MFC3OP<"mfc0", GPR32Opnd>, MFC3OP_FM<0x10, 0>;
+def MTC0 : MFC3OP<"mtc0", GPR32Opnd>, MFC3OP_FM<0x10, 4>;
+def MFC2 : MFC3OP<"mfc2", GPR32Opnd>, MFC3OP_FM<0x12, 0>;
+def MTC2 : MFC3OP<"mtc2", GPR32Opnd>, MFC3OP_FM<0x12, 4>;
//===----------------------------------------------------------------------===//
// Instruction aliases
@@ -1129,14 +1145,11 @@ def : InstAlias<"xor $rs, $rt, $imm",
def : InstAlias<"or $rs, $rt, $imm",
(ORi GPR32Opnd:$rs, GPR32Opnd:$rt, uimm16:$imm), 0>;
def : InstAlias<"nop", (SLL ZERO, ZERO, 0), 1>;
-def : InstAlias<"mfc0 $rt, $rd",
- (MFC0_3OP GPR32Opnd:$rt, GPR32Opnd:$rd, 0), 0>;
-def : InstAlias<"mtc0 $rt, $rd",
- (MTC0_3OP GPR32Opnd:$rd, 0, GPR32Opnd:$rt), 0>;
-def : InstAlias<"mfc2 $rt, $rd",
- (MFC2_3OP GPR32Opnd:$rt, GPR32Opnd:$rd, 0), 0>;
-def : InstAlias<"mtc2 $rt, $rd",
- (MTC2_3OP GPR32Opnd:$rd, 0, GPR32Opnd:$rt), 0>;
+def : InstAlias<"mfc0 $rt, $rd", (MFC0 GPR32Opnd:$rt, GPR32Opnd:$rd, 0), 0>;
+def : InstAlias<"mtc0 $rt, $rd", (MTC0 GPR32Opnd:$rt, GPR32Opnd:$rd, 0), 0>;
+def : InstAlias<"mfc2 $rt, $rd", (MFC2 GPR32Opnd:$rt, GPR32Opnd:$rd, 0), 0>;
+def : InstAlias<"mtc2 $rt, $rd", (MTC2 GPR32Opnd:$rt, GPR32Opnd:$rd, 0), 0>;
+def : InstAlias<"b $offset", (BEQ ZERO, ZERO, brtarget:$offset), 0>;
def : InstAlias<"bnez $rs,$offset",
(BNE GPR32Opnd:$rs, ZERO, brtarget:$offset), 0>;
def : InstAlias<"beqz $rs,$offset",
@@ -1145,6 +1158,20 @@ def : InstAlias<"syscall", (SYSCALL 0), 1>;
def : InstAlias<"break $imm", (BREAK uimm10:$imm, 0), 1>;
def : InstAlias<"break", (BREAK 0, 0), 1>;
+def : InstAlias<"ei", (EI ZERO), 1>;
+def : InstAlias<"di", (DI ZERO), 1>;
+
+def : InstAlias<"teq $rs, $rt", (TEQ GPR32Opnd:$rs, GPR32Opnd:$rt, 0), 1>;
+def : InstAlias<"tge $rs, $rt", (TGE GPR32Opnd:$rs, GPR32Opnd:$rt, 0), 1>;
+def : InstAlias<"tgeu $rs, $rt", (TGEU GPR32Opnd:$rs, GPR32Opnd:$rt, 0), 1>;
+def : InstAlias<"tlt $rs, $rt", (TLT GPR32Opnd:$rs, GPR32Opnd:$rt, 0), 1>;
+def : InstAlias<"tltu $rs, $rt", (TLTU GPR32Opnd:$rs, GPR32Opnd:$rt, 0), 1>;
+def : InstAlias<"tne $rs, $rt", (TNE GPR32Opnd:$rs, GPR32Opnd:$rt, 0), 1>;
+def : InstAlias<"sub, $rd, $rs, $imm",
+ (ADDi GPR32Opnd:$rd, GPR32Opnd:$rs, InvertedImOperand:$imm)>;
+def : InstAlias<"subu, $rd, $rs, $imm",
+ (ADDiu GPR32Opnd:$rd, GPR32Opnd:$rs, InvertedImOperand:$imm)>;
+
//===----------------------------------------------------------------------===//
// Assembler Pseudo Instructions
//===----------------------------------------------------------------------===//
@@ -1152,7 +1179,7 @@ def : InstAlias<"break", (BREAK 0, 0), 1>;
class LoadImm32< string instr_asm, Operand Od, RegisterOperand RO> :
MipsAsmPseudoInst<(outs RO:$rt), (ins Od:$imm32),
!strconcat(instr_asm, "\t$rt, $imm32")> ;
-def LoadImm32Reg : LoadImm32<"li", shamt,GPR32Opnd>;
+def LoadImm32Reg : LoadImm32<"li", uimm5, GPR32Opnd>;
class LoadAddress<string instr_asm, Operand MemOpnd, RegisterOperand RO> :
MipsAsmPseudoInst<(outs RO:$rt), (ins MemOpnd:$addr),
@@ -1162,9 +1189,7 @@ def LoadAddr32Reg : LoadAddress<"la", mem, GPR32Opnd>;
class LoadAddressImm<string instr_asm, Operand Od, RegisterOperand RO> :
MipsAsmPseudoInst<(outs RO:$rt), (ins Od:$imm32),
!strconcat(instr_asm, "\t$rt, $imm32")> ;
-def LoadAddr32Imm : LoadAddressImm<"la", shamt,GPR32Opnd>;
-
-
+def LoadAddr32Imm : LoadAddressImm<"la", uimm5, GPR32Opnd>;
//===----------------------------------------------------------------------===//
// Arbitrary patterns that map to one or more instructions
@@ -1261,24 +1286,15 @@ def : MipsPat<(not GPR32:$in),
(NOR GPR32Opnd:$in, ZERO)>;
// extended loads
-let Predicates = [NotN64, HasStdEnc] in {
+let Predicates = [HasStdEnc] in {
def : MipsPat<(i32 (extloadi1 addr:$src)), (LBu addr:$src)>;
def : MipsPat<(i32 (extloadi8 addr:$src)), (LBu addr:$src)>;
def : MipsPat<(i32 (extloadi16 addr:$src)), (LHu addr:$src)>;
}
-let Predicates = [IsN64, HasStdEnc] in {
- def : MipsPat<(i32 (extloadi1 addr:$src)), (LBu_P8 addr:$src)>;
- def : MipsPat<(i32 (extloadi8 addr:$src)), (LBu_P8 addr:$src)>;
- def : MipsPat<(i32 (extloadi16 addr:$src)), (LHu_P8 addr:$src)>;
-}
// peepholes
-let Predicates = [NotN64, HasStdEnc] in {
- def : MipsPat<(store (i32 0), addr:$dst), (SW ZERO, addr:$dst)>;
-}
-let Predicates = [IsN64, HasStdEnc] in {
- def : MipsPat<(store (i32 0), addr:$dst), (SW_P8 ZERO, addr:$dst)>;
-}
+let Predicates = [HasStdEnc] in
+def : MipsPat<(store (i32 0), addr:$dst), (SW ZERO, addr:$dst)>;
// brcond patterns
multiclass BrcondPats<RegisterClass RC, Instruction BEQOp, Instruction BNEOp,
@@ -1369,22 +1385,13 @@ defm : SetgeImmPats<GPR32, SLTi, SLTiu>;
// bswap pattern
def : MipsPat<(bswap GPR32:$rt), (ROTR (WSBH GPR32:$rt), 16)>;
-// mflo/hi patterns.
-def : MipsPat<(i32 (ExtractLOHI ACRegs:$ac, imm:$lohi_idx)),
- (EXTRACT_SUBREG ACRegs:$ac, imm:$lohi_idx)>;
-
// Load halfword/word patterns.
let AddedComplexity = 40 in {
- let Predicates = [NotN64, HasStdEnc] in {
+ let Predicates = [HasStdEnc] in {
def : LoadRegImmPat<LBu, i32, zextloadi8>;
def : LoadRegImmPat<LH, i32, sextloadi16>;
def : LoadRegImmPat<LW, i32, load>;
}
- let Predicates = [IsN64, HasStdEnc] in {
- def : LoadRegImmPat<LBu_P8, i32, zextloadi8>;
- def : LoadRegImmPat<LH_P8, i32, sextloadi16>;
- def : LoadRegImmPat<LW_P8, i32, load>;
- }
}
//===----------------------------------------------------------------------===//
@@ -1405,6 +1412,10 @@ include "Mips16InstrInfo.td"
include "MipsDSPInstrFormats.td"
include "MipsDSPInstrInfo.td"
+// MSA
+include "MipsMSAInstrFormats.td"
+include "MipsMSAInstrInfo.td"
+
// Micromips
include "MicroMipsInstrFormats.td"
include "MicroMipsInstrInfo.td"
diff --git a/lib/Target/Mips/MipsLongBranch.cpp b/lib/Target/Mips/MipsLongBranch.cpp
index 971176e..2efe578 100644
--- a/lib/Target/Mips/MipsLongBranch.cpp
+++ b/lib/Target/Mips/MipsLongBranch.cpp
@@ -237,6 +237,11 @@ void MipsLongBranch::replaceBranch(MachineBasicBlock &MBB, Iter Br,
MIB.addMBB(MBBOpnd);
+ // Bundle the instruction in the delay slot with the newly created branch
+ // and erase the original branch.
+ assert(Br->isBundledWithSucc());
+ MachineBasicBlock::instr_iterator II(Br);
+ MIBundleBuilder(&*MIB).append((++II)->removeFromBundle());
Br->eraseFromParent();
}
@@ -432,8 +437,10 @@ bool MipsLongBranch::runOnMachineFunction(MachineFunction &F) {
if (!I->Br || I->HasLongBranch)
continue;
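+    // Branch offsets are encoded in units of the instruction size: 2 bytes
+    // in microMIPS mode and 4 bytes otherwise, hence the divisor below.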
+ int ShVal = TM.getSubtarget<MipsSubtarget>().inMicroMipsMode() ? 2 : 4;
+
// Check if offset fits into 16-bit immediate field of branches.
- if (!ForceLongBranch && isInt<16>(computeOffset(I->Br) / 4))
+ if (!ForceLongBranch && isInt<16>(computeOffset(I->Br) / ShVal))
continue;
I->HasLongBranch = true;
diff --git a/lib/Target/Mips/MipsMCInstLower.cpp b/lib/Target/Mips/MipsMCInstLower.cpp
index d836975..b6dfadc 100644
--- a/lib/Target/Mips/MipsMCInstLower.cpp
+++ b/lib/Target/Mips/MipsMCInstLower.cpp
@@ -28,8 +28,7 @@ using namespace llvm;
MipsMCInstLower::MipsMCInstLower(MipsAsmPrinter &asmprinter)
: AsmPrinter(asmprinter) {}
-void MipsMCInstLower::Initialize(Mangler *M, MCContext *C) {
- Mang = M;
+void MipsMCInstLower::Initialize(MCContext *C) {
Ctx = C;
}
@@ -74,7 +73,7 @@ MCOperand MipsMCInstLower::LowerSymbolOperand(const MachineOperand &MO,
break;
case MachineOperand::MO_GlobalAddress:
- Symbol = Mang->getSymbol(MO.getGlobal());
+ Symbol = AsmPrinter.getSymbol(MO.getGlobal());
Offset += MO.getOffset();
break;
diff --git a/lib/Target/Mips/MipsMCInstLower.h b/lib/Target/Mips/MipsMCInstLower.h
index c4a6016..4570bd9 100644
--- a/lib/Target/Mips/MipsMCInstLower.h
+++ b/lib/Target/Mips/MipsMCInstLower.h
@@ -19,7 +19,6 @@ namespace llvm {
class MCOperand;
class MachineInstr;
class MachineFunction;
- class Mangler;
class MipsAsmPrinter;
/// MipsMCInstLower - This class is used to lower an MachineInstr into an
@@ -27,11 +26,10 @@ namespace llvm {
class LLVM_LIBRARY_VISIBILITY MipsMCInstLower {
typedef MachineOperand::MachineOperandType MachineOperandType;
MCContext *Ctx;
- Mangler *Mang;
MipsAsmPrinter &AsmPrinter;
public:
MipsMCInstLower(MipsAsmPrinter &asmprinter);
- void Initialize(Mangler *mang, MCContext *C);
+ void Initialize(MCContext *C);
void Lower(const MachineInstr *MI, MCInst &OutMI) const;
MCOperand LowerOperand(const MachineOperand& MO, unsigned offset = 0) const;
diff --git a/lib/Target/Mips/MipsMSAInstrFormats.td b/lib/Target/Mips/MipsMSAInstrFormats.td
new file mode 100644
index 0000000..875dc0b
--- /dev/null
+++ b/lib/Target/Mips/MipsMSAInstrFormats.td
@@ -0,0 +1,406 @@
+//===- MipsMSAInstrFormats.td - Mips Instruction Formats ---*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+def HasMSA : Predicate<"Subtarget.hasMSA()">,
+ AssemblerPredicate<"FeatureMSA">;
+
+class MSAInst : MipsInst<(outs), (ins), "", [], NoItinerary, FrmOther> {
+ let Predicates = [HasMSA];
+ let Inst{31-26} = 0b011110;
+}
+
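+// All MSA instructions live under the 0b011110 major opcode set above; the
+// two classes below override Inst{31-26} because MSA branches are encoded
+// in the COP1 (0b010001) opcode space and LSA in SPECIAL (0b000000).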
+class MSACBranch : MSAInst {
+ let Inst{31-26} = 0b010001;
+}
+
+class MSASpecial : MSAInst {
+ let Inst{31-26} = 0b000000;
+}
+
+class PseudoMSA<dag outs, dag ins, list<dag> pattern,
+ InstrItinClass itin = IIPseudo>:
+ MipsPseudo<outs, ins, pattern, itin> {
+ let Predicates = [HasMSA];
+}
+
+class MSA_BIT_B_FMT<bits<3> major, bits<6> minor>: MSAInst {
+ bits<5> ws;
+ bits<5> wd;
+ bits<3> m;
+
+ let Inst{25-23} = major;
+ let Inst{22-19} = 0b1110;
+ let Inst{18-16} = m;
+ let Inst{15-11} = ws;
+ let Inst{10-6} = wd;
+ let Inst{5-0} = minor;
+}
+
+class MSA_BIT_H_FMT<bits<3> major, bits<6> minor>: MSAInst {
+ bits<5> ws;
+ bits<5> wd;
+ bits<4> m;
+
+ let Inst{25-23} = major;
+ let Inst{22-20} = 0b110;
+ let Inst{19-16} = m;
+ let Inst{15-11} = ws;
+ let Inst{10-6} = wd;
+ let Inst{5-0} = minor;
+}
+
+class MSA_BIT_W_FMT<bits<3> major, bits<6> minor>: MSAInst {
+ bits<5> ws;
+ bits<5> wd;
+ bits<5> m;
+
+ let Inst{25-23} = major;
+ let Inst{22-21} = 0b10;
+ let Inst{20-16} = m;
+ let Inst{15-11} = ws;
+ let Inst{10-6} = wd;
+ let Inst{5-0} = minor;
+}
+
+class MSA_BIT_D_FMT<bits<3> major, bits<6> minor>: MSAInst {
+ bits<5> ws;
+ bits<5> wd;
+ bits<6> m;
+
+ let Inst{25-23} = major;
+ let Inst{22} = 0b0;
+ let Inst{21-16} = m;
+ let Inst{15-11} = ws;
+ let Inst{10-6} = wd;
+ let Inst{5-0} = minor;
+}
+
+class MSA_2R_FILL_FMT<bits<8> major, bits<2> df, bits<6> minor>: MSAInst {
+ bits<5> rs;
+ bits<5> wd;
+
+ let Inst{25-18} = major;
+ let Inst{17-16} = df;
+ let Inst{15-11} = rs;
+ let Inst{10-6} = wd;
+ let Inst{5-0} = minor;
+}
+
+class MSA_2R_FMT<bits<8> major, bits<2> df, bits<6> minor>: MSAInst {
+ bits<5> ws;
+ bits<5> wd;
+
+ let Inst{25-18} = major;
+ let Inst{17-16} = df;
+ let Inst{15-11} = ws;
+ let Inst{10-6} = wd;
+ let Inst{5-0} = minor;
+}
+
+class MSA_2RF_FMT<bits<9> major, bits<1> df, bits<6> minor>: MSAInst {
+ bits<5> ws;
+ bits<5> wd;
+
+ let Inst{25-17} = major;
+ let Inst{16} = df;
+ let Inst{15-11} = ws;
+ let Inst{10-6} = wd;
+ let Inst{5-0} = minor;
+}
+
+class MSA_3R_FMT<bits<3> major, bits<2> df, bits<6> minor>: MSAInst {
+ bits<5> wt;
+ bits<5> ws;
+ bits<5> wd;
+
+ let Inst{25-23} = major;
+ let Inst{22-21} = df;
+ let Inst{20-16} = wt;
+ let Inst{15-11} = ws;
+ let Inst{10-6} = wd;
+ let Inst{5-0} = minor;
+}
+
+class MSA_3RF_FMT<bits<4> major, bits<1> df, bits<6> minor>: MSAInst {
+ bits<5> wt;
+ bits<5> ws;
+ bits<5> wd;
+
+ let Inst{25-22} = major;
+ let Inst{21} = df;
+ let Inst{20-16} = wt;
+ let Inst{15-11} = ws;
+ let Inst{10-6} = wd;
+ let Inst{5-0} = minor;
+}
+
+class MSA_3R_INDEX_FMT<bits<3> major, bits<2> df, bits<6> minor>: MSAInst {
+ bits<5> rt;
+ bits<5> ws;
+ bits<5> wd;
+
+ let Inst{25-23} = major;
+ let Inst{22-21} = df;
+ let Inst{20-16} = rt;
+ let Inst{15-11} = ws;
+ let Inst{10-6} = wd;
+ let Inst{5-0} = minor;
+}
+
+class MSA_ELM_FMT<bits<10> major, bits<6> minor>: MSAInst {
+ bits<5> ws;
+ bits<5> wd;
+
+ let Inst{25-16} = major;
+ let Inst{15-11} = ws;
+ let Inst{10-6} = wd;
+ let Inst{5-0} = minor;
+}
+
+class MSA_ELM_CFCMSA_FMT<bits<10> major, bits<6> minor>: MSAInst {
+ bits<5> rd;
+ bits<5> cs;
+
+ let Inst{25-16} = major;
+ let Inst{15-11} = cs;
+ let Inst{10-6} = rd;
+ let Inst{5-0} = minor;
+}
+
+class MSA_ELM_CTCMSA_FMT<bits<10> major, bits<6> minor>: MSAInst {
+ bits<5> rs;
+ bits<5> cd;
+
+ let Inst{25-16} = major;
+ let Inst{15-11} = rs;
+ let Inst{10-6} = cd;
+ let Inst{5-0} = minor;
+}
+
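+// In the per-format ELM classes below, the data format is encoded as a
+// prefix of Inst{21-16} (0b00, 0b100, 0b1100, 0b11100), leaving the
+// remaining bits for the element index n.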
+class MSA_ELM_B_FMT<bits<4> major, bits<6> minor>: MSAInst {
+ bits<4> n;
+ bits<5> ws;
+ bits<5> wd;
+
+ let Inst{25-22} = major;
+ let Inst{21-20} = 0b00;
+ let Inst{19-16} = n{3-0};
+ let Inst{15-11} = ws;
+ let Inst{10-6} = wd;
+ let Inst{5-0} = minor;
+}
+
+class MSA_ELM_H_FMT<bits<4> major, bits<6> minor>: MSAInst {
+ bits<4> n;
+ bits<5> ws;
+ bits<5> wd;
+
+ let Inst{25-22} = major;
+ let Inst{21-19} = 0b100;
+ let Inst{18-16} = n{2-0};
+ let Inst{15-11} = ws;
+ let Inst{10-6} = wd;
+ let Inst{5-0} = minor;
+}
+
+class MSA_ELM_W_FMT<bits<4> major, bits<6> minor>: MSAInst {
+ bits<4> n;
+ bits<5> ws;
+ bits<5> wd;
+
+ let Inst{25-22} = major;
+ let Inst{21-18} = 0b1100;
+ let Inst{17-16} = n{1-0};
+ let Inst{15-11} = ws;
+ let Inst{10-6} = wd;
+ let Inst{5-0} = minor;
+}
+
+class MSA_ELM_D_FMT<bits<4> major, bits<6> minor>: MSAInst {
+ bits<4> n;
+ bits<5> ws;
+ bits<5> wd;
+
+ let Inst{25-22} = major;
+ let Inst{21-17} = 0b11100;
+ let Inst{16} = n{0};
+ let Inst{15-11} = ws;
+ let Inst{10-6} = wd;
+ let Inst{5-0} = minor;
+}
+
+class MSA_ELM_COPY_B_FMT<bits<4> major, bits<6> minor>: MSAInst {
+ bits<4> n;
+ bits<5> ws;
+ bits<5> rd;
+
+ let Inst{25-22} = major;
+ let Inst{21-20} = 0b00;
+ let Inst{19-16} = n{3-0};
+ let Inst{15-11} = ws;
+ let Inst{10-6} = rd;
+ let Inst{5-0} = minor;
+}
+
+class MSA_ELM_COPY_H_FMT<bits<4> major, bits<6> minor>: MSAInst {
+ bits<4> n;
+ bits<5> ws;
+ bits<5> rd;
+
+ let Inst{25-22} = major;
+ let Inst{21-19} = 0b100;
+ let Inst{18-16} = n{2-0};
+ let Inst{15-11} = ws;
+ let Inst{10-6} = rd;
+ let Inst{5-0} = minor;
+}
+
+class MSA_ELM_COPY_W_FMT<bits<4> major, bits<6> minor>: MSAInst {
+ bits<4> n;
+ bits<5> ws;
+ bits<5> rd;
+
+ let Inst{25-22} = major;
+ let Inst{21-18} = 0b1100;
+ let Inst{17-16} = n{1-0};
+ let Inst{15-11} = ws;
+ let Inst{10-6} = rd;
+ let Inst{5-0} = minor;
+}
+
+class MSA_ELM_INSERT_B_FMT<bits<4> major, bits<6> minor>: MSAInst {
+ bits<6> n;
+ bits<5> rs;
+ bits<5> wd;
+
+ let Inst{25-22} = major;
+ let Inst{21-20} = 0b00;
+ let Inst{19-16} = n{3-0};
+ let Inst{15-11} = rs;
+ let Inst{10-6} = wd;
+ let Inst{5-0} = minor;
+}
+
+class MSA_ELM_INSERT_H_FMT<bits<4> major, bits<6> minor>: MSAInst {
+ bits<6> n;
+ bits<5> rs;
+ bits<5> wd;
+
+ let Inst{25-22} = major;
+ let Inst{21-19} = 0b100;
+ let Inst{18-16} = n{2-0};
+ let Inst{15-11} = rs;
+ let Inst{10-6} = wd;
+ let Inst{5-0} = minor;
+}
+
+class MSA_ELM_INSERT_W_FMT<bits<4> major, bits<6> minor>: MSAInst {
+ bits<6> n;
+ bits<5> rs;
+ bits<5> wd;
+
+ let Inst{25-22} = major;
+ let Inst{21-18} = 0b1100;
+ let Inst{17-16} = n{1-0};
+ let Inst{15-11} = rs;
+ let Inst{10-6} = wd;
+ let Inst{5-0} = minor;
+}
+
+class MSA_I5_FMT<bits<3> major, bits<2> df, bits<6> minor>: MSAInst {
+ bits<5> imm;
+ bits<5> ws;
+ bits<5> wd;
+
+ let Inst{25-23} = major;
+ let Inst{22-21} = df;
+ let Inst{20-16} = imm;
+ let Inst{15-11} = ws;
+ let Inst{10-6} = wd;
+ let Inst{5-0} = minor;
+}
+
+class MSA_I8_FMT<bits<2> major, bits<6> minor>: MSAInst {
+ bits<8> u8;
+ bits<5> ws;
+ bits<5> wd;
+
+ let Inst{25-24} = major;
+ let Inst{23-16} = u8;
+ let Inst{15-11} = ws;
+ let Inst{10-6} = wd;
+ let Inst{5-0} = minor;
+}
+
+class MSA_I10_FMT<bits<3> major, bits<2> df, bits<6> minor>: MSAInst {
+ bits<10> s10;
+ bits<5> wd;
+
+ let Inst{25-23} = major;
+ let Inst{22-21} = df;
+ let Inst{20-11} = s10;
+ let Inst{10-6} = wd;
+ let Inst{5-0} = minor;
+}
+
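+// The MI10 format used by MSA loads/stores packs the whole mem operand into
+// the 21-bit $addr field: addr{20-16} is the base register and addr{9-0}
+// the signed 10-bit offset, as reflected by the bit slices below.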
+class MSA_MI10_FMT<bits<2> df, bits<4> minor>: MSAInst {
+ bits<21> addr;
+ bits<5> wd;
+
+ let Inst{25-16} = addr{9-0};
+ let Inst{15-11} = addr{20-16};
+ let Inst{10-6} = wd;
+ let Inst{5-2} = minor;
+ let Inst{1-0} = df;
+}
+
+class MSA_VEC_FMT<bits<5> major, bits<6> minor>: MSAInst {
+ bits<5> wt;
+ bits<5> ws;
+ bits<5> wd;
+
+ let Inst{25-21} = major;
+ let Inst{20-16} = wt;
+ let Inst{15-11} = ws;
+ let Inst{10-6} = wd;
+ let Inst{5-0} = minor;
+}
+
+class MSA_CBRANCH_FMT<bits<3> major, bits<2> df>: MSACBranch {
+ bits<16> offset;
+ bits<5> wt;
+
+ let Inst{25-23} = major;
+ let Inst{22-21} = df;
+ let Inst{20-16} = wt;
+ let Inst{15-0} = offset;
+}
+
+class MSA_CBRANCH_V_FMT<bits<5> major>: MSACBranch {
+ bits<16> offset;
+ bits<5> wt;
+
+ let Inst{25-21} = major;
+ let Inst{20-16} = wt;
+ let Inst{15-0} = offset;
+}
+
+class SPECIAL_LSA_FMT<bits<6> minor>: MSASpecial {
+ bits<5> rs;
+ bits<5> rt;
+ bits<5> rd;
+ bits<2> sa;
+
+ let Inst{25-21} = rs;
+ let Inst{20-16} = rt;
+ let Inst{15-11} = rd;
+ let Inst{10-8} = 0b000;
+ let Inst{7-6} = sa;
+ let Inst{5-0} = minor;
+}
diff --git a/lib/Target/Mips/MipsMSAInstrInfo.td b/lib/Target/Mips/MipsMSAInstrInfo.td
new file mode 100644
index 0000000..82c51a6
--- /dev/null
+++ b/lib/Target/Mips/MipsMSAInstrInfo.td
@@ -0,0 +1,3694 @@
+//===- MipsMSAInstrInfo.td - MSA ASE instructions ----------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes Mips MSA ASE instructions.
+//
+//===----------------------------------------------------------------------===//
+
+def SDT_MipsVecCond : SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisVec<1>]>;
+def SDT_VSetCC : SDTypeProfile<1, 3, [SDTCisInt<0>,
+ SDTCisInt<1>,
+ SDTCisSameAs<1, 2>,
+ SDTCisVT<3, OtherVT>]>;
+def SDT_VFSetCC : SDTypeProfile<1, 3, [SDTCisInt<0>,
+ SDTCisFP<1>,
+ SDTCisSameAs<1, 2>,
+ SDTCisVT<3, OtherVT>]>;
+def SDT_VSHF : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisVec<0>,
+ SDTCisInt<1>, SDTCisVec<1>,
+ SDTCisSameAs<0, 2>, SDTCisSameAs<2, 3>]>;
+def SDT_SHF : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisVec<0>,
+ SDTCisVT<1, i32>, SDTCisSameAs<0, 2>]>;
+def SDT_ILV : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisVec<0>,
+ SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>]>;
+
+def MipsVAllNonZero : SDNode<"MipsISD::VALL_NONZERO", SDT_MipsVecCond>;
+def MipsVAnyNonZero : SDNode<"MipsISD::VANY_NONZERO", SDT_MipsVecCond>;
+def MipsVAllZero : SDNode<"MipsISD::VALL_ZERO", SDT_MipsVecCond>;
+def MipsVAnyZero : SDNode<"MipsISD::VANY_ZERO", SDT_MipsVecCond>;
+def MipsVSMax : SDNode<"MipsISD::VSMAX", SDTIntBinOp,
+ [SDNPCommutative, SDNPAssociative]>;
+def MipsVSMin : SDNode<"MipsISD::VSMIN", SDTIntBinOp,
+ [SDNPCommutative, SDNPAssociative]>;
+def MipsVUMax : SDNode<"MipsISD::VUMAX", SDTIntBinOp,
+ [SDNPCommutative, SDNPAssociative]>;
+def MipsVUMin : SDNode<"MipsISD::VUMIN", SDTIntBinOp,
+ [SDNPCommutative, SDNPAssociative]>;
+def MipsVNOR : SDNode<"MipsISD::VNOR", SDTIntBinOp,
+ [SDNPCommutative, SDNPAssociative]>;
+def MipsVSHF : SDNode<"MipsISD::VSHF", SDT_VSHF>;
+def MipsSHF : SDNode<"MipsISD::SHF", SDT_SHF>;
+def MipsILVEV : SDNode<"MipsISD::ILVEV", SDT_ILV>;
+def MipsILVOD : SDNode<"MipsISD::ILVOD", SDT_ILV>;
+def MipsILVL : SDNode<"MipsISD::ILVL", SDT_ILV>;
+def MipsILVR : SDNode<"MipsISD::ILVR", SDT_ILV>;
+def MipsPCKEV : SDNode<"MipsISD::PCKEV", SDT_ILV>;
+def MipsPCKOD : SDNode<"MipsISD::PCKOD", SDT_ILV>;
+
+def vsetcc : SDNode<"ISD::SETCC", SDT_VSetCC>;
+def vfsetcc : SDNode<"ISD::SETCC", SDT_VFSetCC>;
+
+def MipsVExtractSExt : SDNode<"MipsISD::VEXTRACT_SEXT_ELT",
+ SDTypeProfile<1, 3, [SDTCisPtrTy<2>]>, []>;
+def MipsVExtractZExt : SDNode<"MipsISD::VEXTRACT_ZEXT_ELT",
+ SDTypeProfile<1, 3, [SDTCisPtrTy<2>]>, []>;
+
+// Operands
+
+def uimm2 : Operand<i32> {
+ let PrintMethod = "printUnsignedImm";
+}
+
+// The immediate of an LSA instruction needs special handling:
+// the value is encoded as the immediate minus one.
+def uimm2LSAAsmOperand : AsmOperandClass {
+ let Name = "LSAImm";
+ let ParserMethod = "parseLSAImm";
+ let RenderMethod = "addImmOperands";
+}
+
+def LSAImm : Operand<i32> {
+ let PrintMethod = "printUnsignedImm";
+ let EncoderMethod = "getLSAImmEncoding";
+ let DecoderMethod = "DecodeLSAImm";
+ let ParserMatchClass = uimm2LSAAsmOperand;
+}
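+// For example, "lsa $rd, $rs, $rt, 2" shifts $rs left by two and would be
+// encoded with sa = 1; see immZExt2Lsa below (illustrative operands).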
+
+def uimm3 : Operand<i32> {
+ let PrintMethod = "printUnsignedImm8";
+}
+
+def uimm4 : Operand<i32> {
+ let PrintMethod = "printUnsignedImm8";
+}
+
+def uimm8 : Operand<i32> {
+ let PrintMethod = "printUnsignedImm8";
+}
+
+def simm5 : Operand<i32>;
+
+def simm10 : Operand<i32>;
+
+def vsplat_uimm1 : Operand<vAny> {
+ let PrintMethod = "printUnsignedImm8";
+}
+
+def vsplat_uimm2 : Operand<vAny> {
+ let PrintMethod = "printUnsignedImm8";
+}
+
+def vsplat_uimm3 : Operand<vAny> {
+ let PrintMethod = "printUnsignedImm8";
+}
+
+def vsplat_uimm4 : Operand<vAny> {
+ let PrintMethod = "printUnsignedImm8";
+}
+
+def vsplat_uimm5 : Operand<vAny> {
+ let PrintMethod = "printUnsignedImm8";
+}
+
+def vsplat_uimm6 : Operand<vAny> {
+ let PrintMethod = "printUnsignedImm8";
+}
+
+def vsplat_uimm8 : Operand<vAny> {
+ let PrintMethod = "printUnsignedImm8";
+}
+
+def vsplat_simm5 : Operand<vAny>;
+
+def vsplat_simm10 : Operand<vAny>;
+
+def immZExt2Lsa : ImmLeaf<i32, [{return isUInt<2>(Imm - 1);}]>;
+
+// Pattern fragments
+def vextract_sext_i8 : PatFrag<(ops node:$vec, node:$idx),
+ (MipsVExtractSExt node:$vec, node:$idx, i8)>;
+def vextract_sext_i16 : PatFrag<(ops node:$vec, node:$idx),
+ (MipsVExtractSExt node:$vec, node:$idx, i16)>;
+def vextract_sext_i32 : PatFrag<(ops node:$vec, node:$idx),
+ (MipsVExtractSExt node:$vec, node:$idx, i32)>;
+
+def vextract_zext_i8 : PatFrag<(ops node:$vec, node:$idx),
+ (MipsVExtractZExt node:$vec, node:$idx, i8)>;
+def vextract_zext_i16 : PatFrag<(ops node:$vec, node:$idx),
+ (MipsVExtractZExt node:$vec, node:$idx, i16)>;
+def vextract_zext_i32 : PatFrag<(ops node:$vec, node:$idx),
+ (MipsVExtractZExt node:$vec, node:$idx, i32)>;
+
+def vinsert_v16i8 : PatFrag<(ops node:$vec, node:$val, node:$idx),
+ (v16i8 (vector_insert node:$vec, node:$val, node:$idx))>;
+def vinsert_v8i16 : PatFrag<(ops node:$vec, node:$val, node:$idx),
+ (v8i16 (vector_insert node:$vec, node:$val, node:$idx))>;
+def vinsert_v4i32 : PatFrag<(ops node:$vec, node:$val, node:$idx),
+ (v4i32 (vector_insert node:$vec, node:$val, node:$idx))>;
+
+class vfsetcc_type<ValueType ResTy, ValueType OpTy, CondCode CC> :
+ PatFrag<(ops node:$lhs, node:$rhs),
+ (ResTy (vfsetcc (OpTy node:$lhs), (OpTy node:$rhs), CC))>;
+
+// ISD::SETFALSE cannot occur
+def vfsetoeq_v4f32 : vfsetcc_type<v4i32, v4f32, SETOEQ>;
+def vfsetoeq_v2f64 : vfsetcc_type<v2i64, v2f64, SETOEQ>;
+def vfsetoge_v4f32 : vfsetcc_type<v4i32, v4f32, SETOGE>;
+def vfsetoge_v2f64 : vfsetcc_type<v2i64, v2f64, SETOGE>;
+def vfsetogt_v4f32 : vfsetcc_type<v4i32, v4f32, SETOGT>;
+def vfsetogt_v2f64 : vfsetcc_type<v2i64, v2f64, SETOGT>;
+def vfsetole_v4f32 : vfsetcc_type<v4i32, v4f32, SETOLE>;
+def vfsetole_v2f64 : vfsetcc_type<v2i64, v2f64, SETOLE>;
+def vfsetolt_v4f32 : vfsetcc_type<v4i32, v4f32, SETOLT>;
+def vfsetolt_v2f64 : vfsetcc_type<v2i64, v2f64, SETOLT>;
+def vfsetone_v4f32 : vfsetcc_type<v4i32, v4f32, SETONE>;
+def vfsetone_v2f64 : vfsetcc_type<v2i64, v2f64, SETONE>;
+def vfsetord_v4f32 : vfsetcc_type<v4i32, v4f32, SETO>;
+def vfsetord_v2f64 : vfsetcc_type<v2i64, v2f64, SETO>;
+def vfsetun_v4f32 : vfsetcc_type<v4i32, v4f32, SETUO>;
+def vfsetun_v2f64 : vfsetcc_type<v2i64, v2f64, SETUO>;
+def vfsetueq_v4f32 : vfsetcc_type<v4i32, v4f32, SETUEQ>;
+def vfsetueq_v2f64 : vfsetcc_type<v2i64, v2f64, SETUEQ>;
+def vfsetuge_v4f32 : vfsetcc_type<v4i32, v4f32, SETUGE>;
+def vfsetuge_v2f64 : vfsetcc_type<v2i64, v2f64, SETUGE>;
+def vfsetugt_v4f32 : vfsetcc_type<v4i32, v4f32, SETUGT>;
+def vfsetugt_v2f64 : vfsetcc_type<v2i64, v2f64, SETUGT>;
+def vfsetule_v4f32 : vfsetcc_type<v4i32, v4f32, SETULE>;
+def vfsetule_v2f64 : vfsetcc_type<v2i64, v2f64, SETULE>;
+def vfsetult_v4f32 : vfsetcc_type<v4i32, v4f32, SETULT>;
+def vfsetult_v2f64 : vfsetcc_type<v2i64, v2f64, SETULT>;
+def vfsetune_v4f32 : vfsetcc_type<v4i32, v4f32, SETUNE>;
+def vfsetune_v2f64 : vfsetcc_type<v2i64, v2f64, SETUNE>;
+// ISD::SETTRUE cannot occur
+// ISD::SETFALSE2 cannot occur
+// ISD::SETTRUE2 cannot occur
+
+class vsetcc_type<ValueType ResTy, CondCode CC> :
+ PatFrag<(ops node:$lhs, node:$rhs),
+ (ResTy (vsetcc node:$lhs, node:$rhs, CC))>;
+
+def vseteq_v16i8 : vsetcc_type<v16i8, SETEQ>;
+def vseteq_v8i16 : vsetcc_type<v8i16, SETEQ>;
+def vseteq_v4i32 : vsetcc_type<v4i32, SETEQ>;
+def vseteq_v2i64 : vsetcc_type<v2i64, SETEQ>;
+def vsetle_v16i8 : vsetcc_type<v16i8, SETLE>;
+def vsetle_v8i16 : vsetcc_type<v8i16, SETLE>;
+def vsetle_v4i32 : vsetcc_type<v4i32, SETLE>;
+def vsetle_v2i64 : vsetcc_type<v2i64, SETLE>;
+def vsetlt_v16i8 : vsetcc_type<v16i8, SETLT>;
+def vsetlt_v8i16 : vsetcc_type<v8i16, SETLT>;
+def vsetlt_v4i32 : vsetcc_type<v4i32, SETLT>;
+def vsetlt_v2i64 : vsetcc_type<v2i64, SETLT>;
+def vsetule_v16i8 : vsetcc_type<v16i8, SETULE>;
+def vsetule_v8i16 : vsetcc_type<v8i16, SETULE>;
+def vsetule_v4i32 : vsetcc_type<v4i32, SETULE>;
+def vsetule_v2i64 : vsetcc_type<v2i64, SETULE>;
+def vsetult_v16i8 : vsetcc_type<v16i8, SETULT>;
+def vsetult_v8i16 : vsetcc_type<v8i16, SETULT>;
+def vsetult_v4i32 : vsetcc_type<v4i32, SETULT>;
+def vsetult_v2i64 : vsetcc_type<v2i64, SETULT>;
+
+def vsplati8 : PatFrag<(ops node:$e0),
+ (v16i8 (build_vector node:$e0, node:$e0,
+ node:$e0, node:$e0,
+ node:$e0, node:$e0,
+ node:$e0, node:$e0,
+ node:$e0, node:$e0,
+ node:$e0, node:$e0,
+ node:$e0, node:$e0,
+ node:$e0, node:$e0))>;
+def vsplati16 : PatFrag<(ops node:$e0),
+ (v8i16 (build_vector node:$e0, node:$e0,
+ node:$e0, node:$e0,
+ node:$e0, node:$e0,
+ node:$e0, node:$e0))>;
+def vsplati32 : PatFrag<(ops node:$e0),
+ (v4i32 (build_vector node:$e0, node:$e0,
+ node:$e0, node:$e0))>;
+def vsplati64 : PatFrag<(ops node:$e0),
+ (v2i64 (build_vector:$v0 node:$e0, node:$e0))>;
+def vsplatf32 : PatFrag<(ops node:$e0),
+ (v4f32 (build_vector node:$e0, node:$e0,
+ node:$e0, node:$e0))>;
+def vsplatf64 : PatFrag<(ops node:$e0),
+ (v2f64 (build_vector node:$e0, node:$e0))>;
+
+def vsplati8_elt : PatFrag<(ops node:$v, node:$i),
+ (MipsVSHF (vsplati8 node:$i), node:$v, node:$v)>;
+def vsplati16_elt : PatFrag<(ops node:$v, node:$i),
+ (MipsVSHF (vsplati16 node:$i), node:$v, node:$v)>;
+def vsplati32_elt : PatFrag<(ops node:$v, node:$i),
+ (MipsVSHF (vsplati32 node:$i), node:$v, node:$v)>;
+def vsplati64_elt : PatFrag<(ops node:$v, node:$i),
+ (MipsVSHF (vsplati64 node:$i), node:$v, node:$v)>;
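+// The *_elt fragments above splat element $i of $v: a VSHF whose control
+// vector repeats $i selects that element into every lane.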
+
+class SplatPatLeaf<Operand opclass, dag frag, code pred = [{}],
+ SDNodeXForm xform = NOOP_SDNodeXForm>
+ : PatLeaf<frag, pred, xform> {
+ Operand OpClass = opclass;
+}
+
+class SplatComplexPattern<Operand opclass, ValueType ty, int numops, string fn,
+ list<SDNode> roots = [],
+ list<SDNodeProperty> props = []> :
+ ComplexPattern<ty, numops, fn, roots, props> {
+ Operand OpClass = opclass;
+}
+
+def vsplati8_uimm3 : SplatComplexPattern<vsplat_uimm3, v16i8, 1,
+ "selectVSplatUimm3",
+ [build_vector, bitconvert]>;
+
+def vsplati8_uimm4 : SplatComplexPattern<vsplat_uimm4, v16i8, 1,
+ "selectVSplatUimm4",
+ [build_vector, bitconvert]>;
+
+def vsplati8_uimm5 : SplatComplexPattern<vsplat_uimm5, v16i8, 1,
+ "selectVSplatUimm5",
+ [build_vector, bitconvert]>;
+
+def vsplati8_uimm8 : SplatComplexPattern<vsplat_uimm8, v16i8, 1,
+ "selectVSplatUimm8",
+ [build_vector, bitconvert]>;
+
+def vsplati8_simm5 : SplatComplexPattern<vsplat_simm5, v16i8, 1,
+ "selectVSplatSimm5",
+ [build_vector, bitconvert]>;
+
+def vsplati16_uimm3 : SplatComplexPattern<vsplat_uimm3, v8i16, 1,
+ "selectVSplatUimm3",
+ [build_vector, bitconvert]>;
+
+def vsplati16_uimm4 : SplatComplexPattern<vsplat_uimm4, v8i16, 1,
+ "selectVSplatUimm4",
+ [build_vector, bitconvert]>;
+
+def vsplati16_uimm5 : SplatComplexPattern<vsplat_uimm5, v8i16, 1,
+ "selectVSplatUimm5",
+ [build_vector, bitconvert]>;
+
+def vsplati16_simm5 : SplatComplexPattern<vsplat_simm5, v8i16, 1,
+ "selectVSplatSimm5",
+ [build_vector, bitconvert]>;
+
+def vsplati32_uimm2 : SplatComplexPattern<vsplat_uimm2, v4i32, 1,
+ "selectVSplatUimm2",
+ [build_vector, bitconvert]>;
+
+def vsplati32_uimm5 : SplatComplexPattern<vsplat_uimm5, v4i32, 1,
+ "selectVSplatUimm5",
+ [build_vector, bitconvert]>;
+
+def vsplati32_simm5 : SplatComplexPattern<vsplat_simm5, v4i32, 1,
+ "selectVSplatSimm5",
+ [build_vector, bitconvert]>;
+
+def vsplati64_uimm1 : SplatComplexPattern<vsplat_uimm1, v2i64, 1,
+ "selectVSplatUimm1",
+ [build_vector, bitconvert]>;
+
+def vsplati64_uimm5 : SplatComplexPattern<vsplat_uimm5, v2i64, 1,
+ "selectVSplatUimm5",
+ [build_vector, bitconvert]>;
+
+def vsplati64_uimm6 : SplatComplexPattern<vsplat_uimm6, v2i64, 1,
+ "selectVSplatUimm6",
+ [build_vector, bitconvert]>;
+
+def vsplati64_simm5 : SplatComplexPattern<vsplat_simm5, v2i64, 1,
+ "selectVSplatSimm5",
+ [build_vector, bitconvert]>;
+
+// Any build_vector that is a constant splat with a value that is an exact
+// power of 2
+def vsplat_uimm_pow2 : ComplexPattern<vAny, 1, "selectVSplatUimmPow2",
+ [build_vector, bitconvert]>;
+
+// Any build_vector that is a constant splat with a value that is the bitwise
+// inverse of an exact power of 2
+def vsplat_uimm_inv_pow2 : ComplexPattern<vAny, 1, "selectVSplatUimmInvPow2",
+ [build_vector, bitconvert]>;
+
+// Any build_vector that is a constant splat with only a consecutive sequence
+// of left-most bits set.
+def vsplat_maskl_bits : SplatComplexPattern<vsplat_uimm8, vAny, 1,
+ "selectVSplatMaskL",
+ [build_vector, bitconvert]>;
+
+// Any build_vector that is a constant splat with only a consecutive sequence
+// of right-most bits set.
+def vsplat_maskr_bits : SplatComplexPattern<vsplat_uimm8, vAny, 1,
+ "selectVSplatMaskR",
+ [build_vector, bitconvert]>;
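+// For example (illustrative splat values): a v16i8 splat of 0x80 is an
+// exact power of 2, a splat of 0xF0 has only left-most bits set, and a
+// splat of 0x0F only right-most bits set.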
+
+// Any build_vector that is a constant splat with a value that equals 1
+// FIXME: These should be ComplexPatterns, but we can't use them because the
+// ISel generator requires the uses to have a name; providing a name, however,
+// causes other errors ("used in pattern but not operand list").
+def vsplat_imm_eq_1 : PatLeaf<(build_vector), [{
+ APInt Imm;
+ EVT EltTy = N->getValueType(0).getVectorElementType();
+
+ return selectVSplat(N, Imm) &&
+ Imm.getBitWidth() == EltTy.getSizeInBits() && Imm == 1;
+}]>;
+
+def vsplati64_imm_eq_1 : PatLeaf<(bitconvert (v4i32 (build_vector))), [{
+ APInt Imm;
+ SDNode *BV = N->getOperand(0).getNode();
+ EVT EltTy = N->getValueType(0).getVectorElementType();
+
+ return selectVSplat(BV, Imm) &&
+ Imm.getBitWidth() == EltTy.getSizeInBits() && Imm == 1;
+}]>;
+
+def vbclr_b : PatFrag<(ops node:$ws, node:$wt),
+ (and node:$ws, (xor (shl vsplat_imm_eq_1, node:$wt),
+ immAllOnesV))>;
+def vbclr_h : PatFrag<(ops node:$ws, node:$wt),
+ (and node:$ws, (xor (shl vsplat_imm_eq_1, node:$wt),
+ immAllOnesV))>;
+def vbclr_w : PatFrag<(ops node:$ws, node:$wt),
+ (and node:$ws, (xor (shl vsplat_imm_eq_1, node:$wt),
+ immAllOnesV))>;
+def vbclr_d : PatFrag<(ops node:$ws, node:$wt),
+ (and node:$ws, (xor (shl (v2i64 vsplati64_imm_eq_1),
+ node:$wt),
+ (bitconvert (v4i32 immAllOnesV))))>;
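+// The fragments above match "ws & ~(1 << wt)", i.e. clearing bit wt of each
+// element; the vbneg_* and vbset_* fragments below match the corresponding
+// xor (toggle) and or (set) forms of the same idiom.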
+
+def vbneg_b : PatFrag<(ops node:$ws, node:$wt),
+ (xor node:$ws, (shl vsplat_imm_eq_1, node:$wt))>;
+def vbneg_h : PatFrag<(ops node:$ws, node:$wt),
+ (xor node:$ws, (shl vsplat_imm_eq_1, node:$wt))>;
+def vbneg_w : PatFrag<(ops node:$ws, node:$wt),
+ (xor node:$ws, (shl vsplat_imm_eq_1, node:$wt))>;
+def vbneg_d : PatFrag<(ops node:$ws, node:$wt),
+ (xor node:$ws, (shl (v2i64 vsplati64_imm_eq_1),
+ node:$wt))>;
+
+def vbset_b : PatFrag<(ops node:$ws, node:$wt),
+ (or node:$ws, (shl vsplat_imm_eq_1, node:$wt))>;
+def vbset_h : PatFrag<(ops node:$ws, node:$wt),
+ (or node:$ws, (shl vsplat_imm_eq_1, node:$wt))>;
+def vbset_w : PatFrag<(ops node:$ws, node:$wt),
+ (or node:$ws, (shl vsplat_imm_eq_1, node:$wt))>;
+def vbset_d : PatFrag<(ops node:$ws, node:$wt),
+ (or node:$ws, (shl (v2i64 vsplati64_imm_eq_1),
+ node:$wt))>;
+
+def fms : PatFrag<(ops node:$wd, node:$ws, node:$wt),
+ (fsub node:$wd, (fmul node:$ws, node:$wt))>;
+
+def muladd : PatFrag<(ops node:$wd, node:$ws, node:$wt),
+ (add node:$wd, (mul node:$ws, node:$wt))>;
+
+def mulsub : PatFrag<(ops node:$wd, node:$ws, node:$wt),
+ (sub node:$wd, (mul node:$ws, node:$wt))>;
+
+def mul_fexp2 : PatFrag<(ops node:$ws, node:$wt),
+ (fmul node:$ws, (fexp2 node:$wt))>;
+
+// Immediates
+def immSExt5 : ImmLeaf<i32, [{return isInt<5>(Imm);}]>;
+def immSExt10: ImmLeaf<i32, [{return isInt<10>(Imm);}]>;
+
+// Instruction encoding.
+class ADD_A_B_ENC : MSA_3R_FMT<0b000, 0b00, 0b010000>;
+class ADD_A_H_ENC : MSA_3R_FMT<0b000, 0b01, 0b010000>;
+class ADD_A_W_ENC : MSA_3R_FMT<0b000, 0b10, 0b010000>;
+class ADD_A_D_ENC : MSA_3R_FMT<0b000, 0b11, 0b010000>;
+
+class ADDS_A_B_ENC : MSA_3R_FMT<0b001, 0b00, 0b010000>;
+class ADDS_A_H_ENC : MSA_3R_FMT<0b001, 0b01, 0b010000>;
+class ADDS_A_W_ENC : MSA_3R_FMT<0b001, 0b10, 0b010000>;
+class ADDS_A_D_ENC : MSA_3R_FMT<0b001, 0b11, 0b010000>;
+
+class ADDS_S_B_ENC : MSA_3R_FMT<0b010, 0b00, 0b010000>;
+class ADDS_S_H_ENC : MSA_3R_FMT<0b010, 0b01, 0b010000>;
+class ADDS_S_W_ENC : MSA_3R_FMT<0b010, 0b10, 0b010000>;
+class ADDS_S_D_ENC : MSA_3R_FMT<0b010, 0b11, 0b010000>;
+
+class ADDS_U_B_ENC : MSA_3R_FMT<0b011, 0b00, 0b010000>;
+class ADDS_U_H_ENC : MSA_3R_FMT<0b011, 0b01, 0b010000>;
+class ADDS_U_W_ENC : MSA_3R_FMT<0b011, 0b10, 0b010000>;
+class ADDS_U_D_ENC : MSA_3R_FMT<0b011, 0b11, 0b010000>;
+
+class ADDV_B_ENC : MSA_3R_FMT<0b000, 0b00, 0b001110>;
+class ADDV_H_ENC : MSA_3R_FMT<0b000, 0b01, 0b001110>;
+class ADDV_W_ENC : MSA_3R_FMT<0b000, 0b10, 0b001110>;
+class ADDV_D_ENC : MSA_3R_FMT<0b000, 0b11, 0b001110>;
+
+class ADDVI_B_ENC : MSA_I5_FMT<0b000, 0b00, 0b000110>;
+class ADDVI_H_ENC : MSA_I5_FMT<0b000, 0b01, 0b000110>;
+class ADDVI_W_ENC : MSA_I5_FMT<0b000, 0b10, 0b000110>;
+class ADDVI_D_ENC : MSA_I5_FMT<0b000, 0b11, 0b000110>;
+
+class AND_V_ENC : MSA_VEC_FMT<0b00000, 0b011110>;
+
+class ANDI_B_ENC : MSA_I8_FMT<0b00, 0b000000>;
+
+class ASUB_S_B_ENC : MSA_3R_FMT<0b100, 0b00, 0b010001>;
+class ASUB_S_H_ENC : MSA_3R_FMT<0b100, 0b01, 0b010001>;
+class ASUB_S_W_ENC : MSA_3R_FMT<0b100, 0b10, 0b010001>;
+class ASUB_S_D_ENC : MSA_3R_FMT<0b100, 0b11, 0b010001>;
+
+class ASUB_U_B_ENC : MSA_3R_FMT<0b101, 0b00, 0b010001>;
+class ASUB_U_H_ENC : MSA_3R_FMT<0b101, 0b01, 0b010001>;
+class ASUB_U_W_ENC : MSA_3R_FMT<0b101, 0b10, 0b010001>;
+class ASUB_U_D_ENC : MSA_3R_FMT<0b101, 0b11, 0b010001>;
+
+class AVE_S_B_ENC : MSA_3R_FMT<0b100, 0b00, 0b010000>;
+class AVE_S_H_ENC : MSA_3R_FMT<0b100, 0b01, 0b010000>;
+class AVE_S_W_ENC : MSA_3R_FMT<0b100, 0b10, 0b010000>;
+class AVE_S_D_ENC : MSA_3R_FMT<0b100, 0b11, 0b010000>;
+
+class AVE_U_B_ENC : MSA_3R_FMT<0b101, 0b00, 0b010000>;
+class AVE_U_H_ENC : MSA_3R_FMT<0b101, 0b01, 0b010000>;
+class AVE_U_W_ENC : MSA_3R_FMT<0b101, 0b10, 0b010000>;
+class AVE_U_D_ENC : MSA_3R_FMT<0b101, 0b11, 0b010000>;
+
+class AVER_S_B_ENC : MSA_3R_FMT<0b110, 0b00, 0b010000>;
+class AVER_S_H_ENC : MSA_3R_FMT<0b110, 0b01, 0b010000>;
+class AVER_S_W_ENC : MSA_3R_FMT<0b110, 0b10, 0b010000>;
+class AVER_S_D_ENC : MSA_3R_FMT<0b110, 0b11, 0b010000>;
+
+class AVER_U_B_ENC : MSA_3R_FMT<0b111, 0b00, 0b010000>;
+class AVER_U_H_ENC : MSA_3R_FMT<0b111, 0b01, 0b010000>;
+class AVER_U_W_ENC : MSA_3R_FMT<0b111, 0b10, 0b010000>;
+class AVER_U_D_ENC : MSA_3R_FMT<0b111, 0b11, 0b010000>;
+
+class BCLR_B_ENC : MSA_3R_FMT<0b011, 0b00, 0b001101>;
+class BCLR_H_ENC : MSA_3R_FMT<0b011, 0b01, 0b001101>;
+class BCLR_W_ENC : MSA_3R_FMT<0b011, 0b10, 0b001101>;
+class BCLR_D_ENC : MSA_3R_FMT<0b011, 0b11, 0b001101>;
+
+class BCLRI_B_ENC : MSA_BIT_B_FMT<0b011, 0b001001>;
+class BCLRI_H_ENC : MSA_BIT_H_FMT<0b011, 0b001001>;
+class BCLRI_W_ENC : MSA_BIT_W_FMT<0b011, 0b001001>;
+class BCLRI_D_ENC : MSA_BIT_D_FMT<0b011, 0b001001>;
+
+class BINSL_B_ENC : MSA_3R_FMT<0b110, 0b00, 0b001101>;
+class BINSL_H_ENC : MSA_3R_FMT<0b110, 0b01, 0b001101>;
+class BINSL_W_ENC : MSA_3R_FMT<0b110, 0b10, 0b001101>;
+class BINSL_D_ENC : MSA_3R_FMT<0b110, 0b11, 0b001101>;
+
+class BINSLI_B_ENC : MSA_BIT_B_FMT<0b110, 0b001001>;
+class BINSLI_H_ENC : MSA_BIT_H_FMT<0b110, 0b001001>;
+class BINSLI_W_ENC : MSA_BIT_W_FMT<0b110, 0b001001>;
+class BINSLI_D_ENC : MSA_BIT_D_FMT<0b110, 0b001001>;
+
+class BINSR_B_ENC : MSA_3R_FMT<0b111, 0b00, 0b001101>;
+class BINSR_H_ENC : MSA_3R_FMT<0b111, 0b01, 0b001101>;
+class BINSR_W_ENC : MSA_3R_FMT<0b111, 0b10, 0b001101>;
+class BINSR_D_ENC : MSA_3R_FMT<0b111, 0b11, 0b001101>;
+
+class BINSRI_B_ENC : MSA_BIT_B_FMT<0b111, 0b001001>;
+class BINSRI_H_ENC : MSA_BIT_H_FMT<0b111, 0b001001>;
+class BINSRI_W_ENC : MSA_BIT_W_FMT<0b111, 0b001001>;
+class BINSRI_D_ENC : MSA_BIT_D_FMT<0b111, 0b001001>;
+
+class BMNZ_V_ENC : MSA_VEC_FMT<0b00100, 0b011110>;
+
+class BMNZI_B_ENC : MSA_I8_FMT<0b00, 0b000001>;
+
+class BMZ_V_ENC : MSA_VEC_FMT<0b00101, 0b011110>;
+
+class BMZI_B_ENC : MSA_I8_FMT<0b01, 0b000001>;
+
+class BNEG_B_ENC : MSA_3R_FMT<0b101, 0b00, 0b001101>;
+class BNEG_H_ENC : MSA_3R_FMT<0b101, 0b01, 0b001101>;
+class BNEG_W_ENC : MSA_3R_FMT<0b101, 0b10, 0b001101>;
+class BNEG_D_ENC : MSA_3R_FMT<0b101, 0b11, 0b001101>;
+
+class BNEGI_B_ENC : MSA_BIT_B_FMT<0b101, 0b001001>;
+class BNEGI_H_ENC : MSA_BIT_H_FMT<0b101, 0b001001>;
+class BNEGI_W_ENC : MSA_BIT_W_FMT<0b101, 0b001001>;
+class BNEGI_D_ENC : MSA_BIT_D_FMT<0b101, 0b001001>;
+
+class BNZ_B_ENC : MSA_CBRANCH_FMT<0b111, 0b00>;
+class BNZ_H_ENC : MSA_CBRANCH_FMT<0b111, 0b01>;
+class BNZ_W_ENC : MSA_CBRANCH_FMT<0b111, 0b10>;
+class BNZ_D_ENC : MSA_CBRANCH_FMT<0b111, 0b11>;
+
+class BNZ_V_ENC : MSA_CBRANCH_V_FMT<0b01111>;
+
+class BSEL_V_ENC : MSA_VEC_FMT<0b00110, 0b011110>;
+
+class BSELI_B_ENC : MSA_I8_FMT<0b10, 0b000001>;
+
+class BSET_B_ENC : MSA_3R_FMT<0b100, 0b00, 0b001101>;
+class BSET_H_ENC : MSA_3R_FMT<0b100, 0b01, 0b001101>;
+class BSET_W_ENC : MSA_3R_FMT<0b100, 0b10, 0b001101>;
+class BSET_D_ENC : MSA_3R_FMT<0b100, 0b11, 0b001101>;
+
+class BSETI_B_ENC : MSA_BIT_B_FMT<0b100, 0b001001>;
+class BSETI_H_ENC : MSA_BIT_H_FMT<0b100, 0b001001>;
+class BSETI_W_ENC : MSA_BIT_W_FMT<0b100, 0b001001>;
+class BSETI_D_ENC : MSA_BIT_D_FMT<0b100, 0b001001>;
+
+class BZ_B_ENC : MSA_CBRANCH_FMT<0b110, 0b00>;
+class BZ_H_ENC : MSA_CBRANCH_FMT<0b110, 0b01>;
+class BZ_W_ENC : MSA_CBRANCH_FMT<0b110, 0b10>;
+class BZ_D_ENC : MSA_CBRANCH_FMT<0b110, 0b11>;
+
+class BZ_V_ENC : MSA_CBRANCH_V_FMT<0b01011>;
+
+class CEQ_B_ENC : MSA_3R_FMT<0b000, 0b00, 0b001111>;
+class CEQ_H_ENC : MSA_3R_FMT<0b000, 0b01, 0b001111>;
+class CEQ_W_ENC : MSA_3R_FMT<0b000, 0b10, 0b001111>;
+class CEQ_D_ENC : MSA_3R_FMT<0b000, 0b11, 0b001111>;
+
+class CEQI_B_ENC : MSA_I5_FMT<0b000, 0b00, 0b000111>;
+class CEQI_H_ENC : MSA_I5_FMT<0b000, 0b01, 0b000111>;
+class CEQI_W_ENC : MSA_I5_FMT<0b000, 0b10, 0b000111>;
+class CEQI_D_ENC : MSA_I5_FMT<0b000, 0b11, 0b000111>;
+
+class CFCMSA_ENC : MSA_ELM_CFCMSA_FMT<0b0001111110, 0b011001>;
+
+class CLE_S_B_ENC : MSA_3R_FMT<0b100, 0b00, 0b001111>;
+class CLE_S_H_ENC : MSA_3R_FMT<0b100, 0b01, 0b001111>;
+class CLE_S_W_ENC : MSA_3R_FMT<0b100, 0b10, 0b001111>;
+class CLE_S_D_ENC : MSA_3R_FMT<0b100, 0b11, 0b001111>;
+
+class CLE_U_B_ENC : MSA_3R_FMT<0b101, 0b00, 0b001111>;
+class CLE_U_H_ENC : MSA_3R_FMT<0b101, 0b01, 0b001111>;
+class CLE_U_W_ENC : MSA_3R_FMT<0b101, 0b10, 0b001111>;
+class CLE_U_D_ENC : MSA_3R_FMT<0b101, 0b11, 0b001111>;
+
+class CLEI_S_B_ENC : MSA_I5_FMT<0b100, 0b00, 0b000111>;
+class CLEI_S_H_ENC : MSA_I5_FMT<0b100, 0b01, 0b000111>;
+class CLEI_S_W_ENC : MSA_I5_FMT<0b100, 0b10, 0b000111>;
+class CLEI_S_D_ENC : MSA_I5_FMT<0b100, 0b11, 0b000111>;
+
+class CLEI_U_B_ENC : MSA_I5_FMT<0b101, 0b00, 0b000111>;
+class CLEI_U_H_ENC : MSA_I5_FMT<0b101, 0b01, 0b000111>;
+class CLEI_U_W_ENC : MSA_I5_FMT<0b101, 0b10, 0b000111>;
+class CLEI_U_D_ENC : MSA_I5_FMT<0b101, 0b11, 0b000111>;
+
+class CLT_S_B_ENC : MSA_3R_FMT<0b010, 0b00, 0b001111>;
+class CLT_S_H_ENC : MSA_3R_FMT<0b010, 0b01, 0b001111>;
+class CLT_S_W_ENC : MSA_3R_FMT<0b010, 0b10, 0b001111>;
+class CLT_S_D_ENC : MSA_3R_FMT<0b010, 0b11, 0b001111>;
+
+class CLT_U_B_ENC : MSA_3R_FMT<0b011, 0b00, 0b001111>;
+class CLT_U_H_ENC : MSA_3R_FMT<0b011, 0b01, 0b001111>;
+class CLT_U_W_ENC : MSA_3R_FMT<0b011, 0b10, 0b001111>;
+class CLT_U_D_ENC : MSA_3R_FMT<0b011, 0b11, 0b001111>;
+
+class CLTI_S_B_ENC : MSA_I5_FMT<0b010, 0b00, 0b000111>;
+class CLTI_S_H_ENC : MSA_I5_FMT<0b010, 0b01, 0b000111>;
+class CLTI_S_W_ENC : MSA_I5_FMT<0b010, 0b10, 0b000111>;
+class CLTI_S_D_ENC : MSA_I5_FMT<0b010, 0b11, 0b000111>;
+
+class CLTI_U_B_ENC : MSA_I5_FMT<0b011, 0b00, 0b000111>;
+class CLTI_U_H_ENC : MSA_I5_FMT<0b011, 0b01, 0b000111>;
+class CLTI_U_W_ENC : MSA_I5_FMT<0b011, 0b10, 0b000111>;
+class CLTI_U_D_ENC : MSA_I5_FMT<0b011, 0b11, 0b000111>;
+
+class COPY_S_B_ENC : MSA_ELM_COPY_B_FMT<0b0010, 0b011001>;
+class COPY_S_H_ENC : MSA_ELM_COPY_H_FMT<0b0010, 0b011001>;
+class COPY_S_W_ENC : MSA_ELM_COPY_W_FMT<0b0010, 0b011001>;
+
+class COPY_U_B_ENC : MSA_ELM_COPY_B_FMT<0b0011, 0b011001>;
+class COPY_U_H_ENC : MSA_ELM_COPY_H_FMT<0b0011, 0b011001>;
+class COPY_U_W_ENC : MSA_ELM_COPY_W_FMT<0b0011, 0b011001>;
+
+class CTCMSA_ENC : MSA_ELM_CTCMSA_FMT<0b0000111110, 0b011001>;
+
+class DIV_S_B_ENC : MSA_3R_FMT<0b100, 0b00, 0b010010>;
+class DIV_S_H_ENC : MSA_3R_FMT<0b100, 0b01, 0b010010>;
+class DIV_S_W_ENC : MSA_3R_FMT<0b100, 0b10, 0b010010>;
+class DIV_S_D_ENC : MSA_3R_FMT<0b100, 0b11, 0b010010>;
+
+class DIV_U_B_ENC : MSA_3R_FMT<0b101, 0b00, 0b010010>;
+class DIV_U_H_ENC : MSA_3R_FMT<0b101, 0b01, 0b010010>;
+class DIV_U_W_ENC : MSA_3R_FMT<0b101, 0b10, 0b010010>;
+class DIV_U_D_ENC : MSA_3R_FMT<0b101, 0b11, 0b010010>;
+
+class DOTP_S_H_ENC : MSA_3R_FMT<0b000, 0b01, 0b010011>;
+class DOTP_S_W_ENC : MSA_3R_FMT<0b000, 0b10, 0b010011>;
+class DOTP_S_D_ENC : MSA_3R_FMT<0b000, 0b11, 0b010011>;
+
+class DOTP_U_H_ENC : MSA_3R_FMT<0b001, 0b01, 0b010011>;
+class DOTP_U_W_ENC : MSA_3R_FMT<0b001, 0b10, 0b010011>;
+class DOTP_U_D_ENC : MSA_3R_FMT<0b001, 0b11, 0b010011>;
+
+class DPADD_S_H_ENC : MSA_3R_FMT<0b010, 0b01, 0b010011>;
+class DPADD_S_W_ENC : MSA_3R_FMT<0b010, 0b10, 0b010011>;
+class DPADD_S_D_ENC : MSA_3R_FMT<0b010, 0b11, 0b010011>;
+
+class DPADD_U_H_ENC : MSA_3R_FMT<0b011, 0b01, 0b010011>;
+class DPADD_U_W_ENC : MSA_3R_FMT<0b011, 0b10, 0b010011>;
+class DPADD_U_D_ENC : MSA_3R_FMT<0b011, 0b11, 0b010011>;
+
+class DPSUB_S_H_ENC : MSA_3R_FMT<0b100, 0b01, 0b010011>;
+class DPSUB_S_W_ENC : MSA_3R_FMT<0b100, 0b10, 0b010011>;
+class DPSUB_S_D_ENC : MSA_3R_FMT<0b100, 0b11, 0b010011>;
+
+class DPSUB_U_H_ENC : MSA_3R_FMT<0b101, 0b01, 0b010011>;
+class DPSUB_U_W_ENC : MSA_3R_FMT<0b101, 0b10, 0b010011>;
+class DPSUB_U_D_ENC : MSA_3R_FMT<0b101, 0b11, 0b010011>;
+
+class FADD_W_ENC : MSA_3RF_FMT<0b0000, 0b0, 0b011011>;
+class FADD_D_ENC : MSA_3RF_FMT<0b0000, 0b1, 0b011011>;
+
+class FCAF_W_ENC : MSA_3RF_FMT<0b0000, 0b0, 0b011010>;
+class FCAF_D_ENC : MSA_3RF_FMT<0b0000, 0b1, 0b011010>;
+
+class FCEQ_W_ENC : MSA_3RF_FMT<0b0010, 0b0, 0b011010>;
+class FCEQ_D_ENC : MSA_3RF_FMT<0b0010, 0b1, 0b011010>;
+
+class FCLASS_W_ENC : MSA_2RF_FMT<0b110010000, 0b0, 0b011110>;
+class FCLASS_D_ENC : MSA_2RF_FMT<0b110010000, 0b1, 0b011110>;
+
+class FCLE_W_ENC : MSA_3RF_FMT<0b0110, 0b0, 0b011010>;
+class FCLE_D_ENC : MSA_3RF_FMT<0b0110, 0b1, 0b011010>;
+
+class FCLT_W_ENC : MSA_3RF_FMT<0b0100, 0b0, 0b011010>;
+class FCLT_D_ENC : MSA_3RF_FMT<0b0100, 0b1, 0b011010>;
+
+class FCNE_W_ENC : MSA_3RF_FMT<0b0011, 0b0, 0b011100>;
+class FCNE_D_ENC : MSA_3RF_FMT<0b0011, 0b1, 0b011100>;
+
+class FCOR_W_ENC : MSA_3RF_FMT<0b0001, 0b0, 0b011100>;
+class FCOR_D_ENC : MSA_3RF_FMT<0b0001, 0b1, 0b011100>;
+
+class FCUEQ_W_ENC : MSA_3RF_FMT<0b0011, 0b0, 0b011010>;
+class FCUEQ_D_ENC : MSA_3RF_FMT<0b0011, 0b1, 0b011010>;
+
+class FCULE_W_ENC : MSA_3RF_FMT<0b0111, 0b0, 0b011010>;
+class FCULE_D_ENC : MSA_3RF_FMT<0b0111, 0b1, 0b011010>;
+
+class FCULT_W_ENC : MSA_3RF_FMT<0b0101, 0b0, 0b011010>;
+class FCULT_D_ENC : MSA_3RF_FMT<0b0101, 0b1, 0b011010>;
+
+class FCUN_W_ENC : MSA_3RF_FMT<0b0001, 0b0, 0b011010>;
+class FCUN_D_ENC : MSA_3RF_FMT<0b0001, 0b1, 0b011010>;
+
+class FCUNE_W_ENC : MSA_3RF_FMT<0b0010, 0b0, 0b011100>;
+class FCUNE_D_ENC : MSA_3RF_FMT<0b0010, 0b1, 0b011100>;
+
+class FDIV_W_ENC : MSA_3RF_FMT<0b0011, 0b0, 0b011011>;
+class FDIV_D_ENC : MSA_3RF_FMT<0b0011, 0b1, 0b011011>;
+
+class FEXDO_H_ENC : MSA_3RF_FMT<0b1000, 0b0, 0b011011>;
+class FEXDO_W_ENC : MSA_3RF_FMT<0b1000, 0b1, 0b011011>;
+
+class FEXP2_W_ENC : MSA_3RF_FMT<0b0111, 0b0, 0b011011>;
+class FEXP2_D_ENC : MSA_3RF_FMT<0b0111, 0b1, 0b011011>;
+
+class FEXUPL_W_ENC : MSA_2RF_FMT<0b110011000, 0b0, 0b011110>;
+class FEXUPL_D_ENC : MSA_2RF_FMT<0b110011000, 0b1, 0b011110>;
+
+class FEXUPR_W_ENC : MSA_2RF_FMT<0b110011001, 0b0, 0b011110>;
+class FEXUPR_D_ENC : MSA_2RF_FMT<0b110011001, 0b1, 0b011110>;
+
+class FFINT_S_W_ENC : MSA_2RF_FMT<0b110011110, 0b0, 0b011110>;
+class FFINT_S_D_ENC : MSA_2RF_FMT<0b110011110, 0b1, 0b011110>;
+
+class FFINT_U_W_ENC : MSA_2RF_FMT<0b110011111, 0b0, 0b011110>;
+class FFINT_U_D_ENC : MSA_2RF_FMT<0b110011111, 0b1, 0b011110>;
+
+class FFQL_W_ENC : MSA_2RF_FMT<0b110011010, 0b0, 0b011110>;
+class FFQL_D_ENC : MSA_2RF_FMT<0b110011010, 0b1, 0b011110>;
+
+class FFQR_W_ENC : MSA_2RF_FMT<0b110011011, 0b0, 0b011110>;
+class FFQR_D_ENC : MSA_2RF_FMT<0b110011011, 0b1, 0b011110>;
+
+class FILL_B_ENC : MSA_2R_FILL_FMT<0b11000000, 0b00, 0b011110>;
+class FILL_H_ENC : MSA_2R_FILL_FMT<0b11000000, 0b01, 0b011110>;
+class FILL_W_ENC : MSA_2R_FILL_FMT<0b11000000, 0b10, 0b011110>;
+
+class FLOG2_W_ENC : MSA_2RF_FMT<0b110010111, 0b0, 0b011110>;
+class FLOG2_D_ENC : MSA_2RF_FMT<0b110010111, 0b1, 0b011110>;
+
+class FMADD_W_ENC : MSA_3RF_FMT<0b0100, 0b0, 0b011011>;
+class FMADD_D_ENC : MSA_3RF_FMT<0b0100, 0b1, 0b011011>;
+
+class FMAX_W_ENC : MSA_3RF_FMT<0b1110, 0b0, 0b011011>;
+class FMAX_D_ENC : MSA_3RF_FMT<0b1110, 0b1, 0b011011>;
+
+class FMAX_A_W_ENC : MSA_3RF_FMT<0b1111, 0b0, 0b011011>;
+class FMAX_A_D_ENC : MSA_3RF_FMT<0b1111, 0b1, 0b011011>;
+
+class FMIN_W_ENC : MSA_3RF_FMT<0b1100, 0b0, 0b011011>;
+class FMIN_D_ENC : MSA_3RF_FMT<0b1100, 0b1, 0b011011>;
+
+class FMIN_A_W_ENC : MSA_3RF_FMT<0b1101, 0b0, 0b011011>;
+class FMIN_A_D_ENC : MSA_3RF_FMT<0b1101, 0b1, 0b011011>;
+
+class FMSUB_W_ENC : MSA_3RF_FMT<0b0101, 0b0, 0b011011>;
+class FMSUB_D_ENC : MSA_3RF_FMT<0b0101, 0b1, 0b011011>;
+
+class FMUL_W_ENC : MSA_3RF_FMT<0b0010, 0b0, 0b011011>;
+class FMUL_D_ENC : MSA_3RF_FMT<0b0010, 0b1, 0b011011>;
+
+class FRINT_W_ENC : MSA_2RF_FMT<0b110010110, 0b0, 0b011110>;
+class FRINT_D_ENC : MSA_2RF_FMT<0b110010110, 0b1, 0b011110>;
+
+class FRCP_W_ENC : MSA_2RF_FMT<0b110010101, 0b0, 0b011110>;
+class FRCP_D_ENC : MSA_2RF_FMT<0b110010101, 0b1, 0b011110>;
+
+class FRSQRT_W_ENC : MSA_2RF_FMT<0b110010100, 0b0, 0b011110>;
+class FRSQRT_D_ENC : MSA_2RF_FMT<0b110010100, 0b1, 0b011110>;
+
+class FSAF_W_ENC : MSA_3RF_FMT<0b1000, 0b0, 0b011010>;
+class FSAF_D_ENC : MSA_3RF_FMT<0b1000, 0b1, 0b011010>;
+
+class FSEQ_W_ENC : MSA_3RF_FMT<0b1010, 0b0, 0b011010>;
+class FSEQ_D_ENC : MSA_3RF_FMT<0b1010, 0b1, 0b011010>;
+
+class FSLE_W_ENC : MSA_3RF_FMT<0b1110, 0b0, 0b011010>;
+class FSLE_D_ENC : MSA_3RF_FMT<0b1110, 0b1, 0b011010>;
+
+class FSLT_W_ENC : MSA_3RF_FMT<0b1100, 0b0, 0b011010>;
+class FSLT_D_ENC : MSA_3RF_FMT<0b1100, 0b1, 0b011010>;
+
+class FSNE_W_ENC : MSA_3RF_FMT<0b1011, 0b0, 0b011100>;
+class FSNE_D_ENC : MSA_3RF_FMT<0b1011, 0b1, 0b011100>;
+
+class FSOR_W_ENC : MSA_3RF_FMT<0b1001, 0b0, 0b011100>;
+class FSOR_D_ENC : MSA_3RF_FMT<0b1001, 0b1, 0b011100>;
+
+class FSQRT_W_ENC : MSA_2RF_FMT<0b110010011, 0b0, 0b011110>;
+class FSQRT_D_ENC : MSA_2RF_FMT<0b110010011, 0b1, 0b011110>;
+
+class FSUB_W_ENC : MSA_3RF_FMT<0b0001, 0b0, 0b011011>;
+class FSUB_D_ENC : MSA_3RF_FMT<0b0001, 0b1, 0b011011>;
+
+class FSUEQ_W_ENC : MSA_3RF_FMT<0b1011, 0b0, 0b011010>;
+class FSUEQ_D_ENC : MSA_3RF_FMT<0b1011, 0b1, 0b011010>;
+
+class FSULE_W_ENC : MSA_3RF_FMT<0b1111, 0b0, 0b011010>;
+class FSULE_D_ENC : MSA_3RF_FMT<0b1111, 0b1, 0b011010>;
+
+class FSULT_W_ENC : MSA_3RF_FMT<0b1101, 0b0, 0b011010>;
+class FSULT_D_ENC : MSA_3RF_FMT<0b1101, 0b1, 0b011010>;
+
+class FSUN_W_ENC : MSA_3RF_FMT<0b1001, 0b0, 0b011010>;
+class FSUN_D_ENC : MSA_3RF_FMT<0b1001, 0b1, 0b011010>;
+
+class FSUNE_W_ENC : MSA_3RF_FMT<0b1010, 0b0, 0b011100>;
+class FSUNE_D_ENC : MSA_3RF_FMT<0b1010, 0b1, 0b011100>;
+
+class FTINT_S_W_ENC : MSA_2RF_FMT<0b110011100, 0b0, 0b011110>;
+class FTINT_S_D_ENC : MSA_2RF_FMT<0b110011100, 0b1, 0b011110>;
+
+class FTINT_U_W_ENC : MSA_2RF_FMT<0b110011101, 0b0, 0b011110>;
+class FTINT_U_D_ENC : MSA_2RF_FMT<0b110011101, 0b1, 0b011110>;
+
+class FTQ_H_ENC : MSA_3RF_FMT<0b1010, 0b0, 0b011011>;
+class FTQ_W_ENC : MSA_3RF_FMT<0b1010, 0b1, 0b011011>;
+
+class FTRUNC_S_W_ENC : MSA_2RF_FMT<0b110010001, 0b0, 0b011110>;
+class FTRUNC_S_D_ENC : MSA_2RF_FMT<0b110010001, 0b1, 0b011110>;
+
+class FTRUNC_U_W_ENC : MSA_2RF_FMT<0b110010010, 0b0, 0b011110>;
+class FTRUNC_U_D_ENC : MSA_2RF_FMT<0b110010010, 0b1, 0b011110>;
+
+class HADD_S_H_ENC : MSA_3R_FMT<0b100, 0b01, 0b010101>;
+class HADD_S_W_ENC : MSA_3R_FMT<0b100, 0b10, 0b010101>;
+class HADD_S_D_ENC : MSA_3R_FMT<0b100, 0b11, 0b010101>;
+
+class HADD_U_H_ENC : MSA_3R_FMT<0b101, 0b01, 0b010101>;
+class HADD_U_W_ENC : MSA_3R_FMT<0b101, 0b10, 0b010101>;
+class HADD_U_D_ENC : MSA_3R_FMT<0b101, 0b11, 0b010101>;
+
+class HSUB_S_H_ENC : MSA_3R_FMT<0b110, 0b01, 0b010101>;
+class HSUB_S_W_ENC : MSA_3R_FMT<0b110, 0b10, 0b010101>;
+class HSUB_S_D_ENC : MSA_3R_FMT<0b110, 0b11, 0b010101>;
+
+class HSUB_U_H_ENC : MSA_3R_FMT<0b111, 0b01, 0b010101>;
+class HSUB_U_W_ENC : MSA_3R_FMT<0b111, 0b10, 0b010101>;
+class HSUB_U_D_ENC : MSA_3R_FMT<0b111, 0b11, 0b010101>;
+
+class ILVEV_B_ENC : MSA_3R_FMT<0b110, 0b00, 0b010100>;
+class ILVEV_H_ENC : MSA_3R_FMT<0b110, 0b01, 0b010100>;
+class ILVEV_W_ENC : MSA_3R_FMT<0b110, 0b10, 0b010100>;
+class ILVEV_D_ENC : MSA_3R_FMT<0b110, 0b11, 0b010100>;
+
+class ILVL_B_ENC : MSA_3R_FMT<0b100, 0b00, 0b010100>;
+class ILVL_H_ENC : MSA_3R_FMT<0b100, 0b01, 0b010100>;
+class ILVL_W_ENC : MSA_3R_FMT<0b100, 0b10, 0b010100>;
+class ILVL_D_ENC : MSA_3R_FMT<0b100, 0b11, 0b010100>;
+
+class ILVOD_B_ENC : MSA_3R_FMT<0b111, 0b00, 0b010100>;
+class ILVOD_H_ENC : MSA_3R_FMT<0b111, 0b01, 0b010100>;
+class ILVOD_W_ENC : MSA_3R_FMT<0b111, 0b10, 0b010100>;
+class ILVOD_D_ENC : MSA_3R_FMT<0b111, 0b11, 0b010100>;
+
+class ILVR_B_ENC : MSA_3R_FMT<0b101, 0b00, 0b010100>;
+class ILVR_H_ENC : MSA_3R_FMT<0b101, 0b01, 0b010100>;
+class ILVR_W_ENC : MSA_3R_FMT<0b101, 0b10, 0b010100>;
+class ILVR_D_ENC : MSA_3R_FMT<0b101, 0b11, 0b010100>;
+
+class INSERT_B_ENC : MSA_ELM_INSERT_B_FMT<0b0100, 0b011001>;
+class INSERT_H_ENC : MSA_ELM_INSERT_H_FMT<0b0100, 0b011001>;
+class INSERT_W_ENC : MSA_ELM_INSERT_W_FMT<0b0100, 0b011001>;
+
+class INSVE_B_ENC : MSA_ELM_B_FMT<0b0101, 0b011001>;
+class INSVE_H_ENC : MSA_ELM_H_FMT<0b0101, 0b011001>;
+class INSVE_W_ENC : MSA_ELM_W_FMT<0b0101, 0b011001>;
+class INSVE_D_ENC : MSA_ELM_D_FMT<0b0101, 0b011001>;
+
+class LD_B_ENC : MSA_MI10_FMT<0b00, 0b1000>;
+class LD_H_ENC : MSA_MI10_FMT<0b01, 0b1000>;
+class LD_W_ENC : MSA_MI10_FMT<0b10, 0b1000>;
+class LD_D_ENC : MSA_MI10_FMT<0b11, 0b1000>;
+
+class LDI_B_ENC : MSA_I10_FMT<0b110, 0b00, 0b000111>;
+class LDI_H_ENC : MSA_I10_FMT<0b110, 0b01, 0b000111>;
+class LDI_W_ENC : MSA_I10_FMT<0b110, 0b10, 0b000111>;
+class LDI_D_ENC : MSA_I10_FMT<0b110, 0b11, 0b000111>;
+
+class LSA_ENC : SPECIAL_LSA_FMT<0b000101>;
+
+class MADD_Q_H_ENC : MSA_3RF_FMT<0b0101, 0b0, 0b011100>;
+class MADD_Q_W_ENC : MSA_3RF_FMT<0b0101, 0b1, 0b011100>;
+
+class MADDR_Q_H_ENC : MSA_3RF_FMT<0b1101, 0b0, 0b011100>;
+class MADDR_Q_W_ENC : MSA_3RF_FMT<0b1101, 0b1, 0b011100>;
+
+class MADDV_B_ENC : MSA_3R_FMT<0b001, 0b00, 0b010010>;
+class MADDV_H_ENC : MSA_3R_FMT<0b001, 0b01, 0b010010>;
+class MADDV_W_ENC : MSA_3R_FMT<0b001, 0b10, 0b010010>;
+class MADDV_D_ENC : MSA_3R_FMT<0b001, 0b11, 0b010010>;
+
+class MAX_A_B_ENC : MSA_3R_FMT<0b110, 0b00, 0b001110>;
+class MAX_A_H_ENC : MSA_3R_FMT<0b110, 0b01, 0b001110>;
+class MAX_A_W_ENC : MSA_3R_FMT<0b110, 0b10, 0b001110>;
+class MAX_A_D_ENC : MSA_3R_FMT<0b110, 0b11, 0b001110>;
+
+class MAX_S_B_ENC : MSA_3R_FMT<0b010, 0b00, 0b001110>;
+class MAX_S_H_ENC : MSA_3R_FMT<0b010, 0b01, 0b001110>;
+class MAX_S_W_ENC : MSA_3R_FMT<0b010, 0b10, 0b001110>;
+class MAX_S_D_ENC : MSA_3R_FMT<0b010, 0b11, 0b001110>;
+
+class MAX_U_B_ENC : MSA_3R_FMT<0b011, 0b00, 0b001110>;
+class MAX_U_H_ENC : MSA_3R_FMT<0b011, 0b01, 0b001110>;
+class MAX_U_W_ENC : MSA_3R_FMT<0b011, 0b10, 0b001110>;
+class MAX_U_D_ENC : MSA_3R_FMT<0b011, 0b11, 0b001110>;
+
+class MAXI_S_B_ENC : MSA_I5_FMT<0b010, 0b00, 0b000110>;
+class MAXI_S_H_ENC : MSA_I5_FMT<0b010, 0b01, 0b000110>;
+class MAXI_S_W_ENC : MSA_I5_FMT<0b010, 0b10, 0b000110>;
+class MAXI_S_D_ENC : MSA_I5_FMT<0b010, 0b11, 0b000110>;
+
+class MAXI_U_B_ENC : MSA_I5_FMT<0b011, 0b00, 0b000110>;
+class MAXI_U_H_ENC : MSA_I5_FMT<0b011, 0b01, 0b000110>;
+class MAXI_U_W_ENC : MSA_I5_FMT<0b011, 0b10, 0b000110>;
+class MAXI_U_D_ENC : MSA_I5_FMT<0b011, 0b11, 0b000110>;
+
+class MIN_A_B_ENC : MSA_3R_FMT<0b111, 0b00, 0b001110>;
+class MIN_A_H_ENC : MSA_3R_FMT<0b111, 0b01, 0b001110>;
+class MIN_A_W_ENC : MSA_3R_FMT<0b111, 0b10, 0b001110>;
+class MIN_A_D_ENC : MSA_3R_FMT<0b111, 0b11, 0b001110>;
+
+class MIN_S_B_ENC : MSA_3R_FMT<0b100, 0b00, 0b001110>;
+class MIN_S_H_ENC : MSA_3R_FMT<0b100, 0b01, 0b001110>;
+class MIN_S_W_ENC : MSA_3R_FMT<0b100, 0b10, 0b001110>;
+class MIN_S_D_ENC : MSA_3R_FMT<0b100, 0b11, 0b001110>;
+
+class MIN_U_B_ENC : MSA_3R_FMT<0b101, 0b00, 0b001110>;
+class MIN_U_H_ENC : MSA_3R_FMT<0b101, 0b01, 0b001110>;
+class MIN_U_W_ENC : MSA_3R_FMT<0b101, 0b10, 0b001110>;
+class MIN_U_D_ENC : MSA_3R_FMT<0b101, 0b11, 0b001110>;
+
+class MINI_S_B_ENC : MSA_I5_FMT<0b100, 0b00, 0b000110>;
+class MINI_S_H_ENC : MSA_I5_FMT<0b100, 0b01, 0b000110>;
+class MINI_S_W_ENC : MSA_I5_FMT<0b100, 0b10, 0b000110>;
+class MINI_S_D_ENC : MSA_I5_FMT<0b100, 0b11, 0b000110>;
+
+class MINI_U_B_ENC : MSA_I5_FMT<0b101, 0b00, 0b000110>;
+class MINI_U_H_ENC : MSA_I5_FMT<0b101, 0b01, 0b000110>;
+class MINI_U_W_ENC : MSA_I5_FMT<0b101, 0b10, 0b000110>;
+class MINI_U_D_ENC : MSA_I5_FMT<0b101, 0b11, 0b000110>;
+
+class MOD_S_B_ENC : MSA_3R_FMT<0b110, 0b00, 0b010010>;
+class MOD_S_H_ENC : MSA_3R_FMT<0b110, 0b01, 0b010010>;
+class MOD_S_W_ENC : MSA_3R_FMT<0b110, 0b10, 0b010010>;
+class MOD_S_D_ENC : MSA_3R_FMT<0b110, 0b11, 0b010010>;
+
+class MOD_U_B_ENC : MSA_3R_FMT<0b111, 0b00, 0b010010>;
+class MOD_U_H_ENC : MSA_3R_FMT<0b111, 0b01, 0b010010>;
+class MOD_U_W_ENC : MSA_3R_FMT<0b111, 0b10, 0b010010>;
+class MOD_U_D_ENC : MSA_3R_FMT<0b111, 0b11, 0b010010>;
+
+class MOVE_V_ENC : MSA_ELM_FMT<0b0010111110, 0b011001>;
+
+class MSUB_Q_H_ENC : MSA_3RF_FMT<0b0110, 0b0, 0b011100>;
+class MSUB_Q_W_ENC : MSA_3RF_FMT<0b0110, 0b1, 0b011100>;
+
+class MSUBR_Q_H_ENC : MSA_3RF_FMT<0b1110, 0b0, 0b011100>;
+class MSUBR_Q_W_ENC : MSA_3RF_FMT<0b1110, 0b1, 0b011100>;
+
+class MSUBV_B_ENC : MSA_3R_FMT<0b010, 0b00, 0b010010>;
+class MSUBV_H_ENC : MSA_3R_FMT<0b010, 0b01, 0b010010>;
+class MSUBV_W_ENC : MSA_3R_FMT<0b010, 0b10, 0b010010>;
+class MSUBV_D_ENC : MSA_3R_FMT<0b010, 0b11, 0b010010>;
+
+class MUL_Q_H_ENC : MSA_3RF_FMT<0b0100, 0b0, 0b011100>;
+class MUL_Q_W_ENC : MSA_3RF_FMT<0b0100, 0b1, 0b011100>;
+
+class MULR_Q_H_ENC : MSA_3RF_FMT<0b1100, 0b0, 0b011100>;
+class MULR_Q_W_ENC : MSA_3RF_FMT<0b1100, 0b1, 0b011100>;
+
+class MULV_B_ENC : MSA_3R_FMT<0b000, 0b00, 0b010010>;
+class MULV_H_ENC : MSA_3R_FMT<0b000, 0b01, 0b010010>;
+class MULV_W_ENC : MSA_3R_FMT<0b000, 0b10, 0b010010>;
+class MULV_D_ENC : MSA_3R_FMT<0b000, 0b11, 0b010010>;
+
+class NLOC_B_ENC : MSA_2R_FMT<0b11000010, 0b00, 0b011110>;
+class NLOC_H_ENC : MSA_2R_FMT<0b11000010, 0b01, 0b011110>;
+class NLOC_W_ENC : MSA_2R_FMT<0b11000010, 0b10, 0b011110>;
+class NLOC_D_ENC : MSA_2R_FMT<0b11000010, 0b11, 0b011110>;
+
+class NLZC_B_ENC : MSA_2R_FMT<0b11000011, 0b00, 0b011110>;
+class NLZC_H_ENC : MSA_2R_FMT<0b11000011, 0b01, 0b011110>;
+class NLZC_W_ENC : MSA_2R_FMT<0b11000011, 0b10, 0b011110>;
+class NLZC_D_ENC : MSA_2R_FMT<0b11000011, 0b11, 0b011110>;
+
+class NOR_V_ENC : MSA_VEC_FMT<0b00010, 0b011110>;
+
+class NORI_B_ENC : MSA_I8_FMT<0b10, 0b000000>;
+
+class OR_V_ENC : MSA_VEC_FMT<0b00001, 0b011110>;
+
+class ORI_B_ENC : MSA_I8_FMT<0b01, 0b000000>;
+
+class PCKEV_B_ENC : MSA_3R_FMT<0b010, 0b00, 0b010100>;
+class PCKEV_H_ENC : MSA_3R_FMT<0b010, 0b01, 0b010100>;
+class PCKEV_W_ENC : MSA_3R_FMT<0b010, 0b10, 0b010100>;
+class PCKEV_D_ENC : MSA_3R_FMT<0b010, 0b11, 0b010100>;
+
+class PCKOD_B_ENC : MSA_3R_FMT<0b011, 0b00, 0b010100>;
+class PCKOD_H_ENC : MSA_3R_FMT<0b011, 0b01, 0b010100>;
+class PCKOD_W_ENC : MSA_3R_FMT<0b011, 0b10, 0b010100>;
+class PCKOD_D_ENC : MSA_3R_FMT<0b011, 0b11, 0b010100>;
+
+class PCNT_B_ENC : MSA_2R_FMT<0b11000001, 0b00, 0b011110>;
+class PCNT_H_ENC : MSA_2R_FMT<0b11000001, 0b01, 0b011110>;
+class PCNT_W_ENC : MSA_2R_FMT<0b11000001, 0b10, 0b011110>;
+class PCNT_D_ENC : MSA_2R_FMT<0b11000001, 0b11, 0b011110>;
+
+class SAT_S_B_ENC : MSA_BIT_B_FMT<0b000, 0b001010>;
+class SAT_S_H_ENC : MSA_BIT_H_FMT<0b000, 0b001010>;
+class SAT_S_W_ENC : MSA_BIT_W_FMT<0b000, 0b001010>;
+class SAT_S_D_ENC : MSA_BIT_D_FMT<0b000, 0b001010>;
+
+class SAT_U_B_ENC : MSA_BIT_B_FMT<0b001, 0b001010>;
+class SAT_U_H_ENC : MSA_BIT_H_FMT<0b001, 0b001010>;
+class SAT_U_W_ENC : MSA_BIT_W_FMT<0b001, 0b001010>;
+class SAT_U_D_ENC : MSA_BIT_D_FMT<0b001, 0b001010>;
+
+class SHF_B_ENC : MSA_I8_FMT<0b00, 0b000010>;
+class SHF_H_ENC : MSA_I8_FMT<0b01, 0b000010>;
+class SHF_W_ENC : MSA_I8_FMT<0b10, 0b000010>;
+
+class SLD_B_ENC : MSA_3R_INDEX_FMT<0b000, 0b00, 0b010100>;
+class SLD_H_ENC : MSA_3R_INDEX_FMT<0b000, 0b01, 0b010100>;
+class SLD_W_ENC : MSA_3R_INDEX_FMT<0b000, 0b10, 0b010100>;
+class SLD_D_ENC : MSA_3R_INDEX_FMT<0b000, 0b11, 0b010100>;
+
+class SLDI_B_ENC : MSA_ELM_B_FMT<0b0000, 0b011001>;
+class SLDI_H_ENC : MSA_ELM_H_FMT<0b0000, 0b011001>;
+class SLDI_W_ENC : MSA_ELM_W_FMT<0b0000, 0b011001>;
+class SLDI_D_ENC : MSA_ELM_D_FMT<0b0000, 0b011001>;
+
+class SLL_B_ENC : MSA_3R_FMT<0b000, 0b00, 0b001101>;
+class SLL_H_ENC : MSA_3R_FMT<0b000, 0b01, 0b001101>;
+class SLL_W_ENC : MSA_3R_FMT<0b000, 0b10, 0b001101>;
+class SLL_D_ENC : MSA_3R_FMT<0b000, 0b11, 0b001101>;
+
+class SLLI_B_ENC : MSA_BIT_B_FMT<0b000, 0b001001>;
+class SLLI_H_ENC : MSA_BIT_H_FMT<0b000, 0b001001>;
+class SLLI_W_ENC : MSA_BIT_W_FMT<0b000, 0b001001>;
+class SLLI_D_ENC : MSA_BIT_D_FMT<0b000, 0b001001>;
+
+class SPLAT_B_ENC : MSA_3R_INDEX_FMT<0b001, 0b00, 0b010100>;
+class SPLAT_H_ENC : MSA_3R_INDEX_FMT<0b001, 0b01, 0b010100>;
+class SPLAT_W_ENC : MSA_3R_INDEX_FMT<0b001, 0b10, 0b010100>;
+class SPLAT_D_ENC : MSA_3R_INDEX_FMT<0b001, 0b11, 0b010100>;
+
+class SPLATI_B_ENC : MSA_ELM_B_FMT<0b0001, 0b011001>;
+class SPLATI_H_ENC : MSA_ELM_H_FMT<0b0001, 0b011001>;
+class SPLATI_W_ENC : MSA_ELM_W_FMT<0b0001, 0b011001>;
+class SPLATI_D_ENC : MSA_ELM_D_FMT<0b0001, 0b011001>;
+
+class SRA_B_ENC : MSA_3R_FMT<0b001, 0b00, 0b001101>;
+class SRA_H_ENC : MSA_3R_FMT<0b001, 0b01, 0b001101>;
+class SRA_W_ENC : MSA_3R_FMT<0b001, 0b10, 0b001101>;
+class SRA_D_ENC : MSA_3R_FMT<0b001, 0b11, 0b001101>;
+
+class SRAI_B_ENC : MSA_BIT_B_FMT<0b001, 0b001001>;
+class SRAI_H_ENC : MSA_BIT_H_FMT<0b001, 0b001001>;
+class SRAI_W_ENC : MSA_BIT_W_FMT<0b001, 0b001001>;
+class SRAI_D_ENC : MSA_BIT_D_FMT<0b001, 0b001001>;
+
+class SRAR_B_ENC : MSA_3R_FMT<0b001, 0b00, 0b010101>;
+class SRAR_H_ENC : MSA_3R_FMT<0b001, 0b01, 0b010101>;
+class SRAR_W_ENC : MSA_3R_FMT<0b001, 0b10, 0b010101>;
+class SRAR_D_ENC : MSA_3R_FMT<0b001, 0b11, 0b010101>;
+
+class SRARI_B_ENC : MSA_BIT_B_FMT<0b010, 0b001010>;
+class SRARI_H_ENC : MSA_BIT_H_FMT<0b010, 0b001010>;
+class SRARI_W_ENC : MSA_BIT_W_FMT<0b010, 0b001010>;
+class SRARI_D_ENC : MSA_BIT_D_FMT<0b010, 0b001010>;
+
+class SRL_B_ENC : MSA_3R_FMT<0b010, 0b00, 0b001101>;
+class SRL_H_ENC : MSA_3R_FMT<0b010, 0b01, 0b001101>;
+class SRL_W_ENC : MSA_3R_FMT<0b010, 0b10, 0b001101>;
+class SRL_D_ENC : MSA_3R_FMT<0b010, 0b11, 0b001101>;
+
+class SRLI_B_ENC : MSA_BIT_B_FMT<0b010, 0b001001>;
+class SRLI_H_ENC : MSA_BIT_H_FMT<0b010, 0b001001>;
+class SRLI_W_ENC : MSA_BIT_W_FMT<0b010, 0b001001>;
+class SRLI_D_ENC : MSA_BIT_D_FMT<0b010, 0b001001>;
+
+class SRLR_B_ENC : MSA_3R_FMT<0b010, 0b00, 0b010101>;
+class SRLR_H_ENC : MSA_3R_FMT<0b010, 0b01, 0b010101>;
+class SRLR_W_ENC : MSA_3R_FMT<0b010, 0b10, 0b010101>;
+class SRLR_D_ENC : MSA_3R_FMT<0b010, 0b11, 0b010101>;
+
+class SRLRI_B_ENC : MSA_BIT_B_FMT<0b011, 0b001010>;
+class SRLRI_H_ENC : MSA_BIT_H_FMT<0b011, 0b001010>;
+class SRLRI_W_ENC : MSA_BIT_W_FMT<0b011, 0b001010>;
+class SRLRI_D_ENC : MSA_BIT_D_FMT<0b011, 0b001010>;
+
+class ST_B_ENC : MSA_MI10_FMT<0b00, 0b1001>;
+class ST_H_ENC : MSA_MI10_FMT<0b01, 0b1001>;
+class ST_W_ENC : MSA_MI10_FMT<0b10, 0b1001>;
+class ST_D_ENC : MSA_MI10_FMT<0b11, 0b1001>;
+
+class SUBS_S_B_ENC : MSA_3R_FMT<0b000, 0b00, 0b010001>;
+class SUBS_S_H_ENC : MSA_3R_FMT<0b000, 0b01, 0b010001>;
+class SUBS_S_W_ENC : MSA_3R_FMT<0b000, 0b10, 0b010001>;
+class SUBS_S_D_ENC : MSA_3R_FMT<0b000, 0b11, 0b010001>;
+
+class SUBS_U_B_ENC : MSA_3R_FMT<0b001, 0b00, 0b010001>;
+class SUBS_U_H_ENC : MSA_3R_FMT<0b001, 0b01, 0b010001>;
+class SUBS_U_W_ENC : MSA_3R_FMT<0b001, 0b10, 0b010001>;
+class SUBS_U_D_ENC : MSA_3R_FMT<0b001, 0b11, 0b010001>;
+
+class SUBSUS_U_B_ENC : MSA_3R_FMT<0b010, 0b00, 0b010001>;
+class SUBSUS_U_H_ENC : MSA_3R_FMT<0b010, 0b01, 0b010001>;
+class SUBSUS_U_W_ENC : MSA_3R_FMT<0b010, 0b10, 0b010001>;
+class SUBSUS_U_D_ENC : MSA_3R_FMT<0b010, 0b11, 0b010001>;
+
+class SUBSUU_S_B_ENC : MSA_3R_FMT<0b011, 0b00, 0b010001>;
+class SUBSUU_S_H_ENC : MSA_3R_FMT<0b011, 0b01, 0b010001>;
+class SUBSUU_S_W_ENC : MSA_3R_FMT<0b011, 0b10, 0b010001>;
+class SUBSUU_S_D_ENC : MSA_3R_FMT<0b011, 0b11, 0b010001>;
+
+class SUBV_B_ENC : MSA_3R_FMT<0b001, 0b00, 0b001110>;
+class SUBV_H_ENC : MSA_3R_FMT<0b001, 0b01, 0b001110>;
+class SUBV_W_ENC : MSA_3R_FMT<0b001, 0b10, 0b001110>;
+class SUBV_D_ENC : MSA_3R_FMT<0b001, 0b11, 0b001110>;
+
+class SUBVI_B_ENC : MSA_I5_FMT<0b001, 0b00, 0b000110>;
+class SUBVI_H_ENC : MSA_I5_FMT<0b001, 0b01, 0b000110>;
+class SUBVI_W_ENC : MSA_I5_FMT<0b001, 0b10, 0b000110>;
+class SUBVI_D_ENC : MSA_I5_FMT<0b001, 0b11, 0b000110>;
+
+class VSHF_B_ENC : MSA_3R_FMT<0b000, 0b00, 0b010101>;
+class VSHF_H_ENC : MSA_3R_FMT<0b000, 0b01, 0b010101>;
+class VSHF_W_ENC : MSA_3R_FMT<0b000, 0b10, 0b010101>;
+class VSHF_D_ENC : MSA_3R_FMT<0b000, 0b11, 0b010101>;
+
+class XOR_V_ENC : MSA_VEC_FMT<0b00011, 0b011110>;
+
+class XORI_B_ENC : MSA_I8_FMT<0b11, 0b000000>;
+
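+// Each *_ENC class above pins down only the binary encoding of an
+// instruction; the matching *_DESC class below supplies its operands,
+// assembly string and selection pattern. The two halves are combined into
+// one instruction definition later in this file, in the style of the
+// following sketch (illustrative only; see the actual defs further down):
+//
+//   def ADDV_B : ADDV_B_ENC, ADDV_B_DESC;
+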
+// Instruction descriptions.
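+// The MSA_BIT_*_DESC_BASE classes below differ only in the width of the
+// splat immediate they accept: uimm3, uimm4, uimm5 and uimm6, i.e.
+// log2(element size in bits) for the .b, .h, .w and .d element types.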
+class MSA_BIT_B_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
+ ComplexPattern Imm, RegisterOperand ROWD,
+ RegisterOperand ROWS = ROWD,
+ InstrItinClass itin = NoItinerary> {
+ dag OutOperandList = (outs ROWD:$wd);
+ dag InOperandList = (ins ROWS:$ws, vsplat_uimm3:$m);
+ string AsmString = !strconcat(instr_asm, "\t$wd, $ws, $m");
+ list<dag> Pattern = [(set ROWD:$wd, (OpNode ROWS:$ws, Imm:$m))];
+ InstrItinClass Itinerary = itin;
+}
+
+class MSA_BIT_H_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
+ ComplexPattern Imm, RegisterOperand ROWD,
+ RegisterOperand ROWS = ROWD,
+ InstrItinClass itin = NoItinerary> {
+ dag OutOperandList = (outs ROWD:$wd);
+ dag InOperandList = (ins ROWS:$ws, vsplat_uimm4:$m);
+ string AsmString = !strconcat(instr_asm, "\t$wd, $ws, $m");
+ list<dag> Pattern = [(set ROWD:$wd, (OpNode ROWS:$ws, Imm:$m))];
+ InstrItinClass Itinerary = itin;
+}
+
+class MSA_BIT_W_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
+ ComplexPattern Imm, RegisterOperand ROWD,
+ RegisterOperand ROWS = ROWD,
+ InstrItinClass itin = NoItinerary> {
+ dag OutOperandList = (outs ROWD:$wd);
+ dag InOperandList = (ins ROWS:$ws, vsplat_uimm5:$m);
+ string AsmString = !strconcat(instr_asm, "\t$wd, $ws, $m");
+ list<dag> Pattern = [(set ROWD:$wd, (OpNode ROWS:$ws, Imm:$m))];
+ InstrItinClass Itinerary = itin;
+}
+
+class MSA_BIT_D_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
+ ComplexPattern Imm, RegisterOperand ROWD,
+ RegisterOperand ROWS = ROWD,
+ InstrItinClass itin = NoItinerary> {
+ dag OutOperandList = (outs ROWD:$wd);
+ dag InOperandList = (ins ROWS:$ws, vsplat_uimm6:$m);
+ string AsmString = !strconcat(instr_asm, "\t$wd, $ws, $m");
+ list<dag> Pattern = [(set ROWD:$wd, (OpNode ROWS:$ws, Imm:$m))];
+ InstrItinClass Itinerary = itin;
+}
+
+// This class is deprecated and will be removed soon.
+class MSA_BIT_B_X_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
+ RegisterOperand ROWD, RegisterOperand ROWS = ROWD,
+ InstrItinClass itin = NoItinerary> {
+ dag OutOperandList = (outs ROWD:$wd);
+ dag InOperandList = (ins ROWS:$ws, uimm3:$m);
+ string AsmString = !strconcat(instr_asm, "\t$wd, $ws, $m");
+ list<dag> Pattern = [(set ROWD:$wd, (OpNode ROWS:$ws, immZExt3:$m))];
+ InstrItinClass Itinerary = itin;
+}
+
+// This class is deprecated and will be removed soon.
+class MSA_BIT_H_X_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
+ RegisterOperand ROWD, RegisterOperand ROWS = ROWD,
+ InstrItinClass itin = NoItinerary> {
+ dag OutOperandList = (outs ROWD:$wd);
+ dag InOperandList = (ins ROWS:$ws, uimm4:$m);
+ string AsmString = !strconcat(instr_asm, "\t$wd, $ws, $m");
+ list<dag> Pattern = [(set ROWD:$wd, (OpNode ROWS:$ws, immZExt4:$m))];
+ InstrItinClass Itinerary = itin;
+}
+
+// This class is deprecated and will be removed soon.
+class MSA_BIT_W_X_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
+ RegisterOperand ROWD, RegisterOperand ROWS = ROWD,
+ InstrItinClass itin = NoItinerary> {
+ dag OutOperandList = (outs ROWD:$wd);
+ dag InOperandList = (ins ROWS:$ws, uimm5:$m);
+ string AsmString = !strconcat(instr_asm, "\t$wd, $ws, $m");
+ list<dag> Pattern = [(set ROWD:$wd, (OpNode ROWS:$ws, immZExt5:$m))];
+ InstrItinClass Itinerary = itin;
+}
+
+// This class is deprecated and will be removed soon.
+class MSA_BIT_D_X_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
+ RegisterOperand ROWD, RegisterOperand ROWS = ROWD,
+ InstrItinClass itin = NoItinerary> {
+ dag OutOperandList = (outs ROWD:$wd);
+ dag InOperandList = (ins ROWS:$ws, uimm6:$m);
+ string AsmString = !strconcat(instr_asm, "\t$wd, $ws, $m");
+ list<dag> Pattern = [(set ROWD:$wd, (OpNode ROWS:$ws, immZExt6:$m))];
+ InstrItinClass Itinerary = itin;
+}
+
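+// BINSLI/BINSRI insert a left-/right-aligned field of bits from $ws into
+// $wd. vsplat_maskl_bits and vsplat_maskr_bits match immediates whose
+// splatted value is a left- or right-aligned run of ones, which lets the
+// operation be modelled as the bitwise vselect below.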
+class MSA_BIT_BINSXI_DESC_BASE<string instr_asm, ValueType Ty,
+ ComplexPattern Mask, RegisterOperand ROWD,
+ RegisterOperand ROWS = ROWD,
+ InstrItinClass itin = NoItinerary> {
+ dag OutOperandList = (outs ROWD:$wd);
+ dag InOperandList = (ins ROWD:$wd_in, ROWS:$ws, vsplat_uimm8:$m);
+ string AsmString = !strconcat(instr_asm, "\t$wd, $ws, $m");
+ list<dag> Pattern = [(set ROWD:$wd, (vselect (Ty Mask:$m), (Ty ROWD:$wd_in),
+ ROWS:$ws))];
+ InstrItinClass Itinerary = itin;
+ string Constraints = "$wd = $wd_in";
+}
+
+class MSA_BIT_BINSLI_DESC_BASE<string instr_asm, ValueType Ty,
+ RegisterOperand ROWD,
+ RegisterOperand ROWS = ROWD,
+ InstrItinClass itin = NoItinerary> :
+ MSA_BIT_BINSXI_DESC_BASE<instr_asm, Ty, vsplat_maskl_bits, ROWD, ROWS, itin>;
+
+class MSA_BIT_BINSRI_DESC_BASE<string instr_asm, ValueType Ty,
+ RegisterOperand ROWD,
+ RegisterOperand ROWS = ROWD,
+ InstrItinClass itin = NoItinerary> :
+ MSA_BIT_BINSXI_DESC_BASE<instr_asm, Ty, vsplat_maskr_bits, ROWD, ROWS, itin>;
+
+class MSA_BIT_SPLAT_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
+ SplatComplexPattern SplatImm,
+ RegisterOperand ROWD, RegisterOperand ROWS = ROWD,
+ InstrItinClass itin = NoItinerary> {
+ dag OutOperandList = (outs ROWD:$wd);
+ dag InOperandList = (ins ROWS:$ws, SplatImm.OpClass:$m);
+ string AsmString = !strconcat(instr_asm, "\t$wd, $ws, $m");
+ list<dag> Pattern = [(set ROWD:$wd, (OpNode ROWS:$ws, SplatImm:$m))];
+ InstrItinClass Itinerary = itin;
+}
+
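+// COPY_S/COPY_U move element $n of a vector into a GPR with sign or zero
+// extension respectively (see the vextract_sext_*/vextract_zext_* patterns
+// used by the concrete descriptions below).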
+class MSA_COPY_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
+ ValueType VecTy, RegisterOperand ROD,
+ RegisterOperand ROWS,
+ InstrItinClass itin = NoItinerary> {
+ dag OutOperandList = (outs ROD:$rd);
+ dag InOperandList = (ins ROWS:$ws, uimm4:$n);
+ string AsmString = !strconcat(instr_asm, "\t$rd, $ws[$n]");
+ list<dag> Pattern = [(set ROD:$rd, (OpNode (VecTy ROWS:$ws), immZExt4:$n))];
+ InstrItinClass Itinerary = itin;
+}
+
+class MSA_ELM_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
+ RegisterOperand ROWD, RegisterOperand ROWS = ROWD,
+ InstrItinClass itin = NoItinerary> {
+ dag OutOperandList = (outs ROWD:$wd);
+ dag InOperandList = (ins ROWS:$ws, uimm4:$n);
+ string AsmString = !strconcat(instr_asm, "\t$wd, $ws[$n]");
+ list<dag> Pattern = [(set ROWD:$wd, (OpNode ROWS:$ws, immZExt4:$n))];
+ InstrItinClass Itinerary = itin;
+}
+
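+// Pseudo-instructions marked with usesCustomInserter are expanded after
+// instruction selection by the target's EmitInstrWithCustomInserter hook
+// rather than being emitted directly.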
+class MSA_COPY_PSEUDO_BASE<SDPatternOperator OpNode, ValueType VecTy,
+ RegisterClass RCD, RegisterClass RCWS> :
+ MipsPseudo<(outs RCD:$wd), (ins RCWS:$ws, uimm4:$n),
+ [(set RCD:$wd, (OpNode (VecTy RCWS:$ws), immZExt4:$n))]> {
+  let usesCustomInserter = 1;
+}
+
+class MSA_I5_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
+ SplatComplexPattern SplatImm, RegisterOperand ROWD,
+ RegisterOperand ROWS = ROWD,
+ InstrItinClass itin = NoItinerary> {
+ dag OutOperandList = (outs ROWD:$wd);
+ dag InOperandList = (ins ROWS:$ws, SplatImm.OpClass:$imm);
+ string AsmString = !strconcat(instr_asm, "\t$wd, $ws, $imm");
+ list<dag> Pattern = [(set ROWD:$wd, (OpNode ROWS:$ws, SplatImm:$imm))];
+ InstrItinClass Itinerary = itin;
+}
+
+class MSA_I8_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
+ SplatComplexPattern SplatImm, RegisterOperand ROWD,
+ RegisterOperand ROWS = ROWD,
+ InstrItinClass itin = NoItinerary> {
+ dag OutOperandList = (outs ROWD:$wd);
+ dag InOperandList = (ins ROWS:$ws, SplatImm.OpClass:$u8);
+ string AsmString = !strconcat(instr_asm, "\t$wd, $ws, $u8");
+ list<dag> Pattern = [(set ROWD:$wd, (OpNode ROWS:$ws, SplatImm:$u8))];
+ InstrItinClass Itinerary = itin;
+}
+
+// This class is deprecated and will be removed soon.
+class MSA_I8_X_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
+ RegisterOperand ROWD, RegisterOperand ROWS = ROWD,
+ InstrItinClass itin = NoItinerary> {
+ dag OutOperandList = (outs ROWD:$wd);
+ dag InOperandList = (ins ROWS:$ws, uimm8:$u8);
+ string AsmString = !strconcat(instr_asm, "\t$wd, $ws, $u8");
+ list<dag> Pattern = [(set ROWD:$wd, (OpNode ROWS:$ws, immZExt8:$u8))];
+ InstrItinClass Itinerary = itin;
+}
+
+class MSA_I8_SHF_DESC_BASE<string instr_asm, RegisterOperand ROWD,
+ RegisterOperand ROWS = ROWD,
+ InstrItinClass itin = NoItinerary> {
+ dag OutOperandList = (outs ROWD:$wd);
+ dag InOperandList = (ins ROWS:$ws, uimm8:$u8);
+ string AsmString = !strconcat(instr_asm, "\t$wd, $ws, $u8");
+ list<dag> Pattern = [(set ROWD:$wd, (MipsSHF immZExt8:$u8, ROWS:$ws))];
+ InstrItinClass Itinerary = itin;
+}
+
+class MSA_I10_LDI_DESC_BASE<string instr_asm, RegisterOperand ROWD,
+ InstrItinClass itin = NoItinerary> {
+ dag OutOperandList = (outs ROWD:$wd);
+ dag InOperandList = (ins vsplat_simm10:$s10);
+ string AsmString = !strconcat(instr_asm, "\t$wd, $s10");
+ // LDI is matched using custom matching code in MipsSEISelDAGToDAG.cpp
+ list<dag> Pattern = [];
+ bit hasSideEffects = 0;
+ InstrItinClass Itinerary = itin;
+}
+
+class MSA_2R_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
+ RegisterOperand ROWD, RegisterOperand ROWS = ROWD,
+ InstrItinClass itin = NoItinerary> {
+ dag OutOperandList = (outs ROWD:$wd);
+ dag InOperandList = (ins ROWS:$ws);
+ string AsmString = !strconcat(instr_asm, "\t$wd, $ws");
+ list<dag> Pattern = [(set ROWD:$wd, (OpNode ROWS:$ws))];
+ InstrItinClass Itinerary = itin;
+}
+
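+// FILL replicates a scalar register into every element of the destination
+// vector; only the .b, .h and .w forms have encodings here. The pseudo
+// base class below is for fill-style operations that need custom insertion.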
+class MSA_2R_FILL_DESC_BASE<string instr_asm, ValueType VT,
+ SDPatternOperator OpNode, RegisterOperand ROWD,
+ RegisterOperand ROS = ROWD,
+ InstrItinClass itin = NoItinerary> {
+ dag OutOperandList = (outs ROWD:$wd);
+ dag InOperandList = (ins ROS:$rs);
+ string AsmString = !strconcat(instr_asm, "\t$wd, $rs");
+ list<dag> Pattern = [(set ROWD:$wd, (VT (OpNode ROS:$rs)))];
+ InstrItinClass Itinerary = itin;
+}
+
+class MSA_2R_FILL_PSEUDO_BASE<ValueType VT, SDPatternOperator OpNode,
+ RegisterClass RCWD, RegisterClass RCWS = RCWD> :
+ MipsPseudo<(outs RCWD:$wd), (ins RCWS:$fs),
+ [(set RCWD:$wd, (OpNode RCWS:$fs))]> {
+ let usesCustomInserter = 1;
+}
+
+class MSA_2RF_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
+ RegisterOperand ROWD, RegisterOperand ROWS = ROWD,
+ InstrItinClass itin = NoItinerary> {
+ dag OutOperandList = (outs ROWD:$wd);
+ dag InOperandList = (ins ROWS:$ws);
+ string AsmString = !strconcat(instr_asm, "\t$wd, $ws");
+ list<dag> Pattern = [(set ROWD:$wd, (OpNode ROWS:$ws))];
+ InstrItinClass Itinerary = itin;
+}
+
+class MSA_3R_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
+ RegisterOperand ROWD, RegisterOperand ROWS = ROWD,
+ RegisterOperand ROWT = ROWD,
+ InstrItinClass itin = NoItinerary> {
+ dag OutOperandList = (outs ROWD:$wd);
+ dag InOperandList = (ins ROWS:$ws, ROWT:$wt);
+ string AsmString = !strconcat(instr_asm, "\t$wd, $ws, $wt");
+ list<dag> Pattern = [(set ROWD:$wd, (OpNode ROWS:$ws, ROWT:$wt))];
+ InstrItinClass Itinerary = itin;
+}
+
+class MSA_3R_BINSX_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
+ RegisterOperand ROWD, RegisterOperand ROWS = ROWD,
+ RegisterOperand ROWT = ROWD,
+ InstrItinClass itin = NoItinerary> {
+ dag OutOperandList = (outs ROWD:$wd);
+ dag InOperandList = (ins ROWD:$wd_in, ROWS:$ws, ROWT:$wt);
+ string AsmString = !strconcat(instr_asm, "\t$wd, $ws, $wt");
+ list<dag> Pattern = [(set ROWD:$wd, (OpNode ROWD:$wd_in, ROWS:$ws,
+ ROWT:$wt))];
+ string Constraints = "$wd = $wd_in";
+ InstrItinClass Itinerary = itin;
+}
+
+class MSA_3R_SPLAT_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
+ RegisterOperand ROWD, RegisterOperand ROWS = ROWD,
+ InstrItinClass itin = NoItinerary> {
+ dag OutOperandList = (outs ROWD:$wd);
+ dag InOperandList = (ins ROWS:$ws, GPR32:$rt);
+ string AsmString = !strconcat(instr_asm, "\t$wd, $ws[$rt]");
+ list<dag> Pattern = [(set ROWD:$wd, (OpNode ROWS:$ws, GPR32:$rt))];
+ InstrItinClass Itinerary = itin;
+}
+
+class MSA_3R_VSHF_DESC_BASE<string instr_asm, RegisterOperand ROWD,
+ RegisterOperand ROWS = ROWD,
+ RegisterOperand ROWT = ROWD,
+ InstrItinClass itin = NoItinerary> {
+ dag OutOperandList = (outs ROWD:$wd);
+ dag InOperandList = (ins ROWD:$wd_in, ROWS:$ws, ROWT:$wt);
+ string AsmString = !strconcat(instr_asm, "\t$wd, $ws, $wt");
+ list<dag> Pattern = [(set ROWD:$wd, (MipsVSHF ROWD:$wd_in, ROWS:$ws,
+ ROWT:$wt))];
+ string Constraints = "$wd = $wd_in";
+ InstrItinClass Itinerary = itin;
+}
+
+class MSA_3R_SLD_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
+ RegisterOperand ROWD, RegisterOperand ROWS = ROWD,
+ InstrItinClass itin = NoItinerary> {
+ dag OutOperandList = (outs ROWD:$wd);
+ dag InOperandList = (ins ROWS:$ws, GPR32:$rt);
+ string AsmString = !strconcat(instr_asm, "\t$wd, $ws[$rt]");
+ list<dag> Pattern = [(set ROWD:$wd, (OpNode ROWS:$ws, GPR32:$rt))];
+ InstrItinClass Itinerary = itin;
+}
+
+class MSA_3R_4R_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
+ RegisterOperand ROWD, RegisterOperand ROWS = ROWD,
+ RegisterOperand ROWT = ROWD,
+ InstrItinClass itin = NoItinerary> {
+ dag OutOperandList = (outs ROWD:$wd);
+ dag InOperandList = (ins ROWD:$wd_in, ROWS:$ws, ROWT:$wt);
+ string AsmString = !strconcat(instr_asm, "\t$wd, $ws, $wt");
+ list<dag> Pattern = [(set ROWD:$wd,
+ (OpNode ROWD:$wd_in, ROWS:$ws, ROWT:$wt))];
+ InstrItinClass Itinerary = itin;
+ string Constraints = "$wd = $wd_in";
+}
+
+class MSA_3RF_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
+ RegisterOperand ROWD, RegisterOperand ROWS = ROWD,
+ RegisterOperand ROWT = ROWD,
+ InstrItinClass itin = NoItinerary> :
+ MSA_3R_DESC_BASE<instr_asm, OpNode, ROWD, ROWS, ROWT, itin>;
+
+class MSA_3RF_4RF_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
+ RegisterOperand ROWD, RegisterOperand ROWS = ROWD,
+ RegisterOperand ROWT = ROWD,
+ InstrItinClass itin = NoItinerary> :
+ MSA_3R_4R_DESC_BASE<instr_asm, OpNode, ROWD, ROWS, ROWT, itin>;
+
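+// Vector-conditional branches: delay-slotted branches that test a vector
+// register. They are listed as defining AT (assumption: AT is reserved
+// here for possible branch expansion, as with other Mips branches).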
+class MSA_CBRANCH_DESC_BASE<string instr_asm, RegisterOperand ROWD> {
+ dag OutOperandList = (outs);
+ dag InOperandList = (ins ROWD:$wt, brtarget:$offset);
+ string AsmString = !strconcat(instr_asm, "\t$wt, $offset");
+ list<dag> Pattern = [];
+ InstrItinClass Itinerary = IIBranch;
+ bit isBranch = 1;
+ bit isTerminator = 1;
+ bit hasDelaySlot = 1;
+ list<Register> Defs = [AT];
+}
+
+class MSA_INSERT_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
+ RegisterOperand ROWD, RegisterOperand ROS,
+ InstrItinClass itin = NoItinerary> {
+ dag OutOperandList = (outs ROWD:$wd);
+ dag InOperandList = (ins ROWD:$wd_in, ROS:$rs, uimm6:$n);
+ string AsmString = !strconcat(instr_asm, "\t$wd[$n], $rs");
+ list<dag> Pattern = [(set ROWD:$wd, (OpNode ROWD:$wd_in,
+ ROS:$rs,
+ immZExt6:$n))];
+ InstrItinClass Itinerary = itin;
+ string Constraints = "$wd = $wd_in";
+}
+
+class MSA_INSERT_PSEUDO_BASE<SDPatternOperator OpNode, ValueType Ty,
+ RegisterOperand ROWD, RegisterOperand ROFS> :
+ MipsPseudo<(outs ROWD:$wd), (ins ROWD:$wd_in, uimm6:$n, ROFS:$fs),
+ [(set ROWD:$wd, (OpNode (Ty ROWD:$wd_in), ROFS:$fs,
+ immZExt6:$n))]> {
+  let usesCustomInserter = 1;
+ string Constraints = "$wd = $wd_in";
+}
+
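+// INSVE copies element zero of $ws into element $n of $wd, leaving all
+// other elements of $wd unchanged (hence the "$wd = $wd_in" tie).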
+class MSA_INSVE_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
+ RegisterOperand ROWD, RegisterOperand ROWS = ROWD,
+ InstrItinClass itin = NoItinerary> {
+ dag OutOperandList = (outs ROWD:$wd);
+ dag InOperandList = (ins ROWD:$wd_in, uimm6:$n, ROWS:$ws);
+ string AsmString = !strconcat(instr_asm, "\t$wd[$n], $ws[0]");
+ list<dag> Pattern = [(set ROWD:$wd, (OpNode ROWD:$wd_in,
+ immZExt6:$n,
+ ROWS:$ws))];
+ InstrItinClass Itinerary = itin;
+ string Constraints = "$wd = $wd_in";
+}
+
+class MSA_VEC_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
+ RegisterOperand ROWD, RegisterOperand ROWS = ROWD,
+ RegisterOperand ROWT = ROWD,
+ InstrItinClass itin = NoItinerary> {
+ dag OutOperandList = (outs ROWD:$wd);
+ dag InOperandList = (ins ROWS:$ws, ROWT:$wt);
+ string AsmString = !strconcat(instr_asm, "\t$wd, $ws, $wt");
+ list<dag> Pattern = [(set ROWD:$wd, (OpNode ROWS:$ws, ROWT:$wt))];
+ InstrItinClass Itinerary = itin;
+}
+
+class MSA_ELM_SPLAT_DESC_BASE<string instr_asm, SplatComplexPattern SplatImm,
+ RegisterOperand ROWD,
+ RegisterOperand ROWS = ROWD,
+ InstrItinClass itin = NoItinerary> {
+ dag OutOperandList = (outs ROWD:$wd);
+ dag InOperandList = (ins ROWS:$ws, SplatImm.OpClass:$n);
+ string AsmString = !strconcat(instr_asm, "\t$wd, $ws[$n]");
+ list<dag> Pattern = [(set ROWD:$wd, (MipsVSHF SplatImm:$n, ROWS:$ws,
+ ROWS:$ws))];
+ InstrItinClass Itinerary = itin;
+}
+
+class MSA_VEC_PSEUDO_BASE<SDPatternOperator OpNode, RegisterOperand ROWD,
+ RegisterOperand ROWS = ROWD,
+ RegisterOperand ROWT = ROWD> :
+ MipsPseudo<(outs ROWD:$wd), (ins ROWS:$ws, ROWT:$wt),
+ [(set ROWD:$wd, (OpNode ROWS:$ws, ROWT:$wt))]>;
+
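+// Concrete instruction descriptions. IsCommutable is mixed in wherever the
+// underlying operation commutes, marking the two source operands as
+// interchangeable for the purposes of instruction selection and scheduling.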
+class ADD_A_B_DESC : MSA_3R_DESC_BASE<"add_a.b", int_mips_add_a_b, MSA128BOpnd>,
+ IsCommutable;
+class ADD_A_H_DESC : MSA_3R_DESC_BASE<"add_a.h", int_mips_add_a_h, MSA128HOpnd>,
+ IsCommutable;
+class ADD_A_W_DESC : MSA_3R_DESC_BASE<"add_a.w", int_mips_add_a_w, MSA128WOpnd>,
+ IsCommutable;
+class ADD_A_D_DESC : MSA_3R_DESC_BASE<"add_a.d", int_mips_add_a_d, MSA128DOpnd>,
+ IsCommutable;
+
+class ADDS_A_B_DESC : MSA_3R_DESC_BASE<"adds_a.b", int_mips_adds_a_b,
+ MSA128BOpnd>, IsCommutable;
+class ADDS_A_H_DESC : MSA_3R_DESC_BASE<"adds_a.h", int_mips_adds_a_h,
+ MSA128HOpnd>, IsCommutable;
+class ADDS_A_W_DESC : MSA_3R_DESC_BASE<"adds_a.w", int_mips_adds_a_w,
+ MSA128WOpnd>, IsCommutable;
+class ADDS_A_D_DESC : MSA_3R_DESC_BASE<"adds_a.d", int_mips_adds_a_d,
+ MSA128DOpnd>, IsCommutable;
+
+class ADDS_S_B_DESC : MSA_3R_DESC_BASE<"adds_s.b", int_mips_adds_s_b,
+ MSA128BOpnd>, IsCommutable;
+class ADDS_S_H_DESC : MSA_3R_DESC_BASE<"adds_s.h", int_mips_adds_s_h,
+ MSA128HOpnd>, IsCommutable;
+class ADDS_S_W_DESC : MSA_3R_DESC_BASE<"adds_s.w", int_mips_adds_s_w,
+ MSA128WOpnd>, IsCommutable;
+class ADDS_S_D_DESC : MSA_3R_DESC_BASE<"adds_s.d", int_mips_adds_s_d,
+ MSA128DOpnd>, IsCommutable;
+
+class ADDS_U_B_DESC : MSA_3R_DESC_BASE<"adds_u.b", int_mips_adds_u_b,
+ MSA128BOpnd>, IsCommutable;
+class ADDS_U_H_DESC : MSA_3R_DESC_BASE<"adds_u.h", int_mips_adds_u_h,
+ MSA128HOpnd>, IsCommutable;
+class ADDS_U_W_DESC : MSA_3R_DESC_BASE<"adds_u.w", int_mips_adds_u_w,
+ MSA128WOpnd>, IsCommutable;
+class ADDS_U_D_DESC : MSA_3R_DESC_BASE<"adds_u.d", int_mips_adds_u_d,
+ MSA128DOpnd>, IsCommutable;
+
+class ADDV_B_DESC : MSA_3R_DESC_BASE<"addv.b", add, MSA128BOpnd>, IsCommutable;
+class ADDV_H_DESC : MSA_3R_DESC_BASE<"addv.h", add, MSA128HOpnd>, IsCommutable;
+class ADDV_W_DESC : MSA_3R_DESC_BASE<"addv.w", add, MSA128WOpnd>, IsCommutable;
+class ADDV_D_DESC : MSA_3R_DESC_BASE<"addv.d", add, MSA128DOpnd>, IsCommutable;
+
+class ADDVI_B_DESC : MSA_I5_DESC_BASE<"addvi.b", add, vsplati8_uimm5,
+ MSA128BOpnd>;
+class ADDVI_H_DESC : MSA_I5_DESC_BASE<"addvi.h", add, vsplati16_uimm5,
+ MSA128HOpnd>;
+class ADDVI_W_DESC : MSA_I5_DESC_BASE<"addvi.w", add, vsplati32_uimm5,
+ MSA128WOpnd>;
+class ADDVI_D_DESC : MSA_I5_DESC_BASE<"addvi.d", add, vsplati64_uimm5,
+ MSA128DOpnd>;
+
+class AND_V_DESC : MSA_VEC_DESC_BASE<"and.v", and, MSA128BOpnd>;
+class AND_V_H_PSEUDO_DESC : MSA_VEC_PSEUDO_BASE<and, MSA128HOpnd>;
+class AND_V_W_PSEUDO_DESC : MSA_VEC_PSEUDO_BASE<and, MSA128WOpnd>;
+class AND_V_D_PSEUDO_DESC : MSA_VEC_PSEUDO_BASE<and, MSA128DOpnd>;
+
+class ANDI_B_DESC : MSA_I8_DESC_BASE<"andi.b", and, vsplati8_uimm8,
+ MSA128BOpnd>;
+
+class ASUB_S_B_DESC : MSA_3R_DESC_BASE<"asub_s.b", int_mips_asub_s_b,
+ MSA128BOpnd>;
+class ASUB_S_H_DESC : MSA_3R_DESC_BASE<"asub_s.h", int_mips_asub_s_h,
+ MSA128HOpnd>;
+class ASUB_S_W_DESC : MSA_3R_DESC_BASE<"asub_s.w", int_mips_asub_s_w,
+ MSA128WOpnd>;
+class ASUB_S_D_DESC : MSA_3R_DESC_BASE<"asub_s.d", int_mips_asub_s_d,
+ MSA128DOpnd>;
+
+class ASUB_U_B_DESC : MSA_3R_DESC_BASE<"asub_u.b", int_mips_asub_u_b,
+ MSA128BOpnd>;
+class ASUB_U_H_DESC : MSA_3R_DESC_BASE<"asub_u.h", int_mips_asub_u_h,
+ MSA128HOpnd>;
+class ASUB_U_W_DESC : MSA_3R_DESC_BASE<"asub_u.w", int_mips_asub_u_w,
+ MSA128WOpnd>;
+class ASUB_U_D_DESC : MSA_3R_DESC_BASE<"asub_u.d", int_mips_asub_u_d,
+ MSA128DOpnd>;
+
+class AVE_S_B_DESC : MSA_3R_DESC_BASE<"ave_s.b", int_mips_ave_s_b, MSA128BOpnd>,
+ IsCommutable;
+class AVE_S_H_DESC : MSA_3R_DESC_BASE<"ave_s.h", int_mips_ave_s_h, MSA128HOpnd>,
+ IsCommutable;
+class AVE_S_W_DESC : MSA_3R_DESC_BASE<"ave_s.w", int_mips_ave_s_w, MSA128WOpnd>,
+ IsCommutable;
+class AVE_S_D_DESC : MSA_3R_DESC_BASE<"ave_s.d", int_mips_ave_s_d, MSA128DOpnd>,
+ IsCommutable;
+
+class AVE_U_B_DESC : MSA_3R_DESC_BASE<"ave_u.b", int_mips_ave_u_b, MSA128BOpnd>,
+ IsCommutable;
+class AVE_U_H_DESC : MSA_3R_DESC_BASE<"ave_u.h", int_mips_ave_u_h, MSA128HOpnd>,
+ IsCommutable;
+class AVE_U_W_DESC : MSA_3R_DESC_BASE<"ave_u.w", int_mips_ave_u_w, MSA128WOpnd>,
+ IsCommutable;
+class AVE_U_D_DESC : MSA_3R_DESC_BASE<"ave_u.d", int_mips_ave_u_d, MSA128DOpnd>,
+ IsCommutable;
+
+class AVER_S_B_DESC : MSA_3R_DESC_BASE<"aver_s.b", int_mips_aver_s_b,
+ MSA128BOpnd>, IsCommutable;
+class AVER_S_H_DESC : MSA_3R_DESC_BASE<"aver_s.h", int_mips_aver_s_h,
+ MSA128HOpnd>, IsCommutable;
+class AVER_S_W_DESC : MSA_3R_DESC_BASE<"aver_s.w", int_mips_aver_s_w,
+ MSA128WOpnd>, IsCommutable;
+class AVER_S_D_DESC : MSA_3R_DESC_BASE<"aver_s.d", int_mips_aver_s_d,
+ MSA128DOpnd>, IsCommutable;
+
+class AVER_U_B_DESC : MSA_3R_DESC_BASE<"aver_u.b", int_mips_aver_u_b,
+ MSA128BOpnd>, IsCommutable;
+class AVER_U_H_DESC : MSA_3R_DESC_BASE<"aver_u.h", int_mips_aver_u_h,
+ MSA128HOpnd>, IsCommutable;
+class AVER_U_W_DESC : MSA_3R_DESC_BASE<"aver_u.w", int_mips_aver_u_w,
+ MSA128WOpnd>, IsCommutable;
+class AVER_U_D_DESC : MSA_3R_DESC_BASE<"aver_u.d", int_mips_aver_u_d,
+ MSA128DOpnd>, IsCommutable;
+
+class BCLR_B_DESC : MSA_3R_DESC_BASE<"bclr.b", vbclr_b, MSA128BOpnd>;
+class BCLR_H_DESC : MSA_3R_DESC_BASE<"bclr.h", vbclr_h, MSA128HOpnd>;
+class BCLR_W_DESC : MSA_3R_DESC_BASE<"bclr.w", vbclr_w, MSA128WOpnd>;
+class BCLR_D_DESC : MSA_3R_DESC_BASE<"bclr.d", vbclr_d, MSA128DOpnd>;
+
+class BCLRI_B_DESC : MSA_BIT_B_DESC_BASE<"bclri.b", and, vsplat_uimm_inv_pow2,
+ MSA128BOpnd>;
+class BCLRI_H_DESC : MSA_BIT_H_DESC_BASE<"bclri.h", and, vsplat_uimm_inv_pow2,
+ MSA128HOpnd>;
+class BCLRI_W_DESC : MSA_BIT_W_DESC_BASE<"bclri.w", and, vsplat_uimm_inv_pow2,
+ MSA128WOpnd>;
+class BCLRI_D_DESC : MSA_BIT_D_DESC_BASE<"bclri.d", and, vsplat_uimm_inv_pow2,
+ MSA128DOpnd>;
+
+class BINSL_B_DESC : MSA_3R_BINSX_DESC_BASE<"binsl.b", int_mips_binsl_b,
+ MSA128BOpnd>;
+class BINSL_H_DESC : MSA_3R_BINSX_DESC_BASE<"binsl.h", int_mips_binsl_h,
+ MSA128HOpnd>;
+class BINSL_W_DESC : MSA_3R_BINSX_DESC_BASE<"binsl.w", int_mips_binsl_w,
+ MSA128WOpnd>;
+class BINSL_D_DESC : MSA_3R_BINSX_DESC_BASE<"binsl.d", int_mips_binsl_d,
+ MSA128DOpnd>;
+
+class BINSLI_B_DESC : MSA_BIT_BINSLI_DESC_BASE<"binsli.b", v16i8, MSA128BOpnd>;
+class BINSLI_H_DESC : MSA_BIT_BINSLI_DESC_BASE<"binsli.h", v8i16, MSA128HOpnd>;
+class BINSLI_W_DESC : MSA_BIT_BINSLI_DESC_BASE<"binsli.w", v4i32, MSA128WOpnd>;
+class BINSLI_D_DESC : MSA_BIT_BINSLI_DESC_BASE<"binsli.d", v2i64, MSA128DOpnd>;
+
+class BINSR_B_DESC : MSA_3R_BINSX_DESC_BASE<"binsr.b", int_mips_binsr_b,
+ MSA128BOpnd>;
+class BINSR_H_DESC : MSA_3R_BINSX_DESC_BASE<"binsr.h", int_mips_binsr_h,
+ MSA128HOpnd>;
+class BINSR_W_DESC : MSA_3R_BINSX_DESC_BASE<"binsr.w", int_mips_binsr_w,
+ MSA128WOpnd>;
+class BINSR_D_DESC : MSA_3R_BINSX_DESC_BASE<"binsr.d", int_mips_binsr_d,
+ MSA128DOpnd>;
+
+class BINSRI_B_DESC : MSA_BIT_BINSRI_DESC_BASE<"binsri.b", v16i8, MSA128BOpnd>;
+class BINSRI_H_DESC : MSA_BIT_BINSRI_DESC_BASE<"binsri.h", v8i16, MSA128HOpnd>;
+class BINSRI_W_DESC : MSA_BIT_BINSRI_DESC_BASE<"binsri.w", v4i32, MSA128WOpnd>;
+class BINSRI_D_DESC : MSA_BIT_BINSRI_DESC_BASE<"binsri.d", v2i64, MSA128DOpnd>;
+
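+// The bit-move/select family (BMNZ, BMZ, BSEL and their immediate forms)
+// performs a per-bit select between two sources. Every member reads and
+// writes $wd, so each description ties $wd to $wd_in and models the
+// operation as a vselect over v16i8 operands.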
+class BMNZ_V_DESC {
+ dag OutOperandList = (outs MSA128BOpnd:$wd);
+ dag InOperandList = (ins MSA128BOpnd:$wd_in, MSA128BOpnd:$ws,
+ MSA128BOpnd:$wt);
+ string AsmString = "bmnz.v\t$wd, $ws, $wt";
+ list<dag> Pattern = [(set MSA128BOpnd:$wd, (vselect MSA128BOpnd:$wt,
+ MSA128BOpnd:$ws,
+ MSA128BOpnd:$wd_in))];
+ InstrItinClass Itinerary = NoItinerary;
+ string Constraints = "$wd = $wd_in";
+}
+
+class BMNZI_B_DESC {
+ dag OutOperandList = (outs MSA128BOpnd:$wd);
+ dag InOperandList = (ins MSA128BOpnd:$wd_in, MSA128BOpnd:$ws,
+ vsplat_uimm8:$u8);
+ string AsmString = "bmnzi.b\t$wd, $ws, $u8";
+ list<dag> Pattern = [(set MSA128BOpnd:$wd, (vselect vsplati8_uimm8:$u8,
+ MSA128BOpnd:$ws,
+ MSA128BOpnd:$wd_in))];
+ InstrItinClass Itinerary = NoItinerary;
+ string Constraints = "$wd = $wd_in";
+}
+
+class BMZ_V_DESC {
+ dag OutOperandList = (outs MSA128BOpnd:$wd);
+ dag InOperandList = (ins MSA128BOpnd:$wd_in, MSA128BOpnd:$ws,
+ MSA128BOpnd:$wt);
+ string AsmString = "bmz.v\t$wd, $ws, $wt";
+ list<dag> Pattern = [(set MSA128BOpnd:$wd, (vselect MSA128BOpnd:$wt,
+ MSA128BOpnd:$wd_in,
+ MSA128BOpnd:$ws))];
+ InstrItinClass Itinerary = NoItinerary;
+ string Constraints = "$wd = $wd_in";
+}
+
+class BMZI_B_DESC {
+ dag OutOperandList = (outs MSA128BOpnd:$wd);
+ dag InOperandList = (ins MSA128BOpnd:$wd_in, MSA128BOpnd:$ws,
+ vsplat_uimm8:$u8);
+ string AsmString = "bmzi.b\t$wd, $ws, $u8";
+ list<dag> Pattern = [(set MSA128BOpnd:$wd, (vselect vsplati8_uimm8:$u8,
+ MSA128BOpnd:$wd_in,
+ MSA128BOpnd:$ws))];
+ InstrItinClass Itinerary = NoItinerary;
+ string Constraints = "$wd = $wd_in";
+}
+
+class BNEG_B_DESC : MSA_3R_DESC_BASE<"bneg.b", vbneg_b, MSA128BOpnd>;
+class BNEG_H_DESC : MSA_3R_DESC_BASE<"bneg.h", vbneg_h, MSA128HOpnd>;
+class BNEG_W_DESC : MSA_3R_DESC_BASE<"bneg.w", vbneg_w, MSA128WOpnd>;
+class BNEG_D_DESC : MSA_3R_DESC_BASE<"bneg.d", vbneg_d, MSA128DOpnd>;
+
+class BNEGI_B_DESC : MSA_BIT_B_DESC_BASE<"bnegi.b", xor, vsplat_uimm_pow2,
+                                         MSA128BOpnd>;
+class BNEGI_H_DESC : MSA_BIT_H_DESC_BASE<"bnegi.h", xor, vsplat_uimm_pow2,
+                                         MSA128HOpnd>;
+class BNEGI_W_DESC : MSA_BIT_W_DESC_BASE<"bnegi.w", xor, vsplat_uimm_pow2,
+                                         MSA128WOpnd>;
+class BNEGI_D_DESC : MSA_BIT_D_DESC_BASE<"bnegi.d", xor, vsplat_uimm_pow2,
+                                         MSA128DOpnd>;
+
+class BNZ_B_DESC : MSA_CBRANCH_DESC_BASE<"bnz.b", MSA128BOpnd>;
+class BNZ_H_DESC : MSA_CBRANCH_DESC_BASE<"bnz.h", MSA128HOpnd>;
+class BNZ_W_DESC : MSA_CBRANCH_DESC_BASE<"bnz.w", MSA128WOpnd>;
+class BNZ_D_DESC : MSA_CBRANCH_DESC_BASE<"bnz.d", MSA128DOpnd>;
+
+class BNZ_V_DESC : MSA_CBRANCH_DESC_BASE<"bnz.v", MSA128BOpnd>;
+
+class BSEL_V_DESC {
+ dag OutOperandList = (outs MSA128BOpnd:$wd);
+ dag InOperandList = (ins MSA128BOpnd:$wd_in, MSA128BOpnd:$ws,
+ MSA128BOpnd:$wt);
+ string AsmString = "bsel.v\t$wd, $ws, $wt";
+ list<dag> Pattern = [(set MSA128BOpnd:$wd,
+ (vselect MSA128BOpnd:$wd_in, MSA128BOpnd:$ws,
+ MSA128BOpnd:$wt))];
+ InstrItinClass Itinerary = NoItinerary;
+ string Constraints = "$wd = $wd_in";
+}
+
+class BSELI_B_DESC {
+ dag OutOperandList = (outs MSA128BOpnd:$wd);
+ dag InOperandList = (ins MSA128BOpnd:$wd_in, MSA128BOpnd:$ws,
+ vsplat_uimm8:$u8);
+ string AsmString = "bseli.b\t$wd, $ws, $u8";
+ list<dag> Pattern = [(set MSA128BOpnd:$wd, (vselect MSA128BOpnd:$wd_in,
+ MSA128BOpnd:$ws,
+ vsplati8_uimm8:$u8))];
+ InstrItinClass Itinerary = NoItinerary;
+ string Constraints = "$wd = $wd_in";
+}
+
+class BSET_B_DESC : MSA_3R_DESC_BASE<"bset.b", vbset_b, MSA128BOpnd>;
+class BSET_H_DESC : MSA_3R_DESC_BASE<"bset.h", vbset_h, MSA128HOpnd>;
+class BSET_W_DESC : MSA_3R_DESC_BASE<"bset.w", vbset_w, MSA128WOpnd>;
+class BSET_D_DESC : MSA_3R_DESC_BASE<"bset.d", vbset_d, MSA128DOpnd>;
+
+class BSETI_B_DESC : MSA_BIT_B_DESC_BASE<"bseti.b", or, vsplat_uimm_pow2,
+ MSA128BOpnd>;
+class BSETI_H_DESC : MSA_BIT_H_DESC_BASE<"bseti.h", or, vsplat_uimm_pow2,
+ MSA128HOpnd>;
+class BSETI_W_DESC : MSA_BIT_W_DESC_BASE<"bseti.w", or, vsplat_uimm_pow2,
+ MSA128WOpnd>;
+class BSETI_D_DESC : MSA_BIT_D_DESC_BASE<"bseti.d", or, vsplat_uimm_pow2,
+ MSA128DOpnd>;
+
+class BZ_B_DESC : MSA_CBRANCH_DESC_BASE<"bz.b", MSA128BOpnd>;
+class BZ_H_DESC : MSA_CBRANCH_DESC_BASE<"bz.h", MSA128HOpnd>;
+class BZ_W_DESC : MSA_CBRANCH_DESC_BASE<"bz.w", MSA128WOpnd>;
+class BZ_D_DESC : MSA_CBRANCH_DESC_BASE<"bz.d", MSA128DOpnd>;
+
+class BZ_V_DESC : MSA_CBRANCH_DESC_BASE<"bz.v", MSA128BOpnd>;
+
+class CEQ_B_DESC : MSA_3R_DESC_BASE<"ceq.b", vseteq_v16i8, MSA128BOpnd>,
+ IsCommutable;
+class CEQ_H_DESC : MSA_3R_DESC_BASE<"ceq.h", vseteq_v8i16, MSA128HOpnd>,
+ IsCommutable;
+class CEQ_W_DESC : MSA_3R_DESC_BASE<"ceq.w", vseteq_v4i32, MSA128WOpnd>,
+ IsCommutable;
+class CEQ_D_DESC : MSA_3R_DESC_BASE<"ceq.d", vseteq_v2i64, MSA128DOpnd>,
+ IsCommutable;
+
+class CEQI_B_DESC : MSA_I5_DESC_BASE<"ceqi.b", vseteq_v16i8, vsplati8_simm5,
+ MSA128BOpnd>;
+class CEQI_H_DESC : MSA_I5_DESC_BASE<"ceqi.h", vseteq_v8i16, vsplati16_simm5,
+ MSA128HOpnd>;
+class CEQI_W_DESC : MSA_I5_DESC_BASE<"ceqi.w", vseteq_v4i32, vsplati32_simm5,
+ MSA128WOpnd>;
+class CEQI_D_DESC : MSA_I5_DESC_BASE<"ceqi.d", vseteq_v2i64, vsplati64_simm5,
+ MSA128DOpnd>;
+
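+// CFCMSA/CTCMSA copy from/to the MSA control registers. Both are marked
+// hasSideEffects = 1 since reads and writes of MSA control state must not
+// be reordered or removed.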
+class CFCMSA_DESC {
+ dag OutOperandList = (outs GPR32Opnd:$rd);
+ dag InOperandList = (ins MSA128CROpnd:$cs);
+ string AsmString = "cfcmsa\t$rd, $cs";
+ InstrItinClass Itinerary = NoItinerary;
+ bit hasSideEffects = 1;
+}
+
+class CLE_S_B_DESC : MSA_3R_DESC_BASE<"cle_s.b", vsetle_v16i8, MSA128BOpnd>;
+class CLE_S_H_DESC : MSA_3R_DESC_BASE<"cle_s.h", vsetle_v8i16, MSA128HOpnd>;
+class CLE_S_W_DESC : MSA_3R_DESC_BASE<"cle_s.w", vsetle_v4i32, MSA128WOpnd>;
+class CLE_S_D_DESC : MSA_3R_DESC_BASE<"cle_s.d", vsetle_v2i64, MSA128DOpnd>;
+
+class CLE_U_B_DESC : MSA_3R_DESC_BASE<"cle_u.b", vsetule_v16i8, MSA128BOpnd>;
+class CLE_U_H_DESC : MSA_3R_DESC_BASE<"cle_u.h", vsetule_v8i16, MSA128HOpnd>;
+class CLE_U_W_DESC : MSA_3R_DESC_BASE<"cle_u.w", vsetule_v4i32, MSA128WOpnd>;
+class CLE_U_D_DESC : MSA_3R_DESC_BASE<"cle_u.d", vsetule_v2i64, MSA128DOpnd>;
+
+class CLEI_S_B_DESC : MSA_I5_DESC_BASE<"clei_s.b", vsetle_v16i8,
+ vsplati8_simm5, MSA128BOpnd>;
+class CLEI_S_H_DESC : MSA_I5_DESC_BASE<"clei_s.h", vsetle_v8i16,
+ vsplati16_simm5, MSA128HOpnd>;
+class CLEI_S_W_DESC : MSA_I5_DESC_BASE<"clei_s.w", vsetle_v4i32,
+ vsplati32_simm5, MSA128WOpnd>;
+class CLEI_S_D_DESC : MSA_I5_DESC_BASE<"clei_s.d", vsetle_v2i64,
+ vsplati64_simm5, MSA128DOpnd>;
+
+class CLEI_U_B_DESC : MSA_I5_DESC_BASE<"clei_u.b", vsetule_v16i8,
+ vsplati8_uimm5, MSA128BOpnd>;
+class CLEI_U_H_DESC : MSA_I5_DESC_BASE<"clei_u.h", vsetule_v8i16,
+ vsplati16_uimm5, MSA128HOpnd>;
+class CLEI_U_W_DESC : MSA_I5_DESC_BASE<"clei_u.w", vsetule_v4i32,
+ vsplati32_uimm5, MSA128WOpnd>;
+class CLEI_U_D_DESC : MSA_I5_DESC_BASE<"clei_u.d", vsetule_v2i64,
+ vsplati64_uimm5, MSA128DOpnd>;
+
+class CLT_S_B_DESC : MSA_3R_DESC_BASE<"clt_s.b", vsetlt_v16i8, MSA128BOpnd>;
+class CLT_S_H_DESC : MSA_3R_DESC_BASE<"clt_s.h", vsetlt_v8i16, MSA128HOpnd>;
+class CLT_S_W_DESC : MSA_3R_DESC_BASE<"clt_s.w", vsetlt_v4i32, MSA128WOpnd>;
+class CLT_S_D_DESC : MSA_3R_DESC_BASE<"clt_s.d", vsetlt_v2i64, MSA128DOpnd>;
+
+class CLT_U_B_DESC : MSA_3R_DESC_BASE<"clt_u.b", vsetult_v16i8, MSA128BOpnd>;
+class CLT_U_H_DESC : MSA_3R_DESC_BASE<"clt_u.h", vsetult_v8i16, MSA128HOpnd>;
+class CLT_U_W_DESC : MSA_3R_DESC_BASE<"clt_u.w", vsetult_v4i32, MSA128WOpnd>;
+class CLT_U_D_DESC : MSA_3R_DESC_BASE<"clt_u.d", vsetult_v2i64, MSA128DOpnd>;
+
+class CLTI_S_B_DESC : MSA_I5_DESC_BASE<"clti_s.b", vsetlt_v16i8,
+ vsplati8_simm5, MSA128BOpnd>;
+class CLTI_S_H_DESC : MSA_I5_DESC_BASE<"clti_s.h", vsetlt_v8i16,
+ vsplati16_simm5, MSA128HOpnd>;
+class CLTI_S_W_DESC : MSA_I5_DESC_BASE<"clti_s.w", vsetlt_v4i32,
+ vsplati32_simm5, MSA128WOpnd>;
+class CLTI_S_D_DESC : MSA_I5_DESC_BASE<"clti_s.d", vsetlt_v2i64,
+ vsplati64_simm5, MSA128DOpnd>;
+
+class CLTI_U_B_DESC : MSA_I5_DESC_BASE<"clti_u.b", vsetult_v16i8,
+ vsplati8_uimm5, MSA128BOpnd>;
+class CLTI_U_H_DESC : MSA_I5_DESC_BASE<"clti_u.h", vsetult_v8i16,
+ vsplati16_uimm5, MSA128HOpnd>;
+class CLTI_U_W_DESC : MSA_I5_DESC_BASE<"clti_u.w", vsetult_v4i32,
+ vsplati32_uimm5, MSA128WOpnd>;
+class CLTI_U_D_DESC : MSA_I5_DESC_BASE<"clti_u.d", vsetult_v2i64,
+ vsplati64_uimm5, MSA128DOpnd>;
+
+class COPY_S_B_DESC : MSA_COPY_DESC_BASE<"copy_s.b", vextract_sext_i8, v16i8,
+ GPR32Opnd, MSA128BOpnd>;
+class COPY_S_H_DESC : MSA_COPY_DESC_BASE<"copy_s.h", vextract_sext_i16, v8i16,
+ GPR32Opnd, MSA128HOpnd>;
+class COPY_S_W_DESC : MSA_COPY_DESC_BASE<"copy_s.w", vextract_sext_i32, v4i32,
+ GPR32Opnd, MSA128WOpnd>;
+
+class COPY_U_B_DESC : MSA_COPY_DESC_BASE<"copy_u.b", vextract_zext_i8, v16i8,
+ GPR32Opnd, MSA128BOpnd>;
+class COPY_U_H_DESC : MSA_COPY_DESC_BASE<"copy_u.h", vextract_zext_i16, v8i16,
+ GPR32Opnd, MSA128HOpnd>;
+class COPY_U_W_DESC : MSA_COPY_DESC_BASE<"copy_u.w", vextract_zext_i32, v4i32,
+ GPR32Opnd, MSA128WOpnd>;
+
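+// copy_s/copy_u extract an element into a GPR. There is no equivalent that
+// targets the floating-point registers, so f32/f64 element extraction is
+// modelled with the pseudos below instead.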
+class COPY_FW_PSEUDO_DESC : MSA_COPY_PSEUDO_BASE<vector_extract, v4f32, FGR32,
+ MSA128W>;
+class COPY_FD_PSEUDO_DESC : MSA_COPY_PSEUDO_BASE<vector_extract, v2f64, FGR64,
+ MSA128D>;
+
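+// ctcmsa is the converse of cfcmsa: it copies a GPR into an MSA control
+// register.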
+class CTCMSA_DESC {
+ dag OutOperandList = (outs);
+ dag InOperandList = (ins MSA128CROpnd:$cd, GPR32Opnd:$rs);
+ string AsmString = "ctcmsa\t$cd, $rs";
+ InstrItinClass Itinerary = NoItinerary;
+ bit hasSideEffects = 1;
+}
+
+class DIV_S_B_DESC : MSA_3R_DESC_BASE<"div_s.b", sdiv, MSA128BOpnd>;
+class DIV_S_H_DESC : MSA_3R_DESC_BASE<"div_s.h", sdiv, MSA128HOpnd>;
+class DIV_S_W_DESC : MSA_3R_DESC_BASE<"div_s.w", sdiv, MSA128WOpnd>;
+class DIV_S_D_DESC : MSA_3R_DESC_BASE<"div_s.d", sdiv, MSA128DOpnd>;
+
+class DIV_U_B_DESC : MSA_3R_DESC_BASE<"div_u.b", udiv, MSA128BOpnd>;
+class DIV_U_H_DESC : MSA_3R_DESC_BASE<"div_u.h", udiv, MSA128HOpnd>;
+class DIV_U_W_DESC : MSA_3R_DESC_BASE<"div_u.w", udiv, MSA128WOpnd>;
+class DIV_U_D_DESC : MSA_3R_DESC_BASE<"div_u.d", udiv, MSA128DOpnd>;
+
+class DOTP_S_H_DESC : MSA_3R_DESC_BASE<"dotp_s.h", int_mips_dotp_s_h,
+ MSA128HOpnd, MSA128BOpnd, MSA128BOpnd>,
+ IsCommutable;
+class DOTP_S_W_DESC : MSA_3R_DESC_BASE<"dotp_s.w", int_mips_dotp_s_w,
+ MSA128WOpnd, MSA128HOpnd, MSA128HOpnd>,
+ IsCommutable;
+class DOTP_S_D_DESC : MSA_3R_DESC_BASE<"dotp_s.d", int_mips_dotp_s_d,
+ MSA128DOpnd, MSA128WOpnd, MSA128WOpnd>,
+ IsCommutable;
+
+class DOTP_U_H_DESC : MSA_3R_DESC_BASE<"dotp_u.h", int_mips_dotp_u_h,
+ MSA128HOpnd, MSA128BOpnd, MSA128BOpnd>,
+ IsCommutable;
+class DOTP_U_W_DESC : MSA_3R_DESC_BASE<"dotp_u.w", int_mips_dotp_u_w,
+ MSA128WOpnd, MSA128HOpnd, MSA128HOpnd>,
+ IsCommutable;
+class DOTP_U_D_DESC : MSA_3R_DESC_BASE<"dotp_u.d", int_mips_dotp_u_d,
+ MSA128DOpnd, MSA128WOpnd, MSA128WOpnd>,
+ IsCommutable;
+
+class DPADD_S_H_DESC : MSA_3R_4R_DESC_BASE<"dpadd_s.h", int_mips_dpadd_s_h,
+ MSA128HOpnd, MSA128BOpnd,
+ MSA128BOpnd>, IsCommutable;
+class DPADD_S_W_DESC : MSA_3R_4R_DESC_BASE<"dpadd_s.w", int_mips_dpadd_s_w,
+ MSA128WOpnd, MSA128HOpnd,
+ MSA128HOpnd>, IsCommutable;
+class DPADD_S_D_DESC : MSA_3R_4R_DESC_BASE<"dpadd_s.d", int_mips_dpadd_s_d,
+ MSA128DOpnd, MSA128WOpnd,
+ MSA128WOpnd>, IsCommutable;
+
+class DPADD_U_H_DESC : MSA_3R_4R_DESC_BASE<"dpadd_u.h", int_mips_dpadd_u_h,
+ MSA128HOpnd, MSA128BOpnd,
+ MSA128BOpnd>, IsCommutable;
+class DPADD_U_W_DESC : MSA_3R_4R_DESC_BASE<"dpadd_u.w", int_mips_dpadd_u_w,
+ MSA128WOpnd, MSA128HOpnd,
+ MSA128HOpnd>, IsCommutable;
+class DPADD_U_D_DESC : MSA_3R_4R_DESC_BASE<"dpadd_u.d", int_mips_dpadd_u_d,
+ MSA128DOpnd, MSA128WOpnd,
+ MSA128WOpnd>, IsCommutable;
+
+class DPSUB_S_H_DESC : MSA_3R_4R_DESC_BASE<"dpsub_s.h", int_mips_dpsub_s_h,
+ MSA128HOpnd, MSA128BOpnd,
+ MSA128BOpnd>;
+class DPSUB_S_W_DESC : MSA_3R_4R_DESC_BASE<"dpsub_s.w", int_mips_dpsub_s_w,
+ MSA128WOpnd, MSA128HOpnd,
+ MSA128HOpnd>;
+class DPSUB_S_D_DESC : MSA_3R_4R_DESC_BASE<"dpsub_s.d", int_mips_dpsub_s_d,
+ MSA128DOpnd, MSA128WOpnd,
+ MSA128WOpnd>;
+
+class DPSUB_U_H_DESC : MSA_3R_4R_DESC_BASE<"dpsub_u.h", int_mips_dpsub_u_h,
+ MSA128HOpnd, MSA128BOpnd,
+ MSA128BOpnd>;
+class DPSUB_U_W_DESC : MSA_3R_4R_DESC_BASE<"dpsub_u.w", int_mips_dpsub_u_w,
+ MSA128WOpnd, MSA128HOpnd,
+ MSA128HOpnd>;
+class DPSUB_U_D_DESC : MSA_3R_4R_DESC_BASE<"dpsub_u.d", int_mips_dpsub_u_d,
+ MSA128DOpnd, MSA128WOpnd,
+ MSA128WOpnd>;
+
+class FADD_W_DESC : MSA_3RF_DESC_BASE<"fadd.w", fadd, MSA128WOpnd>,
+ IsCommutable;
+class FADD_D_DESC : MSA_3RF_DESC_BASE<"fadd.d", fadd, MSA128DOpnd>,
+ IsCommutable;
+
+class FCAF_W_DESC : MSA_3RF_DESC_BASE<"fcaf.w", int_mips_fcaf_w, MSA128WOpnd>,
+ IsCommutable;
+class FCAF_D_DESC : MSA_3RF_DESC_BASE<"fcaf.d", int_mips_fcaf_d, MSA128DOpnd>,
+ IsCommutable;
+
+class FCEQ_W_DESC : MSA_3RF_DESC_BASE<"fceq.w", vfsetoeq_v4f32, MSA128WOpnd>,
+ IsCommutable;
+class FCEQ_D_DESC : MSA_3RF_DESC_BASE<"fceq.d", vfsetoeq_v2f64, MSA128DOpnd>,
+ IsCommutable;
+
+class FCLASS_W_DESC : MSA_2RF_DESC_BASE<"fclass.w", int_mips_fclass_w,
+ MSA128WOpnd>;
+class FCLASS_D_DESC : MSA_2RF_DESC_BASE<"fclass.d", int_mips_fclass_d,
+ MSA128DOpnd>;
+
+class FCLE_W_DESC : MSA_3RF_DESC_BASE<"fcle.w", vfsetole_v4f32, MSA128WOpnd>;
+class FCLE_D_DESC : MSA_3RF_DESC_BASE<"fcle.d", vfsetole_v2f64, MSA128DOpnd>;
+
+class FCLT_W_DESC : MSA_3RF_DESC_BASE<"fclt.w", vfsetolt_v4f32, MSA128WOpnd>;
+class FCLT_D_DESC : MSA_3RF_DESC_BASE<"fclt.d", vfsetolt_v2f64, MSA128DOpnd>;
+
+class FCNE_W_DESC : MSA_3RF_DESC_BASE<"fcne.w", vfsetone_v4f32, MSA128WOpnd>,
+ IsCommutable;
+class FCNE_D_DESC : MSA_3RF_DESC_BASE<"fcne.d", vfsetone_v2f64, MSA128DOpnd>,
+ IsCommutable;
+
+class FCOR_W_DESC : MSA_3RF_DESC_BASE<"fcor.w", vfsetord_v4f32, MSA128WOpnd>,
+ IsCommutable;
+class FCOR_D_DESC : MSA_3RF_DESC_BASE<"fcor.d", vfsetord_v2f64, MSA128DOpnd>,
+ IsCommutable;
+
+class FCUEQ_W_DESC : MSA_3RF_DESC_BASE<"fcueq.w", vfsetueq_v4f32, MSA128WOpnd>,
+ IsCommutable;
+class FCUEQ_D_DESC : MSA_3RF_DESC_BASE<"fcueq.d", vfsetueq_v2f64, MSA128DOpnd>,
+ IsCommutable;
+
+class FCULE_W_DESC : MSA_3RF_DESC_BASE<"fcule.w", vfsetule_v4f32, MSA128WOpnd>,
+ IsCommutable;
+class FCULE_D_DESC : MSA_3RF_DESC_BASE<"fcule.d", vfsetule_v2f64, MSA128DOpnd>,
+ IsCommutable;
+
+class FCULT_W_DESC : MSA_3RF_DESC_BASE<"fcult.w", vfsetult_v4f32, MSA128WOpnd>,
+ IsCommutable;
+class FCULT_D_DESC : MSA_3RF_DESC_BASE<"fcult.d", vfsetult_v2f64, MSA128DOpnd>,
+ IsCommutable;
+
+class FCUN_W_DESC : MSA_3RF_DESC_BASE<"fcun.w", vfsetun_v4f32, MSA128WOpnd>,
+ IsCommutable;
+class FCUN_D_DESC : MSA_3RF_DESC_BASE<"fcun.d", vfsetun_v2f64, MSA128DOpnd>,
+ IsCommutable;
+
+class FCUNE_W_DESC : MSA_3RF_DESC_BASE<"fcune.w", vfsetune_v4f32, MSA128WOpnd>,
+ IsCommutable;
+class FCUNE_D_DESC : MSA_3RF_DESC_BASE<"fcune.d", vfsetune_v2f64, MSA128DOpnd>,
+ IsCommutable;
+
+class FDIV_W_DESC : MSA_3RF_DESC_BASE<"fdiv.w", fdiv, MSA128WOpnd>;
+class FDIV_D_DESC : MSA_3RF_DESC_BASE<"fdiv.d", fdiv, MSA128DOpnd>;
+
+class FEXDO_H_DESC : MSA_3RF_DESC_BASE<"fexdo.h", int_mips_fexdo_h,
+ MSA128HOpnd, MSA128WOpnd, MSA128WOpnd>;
+class FEXDO_W_DESC : MSA_3RF_DESC_BASE<"fexdo.w", int_mips_fexdo_w,
+ MSA128WOpnd, MSA128DOpnd, MSA128DOpnd>;
+
+// The fexp2.df instruction multiplies the first operand by 2 raised to the
+// power of the second operand. We therefore need a pseudo-insn to supply the
+// implicit 1.0 first operand when all we have to match is a plain ISD::FEXP2.
+class FEXP2_W_DESC : MSA_3RF_DESC_BASE<"fexp2.w", mul_fexp2, MSA128WOpnd>;
+class FEXP2_D_DESC : MSA_3RF_DESC_BASE<"fexp2.d", mul_fexp2, MSA128DOpnd>;
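+// A sketch of the expected lowering of the pseudos below (the exact sequence
+// is produced by the custom inserter and may differ):
+//   ldi.w     $wt, 1        # splat integer 1 across the vector
+//   ffint_u.w $wt, $wt      # convert each lane to 1.0
+//   fexp2.w   $wd, $wt, $ws # 1.0 * 2^ws == 2^ws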
+let usesCustomInserter = 1 in {
+ class FEXP2_W_1_PSEUDO_DESC :
+ MipsPseudo<(outs MSA128W:$wd), (ins MSA128W:$ws),
+ [(set MSA128W:$wd, (fexp2 MSA128W:$ws))]>;
+ class FEXP2_D_1_PSEUDO_DESC :
+ MipsPseudo<(outs MSA128D:$wd), (ins MSA128D:$ws),
+ [(set MSA128D:$wd, (fexp2 MSA128D:$ws))]>;
+}
+
+class FEXUPL_W_DESC : MSA_2RF_DESC_BASE<"fexupl.w", int_mips_fexupl_w,
+ MSA128WOpnd, MSA128HOpnd>;
+class FEXUPL_D_DESC : MSA_2RF_DESC_BASE<"fexupl.d", int_mips_fexupl_d,
+ MSA128DOpnd, MSA128WOpnd>;
+
+class FEXUPR_W_DESC : MSA_2RF_DESC_BASE<"fexupr.w", int_mips_fexupr_w,
+ MSA128WOpnd, MSA128HOpnd>;
+class FEXUPR_D_DESC : MSA_2RF_DESC_BASE<"fexupr.d", int_mips_fexupr_d,
+ MSA128DOpnd, MSA128WOpnd>;
+
+class FFINT_S_W_DESC : MSA_2RF_DESC_BASE<"ffint_s.w", sint_to_fp, MSA128WOpnd>;
+class FFINT_S_D_DESC : MSA_2RF_DESC_BASE<"ffint_s.d", sint_to_fp, MSA128DOpnd>;
+
+class FFINT_U_W_DESC : MSA_2RF_DESC_BASE<"ffint_u.w", uint_to_fp, MSA128WOpnd>;
+class FFINT_U_D_DESC : MSA_2RF_DESC_BASE<"ffint_u.d", uint_to_fp, MSA128DOpnd>;
+
+class FFQL_W_DESC : MSA_2RF_DESC_BASE<"ffql.w", int_mips_ffql_w,
+ MSA128WOpnd, MSA128HOpnd>;
+class FFQL_D_DESC : MSA_2RF_DESC_BASE<"ffql.d", int_mips_ffql_d,
+ MSA128DOpnd, MSA128WOpnd>;
+
+class FFQR_W_DESC : MSA_2RF_DESC_BASE<"ffqr.w", int_mips_ffqr_w,
+ MSA128WOpnd, MSA128HOpnd>;
+class FFQR_D_DESC : MSA_2RF_DESC_BASE<"ffqr.d", int_mips_ffqr_d,
+ MSA128DOpnd, MSA128WOpnd>;
+
+class FILL_B_DESC : MSA_2R_FILL_DESC_BASE<"fill.b", v16i8, vsplati8,
+ MSA128BOpnd, GPR32Opnd>;
+class FILL_H_DESC : MSA_2R_FILL_DESC_BASE<"fill.h", v8i16, vsplati16,
+ MSA128HOpnd, GPR32Opnd>;
+class FILL_W_DESC : MSA_2R_FILL_DESC_BASE<"fill.w", v4i32, vsplati32,
+ MSA128WOpnd, GPR32Opnd>;
+
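+// fill.b/h/w splat a GPR across all elements. There is no fill variant that
+// reads a floating-point register, so f32/f64 splats are modelled with the
+// pseudos below instead.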
+class FILL_FW_PSEUDO_DESC : MSA_2R_FILL_PSEUDO_BASE<v4f32, vsplatf32, MSA128W,
+ FGR32>;
+class FILL_FD_PSEUDO_DESC : MSA_2R_FILL_PSEUDO_BASE<v2f64, vsplatf64, MSA128D,
+ FGR64>;
+
+class FLOG2_W_DESC : MSA_2RF_DESC_BASE<"flog2.w", flog2, MSA128WOpnd>;
+class FLOG2_D_DESC : MSA_2RF_DESC_BASE<"flog2.d", flog2, MSA128DOpnd>;
+
+class FMADD_W_DESC : MSA_3RF_4RF_DESC_BASE<"fmadd.w", fma, MSA128WOpnd>;
+class FMADD_D_DESC : MSA_3RF_4RF_DESC_BASE<"fmadd.d", fma, MSA128DOpnd>;
+
+class FMAX_W_DESC : MSA_3RF_DESC_BASE<"fmax.w", int_mips_fmax_w, MSA128WOpnd>;
+class FMAX_D_DESC : MSA_3RF_DESC_BASE<"fmax.d", int_mips_fmax_d, MSA128DOpnd>;
+
+class FMAX_A_W_DESC : MSA_3RF_DESC_BASE<"fmax_a.w", int_mips_fmax_a_w,
+ MSA128WOpnd>;
+class FMAX_A_D_DESC : MSA_3RF_DESC_BASE<"fmax_a.d", int_mips_fmax_a_d,
+ MSA128DOpnd>;
+
+class FMIN_W_DESC : MSA_3RF_DESC_BASE<"fmin.w", int_mips_fmin_w, MSA128WOpnd>;
+class FMIN_D_DESC : MSA_3RF_DESC_BASE<"fmin.d", int_mips_fmin_d, MSA128DOpnd>;
+
+class FMIN_A_W_DESC : MSA_3RF_DESC_BASE<"fmin_a.w", int_mips_fmin_a_w,
+ MSA128WOpnd>;
+class FMIN_A_D_DESC : MSA_3RF_DESC_BASE<"fmin_a.d", int_mips_fmin_a_d,
+ MSA128DOpnd>;
+
+class FMSUB_W_DESC : MSA_3RF_4RF_DESC_BASE<"fmsub.w", fms, MSA128WOpnd>;
+class FMSUB_D_DESC : MSA_3RF_4RF_DESC_BASE<"fmsub.d", fms, MSA128DOpnd>;
+
+class FMUL_W_DESC : MSA_3RF_DESC_BASE<"fmul.w", fmul, MSA128WOpnd>;
+class FMUL_D_DESC : MSA_3RF_DESC_BASE<"fmul.d", fmul, MSA128DOpnd>;
+
+class FRINT_W_DESC : MSA_2RF_DESC_BASE<"frint.w", frint, MSA128WOpnd>;
+class FRINT_D_DESC : MSA_2RF_DESC_BASE<"frint.d", frint, MSA128DOpnd>;
+
+class FRCP_W_DESC : MSA_2RF_DESC_BASE<"frcp.w", int_mips_frcp_w, MSA128WOpnd>;
+class FRCP_D_DESC : MSA_2RF_DESC_BASE<"frcp.d", int_mips_frcp_d, MSA128DOpnd>;
+
+class FRSQRT_W_DESC : MSA_2RF_DESC_BASE<"frsqrt.w", int_mips_frsqrt_w,
+ MSA128WOpnd>;
+class FRSQRT_D_DESC : MSA_2RF_DESC_BASE<"frsqrt.d", int_mips_frsqrt_d,
+ MSA128DOpnd>;
+
+class FSAF_W_DESC : MSA_3RF_DESC_BASE<"fsaf.w", int_mips_fsaf_w, MSA128WOpnd>;
+class FSAF_D_DESC : MSA_3RF_DESC_BASE<"fsaf.d", int_mips_fsaf_d, MSA128DOpnd>;
+
+class FSEQ_W_DESC : MSA_3RF_DESC_BASE<"fseq.w", int_mips_fseq_w, MSA128WOpnd>;
+class FSEQ_D_DESC : MSA_3RF_DESC_BASE<"fseq.d", int_mips_fseq_d, MSA128DOpnd>;
+
+class FSLE_W_DESC : MSA_3RF_DESC_BASE<"fsle.w", int_mips_fsle_w, MSA128WOpnd>;
+class FSLE_D_DESC : MSA_3RF_DESC_BASE<"fsle.d", int_mips_fsle_d, MSA128DOpnd>;
+
+class FSLT_W_DESC : MSA_3RF_DESC_BASE<"fslt.w", int_mips_fslt_w, MSA128WOpnd>;
+class FSLT_D_DESC : MSA_3RF_DESC_BASE<"fslt.d", int_mips_fslt_d, MSA128DOpnd>;
+
+class FSNE_W_DESC : MSA_3RF_DESC_BASE<"fsne.w", int_mips_fsne_w, MSA128WOpnd>;
+class FSNE_D_DESC : MSA_3RF_DESC_BASE<"fsne.d", int_mips_fsne_d, MSA128DOpnd>;
+
+class FSOR_W_DESC : MSA_3RF_DESC_BASE<"fsor.w", int_mips_fsor_w, MSA128WOpnd>;
+class FSOR_D_DESC : MSA_3RF_DESC_BASE<"fsor.d", int_mips_fsor_d, MSA128DOpnd>;
+
+class FSQRT_W_DESC : MSA_2RF_DESC_BASE<"fsqrt.w", fsqrt, MSA128WOpnd>;
+class FSQRT_D_DESC : MSA_2RF_DESC_BASE<"fsqrt.d", fsqrt, MSA128DOpnd>;
+
+class FSUB_W_DESC : MSA_3RF_DESC_BASE<"fsub.w", fsub, MSA128WOpnd>;
+class FSUB_D_DESC : MSA_3RF_DESC_BASE<"fsub.d", fsub, MSA128DOpnd>;
+
+class FSUEQ_W_DESC : MSA_3RF_DESC_BASE<"fsueq.w", int_mips_fsueq_w,
+ MSA128WOpnd>;
+class FSUEQ_D_DESC : MSA_3RF_DESC_BASE<"fsueq.d", int_mips_fsueq_d,
+ MSA128DOpnd>;
+
+class FSULE_W_DESC : MSA_3RF_DESC_BASE<"fsule.w", int_mips_fsule_w,
+ MSA128WOpnd>;
+class FSULE_D_DESC : MSA_3RF_DESC_BASE<"fsule.d", int_mips_fsule_d,
+ MSA128DOpnd>;
+
+class FSULT_W_DESC : MSA_3RF_DESC_BASE<"fsult.w", int_mips_fsult_w,
+ MSA128WOpnd>;
+class FSULT_D_DESC : MSA_3RF_DESC_BASE<"fsult.d", int_mips_fsult_d,
+ MSA128DOpnd>;
+
+class FSUN_W_DESC : MSA_3RF_DESC_BASE<"fsun.w", int_mips_fsun_w, MSA128WOpnd>;
+class FSUN_D_DESC : MSA_3RF_DESC_BASE<"fsun.d", int_mips_fsun_d, MSA128DOpnd>;
+
+class FSUNE_W_DESC : MSA_3RF_DESC_BASE<"fsune.w", int_mips_fsune_w,
+ MSA128WOpnd>;
+class FSUNE_D_DESC : MSA_3RF_DESC_BASE<"fsune.d", int_mips_fsune_d,
+ MSA128DOpnd>;
+
+class FTINT_S_W_DESC : MSA_2RF_DESC_BASE<"ftint_s.w", int_mips_ftint_s_w,
+ MSA128WOpnd>;
+class FTINT_S_D_DESC : MSA_2RF_DESC_BASE<"ftint_s.d", int_mips_ftint_s_d,
+ MSA128DOpnd>;
+
+class FTINT_U_W_DESC : MSA_2RF_DESC_BASE<"ftint_u.w", int_mips_ftint_u_w,
+ MSA128WOpnd>;
+class FTINT_U_D_DESC : MSA_2RF_DESC_BASE<"ftint_u.d", int_mips_ftint_u_d,
+ MSA128DOpnd>;
+
+class FTQ_H_DESC : MSA_3RF_DESC_BASE<"ftq.h", int_mips_ftq_h,
+ MSA128HOpnd, MSA128WOpnd, MSA128WOpnd>;
+class FTQ_W_DESC : MSA_3RF_DESC_BASE<"ftq.w", int_mips_ftq_w,
+ MSA128WOpnd, MSA128DOpnd, MSA128DOpnd>;
+
+class FTRUNC_S_W_DESC : MSA_2RF_DESC_BASE<"ftrunc_s.w", fp_to_sint,
+ MSA128WOpnd>;
+class FTRUNC_S_D_DESC : MSA_2RF_DESC_BASE<"ftrunc_s.d", fp_to_sint,
+ MSA128DOpnd>;
+
+class FTRUNC_U_W_DESC : MSA_2RF_DESC_BASE<"ftrunc_u.w", fp_to_uint,
+ MSA128WOpnd>;
+class FTRUNC_U_D_DESC : MSA_2RF_DESC_BASE<"ftrunc_u.d", fp_to_uint,
+ MSA128DOpnd>;
+
+class HADD_S_H_DESC : MSA_3R_DESC_BASE<"hadd_s.h", int_mips_hadd_s_h,
+ MSA128HOpnd, MSA128BOpnd, MSA128BOpnd>;
+class HADD_S_W_DESC : MSA_3R_DESC_BASE<"hadd_s.w", int_mips_hadd_s_w,
+ MSA128WOpnd, MSA128HOpnd, MSA128HOpnd>;
+class HADD_S_D_DESC : MSA_3R_DESC_BASE<"hadd_s.d", int_mips_hadd_s_d,
+ MSA128DOpnd, MSA128WOpnd, MSA128WOpnd>;
+
+class HADD_U_H_DESC : MSA_3R_DESC_BASE<"hadd_u.h", int_mips_hadd_u_h,
+ MSA128HOpnd, MSA128BOpnd, MSA128BOpnd>;
+class HADD_U_W_DESC : MSA_3R_DESC_BASE<"hadd_u.w", int_mips_hadd_u_w,
+ MSA128WOpnd, MSA128HOpnd, MSA128HOpnd>;
+class HADD_U_D_DESC : MSA_3R_DESC_BASE<"hadd_u.d", int_mips_hadd_u_d,
+ MSA128DOpnd, MSA128WOpnd, MSA128WOpnd>;
+
+class HSUB_S_H_DESC : MSA_3R_DESC_BASE<"hsub_s.h", int_mips_hsub_s_h,
+ MSA128HOpnd, MSA128BOpnd, MSA128BOpnd>;
+class HSUB_S_W_DESC : MSA_3R_DESC_BASE<"hsub_s.w", int_mips_hsub_s_w,
+ MSA128WOpnd, MSA128HOpnd, MSA128HOpnd>;
+class HSUB_S_D_DESC : MSA_3R_DESC_BASE<"hsub_s.d", int_mips_hsub_s_d,
+ MSA128DOpnd, MSA128WOpnd, MSA128WOpnd>;
+
+class HSUB_U_H_DESC : MSA_3R_DESC_BASE<"hsub_u.h", int_mips_hsub_u_h,
+ MSA128HOpnd, MSA128BOpnd, MSA128BOpnd>;
+class HSUB_U_W_DESC : MSA_3R_DESC_BASE<"hsub_u.w", int_mips_hsub_u_w,
+ MSA128WOpnd, MSA128HOpnd, MSA128HOpnd>;
+class HSUB_U_D_DESC : MSA_3R_DESC_BASE<"hsub_u.d", int_mips_hsub_u_d,
+ MSA128DOpnd, MSA128WOpnd, MSA128WOpnd>;
+
+class ILVEV_B_DESC : MSA_3R_DESC_BASE<"ilvev.b", MipsILVEV, MSA128BOpnd>;
+class ILVEV_H_DESC : MSA_3R_DESC_BASE<"ilvev.h", MipsILVEV, MSA128HOpnd>;
+class ILVEV_W_DESC : MSA_3R_DESC_BASE<"ilvev.w", MipsILVEV, MSA128WOpnd>;
+class ILVEV_D_DESC : MSA_3R_DESC_BASE<"ilvev.d", MipsILVEV, MSA128DOpnd>;
+
+class ILVL_B_DESC : MSA_3R_DESC_BASE<"ilvl.b", MipsILVL, MSA128BOpnd>;
+class ILVL_H_DESC : MSA_3R_DESC_BASE<"ilvl.h", MipsILVL, MSA128HOpnd>;
+class ILVL_W_DESC : MSA_3R_DESC_BASE<"ilvl.w", MipsILVL, MSA128WOpnd>;
+class ILVL_D_DESC : MSA_3R_DESC_BASE<"ilvl.d", MipsILVL, MSA128DOpnd>;
+
+class ILVOD_B_DESC : MSA_3R_DESC_BASE<"ilvod.b", MipsILVOD, MSA128BOpnd>;
+class ILVOD_H_DESC : MSA_3R_DESC_BASE<"ilvod.h", MipsILVOD, MSA128HOpnd>;
+class ILVOD_W_DESC : MSA_3R_DESC_BASE<"ilvod.w", MipsILVOD, MSA128WOpnd>;
+class ILVOD_D_DESC : MSA_3R_DESC_BASE<"ilvod.d", MipsILVOD, MSA128DOpnd>;
+
+class ILVR_B_DESC : MSA_3R_DESC_BASE<"ilvr.b", MipsILVR, MSA128BOpnd>;
+class ILVR_H_DESC : MSA_3R_DESC_BASE<"ilvr.h", MipsILVR, MSA128HOpnd>;
+class ILVR_W_DESC : MSA_3R_DESC_BASE<"ilvr.w", MipsILVR, MSA128WOpnd>;
+class ILVR_D_DESC : MSA_3R_DESC_BASE<"ilvr.d", MipsILVR, MSA128DOpnd>;
+
+class INSERT_B_DESC : MSA_INSERT_DESC_BASE<"insert.b", vinsert_v16i8,
+ MSA128BOpnd, GPR32Opnd>;
+class INSERT_H_DESC : MSA_INSERT_DESC_BASE<"insert.h", vinsert_v8i16,
+ MSA128HOpnd, GPR32Opnd>;
+class INSERT_W_DESC : MSA_INSERT_DESC_BASE<"insert.w", vinsert_v4i32,
+ MSA128WOpnd, GPR32Opnd>;
+
+class INSERT_FW_PSEUDO_DESC : MSA_INSERT_PSEUDO_BASE<vector_insert, v4f32,
+ MSA128WOpnd, FGR32Opnd>;
+class INSERT_FD_PSEUDO_DESC : MSA_INSERT_PSEUDO_BASE<vector_insert, v2f64,
+ MSA128DOpnd, FGR64Opnd>;
+
+class INSVE_B_DESC : MSA_INSVE_DESC_BASE<"insve.b", int_mips_insve_b,
+ MSA128BOpnd>;
+class INSVE_H_DESC : MSA_INSVE_DESC_BASE<"insve.h", int_mips_insve_h,
+ MSA128HOpnd>;
+class INSVE_W_DESC : MSA_INSVE_DESC_BASE<"insve.w", int_mips_insve_w,
+ MSA128WOpnd>;
+class INSVE_D_DESC : MSA_INSVE_DESC_BASE<"insve.d", int_mips_insve_d,
+ MSA128DOpnd>;
+
+class LD_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
+ ValueType TyNode, RegisterOperand ROWD,
+ Operand MemOpnd = mem, ComplexPattern Addr = addrRegImm,
+ InstrItinClass itin = NoItinerary> {
+ dag OutOperandList = (outs ROWD:$wd);
+ dag InOperandList = (ins MemOpnd:$addr);
+ string AsmString = !strconcat(instr_asm, "\t$wd, $addr");
+ list<dag> Pattern = [(set ROWD:$wd, (TyNode (OpNode Addr:$addr)))];
+ InstrItinClass Itinerary = itin;
+ string DecoderMethod = "DecodeMSA128Mem";
+}
+
+class LD_B_DESC : LD_DESC_BASE<"ld.b", load, v16i8, MSA128BOpnd>;
+class LD_H_DESC : LD_DESC_BASE<"ld.h", load, v8i16, MSA128HOpnd>;
+class LD_W_DESC : LD_DESC_BASE<"ld.w", load, v4i32, MSA128WOpnd>;
+class LD_D_DESC : LD_DESC_BASE<"ld.d", load, v2i64, MSA128DOpnd>;
+
+class LDI_B_DESC : MSA_I10_LDI_DESC_BASE<"ldi.b", MSA128BOpnd>;
+class LDI_H_DESC : MSA_I10_LDI_DESC_BASE<"ldi.h", MSA128HOpnd>;
+class LDI_W_DESC : MSA_I10_LDI_DESC_BASE<"ldi.w", MSA128WOpnd>;
+class LDI_D_DESC : MSA_I10_LDI_DESC_BASE<"ldi.d", MSA128DOpnd>;
+
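+// lsa computes a scaled address, $rd = $rs + ($rt << $sa), matching the
+// common base + index * element-size addressing pattern.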
+class LSA_DESC {
+ dag OutOperandList = (outs GPR32Opnd:$rd);
+ dag InOperandList = (ins GPR32Opnd:$rs, GPR32Opnd:$rt, LSAImm:$sa);
+ string AsmString = "lsa\t$rd, $rs, $rt, $sa";
+ list<dag> Pattern = [(set GPR32Opnd:$rd, (add GPR32Opnd:$rs,
+ (shl GPR32Opnd:$rt,
+ immZExt2Lsa:$sa)))];
+ InstrItinClass Itinerary = NoItinerary;
+}
+
+class MADD_Q_H_DESC : MSA_3RF_4RF_DESC_BASE<"madd_q.h", int_mips_madd_q_h,
+ MSA128HOpnd>;
+class MADD_Q_W_DESC : MSA_3RF_4RF_DESC_BASE<"madd_q.w", int_mips_madd_q_w,
+ MSA128WOpnd>;
+
+class MADDR_Q_H_DESC : MSA_3RF_4RF_DESC_BASE<"maddr_q.h", int_mips_maddr_q_h,
+ MSA128HOpnd>;
+class MADDR_Q_W_DESC : MSA_3RF_4RF_DESC_BASE<"maddr_q.w", int_mips_maddr_q_w,
+ MSA128WOpnd>;
+
+class MADDV_B_DESC : MSA_3R_4R_DESC_BASE<"maddv.b", muladd, MSA128BOpnd>;
+class MADDV_H_DESC : MSA_3R_4R_DESC_BASE<"maddv.h", muladd, MSA128HOpnd>;
+class MADDV_W_DESC : MSA_3R_4R_DESC_BASE<"maddv.w", muladd, MSA128WOpnd>;
+class MADDV_D_DESC : MSA_3R_4R_DESC_BASE<"maddv.d", muladd, MSA128DOpnd>;
+
+class MAX_A_B_DESC : MSA_3R_DESC_BASE<"max_a.b", int_mips_max_a_b, MSA128BOpnd>;
+class MAX_A_H_DESC : MSA_3R_DESC_BASE<"max_a.h", int_mips_max_a_h, MSA128HOpnd>;
+class MAX_A_W_DESC : MSA_3R_DESC_BASE<"max_a.w", int_mips_max_a_w, MSA128WOpnd>;
+class MAX_A_D_DESC : MSA_3R_DESC_BASE<"max_a.d", int_mips_max_a_d, MSA128DOpnd>;
+
+class MAX_S_B_DESC : MSA_3R_DESC_BASE<"max_s.b", MipsVSMax, MSA128BOpnd>;
+class MAX_S_H_DESC : MSA_3R_DESC_BASE<"max_s.h", MipsVSMax, MSA128HOpnd>;
+class MAX_S_W_DESC : MSA_3R_DESC_BASE<"max_s.w", MipsVSMax, MSA128WOpnd>;
+class MAX_S_D_DESC : MSA_3R_DESC_BASE<"max_s.d", MipsVSMax, MSA128DOpnd>;
+
+class MAX_U_B_DESC : MSA_3R_DESC_BASE<"max_u.b", MipsVUMax, MSA128BOpnd>;
+class MAX_U_H_DESC : MSA_3R_DESC_BASE<"max_u.h", MipsVUMax, MSA128HOpnd>;
+class MAX_U_W_DESC : MSA_3R_DESC_BASE<"max_u.w", MipsVUMax, MSA128WOpnd>;
+class MAX_U_D_DESC : MSA_3R_DESC_BASE<"max_u.d", MipsVUMax, MSA128DOpnd>;
+
+class MAXI_S_B_DESC : MSA_I5_DESC_BASE<"maxi_s.b", MipsVSMax, vsplati8_simm5,
+ MSA128BOpnd>;
+class MAXI_S_H_DESC : MSA_I5_DESC_BASE<"maxi_s.h", MipsVSMax, vsplati16_simm5,
+ MSA128HOpnd>;
+class MAXI_S_W_DESC : MSA_I5_DESC_BASE<"maxi_s.w", MipsVSMax, vsplati32_simm5,
+ MSA128WOpnd>;
+class MAXI_S_D_DESC : MSA_I5_DESC_BASE<"maxi_s.d", MipsVSMax, vsplati64_simm5,
+ MSA128DOpnd>;
+
+class MAXI_U_B_DESC : MSA_I5_DESC_BASE<"maxi_u.b", MipsVUMax, vsplati8_uimm5,
+ MSA128BOpnd>;
+class MAXI_U_H_DESC : MSA_I5_DESC_BASE<"maxi_u.h", MipsVUMax, vsplati16_uimm5,
+ MSA128HOpnd>;
+class MAXI_U_W_DESC : MSA_I5_DESC_BASE<"maxi_u.w", MipsVUMax, vsplati32_uimm5,
+ MSA128WOpnd>;
+class MAXI_U_D_DESC : MSA_I5_DESC_BASE<"maxi_u.d", MipsVUMax, vsplati64_uimm5,
+ MSA128DOpnd>;
+
+class MIN_A_B_DESC : MSA_3R_DESC_BASE<"min_a.b", int_mips_min_a_b, MSA128BOpnd>;
+class MIN_A_H_DESC : MSA_3R_DESC_BASE<"min_a.h", int_mips_min_a_h, MSA128HOpnd>;
+class MIN_A_W_DESC : MSA_3R_DESC_BASE<"min_a.w", int_mips_min_a_w, MSA128WOpnd>;
+class MIN_A_D_DESC : MSA_3R_DESC_BASE<"min_a.d", int_mips_min_a_d, MSA128DOpnd>;
+
+class MIN_S_B_DESC : MSA_3R_DESC_BASE<"min_s.b", MipsVSMin, MSA128BOpnd>;
+class MIN_S_H_DESC : MSA_3R_DESC_BASE<"min_s.h", MipsVSMin, MSA128HOpnd>;
+class MIN_S_W_DESC : MSA_3R_DESC_BASE<"min_s.w", MipsVSMin, MSA128WOpnd>;
+class MIN_S_D_DESC : MSA_3R_DESC_BASE<"min_s.d", MipsVSMin, MSA128DOpnd>;
+
+class MIN_U_B_DESC : MSA_3R_DESC_BASE<"min_u.b", MipsVUMin, MSA128BOpnd>;
+class MIN_U_H_DESC : MSA_3R_DESC_BASE<"min_u.h", MipsVUMin, MSA128HOpnd>;
+class MIN_U_W_DESC : MSA_3R_DESC_BASE<"min_u.w", MipsVUMin, MSA128WOpnd>;
+class MIN_U_D_DESC : MSA_3R_DESC_BASE<"min_u.d", MipsVUMin, MSA128DOpnd>;
+
+class MINI_S_B_DESC : MSA_I5_DESC_BASE<"mini_s.b", MipsVSMin, vsplati8_simm5,
+ MSA128BOpnd>;
+class MINI_S_H_DESC : MSA_I5_DESC_BASE<"mini_s.h", MipsVSMin, vsplati16_simm5,
+ MSA128HOpnd>;
+class MINI_S_W_DESC : MSA_I5_DESC_BASE<"mini_s.w", MipsVSMin, vsplati32_simm5,
+ MSA128WOpnd>;
+class MINI_S_D_DESC : MSA_I5_DESC_BASE<"mini_s.d", MipsVSMin, vsplati64_simm5,
+ MSA128DOpnd>;
+
+class MINI_U_B_DESC : MSA_I5_DESC_BASE<"mini_u.b", MipsVUMin, vsplati8_uimm5,
+ MSA128BOpnd>;
+class MINI_U_H_DESC : MSA_I5_DESC_BASE<"mini_u.h", MipsVUMin, vsplati16_uimm5,
+ MSA128HOpnd>;
+class MINI_U_W_DESC : MSA_I5_DESC_BASE<"mini_u.w", MipsVUMin, vsplati32_uimm5,
+ MSA128WOpnd>;
+class MINI_U_D_DESC : MSA_I5_DESC_BASE<"mini_u.d", MipsVUMin, vsplati64_uimm5,
+ MSA128DOpnd>;
+
+class MOD_S_B_DESC : MSA_3R_DESC_BASE<"mod_s.b", srem, MSA128BOpnd>;
+class MOD_S_H_DESC : MSA_3R_DESC_BASE<"mod_s.h", srem, MSA128HOpnd>;
+class MOD_S_W_DESC : MSA_3R_DESC_BASE<"mod_s.w", srem, MSA128WOpnd>;
+class MOD_S_D_DESC : MSA_3R_DESC_BASE<"mod_s.d", srem, MSA128DOpnd>;
+
+class MOD_U_B_DESC : MSA_3R_DESC_BASE<"mod_u.b", urem, MSA128BOpnd>;
+class MOD_U_H_DESC : MSA_3R_DESC_BASE<"mod_u.h", urem, MSA128HOpnd>;
+class MOD_U_W_DESC : MSA_3R_DESC_BASE<"mod_u.w", urem, MSA128WOpnd>;
+class MOD_U_D_DESC : MSA_3R_DESC_BASE<"mod_u.d", urem, MSA128DOpnd>;
+
+class MOVE_V_DESC {
+ dag OutOperandList = (outs MSA128BOpnd:$wd);
+ dag InOperandList = (ins MSA128BOpnd:$ws);
+ string AsmString = "move.v\t$wd, $ws";
+ list<dag> Pattern = [];
+ InstrItinClass Itinerary = NoItinerary;
+}
+
+class MSUB_Q_H_DESC : MSA_3RF_4RF_DESC_BASE<"msub_q.h", int_mips_msub_q_h,
+ MSA128HOpnd>;
+class MSUB_Q_W_DESC : MSA_3RF_4RF_DESC_BASE<"msub_q.w", int_mips_msub_q_w,
+ MSA128WOpnd>;
+
+class MSUBR_Q_H_DESC : MSA_3RF_4RF_DESC_BASE<"msubr_q.h", int_mips_msubr_q_h,
+ MSA128HOpnd>;
+class MSUBR_Q_W_DESC : MSA_3RF_4RF_DESC_BASE<"msubr_q.w", int_mips_msubr_q_w,
+ MSA128WOpnd>;
+
+class MSUBV_B_DESC : MSA_3R_4R_DESC_BASE<"msubv.b", mulsub, MSA128BOpnd>;
+class MSUBV_H_DESC : MSA_3R_4R_DESC_BASE<"msubv.h", mulsub, MSA128HOpnd>;
+class MSUBV_W_DESC : MSA_3R_4R_DESC_BASE<"msubv.w", mulsub, MSA128WOpnd>;
+class MSUBV_D_DESC : MSA_3R_4R_DESC_BASE<"msubv.d", mulsub, MSA128DOpnd>;
+
+class MUL_Q_H_DESC : MSA_3RF_DESC_BASE<"mul_q.h", int_mips_mul_q_h,
+ MSA128HOpnd>;
+class MUL_Q_W_DESC : MSA_3RF_DESC_BASE<"mul_q.w", int_mips_mul_q_w,
+ MSA128WOpnd>;
+
+class MULR_Q_H_DESC : MSA_3RF_DESC_BASE<"mulr_q.h", int_mips_mulr_q_h,
+ MSA128HOpnd>;
+class MULR_Q_W_DESC : MSA_3RF_DESC_BASE<"mulr_q.w", int_mips_mulr_q_w,
+ MSA128WOpnd>;
+
+class MULV_B_DESC : MSA_3R_DESC_BASE<"mulv.b", mul, MSA128BOpnd>;
+class MULV_H_DESC : MSA_3R_DESC_BASE<"mulv.h", mul, MSA128HOpnd>;
+class MULV_W_DESC : MSA_3R_DESC_BASE<"mulv.w", mul, MSA128WOpnd>;
+class MULV_D_DESC : MSA_3R_DESC_BASE<"mulv.d", mul, MSA128DOpnd>;
+
+class NLOC_B_DESC : MSA_2R_DESC_BASE<"nloc.b", int_mips_nloc_b, MSA128BOpnd>;
+class NLOC_H_DESC : MSA_2R_DESC_BASE<"nloc.h", int_mips_nloc_h, MSA128HOpnd>;
+class NLOC_W_DESC : MSA_2R_DESC_BASE<"nloc.w", int_mips_nloc_w, MSA128WOpnd>;
+class NLOC_D_DESC : MSA_2R_DESC_BASE<"nloc.d", int_mips_nloc_d, MSA128DOpnd>;
+
+class NLZC_B_DESC : MSA_2R_DESC_BASE<"nlzc.b", ctlz, MSA128BOpnd>;
+class NLZC_H_DESC : MSA_2R_DESC_BASE<"nlzc.h", ctlz, MSA128HOpnd>;
+class NLZC_W_DESC : MSA_2R_DESC_BASE<"nlzc.w", ctlz, MSA128WOpnd>;
+class NLZC_D_DESC : MSA_2R_DESC_BASE<"nlzc.d", ctlz, MSA128DOpnd>;
+
+class NOR_V_DESC : MSA_VEC_DESC_BASE<"nor.v", MipsVNOR, MSA128BOpnd>;
+class NOR_V_H_PSEUDO_DESC : MSA_VEC_PSEUDO_BASE<MipsVNOR, MSA128HOpnd>;
+class NOR_V_W_PSEUDO_DESC : MSA_VEC_PSEUDO_BASE<MipsVNOR, MSA128WOpnd>;
+class NOR_V_D_PSEUDO_DESC : MSA_VEC_PSEUDO_BASE<MipsVNOR, MSA128DOpnd>;
+
+class NORI_B_DESC : MSA_I8_DESC_BASE<"nori.b", MipsVNOR, vsplati8_uimm8,
+ MSA128BOpnd>;
+
+class OR_V_DESC : MSA_VEC_DESC_BASE<"or.v", or, MSA128BOpnd>;
+class OR_V_H_PSEUDO_DESC : MSA_VEC_PSEUDO_BASE<or, MSA128HOpnd>;
+class OR_V_W_PSEUDO_DESC : MSA_VEC_PSEUDO_BASE<or, MSA128WOpnd>;
+class OR_V_D_PSEUDO_DESC : MSA_VEC_PSEUDO_BASE<or, MSA128DOpnd>;
+
+class ORI_B_DESC : MSA_I8_DESC_BASE<"ori.b", or, vsplati8_uimm8, MSA128BOpnd>;
+
+class PCKEV_B_DESC : MSA_3R_DESC_BASE<"pckev.b", MipsPCKEV, MSA128BOpnd>;
+class PCKEV_H_DESC : MSA_3R_DESC_BASE<"pckev.h", MipsPCKEV, MSA128HOpnd>;
+class PCKEV_W_DESC : MSA_3R_DESC_BASE<"pckev.w", MipsPCKEV, MSA128WOpnd>;
+class PCKEV_D_DESC : MSA_3R_DESC_BASE<"pckev.d", MipsPCKEV, MSA128DOpnd>;
+
+class PCKOD_B_DESC : MSA_3R_DESC_BASE<"pckod.b", MipsPCKOD, MSA128BOpnd>;
+class PCKOD_H_DESC : MSA_3R_DESC_BASE<"pckod.h", MipsPCKOD, MSA128HOpnd>;
+class PCKOD_W_DESC : MSA_3R_DESC_BASE<"pckod.w", MipsPCKOD, MSA128WOpnd>;
+class PCKOD_D_DESC : MSA_3R_DESC_BASE<"pckod.d", MipsPCKOD, MSA128DOpnd>;
+
+class PCNT_B_DESC : MSA_2R_DESC_BASE<"pcnt.b", ctpop, MSA128BOpnd>;
+class PCNT_H_DESC : MSA_2R_DESC_BASE<"pcnt.h", ctpop, MSA128HOpnd>;
+class PCNT_W_DESC : MSA_2R_DESC_BASE<"pcnt.w", ctpop, MSA128WOpnd>;
+class PCNT_D_DESC : MSA_2R_DESC_BASE<"pcnt.d", ctpop, MSA128DOpnd>;
+
+class SAT_S_B_DESC : MSA_BIT_B_X_DESC_BASE<"sat_s.b", int_mips_sat_s_b,
+ MSA128BOpnd>;
+class SAT_S_H_DESC : MSA_BIT_H_X_DESC_BASE<"sat_s.h", int_mips_sat_s_h,
+ MSA128HOpnd>;
+class SAT_S_W_DESC : MSA_BIT_W_X_DESC_BASE<"sat_s.w", int_mips_sat_s_w,
+ MSA128WOpnd>;
+class SAT_S_D_DESC : MSA_BIT_D_X_DESC_BASE<"sat_s.d", int_mips_sat_s_d,
+ MSA128DOpnd>;
+
+class SAT_U_B_DESC : MSA_BIT_B_X_DESC_BASE<"sat_u.b", int_mips_sat_u_b,
+ MSA128BOpnd>;
+class SAT_U_H_DESC : MSA_BIT_H_X_DESC_BASE<"sat_u.h", int_mips_sat_u_h,
+ MSA128HOpnd>;
+class SAT_U_W_DESC : MSA_BIT_W_X_DESC_BASE<"sat_u.w", int_mips_sat_u_w,
+ MSA128WOpnd>;
+class SAT_U_D_DESC : MSA_BIT_D_X_DESC_BASE<"sat_u.d", int_mips_sat_u_d,
+ MSA128DOpnd>;
+
+class SHF_B_DESC : MSA_I8_SHF_DESC_BASE<"shf.b", MSA128BOpnd>;
+class SHF_H_DESC : MSA_I8_SHF_DESC_BASE<"shf.h", MSA128HOpnd>;
+class SHF_W_DESC : MSA_I8_SHF_DESC_BASE<"shf.w", MSA128WOpnd>;
+
+class SLD_B_DESC : MSA_3R_SLD_DESC_BASE<"sld.b", int_mips_sld_b, MSA128BOpnd>;
+class SLD_H_DESC : MSA_3R_SLD_DESC_BASE<"sld.h", int_mips_sld_h, MSA128HOpnd>;
+class SLD_W_DESC : MSA_3R_SLD_DESC_BASE<"sld.w", int_mips_sld_w, MSA128WOpnd>;
+class SLD_D_DESC : MSA_3R_SLD_DESC_BASE<"sld.d", int_mips_sld_d, MSA128DOpnd>;
+
+class SLDI_B_DESC : MSA_ELM_DESC_BASE<"sldi.b", int_mips_sldi_b, MSA128BOpnd>;
+class SLDI_H_DESC : MSA_ELM_DESC_BASE<"sldi.h", int_mips_sldi_h, MSA128HOpnd>;
+class SLDI_W_DESC : MSA_ELM_DESC_BASE<"sldi.w", int_mips_sldi_w, MSA128WOpnd>;
+class SLDI_D_DESC : MSA_ELM_DESC_BASE<"sldi.d", int_mips_sldi_d, MSA128DOpnd>;
+
+class SLL_B_DESC : MSA_3R_DESC_BASE<"sll.b", shl, MSA128BOpnd>;
+class SLL_H_DESC : MSA_3R_DESC_BASE<"sll.h", shl, MSA128HOpnd>;
+class SLL_W_DESC : MSA_3R_DESC_BASE<"sll.w", shl, MSA128WOpnd>;
+class SLL_D_DESC : MSA_3R_DESC_BASE<"sll.d", shl, MSA128DOpnd>;
+
+class SLLI_B_DESC : MSA_BIT_SPLAT_DESC_BASE<"slli.b", shl, vsplati8_uimm3,
+ MSA128BOpnd>;
+class SLLI_H_DESC : MSA_BIT_SPLAT_DESC_BASE<"slli.h", shl, vsplati16_uimm4,
+ MSA128HOpnd>;
+class SLLI_W_DESC : MSA_BIT_SPLAT_DESC_BASE<"slli.w", shl, vsplati32_uimm5,
+ MSA128WOpnd>;
+class SLLI_D_DESC : MSA_BIT_SPLAT_DESC_BASE<"slli.d", shl, vsplati64_uimm6,
+ MSA128DOpnd>;
+
+class SPLAT_B_DESC : MSA_3R_SPLAT_DESC_BASE<"splat.b", vsplati8_elt,
+ MSA128BOpnd>;
+class SPLAT_H_DESC : MSA_3R_SPLAT_DESC_BASE<"splat.h", vsplati16_elt,
+ MSA128HOpnd>;
+class SPLAT_W_DESC : MSA_3R_SPLAT_DESC_BASE<"splat.w", vsplati32_elt,
+ MSA128WOpnd>;
+class SPLAT_D_DESC : MSA_3R_SPLAT_DESC_BASE<"splat.d", vsplati64_elt,
+ MSA128DOpnd>;
+
+class SPLATI_B_DESC : MSA_ELM_SPLAT_DESC_BASE<"splati.b", vsplati8_uimm4,
+ MSA128BOpnd>;
+class SPLATI_H_DESC : MSA_ELM_SPLAT_DESC_BASE<"splati.h", vsplati16_uimm3,
+ MSA128HOpnd>;
+class SPLATI_W_DESC : MSA_ELM_SPLAT_DESC_BASE<"splati.w", vsplati32_uimm2,
+ MSA128WOpnd>;
+class SPLATI_D_DESC : MSA_ELM_SPLAT_DESC_BASE<"splati.d", vsplati64_uimm1,
+ MSA128DOpnd>;
+
+class SRA_B_DESC : MSA_3R_DESC_BASE<"sra.b", sra, MSA128BOpnd>;
+class SRA_H_DESC : MSA_3R_DESC_BASE<"sra.h", sra, MSA128HOpnd>;
+class SRA_W_DESC : MSA_3R_DESC_BASE<"sra.w", sra, MSA128WOpnd>;
+class SRA_D_DESC : MSA_3R_DESC_BASE<"sra.d", sra, MSA128DOpnd>;
+
+class SRAI_B_DESC : MSA_BIT_SPLAT_DESC_BASE<"srai.b", sra, vsplati8_uimm3,
+ MSA128BOpnd>;
+class SRAI_H_DESC : MSA_BIT_SPLAT_DESC_BASE<"srai.h", sra, vsplati16_uimm4,
+ MSA128HOpnd>;
+class SRAI_W_DESC : MSA_BIT_SPLAT_DESC_BASE<"srai.w", sra, vsplati32_uimm5,
+ MSA128WOpnd>;
+class SRAI_D_DESC : MSA_BIT_SPLAT_DESC_BASE<"srai.d", sra, vsplati64_uimm6,
+ MSA128DOpnd>;
+
+class SRAR_B_DESC : MSA_3R_DESC_BASE<"srar.b", int_mips_srar_b, MSA128BOpnd>;
+class SRAR_H_DESC : MSA_3R_DESC_BASE<"srar.h", int_mips_srar_h, MSA128HOpnd>;
+class SRAR_W_DESC : MSA_3R_DESC_BASE<"srar.w", int_mips_srar_w, MSA128WOpnd>;
+class SRAR_D_DESC : MSA_3R_DESC_BASE<"srar.d", int_mips_srar_d, MSA128DOpnd>;
+
+class SRARI_B_DESC : MSA_BIT_B_X_DESC_BASE<"srari.b", int_mips_srari_b,
+ MSA128BOpnd>;
+class SRARI_H_DESC : MSA_BIT_H_X_DESC_BASE<"srari.h", int_mips_srari_h,
+ MSA128HOpnd>;
+class SRARI_W_DESC : MSA_BIT_W_X_DESC_BASE<"srari.w", int_mips_srari_w,
+ MSA128WOpnd>;
+class SRARI_D_DESC : MSA_BIT_D_X_DESC_BASE<"srari.d", int_mips_srari_d,
+ MSA128DOpnd>;
+
+class SRL_B_DESC : MSA_3R_DESC_BASE<"srl.b", srl, MSA128BOpnd>;
+class SRL_H_DESC : MSA_3R_DESC_BASE<"srl.h", srl, MSA128HOpnd>;
+class SRL_W_DESC : MSA_3R_DESC_BASE<"srl.w", srl, MSA128WOpnd>;
+class SRL_D_DESC : MSA_3R_DESC_BASE<"srl.d", srl, MSA128DOpnd>;
+
+class SRLI_B_DESC : MSA_BIT_SPLAT_DESC_BASE<"srli.b", srl, vsplati8_uimm3,
+ MSA128BOpnd>;
+class SRLI_H_DESC : MSA_BIT_SPLAT_DESC_BASE<"srli.h", srl, vsplati16_uimm4,
+ MSA128HOpnd>;
+class SRLI_W_DESC : MSA_BIT_SPLAT_DESC_BASE<"srli.w", srl, vsplati32_uimm5,
+ MSA128WOpnd>;
+class SRLI_D_DESC : MSA_BIT_SPLAT_DESC_BASE<"srli.d", srl, vsplati64_uimm6,
+ MSA128DOpnd>;
+
+class SRLR_B_DESC : MSA_3R_DESC_BASE<"srlr.b", int_mips_srlr_b, MSA128BOpnd>;
+class SRLR_H_DESC : MSA_3R_DESC_BASE<"srlr.h", int_mips_srlr_h, MSA128HOpnd>;
+class SRLR_W_DESC : MSA_3R_DESC_BASE<"srlr.w", int_mips_srlr_w, MSA128WOpnd>;
+class SRLR_D_DESC : MSA_3R_DESC_BASE<"srlr.d", int_mips_srlr_d, MSA128DOpnd>;
+
+class SRLRI_B_DESC : MSA_BIT_B_X_DESC_BASE<"srlri.b", int_mips_srlri_b,
+ MSA128BOpnd>;
+class SRLRI_H_DESC : MSA_BIT_H_X_DESC_BASE<"srlri.h", int_mips_srlri_h,
+ MSA128HOpnd>;
+class SRLRI_W_DESC : MSA_BIT_W_X_DESC_BASE<"srlri.w", int_mips_srlri_w,
+ MSA128WOpnd>;
+class SRLRI_D_DESC : MSA_BIT_D_X_DESC_BASE<"srlri.d", int_mips_srlri_d,
+ MSA128DOpnd>;
+
+class ST_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
+ ValueType TyNode, RegisterOperand ROWD,
+ Operand MemOpnd = mem, ComplexPattern Addr = addrRegImm,
+ InstrItinClass itin = NoItinerary> {
+ dag OutOperandList = (outs);
+ dag InOperandList = (ins ROWD:$wd, MemOpnd:$addr);
+ string AsmString = !strconcat(instr_asm, "\t$wd, $addr");
+ list<dag> Pattern = [(OpNode (TyNode ROWD:$wd), Addr:$addr)];
+ InstrItinClass Itinerary = itin;
+ string DecoderMethod = "DecodeMSA128Mem";
+}
+
+class ST_B_DESC : ST_DESC_BASE<"st.b", store, v16i8, MSA128BOpnd>;
+class ST_H_DESC : ST_DESC_BASE<"st.h", store, v8i16, MSA128HOpnd>;
+class ST_W_DESC : ST_DESC_BASE<"st.w", store, v4i32, MSA128WOpnd>;
+class ST_D_DESC : ST_DESC_BASE<"st.d", store, v2i64, MSA128DOpnd>;
+
+class SUBS_S_B_DESC : MSA_3R_DESC_BASE<"subs_s.b", int_mips_subs_s_b,
+ MSA128BOpnd>;
+class SUBS_S_H_DESC : MSA_3R_DESC_BASE<"subs_s.h", int_mips_subs_s_h,
+ MSA128HOpnd>;
+class SUBS_S_W_DESC : MSA_3R_DESC_BASE<"subs_s.w", int_mips_subs_s_w,
+ MSA128WOpnd>;
+class SUBS_S_D_DESC : MSA_3R_DESC_BASE<"subs_s.d", int_mips_subs_s_d,
+ MSA128DOpnd>;
+
+class SUBS_U_B_DESC : MSA_3R_DESC_BASE<"subs_u.b", int_mips_subs_u_b,
+ MSA128BOpnd>;
+class SUBS_U_H_DESC : MSA_3R_DESC_BASE<"subs_u.h", int_mips_subs_u_h,
+ MSA128HOpnd>;
+class SUBS_U_W_DESC : MSA_3R_DESC_BASE<"subs_u.w", int_mips_subs_u_w,
+ MSA128WOpnd>;
+class SUBS_U_D_DESC : MSA_3R_DESC_BASE<"subs_u.d", int_mips_subs_u_d,
+ MSA128DOpnd>;
+
+class SUBSUS_U_B_DESC : MSA_3R_DESC_BASE<"subsus_u.b", int_mips_subsus_u_b,
+ MSA128BOpnd>;
+class SUBSUS_U_H_DESC : MSA_3R_DESC_BASE<"subsus_u.h", int_mips_subsus_u_h,
+ MSA128HOpnd>;
+class SUBSUS_U_W_DESC : MSA_3R_DESC_BASE<"subsus_u.w", int_mips_subsus_u_w,
+ MSA128WOpnd>;
+class SUBSUS_U_D_DESC : MSA_3R_DESC_BASE<"subsus_u.d", int_mips_subsus_u_d,
+ MSA128DOpnd>;
+
+class SUBSUU_S_B_DESC : MSA_3R_DESC_BASE<"subsuu_s.b", int_mips_subsuu_s_b,
+ MSA128BOpnd>;
+class SUBSUU_S_H_DESC : MSA_3R_DESC_BASE<"subsuu_s.h", int_mips_subsuu_s_h,
+ MSA128HOpnd>;
+class SUBSUU_S_W_DESC : MSA_3R_DESC_BASE<"subsuu_s.w", int_mips_subsuu_s_w,
+ MSA128WOpnd>;
+class SUBSUU_S_D_DESC : MSA_3R_DESC_BASE<"subsuu_s.d", int_mips_subsuu_s_d,
+ MSA128DOpnd>;
+
+class SUBV_B_DESC : MSA_3R_DESC_BASE<"subv.b", sub, MSA128BOpnd>;
+class SUBV_H_DESC : MSA_3R_DESC_BASE<"subv.h", sub, MSA128HOpnd>;
+class SUBV_W_DESC : MSA_3R_DESC_BASE<"subv.w", sub, MSA128WOpnd>;
+class SUBV_D_DESC : MSA_3R_DESC_BASE<"subv.d", sub, MSA128DOpnd>;
+
+class SUBVI_B_DESC : MSA_I5_DESC_BASE<"subvi.b", sub, vsplati8_uimm5,
+ MSA128BOpnd>;
+class SUBVI_H_DESC : MSA_I5_DESC_BASE<"subvi.h", sub, vsplati16_uimm5,
+ MSA128HOpnd>;
+class SUBVI_W_DESC : MSA_I5_DESC_BASE<"subvi.w", sub, vsplati32_uimm5,
+ MSA128WOpnd>;
+class SUBVI_D_DESC : MSA_I5_DESC_BASE<"subvi.d", sub, vsplati64_uimm5,
+ MSA128DOpnd>;
+
+class VSHF_B_DESC : MSA_3R_VSHF_DESC_BASE<"vshf.b", MSA128BOpnd>;
+class VSHF_H_DESC : MSA_3R_VSHF_DESC_BASE<"vshf.h", MSA128HOpnd>;
+class VSHF_W_DESC : MSA_3R_VSHF_DESC_BASE<"vshf.w", MSA128WOpnd>;
+class VSHF_D_DESC : MSA_3R_VSHF_DESC_BASE<"vshf.d", MSA128DOpnd>;
+
+class XOR_V_DESC : MSA_VEC_DESC_BASE<"xor.v", xor, MSA128BOpnd>;
+class XOR_V_H_PSEUDO_DESC : MSA_VEC_PSEUDO_BASE<xor, MSA128HOpnd>;
+class XOR_V_W_PSEUDO_DESC : MSA_VEC_PSEUDO_BASE<xor, MSA128WOpnd>;
+class XOR_V_D_PSEUDO_DESC : MSA_VEC_PSEUDO_BASE<xor, MSA128DOpnd>;
+
+class XORI_B_DESC : MSA_I8_DESC_BASE<"xori.b", xor, vsplati8_uimm8,
+ MSA128BOpnd>;
+
+// Instruction defs.
+def ADD_A_B : ADD_A_B_ENC, ADD_A_B_DESC;
+def ADD_A_H : ADD_A_H_ENC, ADD_A_H_DESC;
+def ADD_A_W : ADD_A_W_ENC, ADD_A_W_DESC;
+def ADD_A_D : ADD_A_D_ENC, ADD_A_D_DESC;
+
+def ADDS_A_B : ADDS_A_B_ENC, ADDS_A_B_DESC;
+def ADDS_A_H : ADDS_A_H_ENC, ADDS_A_H_DESC;
+def ADDS_A_W : ADDS_A_W_ENC, ADDS_A_W_DESC;
+def ADDS_A_D : ADDS_A_D_ENC, ADDS_A_D_DESC;
+
+def ADDS_S_B : ADDS_S_B_ENC, ADDS_S_B_DESC;
+def ADDS_S_H : ADDS_S_H_ENC, ADDS_S_H_DESC;
+def ADDS_S_W : ADDS_S_W_ENC, ADDS_S_W_DESC;
+def ADDS_S_D : ADDS_S_D_ENC, ADDS_S_D_DESC;
+
+def ADDS_U_B : ADDS_U_B_ENC, ADDS_U_B_DESC;
+def ADDS_U_H : ADDS_U_H_ENC, ADDS_U_H_DESC;
+def ADDS_U_W : ADDS_U_W_ENC, ADDS_U_W_DESC;
+def ADDS_U_D : ADDS_U_D_ENC, ADDS_U_D_DESC;
+
+def ADDV_B : ADDV_B_ENC, ADDV_B_DESC;
+def ADDV_H : ADDV_H_ENC, ADDV_H_DESC;
+def ADDV_W : ADDV_W_ENC, ADDV_W_DESC;
+def ADDV_D : ADDV_D_ENC, ADDV_D_DESC;
+
+def ADDVI_B : ADDVI_B_ENC, ADDVI_B_DESC;
+def ADDVI_H : ADDVI_H_ENC, ADDVI_H_DESC;
+def ADDVI_W : ADDVI_W_ENC, ADDVI_W_DESC;
+def ADDVI_D : ADDVI_D_ENC, ADDVI_D_DESC;
+
+def AND_V : AND_V_ENC, AND_V_DESC;
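+// The vector bitwise operations are element-size agnostic, so only byte
+// vector (.v) instructions exist. The pseudos below give the wider integer
+// types a selection pattern and then expand to the same AND_V (NOR_V, OR_V
+// and XOR_V are handled the same way further down).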
+def AND_V_H_PSEUDO : AND_V_H_PSEUDO_DESC,
+ PseudoInstExpansion<(AND_V MSA128BOpnd:$wd,
+ MSA128BOpnd:$ws,
+ MSA128BOpnd:$wt)>;
+def AND_V_W_PSEUDO : AND_V_W_PSEUDO_DESC,
+ PseudoInstExpansion<(AND_V MSA128BOpnd:$wd,
+ MSA128BOpnd:$ws,
+ MSA128BOpnd:$wt)>;
+def AND_V_D_PSEUDO : AND_V_D_PSEUDO_DESC,
+ PseudoInstExpansion<(AND_V MSA128BOpnd:$wd,
+ MSA128BOpnd:$ws,
+ MSA128BOpnd:$wt)>;
+
+def ANDI_B : ANDI_B_ENC, ANDI_B_DESC;
+
+def ASUB_S_B : ASUB_S_B_ENC, ASUB_S_B_DESC;
+def ASUB_S_H : ASUB_S_H_ENC, ASUB_S_H_DESC;
+def ASUB_S_W : ASUB_S_W_ENC, ASUB_S_W_DESC;
+def ASUB_S_D : ASUB_S_D_ENC, ASUB_S_D_DESC;
+
+def ASUB_U_B : ASUB_U_B_ENC, ASUB_U_B_DESC;
+def ASUB_U_H : ASUB_U_H_ENC, ASUB_U_H_DESC;
+def ASUB_U_W : ASUB_U_W_ENC, ASUB_U_W_DESC;
+def ASUB_U_D : ASUB_U_D_ENC, ASUB_U_D_DESC;
+
+def AVE_S_B : AVE_S_B_ENC, AVE_S_B_DESC;
+def AVE_S_H : AVE_S_H_ENC, AVE_S_H_DESC;
+def AVE_S_W : AVE_S_W_ENC, AVE_S_W_DESC;
+def AVE_S_D : AVE_S_D_ENC, AVE_S_D_DESC;
+
+def AVE_U_B : AVE_U_B_ENC, AVE_U_B_DESC;
+def AVE_U_H : AVE_U_H_ENC, AVE_U_H_DESC;
+def AVE_U_W : AVE_U_W_ENC, AVE_U_W_DESC;
+def AVE_U_D : AVE_U_D_ENC, AVE_U_D_DESC;
+
+def AVER_S_B : AVER_S_B_ENC, AVER_S_B_DESC;
+def AVER_S_H : AVER_S_H_ENC, AVER_S_H_DESC;
+def AVER_S_W : AVER_S_W_ENC, AVER_S_W_DESC;
+def AVER_S_D : AVER_S_D_ENC, AVER_S_D_DESC;
+
+def AVER_U_B : AVER_U_B_ENC, AVER_U_B_DESC;
+def AVER_U_H : AVER_U_H_ENC, AVER_U_H_DESC;
+def AVER_U_W : AVER_U_W_ENC, AVER_U_W_DESC;
+def AVER_U_D : AVER_U_D_ENC, AVER_U_D_DESC;
+
+def BCLR_B : BCLR_B_ENC, BCLR_B_DESC;
+def BCLR_H : BCLR_H_ENC, BCLR_H_DESC;
+def BCLR_W : BCLR_W_ENC, BCLR_W_DESC;
+def BCLR_D : BCLR_D_ENC, BCLR_D_DESC;
+
+def BCLRI_B : BCLRI_B_ENC, BCLRI_B_DESC;
+def BCLRI_H : BCLRI_H_ENC, BCLRI_H_DESC;
+def BCLRI_W : BCLRI_W_ENC, BCLRI_W_DESC;
+def BCLRI_D : BCLRI_D_ENC, BCLRI_D_DESC;
+
+def BINSL_B : BINSL_B_ENC, BINSL_B_DESC;
+def BINSL_H : BINSL_H_ENC, BINSL_H_DESC;
+def BINSL_W : BINSL_W_ENC, BINSL_W_DESC;
+def BINSL_D : BINSL_D_ENC, BINSL_D_DESC;
+
+def BINSLI_B : BINSLI_B_ENC, BINSLI_B_DESC;
+def BINSLI_H : BINSLI_H_ENC, BINSLI_H_DESC;
+def BINSLI_W : BINSLI_W_ENC, BINSLI_W_DESC;
+def BINSLI_D : BINSLI_D_ENC, BINSLI_D_DESC;
+
+def BINSR_B : BINSR_B_ENC, BINSR_B_DESC;
+def BINSR_H : BINSR_H_ENC, BINSR_H_DESC;
+def BINSR_W : BINSR_W_ENC, BINSR_W_DESC;
+def BINSR_D : BINSR_D_ENC, BINSR_D_DESC;
+
+def BINSRI_B : BINSRI_B_ENC, BINSRI_B_DESC;
+def BINSRI_H : BINSRI_H_ENC, BINSRI_H_DESC;
+def BINSRI_W : BINSRI_W_ENC, BINSRI_W_DESC;
+def BINSRI_D : BINSRI_D_ENC, BINSRI_D_DESC;
+
+def BMNZ_V : BMNZ_V_ENC, BMNZ_V_DESC;
+
+def BMNZI_B : BMNZI_B_ENC, BMNZI_B_DESC;
+
+def BMZ_V : BMZ_V_ENC, BMZ_V_DESC;
+
+def BMZI_B : BMZI_B_ENC, BMZI_B_DESC;
+
+def BNEG_B : BNEG_B_ENC, BNEG_B_DESC;
+def BNEG_H : BNEG_H_ENC, BNEG_H_DESC;
+def BNEG_W : BNEG_W_ENC, BNEG_W_DESC;
+def BNEG_D : BNEG_D_ENC, BNEG_D_DESC;
+
+def BNEGI_B : BNEGI_B_ENC, BNEGI_B_DESC;
+def BNEGI_H : BNEGI_H_ENC, BNEGI_H_DESC;
+def BNEGI_W : BNEGI_W_ENC, BNEGI_W_DESC;
+def BNEGI_D : BNEGI_D_ENC, BNEGI_D_DESC;
+
+def BNZ_B : BNZ_B_ENC, BNZ_B_DESC;
+def BNZ_H : BNZ_H_ENC, BNZ_H_DESC;
+def BNZ_W : BNZ_W_ENC, BNZ_W_DESC;
+def BNZ_D : BNZ_D_ENC, BNZ_D_DESC;
+
+def BNZ_V : BNZ_V_ENC, BNZ_V_DESC;
+
+def BSEL_V : BSEL_V_ENC, BSEL_V_DESC;
+
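+// bsel.v reads its destination register as the per-bit selection mask, hence
+// the "$wd_in = $wd" tie constraint below.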
+class MSA_BSEL_PSEUDO_BASE<RegisterOperand RO, ValueType Ty> :
+ MipsPseudo<(outs RO:$wd), (ins RO:$wd_in, RO:$ws, RO:$wt),
+ [(set RO:$wd, (Ty (vselect RO:$wd_in, RO:$ws, RO:$wt)))]>,
+ PseudoInstExpansion<(BSEL_V MSA128BOpnd:$wd, MSA128BOpnd:$wd_in,
+ MSA128BOpnd:$ws, MSA128BOpnd:$wt)> {
+ let Constraints = "$wd_in = $wd";
+}
+
+def BSEL_H_PSEUDO : MSA_BSEL_PSEUDO_BASE<MSA128HOpnd, v8i16>;
+def BSEL_W_PSEUDO : MSA_BSEL_PSEUDO_BASE<MSA128WOpnd, v4i32>;
+def BSEL_D_PSEUDO : MSA_BSEL_PSEUDO_BASE<MSA128DOpnd, v2i64>;
+def BSEL_FW_PSEUDO : MSA_BSEL_PSEUDO_BASE<MSA128WOpnd, v4f32>;
+def BSEL_FD_PSEUDO : MSA_BSEL_PSEUDO_BASE<MSA128DOpnd, v2f64>;
+
+def BSELI_B : BSELI_B_ENC, BSELI_B_DESC;
+
+def BSET_B : BSET_B_ENC, BSET_B_DESC;
+def BSET_H : BSET_H_ENC, BSET_H_DESC;
+def BSET_W : BSET_W_ENC, BSET_W_DESC;
+def BSET_D : BSET_D_ENC, BSET_D_DESC;
+
+def BSETI_B : BSETI_B_ENC, BSETI_B_DESC;
+def BSETI_H : BSETI_H_ENC, BSETI_H_DESC;
+def BSETI_W : BSETI_W_ENC, BSETI_W_DESC;
+def BSETI_D : BSETI_D_ENC, BSETI_D_DESC;
+
+def BZ_B : BZ_B_ENC, BZ_B_DESC;
+def BZ_H : BZ_H_ENC, BZ_H_DESC;
+def BZ_W : BZ_W_ENC, BZ_W_DESC;
+def BZ_D : BZ_D_ENC, BZ_D_DESC;
+
+def BZ_V : BZ_V_ENC, BZ_V_DESC;
+
+def CEQ_B : CEQ_B_ENC, CEQ_B_DESC;
+def CEQ_H : CEQ_H_ENC, CEQ_H_DESC;
+def CEQ_W : CEQ_W_ENC, CEQ_W_DESC;
+def CEQ_D : CEQ_D_ENC, CEQ_D_DESC;
+
+def CEQI_B : CEQI_B_ENC, CEQI_B_DESC;
+def CEQI_H : CEQI_H_ENC, CEQI_H_DESC;
+def CEQI_W : CEQI_W_ENC, CEQI_W_DESC;
+def CEQI_D : CEQI_D_ENC, CEQI_D_DESC;
+
+def CFCMSA : CFCMSA_ENC, CFCMSA_DESC;
+
+def CLE_S_B : CLE_S_B_ENC, CLE_S_B_DESC;
+def CLE_S_H : CLE_S_H_ENC, CLE_S_H_DESC;
+def CLE_S_W : CLE_S_W_ENC, CLE_S_W_DESC;
+def CLE_S_D : CLE_S_D_ENC, CLE_S_D_DESC;
+
+def CLE_U_B : CLE_U_B_ENC, CLE_U_B_DESC;
+def CLE_U_H : CLE_U_H_ENC, CLE_U_H_DESC;
+def CLE_U_W : CLE_U_W_ENC, CLE_U_W_DESC;
+def CLE_U_D : CLE_U_D_ENC, CLE_U_D_DESC;
+
+def CLEI_S_B : CLEI_S_B_ENC, CLEI_S_B_DESC;
+def CLEI_S_H : CLEI_S_H_ENC, CLEI_S_H_DESC;
+def CLEI_S_W : CLEI_S_W_ENC, CLEI_S_W_DESC;
+def CLEI_S_D : CLEI_S_D_ENC, CLEI_S_D_DESC;
+
+def CLEI_U_B : CLEI_U_B_ENC, CLEI_U_B_DESC;
+def CLEI_U_H : CLEI_U_H_ENC, CLEI_U_H_DESC;
+def CLEI_U_W : CLEI_U_W_ENC, CLEI_U_W_DESC;
+def CLEI_U_D : CLEI_U_D_ENC, CLEI_U_D_DESC;
+
+def CLT_S_B : CLT_S_B_ENC, CLT_S_B_DESC;
+def CLT_S_H : CLT_S_H_ENC, CLT_S_H_DESC;
+def CLT_S_W : CLT_S_W_ENC, CLT_S_W_DESC;
+def CLT_S_D : CLT_S_D_ENC, CLT_S_D_DESC;
+
+def CLT_U_B : CLT_U_B_ENC, CLT_U_B_DESC;
+def CLT_U_H : CLT_U_H_ENC, CLT_U_H_DESC;
+def CLT_U_W : CLT_U_W_ENC, CLT_U_W_DESC;
+def CLT_U_D : CLT_U_D_ENC, CLT_U_D_DESC;
+
+def CLTI_S_B : CLTI_S_B_ENC, CLTI_S_B_DESC;
+def CLTI_S_H : CLTI_S_H_ENC, CLTI_S_H_DESC;
+def CLTI_S_W : CLTI_S_W_ENC, CLTI_S_W_DESC;
+def CLTI_S_D : CLTI_S_D_ENC, CLTI_S_D_DESC;
+
+def CLTI_U_B : CLTI_U_B_ENC, CLTI_U_B_DESC;
+def CLTI_U_H : CLTI_U_H_ENC, CLTI_U_H_DESC;
+def CLTI_U_W : CLTI_U_W_ENC, CLTI_U_W_DESC;
+def CLTI_U_D : CLTI_U_D_ENC, CLTI_U_D_DESC;
+
+def COPY_S_B : COPY_S_B_ENC, COPY_S_B_DESC;
+def COPY_S_H : COPY_S_H_ENC, COPY_S_H_DESC;
+def COPY_S_W : COPY_S_W_ENC, COPY_S_W_DESC;
+
+def COPY_U_B : COPY_U_B_ENC, COPY_U_B_DESC;
+def COPY_U_H : COPY_U_H_ENC, COPY_U_H_DESC;
+def COPY_U_W : COPY_U_W_ENC, COPY_U_W_DESC;
+
+def COPY_FW_PSEUDO : COPY_FW_PSEUDO_DESC;
+def COPY_FD_PSEUDO : COPY_FD_PSEUDO_DESC;
+
+def CTCMSA : CTCMSA_ENC, CTCMSA_DESC;
+
+def DIV_S_B : DIV_S_B_ENC, DIV_S_B_DESC;
+def DIV_S_H : DIV_S_H_ENC, DIV_S_H_DESC;
+def DIV_S_W : DIV_S_W_ENC, DIV_S_W_DESC;
+def DIV_S_D : DIV_S_D_ENC, DIV_S_D_DESC;
+
+def DIV_U_B : DIV_U_B_ENC, DIV_U_B_DESC;
+def DIV_U_H : DIV_U_H_ENC, DIV_U_H_DESC;
+def DIV_U_W : DIV_U_W_ENC, DIV_U_W_DESC;
+def DIV_U_D : DIV_U_D_ENC, DIV_U_D_DESC;
+
+def DOTP_S_H : DOTP_S_H_ENC, DOTP_S_H_DESC;
+def DOTP_S_W : DOTP_S_W_ENC, DOTP_S_W_DESC;
+def DOTP_S_D : DOTP_S_D_ENC, DOTP_S_D_DESC;
+
+def DOTP_U_H : DOTP_U_H_ENC, DOTP_U_H_DESC;
+def DOTP_U_W : DOTP_U_W_ENC, DOTP_U_W_DESC;
+def DOTP_U_D : DOTP_U_D_ENC, DOTP_U_D_DESC;
+
+def DPADD_S_H : DPADD_S_H_ENC, DPADD_S_H_DESC;
+def DPADD_S_W : DPADD_S_W_ENC, DPADD_S_W_DESC;
+def DPADD_S_D : DPADD_S_D_ENC, DPADD_S_D_DESC;
+
+def DPADD_U_H : DPADD_U_H_ENC, DPADD_U_H_DESC;
+def DPADD_U_W : DPADD_U_W_ENC, DPADD_U_W_DESC;
+def DPADD_U_D : DPADD_U_D_ENC, DPADD_U_D_DESC;
+
+def DPSUB_S_H : DPSUB_S_H_ENC, DPSUB_S_H_DESC;
+def DPSUB_S_W : DPSUB_S_W_ENC, DPSUB_S_W_DESC;
+def DPSUB_S_D : DPSUB_S_D_ENC, DPSUB_S_D_DESC;
+
+def DPSUB_U_H : DPSUB_U_H_ENC, DPSUB_U_H_DESC;
+def DPSUB_U_W : DPSUB_U_W_ENC, DPSUB_U_W_DESC;
+def DPSUB_U_D : DPSUB_U_D_ENC, DPSUB_U_D_DESC;
+
+def FADD_W : FADD_W_ENC, FADD_W_DESC;
+def FADD_D : FADD_D_ENC, FADD_D_DESC;
+
+def FCAF_W : FCAF_W_ENC, FCAF_W_DESC;
+def FCAF_D : FCAF_D_ENC, FCAF_D_DESC;
+
+def FCEQ_W : FCEQ_W_ENC, FCEQ_W_DESC;
+def FCEQ_D : FCEQ_D_ENC, FCEQ_D_DESC;
+
+def FCLASS_W : FCLASS_W_ENC, FCLASS_W_DESC;
+def FCLASS_D : FCLASS_D_ENC, FCLASS_D_DESC;
+
+def FCLE_W : FCLE_W_ENC, FCLE_W_DESC;
+def FCLE_D : FCLE_D_ENC, FCLE_D_DESC;
+
+def FCLT_W : FCLT_W_ENC, FCLT_W_DESC;
+def FCLT_D : FCLT_D_ENC, FCLT_D_DESC;
+
+def FCNE_W : FCNE_W_ENC, FCNE_W_DESC;
+def FCNE_D : FCNE_D_ENC, FCNE_D_DESC;
+
+def FCOR_W : FCOR_W_ENC, FCOR_W_DESC;
+def FCOR_D : FCOR_D_ENC, FCOR_D_DESC;
+
+def FCUEQ_W : FCUEQ_W_ENC, FCUEQ_W_DESC;
+def FCUEQ_D : FCUEQ_D_ENC, FCUEQ_D_DESC;
+
+def FCULE_W : FCULE_W_ENC, FCULE_W_DESC;
+def FCULE_D : FCULE_D_ENC, FCULE_D_DESC;
+
+def FCULT_W : FCULT_W_ENC, FCULT_W_DESC;
+def FCULT_D : FCULT_D_ENC, FCULT_D_DESC;
+
+def FCUN_W : FCUN_W_ENC, FCUN_W_DESC;
+def FCUN_D : FCUN_D_ENC, FCUN_D_DESC;
+
+def FCUNE_W : FCUNE_W_ENC, FCUNE_W_DESC;
+def FCUNE_D : FCUNE_D_ENC, FCUNE_D_DESC;
+
+def FDIV_W : FDIV_W_ENC, FDIV_W_DESC;
+def FDIV_D : FDIV_D_ENC, FDIV_D_DESC;
+
+def FEXDO_H : FEXDO_H_ENC, FEXDO_H_DESC;
+def FEXDO_W : FEXDO_W_ENC, FEXDO_W_DESC;
+
+def FEXP2_W : FEXP2_W_ENC, FEXP2_W_DESC;
+def FEXP2_D : FEXP2_D_ENC, FEXP2_D_DESC;
+def FEXP2_W_1_PSEUDO : FEXP2_W_1_PSEUDO_DESC;
+def FEXP2_D_1_PSEUDO : FEXP2_D_1_PSEUDO_DESC;
+
+def FEXUPL_W : FEXUPL_W_ENC, FEXUPL_W_DESC;
+def FEXUPL_D : FEXUPL_D_ENC, FEXUPL_D_DESC;
+
+def FEXUPR_W : FEXUPR_W_ENC, FEXUPR_W_DESC;
+def FEXUPR_D : FEXUPR_D_ENC, FEXUPR_D_DESC;
+
+def FFINT_S_W : FFINT_S_W_ENC, FFINT_S_W_DESC;
+def FFINT_S_D : FFINT_S_D_ENC, FFINT_S_D_DESC;
+
+def FFINT_U_W : FFINT_U_W_ENC, FFINT_U_W_DESC;
+def FFINT_U_D : FFINT_U_D_ENC, FFINT_U_D_DESC;
+
+def FFQL_W : FFQL_W_ENC, FFQL_W_DESC;
+def FFQL_D : FFQL_D_ENC, FFQL_D_DESC;
+
+def FFQR_W : FFQR_W_ENC, FFQR_W_DESC;
+def FFQR_D : FFQR_D_ENC, FFQR_D_DESC;
+
+def FILL_B : FILL_B_ENC, FILL_B_DESC;
+def FILL_H : FILL_H_ENC, FILL_H_DESC;
+def FILL_W : FILL_W_ENC, FILL_W_DESC;
+def FILL_FW_PSEUDO : FILL_FW_PSEUDO_DESC;
+def FILL_FD_PSEUDO : FILL_FD_PSEUDO_DESC;
+
+def FLOG2_W : FLOG2_W_ENC, FLOG2_W_DESC;
+def FLOG2_D : FLOG2_D_ENC, FLOG2_D_DESC;
+
+def FMADD_W : FMADD_W_ENC, FMADD_W_DESC;
+def FMADD_D : FMADD_D_ENC, FMADD_D_DESC;
+
+def FMAX_W : FMAX_W_ENC, FMAX_W_DESC;
+def FMAX_D : FMAX_D_ENC, FMAX_D_DESC;
+
+def FMAX_A_W : FMAX_A_W_ENC, FMAX_A_W_DESC;
+def FMAX_A_D : FMAX_A_D_ENC, FMAX_A_D_DESC;
+
+def FMIN_W : FMIN_W_ENC, FMIN_W_DESC;
+def FMIN_D : FMIN_D_ENC, FMIN_D_DESC;
+
+def FMIN_A_W : FMIN_A_W_ENC, FMIN_A_W_DESC;
+def FMIN_A_D : FMIN_A_D_ENC, FMIN_A_D_DESC;
+
+def FMSUB_W : FMSUB_W_ENC, FMSUB_W_DESC;
+def FMSUB_D : FMSUB_D_ENC, FMSUB_D_DESC;
+
+def FMUL_W : FMUL_W_ENC, FMUL_W_DESC;
+def FMUL_D : FMUL_D_ENC, FMUL_D_DESC;
+
+def FRINT_W : FRINT_W_ENC, FRINT_W_DESC;
+def FRINT_D : FRINT_D_ENC, FRINT_D_DESC;
+
+def FRCP_W : FRCP_W_ENC, FRCP_W_DESC;
+def FRCP_D : FRCP_D_ENC, FRCP_D_DESC;
+
+def FRSQRT_W : FRSQRT_W_ENC, FRSQRT_W_DESC;
+def FRSQRT_D : FRSQRT_D_ENC, FRSQRT_D_DESC;
+
+def FSAF_W : FSAF_W_ENC, FSAF_W_DESC;
+def FSAF_D : FSAF_D_ENC, FSAF_D_DESC;
+
+def FSEQ_W : FSEQ_W_ENC, FSEQ_W_DESC;
+def FSEQ_D : FSEQ_D_ENC, FSEQ_D_DESC;
+
+def FSLE_W : FSLE_W_ENC, FSLE_W_DESC;
+def FSLE_D : FSLE_D_ENC, FSLE_D_DESC;
+
+def FSLT_W : FSLT_W_ENC, FSLT_W_DESC;
+def FSLT_D : FSLT_D_ENC, FSLT_D_DESC;
+
+def FSNE_W : FSNE_W_ENC, FSNE_W_DESC;
+def FSNE_D : FSNE_D_ENC, FSNE_D_DESC;
+
+def FSOR_W : FSOR_W_ENC, FSOR_W_DESC;
+def FSOR_D : FSOR_D_ENC, FSOR_D_DESC;
+
+def FSQRT_W : FSQRT_W_ENC, FSQRT_W_DESC;
+def FSQRT_D : FSQRT_D_ENC, FSQRT_D_DESC;
+
+def FSUB_W : FSUB_W_ENC, FSUB_W_DESC;
+def FSUB_D : FSUB_D_ENC, FSUB_D_DESC;
+
+def FSUEQ_W : FSUEQ_W_ENC, FSUEQ_W_DESC;
+def FSUEQ_D : FSUEQ_D_ENC, FSUEQ_D_DESC;
+
+def FSULE_W : FSULE_W_ENC, FSULE_W_DESC;
+def FSULE_D : FSULE_D_ENC, FSULE_D_DESC;
+
+def FSULT_W : FSULT_W_ENC, FSULT_W_DESC;
+def FSULT_D : FSULT_D_ENC, FSULT_D_DESC;
+
+def FSUN_W : FSUN_W_ENC, FSUN_W_DESC;
+def FSUN_D : FSUN_D_ENC, FSUN_D_DESC;
+
+def FSUNE_W : FSUNE_W_ENC, FSUNE_W_DESC;
+def FSUNE_D : FSUNE_D_ENC, FSUNE_D_DESC;
+
+def FTINT_S_W : FTINT_S_W_ENC, FTINT_S_W_DESC;
+def FTINT_S_D : FTINT_S_D_ENC, FTINT_S_D_DESC;
+
+def FTINT_U_W : FTINT_U_W_ENC, FTINT_U_W_DESC;
+def FTINT_U_D : FTINT_U_D_ENC, FTINT_U_D_DESC;
+
+def FTQ_H : FTQ_H_ENC, FTQ_H_DESC;
+def FTQ_W : FTQ_W_ENC, FTQ_W_DESC;
+
+def FTRUNC_S_W : FTRUNC_S_W_ENC, FTRUNC_S_W_DESC;
+def FTRUNC_S_D : FTRUNC_S_D_ENC, FTRUNC_S_D_DESC;
+
+def FTRUNC_U_W : FTRUNC_U_W_ENC, FTRUNC_U_W_DESC;
+def FTRUNC_U_D : FTRUNC_U_D_ENC, FTRUNC_U_D_DESC;
+
+def HADD_S_H : HADD_S_H_ENC, HADD_S_H_DESC;
+def HADD_S_W : HADD_S_W_ENC, HADD_S_W_DESC;
+def HADD_S_D : HADD_S_D_ENC, HADD_S_D_DESC;
+
+def HADD_U_H : HADD_U_H_ENC, HADD_U_H_DESC;
+def HADD_U_W : HADD_U_W_ENC, HADD_U_W_DESC;
+def HADD_U_D : HADD_U_D_ENC, HADD_U_D_DESC;
+
+def HSUB_S_H : HSUB_S_H_ENC, HSUB_S_H_DESC;
+def HSUB_S_W : HSUB_S_W_ENC, HSUB_S_W_DESC;
+def HSUB_S_D : HSUB_S_D_ENC, HSUB_S_D_DESC;
+
+def HSUB_U_H : HSUB_U_H_ENC, HSUB_U_H_DESC;
+def HSUB_U_W : HSUB_U_W_ENC, HSUB_U_W_DESC;
+def HSUB_U_D : HSUB_U_D_ENC, HSUB_U_D_DESC;
+
+def ILVEV_B : ILVEV_B_ENC, ILVEV_B_DESC;
+def ILVEV_H : ILVEV_H_ENC, ILVEV_H_DESC;
+def ILVEV_W : ILVEV_W_ENC, ILVEV_W_DESC;
+def ILVEV_D : ILVEV_D_ENC, ILVEV_D_DESC;
+
+def ILVL_B : ILVL_B_ENC, ILVL_B_DESC;
+def ILVL_H : ILVL_H_ENC, ILVL_H_DESC;
+def ILVL_W : ILVL_W_ENC, ILVL_W_DESC;
+def ILVL_D : ILVL_D_ENC, ILVL_D_DESC;
+
+def ILVOD_B : ILVOD_B_ENC, ILVOD_B_DESC;
+def ILVOD_H : ILVOD_H_ENC, ILVOD_H_DESC;
+def ILVOD_W : ILVOD_W_ENC, ILVOD_W_DESC;
+def ILVOD_D : ILVOD_D_ENC, ILVOD_D_DESC;
+
+def ILVR_B : ILVR_B_ENC, ILVR_B_DESC;
+def ILVR_H : ILVR_H_ENC, ILVR_H_DESC;
+def ILVR_W : ILVR_W_ENC, ILVR_W_DESC;
+def ILVR_D : ILVR_D_ENC, ILVR_D_DESC;
+
+def INSERT_B : INSERT_B_ENC, INSERT_B_DESC;
+def INSERT_H : INSERT_H_ENC, INSERT_H_DESC;
+def INSERT_W : INSERT_W_ENC, INSERT_W_DESC;
+
+// INSERT_FW_PSEUDO defined after INSVE_W
+// INSERT_FD_PSEUDO defined after INSVE_D
+
+def INSVE_B : INSVE_B_ENC, INSVE_B_DESC;
+def INSVE_H : INSVE_H_ENC, INSVE_H_DESC;
+def INSVE_W : INSVE_W_ENC, INSVE_W_DESC;
+def INSVE_D : INSVE_D_ENC, INSVE_D_DESC;
+
+def INSERT_FW_PSEUDO : INSERT_FW_PSEUDO_DESC;
+def INSERT_FD_PSEUDO : INSERT_FD_PSEUDO_DESC;
+
+def LD_B : LD_B_ENC, LD_B_DESC;
+def LD_H : LD_H_ENC, LD_H_DESC;
+def LD_W : LD_W_ENC, LD_W_DESC;
+def LD_D : LD_D_ENC, LD_D_DESC;
+
+def LDI_B : LDI_B_ENC, LDI_B_DESC;
+def LDI_H : LDI_H_ENC, LDI_H_DESC;
+def LDI_W : LDI_W_ENC, LDI_W_DESC;
+def LDI_D : LDI_D_ENC, LDI_D_DESC;
+
+def LSA : LSA_ENC, LSA_DESC;
+
+def MADD_Q_H : MADD_Q_H_ENC, MADD_Q_H_DESC;
+def MADD_Q_W : MADD_Q_W_ENC, MADD_Q_W_DESC;
+
+def MADDR_Q_H : MADDR_Q_H_ENC, MADDR_Q_H_DESC;
+def MADDR_Q_W : MADDR_Q_W_ENC, MADDR_Q_W_DESC;
+
+def MADDV_B : MADDV_B_ENC, MADDV_B_DESC;
+def MADDV_H : MADDV_H_ENC, MADDV_H_DESC;
+def MADDV_W : MADDV_W_ENC, MADDV_W_DESC;
+def MADDV_D : MADDV_D_ENC, MADDV_D_DESC;
+
+def MAX_A_B : MAX_A_B_ENC, MAX_A_B_DESC;
+def MAX_A_H : MAX_A_H_ENC, MAX_A_H_DESC;
+def MAX_A_W : MAX_A_W_ENC, MAX_A_W_DESC;
+def MAX_A_D : MAX_A_D_ENC, MAX_A_D_DESC;
+
+def MAX_S_B : MAX_S_B_ENC, MAX_S_B_DESC;
+def MAX_S_H : MAX_S_H_ENC, MAX_S_H_DESC;
+def MAX_S_W : MAX_S_W_ENC, MAX_S_W_DESC;
+def MAX_S_D : MAX_S_D_ENC, MAX_S_D_DESC;
+
+def MAX_U_B : MAX_U_B_ENC, MAX_U_B_DESC;
+def MAX_U_H : MAX_U_H_ENC, MAX_U_H_DESC;
+def MAX_U_W : MAX_U_W_ENC, MAX_U_W_DESC;
+def MAX_U_D : MAX_U_D_ENC, MAX_U_D_DESC;
+
+def MAXI_S_B : MAXI_S_B_ENC, MAXI_S_B_DESC;
+def MAXI_S_H : MAXI_S_H_ENC, MAXI_S_H_DESC;
+def MAXI_S_W : MAXI_S_W_ENC, MAXI_S_W_DESC;
+def MAXI_S_D : MAXI_S_D_ENC, MAXI_S_D_DESC;
+
+def MAXI_U_B : MAXI_U_B_ENC, MAXI_U_B_DESC;
+def MAXI_U_H : MAXI_U_H_ENC, MAXI_U_H_DESC;
+def MAXI_U_W : MAXI_U_W_ENC, MAXI_U_W_DESC;
+def MAXI_U_D : MAXI_U_D_ENC, MAXI_U_D_DESC;
+
+def MIN_A_B : MIN_A_B_ENC, MIN_A_B_DESC;
+def MIN_A_H : MIN_A_H_ENC, MIN_A_H_DESC;
+def MIN_A_W : MIN_A_W_ENC, MIN_A_W_DESC;
+def MIN_A_D : MIN_A_D_ENC, MIN_A_D_DESC;
+
+def MIN_S_B : MIN_S_B_ENC, MIN_S_B_DESC;
+def MIN_S_H : MIN_S_H_ENC, MIN_S_H_DESC;
+def MIN_S_W : MIN_S_W_ENC, MIN_S_W_DESC;
+def MIN_S_D : MIN_S_D_ENC, MIN_S_D_DESC;
+
+def MIN_U_B : MIN_U_B_ENC, MIN_U_B_DESC;
+def MIN_U_H : MIN_U_H_ENC, MIN_U_H_DESC;
+def MIN_U_W : MIN_U_W_ENC, MIN_U_W_DESC;
+def MIN_U_D : MIN_U_D_ENC, MIN_U_D_DESC;
+
+def MINI_S_B : MINI_S_B_ENC, MINI_S_B_DESC;
+def MINI_S_H : MINI_S_H_ENC, MINI_S_H_DESC;
+def MINI_S_W : MINI_S_W_ENC, MINI_S_W_DESC;
+def MINI_S_D : MINI_S_D_ENC, MINI_S_D_DESC;
+
+def MINI_U_B : MINI_U_B_ENC, MINI_U_B_DESC;
+def MINI_U_H : MINI_U_H_ENC, MINI_U_H_DESC;
+def MINI_U_W : MINI_U_W_ENC, MINI_U_W_DESC;
+def MINI_U_D : MINI_U_D_ENC, MINI_U_D_DESC;
+
+def MOD_S_B : MOD_S_B_ENC, MOD_S_B_DESC;
+def MOD_S_H : MOD_S_H_ENC, MOD_S_H_DESC;
+def MOD_S_W : MOD_S_W_ENC, MOD_S_W_DESC;
+def MOD_S_D : MOD_S_D_ENC, MOD_S_D_DESC;
+
+def MOD_U_B : MOD_U_B_ENC, MOD_U_B_DESC;
+def MOD_U_H : MOD_U_H_ENC, MOD_U_H_DESC;
+def MOD_U_W : MOD_U_W_ENC, MOD_U_W_DESC;
+def MOD_U_D : MOD_U_D_ENC, MOD_U_D_DESC;
+
+def MOVE_V : MOVE_V_ENC, MOVE_V_DESC;
+
+def MSUB_Q_H : MSUB_Q_H_ENC, MSUB_Q_H_DESC;
+def MSUB_Q_W : MSUB_Q_W_ENC, MSUB_Q_W_DESC;
+
+def MSUBR_Q_H : MSUBR_Q_H_ENC, MSUBR_Q_H_DESC;
+def MSUBR_Q_W : MSUBR_Q_W_ENC, MSUBR_Q_W_DESC;
+
+def MSUBV_B : MSUBV_B_ENC, MSUBV_B_DESC;
+def MSUBV_H : MSUBV_H_ENC, MSUBV_H_DESC;
+def MSUBV_W : MSUBV_W_ENC, MSUBV_W_DESC;
+def MSUBV_D : MSUBV_D_ENC, MSUBV_D_DESC;
+
+def MUL_Q_H : MUL_Q_H_ENC, MUL_Q_H_DESC;
+def MUL_Q_W : MUL_Q_W_ENC, MUL_Q_W_DESC;
+
+def MULR_Q_H : MULR_Q_H_ENC, MULR_Q_H_DESC;
+def MULR_Q_W : MULR_Q_W_ENC, MULR_Q_W_DESC;
+
+def MULV_B : MULV_B_ENC, MULV_B_DESC;
+def MULV_H : MULV_H_ENC, MULV_H_DESC;
+def MULV_W : MULV_W_ENC, MULV_W_DESC;
+def MULV_D : MULV_D_ENC, MULV_D_DESC;
+
+def NLOC_B : NLOC_B_ENC, NLOC_B_DESC;
+def NLOC_H : NLOC_H_ENC, NLOC_H_DESC;
+def NLOC_W : NLOC_W_ENC, NLOC_W_DESC;
+def NLOC_D : NLOC_D_ENC, NLOC_D_DESC;
+
+def NLZC_B : NLZC_B_ENC, NLZC_B_DESC;
+def NLZC_H : NLZC_H_ENC, NLZC_H_DESC;
+def NLZC_W : NLZC_W_ENC, NLZC_W_DESC;
+def NLZC_D : NLZC_D_ENC, NLZC_D_DESC;
+
+def NOR_V : NOR_V_ENC, NOR_V_DESC;
+def NOR_V_H_PSEUDO : NOR_V_H_PSEUDO_DESC,
+ PseudoInstExpansion<(NOR_V MSA128BOpnd:$wd,
+ MSA128BOpnd:$ws,
+ MSA128BOpnd:$wt)>;
+def NOR_V_W_PSEUDO : NOR_V_W_PSEUDO_DESC,
+ PseudoInstExpansion<(NOR_V MSA128BOpnd:$wd,
+ MSA128BOpnd:$ws,
+ MSA128BOpnd:$wt)>;
+def NOR_V_D_PSEUDO : NOR_V_D_PSEUDO_DESC,
+ PseudoInstExpansion<(NOR_V MSA128BOpnd:$wd,
+ MSA128BOpnd:$ws,
+ MSA128BOpnd:$wt)>;
+
+def NORI_B : NORI_B_ENC, NORI_B_DESC;
+
+def OR_V : OR_V_ENC, OR_V_DESC;
+def OR_V_H_PSEUDO : OR_V_H_PSEUDO_DESC,
+ PseudoInstExpansion<(OR_V MSA128BOpnd:$wd,
+ MSA128BOpnd:$ws,
+ MSA128BOpnd:$wt)>;
+def OR_V_W_PSEUDO : OR_V_W_PSEUDO_DESC,
+ PseudoInstExpansion<(OR_V MSA128BOpnd:$wd,
+ MSA128BOpnd:$ws,
+ MSA128BOpnd:$wt)>;
+def OR_V_D_PSEUDO : OR_V_D_PSEUDO_DESC,
+ PseudoInstExpansion<(OR_V MSA128BOpnd:$wd,
+ MSA128BOpnd:$ws,
+ MSA128BOpnd:$wt)>;
+
+def ORI_B : ORI_B_ENC, ORI_B_DESC;
+
+def PCKEV_B : PCKEV_B_ENC, PCKEV_B_DESC;
+def PCKEV_H : PCKEV_H_ENC, PCKEV_H_DESC;
+def PCKEV_W : PCKEV_W_ENC, PCKEV_W_DESC;
+def PCKEV_D : PCKEV_D_ENC, PCKEV_D_DESC;
+
+def PCKOD_B : PCKOD_B_ENC, PCKOD_B_DESC;
+def PCKOD_H : PCKOD_H_ENC, PCKOD_H_DESC;
+def PCKOD_W : PCKOD_W_ENC, PCKOD_W_DESC;
+def PCKOD_D : PCKOD_D_ENC, PCKOD_D_DESC;
+
+def PCNT_B : PCNT_B_ENC, PCNT_B_DESC;
+def PCNT_H : PCNT_H_ENC, PCNT_H_DESC;
+def PCNT_W : PCNT_W_ENC, PCNT_W_DESC;
+def PCNT_D : PCNT_D_ENC, PCNT_D_DESC;
+
+def SAT_S_B : SAT_S_B_ENC, SAT_S_B_DESC;
+def SAT_S_H : SAT_S_H_ENC, SAT_S_H_DESC;
+def SAT_S_W : SAT_S_W_ENC, SAT_S_W_DESC;
+def SAT_S_D : SAT_S_D_ENC, SAT_S_D_DESC;
+
+def SAT_U_B : SAT_U_B_ENC, SAT_U_B_DESC;
+def SAT_U_H : SAT_U_H_ENC, SAT_U_H_DESC;
+def SAT_U_W : SAT_U_W_ENC, SAT_U_W_DESC;
+def SAT_U_D : SAT_U_D_ENC, SAT_U_D_DESC;
+
+def SHF_B : SHF_B_ENC, SHF_B_DESC;
+def SHF_H : SHF_H_ENC, SHF_H_DESC;
+def SHF_W : SHF_W_ENC, SHF_W_DESC;
+
+def SLD_B : SLD_B_ENC, SLD_B_DESC;
+def SLD_H : SLD_H_ENC, SLD_H_DESC;
+def SLD_W : SLD_W_ENC, SLD_W_DESC;
+def SLD_D : SLD_D_ENC, SLD_D_DESC;
+
+def SLDI_B : SLDI_B_ENC, SLDI_B_DESC;
+def SLDI_H : SLDI_H_ENC, SLDI_H_DESC;
+def SLDI_W : SLDI_W_ENC, SLDI_W_DESC;
+def SLDI_D : SLDI_D_ENC, SLDI_D_DESC;
+
+def SLL_B : SLL_B_ENC, SLL_B_DESC;
+def SLL_H : SLL_H_ENC, SLL_H_DESC;
+def SLL_W : SLL_W_ENC, SLL_W_DESC;
+def SLL_D : SLL_D_ENC, SLL_D_DESC;
+
+def SLLI_B : SLLI_B_ENC, SLLI_B_DESC;
+def SLLI_H : SLLI_H_ENC, SLLI_H_DESC;
+def SLLI_W : SLLI_W_ENC, SLLI_W_DESC;
+def SLLI_D : SLLI_D_ENC, SLLI_D_DESC;
+
+def SPLAT_B : SPLAT_B_ENC, SPLAT_B_DESC;
+def SPLAT_H : SPLAT_H_ENC, SPLAT_H_DESC;
+def SPLAT_W : SPLAT_W_ENC, SPLAT_W_DESC;
+def SPLAT_D : SPLAT_D_ENC, SPLAT_D_DESC;
+
+def SPLATI_B : SPLATI_B_ENC, SPLATI_B_DESC;
+def SPLATI_H : SPLATI_H_ENC, SPLATI_H_DESC;
+def SPLATI_W : SPLATI_W_ENC, SPLATI_W_DESC;
+def SPLATI_D : SPLATI_D_ENC, SPLATI_D_DESC;
+
+def SRA_B : SRA_B_ENC, SRA_B_DESC;
+def SRA_H : SRA_H_ENC, SRA_H_DESC;
+def SRA_W : SRA_W_ENC, SRA_W_DESC;
+def SRA_D : SRA_D_ENC, SRA_D_DESC;
+
+def SRAI_B : SRAI_B_ENC, SRAI_B_DESC;
+def SRAI_H : SRAI_H_ENC, SRAI_H_DESC;
+def SRAI_W : SRAI_W_ENC, SRAI_W_DESC;
+def SRAI_D : SRAI_D_ENC, SRAI_D_DESC;
+
+def SRAR_B : SRAR_B_ENC, SRAR_B_DESC;
+def SRAR_H : SRAR_H_ENC, SRAR_H_DESC;
+def SRAR_W : SRAR_W_ENC, SRAR_W_DESC;
+def SRAR_D : SRAR_D_ENC, SRAR_D_DESC;
+
+def SRARI_B : SRARI_B_ENC, SRARI_B_DESC;
+def SRARI_H : SRARI_H_ENC, SRARI_H_DESC;
+def SRARI_W : SRARI_W_ENC, SRARI_W_DESC;
+def SRARI_D : SRARI_D_ENC, SRARI_D_DESC;
+
+def SRL_B : SRL_B_ENC, SRL_B_DESC;
+def SRL_H : SRL_H_ENC, SRL_H_DESC;
+def SRL_W : SRL_W_ENC, SRL_W_DESC;
+def SRL_D : SRL_D_ENC, SRL_D_DESC;
+
+def SRLI_B : SRLI_B_ENC, SRLI_B_DESC;
+def SRLI_H : SRLI_H_ENC, SRLI_H_DESC;
+def SRLI_W : SRLI_W_ENC, SRLI_W_DESC;
+def SRLI_D : SRLI_D_ENC, SRLI_D_DESC;
+
+def SRLR_B : SRLR_B_ENC, SRLR_B_DESC;
+def SRLR_H : SRLR_H_ENC, SRLR_H_DESC;
+def SRLR_W : SRLR_W_ENC, SRLR_W_DESC;
+def SRLR_D : SRLR_D_ENC, SRLR_D_DESC;
+
+def SRLRI_B : SRLRI_B_ENC, SRLRI_B_DESC;
+def SRLRI_H : SRLRI_H_ENC, SRLRI_H_DESC;
+def SRLRI_W : SRLRI_W_ENC, SRLRI_W_DESC;
+def SRLRI_D : SRLRI_D_ENC, SRLRI_D_DESC;
+
+def ST_B : ST_B_ENC, ST_B_DESC;
+def ST_H : ST_H_ENC, ST_H_DESC;
+def ST_W : ST_W_ENC, ST_W_DESC;
+def ST_D : ST_D_ENC, ST_D_DESC;
+
+def SUBS_S_B : SUBS_S_B_ENC, SUBS_S_B_DESC;
+def SUBS_S_H : SUBS_S_H_ENC, SUBS_S_H_DESC;
+def SUBS_S_W : SUBS_S_W_ENC, SUBS_S_W_DESC;
+def SUBS_S_D : SUBS_S_D_ENC, SUBS_S_D_DESC;
+
+def SUBS_U_B : SUBS_U_B_ENC, SUBS_U_B_DESC;
+def SUBS_U_H : SUBS_U_H_ENC, SUBS_U_H_DESC;
+def SUBS_U_W : SUBS_U_W_ENC, SUBS_U_W_DESC;
+def SUBS_U_D : SUBS_U_D_ENC, SUBS_U_D_DESC;
+
+def SUBSUS_U_B : SUBSUS_U_B_ENC, SUBSUS_U_B_DESC;
+def SUBSUS_U_H : SUBSUS_U_H_ENC, SUBSUS_U_H_DESC;
+def SUBSUS_U_W : SUBSUS_U_W_ENC, SUBSUS_U_W_DESC;
+def SUBSUS_U_D : SUBSUS_U_D_ENC, SUBSUS_U_D_DESC;
+
+def SUBSUU_S_B : SUBSUU_S_B_ENC, SUBSUU_S_B_DESC;
+def SUBSUU_S_H : SUBSUU_S_H_ENC, SUBSUU_S_H_DESC;
+def SUBSUU_S_W : SUBSUU_S_W_ENC, SUBSUU_S_W_DESC;
+def SUBSUU_S_D : SUBSUU_S_D_ENC, SUBSUU_S_D_DESC;
+
+def SUBV_B : SUBV_B_ENC, SUBV_B_DESC;
+def SUBV_H : SUBV_H_ENC, SUBV_H_DESC;
+def SUBV_W : SUBV_W_ENC, SUBV_W_DESC;
+def SUBV_D : SUBV_D_ENC, SUBV_D_DESC;
+
+def SUBVI_B : SUBVI_B_ENC, SUBVI_B_DESC;
+def SUBVI_H : SUBVI_H_ENC, SUBVI_H_DESC;
+def SUBVI_W : SUBVI_W_ENC, SUBVI_W_DESC;
+def SUBVI_D : SUBVI_D_ENC, SUBVI_D_DESC;
+
+def VSHF_B : VSHF_B_ENC, VSHF_B_DESC;
+def VSHF_H : VSHF_H_ENC, VSHF_H_DESC;
+def VSHF_W : VSHF_W_ENC, VSHF_W_DESC;
+def VSHF_D : VSHF_D_ENC, VSHF_D_DESC;
+
+def XOR_V : XOR_V_ENC, XOR_V_DESC;
+def XOR_V_H_PSEUDO : XOR_V_H_PSEUDO_DESC,
+ PseudoInstExpansion<(XOR_V MSA128BOpnd:$wd,
+ MSA128BOpnd:$ws,
+ MSA128BOpnd:$wt)>;
+def XOR_V_W_PSEUDO : XOR_V_W_PSEUDO_DESC,
+ PseudoInstExpansion<(XOR_V MSA128BOpnd:$wd,
+ MSA128BOpnd:$ws,
+ MSA128BOpnd:$wt)>;
+def XOR_V_D_PSEUDO : XOR_V_D_PSEUDO_DESC,
+ PseudoInstExpansion<(XOR_V MSA128BOpnd:$wd,
+ MSA128BOpnd:$ws,
+ MSA128BOpnd:$wt)>;
+
+def XORI_B : XORI_B_ENC, XORI_B_DESC;
+
+// Patterns.
+class MSAPat<dag pattern, dag result, list<Predicate> pred = [HasMSA]> :
+ Pat<pattern, result>, Requires<pred>;
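+// MSAPat defaults its predicate list to [HasMSA], so the patterns below only
+// apply when the MSA feature is present.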
+
+def : MSAPat<(extractelt (v4i32 MSA128W:$ws), immZExt4:$idx),
+ (COPY_S_W MSA128W:$ws, immZExt4:$idx)>;
+
+def : MSAPat<(v16i8 (load addr:$addr)), (LD_B addr:$addr)>;
+def : MSAPat<(v8i16 (load addr:$addr)), (LD_H addr:$addr)>;
+def : MSAPat<(v4i32 (load addr:$addr)), (LD_W addr:$addr)>;
+def : MSAPat<(v2i64 (load addr:$addr)), (LD_D addr:$addr)>;
+def : MSAPat<(v8f16 (load addr:$addr)), (LD_H addr:$addr)>;
+def : MSAPat<(v4f32 (load addr:$addr)), (LD_W addr:$addr)>;
+def : MSAPat<(v2f64 (load addr:$addr)), (LD_D addr:$addr)>;
+
+def : MSAPat<(v8f16 (load addrRegImm:$addr)), (LD_H addrRegImm:$addr)>;
+def : MSAPat<(v4f32 (load addrRegImm:$addr)), (LD_W addrRegImm:$addr)>;
+def : MSAPat<(v2f64 (load addrRegImm:$addr)), (LD_D addrRegImm:$addr)>;
+
+def : MSAPat<(store (v16i8 MSA128B:$ws), addr:$addr),
+ (ST_B MSA128B:$ws, addr:$addr)>;
+def : MSAPat<(store (v8i16 MSA128H:$ws), addr:$addr),
+ (ST_H MSA128H:$ws, addr:$addr)>;
+def : MSAPat<(store (v4i32 MSA128W:$ws), addr:$addr),
+ (ST_W MSA128W:$ws, addr:$addr)>;
+def : MSAPat<(store (v2i64 MSA128D:$ws), addr:$addr),
+ (ST_D MSA128D:$ws, addr:$addr)>;
+def : MSAPat<(store (v8f16 MSA128H:$ws), addr:$addr),
+ (ST_H MSA128H:$ws, addr:$addr)>;
+def : MSAPat<(store (v4f32 MSA128W:$ws), addr:$addr),
+ (ST_W MSA128W:$ws, addr:$addr)>;
+def : MSAPat<(store (v2f64 MSA128D:$ws), addr:$addr),
+ (ST_D MSA128D:$ws, addr:$addr)>;
+
+def ST_FH : MSAPat<(store (v8f16 MSA128H:$ws), addrRegImm:$addr),
+ (ST_H MSA128H:$ws, addrRegImm:$addr)>;
+def ST_FW : MSAPat<(store (v4f32 MSA128W:$ws), addrRegImm:$addr),
+ (ST_W MSA128W:$ws, addrRegImm:$addr)>;
+def ST_FD : MSAPat<(store (v2f64 MSA128D:$ws), addrRegImm:$addr),
+ (ST_D MSA128D:$ws, addrRegImm:$addr)>;
+
+class MSA_FABS_PSEUDO_DESC_BASE<RegisterOperand ROWD,
+ RegisterOperand ROWS = ROWD,
+ InstrItinClass itin = NoItinerary> :
+ MipsPseudo<(outs ROWD:$wd),
+ (ins ROWS:$ws),
+ [(set ROWD:$wd, (fabs ROWS:$ws))]> {
+ InstrItinClass Itinerary = itin;
+}
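+// fabs has no dedicated MSA instruction. Assuming the documented FMAX_A.df
+// semantics (maximum of the operands' absolute values), FMAX_A.df wd, ws, ws
+// computes |ws| element-wise, which is what the expansions below rely on.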
+def FABS_W : MSA_FABS_PSEUDO_DESC_BASE<MSA128WOpnd>,
+ PseudoInstExpansion<(FMAX_A_W MSA128WOpnd:$wd, MSA128WOpnd:$ws,
+ MSA128WOpnd:$ws)>;
+def FABS_D : MSA_FABS_PSEUDO_DESC_BASE<MSA128DOpnd>,
+ PseudoInstExpansion<(FMAX_A_D MSA128DOpnd:$wd, MSA128DOpnd:$ws,
+ MSA128DOpnd:$ws)>;
+
+class MSABitconvertPat<ValueType DstVT, ValueType SrcVT,
+ RegisterClass DstRC, list<Predicate> preds = [HasMSA]> :
+ MSAPat<(DstVT (bitconvert SrcVT:$src)),
+ (COPY_TO_REGCLASS SrcVT:$src, DstRC), preds>;
+
+// These are endian-independent because the element size doesn't change.
+def : MSABitconvertPat<v8i16, v8f16, MSA128H>;
+def : MSABitconvertPat<v4i32, v4f32, MSA128W>;
+def : MSABitconvertPat<v2i64, v2f64, MSA128D>;
+def : MSABitconvertPat<v8f16, v8i16, MSA128H>;
+def : MSABitconvertPat<v4f32, v4i32, MSA128W>;
+def : MSABitconvertPat<v2f64, v2i64, MSA128D>;
+
+// Little endian bitcasts are always no-ops
+def : MSABitconvertPat<v16i8, v8i16, MSA128B, [HasMSA, IsLE]>;
+def : MSABitconvertPat<v16i8, v4i32, MSA128B, [HasMSA, IsLE]>;
+def : MSABitconvertPat<v16i8, v2i64, MSA128B, [HasMSA, IsLE]>;
+def : MSABitconvertPat<v16i8, v8f16, MSA128B, [HasMSA, IsLE]>;
+def : MSABitconvertPat<v16i8, v4f32, MSA128B, [HasMSA, IsLE]>;
+def : MSABitconvertPat<v16i8, v2f64, MSA128B, [HasMSA, IsLE]>;
+
+def : MSABitconvertPat<v8i16, v16i8, MSA128H, [HasMSA, IsLE]>;
+def : MSABitconvertPat<v8i16, v4i32, MSA128H, [HasMSA, IsLE]>;
+def : MSABitconvertPat<v8i16, v2i64, MSA128H, [HasMSA, IsLE]>;
+def : MSABitconvertPat<v8i16, v4f32, MSA128H, [HasMSA, IsLE]>;
+def : MSABitconvertPat<v8i16, v2f64, MSA128H, [HasMSA, IsLE]>;
+
+def : MSABitconvertPat<v4i32, v16i8, MSA128W, [HasMSA, IsLE]>;
+def : MSABitconvertPat<v4i32, v8i16, MSA128W, [HasMSA, IsLE]>;
+def : MSABitconvertPat<v4i32, v2i64, MSA128W, [HasMSA, IsLE]>;
+def : MSABitconvertPat<v4i32, v8f16, MSA128W, [HasMSA, IsLE]>;
+def : MSABitconvertPat<v4i32, v2f64, MSA128W, [HasMSA, IsLE]>;
+
+def : MSABitconvertPat<v2i64, v16i8, MSA128D, [HasMSA, IsLE]>;
+def : MSABitconvertPat<v2i64, v8i16, MSA128D, [HasMSA, IsLE]>;
+def : MSABitconvertPat<v2i64, v4i32, MSA128D, [HasMSA, IsLE]>;
+def : MSABitconvertPat<v2i64, v8f16, MSA128D, [HasMSA, IsLE]>;
+def : MSABitconvertPat<v2i64, v4f32, MSA128D, [HasMSA, IsLE]>;
+
+def : MSABitconvertPat<v4f32, v16i8, MSA128W, [HasMSA, IsLE]>;
+def : MSABitconvertPat<v4f32, v8i16, MSA128W, [HasMSA, IsLE]>;
+def : MSABitconvertPat<v4f32, v2i64, MSA128W, [HasMSA, IsLE]>;
+def : MSABitconvertPat<v4f32, v8f16, MSA128W, [HasMSA, IsLE]>;
+def : MSABitconvertPat<v4f32, v2f64, MSA128W, [HasMSA, IsLE]>;
+
+def : MSABitconvertPat<v2f64, v16i8, MSA128D, [HasMSA, IsLE]>;
+def : MSABitconvertPat<v2f64, v8i16, MSA128D, [HasMSA, IsLE]>;
+def : MSABitconvertPat<v2f64, v4i32, MSA128D, [HasMSA, IsLE]>;
+def : MSABitconvertPat<v2f64, v8f16, MSA128D, [HasMSA, IsLE]>;
+def : MSABitconvertPat<v2f64, v4f32, MSA128D, [HasMSA, IsLE]>;
+
+// Big endian bitcasts expand to shuffle instructions.
+// This is because bitcast is defined to be a store/load sequence and the
+// vector store/load instructions are mixed-endian with respect to the vector
+// as a whole (little endian with respect to element order, but big endian
+// elements).
+
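+// The shuffle immediates used below assume the SHF.df encoding: the 8-bit
+// immediate is four 2-bit source-element selectors, with destination element
+// i taking source element (imm >> (2 * i)) & 0x3. Hence 27 (0b00011011)
+// reverses a group of four elements and 177 (0b10110001) swaps the elements
+// of each adjacent pair.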
+class MSABitconvertReverseQuartersPat<ValueType DstVT, ValueType SrcVT,
+ RegisterClass DstRC, MSAInst Insn,
+ RegisterClass ViaRC> :
+ MSAPat<(DstVT (bitconvert SrcVT:$src)),
+ (COPY_TO_REGCLASS (Insn (COPY_TO_REGCLASS SrcVT:$src, ViaRC), 27),
+ DstRC),
+ [HasMSA, IsBE]>;
+
+class MSABitconvertReverseHalvesPat<ValueType DstVT, ValueType SrcVT,
+ RegisterClass DstRC, MSAInst Insn,
+ RegisterClass ViaRC> :
+ MSAPat<(DstVT (bitconvert SrcVT:$src)),
+ (COPY_TO_REGCLASS (Insn (COPY_TO_REGCLASS SrcVT:$src, ViaRC), 177),
+ DstRC),
+ [HasMSA, IsBE]>;
+
+class MSABitconvertReverseBInHPat<ValueType DstVT, ValueType SrcVT,
+ RegisterClass DstRC> :
+ MSABitconvertReverseHalvesPat<DstVT, SrcVT, DstRC, SHF_B, MSA128B>;
+
+class MSABitconvertReverseBInWPat<ValueType DstVT, ValueType SrcVT,
+ RegisterClass DstRC> :
+ MSABitconvertReverseQuartersPat<DstVT, SrcVT, DstRC, SHF_B, MSA128B>;
+
+class MSABitconvertReverseBInDPat<ValueType DstVT, ValueType SrcVT,
+ RegisterClass DstRC> :
+ MSAPat<(DstVT (bitconvert SrcVT:$src)),
+ (COPY_TO_REGCLASS
+ (SHF_W
+ (COPY_TO_REGCLASS
+ (SHF_B (COPY_TO_REGCLASS SrcVT:$src, MSA128B), 27),
+ MSA128W), 177),
+ DstRC),
+ [HasMSA, IsBE]>;
+
+class MSABitconvertReverseHInWPat<ValueType DstVT, ValueType SrcVT,
+ RegisterClass DstRC> :
+ MSABitconvertReverseHalvesPat<DstVT, SrcVT, DstRC, SHF_H, MSA128H>;
+
+class MSABitconvertReverseHInDPat<ValueType DstVT, ValueType SrcVT,
+ RegisterClass DstRC> :
+ MSABitconvertReverseQuartersPat<DstVT, SrcVT, DstRC, SHF_H, MSA128H>;
+
+class MSABitconvertReverseWInDPat<ValueType DstVT, ValueType SrcVT,
+ RegisterClass DstRC> :
+ MSABitconvertReverseHalvesPat<DstVT, SrcVT, DstRC, SHF_W, MSA128W>;
+
+def : MSABitconvertReverseBInHPat<v8i16, v16i8, MSA128H>;
+def : MSABitconvertReverseBInHPat<v8f16, v16i8, MSA128H>;
+def : MSABitconvertReverseBInWPat<v4i32, v16i8, MSA128W>;
+def : MSABitconvertReverseBInWPat<v4f32, v16i8, MSA128W>;
+def : MSABitconvertReverseBInDPat<v2i64, v16i8, MSA128D>;
+def : MSABitconvertReverseBInDPat<v2f64, v16i8, MSA128D>;
+
+def : MSABitconvertReverseBInHPat<v16i8, v8i16, MSA128B>;
+def : MSABitconvertReverseHInWPat<v4i32, v8i16, MSA128W>;
+def : MSABitconvertReverseHInWPat<v4f32, v8i16, MSA128W>;
+def : MSABitconvertReverseHInDPat<v2i64, v8i16, MSA128D>;
+def : MSABitconvertReverseHInDPat<v2f64, v8i16, MSA128D>;
+
+def : MSABitconvertReverseBInHPat<v16i8, v8f16, MSA128B>;
+def : MSABitconvertReverseHInWPat<v4i32, v8f16, MSA128W>;
+def : MSABitconvertReverseHInWPat<v4f32, v8f16, MSA128W>;
+def : MSABitconvertReverseHInDPat<v2i64, v8f16, MSA128D>;
+def : MSABitconvertReverseHInDPat<v2f64, v8f16, MSA128D>;
+
+def : MSABitconvertReverseBInWPat<v16i8, v4i32, MSA128B>;
+def : MSABitconvertReverseHInWPat<v8i16, v4i32, MSA128H>;
+def : MSABitconvertReverseHInWPat<v8f16, v4i32, MSA128H>;
+def : MSABitconvertReverseWInDPat<v2i64, v4i32, MSA128D>;
+def : MSABitconvertReverseWInDPat<v2f64, v4i32, MSA128D>;
+
+def : MSABitconvertReverseBInWPat<v16i8, v4f32, MSA128B>;
+def : MSABitconvertReverseHInWPat<v8i16, v4f32, MSA128H>;
+def : MSABitconvertReverseHInWPat<v8f16, v4f32, MSA128H>;
+def : MSABitconvertReverseWInDPat<v2i64, v4f32, MSA128D>;
+def : MSABitconvertReverseWInDPat<v2f64, v4f32, MSA128D>;
+
+def : MSABitconvertReverseBInDPat<v16i8, v2i64, MSA128B>;
+def : MSABitconvertReverseHInDPat<v8i16, v2i64, MSA128H>;
+def : MSABitconvertReverseHInDPat<v8f16, v2i64, MSA128H>;
+def : MSABitconvertReverseWInDPat<v4i32, v2i64, MSA128W>;
+def : MSABitconvertReverseWInDPat<v4f32, v2i64, MSA128W>;
+
+def : MSABitconvertReverseBInDPat<v16i8, v2f64, MSA128B>;
+def : MSABitconvertReverseHInDPat<v8i16, v2f64, MSA128H>;
+def : MSABitconvertReverseHInDPat<v8f16, v2f64, MSA128H>;
+def : MSABitconvertReverseWInDPat<v4i32, v2f64, MSA128W>;
+def : MSABitconvertReverseWInDPat<v4f32, v2f64, MSA128W>;
+
+// Pseudos used to implement BNZ.df and BZ.df.
+
+class MSA_CBRANCH_PSEUDO_DESC_BASE<SDPatternOperator OpNode, ValueType TyNode,
+ RegisterClass RCWS,
+ InstrItinClass itin = NoItinerary> :
+ MipsPseudo<(outs GPR32:$dst),
+ (ins RCWS:$ws),
+ [(set GPR32:$dst, (OpNode (TyNode RCWS:$ws)))]> {
+ bit usesCustomInserter = 1;
+}
+
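+// These pseudos rely on the custom inserter (usesCustomInserter above); the
+// expected expansion is a BNZ.df/BZ.df conditional branch that materializes
+// 0 or 1 in $dst, but the exact lowering lives in the C++ custom inserter.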
+def SNZ_B_PSEUDO : MSA_CBRANCH_PSEUDO_DESC_BASE<MipsVAllNonZero, v16i8,
+ MSA128B, NoItinerary>;
+def SNZ_H_PSEUDO : MSA_CBRANCH_PSEUDO_DESC_BASE<MipsVAllNonZero, v8i16,
+ MSA128H, NoItinerary>;
+def SNZ_W_PSEUDO : MSA_CBRANCH_PSEUDO_DESC_BASE<MipsVAllNonZero, v4i32,
+ MSA128W, NoItinerary>;
+def SNZ_D_PSEUDO : MSA_CBRANCH_PSEUDO_DESC_BASE<MipsVAllNonZero, v2i64,
+ MSA128D, NoItinerary>;
+def SNZ_V_PSEUDO : MSA_CBRANCH_PSEUDO_DESC_BASE<MipsVAnyNonZero, v16i8,
+ MSA128B, NoItinerary>;
+
+def SZ_B_PSEUDO : MSA_CBRANCH_PSEUDO_DESC_BASE<MipsVAllZero, v16i8,
+ MSA128B, NoItinerary>;
+def SZ_H_PSEUDO : MSA_CBRANCH_PSEUDO_DESC_BASE<MipsVAllZero, v8i16,
+ MSA128H, NoItinerary>;
+def SZ_W_PSEUDO : MSA_CBRANCH_PSEUDO_DESC_BASE<MipsVAllZero, v4i32,
+ MSA128W, NoItinerary>;
+def SZ_D_PSEUDO : MSA_CBRANCH_PSEUDO_DESC_BASE<MipsVAllZero, v2i64,
+ MSA128D, NoItinerary>;
+def SZ_V_PSEUDO : MSA_CBRANCH_PSEUDO_DESC_BASE<MipsVAnyZero, v16i8,
+ MSA128B, NoItinerary>;
diff --git a/lib/Target/Mips/MipsMachineFunction.cpp b/lib/Target/Mips/MipsMachineFunction.cpp
index a7299d7..dedf802 100644
--- a/lib/Target/Mips/MipsMachineFunction.cpp
+++ b/lib/Target/Mips/MipsMachineFunction.cpp
@@ -15,6 +15,7 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -22,6 +23,53 @@ static cl::opt<bool>
FixGlobalBaseReg("mips-fix-global-base-reg", cl::Hidden, cl::init(true),
cl::desc("Always use $gp as the global base register."));
+// class MipsCallEntry.
+MipsCallEntry::MipsCallEntry(const StringRef &N) {
+#ifndef NDEBUG
+ Name = N;
+ Val = 0;
+#endif
+}
+
+MipsCallEntry::MipsCallEntry(const GlobalValue *V) {
+#ifndef NDEBUG
+ Val = V;
+#endif
+}
+
+bool MipsCallEntry::isConstant(const MachineFrameInfo *) const {
+ return false;
+}
+
+bool MipsCallEntry::isAliased(const MachineFrameInfo *) const {
+ return false;
+}
+
+bool MipsCallEntry::mayAlias(const MachineFrameInfo *) const {
+ return false;
+}
+
+void MipsCallEntry::printCustom(raw_ostream &O) const {
+ O << "MipsCallEntry: ";
+#ifndef NDEBUG
+ if (Val)
+ O << Val->getName();
+ else
+ O << Name;
+#endif
+}
+
+MipsFunctionInfo::~MipsFunctionInfo() {
+ for (StringMap<const MipsCallEntry *>::iterator
+ I = ExternalCallEntries.begin(), E = ExternalCallEntries.end(); I != E;
+ ++I)
+ delete I->getValue();
+
+ for (ValueMap<const GlobalValue *, const MipsCallEntry *>::iterator
+ I = GlobalCallEntries.begin(), E = GlobalCallEntries.end(); I != E; ++I)
+ delete I->second;
+}
+
bool MipsFunctionInfo::globalBaseRegSet() const {
return GlobalBaseReg;
}
@@ -72,4 +120,22 @@ bool MipsFunctionInfo::isEhDataRegFI(int FI) const {
|| FI == EhDataRegFI[2] || FI == EhDataRegFI[3]);
}
+MachinePointerInfo MipsFunctionInfo::callPtrInfo(const StringRef &Name) {
+ const MipsCallEntry *&E = ExternalCallEntries[Name];
+
+ if (!E)
+ E = new MipsCallEntry(Name);
+
+ return MachinePointerInfo(E);
+}
+
+MachinePointerInfo MipsFunctionInfo::callPtrInfo(const GlobalValue *Val) {
+ const MipsCallEntry *&E = GlobalCallEntries[Val];
+
+ if (!E)
+ E = new MipsCallEntry(Val);
+
+ return MachinePointerInfo(E);
+}
+
void MipsFunctionInfo::anchor() { }
diff --git a/lib/Target/Mips/MipsMachineFunction.h b/lib/Target/Mips/MipsMachineFunction.h
index b05b348..43bf682 100644
--- a/lib/Target/Mips/MipsMachineFunction.h
+++ b/lib/Target/Mips/MipsMachineFunction.h
@@ -15,56 +15,48 @@
#define MIPS_MACHINE_FUNCTION_INFO_H
#include "MipsSubtarget.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/ValueMap.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/IR/GlobalValue.h"
#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetMachine.h"
#include <utility>
namespace llvm {
+/// \brief A class derived from PseudoSourceValue that represents a GOT entry
+/// resolved by lazy-binding.
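+/// The alias-analysis overrides below all return false, presumably so that a
+/// load through such an entry is treated as disjoint from ordinary memory and
+/// lazy-binding GOT loads can be reordered freely.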
+class MipsCallEntry : public PseudoSourceValue {
+public:
+ explicit MipsCallEntry(const StringRef &N);
+ explicit MipsCallEntry(const GlobalValue *V);
+ virtual bool isConstant(const MachineFrameInfo *) const;
+ virtual bool isAliased(const MachineFrameInfo *) const;
+ virtual bool mayAlias(const MachineFrameInfo *) const;
+
+private:
+ virtual void printCustom(raw_ostream &O) const;
+#ifndef NDEBUG
+ std::string Name;
+ const GlobalValue *Val;
+#endif
+};
+
/// MipsFunctionInfo - This class is derived from MachineFunctionInfo and
/// contains private Mips target-specific information for each MachineFunction.
class MipsFunctionInfo : public MachineFunctionInfo {
- virtual void anchor();
-
- MachineFunction& MF;
- /// SRetReturnReg - Some subtargets require that sret lowering includes
- /// returning the value of the returned struct in a register. This field
- /// holds the virtual register into which the sret argument is passed.
- unsigned SRetReturnReg;
-
- /// GlobalBaseReg - keeps track of the virtual register initialized for
- /// use as the global base register. This is used for PIC in some PIC
- /// relocation models.
- unsigned GlobalBaseReg;
-
- /// Mips16SPAliasReg - keeps track of the virtual register initialized for
- /// use as an alias for SP for use in load/store of halfword/byte from/to
- /// the stack
- unsigned Mips16SPAliasReg;
-
- /// VarArgsFrameIndex - FrameIndex for start of varargs area.
- int VarArgsFrameIndex;
-
- /// True if function has a byval argument.
- bool HasByvalArg;
-
- /// Size of incoming argument area.
- unsigned IncomingArgSize;
-
- /// CallsEhReturn - Whether the function calls llvm.eh.return.
- bool CallsEhReturn;
-
- /// Frame objects for spilling eh data registers.
- int EhDataRegFI[4];
-
public:
MipsFunctionInfo(MachineFunction& MF)
: MF(MF), SRetReturnReg(0), GlobalBaseReg(0), Mips16SPAliasReg(0),
VarArgsFrameIndex(0), CallsEhReturn(false)
{}
+ ~MipsFunctionInfo();
+
unsigned getSRetReturnReg() const { return SRetReturnReg; }
void setSRetReturnReg(unsigned Reg) { SRetReturnReg = Reg; }
@@ -92,6 +84,51 @@ public:
int getEhDataRegFI(unsigned Reg) const { return EhDataRegFI[Reg]; }
bool isEhDataRegFI(int FI) const;
+  /// \brief Create a MachinePointerInfo that has a MipsCallEntry object
+ /// representing a GOT entry for an external function.
+ MachinePointerInfo callPtrInfo(const StringRef &Name);
+
+  /// \brief Create a MachinePointerInfo that has a MipsCallEntry object
+ /// representing a GOT entry for a global function.
+ MachinePointerInfo callPtrInfo(const GlobalValue *Val);
+
+private:
+ virtual void anchor();
+
+ MachineFunction& MF;
+ /// SRetReturnReg - Some subtargets require that sret lowering includes
+ /// returning the value of the returned struct in a register. This field
+ /// holds the virtual register into which the sret argument is passed.
+ unsigned SRetReturnReg;
+
+ /// GlobalBaseReg - keeps track of the virtual register initialized for
+ /// use as the global base register. This is used for PIC in some PIC
+ /// relocation models.
+ unsigned GlobalBaseReg;
+
+ /// Mips16SPAliasReg - keeps track of the virtual register initialized for
+ /// use as an alias for SP for use in load/store of halfword/byte from/to
+ /// the stack
+ unsigned Mips16SPAliasReg;
+
+ /// VarArgsFrameIndex - FrameIndex for start of varargs area.
+ int VarArgsFrameIndex;
+
+ /// True if function has a byval argument.
+ bool HasByvalArg;
+
+ /// Size of incoming argument area.
+ unsigned IncomingArgSize;
+
+ /// CallsEhReturn - Whether the function calls llvm.eh.return.
+ bool CallsEhReturn;
+
+ /// Frame objects for spilling eh data registers.
+ int EhDataRegFI[4];
+
+ /// MipsCallEntry maps.
+ StringMap<const MipsCallEntry *> ExternalCallEntries;
+ ValueMap<const GlobalValue *, const MipsCallEntry *> GlobalCallEntries;
};
} // end of namespace llvm
diff --git a/lib/Target/Mips/MipsOptimizeMathLibCalls.cpp b/lib/Target/Mips/MipsOptimizeMathLibCalls.cpp
deleted file mode 100644
index de3f09c..0000000
--- a/lib/Target/Mips/MipsOptimizeMathLibCalls.cpp
+++ /dev/null
@@ -1,175 +0,0 @@
-//===---- MipsOptimizeMathLibCalls.cpp - Optimize math lib calls. ----===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This pass does an IR transformation which enables the backend to emit native
-// math instructions.
-//
-//===----------------------------------------------------------------------===//
-
-#include "MipsTargetMachine.h"
-#include "llvm/IR/IRBuilder.h"
-#include "llvm/IR/Intrinsics.h"
-#include "llvm/Pass.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Target/TargetLibraryInfo.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-
-using namespace llvm;
-
-static cl::opt<bool> DisableOpt("disable-mips-math-optimization",
- cl::init(false),
- cl::desc("MIPS: Disable math lib call "
- "optimization."), cl::Hidden);
-
-namespace {
- class MipsOptimizeMathLibCalls : public FunctionPass {
- public:
- static char ID;
-
- MipsOptimizeMathLibCalls(MipsTargetMachine &TM_) :
- FunctionPass(ID), TM(TM_) {}
-
- virtual const char *getPassName() const {
- return "MIPS: Optimize calls to math library functions.";
- }
-
- virtual void getAnalysisUsage(AnalysisUsage &AU) const;
-
- virtual bool runOnFunction(Function &F);
-
- private:
- /// Optimize calls to sqrt.
- bool optimizeSQRT(CallInst *Call, Function *CalledFunc,
- BasicBlock &CurrBB,
- Function::iterator &BB);
-
- const TargetMachine &TM;
- };
-
- char MipsOptimizeMathLibCalls::ID = 0;
-}
-
-FunctionPass *llvm::createMipsOptimizeMathLibCalls(MipsTargetMachine &TM) {
- return new MipsOptimizeMathLibCalls(TM);
-}
-
-void MipsOptimizeMathLibCalls::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<TargetLibraryInfo>();
- FunctionPass::getAnalysisUsage(AU);
-}
-
-bool MipsOptimizeMathLibCalls::runOnFunction(Function &F) {
- if (DisableOpt)
- return false;
-
- const MipsSubtarget &Subtarget = TM.getSubtarget<MipsSubtarget>();
-
- if (Subtarget.inMips16Mode())
- return false;
-
- bool Changed = false;
- Function::iterator CurrBB;
- const TargetLibraryInfo *LibInfo = &getAnalysis<TargetLibraryInfo>();
-
- for (Function::iterator BB = F.begin(), BE = F.end(); BB != BE;) {
- CurrBB = BB++;
-
- for (BasicBlock::iterator II = CurrBB->begin(), IE = CurrBB->end();
- II != IE; ++II) {
- CallInst *Call = dyn_cast<CallInst>(&*II);
- Function *CalledFunc;
-
- if (!Call || !(CalledFunc = Call->getCalledFunction()))
- continue;
-
- LibFunc::Func LibFunc;
- Attribute A = CalledFunc->getAttributes()
- .getAttribute(AttributeSet::FunctionIndex, "use-soft-float");
-
- // Skip if function has "use-soft-float" attribute.
- if ((A.isStringAttribute() && (A.getValueAsString() == "true")) ||
- TM.Options.UseSoftFloat)
- continue;
-
- // Skip if function either has local linkage or is not a known library
- // function.
- if (CalledFunc->hasLocalLinkage() || !CalledFunc->hasName() ||
- !LibInfo->getLibFunc(CalledFunc->getName(), LibFunc))
- continue;
-
- switch (LibFunc) {
- case LibFunc::sqrtf:
- case LibFunc::sqrt:
- if (optimizeSQRT(Call, CalledFunc, *CurrBB, BB))
- break;
- continue;
- default:
- continue;
- }
-
- Changed = true;
- break;
- }
- }
-
- return Changed;
-}
-
-bool MipsOptimizeMathLibCalls::optimizeSQRT(CallInst *Call,
- Function *CalledFunc,
- BasicBlock &CurrBB,
- Function::iterator &BB) {
- // There is no need to change the IR, since backend will emit sqrt
- // instruction if the call has already been marked read-only.
- if (Call->onlyReadsMemory())
- return false;
-
- // Do the following transformation:
- //
- // (before)
- // dst = sqrt(src)
- //
- // (after)
- // v0 = sqrt_noreadmem(src) # native sqrt instruction.
- // if (v0 is a NaN)
- // v1 = sqrt(src) # library call.
- // dst = phi(v0, v1)
- //
-
- // Move all instructions following Call to newly created block JoinBB.
- // Create phi and replace all uses.
- BasicBlock *JoinBB = llvm::SplitBlock(&CurrBB, Call->getNextNode(), this);
- IRBuilder<> Builder(JoinBB, JoinBB->begin());
- PHINode *Phi = Builder.CreatePHI(Call->getType(), 2);
- Call->replaceAllUsesWith(Phi);
-
- // Create basic block LibCallBB and insert a call to library function sqrt.
- BasicBlock *LibCallBB = BasicBlock::Create(CurrBB.getContext(), "call.sqrt",
- CurrBB.getParent(), JoinBB);
- Builder.SetInsertPoint(LibCallBB);
- Instruction *LibCall = Call->clone();
- Builder.Insert(LibCall);
- Builder.CreateBr(JoinBB);
-
- // Add attribute "readnone" so that backend can use a native sqrt instruction
- // for this call. Insert a FP compare instruction and a conditional branch
- // at the end of CurrBB.
- Call->addAttribute(AttributeSet::FunctionIndex, Attribute::ReadNone);
- CurrBB.getTerminator()->eraseFromParent();
- Builder.SetInsertPoint(&CurrBB);
- Value *FCmp = Builder.CreateFCmpOEQ(Call, Call);
- Builder.CreateCondBr(FCmp, JoinBB, LibCallBB);
-
- // Add phi operands.
- Phi->addIncoming(Call, &CurrBB);
- Phi->addIncoming(LibCall, LibCallBB);
-
- BB = JoinBB;
- return true;
-}
diff --git a/lib/Target/Mips/MipsOs16.cpp b/lib/Target/Mips/MipsOs16.cpp
index 1919077..fe60841 100644
--- a/lib/Target/Mips/MipsOs16.cpp
+++ b/lib/Target/Mips/MipsOs16.cpp
@@ -14,9 +14,17 @@
#define DEBUG_TYPE "mips-os16"
#include "MipsOs16.h"
#include "llvm/IR/Module.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+
+static cl::opt<std::string> Mips32FunctionMask(
+ "mips32-function-mask",
+ cl::init(""),
+ cl::desc("Force function to be mips32"),
+ cl::Hidden);
+
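+// The mask is consumed one character per function: '1' forces the Nth
+// function to be compiled as mips32, '.' stops any further forcing, and any
+// other character leaves the function untouched. A mask shorter than the
+// number of functions wraps around.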
namespace {
// Figure out if we need floating point based on the function signature.
@@ -85,18 +93,43 @@ namespace llvm {
bool MipsOs16::runOnModule(Module &M) {
- DEBUG(errs() << "Run on Module MipsOs16\n");
+ bool usingMask = Mips32FunctionMask.length() > 0;
+  bool doneUsingMask = false; // set once the mask's '.' terminator is seen
+ DEBUG(dbgs() << "Run on Module MipsOs16 \n" << Mips32FunctionMask << "\n");
+ if (usingMask)
+ DEBUG(dbgs() << "using mask \n" << Mips32FunctionMask << "\n");
+ unsigned int functionIndex = 0;
bool modified = false;
for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
if (F->isDeclaration()) continue;
DEBUG(dbgs() << "Working on " << F->getName() << "\n");
- if (needsFP(*F)) {
- DEBUG(dbgs() << " need to compile as nomips16 \n");
- F->addFnAttr("nomips16");
+ if (usingMask) {
+ if (!doneUsingMask) {
+ if (functionIndex == Mips32FunctionMask.length())
+ functionIndex = 0;
+ switch (Mips32FunctionMask[functionIndex]) {
+ case '1':
+ DEBUG(dbgs() << "mask forced mips32: " << F->getName() << "\n");
+ F->addFnAttr("nomips16");
+ break;
+ case '.':
+ doneUsingMask = true;
+ break;
+ default:
+ break;
+ }
+ functionIndex++;
+ }
}
else {
- F->addFnAttr("mips16");
- DEBUG(dbgs() << " no need to compile as nomips16 \n");
+ if (needsFP(*F)) {
+ DEBUG(dbgs() << "os16 forced mips32: " << F->getName() << "\n");
+ F->addFnAttr("nomips16");
+ }
+ else {
+ DEBUG(dbgs() << "os16 forced mips16: " << F->getName() << "\n");
+ F->addFnAttr("mips16");
+ }
}
}
return modified;
diff --git a/lib/Target/Mips/MipsRegisterInfo.cpp b/lib/Target/Mips/MipsRegisterInfo.cpp
index 0b5fc33..3105b02 100644
--- a/lib/Target/Mips/MipsRegisterInfo.cpp
+++ b/lib/Target/Mips/MipsRegisterInfo.cpp
@@ -47,6 +47,11 @@ MipsRegisterInfo::MipsRegisterInfo(const MipsSubtarget &ST)
unsigned MipsRegisterInfo::getPICCallReg() { return Mips::T9; }
+const TargetRegisterClass *
+MipsRegisterInfo::getPointerRegClass(const MachineFunction &MF,
+ unsigned Kind) const {
+ return Subtarget.isABI_N64() ? &Mips::GPR64RegClass : &Mips::GPR32RegClass;
+}
unsigned
MipsRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
@@ -56,7 +61,7 @@ MipsRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
return 0;
case Mips::GPR32RegClassID:
case Mips::GPR64RegClassID:
- case Mips::DSPRegsRegClassID: {
+ case Mips::DSPRRegClassID: {
const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
return 28 - TFI->hasFP(MF);
}
@@ -78,26 +83,34 @@ const uint16_t* MipsRegisterInfo::
getCalleeSavedRegs(const MachineFunction *MF) const {
if (Subtarget.isSingleFloat())
return CSR_SingleFloatOnly_SaveList;
- else if (!Subtarget.hasMips64())
- return CSR_O32_SaveList;
- else if (Subtarget.isABI_N32())
+
+ if (Subtarget.isABI_N64())
+ return CSR_N64_SaveList;
+
+ if (Subtarget.isABI_N32())
return CSR_N32_SaveList;
- assert(Subtarget.isABI_N64());
- return CSR_N64_SaveList;
+ if (Subtarget.isFP64bit())
+ return CSR_O32_FP64_SaveList;
+
+ return CSR_O32_SaveList;
}
const uint32_t*
MipsRegisterInfo::getCallPreservedMask(CallingConv::ID) const {
if (Subtarget.isSingleFloat())
return CSR_SingleFloatOnly_RegMask;
- else if (!Subtarget.hasMips64())
- return CSR_O32_RegMask;
- else if (Subtarget.isABI_N32())
+
+ if (Subtarget.isABI_N64())
+ return CSR_N64_RegMask;
+
+ if (Subtarget.isABI_N32())
return CSR_N32_RegMask;
- assert(Subtarget.isABI_N64());
- return CSR_N64_RegMask;
+ if (Subtarget.isFP64bit())
+ return CSR_O32_FP64_RegMask;
+
+ return CSR_O32_RegMask;
}
const uint32_t *MipsRegisterInfo::getMips16RetHelperMask() {
@@ -123,7 +136,7 @@ getReservedRegs(const MachineFunction &MF) const {
for (unsigned I = 0; I < array_lengthof(ReservedGPR64); ++I)
Reserved.set(ReservedGPR64[I]);
- if (Subtarget.hasMips64()) {
+ if (Subtarget.isFP64bit()) {
// Reserve all registers in AFGR64.
for (RegIter Reg = Mips::AFGR64RegClass.begin(),
EReg = Mips::AFGR64RegClass.end(); Reg != EReg; ++Reg)
@@ -146,7 +159,6 @@ getReservedRegs(const MachineFunction &MF) const {
// Reserve hardware registers.
Reserved.set(Mips::HWR29);
- Reserved.set(Mips::HWR29_64);
// Reserve DSP control register.
Reserved.set(Mips::DSPPos);
@@ -155,6 +167,16 @@ getReservedRegs(const MachineFunction &MF) const {
Reserved.set(Mips::DSPEFI);
Reserved.set(Mips::DSPOutFlag);
+ // Reserve MSA control registers.
+ Reserved.set(Mips::MSAIR);
+ Reserved.set(Mips::MSACSR);
+ Reserved.set(Mips::MSAAccess);
+ Reserved.set(Mips::MSASave);
+ Reserved.set(Mips::MSAModify);
+ Reserved.set(Mips::MSARequest);
+ Reserved.set(Mips::MSAMap);
+ Reserved.set(Mips::MSAUnmap);
+
// Reserve RA if in mips16 mode.
if (Subtarget.inMips16Mode()) {
Reserved.set(Mips::RA);
@@ -218,12 +240,3 @@ getFrameRegister(const MachineFunction &MF) const {
}
-unsigned MipsRegisterInfo::
-getEHExceptionRegister() const {
- llvm_unreachable("What is the exception register");
-}
-
-unsigned MipsRegisterInfo::
-getEHHandlerRegister() const {
- llvm_unreachable("What is the exception handler register");
-}
diff --git a/lib/Target/Mips/MipsRegisterInfo.h b/lib/Target/Mips/MipsRegisterInfo.h
index 20ba41d..0450c6f 100644
--- a/lib/Target/Mips/MipsRegisterInfo.h
+++ b/lib/Target/Mips/MipsRegisterInfo.h
@@ -42,6 +42,9 @@ public:
void adjustMipsStackFrame(MachineFunction &MF) const;
/// Code Generation virtual methods...
+ const TargetRegisterClass *getPointerRegClass(const MachineFunction &MF,
+ unsigned Kind) const;
+
unsigned getRegPressureLimit(const TargetRegisterClass *RC,
MachineFunction &MF) const;
const uint16_t *getCalleeSavedRegs(const MachineFunction *MF = 0) const;
@@ -65,10 +68,6 @@ public:
/// Debug information queries.
unsigned getFrameRegister(const MachineFunction &MF) const;
- /// Exception handling queries.
- unsigned getEHExceptionRegister() const;
- unsigned getEHHandlerRegister() const;
-
/// \brief Return GPR register class.
virtual const TargetRegisterClass *intRegClass(unsigned Size) const = 0;
diff --git a/lib/Target/Mips/MipsRegisterInfo.td b/lib/Target/Mips/MipsRegisterInfo.td
index c72c30d..3173d09 100644
--- a/lib/Target/Mips/MipsRegisterInfo.td
+++ b/lib/Target/Mips/MipsRegisterInfo.td
@@ -11,9 +11,8 @@
// Declarations that describe the MIPS register file
//===----------------------------------------------------------------------===//
let Namespace = "Mips" in {
-def sub_fpeven : SubRegIndex<32>;
-def sub_fpodd : SubRegIndex<32, 32>;
def sub_32 : SubRegIndex<32>;
+def sub_64 : SubRegIndex<64>;
def sub_lo : SubRegIndex<32>;
def sub_hi : SubRegIndex<32, 32>;
def sub_dsp16_19 : SubRegIndex<4, 16>;
@@ -54,17 +53,24 @@ class FPR<bits<16> Enc, string n> : MipsReg<Enc, n>;
// Mips 64-bit (aliased) FPU Registers
class AFPR<bits<16> Enc, string n, list<Register> subregs>
: MipsRegWithSubRegs<Enc, n, subregs> {
- let SubRegIndices = [sub_fpeven, sub_fpodd];
+ let SubRegIndices = [sub_lo, sub_hi];
let CoveredBySubRegs = 1;
}
class AFPR64<bits<16> Enc, string n, list<Register> subregs>
: MipsRegWithSubRegs<Enc, n, subregs> {
- let SubRegIndices = [sub_32];
+ let SubRegIndices = [sub_lo, sub_hi];
+ let CoveredBySubRegs = 1;
+}
+
+// Mips 128-bit (aliased) MSA Registers
+class AFPR128<bits<16> Enc, string n, list<Register> subregs>
+ : MipsRegWithSubRegs<Enc, n, subregs> {
+ let SubRegIndices = [sub_64];
}
// Accumulator Registers
-class ACC<bits<16> Enc, string n, list<Register> subregs>
+class ACCReg<bits<16> Enc, string n, list<Register> subregs>
: MipsRegWithSubRegs<Enc, n, subregs> {
let SubRegIndices = [sub_lo, sub_hi];
let CoveredBySubRegs = 1;
@@ -150,6 +156,10 @@ let Namespace = "Mips" in {
foreach I = 0-31 in
def F#I : FPR<I, "f"#I>, DwarfRegNum<[!add(I, 32)]>;
+ // Higher half of 64-bit FP registers.
+ foreach I = 0-31 in
+ def F_HI#I : FPR<I, "f"#I>, DwarfRegNum<[!add(I, 32)]>;
+
/// Mips Double point precision FPU Registers (aliased
/// with the single precision to hold 64 bit values)
foreach I = 0-15 in
@@ -159,22 +169,28 @@ let Namespace = "Mips" in {
/// Mips Double point precision FPU Registers in MFP64 mode.
foreach I = 0-31 in
- def D#I#_64 : AFPR64<I, "f"#I, [!cast<FPR>("F"#I)]>,
+ def D#I#_64 : AFPR64<I, "f"#I, [!cast<FPR>("F"#I), !cast<FPR>("F_HI"#I)]>,
DwarfRegNum<[!add(I, 32)]>;
+ /// Mips MSA registers
+ /// MSA and FPU cannot both be present unless the FPU has 64-bit registers
+ foreach I = 0-31 in
+ def W#I : AFPR128<I, "w"#I, [!cast<AFPR64>("D"#I#"_64")]>,
+ DwarfRegNum<[!add(I, 32)]>;
+
// Hi/Lo registers
- def HI : Register<"ac0">, DwarfRegNum<[64]>;
- def HI1 : Register<"ac1">, DwarfRegNum<[176]>;
- def HI2 : Register<"ac2">, DwarfRegNum<[178]>;
- def HI3 : Register<"ac3">, DwarfRegNum<[180]>;
- def LO : Register<"ac0">, DwarfRegNum<[65]>;
- def LO1 : Register<"ac1">, DwarfRegNum<[177]>;
- def LO2 : Register<"ac2">, DwarfRegNum<[179]>;
- def LO3 : Register<"ac3">, DwarfRegNum<[181]>;
+ def HI0 : MipsReg<0, "ac0">, DwarfRegNum<[64]>;
+ def HI1 : MipsReg<1, "ac1">, DwarfRegNum<[176]>;
+ def HI2 : MipsReg<2, "ac2">, DwarfRegNum<[178]>;
+ def HI3 : MipsReg<3, "ac3">, DwarfRegNum<[180]>;
+ def LO0 : MipsReg<0, "ac0">, DwarfRegNum<[65]>;
+ def LO1 : MipsReg<1, "ac1">, DwarfRegNum<[177]>;
+ def LO2 : MipsReg<2, "ac2">, DwarfRegNum<[179]>;
+ def LO3 : MipsReg<3, "ac3">, DwarfRegNum<[181]>;
let SubRegIndices = [sub_32] in {
- def HI64 : RegisterWithSubRegs<"hi", [HI]>;
- def LO64 : RegisterWithSubRegs<"lo", [LO]>;
+ def HI0_64 : RegisterWithSubRegs<"hi", [HI0]>;
+ def LO0_64 : RegisterWithSubRegs<"lo", [LO0]>;
}
// FP control registers.
@@ -185,20 +201,22 @@ let Namespace = "Mips" in {
foreach I = 0-7 in
def FCC#I : MipsReg<#I, "fcc"#I>;
+ // COP2 registers.
+ foreach I = 0-31 in
+ def COP2#I : MipsReg<#I, ""#I>;
+
// PC register
def PC : Register<"pc">;
// Hardware register $29
def HWR29 : MipsReg<29, "29">;
- def HWR29_64 : MipsReg<29, "29">;
// Accum registers
- def AC0 : ACC<0, "ac0", [LO, HI]>;
- def AC1 : ACC<1, "ac1", [LO1, HI1]>;
- def AC2 : ACC<2, "ac2", [LO2, HI2]>;
- def AC3 : ACC<3, "ac3", [LO3, HI3]>;
+ foreach I = 0-3 in
+ def AC#I : ACCReg<#I, "ac"#I,
+ [!cast<Register>("LO"#I), !cast<Register>("HI"#I)]>;
- def AC0_64 : ACC<0, "ac0", [LO64, HI64]>;
+ def AC0_64 : ACCReg<0, "ac0", [LO0_64, HI0_64]>;
// DSP-ASE control register fields.
def DSPPos : Register<"">;
@@ -217,6 +235,16 @@ let Namespace = "Mips" in {
def DSPOutFlag : RegisterWithSubRegs<"", [DSPOutFlag16_19, DSPOutFlag20,
DSPOutFlag21, DSPOutFlag22,
DSPOutFlag23]>;
+
+ // MSA-ASE control registers.
+ def MSAIR : MipsReg<0, "0">;
+ def MSACSR : MipsReg<1, "1">;
+ def MSAAccess : MipsReg<2, "2">;
+ def MSASave : MipsReg<3, "3">;
+ def MSAModify : MipsReg<4, "4">;
+ def MSARequest : MipsReg<5, "5">;
+ def MSAMap : MipsReg<6, "6">;
+ def MSAUnmap : MipsReg<7, "7">;
}
//===----------------------------------------------------------------------===//
@@ -239,7 +267,7 @@ class GPR32Class<list<ValueType> regTypes> :
K0, K1, GP, SP, FP, RA)>;
def GPR32 : GPR32Class<[i32]>;
-def DSPRegs : GPR32Class<[v4i8, v2i16]>;
+def DSPR : GPR32Class<[v4i8, v2i16]>;
def GPR64 : RegisterClass<"Mips", [i64], 64, (add
// Reserved
@@ -281,6 +309,9 @@ def CPUSPReg : RegisterClass<"Mips", [i32], 32, (add SP)>, Unallocatable;
// * FGR32 - 32 32-bit registers (single float only mode)
def FGR32 : RegisterClass<"Mips", [f32], 32, (sequence "F%u", 0, 31)>;
+def FGRH32 : RegisterClass<"Mips", [f32], 32, (sequence "F_HI%u", 0, 31)>,
+ Unallocatable;
+
def AFGR64 : RegisterClass<"Mips", [f64], 64, (add
// Return Values and Arguments
D0, D1,
@@ -303,33 +334,48 @@ def CCR : RegisterClass<"Mips", [i32], 32, (sequence "FCR%u", 0, 31)>,
def FCC : RegisterClass<"Mips", [i32], 32, (sequence "FCC%u", 0, 7)>,
Unallocatable;
+def MSA128B: RegisterClass<"Mips", [v16i8], 128,
+ (sequence "W%u", 0, 31)>;
+def MSA128H: RegisterClass<"Mips", [v8i16, v8f16], 128,
+ (sequence "W%u", 0, 31)>;
+def MSA128W: RegisterClass<"Mips", [v4i32, v4f32], 128,
+ (sequence "W%u", 0, 31)>;
+def MSA128D: RegisterClass<"Mips", [v2i64, v2f64], 128,
+ (sequence "W%u", 0, 31)>;
+
+def MSACtrl: RegisterClass<"Mips", [i32], 32, (add
+ MSAIR, MSACSR, MSAAccess, MSASave, MSAModify, MSARequest, MSAMap, MSAUnmap)>;
+
// Hi/Lo Registers
-def LORegs : RegisterClass<"Mips", [i32], 32, (add LO)>;
-def HIRegs : RegisterClass<"Mips", [i32], 32, (add HI)>;
-def LORegsDSP : RegisterClass<"Mips", [i32], 32, (add LO, LO1, LO2, LO3)>;
-def HIRegsDSP : RegisterClass<"Mips", [i32], 32, (add HI, HI1, HI2, HI3)>;
-def LORegs64 : RegisterClass<"Mips", [i64], 64, (add LO64)>;
-def HIRegs64 : RegisterClass<"Mips", [i64], 64, (add HI64)>;
+def LO32 : RegisterClass<"Mips", [i32], 32, (add LO0)>;
+def HI32 : RegisterClass<"Mips", [i32], 32, (add HI0)>;
+def LO32DSP : RegisterClass<"Mips", [i32], 32, (sequence "LO%u", 0, 3)>;
+def HI32DSP : RegisterClass<"Mips", [i32], 32, (sequence "HI%u", 0, 3)>;
+def LO64 : RegisterClass<"Mips", [i64], 64, (add LO0_64)>;
+def HI64 : RegisterClass<"Mips", [i64], 64, (add HI0_64)>;
// Hardware registers
def HWRegs : RegisterClass<"Mips", [i32], 32, (add HWR29)>, Unallocatable;
-def HWRegs64 : RegisterClass<"Mips", [i64], 64, (add HWR29_64)>, Unallocatable;
// Accumulator Registers
-def ACRegs : RegisterClass<"Mips", [untyped], 64, (add AC0)> {
+def ACC64 : RegisterClass<"Mips", [untyped], 64, (add AC0)> {
let Size = 64;
}
-def ACRegs128 : RegisterClass<"Mips", [untyped], 128, (add AC0_64)> {
+def ACC128 : RegisterClass<"Mips", [untyped], 128, (add AC0_64)> {
let Size = 128;
}
-def ACRegsDSP : RegisterClass<"Mips", [untyped], 64, (sequence "AC%u", 0, 3)> {
+def ACC64DSP : RegisterClass<"Mips", [untyped], 64, (sequence "AC%u", 0, 3)> {
let Size = 64;
}
def DSPCC : RegisterClass<"Mips", [v4i8, v2i16], 32, (add DSPCCond)>;
+// Coprocessor 2 registers.
+def COP2 : RegisterClass<"Mips", [i32], 32, (sequence "COP2%u", 0, 31)>,
+ Unallocatable;
+
// Register Operands.
class MipsAsmRegOperand : AsmOperandClass {
@@ -345,9 +391,19 @@ def GPR64AsmOperand : MipsAsmRegOperand {
let ParserMethod = "parseGPR64";
}
-def ACRegsDSPAsmOperand : MipsAsmRegOperand {
- let Name = "ACRegsDSPAsm";
- let ParserMethod = "parseACRegsDSP";
+def ACC64DSPAsmOperand : MipsAsmRegOperand {
+ let Name = "ACC64DSPAsm";
+ let ParserMethod = "parseACC64DSP";
+}
+
+def LO32DSPAsmOperand : MipsAsmRegOperand {
+ let Name = "LO32DSPAsm";
+ let ParserMethod = "parseLO32DSP";
+}
+
+def HI32DSPAsmOperand : MipsAsmRegOperand {
+ let Name = "HI32DSPAsm";
+ let ParserMethod = "parseHI32DSP";
}
def CCRAsmOperand : MipsAsmRegOperand {
@@ -370,11 +426,41 @@ def FGR32AsmOperand : MipsAsmRegOperand {
let ParserMethod = "parseFGR32Regs";
}
+def FGRH32AsmOperand : MipsAsmRegOperand {
+ let Name = "FGRH32Asm";
+ let ParserMethod = "parseFGRH32Regs";
+}
+
def FCCRegsAsmOperand : MipsAsmRegOperand {
let Name = "FCCRegsAsm";
let ParserMethod = "parseFCCRegs";
}
+def MSA128BAsmOperand : MipsAsmRegOperand {
+ let Name = "MSA128BAsm";
+ let ParserMethod = "parseMSA128BRegs";
+}
+
+def MSA128HAsmOperand : MipsAsmRegOperand {
+ let Name = "MSA128HAsm";
+ let ParserMethod = "parseMSA128HRegs";
+}
+
+def MSA128WAsmOperand : MipsAsmRegOperand {
+ let Name = "MSA128WAsm";
+ let ParserMethod = "parseMSA128WRegs";
+}
+
+def MSA128DAsmOperand : MipsAsmRegOperand {
+ let Name = "MSA128DAsm";
+ let ParserMethod = "parseMSA128DRegs";
+}
+
+def MSA128CRAsmOperand : MipsAsmRegOperand {
+ let Name = "MSA128CRAsm";
+ let ParserMethod = "parseMSA128CtrlRegs";
+}
+
def GPR32Opnd : RegisterOperand<GPR32> {
let ParserMatchClass = GPR32AsmOperand;
}
@@ -383,6 +469,10 @@ def GPR64Opnd : RegisterOperand<GPR64> {
let ParserMatchClass = GPR64AsmOperand;
}
+def DSPROpnd : RegisterOperand<DSPR> {
+ let ParserMatchClass = GPR32AsmOperand;
+}
+
def CCROpnd : RegisterOperand<CCR> {
let ParserMatchClass = CCRAsmOperand;
}
@@ -392,35 +482,68 @@ def HWRegsAsmOperand : MipsAsmRegOperand {
let ParserMethod = "parseHWRegs";
}
-def HW64RegsAsmOperand : MipsAsmRegOperand {
- let Name = "HW64RegsAsm";
- let ParserMethod = "parseHW64Regs";
+def COP2AsmOperand : MipsAsmRegOperand {
+ let Name = "COP2Asm";
+ let ParserMethod = "parseCOP2";
}
def HWRegsOpnd : RegisterOperand<HWRegs> {
let ParserMatchClass = HWRegsAsmOperand;
}
-def HW64RegsOpnd : RegisterOperand<HWRegs64> {
- let ParserMatchClass = HW64RegsAsmOperand;
-}
-
-def AFGR64RegsOpnd : RegisterOperand<AFGR64> {
+def AFGR64Opnd : RegisterOperand<AFGR64> {
let ParserMatchClass = AFGR64AsmOperand;
}
-def FGR64RegsOpnd : RegisterOperand<FGR64> {
+def FGR64Opnd : RegisterOperand<FGR64> {
let ParserMatchClass = FGR64AsmOperand;
}
-def FGR32RegsOpnd : RegisterOperand<FGR32> {
+def FGR32Opnd : RegisterOperand<FGR32> {
let ParserMatchClass = FGR32AsmOperand;
}
+def FGRH32Opnd : RegisterOperand<FGRH32> {
+ let ParserMatchClass = FGRH32AsmOperand;
+}
+
def FCCRegsOpnd : RegisterOperand<FCC> {
let ParserMatchClass = FCCRegsAsmOperand;
}
-def ACRegsDSPOpnd : RegisterOperand<ACRegsDSP> {
- let ParserMatchClass = ACRegsDSPAsmOperand;
+def LO32DSPOpnd : RegisterOperand<LO32DSP> {
+ let ParserMatchClass = LO32DSPAsmOperand;
}
+
+def HI32DSPOpnd : RegisterOperand<HI32DSP> {
+ let ParserMatchClass = HI32DSPAsmOperand;
+}
+
+def ACC64DSPOpnd : RegisterOperand<ACC64DSP> {
+ let ParserMatchClass = ACC64DSPAsmOperand;
+}
+
+def COP2Opnd : RegisterOperand<COP2> {
+ let ParserMatchClass = COP2AsmOperand;
+}
+
+def MSA128BOpnd : RegisterOperand<MSA128B> {
+ let ParserMatchClass = MSA128BAsmOperand;
+}
+
+def MSA128HOpnd : RegisterOperand<MSA128H> {
+ let ParserMatchClass = MSA128HAsmOperand;
+}
+
+def MSA128WOpnd : RegisterOperand<MSA128W> {
+ let ParserMatchClass = MSA128WAsmOperand;
+}
+
+def MSA128DOpnd : RegisterOperand<MSA128D> {
+ let ParserMatchClass = MSA128DAsmOperand;
+}
+
+def MSA128CROpnd : RegisterOperand<MSACtrl> {
+ let ParserMatchClass = MSA128CRAsmOperand;
+}
+
diff --git a/lib/Target/Mips/MipsSEFrameLowering.cpp b/lib/Target/Mips/MipsSEFrameLowering.cpp
index d9e0fa4..33ed4b3 100644
--- a/lib/Target/Mips/MipsSEFrameLowering.cpp
+++ b/lib/Target/Mips/MipsSEFrameLowering.cpp
@@ -32,6 +32,21 @@ using namespace llvm;
namespace {
typedef MachineBasicBlock::iterator Iter;
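+// Map an accumulator register class to the (MFHi, MFLo) opcode pair used to
+// read it; a pair of (0, 0) means Src is not an accumulator register.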
+static std::pair<unsigned, unsigned> getMFHiLoOpc(unsigned Src) {
+ if (Mips::ACC64RegClass.contains(Src))
+ return std::make_pair((unsigned)Mips::PseudoMFHI,
+ (unsigned)Mips::PseudoMFLO);
+
+ if (Mips::ACC64DSPRegClass.contains(Src))
+ return std::make_pair((unsigned)Mips::MFHI_DSP, (unsigned)Mips::MFLO_DSP);
+
+ if (Mips::ACC128RegClass.contains(Src))
+ return std::make_pair((unsigned)Mips::PseudoMFHI64,
+ (unsigned)Mips::PseudoMFLO64);
+
+ return std::make_pair(0, 0);
+}
+
/// Helper class to expand pseudos.
class ExpandPseudo {
public:
@@ -43,10 +58,11 @@ private:
void expandLoadCCond(MachineBasicBlock &MBB, Iter I);
void expandStoreCCond(MachineBasicBlock &MBB, Iter I);
void expandLoadACC(MachineBasicBlock &MBB, Iter I, unsigned RegSize);
- void expandStoreACC(MachineBasicBlock &MBB, Iter I, unsigned RegSize);
+ void expandStoreACC(MachineBasicBlock &MBB, Iter I, unsigned MFHiOpc,
+ unsigned MFLoOpc, unsigned RegSize);
bool expandCopy(MachineBasicBlock &MBB, Iter I);
- bool expandCopyACC(MachineBasicBlock &MBB, Iter I, unsigned Dst,
- unsigned Src, unsigned RegSize);
+ bool expandCopyACC(MachineBasicBlock &MBB, Iter I, unsigned MFHiOpc,
+ unsigned MFLoOpc);
MachineFunction &MF;
MachineRegisterInfo &MRI;
@@ -70,32 +86,26 @@ bool ExpandPseudo::expand() {
bool ExpandPseudo::expandInstr(MachineBasicBlock &MBB, Iter I) {
switch(I->getOpcode()) {
case Mips::LOAD_CCOND_DSP:
- case Mips::LOAD_CCOND_DSP_P8:
expandLoadCCond(MBB, I);
break;
case Mips::STORE_CCOND_DSP:
- case Mips::STORE_CCOND_DSP_P8:
expandStoreCCond(MBB, I);
break;
- case Mips::LOAD_AC64:
- case Mips::LOAD_AC64_P8:
- case Mips::LOAD_AC_DSP:
- case Mips::LOAD_AC_DSP_P8:
+ case Mips::LOAD_ACC64:
+ case Mips::LOAD_ACC64DSP:
expandLoadACC(MBB, I, 4);
break;
- case Mips::LOAD_AC128:
- case Mips::LOAD_AC128_P8:
+ case Mips::LOAD_ACC128:
expandLoadACC(MBB, I, 8);
break;
- case Mips::STORE_AC64:
- case Mips::STORE_AC64_P8:
- case Mips::STORE_AC_DSP:
- case Mips::STORE_AC_DSP_P8:
- expandStoreACC(MBB, I, 4);
+ case Mips::STORE_ACC64:
+ expandStoreACC(MBB, I, Mips::PseudoMFHI, Mips::PseudoMFLO, 4);
+ break;
+ case Mips::STORE_ACC64DSP:
+ expandStoreACC(MBB, I, Mips::MFHI_DSP, Mips::MFLO_DSP, 4);
break;
- case Mips::STORE_AC128:
- case Mips::STORE_AC128_P8:
- expandStoreACC(MBB, I, 8);
+ case Mips::STORE_ACC128:
+ expandStoreACC(MBB, I, Mips::PseudoMFHI64, Mips::PseudoMFLO64, 8);
break;
case TargetOpcode::COPY:
if (!expandCopy(MBB, I))
@@ -179,10 +189,11 @@ void ExpandPseudo::expandLoadACC(MachineBasicBlock &MBB, Iter I,
}
void ExpandPseudo::expandStoreACC(MachineBasicBlock &MBB, Iter I,
+ unsigned MFHiOpc, unsigned MFLoOpc,
unsigned RegSize) {
- // copy $vr0, lo
+ // mflo $vr0, src
// store $vr0, FI
- // copy $vr1, hi
+ // mfhi $vr1, src
// store $vr1, FI + 4
assert(I->getOperand(0).isReg() && I->getOperand(1).isFI());
@@ -197,33 +208,29 @@ void ExpandPseudo::expandStoreACC(MachineBasicBlock &MBB, Iter I,
unsigned VR1 = MRI.createVirtualRegister(RC);
unsigned Src = I->getOperand(0).getReg(), FI = I->getOperand(1).getIndex();
unsigned SrcKill = getKillRegState(I->getOperand(0).isKill());
- unsigned Lo = RegInfo.getSubReg(Src, Mips::sub_lo);
- unsigned Hi = RegInfo.getSubReg(Src, Mips::sub_hi);
DebugLoc DL = I->getDebugLoc();
- BuildMI(MBB, I, DL, TII.get(TargetOpcode::COPY), VR0).addReg(Lo, SrcKill);
+ BuildMI(MBB, I, DL, TII.get(MFLoOpc), VR0).addReg(Src);
TII.storeRegToStack(MBB, I, VR0, true, FI, RC, &RegInfo, 0);
- BuildMI(MBB, I, DL, TII.get(TargetOpcode::COPY), VR1).addReg(Hi, SrcKill);
+ BuildMI(MBB, I, DL, TII.get(MFHiOpc), VR1).addReg(Src, SrcKill);
TII.storeRegToStack(MBB, I, VR1, true, FI, RC, &RegInfo, RegSize);
}
bool ExpandPseudo::expandCopy(MachineBasicBlock &MBB, Iter I) {
- unsigned Dst = I->getOperand(0).getReg(), Src = I->getOperand(1).getReg();
-
- if (Mips::ACRegsDSPRegClass.contains(Dst, Src))
- return expandCopyACC(MBB, I, Dst, Src, 4);
+ unsigned Src = I->getOperand(1).getReg();
+ std::pair<unsigned, unsigned> Opcodes = getMFHiLoOpc(Src);
- if (Mips::ACRegs128RegClass.contains(Dst, Src))
- return expandCopyACC(MBB, I, Dst, Src, 8);
+ if (!Opcodes.first)
+ return false;
- return false;
+ return expandCopyACC(MBB, I, Opcodes.first, Opcodes.second);
}
-bool ExpandPseudo::expandCopyACC(MachineBasicBlock &MBB, Iter I, unsigned Dst,
- unsigned Src, unsigned RegSize) {
- // copy $vr0, src_lo
+bool ExpandPseudo::expandCopyACC(MachineBasicBlock &MBB, Iter I,
+ unsigned MFHiOpc, unsigned MFLoOpc) {
+ // mflo $vr0, src
// copy dst_lo, $vr0
- // copy $vr1, src_hi
+ // mfhi $vr1, src
// copy dst_hi, $vr1
const MipsSEInstrInfo &TII =
@@ -231,20 +238,20 @@ bool ExpandPseudo::expandCopyACC(MachineBasicBlock &MBB, Iter I, unsigned Dst,
const MipsRegisterInfo &RegInfo =
*static_cast<const MipsRegisterInfo*>(MF.getTarget().getRegisterInfo());
- const TargetRegisterClass *RC = RegInfo.intRegClass(RegSize);
+ unsigned Dst = I->getOperand(0).getReg(), Src = I->getOperand(1).getReg();
+ unsigned VRegSize = RegInfo.getMinimalPhysRegClass(Dst)->getSize() / 2;
+ const TargetRegisterClass *RC = RegInfo.intRegClass(VRegSize);
unsigned VR0 = MRI.createVirtualRegister(RC);
unsigned VR1 = MRI.createVirtualRegister(RC);
unsigned SrcKill = getKillRegState(I->getOperand(1).isKill());
unsigned DstLo = RegInfo.getSubReg(Dst, Mips::sub_lo);
unsigned DstHi = RegInfo.getSubReg(Dst, Mips::sub_hi);
- unsigned SrcLo = RegInfo.getSubReg(Src, Mips::sub_lo);
- unsigned SrcHi = RegInfo.getSubReg(Src, Mips::sub_hi);
DebugLoc DL = I->getDebugLoc();
- BuildMI(MBB, I, DL, TII.get(TargetOpcode::COPY), VR0).addReg(SrcLo, SrcKill);
+ BuildMI(MBB, I, DL, TII.get(MFLoOpc), VR0).addReg(Src);
BuildMI(MBB, I, DL, TII.get(TargetOpcode::COPY), DstLo)
.addReg(VR0, RegState::Kill);
- BuildMI(MBB, I, DL, TII.get(TargetOpcode::COPY), VR1).addReg(SrcHi, SrcKill);
+ BuildMI(MBB, I, DL, TII.get(MFHiOpc), VR1).addReg(Src, SrcKill);
BuildMI(MBB, I, DL, TII.get(TargetOpcode::COPY), DstHi)
.addReg(VR1, RegState::Kill);
return true;
@@ -321,9 +328,9 @@ void MipsSEFrameLowering::emitPrologue(MachineFunction &MF) const {
// one for each of the paired single precision registers.
if (Mips::AFGR64RegClass.contains(Reg)) {
unsigned Reg0 =
- MRI->getDwarfRegNum(RegInfo.getSubReg(Reg, Mips::sub_fpeven), true);
+ MRI->getDwarfRegNum(RegInfo.getSubReg(Reg, Mips::sub_lo), true);
unsigned Reg1 =
- MRI->getDwarfRegNum(RegInfo.getSubReg(Reg, Mips::sub_fpodd), true);
+ MRI->getDwarfRegNum(RegInfo.getSubReg(Reg, Mips::sub_hi), true);
if (!STI.isLittle())
std::swap(Reg0, Reg1);
diff --git a/lib/Target/Mips/MipsSEFrameLowering.h b/lib/Target/Mips/MipsSEFrameLowering.h
index 193a66c..8fa9e46 100644
--- a/lib/Target/Mips/MipsSEFrameLowering.h
+++ b/lib/Target/Mips/MipsSEFrameLowering.h
@@ -21,7 +21,7 @@ namespace llvm {
class MipsSEFrameLowering : public MipsFrameLowering {
public:
explicit MipsSEFrameLowering(const MipsSubtarget &STI)
- : MipsFrameLowering(STI, STI.hasMips64() ? 16 : 8) {}
+ : MipsFrameLowering(STI, STI.stackAlignment()) {}
/// emitProlog/emitEpilog - These methods insert prolog and epilog code into
/// the function.
diff --git a/lib/Target/Mips/MipsSEISelDAGToDAG.cpp b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
index 3b6480a..737660e 100644
--- a/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
+++ b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
@@ -66,6 +66,21 @@ void MipsSEDAGToDAGISel::addDSPCtrlRegOperands(bool IsDef, MachineInstr &MI,
MIB.addReg(Mips::DSPEFI, Flag);
}
+unsigned MipsSEDAGToDAGISel::getMSACtrlReg(const SDValue RegIdx) const {
+ switch (cast<ConstantSDNode>(RegIdx)->getZExtValue()) {
+ default:
+ llvm_unreachable("Could not map int to register");
+ case 0: return Mips::MSAIR;
+ case 1: return Mips::MSACSR;
+ case 2: return Mips::MSAAccess;
+ case 3: return Mips::MSASave;
+ case 4: return Mips::MSAModify;
+ case 5: return Mips::MSARequest;
+ case 6: return Mips::MSAMap;
+ case 7: return Mips::MSAUnmap;
+ }
+}
+
bool MipsSEDAGToDAGISel::replaceUsesWithZeroReg(MachineRegisterInfo *MRI,
const MachineInstr& MI) {
unsigned DstReg = 0, ZeroReg = 0;
@@ -301,6 +316,20 @@ bool MipsSEDAGToDAGISel::selectAddrRegImm(SDValue Addr, SDValue &Base,
return false;
}
+/// ComplexPattern used on MipsInstrInfo
+/// Used on Mips Load/Store instructions
+bool MipsSEDAGToDAGISel::selectAddrRegReg(SDValue Addr, SDValue &Base,
+ SDValue &Offset) const {
+ // Operand is a result from an ADD.
+ if (Addr.getOpcode() == ISD::ADD) {
+ Base = Addr.getOperand(0);
+ Offset = Addr.getOperand(1);
+ return true;
+ }
+
+ return false;
+}
+
bool MipsSEDAGToDAGISel::selectAddrDefault(SDValue Addr, SDValue &Base,
SDValue &Offset) const {
Base = Addr;
@@ -314,6 +343,263 @@ bool MipsSEDAGToDAGISel::selectIntAddr(SDValue Addr, SDValue &Base,
selectAddrDefault(Addr, Base, Offset);
}
+/// Used on microMIPS Load/Store unaligned instructions (12-bit offset)
+bool MipsSEDAGToDAGISel::selectAddrRegImm12(SDValue Addr, SDValue &Base,
+ SDValue &Offset) const {
+ EVT ValTy = Addr.getValueType();
+
+ // Addresses of the form FI+const or FI|const
+ if (CurDAG->isBaseWithConstantOffset(Addr)) {
+ ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1));
+ if (isInt<12>(CN->getSExtValue())) {
+
+ // If the first operand is a FI then get the TargetFI Node
+ if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>
+ (Addr.getOperand(0)))
+ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy);
+ else
+ Base = Addr.getOperand(0);
+
+ Offset = CurDAG->getTargetConstant(CN->getZExtValue(), ValTy);
+ return true;
+ }
+ }
+
+ return false;
+}
+
+bool MipsSEDAGToDAGISel::selectIntAddrMM(SDValue Addr, SDValue &Base,
+ SDValue &Offset) const {
+ return selectAddrRegImm12(Addr, Base, Offset) ||
+ selectAddrDefault(Addr, Base, Offset);
+}
+
+// Select constant vector splats.
+//
+// Returns true and sets Imm if:
+// * MSA is enabled
+// * N is a ISD::BUILD_VECTOR representing a constant splat
+bool MipsSEDAGToDAGISel::selectVSplat(SDNode *N, APInt &Imm) const {
+ if (!Subtarget.hasMSA())
+ return false;
+
+ BuildVectorSDNode *Node = dyn_cast<BuildVectorSDNode>(N);
+
+ if (Node == NULL)
+ return false;
+
+ APInt SplatValue, SplatUndef;
+ unsigned SplatBitSize;
+ bool HasAnyUndefs;
+
+ if (!Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
+ HasAnyUndefs, 8,
+ !Subtarget.isLittle()))
+ return false;
+
+ Imm = SplatValue;
+
+ return true;
+}
+
+// Select constant vector splats.
+//
+// In addition to the requirements of selectVSplat(), this function returns
+// true and sets Imm if:
+// * The splat value is the same width as the elements of the vector
+// * The splat value fits in an integer with the specified signed-ness and
+// width.
+//
+// This function looks through ISD::BITCAST nodes.
+// TODO: This might not be appropriate for big-endian MSA since BITCAST is
+// sometimes a shuffle in big-endian mode.
+//
+// It's worth noting that this function is not used as part of the selection
+// of ldi.[bhwd] since it does not permit using the wrong-typed ldi.[bhwd]
+// instruction to achieve the desired bit pattern. ldi.[bhwd] is selected in
+// MipsSEDAGToDAGISel::selectNode.
+bool MipsSEDAGToDAGISel::
+selectVSplatCommon(SDValue N, SDValue &Imm, bool Signed,
+ unsigned ImmBitSize) const {
+ APInt ImmValue;
+ EVT EltTy = N->getValueType(0).getVectorElementType();
+
+ if (N->getOpcode() == ISD::BITCAST)
+ N = N->getOperand(0);
+
+  if (selectVSplat(N.getNode(), ImmValue) &&
+ ImmValue.getBitWidth() == EltTy.getSizeInBits()) {
+ if (( Signed && ImmValue.isSignedIntN(ImmBitSize)) ||
+ (!Signed && ImmValue.isIntN(ImmBitSize))) {
+ Imm = CurDAG->getTargetConstant(ImmValue, EltTy);
+ return true;
+ }
+ }
+
+ return false;
+}
+
+// Select constant vector splats.
+bool MipsSEDAGToDAGISel::
+selectVSplatUimm1(SDValue N, SDValue &Imm) const {
+ return selectVSplatCommon(N, Imm, false, 1);
+}
+
+bool MipsSEDAGToDAGISel::
+selectVSplatUimm2(SDValue N, SDValue &Imm) const {
+ return selectVSplatCommon(N, Imm, false, 2);
+}
+
+bool MipsSEDAGToDAGISel::
+selectVSplatUimm3(SDValue N, SDValue &Imm) const {
+ return selectVSplatCommon(N, Imm, false, 3);
+}
+
+// Select constant vector splats.
+bool MipsSEDAGToDAGISel::
+selectVSplatUimm4(SDValue N, SDValue &Imm) const {
+ return selectVSplatCommon(N, Imm, false, 4);
+}
+
+// Select constant vector splats.
+bool MipsSEDAGToDAGISel::
+selectVSplatUimm5(SDValue N, SDValue &Imm) const {
+ return selectVSplatCommon(N, Imm, false, 5);
+}
+
+// Select constant vector splats.
+bool MipsSEDAGToDAGISel::
+selectVSplatUimm6(SDValue N, SDValue &Imm) const {
+ return selectVSplatCommon(N, Imm, false, 6);
+}
+
+// Select constant vector splats.
+bool MipsSEDAGToDAGISel::
+selectVSplatUimm8(SDValue N, SDValue &Imm) const {
+ return selectVSplatCommon(N, Imm, false, 8);
+}
+
+// Select constant vector splats.
+bool MipsSEDAGToDAGISel::
+selectVSplatSimm5(SDValue N, SDValue &Imm) const {
+ return selectVSplatCommon(N, Imm, true, 5);
+}
+
+// Select constant vector splats whose value is a power of 2.
+//
+// In addition to the requirements of selectVSplat(), this function returns
+// true and sets Imm if:
+// * The splat value is the same width as the elements of the vector
+// * The splat value is a power of two.
+//
+// This function looks through ISD::BITCAST nodes.
+// TODO: This might not be appropriate for big-endian MSA since BITCAST is
+// sometimes a shuffle in big-endian mode.
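+//
+// For example, a v4i32 splat of 8 produces the immediate 3 (log2), while a
+// splat of 6 is rejected because exactLogBase2() returns -1.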
+bool MipsSEDAGToDAGISel::selectVSplatUimmPow2(SDValue N, SDValue &Imm) const {
+ APInt ImmValue;
+ EVT EltTy = N->getValueType(0).getVectorElementType();
+
+ if (N->getOpcode() == ISD::BITCAST)
+ N = N->getOperand(0);
+
+ if (selectVSplat(N.getNode(), ImmValue) &&
+ ImmValue.getBitWidth() == EltTy.getSizeInBits()) {
+ int32_t Log2 = ImmValue.exactLogBase2();
+
+ if (Log2 != -1) {
+ Imm = CurDAG->getTargetConstant(Log2, EltTy);
+ return true;
+ }
+ }
+
+ return false;
+}
+
+// Select constant vector splats whose value only has a consecutive sequence
+// of left-most bits set (e.g. 0b11...1100...00).
+//
+// In addition to the requirements of selectVSplat(), this function returns
+// true and sets Imm if:
+// * The splat value is the same width as the elements of the vector
+// * The splat value is a consecutive sequence of left-most bits.
+//
+// This function looks through ISD::BITCAST nodes.
+// TODO: This might not be appropriate for big-endian MSA since BITCAST is
+// sometimes a shuffle in big-endian mode.
+bool MipsSEDAGToDAGISel::selectVSplatMaskL(SDValue N, SDValue &Imm) const {
+ APInt ImmValue;
+ EVT EltTy = N->getValueType(0).getVectorElementType();
+
+ if (N->getOpcode() == ISD::BITCAST)
+ N = N->getOperand(0);
+
+ if (selectVSplat(N.getNode(), ImmValue) &&
+ ImmValue.getBitWidth() == EltTy.getSizeInBits()) {
+ // Extract the run of set bits starting with bit zero from the bitwise
+ // inverse of ImmValue, and test that the inverse of this is the same
+ // as the original value.
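+ // For example, ImmValue == 0xF0: ~ImmValue == 0x0F, its run of set bits
+ // from bit zero is 0x0F, and ~0x0F == 0xF0 == ImmValue, so the mask is
+ // valid and the immediate is countPopulation(0xF0) == 4. ImmValue == 0xF1
+ // fails since its set bits are not one left-most run.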
+ if (ImmValue == ~(~ImmValue & ~(~ImmValue + 1))) {
+
+ Imm = CurDAG->getTargetConstant(ImmValue.countPopulation(), EltTy);
+ return true;
+ }
+ }
+
+ return false;
+}
+
+// Select constant vector splats whose value only has a consecutive sequence
+// of right-most bits set (e.g. 0b00...0011...11).
+//
+// In addition to the requirements of selectVSplat(), this function returns
+// true and sets Imm if:
+// * The splat value is the same width as the elements of the vector
+// * The splat value is a consecutive sequence of right-most bits.
+//
+// This function looks through ISD::BITCAST nodes.
+// TODO: This might not be appropriate for big-endian MSA since BITCAST is
+// sometimes a shuffle in big-endian mode.
+bool MipsSEDAGToDAGISel::selectVSplatMaskR(SDValue N, SDValue &Imm) const {
+ APInt ImmValue;
+ EVT EltTy = N->getValueType(0).getVectorElementType();
+
+ if (N->getOpcode() == ISD::BITCAST)
+ N = N->getOperand(0);
+
+ if (selectVSplat(N.getNode(), ImmValue) &&
+ ImmValue.getBitWidth() == EltTy.getSizeInBits()) {
+ // Extract the run of set bits starting with bit zero, and test that the
+ // result is the same as the original value.
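+ // For example, ImmValue == 0x3F: 0x3F + 1 == 0x40, ~0x40 == 0xBF, and
+ // 0x3F & 0xBF == 0x3F == ImmValue, so the immediate is
+ // countPopulation(0x3F) == 6. ImmValue == 0x5F fails the identity.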
+ if (ImmValue == (ImmValue & ~(ImmValue + 1))) {
+ Imm = CurDAG->getTargetConstant(ImmValue.countPopulation(), EltTy);
+ return true;
+ }
+ }
+
+ return false;
+}
+
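+// Select constant vector splats whose value is the bitwise inverse of a
+// power of 2 (e.g. an 8-bit splat of 0xFD, since ~0xFD == 0x02); the
+// immediate produced is log2 of the inverted value (1 in this example).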
+bool MipsSEDAGToDAGISel::selectVSplatUimmInvPow2(SDValue N,
+ SDValue &Imm) const {
+ APInt ImmValue;
+ EVT EltTy = N->getValueType(0).getVectorElementType();
+
+ if (N->getOpcode() == ISD::BITCAST)
+ N = N->getOperand(0);
+
+ if (selectVSplat(N.getNode(), ImmValue) &&
+ ImmValue.getBitWidth() == EltTy.getSizeInBits()) {
+ int32_t Log2 = (~ImmValue).exactLogBase2();
+
+ if (Log2 != -1) {
+ Imm = CurDAG->getTargetConstant(Log2, EltTy);
+ return true;
+ }
+ }
+
+ return false;
+}
+
std::pair<bool, SDNode*> MipsSEDAGToDAGISel::selectNode(SDNode *Node) {
unsigned Opcode = Node->getOpcode();
SDLoc DL(Node);
@@ -348,6 +634,11 @@ std::pair<bool, SDNode*> MipsSEDAGToDAGISel::selectNode(SDNode *Node) {
SDValue Zero = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL,
Mips::ZERO_64, MVT::i64);
Result = CurDAG->getMachineNode(Mips::DMTC1, DL, MVT::f64, Zero);
+ } else if (Subtarget.isFP64bit()) {
+ SDValue Zero = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL,
+ Mips::ZERO, MVT::i32);
+ Result = CurDAG->getMachineNode(Mips::BuildPairF64_64, DL, MVT::f64,
+ Zero, Zero);
} else {
SDValue Zero = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL,
Mips::ZERO, MVT::i32);
@@ -401,24 +692,71 @@ std::pair<bool, SDNode*> MipsSEDAGToDAGISel::selectNode(SDNode *Node) {
return std::make_pair(true, RegOpnd);
}
+ case ISD::INTRINSIC_W_CHAIN: {
+ switch (cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue()) {
+ default:
+ break;
+
+ case Intrinsic::mips_cfcmsa: {
+ SDValue ChainIn = Node->getOperand(0);
+ SDValue RegIdx = Node->getOperand(2);
+ SDValue Reg = CurDAG->getCopyFromReg(ChainIn, DL,
+ getMSACtrlReg(RegIdx), MVT::i32);
+ return std::make_pair(true, Reg.getNode());
+ }
+ }
+ break;
+ }
+
+ case ISD::INTRINSIC_WO_CHAIN: {
+ switch (cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue()) {
+ default:
+ break;
+
+ case Intrinsic::mips_move_v:
+ // Like an assignment but will always produce a move.v even if
+ // unnecessary.
+ return std::make_pair(true,
+ CurDAG->getMachineNode(Mips::MOVE_V, DL,
+ Node->getValueType(0),
+ Node->getOperand(1)));
+ }
+ break;
+ }
+
+ case ISD::INTRINSIC_VOID: {
+ switch (cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue()) {
+ default:
+ break;
+
+ case Intrinsic::mips_ctcmsa: {
+ SDValue ChainIn = Node->getOperand(0);
+ SDValue RegIdx = Node->getOperand(2);
+ SDValue Value = Node->getOperand(3);
+ SDValue ChainOut = CurDAG->getCopyToReg(ChainIn, DL,
+ getMSACtrlReg(RegIdx), Value);
+ return std::make_pair(true, ChainOut.getNode());
+ }
+ }
+ break;
+ }
+
case MipsISD::ThreadPointer: {
EVT PtrVT = getTargetLowering()->getPointerTy();
- unsigned RdhwrOpc, SrcReg, DestReg;
+ unsigned RdhwrOpc, DestReg;
if (PtrVT == MVT::i32) {
RdhwrOpc = Mips::RDHWR;
- SrcReg = Mips::HWR29;
DestReg = Mips::V1;
} else {
RdhwrOpc = Mips::RDHWR64;
- SrcReg = Mips::HWR29_64;
DestReg = Mips::V1_64;
}
SDNode *Rdhwr =
CurDAG->getMachineNode(RdhwrOpc, SDLoc(Node),
Node->getValueType(0),
- CurDAG->getRegister(SrcReg, PtrVT));
+ CurDAG->getRegister(Mips::HWR29, MVT::i32));
SDValue Chain = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL, DestReg,
SDValue(Rdhwr, 0));
SDValue ResNode = CurDAG->getCopyFromReg(Chain, DL, DestReg, PtrVT);
@@ -426,18 +764,81 @@ std::pair<bool, SDNode*> MipsSEDAGToDAGISel::selectNode(SDNode *Node) {
return std::make_pair(true, ResNode.getNode());
}
- case MipsISD::InsertLOHI: {
- unsigned RCID = Subtarget.hasDSP() ? Mips::ACRegsDSPRegClassID :
- Mips::ACRegsRegClassID;
- SDValue RegClass = CurDAG->getTargetConstant(RCID, MVT::i32);
- SDValue LoIdx = CurDAG->getTargetConstant(Mips::sub_lo, MVT::i32);
- SDValue HiIdx = CurDAG->getTargetConstant(Mips::sub_hi, MVT::i32);
- const SDValue Ops[] = { RegClass, Node->getOperand(0), LoIdx,
- Node->getOperand(1), HiIdx };
- SDNode *Res = CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
- MVT::Untyped, Ops);
+ case ISD::BUILD_VECTOR: {
+ // Select appropriate ldi.[bhwd] instructions for constant splats of
+ // 128-bit vectors when MSA is enabled. Fix up any register class
+ // mismatches that occur as a result.
+ //
+ // This allows the compiler to use a wider range of immediates than would
+ // otherwise be allowed. If, for example, v4i32 could only use ldi.h then
+ // it would not be possible to load { 0x01010101, 0x01010101, 0x01010101,
+ // 0x01010101 } without using a constant pool. This would be sub-optimal
+ // when 'ldi.b wd, 1' is capable of producing that bit-pattern in the
+ // same set of registers. Similarly, ldi.h isn't capable of producing {
+ // 0x00000000, 0x00000001, 0x00000000, 0x00000001 } but 'ldi.d wd, 1' can.
+
+ BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Node);
+ APInt SplatValue, SplatUndef;
+ unsigned SplatBitSize;
+ bool HasAnyUndefs;
+ unsigned LdiOp;
+ EVT ResVecTy = BVN->getValueType(0);
+ EVT ViaVecTy;
+
+ if (!Subtarget.hasMSA() || !BVN->getValueType(0).is128BitVector())
+ return std::make_pair(false, (SDNode*)NULL);
+
+ if (!BVN->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
+ HasAnyUndefs, 8,
+ !Subtarget.isLittle()))
+ return std::make_pair(false, (SDNode*)NULL);
+
+ switch (SplatBitSize) {
+ default:
+ return std::make_pair(false, (SDNode*)NULL);
+ case 8:
+ LdiOp = Mips::LDI_B;
+ ViaVecTy = MVT::v16i8;
+ break;
+ case 16:
+ LdiOp = Mips::LDI_H;
+ ViaVecTy = MVT::v8i16;
+ break;
+ case 32:
+ LdiOp = Mips::LDI_W;
+ ViaVecTy = MVT::v4i32;
+ break;
+ case 64:
+ LdiOp = Mips::LDI_D;
+ ViaVecTy = MVT::v2i64;
+ break;
+ }
+
+ if (!SplatValue.isSignedIntN(10))
+ return std::make_pair(false, (SDNode*)NULL);
+
+ SDValue Imm = CurDAG->getTargetConstant(SplatValue,
+ ViaVecTy.getVectorElementType());
+
+ SDNode *Res = CurDAG->getMachineNode(LdiOp, SDLoc(Node), ViaVecTy, Imm);
+
+ if (ResVecTy != ViaVecTy) {
+ // If LdiOp is writing to a different register class to ResVecTy, then
+ // fix it up here. This COPY_TO_REGCLASS should never cause a move.v
+ // since the source and destination register sets contain the same
+ // registers.
+ const TargetLowering *TLI = getTargetLowering();
+ MVT ResVecTySimple = ResVecTy.getSimpleVT();
+ const TargetRegisterClass *RC = TLI->getRegClassFor(ResVecTySimple);
+ Res = CurDAG->getMachineNode(Mips::COPY_TO_REGCLASS, SDLoc(Node),
+ ResVecTy, SDValue(Res, 0),
+ CurDAG->getTargetConstant(RC->getID(),
+ MVT::i32));
+ }
+
return std::make_pair(true, Res);
}
+
}
return std::make_pair(false, (SDNode*)NULL);
diff --git a/lib/Target/Mips/MipsSEISelDAGToDAG.h b/lib/Target/Mips/MipsSEISelDAGToDAG.h
index 03ed1f9..dc52064 100644
--- a/lib/Target/Mips/MipsSEISelDAGToDAG.h
+++ b/lib/Target/Mips/MipsSEISelDAGToDAG.h
@@ -30,6 +30,8 @@ private:
void addDSPCtrlRegOperands(bool IsDef, MachineInstr &MI,
MachineFunction &MF);
+ unsigned getMSACtrlReg(const SDValue RegIdx) const;
+
bool replaceUsesWithZeroReg(MachineRegisterInfo *MRI, const MachineInstr&);
std::pair<SDNode*, SDNode*> selectMULT(SDNode *N, unsigned Opc, SDLoc dl,
@@ -41,12 +43,54 @@ private:
virtual bool selectAddrRegImm(SDValue Addr, SDValue &Base,
SDValue &Offset) const;
+ virtual bool selectAddrRegReg(SDValue Addr, SDValue &Base,
+ SDValue &Offset) const;
+
virtual bool selectAddrDefault(SDValue Addr, SDValue &Base,
SDValue &Offset) const;
virtual bool selectIntAddr(SDValue Addr, SDValue &Base,
SDValue &Offset) const;
+ virtual bool selectAddrRegImm12(SDValue Addr, SDValue &Base,
+ SDValue &Offset) const;
+
+ virtual bool selectIntAddrMM(SDValue Addr, SDValue &Base,
+ SDValue &Offset) const;
+
+ /// \brief Select constant vector splats.
+ virtual bool selectVSplat(SDNode *N, APInt &Imm) const;
+ /// \brief Select constant vector splats whose value fits in a given integer.
+ virtual bool selectVSplatCommon(SDValue N, SDValue &Imm, bool Signed,
+ unsigned ImmBitSize) const;
+ /// \brief Select constant vector splats whose value fits in a uimm1.
+ virtual bool selectVSplatUimm1(SDValue N, SDValue &Imm) const;
+ /// \brief Select constant vector splats whose value fits in a uimm2.
+ virtual bool selectVSplatUimm2(SDValue N, SDValue &Imm) const;
+ /// \brief Select constant vector splats whose value fits in a uimm3.
+ virtual bool selectVSplatUimm3(SDValue N, SDValue &Imm) const;
+ /// \brief Select constant vector splats whose value fits in a uimm4.
+ virtual bool selectVSplatUimm4(SDValue N, SDValue &Imm) const;
+ /// \brief Select constant vector splats whose value fits in a uimm5.
+ virtual bool selectVSplatUimm5(SDValue N, SDValue &Imm) const;
+ /// \brief Select constant vector splats whose value fits in a uimm6.
+ virtual bool selectVSplatUimm6(SDValue N, SDValue &Imm) const;
+ /// \brief Select constant vector splats whose value fits in a uimm8.
+ virtual bool selectVSplatUimm8(SDValue N, SDValue &Imm) const;
+ /// \brief Select constant vector splats whose value fits in a simm5.
+ virtual bool selectVSplatSimm5(SDValue N, SDValue &Imm) const;
+ /// \brief Select constant vector splats whose value is a power of 2.
+ virtual bool selectVSplatUimmPow2(SDValue N, SDValue &Imm) const;
+ /// \brief Select constant vector splats whose value is the inverse of a
+ /// power of 2.
+ virtual bool selectVSplatUimmInvPow2(SDValue N, SDValue &Imm) const;
+ /// \brief Select constant vector splats whose value is a run of set bits
+ /// ending at the most significant bit
+ virtual bool selectVSplatMaskL(SDValue N, SDValue &Imm) const;
+ /// \brief Select constant vector splats whose value is a run of set bits
+ /// starting at bit zero.
+ virtual bool selectVSplatMaskR(SDValue N, SDValue &Imm) const;
+
virtual std::pair<bool, SDNode*> selectNode(SDNode *Node);
virtual void processFunctionAfterISel(MachineFunction &MF);
diff --git a/lib/Target/Mips/MipsSEISelLowering.cpp b/lib/Target/Mips/MipsSEISelLowering.cpp
index a0aacb5..809adc0 100644
--- a/lib/Target/Mips/MipsSEISelLowering.cpp
+++ b/lib/Target/Mips/MipsSEISelLowering.cpp
@@ -10,6 +10,7 @@
// Subclass of MipsTargetLowering specialized for mips32/64.
//
//===----------------------------------------------------------------------===//
+#define DEBUG_TYPE "mips-isel"
#include "MipsSEISelLowering.h"
#include "MipsRegisterInfo.h"
#include "MipsTargetMachine.h"
@@ -17,6 +18,8 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
using namespace llvm;
@@ -25,22 +28,40 @@ static cl::opt<bool>
EnableMipsTailCalls("enable-mips-tail-calls", cl::Hidden,
cl::desc("MIPS: Enable tail calls."), cl::init(false));
+static cl::opt<bool> NoDPLoadStore("mno-ldc1-sdc1", cl::init(false),
+ cl::desc("Expand double precision loads and "
+ "stores to their single precision "
+ "counterparts"));
+
MipsSETargetLowering::MipsSETargetLowering(MipsTargetMachine &TM)
: MipsTargetLowering(TM) {
// Set up the register classes
-
- clearRegisterClasses();
-
addRegisterClass(MVT::i32, &Mips::GPR32RegClass);
if (HasMips64)
addRegisterClass(MVT::i64, &Mips::GPR64RegClass);
+ if (Subtarget->hasDSP() || Subtarget->hasMSA()) {
+ // Expand all truncating stores and extending loads.
+ unsigned FirstVT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
+ unsigned LastVT = (unsigned)MVT::LAST_VECTOR_VALUETYPE;
+
+ for (unsigned VT0 = FirstVT; VT0 <= LastVT; ++VT0) {
+ for (unsigned VT1 = FirstVT; VT1 <= LastVT; ++VT1)
+ setTruncStoreAction((MVT::SimpleValueType)VT0,
+ (MVT::SimpleValueType)VT1, Expand);
+
+ setLoadExtAction(ISD::SEXTLOAD, (MVT::SimpleValueType)VT0, Expand);
+ setLoadExtAction(ISD::ZEXTLOAD, (MVT::SimpleValueType)VT0, Expand);
+ setLoadExtAction(ISD::EXTLOAD, (MVT::SimpleValueType)VT0, Expand);
+ }
+ }
+
if (Subtarget->hasDSP()) {
MVT::SimpleValueType VecTys[2] = {MVT::v2i16, MVT::v4i8};
for (unsigned i = 0; i < array_lengthof(VecTys); ++i) {
- addRegisterClass(VecTys[i], &Mips::DSPRegsRegClass);
+ addRegisterClass(VecTys[i], &Mips::DSPRRegClass);
// Expand all builtin opcodes.
for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc)
@@ -53,20 +74,6 @@ MipsSETargetLowering::MipsSETargetLowering(MipsTargetMachine &TM)
setOperationAction(ISD::BITCAST, VecTys[i], Legal);
}
- // Expand all truncating stores and extending loads.
- unsigned FirstVT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
- unsigned LastVT = (unsigned)MVT::LAST_VECTOR_VALUETYPE;
-
- for (unsigned VT0 = FirstVT; VT0 <= LastVT; ++VT0) {
- for (unsigned VT1 = FirstVT; VT1 <= LastVT; ++VT1)
- setTruncStoreAction((MVT::SimpleValueType)VT0,
- (MVT::SimpleValueType)VT1, Expand);
-
- setLoadExtAction(ISD::SEXTLOAD, (MVT::SimpleValueType)VT0, Expand);
- setLoadExtAction(ISD::ZEXTLOAD, (MVT::SimpleValueType)VT0, Expand);
- setLoadExtAction(ISD::EXTLOAD, (MVT::SimpleValueType)VT0, Expand);
- }
-
setTargetDAGCombine(ISD::SHL);
setTargetDAGCombine(ISD::SRA);
setTargetDAGCombine(ISD::SRL);
@@ -77,12 +84,28 @@ MipsSETargetLowering::MipsSETargetLowering(MipsTargetMachine &TM)
if (Subtarget->hasDSPR2())
setOperationAction(ISD::MUL, MVT::v2i16, Legal);
- if (!TM.Options.UseSoftFloat) {
+ if (Subtarget->hasMSA()) {
+ addMSAIntType(MVT::v16i8, &Mips::MSA128BRegClass);
+ addMSAIntType(MVT::v8i16, &Mips::MSA128HRegClass);
+ addMSAIntType(MVT::v4i32, &Mips::MSA128WRegClass);
+ addMSAIntType(MVT::v2i64, &Mips::MSA128DRegClass);
+ addMSAFloatType(MVT::v8f16, &Mips::MSA128HRegClass);
+ addMSAFloatType(MVT::v4f32, &Mips::MSA128WRegClass);
+ addMSAFloatType(MVT::v2f64, &Mips::MSA128DRegClass);
+
+ setTargetDAGCombine(ISD::AND);
+ setTargetDAGCombine(ISD::OR);
+ setTargetDAGCombine(ISD::SRA);
+ setTargetDAGCombine(ISD::VSELECT);
+ setTargetDAGCombine(ISD::XOR);
+ }
+
+ if (!Subtarget->mipsSEUsesSoftFloat()) {
addRegisterClass(MVT::f32, &Mips::FGR32RegClass);
// When dealing with single precision only, use libcalls
if (!Subtarget->isSingleFloat()) {
- if (HasMips64)
+ if (Subtarget->isFP64bit())
addRegisterClass(MVT::f64, &Mips::FGR64RegClass);
else
addRegisterClass(MVT::f64, &Mips::AFGR64RegClass);
@@ -115,6 +138,15 @@ MipsSETargetLowering::MipsSETargetLowering(MipsTargetMachine &TM)
setTargetDAGCombine(ISD::SUBE);
setTargetDAGCombine(ISD::MUL);
+ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
+ setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
+ setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
+
+ if (NoDPLoadStore) {
+ setOperationAction(ISD::LOAD, MVT::f64, Custom);
+ setOperationAction(ISD::STORE, MVT::f64, Custom);
+ }
+
computeRegisterProperties();
}
@@ -123,6 +155,93 @@ llvm::createMipsSETargetLowering(MipsTargetMachine &TM) {
return new MipsSETargetLowering(TM);
}
+// Enable MSA support for the given integer type and Register class.
+void MipsSETargetLowering::
+addMSAIntType(MVT::SimpleValueType Ty, const TargetRegisterClass *RC) {
+ addRegisterClass(Ty, RC);
+
+ // Expand all builtin opcodes.
+ for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc)
+ setOperationAction(Opc, Ty, Expand);
+
+ setOperationAction(ISD::BITCAST, Ty, Legal);
+ setOperationAction(ISD::LOAD, Ty, Legal);
+ setOperationAction(ISD::STORE, Ty, Legal);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, Ty, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, Ty, Legal);
+ setOperationAction(ISD::BUILD_VECTOR, Ty, Custom);
+
+ setOperationAction(ISD::ADD, Ty, Legal);
+ setOperationAction(ISD::AND, Ty, Legal);
+ setOperationAction(ISD::CTLZ, Ty, Legal);
+ setOperationAction(ISD::CTPOP, Ty, Legal);
+ setOperationAction(ISD::MUL, Ty, Legal);
+ setOperationAction(ISD::OR, Ty, Legal);
+ setOperationAction(ISD::SDIV, Ty, Legal);
+ setOperationAction(ISD::SREM, Ty, Legal);
+ setOperationAction(ISD::SHL, Ty, Legal);
+ setOperationAction(ISD::SRA, Ty, Legal);
+ setOperationAction(ISD::SRL, Ty, Legal);
+ setOperationAction(ISD::SUB, Ty, Legal);
+ setOperationAction(ISD::UDIV, Ty, Legal);
+ setOperationAction(ISD::UREM, Ty, Legal);
+ setOperationAction(ISD::VECTOR_SHUFFLE, Ty, Custom);
+ setOperationAction(ISD::VSELECT, Ty, Legal);
+ setOperationAction(ISD::XOR, Ty, Legal);
+
+ if (Ty == MVT::v4i32 || Ty == MVT::v2i64) {
+ setOperationAction(ISD::FP_TO_SINT, Ty, Legal);
+ setOperationAction(ISD::FP_TO_UINT, Ty, Legal);
+ setOperationAction(ISD::SINT_TO_FP, Ty, Legal);
+ setOperationAction(ISD::UINT_TO_FP, Ty, Legal);
+ }
+
+ setOperationAction(ISD::SETCC, Ty, Legal);
+ setCondCodeAction(ISD::SETNE, Ty, Expand);
+ setCondCodeAction(ISD::SETGE, Ty, Expand);
+ setCondCodeAction(ISD::SETGT, Ty, Expand);
+ setCondCodeAction(ISD::SETUGE, Ty, Expand);
+ setCondCodeAction(ISD::SETUGT, Ty, Expand);
+}
+
+// Enable MSA support for the given floating-point type and Register class.
+void MipsSETargetLowering::
+addMSAFloatType(MVT::SimpleValueType Ty, const TargetRegisterClass *RC) {
+ addRegisterClass(Ty, RC);
+
+ // Expand all builtin opcodes.
+ for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc)
+ setOperationAction(Opc, Ty, Expand);
+
+ setOperationAction(ISD::LOAD, Ty, Legal);
+ setOperationAction(ISD::STORE, Ty, Legal);
+ setOperationAction(ISD::BITCAST, Ty, Legal);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, Ty, Legal);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, Ty, Legal);
+ setOperationAction(ISD::BUILD_VECTOR, Ty, Custom);
+
+ if (Ty != MVT::v8f16) {
+ setOperationAction(ISD::FABS, Ty, Legal);
+ setOperationAction(ISD::FADD, Ty, Legal);
+ setOperationAction(ISD::FDIV, Ty, Legal);
+ setOperationAction(ISD::FEXP2, Ty, Legal);
+ setOperationAction(ISD::FLOG2, Ty, Legal);
+ setOperationAction(ISD::FMA, Ty, Legal);
+ setOperationAction(ISD::FMUL, Ty, Legal);
+ setOperationAction(ISD::FRINT, Ty, Legal);
+ setOperationAction(ISD::FSQRT, Ty, Legal);
+ setOperationAction(ISD::FSUB, Ty, Legal);
+ setOperationAction(ISD::VSELECT, Ty, Legal);
+
+ setOperationAction(ISD::SETCC, Ty, Legal);
+ setCondCodeAction(ISD::SETOGE, Ty, Expand);
+ setCondCodeAction(ISD::SETOGT, Ty, Expand);
+ setCondCodeAction(ISD::SETUGE, Ty, Expand);
+ setCondCodeAction(ISD::SETUGT, Ty, Expand);
+ setCondCodeAction(ISD::SETGE, Ty, Expand);
+ setCondCodeAction(ISD::SETGT, Ty, Expand);
+ }
+}
bool
MipsSETargetLowering::allowsUnalignedMemoryAccesses(EVT VT, bool *Fast) const {
@@ -142,6 +261,8 @@ MipsSETargetLowering::allowsUnalignedMemoryAccesses(EVT VT, bool *Fast) const {
SDValue MipsSETargetLowering::LowerOperation(SDValue Op,
SelectionDAG &DAG) const {
switch(Op.getOpcode()) {
+ case ISD::LOAD: return lowerLOAD(Op, DAG);
+ case ISD::STORE: return lowerSTORE(Op, DAG);
case ISD::SMUL_LOHI: return lowerMulDiv(Op, MipsISD::Mult, true, true, DAG);
case ISD::UMUL_LOHI: return lowerMulDiv(Op, MipsISD::Multu, true, true, DAG);
case ISD::MULHS: return lowerMulDiv(Op, MipsISD::Mult, false, true, DAG);
@@ -152,6 +273,10 @@ SDValue MipsSETargetLowering::LowerOperation(SDValue Op,
DAG);
case ISD::INTRINSIC_WO_CHAIN: return lowerINTRINSIC_WO_CHAIN(Op, DAG);
case ISD::INTRINSIC_W_CHAIN: return lowerINTRINSIC_W_CHAIN(Op, DAG);
+ case ISD::INTRINSIC_VOID: return lowerINTRINSIC_VOID(Op, DAG);
+ case ISD::EXTRACT_VECTOR_ELT: return lowerEXTRACT_VECTOR_ELT(Op, DAG);
+ case ISD::BUILD_VECTOR: return lowerBUILD_VECTOR(Op, DAG);
+ case ISD::VECTOR_SHUFFLE: return lowerVECTOR_SHUFFLE(Op, DAG);
}
return MipsTargetLowering::LowerOperation(Op, DAG);
@@ -204,7 +329,7 @@ static bool selectMADD(SDNode *ADDENode, SelectionDAG *CurDAG) {
SDLoc DL(ADDENode);
// Initialize accumulator.
- SDValue ACCIn = CurDAG->getNode(MipsISD::InsertLOHI, DL, MVT::Untyped,
+ SDValue ACCIn = CurDAG->getNode(MipsISD::MTLOHI, DL, MVT::Untyped,
ADDCNode->getOperand(1),
ADDENode->getOperand(1));
@@ -218,15 +343,11 @@ static bool selectMADD(SDNode *ADDENode, SelectionDAG *CurDAG) {
// replace uses of adde and addc here
if (!SDValue(ADDCNode, 0).use_empty()) {
- SDValue LoIdx = CurDAG->getConstant(Mips::sub_lo, MVT::i32);
- SDValue LoOut = CurDAG->getNode(MipsISD::ExtractLOHI, DL, MVT::i32, MAdd,
- LoIdx);
+ SDValue LoOut = CurDAG->getNode(MipsISD::MFLO, DL, MVT::i32, MAdd);
CurDAG->ReplaceAllUsesOfValueWith(SDValue(ADDCNode, 0), LoOut);
}
if (!SDValue(ADDENode, 0).use_empty()) {
- SDValue HiIdx = CurDAG->getConstant(Mips::sub_hi, MVT::i32);
- SDValue HiOut = CurDAG->getNode(MipsISD::ExtractLOHI, DL, MVT::i32, MAdd,
- HiIdx);
+ SDValue HiOut = CurDAG->getNode(MipsISD::MFHI, DL, MVT::i32, MAdd);
CurDAG->ReplaceAllUsesOfValueWith(SDValue(ADDENode, 0), HiOut);
}
@@ -280,7 +401,7 @@ static bool selectMSUB(SDNode *SUBENode, SelectionDAG *CurDAG) {
SDLoc DL(SUBENode);
// Initialize accumulator.
- SDValue ACCIn = CurDAG->getNode(MipsISD::InsertLOHI, DL, MVT::Untyped,
+ SDValue ACCIn = CurDAG->getNode(MipsISD::MTLOHI, DL, MVT::Untyped,
SUBCNode->getOperand(0),
SUBENode->getOperand(0));
@@ -294,15 +415,11 @@ static bool selectMSUB(SDNode *SUBENode, SelectionDAG *CurDAG) {
// replace uses of sube and subc here
if (!SDValue(SUBCNode, 0).use_empty()) {
- SDValue LoIdx = CurDAG->getConstant(Mips::sub_lo, MVT::i32);
- SDValue LoOut = CurDAG->getNode(MipsISD::ExtractLOHI, DL, MVT::i32, MSub,
- LoIdx);
+ SDValue LoOut = CurDAG->getNode(MipsISD::MFLO, DL, MVT::i32, MSub);
CurDAG->ReplaceAllUsesOfValueWith(SDValue(SUBCNode, 0), LoOut);
}
if (!SDValue(SUBENode, 0).use_empty()) {
- SDValue HiIdx = CurDAG->getConstant(Mips::sub_hi, MVT::i32);
- SDValue HiOut = CurDAG->getNode(MipsISD::ExtractLOHI, DL, MVT::i32, MSub,
- HiIdx);
+ SDValue HiOut = CurDAG->getNode(MipsISD::MFHI, DL, MVT::i32, MSub);
CurDAG->ReplaceAllUsesOfValueWith(SDValue(SUBENode, 0), HiOut);
}
@@ -322,6 +439,248 @@ static SDValue performADDECombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
+// Fold zero extensions into MipsISD::VEXTRACT_[SZ]EXT_ELT
+//
+// Performs the following transformations:
+// - Changes MipsISD::VEXTRACT_[SZ]EXT_ELT to zero extension if its
+// sign/zero-extension is completely overwritten by the new one performed by
+// the ISD::AND.
+// - Removes redundant zero extensions performed by an ISD::AND.
+static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const MipsSubtarget *Subtarget) {
+ if (!Subtarget->hasMSA())
+ return SDValue();
+
+ SDValue Op0 = N->getOperand(0);
+ SDValue Op1 = N->getOperand(1);
+ unsigned Op0Opcode = Op0->getOpcode();
+
+ // (and (MipsVExtract[SZ]Ext $a, $b, $c), imm:$d)
+ // where $d + 1 == 2^n and n == 32
+ // or $d + 1 == 2^n and n <= 32 and ZExt
+ // -> (MipsVExtractZExt $a, $b, $c)
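+ //
+ // For example, (and (MipsVExtractSExt $a, $b, i16), 65535) becomes
+ // (MipsVExtractZExt $a, $b, i16): the mask 65535 == 2^16 - 1 zero-extends
+ // from exactly the bit where the extract's i16 result ends.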
+ if (Op0Opcode == MipsISD::VEXTRACT_SEXT_ELT ||
+ Op0Opcode == MipsISD::VEXTRACT_ZEXT_ELT) {
+ ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(Op1);
+
+ if (!Mask)
+ return SDValue();
+
+ int32_t Log2IfPositive = (Mask->getAPIntValue() + 1).exactLogBase2();
+
+ if (Log2IfPositive <= 0)
+ return SDValue(); // Mask+1 is not a power of 2
+
+ SDValue Op0Op2 = Op0->getOperand(2);
+ EVT ExtendTy = cast<VTSDNode>(Op0Op2)->getVT();
+ unsigned ExtendTySize = ExtendTy.getSizeInBits();
+ unsigned Log2 = Log2IfPositive;
+
+ if ((Op0Opcode == MipsISD::VEXTRACT_ZEXT_ELT && Log2 >= ExtendTySize) ||
+ Log2 == ExtendTySize) {
+ SDValue Ops[] = { Op0->getOperand(0), Op0->getOperand(1), Op0Op2 };
+ DAG.MorphNodeTo(Op0.getNode(), MipsISD::VEXTRACT_ZEXT_ELT,
+ Op0->getVTList(), Ops, Op0->getNumOperands());
+ return Op0;
+ }
+ }
+
+ return SDValue();
+}
+
+// Determine if the specified node is a constant vector splat.
+//
+// Returns true and sets Imm if:
+// * N is a ISD::BUILD_VECTOR representing a constant splat
+//
+// This function is quite similar to MipsSEDAGToDAGISel::selectVSplat. The
+// differences are that it assumes MSA support has already been checked and
+// that the arbitrary requirement for a maximum of 32-bit integers isn't
+// applied (and must not be, in order for binsri.d to be selectable).
+static bool isVSplat(SDValue N, APInt &Imm, bool IsLittleEndian) {
+ BuildVectorSDNode *Node = dyn_cast<BuildVectorSDNode>(N.getNode());
+
+ if (Node == NULL)
+ return false;
+
+ APInt SplatValue, SplatUndef;
+ unsigned SplatBitSize;
+ bool HasAnyUndefs;
+
+ if (!Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs,
+ 8, !IsLittleEndian))
+ return false;
+
+ Imm = SplatValue;
+
+ return true;
+}
+
+// Test whether the given node is an all-ones build_vector.
+static bool isVectorAllOnes(SDValue N) {
+ // Look through bitcasts. Endianness doesn't matter because we are looking
+ // for an all-ones value.
+ if (N->getOpcode() == ISD::BITCAST)
+ N = N->getOperand(0);
+
+ BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N);
+
+ if (!BVN)
+ return false;
+
+ APInt SplatValue, SplatUndef;
+ unsigned SplatBitSize;
+ bool HasAnyUndefs;
+
+ // Endianness doesn't matter in this context because we are looking for
+ // an all-ones value.
+ if (BVN->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs))
+ return SplatValue.isAllOnesValue();
+
+ return false;
+}
+
+// Test whether N is the bitwise inverse of OfNode.
+static bool isBitwiseInverse(SDValue N, SDValue OfNode) {
+ if (N->getOpcode() != ISD::XOR)
+ return false;
+
+ if (isVectorAllOnes(N->getOperand(0)))
+ return N->getOperand(1) == OfNode;
+
+ if (isVectorAllOnes(N->getOperand(1)))
+ return N->getOperand(0) == OfNode;
+
+ return false;
+}
+
+// Perform combines where ISD::OR is the root node.
+//
+// Performs the following transformations:
+// - (or (and $a, $mask), (and $b, $inv_mask)) => (vselect $mask, $a, $b)
+// where $inv_mask is the bitwise inverse of $mask and the 'or' has a 128-bit
+// vector type.
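+//
+// For example, if $mask splats 0x00FF into every v8i16 lane, then
+// (or (and $a, $mask), (and $b, $inv_mask)) merges the low byte of each
+// lane from $a with the high byte from $b, which a single vector
+// bit-select expresses.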
+static SDValue performORCombine(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const MipsSubtarget *Subtarget) {
+ if (!Subtarget->hasMSA())
+ return SDValue();
+
+ EVT Ty = N->getValueType(0);
+
+ if (!Ty.is128BitVector())
+ return SDValue();
+
+ SDValue Op0 = N->getOperand(0);
+ SDValue Op1 = N->getOperand(1);
+
+ if (Op0->getOpcode() == ISD::AND && Op1->getOpcode() == ISD::AND) {
+ SDValue Op0Op0 = Op0->getOperand(0);
+ SDValue Op0Op1 = Op0->getOperand(1);
+ SDValue Op1Op0 = Op1->getOperand(0);
+ SDValue Op1Op1 = Op1->getOperand(1);
+ bool IsLittleEndian = Subtarget->isLittle();
+
+ SDValue IfSet, IfClr, Cond;
+ bool IsConstantMask = false;
+ APInt Mask, InvMask;
+
+ // If Op0Op0 is an appropriate mask, try to find its inverse in either
+ // Op1Op0 or Op1Op1. Keep track of the Cond, IfSet, and IfClr nodes while
+ // looking.
+ // IfClr will be set if we find a valid match.
+ if (isVSplat(Op0Op0, Mask, IsLittleEndian)) {
+ Cond = Op0Op0;
+ IfSet = Op0Op1;
+
+ if (isVSplat(Op1Op0, InvMask, IsLittleEndian) &&
+ Mask.getBitWidth() == InvMask.getBitWidth() && Mask == ~InvMask)
+ IfClr = Op1Op1;
+ else if (isVSplat(Op1Op1, InvMask, IsLittleEndian) &&
+ Mask.getBitWidth() == InvMask.getBitWidth() && Mask == ~InvMask)
+ IfClr = Op1Op0;
+
+ IsConstantMask = true;
+ }
+
+ // If IfClr is not yet set, and Op0Op1 is an appropriate mask, try the same
+ // thing again using this mask.
+ // IfClr will be set if we find a valid match.
+ if (!IfClr.getNode() && isVSplat(Op0Op1, Mask, IsLittleEndian)) {
+ Cond = Op0Op1;
+ IfSet = Op0Op0;
+
+ if (isVSplat(Op1Op0, InvMask, IsLittleEndian) &&
+ Mask.getBitWidth() == InvMask.getBitWidth() && Mask == ~InvMask)
+ IfClr = Op1Op1;
+ else if (isVSplat(Op1Op1, InvMask, IsLittleEndian) &&
+ Mask.getBitWidth() == InvMask.getBitWidth() && Mask == ~InvMask)
+ IfClr = Op1Op0;
+
+ IsConstantMask = true;
+ }
+
+ // If IfClr is not yet set, try looking for a non-constant match.
+ // IfClr will be set if we find a valid match amongst the eight
+ // possibilities.
+ if (!IfClr.getNode()) {
+ if (isBitwiseInverse(Op0Op0, Op1Op0)) {
+ Cond = Op1Op0;
+ IfSet = Op1Op1;
+ IfClr = Op0Op1;
+ } else if (isBitwiseInverse(Op0Op1, Op1Op0)) {
+ Cond = Op1Op0;
+ IfSet = Op1Op1;
+ IfClr = Op0Op0;
+ } else if (isBitwiseInverse(Op0Op0, Op1Op1)) {
+ Cond = Op1Op1;
+ IfSet = Op1Op0;
+ IfClr = Op0Op1;
+ } else if (isBitwiseInverse(Op0Op1, Op1Op1)) {
+ Cond = Op1Op1;
+ IfSet = Op1Op0;
+ IfClr = Op0Op0;
+ } else if (isBitwiseInverse(Op1Op0, Op0Op0)) {
+ Cond = Op0Op0;
+ IfSet = Op0Op1;
+ IfClr = Op1Op1;
+ } else if (isBitwiseInverse(Op1Op1, Op0Op0)) {
+ Cond = Op0Op0;
+ IfSet = Op0Op1;
+ IfClr = Op1Op0;
+ } else if (isBitwiseInverse(Op1Op0, Op0Op1)) {
+ Cond = Op0Op1;
+ IfSet = Op0Op0;
+ IfClr = Op1Op1;
+ } else if (isBitwiseInverse(Op1Op1, Op0Op1)) {
+ Cond = Op0Op1;
+ IfSet = Op0Op0;
+ IfClr = Op1Op0;
+ }
+ }
+
+ // At this point, IfClr will be set if we have a valid match.
+ if (!IfClr.getNode())
+ return SDValue();
+
+ assert(Cond.getNode() && IfSet.getNode());
+
+ // Fold degenerate cases.
+ if (IsConstantMask) {
+ if (Mask.isAllOnesValue())
+ return IfSet;
+ else if (Mask == 0)
+ return IfClr;
+ }
+
+ // Transform the DAG into an equivalent VSELECT.
+ return DAG.getNode(ISD::VSELECT, SDLoc(N), Ty, Cond, IfClr, IfSet);
+ }
+
+ return SDValue();
+}
+
static SDValue performSUBECombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const MipsSubtarget *Subtarget) {
@@ -396,6 +755,9 @@ static SDValue performDSPShiftCombine(unsigned Opc, SDNode *N, EVT Ty,
unsigned EltSize = Ty.getVectorElementType().getSizeInBits();
BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N->getOperand(1));
+ if (!Subtarget->hasDSP())
+ return SDValue();
+
if (!BV ||
!BV->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs,
EltSize, !Subtarget->isLittle()) ||
@@ -418,11 +780,57 @@ static SDValue performSHLCombine(SDNode *N, SelectionDAG &DAG,
return performDSPShiftCombine(MipsISD::SHLL_DSP, N, Ty, DAG, Subtarget);
}
+// Fold sign-extensions into MipsISD::VEXTRACT_[SZ]EXT_ELT for MSA and fold
+// constant splats into MipsISD::SHRA_DSP for DSPr2.
+//
+// Performs the following transformations:
+// - Changes MipsISD::VEXTRACT_[SZ]EXT_ELT to sign extension if its
+// sign/zero-extension is completely overwritten by the new one performed by
+// the ISD::SRA and ISD::SHL nodes.
+// - Removes redundant sign extensions performed by an ISD::SRA and ISD::SHL
+// sequence.
+//
+// See performDSPShiftCombine for more information about the transformation
+// used for DSPr2.
static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const MipsSubtarget *Subtarget) {
EVT Ty = N->getValueType(0);
+ if (Subtarget->hasMSA()) {
+ SDValue Op0 = N->getOperand(0);
+ SDValue Op1 = N->getOperand(1);
+
+ // (sra (shl (MipsVExtract[SZ]Ext $a, $b, $c), imm:$d), imm:$d)
+ // where $d + sizeof($c) == 32
+ // or $d + sizeof($c) <= 32 and SExt
+ // -> (MipsVExtractSExt $a, $b, $c)
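+ //
+ // For example, (sra (shl (MipsVExtractSExt $a, $b, i16), 16), 16) on an
+ // i32 value re-applies a sign-extension that the extract already
+ // performed, so it folds to (MipsVExtractSExt $a, $b, i16).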
+ if (Op0->getOpcode() == ISD::SHL && Op1 == Op0->getOperand(1)) {
+ SDValue Op0Op0 = Op0->getOperand(0);
+ ConstantSDNode *ShAmount = dyn_cast<ConstantSDNode>(Op1);
+
+ if (!ShAmount)
+ return SDValue();
+
+ if (Op0Op0->getOpcode() != MipsISD::VEXTRACT_SEXT_ELT &&
+ Op0Op0->getOpcode() != MipsISD::VEXTRACT_ZEXT_ELT)
+ return SDValue();
+
+ EVT ExtendTy = cast<VTSDNode>(Op0Op0->getOperand(2))->getVT();
+ unsigned TotalBits = ShAmount->getZExtValue() + ExtendTy.getSizeInBits();
+
+ if (TotalBits == 32 ||
+ (Op0Op0->getOpcode() == MipsISD::VEXTRACT_SEXT_ELT &&
+ TotalBits <= 32)) {
+ SDValue Ops[] = { Op0Op0->getOperand(0), Op0Op0->getOperand(1),
+ Op0Op0->getOperand(2) };
+ DAG.MorphNodeTo(Op0Op0.getNode(), MipsISD::VEXTRACT_SEXT_ELT,
+ Op0Op0->getVTList(), Ops, Op0Op0->getNumOperands());
+ return Op0Op0;
+ }
+ }
+ }
+
if ((Ty != MVT::v2i16) && ((Ty != MVT::v4i8) || !Subtarget->hasDSPR2()))
return SDValue();
@@ -475,17 +883,84 @@ static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG) {
static SDValue performVSELECTCombine(SDNode *N, SelectionDAG &DAG) {
EVT Ty = N->getValueType(0);
- if ((Ty != MVT::v2i16) && (Ty != MVT::v4i8))
- return SDValue();
+ if (Ty.is128BitVector() && Ty.isInteger()) {
+ // Try the following combines:
+ // (vselect (setcc $a, $b, SETLT), $b, $a)) -> (vsmax $a, $b)
+ // (vselect (setcc $a, $b, SETLE), $b, $a)) -> (vsmax $a, $b)
+ // (vselect (setcc $a, $b, SETLT), $a, $b)) -> (vsmin $a, $b)
+ // (vselect (setcc $a, $b, SETLE), $a, $b)) -> (vsmin $a, $b)
+ // (vselect (setcc $a, $b, SETULT), $b, $a)) -> (vumax $a, $b)
+ // (vselect (setcc $a, $b, SETULE), $b, $a)) -> (vumax $a, $b)
+ // (vselect (setcc $a, $b, SETULT), $a, $b)) -> (vumin $a, $b)
+ // (vselect (setcc $a, $b, SETULE), $a, $b)) -> (vumin $a, $b)
+ // SETGT/SETGE/SETUGT/SETUGE variants of these will show up initially but
+ // will be expanded to equivalent SETLT/SETLE/SETULT/SETULE versions by the
+ // legalizer.
+ SDValue Op0 = N->getOperand(0);
+
+ if (Op0->getOpcode() != ISD::SETCC)
+ return SDValue();
+
+ ISD::CondCode CondCode = cast<CondCodeSDNode>(Op0->getOperand(2))->get();
+ bool Signed;
+
+ if (CondCode == ISD::SETLT || CondCode == ISD::SETLE)
+ Signed = true;
+ else if (CondCode == ISD::SETULT || CondCode == ISD::SETULE)
+ Signed = false;
+ else
+ return SDValue();
+
+ SDValue Op1 = N->getOperand(1);
+ SDValue Op2 = N->getOperand(2);
+ SDValue Op0Op0 = Op0->getOperand(0);
+ SDValue Op0Op1 = Op0->getOperand(1);
+
+ if (Op1 == Op0Op0 && Op2 == Op0Op1)
+ return DAG.getNode(Signed ? MipsISD::VSMIN : MipsISD::VUMIN, SDLoc(N),
+ Ty, Op1, Op2);
+ else if (Op1 == Op0Op1 && Op2 == Op0Op0)
+ return DAG.getNode(Signed ? MipsISD::VSMAX : MipsISD::VUMAX, SDLoc(N),
+ Ty, Op1, Op2);
+ } else if ((Ty == MVT::v2i16) || (Ty == MVT::v4i8)) {
+ SDValue SetCC = N->getOperand(0);
+
+ if (SetCC.getOpcode() != MipsISD::SETCC_DSP)
+ return SDValue();
+
+ return DAG.getNode(MipsISD::SELECT_CC_DSP, SDLoc(N), Ty,
+ SetCC.getOperand(0), SetCC.getOperand(1),
+ N->getOperand(1), N->getOperand(2), SetCC.getOperand(2));
+ }
- SDValue SetCC = N->getOperand(0);
+ return SDValue();
+}
- if (SetCC.getOpcode() != MipsISD::SETCC_DSP)
- return SDValue();
+static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG,
+ const MipsSubtarget *Subtarget) {
+ EVT Ty = N->getValueType(0);
- return DAG.getNode(MipsISD::SELECT_CC_DSP, SDLoc(N), Ty,
- SetCC.getOperand(0), SetCC.getOperand(1), N->getOperand(1),
- N->getOperand(2), SetCC.getOperand(2));
+ if (Subtarget->hasMSA() && Ty.is128BitVector() && Ty.isInteger()) {
+ // Try the following combines:
+ // (xor (or $a, $b), (build_vector allones))
+ // (xor (or $a, $b), (bitcast (build_vector allones)))
+ SDValue Op0 = N->getOperand(0);
+ SDValue Op1 = N->getOperand(1);
+ SDValue NotOp;
+
+ if (ISD::isBuildVectorAllOnes(Op0.getNode()))
+ NotOp = Op1;
+ else if (ISD::isBuildVectorAllOnes(Op1.getNode()))
+ NotOp = Op0;
+ else
+ return SDValue();
+
+ if (NotOp->getOpcode() == ISD::OR)
+ return DAG.getNode(MipsISD::VNOR, SDLoc(N), Ty, NotOp->getOperand(0),
+ NotOp->getOperand(1));
+ }
+
+ return SDValue();
}
SDValue
@@ -496,6 +971,12 @@ MipsSETargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const {
switch (N->getOpcode()) {
case ISD::ADDE:
return performADDECombine(N, DAG, DCI, Subtarget);
+ case ISD::AND:
+ Val = performANDCombine(N, DAG, DCI, Subtarget);
+ break;
+ case ISD::OR:
+ Val = performORCombine(N, DAG, DCI, Subtarget);
+ break;
case ISD::SUBE:
return performSUBECombine(N, DAG, DCI, Subtarget);
case ISD::MUL:
@@ -508,14 +989,22 @@ MipsSETargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const {
return performSRLCombine(N, DAG, DCI, Subtarget);
case ISD::VSELECT:
return performVSELECTCombine(N, DAG);
- case ISD::SETCC: {
+ case ISD::XOR:
+ Val = performXORCombine(N, DAG, Subtarget);
+ break;
+ case ISD::SETCC:
Val = performSETCCCombine(N, DAG);
break;
}
- }
- if (Val.getNode())
+ if (Val.getNode()) {
+ DEBUG(dbgs() << "\nMipsSE DAG Combine:\n";
+ N->printrWithDepth(dbgs(), &DAG);
+ dbgs() << "\n=> \n";
+ Val.getNode()->printrWithDepth(dbgs(), &DAG);
+ dbgs() << "\n");
return Val;
+ }
return MipsTargetLowering::PerformDAGCombine(N, DCI);
}
@@ -528,6 +1017,42 @@ MipsSETargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
return MipsTargetLowering::EmitInstrWithCustomInserter(MI, BB);
case Mips::BPOSGE32_PSEUDO:
return emitBPOSGE32(MI, BB);
+ case Mips::SNZ_B_PSEUDO:
+ return emitMSACBranchPseudo(MI, BB, Mips::BNZ_B);
+ case Mips::SNZ_H_PSEUDO:
+ return emitMSACBranchPseudo(MI, BB, Mips::BNZ_H);
+ case Mips::SNZ_W_PSEUDO:
+ return emitMSACBranchPseudo(MI, BB, Mips::BNZ_W);
+ case Mips::SNZ_D_PSEUDO:
+ return emitMSACBranchPseudo(MI, BB, Mips::BNZ_D);
+ case Mips::SNZ_V_PSEUDO:
+ return emitMSACBranchPseudo(MI, BB, Mips::BNZ_V);
+ case Mips::SZ_B_PSEUDO:
+ return emitMSACBranchPseudo(MI, BB, Mips::BZ_B);
+ case Mips::SZ_H_PSEUDO:
+ return emitMSACBranchPseudo(MI, BB, Mips::BZ_H);
+ case Mips::SZ_W_PSEUDO:
+ return emitMSACBranchPseudo(MI, BB, Mips::BZ_W);
+ case Mips::SZ_D_PSEUDO:
+ return emitMSACBranchPseudo(MI, BB, Mips::BZ_D);
+ case Mips::SZ_V_PSEUDO:
+ return emitMSACBranchPseudo(MI, BB, Mips::BZ_V);
+ case Mips::COPY_FW_PSEUDO:
+ return emitCOPY_FW(MI, BB);
+ case Mips::COPY_FD_PSEUDO:
+ return emitCOPY_FD(MI, BB);
+ case Mips::INSERT_FW_PSEUDO:
+ return emitINSERT_FW(MI, BB);
+ case Mips::INSERT_FD_PSEUDO:
+ return emitINSERT_FD(MI, BB);
+ case Mips::FILL_FW_PSEUDO:
+ return emitFILL_FW(MI, BB);
+ case Mips::FILL_FD_PSEUDO:
+ return emitFILL_FD(MI, BB);
+ case Mips::FEXP2_W_1_PSEUDO:
+ return emitFEXP2_W_1(MI, BB);
+ case Mips::FEXP2_D_1_PSEUDO:
+ return emitFEXP2_D_1(MI, BB);
}
}
@@ -564,6 +1089,68 @@ getOpndList(SmallVectorImpl<SDValue> &Ops,
InternalLinkage, CLI, Callee, Chain);
}
+SDValue MipsSETargetLowering::lowerLOAD(SDValue Op, SelectionDAG &DAG) const {
+ LoadSDNode &Nd = *cast<LoadSDNode>(Op);
+
+ if (Nd.getMemoryVT() != MVT::f64 || !NoDPLoadStore)
+ return MipsTargetLowering::lowerLOAD(Op, DAG);
+
+ // Replace a double precision load with two i32 loads and a BuildPairF64.
+ SDLoc DL(Op);
+ SDValue Ptr = Nd.getBasePtr(), Chain = Nd.getChain();
+ EVT PtrVT = Ptr.getValueType();
+
+ // i32 load from lower address.
+ SDValue Lo = DAG.getLoad(MVT::i32, DL, Chain, Ptr,
+ MachinePointerInfo(), Nd.isVolatile(),
+ Nd.isNonTemporal(), Nd.isInvariant(),
+ Nd.getAlignment());
+
+ // i32 load from higher address.
+ Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, Ptr, DAG.getConstant(4, PtrVT));
+ SDValue Hi = DAG.getLoad(MVT::i32, DL, Lo.getValue(1), Ptr,
+ MachinePointerInfo(), Nd.isVolatile(),
+ Nd.isNonTemporal(), Nd.isInvariant(),
+ std::min(Nd.getAlignment(), 4U));
+
+ if (!Subtarget->isLittle())
+ std::swap(Lo, Hi);
+
+ SDValue BP = DAG.getNode(MipsISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
+ SDValue Ops[2] = {BP, Hi.getValue(1)};
+ return DAG.getMergeValues(Ops, 2, DL);
+}
+
+SDValue MipsSETargetLowering::lowerSTORE(SDValue Op, SelectionDAG &DAG) const {
+ StoreSDNode &Nd = *cast<StoreSDNode>(Op);
+
+ if (Nd.getMemoryVT() != MVT::f64 || !NoDPLoadStore)
+ return MipsTargetLowering::lowerSTORE(Op, DAG);
+
+ // Replace a double precision store with two ExtractElementF64 nodes and
+ // two i32 stores.
+ SDLoc DL(Op);
+ SDValue Val = Nd.getValue(), Ptr = Nd.getBasePtr(), Chain = Nd.getChain();
+ EVT PtrVT = Ptr.getValueType();
+ SDValue Lo = DAG.getNode(MipsISD::ExtractElementF64, DL, MVT::i32,
+ Val, DAG.getConstant(0, MVT::i32));
+ SDValue Hi = DAG.getNode(MipsISD::ExtractElementF64, DL, MVT::i32,
+ Val, DAG.getConstant(1, MVT::i32));
+
+ if (!Subtarget->isLittle())
+ std::swap(Lo, Hi);
+
+ // i32 store to lower address.
+ Chain = DAG.getStore(Chain, DL, Lo, Ptr, MachinePointerInfo(),
+ Nd.isVolatile(), Nd.isNonTemporal(), Nd.getAlignment(),
+ Nd.getTBAAInfo());
+
+ // i32 store to higher address.
+ Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, Ptr, DAG.getConstant(4, PtrVT));
+ return DAG.getStore(Chain, DL, Hi, Ptr, MachinePointerInfo(),
+ Nd.isVolatile(), Nd.isNonTemporal(),
+ std::min(Nd.getAlignment(), 4U), Nd.getTBAAInfo());
+}
+
SDValue MipsSETargetLowering::lowerMulDiv(SDValue Op, unsigned NewOpc,
bool HasLo, bool HasHi,
SelectionDAG &DAG) const {
@@ -574,11 +1161,9 @@ SDValue MipsSETargetLowering::lowerMulDiv(SDValue Op, unsigned NewOpc,
SDValue Lo, Hi;
if (HasLo)
- Lo = DAG.getNode(MipsISD::ExtractLOHI, DL, Ty, Mult,
- DAG.getConstant(Mips::sub_lo, MVT::i32));
+ Lo = DAG.getNode(MipsISD::MFLO, DL, Ty, Mult);
if (HasHi)
- Hi = DAG.getNode(MipsISD::ExtractLOHI, DL, Ty, Mult,
- DAG.getConstant(Mips::sub_hi, MVT::i32));
+ Hi = DAG.getNode(MipsISD::MFHI, DL, Ty, Mult);
if (!HasLo || !HasHi)
return HasLo ? Lo : Hi;
@@ -593,14 +1178,12 @@ static SDValue initAccumulator(SDValue In, SDLoc DL, SelectionDAG &DAG) {
DAG.getConstant(0, MVT::i32));
SDValue InHi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, In,
DAG.getConstant(1, MVT::i32));
- return DAG.getNode(MipsISD::InsertLOHI, DL, MVT::Untyped, InLo, InHi);
+ return DAG.getNode(MipsISD::MTLOHI, DL, MVT::Untyped, InLo, InHi);
}
static SDValue extractLOHI(SDValue Op, SDLoc DL, SelectionDAG &DAG) {
- SDValue Lo = DAG.getNode(MipsISD::ExtractLOHI, DL, MVT::i32, Op,
- DAG.getConstant(Mips::sub_lo, MVT::i32));
- SDValue Hi = DAG.getNode(MipsISD::ExtractLOHI, DL, MVT::i32, Op,
- DAG.getConstant(Mips::sub_hi, MVT::i32));
+ SDValue Lo = DAG.getNode(MipsISD::MFLO, DL, MVT::i32, Op);
+ SDValue Hi = DAG.getNode(MipsISD::MFHI, DL, MVT::i32, Op);
return DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Lo, Hi);
}
@@ -664,8 +1247,156 @@ static SDValue lowerDSPIntr(SDValue Op, SelectionDAG &DAG, unsigned Opc) {
return DAG.getMergeValues(Vals, 2, DL);
}
+// Lower an MSA copy intrinsic into the specified SelectionDAG node
+static SDValue lowerMSACopyIntr(SDValue Op, SelectionDAG &DAG, unsigned Opc) {
+ SDLoc DL(Op);
+ SDValue Vec = Op->getOperand(1);
+ SDValue Idx = Op->getOperand(2);
+ EVT ResTy = Op->getValueType(0);
+ EVT EltTy = Vec->getValueType(0).getVectorElementType();
+
+ SDValue Result = DAG.getNode(Opc, DL, ResTy, Vec, Idx,
+ DAG.getValueType(EltTy));
+
+ return Result;
+}
+
+static SDValue lowerMSASplatZExt(SDValue Op, unsigned OpNr, SelectionDAG &DAG) {
+ EVT ResVecTy = Op->getValueType(0);
+ EVT ViaVecTy = ResVecTy;
+ SDLoc DL(Op);
+
+ // When ResVecTy == MVT::v2i64, LaneA is the upper 32 bits of the lane and
+ // LaneB is the lower 32 bits. Otherwise LaneA and LaneB are alternating
+ // lanes.
+ SDValue LaneA;
+ SDValue LaneB = Op->getOperand(2);
+
+ if (ResVecTy == MVT::v2i64) {
+ LaneA = DAG.getConstant(0, MVT::i32);
+ ViaVecTy = MVT::v4i32;
+ } else
+ LaneA = LaneB;
+
+ SDValue Ops[16] = { LaneA, LaneB, LaneA, LaneB, LaneA, LaneB, LaneA, LaneB,
+ LaneA, LaneB, LaneA, LaneB, LaneA, LaneB, LaneA, LaneB };
+
+ SDValue Result = DAG.getNode(ISD::BUILD_VECTOR, DL, ViaVecTy, Ops,
+ ViaVecTy.getVectorNumElements());
+
+ if (ViaVecTy != ResVecTy)
+ Result = DAG.getNode(ISD::BITCAST, DL, ResVecTy, Result);
+
+ return Result;
+}
+
+static SDValue lowerMSASplatImm(SDValue Op, unsigned ImmOp, SelectionDAG &DAG) {
+ return DAG.getConstant(Op->getConstantOperandVal(ImmOp), Op->getValueType(0));
+}
+
+static SDValue getBuildVectorSplat(EVT VecTy, SDValue SplatValue,
+ bool BigEndian, SelectionDAG &DAG) {
+ EVT ViaVecTy = VecTy;
+ SDValue SplatValueA = SplatValue;
+ SDValue SplatValueB = SplatValue;
+ SDLoc DL(SplatValue);
+
+ if (VecTy == MVT::v2i64) {
+ // v2i64 BUILD_VECTOR must be performed via v4i32 so split into i32's.
+ ViaVecTy = MVT::v4i32;
+
+ SplatValueA = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, SplatValue);
+ SplatValueB = DAG.getNode(ISD::SRL, DL, MVT::i64, SplatValue,
+ DAG.getConstant(32, MVT::i32));
+ SplatValueB = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, SplatValueB);
+ }
+
+ // We currently hold the parts in little endian order. Swap them if
+ // necessary.
+ if (BigEndian)
+ std::swap(SplatValueA, SplatValueB);
+
+ SDValue Ops[16] = { SplatValueA, SplatValueB, SplatValueA, SplatValueB,
+ SplatValueA, SplatValueB, SplatValueA, SplatValueB,
+ SplatValueA, SplatValueB, SplatValueA, SplatValueB,
+ SplatValueA, SplatValueB, SplatValueA, SplatValueB };
+
+ SDValue Result = DAG.getNode(ISD::BUILD_VECTOR, DL, ViaVecTy, Ops,
+ ViaVecTy.getVectorNumElements());
+
+ if (VecTy != ViaVecTy)
+ Result = DAG.getNode(ISD::BITCAST, DL, VecTy, Result);
+
+ return Result;
+}
+
+static SDValue lowerMSABinaryBitImmIntr(SDValue Op, SelectionDAG &DAG,
+ unsigned Opc, SDValue Imm,
+ bool BigEndian) {
+ EVT VecTy = Op->getValueType(0);
+ SDValue Exp2Imm;
+ SDLoc DL(Op);
+
+ // The DAG Combiner can't constant fold bitcasted vectors yet, so we must
+ // do it here for now.
+ if (VecTy == MVT::v2i64) {
+ if (ConstantSDNode *CImm = dyn_cast<ConstantSDNode>(Imm)) {
+ APInt BitImm = APInt(64, 1) << CImm->getAPIntValue();
+
+ SDValue BitImmHiOp = DAG.getConstant(BitImm.lshr(32).trunc(32), MVT::i32);
+ SDValue BitImmLoOp = DAG.getConstant(BitImm.trunc(32), MVT::i32);
+
+ if (BigEndian)
+ std::swap(BitImmLoOp, BitImmHiOp);
+
+ Exp2Imm =
+ DAG.getNode(ISD::BITCAST, DL, MVT::v2i64,
+ DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v4i32, BitImmLoOp,
+ BitImmHiOp, BitImmLoOp, BitImmHiOp));
+ }
+ }
+
+ if (Exp2Imm.getNode() == NULL) {
+ // We couldn't constant fold, so do a vector shift instead.
+
+ // Extend i32 to i64 if necessary. Sign or zero extend doesn't matter since
+ // only values 0-63 are valid.
+ if (VecTy == MVT::v2i64)
+ Imm = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Imm);
+
+ Exp2Imm = getBuildVectorSplat(VecTy, Imm, BigEndian, DAG);
+
+ Exp2Imm =
+ DAG.getNode(ISD::SHL, DL, VecTy, DAG.getConstant(1, VecTy), Exp2Imm);
+ }
+
+ return DAG.getNode(Opc, DL, VecTy, Op->getOperand(1), Exp2Imm);
+}
+
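+// Lower the bclr.[bhwd] intrinsics: clear one bit per element by computing
+// ws & ~(1 << wt) elementwise. For example, when wt splats 0 this clears
+// the least significant bit of every element.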
+static SDValue lowerMSABitClear(SDValue Op, SelectionDAG &DAG) {
+ EVT ResTy = Op->getValueType(0);
+ SDLoc DL(Op);
+ SDValue One = DAG.getConstant(1, ResTy);
+ SDValue Bit = DAG.getNode(ISD::SHL, DL, ResTy, One, Op->getOperand(2));
+
+ return DAG.getNode(ISD::AND, DL, ResTy, Op->getOperand(1),
+ DAG.getNOT(DL, Bit, ResTy));
+}
+
+static SDValue lowerMSABitClearImm(SDValue Op, SelectionDAG &DAG) {
+ SDLoc DL(Op);
+ EVT ResTy = Op->getValueType(0);
+ APInt BitImm = APInt(ResTy.getVectorElementType().getSizeInBits(), 1)
+ << cast<ConstantSDNode>(Op->getOperand(2))->getAPIntValue();
+ SDValue BitMask = DAG.getConstant(~BitImm, ResTy);
+
+ return DAG.getNode(ISD::AND, DL, ResTy, Op->getOperand(1), BitMask);
+}
+
SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+
switch (cast<ConstantSDNode>(Op->getOperand(0))->getZExtValue()) {
default:
return SDValue();
@@ -701,12 +1432,610 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
return lowerDSPIntr(Op, DAG, MipsISD::MSub);
case Intrinsic::mips_msubu:
return lowerDSPIntr(Op, DAG, MipsISD::MSubu);
+ case Intrinsic::mips_addv_b:
+ case Intrinsic::mips_addv_h:
+ case Intrinsic::mips_addv_w:
+ case Intrinsic::mips_addv_d:
+ return DAG.getNode(ISD::ADD, DL, Op->getValueType(0), Op->getOperand(1),
+ Op->getOperand(2));
+ case Intrinsic::mips_addvi_b:
+ case Intrinsic::mips_addvi_h:
+ case Intrinsic::mips_addvi_w:
+ case Intrinsic::mips_addvi_d:
+ return DAG.getNode(ISD::ADD, DL, Op->getValueType(0), Op->getOperand(1),
+ lowerMSASplatImm(Op, 2, DAG));
+ case Intrinsic::mips_and_v:
+ return DAG.getNode(ISD::AND, DL, Op->getValueType(0), Op->getOperand(1),
+ Op->getOperand(2));
+ case Intrinsic::mips_andi_b:
+ return DAG.getNode(ISD::AND, DL, Op->getValueType(0), Op->getOperand(1),
+ lowerMSASplatImm(Op, 2, DAG));
+ case Intrinsic::mips_bclr_b:
+ case Intrinsic::mips_bclr_h:
+ case Intrinsic::mips_bclr_w:
+ case Intrinsic::mips_bclr_d:
+ return lowerMSABitClear(Op, DAG);
+ case Intrinsic::mips_bclri_b:
+ case Intrinsic::mips_bclri_h:
+ case Intrinsic::mips_bclri_w:
+ case Intrinsic::mips_bclri_d:
+ return lowerMSABitClearImm(Op, DAG);
+ case Intrinsic::mips_binsli_b:
+ case Intrinsic::mips_binsli_h:
+ case Intrinsic::mips_binsli_w:
+ case Intrinsic::mips_binsli_d: {
+ EVT VecTy = Op->getValueType(0);
+ EVT EltTy = VecTy.getVectorElementType();
+ APInt Mask = APInt::getHighBitsSet(EltTy.getSizeInBits(),
+ Op->getConstantOperandVal(3));
+ return DAG.getNode(ISD::VSELECT, DL, VecTy,
+ DAG.getConstant(Mask, VecTy, true), Op->getOperand(1),
+ Op->getOperand(2));
+ }
+ case Intrinsic::mips_binsri_b:
+ case Intrinsic::mips_binsri_h:
+ case Intrinsic::mips_binsri_w:
+ case Intrinsic::mips_binsri_d: {
+ EVT VecTy = Op->getValueType(0);
+ EVT EltTy = VecTy.getVectorElementType();
+ APInt Mask = APInt::getLowBitsSet(EltTy.getSizeInBits(),
+ Op->getConstantOperandVal(3));
+ return DAG.getNode(ISD::VSELECT, DL, VecTy,
+ DAG.getConstant(Mask, VecTy, true), Op->getOperand(1),
+ Op->getOperand(2));
+ }
+ case Intrinsic::mips_bmnz_v:
+ return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0), Op->getOperand(3),
+ Op->getOperand(2), Op->getOperand(1));
+ case Intrinsic::mips_bmnzi_b:
+ return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0),
+ lowerMSASplatImm(Op, 3, DAG), Op->getOperand(2),
+ Op->getOperand(1));
+ case Intrinsic::mips_bmz_v:
+ return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0), Op->getOperand(3),
+ Op->getOperand(1), Op->getOperand(2));
+ case Intrinsic::mips_bmzi_b:
+ return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0),
+ lowerMSASplatImm(Op, 3, DAG), Op->getOperand(1),
+ Op->getOperand(2));
+ case Intrinsic::mips_bneg_b:
+ case Intrinsic::mips_bneg_h:
+ case Intrinsic::mips_bneg_w:
+ case Intrinsic::mips_bneg_d: {
+ EVT VecTy = Op->getValueType(0);
+ SDValue One = DAG.getConstant(1, VecTy);
+
+ return DAG.getNode(ISD::XOR, DL, VecTy, Op->getOperand(1),
+ DAG.getNode(ISD::SHL, DL, VecTy, One,
+ Op->getOperand(2)));
+ }
+ case Intrinsic::mips_bnegi_b:
+ case Intrinsic::mips_bnegi_h:
+ case Intrinsic::mips_bnegi_w:
+ case Intrinsic::mips_bnegi_d:
+ return lowerMSABinaryBitImmIntr(Op, DAG, ISD::XOR, Op->getOperand(2),
+ !Subtarget->isLittle());
+ case Intrinsic::mips_bnz_b:
+ case Intrinsic::mips_bnz_h:
+ case Intrinsic::mips_bnz_w:
+ case Intrinsic::mips_bnz_d:
+ return DAG.getNode(MipsISD::VALL_NONZERO, DL, Op->getValueType(0),
+ Op->getOperand(1));
+ case Intrinsic::mips_bnz_v:
+ return DAG.getNode(MipsISD::VANY_NONZERO, DL, Op->getValueType(0),
+ Op->getOperand(1));
+ case Intrinsic::mips_bsel_v:
+ return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0),
+ Op->getOperand(1), Op->getOperand(2),
+ Op->getOperand(3));
+ case Intrinsic::mips_bseli_b:
+ return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0),
+ Op->getOperand(1), Op->getOperand(2),
+ lowerMSASplatImm(Op, 3, DAG));
+ case Intrinsic::mips_bset_b:
+ case Intrinsic::mips_bset_h:
+ case Intrinsic::mips_bset_w:
+ case Intrinsic::mips_bset_d: {
+ EVT VecTy = Op->getValueType(0);
+ SDValue One = DAG.getConstant(1, VecTy);
+
+ return DAG.getNode(ISD::OR, DL, VecTy, Op->getOperand(1),
+ DAG.getNode(ISD::SHL, DL, VecTy, One,
+ Op->getOperand(2)));
+ }
+ case Intrinsic::mips_bseti_b:
+ case Intrinsic::mips_bseti_h:
+ case Intrinsic::mips_bseti_w:
+ case Intrinsic::mips_bseti_d:
+ return lowerMSABinaryBitImmIntr(Op, DAG, ISD::OR, Op->getOperand(2),
+ !Subtarget->isLittle());
+ case Intrinsic::mips_bz_b:
+ case Intrinsic::mips_bz_h:
+ case Intrinsic::mips_bz_w:
+ case Intrinsic::mips_bz_d:
+ return DAG.getNode(MipsISD::VALL_ZERO, DL, Op->getValueType(0),
+ Op->getOperand(1));
+ case Intrinsic::mips_bz_v:
+ return DAG.getNode(MipsISD::VANY_ZERO, DL, Op->getValueType(0),
+ Op->getOperand(1));
+ case Intrinsic::mips_ceq_b:
+ case Intrinsic::mips_ceq_h:
+ case Intrinsic::mips_ceq_w:
+ case Intrinsic::mips_ceq_d:
+ return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
+ Op->getOperand(2), ISD::SETEQ);
+ case Intrinsic::mips_ceqi_b:
+ case Intrinsic::mips_ceqi_h:
+ case Intrinsic::mips_ceqi_w:
+ case Intrinsic::mips_ceqi_d:
+ return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
+ lowerMSASplatImm(Op, 2, DAG), ISD::SETEQ);
+ case Intrinsic::mips_cle_s_b:
+ case Intrinsic::mips_cle_s_h:
+ case Intrinsic::mips_cle_s_w:
+ case Intrinsic::mips_cle_s_d:
+ return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
+ Op->getOperand(2), ISD::SETLE);
+ case Intrinsic::mips_clei_s_b:
+ case Intrinsic::mips_clei_s_h:
+ case Intrinsic::mips_clei_s_w:
+ case Intrinsic::mips_clei_s_d:
+ return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
+ lowerMSASplatImm(Op, 2, DAG), ISD::SETLE);
+ case Intrinsic::mips_cle_u_b:
+ case Intrinsic::mips_cle_u_h:
+ case Intrinsic::mips_cle_u_w:
+ case Intrinsic::mips_cle_u_d:
+ return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
+ Op->getOperand(2), ISD::SETULE);
+ case Intrinsic::mips_clei_u_b:
+ case Intrinsic::mips_clei_u_h:
+ case Intrinsic::mips_clei_u_w:
+ case Intrinsic::mips_clei_u_d:
+ return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
+ lowerMSASplatImm(Op, 2, DAG), ISD::SETULE);
+ case Intrinsic::mips_clt_s_b:
+ case Intrinsic::mips_clt_s_h:
+ case Intrinsic::mips_clt_s_w:
+ case Intrinsic::mips_clt_s_d:
+ return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
+ Op->getOperand(2), ISD::SETLT);
+ case Intrinsic::mips_clti_s_b:
+ case Intrinsic::mips_clti_s_h:
+ case Intrinsic::mips_clti_s_w:
+ case Intrinsic::mips_clti_s_d:
+ return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
+ lowerMSASplatImm(Op, 2, DAG), ISD::SETLT);
+ case Intrinsic::mips_clt_u_b:
+ case Intrinsic::mips_clt_u_h:
+ case Intrinsic::mips_clt_u_w:
+ case Intrinsic::mips_clt_u_d:
+ return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
+ Op->getOperand(2), ISD::SETULT);
+ case Intrinsic::mips_clti_u_b:
+ case Intrinsic::mips_clti_u_h:
+ case Intrinsic::mips_clti_u_w:
+ case Intrinsic::mips_clti_u_d:
+ return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
+ lowerMSASplatImm(Op, 2, DAG), ISD::SETULT);
+ case Intrinsic::mips_copy_s_b:
+ case Intrinsic::mips_copy_s_h:
+ case Intrinsic::mips_copy_s_w:
+ return lowerMSACopyIntr(Op, DAG, MipsISD::VEXTRACT_SEXT_ELT);
+ case Intrinsic::mips_copy_s_d:
+ // Don't lower directly into VEXTRACT_SEXT_ELT since i64 might be illegal.
+ // Instead lower to the generic EXTRACT_VECTOR_ELT node and let the type
+ // legalizer and EXTRACT_VECTOR_ELT lowering sort it out.
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op), Op->getValueType(0),
+ Op->getOperand(1), Op->getOperand(2));
+ case Intrinsic::mips_copy_u_b:
+ case Intrinsic::mips_copy_u_h:
+ case Intrinsic::mips_copy_u_w:
+ return lowerMSACopyIntr(Op, DAG, MipsISD::VEXTRACT_ZEXT_ELT);
+ case Intrinsic::mips_copy_u_d:
+ // Don't lower directly into VEXTRACT_ZEXT_ELT since i64 might be illegal.
+ // Instead lower to the generic EXTRACT_VECTOR_ELT node and let the type
+ // legalizer and EXTRACT_VECTOR_ELT lowering sort it out.
+ //
+ // Note: When i64 is illegal, this results in copy_s.w instructions instead
+ // of copy_u.w instructions. This makes no difference to the behaviour
+ // since i64 is only illegal when the register file is 32-bit.
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op), Op->getValueType(0),
+ Op->getOperand(1), Op->getOperand(2));
+ case Intrinsic::mips_div_s_b:
+ case Intrinsic::mips_div_s_h:
+ case Intrinsic::mips_div_s_w:
+ case Intrinsic::mips_div_s_d:
+ return DAG.getNode(ISD::SDIV, DL, Op->getValueType(0), Op->getOperand(1),
+ Op->getOperand(2));
+ case Intrinsic::mips_div_u_b:
+ case Intrinsic::mips_div_u_h:
+ case Intrinsic::mips_div_u_w:
+ case Intrinsic::mips_div_u_d:
+ return DAG.getNode(ISD::UDIV, DL, Op->getValueType(0), Op->getOperand(1),
+ Op->getOperand(2));
+ case Intrinsic::mips_fadd_w:
+ case Intrinsic::mips_fadd_d:
+ return DAG.getNode(ISD::FADD, DL, Op->getValueType(0), Op->getOperand(1),
+ Op->getOperand(2));
+ // Don't lower mips_fcaf_[wd] since LLVM folds SETFALSE condcodes away
+ case Intrinsic::mips_fceq_w:
+ case Intrinsic::mips_fceq_d:
+ return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
+ Op->getOperand(2), ISD::SETOEQ);
+ case Intrinsic::mips_fcle_w:
+ case Intrinsic::mips_fcle_d:
+ return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
+ Op->getOperand(2), ISD::SETOLE);
+ case Intrinsic::mips_fclt_w:
+ case Intrinsic::mips_fclt_d:
+ return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
+ Op->getOperand(2), ISD::SETOLT);
+ case Intrinsic::mips_fcne_w:
+ case Intrinsic::mips_fcne_d:
+ return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
+ Op->getOperand(2), ISD::SETONE);
+ case Intrinsic::mips_fcor_w:
+ case Intrinsic::mips_fcor_d:
+ return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
+ Op->getOperand(2), ISD::SETO);
+ case Intrinsic::mips_fcueq_w:
+ case Intrinsic::mips_fcueq_d:
+ return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
+ Op->getOperand(2), ISD::SETUEQ);
+ case Intrinsic::mips_fcule_w:
+ case Intrinsic::mips_fcule_d:
+ return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
+ Op->getOperand(2), ISD::SETULE);
+ case Intrinsic::mips_fcult_w:
+ case Intrinsic::mips_fcult_d:
+ return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
+ Op->getOperand(2), ISD::SETULT);
+ case Intrinsic::mips_fcun_w:
+ case Intrinsic::mips_fcun_d:
+ return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
+ Op->getOperand(2), ISD::SETUO);
+ case Intrinsic::mips_fcune_w:
+ case Intrinsic::mips_fcune_d:
+ return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
+ Op->getOperand(2), ISD::SETUNE);
+ case Intrinsic::mips_fdiv_w:
+ case Intrinsic::mips_fdiv_d:
+ return DAG.getNode(ISD::FDIV, DL, Op->getValueType(0), Op->getOperand(1),
+ Op->getOperand(2));
+ case Intrinsic::mips_ffint_u_w:
+ case Intrinsic::mips_ffint_u_d:
+ return DAG.getNode(ISD::UINT_TO_FP, DL, Op->getValueType(0),
+ Op->getOperand(1));
+ case Intrinsic::mips_ffint_s_w:
+ case Intrinsic::mips_ffint_s_d:
+ return DAG.getNode(ISD::SINT_TO_FP, DL, Op->getValueType(0),
+ Op->getOperand(1));
+ case Intrinsic::mips_fill_b:
+ case Intrinsic::mips_fill_h:
+ case Intrinsic::mips_fill_w:
+ case Intrinsic::mips_fill_d: {
+ SmallVector<SDValue, 16> Ops;
+ EVT ResTy = Op->getValueType(0);
+
+ for (unsigned i = 0; i < ResTy.getVectorNumElements(); ++i)
+ Ops.push_back(Op->getOperand(1));
+
+ // If ResTy is v2i64 then the type legalizer will break this node down into
+ // an equivalent v4i32.
+ return DAG.getNode(ISD::BUILD_VECTOR, DL, ResTy, &Ops[0], Ops.size());
+ }
+ case Intrinsic::mips_fexp2_w:
+ case Intrinsic::mips_fexp2_d: {
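+    // fexp2.[wd] computes ws * 2^wt, so expand to FMUL(op1, FEXP2(op2)).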
+ EVT ResTy = Op->getValueType(0);
+ return DAG.getNode(
+ ISD::FMUL, SDLoc(Op), ResTy, Op->getOperand(1),
+ DAG.getNode(ISD::FEXP2, SDLoc(Op), ResTy, Op->getOperand(2)));
+ }
+ case Intrinsic::mips_flog2_w:
+ case Intrinsic::mips_flog2_d:
+ return DAG.getNode(ISD::FLOG2, DL, Op->getValueType(0), Op->getOperand(1));
+ case Intrinsic::mips_fmadd_w:
+ case Intrinsic::mips_fmadd_d:
+ return DAG.getNode(ISD::FMA, SDLoc(Op), Op->getValueType(0),
+ Op->getOperand(1), Op->getOperand(2), Op->getOperand(3));
+ case Intrinsic::mips_fmul_w:
+ case Intrinsic::mips_fmul_d:
+ return DAG.getNode(ISD::FMUL, DL, Op->getValueType(0), Op->getOperand(1),
+ Op->getOperand(2));
+ case Intrinsic::mips_fmsub_w:
+ case Intrinsic::mips_fmsub_d: {
+ EVT ResTy = Op->getValueType(0);
+ return DAG.getNode(ISD::FSUB, SDLoc(Op), ResTy, Op->getOperand(1),
+ DAG.getNode(ISD::FMUL, SDLoc(Op), ResTy,
+ Op->getOperand(2), Op->getOperand(3)));
+ }
+ case Intrinsic::mips_frint_w:
+ case Intrinsic::mips_frint_d:
+ return DAG.getNode(ISD::FRINT, DL, Op->getValueType(0), Op->getOperand(1));
+ case Intrinsic::mips_fsqrt_w:
+ case Intrinsic::mips_fsqrt_d:
+ return DAG.getNode(ISD::FSQRT, DL, Op->getValueType(0), Op->getOperand(1));
+ case Intrinsic::mips_fsub_w:
+ case Intrinsic::mips_fsub_d:
+ return DAG.getNode(ISD::FSUB, DL, Op->getValueType(0), Op->getOperand(1),
+ Op->getOperand(2));
+ case Intrinsic::mips_ftrunc_u_w:
+ case Intrinsic::mips_ftrunc_u_d:
+ return DAG.getNode(ISD::FP_TO_UINT, DL, Op->getValueType(0),
+ Op->getOperand(1));
+ case Intrinsic::mips_ftrunc_s_w:
+ case Intrinsic::mips_ftrunc_s_d:
+ return DAG.getNode(ISD::FP_TO_SINT, DL, Op->getValueType(0),
+ Op->getOperand(1));
+ case Intrinsic::mips_ilvev_b:
+ case Intrinsic::mips_ilvev_h:
+ case Intrinsic::mips_ilvev_w:
+ case Intrinsic::mips_ilvev_d:
+ return DAG.getNode(MipsISD::ILVEV, DL, Op->getValueType(0),
+ Op->getOperand(1), Op->getOperand(2));
+ case Intrinsic::mips_ilvl_b:
+ case Intrinsic::mips_ilvl_h:
+ case Intrinsic::mips_ilvl_w:
+ case Intrinsic::mips_ilvl_d:
+ return DAG.getNode(MipsISD::ILVL, DL, Op->getValueType(0),
+ Op->getOperand(1), Op->getOperand(2));
+ case Intrinsic::mips_ilvod_b:
+ case Intrinsic::mips_ilvod_h:
+ case Intrinsic::mips_ilvod_w:
+ case Intrinsic::mips_ilvod_d:
+ return DAG.getNode(MipsISD::ILVOD, DL, Op->getValueType(0),
+ Op->getOperand(1), Op->getOperand(2));
+ case Intrinsic::mips_ilvr_b:
+ case Intrinsic::mips_ilvr_h:
+ case Intrinsic::mips_ilvr_w:
+ case Intrinsic::mips_ilvr_d:
+ return DAG.getNode(MipsISD::ILVR, DL, Op->getValueType(0),
+ Op->getOperand(1), Op->getOperand(2));
+ case Intrinsic::mips_insert_b:
+ case Intrinsic::mips_insert_h:
+ case Intrinsic::mips_insert_w:
+ case Intrinsic::mips_insert_d:
+ return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(Op), Op->getValueType(0),
+ Op->getOperand(1), Op->getOperand(3), Op->getOperand(2));
+ case Intrinsic::mips_ldi_b:
+ case Intrinsic::mips_ldi_h:
+ case Intrinsic::mips_ldi_w:
+ case Intrinsic::mips_ldi_d:
+ return lowerMSASplatImm(Op, 1, DAG);
+ case Intrinsic::mips_lsa: {
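+    // Expand lsa to op1 + (op2 << op3).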
+ EVT ResTy = Op->getValueType(0);
+ return DAG.getNode(ISD::ADD, SDLoc(Op), ResTy, Op->getOperand(1),
+ DAG.getNode(ISD::SHL, SDLoc(Op), ResTy,
+ Op->getOperand(2), Op->getOperand(3)));
+ }
+ case Intrinsic::mips_maddv_b:
+ case Intrinsic::mips_maddv_h:
+ case Intrinsic::mips_maddv_w:
+ case Intrinsic::mips_maddv_d: {
+ EVT ResTy = Op->getValueType(0);
+ return DAG.getNode(ISD::ADD, SDLoc(Op), ResTy, Op->getOperand(1),
+ DAG.getNode(ISD::MUL, SDLoc(Op), ResTy,
+ Op->getOperand(2), Op->getOperand(3)));
+ }
+ case Intrinsic::mips_max_s_b:
+ case Intrinsic::mips_max_s_h:
+ case Intrinsic::mips_max_s_w:
+ case Intrinsic::mips_max_s_d:
+ return DAG.getNode(MipsISD::VSMAX, DL, Op->getValueType(0),
+ Op->getOperand(1), Op->getOperand(2));
+ case Intrinsic::mips_max_u_b:
+ case Intrinsic::mips_max_u_h:
+ case Intrinsic::mips_max_u_w:
+ case Intrinsic::mips_max_u_d:
+ return DAG.getNode(MipsISD::VUMAX, DL, Op->getValueType(0),
+ Op->getOperand(1), Op->getOperand(2));
+ case Intrinsic::mips_maxi_s_b:
+ case Intrinsic::mips_maxi_s_h:
+ case Intrinsic::mips_maxi_s_w:
+ case Intrinsic::mips_maxi_s_d:
+ return DAG.getNode(MipsISD::VSMAX, DL, Op->getValueType(0),
+ Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG));
+ case Intrinsic::mips_maxi_u_b:
+ case Intrinsic::mips_maxi_u_h:
+ case Intrinsic::mips_maxi_u_w:
+ case Intrinsic::mips_maxi_u_d:
+ return DAG.getNode(MipsISD::VUMAX, DL, Op->getValueType(0),
+ Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG));
+ case Intrinsic::mips_min_s_b:
+ case Intrinsic::mips_min_s_h:
+ case Intrinsic::mips_min_s_w:
+ case Intrinsic::mips_min_s_d:
+ return DAG.getNode(MipsISD::VSMIN, DL, Op->getValueType(0),
+ Op->getOperand(1), Op->getOperand(2));
+ case Intrinsic::mips_min_u_b:
+ case Intrinsic::mips_min_u_h:
+ case Intrinsic::mips_min_u_w:
+ case Intrinsic::mips_min_u_d:
+ return DAG.getNode(MipsISD::VUMIN, DL, Op->getValueType(0),
+ Op->getOperand(1), Op->getOperand(2));
+ case Intrinsic::mips_mini_s_b:
+ case Intrinsic::mips_mini_s_h:
+ case Intrinsic::mips_mini_s_w:
+ case Intrinsic::mips_mini_s_d:
+ return DAG.getNode(MipsISD::VSMIN, DL, Op->getValueType(0),
+ Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG));
+ case Intrinsic::mips_mini_u_b:
+ case Intrinsic::mips_mini_u_h:
+ case Intrinsic::mips_mini_u_w:
+ case Intrinsic::mips_mini_u_d:
+ return DAG.getNode(MipsISD::VUMIN, DL, Op->getValueType(0),
+ Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG));
+ case Intrinsic::mips_mod_s_b:
+ case Intrinsic::mips_mod_s_h:
+ case Intrinsic::mips_mod_s_w:
+ case Intrinsic::mips_mod_s_d:
+ return DAG.getNode(ISD::SREM, DL, Op->getValueType(0), Op->getOperand(1),
+ Op->getOperand(2));
+ case Intrinsic::mips_mod_u_b:
+ case Intrinsic::mips_mod_u_h:
+ case Intrinsic::mips_mod_u_w:
+ case Intrinsic::mips_mod_u_d:
+ return DAG.getNode(ISD::UREM, DL, Op->getValueType(0), Op->getOperand(1),
+ Op->getOperand(2));
+ case Intrinsic::mips_mulv_b:
+ case Intrinsic::mips_mulv_h:
+ case Intrinsic::mips_mulv_w:
+ case Intrinsic::mips_mulv_d:
+ return DAG.getNode(ISD::MUL, DL, Op->getValueType(0), Op->getOperand(1),
+ Op->getOperand(2));
+ case Intrinsic::mips_msubv_b:
+ case Intrinsic::mips_msubv_h:
+ case Intrinsic::mips_msubv_w:
+ case Intrinsic::mips_msubv_d: {
+ EVT ResTy = Op->getValueType(0);
+ return DAG.getNode(ISD::SUB, SDLoc(Op), ResTy, Op->getOperand(1),
+ DAG.getNode(ISD::MUL, SDLoc(Op), ResTy,
+ Op->getOperand(2), Op->getOperand(3)));
+ }
+ case Intrinsic::mips_nlzc_b:
+ case Intrinsic::mips_nlzc_h:
+ case Intrinsic::mips_nlzc_w:
+ case Intrinsic::mips_nlzc_d:
+ return DAG.getNode(ISD::CTLZ, DL, Op->getValueType(0), Op->getOperand(1));
+ case Intrinsic::mips_nor_v: {
+ SDValue Res = DAG.getNode(ISD::OR, DL, Op->getValueType(0),
+ Op->getOperand(1), Op->getOperand(2));
+ return DAG.getNOT(DL, Res, Res->getValueType(0));
+ }
+ case Intrinsic::mips_nori_b: {
+ SDValue Res = DAG.getNode(ISD::OR, DL, Op->getValueType(0),
+ Op->getOperand(1),
+ lowerMSASplatImm(Op, 2, DAG));
+ return DAG.getNOT(DL, Res, Res->getValueType(0));
+ }
+ case Intrinsic::mips_or_v:
+ return DAG.getNode(ISD::OR, DL, Op->getValueType(0), Op->getOperand(1),
+ Op->getOperand(2));
+ case Intrinsic::mips_ori_b:
+ return DAG.getNode(ISD::OR, DL, Op->getValueType(0),
+ Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG));
+ case Intrinsic::mips_pckev_b:
+ case Intrinsic::mips_pckev_h:
+ case Intrinsic::mips_pckev_w:
+ case Intrinsic::mips_pckev_d:
+ return DAG.getNode(MipsISD::PCKEV, DL, Op->getValueType(0),
+ Op->getOperand(1), Op->getOperand(2));
+ case Intrinsic::mips_pckod_b:
+ case Intrinsic::mips_pckod_h:
+ case Intrinsic::mips_pckod_w:
+ case Intrinsic::mips_pckod_d:
+ return DAG.getNode(MipsISD::PCKOD, DL, Op->getValueType(0),
+ Op->getOperand(1), Op->getOperand(2));
+ case Intrinsic::mips_pcnt_b:
+ case Intrinsic::mips_pcnt_h:
+ case Intrinsic::mips_pcnt_w:
+ case Intrinsic::mips_pcnt_d:
+ return DAG.getNode(ISD::CTPOP, DL, Op->getValueType(0), Op->getOperand(1));
+ case Intrinsic::mips_shf_b:
+ case Intrinsic::mips_shf_h:
+ case Intrinsic::mips_shf_w:
+ return DAG.getNode(MipsISD::SHF, DL, Op->getValueType(0),
+ Op->getOperand(2), Op->getOperand(1));
+ case Intrinsic::mips_sll_b:
+ case Intrinsic::mips_sll_h:
+ case Intrinsic::mips_sll_w:
+ case Intrinsic::mips_sll_d:
+ return DAG.getNode(ISD::SHL, DL, Op->getValueType(0), Op->getOperand(1),
+ Op->getOperand(2));
+ case Intrinsic::mips_slli_b:
+ case Intrinsic::mips_slli_h:
+ case Intrinsic::mips_slli_w:
+ case Intrinsic::mips_slli_d:
+ return DAG.getNode(ISD::SHL, DL, Op->getValueType(0),
+ Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG));
+ case Intrinsic::mips_splat_b:
+ case Intrinsic::mips_splat_h:
+ case Intrinsic::mips_splat_w:
+ case Intrinsic::mips_splat_d:
+ // We can't lower via VECTOR_SHUFFLE because it requires constant shuffle
+ // masks, nor can we lower via BUILD_VECTOR & EXTRACT_VECTOR_ELT because
+ // EXTRACT_VECTOR_ELT can't extract i64's on MIPS32.
+ // Instead we lower to MipsISD::VSHF and match from there.
+ return DAG.getNode(MipsISD::VSHF, DL, Op->getValueType(0),
+ lowerMSASplatZExt(Op, 2, DAG), Op->getOperand(1),
+ Op->getOperand(1));
+ case Intrinsic::mips_splati_b:
+ case Intrinsic::mips_splati_h:
+ case Intrinsic::mips_splati_w:
+ case Intrinsic::mips_splati_d:
+ return DAG.getNode(MipsISD::VSHF, DL, Op->getValueType(0),
+ lowerMSASplatImm(Op, 2, DAG), Op->getOperand(1),
+ Op->getOperand(1));
+ case Intrinsic::mips_sra_b:
+ case Intrinsic::mips_sra_h:
+ case Intrinsic::mips_sra_w:
+ case Intrinsic::mips_sra_d:
+ return DAG.getNode(ISD::SRA, DL, Op->getValueType(0), Op->getOperand(1),
+ Op->getOperand(2));
+ case Intrinsic::mips_srai_b:
+ case Intrinsic::mips_srai_h:
+ case Intrinsic::mips_srai_w:
+ case Intrinsic::mips_srai_d:
+ return DAG.getNode(ISD::SRA, DL, Op->getValueType(0),
+ Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG));
+ case Intrinsic::mips_srl_b:
+ case Intrinsic::mips_srl_h:
+ case Intrinsic::mips_srl_w:
+ case Intrinsic::mips_srl_d:
+ return DAG.getNode(ISD::SRL, DL, Op->getValueType(0), Op->getOperand(1),
+ Op->getOperand(2));
+ case Intrinsic::mips_srli_b:
+ case Intrinsic::mips_srli_h:
+ case Intrinsic::mips_srli_w:
+ case Intrinsic::mips_srli_d:
+ return DAG.getNode(ISD::SRL, DL, Op->getValueType(0),
+ Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG));
+ case Intrinsic::mips_subv_b:
+ case Intrinsic::mips_subv_h:
+ case Intrinsic::mips_subv_w:
+ case Intrinsic::mips_subv_d:
+ return DAG.getNode(ISD::SUB, DL, Op->getValueType(0), Op->getOperand(1),
+ Op->getOperand(2));
+ case Intrinsic::mips_subvi_b:
+ case Intrinsic::mips_subvi_h:
+ case Intrinsic::mips_subvi_w:
+ case Intrinsic::mips_subvi_d:
+ return DAG.getNode(ISD::SUB, DL, Op->getValueType(0),
+ Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG));
+ case Intrinsic::mips_vshf_b:
+ case Intrinsic::mips_vshf_h:
+ case Intrinsic::mips_vshf_w:
+ case Intrinsic::mips_vshf_d:
+ return DAG.getNode(MipsISD::VSHF, DL, Op->getValueType(0),
+ Op->getOperand(1), Op->getOperand(2), Op->getOperand(3));
+ case Intrinsic::mips_xor_v:
+ return DAG.getNode(ISD::XOR, DL, Op->getValueType(0), Op->getOperand(1),
+ Op->getOperand(2));
+ case Intrinsic::mips_xori_b:
+ return DAG.getNode(ISD::XOR, DL, Op->getValueType(0),
+ Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG));
}
}
+static SDValue lowerMSALoadIntr(SDValue Op, SelectionDAG &DAG, unsigned Intr) {
+ SDLoc DL(Op);
+ SDValue ChainIn = Op->getOperand(0);
+ SDValue Address = Op->getOperand(2);
+ SDValue Offset = Op->getOperand(3);
+ EVT ResTy = Op->getValueType(0);
+ EVT PtrTy = Address->getValueType(0);
+
+ Address = DAG.getNode(ISD::ADD, DL, PtrTy, Address, Offset);
+
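+  // The trailing getLoad arguments are isVolatile, isNonTemporal, isInvariant,
+  // and the alignment; the vector access is emitted with 16-byte alignment.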
+ return DAG.getLoad(ResTy, DL, ChainIn, Address, MachinePointerInfo(), false,
+ false, false, 16);
+}
+
SDValue MipsSETargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
SelectionDAG &DAG) const {
- switch (cast<ConstantSDNode>(Op->getOperand(1))->getZExtValue()) {
+ unsigned Intr = cast<ConstantSDNode>(Op->getOperand(1))->getZExtValue();
+ switch (Intr) {
default:
return SDValue();
case Intrinsic::mips_extp:
@@ -749,7 +2078,522 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
return lowerDSPIntr(Op, DAG, MipsISD::DPSQX_S_W_PH);
case Intrinsic::mips_dpsqx_sa_w_ph:
return lowerDSPIntr(Op, DAG, MipsISD::DPSQX_SA_W_PH);
+ case Intrinsic::mips_ld_b:
+ case Intrinsic::mips_ld_h:
+ case Intrinsic::mips_ld_w:
+ case Intrinsic::mips_ld_d:
+ return lowerMSALoadIntr(Op, DAG, Intr);
+ }
+}
+
+static SDValue lowerMSAStoreIntr(SDValue Op, SelectionDAG &DAG, unsigned Intr) {
+ SDLoc DL(Op);
+ SDValue ChainIn = Op->getOperand(0);
+ SDValue Value = Op->getOperand(2);
+ SDValue Address = Op->getOperand(3);
+ SDValue Offset = Op->getOperand(4);
+ EVT PtrTy = Address->getValueType(0);
+
+ Address = DAG.getNode(ISD::ADD, DL, PtrTy, Address, Offset);
+
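+  // The trailing getStore arguments are isVolatile, isNonTemporal, and the
+  // alignment; the vector access is emitted with 16-byte alignment.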
+ return DAG.getStore(ChainIn, DL, Value, Address, MachinePointerInfo(), false,
+ false, 16);
+}
+
+SDValue MipsSETargetLowering::lowerINTRINSIC_VOID(SDValue Op,
+ SelectionDAG &DAG) const {
+ unsigned Intr = cast<ConstantSDNode>(Op->getOperand(1))->getZExtValue();
+ switch (Intr) {
+ default:
+ return SDValue();
+ case Intrinsic::mips_st_b:
+ case Intrinsic::mips_st_h:
+ case Intrinsic::mips_st_w:
+ case Intrinsic::mips_st_d:
+ return lowerMSAStoreIntr(Op, DAG, Intr);
+ }
+}
+
+/// \brief Check if the given BuildVectorSDNode is a splat.
+/// This method currently relies on DAG nodes being reused when equivalent,
+/// so it's possible for this to return false even when isConstantSplat returns
+/// true.
+static bool isSplatVector(const BuildVectorSDNode *N) {
+  unsigned nOps = N->getNumOperands();
+  assert(nOps > 1 && "isSplatVector expects a build vector of 2+ operands");
+
+ SDValue Operand0 = N->getOperand(0);
+
+  for (unsigned i = 1; i < nOps; ++i) {
+ if (N->getOperand(i) != Operand0)
+ return false;
+ }
+
+ return true;
+}
+
+// Lower ISD::EXTRACT_VECTOR_ELT into MipsISD::VEXTRACT_SEXT_ELT.
+//
+// The non-value bits resulting from ISD::EXTRACT_VECTOR_ELT are undefined. We
+// choose to sign-extend but we could have equally chosen zero-extend. The
+// DAGCombiner will fold any sign/zero extension of the ISD::EXTRACT_VECTOR_ELT
+// result into this node later (possibly changing it to a zero-extend in the
+// process).
+SDValue MipsSETargetLowering::
+lowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ EVT ResTy = Op->getValueType(0);
+ SDValue Op0 = Op->getOperand(0);
+ EVT VecTy = Op0->getValueType(0);
+
+ if (!VecTy.is128BitVector())
+ return SDValue();
+
+ if (ResTy.isInteger()) {
+ SDValue Op1 = Op->getOperand(1);
+ EVT EltTy = VecTy.getVectorElementType();
+ return DAG.getNode(MipsISD::VEXTRACT_SEXT_ELT, DL, ResTy, Op0, Op1,
+ DAG.getValueType(EltTy));
+ }
+
+ return Op;
+}
+
+static bool isConstantOrUndef(const SDValue Op) {
+ if (Op->getOpcode() == ISD::UNDEF)
+ return true;
+  if (isa<ConstantSDNode>(Op))
+    return true;
+  if (isa<ConstantFPSDNode>(Op))
+ return true;
+ return false;
+}
+
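+// Test whether the given BUILD_VECTOR has at least one constant or undef
+// operand (it does not require that all operands be constant or undef).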
+static bool isConstantOrUndefBUILD_VECTOR(const BuildVectorSDNode *Op) {
+ for (unsigned i = 0; i < Op->getNumOperands(); ++i)
+ if (isConstantOrUndef(Op->getOperand(i)))
+ return true;
+ return false;
+}
+
+// Lowers ISD::BUILD_VECTOR into appropriate SelectionDAG nodes for the
+// backend.
+//
+// Lowers according to the following rules:
+// - Constant splats are legal as-is as long as the SplatBitSize is a power of
+// 2 less than or equal to 64 and the value fits into a signed 10-bit
+// immediate
+// - Constant splats are lowered to bitconverted BUILD_VECTORs if SplatBitSize
+// is a power of 2 less than or equal to 64 and the value does not fit into a
+// signed 10-bit immediate
+// - Non-constant splats are legal as-is.
+// - Non-constant non-splats are lowered to sequences of INSERT_VECTOR_ELT.
+// - All others are illegal and must be expanded.
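+//
+// For example (illustrative values): a v16i8 splat of 5 has a SplatBitSize of
+// 8 and fits into a simm10, so it is legal as-is and can match ldi.b, while a
+// v16i8 constant whose bytes repeat the pattern 0,1,2,3 splats with a
+// SplatBitSize of 32 and is rebuilt as a v4i32 constant, then bitcast back.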
+SDValue MipsSETargetLowering::lowerBUILD_VECTOR(SDValue Op,
+ SelectionDAG &DAG) const {
+ BuildVectorSDNode *Node = cast<BuildVectorSDNode>(Op);
+ EVT ResTy = Op->getValueType(0);
+ SDLoc DL(Op);
+ APInt SplatValue, SplatUndef;
+ unsigned SplatBitSize;
+ bool HasAnyUndefs;
+
+ if (!Subtarget->hasMSA() || !ResTy.is128BitVector())
+ return SDValue();
+
+ if (Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
+ HasAnyUndefs, 8,
+ !Subtarget->isLittle()) && SplatBitSize <= 64) {
+ // We can only cope with 8, 16, 32, or 64-bit elements
+ if (SplatBitSize != 8 && SplatBitSize != 16 && SplatBitSize != 32 &&
+ SplatBitSize != 64)
+ return SDValue();
+
+ // If the value fits into a simm10 then we can use ldi.[bhwd]
+ // However, if it isn't an integer type we will have to bitcast from an
+ // integer type first. Also, if there are any undefs, we must lower them
+ // to defined values first.
+ if (ResTy.isInteger() && !HasAnyUndefs && SplatValue.isSignedIntN(10))
+ return Op;
+
+ EVT ViaVecTy;
+
+ switch (SplatBitSize) {
+ default:
+ return SDValue();
+ case 8:
+ ViaVecTy = MVT::v16i8;
+ break;
+ case 16:
+ ViaVecTy = MVT::v8i16;
+ break;
+ case 32:
+ ViaVecTy = MVT::v4i32;
+ break;
+ case 64:
+ // There's no fill.d to fall back on for 64-bit values
+ return SDValue();
+ }
+
+ // SelectionDAG::getConstant will promote SplatValue appropriately.
+ SDValue Result = DAG.getConstant(SplatValue, ViaVecTy);
+
+ // Bitcast to the type we originally wanted
+ if (ViaVecTy != ResTy)
+ Result = DAG.getNode(ISD::BITCAST, SDLoc(Node), ResTy, Result);
+
+ return Result;
+ } else if (isSplatVector(Node))
+ return Op;
+ else if (!isConstantOrUndefBUILD_VECTOR(Node)) {
+ // Use INSERT_VECTOR_ELT operations rather than expand to stores.
+ // The resulting code is the same length as the expansion, but it doesn't
+    // use memory operations.
+ EVT ResTy = Node->getValueType(0);
+
+ assert(ResTy.isVector());
+
+ unsigned NumElts = ResTy.getVectorNumElements();
+ SDValue Vector = DAG.getUNDEF(ResTy);
+ for (unsigned i = 0; i < NumElts; ++i) {
+ Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ResTy, Vector,
+ Node->getOperand(i),
+ DAG.getConstant(i, MVT::i32));
+ }
+ return Vector;
+ }
+
+ return SDValue();
+}
+
+// Lower VECTOR_SHUFFLE into SHF (if possible).
+//
+// SHF splits the vector into blocks of four elements, then shuffles these
+// elements according to a <4 x i2> constant (encoded as an integer immediate).
+//
+// It is therefore possible to lower into SHF when the mask takes the form:
+// <a, b, c, d, a+4, b+4, c+4, d+4, a+8, b+8, c+8, d+8, ...>
+// When undefs appear they are treated as if they were whatever value is
+// necessary in order to fit the above form.
+//
+// For example:
+// %2 = shufflevector <8 x i16> %0, <8 x i16> undef,
+// <8 x i32> <i32 3, i32 2, i32 1, i32 0,
+// i32 7, i32 6, i32 5, i32 4>
+// is lowered to:
+// (SHF_H $w0, $w1, 27)
+// where the 27 comes from:
+// 3 + (2 << 2) + (1 << 4) + (0 << 6)
+static SDValue lowerVECTOR_SHUFFLE_SHF(SDValue Op, EVT ResTy,
+ SmallVector<int, 16> Indices,
+ SelectionDAG &DAG) {
+ int SHFIndices[4] = { -1, -1, -1, -1 };
+
+ if (Indices.size() < 4)
+ return SDValue();
+
+ for (unsigned i = 0; i < 4; ++i) {
+ for (unsigned j = i; j < Indices.size(); j += 4) {
+ int Idx = Indices[j];
+
+ // Convert from vector index to 4-element subvector index
+ // If an index refers to an element outside of the subvector then give up
+ if (Idx != -1) {
+ Idx -= 4 * (j / 4);
+ if (Idx < 0 || Idx >= 4)
+ return SDValue();
+ }
+
+ // If the mask has an undef, replace it with the current index.
+ // Note that it might still be undef if the current index is also undef
+ if (SHFIndices[i] == -1)
+ SHFIndices[i] = Idx;
+
+ // Check that non-undef values are the same as in the mask. If they
+ // aren't then give up
+ if (!(Idx == -1 || Idx == SHFIndices[i]))
+ return SDValue();
+ }
+ }
+
+ // Calculate the immediate. Replace any remaining undefs with zero
+ APInt Imm(32, 0);
+ for (int i = 3; i >= 0; --i) {
+ int Idx = SHFIndices[i];
+
+ if (Idx == -1)
+ Idx = 0;
+
+ Imm <<= 2;
+ Imm |= Idx & 0x3;
+ }
+
+ return DAG.getNode(MipsISD::SHF, SDLoc(Op), ResTy,
+ DAG.getConstant(Imm, MVT::i32), Op->getOperand(0));
+}
+
+// Lower VECTOR_SHUFFLE into ILVEV (if possible).
+//
+// ILVEV interleaves the even elements from each vector.
+//
+// It is possible to lower into ILVEV when the mask takes the form:
+// <0, n, 2, n+2, 4, n+4, ...>
+// where n is the number of elements in the vector.
+//
+// When undefs appear in the mask they are treated as if they were whatever
+// value is necessary in order to fit the above form.
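+//
+// For example, for v8i16 (n == 8) the mask is <0, 8, 2, 10, 4, 12, 6, 14>.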
+static SDValue lowerVECTOR_SHUFFLE_ILVEV(SDValue Op, EVT ResTy,
+ SmallVector<int, 16> Indices,
+ SelectionDAG &DAG) {
+  assert((Indices.size() % 2) == 0);
+ int WsIdx = 0;
+ int WtIdx = ResTy.getVectorNumElements();
+
+ for (unsigned i = 0; i < Indices.size(); i += 2) {
+ if (Indices[i] != -1 && Indices[i] != WsIdx)
+ return SDValue();
+ if (Indices[i+1] != -1 && Indices[i+1] != WtIdx)
+ return SDValue();
+ WsIdx += 2;
+ WtIdx += 2;
+ }
+
+ return DAG.getNode(MipsISD::ILVEV, SDLoc(Op), ResTy, Op->getOperand(0),
+ Op->getOperand(1));
+}
+
+// Lower VECTOR_SHUFFLE into ILVOD (if possible).
+//
+// ILVOD interleaves the odd elements from each vector.
+//
+// It is possible to lower into ILVOD when the mask takes the form:
+// <1, n+1, 3, n+3, 5, n+5, ...>
+// where n is the number of elements in the vector.
+//
+// When undefs appear in the mask they are treated as if they were whatever
+// value is necessary in order to fit the above form.
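+//
+// For example, for v8i16 (n == 8) the mask is <1, 9, 3, 11, 5, 13, 7, 15>.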
+static SDValue lowerVECTOR_SHUFFLE_ILVOD(SDValue Op, EVT ResTy,
+ SmallVector<int, 16> Indices,
+ SelectionDAG &DAG) {
+  assert((Indices.size() % 2) == 0);
+ int WsIdx = 1;
+ int WtIdx = ResTy.getVectorNumElements() + 1;
+
+ for (unsigned i = 0; i < Indices.size(); i += 2) {
+ if (Indices[i] != -1 && Indices[i] != WsIdx)
+ return SDValue();
+ if (Indices[i+1] != -1 && Indices[i+1] != WtIdx)
+ return SDValue();
+ WsIdx += 2;
+ WtIdx += 2;
+ }
+
+ return DAG.getNode(MipsISD::ILVOD, SDLoc(Op), ResTy, Op->getOperand(0),
+ Op->getOperand(1));
+}
+
+// Lower VECTOR_SHUFFLE into ILVL (if possible).
+//
+// ILVL interleaves consecutive elements from the left half of each vector.
+//
+// It is possible to lower into ILVL when the mask takes the form:
+// <0, n, 1, n+1, 2, n+2, ...>
+// where n is the number of elements in the vector.
+//
+// When undefs appear in the mask they are treated as if they were whatever
+// value is necessary in order to fit the above form.
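+//
+// For example, for v8i16 (n == 8) the mask is <0, 8, 1, 9, 2, 10, 3, 11>.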
+static SDValue lowerVECTOR_SHUFFLE_ILVL(SDValue Op, EVT ResTy,
+ SmallVector<int, 16> Indices,
+ SelectionDAG &DAG) {
+  assert((Indices.size() % 2) == 0);
+ int WsIdx = 0;
+ int WtIdx = ResTy.getVectorNumElements();
+
+ for (unsigned i = 0; i < Indices.size(); i += 2) {
+ if (Indices[i] != -1 && Indices[i] != WsIdx)
+ return SDValue();
+ if (Indices[i+1] != -1 && Indices[i+1] != WtIdx)
+ return SDValue();
+    WsIdx++;
+    WtIdx++;
}
+
+ return DAG.getNode(MipsISD::ILVL, SDLoc(Op), ResTy, Op->getOperand(0),
+ Op->getOperand(1));
+}
+
+// Lower VECTOR_SHUFFLE into ILVR (if possible).
+//
+// ILVR interleaves consecutive elements from the right half of each vector.
+//
+// It is possible to lower into ILVR when the mask takes the form:
+// <x, n+x, x+1, n+x+1, x+2, n+x+2, ...>
+// where n is the number of elements in the vector and x is half n.
+//
+// When undefs appear in the mask they are treated as if they were whatever
+// value is necessary in order to fit the above form.
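+//
+// For example, for v8i16 (n == 8, x == 4) the mask is
+//   <4, 12, 5, 13, 6, 14, 7, 15>.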
+static SDValue lowerVECTOR_SHUFFLE_ILVR(SDValue Op, EVT ResTy,
+ SmallVector<int, 16> Indices,
+ SelectionDAG &DAG) {
+  assert((Indices.size() % 2) == 0);
+ unsigned NumElts = ResTy.getVectorNumElements();
+ int WsIdx = NumElts / 2;
+ int WtIdx = NumElts + NumElts / 2;
+
+ for (unsigned i = 0; i < Indices.size(); i += 2) {
+ if (Indices[i] != -1 && Indices[i] != WsIdx)
+ return SDValue();
+ if (Indices[i+1] != -1 && Indices[i+1] != WtIdx)
+ return SDValue();
+    WsIdx++;
+    WtIdx++;
+ }
+
+ return DAG.getNode(MipsISD::ILVR, SDLoc(Op), ResTy, Op->getOperand(0),
+ Op->getOperand(1));
+}
+
+// Lower VECTOR_SHUFFLE into PCKEV (if possible).
+//
+// PCKEV copies the even elements of each vector into the result vector.
+//
+// It is possible to lower into PCKEV when the mask takes the form:
+// <0, 2, 4, ..., n, n+2, n+4, ...>
+// where n is the number of elements in the vector.
+//
+// When undefs appear in the mask they are treated as if they were whatever
+// value is necessary in order to fit the above form.
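+//
+// For example, for v8i16 (n == 8) the mask is <0, 2, 4, 6, 8, 10, 12, 14>.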
+static SDValue lowerVECTOR_SHUFFLE_PCKEV(SDValue Op, EVT ResTy,
+ SmallVector<int, 16> Indices,
+ SelectionDAG &DAG) {
+  assert((Indices.size() % 2) == 0);
+ int Idx = 0;
+
+ for (unsigned i = 0; i < Indices.size(); ++i) {
+ if (Indices[i] != -1 && Indices[i] != Idx)
+ return SDValue();
+ Idx += 2;
+ }
+
+ return DAG.getNode(MipsISD::PCKEV, SDLoc(Op), ResTy, Op->getOperand(0),
+ Op->getOperand(1));
+}
+
+// Lower VECTOR_SHUFFLE into PCKOD (if possible).
+//
+// PCKOD copies the odd elements of each vector into the result vector.
+//
+// It is possible to lower into PCKOD when the mask takes the form:
+// <1, 3, 5, ..., n+1, n+3, n+5, ...>
+// where n is the number of elements in the vector.
+//
+// When undefs appear in the mask they are treated as if they were whatever
+// value is necessary in order to fit the above form.
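+//
+// For example, for v8i16 (n == 8) the mask is <1, 3, 5, 7, 9, 11, 13, 15>.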
+static SDValue lowerVECTOR_SHUFFLE_PCKOD(SDValue Op, EVT ResTy,
+ SmallVector<int, 16> Indices,
+ SelectionDAG &DAG) {
+  assert((Indices.size() % 2) == 0);
+ int Idx = 1;
+
+ for (unsigned i = 0; i < Indices.size(); ++i) {
+ if (Indices[i] != -1 && Indices[i] != Idx)
+ return SDValue();
+ Idx += 2;
+ }
+
+ return DAG.getNode(MipsISD::PCKOD, SDLoc(Op), ResTy, Op->getOperand(0),
+ Op->getOperand(1));
+}
+
+// Lower VECTOR_SHUFFLE into VSHF.
+//
+// This mostly consists of converting the shuffle indices in Indices into a
+// BUILD_VECTOR and adding it as an operand to the resulting VSHF. There is
+// also code to eliminate unused operands of the VECTOR_SHUFFLE. For example,
+// if the type is v8i16 and all the indices are less than 8 then the second
+// operand is unused and can be replaced with anything. We choose to replace it
+// with the used operand since this reduces the number of instructions overall.
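+//
+// For example, a v8i16 shuffle whose indices all lie in [0, 8) references only
+// the first operand, so both vector operands of the VSHF are set to it.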
+static SDValue lowerVECTOR_SHUFFLE_VSHF(SDValue Op, EVT ResTy,
+ SmallVector<int, 16> Indices,
+ SelectionDAG &DAG) {
+ SmallVector<SDValue, 16> Ops;
+ SDValue Op0;
+ SDValue Op1;
+ EVT MaskVecTy = ResTy.changeVectorElementTypeToInteger();
+ EVT MaskEltTy = MaskVecTy.getVectorElementType();
+ bool Using1stVec = false;
+ bool Using2ndVec = false;
+ SDLoc DL(Op);
+ int ResTyNumElts = ResTy.getVectorNumElements();
+
+ for (int i = 0; i < ResTyNumElts; ++i) {
+ // Idx == -1 means UNDEF
+ int Idx = Indices[i];
+
+ if (0 <= Idx && Idx < ResTyNumElts)
+ Using1stVec = true;
+ if (ResTyNumElts <= Idx && Idx < ResTyNumElts * 2)
+ Using2ndVec = true;
+ }
+
+ for (SmallVector<int, 16>::iterator I = Indices.begin(); I != Indices.end();
+ ++I)
+ Ops.push_back(DAG.getTargetConstant(*I, MaskEltTy));
+
+ SDValue MaskVec = DAG.getNode(ISD::BUILD_VECTOR, DL, MaskVecTy, &Ops[0],
+ Ops.size());
+
+ if (Using1stVec && Using2ndVec) {
+ Op0 = Op->getOperand(0);
+ Op1 = Op->getOperand(1);
+ } else if (Using1stVec)
+ Op0 = Op1 = Op->getOperand(0);
+ else if (Using2ndVec)
+ Op0 = Op1 = Op->getOperand(1);
+ else
+ llvm_unreachable("shuffle vector mask references neither vector operand?");
+
+ return DAG.getNode(MipsISD::VSHF, DL, ResTy, MaskVec, Op0, Op1);
+}
+
+// Lower VECTOR_SHUFFLE into one of a number of instructions depending on the
+// indices in the shuffle.
+SDValue MipsSETargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
+ SelectionDAG &DAG) const {
+ ShuffleVectorSDNode *Node = cast<ShuffleVectorSDNode>(Op);
+ EVT ResTy = Op->getValueType(0);
+
+ if (!ResTy.is128BitVector())
+ return SDValue();
+
+ int ResTyNumElts = ResTy.getVectorNumElements();
+ SmallVector<int, 16> Indices;
+
+ for (int i = 0; i < ResTyNumElts; ++i)
+ Indices.push_back(Node->getMaskElt(i));
+
+ SDValue Result = lowerVECTOR_SHUFFLE_SHF(Op, ResTy, Indices, DAG);
+ if (Result.getNode())
+ return Result;
+ Result = lowerVECTOR_SHUFFLE_ILVEV(Op, ResTy, Indices, DAG);
+ if (Result.getNode())
+ return Result;
+ Result = lowerVECTOR_SHUFFLE_ILVOD(Op, ResTy, Indices, DAG);
+ if (Result.getNode())
+ return Result;
+ Result = lowerVECTOR_SHUFFLE_ILVL(Op, ResTy, Indices, DAG);
+ if (Result.getNode())
+ return Result;
+ Result = lowerVECTOR_SHUFFLE_ILVR(Op, ResTy, Indices, DAG);
+ if (Result.getNode())
+ return Result;
+ Result = lowerVECTOR_SHUFFLE_PCKEV(Op, ResTy, Indices, DAG);
+ if (Result.getNode())
+ return Result;
+ Result = lowerVECTOR_SHUFFLE_PCKOD(Op, ResTy, Indices, DAG);
+ if (Result.getNode())
+ return Result;
+ return lowerVECTOR_SHUFFLE_VSHF(Op, ResTy, Indices, DAG);
}
MachineBasicBlock * MipsSETargetLowering::
@@ -814,3 +2658,318 @@ emitBPOSGE32(MachineInstr *MI, MachineBasicBlock *BB) const{
MI->eraseFromParent(); // The pseudo instruction is gone now.
return Sink;
}
+
+MachineBasicBlock * MipsSETargetLowering::
+emitMSACBranchPseudo(MachineInstr *MI, MachineBasicBlock *BB,
+                     unsigned BranchOp) const {
+ // $bb:
+ // vany_nonzero $rd, $ws
+ // =>
+ // $bb:
+ // bnz.b $ws, $tbb
+ // b $fbb
+ // $fbb:
+ // li $rd1, 0
+ // b $sink
+ // $tbb:
+ // li $rd2, 1
+ // $sink:
+ // $rd = phi($rd1, $fbb, $rd2, $tbb)
+
+ MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ const TargetRegisterClass *RC = &Mips::GPR32RegClass;
+ DebugLoc DL = MI->getDebugLoc();
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction::iterator It = llvm::next(MachineFunction::iterator(BB));
+ MachineFunction *F = BB->getParent();
+ MachineBasicBlock *FBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *TBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *Sink = F->CreateMachineBasicBlock(LLVM_BB);
+ F->insert(It, FBB);
+ F->insert(It, TBB);
+ F->insert(It, Sink);
+
+ // Transfer the remainder of BB and its successor edges to Sink.
+ Sink->splice(Sink->begin(), BB, llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ Sink->transferSuccessorsAndUpdatePHIs(BB);
+
+ // Add successors.
+ BB->addSuccessor(FBB);
+ BB->addSuccessor(TBB);
+ FBB->addSuccessor(Sink);
+ TBB->addSuccessor(Sink);
+
+ // Insert the real bnz.b instruction to $BB.
+ BuildMI(BB, DL, TII->get(BranchOp))
+ .addReg(MI->getOperand(1).getReg())
+ .addMBB(TBB);
+
+ // Fill $FBB.
+ unsigned RD1 = RegInfo.createVirtualRegister(RC);
+ BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::ADDiu), RD1)
+ .addReg(Mips::ZERO).addImm(0);
+ BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::B)).addMBB(Sink);
+
+ // Fill $TBB.
+ unsigned RD2 = RegInfo.createVirtualRegister(RC);
+ BuildMI(*TBB, TBB->end(), DL, TII->get(Mips::ADDiu), RD2)
+ .addReg(Mips::ZERO).addImm(1);
+
+ // Insert phi function to $Sink.
+ BuildMI(*Sink, Sink->begin(), DL, TII->get(Mips::PHI),
+ MI->getOperand(0).getReg())
+ .addReg(RD1).addMBB(FBB).addReg(RD2).addMBB(TBB);
+
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+ return Sink;
+}
+
+// Emit the COPY_FW pseudo instruction.
+//
+// copy_fw_pseudo $fd, $ws, n
+// =>
+// copy_u_w $rt, $ws, $n
+// mtc1 $rt, $fd
+//
+// When n is zero, the equivalent operation can be performed with (potentially)
+// zero instructions due to register overlaps. This optimization is never valid
+// for lane 1 because it would require FR=0 mode, which isn't supported by MSA.
+MachineBasicBlock * MipsSETargetLowering::
+emitCOPY_FW(MachineInstr *MI, MachineBasicBlock *BB) const {
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
+ DebugLoc DL = MI->getDebugLoc();
+ unsigned Fd = MI->getOperand(0).getReg();
+ unsigned Ws = MI->getOperand(1).getReg();
+ unsigned Lane = MI->getOperand(2).getImm();
+
+ if (Lane == 0)
+ BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Fd).addReg(Ws, 0, Mips::sub_lo);
+ else {
+ unsigned Wt = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass);
+
+ BuildMI(*BB, MI, DL, TII->get(Mips::SPLATI_W), Wt).addReg(Ws).addImm(1);
+ BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Fd).addReg(Wt, 0, Mips::sub_lo);
+ }
+
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+ return BB;
+}
+
+// Emit the COPY_FD pseudo instruction.
+//
+// copy_fd_pseudo $fd, $ws, n
+// =>
+// splati.d $wt, $ws, $n
+// copy $fd, $wt:sub_64
+//
+// When n is zero, the equivalent operation can be performed with (potentially)
+// zero instructions due to register overlaps. This optimization is always
+// valid because FR=1 mode is the only mode supported by MSA.
+MachineBasicBlock * MipsSETargetLowering::
+emitCOPY_FD(MachineInstr *MI, MachineBasicBlock *BB) const {
+ assert(Subtarget->isFP64bit());
+
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
+ unsigned Fd = MI->getOperand(0).getReg();
+ unsigned Ws = MI->getOperand(1).getReg();
+ unsigned Lane = MI->getOperand(2).getImm() * 2;
+ DebugLoc DL = MI->getDebugLoc();
+
+ if (Lane == 0)
+ BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Fd).addReg(Ws, 0, Mips::sub_64);
+ else {
+ unsigned Wt = RegInfo.createVirtualRegister(&Mips::MSA128DRegClass);
+
+ BuildMI(*BB, MI, DL, TII->get(Mips::SPLATI_D), Wt).addReg(Ws).addImm(1);
+ BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Fd).addReg(Wt, 0, Mips::sub_64);
+ }
+
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+ return BB;
+}
+
+// Emit the INSERT_FW pseudo instruction.
+//
+// insert_fw_pseudo $wd, $wd_in, $n, $fs
+// =>
+// subreg_to_reg $wt:sub_lo, $fs
+// insve_w $wd[$n], $wd_in, $wt[0]
+MachineBasicBlock *
+MipsSETargetLowering::emitINSERT_FW(MachineInstr *MI,
+ MachineBasicBlock *BB) const {
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
+ DebugLoc DL = MI->getDebugLoc();
+ unsigned Wd = MI->getOperand(0).getReg();
+ unsigned Wd_in = MI->getOperand(1).getReg();
+ unsigned Lane = MI->getOperand(2).getImm();
+ unsigned Fs = MI->getOperand(3).getReg();
+ unsigned Wt = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass);
+
+ BuildMI(*BB, MI, DL, TII->get(Mips::SUBREG_TO_REG), Wt)
+ .addImm(0)
+ .addReg(Fs)
+ .addImm(Mips::sub_lo);
+ BuildMI(*BB, MI, DL, TII->get(Mips::INSVE_W), Wd)
+ .addReg(Wd_in)
+ .addImm(Lane)
+ .addReg(Wt);
+
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+ return BB;
+}
+
+// Emit the INSERT_FD pseudo instruction.
+//
+// insert_fd_pseudo $wd, $wd_in, $n, $fs
+// =>
+// subreg_to_reg $wt:sub_64, $fs
+// insve_d $wd[$n], $wd_in, $wt[0]
+MachineBasicBlock *
+MipsSETargetLowering::emitINSERT_FD(MachineInstr *MI,
+ MachineBasicBlock *BB) const {
+ assert(Subtarget->isFP64bit());
+
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
+ DebugLoc DL = MI->getDebugLoc();
+ unsigned Wd = MI->getOperand(0).getReg();
+ unsigned Wd_in = MI->getOperand(1).getReg();
+ unsigned Lane = MI->getOperand(2).getImm();
+ unsigned Fs = MI->getOperand(3).getReg();
+ unsigned Wt = RegInfo.createVirtualRegister(&Mips::MSA128DRegClass);
+
+ BuildMI(*BB, MI, DL, TII->get(Mips::SUBREG_TO_REG), Wt)
+ .addImm(0)
+ .addReg(Fs)
+ .addImm(Mips::sub_64);
+ BuildMI(*BB, MI, DL, TII->get(Mips::INSVE_D), Wd)
+ .addReg(Wd_in)
+ .addImm(Lane)
+ .addReg(Wt);
+
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+ return BB;
+}
+
+// Emit the FILL_FW pseudo instruction.
+//
+// fill_fw_pseudo $wd, $fs
+// =>
+// implicit_def $wt1
+// insert_subreg $wt2:sub_lo, $wt1, $fs
+// splati.w $wd, $wt2[0]
+MachineBasicBlock *
+MipsSETargetLowering::emitFILL_FW(MachineInstr *MI,
+ MachineBasicBlock *BB) const {
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
+ DebugLoc DL = MI->getDebugLoc();
+ unsigned Wd = MI->getOperand(0).getReg();
+ unsigned Fs = MI->getOperand(1).getReg();
+ unsigned Wt1 = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass);
+ unsigned Wt2 = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass);
+
+ BuildMI(*BB, MI, DL, TII->get(Mips::IMPLICIT_DEF), Wt1);
+ BuildMI(*BB, MI, DL, TII->get(Mips::INSERT_SUBREG), Wt2)
+ .addReg(Wt1)
+ .addReg(Fs)
+ .addImm(Mips::sub_lo);
+ BuildMI(*BB, MI, DL, TII->get(Mips::SPLATI_W), Wd).addReg(Wt2).addImm(0);
+
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+ return BB;
+}
+
+// Emit the FILL_FD pseudo instruction.
+//
+// fill_fd_pseudo $wd, $fs
+// =>
+// implicit_def $wt1
+// insert_subreg $wt2:sub_64, $wt1, $fs
+// splati.d $wd, $wt2[0]
+MachineBasicBlock *
+MipsSETargetLowering::emitFILL_FD(MachineInstr *MI,
+ MachineBasicBlock *BB) const {
+ assert(Subtarget->isFP64bit());
+
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
+ DebugLoc DL = MI->getDebugLoc();
+ unsigned Wd = MI->getOperand(0).getReg();
+ unsigned Fs = MI->getOperand(1).getReg();
+ unsigned Wt1 = RegInfo.createVirtualRegister(&Mips::MSA128DRegClass);
+ unsigned Wt2 = RegInfo.createVirtualRegister(&Mips::MSA128DRegClass);
+
+ BuildMI(*BB, MI, DL, TII->get(Mips::IMPLICIT_DEF), Wt1);
+ BuildMI(*BB, MI, DL, TII->get(Mips::INSERT_SUBREG), Wt2)
+ .addReg(Wt1)
+ .addReg(Fs)
+ .addImm(Mips::sub_64);
+ BuildMI(*BB, MI, DL, TII->get(Mips::SPLATI_D), Wd).addReg(Wt2).addImm(0);
+
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+ return BB;
+}
+
+// Emit the FEXP2_W_1 pseudo instruction.
+//
+// fexp2_w_1_pseudo $wd, $wt
+// =>
+// ldi.w $ws1, 1
+// ffint_u.w $ws2, $ws1
+// fexp2.w $wd, $ws2, $wt
+MachineBasicBlock *
+MipsSETargetLowering::emitFEXP2_W_1(MachineInstr *MI,
+ MachineBasicBlock *BB) const {
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
+ const TargetRegisterClass *RC = &Mips::MSA128WRegClass;
+ unsigned Ws1 = RegInfo.createVirtualRegister(RC);
+ unsigned Ws2 = RegInfo.createVirtualRegister(RC);
+ DebugLoc DL = MI->getDebugLoc();
+
+ // Splat 1.0 into a vector
+ BuildMI(*BB, MI, DL, TII->get(Mips::LDI_W), Ws1).addImm(1);
+ BuildMI(*BB, MI, DL, TII->get(Mips::FFINT_U_W), Ws2).addReg(Ws1);
+
+ // Emit 1.0 * fexp2(Wt)
+ BuildMI(*BB, MI, DL, TII->get(Mips::FEXP2_W), MI->getOperand(0).getReg())
+ .addReg(Ws2)
+ .addReg(MI->getOperand(1).getReg());
+
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+ return BB;
+}
+
+// Emit the FEXP2_D_1 pseudo instruction.
+//
+// fexp2_d_1_pseudo $wd, $wt
+// =>
+// ldi.d $ws1, 1
+// ffint_u.d $ws2, $ws1
+// fexp2.d $wd, $ws2, $wt
+MachineBasicBlock *
+MipsSETargetLowering::emitFEXP2_D_1(MachineInstr *MI,
+ MachineBasicBlock *BB) const {
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
+ const TargetRegisterClass *RC = &Mips::MSA128DRegClass;
+ unsigned Ws1 = RegInfo.createVirtualRegister(RC);
+ unsigned Ws2 = RegInfo.createVirtualRegister(RC);
+ DebugLoc DL = MI->getDebugLoc();
+
+ // Splat 1.0 into a vector
+ BuildMI(*BB, MI, DL, TII->get(Mips::LDI_D), Ws1).addImm(1);
+ BuildMI(*BB, MI, DL, TII->get(Mips::FFINT_U_D), Ws2).addReg(Ws1);
+
+ // Emit 1.0 * fexp2(Wt)
+ BuildMI(*BB, MI, DL, TII->get(Mips::FEXP2_D), MI->getOperand(0).getReg())
+ .addReg(Ws2)
+ .addReg(MI->getOperand(1).getReg());
+
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+ return BB;
+}
diff --git a/lib/Target/Mips/MipsSEISelLowering.h b/lib/Target/Mips/MipsSEISelLowering.h
index ec8a5c7..c5210d9 100644
--- a/lib/Target/Mips/MipsSEISelLowering.h
+++ b/lib/Target/Mips/MipsSEISelLowering.h
@@ -22,6 +22,14 @@ namespace llvm {
public:
explicit MipsSETargetLowering(MipsTargetMachine &TM);
+ /// \brief Enable MSA support for the given integer type and register
+ /// class.
+ void addMSAIntType(MVT::SimpleValueType Ty, const TargetRegisterClass *RC);
+ /// \brief Enable MSA support for the given floating-point type and
+ /// register class.
+ void addMSAFloatType(MVT::SimpleValueType Ty,
+ const TargetRegisterClass *RC);
+
virtual bool allowsUnalignedMemoryAccesses(EVT VT, bool *Fast) const;
virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
@@ -38,8 +46,8 @@ namespace llvm {
virtual const TargetRegisterClass *getRepRegClassFor(MVT VT) const {
if (VT == MVT::Untyped)
- return Subtarget->hasDSP() ? &Mips::ACRegsDSPRegClass :
- &Mips::ACRegsRegClass;
+ return Subtarget->hasDSP() ? &Mips::ACC64DSPRegClass :
+ &Mips::ACC64RegClass;
return TargetLowering::getRepRegClassFor(VT);
}
@@ -56,14 +64,50 @@ namespace llvm {
bool IsPICCall, bool GlobalOrExternal, bool InternalLinkage,
CallLoweringInfo &CLI, SDValue Callee, SDValue Chain) const;
+ SDValue lowerLOAD(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerSTORE(SDValue Op, SelectionDAG &DAG) const;
+
SDValue lowerMulDiv(SDValue Op, unsigned NewOpc, bool HasLo, bool HasHi,
SelectionDAG &DAG) const;
SDValue lowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
+ /// \brief Lower VECTOR_SHUFFLE into one of a number of instructions
+ /// depending on the indices in the shuffle.
+ SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
MachineBasicBlock *emitBPOSGE32(MachineInstr *MI,
MachineBasicBlock *BB) const;
+ MachineBasicBlock *emitMSACBranchPseudo(MachineInstr *MI,
+ MachineBasicBlock *BB,
+ unsigned BranchOp) const;
+ /// \brief Emit the COPY_FW pseudo instruction
+ MachineBasicBlock *emitCOPY_FW(MachineInstr *MI,
+ MachineBasicBlock *BB) const;
+ /// \brief Emit the COPY_FD pseudo instruction
+ MachineBasicBlock *emitCOPY_FD(MachineInstr *MI,
+ MachineBasicBlock *BB) const;
+ /// \brief Emit the INSERT_FW pseudo instruction
+ MachineBasicBlock *emitINSERT_FW(MachineInstr *MI,
+ MachineBasicBlock *BB) const;
+ /// \brief Emit the INSERT_FD pseudo instruction
+ MachineBasicBlock *emitINSERT_FD(MachineInstr *MI,
+ MachineBasicBlock *BB) const;
+ /// \brief Emit the FILL_FW pseudo instruction
+ MachineBasicBlock *emitFILL_FW(MachineInstr *MI,
+ MachineBasicBlock *BB) const;
+ /// \brief Emit the FILL_FD pseudo instruction
+ MachineBasicBlock *emitFILL_FD(MachineInstr *MI,
+ MachineBasicBlock *BB) const;
+ /// \brief Emit the FEXP2_W_1 pseudo instruction.
+ MachineBasicBlock *emitFEXP2_W_1(MachineInstr *MI,
+ MachineBasicBlock *BB) const;
+ /// \brief Emit the FEXP2_D_1 pseudo instruction.
+ MachineBasicBlock *emitFEXP2_D_1(MachineInstr *MI,
+ MachineBasicBlock *BB) const;
};
}
diff --git a/lib/Target/Mips/MipsSEInstrInfo.cpp b/lib/Target/Mips/MipsSEInstrInfo.cpp
index 9521043..02931a3 100644
--- a/lib/Target/Mips/MipsSEInstrInfo.cpp
+++ b/lib/Target/Mips/MipsSEInstrInfo.cpp
@@ -24,11 +24,6 @@
using namespace llvm;
-static cl::opt<bool> NoDPLoadStore("mno-ldc1-sdc1", cl::init(false),
- cl::desc("Expand double precision loads and "
- "stores to their single precision "
- "counterparts."));
-
MipsSEInstrInfo::MipsSEInstrInfo(MipsTargetMachine &tm)
: MipsInstrInfo(tm,
tm.getRelocationModel() == Reloc::PIC_ ? Mips::B : Mips::J),
@@ -49,10 +44,8 @@ isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const
{
unsigned Opc = MI->getOpcode();
- if ((Opc == Mips::LW) || (Opc == Mips::LW_P8) || (Opc == Mips::LD) ||
- (Opc == Mips::LD_P8) || (Opc == Mips::LWC1) || (Opc == Mips::LWC1_P8) ||
- (Opc == Mips::LDC1) || (Opc == Mips::LDC164) ||
- (Opc == Mips::LDC164_P8)) {
+ if ((Opc == Mips::LW) || (Opc == Mips::LD) ||
+ (Opc == Mips::LWC1) || (Opc == Mips::LDC1) || (Opc == Mips::LDC164)) {
if ((MI->getOperand(1).isFI()) && // is a stack slot
(MI->getOperand(2).isImm()) && // the imm is zero
(isZeroImm(MI->getOperand(2)))) {
@@ -74,10 +67,8 @@ isStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const
{
unsigned Opc = MI->getOpcode();
- if ((Opc == Mips::SW) || (Opc == Mips::SW_P8) || (Opc == Mips::SD) ||
- (Opc == Mips::SD_P8) || (Opc == Mips::SWC1) || (Opc == Mips::SWC1_P8) ||
- (Opc == Mips::SDC1) || (Opc == Mips::SDC164) ||
- (Opc == Mips::SDC164_P8)) {
+ if ((Opc == Mips::SW) || (Opc == Mips::SD) ||
+ (Opc == Mips::SWC1) || (Opc == Mips::SDC1) || (Opc == Mips::SDC164)) {
if ((MI->getOperand(1).isFI()) && // is a stack slot
(MI->getOperand(2).isImm()) && // the imm is zero
(isZeroImm(MI->getOperand(2)))) {
@@ -101,32 +92,34 @@ void MipsSEInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
Opc = Mips::CFC1;
else if (Mips::FGR32RegClass.contains(SrcReg))
Opc = Mips::MFC1;
- else if (Mips::HIRegsRegClass.contains(SrcReg))
+ else if (Mips::HI32RegClass.contains(SrcReg))
Opc = Mips::MFHI, SrcReg = 0;
- else if (Mips::LORegsRegClass.contains(SrcReg))
+ else if (Mips::LO32RegClass.contains(SrcReg))
Opc = Mips::MFLO, SrcReg = 0;
- else if (Mips::HIRegsDSPRegClass.contains(SrcReg))
+ else if (Mips::HI32DSPRegClass.contains(SrcReg))
Opc = Mips::MFHI_DSP;
- else if (Mips::LORegsDSPRegClass.contains(SrcReg))
+ else if (Mips::LO32DSPRegClass.contains(SrcReg))
Opc = Mips::MFLO_DSP;
else if (Mips::DSPCCRegClass.contains(SrcReg)) {
BuildMI(MBB, I, DL, get(Mips::RDDSP), DestReg).addImm(1 << 4)
.addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
return;
}
+ else if (Mips::MSACtrlRegClass.contains(SrcReg))
+ Opc = Mips::CFCMSA;
}
else if (Mips::GPR32RegClass.contains(SrcReg)) { // Copy from CPU Reg.
if (Mips::CCRRegClass.contains(DestReg))
Opc = Mips::CTC1;
else if (Mips::FGR32RegClass.contains(DestReg))
Opc = Mips::MTC1;
- else if (Mips::HIRegsRegClass.contains(DestReg))
+ else if (Mips::HI32RegClass.contains(DestReg))
Opc = Mips::MTHI, DestReg = 0;
- else if (Mips::LORegsRegClass.contains(DestReg))
+ else if (Mips::LO32RegClass.contains(DestReg))
Opc = Mips::MTLO, DestReg = 0;
- else if (Mips::HIRegsDSPRegClass.contains(DestReg))
+ else if (Mips::HI32DSPRegClass.contains(DestReg))
Opc = Mips::MTHI_DSP;
- else if (Mips::LORegsDSPRegClass.contains(DestReg))
+ else if (Mips::LO32DSPRegClass.contains(DestReg))
Opc = Mips::MTLO_DSP;
else if (Mips::DSPCCRegClass.contains(DestReg)) {
BuildMI(MBB, I, DL, get(Mips::WRDSP))
@@ -134,6 +127,8 @@ void MipsSEInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
.addReg(DestReg, RegState::ImplicitDefine);
return;
}
+ else if (Mips::MSACtrlRegClass.contains(DestReg))
+ Opc = Mips::CTCMSA;
}
else if (Mips::FGR32RegClass.contains(DestReg, SrcReg))
Opc = Mips::FMOV_S;
@@ -144,21 +139,25 @@ void MipsSEInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
else if (Mips::GPR64RegClass.contains(DestReg)) { // Copy to CPU64 Reg.
if (Mips::GPR64RegClass.contains(SrcReg))
Opc = Mips::DADDu, ZeroReg = Mips::ZERO_64;
- else if (Mips::HIRegs64RegClass.contains(SrcReg))
+ else if (Mips::HI64RegClass.contains(SrcReg))
Opc = Mips::MFHI64, SrcReg = 0;
- else if (Mips::LORegs64RegClass.contains(SrcReg))
+ else if (Mips::LO64RegClass.contains(SrcReg))
Opc = Mips::MFLO64, SrcReg = 0;
else if (Mips::FGR64RegClass.contains(SrcReg))
Opc = Mips::DMFC1;
}
else if (Mips::GPR64RegClass.contains(SrcReg)) { // Copy from CPU64 Reg.
- if (Mips::HIRegs64RegClass.contains(DestReg))
+ if (Mips::HI64RegClass.contains(DestReg))
Opc = Mips::MTHI64, DestReg = 0;
- else if (Mips::LORegs64RegClass.contains(DestReg))
+ else if (Mips::LO64RegClass.contains(DestReg))
Opc = Mips::MTLO64, DestReg = 0;
else if (Mips::FGR64RegClass.contains(DestReg))
Opc = Mips::DMTC1;
}
+ else if (Mips::MSA128BRegClass.contains(DestReg)) { // Copy to MSA reg
+ if (Mips::MSA128BRegClass.contains(SrcReg))
+ Opc = Mips::MOVE_V;
+ }
assert(Opc && "Cannot copy registers");
@@ -186,23 +185,31 @@ storeRegToStack(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
unsigned Opc = 0;
if (Mips::GPR32RegClass.hasSubClassEq(RC))
- Opc = IsN64 ? Mips::SW_P8 : Mips::SW;
+ Opc = Mips::SW;
else if (Mips::GPR64RegClass.hasSubClassEq(RC))
- Opc = IsN64 ? Mips::SD_P8 : Mips::SD;
- else if (Mips::ACRegsRegClass.hasSubClassEq(RC))
- Opc = IsN64 ? Mips::STORE_AC64_P8 : Mips::STORE_AC64;
- else if (Mips::ACRegsDSPRegClass.hasSubClassEq(RC))
- Opc = IsN64 ? Mips::STORE_AC_DSP_P8 : Mips::STORE_AC_DSP;
- else if (Mips::ACRegs128RegClass.hasSubClassEq(RC))
- Opc = IsN64 ? Mips::STORE_AC128_P8 : Mips::STORE_AC128;
+ Opc = Mips::SD;
+ else if (Mips::ACC64RegClass.hasSubClassEq(RC))
+ Opc = Mips::STORE_ACC64;
+ else if (Mips::ACC64DSPRegClass.hasSubClassEq(RC))
+ Opc = Mips::STORE_ACC64DSP;
+ else if (Mips::ACC128RegClass.hasSubClassEq(RC))
+ Opc = Mips::STORE_ACC128;
else if (Mips::DSPCCRegClass.hasSubClassEq(RC))
- Opc = IsN64 ? Mips::STORE_CCOND_DSP_P8 : Mips::STORE_CCOND_DSP;
+ Opc = Mips::STORE_CCOND_DSP;
else if (Mips::FGR32RegClass.hasSubClassEq(RC))
- Opc = IsN64 ? Mips::SWC1_P8 : Mips::SWC1;
+ Opc = Mips::SWC1;
else if (Mips::AFGR64RegClass.hasSubClassEq(RC))
Opc = Mips::SDC1;
else if (Mips::FGR64RegClass.hasSubClassEq(RC))
- Opc = IsN64 ? Mips::SDC164_P8 : Mips::SDC164;
+ Opc = Mips::SDC164;
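+  // Spill MSA 128-bit registers with st.[bhwd], chosen by the register
+  // class's vector type.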
+ else if (RC->hasType(MVT::v16i8))
+ Opc = Mips::ST_B;
+ else if (RC->hasType(MVT::v8i16) || RC->hasType(MVT::v8f16))
+ Opc = Mips::ST_H;
+ else if (RC->hasType(MVT::v4i32) || RC->hasType(MVT::v4f32))
+ Opc = Mips::ST_W;
+ else if (RC->hasType(MVT::v2i64) || RC->hasType(MVT::v2f64))
+ Opc = Mips::ST_D;
assert(Opc && "Register class not handled!");
BuildMI(MBB, I, DL, get(Opc)).addReg(SrcReg, getKillRegState(isKill))
@@ -219,23 +226,31 @@ loadRegFromStack(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
unsigned Opc = 0;
if (Mips::GPR32RegClass.hasSubClassEq(RC))
- Opc = IsN64 ? Mips::LW_P8 : Mips::LW;
+ Opc = Mips::LW;
else if (Mips::GPR64RegClass.hasSubClassEq(RC))
- Opc = IsN64 ? Mips::LD_P8 : Mips::LD;
- else if (Mips::ACRegsRegClass.hasSubClassEq(RC))
- Opc = IsN64 ? Mips::LOAD_AC64_P8 : Mips::LOAD_AC64;
- else if (Mips::ACRegsDSPRegClass.hasSubClassEq(RC))
- Opc = IsN64 ? Mips::LOAD_AC_DSP_P8 : Mips::LOAD_AC_DSP;
- else if (Mips::ACRegs128RegClass.hasSubClassEq(RC))
- Opc = IsN64 ? Mips::LOAD_AC128_P8 : Mips::LOAD_AC128;
+ Opc = Mips::LD;
+ else if (Mips::ACC64RegClass.hasSubClassEq(RC))
+ Opc = Mips::LOAD_ACC64;
+ else if (Mips::ACC64DSPRegClass.hasSubClassEq(RC))
+ Opc = Mips::LOAD_ACC64DSP;
+ else if (Mips::ACC128RegClass.hasSubClassEq(RC))
+ Opc = Mips::LOAD_ACC128;
else if (Mips::DSPCCRegClass.hasSubClassEq(RC))
- Opc = IsN64 ? Mips::LOAD_CCOND_DSP_P8 : Mips::LOAD_CCOND_DSP;
+ Opc = Mips::LOAD_CCOND_DSP;
else if (Mips::FGR32RegClass.hasSubClassEq(RC))
- Opc = IsN64 ? Mips::LWC1_P8 : Mips::LWC1;
+ Opc = Mips::LWC1;
else if (Mips::AFGR64RegClass.hasSubClassEq(RC))
Opc = Mips::LDC1;
else if (Mips::FGR64RegClass.hasSubClassEq(RC))
- Opc = IsN64 ? Mips::LDC164_P8 : Mips::LDC164;
+ Opc = Mips::LDC164;
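+  // Reload MSA 128-bit registers with ld.[bhwd], chosen by the register
+  // class's vector type.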
+ else if (RC->hasType(MVT::v16i8))
+ Opc = Mips::LD_B;
+ else if (RC->hasType(MVT::v8i16) || RC->hasType(MVT::v8f16))
+ Opc = Mips::LD_H;
+ else if (RC->hasType(MVT::v4i32) || RC->hasType(MVT::v4f32))
+ Opc = Mips::LD_W;
+ else if (RC->hasType(MVT::v2i64) || RC->hasType(MVT::v2f64))
+ Opc = Mips::LD_D;
assert(Opc && "Register class not handled!");
BuildMI(MBB, I, DL, get(Opc), DestReg).addFrameIndex(FI).addImm(Offset)
@@ -251,6 +266,27 @@ bool MipsSEInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
case Mips::RetRA:
expandRetRA(MBB, MI, Mips::RET);
break;
+ case Mips::PseudoMFHI:
+ expandPseudoMFHiLo(MBB, MI, Mips::MFHI);
+ break;
+ case Mips::PseudoMFLO:
+ expandPseudoMFHiLo(MBB, MI, Mips::MFLO);
+ break;
+ case Mips::PseudoMFHI64:
+ expandPseudoMFHiLo(MBB, MI, Mips::MFHI64);
+ break;
+ case Mips::PseudoMFLO64:
+ expandPseudoMFHiLo(MBB, MI, Mips::MFLO64);
+ break;
+ case Mips::PseudoMTLOHI:
+ expandPseudoMTLoHi(MBB, MI, Mips::MTLO, Mips::MTHI, false);
+ break;
+ case Mips::PseudoMTLOHI64:
+ expandPseudoMTLoHi(MBB, MI, Mips::MTLO64, Mips::MTHI64, false);
+ break;
+ case Mips::PseudoMTLOHI_DSP:
+ expandPseudoMTLoHi(MBB, MI, Mips::MTLO_DSP, Mips::MTHI_DSP, true);
+ break;
case Mips::PseudoCVT_S_W:
expandCvtFPInt(MBB, MI, Mips::CVT_S_W, Mips::MTC1, false);
break;
@@ -267,16 +303,16 @@ bool MipsSEInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
expandCvtFPInt(MBB, MI, Mips::CVT_D64_L, Mips::DMTC1, true);
break;
case Mips::BuildPairF64:
- expandBuildPairF64(MBB, MI);
+ expandBuildPairF64(MBB, MI, false);
break;
- case Mips::ExtractElementF64:
- expandExtractElementF64(MBB, MI);
+ case Mips::BuildPairF64_64:
+ expandBuildPairF64(MBB, MI, true);
break;
- case Mips::PseudoLDC1:
- expandDPLoadStore(MBB, MI, Mips::LDC1, Mips::LWC1);
+ case Mips::ExtractElementF64:
+ expandExtractElementF64(MBB, MI, false);
break;
- case Mips::PseudoSDC1:
- expandDPLoadStore(MBB, MI, Mips::SDC1, Mips::SWC1);
+ case Mips::ExtractElementF64_64:
+ expandExtractElementF64(MBB, MI, true);
break;
case Mips::MIPSeh_return32:
case Mips::MIPSeh_return64:
@@ -399,6 +435,41 @@ MipsSEInstrInfo::compareOpndSize(unsigned Opc,
return std::make_pair(DstRegSize > SrcRegSize, DstRegSize < SrcRegSize);
}
+void MipsSEInstrInfo::expandPseudoMFHiLo(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned NewOpc) const {
+ BuildMI(MBB, I, I->getDebugLoc(), get(NewOpc), I->getOperand(0).getReg());
+}
+
+void MipsSEInstrInfo::expandPseudoMTLoHi(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned LoOpc,
+ unsigned HiOpc,
+ bool HasExplicitDef) const {
+ // Expand
+ // lo_hi pseudomtlohi $gpr0, $gpr1
+ // to these two instructions:
+ // mtlo $gpr0
+ // mthi $gpr1
+
+ DebugLoc DL = I->getDebugLoc();
+ const MachineOperand &SrcLo = I->getOperand(1), &SrcHi = I->getOperand(2);
+ MachineInstrBuilder LoInst = BuildMI(MBB, I, DL, get(LoOpc));
+ MachineInstrBuilder HiInst = BuildMI(MBB, I, DL, get(HiOpc));
+ LoInst.addReg(SrcLo.getReg(), getKillRegState(SrcLo.isKill()));
+ HiInst.addReg(SrcHi.getReg(), getKillRegState(SrcHi.isKill()));
+
+  // Add the lo/hi destination registers if the mtlo/mthi instructions we
+  // created have explicit defs.
+ if (HasExplicitDef) {
+ unsigned DstReg = I->getOperand(0).getReg();
+ unsigned DstLo = getRegisterInfo().getSubReg(DstReg, Mips::sub_lo);
+ unsigned DstHi = getRegisterInfo().getSubReg(DstReg, Mips::sub_hi);
+ LoInst.addReg(DstLo, RegState::Define);
+ HiInst.addReg(DstHi, RegState::Define);
+ }
+}
+
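
For orientation, the mtlo/mthi pair written above just moves the two 32-bit halves of a 64-bit accumulator value. A minimal standalone sketch of that split, assuming nothing beyond standard C++ (not LLVM code):

    #include <cassert>
    #include <cstdint>

    int main() {
      uint64_t Acc = 0x1122334455667788ULL;            // value held in the ac register
      uint32_t Lo = static_cast<uint32_t>(Acc);        // operand handed to mtlo
      uint32_t Hi = static_cast<uint32_t>(Acc >> 32);  // operand handed to mthi
      assert(((static_cast<uint64_t>(Hi) << 32) | Lo) == Acc);
      return 0;
    }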
void MipsSEInstrInfo::expandCvtFPInt(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
unsigned CvtOpc, unsigned MovOpc,
@@ -408,100 +479,63 @@ void MipsSEInstrInfo::expandCvtFPInt(MachineBasicBlock &MBB,
unsigned DstReg = Dst.getReg(), SrcReg = Src.getReg(), TmpReg = DstReg;
unsigned KillSrc = getKillRegState(Src.isKill());
DebugLoc DL = I->getDebugLoc();
- unsigned SubIdx = (IsI64 ? Mips::sub_32 : Mips::sub_fpeven);
bool DstIsLarger, SrcIsLarger;
tie(DstIsLarger, SrcIsLarger) = compareOpndSize(CvtOpc, *MBB.getParent());
if (DstIsLarger)
- TmpReg = getRegisterInfo().getSubReg(DstReg, SubIdx);
+ TmpReg = getRegisterInfo().getSubReg(DstReg, Mips::sub_lo);
if (SrcIsLarger)
- DstReg = getRegisterInfo().getSubReg(DstReg, SubIdx);
+ DstReg = getRegisterInfo().getSubReg(DstReg, Mips::sub_lo);
BuildMI(MBB, I, DL, MovDesc, TmpReg).addReg(SrcReg, KillSrc);
BuildMI(MBB, I, DL, CvtDesc, DstReg).addReg(TmpReg, RegState::Kill);
}
void MipsSEInstrInfo::expandExtractElementF64(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const {
+ MachineBasicBlock::iterator I,
+ bool FP64) const {
unsigned DstReg = I->getOperand(0).getReg();
unsigned SrcReg = I->getOperand(1).getReg();
unsigned N = I->getOperand(2).getImm();
- const MCInstrDesc& Mfc1Tdd = get(Mips::MFC1);
DebugLoc dl = I->getDebugLoc();
assert(N < 2 && "Invalid immediate");
- unsigned SubIdx = N ? Mips::sub_fpodd : Mips::sub_fpeven;
+ unsigned SubIdx = N ? Mips::sub_hi : Mips::sub_lo;
unsigned SubReg = getRegisterInfo().getSubReg(SrcReg, SubIdx);
- BuildMI(MBB, I, dl, Mfc1Tdd, DstReg).addReg(SubReg);
+ if (SubIdx == Mips::sub_hi && FP64)
+ BuildMI(MBB, I, dl, get(Mips::MFHC1), DstReg).addReg(SubReg);
+ else
+ BuildMI(MBB, I, dl, get(Mips::MFC1), DstReg).addReg(SubReg);
}
void MipsSEInstrInfo::expandBuildPairF64(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const {
+ MachineBasicBlock::iterator I,
+ bool FP64) const {
unsigned DstReg = I->getOperand(0).getReg();
unsigned LoReg = I->getOperand(1).getReg(), HiReg = I->getOperand(2).getReg();
const MCInstrDesc& Mtc1Tdd = get(Mips::MTC1);
DebugLoc dl = I->getDebugLoc();
const TargetRegisterInfo &TRI = getRegisterInfo();
- // mtc1 Lo, $fp
- // mtc1 Hi, $fp + 1
- BuildMI(MBB, I, dl, Mtc1Tdd, TRI.getSubReg(DstReg, Mips::sub_fpeven))
- .addReg(LoReg);
- BuildMI(MBB, I, dl, Mtc1Tdd, TRI.getSubReg(DstReg, Mips::sub_fpodd))
- .addReg(HiReg);
-}
-
-/// Add 4 to the displacement of operand MO.
-static void fixDisp(MachineOperand &MO) {
- switch (MO.getType()) {
- default:
- llvm_unreachable("Unhandled operand type.");
- case MachineOperand::MO_Immediate:
- MO.setImm(MO.getImm() + 4);
- break;
- case MachineOperand::MO_GlobalAddress:
- case MachineOperand::MO_ConstantPoolIndex:
- case MachineOperand::MO_BlockAddress:
- case MachineOperand::MO_TargetIndex:
- case MachineOperand::MO_ExternalSymbol:
- MO.setOffset(MO.getOffset() + 4);
- break;
- }
-}
-
-void MipsSEInstrInfo::expandDPLoadStore(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I,
- unsigned OpcD, unsigned OpcS) const {
- // If NoDPLoadStore is false, just change the opcode.
- if (!NoDPLoadStore) {
- genInstrWithNewOpc(OpcD, I);
- return;
- }
+ // For FP32 mode:
+ // mtc1 Lo, $fp
+ // mtc1 Hi, $fp + 1
+ // For FP64 mode:
+ // mtc1 Lo, $fp
+ // mthc1 Hi, $fp
- // Expand a double precision FP load or store to two single precision
- // instructions.
+ BuildMI(MBB, I, dl, Mtc1Tdd, TRI.getSubReg(DstReg, Mips::sub_lo))
+ .addReg(LoReg);
- const TargetRegisterInfo &TRI = getRegisterInfo();
- const MachineOperand &ValReg = I->getOperand(0);
- unsigned LoReg = TRI.getSubReg(ValReg.getReg(), Mips::sub_fpeven);
- unsigned HiReg = TRI.getSubReg(ValReg.getReg(), Mips::sub_fpodd);
-
- if (!TM.getSubtarget<MipsSubtarget>().isLittle())
- std::swap(LoReg, HiReg);
-
- // Create an instruction which loads from or stores to the lower memory
- // address.
- MachineInstrBuilder MIB = genInstrWithNewOpc(OpcS, I);
- MIB->getOperand(0).setReg(LoReg);
-
- // Create an instruction which loads from or stores to the higher memory
- // address.
- MIB = genInstrWithNewOpc(OpcS, I);
- MIB->getOperand(0).setReg(HiReg);
- fixDisp(MIB->getOperand(2));
+ if (FP64)
+ BuildMI(MBB, I, dl, get(Mips::MTHC1), TRI.getSubReg(DstReg, Mips::sub_hi))
+ .addReg(HiReg);
+ else
+ BuildMI(MBB, I, dl, Mtc1Tdd, TRI.getSubReg(DstReg, Mips::sub_hi))
+ .addReg(HiReg);
}
void MipsSEInstrInfo::expandEhReturn(MachineBasicBlock &MBB,
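
To make the FP32/FP64 distinction above concrete: both modes write the same two 32-bit halves of the double's bit pattern; only the destination differs (the odd register of the pair vs. the high half via mthc1). A small sketch of the halves involved (plain C++, illustrative only):

    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    int main() {
      double D = 1.5;                                   // bit pattern 0x3FF8000000000000
      uint64_t Bits;
      std::memcpy(&Bits, &D, sizeof Bits);
      uint32_t Lo = static_cast<uint32_t>(Bits);        // mtc1 Lo, $f<n>
      uint32_t Hi = static_cast<uint32_t>(Bits >> 32);  // mthc1 Hi, $f<n> in FP64 mode
      std::printf("lo=0x%08x hi=0x%08x\n", Lo, Hi);     // lo=0x00000000 hi=0x3ff80000
      return 0;
    }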
diff --git a/lib/Target/Mips/MipsSEInstrInfo.h b/lib/Target/Mips/MipsSEInstrInfo.h
index d962ef0..6d2dd90 100644
--- a/lib/Target/Mips/MipsSEInstrInfo.h
+++ b/lib/Target/Mips/MipsSEInstrInfo.h
@@ -87,6 +87,13 @@ private:
std::pair<bool, bool> compareOpndSize(unsigned Opc,
const MachineFunction &MF) const;
+ void expandPseudoMFHiLo(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned NewOpc) const;
+
+ void expandPseudoMTLoHi(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned LoOpc, unsigned HiOpc,
+ bool HasExplicitDef) const;
+
/// Expand pseudo Int-to-FP conversion instructions.
///
/// For example, the following pseudo instruction
@@ -101,12 +108,9 @@ private:
unsigned CvtOpc, unsigned MovOpc, bool IsI64) const;
void expandExtractElementF64(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const;
+ MachineBasicBlock::iterator I, bool FP64) const;
void expandBuildPairF64(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const;
- void expandDPLoadStore(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I, unsigned OpcD,
- unsigned OpcS) const;
+ MachineBasicBlock::iterator I, bool FP64) const;
void expandEhReturn(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const;
};
diff --git a/lib/Target/Mips/MipsSERegisterInfo.cpp b/lib/Target/Mips/MipsSERegisterInfo.cpp
index 286a2e2..2d44084 100644
--- a/lib/Target/Mips/MipsSERegisterInfo.cpp
+++ b/lib/Target/Mips/MipsSERegisterInfo.cpp
@@ -62,6 +62,24 @@ MipsSERegisterInfo::intRegClass(unsigned Size) const {
return &Mips::GPR64RegClass;
}
+/// Determine whether a given opcode is an MSA load/store (supporting 10-bit
+/// offsets) or a non-MSA load/store (supporting 16-bit offsets).
+static inline bool isMSALoadOrStore(const unsigned Opcode) {
+ switch (Opcode) {
+ case Mips::LD_B:
+ case Mips::LD_H:
+ case Mips::LD_W:
+ case Mips::LD_D:
+ case Mips::ST_B:
+ case Mips::ST_H:
+ case Mips::ST_W:
+ case Mips::ST_D:
+ return true;
+ default:
+ return false;
+ }
+}
+
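
The distinction documented above is just an immediate-width check. A hypothetical stand-in for LLVM's isInt<N>() template, for readers without the headers at hand (the name fitsSignedBits is made up):

    #include <cstdint>

    // Does V fit in an N-bit signed immediate field? The range is
    // [-2^(N-1), 2^(N-1) - 1], e.g. [-512, 511] for the MSA case.
    template <unsigned N> bool fitsSignedBits(int64_t V) {
      return V >= -(INT64_C(1) << (N - 1)) && V < (INT64_C(1) << (N - 1));
    }

    // fitsSignedBits<10>(511) == true, fitsSignedBits<10>(512) == false,
    // fitsSignedBits<16>(-32768) == true.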
void MipsSERegisterInfo::eliminateFI(MachineBasicBlock::iterator II,
unsigned OpNo, int FrameIndex,
uint64_t StackSize,
@@ -111,23 +129,49 @@ void MipsSERegisterInfo::eliminateFI(MachineBasicBlock::iterator II,
DEBUG(errs() << "Offset : " << Offset << "\n" << "<--------->\n");
- // If MI is not a debug value, make sure Offset fits in the 16-bit immediate
- // field.
- if (!MI.isDebugValue() && !isInt<16>(Offset)) {
- MachineBasicBlock &MBB = *MI.getParent();
- DebugLoc DL = II->getDebugLoc();
- unsigned ADDu = Subtarget.isABI_N64() ? Mips::DADDu : Mips::ADDu;
- unsigned NewImm;
- const MipsSEInstrInfo &TII =
- *static_cast<const MipsSEInstrInfo*>(
- MBB.getParent()->getTarget().getInstrInfo());
- unsigned Reg = TII.loadImmediate(Offset, MBB, II, DL, &NewImm);
- BuildMI(MBB, II, DL, TII.get(ADDu), Reg).addReg(FrameReg)
- .addReg(Reg, RegState::Kill);
-
- FrameReg = Reg;
- Offset = SignExtend64<16>(NewImm);
- IsKill = true;
+ if (!MI.isDebugValue()) {
+    // Make sure Offset fits within the available immediate field.
+    // For MSA instructions this is a 10-bit signed immediate; otherwise it
+    // is a 16-bit signed immediate.
+ unsigned OffsetBitSize = isMSALoadOrStore(MI.getOpcode()) ? 10 : 16;
+
+ if (OffsetBitSize == 10 && !isInt<10>(Offset) && isInt<16>(Offset)) {
+      // If the offset needs to fit into a signed 10-bit immediate and
+      // doesn't, but does fit into 16 bits, then use an ADDiu.
+ MachineBasicBlock &MBB = *MI.getParent();
+ DebugLoc DL = II->getDebugLoc();
+ unsigned ADDiu = Subtarget.isABI_N64() ? Mips::DADDiu : Mips::ADDiu;
+ const TargetRegisterClass *RC =
+ Subtarget.isABI_N64() ? &Mips::GPR64RegClass : &Mips::GPR32RegClass;
+ MachineRegisterInfo &RegInfo = MBB.getParent()->getRegInfo();
+ unsigned Reg = RegInfo.createVirtualRegister(RC);
+ const MipsSEInstrInfo &TII =
+ *static_cast<const MipsSEInstrInfo *>(
+ MBB.getParent()->getTarget().getInstrInfo());
+ BuildMI(MBB, II, DL, TII.get(ADDiu), Reg).addReg(FrameReg).addImm(Offset);
+
+ FrameReg = Reg;
+ Offset = 0;
+ IsKill = true;
+ } else if (!isInt<16>(Offset)) {
+ // Otherwise split the offset into 16-bit pieces and add it in multiple
+ // instructions.
+ MachineBasicBlock &MBB = *MI.getParent();
+ DebugLoc DL = II->getDebugLoc();
+ unsigned ADDu = Subtarget.isABI_N64() ? Mips::DADDu : Mips::ADDu;
+ unsigned NewImm = 0;
+ const MipsSEInstrInfo &TII =
+ *static_cast<const MipsSEInstrInfo *>(
+ MBB.getParent()->getTarget().getInstrInfo());
+ unsigned Reg = TII.loadImmediate(Offset, MBB, II, DL,
+ OffsetBitSize == 16 ? &NewImm : NULL);
+ BuildMI(MBB, II, DL, TII.get(ADDu), Reg).addReg(FrameReg)
+ .addReg(Reg, RegState::Kill);
+
+ FrameReg = Reg;
+ Offset = SignExtend64<16>(NewImm);
+ IsKill = true;
+ }
}
MI.getOperand(OpNo).ChangeToRegister(FrameReg, false, false, IsKill);
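
Summarizing the branch structure above: an MSA offset that misses its 10-bit field but fits 16 bits gets a single ADDiu; anything that misses 16 bits is materialized piecewise and added with ADDu. A compact sketch of just the classification (illustrative names, not LLVM code):

    #include <cstdint>

    enum class OffsetFixup { None, SingleAddiu, LoadImmAndAdd };

    OffsetFixup classifyOffset(int64_t Offset, bool IsMSALoadStore) {
      auto Fits = [](int64_t V, unsigned N) {
        return V >= -(INT64_C(1) << (N - 1)) && V < (INT64_C(1) << (N - 1));
      };
      unsigned Bits = IsMSALoadStore ? 10 : 16;
      if (Fits(Offset, Bits))
        return OffsetFixup::None;        // fits the instruction's own field
      if (Bits == 10 && Fits(Offset, 16))
        return OffsetFixup::SingleAddiu; // fold the offset with one ADDiu
      return OffsetFixup::LoadImmAndAdd; // lui/ori-style pieces, then ADDu
    }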
diff --git a/lib/Target/Mips/MipsSubtarget.cpp b/lib/Target/Mips/MipsSubtarget.cpp
index 541e2ca..0a81072 100644
--- a/lib/Target/Mips/MipsSubtarget.cpp
+++ b/lib/Target/Mips/MipsSubtarget.cpp
@@ -53,6 +53,12 @@ Mips16HardFloat("mips16-hard-float", cl::NotHidden,
cl::desc("MIPS: mips16 hard float enable."),
cl::init(false));
+static cl::opt<bool>
+Mips16ConstantIslands(
+ "mips16-constant-islands", cl::Hidden,
+ cl::desc("MIPS: mips16 constant islands enable. experimental feature"),
+ cl::init(false));
+
void MipsSubtarget::anchor() { }
MipsSubtarget::MipsSubtarget(const std::string &TT, const std::string &CPU,
@@ -65,7 +71,7 @@ MipsSubtarget::MipsSubtarget(const std::string &TT, const std::string &CPU,
HasBitCount(false), HasFPIdx(false),
InMips16Mode(false), InMips16HardFloat(Mips16HardFloat),
InMicroMipsMode(false), HasDSP(false), HasDSPR2(false),
- AllowMixed16_32(Mixed16_32 | Mips_Os16), Os16(Mips_Os16),
+ AllowMixed16_32(Mixed16_32 | Mips_Os16), Os16(Mips_Os16), HasMSA(false),
RM(_RM), OverrideMode(NoOverride), TM(_TM)
{
std::string CPUName = CPU;
@@ -89,12 +95,20 @@ MipsSubtarget::MipsSubtarget(const std::string &TT, const std::string &CPU,
(hasMips64() && (isABI_N32() || isABI_N64()))) &&
"Invalid Arch & ABI pair.");
+ if (hasMSA() && !isFP64bit())
+ report_fatal_error("MSA requires a 64-bit FPU register file (FR=1 mode). "
+ "See -mattr=+fp64.",
+ false);
+
// Is the target system Linux ?
if (TT.find("linux") == std::string::npos)
IsLinux = false;
// Set UseSmallSection.
UseSmallSection = !IsLinux && (RM == Reloc::Static);
+  // Set some subtarget-specific features.
+  if (inMips16Mode())
+    HasBitCount = false;
}
bool
@@ -152,3 +166,11 @@ void MipsSubtarget::resetSubtarget(MachineFunction *MF) {
}
}
+bool MipsSubtarget::mipsSEUsesSoftFloat() const {
+ return TM->Options.UseSoftFloat && !InMips16HardFloat;
+}
+
+bool MipsSubtarget::useConstantIslands() {
+ DEBUG(dbgs() << "use constant islands " << Mips16ConstantIslands << "\n");
+ return Mips16ConstantIslands;
+}
diff --git a/lib/Target/Mips/MipsSubtarget.h b/lib/Target/Mips/MipsSubtarget.h
index bfb13bb..6b2ab12 100644
--- a/lib/Target/Mips/MipsSubtarget.h
+++ b/lib/Target/Mips/MipsSubtarget.h
@@ -113,6 +113,9 @@ protected:
// compiled as Mips32
bool Os16;
+ // HasMSA -- supports MSA ASE.
+ bool HasMSA;
+
InstrItineraryData InstrItins;
// The instance to the register info section object
@@ -157,6 +160,7 @@ public:
bool isLittle() const { return IsLittle; }
bool isFP64bit() const { return IsFP64bit; }
+ bool isNotFP64bit() const { return !IsFP64bit; }
bool isGP64bit() const { return IsGP64bit; }
bool isGP32bit() const { return !IsGP64bit; }
bool isSingleFloat() const { return IsSingleFloat; }
@@ -182,17 +186,25 @@ public:
bool inMicroMipsMode() const { return InMicroMipsMode; }
bool hasDSP() const { return HasDSP; }
bool hasDSPR2() const { return HasDSPR2; }
+ bool hasMSA() const { return HasMSA; }
bool isLinux() const { return IsLinux; }
bool useSmallSection() const { return UseSmallSection; }
bool hasStandardEncoding() const { return !inMips16Mode(); }
+ bool mipsSEUsesSoftFloat() const;
+
+ bool enableLongBranchPass() const {
+ return hasStandardEncoding() || allowMixed16_32();
+ }
+
/// Features related to the presence of specific instructions.
bool hasSEInReg() const { return HasSEInReg; }
bool hasCondMov() const { return HasCondMov; }
bool hasSwap() const { return HasSwap; }
bool hasBitCount() const { return HasBitCount; }
bool hasFPIdx() const { return HasFPIdx; }
+ bool hasExtractInsert() const { return !inMips16Mode() && hasMips32r2(); }
const InstrItineraryData &getInstrItineraryData() const { return InstrItins; }
bool allowMixed16_32() const { return inMips16ModeDefault() |
@@ -200,6 +212,13 @@ public:
bool os16() const { return Os16;};
+// For now, constant islands are enabled for the whole compilation unit, but
+// they are only really used if we are also in mips16 mode.
+static bool useConstantIslands();
+
+ unsigned stackAlignment() const { return hasMips64() ? 16 : 8; }
+
// Grab MipsRegInfo object
const MipsReginfo &getMReginfo() const { return MRI; }
diff --git a/lib/Target/Mips/MipsTargetMachine.cpp b/lib/Target/Mips/MipsTargetMachine.cpp
index ced6a09..5046c1b 100644
--- a/lib/Target/Mips/MipsTargetMachine.cpp
+++ b/lib/Target/Mips/MipsTargetMachine.cpp
@@ -32,6 +32,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Transforms/Scalar.h"
using namespace llvm;
@@ -134,7 +135,13 @@ namespace {
class MipsPassConfig : public TargetPassConfig {
public:
MipsPassConfig(MipsTargetMachine *TM, PassManagerBase &PM)
- : TargetPassConfig(TM, PM) {}
+ : TargetPassConfig(TM, PM) {
+    // The current implementation of the long branch pass requires a scratch
+    // register ($at) to be available before branch instructions. Tail merging
+    // can break this requirement, so disable it when the long branch pass is
+    // enabled.
+ EnableTailMerge = !getMipsSubtarget().enableLongBranchPass();
+ }
MipsTargetMachine &getMipsTargetMachine() const {
return getTM<MipsTargetMachine>();
@@ -160,7 +167,7 @@ void MipsPassConfig::addIRPasses() {
addPass(createMipsOs16(getMipsTargetMachine()));
if (getMipsSubtarget().inMips16HardFloat())
addPass(createMips16HardFloat(getMipsTargetMachine()));
- addPass(createMipsOptimizeMathLibCalls(getMipsTargetMachine()));
+ addPass(createPartiallyInlineLibCallsPass());
}
// Install an instruction selector pass using
// the ISelDag to gen Mips code.
@@ -196,8 +203,7 @@ bool MipsPassConfig::addPreEmitPass() {
const MipsSubtarget &Subtarget = TM.getSubtarget<MipsSubtarget>();
addPass(createMipsDelaySlotFillerPass(TM));
- if (Subtarget.hasStandardEncoding() ||
- Subtarget.allowMixed16_32())
+ if (Subtarget.enableLongBranchPass())
addPass(createMipsLongBranchPass(TM));
if (Subtarget.inMips16Mode() ||
Subtarget.allowMixed16_32())
diff --git a/lib/Target/Mips/MipsTargetStreamer.h b/lib/Target/Mips/MipsTargetStreamer.h
new file mode 100644
index 0000000..96966fd
--- /dev/null
+++ b/lib/Target/Mips/MipsTargetStreamer.h
@@ -0,0 +1,44 @@
+//===-- MipsTargetStreamer.h - Mips Target Streamer ------------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPSTARGETSTREAMER_H
+#define MIPSTARGETSTREAMER_H
+
+#include "llvm/MC/MCELFStreamer.h"
+#include "llvm/MC/MCStreamer.h"
+
+namespace llvm {
+class MipsTargetStreamer : public MCTargetStreamer {
+ virtual void anchor();
+
+public:
+ virtual void emitMipsHackELFFlags(unsigned Flags) = 0;
+ virtual void emitMipsHackSTOCG(MCSymbol *Sym, unsigned Val) = 0;
+};
+
+// This part is for ASCII assembly output
+class MipsTargetAsmStreamer : public MipsTargetStreamer {
+ formatted_raw_ostream &OS;
+
+public:
+ MipsTargetAsmStreamer(formatted_raw_ostream &OS);
+ virtual void emitMipsHackELFFlags(unsigned Flags);
+ virtual void emitMipsHackSTOCG(MCSymbol *Sym, unsigned Val);
+};
+
+// This part is for ELF object output
+class MipsTargetELFStreamer : public MipsTargetStreamer {
+public:
+ MCELFStreamer &getStreamer();
+ virtual void emitMipsHackELFFlags(unsigned Flags);
+ virtual void emitMipsHackSTOCG(MCSymbol *Sym, unsigned Val);
+};
+}
+
+#endif
diff --git a/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.cpp b/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.cpp
index c7b8aa4..d5be0e4 100644
--- a/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.cpp
+++ b/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.cpp
@@ -18,6 +18,7 @@
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormattedStream.h"
@@ -277,3 +278,12 @@ void NVPTXInstPrinter::printMemOperand(const MCInst *MI, int OpNum,
printOperand(MI, OpNum + 1, O);
}
}
+
+void NVPTXInstPrinter::printProtoIdent(const MCInst *MI, int OpNum,
+ raw_ostream &O, const char *Modifier) {
+ const MCOperand &Op = MI->getOperand(OpNum);
+ assert(Op.isExpr() && "Call prototype is not an MCExpr?");
+ const MCExpr *Expr = Op.getExpr();
+ const MCSymbol &Sym = cast<MCSymbolRefExpr>(Expr)->getSymbol();
+ O << Sym.getName();
+}
diff --git a/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.h b/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.h
index e0f44da..93029ae 100644
--- a/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.h
+++ b/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.h
@@ -44,7 +44,8 @@ public:
raw_ostream &O, const char *Modifier = 0);
void printMemOperand(const MCInst *MI, int OpNum,
raw_ostream &O, const char *Modifier = 0);
-
+ void printProtoIdent(const MCInst *MI, int OpNum,
+ raw_ostream &O, const char *Modifier = 0);
};
}
diff --git a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp
index dfa1ff5..f2784b8 100644
--- a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp
+++ b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp
@@ -35,8 +35,6 @@ NVPTXMCAsmInfo::NVPTXMCAsmInfo(const StringRef &TT) {
PrivateGlobalPrefix = "$L__";
- AllowPeriodsInName = false;
-
HasSetDirective = false;
HasSingleParameterDotFile = false;
diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
index a2b9bec..7552fe7 100644
--- a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
+++ b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
@@ -20,6 +20,7 @@
#include "NVPTXRegisterInfo.h"
#include "NVPTXTargetMachine.h"
#include "NVPTXUtilities.h"
+#include "InstPrinter/NVPTXInstPrinter.h"
#include "cl_common_defines.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/ConstantFolding.h"
@@ -47,21 +48,17 @@
#include <sstream>
using namespace llvm;
-bool RegAllocNilUsed = true;
-
#define DEPOTNAME "__local_depot"
static cl::opt<bool>
-EmitLineNumbers("nvptx-emit-line-numbers",
+EmitLineNumbers("nvptx-emit-line-numbers", cl::Hidden,
cl::desc("NVPTX Specific: Emit Line numbers even without -G"),
cl::init(true));
-namespace llvm { bool InterleaveSrcInPtx = false; }
-
-static cl::opt<bool, true>
-InterleaveSrc("nvptx-emit-src", cl::ZeroOrMore,
+static cl::opt<bool>
+InterleaveSrc("nvptx-emit-src", cl::ZeroOrMore, cl::Hidden,
cl::desc("NVPTX Specific: Emit source line in ptx file"),
- cl::location(llvm::InterleaveSrcInPtx));
+ cl::init(false));
namespace {
/// DiscoverDependentGlobals - Return a set of GlobalVariables on which \p V
@@ -129,7 +126,7 @@ const MCExpr *nvptx::LowerConstant(const Constant *CV, AsmPrinter &AP) {
return MCConstantExpr::Create(CI->getZExtValue(), Ctx);
if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV))
- return MCSymbolRefExpr::Create(AP.Mang->getSymbol(GV), Ctx);
+ return MCSymbolRefExpr::Create(AP.getSymbol(GV), Ctx);
if (const BlockAddress *BA = dyn_cast<BlockAddress>(CV))
return MCSymbolRefExpr::Create(AP.GetBlockAddressSymbol(BA), Ctx);
@@ -294,7 +291,7 @@ void NVPTXAsmPrinter::emitLineNumberAsDotLoc(const MachineInstr &MI) {
return;
// Emit the line from the source file.
- if (llvm::InterleaveSrcInPtx)
+ if (InterleaveSrc)
this->emitSrcInText(fileName.str(), curLoc.getLine());
std::stringstream temp;
@@ -317,6 +314,14 @@ void NVPTXAsmPrinter::EmitInstruction(const MachineInstr *MI) {
void NVPTXAsmPrinter::lowerToMCInst(const MachineInstr *MI, MCInst &OutMI) {
OutMI.setOpcode(MI->getOpcode());
+ // Special: Do not mangle symbol operand of CALL_PROTOTYPE
+ if (MI->getOpcode() == NVPTX::CALL_PROTOTYPE) {
+ const MachineOperand &MO = MI->getOperand(0);
+ OutMI.addOperand(GetSymbolRef(MO,
+ OutContext.GetOrCreateSymbol(Twine(MO.getSymbolName()))));
+ return;
+ }
+
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = MI->getOperand(i);
@@ -344,7 +349,7 @@ bool NVPTXAsmPrinter::lowerOperand(const MachineOperand &MO,
MCOp = GetSymbolRef(MO, GetExternalSymbolSymbol(MO.getSymbolName()));
break;
case MachineOperand::MO_GlobalAddress:
- MCOp = GetSymbolRef(MO, Mang->getSymbol(MO.getGlobal()));
+ MCOp = GetSymbolRef(MO, getSymbol(MO.getGlobal()));
break;
case MachineOperand::MO_FPImmediate: {
const ConstantFP *Cnt = MO.getFPImm();
@@ -550,6 +555,19 @@ void NVPTXAsmPrinter::EmitFunctionBodyEnd() {
VRegMapping.clear();
}
+void NVPTXAsmPrinter::emitImplicitDef(const MachineInstr *MI) const {
+ unsigned RegNo = MI->getOperand(0).getReg();
+ const TargetRegisterInfo *TRI = TM.getRegisterInfo();
+ if (TRI->isVirtualRegister(RegNo)) {
+ OutStreamer.AddComment(Twine("implicit-def: ") +
+ getVirtualRegisterName(RegNo));
+ } else {
+ OutStreamer.AddComment(Twine("implicit-def: ") +
+ TM.getRegisterInfo()->getName(RegNo));
+ }
+ OutStreamer.AddBlankLine();
+}
+
void NVPTXAsmPrinter::emitKernelFunctionDirectives(const Function &F,
raw_ostream &O) const {
// If the NVVM IR has some of reqntid* specified, then output
@@ -601,23 +619,30 @@ void NVPTXAsmPrinter::emitKernelFunctionDirectives(const Function &F,
O << ".minnctapersm " << mincta << "\n";
}
-void NVPTXAsmPrinter::getVirtualRegisterName(unsigned vr, bool isVec,
- raw_ostream &O) {
- const TargetRegisterClass *RC = MRI->getRegClass(vr);
+std::string
+NVPTXAsmPrinter::getVirtualRegisterName(unsigned Reg) const {
+ const TargetRegisterClass *RC = MRI->getRegClass(Reg);
- DenseMap<unsigned, unsigned> &regmap = VRegMapping[RC];
- unsigned mapped_vr = regmap[vr];
+ std::string Name;
+ raw_string_ostream NameStr(Name);
- if (!isVec) {
- O << getNVPTXRegClassStr(RC) << mapped_vr;
- return;
- }
- report_fatal_error("Bad register!");
+ VRegRCMap::const_iterator I = VRegMapping.find(RC);
+ assert(I != VRegMapping.end() && "Bad register class");
+ const DenseMap<unsigned, unsigned> &RegMap = I->second;
+
+ VRegMap::const_iterator VI = RegMap.find(Reg);
+ assert(VI != RegMap.end() && "Bad virtual register");
+ unsigned MappedVR = VI->second;
+
+ NameStr << getNVPTXRegClassStr(RC) << MappedVR;
+
+ NameStr.flush();
+ return Name;
}
-void NVPTXAsmPrinter::emitVirtualRegister(unsigned int vr, bool isVec,
+void NVPTXAsmPrinter::emitVirtualRegister(unsigned int vr,
raw_ostream &O) {
- getVirtualRegisterName(vr, isVec, O);
+ O << getVirtualRegisterName(vr);
}
void NVPTXAsmPrinter::printVecModifiedImmediate(
@@ -660,7 +685,7 @@ void NVPTXAsmPrinter::emitDeclaration(const Function *F, raw_ostream &O) {
else
O << ".func ";
printReturnValStr(F, O);
- O << *Mang->getSymbol(F) << "\n";
+ O << *getSymbol(F) << "\n";
emitFunctionParamList(F, O);
O << ";\n";
}
@@ -870,7 +895,7 @@ bool NVPTXAsmPrinter::doInitialization(Module &M) {
const_cast<TargetLoweringObjectFile &>(getObjFileLowering())
.Initialize(OutContext, TM);
- Mang = new Mangler(OutContext, &TM);
+ Mang = new Mangler(&TM);
// Emit header before any dwarf directives are emitted below.
emitHeader(M, OS1);
@@ -1190,7 +1215,7 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar,
else
O << getPTXFundamentalTypeStr(ETy, false);
O << " ";
- O << *Mang->getSymbol(GVar);
+ O << *getSymbol(GVar);
// Ptx allows variable initilization only for constant and global state
// spaces.
@@ -1226,15 +1251,15 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar,
bufferAggregateConstant(Initializer, &aggBuffer);
if (aggBuffer.numSymbols) {
if (nvptxSubtarget.is64Bit()) {
- O << " .u64 " << *Mang->getSymbol(GVar) << "[";
+ O << " .u64 " << *getSymbol(GVar) << "[";
O << ElementSize / 8;
} else {
- O << " .u32 " << *Mang->getSymbol(GVar) << "[";
+ O << " .u32 " << *getSymbol(GVar) << "[";
O << ElementSize / 4;
}
O << "]";
} else {
- O << " .b8 " << *Mang->getSymbol(GVar) << "[";
+ O << " .b8 " << *getSymbol(GVar) << "[";
O << ElementSize;
O << "]";
}
@@ -1242,7 +1267,7 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar,
aggBuffer.print();
O << "}";
} else {
- O << " .b8 " << *Mang->getSymbol(GVar);
+ O << " .b8 " << *getSymbol(GVar);
if (ElementSize) {
O << "[";
O << ElementSize;
@@ -1250,7 +1275,7 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar,
}
}
} else {
- O << " .b8 " << *Mang->getSymbol(GVar);
+ O << " .b8 " << *getSymbol(GVar);
if (ElementSize) {
O << "[";
O << ElementSize;
@@ -1357,7 +1382,7 @@ void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable *GVar,
O << " .";
O << getPTXFundamentalTypeStr(ETy);
O << " ";
- O << *Mang->getSymbol(GVar);
+ O << *getSymbol(GVar);
return;
}
@@ -1372,7 +1397,7 @@ void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable *GVar,
case Type::ArrayTyID:
case Type::VectorTyID:
ElementSize = TD->getTypeStoreSize(ETy);
- O << " .b8 " << *Mang->getSymbol(GVar) << "[";
+ O << " .b8 " << *getSymbol(GVar) << "[";
if (ElementSize) {
O << itostr(ElementSize);
}
@@ -1427,7 +1452,7 @@ void NVPTXAsmPrinter::printParamName(Function::const_arg_iterator I,
int paramIndex, raw_ostream &O) {
if ((nvptxSubtarget.getDrvInterface() == NVPTX::NVCL) ||
(nvptxSubtarget.getDrvInterface() == NVPTX::CUDA))
- O << *Mang->getSymbol(I->getParent()) << "_param_" << paramIndex;
+ O << *getSymbol(I->getParent()) << "_param_" << paramIndex;
else {
std::string argName = I->getName();
const char *p = argName.c_str();
@@ -1486,13 +1511,13 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
if (llvm::isImage(*I)) {
std::string sname = I->getName();
if (llvm::isImageWriteOnly(*I))
- O << "\t.param .surfref " << *Mang->getSymbol(F) << "_param_"
+ O << "\t.param .surfref " << *getSymbol(F) << "_param_"
<< paramIndex;
else // Default image is read_only
- O << "\t.param .texref " << *Mang->getSymbol(F) << "_param_"
+ O << "\t.param .texref " << *getSymbol(F) << "_param_"
<< paramIndex;
} else // Should be llvm::isSampler(*I)
- O << "\t.param .samplerref " << *Mang->getSymbol(F) << "_param_"
+ O << "\t.param .samplerref " << *getSymbol(F) << "_param_"
<< paramIndex;
continue;
}
@@ -1739,13 +1764,13 @@ void NVPTXAsmPrinter::printScalarConstant(const Constant *CPV, raw_ostream &O) {
return;
}
if (const GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) {
- O << *Mang->getSymbol(GVar);
+ O << *getSymbol(GVar);
return;
}
if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
const Value *v = Cexpr->stripPointerCasts();
if (const GlobalValue *GVar = dyn_cast<GlobalValue>(v)) {
- O << *Mang->getSymbol(GVar);
+ O << *getSymbol(GVar);
return;
} else {
O << *LowerConstant(CPV, *this);
@@ -1863,7 +1888,7 @@ void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes,
case Type::VectorTyID:
case Type::StructTyID: {
if (isa<ConstantArray>(CPV) || isa<ConstantVector>(CPV) ||
- isa<ConstantStruct>(CPV)) {
+ isa<ConstantStruct>(CPV) || isa<ConstantDataSequential>(CPV)) {
int ElementSize = TD->getTypeAllocSize(CPV->getType());
bufferAggregateConstant(CPV, aggBuffer);
if (Bytes > ElementSize)
@@ -1993,6 +2018,116 @@ bool NVPTXAsmPrinter::ignoreLoc(const MachineInstr &MI) {
return false;
}
+/// PrintAsmOperand - Print out an operand for an inline asm expression.
+///
+bool NVPTXAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant,
+ const char *ExtraCode, raw_ostream &O) {
+ if (ExtraCode && ExtraCode[0]) {
+ if (ExtraCode[1] != 0)
+ return true; // Unknown modifier.
+
+ switch (ExtraCode[0]) {
+ default:
+ // See if this is a generic print operand
+ return AsmPrinter::PrintAsmOperand(MI, OpNo, AsmVariant, ExtraCode, O);
+ case 'r':
+ break;
+ }
+ }
+
+ printOperand(MI, OpNo, O);
+
+ return false;
+}
+
+bool NVPTXAsmPrinter::PrintAsmMemoryOperand(
+ const MachineInstr *MI, unsigned OpNo, unsigned AsmVariant,
+ const char *ExtraCode, raw_ostream &O) {
+ if (ExtraCode && ExtraCode[0])
+ return true; // Unknown modifier
+
+ O << '[';
+ printMemOperand(MI, OpNo, O);
+ O << ']';
+
+ return false;
+}
+
+void NVPTXAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
+ raw_ostream &O, const char *Modifier) {
+ const MachineOperand &MO = MI->getOperand(opNum);
+ switch (MO.getType()) {
+ case MachineOperand::MO_Register:
+ if (TargetRegisterInfo::isPhysicalRegister(MO.getReg())) {
+ if (MO.getReg() == NVPTX::VRDepot)
+ O << DEPOTNAME << getFunctionNumber();
+ else
+ O << NVPTXInstPrinter::getRegisterName(MO.getReg());
+ } else {
+ emitVirtualRegister(MO.getReg(), O);
+ }
+ return;
+
+ case MachineOperand::MO_Immediate:
+ if (!Modifier)
+ O << MO.getImm();
+ else if (strstr(Modifier, "vec") == Modifier)
+ printVecModifiedImmediate(MO, Modifier, O);
+ else
+ llvm_unreachable(
+ "Don't know how to handle modifier on immediate operand");
+ return;
+
+ case MachineOperand::MO_FPImmediate:
+ printFPConstant(MO.getFPImm(), O);
+ break;
+
+ case MachineOperand::MO_GlobalAddress:
+ O << *getSymbol(MO.getGlobal());
+ break;
+
+ case MachineOperand::MO_ExternalSymbol: {
+ const char *symbname = MO.getSymbolName();
+ if (strstr(symbname, ".PARAM") == symbname) {
+ unsigned index;
+ sscanf(symbname + 6, "%u[];", &index);
+ printParamName(index, O);
+ } else if (strstr(symbname, ".HLPPARAM") == symbname) {
+ unsigned index;
+ sscanf(symbname + 9, "%u[];", &index);
+ O << *CurrentFnSym << "_param_" << index << "_offset";
+ } else
+ O << symbname;
+ break;
+ }
+
+ case MachineOperand::MO_MachineBasicBlock:
+ O << *MO.getMBB()->getSymbol();
+ return;
+
+ default:
+ llvm_unreachable("Operand type not supported.");
+ }
+}
+
+void NVPTXAsmPrinter::printMemOperand(const MachineInstr *MI, int opNum,
+ raw_ostream &O, const char *Modifier) {
+ printOperand(MI, opNum, O);
+
+ if (Modifier && !strcmp(Modifier, "add")) {
+ O << ", ";
+ printOperand(MI, opNum + 1, O);
+ } else {
+ if (MI->getOperand(opNum + 1).isImm() &&
+ MI->getOperand(opNum + 1).getImm() == 0)
+ return; // don't print ',0' or '+0'
+ O << "+";
+ printOperand(MI, opNum + 1, O);
+ }
+}
+
+
// Force static initialization.
extern "C" void LLVMInitializeNVPTXBackendAsmPrinter() {
RegisterAsmPrinter<NVPTXAsmPrinter> X(TheNVPTXTarget32);
diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.h b/lib/Target/NVPTX/NVPTXAsmPrinter.h
index 27bfa54..3abe5d1 100644
--- a/lib/Target/NVPTX/NVPTXAsmPrinter.h
+++ b/lib/Target/NVPTX/NVPTXAsmPrinter.h
@@ -155,7 +155,7 @@ class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter {
if (pos == nextSymbolPos) {
const Value *v = Symbols[nSym];
if (const GlobalValue *GVar = dyn_cast<GlobalValue>(v)) {
- MCSymbol *Name = AP.Mang->getSymbol(GVar);
+ MCSymbol *Name = AP.getSymbol(GVar);
O << *Name;
} else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(v)) {
O << *nvptx::LowerConstant(Cexpr, AP);
@@ -188,6 +188,7 @@ private:
void EmitFunctionEntryLabel();
void EmitFunctionBodyStart();
void EmitFunctionBodyEnd();
+ void emitImplicitDef(const MachineInstr *MI) const;
void EmitInstruction(const MachineInstr *);
void lowerToMCInst(const MachineInstr *MI, MCInst &OutMI);
@@ -213,7 +214,7 @@ private:
void emitGlobals(const Module &M);
void emitHeader(Module &M, raw_ostream &O);
void emitKernelFunctionDirectives(const Function &F, raw_ostream &O) const;
- void emitVirtualRegister(unsigned int vr, bool isVec, raw_ostream &O);
+ void emitVirtualRegister(unsigned int vr, raw_ostream &);
void emitFunctionExternParamList(const MachineFunction &MF);
void emitFunctionParamList(const Function *, raw_ostream &O);
void emitFunctionParamList(const MachineFunction &MF, raw_ostream &O);
@@ -222,7 +223,14 @@ private:
bool isImageType(const Type *Ty);
void printReturnValStr(const Function *, raw_ostream &O);
void printReturnValStr(const MachineFunction &MF, raw_ostream &O);
-
+ bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode,
+ raw_ostream &);
+ void printOperand(const MachineInstr *MI, int opNum, raw_ostream &O,
+ const char *Modifier = 0);
+ bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode,
+ raw_ostream &);
protected:
bool doInitialization(Module &M);
bool doFinalization(Module &M);
@@ -287,7 +295,7 @@ public:
bool ignoreLoc(const MachineInstr &);
- virtual void getVirtualRegisterName(unsigned, bool, raw_ostream &);
+ std::string getVirtualRegisterName(unsigned) const;
DebugLoc prevDebugLoc;
void emitLineNumberAsDotLoc(const MachineInstr &);
diff --git a/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp b/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp
index 9f92a5b..9fb0dd8 100644
--- a/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp
+++ b/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp
@@ -142,7 +142,7 @@ bool GenericToNVVM::runOnModule(Module &M) {
GlobalVariable *GV = I->first;
GlobalVariable *NewGV = I->second;
++I;
- Constant *BitCastNewGV = ConstantExpr::getBitCast(NewGV, GV->getType());
+ Constant *BitCastNewGV = ConstantExpr::getPointerCast(NewGV, GV->getType());
// At this point, the remaining uses of GV should be found only in global
// variable initializers, as other uses have been already been removed
// while walking through the instructions in function definitions.
diff --git a/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
index ba85e35..4b8b306 100644
--- a/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
+++ b/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
@@ -26,24 +26,24 @@
using namespace llvm;
static cl::opt<int>
-FMAContractLevel("nvptx-fma-level", cl::ZeroOrMore,
+FMAContractLevel("nvptx-fma-level", cl::ZeroOrMore, cl::Hidden,
cl::desc("NVPTX Specific: FMA contraction (0: don't do it"
" 1: do it 2: do it aggressively"),
cl::init(2));
static cl::opt<int> UsePrecDivF32(
- "nvptx-prec-divf32", cl::ZeroOrMore,
+ "nvptx-prec-divf32", cl::ZeroOrMore, cl::Hidden,
cl::desc("NVPTX Specifies: 0 use div.approx, 1 use div.full, 2 use"
" IEEE Compliant F32 div.rnd if avaiable."),
cl::init(2));
static cl::opt<bool>
-UsePrecSqrtF32("nvptx-prec-sqrtf32",
+UsePrecSqrtF32("nvptx-prec-sqrtf32", cl::Hidden,
cl::desc("NVPTX Specific: 0 use sqrt.approx, 1 use sqrt.rn."),
cl::init(true));
static cl::opt<bool>
-FtzEnabled("nvptx-f32ftz", cl::ZeroOrMore,
+FtzEnabled("nvptx-f32ftz", cl::ZeroOrMore, cl::Hidden,
cl::desc("NVPTX Specific: Flush f32 subnormals to sign-preserving zero."),
cl::init(false));
@@ -118,8 +118,10 @@ bool NVPTXDAGToDAGISel::useF32FTZ() const {
/// expanded, promoted and normal instructions.
SDNode *NVPTXDAGToDAGISel::Select(SDNode *N) {
- if (N->isMachineOpcode())
+ if (N->isMachineOpcode()) {
+ N->setNodeId(-1);
return NULL; // Already selected.
+ }
SDNode *ResNode = NULL;
switch (N->getOpcode()) {
@@ -249,7 +251,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {
SDValue Addr;
SDValue Offset, Base;
unsigned Opcode;
- MVT::SimpleValueType TargetVT = LD->getValueType(0).getSimpleVT().SimpleTy;
+ MVT::SimpleValueType TargetVT = LD->getSimpleValueType(0).SimpleTy;
if (SelectDirectAddr(N1, Addr)) {
switch (TargetVT) {
@@ -1347,8 +1349,7 @@ SDNode *NVPTXDAGToDAGISel::SelectStore(SDNode *N) {
SDValue Addr;
SDValue Offset, Base;
unsigned Opcode;
- MVT::SimpleValueType SourceVT =
- N1.getNode()->getValueType(0).getSimpleVT().SimpleTy;
+ MVT::SimpleValueType SourceVT = N1.getNode()->getSimpleValueType(0).SimpleTy;
if (SelectDirectAddr(N2, Addr)) {
switch (SourceVT) {
diff --git a/lib/Target/NVPTX/NVPTXISelLowering.cpp b/lib/Target/NVPTX/NVPTXISelLowering.cpp
index 828242d..6a8be75 100644
--- a/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -310,6 +310,8 @@ const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const {
return "NVPTXISD::CallSeqBegin";
case NVPTXISD::CallSeqEnd:
return "NVPTXISD::CallSeqEnd";
+ case NVPTXISD::CallPrototype:
+ return "NVPTXISD::CallPrototype";
case NVPTXISD::LoadV2:
return "NVPTXISD::LoadV2";
case NVPTXISD::LoadV4:
@@ -471,22 +473,47 @@ NVPTXTargetLowering::getArgumentAlignment(SDValue Callee,
Type *Ty,
unsigned Idx) const {
const DataLayout *TD = getDataLayout();
- unsigned align = 0;
- GlobalAddressSDNode *Func = dyn_cast<GlobalAddressSDNode>(Callee.getNode());
+ unsigned Align = 0;
+ const Value *DirectCallee = CS->getCalledFunction();
+
+ if (!DirectCallee) {
+ // We don't have a direct function symbol, but that may be because of
+ // constant cast instructions in the call.
+ const Instruction *CalleeI = CS->getInstruction();
+ assert(CalleeI && "Call target is not a function or derived value?");
+
+ // With bitcast'd call targets, the instruction will be the call
+ if (isa<CallInst>(CalleeI)) {
+ // Check if we have call alignment metadata
+ if (llvm::getAlign(*cast<CallInst>(CalleeI), Idx, Align))
+ return Align;
+
+ const Value *CalleeV = cast<CallInst>(CalleeI)->getCalledValue();
+ // Ignore any bitcast instructions
+    while (isa<ConstantExpr>(CalleeV)) {
+ const ConstantExpr *CE = cast<ConstantExpr>(CalleeV);
+ if (!CE->isCast())
+ break;
+      // Look through the cast.
+ CalleeV = cast<ConstantExpr>(CalleeV)->getOperand(0);
+ }
- if (Func) { // direct call
- assert(CS->getCalledFunction() &&
- "direct call cannot find callee");
- if (!llvm::getAlign(*(CS->getCalledFunction()), Idx, align))
- align = TD->getABITypeAlignment(Ty);
- }
- else { // indirect call
- const CallInst *CallI = dyn_cast<CallInst>(CS->getInstruction());
- if (!llvm::getAlign(*CallI, Idx, align))
- align = TD->getABITypeAlignment(Ty);
+ // We have now looked past all of the bitcasts. Do we finally have a
+ // Function?
+ if (isa<Function>(CalleeV))
+ DirectCallee = CalleeV;
+ }
}
- return align;
+ // Check for function alignment information if we found that the
+ // ultimate target is a Function
+ if (DirectCallee)
+ if (llvm::getAlign(*cast<Function>(DirectCallee), Idx, Align))
+ return Align;
+
+  // The call is indirect or alignment information is not available; fall
+  // back to the ABI type alignment.
+ return TD->getABITypeAlignment(Ty);
}
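
The while loop above is the usual pattern for recovering a direct callee hidden behind constant pointer casts. A toy model of the same walk, independent of LLVM's class hierarchy (all types here are hypothetical):

    #include <cassert>

    struct Value { virtual ~Value() {} };
    struct Function : Value {};
    struct CastExpr : Value {
      Value *Op;
      explicit CastExpr(Value *V) : Op(V) {}
    };

    // Strip any chain of no-op casts; null if no Function underneath.
    const Function *lookThroughCasts(const Value *V) {
      while (const CastExpr *CE = dynamic_cast<const CastExpr *>(V))
        V = CE->Op;
      return dynamic_cast<const Function *>(V);
    }

    int main() {
      Function F;
      CastExpr Inner(&F), Outer(&Inner);
      assert(lookThroughCasts(&Outer) == &F);
      return 0;
    }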
SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
@@ -860,18 +887,16 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// proto_0 : .callprototype(.param .b32 _) _ (.param .b32 _);
// to be emitted, and the label has to used as the last arg of call
// instruction.
- // The prototype is embedded in a string and put as the operand for an
- // INLINEASM SDNode.
- SDVTList InlineAsmVTs = DAG.getVTList(MVT::Other, MVT::Glue);
- std::string proto_string =
- getPrototype(retTy, Args, Outs, retAlignment, CS);
- const char *asmstr = nvTM->getManagedStrPool()
- ->getManagedString(proto_string.c_str())->c_str();
- SDValue InlineAsmOps[] = {
- Chain, DAG.getTargetExternalSymbol(asmstr, getPointerTy()),
- DAG.getMDNode(0), DAG.getTargetConstant(0, MVT::i32), InFlag
+    // The prototype is embedded in a string and used as the operand of a
+    // CallPrototype SDNode, which is printed as the value of that string.
+ SDVTList ProtoVTs = DAG.getVTList(MVT::Other, MVT::Glue);
+ std::string Proto = getPrototype(retTy, Args, Outs, retAlignment, CS);
+ const char *ProtoStr =
+ nvTM->getManagedStrPool()->getManagedString(Proto.c_str())->c_str();
+ SDValue ProtoOps[] = {
+ Chain, DAG.getTargetExternalSymbol(ProtoStr, MVT::i32), InFlag,
};
- Chain = DAG.getNode(ISD::INLINEASM, dl, InlineAsmVTs, InlineAsmOps, 5);
+ Chain = DAG.getNode(NVPTXISD::CallPrototype, dl, ProtoVTs, &ProtoOps[0], 3);
InFlag = Chain.getValue(1);
}
// Op to just print "call"
@@ -1595,7 +1620,7 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
}
Ofst += TD->getTypeAllocSize(VecVT.getTypeForEVT(F->getContext()));
}
- InsIdx += VecSize;
+ InsIdx += NumElts;
}
if (NumElts > 0)
@@ -2263,3 +2288,29 @@ void NVPTXTargetLowering::ReplaceNodeResults(
return;
}
}
+
+// Pin NVPTXSection's and NVPTXTargetObjectFile's vtables to this file.
+void NVPTXSection::anchor() {}
+
+NVPTXTargetObjectFile::~NVPTXTargetObjectFile() {
+ delete TextSection;
+ delete DataSection;
+ delete BSSSection;
+ delete ReadOnlySection;
+
+ delete StaticCtorSection;
+ delete StaticDtorSection;
+ delete LSDASection;
+ delete EHFrameSection;
+ delete DwarfAbbrevSection;
+ delete DwarfInfoSection;
+ delete DwarfLineSection;
+ delete DwarfFrameSection;
+ delete DwarfPubTypesSection;
+ delete DwarfDebugInlineSection;
+ delete DwarfStrSection;
+ delete DwarfLocSection;
+ delete DwarfARangesSection;
+ delete DwarfRangesSection;
+ delete DwarfMacroInfoSection;
+}
diff --git a/lib/Target/NVPTX/NVPTXISelLowering.h b/lib/Target/NVPTX/NVPTXISelLowering.h
index 3418437..66e708f 100644
--- a/lib/Target/NVPTX/NVPTXISelLowering.h
+++ b/lib/Target/NVPTX/NVPTXISelLowering.h
@@ -49,6 +49,7 @@ enum NodeType {
RETURN,
CallSeqBegin,
CallSeqEnd,
+ CallPrototype,
Dummy,
LoadV2 = ISD::FIRST_TARGET_MEMORY_OPCODE,
diff --git a/lib/Target/NVPTX/NVPTXInstrInfo.cpp b/lib/Target/NVPTX/NVPTXInstrInfo.cpp
index b406aa9..86ddd38 100644
--- a/lib/Target/NVPTX/NVPTXInstrInfo.cpp
+++ b/lib/Target/NVPTX/NVPTXInstrInfo.cpp
@@ -14,17 +14,19 @@
#include "NVPTX.h"
#include "NVPTXInstrInfo.h"
#include "NVPTXTargetMachine.h"
-#define GET_INSTRINFO_CTOR
+#define GET_INSTRINFO_CTOR_DTOR
#include "NVPTXGenInstrInfo.inc"
#include "llvm/IR/Function.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include <cstdio>
using namespace llvm;
+// Pin the vtable to this file.
+void NVPTXInstrInfo::anchor() {}
+
// FIXME: Add the subtarget support on this constructor.
NVPTXInstrInfo::NVPTXInstrInfo(NVPTXTargetMachine &tm)
: NVPTXGenInstrInfo(), TM(tm), RegInfo(*TM.getSubtargetImpl()) {}
diff --git a/lib/Target/NVPTX/NVPTXInstrInfo.h b/lib/Target/NVPTX/NVPTXInstrInfo.h
index b1972e9..600fc5c 100644
--- a/lib/Target/NVPTX/NVPTXInstrInfo.h
+++ b/lib/Target/NVPTX/NVPTXInstrInfo.h
@@ -26,6 +26,7 @@ namespace llvm {
class NVPTXInstrInfo : public NVPTXGenInstrInfo {
NVPTXTargetMachine &TM;
const NVPTXRegisterInfo RegInfo;
+ virtual void anchor();
public:
explicit NVPTXInstrInfo(NVPTXTargetMachine &TM);
diff --git a/lib/Target/NVPTX/NVPTXInstrInfo.td b/lib/Target/NVPTX/NVPTXInstrInfo.td
index 3e430bf..b23f1e4 100644
--- a/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ b/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -2607,6 +2607,20 @@ def trapinst : NVPTXInst<(outs), (ins),
"trap;",
[(trap)]>;
+// Call prototype wrapper
+def SDTCallPrototype : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
+def CallPrototype
+ : SDNode<"NVPTXISD::CallPrototype", SDTCallPrototype,
+ [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
+def ProtoIdent : Operand<i32> {
+ let PrintMethod = "printProtoIdent";
+}
+def CALL_PROTOTYPE
+ : NVPTXInst<(outs), (ins ProtoIdent:$ident),
+ "$ident", [(CallPrototype (i32 texternalsym:$ident))]>;
+
+
+
include "NVPTXIntrinsics.td"
diff --git a/lib/Target/NVPTX/NVPTXSection.h b/lib/Target/NVPTX/NVPTXSection.h
index e57ace9..f8a692e 100644
--- a/lib/Target/NVPTX/NVPTXSection.h
+++ b/lib/Target/NVPTX/NVPTXSection.h
@@ -24,10 +24,10 @@ namespace llvm {
/// the ASMPrint interface.
///
class NVPTXSection : public MCSection {
-
+ virtual void anchor();
public:
NVPTXSection(SectionVariant V, SectionKind K) : MCSection(V, K) {}
- ~NVPTXSection() {}
+ virtual ~NVPTXSection() {}
/// Override this as NVPTX has its own way of printing switching
/// to a section.
diff --git a/lib/Target/NVPTX/NVPTXSplitBBatBar.cpp b/lib/Target/NVPTX/NVPTXSplitBBatBar.cpp
index 83dfe12..b64c308 100644
--- a/lib/Target/NVPTX/NVPTXSplitBBatBar.cpp
+++ b/lib/Target/NVPTX/NVPTXSplitBBatBar.cpp
@@ -36,7 +36,7 @@ bool NVPTXSplitBBatBar::runOnFunction(Function &F) {
BasicBlock::iterator II = IB;
BasicBlock::iterator IE = BI->end();
- // Skit the first intruction. No splitting is needed at this
+    // Skip the first instruction. No splitting is needed at this
// point even if this is a bar.
while (II != IE) {
if (IntrinsicInst *inst = dyn_cast<IntrinsicInst>(II)) {
diff --git a/lib/Target/NVPTX/NVPTXSubtarget.cpp b/lib/Target/NVPTX/NVPTXSubtarget.cpp
index c4d0d6e..9771a17 100644
--- a/lib/Target/NVPTX/NVPTXSubtarget.cpp
+++ b/lib/Target/NVPTX/NVPTXSubtarget.cpp
@@ -20,6 +20,9 @@
using namespace llvm;
+// Pin the vtable to this file.
+void NVPTXSubtarget::anchor() {}
+
NVPTXSubtarget::NVPTXSubtarget(const std::string &TT, const std::string &CPU,
const std::string &FS, bool is64Bit)
: NVPTXGenSubtargetInfo(TT, CPU, FS), Is64Bit(is64Bit), PTXVersion(0),
diff --git a/lib/Target/NVPTX/NVPTXSubtarget.h b/lib/Target/NVPTX/NVPTXSubtarget.h
index 670077d..004be11 100644
--- a/lib/Target/NVPTX/NVPTXSubtarget.h
+++ b/lib/Target/NVPTX/NVPTXSubtarget.h
@@ -25,7 +25,7 @@
namespace llvm {
class NVPTXSubtarget : public NVPTXGenSubtargetInfo {
-
+ virtual void anchor();
std::string TargetName;
NVPTX::DrvInterface drvInterface;
bool Is64Bit;
diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/lib/Target/NVPTX/NVPTXTargetMachine.cpp
index 72afe8d..46edd6d 100644
--- a/lib/Target/NVPTX/NVPTXTargetMachine.cpp
+++ b/lib/Target/NVPTX/NVPTXTargetMachine.cpp
@@ -57,9 +57,6 @@ extern "C" void LLVMInitializeNVPTXTarget() {
RegisterTargetMachine<NVPTXTargetMachine32> X(TheNVPTXTarget32);
RegisterTargetMachine<NVPTXTargetMachine64> Y(TheNVPTXTarget64);
- RegisterMCAsmInfo<NVPTXMCAsmInfo> A(TheNVPTXTarget32);
- RegisterMCAsmInfo<NVPTXMCAsmInfo> B(TheNVPTXTarget64);
-
// FIXME: This pass is really intended to be invoked during IR optimization,
// but it's very NVPTX-specific.
initializeNVVMReflectPass(*PassRegistry::getPassRegistry());
@@ -129,6 +126,7 @@ void NVPTXPassConfig::addIRPasses() {
disablePass(&PrologEpilogCodeInserterID);
disablePass(&MachineCopyPropagationID);
disablePass(&BranchFolderPassID);
+ disablePass(&TailDuplicateID);
TargetPassConfig::addIRPasses();
addPass(createGenericToNVVMPass());
@@ -154,10 +152,30 @@ FunctionPass *NVPTXPassConfig::createTargetRegisterAllocator(bool) {
void NVPTXPassConfig::addFastRegAlloc(FunctionPass *RegAllocPass) {
assert(!RegAllocPass && "NVPTX uses no regalloc!");
- addPass(&StrongPHIEliminationID);
+ addPass(&PHIEliminationID);
+ addPass(&TwoAddressInstructionPassID);
}
void NVPTXPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) {
assert(!RegAllocPass && "NVPTX uses no regalloc!");
- addPass(&StrongPHIEliminationID);
+
+ addPass(&ProcessImplicitDefsID);
+ addPass(&LiveVariablesID);
+ addPass(&MachineLoopInfoID);
+ addPass(&PHIEliminationID);
+
+ addPass(&TwoAddressInstructionPassID);
+ addPass(&RegisterCoalescerID);
+
+ // PreRA instruction scheduling.
+ if (addPass(&MachineSchedulerID))
+ printAndVerify("After Machine Scheduling");
+
+
+ addPass(&StackSlotColoringID);
+
+ // FIXME: Needs physical registers
+ //addPass(&PostRAMachineLICMID);
+
+ printAndVerify("After StackSlotColoring");
}
diff --git a/lib/Target/NVPTX/NVPTXTargetObjectFile.h b/lib/Target/NVPTX/NVPTXTargetObjectFile.h
index bfd6ab1..2a7394b 100644
--- a/lib/Target/NVPTX/NVPTXTargetObjectFile.h
+++ b/lib/Target/NVPTX/NVPTXTargetObjectFile.h
@@ -44,30 +44,10 @@ public:
DwarfMacroInfoSection = 0;
}
- ~NVPTXTargetObjectFile() {
- delete TextSection;
- delete DataSection;
- delete BSSSection;
- delete ReadOnlySection;
-
- delete StaticCtorSection;
- delete StaticDtorSection;
- delete LSDASection;
- delete EHFrameSection;
- delete DwarfAbbrevSection;
- delete DwarfInfoSection;
- delete DwarfLineSection;
- delete DwarfFrameSection;
- delete DwarfPubTypesSection;
- delete DwarfDebugInlineSection;
- delete DwarfStrSection;
- delete DwarfLocSection;
- delete DwarfARangesSection;
- delete DwarfRangesSection;
- delete DwarfMacroInfoSection;
- }
+ virtual ~NVPTXTargetObjectFile();
virtual void Initialize(MCContext &ctx, const TargetMachine &TM) {
+ TargetLoweringObjectFile::Initialize(ctx, TM);
TextSection = new NVPTXSection(MCSection::SV_ELF, SectionKind::getText());
DataSection =
new NVPTXSection(MCSection::SV_ELF, SectionKind::getDataRel());
diff --git a/lib/Target/NVPTX/NVVMReflect.cpp b/lib/Target/NVPTX/NVVMReflect.cpp
index 3cc324b..7406207 100644
--- a/lib/Target/NVPTX/NVVMReflect.cpp
+++ b/lib/Target/NVPTX/NVVMReflect.cpp
@@ -79,7 +79,7 @@ ModulePass *llvm::createNVVMReflectPass(const StringMap<int>& Mapping) {
}
static cl::opt<bool>
-NVVMReflectEnabled("nvvm-reflect-enable", cl::init(true),
+NVVMReflectEnabled("nvvm-reflect-enable", cl::init(true), cl::Hidden,
cl::desc("NVVM reflection, enabled by default"));
char NVVMReflect::ID = 0;
@@ -88,7 +88,7 @@ INITIALIZE_PASS(NVVMReflect, "nvvm-reflect",
false)
static cl::list<std::string>
-ReflectList("nvvm-reflect-list", cl::value_desc("name=<int>"),
+ReflectList("nvvm-reflect-list", cl::value_desc("name=<int>"), cl::Hidden,
cl::desc("A list of string=num assignments"),
cl::ValueRequired);
diff --git a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
index a8f7509..fe83fe1 100644
--- a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
+++ b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
@@ -13,6 +13,7 @@
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
@@ -174,6 +175,7 @@ struct PPCOperand;
class PPCAsmParser : public MCTargetAsmParser {
MCSubtargetInfo &STI;
MCAsmParser &Parser;
+ const MCInstrInfo &MII;
bool IsPPC64;
MCAsmParser &getParser() const { return Parser; }
@@ -218,8 +220,9 @@ class PPCAsmParser : public MCTargetAsmParser {
public:
- PPCAsmParser(MCSubtargetInfo &_STI, MCAsmParser &_Parser)
- : MCTargetAsmParser(), STI(_STI), Parser(_Parser) {
+ PPCAsmParser(MCSubtargetInfo &_STI, MCAsmParser &_Parser,
+ const MCInstrInfo &_MII)
+ : MCTargetAsmParser(), STI(_STI), Parser(_Parser), MII(_MII) {
// Check for 64-bit vs. 32-bit pointer mode.
Triple TheTriple(STI.getTargetTriple());
IsPPC64 = (TheTriple.getArch() == Triple::ppc64 ||
@@ -235,6 +238,10 @@ public:
virtual bool ParseDirective(AsmToken DirectiveID);
unsigned validateTargetOperandClass(MCParsedAsmOperand *Op, unsigned Kind);
+
+ virtual const MCExpr *applyModifierToExpr(const MCExpr *E,
+ MCSymbolRefExpr::VariantKind,
+ MCContext &Ctx);
};
/// PPCOperand - Instances of this class represent a parsed PowerPC machine
@@ -900,19 +907,19 @@ MatchRegisterName(const AsmToken &Tok, unsigned &RegNo, int64_t &IntVal) {
RegNo = PPC::VRSAVE;
IntVal = 256;
return false;
- } else if (Name.substr(0, 1).equals_lower("r") &&
+ } else if (Name.startswith_lower("r") &&
!Name.substr(1).getAsInteger(10, IntVal) && IntVal < 32) {
RegNo = isPPC64()? XRegs[IntVal] : RRegs[IntVal];
return false;
- } else if (Name.substr(0, 1).equals_lower("f") &&
+ } else if (Name.startswith_lower("f") &&
!Name.substr(1).getAsInteger(10, IntVal) && IntVal < 32) {
RegNo = FRegs[IntVal];
return false;
- } else if (Name.substr(0, 1).equals_lower("v") &&
+ } else if (Name.startswith_lower("v") &&
!Name.substr(1).getAsInteger(10, IntVal) && IntVal < 32) {
RegNo = VRegs[IntVal];
return false;
- } else if (Name.substr(0, 2).equals_lower("cr") &&
+ } else if (Name.startswith_lower("cr") &&
!Name.substr(2).getAsInteger(10, IntVal) && IntVal < 8) {
RegNo = CRRegs[IntVal];
return false;
@@ -1353,6 +1360,8 @@ unsigned PPCAsmParser::validateTargetOperandClass(MCParsedAsmOperand *AsmOp,
switch (Kind) {
case MCK_0: ImmVal = 0; break;
case MCK_1: ImmVal = 1; break;
+ case MCK_2: ImmVal = 2; break;
+ case MCK_3: ImmVal = 3; break;
default: return Match_InvalidOperand;
}
@@ -1363,3 +1372,26 @@ unsigned PPCAsmParser::validateTargetOperandClass(MCParsedAsmOperand *AsmOp,
return Match_InvalidOperand;
}
+const MCExpr *
+PPCAsmParser::applyModifierToExpr(const MCExpr *E,
+ MCSymbolRefExpr::VariantKind Variant,
+ MCContext &Ctx) {
+ switch (Variant) {
+ case MCSymbolRefExpr::VK_PPC_LO:
+ return PPCMCExpr::Create(PPCMCExpr::VK_PPC_LO, E, false, Ctx);
+ case MCSymbolRefExpr::VK_PPC_HI:
+ return PPCMCExpr::Create(PPCMCExpr::VK_PPC_HI, E, false, Ctx);
+ case MCSymbolRefExpr::VK_PPC_HA:
+ return PPCMCExpr::Create(PPCMCExpr::VK_PPC_HA, E, false, Ctx);
+ case MCSymbolRefExpr::VK_PPC_HIGHER:
+ return PPCMCExpr::Create(PPCMCExpr::VK_PPC_HIGHER, E, false, Ctx);
+ case MCSymbolRefExpr::VK_PPC_HIGHERA:
+ return PPCMCExpr::Create(PPCMCExpr::VK_PPC_HIGHERA, E, false, Ctx);
+ case MCSymbolRefExpr::VK_PPC_HIGHEST:
+ return PPCMCExpr::Create(PPCMCExpr::VK_PPC_HIGHEST, E, false, Ctx);
+ case MCSymbolRefExpr::VK_PPC_HIGHESTA:
+ return PPCMCExpr::Create(PPCMCExpr::VK_PPC_HIGHESTA, E, false, Ctx);
+ default:
+ return 0;
+ }
+}
diff --git a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
index 08d7665..8281b5c 100644
--- a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
+++ b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
@@ -18,9 +18,17 @@
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetOpcodes.h"
using namespace llvm;
+// FIXME: Once the integrated assembler supports full register names, tie this
+// to the verbose-asm setting.
+static cl::opt<bool>
+FullRegNames("ppc-asm-full-reg-names", cl::Hidden, cl::init(false),
+ cl::desc("Use full register names when printing assembly"));
+
#include "PPCGenAsmWriter.inc"
void PPCInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
@@ -78,6 +86,17 @@ void PPCInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
}
}
+ // For fast-isel, a COPY_TO_REGCLASS may survive this long. This is
+ // used when converting a 32-bit float to a 64-bit float as part of
+ // conversion to an integer (see PPCFastISel.cpp:SelectFPToI()),
+ // as otherwise we have problems with incorrect register classes
+ // in machine instruction verification. For now, just avoid trying
+ // to print it, as such an instruction has no effect (a 32-bit float
+ // in a register is already in 64-bit form, just with lower
+ // precision). FIXME: Is there a better solution?
+ if (MI->getOpcode() == TargetOpcode::COPY_TO_REGCLASS)
+ return;
+
printInstruction(MI, O);
printAnnotation(O, Annot);
}
@@ -285,6 +304,9 @@ void PPCInstPrinter::printTLSCall(const MCInst *MI, unsigned OpNo,
/// stripRegisterPrefix - This method strips the character prefix from a
/// register name so that only the number is left. Used for Linux asm.
static const char *stripRegisterPrefix(const char *RegName) {
+ if (FullRegNames)
+ return RegName;
+
switch (RegName[0]) {
case 'r':
case 'f':
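With this flag, -ppc-asm-full-reg-names keeps symbolic names (r3, f1, cr2) in the output; otherwise Linux-style assembly prints bare numbers. A self-contained sketch of the stripping behavior, with a plain bool standing in for the cl::opt; the hunk shown is cut off after case 'f', so the sketch assumes the usual 'v' and "cr" cases as well:

static bool FullRegNames = false; // stands in for -ppc-asm-full-reg-names

// Drop a leading 'r', 'f', 'v', or "cr" so only the number remains,
// e.g. "r31" -> "31", "cr2" -> "2"; with FullRegNames set, keep as-is.
static const char *stripPrefix(const char *RegName) {
  if (FullRegNames)
    return RegName;
  switch (RegName[0]) {
  case 'r':
  case 'f':
  case 'v':
    return RegName + 1;
  case 'c':
    if (RegName[1] == 'r')
      return RegName + 2;
    break;
  }
  return RegName;
}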
diff --git a/lib/Target/PowerPC/MCTargetDesc/CMakeLists.txt b/lib/Target/PowerPC/MCTargetDesc/CMakeLists.txt
index 45be471..3efa5ec 100644
--- a/lib/Target/PowerPC/MCTargetDesc/CMakeLists.txt
+++ b/lib/Target/PowerPC/MCTargetDesc/CMakeLists.txt
@@ -5,6 +5,7 @@ add_llvm_library(LLVMPowerPCDesc
PPCMCCodeEmitter.cpp
PPCMCExpr.cpp
PPCPredicates.cpp
+ PPCMachObjectWriter.cpp
PPCELFObjectWriter.cpp
)
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
index b2a8701..0d42081 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
@@ -16,9 +16,9 @@
#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCValue.h"
-#include "llvm/Object/MachOFormat.h"
#include "llvm/Support/ELF.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MachO.h"
#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
@@ -69,19 +69,6 @@ static unsigned getFixupKindNumBytes(unsigned Kind) {
}
namespace {
-class PPCMachObjectWriter : public MCMachObjectTargetWriter {
-public:
- PPCMachObjectWriter(bool Is64Bit, uint32_t CPUType,
- uint32_t CPUSubtype)
- : MCMachObjectTargetWriter(Is64Bit, CPUType, CPUSubtype) {}
-
- void RecordRelocation(MachObjectWriter *Writer,
- const MCAssembler &Asm, const MCAsmLayout &Layout,
- const MCFragment *Fragment, const MCFixup &Fixup,
- MCValue Target, uint64_t &FixedValue) {
- llvm_unreachable("Relocation emission for MachO/PPC unimplemented!");
- }
-};
class PPCAsmBackend : public MCAsmBackend {
const Target &TheTarget;
@@ -145,14 +132,17 @@ public:
}
bool writeNopData(uint64_t Count, MCObjectWriter *OW) const {
- // Can't emit NOP with size not multiple of 32-bits
- if (Count % 4 != 0)
- return false;
-
uint64_t NumNops = Count / 4;
for (uint64_t i = 0; i != NumNops; ++i)
OW->Write32(0x60000000);
+ switch (Count % 4) {
+ default: break; // No leftover bytes to write
+ case 1: OW->Write8(0); break;
+ case 2: OW->Write16(0); break;
+ case 3: OW->Write16(0); OW->Write8(0); break;
+ }
+
return true;
}
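writeNopData no longer fails when Count isn't a multiple of four: it emits whole 4-byte nops and pads the remaining 1-3 bytes with zeros. A standalone sketch of the same logic writing into a byte buffer (writeNops is illustrative; bytes are emitted big-endian, as this writer is):

#include <cstdint>
#include <vector>

// Append Count bytes of padding: whole 4-byte nops (ori 0,0,0) first,
// then any 1-3 leftover bytes as zeros, as in writeNopData above.
static void writeNops(std::vector<uint8_t> &Out, uint64_t Count) {
  const uint32_t Nop = 0x60000000;
  for (uint64_t I = 0; I != Count / 4; ++I)
    for (int B = 3; B >= 0; --B)
      Out.push_back(uint8_t(Nop >> (8 * B))); // big-endian byte order
  for (uint64_t I = 0; I != Count % 4; ++I)
    Out.push_back(0);
}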
@@ -174,12 +164,11 @@ namespace {
MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
bool is64 = getPointerSize() == 8;
- return createMachObjectWriter(new PPCMachObjectWriter(
- /*Is64Bit=*/is64,
- (is64 ? object::mach::CTM_PowerPC64 :
- object::mach::CTM_PowerPC),
- object::mach::CSPPC_ALL),
- OS, /*IsLittleEndian=*/false);
+ return createPPCMachObjectWriter(
+ OS,
+ /*Is64Bit=*/is64,
+ (is64 ? MachO::CPU_TYPE_POWERPC64 : MachO::CPU_TYPE_POWERPC),
+ MachO::CPU_SUBTYPE_POWERPC_ALL);
}
virtual bool doesSectionRequireSymbols(const MCSection &Section) const {
@@ -206,10 +195,9 @@ namespace {
} // end anonymous namespace
-
-
-
-MCAsmBackend *llvm::createPPCAsmBackend(const Target &T, StringRef TT, StringRef CPU) {
+MCAsmBackend *llvm::createPPCAsmBackend(const Target &T,
+ const MCRegisterInfo &MRI,
+ StringRef TT, StringRef CPU) {
if (Triple(TT).isOSDarwin())
return new DarwinPPCAsmBackend(T);
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
index 6822507..f3dddce 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
@@ -22,7 +22,6 @@ PPCMCAsmInfoDarwin::PPCMCAsmInfoDarwin(bool is64Bit) {
}
IsLittleEndian = false;
- PCSymbol = ".";
CommentString = ";";
ExceptionsType = ExceptionHandling::DwarfCFI;
@@ -47,15 +46,14 @@ PPCLinuxMCAsmInfo::PPCLinuxMCAsmInfo(bool is64Bit) {
CommentString = "#";
GlobalPrefix = "";
PrivateGlobalPrefix = ".L";
- WeakRefDirective = "\t.weak\t";
-
+
// Uses '.section' before '.bss' directive
UsesELFSectionDirectiveForBSS = true;
// Debug Information
SupportsDebugInformation = true;
- PCSymbol = ".";
+ DollarIsPC = true;
// Set up DWARF directives
HasLEB128 = true; // Target asm supports leb128 directives (little-endian)
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h
index 7b4ed9f..1530e77 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h
@@ -15,6 +15,7 @@
#define PPCTARGETASMINFO_H
#include "llvm/MC/MCAsmInfoDarwin.h"
+#include "llvm/MC/MCAsmInfoELF.h"
namespace llvm {
@@ -24,7 +25,7 @@ namespace llvm {
explicit PPCMCAsmInfoDarwin(bool is64Bit);
};
- class PPCLinuxMCAsmInfo : public MCAsmInfo {
+ class PPCLinuxMCAsmInfo : public MCAsmInfoELF {
virtual void anchor();
public:
explicit PPCLinuxMCAsmInfo(bool is64Bit);
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
index 59ba9c4..346a9be 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
@@ -23,6 +23,7 @@
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetOpcodes.h"
using namespace llvm;
STATISTIC(MCNumEmitted, "Number of MC instructions emitted");
@@ -76,11 +77,17 @@ public:
SmallVectorImpl<MCFixup> &Fixups) const;
void EncodeInstruction(const MCInst &MI, raw_ostream &OS,
SmallVectorImpl<MCFixup> &Fixups) const {
+ // For fast-isel, a float COPY_TO_REGCLASS can survive this long.
+ // It's just a nop to keep the register classes happy, so don't
+ // generate anything.
+ unsigned Opcode = MI.getOpcode();
+ if (Opcode == TargetOpcode::COPY_TO_REGCLASS)
+ return;
+
uint64_t Bits = getBinaryCodeForInstr(MI, Fixups);
// BL8_NOP etc. all have a size of 8 because of the following 'nop'.
unsigned Size = 4; // FIXME: Have Desc.getSize() return the correct value!
- unsigned Opcode = MI.getOpcode();
if (Opcode == PPC::BL8_NOP || Opcode == PPC::BLA8_NOP ||
Opcode == PPC::BL8_NOP_TLS)
Size = 8;
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp
index 9529267..d7e8402 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp
@@ -54,7 +54,7 @@ PPCMCExpr::EvaluateAsRelocatableImpl(MCValue &Res,
const MCAsmLayout *Layout) const {
MCValue Value;
- if (!getSubExpr()->EvaluateAsRelocatable(Value, *Layout))
+ if (!Layout || !getSubExpr()->EvaluateAsRelocatable(Value, *Layout))
return false;
if (Value.isAbsolute()) {
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
index 5f7a39a..f18d095 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
@@ -14,13 +14,16 @@
#include "PPCMCTargetDesc.h"
#include "InstPrinter/PPCInstPrinter.h"
#include "PPCMCAsmInfo.h"
+#include "PPCTargetStreamer.h"
#include "llvm/MC/MCCodeGenInfo.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MachineLocation.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/TargetRegistry.h"
#define GET_INSTRINFO_MC_DESC
@@ -34,6 +37,9 @@
using namespace llvm;
+// Pin the vtable to this file.
+PPCTargetStreamer::~PPCTargetStreamer() {}
+
static MCInstrInfo *createPPCMCInstrInfo() {
MCInstrInfo *X = new MCInstrInfo();
InitPPCMCInstrInfo(X);
@@ -101,6 +107,29 @@ static MCCodeGenInfo *createPPCMCCodeGenInfo(StringRef TT, Reloc::Model RM,
return X;
}
+namespace {
+class PPCTargetAsmStreamer : public PPCTargetStreamer {
+ formatted_raw_ostream &OS;
+
+public:
+ PPCTargetAsmStreamer(formatted_raw_ostream &OS) : OS(OS) {}
+ virtual void emitTCEntry(const MCSymbol &S) {
+ OS << "\t.tc ";
+ OS << S.getName();
+ OS << "[TC],";
+ OS << S.getName();
+ OS << '\n';
+ }
+};
+
+class PPCTargetELFStreamer : public PPCTargetStreamer {
+ virtual void emitTCEntry(const MCSymbol &S) {
+ // Creates an R_PPC64_TOC relocation.
+ Streamer->EmitSymbolValue(&S, 8);
+ }
+};
+}
+
// This is duplicated code. Refactor this.
static MCStreamer *createMCStreamer(const Target &T, StringRef TT,
MCContext &Ctx, MCAsmBackend &MAB,
@@ -111,7 +140,20 @@ static MCStreamer *createMCStreamer(const Target &T, StringRef TT,
if (Triple(TT).isOSDarwin())
return createMachOStreamer(Ctx, MAB, OS, Emitter, RelaxAll);
- return createELFStreamer(Ctx, MAB, OS, Emitter, RelaxAll, NoExecStack);
+ PPCTargetStreamer *S = new PPCTargetELFStreamer();
+ return createELFStreamer(Ctx, S, MAB, OS, Emitter, RelaxAll, NoExecStack);
+}
+
+static MCStreamer *
+createMCAsmStreamer(MCContext &Ctx, formatted_raw_ostream &OS,
+ bool isVerboseAsm, bool useLoc, bool useCFI,
+ bool useDwarfDirectory, MCInstPrinter *InstPrint,
+ MCCodeEmitter *CE, MCAsmBackend *TAB, bool ShowInst) {
+ PPCTargetStreamer *S = new PPCTargetAsmStreamer(OS);
+
+ return llvm::createAsmStreamer(Ctx, S, OS, isVerboseAsm, useLoc, useCFI,
+ useDwarfDirectory, InstPrint, CE, TAB,
+ ShowInst);
}
static MCInstPrinter *createPPCMCInstPrinter(const Target &T,
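The two PPCTargetStreamer subclasses above give a TOC entry its two forms: textual assembly gets a .tc directive, while the ELF streamer emits an 8-byte symbol value that becomes a TOC-relative relocation in the object. A minimal sketch of the textual side, with std::ostream standing in for formatted_raw_ostream:

#include <iostream>
#include <string>

// Print a TOC entry the way PPCTargetAsmStreamer::emitTCEntry does;
// emitTCEntry(std::cout, "foo") prints "\t.tc foo[TC],foo".
static void emitTCEntry(std::ostream &OS, const std::string &Sym) {
  OS << "\t.tc " << Sym << "[TC]," << Sym << '\n';
}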
@@ -171,6 +213,11 @@ extern "C" void LLVMInitializePowerPCTargetMC() {
TargetRegistry::RegisterMCObjectStreamer(ThePPC64Target, createMCStreamer);
TargetRegistry::RegisterMCObjectStreamer(ThePPC64LETarget, createMCStreamer);
+ // Register the asm streamer.
+ TargetRegistry::RegisterAsmStreamer(ThePPC32Target, createMCAsmStreamer);
+ TargetRegistry::RegisterAsmStreamer(ThePPC64Target, createMCAsmStreamer);
+ TargetRegistry::RegisterAsmStreamer(ThePPC64LETarget, createMCAsmStreamer);
+
// Register the MCInstPrinter.
TargetRegistry::RegisterMCInstPrinter(ThePPC32Target, createPPCMCInstPrinter);
TargetRegistry::RegisterMCInstPrinter(ThePPC64Target, createPPCMCInstPrinter);
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
index 9f29132..0b0ca24 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
@@ -40,12 +40,17 @@ MCCodeEmitter *createPPCMCCodeEmitter(const MCInstrInfo &MCII,
const MCSubtargetInfo &STI,
MCContext &Ctx);
-MCAsmBackend *createPPCAsmBackend(const Target &T, StringRef TT, StringRef CPU);
+MCAsmBackend *createPPCAsmBackend(const Target &T, const MCRegisterInfo &MRI,
+ StringRef TT, StringRef CPU);
/// createPPCELFObjectWriter - Construct a PPC ELF object writer.
MCObjectWriter *createPPCELFObjectWriter(raw_ostream &OS,
bool Is64Bit,
uint8_t OSABI);
+/// createPPCMachObjectWriter - Construct a PPC Mach-O object writer.
+MCObjectWriter *createPPCMachObjectWriter(raw_ostream &OS, bool Is64Bit,
+ uint32_t CPUType,
+ uint32_t CPUSubtype);
} // End llvm namespace
// Generated files will use "namespace PPC". To avoid symbol clash,
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp
new file mode 100644
index 0000000..bbafe2e
--- /dev/null
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp
@@ -0,0 +1,389 @@
+//===-- PPCMachObjectWriter.cpp - PPC Mach-O Writer -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/PPCMCTargetDesc.h"
+#include "MCTargetDesc/PPCFixupKinds.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCAsmLayout.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCMachObjectWriter.h"
+#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/MC/MCValue.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/MachO.h"
+
+using namespace llvm;
+
+namespace {
+class PPCMachObjectWriter : public MCMachObjectTargetWriter {
+ bool RecordScatteredRelocation(MachObjectWriter *Writer,
+ const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCFragment *Fragment,
+ const MCFixup &Fixup, MCValue Target,
+ unsigned Log2Size, uint64_t &FixedValue);
+
+ void RecordPPCRelocation(MachObjectWriter *Writer, const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCFragment *Fragment, const MCFixup &Fixup,
+ MCValue Target, uint64_t &FixedValue);
+
+public:
+ PPCMachObjectWriter(bool Is64Bit, uint32_t CPUType, uint32_t CPUSubtype)
+ : MCMachObjectTargetWriter(Is64Bit, CPUType, CPUSubtype,
+ /*UseAggressiveSymbolFolding=*/Is64Bit) {}
+
+ void RecordRelocation(MachObjectWriter *Writer, const MCAssembler &Asm,
+ const MCAsmLayout &Layout, const MCFragment *Fragment,
+ const MCFixup &Fixup, MCValue Target,
+ uint64_t &FixedValue) {
+ if (Writer->is64Bit()) {
+ report_fatal_error("Relocation emission for MachO/PPC64 unimplemented.");
+ } else
+ RecordPPCRelocation(Writer, Asm, Layout, Fragment, Fixup, Target,
+ FixedValue);
+ }
+};
+}
+
+/// Computes the log2 of the size of the relocation,
+/// used for relocation_info::r_length.
+static unsigned getFixupKindLog2Size(unsigned Kind) {
+ switch (Kind) {
+ default:
+ report_fatal_error("log2size(FixupKind): Unhandled fixup kind!");
+ case FK_PCRel_1:
+ case FK_Data_1:
+ return 0;
+ case FK_PCRel_2:
+ case FK_Data_2:
+ return 1;
+ case FK_PCRel_4:
+ case PPC::fixup_ppc_brcond14:
+ case PPC::fixup_ppc_half16:
+ case PPC::fixup_ppc_br24:
+ case FK_Data_4:
+ return 2;
+ case FK_PCRel_8:
+ case FK_Data_8:
+ return 3;
+ }
+ return 0;
+}
+
+/// Translates generic PPC fixup kind to Mach-O/PPC relocation type enum.
+/// Outline based on PPCELFObjectWriter::getRelocTypeInner().
+static unsigned getRelocType(const MCValue &Target,
+ const MCFixupKind FixupKind, // from Fixup.getKind()
+ const bool IsPCRel) {
+ const MCSymbolRefExpr::VariantKind Modifier =
+ Target.isAbsolute() ? MCSymbolRefExpr::VK_None
+ : Target.getSymA()->getKind();
+ // determine the type of the relocation
+ unsigned Type = MachO::GENERIC_RELOC_VANILLA;
+ if (IsPCRel) { // relative to PC
+ switch ((unsigned)FixupKind) {
+ default:
+ report_fatal_error("Unimplemented fixup kind (relative)");
+ case PPC::fixup_ppc_br24:
+ Type = MachO::PPC_RELOC_BR24; // R_PPC_REL24
+ break;
+ case PPC::fixup_ppc_brcond14:
+ Type = MachO::PPC_RELOC_BR14;
+ break;
+ case PPC::fixup_ppc_half16:
+ switch (Modifier) {
+ default:
+ llvm_unreachable("Unsupported modifier for half16 fixup");
+ case MCSymbolRefExpr::VK_PPC_HA:
+ Type = MachO::PPC_RELOC_HA16;
+ break;
+ case MCSymbolRefExpr::VK_PPC_LO:
+ Type = MachO::PPC_RELOC_LO16;
+ break;
+ case MCSymbolRefExpr::VK_PPC_HI:
+ Type = MachO::PPC_RELOC_HI16;
+ break;
+ }
+ break;
+ }
+ } else {
+ switch ((unsigned)FixupKind) {
+ default:
+ report_fatal_error("Unimplemented fixup kind (absolute)!");
+ case PPC::fixup_ppc_half16:
+ switch (Modifier) {
+ default:
+ llvm_unreachable("Unsupported modifier for half16 fixup");
+ case MCSymbolRefExpr::VK_PPC_HA:
+ Type = MachO::PPC_RELOC_HA16_SECTDIFF;
+ break;
+ case MCSymbolRefExpr::VK_PPC_LO:
+ Type = MachO::PPC_RELOC_LO16_SECTDIFF;
+ break;
+ case MCSymbolRefExpr::VK_PPC_HI:
+ Type = MachO::PPC_RELOC_HI16_SECTDIFF;
+ break;
+ }
+ break;
+ case FK_Data_4:
+ break;
+ case FK_Data_2:
+ break;
+ }
+ }
+ return Type;
+}
+
+static void makeRelocationInfo(MachO::any_relocation_info &MRE,
+ const uint32_t FixupOffset, const uint32_t Index,
+ const unsigned IsPCRel, const unsigned Log2Size,
+ const unsigned IsExtern, const unsigned Type) {
+ MRE.r_word0 = FixupOffset;
+ // The bitfield offsets that work (as determined by trial-and-error)
+ // are different from what is documented in the Mach-O manuals.
+ // This appears to be an endianness issue; reversing the order of the
+ // documented bitfields in <llvm/Support/MachO.h> fixes this (but
+ // breaks x86/ARM assembly).
+ MRE.r_word1 = ((Index << 8) | // was << 0
+ (IsPCRel << 7) | // was << 24
+ (Log2Size << 5) | // was << 25
+ (IsExtern << 4) | // was << 27
+ (Type << 0)); // was << 28
+}
+
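makeRelocationInfo packs struct relocation_info by hand because, on this big-endian writer, the documented bitfield order comes out reversed. The shifts give symbolnum 24 bits (8-31), pcrel bit 7, length bits 5-6, extern bit 4, and type bits 0-3. A standalone check of the packing (the sample values in main are made up for illustration):

#include <cassert>
#include <cstdint>

// Pack r_word1 exactly as makeRelocationInfo above does.
static uint32_t packWord1(uint32_t Index, bool PCRel, unsigned Log2Size,
                          bool Extern, unsigned Type) {
  return (Index << 8) | (uint32_t(PCRel) << 7) | (Log2Size << 5) |
         (uint32_t(Extern) << 4) | Type;
}

int main() {
  // Index 2, pc-relative, 4 bytes (log2 = 2), external, type 3:
  // 0x200 | 0x80 | 0x40 | 0x10 | 0x3 == 0x2d3.
  assert(packWord1(2, true, 2, true, 3) == 0x2d3u);
}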
+static void
+makeScatteredRelocationInfo(MachO::any_relocation_info &MRE,
+ const uint32_t Addr, const unsigned Type,
+ const unsigned Log2Size, const unsigned IsPCRel,
+ const uint32_t Value2) {
+ // For notes on bitfield positions and endianness, see:
+ // https://developer.apple.com/library/mac/documentation/developertools/conceptual/MachORuntime/Reference/reference.html#//apple_ref/doc/uid/20001298-scattered_relocation_entry
+ MRE.r_word0 = ((Addr << 0) | (Type << 24) | (Log2Size << 28) |
+ (IsPCRel << 30) | MachO::R_SCATTERED);
+ MRE.r_word1 = Value2;
+}
+
+/// Compute fixup offset (address).
+static uint32_t getFixupOffset(const MCAsmLayout &Layout,
+ const MCFragment *Fragment,
+ const MCFixup &Fixup) {
+ uint32_t FixupOffset = Layout.getFragmentOffset(Fragment) + Fixup.getOffset();
+ // On Mach-O, fixup_ppc_half16 relocations must refer to the
+ // start of the instruction, not to the second halfword as ELF does.
+ if (unsigned(Fixup.getKind()) == PPC::fixup_ppc_half16)
+ FixupOffset &= ~uint32_t(3);
+ return FixupOffset;
+}
+
+/// \return false if falling back to using non-scattered relocation,
+/// otherwise true for normal scattered relocation.
+/// Based on X86MachObjectWriter::RecordScatteredRelocation
+/// and ARMMachObjectWriter::RecordScatteredRelocation.
+bool PPCMachObjectWriter::RecordScatteredRelocation(
+ MachObjectWriter *Writer, const MCAssembler &Asm, const MCAsmLayout &Layout,
+ const MCFragment *Fragment, const MCFixup &Fixup, MCValue Target,
+ unsigned Log2Size, uint64_t &FixedValue) {
+ // caller already computes these, can we just pass and reuse?
+ const uint32_t FixupOffset = getFixupOffset(Layout, Fragment, Fixup);
+ const MCFixupKind FK = Fixup.getKind();
+ const unsigned IsPCRel = Writer->isFixupKindPCRel(Asm, FK);
+ const unsigned Type = getRelocType(Target, FK, IsPCRel);
+
+ // Is this a local or SECTDIFF relocation entry?
+ // SECTDIFF relocation entries have symbol subtractions,
+ // and require two entries, the first for the add-symbol value,
+ // the second for the subtract-symbol value.
+
+ // See <reloc.h>.
+ const MCSymbol *A = &Target.getSymA()->getSymbol();
+ MCSymbolData *A_SD = &Asm.getSymbolData(*A);
+
+ if (!A_SD->getFragment())
+ report_fatal_error("symbol '" + A->getName() +
+ "' can not be undefined in a subtraction expression");
+
+ uint32_t Value = Writer->getSymbolAddress(A_SD, Layout);
+ uint64_t SecAddr =
+ Writer->getSectionAddress(A_SD->getFragment()->getParent());
+ FixedValue += SecAddr;
+ uint32_t Value2 = 0;
+
+ if (const MCSymbolRefExpr *B = Target.getSymB()) {
+ MCSymbolData *B_SD = &Asm.getSymbolData(B->getSymbol());
+
+ if (!B_SD->getFragment())
+ report_fatal_error("symbol '" + B->getSymbol().getName() +
+ "' can not be undefined in a subtraction expression");
+
+ // FIXME: is Type correct? see include/llvm/Support/MachO.h
+ Value2 = Writer->getSymbolAddress(B_SD, Layout);
+ FixedValue -= Writer->getSectionAddress(B_SD->getFragment()->getParent());
+ }
+ // FIXME: does FixedValue get used??
+
+ // Relocations are written out in reverse order, so the PAIR comes first.
+ if (Type == MachO::PPC_RELOC_SECTDIFF ||
+ Type == MachO::PPC_RELOC_HI16_SECTDIFF ||
+ Type == MachO::PPC_RELOC_LO16_SECTDIFF ||
+ Type == MachO::PPC_RELOC_HA16_SECTDIFF ||
+ Type == MachO::PPC_RELOC_LO14_SECTDIFF ||
+ Type == MachO::PPC_RELOC_LOCAL_SECTDIFF) {
+ // X86 had this piece, but ARM does not
+ // If the offset is too large to fit in a scattered relocation,
+ // we're hosed. It's an unfortunate limitation of the MachO format.
+ if (FixupOffset > 0xffffff) {
+ char Buffer[32];
+ format("0x%x", FixupOffset).print(Buffer, sizeof(Buffer));
+ Asm.getContext().FatalError(Fixup.getLoc(),
+ Twine("Section too large, can't encode "
+ "r_address (") +
+ Buffer + ") into 24 bits of scattered "
+ "relocation entry.");
+ llvm_unreachable("fatal error returned?!");
+ }
+
+ // Is this supposed to follow MCTargetDesc/PPCAsmBackend.cpp:adjustFixupValue()?
+ // see PPCMCExpr::EvaluateAsRelocatableImpl()
+ uint32_t other_half = 0;
+ switch (Type) {
+ case MachO::PPC_RELOC_LO16_SECTDIFF:
+ other_half = (FixedValue >> 16) & 0xffff;
+ // applyFixupOffset no longer extracts the high part because it now
+ // assumes this was already done. That does not appear to hold for the
+ // FixedValue needed with Mach-O relocs, so adjust FixedValue again here.
+ FixedValue &= 0xffff;
+ break;
+ case MachO::PPC_RELOC_HA16_SECTDIFF:
+ other_half = FixedValue & 0xffff;
+ FixedValue =
+ ((FixedValue >> 16) + ((FixedValue & 0x8000) ? 1 : 0)) & 0xffff;
+ break;
+ case MachO::PPC_RELOC_HI16_SECTDIFF:
+ other_half = FixedValue & 0xffff;
+ FixedValue = (FixedValue >> 16) & 0xffff;
+ break;
+ default:
+ llvm_unreachable("Invalid PPC scattered relocation type.");
+ break;
+ }
+
+ MachO::any_relocation_info MRE;
+ makeScatteredRelocationInfo(MRE, other_half, MachO::GENERIC_RELOC_PAIR,
+ Log2Size, IsPCRel, Value2);
+ Writer->addRelocation(Fragment->getParent(), MRE);
+ } else {
+ // If the offset is more than 24 bits, it won't fit in a scattered
+ // relocation offset field, so we fall back to using a non-scattered
+ // relocation. This is a bit risky, as if the offset reaches out of
+ // the block and the linker is doing scattered loading on this
+ // symbol, things can go badly.
+ //
+ // Required for 'as' compatibility.
+ if (FixupOffset > 0xffffff)
+ return false;
+ }
+ MachO::any_relocation_info MRE;
+ makeScatteredRelocationInfo(MRE, FixupOffset, Type, Log2Size, IsPCRel, Value);
+ Writer->addRelocation(Fragment->getParent(), MRE);
+ return true;
+}
+
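The HA16 case above is the standard @ha adjustment: D-form instructions apply the low half sign-extended, so the high half must absorb a carry whenever bit 15 of the value is set; ((V >> 16) + ((V & 0x8000) ? 1 : 0)) is the same as ((V + 0x8000) >> 16) modulo 16 bits. A small self-contained check:

#include <cassert>
#include <cstdint>

static uint32_t lo16(uint32_t V) { return V & 0xffff; }
static uint32_t ha16(uint32_t V) {
  return ((V + 0x8000) >> 16) & 0xffff; // carries when bit 15 is set
}

int main() {
  uint32_t V = 0x12348765;                 // bit 15 of the value is set
  assert(ha16(V) == 0x1235 && lo16(V) == 0x8765);
  // Reassembly with a sign-extended low half gives the value back:
  assert(ha16(V) * 0x10000u + (uint32_t)(int16_t)lo16(V) == V);
}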
+// see PPCELFObjectWriter for a general outline of cases
+void PPCMachObjectWriter::RecordPPCRelocation(
+ MachObjectWriter *Writer, const MCAssembler &Asm, const MCAsmLayout &Layout,
+ const MCFragment *Fragment, const MCFixup &Fixup, MCValue Target,
+ uint64_t &FixedValue) {
+ const MCFixupKind FK = Fixup.getKind(); // unsigned
+ const unsigned Log2Size = getFixupKindLog2Size(FK);
+ const bool IsPCRel = Writer->isFixupKindPCRel(Asm, FK);
+ const unsigned RelocType = getRelocType(Target, FK, IsPCRel);
+
+ // If this is a difference or a defined symbol plus an offset, then we need a
+ // scattered relocation entry. Differences always require scattered
+ // relocations.
+ if (Target.getSymB() &&
+ // Q: are branch targets ever scattered?
+ RelocType != MachO::PPC_RELOC_BR24 &&
+ RelocType != MachO::PPC_RELOC_BR14) {
+ RecordScatteredRelocation(Writer, Asm, Layout, Fragment, Fixup, Target,
+ Log2Size, FixedValue);
+ return;
+ }
+
+ // this doesn't seem right for RIT_PPC_BR24
+ // Get the symbol data, if any.
+ MCSymbolData *SD = 0;
+ if (Target.getSymA())
+ SD = &Asm.getSymbolData(Target.getSymA()->getSymbol());
+
+ // See <reloc.h>.
+ const uint32_t FixupOffset = getFixupOffset(Layout, Fragment, Fixup);
+ unsigned Index = 0;
+ unsigned IsExtern = 0;
+ unsigned Type = RelocType;
+
+ if (Target.isAbsolute()) { // constant
+ // SymbolNum of 0 indicates the absolute section.
+ //
+ // FIXME: Currently, these are never generated (see code below). I cannot
+ // find a case where they are actually emitted.
+ report_fatal_error("FIXME: relocations to absolute targets "
+ "not yet implemented");
+ // the above line stolen from ARM, not sure
+ } else {
+ // Resolve constant variables.
+ if (SD->getSymbol().isVariable()) {
+ int64_t Res;
+ if (SD->getSymbol().getVariableValue()->EvaluateAsAbsolute(
+ Res, Layout, Writer->getSectionAddressMap())) {
+ FixedValue = Res;
+ return;
+ }
+ }
+
+ // Check whether we need an external or internal relocation.
+ if (Writer->doesSymbolRequireExternRelocation(SD)) {
+ IsExtern = 1;
+ Index = SD->getIndex();
+ // For external relocations, make sure to offset the fixup value to
+ // compensate for the addend of the symbol address, if it was
+ // undefined. This occurs with weak definitions, for example.
+ if (!SD->Symbol->isUndefined())
+ FixedValue -= Layout.getSymbolOffset(SD);
+ } else {
+ // The index is the section ordinal (1-based).
+ const MCSectionData &SymSD =
+ Asm.getSectionData(SD->getSymbol().getSection());
+ Index = SymSD.getOrdinal() + 1;
+ FixedValue += Writer->getSectionAddress(&SymSD);
+ }
+ if (IsPCRel)
+ FixedValue -= Writer->getSectionAddress(Fragment->getParent());
+ }
+
+ // struct relocation_info (8 bytes)
+ MachO::any_relocation_info MRE;
+ makeRelocationInfo(MRE, FixupOffset, Index, IsPCRel, Log2Size, IsExtern,
+ Type);
+ Writer->addRelocation(Fragment->getParent(), MRE);
+}
+
+MCObjectWriter *llvm::createPPCMachObjectWriter(raw_ostream &OS, bool Is64Bit,
+ uint32_t CPUType,
+ uint32_t CPUSubtype) {
+ return createMachObjectWriter(
+ new PPCMachObjectWriter(Is64Bit, CPUType, CPUSubtype), OS,
+ /*IsLittleEndian=*/false);
+}
diff --git a/lib/Target/PowerPC/PPC.td b/lib/Target/PowerPC/PPC.td
index 806822c..54e3d40 100644
--- a/lib/Target/PowerPC/PPC.td
+++ b/lib/Target/PowerPC/PPC.td
@@ -57,6 +57,8 @@ def FeatureMFOCRF : SubtargetFeature<"mfocrf","HasMFOCRF", "true",
"Enable the MFOCRF instruction">;
def FeatureFSqrt : SubtargetFeature<"fsqrt","HasFSQRT", "true",
"Enable the fsqrt instruction">;
+def FeatureFCPSGN : SubtargetFeature<"fcpsgn", "HasFCPSGN", "true",
+ "Enable the fcpsgn instruction">;
def FeatureFRE : SubtargetFeature<"fre", "HasFRE", "true",
"Enable the fre instruction">;
def FeatureFRES : SubtargetFeature<"fres", "HasFRES", "true",
@@ -85,6 +87,13 @@ def FeatureBookE : SubtargetFeature<"booke", "IsBookE", "true",
"Enable Book E instructions">;
def FeatureQPX : SubtargetFeature<"qpx","HasQPX", "true",
"Enable QPX instructions">;
+def FeatureVSX : SubtargetFeature<"vsx","HasVSX", "true",
+ "Enable VSX instructions">;
+
+def DeprecatedMFTB : SubtargetFeature<"", "DeprecatedMFTB", "true",
+ "Treat mftb as deprecated">;
+def DeprecatedDST : SubtargetFeature<"", "DeprecatedDST", "true",
+ "Treat vector data stream cache control instructions as deprecated">;
// Note: Future features to add when support is extended to more
// recent ISA levels:
@@ -146,10 +155,10 @@ include "PPCInstrInfo.td"
def : Processor<"generic", G3Itineraries, [Directive32]>;
def : Processor<"440", PPC440Itineraries, [Directive440, FeatureISEL,
FeatureFRES, FeatureFRSQRTE,
- FeatureBookE]>;
+ FeatureBookE, DeprecatedMFTB]>;
def : Processor<"450", PPC440Itineraries, [Directive440, FeatureISEL,
FeatureFRES, FeatureFRSQRTE,
- FeatureBookE]>;
+ FeatureBookE, DeprecatedMFTB]>;
def : Processor<"601", G3Itineraries, [Directive601]>;
def : Processor<"602", G3Itineraries, [Directive602]>;
def : Processor<"603", G3Itineraries, [Directive603,
@@ -185,29 +194,32 @@ def : ProcessorModel<"g5", G5Model,
[Directive970, FeatureAltivec,
FeatureMFOCRF, FeatureFSqrt, FeatureSTFIWX,
FeatureFRES, FeatureFRSQRTE,
- Feature64Bit /*, Feature64BitRegs */]>;
+ Feature64Bit /*, Feature64BitRegs */,
+ DeprecatedMFTB, DeprecatedDST]>;
def : ProcessorModel<"e500mc", PPCE500mcModel,
[DirectiveE500mc, FeatureMFOCRF,
- FeatureSTFIWX, FeatureBookE, FeatureISEL]>;
+ FeatureSTFIWX, FeatureBookE, FeatureISEL,
+ DeprecatedMFTB]>;
def : ProcessorModel<"e5500", PPCE5500Model,
[DirectiveE5500, FeatureMFOCRF, Feature64Bit,
- FeatureSTFIWX, FeatureBookE, FeatureISEL]>;
+ FeatureSTFIWX, FeatureBookE, FeatureISEL,
+ DeprecatedMFTB]>;
def : ProcessorModel<"a2", PPCA2Model,
[DirectiveA2, FeatureBookE, FeatureMFOCRF,
- FeatureFSqrt, FeatureFRE, FeatureFRES,
+ FeatureFCPSGN, FeatureFSqrt, FeatureFRE, FeatureFRES,
FeatureFRSQRTE, FeatureFRSQRTES, FeatureRecipPrec,
FeatureSTFIWX, FeatureLFIWAX,
FeatureFPRND, FeatureFPCVT, FeatureISEL,
FeaturePOPCNTD, FeatureLDBRX, Feature64Bit
- /*, Feature64BitRegs */]>;
+ /*, Feature64BitRegs */, DeprecatedMFTB]>;
def : ProcessorModel<"a2q", PPCA2Model,
[DirectiveA2, FeatureBookE, FeatureMFOCRF,
- FeatureFSqrt, FeatureFRE, FeatureFRES,
+ FeatureFCPSGN, FeatureFSqrt, FeatureFRE, FeatureFRES,
FeatureFRSQRTE, FeatureFRSQRTES, FeatureRecipPrec,
FeatureSTFIWX, FeatureLFIWAX,
FeatureFPRND, FeatureFPCVT, FeatureISEL,
FeaturePOPCNTD, FeatureLDBRX, Feature64Bit
- /*, Feature64BitRegs */, FeatureQPX]>;
+ /*, Feature64BitRegs */, FeatureQPX, DeprecatedMFTB]>;
def : ProcessorModel<"pwr3", G5Model,
[DirectivePwr3, FeatureAltivec,
FeatureFRES, FeatureFRSQRTE, FeatureMFOCRF,
@@ -220,32 +232,37 @@ def : ProcessorModel<"pwr5", G5Model,
[DirectivePwr5, FeatureAltivec, FeatureMFOCRF,
FeatureFSqrt, FeatureFRE, FeatureFRES,
FeatureFRSQRTE, FeatureFRSQRTES,
- FeatureSTFIWX, Feature64Bit]>;
+ FeatureSTFIWX, Feature64Bit,
+ DeprecatedMFTB, DeprecatedDST]>;
def : ProcessorModel<"pwr5x", G5Model,
[DirectivePwr5x, FeatureAltivec, FeatureMFOCRF,
FeatureFSqrt, FeatureFRE, FeatureFRES,
FeatureFRSQRTE, FeatureFRSQRTES,
- FeatureSTFIWX, FeatureFPRND, Feature64Bit]>;
+ FeatureSTFIWX, FeatureFPRND, Feature64Bit,
+ DeprecatedMFTB, DeprecatedDST]>;
def : ProcessorModel<"pwr6", G5Model,
[DirectivePwr6, FeatureAltivec,
- FeatureMFOCRF, FeatureFSqrt, FeatureFRE,
+ FeatureMFOCRF, FeatureFCPSGN, FeatureFSqrt, FeatureFRE,
FeatureFRES, FeatureFRSQRTE, FeatureFRSQRTES,
FeatureRecipPrec, FeatureSTFIWX, FeatureLFIWAX,
- FeatureFPRND, Feature64Bit /*, Feature64BitRegs */]>;
+ FeatureFPRND, Feature64Bit /*, Feature64BitRegs */,
+ DeprecatedMFTB, DeprecatedDST]>;
def : ProcessorModel<"pwr6x", G5Model,
[DirectivePwr5x, FeatureAltivec, FeatureMFOCRF,
- FeatureFSqrt, FeatureFRE, FeatureFRES,
+ FeatureFCPSGN, FeatureFSqrt, FeatureFRE, FeatureFRES,
FeatureFRSQRTE, FeatureFRSQRTES, FeatureRecipPrec,
FeatureSTFIWX, FeatureLFIWAX,
- FeatureFPRND, Feature64Bit]>;
+ FeatureFPRND, Feature64Bit,
+ DeprecatedMFTB, DeprecatedDST]>;
def : ProcessorModel<"pwr7", G5Model,
[DirectivePwr7, FeatureAltivec,
- FeatureMFOCRF, FeatureFSqrt, FeatureFRE,
+ FeatureMFOCRF, FeatureFCPSGN, FeatureFSqrt, FeatureFRE,
FeatureFRES, FeatureFRSQRTE, FeatureFRSQRTES,
FeatureRecipPrec, FeatureSTFIWX, FeatureLFIWAX,
FeatureFPRND, FeatureFPCVT, FeatureISEL,
FeaturePOPCNTD, FeatureLDBRX,
- Feature64Bit /*, Feature64BitRegs */]>;
+ Feature64Bit /*, Feature64BitRegs */,
+ DeprecatedMFTB, DeprecatedDST]>;
def : Processor<"ppc", G3Itineraries, [Directive32]>;
def : ProcessorModel<"ppc64", G5Model,
[Directive64, FeatureAltivec,
diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp
index bbfad87..ada34ed 100644
--- a/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -23,6 +23,7 @@
#include "MCTargetDesc/PPCMCExpr.h"
#include "PPCSubtarget.h"
#include "PPCTargetMachine.h"
+#include "PPCTargetStreamer.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringExtras.h"
@@ -202,7 +203,7 @@ void PPCAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
.getGVStubEntry(SymToPrint);
if (StubSym.getPointer() == 0)
StubSym = MachineModuleInfoImpl::
- StubValueTy(Mang->getSymbol(GV), !GV->hasInternalLinkage());
+ StubValueTy(getSymbol(GV), !GV->hasInternalLinkage());
} else if (GV->isDeclaration() || GV->hasCommonLinkage() ||
GV->hasAvailableExternallyLinkage()) {
SymToPrint = GetSymbolWithGlobalValueBase(GV, "$non_lazy_ptr");
@@ -212,12 +213,12 @@ void PPCAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
getHiddenGVStubEntry(SymToPrint);
if (StubSym.getPointer() == 0)
StubSym = MachineModuleInfoImpl::
- StubValueTy(Mang->getSymbol(GV), !GV->hasInternalLinkage());
+ StubValueTy(getSymbol(GV), !GV->hasInternalLinkage());
} else {
- SymToPrint = Mang->getSymbol(GV);
+ SymToPrint = getSymbol(GV);
}
} else {
- SymToPrint = Mang->getSymbol(GV);
+ SymToPrint = getSymbol(GV);
}
O << *SymToPrint;
@@ -363,7 +364,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
assert(MO.isGlobal() || MO.isCPI() || MO.isJTI());
MCSymbol *MOSymbol = 0;
if (MO.isGlobal())
- MOSymbol = Mang->getSymbol(MO.getGlobal());
+ MOSymbol = getSymbol(MO.getGlobal());
else if (MO.isCPI())
MOSymbol = GetCPISymbol(MO.getIndex());
else if (MO.isJTI())
@@ -402,7 +403,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
const GlobalAlias *GAlias = dyn_cast<GlobalAlias>(GValue);
const GlobalValue *RealGValue = GAlias ?
GAlias->resolveAliasedGlobal(false) : GValue;
- MOSymbol = Mang->getSymbol(RealGValue);
+ MOSymbol = getSymbol(RealGValue);
const GlobalVariable *GVar = dyn_cast<GlobalVariable>(RealGValue);
IsExternal = GVar && !GVar->hasInitializer();
IsCommon = GVar && RealGValue->hasCommonLinkage();
@@ -413,7 +414,8 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
else if (MO.isJTI())
MOSymbol = GetJTISymbol(MO.getIndex());
- if (IsExternal || IsFunction || IsCommon || IsAvailExt || MO.isJTI())
+ if (IsExternal || IsFunction || IsCommon || IsAvailExt || MO.isJTI() ||
+ TM.getCodeModel() == CodeModel::Large)
MOSymbol = lookUpOrCreateTOCEntry(MOSymbol);
const MCExpr *Exp =
@@ -438,18 +440,22 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
if (MO.isJTI())
MOSymbol = lookUpOrCreateTOCEntry(GetJTISymbol(MO.getIndex()));
- else if (MO.isCPI())
+ else if (MO.isCPI()) {
MOSymbol = GetCPISymbol(MO.getIndex());
+ if (TM.getCodeModel() == CodeModel::Large)
+ MOSymbol = lookUpOrCreateTOCEntry(MOSymbol);
+ }
else if (MO.isGlobal()) {
const GlobalValue *GValue = MO.getGlobal();
const GlobalAlias *GAlias = dyn_cast<GlobalAlias>(GValue);
const GlobalValue *RealGValue = GAlias ?
GAlias->resolveAliasedGlobal(false) : GValue;
- MOSymbol = Mang->getSymbol(RealGValue);
+ MOSymbol = getSymbol(RealGValue);
const GlobalVariable *GVar = dyn_cast<GlobalVariable>(RealGValue);
if (!GVar || !GVar->hasInitializer() || RealGValue->hasCommonLinkage() ||
- RealGValue->hasAvailableExternallyLinkage())
+ RealGValue->hasAvailableExternallyLinkage() ||
+ TM.getCodeModel() == CodeModel::Large)
MOSymbol = lookUpOrCreateTOCEntry(MOSymbol);
}
@@ -479,14 +485,14 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
const GlobalAlias *GAlias = dyn_cast<GlobalAlias>(GValue);
const GlobalValue *RealGValue = GAlias ?
GAlias->resolveAliasedGlobal(false) : GValue;
- MOSymbol = Mang->getSymbol(RealGValue);
+ MOSymbol = getSymbol(RealGValue);
const GlobalVariable *GVar = dyn_cast<GlobalVariable>(RealGValue);
IsExternal = GVar && !GVar->hasInitializer();
IsFunction = !GVar;
} else if (MO.isCPI())
MOSymbol = GetCPISymbol(MO.getIndex());
- if (IsFunction || IsExternal)
+ if (IsFunction || IsExternal || TM.getCodeModel() == CodeModel::Large)
MOSymbol = lookUpOrCreateTOCEntry(MOSymbol);
const MCExpr *Exp =
@@ -502,7 +508,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC");
const MachineOperand &MO = MI->getOperand(2);
const GlobalValue *GValue = MO.getGlobal();
- MCSymbol *MOSymbol = Mang->getSymbol(GValue);
+ MCSymbol *MOSymbol = getSymbol(GValue);
const MCExpr *SymGotTprel =
MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TPREL_HA,
OutContext);
@@ -520,7 +526,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
TmpInst.setOpcode(PPC::LD);
const MachineOperand &MO = MI->getOperand(1);
const GlobalValue *GValue = MO.getGlobal();
- MCSymbol *MOSymbol = Mang->getSymbol(GValue);
+ MCSymbol *MOSymbol = getSymbol(GValue);
const MCExpr *Exp =
MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TPREL_LO,
OutContext);
@@ -534,7 +540,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC");
const MachineOperand &MO = MI->getOperand(2);
const GlobalValue *GValue = MO.getGlobal();
- MCSymbol *MOSymbol = Mang->getSymbol(GValue);
+ MCSymbol *MOSymbol = getSymbol(GValue);
const MCExpr *SymGotTlsGD =
MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TLSGD_HA,
OutContext);
@@ -550,7 +556,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC");
const MachineOperand &MO = MI->getOperand(2);
const GlobalValue *GValue = MO.getGlobal();
- MCSymbol *MOSymbol = Mang->getSymbol(GValue);
+ MCSymbol *MOSymbol = getSymbol(GValue);
const MCExpr *SymGotTlsGD =
MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TLSGD_LO,
OutContext);
@@ -571,7 +577,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
MCSymbolRefExpr::Create(TlsGetAddr, MCSymbolRefExpr::VK_None, OutContext);
const MachineOperand &MO = MI->getOperand(2);
const GlobalValue *GValue = MO.getGlobal();
- MCSymbol *MOSymbol = Mang->getSymbol(GValue);
+ MCSymbol *MOSymbol = getSymbol(GValue);
const MCExpr *SymVar =
MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_TLSGD,
OutContext);
@@ -586,7 +592,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC");
const MachineOperand &MO = MI->getOperand(2);
const GlobalValue *GValue = MO.getGlobal();
- MCSymbol *MOSymbol = Mang->getSymbol(GValue);
+ MCSymbol *MOSymbol = getSymbol(GValue);
const MCExpr *SymGotTlsLD =
MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TLSLD_HA,
OutContext);
@@ -602,7 +608,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC");
const MachineOperand &MO = MI->getOperand(2);
const GlobalValue *GValue = MO.getGlobal();
- MCSymbol *MOSymbol = Mang->getSymbol(GValue);
+ MCSymbol *MOSymbol = getSymbol(GValue);
const MCExpr *SymGotTlsLD =
MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TLSLD_LO,
OutContext);
@@ -623,7 +629,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
MCSymbolRefExpr::Create(TlsGetAddr, MCSymbolRefExpr::VK_None, OutContext);
const MachineOperand &MO = MI->getOperand(2);
const GlobalValue *GValue = MO.getGlobal();
- MCSymbol *MOSymbol = Mang->getSymbol(GValue);
+ MCSymbol *MOSymbol = getSymbol(GValue);
const MCExpr *SymVar =
MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_TLSLD,
OutContext);
@@ -638,7 +644,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC");
const MachineOperand &MO = MI->getOperand(2);
const GlobalValue *GValue = MO.getGlobal();
- MCSymbol *MOSymbol = Mang->getSymbol(GValue);
+ MCSymbol *MOSymbol = getSymbol(GValue);
const MCExpr *SymDtprel =
MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_DTPREL_HA,
OutContext);
@@ -654,7 +660,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC");
const MachineOperand &MO = MI->getOperand(2);
const GlobalValue *GValue = MO.getGlobal();
- MCSymbol *MOSymbol = Mang->getSymbol(GValue);
+ MCSymbol *MOSymbol = getSymbol(GValue);
const MCExpr *SymDtprel =
MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_DTPREL_LO,
OutContext);
@@ -704,6 +710,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
break;
case PPC::LD:
case PPC::STD:
+ case PPC::LWA_32:
case PPC::LWA: {
// Verify alignment is legal, so we don't create relocations
// that can't be supported.
@@ -765,6 +772,9 @@ bool PPCLinuxAsmPrinter::doFinalization(Module &M) {
bool isPPC64 = TD->getPointerSizeInBits() == 64;
+ PPCTargetStreamer &TS =
+ static_cast<PPCTargetStreamer &>(OutStreamer.getTargetStreamer());
+
if (isPPC64 && !TOC.empty()) {
const MCSectionELF *Section = OutStreamer.getContext().getELFSection(".toc",
ELF::SHT_PROGBITS, ELF::SHF_WRITE | ELF::SHF_ALLOC,
@@ -775,7 +785,7 @@ bool PPCLinuxAsmPrinter::doFinalization(Module &M) {
E = TOC.end(); I != E; ++I) {
OutStreamer.EmitLabel(I->second);
MCSymbol *S = OutContext.GetOrCreateSymbol(I->first->getName());
- OutStreamer.EmitTCEntry(*S);
+ TS.emitTCEntry(*S);
}
}
@@ -1051,7 +1061,7 @@ bool PPCDarwinAsmPrinter::doFinalization(Module &M) {
MCSymbol *NLPSym = GetSymbolWithGlobalValueBase(*I, "$non_lazy_ptr");
MachineModuleInfoImpl::StubValueTy &StubSym =
MMIMacho.getGVStubEntry(NLPSym);
- StubSym = MachineModuleInfoImpl::StubValueTy(Mang->getSymbol(*I), true);
+ StubSym = MachineModuleInfoImpl::StubValueTy(getSymbol(*I), true);
}
}
}
diff --git a/lib/Target/PowerPC/PPCCTRLoops.cpp b/lib/Target/PowerPC/PPCCTRLoops.cpp
index 4e30c537..4224ae2 100644
--- a/lib/Target/PowerPC/PPCCTRLoops.cpp
+++ b/lib/Target/PowerPC/PPCCTRLoops.cpp
@@ -253,12 +253,19 @@ bool PPCCTRLoops::mightUseCTR(const Triple &TT, BasicBlock *BB) {
case Intrinsic::sin:
case Intrinsic::cos:
return true;
+ case Intrinsic::copysign:
+ if (CI->getArgOperand(0)->getType()->getScalarType()->
+ isPPC_FP128Ty())
+ return true;
+ else
+ continue; // ISD::FCOPYSIGN is never a library call.
case Intrinsic::sqrt: Opcode = ISD::FSQRT; break;
case Intrinsic::floor: Opcode = ISD::FFLOOR; break;
case Intrinsic::ceil: Opcode = ISD::FCEIL; break;
case Intrinsic::trunc: Opcode = ISD::FTRUNC; break;
case Intrinsic::rint: Opcode = ISD::FRINT; break;
case Intrinsic::nearbyint: Opcode = ISD::FNEARBYINT; break;
+ case Intrinsic::round: Opcode = ISD::FROUND; break;
}
}
@@ -283,8 +290,9 @@ bool PPCCTRLoops::mightUseCTR(const Triple &TT, BasicBlock *BB) {
default: return true;
case LibFunc::copysign:
case LibFunc::copysignf:
- case LibFunc::copysignl:
continue; // ISD::FCOPYSIGN is never a library call.
+ case LibFunc::copysignl:
+ return true;
case LibFunc::fabs:
case LibFunc::fabsf:
case LibFunc::fabsl:
@@ -309,6 +317,10 @@ bool PPCCTRLoops::mightUseCTR(const Triple &TT, BasicBlock *BB) {
case LibFunc::rintf:
case LibFunc::rintl:
Opcode = ISD::FRINT; break;
+ case LibFunc::round:
+ case LibFunc::roundf:
+ case LibFunc::roundl:
+ Opcode = ISD::FROUND; break;
case LibFunc::trunc:
case LibFunc::truncf:
case LibFunc::truncl:
diff --git a/lib/Target/PowerPC/PPCCallingConv.td b/lib/Target/PowerPC/PPCCallingConv.td
index a584188..e8e7f4c 100644
--- a/lib/Target/PowerPC/PPCCallingConv.td
+++ b/lib/Target/PowerPC/PPCCallingConv.td
@@ -37,6 +37,37 @@ def RetCC_PPC : CallingConv<[
]>;
+// Note that we don't currently have calling conventions for 64-bit
+// PowerPC, but handle all the complexities of the ABI in the lowering
+// logic. FIXME: See if the logic can be simplified with use of CCs.
+// This may require some extensions to current table generation.
+
+// Simple calling convention for 64-bit ELF PowerPC fast isel.
+// Only handle ints and floats. All ints are promoted to i64.
+// Vector types and quadword ints are not handled.
+def CC_PPC64_ELF_FIS : CallingConv<[
+ CCIfType<[i8], CCPromoteToType<i64>>,
+ CCIfType<[i16], CCPromoteToType<i64>>,
+ CCIfType<[i32], CCPromoteToType<i64>>,
+ CCIfType<[i64], CCAssignToReg<[X3, X4, X5, X6, X7, X8, X9, X10]>>,
+ CCIfType<[f32, f64], CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8]>>
+]>;
+
+// Simple return-value convention for 64-bit ELF PowerPC fast isel.
+// All small ints are promoted to i64. Vector types, quadword ints,
+// and multiple register returns are "supported" to avoid compile
+// errors, but none are handled by the fast selector.
+def RetCC_PPC64_ELF_FIS : CallingConv<[
+ CCIfType<[i8], CCPromoteToType<i64>>,
+ CCIfType<[i16], CCPromoteToType<i64>>,
+ CCIfType<[i32], CCPromoteToType<i64>>,
+ CCIfType<[i64], CCAssignToReg<[X3, X4]>>,
+ CCIfType<[i128], CCAssignToReg<[X3, X4, X5, X6]>>,
+ CCIfType<[f32], CCAssignToReg<[F1, F2]>>,
+ CCIfType<[f64], CCAssignToReg<[F1, F2, F3, F4]>>,
+ CCIfType<[v16i8, v8i16, v4i32, v4f32], CCAssignToReg<[V2]>>
+]>;
+
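CC_PPC64_ELF_FIS is deliberately small: integers are widened to i64 and take X3-X10 in order, floats take F1-F8, and anything else falls back to SelectionDAG. A sketch of that assignment under the same assumptions (ArgNo counts previously assigned arguments of the same kind; the helper name is made up):

#include <string>

// Next register for an argument under CC_PPC64_ELF_FIS above; an empty
// string means no register is left and fast-isel gives up on the call.
static std::string pickArgReg(bool IsFloat, unsigned ArgNo) {
  if (ArgNo >= 8)
    return "";
  return (IsFloat ? "F" : "X") + std::to_string(ArgNo + (IsFloat ? 1 : 3));
}
// e.g. pickArgReg(false, 0) == "X3", pickArgReg(true, 2) == "F3".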
//===----------------------------------------------------------------------===//
// PowerPC System V Release 4 32-bit ABI
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/PowerPC/PPCFastISel.cpp b/lib/Target/PowerPC/PPCFastISel.cpp
index 8cbf1fb..09117e7 100644
--- a/lib/Target/PowerPC/PPCFastISel.cpp
+++ b/lib/Target/PowerPC/PPCFastISel.cpp
@@ -37,6 +37,25 @@
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
+//===----------------------------------------------------------------------===//
+//
+// TBD:
+// FastLowerArguments: Handle simple cases.
+// PPCMaterializeGV: Handle TLS.
+// SelectCall: Handle function pointers.
+// SelectCall: Handle multi-register return values.
+// SelectCall: Optimize away nops for local calls.
+// processCallArgs: Handle bit-converted arguments.
+// finishCall: Handle multi-register return values.
+// PPCComputeAddress: Handle parameter references as FrameIndex's.
+// PPCEmitCmp: Handle immediate as operand 1.
+// SelectCall: Handle small byval arguments.
+// SelectIntrinsicCall: Implement.
+// SelectSelect: Implement.
+// Consider factoring isTypeLegal into the base class.
+// Implement switches and jump tables.
+//
+//===----------------------------------------------------------------------===//
using namespace llvm;
namespace {
@@ -52,7 +71,7 @@ typedef struct Address {
int FI;
} Base;
- int Offset;
+ long Offset;
// Innocuous defaults for our address.
Address()
@@ -89,15 +108,76 @@ class PPCFastISel : public FastISel {
virtual bool tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
const LoadInst *LI);
virtual bool FastLowerArguments();
+ virtual unsigned FastEmit_i(MVT Ty, MVT RetTy, unsigned Opc, uint64_t Imm);
+ virtual unsigned FastEmitInst_ri(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ uint64_t Imm);
+ virtual unsigned FastEmitInst_r(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill);
+ virtual unsigned FastEmitInst_rr(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ unsigned Op1, bool Op1IsKill);
+
+ // Instruction selection routines.
+ private:
+ bool SelectLoad(const Instruction *I);
+ bool SelectStore(const Instruction *I);
+ bool SelectBranch(const Instruction *I);
+ bool SelectIndirectBr(const Instruction *I);
+ bool SelectCmp(const Instruction *I);
+ bool SelectFPExt(const Instruction *I);
+ bool SelectFPTrunc(const Instruction *I);
+ bool SelectIToFP(const Instruction *I, bool IsSigned);
+ bool SelectFPToI(const Instruction *I, bool IsSigned);
+ bool SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode);
+ bool SelectCall(const Instruction *I);
+ bool SelectRet(const Instruction *I);
+ bool SelectTrunc(const Instruction *I);
+ bool SelectIntExt(const Instruction *I);
// Utility routines.
private:
+ bool isTypeLegal(Type *Ty, MVT &VT);
+ bool isLoadTypeLegal(Type *Ty, MVT &VT);
+ bool PPCEmitCmp(const Value *Src1Value, const Value *Src2Value,
+ bool isZExt, unsigned DestReg);
+ bool PPCEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
+ const TargetRegisterClass *RC, bool IsZExt = true,
+ unsigned FP64LoadOpc = PPC::LFD);
+ bool PPCEmitStore(MVT VT, unsigned SrcReg, Address &Addr);
+ bool PPCComputeAddress(const Value *Obj, Address &Addr);
+ void PPCSimplifyAddress(Address &Addr, MVT VT, bool &UseOffset,
+ unsigned &IndexReg);
+ bool PPCEmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
+ unsigned DestReg, bool IsZExt);
unsigned PPCMaterializeFP(const ConstantFP *CFP, MVT VT);
+ unsigned PPCMaterializeGV(const GlobalValue *GV, MVT VT);
unsigned PPCMaterializeInt(const Constant *C, MVT VT);
unsigned PPCMaterialize32BitInt(int64_t Imm,
const TargetRegisterClass *RC);
unsigned PPCMaterialize64BitInt(int64_t Imm,
const TargetRegisterClass *RC);
+ unsigned PPCMoveToIntReg(const Instruction *I, MVT VT,
+ unsigned SrcReg, bool IsSigned);
+ unsigned PPCMoveToFPReg(MVT VT, unsigned SrcReg, bool IsSigned);
+
+ // Call handling routines.
+ private:
+ bool processCallArgs(SmallVectorImpl<Value*> &Args,
+ SmallVectorImpl<unsigned> &ArgRegs,
+ SmallVectorImpl<MVT> &ArgVTs,
+ SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags,
+ SmallVectorImpl<unsigned> &RegArgs,
+ CallingConv::ID CC,
+ unsigned &NumBytes,
+ bool IsVarArg);
+ void finishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs,
+ const Instruction *I, CallingConv::ID CC,
+ unsigned &NumBytes, bool IsVarArg);
+ CCAssignFn *usePPC32CCs(unsigned Flag);
private:
#include "PPCGenFastISel.inc"
@@ -106,10 +186,1601 @@ class PPCFastISel : public FastISel {
} // end anonymous namespace
+#include "PPCGenCallingConv.inc"
+
+// Function whose sole purpose is to kill compiler warnings
+// stemming from unused functions included from PPCGenCallingConv.inc.
+CCAssignFn *PPCFastISel::usePPC32CCs(unsigned Flag) {
+ if (Flag == 1)
+ return CC_PPC32_SVR4;
+ else if (Flag == 2)
+ return CC_PPC32_SVR4_ByVal;
+ else if (Flag == 3)
+ return CC_PPC32_SVR4_VarArg;
+ else
+ return RetCC_PPC;
+}
+
+static Optional<PPC::Predicate> getComparePred(CmpInst::Predicate Pred) {
+ switch (Pred) {
+ // These are not representable with any single compare.
+ case CmpInst::FCMP_FALSE:
+ case CmpInst::FCMP_UEQ:
+ case CmpInst::FCMP_UGT:
+ case CmpInst::FCMP_UGE:
+ case CmpInst::FCMP_ULT:
+ case CmpInst::FCMP_ULE:
+ case CmpInst::FCMP_UNE:
+ case CmpInst::FCMP_TRUE:
+ default:
+ return Optional<PPC::Predicate>();
+
+ case CmpInst::FCMP_OEQ:
+ case CmpInst::ICMP_EQ:
+ return PPC::PRED_EQ;
+
+ case CmpInst::FCMP_OGT:
+ case CmpInst::ICMP_UGT:
+ case CmpInst::ICMP_SGT:
+ return PPC::PRED_GT;
+
+ case CmpInst::FCMP_OGE:
+ case CmpInst::ICMP_UGE:
+ case CmpInst::ICMP_SGE:
+ return PPC::PRED_GE;
+
+ case CmpInst::FCMP_OLT:
+ case CmpInst::ICMP_ULT:
+ case CmpInst::ICMP_SLT:
+ return PPC::PRED_LT;
+
+ case CmpInst::FCMP_OLE:
+ case CmpInst::ICMP_ULE:
+ case CmpInst::ICMP_SLE:
+ return PPC::PRED_LE;
+
+ case CmpInst::FCMP_ONE:
+ case CmpInst::ICMP_NE:
+ return PPC::PRED_NE;
+
+ case CmpInst::FCMP_ORD:
+ return PPC::PRED_NU;
+
+ case CmpInst::FCMP_UNO:
+ return PPC::PRED_UN;
+ }
+}
+
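The first group maps to no predicate because a PowerPC compare sets exactly one of four CR bits (LT, GT, EQ, UN) and a conditional branch tests a single bit: FCMP_UEQ means "EQ or UN", which needs two bits, while FCMP_ORD and FCMP_UNO test just the UN bit (negated or not) and so do map. A toy illustration of that one-bit constraint:

#include <cassert>

// After a compare, a CR field has exactly one of these bits set.
enum CRBit { LT = 1, GT = 2, EQ = 4, UN = 8 };

int main() {
  int OEQ = EQ;      // FCMP_OEQ: a single bit, representable
  int UEQ = EQ | UN; // FCMP_UEQ: two bits, so getComparePred returns None
  assert((OEQ & (OEQ - 1)) == 0);
  assert((UEQ & (UEQ - 1)) != 0);
}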
+// Determine whether the type Ty is simple enough to be handled by
+// fast-isel, and return its equivalent machine type in VT.
+// FIXME: Copied directly from ARM -- factor into base class?
+bool PPCFastISel::isTypeLegal(Type *Ty, MVT &VT) {
+ EVT Evt = TLI.getValueType(Ty, true);
+
+ // Only handle simple types.
+ if (Evt == MVT::Other || !Evt.isSimple()) return false;
+ VT = Evt.getSimpleVT();
+
+ // Handle all legal types, i.e. a register that will directly hold this
+ // value.
+ return TLI.isTypeLegal(VT);
+}
+
+// Determine whether the type Ty is simple enough to be handled by
+// fast-isel as a load target, and return its equivalent machine type in VT.
+bool PPCFastISel::isLoadTypeLegal(Type *Ty, MVT &VT) {
+ if (isTypeLegal(Ty, VT)) return true;
+
+ // If this is a type that can be sign- or zero-extended to a basic operation
+ // go ahead and accept it now.
+ if (VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) {
+ return true;
+ }
+
+ return false;
+}
+
+// Given a value Obj, create an Address object Addr that represents its
+// address. Return false if we can't handle it.
+bool PPCFastISel::PPCComputeAddress(const Value *Obj, Address &Addr) {
+ const User *U = NULL;
+ unsigned Opcode = Instruction::UserOp1;
+ if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
+ // Don't walk into other basic blocks unless the object is an alloca from
+ // another block, otherwise it may not have a virtual register assigned.
+ if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
+ FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
+ Opcode = I->getOpcode();
+ U = I;
+ }
+ } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
+ Opcode = C->getOpcode();
+ U = C;
+ }
+
+ switch (Opcode) {
+ default:
+ break;
+ case Instruction::BitCast:
+ // Look through bitcasts.
+ return PPCComputeAddress(U->getOperand(0), Addr);
+ case Instruction::IntToPtr:
+ // Look past no-op inttoptrs.
+ if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
+ return PPCComputeAddress(U->getOperand(0), Addr);
+ break;
+ case Instruction::PtrToInt:
+ // Look past no-op ptrtoints.
+ if (TLI.getValueType(U->getType()) == TLI.getPointerTy())
+ return PPCComputeAddress(U->getOperand(0), Addr);
+ break;
+ case Instruction::GetElementPtr: {
+ Address SavedAddr = Addr;
+ long TmpOffset = Addr.Offset;
+
+ // Iterate through the GEP folding the constants into offsets where
+ // we can.
+ gep_type_iterator GTI = gep_type_begin(U);
+ for (User::const_op_iterator II = U->op_begin() + 1, IE = U->op_end();
+ II != IE; ++II, ++GTI) {
+ const Value *Op = *II;
+ if (StructType *STy = dyn_cast<StructType>(*GTI)) {
+ const StructLayout *SL = TD.getStructLayout(STy);
+ unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
+ TmpOffset += SL->getElementOffset(Idx);
+ } else {
+ uint64_t S = TD.getTypeAllocSize(GTI.getIndexedType());
+ for (;;) {
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
+ // Constant-offset addressing.
+ TmpOffset += CI->getSExtValue() * S;
+ break;
+ }
+ if (canFoldAddIntoGEP(U, Op)) {
+ // A compatible add with a constant operand. Fold the constant.
+ ConstantInt *CI =
+ cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
+ TmpOffset += CI->getSExtValue() * S;
+ // Iterate on the other operand.
+ Op = cast<AddOperator>(Op)->getOperand(0);
+ continue;
+ }
+ // Unsupported
+ goto unsupported_gep;
+ }
+ }
+ }
+
+ // Try to grab the base operand now.
+ Addr.Offset = TmpOffset;
+ if (PPCComputeAddress(U->getOperand(0), Addr)) return true;
+
+ // We failed, restore everything and try the other options.
+ Addr = SavedAddr;
+
+ unsupported_gep:
+ break;
+ }
+ case Instruction::Alloca: {
+ const AllocaInst *AI = cast<AllocaInst>(Obj);
+ DenseMap<const AllocaInst*, int>::iterator SI =
+ FuncInfo.StaticAllocaMap.find(AI);
+ if (SI != FuncInfo.StaticAllocaMap.end()) {
+ Addr.BaseType = Address::FrameIndexBase;
+ Addr.Base.FI = SI->second;
+ return true;
+ }
+ break;
+ }
+ }
+
+ // FIXME: References to parameters fall through to the behavior
+ // below. They should be able to reference a frame index since
+ // they are stored to the stack, so we can get "ld rx, offset(r1)"
+ // instead of "addi ry, r1, offset / ld rx, 0(ry)". Obj will
+ // just contain the parameter. Try to handle this with a FI.
+
+ // Try to get this in a register if nothing else has worked.
+ if (Addr.Base.Reg == 0)
+ Addr.Base.Reg = getRegForValue(Obj);
+
+ // Prevent assignment of base register to X0, which is inappropriate
+ // for loads and stores alike.
+ if (Addr.Base.Reg != 0)
+ MRI.setRegClass(Addr.Base.Reg, &PPC::G8RC_and_G8RC_NOX0RegClass);
+
+ return Addr.Base.Reg != 0;
+}
+
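The GetElementPtr case folds a constant byte offset: struct operands add their StructLayout field offset, array indices add index * element size, and compatible adds are folded through. For example, indexing an i64 array field at byte offset 8 with constant index 3 yields base + 8 + 3*8 = base + 32. The scaling step in isolation (foldIndex is illustrative):

#include <cassert>
#include <cstdint>

// One array-index step of the walk above: offset += index * eltSize.
static int64_t foldIndex(int64_t Offset, int64_t Index, uint64_t EltSize) {
  return Offset + Index * (int64_t)EltSize;
}

int main() {
  int64_t Off = 8;            // struct field offset from StructLayout
  Off = foldIndex(Off, 3, 8); // constant index into an i64 array
  assert(Off == 32);          // candidate displacement for the access
}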
+// Fix up some addresses that can't be used directly. For example, if
+// an offset won't fit in an instruction field, we may need to move it
+// into an index register.
+void PPCFastISel::PPCSimplifyAddress(Address &Addr, MVT VT, bool &UseOffset,
+ unsigned &IndexReg) {
+
+ // Check whether the offset fits in the instruction field.
+ if (!isInt<16>(Addr.Offset))
+ UseOffset = false;
+
+ // If this is a frame index and the offset needs to be simplified, then
+ // put the alloca address into a register, set the base type back to
+ // register and continue. This should almost never happen.
+ if (!UseOffset && Addr.BaseType == Address::FrameIndexBase) {
+ unsigned ResultReg = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::ADDI8),
+ ResultReg).addFrameIndex(Addr.Base.FI).addImm(0);
+ Addr.Base.Reg = ResultReg;
+ Addr.BaseType = Address::RegBase;
+ }
+
+ if (!UseOffset) {
+ IntegerType *OffsetTy = ((VT == MVT::i32) ? Type::getInt32Ty(*Context)
+ : Type::getInt64Ty(*Context));
+ const ConstantInt *Offset =
+ ConstantInt::getSigned(OffsetTy, (int64_t)(Addr.Offset));
+ IndexReg = PPCMaterializeInt(Offset, MVT::i64);
+ assert(IndexReg && "Unexpected error in PPCMaterializeInt!");
+ }
+}
+
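The isInt<16> test reflects D-form loads and stores, which carry a signed 16-bit displacement: offsets in [-32768, 32767] stay in the instruction, and anything else is materialized into an index register so the X-form opcode can be used. The check itself:

#include <cassert>
#include <cstdint>

// Equivalent of llvm::isInt<16>(Offset): fits a signed 16-bit immediate.
static bool fitsDForm(int64_t Offset) {
  return Offset >= -32768 && Offset <= 32767;
}

int main() {
  assert(fitsDForm(32767));  // lwz r3, 32767(r4) is encodable
  assert(!fitsDForm(32768)); // materialize the offset, use lwzx instead
}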
+// Emit a load instruction if possible, returning true if we succeeded,
+// otherwise false. See commentary below for how the register class of
+// the load is determined.
+bool PPCFastISel::PPCEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
+ const TargetRegisterClass *RC,
+ bool IsZExt, unsigned FP64LoadOpc) {
+ unsigned Opc;
+ bool UseOffset = true;
+
+ // If ResultReg is given, it determines the register class of the load.
+ // Otherwise, RC is the register class to use. If the result of the
+ // load isn't anticipated in this block, both may be zero, in which
+ // case we must make a conservative guess. In particular, don't assign
+ // R0 or X0 to the result register, as the result may be used in a load,
+ // store, add-immediate, or isel that won't permit this. (Though
+ // perhaps the spill and reload of live-exit values would handle this?)
+ const TargetRegisterClass *UseRC =
+ (ResultReg ? MRI.getRegClass(ResultReg) :
+ (RC ? RC :
+ (VT == MVT::f64 ? &PPC::F8RCRegClass :
+ (VT == MVT::f32 ? &PPC::F4RCRegClass :
+ (VT == MVT::i64 ? &PPC::G8RC_and_G8RC_NOX0RegClass :
+ &PPC::GPRC_and_GPRC_NOR0RegClass)))));
+
+ bool Is32BitInt = UseRC->hasSuperClassEq(&PPC::GPRCRegClass);
+
+ switch (VT.SimpleTy) {
+ default: // e.g., vector types not handled
+ return false;
+ case MVT::i8:
+ Opc = Is32BitInt ? PPC::LBZ : PPC::LBZ8;
+ break;
+ case MVT::i16:
+ Opc = (IsZExt ?
+ (Is32BitInt ? PPC::LHZ : PPC::LHZ8) :
+ (Is32BitInt ? PPC::LHA : PPC::LHA8));
+ break;
+ case MVT::i32:
+ Opc = (IsZExt ?
+ (Is32BitInt ? PPC::LWZ : PPC::LWZ8) :
+ (Is32BitInt ? PPC::LWA_32 : PPC::LWA));
+ if ((Opc == PPC::LWA || Opc == PPC::LWA_32) && ((Addr.Offset & 3) != 0))
+ UseOffset = false;
+ break;
+ case MVT::i64:
+ Opc = PPC::LD;
+ assert(UseRC->hasSuperClassEq(&PPC::G8RCRegClass) &&
+ "64-bit load with 32-bit target??");
+ UseOffset = ((Addr.Offset & 3) == 0);
+ break;
+ case MVT::f32:
+ Opc = PPC::LFS;
+ break;
+ case MVT::f64:
+ Opc = FP64LoadOpc;
+ break;
+ }
+
+ // If necessary, materialize the offset into a register and use
+ // the indexed form. Also handle stack pointers with special needs.
+ unsigned IndexReg = 0;
+ PPCSimplifyAddress(Addr, VT, UseOffset, IndexReg);
+ if (ResultReg == 0)
+ ResultReg = createResultReg(UseRC);
+
+ // Note: If we still have a frame index here, we know the offset is
+ // in range, as otherwise PPCSimplifyAddress would have converted it
+ // into a RegBase.
+ if (Addr.BaseType == Address::FrameIndexBase) {
+
+ MachineMemOperand *MMO =
+ FuncInfo.MF->getMachineMemOperand(
+ MachinePointerInfo::getFixedStack(Addr.Base.FI, Addr.Offset),
+ MachineMemOperand::MOLoad, MFI.getObjectSize(Addr.Base.FI),
+ MFI.getObjectAlignment(Addr.Base.FI));
+
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg)
+ .addImm(Addr.Offset).addFrameIndex(Addr.Base.FI).addMemOperand(MMO);
+
+ // Base reg with offset in range.
+ } else if (UseOffset) {
+
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg)
+ .addImm(Addr.Offset).addReg(Addr.Base.Reg);
+
+ // Indexed form.
+ } else {
+ // Get the RR opcode corresponding to the RI one. FIXME: It would be
+ // preferable to use the ImmToIdxMap from PPCRegisterInfo.cpp, but it
+ // is hard to get at.
+ switch (Opc) {
+ default: llvm_unreachable("Unexpected opcode!");
+ case PPC::LBZ: Opc = PPC::LBZX; break;
+ case PPC::LBZ8: Opc = PPC::LBZX8; break;
+ case PPC::LHZ: Opc = PPC::LHZX; break;
+ case PPC::LHZ8: Opc = PPC::LHZX8; break;
+ case PPC::LHA: Opc = PPC::LHAX; break;
+ case PPC::LHA8: Opc = PPC::LHAX8; break;
+ case PPC::LWZ: Opc = PPC::LWZX; break;
+ case PPC::LWZ8: Opc = PPC::LWZX8; break;
+ case PPC::LWA: Opc = PPC::LWAX; break;
+ case PPC::LWA_32: Opc = PPC::LWAX_32; break;
+ case PPC::LD: Opc = PPC::LDX; break;
+ case PPC::LFS: Opc = PPC::LFSX; break;
+ case PPC::LFD: Opc = PPC::LFDX; break;
+ }
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg)
+ .addReg(Addr.Base.Reg).addReg(IndexReg);
+ }
+
+ return true;
+}
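+
+ // For instance, an i16 zero-extending load picks PPC::LHZ when the
+ // result register is in a 32-bit class and PPC::LHZ8 (the same
+ // encoding, targeting the 64-bit class) otherwise; with the offset in
+ // range the D-form survives, e.g. "lhz r3, 8(r4)" (illustrative).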
+
+// Attempt to fast-select a load instruction.
+bool PPCFastISel::SelectLoad(const Instruction *I) {
+ // FIXME: No atomic loads are supported.
+ if (cast<LoadInst>(I)->isAtomic())
+ return false;
+
+ // Verify we have a legal type before going any further.
+ MVT VT;
+ if (!isLoadTypeLegal(I->getType(), VT))
+ return false;
+
+ // See if we can handle this address.
+ Address Addr;
+ if (!PPCComputeAddress(I->getOperand(0), Addr))
+ return false;
+
+ // Look at the currently assigned register for this instruction
+ // to determine the required register class. This is necessary
+ // to constrain RA from using R0/X0 when this is not legal.
+ unsigned AssignedReg = FuncInfo.ValueMap[I];
+ const TargetRegisterClass *RC =
+ AssignedReg ? MRI.getRegClass(AssignedReg) : 0;
+
+ unsigned ResultReg = 0;
+ if (!PPCEmitLoad(VT, ResultReg, Addr, RC))
+ return false;
+ UpdateValueMap(I, ResultReg);
+ return true;
+}
+
+// Emit a store instruction to store SrcReg at Addr.
+bool PPCFastISel::PPCEmitStore(MVT VT, unsigned SrcReg, Address &Addr) {
+ assert(SrcReg && "Nothing to store!");
+ unsigned Opc;
+ bool UseOffset = true;
+
+ const TargetRegisterClass *RC = MRI.getRegClass(SrcReg);
+ bool Is32BitInt = RC->hasSuperClassEq(&PPC::GPRCRegClass);
+
+ switch (VT.SimpleTy) {
+ default: // e.g., vector types not handled
+ return false;
+ case MVT::i8:
+ Opc = Is32BitInt ? PPC::STB : PPC::STB8;
+ break;
+ case MVT::i16:
+ Opc = Is32BitInt ? PPC::STH : PPC::STH8;
+ break;
+ case MVT::i32:
+ assert(Is32BitInt && "Not GPRC for i32??");
+ Opc = PPC::STW;
+ break;
+ case MVT::i64:
+ Opc = PPC::STD;
+ UseOffset = ((Addr.Offset & 3) == 0);
+ break;
+ case MVT::f32:
+ Opc = PPC::STFS;
+ break;
+ case MVT::f64:
+ Opc = PPC::STFD;
+ break;
+ }
+
+ // If necessary, materialize the offset into a register and use
+ // the indexed form. Also handle stack pointers with special needs.
+ unsigned IndexReg = 0;
+ PPCSimplifyAddress(Addr, VT, UseOffset, IndexReg);
+
+ // Note: If we still have a frame index here, we know the offset is
+ // in range, as otherwise PPCSimplifyAddress would have converted it
+ // into a RegBase.
+ if (Addr.BaseType == Address::FrameIndexBase) {
+ MachineMemOperand *MMO =
+ FuncInfo.MF->getMachineMemOperand(
+ MachinePointerInfo::getFixedStack(Addr.Base.FI, Addr.Offset),
+ MachineMemOperand::MOStore, MFI.getObjectSize(Addr.Base.FI),
+ MFI.getObjectAlignment(Addr.Base.FI));
+
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc)).addReg(SrcReg)
+ .addImm(Addr.Offset).addFrameIndex(Addr.Base.FI).addMemOperand(MMO);
+
+ // Base reg with offset in range.
+ } else if (UseOffset)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc))
+ .addReg(SrcReg).addImm(Addr.Offset).addReg(Addr.Base.Reg);
+
+ // Indexed form.
+ else {
+ // Get the RR opcode corresponding to the RI one. FIXME: It would be
+ // preferable to use the ImmToIdxMap from PPCRegisterInfo.cpp, but it
+ // is hard to get at.
+ switch (Opc) {
+ default: llvm_unreachable("Unexpected opcode!");
+ case PPC::STB: Opc = PPC::STBX; break;
+ case PPC::STH : Opc = PPC::STHX; break;
+ case PPC::STW : Opc = PPC::STWX; break;
+ case PPC::STB8: Opc = PPC::STBX8; break;
+ case PPC::STH8: Opc = PPC::STHX8; break;
+ case PPC::STW8: Opc = PPC::STWX8; break;
+ case PPC::STD: Opc = PPC::STDX; break;
+ case PPC::STFS: Opc = PPC::STFSX; break;
+ case PPC::STFD: Opc = PPC::STFDX; break;
+ }
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc))
+ .addReg(SrcReg).addReg(Addr.Base.Reg).addReg(IndexReg);
+ }
+
+ return true;
+}
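+
+ // Note that "std" is a DS-form instruction whose displacement must be a
+ // multiple of 4, which is why UseOffset is cleared above for unaligned
+ // offsets; such a store falls back to the X-form, e.g. (illustrative):
+ //   li r5, 6
+ //   stdx r3, r4, r5        ; instead of the invalid "std r3, 6(r4)"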
+
+// Attempt to fast-select a store instruction.
+bool PPCFastISel::SelectStore(const Instruction *I) {
+ Value *Op0 = I->getOperand(0);
+ unsigned SrcReg = 0;
+
+ // FIXME: No atomic stores are supported.
+ if (cast<StoreInst>(I)->isAtomic())
+ return false;
+
+ // Verify we have a legal type before going any further.
+ MVT VT;
+ if (!isLoadTypeLegal(Op0->getType(), VT))
+ return false;
+
+ // Get the value to be stored into a register.
+ SrcReg = getRegForValue(Op0);
+ if (SrcReg == 0)
+ return false;
+
+ // See if we can handle this address.
+ Address Addr;
+ if (!PPCComputeAddress(I->getOperand(1), Addr))
+ return false;
+
+ if (!PPCEmitStore(VT, SrcReg, Addr))
+ return false;
+
+ return true;
+}
+
+// Attempt to fast-select a branch instruction.
+bool PPCFastISel::SelectBranch(const Instruction *I) {
+ const BranchInst *BI = cast<BranchInst>(I);
+ MachineBasicBlock *BrBB = FuncInfo.MBB;
+ MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
+ MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
+
+ // For now, just try the simplest case where it's fed by a compare.
+ if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
+ Optional<PPC::Predicate> OptPPCPred = getComparePred(CI->getPredicate());
+ if (!OptPPCPred)
+ return false;
+
+ PPC::Predicate PPCPred = OptPPCPred.getValue();
+
+ // Take advantage of fall-through opportunities.
+ if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
+ std::swap(TBB, FBB);
+ PPCPred = PPC::InvertPredicate(PPCPred);
+ }
+
+ unsigned CondReg = createResultReg(&PPC::CRRCRegClass);
+
+ if (!PPCEmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned(),
+ CondReg))
+ return false;
+
+ BuildMI(*BrBB, FuncInfo.InsertPt, DL, TII.get(PPC::BCC))
+ .addImm(PPCPred).addReg(CondReg).addMBB(TBB);
+ FastEmitBranch(FBB, DL);
+ FuncInfo.MBB->addSuccessor(TBB);
+ return true;
+
+ } else if (const ConstantInt *CI =
+ dyn_cast<ConstantInt>(BI->getCondition())) {
+ uint64_t Imm = CI->getZExtValue();
+ MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
+ FastEmitBranch(Target, DL);
+ return true;
+ }
+
+ // FIXME: ARM looks for a case where the block containing the compare
+ // has been split from the block containing the branch. If this happens,
+ // there is a vreg available containing the result of the compare. I'm
+ // not sure we can do much, as we've lost the predicate information with
+ // the compare instruction -- we have a 4-bit CR but don't know which bit
+ // to test here.
+ return false;
+}
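+
+ // The compare-fed case above produces a sequence along these lines
+ // (a sketch; labels and registers are illustrative):
+ //   cmpw cr0, r3, r4       ; from PPCEmitCmp
+ //   blt cr0, .LBB0_2       ; BCC with the (possibly inverted) predicate
+ //   b .LBB0_3              ; FastEmitBranch to the other successor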
+
+// Attempt to emit a compare of the two source values. Signed and unsigned
+// comparisons are supported. Return false if we can't handle it.
+bool PPCFastISel::PPCEmitCmp(const Value *SrcValue1, const Value *SrcValue2,
+ bool IsZExt, unsigned DestReg) {
+ Type *Ty = SrcValue1->getType();
+ EVT SrcEVT = TLI.getValueType(Ty, true);
+ if (!SrcEVT.isSimple())
+ return false;
+ MVT SrcVT = SrcEVT.getSimpleVT();
+
+ // See if operand 2 is an immediate encodeable in the compare.
+ // FIXME: Operands are not in canonical order at -O0, so an immediate
+ // operand in position 1 is a lost opportunity for now. We are
+ // similar to ARM in this regard.
+ long Imm = 0;
+ bool UseImm = false;
+
+ // Only 16-bit integer constants can be represented in compares for
+ // PowerPC. Others will be materialized into a register.
+ if (const ConstantInt *ConstInt = dyn_cast<ConstantInt>(SrcValue2)) {
+ if (SrcVT == MVT::i64 || SrcVT == MVT::i32 || SrcVT == MVT::i16 ||
+ SrcVT == MVT::i8 || SrcVT == MVT::i1) {
+ const APInt &CIVal = ConstInt->getValue();
+ Imm = (IsZExt) ? (long)CIVal.getZExtValue() : (long)CIVal.getSExtValue();
+ if ((IsZExt && isUInt<16>(Imm)) || (!IsZExt && isInt<16>(Imm)))
+ UseImm = true;
+ }
+ }
+
+ unsigned CmpOpc;
+ bool NeedsExt = false;
+ switch (SrcVT.SimpleTy) {
+ default: return false;
+ case MVT::f32:
+ CmpOpc = PPC::FCMPUS;
+ break;
+ case MVT::f64:
+ CmpOpc = PPC::FCMPUD;
+ break;
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ NeedsExt = true;
+ // Intentional fall-through.
+ case MVT::i32:
+ if (!UseImm)
+ CmpOpc = IsZExt ? PPC::CMPLW : PPC::CMPW;
+ else
+ CmpOpc = IsZExt ? PPC::CMPLWI : PPC::CMPWI;
+ break;
+ case MVT::i64:
+ if (!UseImm)
+ CmpOpc = IsZExt ? PPC::CMPLD : PPC::CMPD;
+ else
+ CmpOpc = IsZExt ? PPC::CMPLDI : PPC::CMPDI;
+ break;
+ }
+
+ unsigned SrcReg1 = getRegForValue(SrcValue1);
+ if (SrcReg1 == 0)
+ return false;
+
+ unsigned SrcReg2 = 0;
+ if (!UseImm) {
+ SrcReg2 = getRegForValue(SrcValue2);
+ if (SrcReg2 == 0)
+ return false;
+ }
+
+ if (NeedsExt) {
+ unsigned ExtReg = createResultReg(&PPC::GPRCRegClass);
+ if (!PPCEmitIntExt(SrcVT, SrcReg1, MVT::i32, ExtReg, IsZExt))
+ return false;
+ SrcReg1 = ExtReg;
+
+ if (!UseImm) {
+ unsigned ExtReg = createResultReg(&PPC::GPRCRegClass);
+ if (!PPCEmitIntExt(SrcVT, SrcReg2, MVT::i32, ExtReg, IsZExt))
+ return false;
+ SrcReg2 = ExtReg;
+ }
+ }
+
+ if (!UseImm)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CmpOpc), DestReg)
+ .addReg(SrcReg1).addReg(SrcReg2);
+ else
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CmpOpc), DestReg)
+ .addReg(SrcReg1).addImm(Imm);
+
+ return true;
+}
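+
+ // For example, "icmp ult i32 %x, 42" can use the immediate form, since
+ // 42 fits in 16 unsigned bits (illustrative register):
+ //   cmplwi cr0, r3, 42
+ // whereas an out-of-range constant such as 70000 is first materialized
+ // into a register and compared with "cmplw".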
+
+// Attempt to fast-select a floating-point extend instruction.
+bool PPCFastISel::SelectFPExt(const Instruction *I) {
+ Value *Src = I->getOperand(0);
+ EVT SrcVT = TLI.getValueType(Src->getType(), true);
+ EVT DestVT = TLI.getValueType(I->getType(), true);
+
+ if (SrcVT != MVT::f32 || DestVT != MVT::f64)
+ return false;
+
+ unsigned SrcReg = getRegForValue(Src);
+ if (!SrcReg)
+ return false;
+
+ // No code is generated for a FP extend.
+ UpdateValueMap(I, SrcReg);
+ return true;
+}
+
+// Attempt to fast-select a floating-point truncate instruction.
+bool PPCFastISel::SelectFPTrunc(const Instruction *I) {
+ Value *Src = I->getOperand(0);
+ EVT SrcVT = TLI.getValueType(Src->getType(), true);
+ EVT DestVT = TLI.getValueType(I->getType(), true);
+
+ if (SrcVT != MVT::f64 || DestVT != MVT::f32)
+ return false;
+
+ unsigned SrcReg = getRegForValue(Src);
+ if (!SrcReg)
+ return false;
+
+ // Round the result to single precision.
+ unsigned DestReg = createResultReg(&PPC::F4RCRegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::FRSP), DestReg)
+ .addReg(SrcReg);
+
+ UpdateValueMap(I, DestReg);
+ return true;
+}
+
+// Move an i32 or i64 value in a GPR to an f64 value in an FPR.
+// FIXME: When direct register moves are implemented (see PowerISA 2.08),
+// those should be used instead of moving via a stack slot when the
+// subtarget permits.
+// FIXME: The code here is sloppy for the 4-byte case. Can use a 4-byte
+// stack slot and 4-byte store/load sequence. Or just sext the 4-byte
+// case to 8 bytes which produces tighter code but wastes stack space.
+unsigned PPCFastISel::PPCMoveToFPReg(MVT SrcVT, unsigned SrcReg,
+ bool IsSigned) {
+
+ // If necessary, extend 32-bit int to 64-bit.
+ if (SrcVT == MVT::i32) {
+ unsigned TmpReg = createResultReg(&PPC::G8RCRegClass);
+ if (!PPCEmitIntExt(MVT::i32, SrcReg, MVT::i64, TmpReg, !IsSigned))
+ return 0;
+ SrcReg = TmpReg;
+ }
+
+ // Get a stack slot 8 bytes wide, aligned on an 8-byte boundary.
+ Address Addr;
+ Addr.BaseType = Address::FrameIndexBase;
+ Addr.Base.FI = MFI.CreateStackObject(8, 8, false);
+
+ // Store the value from the GPR.
+ if (!PPCEmitStore(MVT::i64, SrcReg, Addr))
+ return 0;
+
+ // Load the integer value into an FPR. The kind of load used depends
+ // on a number of conditions.
+ unsigned LoadOpc = PPC::LFD;
+
+ if (SrcVT == MVT::i32) {
+ Addr.Offset = 4;
+ if (!IsSigned)
+ LoadOpc = PPC::LFIWZX;
+ else if (PPCSubTarget.hasLFIWAX())
+ LoadOpc = PPC::LFIWAX;
+ }
+
+ const TargetRegisterClass *RC = &PPC::F8RCRegClass;
+ unsigned ResultReg = 0;
+ if (!PPCEmitLoad(MVT::f64, ResultReg, Addr, RC, !IsSigned, LoadOpc))
+ return 0;
+
+ return ResultReg;
+}
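+
+ // The move is thus a round trip through memory. A sketch for a signed
+ // i32 source on a subtarget with LFIWAX ("OFF" is a stand-in for the
+ // slot's frame offset; registers are illustrative):
+ //   extsw r4, r3           ; PPCEmitIntExt, i32 -> i64
+ //   std r4, OFF(r1)        ; PPCEmitStore into the 8-byte slot
+ //   lfiwax f1, 0, r5       ; sign-extending word load, r5 = &slot + 4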
+
+// Attempt to fast-select an integer-to-floating-point conversion.
+bool PPCFastISel::SelectIToFP(const Instruction *I, bool IsSigned) {
+ MVT DstVT;
+ Type *DstTy = I->getType();
+ if (!isTypeLegal(DstTy, DstVT))
+ return false;
+
+ if (DstVT != MVT::f32 && DstVT != MVT::f64)
+ return false;
+
+ Value *Src = I->getOperand(0);
+ EVT SrcEVT = TLI.getValueType(Src->getType(), true);
+ if (!SrcEVT.isSimple())
+ return false;
+
+ MVT SrcVT = SrcEVT.getSimpleVT();
+
+ if (SrcVT != MVT::i8 && SrcVT != MVT::i16 &&
+ SrcVT != MVT::i32 && SrcVT != MVT::i64)
+ return false;
+
+ unsigned SrcReg = getRegForValue(Src);
+ if (SrcReg == 0)
+ return false;
+
+ // We can only lower an unsigned convert if we have the newer
+ // floating-point conversion operations.
+ if (!IsSigned && !PPCSubTarget.hasFPCVT())
+ return false;
+
+ // FIXME: For now we require the newer floating-point conversion operations
+ // (which are present only on P7 and A2 server models) when converting
+ // to single-precision float. Otherwise we have to generate a lot of
+ // fiddly code to avoid double rounding. If necessary, the fiddly code
+ // can be found in PPCTargetLowering::LowerINT_TO_FP().
+ if (DstVT == MVT::f32 && !PPCSubTarget.hasFPCVT())
+ return false;
+
+ // Extend the input if necessary.
+ if (SrcVT == MVT::i8 || SrcVT == MVT::i16) {
+ unsigned TmpReg = createResultReg(&PPC::G8RCRegClass);
+ if (!PPCEmitIntExt(SrcVT, SrcReg, MVT::i64, TmpReg, !IsSigned))
+ return false;
+ SrcVT = MVT::i64;
+ SrcReg = TmpReg;
+ }
+
+ // Move the integer value to an FPR.
+ unsigned FPReg = PPCMoveToFPReg(SrcVT, SrcReg, IsSigned);
+ if (FPReg == 0)
+ return false;
+
+ // Determine the opcode for the conversion.
+ const TargetRegisterClass *RC = &PPC::F8RCRegClass;
+ unsigned DestReg = createResultReg(RC);
+ unsigned Opc;
+
+ if (DstVT == MVT::f32)
+ Opc = IsSigned ? PPC::FCFIDS : PPC::FCFIDUS;
+ else
+ Opc = IsSigned ? PPC::FCFID : PPC::FCFIDU;
+
+ // Generate the convert.
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), DestReg)
+ .addReg(FPReg);
+
+ UpdateValueMap(I, DestReg);
+ return true;
+}
+
+// Move the floating-point value in SrcReg into an integer destination
+// register, and return the register (or zero if we can't handle it).
+// FIXME: When direct register moves are implemented (see PowerISA 2.08),
+// those should be used instead of moving via a stack slot when the
+// subtarget permits.
+unsigned PPCFastISel::PPCMoveToIntReg(const Instruction *I, MVT VT,
+ unsigned SrcReg, bool IsSigned) {
+ // Get a stack slot 8 bytes wide, aligned on an 8-byte boundary.
+ // Note that if we have STFIWX available, we could use a 4-byte stack
+ // slot for i32, but this being fast-isel we'll just go with the
+ // easiest code gen possible.
+ Address Addr;
+ Addr.BaseType = Address::FrameIndexBase;
+ Addr.Base.FI = MFI.CreateStackObject(8, 8, false);
+
+ // Store the value from the FPR.
+ if (!PPCEmitStore(MVT::f64, SrcReg, Addr))
+ return 0;
+
+ // Reload it into a GPR. If we want an i32, modify the address
+ // to have a 4-byte offset so we load from the right place.
+ if (VT == MVT::i32)
+ Addr.Offset = 4;
+
+ // Look at the currently assigned register for this instruction
+ // to determine the required register class.
+ unsigned AssignedReg = FuncInfo.ValueMap[I];
+ const TargetRegisterClass *RC =
+ AssignedReg ? MRI.getRegClass(AssignedReg) : 0;
+
+ unsigned ResultReg = 0;
+ if (!PPCEmitLoad(VT, ResultReg, Addr, RC, !IsSigned))
+ return 0;
+
+ return ResultReg;
+}
+
+// Attempt to fast-select a floating-point-to-integer conversion.
+bool PPCFastISel::SelectFPToI(const Instruction *I, bool IsSigned) {
+ MVT DstVT, SrcVT;
+ Type *DstTy = I->getType();
+ if (!isTypeLegal(DstTy, DstVT))
+ return false;
+
+ if (DstVT != MVT::i32 && DstVT != MVT::i64)
+ return false;
+
+ Value *Src = I->getOperand(0);
+ Type *SrcTy = Src->getType();
+ if (!isTypeLegal(SrcTy, SrcVT))
+ return false;
+
+ if (SrcVT != MVT::f32 && SrcVT != MVT::f64)
+ return false;
+
+ unsigned SrcReg = getRegForValue(Src);
+ if (SrcReg == 0)
+ return false;
+
+ // Convert f32 to f64 if necessary. This is just a meaningless copy
+ // to get the register class right. COPY_TO_REGCLASS is needed since
+ // a COPY from F4RC to F8RC is converted to a F4RC-F4RC copy downstream.
+ const TargetRegisterClass *InRC = MRI.getRegClass(SrcReg);
+ if (InRC == &PPC::F4RCRegClass) {
+ unsigned TmpReg = createResultReg(&PPC::F8RCRegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(TargetOpcode::COPY_TO_REGCLASS), TmpReg)
+ .addReg(SrcReg).addImm(PPC::F8RCRegClassID);
+ SrcReg = TmpReg;
+ }
+
+ // Determine the opcode for the conversion, which takes place
+ // entirely within FPRs.
+ unsigned DestReg = createResultReg(&PPC::F8RCRegClass);
+ unsigned Opc;
+
+ if (DstVT == MVT::i32)
+ if (IsSigned)
+ Opc = PPC::FCTIWZ;
+ else
+ Opc = PPCSubTarget.hasFPCVT() ? PPC::FCTIWUZ : PPC::FCTIDZ;
+ else
+ Opc = IsSigned ? PPC::FCTIDZ : PPC::FCTIDUZ;
+
+ // Generate the convert.
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), DestReg)
+ .addReg(SrcReg);
+
+ // Now move the integer value from a float register to an integer register.
+ unsigned IntReg = PPCMoveToIntReg(I, DstVT, DestReg, IsSigned);
+ if (IntReg == 0)
+ return false;
+
+ UpdateValueMap(I, IntReg);
+ return true;
+}
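+
+ // A sketch for "fptosi double %x to i32" ("OFF" stands in for the slot's
+ // frame offset; registers are illustrative):
+ //   fctiwz f0, f1          ; convert toward zero within the FPRs
+ //   stfd f0, OFF(r1)       ; PPCMoveToIntReg spills the 8-byte result
+ //   lwz r3, OFF+4(r1)      ; reload the low word into a GPR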
+
+// Attempt to fast-select a binary integer operation that isn't already
+// handled automatically.
+bool PPCFastISel::SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode) {
+ EVT DestVT = TLI.getValueType(I->getType(), true);
+
+ // We can get here in the case when we have a binary operation on a non-legal
+ // type and the target independent selector doesn't know how to handle it.
+ if (DestVT != MVT::i16 && DestVT != MVT::i8)
+ return false;
+
+ // Look at the currently assigned register for this instruction
+ // to determine the required register class. If there is no register,
+ // make a conservative choice (don't assign R0).
+ unsigned AssignedReg = FuncInfo.ValueMap[I];
+ const TargetRegisterClass *RC =
+ (AssignedReg ? MRI.getRegClass(AssignedReg) :
+ &PPC::GPRC_and_GPRC_NOR0RegClass);
+ bool IsGPRC = RC->hasSuperClassEq(&PPC::GPRCRegClass);
+
+ unsigned Opc;
+ switch (ISDOpcode) {
+ default: return false;
+ case ISD::ADD:
+ Opc = IsGPRC ? PPC::ADD4 : PPC::ADD8;
+ break;
+ case ISD::OR:
+ Opc = IsGPRC ? PPC::OR : PPC::OR8;
+ break;
+ case ISD::SUB:
+ Opc = IsGPRC ? PPC::SUBF : PPC::SUBF8;
+ break;
+ }
+
+ unsigned ResultReg = createResultReg(RC ? RC : &PPC::G8RCRegClass);
+ unsigned SrcReg1 = getRegForValue(I->getOperand(0));
+ if (SrcReg1 == 0) return false;
+
+ // Handle case of small immediate operand.
+ if (const ConstantInt *ConstInt = dyn_cast<ConstantInt>(I->getOperand(1))) {
+ const APInt &CIVal = ConstInt->getValue();
+ int Imm = (int)CIVal.getSExtValue();
+ bool UseImm = true;
+ if (isInt<16>(Imm)) {
+ switch (Opc) {
+ default:
+ llvm_unreachable("Missing case!");
+ case PPC::ADD4:
+ Opc = PPC::ADDI;
+ MRI.setRegClass(SrcReg1, &PPC::GPRC_and_GPRC_NOR0RegClass);
+ break;
+ case PPC::ADD8:
+ Opc = PPC::ADDI8;
+ MRI.setRegClass(SrcReg1, &PPC::G8RC_and_G8RC_NOX0RegClass);
+ break;
+ case PPC::OR:
+ Opc = PPC::ORI;
+ break;
+ case PPC::OR8:
+ Opc = PPC::ORI8;
+ break;
+ case PPC::SUBF:
+ if (Imm == -32768)
+ UseImm = false;
+ else {
+ Opc = PPC::ADDI;
+ MRI.setRegClass(SrcReg1, &PPC::GPRC_and_GPRC_NOR0RegClass);
+ Imm = -Imm;
+ }
+ break;
+ case PPC::SUBF8:
+ if (Imm == -32768)
+ UseImm = false;
+ else {
+ Opc = PPC::ADDI8;
+ MRI.setRegClass(SrcReg1, &PPC::G8RC_and_G8RC_NOX0RegClass);
+ Imm = -Imm;
+ }
+ break;
+ }
+
+ if (UseImm) {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg)
+ .addReg(SrcReg1).addImm(Imm);
+ UpdateValueMap(I, ResultReg);
+ return true;
+ }
+ }
+ }
+
+ // Reg-reg case.
+ unsigned SrcReg2 = getRegForValue(I->getOperand(1));
+ if (SrcReg2 == 0) return false;
+
+ // Reverse operands for subtract-from.
+ if (ISDOpcode == ISD::SUB)
+ std::swap(SrcReg1, SrcReg2);
+
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg)
+ .addReg(SrcReg1).addReg(SrcReg2);
+ UpdateValueMap(I, ResultReg);
+ return true;
+}
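+
+ // For example, "sub i8 %x, 5" rewrites SUBF to ADDI with the negated
+ // immediate, giving "addi r3, r4, -5" (illustrative registers); the
+ // reg-reg path swaps the operands instead, since "subf rd, ra, rb"
+ // computes rb - ra.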
+
+// Handle arguments to a call that we're attempting to fast-select.
+// Return false if the arguments are too complex for us at the moment.
+bool PPCFastISel::processCallArgs(SmallVectorImpl<Value*> &Args,
+ SmallVectorImpl<unsigned> &ArgRegs,
+ SmallVectorImpl<MVT> &ArgVTs,
+ SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags,
+ SmallVectorImpl<unsigned> &RegArgs,
+ CallingConv::ID CC,
+ unsigned &NumBytes,
+ bool IsVarArg) {
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, TM, ArgLocs, *Context);
+ CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, CC_PPC64_ELF_FIS);
+
+ // Bail out if we can't handle any of the arguments.
+ for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
+ CCValAssign &VA = ArgLocs[I];
+ MVT ArgVT = ArgVTs[VA.getValNo()];
+
+ // Skip vector arguments for now, as well as long double and
+ // uint128_t, and anything that isn't passed in a register.
+ if (ArgVT.isVector() || ArgVT.getSizeInBits() > 64 ||
+ !VA.isRegLoc() || VA.needsCustom())
+ return false;
+
+ // Skip bit-converted arguments for now.
+ if (VA.getLocInfo() == CCValAssign::BCvt)
+ return false;
+ }
+
+ // Get a count of how many bytes are to be pushed onto the stack.
+ NumBytes = CCInfo.getNextStackOffset();
+
+ // Issue CALLSEQ_START.
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(TII.getCallFrameSetupOpcode()))
+ .addImm(NumBytes);
+
+ // Prepare to assign register arguments. Every argument uses up a
+ // GPR protocol register even if it's passed in a floating-point
+ // register.
+ unsigned NextGPR = PPC::X3;
+ unsigned NextFPR = PPC::F1;
+
+ // Process arguments.
+ for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
+ CCValAssign &VA = ArgLocs[I];
+ unsigned Arg = ArgRegs[VA.getValNo()];
+ MVT ArgVT = ArgVTs[VA.getValNo()];
+
+ // Handle argument promotion and bitcasts.
+ switch (VA.getLocInfo()) {
+ default:
+ llvm_unreachable("Unknown loc info!");
+ case CCValAssign::Full:
+ break;
+ case CCValAssign::SExt: {
+ MVT DestVT = VA.getLocVT();
+ const TargetRegisterClass *RC =
+ (DestVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
+ unsigned TmpReg = createResultReg(RC);
+ if (!PPCEmitIntExt(ArgVT, Arg, DestVT, TmpReg, /*IsZExt*/false))
+ llvm_unreachable("Failed to emit a sext!");
+ ArgVT = DestVT;
+ Arg = TmpReg;
+ break;
+ }
+ case CCValAssign::AExt:
+ case CCValAssign::ZExt: {
+ MVT DestVT = VA.getLocVT();
+ const TargetRegisterClass *RC =
+ (DestVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
+ unsigned TmpReg = createResultReg(RC);
+ if (!PPCEmitIntExt(ArgVT, Arg, DestVT, TmpReg, /*IsZExt*/true))
+ llvm_unreachable("Failed to emit a zext!");
+ ArgVT = DestVT;
+ Arg = TmpReg;
+ break;
+ }
+ case CCValAssign::BCvt: {
+ // FIXME: Not yet handled.
+ llvm_unreachable("Should have bailed before getting here!");
+ break;
+ }
+ }
+
+ // Copy this argument to the appropriate register.
+ unsigned ArgReg;
+ if (ArgVT == MVT::f32 || ArgVT == MVT::f64) {
+ ArgReg = NextFPR++;
+ ++NextGPR;
+ } else
+ ArgReg = NextGPR++;
+
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ ArgReg).addReg(Arg);
+ RegArgs.push_back(ArgReg);
+ }
+
+ return true;
+}
+
+// For a call that we've determined we can fast-select, finish the
+// call sequence and generate a copy to obtain the return value (if any).
+void PPCFastISel::finishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs,
+ const Instruction *I, CallingConv::ID CC,
+ unsigned &NumBytes, bool IsVarArg) {
+ // Issue CALLSEQ_END.
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(TII.getCallFrameDestroyOpcode()))
+ .addImm(NumBytes).addImm(0);
+
+ // Next, generate a copy to obtain the return value.
+ // FIXME: No multi-register return values yet, though I don't foresee
+ // any real difficulties there.
+ if (RetVT != MVT::isVoid) {
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, TM, RVLocs, *Context);
+ CCInfo.AnalyzeCallResult(RetVT, RetCC_PPC64_ELF_FIS);
+ assert(RVLocs.size() == 1 && "No support for multi-reg return values!");
+ CCValAssign &VA = RVLocs[0];
+ assert(VA.isRegLoc() && "Can only return in registers!");
+
+ MVT DestVT = VA.getValVT();
+ MVT CopyVT = DestVT;
+
+ // Ints smaller than a register still arrive in a full 64-bit
+ // register, so make sure we recognize this.
+ if (RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32)
+ CopyVT = MVT::i64;
+
+ unsigned SourcePhysReg = VA.getLocReg();
+ unsigned ResultReg = 0;
+
+ if (RetVT == CopyVT) {
+ const TargetRegisterClass *CpyRC = TLI.getRegClassFor(CopyVT);
+ ResultReg = createResultReg(CpyRC);
+
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(TargetOpcode::COPY), ResultReg)
+ .addReg(SourcePhysReg);
+
+ // If necessary, round the floating result to single precision.
+ } else if (CopyVT == MVT::f64) {
+ ResultReg = createResultReg(TLI.getRegClassFor(RetVT));
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::FRSP),
+ ResultReg).addReg(SourcePhysReg);
+
+ // If only the low half of a general register is needed, generate
+ // a GPRC copy instead of a G8RC copy. (EXTRACT_SUBREG can't be
+ // used along the fast-isel path (not lowered), and downstream logic
+ // also doesn't like a direct subreg copy on a physical reg.)
+ } else if (RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32) {
+ ResultReg = createResultReg(&PPC::GPRCRegClass);
+ // Convert physical register from G8RC to GPRC.
+ SourcePhysReg -= PPC::X0 - PPC::R0;
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(TargetOpcode::COPY), ResultReg)
+ .addReg(SourcePhysReg);
+ }
+
+ assert(ResultReg && "ResultReg unset!");
+ UsedRegs.push_back(SourcePhysReg);
+ UpdateValueMap(I, ResultReg);
+ }
+}
+
+// Attempt to fast-select a call instruction.
+bool PPCFastISel::SelectCall(const Instruction *I) {
+ const CallInst *CI = cast<CallInst>(I);
+ const Value *Callee = CI->getCalledValue();
+
+ // Can't handle inline asm.
+ if (isa<InlineAsm>(Callee))
+ return false;
+
+ // Allow SelectionDAG isel to handle tail calls.
+ if (CI->isTailCall())
+ return false;
+
+ // Obtain calling convention.
+ ImmutableCallSite CS(CI);
+ CallingConv::ID CC = CS.getCallingConv();
+
+ PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType());
+ FunctionType *FTy = cast<FunctionType>(PT->getElementType());
+ bool IsVarArg = FTy->isVarArg();
+
+ // Not ready for varargs yet.
+ if (IsVarArg)
+ return false;
+
+ // Handle simple calls for now, with legal return types and
+ // those that can be extended.
+ Type *RetTy = I->getType();
+ MVT RetVT;
+ if (RetTy->isVoidTy())
+ RetVT = MVT::isVoid;
+ else if (!isTypeLegal(RetTy, RetVT) && RetVT != MVT::i16 &&
+ RetVT != MVT::i8)
+ return false;
+
+ // FIXME: No multi-register return values yet.
+ if (RetVT != MVT::isVoid && RetVT != MVT::i8 && RetVT != MVT::i16 &&
+ RetVT != MVT::i32 && RetVT != MVT::i64 && RetVT != MVT::f32 &&
+ RetVT != MVT::f64) {
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, TM, RVLocs, *Context);
+ CCInfo.AnalyzeCallResult(RetVT, RetCC_PPC64_ELF_FIS);
+ if (RVLocs.size() > 1)
+ return false;
+ }
+
+ // Bail early if more than 8 arguments, as we only currently
+ // handle arguments passed in registers.
+ unsigned NumArgs = CS.arg_size();
+ if (NumArgs > 8)
+ return false;
+
+ // Set up the argument vectors.
+ SmallVector<Value*, 8> Args;
+ SmallVector<unsigned, 8> ArgRegs;
+ SmallVector<MVT, 8> ArgVTs;
+ SmallVector<ISD::ArgFlagsTy, 8> ArgFlags;
+
+ Args.reserve(NumArgs);
+ ArgRegs.reserve(NumArgs);
+ ArgVTs.reserve(NumArgs);
+ ArgFlags.reserve(NumArgs);
+
+ for (ImmutableCallSite::arg_iterator II = CS.arg_begin(), IE = CS.arg_end();
+ II != IE; ++II) {
+ // FIXME: ARM does something for intrinsic calls here, check into that.
+
+ unsigned AttrIdx = II - CS.arg_begin() + 1;
+
+ // Only handle easy calls for now. It would be reasonably easy
+ // to handle <= 8-byte structures passed ByVal in registers, but we
+ // have to ensure they are right-justified in the register.
+ if (CS.paramHasAttr(AttrIdx, Attribute::InReg) ||
+ CS.paramHasAttr(AttrIdx, Attribute::StructRet) ||
+ CS.paramHasAttr(AttrIdx, Attribute::Nest) ||
+ CS.paramHasAttr(AttrIdx, Attribute::ByVal))
+ return false;
+
+ ISD::ArgFlagsTy Flags;
+ if (CS.paramHasAttr(AttrIdx, Attribute::SExt))
+ Flags.setSExt();
+ if (CS.paramHasAttr(AttrIdx, Attribute::ZExt))
+ Flags.setZExt();
+
+ Type *ArgTy = (*II)->getType();
+ MVT ArgVT;
+ if (!isTypeLegal(ArgTy, ArgVT) && ArgVT != MVT::i16 && ArgVT != MVT::i8)
+ return false;
+
+ if (ArgVT.isVector())
+ return false;
+
+ unsigned Arg = getRegForValue(*II);
+ if (Arg == 0)
+ return false;
+
+ unsigned OriginalAlignment = TD.getABITypeAlignment(ArgTy);
+ Flags.setOrigAlign(OriginalAlignment);
+
+ Args.push_back(*II);
+ ArgRegs.push_back(Arg);
+ ArgVTs.push_back(ArgVT);
+ ArgFlags.push_back(Flags);
+ }
+
+ // Process the arguments.
+ SmallVector<unsigned, 8> RegArgs;
+ unsigned NumBytes;
+
+ if (!processCallArgs(Args, ArgRegs, ArgVTs, ArgFlags,
+ RegArgs, CC, NumBytes, IsVarArg))
+ return false;
+
+ // FIXME: No handling for function pointers yet. This requires
+ // implementing the function descriptor (OPD) setup.
+ const GlobalValue *GV = dyn_cast<GlobalValue>(Callee);
+ if (!GV)
+ return false;
+
+ // Build direct call with NOP for TOC restore.
+ // FIXME: We can and should optimize away the NOP for local calls.
+ MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(PPC::BL8_NOP));
+ // Add callee.
+ MIB.addGlobalAddress(GV);
+
+ // Add implicit physical register uses to the call.
+ for (unsigned II = 0, IE = RegArgs.size(); II != IE; ++II)
+ MIB.addReg(RegArgs[II], RegState::Implicit);
+
+ // Add a register mask with the call-preserved registers. Proper
+ // defs for return values will be added by setPhysRegsDeadExcept().
+ MIB.addRegMask(TRI.getCallPreservedMask(CC));
+
+ // Finish off the call including any return values.
+ SmallVector<unsigned, 4> UsedRegs;
+ finishCall(RetVT, UsedRegs, I, CC, NumBytes, IsVarArg);
+
+ // Set all unused physregs defs as dead.
+ static_cast<MachineInstr *>(MIB)->setPhysRegsDeadExcept(UsedRegs, TRI);
+
+ return true;
+}
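+
+ // The emitted sequence for a simple direct call thus looks like this
+ // (a sketch; argument copies shown schematically):
+ //   <copies into r3... / f1... as assigned above>
+ //   bl foo
+ //   nop                    ; slot for the linker's TOC restore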
+
+// Attempt to fast-select a return instruction.
+bool PPCFastISel::SelectRet(const Instruction *I) {
+
+ if (!FuncInfo.CanLowerReturn)
+ return false;
+
+ const ReturnInst *Ret = cast<ReturnInst>(I);
+ const Function &F = *I->getParent()->getParent();
+
+ // Build a list of return value registers.
+ SmallVector<unsigned, 4> RetRegs;
+ CallingConv::ID CC = F.getCallingConv();
+
+ if (Ret->getNumOperands() > 0) {
+ SmallVector<ISD::OutputArg, 4> Outs;
+ GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI);
+
+ // Analyze operands of the call, assigning locations to each operand.
+ SmallVector<CCValAssign, 16> ValLocs;
+ CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, TM, ValLocs, *Context);
+ CCInfo.AnalyzeReturn(Outs, RetCC_PPC64_ELF_FIS);
+ const Value *RV = Ret->getOperand(0);
+
+ // FIXME: Only one output register for now.
+ if (ValLocs.size() > 1)
+ return false;
+
+ // Special case for returning a constant integer of any size.
+ // Materialize the constant as an i64 and copy it to the return
+ // register. This avoids an unnecessary extend or truncate.
+ if (isa<ConstantInt>(*RV)) {
+ const Constant *C = cast<Constant>(RV);
+ unsigned SrcReg = PPCMaterializeInt(C, MVT::i64);
+ unsigned RetReg = ValLocs[0].getLocReg();
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ RetReg).addReg(SrcReg);
+ RetRegs.push_back(RetReg);
+
+ } else {
+ unsigned Reg = getRegForValue(RV);
+
+ if (Reg == 0)
+ return false;
+
+ // Copy the result values into the output registers.
+ for (unsigned i = 0; i < ValLocs.size(); ++i) {
+
+ CCValAssign &VA = ValLocs[i];
+ assert(VA.isRegLoc() && "Can only return in registers!");
+ RetRegs.push_back(VA.getLocReg());
+ unsigned SrcReg = Reg + VA.getValNo();
+
+ EVT RVEVT = TLI.getValueType(RV->getType());
+ if (!RVEVT.isSimple())
+ return false;
+ MVT RVVT = RVEVT.getSimpleVT();
+ MVT DestVT = VA.getLocVT();
+
+ if (RVVT != DestVT && RVVT != MVT::i8 &&
+ RVVT != MVT::i16 && RVVT != MVT::i32)
+ return false;
+
+ if (RVVT != DestVT) {
+ switch (VA.getLocInfo()) {
+ default:
+ llvm_unreachable("Unknown loc info!");
+ case CCValAssign::Full:
+ llvm_unreachable("Full value assign but types don't match?");
+ case CCValAssign::AExt:
+ case CCValAssign::ZExt: {
+ const TargetRegisterClass *RC =
+ (DestVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
+ unsigned TmpReg = createResultReg(RC);
+ if (!PPCEmitIntExt(RVVT, SrcReg, DestVT, TmpReg, true))
+ return false;
+ SrcReg = TmpReg;
+ break;
+ }
+ case CCValAssign::SExt: {
+ const TargetRegisterClass *RC =
+ (DestVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
+ unsigned TmpReg = createResultReg(RC);
+ if (!PPCEmitIntExt(RVVT, SrcReg, DestVT, TmpReg, false))
+ return false;
+ SrcReg = TmpReg;
+ break;
+ }
+ }
+ }
+
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(TargetOpcode::COPY), RetRegs[i])
+ .addReg(SrcReg);
+ }
+ }
+ }
+
+ MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(PPC::BLR));
+
+ for (unsigned i = 0, e = RetRegs.size(); i != e; ++i)
+ MIB.addReg(RetRegs[i], RegState::Implicit);
+
+ return true;
+}
+
+// Attempt to emit an integer extend of SrcReg into DestReg. Both
+// signed and zero extensions are supported. Return false if we
+// can't handle it.
+bool PPCFastISel::PPCEmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
+ unsigned DestReg, bool IsZExt) {
+ if (DestVT != MVT::i32 && DestVT != MVT::i64)
+ return false;
+ if (SrcVT != MVT::i8 && SrcVT != MVT::i16 && SrcVT != MVT::i32)
+ return false;
+
+ // Signed extensions use EXTSB, EXTSH, EXTSW.
+ if (!IsZExt) {
+ unsigned Opc;
+ if (SrcVT == MVT::i8)
+ Opc = (DestVT == MVT::i32) ? PPC::EXTSB : PPC::EXTSB8_32_64;
+ else if (SrcVT == MVT::i16)
+ Opc = (DestVT == MVT::i32) ? PPC::EXTSH : PPC::EXTSH8_32_64;
+ else {
+ assert(DestVT == MVT::i64 && "Signed extend from i32 to i32??");
+ Opc = PPC::EXTSW_32_64;
+ }
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), DestReg)
+ .addReg(SrcReg);
+
+ // Unsigned 32-bit extensions use RLWINM.
+ } else if (DestVT == MVT::i32) {
+ unsigned MB;
+ if (SrcVT == MVT::i8)
+ MB = 24;
+ else {
+ assert(SrcVT == MVT::i16 && "Unsigned extend from i32 to i32??");
+ MB = 16;
+ }
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::RLWINM),
+ DestReg)
+ .addReg(SrcReg).addImm(/*SH=*/0).addImm(MB).addImm(/*ME=*/31);
+
+ // Unsigned 64-bit extensions use RLDICL (with a 32-bit source).
+ } else {
+ unsigned MB;
+ if (SrcVT == MVT::i8)
+ MB = 56;
+ else if (SrcVT == MVT::i16)
+ MB = 48;
+ else
+ MB = 32;
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(PPC::RLDICL_32_64), DestReg)
+ .addReg(SrcReg).addImm(/*SH=*/0).addImm(MB);
+ }
+
+ return true;
+}
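+
+ // For example, zero-extending i8 to i64 clears the upper 56 bits with
+ // "rldicl r3, r4, 0, 56", while zero-extending i16 to i32 uses
+ // "rlwinm r3, r4, 0, 16, 31" (illustrative registers).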
+
+// Attempt to fast-select an indirect branch instruction.
+bool PPCFastISel::SelectIndirectBr(const Instruction *I) {
+ unsigned AddrReg = getRegForValue(I->getOperand(0));
+ if (AddrReg == 0)
+ return false;
+
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::MTCTR8))
+ .addReg(AddrReg);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::BCTR8));
+
+ const IndirectBrInst *IB = cast<IndirectBrInst>(I);
+ for (unsigned i = 0, e = IB->getNumSuccessors(); i != e; ++i)
+ FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[IB->getSuccessor(i)]);
+
+ return true;
+}
+
+// Attempt to fast-select an integer truncate instruction.
+bool PPCFastISel::SelectTrunc(const Instruction *I) {
+ Value *Src = I->getOperand(0);
+ EVT SrcVT = TLI.getValueType(Src->getType(), true);
+ EVT DestVT = TLI.getValueType(I->getType(), true);
+
+ if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16)
+ return false;
+
+ if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8)
+ return false;
+
+ unsigned SrcReg = getRegForValue(Src);
+ if (!SrcReg)
+ return false;
+
+ // The only interesting case is when we need to switch register classes.
+ if (SrcVT == MVT::i64) {
+ unsigned ResultReg = createResultReg(&PPC::GPRCRegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ ResultReg).addReg(SrcReg, 0, PPC::sub_32);
+ SrcReg = ResultReg;
+ }
+
+ UpdateValueMap(I, SrcReg);
+ return true;
+}
+
+// Attempt to fast-select an integer extend instruction.
+bool PPCFastISel::SelectIntExt(const Instruction *I) {
+ Type *DestTy = I->getType();
+ Value *Src = I->getOperand(0);
+ Type *SrcTy = Src->getType();
+
+ bool IsZExt = isa<ZExtInst>(I);
+ unsigned SrcReg = getRegForValue(Src);
+ if (!SrcReg) return false;
+
+ EVT SrcEVT, DestEVT;
+ SrcEVT = TLI.getValueType(SrcTy, true);
+ DestEVT = TLI.getValueType(DestTy, true);
+ if (!SrcEVT.isSimple())
+ return false;
+ if (!DestEVT.isSimple())
+ return false;
+
+ MVT SrcVT = SrcEVT.getSimpleVT();
+ MVT DestVT = DestEVT.getSimpleVT();
+
+ // If we know the register class needed for the result of this
+ // instruction, use it. Otherwise pick the register class of the
+ // correct size that does not contain X0/R0, since we don't know
+ // whether downstream uses permit that assignment.
+ unsigned AssignedReg = FuncInfo.ValueMap[I];
+ const TargetRegisterClass *RC =
+ (AssignedReg ? MRI.getRegClass(AssignedReg) :
+ (DestVT == MVT::i64 ? &PPC::G8RC_and_G8RC_NOX0RegClass :
+ &PPC::GPRC_and_GPRC_NOR0RegClass));
+ unsigned ResultReg = createResultReg(RC);
+
+ if (!PPCEmitIntExt(SrcVT, SrcReg, DestVT, ResultReg, IsZExt))
+ return false;
+
+ UpdateValueMap(I, ResultReg);
+ return true;
+}
+
// Attempt to fast-select an instruction that wasn't handled by
-// the table-generated machinery. TBD.
+// the table-generated machinery.
bool PPCFastISel::TargetSelectInstruction(const Instruction *I) {
- return I && false;
+
+ switch (I->getOpcode()) {
+ case Instruction::Load:
+ return SelectLoad(I);
+ case Instruction::Store:
+ return SelectStore(I);
+ case Instruction::Br:
+ return SelectBranch(I);
+ case Instruction::IndirectBr:
+ return SelectIndirectBr(I);
+ case Instruction::FPExt:
+ return SelectFPExt(I);
+ case Instruction::FPTrunc:
+ return SelectFPTrunc(I);
+ case Instruction::SIToFP:
+ return SelectIToFP(I, /*IsSigned*/ true);
+ case Instruction::UIToFP:
+ return SelectIToFP(I, /*IsSigned*/ false);
+ case Instruction::FPToSI:
+ return SelectFPToI(I, /*IsSigned*/ true);
+ case Instruction::FPToUI:
+ return SelectFPToI(I, /*IsSigned*/ false);
+ case Instruction::Add:
+ return SelectBinaryIntOp(I, ISD::ADD);
+ case Instruction::Or:
+ return SelectBinaryIntOp(I, ISD::OR);
+ case Instruction::Sub:
+ return SelectBinaryIntOp(I, ISD::SUB);
+ case Instruction::Call:
+ if (isa<IntrinsicInst>(I))
+ return false;
+ return SelectCall(I);
+ case Instruction::Ret:
+ return SelectRet(I);
+ case Instruction::Trunc:
+ return SelectTrunc(I);
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ return SelectIntExt(I);
+ // Here add other flavors of Instruction::XXX that automated
+ // cases don't catch. For example, switches are terminators
+ // that aren't yet handled.
+ default:
+ break;
+ }
+ return false;
}
// Materialize a floating-point constant into a register, and return
@@ -131,21 +1802,94 @@ unsigned PPCFastISel::PPCMaterializeFP(const ConstantFP *CFP, MVT VT) {
MachinePointerInfo::getConstantPool(), MachineMemOperand::MOLoad,
(VT == MVT::f32) ? 4 : 8, Align);
- // For small code model, generate a LDtocCPT.
- if (CModel == CodeModel::Small || CModel == CodeModel::JITDefault)
+ unsigned Opc = (VT == MVT::f32) ? PPC::LFS : PPC::LFD;
+ unsigned TmpReg = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass);
+
+ // For small code model, generate a LF[SD](0, LDtocCPT(Idx, X2)).
+ if (CModel == CodeModel::Small || CModel == CodeModel::JITDefault) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::LDtocCPT),
- DestReg)
- .addConstantPoolIndex(Idx).addReg(PPC::X2).addMemOperand(MMO);
- else {
+ TmpReg)
+ .addConstantPoolIndex(Idx).addReg(PPC::X2);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), DestReg)
+ .addImm(0).addReg(TmpReg).addMemOperand(MMO);
+ } else {
// Otherwise we generate LF[SD](Idx[lo], ADDIStocHA(X2, Idx)).
- unsigned Opc = (VT == MVT::f32) ? PPC::LFS : PPC::LFD;
- unsigned TmpReg = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::ADDIStocHA),
TmpReg).addReg(PPC::X2).addConstantPoolIndex(Idx);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), DestReg)
- .addConstantPoolIndex(Idx, 0, PPCII::MO_TOC_LO)
- .addReg(TmpReg)
- .addMemOperand(MMO);
+ // But for large code model, we must generate a LDtocL followed
+ // by the LF[SD].
+ if (CModel == CodeModel::Large) {
+ unsigned TmpReg2 = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::LDtocL),
+ TmpReg2).addConstantPoolIndex(Idx).addReg(TmpReg);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), DestReg)
+ .addImm(0).addReg(TmpReg2);
+ } else
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), DestReg)
+ .addConstantPoolIndex(Idx, 0, PPCII::MO_TOC_LO)
+ .addReg(TmpReg)
+ .addMemOperand(MMO);
+ }
+
+ return DestReg;
+}
+
+// Materialize the address of a global value into a register, and return
+// the register number (or zero if we failed to handle it).
+unsigned PPCFastISel::PPCMaterializeGV(const GlobalValue *GV, MVT VT) {
+ assert(VT == MVT::i64 && "Non-address!");
+ const TargetRegisterClass *RC = &PPC::G8RC_and_G8RC_NOX0RegClass;
+ unsigned DestReg = createResultReg(RC);
+
+ // Global values may be plain old object addresses, TLS object
+ // addresses, constant pool entries, or jump tables. How we generate
+ // code for these may depend on small, medium, or large code model.
+ CodeModel::Model CModel = TM.getCodeModel();
+
+ // FIXME: Jump tables are not yet required because fast-isel doesn't
+ // handle switches; if that changes, we need them as well. For now,
+ // what follows assumes everything's a generic (or TLS) global address.
+ const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV);
+ if (!GVar) {
+ // If GV is an alias, use the aliasee for determining thread-locality.
+ if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
+ GVar = dyn_cast_or_null<GlobalVariable>(GA->resolveAliasedGlobal(false));
+ }
+
+ // FIXME: We don't yet handle the complexity of TLS.
+ bool IsTLS = GVar && GVar->isThreadLocal();
+ if (IsTLS)
+ return 0;
+
+ // For small code model, generate a simple TOC load.
+ if (CModel == CodeModel::Small || CModel == CodeModel::JITDefault)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::LDtoc), DestReg)
+ .addGlobalAddress(GV).addReg(PPC::X2);
+ else {
+ // If the address is an externally defined symbol, a symbol with
+ // common or externally available linkage, a function address, or a
+ // jump table address (not yet needed), or if we are generating code
+ // for large code model, we generate:
+ // LDtocL(GV, ADDIStocHA(%X2, GV))
+ // Otherwise we generate:
+ // ADDItocL(ADDIStocHA(%X2, GV), GV)
+ // Either way, start with the ADDIStocHA:
+ unsigned HighPartReg = createResultReg(RC);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::ADDIStocHA),
+ HighPartReg).addReg(PPC::X2).addGlobalAddress(GV);
+
+ // !GVar implies a function address. An external variable is one
+ // without an initializer.
+ // If/when switches are implemented, jump tables should be handled
+ // on the "if" path here.
+ if (CModel == CodeModel::Large || !GVar || !GVar->hasInitializer() ||
+ GVar->hasCommonLinkage() || GVar->hasAvailableExternallyLinkage())
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::LDtocL),
+ DestReg).addGlobalAddress(GV).addReg(HighPartReg);
+ else
+ // Otherwise generate the ADDItocL.
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::ADDItocL),
+ DestReg).addReg(HighPartReg).addGlobalAddress(GV);
}
return DestReg;
@@ -283,23 +2027,112 @@ unsigned PPCFastISel::TargetMaterializeConstant(const Constant *C) {
if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
return PPCMaterializeFP(CFP, VT);
+ else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
+ return PPCMaterializeGV(GV, VT);
else if (isa<ConstantInt>(C))
return PPCMaterializeInt(C, VT);
- // TBD: Global values.
return 0;
}
// Materialize the address created by an alloca into a register, and
-// return the register number (or zero if we failed to handle it). TBD.
+// return the register number (or zero if we failed to handle it).
unsigned PPCFastISel::TargetMaterializeAlloca(const AllocaInst *AI) {
- return AI && 0;
+ // Don't handle dynamic allocas.
+ if (!FuncInfo.StaticAllocaMap.count(AI)) return 0;
+
+ MVT VT;
+ if (!isLoadTypeLegal(AI->getType(), VT)) return 0;
+
+ DenseMap<const AllocaInst*, int>::iterator SI =
+ FuncInfo.StaticAllocaMap.find(AI);
+
+ if (SI != FuncInfo.StaticAllocaMap.end()) {
+ unsigned ResultReg = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::ADDI8),
+ ResultReg).addFrameIndex(SI->second).addImm(0);
+ return ResultReg;
+ }
+
+ return 0;
}
-// Fold loads into extends when possible. TBD.
+// Fold loads into extends when possible.
+// FIXME: We can have multiple redundant extend/trunc instructions
+// following a load. The folding only picks up one. Extend this
+// to check subsequent instructions for the same pattern and remove
+// them. Thus ResultReg should be the def reg for the last redundant
+// instruction in a chain, and all intervening instructions can be
+// removed from parent. Change test/CodeGen/PowerPC/fast-isel-fold.ll
+// to add ELF64-NOT: rldicl to the appropriate tests when this works.
bool PPCFastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
const LoadInst *LI) {
- return MI && OpNo && LI && false;
+ // Verify we have a legal type before going any further.
+ MVT VT;
+ if (!isLoadTypeLegal(LI->getType(), VT))
+ return false;
+
+ // Combine load followed by zero- or sign-extend.
+ bool IsZExt = false;
+ switch(MI->getOpcode()) {
+ default:
+ return false;
+
+ case PPC::RLDICL:
+ case PPC::RLDICL_32_64: {
+ IsZExt = true;
+ unsigned MB = MI->getOperand(3).getImm();
+ if ((VT == MVT::i8 && MB <= 56) ||
+ (VT == MVT::i16 && MB <= 48) ||
+ (VT == MVT::i32 && MB <= 32))
+ break;
+ return false;
+ }
+
+ case PPC::RLWINM:
+ case PPC::RLWINM8: {
+ IsZExt = true;
+ unsigned MB = MI->getOperand(3).getImm();
+ if ((VT == MVT::i8 && MB <= 24) ||
+ (VT == MVT::i16 && MB <= 16))
+ break;
+ return false;
+ }
+
+ case PPC::EXTSB:
+ case PPC::EXTSB8:
+ case PPC::EXTSB8_32_64:
+ // There is no sign-extending load-byte instruction.
+ return false;
+
+ case PPC::EXTSH:
+ case PPC::EXTSH8:
+ case PPC::EXTSH8_32_64: {
+ if (VT != MVT::i16 && VT != MVT::i8)
+ return false;
+ break;
+ }
+
+ case PPC::EXTSW:
+ case PPC::EXTSW_32_64: {
+ if (VT != MVT::i32 && VT != MVT::i16 && VT != MVT::i8)
+ return false;
+ break;
+ }
+ }
+
+ // See if we can handle this address.
+ Address Addr;
+ if (!PPCComputeAddress(LI->getOperand(0), Addr))
+ return false;
+
+ unsigned ResultReg = MI->getOperand(0).getReg();
+
+ if (!PPCEmitLoad(VT, ResultReg, Addr, 0, IsZExt))
+ return false;
+
+ MI->eraseFromParent();
+ return true;
}
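+
+ // For example, the pair (illustrative registers)
+ //   lbz r3, 0(r4)
+ //   rldicl r5, r3, 0, 56
+ // folds to a single "lbz r5, 0(r4)", since lbz already zeroes the upper
+ // bits.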
// Attempt to lower call arguments in a faster way than done by
@@ -312,6 +2145,81 @@ bool PPCFastISel::FastLowerArguments() {
return false;
}
+// Handle materializing integer constants into a register. This is not
+// automatically generated for PowerPC, so must be explicitly created here.
+unsigned PPCFastISel::FastEmit_i(MVT Ty, MVT VT, unsigned Opc, uint64_t Imm) {
+
+ if (Opc != ISD::Constant)
+ return 0;
+
+ if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16 &&
+ VT != MVT::i8 && VT != MVT::i1)
+ return 0;
+
+ const TargetRegisterClass *RC = ((VT == MVT::i64) ? &PPC::G8RCRegClass :
+ &PPC::GPRCRegClass);
+ if (VT == MVT::i64)
+ return PPCMaterialize64BitInt(Imm, RC);
+ else
+ return PPCMaterialize32BitInt(Imm, RC);
+}
+
+// Override for ADDI and ADDI8 to set the correct register class
+// on RHS operand 0. The automatic infrastructure naively assumes
+// GPRC for i32 and G8RC for i64; the concept of "no R0" is lost
+// for these cases. At the moment, none of the other automatically
+// generated RI instructions require special treatment. However, once
+// SelectSelect is implemented, "isel" requires similar handling.
+//
+// Also be conservative about the output register class. Avoid
+// assigning R0 or X0 to the output register for GPRC and G8RC
+// register classes, as any such result could be used in ADDI, etc.,
+// where those regs have another meaning.
+unsigned PPCFastISel::FastEmitInst_ri(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ uint64_t Imm) {
+ if (MachineInstOpcode == PPC::ADDI)
+ MRI.setRegClass(Op0, &PPC::GPRC_and_GPRC_NOR0RegClass);
+ else if (MachineInstOpcode == PPC::ADDI8)
+ MRI.setRegClass(Op0, &PPC::G8RC_and_G8RC_NOX0RegClass);
+
+ const TargetRegisterClass *UseRC =
+ (RC == &PPC::GPRCRegClass ? &PPC::GPRC_and_GPRC_NOR0RegClass :
+ (RC == &PPC::G8RCRegClass ? &PPC::G8RC_and_G8RC_NOX0RegClass : RC));
+
+ return FastISel::FastEmitInst_ri(MachineInstOpcode, UseRC,
+ Op0, Op0IsKill, Imm);
+}
+
+// Override for instructions with one register operand to avoid use of
+// R0/X0. The automatic infrastructure isn't aware of the context so
+// we must be conservative.
+unsigned PPCFastISel::FastEmitInst_r(unsigned MachineInstOpcode,
+ const TargetRegisterClass* RC,
+ unsigned Op0, bool Op0IsKill) {
+ const TargetRegisterClass *UseRC =
+ (RC == &PPC::GPRCRegClass ? &PPC::GPRC_and_GPRC_NOR0RegClass :
+ (RC == &PPC::G8RCRegClass ? &PPC::G8RC_and_G8RC_NOX0RegClass : RC));
+
+ return FastISel::FastEmitInst_r(MachineInstOpcode, UseRC, Op0, Op0IsKill);
+}
+
+// Override for instructions with two register operands to avoid use
+// of R0/X0. The automatic infrastructure isn't aware of the context
+// so we must be conservative.
+unsigned PPCFastISel::FastEmitInst_rr(unsigned MachineInstOpcode,
+ const TargetRegisterClass* RC,
+ unsigned Op0, bool Op0IsKill,
+ unsigned Op1, bool Op1IsKill) {
+ const TargetRegisterClass *UseRC =
+ (RC == &PPC::GPRCRegClass ? &PPC::GPRC_and_GPRC_NOR0RegClass :
+ (RC == &PPC::G8RCRegClass ? &PPC::G8RC_and_G8RC_NOX0RegClass : RC));
+
+ return FastISel::FastEmitInst_rr(MachineInstOpcode, UseRC, Op0, Op0IsKill,
+ Op1, Op1IsKill);
+}
+
namespace llvm {
// Create the fast instruction selector for PowerPC64 ELF.
FastISel *PPC::createFastISel(FunctionLoweringInfo &FuncInfo,
diff --git a/lib/Target/PowerPC/PPCFrameLowering.cpp b/lib/Target/PowerPC/PPCFrameLowering.cpp
index 24d3a0b..0ac2ced 100644
--- a/lib/Target/PowerPC/PPCFrameLowering.cpp
+++ b/lib/Target/PowerPC/PPCFrameLowering.cpp
@@ -204,10 +204,9 @@ unsigned PPCFrameLowering::determineFrameLayout(MachineFunction &MF,
unsigned FrameSize =
UseEstimate ? MFI->estimateStackSize(MF) : MFI->getStackSize();
- // Get the alignments provided by the target, and the maximum alignment
- // (if any) of the fixed frame objects.
- unsigned TargetAlign = getStackAlignment();
- unsigned MaxAlign = MFI->getMaxAlignment();
+ // Get stack alignments. The frame must be aligned to the greatest of these:
+ unsigned TargetAlign = getStackAlignment(); // alignment required per the ABI
+ unsigned MaxAlign = MFI->getMaxAlignment(); // alignment required by frame data
unsigned AlignMask = std::max(MaxAlign, TargetAlign) - 1;
const PPCRegisterInfo *RegInfo =
@@ -346,12 +345,20 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
bool needsFrameMoves = MMI.hasDebugInfo() ||
MF.getFunction()->needsUnwindTableEntry();
+ // Get processor type.
+ bool isPPC64 = Subtarget.isPPC64();
+ // Get the ABI.
+ bool isDarwinABI = Subtarget.isDarwinABI();
+ bool isSVR4ABI = Subtarget.isSVR4ABI();
+ assert((isDarwinABI || isSVR4ABI) &&
+ "Currently only Darwin and SVR4 ABIs are supported for PowerPC.");
+
// Prepare for frame info.
MCSymbol *FrameLabel = 0;
// Scan the prolog, looking for an UPDATE_VRSAVE instruction. If we find it,
// process it.
- if (!Subtarget.isSVR4ABI())
+ if (!isSVR4ABI)
for (unsigned i = 0; MBBI != MBB.end(); ++i, ++MBBI) {
if (MBBI->getOpcode() == PPC::UPDATE_VRSAVE) {
HandleVRSaveUpdate(MBBI, TII);
@@ -371,23 +378,52 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
if (MFI->isFrameAddressTaken())
replaceFPWithRealFP(MF);
- // Get processor type.
- bool isPPC64 = Subtarget.isPPC64();
- // Get operating system
- bool isDarwinABI = Subtarget.isDarwinABI();
// Check if the link register (LR) must be saved.
PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
bool MustSaveLR = FI->mustSaveLR();
const SmallVectorImpl<unsigned> &MustSaveCRs = FI->getMustSaveCRs();
- // Do we have a frame pointer for this function?
+ // Do we have a frame pointer and/or base pointer for this function?
bool HasFP = hasFP(MF);
bool HasBP = RegInfo->hasBasePointer(MF);
+ unsigned SPReg = isPPC64 ? PPC::X1 : PPC::R1;
+ unsigned BPReg = isPPC64 ? PPC::X30 : PPC::R30;
+ unsigned FPReg = isPPC64 ? PPC::X31 : PPC::R31;
+ unsigned LRReg = isPPC64 ? PPC::LR8 : PPC::LR;
+ unsigned ScratchReg = isPPC64 ? PPC::X0 : PPC::R0;
+ unsigned TempReg = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg
+ // ...(R12/X12 is volatile in both Darwin & SVR4, & can't be a function arg.)
+ const MCInstrDesc& MFLRInst = TII.get(isPPC64 ? PPC::MFLR8
+ : PPC::MFLR );
+ const MCInstrDesc& StoreInst = TII.get(isPPC64 ? PPC::STD
+ : PPC::STW );
+ const MCInstrDesc& StoreUpdtInst = TII.get(isPPC64 ? PPC::STDU
+ : PPC::STWU );
+ const MCInstrDesc& StoreUpdtIdxInst = TII.get(isPPC64 ? PPC::STDUX
+ : PPC::STWUX);
+ const MCInstrDesc& LoadImmShiftedInst = TII.get(isPPC64 ? PPC::LIS8
+ : PPC::LIS );
+ const MCInstrDesc& OrImmInst = TII.get(isPPC64 ? PPC::ORI8
+ : PPC::ORI );
+ const MCInstrDesc& OrInst = TII.get(isPPC64 ? PPC::OR8
+ : PPC::OR );
+ const MCInstrDesc& SubtractCarryingInst = TII.get(isPPC64 ? PPC::SUBFC8
+ : PPC::SUBFC);
+ const MCInstrDesc& SubtractImmCarryingInst = TII.get(isPPC64 ? PPC::SUBFIC8
+ : PPC::SUBFIC);
+
+ // Regarding this assert: Even though LR is saved in the caller's frame (i.e.,
+ // LROffset is positive), that slot is callee-owned. Because PPC32 SVR4 has no
+ // Red Zone, an asynchronous event (a form of "callee") could claim a frame &
+ // overwrite it, so PPC32 SVR4 must claim at least a minimal frame to save LR.
+ assert((isPPC64 || !isSVR4ABI || !(!FrameSize && (MustSaveLR || HasFP))) &&
+ "FrameSize must be >0 to save/restore the FP or LR for 32-bit SVR4.");
+
int LROffset = PPCFrameLowering::getReturnSaveOffset(isPPC64, isDarwinABI);
int FPOffset = 0;
if (HasFP) {
- if (Subtarget.isSVR4ABI()) {
+ if (isSVR4ABI) {
MachineFrameInfo *FFI = MF.getFrameInfo();
int FPIndex = FI->getFramePointerSaveIndex();
assert(FPIndex && "No Frame Pointer Save Slot!");
@@ -399,7 +435,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
int BPOffset = 0;
if (HasBP) {
- if (Subtarget.isSVR4ABI()) {
+ if (isSVR4ABI) {
MachineFrameInfo *FFI = MF.getFrameInfo();
int BPIndex = FI->getBasePointerSaveIndex();
assert(BPIndex && "No Base Pointer Save Slot!");
@@ -410,181 +446,116 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
}
}
- if (isPPC64) {
- if (MustSaveLR)
- BuildMI(MBB, MBBI, dl, TII.get(PPC::MFLR8), PPC::X0);
+ // Get stack alignments.
+ unsigned MaxAlign = MFI->getMaxAlignment();
+ if (HasBP && MaxAlign > 1)
+ assert(isPowerOf2_32(MaxAlign) && isInt<16>(MaxAlign) &&
+ "Invalid alignment!");
+
+ // Frames of 32KB & larger require special handling because they cannot be
+ // indexed into with a simple STDU/STWU/STD/STW immediate offset operand.
+ bool isLargeFrame = !isInt<16>(NegFrameSize);
- if (!MustSaveCRs.empty()) {
- MachineInstrBuilder MIB =
- BuildMI(MBB, MBBI, dl, TII.get(PPC::MFCR8), PPC::X12);
- for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
- MIB.addReg(MustSaveCRs[i], RegState::ImplicitKill);
- }
+ if (MustSaveLR)
+ BuildMI(MBB, MBBI, dl, MFLRInst, ScratchReg);
- if (HasFP)
- BuildMI(MBB, MBBI, dl, TII.get(PPC::STD))
- .addReg(PPC::X31)
- .addImm(FPOffset)
- .addReg(PPC::X1);
-
- if (HasBP)
- BuildMI(MBB, MBBI, dl, TII.get(PPC::STD))
- .addReg(PPC::X30)
- .addImm(BPOffset)
- .addReg(PPC::X1);
-
- if (MustSaveLR)
- BuildMI(MBB, MBBI, dl, TII.get(PPC::STD))
- .addReg(PPC::X0)
- .addImm(LROffset)
- .addReg(PPC::X1);
-
- if (!MustSaveCRs.empty())
- BuildMI(MBB, MBBI, dl, TII.get(PPC::STW8))
- .addReg(PPC::X12, getKillRegState(true))
- .addImm(8)
- .addReg(PPC::X1);
- } else {
- if (MustSaveLR)
- BuildMI(MBB, MBBI, dl, TII.get(PPC::MFLR), PPC::R0);
-
- if (HasFP)
- // FIXME: On PPC32 SVR4, FPOffset is negative and access to negative
- // offsets of R1 is not allowed.
- BuildMI(MBB, MBBI, dl, TII.get(PPC::STW))
- .addReg(PPC::R31)
- .addImm(FPOffset)
- .addReg(PPC::R1);
-
- if (HasBP)
- // FIXME: On PPC32 SVR4, FPOffset is negative and access to negative
- // offsets of R1 is not allowed.
- BuildMI(MBB, MBBI, dl, TII.get(PPC::STW))
- .addReg(PPC::R30)
- .addImm(BPOffset)
- .addReg(PPC::R1);
-
- assert(MustSaveCRs.empty() &&
- "Prologue CR saving supported only in 64-bit mode");
-
- if (MustSaveLR)
- BuildMI(MBB, MBBI, dl, TII.get(PPC::STW))
- .addReg(PPC::R0)
- .addImm(LROffset)
- .addReg(PPC::R1);
+ assert((isPPC64 || MustSaveCRs.empty()) &&
+ "Prologue CR saving supported only in 64-bit mode");
+
+ if (!MustSaveCRs.empty()) { // will only occur for PPC64
+ MachineInstrBuilder MIB =
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::MFCR8), TempReg);
+ for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
+ MIB.addReg(MustSaveCRs[i], RegState::ImplicitKill);
}
- // Skip if a leaf routine.
+ if (HasFP)
+    // FIXME: On PPC32 SVR4, we must not spill before claiming the stack frame.
+ BuildMI(MBB, MBBI, dl, StoreInst)
+ .addReg(FPReg)
+ .addImm(FPOffset)
+ .addReg(SPReg);
+
+ if (HasBP)
+    // FIXME: On PPC32 SVR4, we must not spill before claiming the stack frame.
+ BuildMI(MBB, MBBI, dl, StoreInst)
+ .addReg(BPReg)
+ .addImm(BPOffset)
+ .addReg(SPReg);
+
+ if (MustSaveLR)
+    // FIXME: On PPC32 SVR4, we must not spill before claiming the stack frame.
+ BuildMI(MBB, MBBI, dl, StoreInst)
+ .addReg(ScratchReg)
+ .addImm(LROffset)
+ .addReg(SPReg);
+
+ if (!MustSaveCRs.empty()) // will only occur for PPC64
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::STW8))
+ .addReg(TempReg, getKillRegState(true))
+ .addImm(8)
+ .addReg(SPReg);
+
+ // Skip the rest if this is a leaf function & all spills fit in the Red Zone.
if (!FrameSize) return;
- // Get stack alignments.
- unsigned MaxAlign = MFI->getMaxAlignment();
-
// Adjust stack pointer: r1 += NegFrameSize.
// If there is a preferred stack alignment, align R1 now
- if (!isPPC64) {
- // PPC32.
-
- if (HasBP) {
- // Save a copy of r1 as the base pointer.
- BuildMI(MBB, MBBI, dl, TII.get(PPC::OR), PPC::R30)
- .addReg(PPC::R1)
- .addReg(PPC::R1);
- }
- if (HasBP && MaxAlign > 1) {
- assert(isPowerOf2_32(MaxAlign) && isInt<16>(MaxAlign) &&
- "Invalid alignment!");
+ if (HasBP) {
+ // Save a copy of r1 as the base pointer.
+ BuildMI(MBB, MBBI, dl, OrInst, BPReg)
+ .addReg(SPReg)
+ .addReg(SPReg);
+ }
- BuildMI(MBB, MBBI, dl, TII.get(PPC::RLWINM), PPC::R0)
- .addReg(PPC::R1)
+ if (HasBP && MaxAlign > 1) {
+ if (isPPC64)
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::RLDICL), ScratchReg)
+ .addReg(SPReg)
+ .addImm(0)
+ .addImm(64 - Log2_32(MaxAlign));
+ else // PPC32...
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::RLWINM), ScratchReg)
+ .addReg(SPReg)
.addImm(0)
.addImm(32 - Log2_32(MaxAlign))
.addImm(31);
- if (isInt<16>(NegFrameSize)) {
- BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBFIC), PPC::R0)
- .addReg(PPC::R0, RegState::Kill)
- .addImm(NegFrameSize);
- } else {
- BuildMI(MBB, MBBI, dl, TII.get(PPC::LIS), PPC::R12)
- .addImm(NegFrameSize >> 16);
- BuildMI(MBB, MBBI, dl, TII.get(PPC::ORI), PPC::R12)
- .addReg(PPC::R12, RegState::Kill)
- .addImm(NegFrameSize & 0xFFFF);
- BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBFC), PPC::R0)
- .addReg(PPC::R0, RegState::Kill)
- .addReg(PPC::R12, RegState::Kill);
- }
- BuildMI(MBB, MBBI, dl, TII.get(PPC::STWUX), PPC::R1)
- .addReg(PPC::R1, RegState::Kill)
- .addReg(PPC::R1)
- .addReg(PPC::R0);
- } else if (isInt<16>(NegFrameSize)) {
- BuildMI(MBB, MBBI, dl, TII.get(PPC::STWU), PPC::R1)
- .addReg(PPC::R1)
- .addImm(NegFrameSize)
- .addReg(PPC::R1);
+ if (!isLargeFrame) {
+ BuildMI(MBB, MBBI, dl, SubtractImmCarryingInst, ScratchReg)
+ .addReg(ScratchReg, RegState::Kill)
+ .addImm(NegFrameSize);
} else {
- BuildMI(MBB, MBBI, dl, TII.get(PPC::LIS), PPC::R0)
+ BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, TempReg)
.addImm(NegFrameSize >> 16);
- BuildMI(MBB, MBBI, dl, TII.get(PPC::ORI), PPC::R0)
- .addReg(PPC::R0, RegState::Kill)
+ BuildMI(MBB, MBBI, dl, OrImmInst, TempReg)
+ .addReg(TempReg, RegState::Kill)
.addImm(NegFrameSize & 0xFFFF);
- BuildMI(MBB, MBBI, dl, TII.get(PPC::STWUX), PPC::R1)
- .addReg(PPC::R1, RegState::Kill)
- .addReg(PPC::R1)
- .addReg(PPC::R0);
- }
- } else { // PPC64.
- if (HasBP) {
- // Save a copy of r1 as the base pointer.
- BuildMI(MBB, MBBI, dl, TII.get(PPC::OR8), PPC::X30)
- .addReg(PPC::X1)
- .addReg(PPC::X1);
+ BuildMI(MBB, MBBI, dl, SubtractCarryingInst, ScratchReg)
+ .addReg(ScratchReg, RegState::Kill)
+ .addReg(TempReg, RegState::Kill);
}
+ BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg)
+ .addReg(SPReg, RegState::Kill)
+ .addReg(SPReg)
+ .addReg(ScratchReg);
- if (HasBP && MaxAlign > 1) {
- assert(isPowerOf2_32(MaxAlign) && isInt<16>(MaxAlign) &&
- "Invalid alignment!");
+ } else if (!isLargeFrame) {
+ BuildMI(MBB, MBBI, dl, StoreUpdtInst, SPReg)
+ .addReg(SPReg)
+ .addImm(NegFrameSize)
+ .addReg(SPReg);
- BuildMI(MBB, MBBI, dl, TII.get(PPC::RLDICL), PPC::X0)
- .addReg(PPC::X1)
- .addImm(0)
- .addImm(64 - Log2_32(MaxAlign));
- if (isInt<16>(NegFrameSize)) {
- BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBFIC8), PPC::X0)
- .addReg(PPC::X0, RegState::Kill)
- .addImm(NegFrameSize);
- } else {
- BuildMI(MBB, MBBI, dl, TII.get(PPC::LIS8), PPC::X12)
- .addImm(NegFrameSize >> 16);
- BuildMI(MBB, MBBI, dl, TII.get(PPC::ORI8), PPC::X12)
- .addReg(PPC::X12, RegState::Kill)
- .addImm(NegFrameSize & 0xFFFF);
- BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBFC8), PPC::X0)
- .addReg(PPC::X0, RegState::Kill)
- .addReg(PPC::X12, RegState::Kill);
- }
- BuildMI(MBB, MBBI, dl, TII.get(PPC::STDUX), PPC::X1)
- .addReg(PPC::X1, RegState::Kill)
- .addReg(PPC::X1)
- .addReg(PPC::X0);
- } else if (isInt<16>(NegFrameSize)) {
- BuildMI(MBB, MBBI, dl, TII.get(PPC::STDU), PPC::X1)
- .addReg(PPC::X1)
- .addImm(NegFrameSize)
- .addReg(PPC::X1);
- } else {
- BuildMI(MBB, MBBI, dl, TII.get(PPC::LIS8), PPC::X0)
- .addImm(NegFrameSize >> 16);
- BuildMI(MBB, MBBI, dl, TII.get(PPC::ORI8), PPC::X0)
- .addReg(PPC::X0, RegState::Kill)
- .addImm(NegFrameSize & 0xFFFF);
- BuildMI(MBB, MBBI, dl, TII.get(PPC::STDUX), PPC::X1)
- .addReg(PPC::X1, RegState::Kill)
- .addReg(PPC::X1)
- .addReg(PPC::X0);
- }
+ } else {
+ BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg)
+ .addImm(NegFrameSize >> 16);
+ BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg)
+ .addReg(ScratchReg, RegState::Kill)
+ .addImm(NegFrameSize & 0xFFFF);
+ BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg)
+ .addReg(SPReg, RegState::Kill)
+ .addReg(SPReg)
+ .addReg(ScratchReg);
}
// Add the "machine moves" for the instructions we generated above, but in
@@ -600,22 +571,19 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
MCCFIInstruction::createDefCfaOffset(FrameLabel, NegFrameSize));
if (HasFP) {
- unsigned Reg = isPPC64 ? PPC::X31 : PPC::R31;
- Reg = MRI->getDwarfRegNum(Reg, true);
+ unsigned Reg = MRI->getDwarfRegNum(FPReg, true);
MMI.addFrameInst(
MCCFIInstruction::createOffset(FrameLabel, Reg, FPOffset));
}
if (HasBP) {
- unsigned Reg = isPPC64 ? PPC::X30 : PPC::R30;
- Reg = MRI->getDwarfRegNum(Reg, true);
+ unsigned Reg = MRI->getDwarfRegNum(BPReg, true);
MMI.addFrameInst(
MCCFIInstruction::createOffset(FrameLabel, Reg, BPOffset));
}
if (MustSaveLR) {
- unsigned Reg = isPPC64 ? PPC::LR8 : PPC::LR;
- Reg = MRI->getDwarfRegNum(Reg, true);
+ unsigned Reg = MRI->getDwarfRegNum(LRReg, true);
MMI.addFrameInst(
MCCFIInstruction::createOffset(FrameLabel, Reg, LROffset));
}
@@ -625,15 +593,9 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
// If there is a frame pointer, copy R1 into R31
if (HasFP) {
- if (!isPPC64) {
- BuildMI(MBB, MBBI, dl, TII.get(PPC::OR), PPC::R31)
- .addReg(PPC::R1)
- .addReg(PPC::R1);
- } else {
- BuildMI(MBB, MBBI, dl, TII.get(PPC::OR8), PPC::X31)
- .addReg(PPC::X1)
- .addReg(PPC::X1);
- }
+ BuildMI(MBB, MBBI, dl, OrInst, FPReg)
+ .addReg(SPReg)
+ .addReg(SPReg);
if (needsFrameMoves) {
ReadyLabel = MMI.getContext().CreateTempSymbol();
@@ -641,9 +603,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
// Mark effective beginning of when frame pointer is ready.
BuildMI(MBB, MBBI, dl, TII.get(PPC::PROLOG_LABEL)).addSym(ReadyLabel);
- unsigned Reg = HasFP ? (isPPC64 ? PPC::X31 : PPC::R31)
- : (isPPC64 ? PPC::X1 : PPC::R1);
- Reg = MRI->getDwarfRegNum(Reg, true);
+ unsigned Reg = MRI->getDwarfRegNum(FPReg, true);
MMI.addFrameInst(MCCFIInstruction::createDefCfaRegister(ReadyLabel, Reg));
}
}
@@ -664,19 +624,16 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
// For SVR4, don't emit a move for the CR spill slot if we haven't
// spilled CRs.
- if (Subtarget.isSVR4ABI()
- && (PPC::CR2 <= Reg && Reg <= PPC::CR4)
- && MustSaveCRs.empty())
- continue;
+ if (isSVR4ABI && (PPC::CR2 <= Reg && Reg <= PPC::CR4)
+ && MustSaveCRs.empty())
+ continue;
// For 64-bit SVR4 when we have spilled CRs, the spill location
// is SP+8, not a frame-relative slot.
- if (Subtarget.isSVR4ABI()
- && Subtarget.isPPC64()
- && (PPC::CR2 <= Reg && Reg <= PPC::CR4)) {
+ if (isSVR4ABI && isPPC64 && (PPC::CR2 <= Reg && Reg <= PPC::CR4)) {
MMI.addFrameInst(MCCFIInstruction::createOffset(
Label, MRI->getDwarfRegNum(PPC::CR2, true), 8));
- continue;
+ continue;
}
int Offset = MFI->getObjectOffset(CSI[I].getFrameIdx());
@@ -707,7 +664,7 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
RetOpcode == PPC::TCRETURNai8) &&
"Can only insert epilog into returning blocks");
- // Get alignment info so we know how to restore r1
+ // Get alignment info so we know how to restore the SP.
const MachineFrameInfo *MFI = MF.getFrameInfo();
// Get the number of bytes allocated from the FrameInfo.
@@ -715,21 +672,41 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
// Get processor type.
bool isPPC64 = Subtarget.isPPC64();
- // Get operating system
+ // Get the ABI.
bool isDarwinABI = Subtarget.isDarwinABI();
+ bool isSVR4ABI = Subtarget.isSVR4ABI();
+
// Check if the link register (LR) has been saved.
PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
bool MustSaveLR = FI->mustSaveLR();
const SmallVectorImpl<unsigned> &MustSaveCRs = FI->getMustSaveCRs();
- // Do we have a frame pointer for this function?
+ // Do we have a frame pointer and/or base pointer for this function?
bool HasFP = hasFP(MF);
bool HasBP = RegInfo->hasBasePointer(MF);
+ unsigned SPReg = isPPC64 ? PPC::X1 : PPC::R1;
+ unsigned BPReg = isPPC64 ? PPC::X30 : PPC::R30;
+ unsigned FPReg = isPPC64 ? PPC::X31 : PPC::R31;
+ unsigned ScratchReg = isPPC64 ? PPC::X0 : PPC::R0;
+ unsigned TempReg = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg
+ const MCInstrDesc& MTLRInst = TII.get( isPPC64 ? PPC::MTLR8
+ : PPC::MTLR );
+ const MCInstrDesc& LoadInst = TII.get( isPPC64 ? PPC::LD
+ : PPC::LWZ );
+ const MCInstrDesc& LoadImmShiftedInst = TII.get( isPPC64 ? PPC::LIS8
+ : PPC::LIS );
+ const MCInstrDesc& OrImmInst = TII.get( isPPC64 ? PPC::ORI8
+ : PPC::ORI );
+ const MCInstrDesc& AddImmInst = TII.get( isPPC64 ? PPC::ADDI8
+ : PPC::ADDI );
+ const MCInstrDesc& AddInst = TII.get( isPPC64 ? PPC::ADD8
+ : PPC::ADD4 );
+
int LROffset = PPCFrameLowering::getReturnSaveOffset(isPPC64, isDarwinABI);
int FPOffset = 0;
if (HasFP) {
- if (Subtarget.isSVR4ABI()) {
+ if (isSVR4ABI) {
MachineFrameInfo *FFI = MF.getFrameInfo();
int FPIndex = FI->getFramePointerSaveIndex();
assert(FPIndex && "No Frame Pointer Save Slot!");
@@ -741,7 +718,7 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
int BPOffset = 0;
if (HasBP) {
- if (Subtarget.isSVR4ABI()) {
+ if (isSVR4ABI) {
MachineFrameInfo *FFI = MF.getFrameInfo();
int BPIndex = FI->getBasePointerSaveIndex();
assert(BPIndex && "No Base Pointer Save Slot!");
@@ -773,106 +750,76 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
FrameSize += StackAdj;
}
+ // Frames of 32KB & larger require special handling because they cannot be
+ // indexed into with a simple LD/LWZ immediate offset operand.
+ bool isLargeFrame = !isInt<16>(FrameSize);
+
if (FrameSize) {
- // The loaded (or persistent) stack pointer value is offset by the 'stwu'
- // on entry to the function. Add this offset back now.
- if (!isPPC64) {
- // If this function contained a fastcc call and GuaranteedTailCallOpt is
- // enabled (=> hasFastCall()==true) the fastcc call might contain a tail
- // call which invalidates the stack pointer value in SP(0). So we use the
- // value of R31 in this case.
- if (FI->hasFastCall() && isInt<16>(FrameSize)) {
- assert(hasFP(MF) && "Expecting a valid the frame pointer.");
- BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDI), PPC::R1)
- .addReg(PPC::R31).addImm(FrameSize);
- } else if(FI->hasFastCall()) {
- BuildMI(MBB, MBBI, dl, TII.get(PPC::LIS), PPC::R0)
- .addImm(FrameSize >> 16);
- BuildMI(MBB, MBBI, dl, TII.get(PPC::ORI), PPC::R0)
- .addReg(PPC::R0, RegState::Kill)
- .addImm(FrameSize & 0xFFFF);
- BuildMI(MBB, MBBI, dl, TII.get(PPC::ADD4))
- .addReg(PPC::R1)
- .addReg(PPC::R31)
- .addReg(PPC::R0);
- } else if (isInt<16>(FrameSize) &&
- !HasBP &&
- !MFI->hasVarSizedObjects()) {
- BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDI), PPC::R1)
- .addReg(PPC::R1).addImm(FrameSize);
+ // In the prologue, the loaded (or persistent) stack pointer value is offset
+ // by the STDU/STDUX/STWU/STWUX instruction. Add this offset back now.
+
+ // If this function contained a fastcc call and GuaranteedTailCallOpt is
+ // enabled (=> hasFastCall()==true) the fastcc call might contain a tail
+ // call which invalidates the stack pointer value in SP(0). So we use the
+ // value of R31 in this case.
+ if (FI->hasFastCall()) {
+ assert(HasFP && "Expecting a valid frame pointer.");
+ if (!isLargeFrame) {
+ BuildMI(MBB, MBBI, dl, AddImmInst, SPReg)
+ .addReg(FPReg).addImm(FrameSize);
} else {
- BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ),PPC::R1)
- .addImm(0).addReg(PPC::R1);
- }
- } else {
- if (FI->hasFastCall() && isInt<16>(FrameSize)) {
- assert(hasFP(MF) && "Expecting a valid the frame pointer.");
- BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDI8), PPC::X1)
- .addReg(PPC::X31).addImm(FrameSize);
- } else if(FI->hasFastCall()) {
- BuildMI(MBB, MBBI, dl, TII.get(PPC::LIS8), PPC::X0)
+ BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg)
.addImm(FrameSize >> 16);
- BuildMI(MBB, MBBI, dl, TII.get(PPC::ORI8), PPC::X0)
- .addReg(PPC::X0, RegState::Kill)
+ BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg)
+ .addReg(ScratchReg, RegState::Kill)
.addImm(FrameSize & 0xFFFF);
- BuildMI(MBB, MBBI, dl, TII.get(PPC::ADD8))
- .addReg(PPC::X1)
- .addReg(PPC::X31)
- .addReg(PPC::X0);
- } else if (isInt<16>(FrameSize) && !HasBP &&
- !MFI->hasVarSizedObjects()) {
- BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDI8), PPC::X1)
- .addReg(PPC::X1).addImm(FrameSize);
- } else {
- BuildMI(MBB, MBBI, dl, TII.get(PPC::LD), PPC::X1)
- .addImm(0).addReg(PPC::X1);
+ BuildMI(MBB, MBBI, dl, AddInst)
+ .addReg(SPReg)
+ .addReg(FPReg)
+ .addReg(ScratchReg);
}
+ } else if (!isLargeFrame && !HasBP && !MFI->hasVarSizedObjects()) {
+ BuildMI(MBB, MBBI, dl, AddImmInst, SPReg)
+ .addReg(SPReg)
+ .addImm(FrameSize);
+ } else {
+ BuildMI(MBB, MBBI, dl, LoadInst, SPReg)
+ .addImm(0)
+ .addReg(SPReg);
}
- }
- if (isPPC64) {
- if (MustSaveLR)
- BuildMI(MBB, MBBI, dl, TII.get(PPC::LD), PPC::X0)
- .addImm(LROffset).addReg(PPC::X1);
-
- if (!MustSaveCRs.empty())
- BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ8), PPC::X12)
- .addImm(8).addReg(PPC::X1);
+ }
- if (HasFP)
- BuildMI(MBB, MBBI, dl, TII.get(PPC::LD), PPC::X31)
- .addImm(FPOffset).addReg(PPC::X1);
+ if (MustSaveLR)
+ BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg)
+ .addImm(LROffset)
+ .addReg(SPReg);
- if (HasBP)
- BuildMI(MBB, MBBI, dl, TII.get(PPC::LD), PPC::X30)
- .addImm(BPOffset).addReg(PPC::X1);
+ assert((isPPC64 || MustSaveCRs.empty()) &&
+ "Epilogue CR restoring supported only in 64-bit mode");
- if (!MustSaveCRs.empty())
- for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
- BuildMI(MBB, MBBI, dl, TII.get(PPC::MTOCRF8), MustSaveCRs[i])
- .addReg(PPC::X12, getKillRegState(i == e-1));
+ if (!MustSaveCRs.empty()) // will only occur for PPC64
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ8), TempReg)
+ .addImm(8)
+ .addReg(SPReg);
- if (MustSaveLR)
- BuildMI(MBB, MBBI, dl, TII.get(PPC::MTLR8)).addReg(PPC::X0);
- } else {
- if (MustSaveLR)
- BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ), PPC::R0)
- .addImm(LROffset).addReg(PPC::R1);
+ if (HasFP)
+ BuildMI(MBB, MBBI, dl, LoadInst, FPReg)
+ .addImm(FPOffset)
+ .addReg(SPReg);
- assert(MustSaveCRs.empty() &&
- "Epilogue CR restoring supported only in 64-bit mode");
+ if (HasBP)
+ BuildMI(MBB, MBBI, dl, LoadInst, BPReg)
+ .addImm(BPOffset)
+ .addReg(SPReg);
- if (HasFP)
- BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ), PPC::R31)
- .addImm(FPOffset).addReg(PPC::R1);
+ if (!MustSaveCRs.empty()) // will only occur for PPC64
+ for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::MTOCRF8), MustSaveCRs[i])
+ .addReg(TempReg, getKillRegState(i == e-1));
- if (HasBP)
- BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ), PPC::R30)
- .addImm(FPOffset).addReg(PPC::R1);
-
- if (MustSaveLR)
- BuildMI(MBB, MBBI, dl, TII.get(PPC::MTLR)).addReg(PPC::R0);
- }
+ if (MustSaveLR)
+ BuildMI(MBB, MBBI, dl, MTLRInst).addReg(ScratchReg);
// Callee pop calling convention. Pop parameter/linkage area. Used for tail
// call optimization
@@ -880,27 +827,20 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
MF.getFunction()->getCallingConv() == CallingConv::Fast) {
PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
unsigned CallerAllocatedAmt = FI->getMinReservedArea();
- unsigned StackReg = isPPC64 ? PPC::X1 : PPC::R1;
- unsigned FPReg = isPPC64 ? PPC::X31 : PPC::R31;
- unsigned TmpReg = isPPC64 ? PPC::X0 : PPC::R0;
- unsigned ADDIInstr = isPPC64 ? PPC::ADDI8 : PPC::ADDI;
- unsigned ADDInstr = isPPC64 ? PPC::ADD8 : PPC::ADD4;
- unsigned LISInstr = isPPC64 ? PPC::LIS8 : PPC::LIS;
- unsigned ORIInstr = isPPC64 ? PPC::ORI8 : PPC::ORI;
if (CallerAllocatedAmt && isInt<16>(CallerAllocatedAmt)) {
- BuildMI(MBB, MBBI, dl, TII.get(ADDIInstr), StackReg)
- .addReg(StackReg).addImm(CallerAllocatedAmt);
+ BuildMI(MBB, MBBI, dl, AddImmInst, SPReg)
+ .addReg(SPReg).addImm(CallerAllocatedAmt);
} else {
- BuildMI(MBB, MBBI, dl, TII.get(LISInstr), TmpReg)
+ BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg)
.addImm(CallerAllocatedAmt >> 16);
- BuildMI(MBB, MBBI, dl, TII.get(ORIInstr), TmpReg)
- .addReg(TmpReg, RegState::Kill)
+ BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg)
+ .addReg(ScratchReg, RegState::Kill)
.addImm(CallerAllocatedAmt & 0xFFFF);
- BuildMI(MBB, MBBI, dl, TII.get(ADDInstr))
- .addReg(StackReg)
+ BuildMI(MBB, MBBI, dl, AddInst)
+ .addReg(SPReg)
.addReg(FPReg)
- .addReg(TmpReg);
+ .addReg(ScratchReg);
}
} else if (RetOpcode == PPC::TCRETURNdi) {
MBBI = MBB.getLastNonDebugInstr();
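
Both the prologue and epilogue above materialize out-of-range frame offsets with a lis/ori pair, since the store/load-with-update forms only take a signed 16-bit displacement. A sketch of the split (illustrative only, not generated code):

    #include <cstdint>

    // lis RT, (Imm >> 16) loads the high half shifted left 16 bits;
    // ori RT, RT, (Imm & 0xFFFF) fills in the low half. Together they
    // reproduce any 32-bit immediate, e.g. a NegFrameSize below -32768.
    int32_t materialize32(int32_t Imm) {
      uint32_t Hi = (uint32_t)(Imm >> 16) << 16; // lis
      uint32_t Lo = (uint32_t)Imm & 0xFFFF;      // ori
      return (int32_t)(Hi | Lo);                 // == Imm
    }
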
diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index 475bde1..6ba6af6 100644
--- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -876,8 +876,10 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) {
// target-specific node if it hasn't already been changed.
SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
SDLoc dl(N);
- if (N->isMachineOpcode())
+ if (N->isMachineOpcode()) {
+ N->setNodeId(-1);
return NULL; // Already selected.
+ }
switch (N->getOpcode()) {
default: break;
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index 664dd12..8da5f05 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -149,28 +149,24 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
Subtarget->hasFRSQRTES() && Subtarget->hasFRES()))
setOperationAction(ISD::FSQRT, MVT::f32, Expand);
- setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
- setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
+ if (Subtarget->hasFCPSGN()) {
+ setOperationAction(ISD::FCOPYSIGN, MVT::f64, Legal);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f32, Legal);
+ } else {
+ setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
+ }
if (Subtarget->hasFPRND()) {
setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
setOperationAction(ISD::FCEIL, MVT::f64, Legal);
setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
+ setOperationAction(ISD::FROUND, MVT::f64, Legal);
setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
setOperationAction(ISD::FCEIL, MVT::f32, Legal);
setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
-
- // frin does not implement "ties to even." Thus, this is safe only in
- // fast-math mode.
- if (TM.Options.UnsafeFPMath) {
- setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);
- setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);
-
- // These need to set FE_INEXACT, and use a custom inserter.
- setOperationAction(ISD::FRINT, MVT::f64, Legal);
- setOperationAction(ISD::FRINT, MVT::f32, Legal);
- }
+ setOperationAction(ISD::FROUND, MVT::f32, Legal);
}
// PowerPC does not have BSWAP, CTPOP or CTTZ
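
The hunk above makes ISD::FCOPYSIGN Legal when the subtarget has the fcpsgn instruction, instead of always expanding it. For reference, copysign takes its magnitude from the first operand and its sign from the second; a scalar analogue (illustrative only):

    #include <cassert>
    #include <cmath>

    int main() {
      assert(std::copysign(1.5, -2.0) == -1.5); // magnitude of 1.5, sign of -2.0
      assert(std::copysign(-1.5, 0.0) == 1.5);  // +0.0 carries a positive sign
      return 0;
    }
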
@@ -560,7 +556,10 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setInsertFencesForAtomic(true);
- setSchedulingPreference(Sched::Hybrid);
+ if (Subtarget->enableMachineScheduler())
+ setSchedulingPreference(Sched::Source);
+ else
+ setSchedulingPreference(Sched::Hybrid);
computeRegisterProperties();
@@ -579,24 +578,47 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
}
}
+/// getMaxByValAlign - Helper for getByValTypeAlignment to determine
+/// the desired ByVal argument alignment.
+static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign,
+ unsigned MaxMaxAlign) {
+ if (MaxAlign == MaxMaxAlign)
+ return;
+ if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
+ if (MaxMaxAlign >= 32 && VTy->getBitWidth() >= 256)
+ MaxAlign = 32;
+ else if (VTy->getBitWidth() >= 128 && MaxAlign < 16)
+ MaxAlign = 16;
+ } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
+ unsigned EltAlign = 0;
+ getMaxByValAlign(ATy->getElementType(), EltAlign, MaxMaxAlign);
+ if (EltAlign > MaxAlign)
+ MaxAlign = EltAlign;
+ } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
+ unsigned EltAlign = 0;
+ getMaxByValAlign(STy->getElementType(i), EltAlign, MaxMaxAlign);
+ if (EltAlign > MaxAlign)
+ MaxAlign = EltAlign;
+ if (MaxAlign == MaxMaxAlign)
+ break;
+ }
+ }
+}
+
/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
/// function arguments in the caller parameter area.
unsigned PPCTargetLowering::getByValTypeAlignment(Type *Ty) const {
- const TargetMachine &TM = getTargetMachine();
// Darwin passes everything on 4 byte boundary.
- if (TM.getSubtarget<PPCSubtarget>().isDarwin())
+ if (PPCSubTarget.isDarwin())
return 4;
  // 16-byte and wider vectors are passed on a 16-byte boundary.
- if (VectorType *VTy = dyn_cast<VectorType>(Ty))
- if (VTy->getBitWidth() >= 128)
- return 16;
-
  // Everything else is aligned to an 8-byte boundary on PPC64 and 4-byte on PPC32.
- if (PPCSubTarget.isPPC64())
- return 8;
-
- return 4;
+ unsigned Align = PPCSubTarget.isPPC64() ? 8 : 4;
+ if (PPCSubTarget.hasAltivec() || PPCSubTarget.hasQPX())
+ getMaxByValAlign(Ty, Align, PPCSubTarget.hasQPX() ? 32 : 16);
+ return Align;
}
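
A condensed standalone analogue of the recursion above over a toy type model (the real code walks llvm::Type; everything here is illustrative): a ByVal aggregate gets at least 16-byte alignment once any nested element is a 128-bit-or-wider vector, capped at 32 for QPX.

    #include <vector>

    struct ToyType {                 // stand-in for llvm::Type
      unsigned VectorBits = 0;       // nonzero for vector types
      std::vector<ToyType> Elements; // struct/array members
    };

    static void maxByValAlign(const ToyType &Ty, unsigned &MaxAlign,
                              unsigned MaxMaxAlign) {
      if (MaxAlign == MaxMaxAlign)
        return;                      // already at the cap; stop early
      if (Ty.VectorBits >= 256 && MaxMaxAlign >= 32)
        MaxAlign = 32;
      else if (Ty.VectorBits >= 128 && MaxAlign < 16)
        MaxAlign = 16;
      for (const ToyType &Elt : Ty.Elements)
        maxByValAlign(Elt, MaxAlign, MaxMaxAlign);
    }
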
const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
@@ -1386,6 +1408,10 @@ SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op,
SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
SelectionDAG &DAG) const {
+ // FIXME: TLS addresses currently use medium model code sequences,
+ // which is the most useful form. Eventually support for small and
+ // large models could be added if users need it, at the cost of
+ // additional complexity.
GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
SDLoc dl(GA);
const GlobalValue *GV = GA->getGlobal();
@@ -1814,6 +1840,12 @@ SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG,
#include "PPCGenCallingConv.inc"
+// Function whose sole purpose is to kill compiler warnings
+// stemming from unused functions included from PPCGenCallingConv.inc.
+CCAssignFn *PPCTargetLowering::useFastISelCCs(unsigned Flag) const {
+ return Flag ? CC_PPC64_ELF_FIS : RetCC_PPC64_ELF_FIS;
+}
+
bool llvm::CC_PPC32_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
CCValAssign::LocInfo &LocInfo,
ISD::ArgFlagsTy &ArgFlags,
@@ -2276,6 +2308,13 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
InVals.push_back(FIN);
continue;
}
+
+ unsigned BVAlign = Flags.getByValAlign();
+ if (BVAlign > 8) {
+ ArgOffset = ((ArgOffset+BVAlign-1)/BVAlign)*BVAlign;
+ CurArgOffset = ArgOffset;
+ }
+
// All aggregates smaller than 8 bytes must be passed right-justified.
if (ObjSize < PtrByteSize)
CurArgOffset = CurArgOffset + (PtrByteSize - ObjSize);
@@ -3448,7 +3487,9 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, SDLoc dl,
// from allocating it), resulting in an additional register being
// allocated and an unnecessary move instruction being generated.
needsTOCRestore = true;
- } else if ((CallOpc == PPCISD::CALL) && !isLocalCall(Callee)) {
+ } else if ((CallOpc == PPCISD::CALL) &&
+ (!isLocalCall(Callee) ||
+ DAG.getTarget().getRelocationModel() == Reloc::PIC_)) {
// Otherwise insert NOP for non-local calls.
CallOpc = PPCISD::CALL_NOP;
}
@@ -3865,6 +3906,15 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
if (Size == 0)
continue;
+ unsigned BVAlign = Flags.getByValAlign();
+ if (BVAlign > 8) {
+ if (BVAlign % PtrByteSize != 0)
+ llvm_unreachable(
+ "ByVal alignment is not a multiple of the pointer size");
+
+ ArgOffset = ((ArgOffset+BVAlign-1)/BVAlign)*BVAlign;
+ }
+
// All aggregates smaller than 8 bytes must be passed right-justified.
if (Size==1 || Size==2 || Size==4) {
EVT VT = (Size==1) ? MVT::i8 : ((Size==2) ? MVT::i16 : MVT::i32);
@@ -3956,7 +4006,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
continue;
}
- switch (Arg.getValueType().getSimpleVT().SimpleTy) {
+ switch (Arg.getSimpleValueType().SimpleTy) {
default: llvm_unreachable("Unexpected ValueType for argument!");
case MVT::i32:
case MVT::i64:
@@ -3979,7 +4029,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
// must be passed right-justified in the stack doubleword, and
// in the GPR, if one is available.
SDValue StoreOff;
- if (Arg.getValueType().getSimpleVT().SimpleTy == MVT::f32) {
+ if (Arg.getSimpleValueType().SimpleTy == MVT::f32) {
SDValue ConstFour = DAG.getConstant(4, PtrOff.getValueType());
StoreOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
} else
@@ -4287,7 +4337,7 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
continue;
}
- switch (Arg.getValueType().getSimpleVT().SimpleTy) {
+ switch (Arg.getSimpleValueType().SimpleTy) {
default: llvm_unreachable("Unexpected ValueType for argument!");
case MVT::i32:
case MVT::i64:
@@ -4752,7 +4802,7 @@ SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
SDValue Tmp;
- switch (Op.getValueType().getSimpleVT().SimpleTy) {
+ switch (Op.getSimpleValueType().SimpleTy) {
default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
case MVT::i32:
Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIWZ :
@@ -6676,51 +6726,6 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
// Restore FPSCR value.
BuildMI(*BB, MI, dl, TII->get(PPC::MTFSF)).addImm(1).addReg(MFFSReg);
- } else if (MI->getOpcode() == PPC::FRINDrint ||
- MI->getOpcode() == PPC::FRINSrint) {
- bool isf32 = MI->getOpcode() == PPC::FRINSrint;
- unsigned Dest = MI->getOperand(0).getReg();
- unsigned Src = MI->getOperand(1).getReg();
- DebugLoc dl = MI->getDebugLoc();
-
- MachineRegisterInfo &RegInfo = F->getRegInfo();
- unsigned CRReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
-
- // Perform the rounding.
- BuildMI(*BB, MI, dl, TII->get(isf32 ? PPC::FRINS : PPC::FRIND), Dest)
- .addReg(Src);
-
- // Compare the results.
- BuildMI(*BB, MI, dl, TII->get(isf32 ? PPC::FCMPUS : PPC::FCMPUD), CRReg)
- .addReg(Dest).addReg(Src);
-
- // If the results were not equal, then set the FPSCR XX bit.
- MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB);
- MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
- F->insert(It, midMBB);
- F->insert(It, exitMBB);
- exitMBB->splice(exitMBB->begin(), BB,
- llvm::next(MachineBasicBlock::iterator(MI)),
- BB->end());
- exitMBB->transferSuccessorsAndUpdatePHIs(BB);
-
- BuildMI(*BB, MI, dl, TII->get(PPC::BCC))
- .addImm(PPC::PRED_EQ).addReg(CRReg).addMBB(exitMBB);
-
- BB->addSuccessor(midMBB);
- BB->addSuccessor(exitMBB);
-
- BB = midMBB;
-
- // Set the FPSCR XX bit (FE_INEXACT). Note that we cannot just set
- // the FI bit here because that will not automatically set XX also,
- // and XX is what libm interprets as the FE_INEXACT flag.
- BuildMI(BB, dl, TII->get(PPC::MTFSB1)).addImm(/* 38 - 32 = */ 6);
- BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
-
- BB->addSuccessor(exitMBB);
-
- BB = exitMBB;
} else {
llvm_unreachable("Unexpected instr type to insert");
}
@@ -7061,8 +7066,28 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
if (RV.getNode() != 0) {
DCI.AddToWorklist(RV.getNode());
RV = DAGCombineFastRecip(RV, DCI);
- if (RV.getNode() != 0)
+ if (RV.getNode() != 0) {
+ // Unfortunately, RV is now NaN if the input was exactly 0. Select out
+ // this case and force the answer to 0.
+
+ EVT VT = RV.getValueType();
+
+ SDValue Zero = DAG.getConstantFP(0.0, VT.getScalarType());
+ if (VT.isVector()) {
+ assert(VT.getVectorNumElements() == 4 && "Unknown vector type");
+ Zero = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Zero, Zero, Zero, Zero);
+ }
+
+ SDValue ZeroCmp =
+ DAG.getSetCC(dl, getSetCCResultType(*DAG.getContext(), VT),
+ N->getOperand(0), Zero, ISD::SETEQ);
+ DCI.AddToWorklist(ZeroCmp.getNode());
+ DCI.AddToWorklist(RV.getNode());
+
+ RV = DAG.getNode(VT.isVector() ? ISD::VSELECT : ISD::SELECT, dl, VT,
+ ZeroCmp, Zero, RV);
return RV;
+ }
}
}
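
A scalar analogue of the guard added above (illustrative only): when sqrt(x) is computed as x times a reciprocal-sqrt estimate, the x == 0 case produces 0 * inf = NaN, so the combine selects 0 there instead.

    #include <cmath>

    double fastSqrt(double X) {
      double RV = X * (1.0 / std::sqrt(X)); // stands in for the refined estimate
      return X == 0.0 ? 0.0 : RV;           // the SETEQ/SELECT inserted above
    }
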
@@ -7158,7 +7183,8 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
unsigned ABIAlignment = getDataLayout()->getABITypeAlignment(Ty);
if (ISD::isNON_EXTLoad(N) && VT.isVector() &&
TM.getSubtarget<PPCSubtarget>().hasAltivec() &&
- DCI.getDAGCombineLevel() == AfterLegalizeTypes &&
+ (VT == MVT::v16i8 || VT == MVT::v8i16 ||
+ VT == MVT::v4i32 || VT == MVT::v4f32) &&
LD->getAlignment() < ABIAlignment) {
// This is a type-legal unaligned Altivec load.
SDValue Chain = LD->getChain();
@@ -7302,6 +7328,8 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
}
}
}
+
+ break;
case ISD::BSWAP:
// Turn BSWAP (LOAD) -> lhbrx/lwbrx.
if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
@@ -7645,7 +7673,7 @@ PPCTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
PPC::GPRCRegClass.contains(R.first)) {
const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
return std::make_pair(TRI->getMatchingSuperReg(R.first,
- PPC::sub_32, &PPC::GPRCRegClass),
+ PPC::sub_32, &PPC::G8RCRegClass),
&PPC::G8RCRegClass);
}
@@ -7896,7 +7924,7 @@ bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
}
Sched::Preference PPCTargetLowering::getSchedulingPreference(SDNode *N) const {
- if (DisableILPPref)
+ if (DisableILPPref || PPCSubTarget.enableMachineScheduler())
return TargetLowering::getSchedulingPreference(N);
return Sched::ILP;
diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h
index aa5e821..df3af35 100644
--- a/lib/Target/PowerPC/PPCISelLowering.h
+++ b/lib/Target/PowerPC/PPCISelLowering.h
@@ -627,6 +627,8 @@ namespace llvm {
SDValue DAGCombineFastRecip(SDValue Op, DAGCombinerInfo &DCI) const;
SDValue DAGCombineFastRecipFSQRT(SDValue Op, DAGCombinerInfo &DCI) const;
+
+ CCAssignFn *useFastISelCCs(unsigned Flag) const;
};
namespace PPC {
diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td
index f78bb38..46db4fe 100644
--- a/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -270,6 +270,7 @@ def MTCRF8 : XFXForm_5<31, 144, (outs), (ins i32imm:$FXM, g8rc:$rS),
"mtcrf $FXM, $rS", BrMCRX>,
PPC970_MicroCode, PPC970_Unit_CRU;
+let hasExtraSrcRegAllocReq = 1 in // to enable post-ra anti-dep breaking.
def MFOCRF8: XFXForm_5a<31, 19, (outs g8rc:$rT), (ins crbitm:$FXM),
"mfocrf $rT, $FXM", SprMFCR>,
PPC970_DGroup_First, PPC970_Unit_CRU;
@@ -506,6 +507,14 @@ defm EXTSH8 : XForm_11r<31, 922, (outs g8rc:$rA), (ins g8rc:$rS),
[(set i64:$rA, (sext_inreg i64:$rS, i16))]>;
} // Interpretation64Bit
+// For fast-isel:
+let isCodeGenOnly = 1 in {
+def EXTSB8_32_64 : XForm_11<31, 954, (outs g8rc:$rA), (ins gprc:$rS),
+ "extsb $rA, $rS", IntSimple, []>, isPPC64;
+def EXTSH8_32_64 : XForm_11<31, 922, (outs g8rc:$rA), (ins gprc:$rS),
+ "extsh $rA, $rS", IntSimple, []>, isPPC64;
+} // isCodeGenOnly for fast-isel
+
defm EXTSW : XForm_11r<31, 986, (outs g8rc:$rA), (ins g8rc:$rS),
"extsw", "$rA, $rS", IntSimple,
[(set i64:$rA, (sext_inreg i64:$rS, i32))]>, isPPC64;
@@ -520,16 +529,16 @@ defm SRADI : XSForm_1rc<31, 413, (outs g8rc:$rA), (ins g8rc:$rS, u6imm:$SH),
defm CNTLZD : XForm_11r<31, 58, (outs g8rc:$rA), (ins g8rc:$rS),
"cntlzd", "$rA, $rS", IntGeneral,
[(set i64:$rA, (ctlz i64:$rS))]>;
-defm POPCNTD : XForm_11r<31, 506, (outs g8rc:$rA), (ins g8rc:$rS),
- "popcntd", "$rA, $rS", IntGeneral,
- [(set i64:$rA, (ctpop i64:$rS))]>;
+def POPCNTD : XForm_11<31, 506, (outs g8rc:$rA), (ins g8rc:$rS),
+ "popcntd $rA, $rS", IntGeneral,
+ [(set i64:$rA, (ctpop i64:$rS))]>;
// popcntw also does a population count on the high 32 bits (storing the
// results in the high 32-bits of the output). We'll ignore that here (which is
// safe because we never separately use the high part of the 64-bit registers).
-defm POPCNTW : XForm_11r<31, 378, (outs gprc:$rA), (ins gprc:$rS),
- "popcntw", "$rA, $rS", IntGeneral,
- [(set i32:$rA, (ctpop i32:$rS))]>;
+def POPCNTW : XForm_11<31, 378, (outs gprc:$rA), (ins gprc:$rS),
+ "popcntw $rA, $rS", IntGeneral,
+ [(set i32:$rA, (ctpop i32:$rS))]>;
defm DIVD : XOForm_1r<31, 489, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
"divd", "$rT, $rA, $rB", IntDivD,
@@ -569,6 +578,14 @@ defm RLDICL : MDForm_1r<30, 0,
(outs g8rc:$rA), (ins g8rc:$rS, u6imm:$SH, u6imm:$MBE),
"rldicl", "$rA, $rS, $SH, $MBE", IntRotateDI,
[]>, isPPC64;
+// For fast-isel:
+let isCodeGenOnly = 1 in
+def RLDICL_32_64 : MDForm_1<30, 0,
+ (outs g8rc:$rA),
+ (ins gprc:$rS, u6imm:$SH, u6imm:$MBE),
+ "rldicl $rA, $rS, $SH, $MBE", IntRotateDI,
+ []>, isPPC64;
+// End fast-isel.
defm RLDICR : MDForm_1r<30, 1,
(outs g8rc:$rA), (ins g8rc:$rS, u6imm:$SH, u6imm:$MBE),
"rldicr", "$rA, $rS, $SH, $MBE", IntRotateDI,
@@ -620,6 +637,15 @@ def LWAX : XForm_1<31, 341, (outs g8rc:$rD), (ins memrr:$src),
"lwax $rD, $src", LdStLHA,
[(set i64:$rD, (sextloadi32 xaddr:$src))]>, isPPC64,
PPC970_DGroup_Cracked;
+// For fast-isel:
+let isCodeGenOnly = 1, mayLoad = 1 in {
+def LWA_32 : DSForm_1<58, 2, (outs gprc:$rD), (ins memrix:$src),
+ "lwa $rD, $src", LdStLWA, []>, isPPC64,
+ PPC970_DGroup_Cracked;
+def LWAX_32 : XForm_1<31, 341, (outs gprc:$rD), (ins memrr:$src),
+ "lwax $rD, $src", LdStLHA, []>, isPPC64,
+ PPC970_DGroup_Cracked;
+} // end fast-isel isCodeGenOnly
// Update forms.
let mayLoad = 1, neverHasSideEffects = 1 in {
@@ -942,6 +968,9 @@ let PPC970_Unit = 3, neverHasSideEffects = 1,
defm FCFID : XForm_26r<63, 846, (outs f8rc:$frD), (ins f8rc:$frB),
"fcfid", "$frD, $frB", FPGeneral,
[(set f64:$frD, (PPCfcfid f64:$frB))]>, isPPC64;
+defm FCTID : XForm_26r<63, 814, (outs f8rc:$frD), (ins f8rc:$frB),
+ "fctid", "$frD, $frB", FPGeneral,
+ []>, isPPC64;
defm FCTIDZ : XForm_26r<63, 815, (outs f8rc:$frD), (ins f8rc:$frB),
"fctidz", "$frD, $frB", FPGeneral,
[(set f64:$frD, (PPCfctidz f64:$frB))]>, isPPC64;
diff --git a/lib/Target/PowerPC/PPCInstrAltivec.td b/lib/Target/PowerPC/PPCInstrAltivec.td
index fdea51d..a55abe3 100644
--- a/lib/Target/PowerPC/PPCInstrAltivec.td
+++ b/lib/Target/PowerPC/PPCInstrAltivec.td
@@ -229,35 +229,45 @@ let Predicates = [HasAltivec] in {
let isCodeGenOnly = 1 in {
def DSS : DSS_Form<822, (outs),
(ins u5imm:$ZERO0, u5imm:$STRM,u5imm:$ZERO1,u5imm:$ZERO2),
- "dss $STRM", LdStLoad /*FIXME*/, []>;
+ "dss $STRM", LdStLoad /*FIXME*/, []>,
+ Deprecated<DeprecatedDST>;
def DSSALL : DSS_Form<822, (outs),
(ins u5imm:$ONE, u5imm:$ZERO0,u5imm:$ZERO1,u5imm:$ZERO2),
- "dssall", LdStLoad /*FIXME*/, []>;
+ "dssall", LdStLoad /*FIXME*/, []>,
+ Deprecated<DeprecatedDST>;
def DST : DSS_Form<342, (outs),
(ins u5imm:$ZERO, u5imm:$STRM, gprc:$rA, gprc:$rB),
- "dst $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>;
+ "dst $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>,
+ Deprecated<DeprecatedDST>;
def DSTT : DSS_Form<342, (outs),
(ins u5imm:$ONE, u5imm:$STRM, gprc:$rA, gprc:$rB),
- "dstt $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>;
+ "dstt $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>,
+ Deprecated<DeprecatedDST>;
def DSTST : DSS_Form<374, (outs),
(ins u5imm:$ZERO, u5imm:$STRM, gprc:$rA, gprc:$rB),
- "dstst $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>;
+ "dstst $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>,
+ Deprecated<DeprecatedDST>;
def DSTSTT : DSS_Form<374, (outs),
(ins u5imm:$ONE, u5imm:$STRM, gprc:$rA, gprc:$rB),
- "dststt $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>;
+ "dststt $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>,
+ Deprecated<DeprecatedDST>;
def DST64 : DSS_Form<342, (outs),
(ins u5imm:$ZERO, u5imm:$STRM, g8rc:$rA, gprc:$rB),
- "dst $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>;
+ "dst $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>,
+ Deprecated<DeprecatedDST>;
def DSTT64 : DSS_Form<342, (outs),
(ins u5imm:$ONE, u5imm:$STRM, g8rc:$rA, gprc:$rB),
- "dstt $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>;
+ "dstt $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>,
+ Deprecated<DeprecatedDST>;
def DSTST64 : DSS_Form<374, (outs),
(ins u5imm:$ZERO, u5imm:$STRM, g8rc:$rA, gprc:$rB),
- "dstst $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>;
+ "dstst $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>,
+ Deprecated<DeprecatedDST>;
def DSTSTT64 : DSS_Form<374, (outs),
(ins u5imm:$ONE, u5imm:$STRM, g8rc:$rA, gprc:$rB),
- "dststt $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>;
+ "dststt $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>,
+ Deprecated<DeprecatedDST>;
}
def MFVSCR : VXForm_4<1540, (outs vrrc:$vD), (ins),
diff --git a/lib/Target/PowerPC/PPCInstrFormats.td b/lib/Target/PowerPC/PPCInstrFormats.td
index 42adc02..29233d4 100644
--- a/lib/Target/PowerPC/PPCInstrFormats.td
+++ b/lib/Target/PowerPC/PPCInstrFormats.td
@@ -398,6 +398,13 @@ class XForm_1a<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
let RST = 0;
}
+class XForm_rs<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : XForm_base_r3xo<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
+ let A = 0;
+ let B = 0;
+}
+
class XForm_6<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
InstrItinClass itin, list<dag> pattern>
: XForm_base_r3xo_swapped<opcode, xo, OOL, IOL, asmstr, itin> {
@@ -438,6 +445,17 @@ class XForm_16<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
let Inst{31} = 0;
}
+class XForm_mtmsr<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> RS;
+ bits<1> L;
+
+ let Inst{6-10} = RS;
+ let Inst{15} = L;
+ let Inst{21-30} = xo;
+}
+
class XForm_16_ext<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
InstrItinClass itin>
: XForm_16<opcode, xo, OOL, IOL, asmstr, itin> {
@@ -534,6 +552,21 @@ class XForm_43<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
let Inst{31} = RC;
}
+class XForm_0<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : XForm_base_r3xo<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
+ let RST = 0;
+ let A = 0;
+ let B = 0;
+}
+
+class XForm_16b<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : XForm_base_r3xo<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
+ let RST = 0;
+ let A = 0;
+}
+
// DCB_Form - Form X instruction, used for dcb* instructions.
class DCB_Form<bits<10> xo, bits<5> immfield, dag OOL, dag IOL, string asmstr,
InstrItinClass itin, list<dag> pattern>
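
The new X-form subclasses above only pin particular fields (RST, A, B, L) to zero; the underlying 32-bit encoding is unchanged. A sketch of how XForm_mtmsr's fields pack, using PPC's big-endian bit numbering where bit 0 is the most significant (illustrative, not generated code):

    #include <cstdint>

    uint32_t encodeXFormMtmsr(uint32_t Opcode, uint32_t RS, uint32_t L,
                              uint32_t XO) {
      uint32_t Inst = 0;
      Inst |= (Opcode & 0x3F) << 26; // Inst{0-5}:   primary opcode
      Inst |= (RS & 0x1F) << 21;     // Inst{6-10}:  RS
      Inst |= (L & 0x1) << 16;       // Inst{15}:    L
      Inst |= (XO & 0x3FF) << 1;     // Inst{21-30}: extended opcode
      return Inst;                   // Inst{31} stays 0
    }
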
diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp
index 375daee..315ad04 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -33,7 +33,7 @@
#include "llvm/Support/raw_ostream.h"
#define GET_INSTRMAP_INFO
-#define GET_INSTRINFO_CTOR
+#define GET_INSTRINFO_CTOR_DTOR
#include "PPCGenInstrInfo.inc"
using namespace llvm;
@@ -45,6 +45,9 @@ opt<bool> DisableCTRLoopAnal("disable-ppc-ctrloop-analysis", cl::Hidden,
static cl::opt<bool> DisableCmpOpt("disable-ppc-cmp-opt",
cl::desc("Disable compare instruction optimization"), cl::Hidden);
+// Pin the vtable to this file.
+void PPCInstrInfo::anchor() {}
+
PPCInstrInfo::PPCInstrInfo(PPCTargetMachine &tm)
: PPCGenInstrInfo(PPC::ADJCALLSTACKDOWN, PPC::ADJCALLSTACKUP),
TM(tm), RI(*TM.getSubtargetImpl()) {}
@@ -985,6 +988,10 @@ bool PPCInstrInfo::SubsumesPredicate(
if (Pred2[1].getReg() == PPC::CTR8 || Pred2[1].getReg() == PPC::CTR)
return false;
+ // P1 can only subsume P2 if they test the same condition register.
+ if (Pred1[1].getReg() != Pred2[1].getReg())
+ return false;
+
PPC::Predicate P1 = (PPC::Predicate) Pred1[0].getImm();
PPC::Predicate P2 = (PPC::Predicate) Pred2[0].getImm();
diff --git a/lib/Target/PowerPC/PPCInstrInfo.h b/lib/Target/PowerPC/PPCInstrInfo.h
index bd72a4d..f140c41 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.h
+++ b/lib/Target/PowerPC/PPCInstrInfo.h
@@ -78,6 +78,7 @@ class PPCInstrInfo : public PPCGenInstrInfo {
const TargetRegisterClass *RC,
SmallVectorImpl<MachineInstr*> &NewMIs,
bool &NonRI, bool &SpillsVRS) const;
+ virtual void anchor();
public:
explicit PPCInstrInfo(PPCTargetMachine &TM);
diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td
index 398a11b..2bd3aad 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/lib/Target/PowerPC/PPCInstrInfo.td
@@ -785,6 +785,20 @@ multiclass XForm_26r<bits<6> opcode, bits<10> xo, dag OOL, dag IOL,
}
}
+multiclass XForm_28r<bits<6> opcode, bits<10> xo, dag OOL, dag IOL,
+ string asmbase, string asmstr, InstrItinClass itin,
+ list<dag> pattern> {
+ let BaseName = asmbase in {
+ def NAME : XForm_28<opcode, xo, OOL, IOL,
+ !strconcat(asmbase, !strconcat(" ", asmstr)), itin,
+ pattern>, RecFormRel;
+ let Defs = [CR1] in
+ def o : XForm_28<opcode, xo, OOL, IOL,
+ !strconcat(asmbase, !strconcat(". ", asmstr)), itin,
+ []>, isDOT, RecFormRel;
+ }
+}
+
multiclass AForm_1r<bits<6> opcode, bits<5> xo, dag OOL, dag IOL,
string asmbase, string asmstr, InstrItinClass itin,
list<dag> pattern> {
@@ -1678,6 +1692,9 @@ let isCompare = 1, neverHasSideEffects = 1 in {
let Uses = [RM] in {
let neverHasSideEffects = 1 in {
+ defm FCTIW : XForm_26r<63, 14, (outs f8rc:$frD), (ins f8rc:$frB),
+ "fctiw", "$frD, $frB", FPGeneral,
+ []>;
defm FCTIWZ : XForm_26r<63, 15, (outs f8rc:$frD), (ins f8rc:$frB),
"fctiwz", "$frD, $frB", FPGeneral,
[(set f64:$frD, (PPCfctiwz f64:$frB))]>;
@@ -1686,23 +1703,13 @@ let Uses = [RM] in {
"frsp", "$frD, $frB", FPGeneral,
[(set f32:$frD, (fround f64:$frB))]>;
- // The frin -> nearbyint mapping is valid only in fast-math mode.
let Interpretation64Bit = 1 in
defm FRIND : XForm_26r<63, 392, (outs f8rc:$frD), (ins f8rc:$frB),
"frin", "$frD, $frB", FPGeneral,
- [(set f64:$frD, (fnearbyint f64:$frB))]>;
+ [(set f64:$frD, (frnd f64:$frB))]>;
defm FRINS : XForm_26r<63, 392, (outs f4rc:$frD), (ins f4rc:$frB),
"frin", "$frD, $frB", FPGeneral,
- [(set f32:$frD, (fnearbyint f32:$frB))]>;
- }
-
- // These pseudos expand to rint but also set FE_INEXACT when the result does
- // not equal the argument.
- let usesCustomInserter = 1, Defs = [RM] in { // FIXME: Model FPSCR!
- def FRINDrint : Pseudo<(outs f8rc:$frD), (ins f8rc:$frB),
- "#FRINDrint", [(set f64:$frD, (frint f64:$frB))]>;
- def FRINSrint : Pseudo<(outs f4rc:$frD), (ins f4rc:$frB),
- "#FRINSrint", [(set f32:$frD, (frint f32:$frB))]>;
+ [(set f32:$frD, (frnd f32:$frB))]>;
}
let neverHasSideEffects = 1 in {
@@ -1772,6 +1779,14 @@ defm FNEGD : XForm_26r<63, 40, (outs f8rc:$frD), (ins f8rc:$frB),
"fneg", "$frD, $frB", FPGeneral,
[(set f64:$frD, (fneg f64:$frB))]>;
+defm FCPSGNS : XForm_28r<63, 8, (outs f4rc:$frD), (ins f4rc:$frA, f4rc:$frB),
+ "fcpsgn", "$frD, $frA, $frB", FPGeneral,
+ [(set f32:$frD, (fcopysign f32:$frB, f32:$frA))]>;
+let Interpretation64Bit = 1 in
+defm FCPSGND : XForm_28r<63, 8, (outs f8rc:$frD), (ins f8rc:$frA, f8rc:$frB),
+ "fcpsgn", "$frD, $frA, $frB", FPGeneral,
+ [(set f64:$frD, (fcopysign f64:$frB, f64:$frA))]>;
+
// Reciprocal estimates.
defm FRE : XForm_26r<63, 24, (outs f8rc:$frD), (ins f8rc:$frB),
"fre", "$frD, $frB", FPGeneral,
@@ -1855,7 +1870,7 @@ def MTSPR : XFXForm_1<31, 467, (outs), (ins i32imm:$SPR, gprc:$RT),
"mtspr $SPR, $RT", SprMTSPR>;
def MFTB : XFXForm_1<31, 371, (outs gprc:$RT), (ins i32imm:$SPR),
- "mftb $RT, $SPR", SprMFTB>;
+ "mftb $RT, $SPR", SprMFTB>, Deprecated<DeprecatedMFTB>;
let Uses = [CTR] in {
def MFCTR : XFXForm_1_ext<31, 339, 9, (outs gprc:$rT), (ins),
@@ -1927,6 +1942,7 @@ def MTCRF : XFXForm_5<31, 144, (outs), (ins i32imm:$FXM, gprc:$rS),
"mtcrf $FXM, $rS", BrMCRX>,
PPC970_MicroCode, PPC970_Unit_CRU;
+let hasExtraSrcRegAllocReq = 1 in // to enable post-ra anti-dep breaking.
def MFOCRF: XFXForm_5a<31, 19, (outs gprc:$rT), (ins crbitm:$FXM),
"mfocrf $rT, $FXM", SprMFCR>,
PPC970_DGroup_First, PPC970_Unit_CRU;
@@ -2280,6 +2296,12 @@ def : Pat<(fma (fneg f32:$A), f32:$C, f32:$B),
def : Pat<(fma f32:$A, (fneg f32:$C), f32:$B),
(FNMSUBS $A, $C, $B)>;
+// FCOPYSIGN's operand types need not agree.
+def : Pat<(fcopysign f64:$frB, f32:$frA),
+ (FCPSGND (COPY_TO_REGCLASS $frA, F8RC), $frB)>;
+def : Pat<(fcopysign f32:$frB, f64:$frA),
+ (FCPSGNS (COPY_TO_REGCLASS $frA, F4RC), $frB)>;
+
include "PPCInstrAltivec.td"
include "PPCInstr64Bit.td"
@@ -2300,6 +2322,35 @@ def EIEIO : XForm_24_eieio<31, 854, (outs), (ins),
def WAIT : XForm_24_sync<31, 62, (outs), (ins i32imm:$L),
"wait $L", LdStLoad, []>;
+def MTMSR: XForm_mtmsr<31, 146, (outs), (ins gprc:$RS, i32imm:$L),
+ "mtmsr $RS, $L", SprMTMSR>;
+
+def MFMSR : XForm_rs<31, 83, (outs gprc:$RT), (ins),
+ "mfmsr $RT", SprMFMSR, []>;
+
+def MTMSRD : XForm_mtmsr<31, 178, (outs), (ins gprc:$RS, i32imm:$L),
+ "mtmsrd $RS, $L", SprMTMSRD>;
+
+def SLBIE : XForm_16b<31, 434, (outs), (ins gprc:$RB),
+ "slbie $RB", SprSLBIE, []>;
+
+def SLBMTE : XForm_26<31, 402, (outs), (ins gprc:$RS, gprc:$RB),
+ "slbmte $RS, $RB", SprSLBMTE, []>;
+
+def SLBMFEE : XForm_26<31, 915, (outs gprc:$RT), (ins gprc:$RB),
+ "slbmfee $RT, $RB", SprSLBMFEE, []>;
+
+def SLBIA : XForm_0<31, 498, (outs), (ins), "slbia", SprSLBIA, []>;
+
+def TLBSYNC : XForm_0<31, 566, (outs), (ins),
+ "tlbsync", SprTLBSYNC, []>;
+
+def TLBIEL : XForm_16b<31, 274, (outs), (ins gprc:$RB),
+ "tlbiel $RB", SprTLBIEL, []>;
+
+def TLBIE : XForm_26<31, 306, (outs), (ins gprc:$RS, gprc:$RB),
+ "tlbie $RB,$RS", SprTLBIE, []>;
+
//===----------------------------------------------------------------------===//
// PowerPC Assembler Instruction Aliases
//
@@ -2368,6 +2419,46 @@ def : InstAlias<"sub. $rA, $rB, $rC", (SUBF8o g8rc:$rA, g8rc:$rC, g8rc:$rB)>;
def : InstAlias<"subc $rA, $rB, $rC", (SUBFC8 g8rc:$rA, g8rc:$rC, g8rc:$rB)>;
def : InstAlias<"subc. $rA, $rB, $rC", (SUBFC8o g8rc:$rA, g8rc:$rC, g8rc:$rB)>;
+def : InstAlias<"mtmsrd $RS", (MTMSRD gprc:$RS, 0)>;
+def : InstAlias<"mtmsr $RS", (MTMSR gprc:$RS, 0)>;
+
+def : InstAlias<"mfsprg $RT, 0", (MFSPR gprc:$RT, 272)>;
+def : InstAlias<"mfsprg $RT, 1", (MFSPR gprc:$RT, 273)>;
+def : InstAlias<"mfsprg $RT, 2", (MFSPR gprc:$RT, 274)>;
+def : InstAlias<"mfsprg $RT, 3", (MFSPR gprc:$RT, 275)>;
+
+def : InstAlias<"mfsprg0 $RT", (MFSPR gprc:$RT, 272)>;
+def : InstAlias<"mfsprg1 $RT", (MFSPR gprc:$RT, 273)>;
+def : InstAlias<"mfsprg2 $RT", (MFSPR gprc:$RT, 274)>;
+def : InstAlias<"mfsprg3 $RT", (MFSPR gprc:$RT, 275)>;
+
+def : InstAlias<"mtsprg 0, $RT", (MTSPR 272, gprc:$RT)>;
+def : InstAlias<"mtsprg 1, $RT", (MTSPR 273, gprc:$RT)>;
+def : InstAlias<"mtsprg 2, $RT", (MTSPR 274, gprc:$RT)>;
+def : InstAlias<"mtsprg 3, $RT", (MTSPR 275, gprc:$RT)>;
+
+def : InstAlias<"mtsprg0 $RT", (MTSPR 272, gprc:$RT)>;
+def : InstAlias<"mtsprg1 $RT", (MTSPR 273, gprc:$RT)>;
+def : InstAlias<"mtsprg2 $RT", (MTSPR 274, gprc:$RT)>;
+def : InstAlias<"mtsprg3 $RT", (MTSPR 275, gprc:$RT)>;
+
+def : InstAlias<"mtasr $RS", (MTSPR 280, gprc:$RS)>;
+
+def : InstAlias<"mfdec $RT", (MFSPR gprc:$RT, 22)>;
+def : InstAlias<"mtdec $RT", (MTSPR 22, gprc:$RT)>;
+
+def : InstAlias<"mfpvr $RT", (MFSPR gprc:$RT, 287)>;
+
+def : InstAlias<"mfsdr1 $RT", (MFSPR gprc:$RT, 25)>;
+def : InstAlias<"mtsdr1 $RT", (MTSPR 25, gprc:$RT)>;
+
+def : InstAlias<"mfsrr0 $RT", (MFSPR gprc:$RT, 26)>;
+def : InstAlias<"mfsrr1 $RT", (MFSPR gprc:$RT, 27)>;
+def : InstAlias<"mtsrr0 $RT", (MTSPR 26, gprc:$RT)>;
+def : InstAlias<"mtsrr1 $RT", (MTSPR 27, gprc:$RT)>;
+
+def : InstAlias<"tlbie $RB", (TLBIE R0, gprc:$RB)>;
+
def EXTLWI : PPCAsmPseudo<"extlwi $rA, $rS, $n, $b",
(ins gprc:$rA, gprc:$rS, u5imm:$n, u5imm:$b)>;
def EXTLWIo : PPCAsmPseudo<"extlwi. $rA, $rS, $n, $b",
diff --git a/lib/Target/PowerPC/PPCMCInstLower.cpp b/lib/Target/PowerPC/PPCMCInstLower.cpp
index d69aa4a..f61c8bf 100644
--- a/lib/Target/PowerPC/PPCMCInstLower.cpp
+++ b/lib/Target/PowerPC/PPCMCInstLower.cpp
@@ -69,7 +69,7 @@ static MCSymbol *GetSymbolFromOperand(const MachineOperand &MO, AsmPrinter &AP){
if (MO.isGlobal()) {
StubSym =
MachineModuleInfoImpl::
- StubValueTy(AP.Mang->getSymbol(MO.getGlobal()),
+ StubValueTy(AP.getSymbol(MO.getGlobal()),
!MO.getGlobal()->hasInternalLinkage());
} else {
Name.erase(Name.end()-5, Name.end());
@@ -95,7 +95,7 @@ static MCSymbol *GetSymbolFromOperand(const MachineOperand &MO, AsmPrinter &AP){
if (StubSym.getPointer() == 0) {
assert(MO.isGlobal() && "Extern symbol not handled yet");
StubSym = MachineModuleInfoImpl::
- StubValueTy(AP.Mang->getSymbol(MO.getGlobal()),
+ StubValueTy(AP.getSymbol(MO.getGlobal()),
!MO.getGlobal()->hasInternalLinkage());
}
return Sym;
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp
index adba613..19ccbfc 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -69,6 +69,7 @@ PPCRegisterInfo::PPCRegisterInfo(const PPCSubtarget &ST)
ImmToIdxMap[PPC::STH] = PPC::STHX; ImmToIdxMap[PPC::STW] = PPC::STWX;
ImmToIdxMap[PPC::STFS] = PPC::STFSX; ImmToIdxMap[PPC::STFD] = PPC::STFDX;
ImmToIdxMap[PPC::ADDI] = PPC::ADD4;
+ ImmToIdxMap[PPC::LWA_32] = PPC::LWAX_32;
// 64-bit
ImmToIdxMap[PPC::LHA8] = PPC::LHAX8; ImmToIdxMap[PPC::LBZ8] = PPC::LBZX8;
@@ -532,6 +533,7 @@ static bool usesIXAddr(const MachineInstr &MI) {
default:
return false;
case PPC::LWA:
+ case PPC::LWA_32:
case PPC::LD:
case PPC::STD:
return true;
@@ -689,14 +691,6 @@ unsigned PPCRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
return TFI->hasFP(MF) ? PPC::X31 : PPC::X1;
}
-unsigned PPCRegisterInfo::getEHExceptionRegister() const {
- return !Subtarget.isPPC64() ? PPC::R3 : PPC::X3;
-}
-
-unsigned PPCRegisterInfo::getEHHandlerRegister() const {
- return !Subtarget.isPPC64() ? PPC::R4 : PPC::X4;
-}
-
unsigned PPCRegisterInfo::getBaseRegister(const MachineFunction &MF) const {
if (!hasBasePointer(MF))
return getFrameRegister(MF);
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.h b/lib/Target/PowerPC/PPCRegisterInfo.h
index d02af9e..dd3bb40 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.h
+++ b/lib/Target/PowerPC/PPCRegisterInfo.h
@@ -97,10 +97,6 @@ public:
bool hasBasePointer(const MachineFunction &MF) const;
bool canRealignStack(const MachineFunction &MF) const;
bool needsStackRealignment(const MachineFunction &MF) const;
-
- // Exception handling queries.
- unsigned getEHExceptionRegister() const;
- unsigned getEHHandlerRegister() const;
};
} // end namespace llvm
diff --git a/lib/Target/PowerPC/PPCSchedule.td b/lib/Target/PowerPC/PPCSchedule.td
index 660c0c3..92ba69c 100644
--- a/lib/Target/PowerPC/PPCSchedule.td
+++ b/lib/Target/PowerPC/PPCSchedule.td
@@ -108,6 +108,14 @@ def VecPerm : InstrItinClass;
def VecFPRound : InstrItinClass;
def VecVSL : InstrItinClass;
def VecVSR : InstrItinClass;
+def SprMTMSRD : InstrItinClass;
+def SprSLIE : InstrItinClass;
+def SprSLBIE : InstrItinClass;
+def SprSLBMTE : InstrItinClass;
+def SprSLBMFEE : InstrItinClass;
+def SprSLBIA : InstrItinClass;
+def SprTLBIEL : InstrItinClass;
+def SprTLBIE : InstrItinClass;
//===----------------------------------------------------------------------===//
// Processor instruction itineraries.
diff --git a/lib/Target/PowerPC/PPCScheduleA2.td b/lib/Target/PowerPC/PPCScheduleA2.td
index 8d5838e..1612cd2 100644
--- a/lib/Target/PowerPC/PPCScheduleA2.td
+++ b/lib/Target/PowerPC/PPCScheduleA2.td
@@ -14,39 +14,8 @@
//===----------------------------------------------------------------------===//
// Functional units on the PowerPC A2 chip sets
//
-def IU0to3_0 : FuncUnit; // Fetch unit 1 to 4 slot 1
-def IU0to3_1 : FuncUnit; // Fetch unit 1 to 4 slot 2
-def IU0to3_2 : FuncUnit; // Fetch unit 1 to 4 slot 3
-def IU0to3_3 : FuncUnit; // Fetch unit 1 to 4 slot 4
-def IU4_0 : FuncUnit; // Instruction buffer slot 1
-def IU4_1 : FuncUnit; // Instruction buffer slot 2
-def IU4_2 : FuncUnit; // Instruction buffer slot 3
-def IU4_3 : FuncUnit; // Instruction buffer slot 4
-def IU4_4 : FuncUnit; // Instruction buffer slot 5
-def IU4_5 : FuncUnit; // Instruction buffer slot 6
-def IU4_6 : FuncUnit; // Instruction buffer slot 7
-def IU4_7 : FuncUnit; // Instruction buffer slot 8
-def IU5 : FuncUnit; // Dependency resolution
-def IU6 : FuncUnit; // Instruction issue
-def RF0 : FuncUnit;
-def XRF1 : FuncUnit;
-def XEX1 : FuncUnit; // Execution stage 1 for the XU pipeline
-def XEX2 : FuncUnit; // Execution stage 2 for the XU pipeline
-def XEX3 : FuncUnit; // Execution stage 3 for the XU pipeline
-def XEX4 : FuncUnit; // Execution stage 4 for the XU pipeline
-def XEX5 : FuncUnit; // Execution stage 5 for the XU pipeline
-def XEX6 : FuncUnit; // Execution stage 6 for the XU pipeline
-def FRF1 : FuncUnit;
-def FEX1 : FuncUnit; // Execution stage 1 for the FU pipeline
-def FEX2 : FuncUnit; // Execution stage 2 for the FU pipeline
-def FEX3 : FuncUnit; // Execution stage 3 for the FU pipeline
-def FEX4 : FuncUnit; // Execution stage 4 for the FU pipeline
-def FEX5 : FuncUnit; // Execution stage 5 for the FU pipeline
-def FEX6 : FuncUnit; // Execution stage 6 for the FU pipeline
-
-def CR_Bypass : Bypass; // The bypass for condition regs.
-//def GPR_Bypass : Bypass; // The bypass for general-purpose regs.
-//def FPR_Bypass : Bypass; // The bypass for floating-point regs.
+def XU : FuncUnit; // XU pipeline
+def FU : FuncUnit; // FP pipeline
//
// This file defines the itinerary class data for the PPC A2 processor.
@@ -55,699 +24,119 @@ def CR_Bypass : Bypass; // The bypass for condition regs.
def PPCA2Itineraries : ProcessorItineraries<
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3,
- IU4_0, IU4_1, IU4_2, IU4_3, IU4_4, IU4_5, IU4_6, IU4_7,
- IU5, IU6, RF0, XRF1, XEX1, XEX2, XEX3, XEX4, XEX5, XEX6,
- FRF1, FEX1, FEX2, FEX3, FEX4, FEX5, FEX6],
- [CR_Bypass, GPR_Bypass, FPR_Bypass], [
- InstrItinData<IntSimple , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
- [10, 7, 7],
- [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
- InstrItinData<IntGeneral , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
- [10, 7, 7],
- [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
- InstrItinData<IntCompare , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
- [10, 7, 7],
- [CR_Bypass, GPR_Bypass, GPR_Bypass]>,
- InstrItinData<IntDivW , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<38, [XEX6]>],
- [53, 7, 7],
- [NoBypass, GPR_Bypass, GPR_Bypass]>,
- InstrItinData<IntMFFS , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
- [10, 7, 7],
- [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
- InstrItinData<IntMTFSB0 , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
- [10, 7, 7],
- [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
- InstrItinData<IntMulHW , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
- [14, 7, 7],
- [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
- InstrItinData<IntMulHWU , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
- [14, 7, 7],
- [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
- InstrItinData<IntMulLI , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
- [15, 7, 7],
- [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
- InstrItinData<IntRotate , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
- [10, 7, 7],
- [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
- InstrItinData<IntRotateD , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
- [10, 7, 7],
- [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
- InstrItinData<IntRotateDI , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
- [10, 7, 7],
- [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
- InstrItinData<IntShift , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
- [10, 7, 7],
- [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
- InstrItinData<IntTrapW , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
- [10, 7, 7],
- [GPR_Bypass, GPR_Bypass]>,
- InstrItinData<IntTrapD , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
- [10, 7, 7],
- [GPR_Bypass, GPR_Bypass]>,
- InstrItinData<BrB , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
- [15, 7, 7],
- [NoBypass, GPR_Bypass]>,
- InstrItinData<BrCR , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
- [10, 7, 7],
- [CR_Bypass, CR_Bypass, CR_Bypass]>,
- InstrItinData<BrMCR , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
- [10, 7, 7],
- [CR_Bypass, CR_Bypass, CR_Bypass]>,
- InstrItinData<BrMCRX , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
- [10, 7, 7],
- [CR_Bypass, GPR_Bypass, GPR_Bypass]>,
- InstrItinData<LdStDCBA , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
- [13, 11],
- [NoBypass, GPR_Bypass]>,
- InstrItinData<LdStDCBF , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
- [13, 11],
- [NoBypass, GPR_Bypass]>,
- InstrItinData<LdStDCBI , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
- [13, 11],
- [NoBypass, GPR_Bypass]>,
- InstrItinData<LdStLoad , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
- [14, 7],
- [GPR_Bypass, GPR_Bypass]>,
- InstrItinData<LdStLoadUpd , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
- [14, 7],
- [GPR_Bypass, GPR_Bypass]>,
- InstrItinData<LdStLDU , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
- [14, 7],
- [GPR_Bypass, GPR_Bypass]>,
- InstrItinData<LdStStore , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
- [13, 7],
- [GPR_Bypass, GPR_Bypass]>,
- InstrItinData<LdStStoreUpd, [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
- [13, 7],
- [GPR_Bypass, GPR_Bypass]>,
- InstrItinData<LdStICBI , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
- [14, 7],
- [NoBypass, GPR_Bypass]>,
- InstrItinData<LdStSTFD , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
- [14, 7, 7],
- [NoBypass, FPR_Bypass, FPR_Bypass]>,
- InstrItinData<LdStSTFDU , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
- [14, 7, 7],
- [NoBypass, FPR_Bypass, FPR_Bypass]>,
- InstrItinData<LdStLFD , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
- [14, 7, 7],
- [FPR_Bypass, GPR_Bypass, GPR_Bypass]>,
- InstrItinData<LdStLFDU , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
- [14, 7, 7],
- [FPR_Bypass, GPR_Bypass, GPR_Bypass]>,
- InstrItinData<LdStLHA , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
- [14, 7],
- [NoBypass, GPR_Bypass]>,
- InstrItinData<LdStLHAU , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
- [14, 7],
- [NoBypass, GPR_Bypass]>,
- InstrItinData<LdStLMW , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
- [14, 7],
- [NoBypass, GPR_Bypass]>,
- InstrItinData<LdStLWARX , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<13, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
- [26, 7],
- [NoBypass, GPR_Bypass]>,
- InstrItinData<LdStSTD , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
- [13, 7],
- [GPR_Bypass, GPR_Bypass]>,
- InstrItinData<LdStSTDU , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
- [13, 7],
- [GPR_Bypass, GPR_Bypass]>,
- InstrItinData<LdStSTDCX , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<13, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
- [26, 7],
- [NoBypass, GPR_Bypass]>,
- InstrItinData<LdStSTWCX , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<13, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
- [26, 7],
- [NoBypass, GPR_Bypass]>,
- InstrItinData<LdStSync , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<12, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>]>,
- InstrItinData<SprISYNC , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<14, [XEX6]>]>,
- InstrItinData<SprMFSR , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
- [15, 7],
- [GPR_Bypass, NoBypass]>,
- InstrItinData<SprMTMSR , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
- [15, 7],
- [NoBypass, GPR_Bypass]>,
- InstrItinData<SprMTSR , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
- [15, 7],
- [NoBypass, GPR_Bypass]>,
- InstrItinData<SprTLBSYNC , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<14, [XEX6]>]>,
- InstrItinData<SprMFCR , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
- [10, 7],
- [GPR_Bypass, CR_Bypass]>,
- InstrItinData<SprMFMSR , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
- [15, 7],
- [GPR_Bypass, NoBypass]>,
- InstrItinData<SprMFSPR , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
- [15, 7],
- [NoBypass, GPR_Bypass]>,
- InstrItinData<SprMFTB , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<14, [XEX6]>],
- [29, 7],
- [NoBypass, GPR_Bypass]>,
- InstrItinData<SprMTSPR , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
- [15, 7],
- [NoBypass, GPR_Bypass]>,
- InstrItinData<SprMTSRIN , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<14, [XEX6]>],
- [29, 7],
- [NoBypass, GPR_Bypass]>,
- InstrItinData<SprRFI , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<14, [XEX6]>],
- [29, 7],
- [NoBypass, GPR_Bypass]>,
- InstrItinData<SprSC , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<14, [XEX6]>],
- [29, 7],
- [NoBypass, GPR_Bypass]>,
- InstrItinData<FPGeneral , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [FRF1]>,
- InstrStage<1, [FEX1]>, InstrStage<1, [FEX2]>,
- InstrStage<1, [FEX3]>, InstrStage<1, [FEX4]>,
- InstrStage<1, [FEX5]>, InstrStage<1, [FEX6]>],
- [15, 7, 7],
- [FPR_Bypass, FPR_Bypass, FPR_Bypass]>,
- InstrItinData<FPAddSub , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [FRF1]>,
- InstrStage<1, [FEX1]>, InstrStage<1, [FEX2]>,
- InstrStage<1, [FEX3]>, InstrStage<1, [FEX4]>,
- InstrStage<1, [FEX5]>, InstrStage<1, [FEX6]>],
- [15, 7, 7],
- [FPR_Bypass, FPR_Bypass, FPR_Bypass]>,
- InstrItinData<FPCompare , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [FRF1]>,
- InstrStage<1, [FEX1]>, InstrStage<1, [FEX2]>,
- InstrStage<1, [FEX3]>, InstrStage<1, [FEX4]>,
- InstrStage<1, [FEX5]>, InstrStage<1, [FEX6]>],
- [13, 7, 7],
- [CR_Bypass, FPR_Bypass, FPR_Bypass]>,
- InstrItinData<FPDivD , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<71, [FRF1], 0>,
- InstrStage<71, [FEX1], 0>,
- InstrStage<71, [FEX2], 0>,
- InstrStage<71, [FEX3], 0>,
- InstrStage<71, [FEX4], 0>,
- InstrStage<71, [FEX5], 0>,
- InstrStage<71, [FEX6]>],
- [86, 7, 7],
- [NoBypass, FPR_Bypass, FPR_Bypass]>,
- InstrItinData<FPDivS , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<58, [FRF1], 0>,
- InstrStage<58, [FEX1], 0>,
- InstrStage<58, [FEX2], 0>,
- InstrStage<58, [FEX3], 0>,
- InstrStage<58, [FEX4], 0>,
- InstrStage<58, [FEX5], 0>,
- InstrStage<58, [FEX6]>],
- [73, 7, 7],
- [NoBypass, FPR_Bypass, FPR_Bypass]>,
- InstrItinData<FPSqrt , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<68, [FRF1], 0>,
- InstrStage<68, [FEX1], 0>,
- InstrStage<68, [FEX2], 0>,
- InstrStage<68, [FEX3], 0>,
- InstrStage<68, [FEX4], 0>,
- InstrStage<68, [FEX5], 0>,
- InstrStage<68, [FEX6]>],
- [86, 7], // FIXME: should be [86, 7] for double
- // and [82, 7] for single. Likewise,
- // the FEX? cycle count should be 68
- // for double and 64 for single.
- [NoBypass, FPR_Bypass]>,
- InstrItinData<FPFused , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [FRF1]>,
- InstrStage<1, [FEX1]>, InstrStage<1, [FEX2]>,
- InstrStage<1, [FEX3]>, InstrStage<1, [FEX4]>,
- InstrStage<1, [FEX5]>, InstrStage<1, [FEX6]>],
- [15, 7, 7, 7],
- [FPR_Bypass, FPR_Bypass, FPR_Bypass, FPR_Bypass]>,
- InstrItinData<FPRes , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [FRF1]>,
- InstrStage<1, [FEX1]>, InstrStage<1, [FEX2]>,
- InstrStage<1, [FEX3]>, InstrStage<1, [FEX4]>,
- InstrStage<1, [FEX5]>, InstrStage<1, [FEX6]>],
- [15, 7],
- [FPR_Bypass, FPR_Bypass]>
+ [XU, FU], [], [
+ InstrItinData<IntSimple , [InstrStage<1, [XU]>],
+ [1, 1, 1]>,
+ InstrItinData<IntGeneral , [InstrStage<1, [XU]>],
+ [2, 1, 1]>,
+ InstrItinData<IntCompare , [InstrStage<1, [XU]>],
+ [2, 1, 1]>,
+ InstrItinData<IntDivW , [InstrStage<1, [XU]>],
+ [39, 1, 1]>,
+ InstrItinData<IntDivD , [InstrStage<1, [XU]>],
+ [71, 1, 1]>,
+ InstrItinData<IntMulHW , [InstrStage<1, [XU]>],
+ [5, 1, 1]>,
+ InstrItinData<IntMulHWU , [InstrStage<1, [XU]>],
+ [5, 1, 1]>,
+ InstrItinData<IntMulLI , [InstrStage<1, [XU]>],
+ [6, 1, 1]>,
+ InstrItinData<IntRotate , [InstrStage<1, [XU]>],
+ [2, 1, 1]>,
+ InstrItinData<IntRotateD , [InstrStage<1, [XU]>],
+ [2, 1, 1]>,
+ InstrItinData<IntRotateDI , [InstrStage<1, [XU]>],
+ [2, 1, 1]>,
+ InstrItinData<IntShift , [InstrStage<1, [XU]>],
+ [2, 1, 1]>,
+ InstrItinData<IntTrapW , [InstrStage<1, [XU]>],
+ [2, 1]>,
+ InstrItinData<IntTrapD , [InstrStage<1, [XU]>],
+ [2, 1]>,
+ InstrItinData<BrB , [InstrStage<1, [XU]>],
+ [6, 1, 1]>,
+ InstrItinData<BrCR , [InstrStage<1, [XU]>],
+ [1, 1, 1]>,
+ InstrItinData<BrMCR , [InstrStage<1, [XU]>],
+ [5, 1, 1]>,
+ InstrItinData<BrMCRX , [InstrStage<1, [XU]>],
+ [1, 1, 1]>,
+ InstrItinData<LdStDCBA , [InstrStage<1, [XU]>],
+ [1, 1, 1]>,
+ InstrItinData<LdStDCBF , [InstrStage<1, [XU]>],
+ [1, 1, 1]>,
+ InstrItinData<LdStDCBI , [InstrStage<1, [XU]>],
+ [1, 1, 1]>,
+ InstrItinData<LdStLoad , [InstrStage<1, [XU]>],
+ [6, 1, 1]>,
+ InstrItinData<LdStLoadUpd , [InstrStage<1, [XU]>],
+ [6, 8, 1, 1]>,
+ InstrItinData<LdStLDU , [InstrStage<1, [XU]>],
+ [6, 1, 1]>,
+ InstrItinData<LdStStore , [InstrStage<1, [XU]>],
+ [1, 1, 1]>,
+ InstrItinData<LdStStoreUpd, [InstrStage<1, [XU]>],
+ [2, 1, 1, 1]>,
+    InstrItinData<LdStICBI    , [InstrStage<1, [XU]>],
+ [16, 1, 1]>,
+ InstrItinData<LdStSTFD , [InstrStage<1, [XU]>],
+ [1, 1, 1]>,
+ InstrItinData<LdStSTFDU , [InstrStage<1, [XU]>],
+ [2, 1, 1, 1]>,
+ InstrItinData<LdStLFD , [InstrStage<1, [XU]>],
+ [7, 1, 1]>,
+ InstrItinData<LdStLFDU , [InstrStage<1, [XU]>],
+ [7, 9, 1, 1]>,
+ InstrItinData<LdStLHA , [InstrStage<1, [XU]>],
+ [6, 1, 1]>,
+ InstrItinData<LdStLHAU , [InstrStage<1, [XU]>],
+ [6, 8, 1, 1]>,
+ InstrItinData<LdStLWARX , [InstrStage<1, [XU]>],
+ [82, 1, 1]>, // L2 latency
+ InstrItinData<LdStSTD , [InstrStage<1, [XU]>],
+ [1, 1, 1]>,
+ InstrItinData<LdStSTDU , [InstrStage<1, [XU]>],
+ [2, 1, 1, 1]>,
+ InstrItinData<LdStSTDCX , [InstrStage<1, [XU]>],
+ [82, 1, 1]>, // L2 latency
+ InstrItinData<LdStSTWCX , [InstrStage<1, [XU]>],
+ [82, 1, 1]>, // L2 latency
+ InstrItinData<LdStSync , [InstrStage<1, [XU]>],
+ [6]>,
+ InstrItinData<SprISYNC , [InstrStage<1, [XU]>],
+ [16]>,
+ InstrItinData<SprMTMSR , [InstrStage<1, [XU]>],
+ [16, 1]>,
+ InstrItinData<SprMFCR , [InstrStage<1, [XU]>],
+ [6, 1]>,
+ InstrItinData<SprMFMSR , [InstrStage<1, [XU]>],
+ [4, 1]>,
+ InstrItinData<SprMFSPR , [InstrStage<1, [XU]>],
+ [6, 1]>,
+ InstrItinData<SprMFTB , [InstrStage<1, [XU]>],
+ [4, 1]>,
+ InstrItinData<SprMTSPR , [InstrStage<1, [XU]>],
+ [6, 1]>,
+ InstrItinData<SprRFI , [InstrStage<1, [XU]>],
+ [16]>,
+ InstrItinData<SprSC , [InstrStage<1, [XU]>],
+ [16]>,
+ InstrItinData<FPGeneral , [InstrStage<1, [FU]>],
+ [6, 1, 1]>,
+ InstrItinData<FPAddSub , [InstrStage<1, [FU]>],
+ [6, 1, 1]>,
+ InstrItinData<FPCompare , [InstrStage<1, [FU]>],
+ [5, 1, 1]>,
+ InstrItinData<FPDivD , [InstrStage<1, [FU]>],
+ [72, 1, 1]>,
+ InstrItinData<FPDivS , [InstrStage<1, [FU]>],
+ [59, 1, 1]>,
+ InstrItinData<FPSqrt , [InstrStage<1, [FU]>],
+ [69, 1, 1]>,
+ InstrItinData<FPFused , [InstrStage<1, [FU]>],
+ [6, 1, 1, 1]>,
+ InstrItinData<FPRes , [InstrStage<1, [FU]>],
+ [6, 1]>
]>;
//===----------------------------------------------------------------------===//
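The simplified A2 model above encodes latencies per operand: in each bracketed array the first entry is the defining operand's cycle and the rest are use operands. A short sketch, assuming the usual MC itinerary accessors, of reading the table back:

    #include "llvm/MC/MCInstrItineraries.h"

    // With the table above, IntMulLI reports a def latency of 6 cycles
    // and use-operand cycles of 1.
    static int defLatency(const llvm::InstrItineraryData &Itins,
                          unsigned ItinClassIdx) {
      return Itins.getOperandCycle(ItinClassIdx, 0); // operand 0 = def
    }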
diff --git a/lib/Target/PowerPC/PPCScheduleE500mc.td b/lib/Target/PowerPC/PPCScheduleE500mc.td
index 9bb779a..c189b9e 100644
--- a/lib/Target/PowerPC/PPCScheduleE500mc.td
+++ b/lib/Target/PowerPC/PPCScheduleE500mc.td
@@ -36,6 +36,8 @@ def CFX_0 : FuncUnit; // CFX pipeline
def LSU_0 : FuncUnit; // LSU pipeline
def FPU_0 : FuncUnit; // FPU pipeline
+def CR_Bypass : Bypass;
+
def PPCE500mcItineraries : ProcessorItineraries<
[DIS0, DIS1, SFX0, SFX1, BU, CFX_DivBypass, CFX_0, LSU_0, FPU_0],
[CR_Bypass, GPR_Bypass, FPR_Bypass], [
diff --git a/lib/Target/PowerPC/PPCScheduleE5500.td b/lib/Target/PowerPC/PPCScheduleE5500.td
index d7e11ac..7a24d20 100644
--- a/lib/Target/PowerPC/PPCScheduleE5500.td
+++ b/lib/Target/PowerPC/PPCScheduleE5500.td
@@ -39,6 +39,7 @@ def CFX_1 : FuncUnit; // CFX pipeline stage 1
// def LSU_0 : FuncUnit; // LSU pipeline
// def FPU_0 : FuncUnit; // FPU pipeline
+// def CR_Bypass : Bypass;
def PPCE5500Itineraries : ProcessorItineraries<
[DIS0, DIS1, SFX0, SFX1, BU, CFX_DivBypass, CFX_0, CFX_1,
diff --git a/lib/Target/PowerPC/PPCSubtarget.cpp b/lib/Target/PowerPC/PPCSubtarget.cpp
index 12d0326..7231ab1 100644
--- a/lib/Target/PowerPC/PPCSubtarget.cpp
+++ b/lib/Target/PowerPC/PPCSubtarget.cpp
@@ -15,6 +15,7 @@
#include "PPC.h"
#include "PPCRegisterInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Function.h"
@@ -74,6 +75,7 @@ void PPCSubtarget::initializeEnvironment() {
Use64BitRegs = false;
HasAltivec = false;
HasQPX = false;
+ HasFCPSGN = false;
HasFSQRT = false;
HasFRE = false;
HasFRES = false;
@@ -88,6 +90,8 @@ void PPCSubtarget::initializeEnvironment() {
HasPOPCNTD = false;
HasLDBRX = false;
IsBookE = false;
+ DeprecatedMFTB = false;
+ DeprecatedDST = false;
HasLazyResolverStubs = false;
IsJITCodeModel = false;
}
@@ -163,14 +167,7 @@ bool PPCSubtarget::enablePostRAScheduler(
CodeGenOpt::Level OptLevel,
TargetSubtargetInfo::AntiDepBreakMode& Mode,
RegClassVector& CriticalPathRCs) const {
- // FIXME: It would be best to use TargetSubtargetInfo::ANTIDEP_ALL here,
- // but we can't because we can't reassign the cr registers. There is a
- // dependence between the cr register and the RLWINM instruction used
- // to extract its value which the anti-dependency breaker can't currently
- // see. Maybe we should make a late-expanded pseudo to encode this dependency.
- // (the relevant code is in PPCDAGToDAGISel::SelectSETCC)
-
- Mode = TargetSubtargetInfo::ANTIDEP_CRITICAL;
+ Mode = TargetSubtargetInfo::ANTIDEP_ALL;
CriticalPathRCs.clear();
@@ -179,9 +176,44 @@ bool PPCSubtarget::enablePostRAScheduler(
else
CriticalPathRCs.push_back(&PPC::GPRCRegClass);
- CriticalPathRCs.push_back(&PPC::F8RCRegClass);
- CriticalPathRCs.push_back(&PPC::VRRCRegClass);
-
return OptLevel >= CodeGenOpt::Default;
}
+// Embedded cores need aggressive scheduling.
+static bool needsAggressiveScheduling(unsigned Directive) {
+ switch (Directive) {
+ default: return false;
+ case PPC::DIR_440:
+ case PPC::DIR_A2:
+ case PPC::DIR_E500mc:
+ case PPC::DIR_E5500:
+ return true;
+ }
+}
+
+bool PPCSubtarget::enableMachineScheduler() const {
+ // Enable MI scheduling for the embedded cores.
+ // FIXME: Enable this for all cores (some additional modeling
+ // may be necessary).
+ return needsAggressiveScheduling(DarwinDirective);
+}
+
+void PPCSubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
+ MachineInstr *begin,
+ MachineInstr *end,
+ unsigned NumRegionInstrs) const {
+ if (needsAggressiveScheduling(DarwinDirective)) {
+ Policy.OnlyTopDown = false;
+ Policy.OnlyBottomUp = false;
+ }
+
+ // Spilling is generally expensive on all PPC cores, so always enable
+ // register-pressure tracking.
+ Policy.ShouldTrackPressure = true;
+}
+
+bool PPCSubtarget::useAA() const {
+ // Use AA during code generation for the embedded cores.
+ return needsAggressiveScheduling(DarwinDirective);
+}
+
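A sketch of how the generic machine scheduler is expected to consume the new hooks; the call sites are assumed and simplified, not quoted from the pass:

    #include "PPCSubtarget.h"
    #include "llvm/CodeGen/MachineScheduler.h"

    static void setupRegion(const llvm::PPCSubtarget &ST,
                            llvm::MachineSchedPolicy &Policy,
                            llvm::MachineInstr *Begin, llvm::MachineInstr *End,
                            unsigned NumRegionInstrs) {
      if (!ST.enableMachineScheduler())
        return; // non-embedded cores keep the default scheduling path
      ST.overrideSchedPolicy(Policy, Begin, End, NumRegionInstrs);
      // On 440/A2/e500mc/e5500 this yields bidirectional scheduling
      // (neither OnlyTopDown nor OnlyBottomUp) with pressure tracking on.
    }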
diff --git a/lib/Target/PowerPC/PPCSubtarget.h b/lib/Target/PowerPC/PPCSubtarget.h
index 3f3fc0e..c863a6e 100644
--- a/lib/Target/PowerPC/PPCSubtarget.h
+++ b/lib/Target/PowerPC/PPCSubtarget.h
@@ -76,6 +76,8 @@ protected:
bool IsPPC64;
bool HasAltivec;
bool HasQPX;
+ bool HasVSX;
+ bool HasFCPSGN;
bool HasFSQRT;
bool HasFRE, HasFRES, HasFRSQRTE, HasFRSQRTES;
bool HasRecipPrec;
@@ -87,6 +89,8 @@ protected:
bool HasPOPCNTD;
bool HasLDBRX;
bool IsBookE;
+ bool DeprecatedMFTB;
+ bool DeprecatedDST;
bool HasLazyResolverStubs;
bool IsJITCodeModel;
bool IsLittleEndian;
@@ -171,6 +175,7 @@ public:
bool isLittleEndian() const { return IsLittleEndian; }
// Specific obvious features.
+ bool hasFCPSGN() const { return HasFCPSGN; }
bool hasFSQRT() const { return HasFSQRT; }
bool hasFRE() const { return HasFRE; }
bool hasFRES() const { return HasFRES; }
@@ -188,6 +193,8 @@ public:
bool hasPOPCNTD() const { return HasPOPCNTD; }
bool hasLDBRX() const { return HasLDBRX; }
bool isBookE() const { return IsBookE; }
+ bool isDeprecatedMFTB() const { return DeprecatedMFTB; }
+ bool isDeprecatedDST() const { return DeprecatedDST; }
const Triple &getTargetTriple() const { return TargetTriple; }
@@ -205,6 +212,14 @@ public:
bool enablePostRAScheduler(CodeGenOpt::Level OptLevel,
TargetSubtargetInfo::AntiDepBreakMode& Mode,
RegClassVector& CriticalPathRCs) const;
+
+ // Scheduling customization.
+ bool enableMachineScheduler() const;
+ void overrideSchedPolicy(MachineSchedPolicy &Policy,
+ MachineInstr *begin,
+ MachineInstr *end,
+ unsigned NumRegionInstrs) const;
+ bool useAA() const;
};
} // End llvm namespace
diff --git a/lib/Target/PowerPC/PPCTargetStreamer.h b/lib/Target/PowerPC/PPCTargetStreamer.h
new file mode 100644
index 0000000..e876be1
--- /dev/null
+++ b/lib/Target/PowerPC/PPCTargetStreamer.h
@@ -0,0 +1,23 @@
+//===-- PPCTargetStreamer.h - PPC Target Streamer ---------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PPCTARGETSTREAMER_H
+#define PPCTARGETSTREAMER_H
+
+#include "llvm/MC/MCStreamer.h"
+
+namespace llvm {
+class PPCTargetStreamer : public MCTargetStreamer {
+public:
+ virtual ~PPCTargetStreamer();
+ virtual void emitTCEntry(const MCSymbol &S) = 0;
+};
+}
+
+#endif
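A hypothetical subclass (the class name is invented for illustration; the real streamer implementations live in the PPC MC layer) showing what an emitTCEntry override might print for a TOC entry:

    #include "PPCTargetStreamer.h"
    #include "llvm/MC/MCSymbol.h"
    #include "llvm/Support/FormattedStream.h"
    using namespace llvm;

    class PPCTargetAsmStreamerSketch : public PPCTargetStreamer {
      formatted_raw_ostream &OS;
    public:
      PPCTargetAsmStreamerSketch(formatted_raw_ostream &OS) : OS(OS) {}
      virtual void emitTCEntry(const MCSymbol &S) {
        OS << "\t.tc " << S.getName() << "[TC]," << S.getName() << '\n';
      }
    };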
diff --git a/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
index 2504ba7..8879630 100644
--- a/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ b/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -77,6 +77,7 @@ public:
/// \name Scalar TTI Implementations
/// @{
virtual PopcntSupportKind getPopcntSupport(unsigned TyWidth) const;
+ virtual void getUnrollingPreferences(Loop *L, UnrollingPreferences &UP) const;
/// @}
@@ -129,6 +130,14 @@ PPCTTI::PopcntSupportKind PPCTTI::getPopcntSupport(unsigned TyWidth) const {
return PSK_Software;
}
+void PPCTTI::getUnrollingPreferences(Loop *L, UnrollingPreferences &UP) const {
+ if (ST->getDarwinDirective() == PPC::DIR_A2) {
+ // The A2 is in-order with a deep pipeline, and concatenation unrolling
+ // helps expose latency-hiding opportunities to the instruction scheduler.
+ UP.Partial = UP.Runtime = true;
+ }
+}
+
unsigned PPCTTI::getNumberOfRegisters(bool Vector) const {
if (Vector && !ST->hasAltivec())
return 0;
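A sketch of the consumer side, assuming the loop unroller queries TTI in the usual way; on the A2 both flags come back true, enabling partial and runtime unrolling:

    #include "llvm/Analysis/TargetTransformInfo.h"

    static bool allowsAggressiveUnroll(const llvm::TargetTransformInfo &TTI,
                                       llvm::Loop *L) {
      llvm::TargetTransformInfo::UnrollingPreferences UP;
      UP.Partial = UP.Runtime = false; // conservative defaults
      TTI.getUnrollingPreferences(L, UP); // PPC A2 flips both to true
      return UP.Partial || UP.Runtime;
    }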
diff --git a/lib/Target/R600/AMDGPU.h b/lib/Target/R600/AMDGPU.h
index 6b374cb..025b28e 100644
--- a/lib/Target/R600/AMDGPU.h
+++ b/lib/Target/R600/AMDGPU.h
@@ -29,11 +29,13 @@ FunctionPass *createR600VectorRegMerger(TargetMachine &tm);
FunctionPass *createR600TextureIntrinsicsReplacer();
FunctionPass *createR600ExpandSpecialInstrsPass(TargetMachine &tm);
FunctionPass *createR600EmitClauseMarkers(TargetMachine &tm);
+FunctionPass *createR600ClauseMergePass(TargetMachine &tm);
FunctionPass *createR600Packetizer(TargetMachine &tm);
FunctionPass *createR600ControlFlowFinalizer(TargetMachine &tm);
FunctionPass *createAMDGPUCFGStructurizerPass(TargetMachine &tm);
// SI Passes
+FunctionPass *createSITypeRewriter();
FunctionPass *createSIAnnotateControlFlowPass();
FunctionPass *createSILowerControlFlowPass(TargetMachine &tm);
FunctionPass *createSIFixSGPRCopiesPass(TargetMachine &tm);
@@ -43,7 +45,6 @@ FunctionPass *createSIInsertWaits(TargetMachine &tm);
// Passes common to R600 and SI
Pass *createAMDGPUStructurizeCFGPass();
FunctionPass *createAMDGPUConvertToISAPass(TargetMachine &tm);
-FunctionPass *createAMDGPUIndirectAddressingPass(TargetMachine &tm);
FunctionPass *createAMDGPUISelDag(TargetMachine &tm);
/// \brief Creates an AMDGPU-specific Target Transformation Info pass.
diff --git a/lib/Target/R600/AMDGPU.td b/lib/Target/R600/AMDGPU.td
index 0048e25..182235b 100644
--- a/lib/Target/R600/AMDGPU.td
+++ b/lib/Target/R600/AMDGPU.td
@@ -21,8 +21,18 @@ def FeatureDumpCode : SubtargetFeature <"DumpCode",
"true",
"Dump MachineInstrs in the CodeEmitter">;
+def FeatureIRStructurizer : SubtargetFeature <"disable-irstructurizer",
+ "EnableIRStructurizer",
+ "false",
+ "Disable IR Structurizer">;
+
// Target features
+def FeatureIfCvt : SubtargetFeature <"disable-ifcvt",
+ "EnableIfCvt",
+ "false",
+ "Disable the if conversion pass">;
+
def FeatureFP64 : SubtargetFeature<"fp64",
"FP64",
"true",
@@ -82,6 +92,8 @@ def FeatureNorthernIslands : SubtargetFeatureGeneration<"NORTHERN_ISLANDS",
def FeatureSouthernIslands : SubtargetFeatureGeneration<"SOUTHERN_ISLANDS",
[Feature64BitPtr, FeatureFP64]>;
+def FeatureSeaIslands : SubtargetFeatureGeneration<"SEA_ISLANDS",
+ [Feature64BitPtr, FeatureFP64]>;
//===----------------------------------------------------------------------===//
def AMDGPUInstrInfo : InstrInfo {
diff --git a/lib/Target/R600/AMDGPUAsmPrinter.cpp b/lib/Target/R600/AMDGPUAsmPrinter.cpp
index e039b77..67bdba2 100644
--- a/lib/Target/R600/AMDGPUAsmPrinter.cpp
+++ b/lib/Target/R600/AMDGPUAsmPrinter.cpp
@@ -45,32 +45,60 @@ extern "C" void LLVMInitializeR600AsmPrinter() {
TargetRegistry::RegisterAsmPrinter(TheAMDGPUTarget, createAMDGPUAsmPrinterPass);
}
+AMDGPUAsmPrinter::AMDGPUAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
+ : AsmPrinter(TM, Streamer)
+{
+ DisasmEnabled = TM.getSubtarget<AMDGPUSubtarget>().dumpCode() &&
+ ! Streamer.hasRawTextSupport();
+}
+
/// We need to override this function so we can avoid
/// the call to EmitFunctionHeader(), which the MCPureStreamer can't handle.
bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
- const AMDGPUSubtarget &STM = TM.getSubtarget<AMDGPUSubtarget>();
- if (STM.dumpCode()) {
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
- MF.dump();
-#endif
- }
SetupMachineFunction(MF);
if (OutStreamer.hasRawTextSupport()) {
OutStreamer.EmitRawText("@" + MF.getName() + ":");
}
- const MCSectionELF *ConfigSection = getObjFileLowering().getContext()
- .getELFSection(".AMDGPU.config",
+ MCContext &Context = getObjFileLowering().getContext();
+ const MCSectionELF *ConfigSection = Context.getELFSection(".AMDGPU.config",
ELF::SHT_PROGBITS, 0,
SectionKind::getReadOnly());
OutStreamer.SwitchSection(ConfigSection);
+ const AMDGPUSubtarget &STM = TM.getSubtarget<AMDGPUSubtarget>();
if (STM.getGeneration() > AMDGPUSubtarget::NORTHERN_ISLANDS) {
EmitProgramInfoSI(MF);
} else {
EmitProgramInfoR600(MF);
}
+
+ DisasmLines.clear();
+ HexLines.clear();
+ DisasmLineMaxLen = 0;
+
OutStreamer.SwitchSection(getObjFileLowering().getTextSection());
EmitFunctionBody();
+
+ if (STM.dumpCode()) {
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ MF.dump();
+#endif
+
+ if (DisasmEnabled) {
+ OutStreamer.SwitchSection(Context.getELFSection(".AMDGPU.disasm",
+ ELF::SHT_NOTE, 0,
+ SectionKind::getReadOnly()));
+
+ for (size_t i = 0; i < DisasmLines.size(); ++i) {
+ std::string Comment(DisasmLineMaxLen - DisasmLines[i].size(), ' ');
+ Comment += " ; " + HexLines[i] + "\n";
+
+ OutStreamer.EmitBytes(StringRef(DisasmLines[i]));
+ OutStreamer.EmitBytes(StringRef(Comment));
+ }
+ }
+ }
+
return false;
}
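A worked sketch of the padding arithmetic above (the numbers are invented): with DisasmLineMaxLen = 24 and a 17-character instruction string, 7 spaces of padding make every " ; <hex>" column start at the same offset:

    #include <string>

    static std::string alignDisasm(const std::string &Disasm,
                                   const std::string &Hex, size_t MaxLen) {
      // Assumes Disasm.size() <= MaxLen, as guaranteed by the max tracking.
      std::string Pad(MaxLen - Disasm.size(), ' ');
      return Disasm + Pad + " ; " + Hex + "\n";
    }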
@@ -139,6 +167,7 @@ void AMDGPUAsmPrinter::EmitProgramInfoR600(MachineFunction &MF) {
}
void AMDGPUAsmPrinter::EmitProgramInfoSI(MachineFunction &MF) {
+ const AMDGPUSubtarget &STM = TM.getSubtarget<AMDGPUSubtarget>();
unsigned MaxSGPR = 0;
unsigned MaxVGPR = 0;
bool VCCUsed = false;
@@ -154,7 +183,7 @@ void AMDGPUAsmPrinter::EmitProgramInfoSI(MachineFunction &MF) {
unsigned numOperands = MI.getNumOperands();
for (unsigned op_idx = 0; op_idx < numOperands; op_idx++) {
- MachineOperand & MO = MI.getOperand(op_idx);
+ MachineOperand &MO = MI.getOperand(op_idx);
unsigned maxUsed;
unsigned width = 0;
bool isSGPR = false;
@@ -168,8 +197,10 @@ void AMDGPUAsmPrinter::EmitProgramInfoSI(MachineFunction &MF) {
VCCUsed = true;
continue;
}
+
switch (reg) {
default: break;
+ case AMDGPU::SCC:
case AMDGPU::EXEC:
case AMDGPU::M0:
continue;
@@ -202,6 +233,9 @@ void AMDGPUAsmPrinter::EmitProgramInfoSI(MachineFunction &MF) {
} else if (AMDGPU::VReg_256RegClass.contains(reg)) {
isSGPR = false;
width = 8;
+ } else if (AMDGPU::SReg_512RegClass.contains(reg)) {
+ isSGPR = true;
+ width = 16;
} else if (AMDGPU::VReg_512RegClass.contains(reg)) {
isSGPR = false;
width = 16;
@@ -234,13 +268,24 @@ void AMDGPUAsmPrinter::EmitProgramInfoSI(MachineFunction &MF) {
OutStreamer.EmitIntValue(RsrcReg, 4);
OutStreamer.EmitIntValue(S_00B028_VGPRS(MaxVGPR / 4) | S_00B028_SGPRS(MaxSGPR / 8), 4);
+ unsigned LDSAlignShift;
+ if (STM.getGeneration() < AMDGPUSubtarget::SEA_ISLANDS) {
+ // LDS is allocated in 64 dword blocks
+ LDSAlignShift = 8;
+ } else {
+ // LDS is allocated in 128 dword blocks
+ LDSAlignShift = 9;
+ }
+ unsigned LDSBlocks =
+ RoundUpToAlignment(MFI->LDSSize, 1 << LDSAlignShift) >> LDSAlignShift;
+
if (MFI->ShaderType == ShaderType::COMPUTE) {
OutStreamer.EmitIntValue(R_00B84C_COMPUTE_PGM_RSRC2, 4);
- OutStreamer.EmitIntValue(S_00B84C_LDS_SIZE(RoundUpToAlignment(MFI->LDSSize, 256) >> 8), 4);
+ OutStreamer.EmitIntValue(S_00B84C_LDS_SIZE(LDSBlocks), 4);
}
if (MFI->ShaderType == ShaderType::PIXEL) {
OutStreamer.EmitIntValue(R_00B02C_SPI_SHADER_PGM_RSRC2_PS, 4);
- OutStreamer.EmitIntValue(S_00B02C_EXTRA_LDS_SIZE(RoundUpToAlignment(MFI->LDSSize, 256) >> 8), 4);
+ OutStreamer.EmitIntValue(S_00B02C_EXTRA_LDS_SIZE(LDSBlocks), 4);
OutStreamer.EmitIntValue(R_0286CC_SPI_PS_INPUT_ENA, 4);
OutStreamer.EmitIntValue(MFI->PSInputAddr, 4);
}
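A worked example of the LDS-block computation (the byte count is invented): pre-Sea-Islands blocks are 64 dwords = 256 bytes (shift 8); Sea Islands and newer use 128-dword, 512-byte blocks (shift 9):

    #include "llvm/Support/MathExtras.h"

    static unsigned ldsBlocks(unsigned LDSSizeBytes, bool SeaIslandsOrNewer) {
      unsigned Shift = SeaIslandsOrNewer ? 9 : 8;
      // e.g. 5000 bytes on SI: RoundUpToAlignment(5000, 256) = 5120,
      // and 5120 >> 8 = 20 blocks.
      return unsigned(llvm::RoundUpToAlignment(LDSSizeBytes, 1u << Shift)
                      >> Shift);
    }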
diff --git a/lib/Target/R600/AMDGPUAsmPrinter.h b/lib/Target/R600/AMDGPUAsmPrinter.h
index f425ef4..05dc9bb 100644
--- a/lib/Target/R600/AMDGPUAsmPrinter.h
+++ b/lib/Target/R600/AMDGPUAsmPrinter.h
@@ -1,4 +1,4 @@
-//===-- AMDGPUAsmPrinter.h - Print AMDGPU assembly code -------------------===//
+//===-- AMDGPUAsmPrinter.h - Print AMDGPU assembly code ---------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -16,14 +16,15 @@
#define AMDGPU_ASMPRINTER_H
#include "llvm/CodeGen/AsmPrinter.h"
+#include <string>
+#include <vector>
namespace llvm {
class AMDGPUAsmPrinter : public AsmPrinter {
public:
- explicit AMDGPUAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
- : AsmPrinter(TM, Streamer) { }
+ explicit AMDGPUAsmPrinter(TargetMachine &TM, MCStreamer &Streamer);
virtual bool runOnMachineFunction(MachineFunction &MF);
@@ -38,6 +39,11 @@ public:
/// Implemented in AMDGPUMCInstLower.cpp
virtual void EmitInstruction(const MachineInstr *MI);
+
+protected:
+ bool DisasmEnabled;
+ std::vector<std::string> DisasmLines, HexLines;
+ size_t DisasmLineMaxLen;
};
} // End llvm namespace
diff --git a/lib/Target/R600/AMDGPUCallingConv.td b/lib/Target/R600/AMDGPUCallingConv.td
index fc95d58..65cdb24 100644
--- a/lib/Target/R600/AMDGPUCallingConv.td
+++ b/lib/Target/R600/AMDGPUCallingConv.td
@@ -19,12 +19,13 @@ def CC_SI : CallingConv<[
CCIfInReg<CCIfType<[f32, i32] , CCAssignToReg<[
SGPR0, SGPR1, SGPR2, SGPR3, SGPR4, SGPR5, SGPR6, SGPR7,
- SGPR8, SGPR9, SGPR10, SGPR11, SGPR12, SGPR13, SGPR14, SGPR15
+ SGPR8, SGPR9, SGPR10, SGPR11, SGPR12, SGPR13, SGPR14, SGPR15,
+ SGPR16
]>>>,
CCIfInReg<CCIfType<[i64] , CCAssignToRegWithShadow<
[ SGPR0, SGPR2, SGPR4, SGPR6, SGPR8, SGPR10, SGPR12, SGPR14 ],
- [ SGPR1, SGPR3, SGPR5, SGPR7, SGPR9, SGPR11, SGPR12, SGPR15 ]
+ [ SGPR1, SGPR3, SGPR5, SGPR7, SGPR9, SGPR11, SGPR13, SGPR15 ]
>>>,
CCIfNotInReg<CCIfType<[f32, i32] , CCAssignToReg<[
@@ -32,21 +33,33 @@ def CC_SI : CallingConv<[
VGPR8, VGPR9, VGPR10, VGPR11, VGPR12, VGPR13, VGPR14, VGPR15,
VGPR16, VGPR17, VGPR18, VGPR19, VGPR20, VGPR21, VGPR22, VGPR23,
VGPR24, VGPR25, VGPR26, VGPR27, VGPR28, VGPR29, VGPR30, VGPR31
- ]>>>
+ ]>>>,
+
+ CCIfByVal<CCIfType<[i64] , CCAssignToRegWithShadow<
+ [ SGPR0, SGPR2, SGPR4, SGPR6, SGPR8, SGPR10, SGPR12, SGPR14 ],
+ [ SGPR1, SGPR3, SGPR5, SGPR7, SGPR9, SGPR11, SGPR13, SGPR15 ]
+ >>>
]>;
+// Calling convention for R600
+def CC_R600 : CallingConv<[
+ CCIfInReg<CCIfType<[v4f32, v4i32] , CCAssignToReg<[
+ T0_XYZW, T1_XYZW, T2_XYZW, T3_XYZW, T4_XYZW, T5_XYZW, T6_XYZW, T7_XYZW,
+ T8_XYZW, T9_XYZW, T10_XYZW, T11_XYZW, T12_XYZW, T13_XYZW, T14_XYZW, T15_XYZW,
+ T16_XYZW, T17_XYZW, T18_XYZW, T19_XYZW, T20_XYZW, T21_XYZW, T22_XYZW,
+ T23_XYZW, T24_XYZW, T25_XYZW, T26_XYZW, T27_XYZW, T28_XYZW, T29_XYZW,
+ T30_XYZW, T31_XYZW, T32_XYZW
+ ]>>>
+]>;
+
// Calling convention for compute kernels
def CC_AMDGPU_Kernel : CallingConv<[
- CCIfType<[v4i32, v4f32], CCAssignToStack <16, 16>>,
- CCIfType<[i64, f64, v2f32, v2i32], CCAssignToStack < 8, 8>>,
- CCIfType<[i32, f32], CCAssignToStack < 4, 4>>,
- CCIfType<[i16], CCAssignToStack < 2, 4>>,
- CCIfType<[i8], CCAssignToStack < 1, 4>>
+ CCCustom<"allocateStack">
]>;
def CC_AMDGPU : CallingConv<[
- CCIf<"State.getTarget().getSubtarget<AMDGPUSubtarget>().getGeneration() == "
+ CCIf<"State.getTarget().getSubtarget<AMDGPUSubtarget>().getGeneration() >= "
"AMDGPUSubtarget::SOUTHERN_ISLANDS && "
"State.getMachineFunction().getInfo<SIMachineFunctionInfo>()->"#
"ShaderType == ShaderType::COMPUTE", CCDelegateTo<CC_AMDGPU_Kernel>>,
@@ -55,5 +68,7 @@ def CC_AMDGPU : CallingConv<[
"State.getMachineFunction().getInfo<R600MachineFunctionInfo>()->"
"ShaderType == ShaderType::COMPUTE", CCDelegateTo<CC_AMDGPU_Kernel>>,
CCIf<"State.getTarget().getSubtarget<AMDGPUSubtarget>()"#
- ".getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS", CCDelegateTo<CC_SI>>
+ ".getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS", CCDelegateTo<CC_SI>>,
+ CCIf<"State.getTarget().getSubtarget<AMDGPUSubtarget>()"#
+ ".getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS", CCDelegateTo<CC_R600>>
]>;
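The CCCustom<"allocateStack"> entry above names a C++ handler with the standard custom calling-convention signature; a hedged sketch of what such a handler plausibly does, assigning each kernel argument a naturally aligned stack slot (the real definition lives in the AMDGPU lowering code):

    #include "llvm/CodeGen/CallingConvLower.h"
    using namespace llvm;

    static bool allocateStack(unsigned ValNo, MVT ValVT, MVT LocVT,
                              CCValAssign::LocInfo LocInfo,
                              ISD::ArgFlagsTy ArgFlags, CCState &State) {
      unsigned Offset = State.AllocateStack(ValVT.getStoreSize(),
                                            ValVT.getStoreSize());
      State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
      return true;
    }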
diff --git a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
index f222901..a989135 100644
--- a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
+++ b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
@@ -77,6 +77,7 @@ private:
bool isLocalLoad(const LoadSDNode *N) const;
bool isRegionLoad(const LoadSDNode *N) const;
+ const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const;
bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue& IntPtr);
bool SelectGlobalValueVariableOffset(SDValue Addr,
SDValue &BaseReg, SDValue& Offset);
@@ -102,6 +103,37 @@ AMDGPUDAGToDAGISel::AMDGPUDAGToDAGISel(TargetMachine &TM)
AMDGPUDAGToDAGISel::~AMDGPUDAGToDAGISel() {
}
+/// \brief Determine the register class for \p OpNo
+/// \returns The register class of the virtual register that will be used for
+/// the given operand number \p OpNo or NULL if the register class cannot be
+/// determined.
+const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N,
+ unsigned OpNo) const {
+ if (!N->isMachineOpcode()) {
+ return NULL;
+ }
+ switch (N->getMachineOpcode()) {
+ default: {
+ const MCInstrDesc &Desc = TM.getInstrInfo()->get(N->getMachineOpcode());
+ unsigned OpIdx = Desc.getNumDefs() + OpNo;
+ if (OpIdx >= Desc.getNumOperands())
+ return NULL;
+ int RegClass = Desc.OpInfo[OpIdx].RegClass;
+ if (RegClass == -1) {
+ return NULL;
+ }
+ return TM.getRegisterInfo()->getRegClass(RegClass);
+ }
+ case AMDGPU::REG_SEQUENCE: {
+ const TargetRegisterClass *SuperRC = TM.getRegisterInfo()->getRegClass(
+ cast<ConstantSDNode>(N->getOperand(0))->getZExtValue());
+ unsigned SubRegIdx =
+      cast<ConstantSDNode>(N->getOperand(OpNo + 1))->getZExtValue();
+ return TM.getRegisterInfo()->getSubClassWithSubReg(SuperRC, SubRegIdx);
+ }
+ }
+}
+
SDValue AMDGPUDAGToDAGISel::getSmallIPtrImm(unsigned int Imm) {
return CurDAG->getTargetConstant(Imm, MVT::i32);
}
@@ -161,130 +193,94 @@ bool AMDGPUDAGToDAGISel::SelectADDR64(SDValue Addr, SDValue& R1, SDValue& R2) {
}
SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
- const R600InstrInfo *TII =
- static_cast<const R600InstrInfo*>(TM.getInstrInfo());
unsigned int Opc = N->getOpcode();
if (N->isMachineOpcode()) {
+ N->setNodeId(-1);
return NULL; // Already selected.
}
switch (Opc) {
default: break;
- case AMDGPUISD::CONST_ADDRESS: {
- for (SDNode::use_iterator I = N->use_begin(), Next = llvm::next(I);
- I != SDNode::use_end(); I = Next) {
- Next = llvm::next(I);
- if (!I->isMachineOpcode()) {
- continue;
- }
- unsigned Opcode = I->getMachineOpcode();
- bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
- int SrcIdx = I.getOperandNo();
- int SelIdx;
- // Unlike MachineInstrs, SDNodes do not have results in their operand
- // list, so we need to increment the SrcIdx, since
- // R600InstrInfo::getOperandIdx is based on the MachineInstr indices.
- if (HasDst) {
- SrcIdx++;
- }
-
- SelIdx = TII->getSelIdx(I->getMachineOpcode(), SrcIdx);
- if (SelIdx < 0) {
- continue;
- }
-
- SDValue CstOffset;
- if (N->getValueType(0).isVector() ||
- !SelectGlobalValueConstantOffset(N->getOperand(0), CstOffset))
- continue;
-
- // Gather constants values
- int SrcIndices[] = {
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src2),
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
- };
- std::vector<unsigned> Consts;
- for (unsigned i = 0; i < sizeof(SrcIndices) / sizeof(int); i++) {
- int OtherSrcIdx = SrcIndices[i];
- int OtherSelIdx = TII->getSelIdx(Opcode, OtherSrcIdx);
- if (OtherSrcIdx < 0 || OtherSelIdx < 0) {
+ case ISD::BUILD_VECTOR: {
+ unsigned RegClassID;
+ const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
+ const AMDGPURegisterInfo *TRI =
+ static_cast<const AMDGPURegisterInfo*>(TM.getRegisterInfo());
+ const SIRegisterInfo *SIRI =
+ static_cast<const SIRegisterInfo*>(TM.getRegisterInfo());
+ EVT VT = N->getValueType(0);
+ unsigned NumVectorElts = VT.getVectorNumElements();
+ assert(VT.getVectorElementType().bitsEq(MVT::i32));
+ if (ST.getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
+ bool UseVReg = true;
+ for (SDNode::use_iterator U = N->use_begin(), E = SDNode::use_end();
+ U != E; ++U) {
+ if (!U->isMachineOpcode()) {
continue;
}
- if (HasDst) {
- OtherSrcIdx--;
- OtherSelIdx--;
+ const TargetRegisterClass *RC = getOperandRegClass(*U, U.getOperandNo());
+ if (!RC) {
+ continue;
}
- if (RegisterSDNode *Reg =
- dyn_cast<RegisterSDNode>(I->getOperand(OtherSrcIdx))) {
- if (Reg->getReg() == AMDGPU::ALU_CONST) {
- ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(I->getOperand(OtherSelIdx));
- Consts.push_back(Cst->getZExtValue());
- }
+ if (SIRI->isSGPRClass(RC)) {
+ UseVReg = false;
}
}
-
- ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(CstOffset);
- Consts.push_back(Cst->getZExtValue());
- if (!TII->fitsConstReadLimitations(Consts))
- continue;
-
- // Convert back to SDNode indices
- if (HasDst) {
- SrcIdx--;
- SelIdx--;
+ switch(NumVectorElts) {
+ case 1: RegClassID = UseVReg ? AMDGPU::VReg_32RegClassID :
+ AMDGPU::SReg_32RegClassID;
+ break;
+ case 2: RegClassID = UseVReg ? AMDGPU::VReg_64RegClassID :
+ AMDGPU::SReg_64RegClassID;
+ break;
+ case 4: RegClassID = UseVReg ? AMDGPU::VReg_128RegClassID :
+ AMDGPU::SReg_128RegClassID;
+ break;
+ case 8: RegClassID = UseVReg ? AMDGPU::VReg_256RegClassID :
+ AMDGPU::SReg_256RegClassID;
+ break;
+ case 16: RegClassID = UseVReg ? AMDGPU::VReg_512RegClassID :
+ AMDGPU::SReg_512RegClassID;
+ break;
+ default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR");
}
- std::vector<SDValue> Ops;
- for (int i = 0, e = I->getNumOperands(); i != e; ++i) {
- if (i == SrcIdx) {
- Ops.push_back(CurDAG->getRegister(AMDGPU::ALU_CONST, MVT::f32));
- } else if (i == SelIdx) {
- Ops.push_back(CstOffset);
- } else {
- Ops.push_back(I->getOperand(i));
- }
+ } else {
+ // BUILD_VECTOR was lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG
+ // that adds a 128-bit register copy when going through the
+ // TwoAddressInstructions pass. We want to avoid 128-bit copies as much
+ // as possible because they can't be bundled by our scheduler.
+ switch(NumVectorElts) {
+ case 2: RegClassID = AMDGPU::R600_Reg64RegClassID; break;
+ case 4: RegClassID = AMDGPU::R600_Reg128RegClassID; break;
+ default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR");
}
- CurDAG->UpdateNodeOperands(*I, Ops.data(), Ops.size());
- }
- break;
- }
- case ISD::BUILD_VECTOR: {
- const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
- if (ST.getGeneration() > AMDGPUSubtarget::NORTHERN_ISLANDS) {
- break;
}
- unsigned RegClassID;
- switch(N->getValueType(0).getVectorNumElements()) {
- case 2: RegClassID = AMDGPU::R600_Reg64RegClassID; break;
- case 4: RegClassID = AMDGPU::R600_Reg128RegClassID; break;
- default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR");
+ SDValue RegClass = CurDAG->getTargetConstant(RegClassID, MVT::i32);
+
+ if (NumVectorElts == 1) {
+ return CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS,
+ VT.getVectorElementType(),
+ N->getOperand(0), RegClass);
}
- // BUILD_VECTOR is usually lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG
- // that adds a 128 bits reg copy when going through TwoAddressInstructions
- // pass. We want to avoid 128 bits copies as much as possible because they
- // can't be bundled by our scheduler.
- SDValue RegSeqArgs[9] = {
- CurDAG->getTargetConstant(RegClassID, MVT::i32),
- SDValue(), CurDAG->getTargetConstant(AMDGPU::sub0, MVT::i32),
- SDValue(), CurDAG->getTargetConstant(AMDGPU::sub1, MVT::i32),
- SDValue(), CurDAG->getTargetConstant(AMDGPU::sub2, MVT::i32),
- SDValue(), CurDAG->getTargetConstant(AMDGPU::sub3, MVT::i32)
- };
+
+ assert(NumVectorElts <= 16 && "Vectors with more than 16 elements not "
+ "supported yet");
+ // 16 = Max Num Vector Elements
+ // 2 = 2 REG_SEQUENCE operands per element (value, subreg index)
+ // 1 = Vector Register Class
+ SDValue RegSeqArgs[16 * 2 + 1];
+
+ RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, MVT::i32);
bool IsRegSeq = true;
for (unsigned i = 0; i < N->getNumOperands(); i++) {
+ // XXX: Why is this here?
if (dyn_cast<RegisterSDNode>(N->getOperand(i))) {
IsRegSeq = false;
break;
}
- RegSeqArgs[2 * i + 1] = N->getOperand(i);
+ RegSeqArgs[1 + (2 * i)] = N->getOperand(i);
+ RegSeqArgs[1 + (2 * i) + 1] =
+ CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), MVT::i32);
}
if (!IsRegSeq)
break;
@@ -313,285 +309,44 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE,
SDLoc(N), N->getValueType(0), Ops);
}
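
For reference, a minimal standalone sketch of the operand layout the loop above builds for REG_SEQUENCE: one register-class id followed by (value, subreg-index) pairs, so element i lands at slot 1 + 2*i. Names and values here are illustrative only, and there is no LLVM dependency:

    #include <cstdio>
    #include <string>
    #include <vector>

    int main() {
      const unsigned NumElts = 4;              // e.g. a v4i32 BUILD_VECTOR
      // 1 slot for the register class + 2 slots (value, subreg) per element.
      std::vector<std::string> RegSeqArgs(1 + 2 * NumElts);
      RegSeqArgs[0] = "R600_Reg128RegClassID";
      const char *SubRegs[] = {"sub0", "sub1", "sub2", "sub3"};
      for (unsigned i = 0; i < NumElts; ++i) {
        RegSeqArgs[1 + (2 * i)] = "elt" + std::to_string(i); // operand value
        RegSeqArgs[1 + (2 * i) + 1] = SubRegs[i];            // its subreg index
      }
      for (const std::string &A : RegSeqArgs)
        std::printf("%s\n", A.c_str());
      return 0;
    }
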
-
- case ISD::ConstantFP:
- case ISD::Constant: {
+ case AMDGPUISD::REGISTER_LOAD: {
const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
- // XXX: Custom immediate lowering not implemented yet. Instead we use
- // pseudo instructions defined in SIInstructions.td
- if (ST.getGeneration() > AMDGPUSubtarget::NORTHERN_ISLANDS) {
+ if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS)
break;
- }
-
- uint64_t ImmValue = 0;
- unsigned ImmReg = AMDGPU::ALU_LITERAL_X;
-
- if (N->getOpcode() == ISD::ConstantFP) {
- // XXX: 64-bit Immediates not supported yet
- assert(N->getValueType(0) != MVT::f64);
-
- ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N);
- APFloat Value = C->getValueAPF();
- float FloatValue = Value.convertToFloat();
- if (FloatValue == 0.0) {
- ImmReg = AMDGPU::ZERO;
- } else if (FloatValue == 0.5) {
- ImmReg = AMDGPU::HALF;
- } else if (FloatValue == 1.0) {
- ImmReg = AMDGPU::ONE;
- } else {
- ImmValue = Value.bitcastToAPInt().getZExtValue();
- }
- } else {
- // XXX: 64-bit Immediates not supported yet
- assert(N->getValueType(0) != MVT::i64);
-
- ConstantSDNode *C = dyn_cast<ConstantSDNode>(N);
- if (C->getZExtValue() == 0) {
- ImmReg = AMDGPU::ZERO;
- } else if (C->getZExtValue() == 1) {
- ImmReg = AMDGPU::ONE_INT;
- } else {
- ImmValue = C->getZExtValue();
- }
- }
-
- for (SDNode::use_iterator Use = N->use_begin(), Next = llvm::next(Use);
- Use != SDNode::use_end(); Use = Next) {
- Next = llvm::next(Use);
- std::vector<SDValue> Ops;
- for (unsigned i = 0; i < Use->getNumOperands(); ++i) {
- Ops.push_back(Use->getOperand(i));
- }
-
- if (!Use->isMachineOpcode()) {
- if (ImmReg == AMDGPU::ALU_LITERAL_X) {
- // We can only use literal constants (e.g. AMDGPU::ZERO,
- // AMDGPU::ONE, etc) in machine opcodes.
- continue;
- }
- } else {
- if (!TII->isALUInstr(Use->getMachineOpcode()) ||
- (TII->get(Use->getMachineOpcode()).TSFlags &
- R600_InstFlag::VECTOR)) {
- continue;
- }
-
- int ImmIdx = TII->getOperandIdx(Use->getMachineOpcode(),
- AMDGPU::OpName::literal);
- if (ImmIdx == -1) {
- continue;
- }
-
- if (TII->getOperandIdx(Use->getMachineOpcode(),
- AMDGPU::OpName::dst) != -1) {
- // subtract one from ImmIdx, because the DST operand is usually index
- // 0 for MachineInstrs, but we have no DST in the Ops vector.
- ImmIdx--;
- }
-
- // Check that we aren't already using an immediate.
- // XXX: It's possible for an instruction to have more than one
- // immediate operand, but this is not supported yet.
- if (ImmReg == AMDGPU::ALU_LITERAL_X) {
- ConstantSDNode *C = dyn_cast<ConstantSDNode>(Use->getOperand(ImmIdx));
- assert(C);
-
- if (C->getZExtValue() != 0) {
- // This instruction is already using an immediate.
- continue;
- }
-
- // Set the immediate value
- Ops[ImmIdx] = CurDAG->getTargetConstant(ImmValue, MVT::i32);
- }
- }
- // Set the immediate register
- Ops[Use.getOperandNo()] = CurDAG->getRegister(ImmReg, MVT::i32);
-
- CurDAG->UpdateNodeOperands(*Use, Ops.data(), Use->getNumOperands());
- }
- break;
- }
- }
- SDNode *Result = SelectCode(N);
-
- // Fold operands of selected node
-
- const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
- if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
- const R600InstrInfo *TII =
- static_cast<const R600InstrInfo*>(TM.getInstrInfo());
- if (Result && Result->isMachineOpcode() && Result->getMachineOpcode() == AMDGPU::DOT_4) {
- bool IsModified = false;
- do {
- std::vector<SDValue> Ops;
- for(SDNode::op_iterator I = Result->op_begin(), E = Result->op_end();
- I != E; ++I)
- Ops.push_back(*I);
- IsModified = FoldDotOperands(Result->getMachineOpcode(), TII, Ops);
- if (IsModified) {
- Result = CurDAG->UpdateNodeOperands(Result, Ops.data(), Ops.size());
- }
- } while (IsModified);
-
- }
- if (Result && Result->isMachineOpcode() &&
- !(TII->get(Result->getMachineOpcode()).TSFlags & R600_InstFlag::VECTOR)
- && TII->hasInstrModifiers(Result->getMachineOpcode())) {
- // Fold FNEG/FABS
- // TODO: Isel can generate multiple MachineInst, we need to recursively
- // parse Result
- bool IsModified = false;
- do {
- std::vector<SDValue> Ops;
- for(SDNode::op_iterator I = Result->op_begin(), E = Result->op_end();
- I != E; ++I)
- Ops.push_back(*I);
- IsModified = FoldOperands(Result->getMachineOpcode(), TII, Ops);
- if (IsModified) {
- Result = CurDAG->UpdateNodeOperands(Result, Ops.data(), Ops.size());
- }
- } while (IsModified);
-
- // If node has a single use which is CLAMP_R600, folds it
- if (Result->hasOneUse() && Result->isMachineOpcode()) {
- SDNode *PotentialClamp = *Result->use_begin();
- if (PotentialClamp->isMachineOpcode() &&
- PotentialClamp->getMachineOpcode() == AMDGPU::CLAMP_R600) {
- unsigned ClampIdx =
- TII->getOperandIdx(Result->getMachineOpcode(), AMDGPU::OpName::clamp);
- std::vector<SDValue> Ops;
- unsigned NumOp = Result->getNumOperands();
- for (unsigned i = 0; i < NumOp; ++i) {
- Ops.push_back(Result->getOperand(i));
- }
- Ops[ClampIdx - 1] = CurDAG->getTargetConstant(1, MVT::i32);
- Result = CurDAG->SelectNodeTo(PotentialClamp,
- Result->getMachineOpcode(), PotentialClamp->getVTList(),
- Ops.data(), NumOp);
- }
- }
- }
+ SDValue Addr, Offset;
+
+ SelectADDRIndirect(N->getOperand(1), Addr, Offset);
+ const SDValue Ops[] = {
+ Addr,
+ Offset,
+ CurDAG->getTargetConstant(0, MVT::i32),
+ N->getOperand(0),
+ };
+ return CurDAG->getMachineNode(AMDGPU::SI_RegisterLoad, SDLoc(N),
+ CurDAG->getVTList(MVT::i32, MVT::i64, MVT::Other),
+ Ops);
}
-
- return Result;
-}
-
-bool AMDGPUDAGToDAGISel::FoldOperand(SDValue &Src, SDValue &Sel, SDValue &Neg,
- SDValue &Abs, const R600InstrInfo *TII) {
- switch (Src.getOpcode()) {
- case ISD::FNEG:
- Src = Src.getOperand(0);
- Neg = CurDAG->getTargetConstant(1, MVT::i32);
- return true;
- case ISD::FABS:
- if (!Abs.getNode())
- return false;
- Src = Src.getOperand(0);
- Abs = CurDAG->getTargetConstant(1, MVT::i32);
- return true;
- case ISD::BITCAST:
- Src = Src.getOperand(0);
- return true;
- default:
- return false;
+ case AMDGPUISD::REGISTER_STORE: {
+ const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
+ if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS)
+ break;
+ SDValue Addr, Offset;
+ SelectADDRIndirect(N->getOperand(2), Addr, Offset);
+ const SDValue Ops[] = {
+ N->getOperand(1),
+ Addr,
+ Offset,
+ CurDAG->getTargetConstant(0, MVT::i32),
+ N->getOperand(0),
+ };
+ return CurDAG->getMachineNode(AMDGPU::SI_RegisterStorePseudo, SDLoc(N),
+ CurDAG->getVTList(MVT::Other),
+ Ops);
}
-}
-
-bool AMDGPUDAGToDAGISel::FoldOperands(unsigned Opcode,
- const R600InstrInfo *TII, std::vector<SDValue> &Ops) {
- int OperandIdx[] = {
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src2)
- };
- int SelIdx[] = {
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_sel),
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_sel),
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src2_sel)
- };
- int NegIdx[] = {
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg),
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg),
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src2_neg)
- };
- int AbsIdx[] = {
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs),
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs),
- -1
- };
-
-
- for (unsigned i = 0; i < 3; i++) {
- if (OperandIdx[i] < 0)
- return false;
- SDValue &Src = Ops[OperandIdx[i] - 1];
- SDValue &Sel = Ops[SelIdx[i] - 1];
- SDValue &Neg = Ops[NegIdx[i] - 1];
- SDValue FakeAbs;
- SDValue &Abs = (AbsIdx[i] > -1) ? Ops[AbsIdx[i] - 1] : FakeAbs;
- if (FoldOperand(Src, Sel, Neg, Abs, TII))
- return true;
}
- return false;
+ return SelectCode(N);
}
-bool AMDGPUDAGToDAGISel::FoldDotOperands(unsigned Opcode,
- const R600InstrInfo *TII, std::vector<SDValue> &Ops) {
- int OperandIdx[] = {
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
- };
- int SelIdx[] = {
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_sel_X),
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_sel_Y),
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_sel_Z),
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_sel_W),
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_sel_X),
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_sel_Y),
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_sel_Z),
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_sel_W)
- };
- int NegIdx[] = {
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_X),
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Y),
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Z),
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_W),
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_X),
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Y),
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Z),
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_W)
- };
- int AbsIdx[] = {
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_X),
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Y),
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Z),
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_W),
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_X),
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Y),
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Z),
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_W)
- };
-
- for (unsigned i = 0; i < 8; i++) {
- if (OperandIdx[i] < 0)
- return false;
- SDValue &Src = Ops[OperandIdx[i] - 1];
- SDValue &Sel = Ops[SelIdx[i] - 1];
- SDValue &Neg = Ops[NegIdx[i] - 1];
- SDValue &Abs = Ops[AbsIdx[i] - 1];
- if (FoldOperand(Src, Sel, Neg, Abs, TII))
- return true;
- }
- return false;
-}
bool AMDGPUDAGToDAGISel::checkType(const Value *ptr, unsigned int addrspace) {
if (!ptr) {
@@ -804,26 +559,27 @@ bool AMDGPUDAGToDAGISel::SelectU24(SDValue Op, SDValue &U24) {
}
void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
-
- if (Subtarget.getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS) {
- return;
- }
-
- // Go over all selected nodes and try to fold them a bit more
const AMDGPUTargetLowering& Lowering =
(*(const AMDGPUTargetLowering*)getTargetLowering());
- for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
- E = CurDAG->allnodes_end(); I != E; ++I) {
+ bool IsModified = false;
+ do {
+ IsModified = false;
+ // Go over all selected nodes and try to fold them a bit more
+ for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
+ E = CurDAG->allnodes_end(); I != E; ++I) {
- SDNode *Node = I;
+ SDNode *Node = I;
- MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(I);
- if (!MachineNode)
- continue;
+ MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(I);
+ if (!MachineNode)
+ continue;
- SDNode *ResNode = Lowering.PostISelFolding(MachineNode, *CurDAG);
- if (ResNode != Node) {
- ReplaceUses(Node, ResNode);
+ SDNode *ResNode = Lowering.PostISelFolding(MachineNode, *CurDAG);
+ if (ResNode != Node) {
+ ReplaceUses(Node, ResNode);
+ IsModified = true;
+ }
}
- }
+ CurDAG->RemoveDeadNodes();
+ } while (IsModified);
}
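
For reference, a minimal standalone sketch of the fixed-point pattern PostprocessISelDAG adopts above: keep rescanning the whole node list until one full pass makes no change, since a successful fold can expose further folds in nodes visited earlier. The erase-based "fold" below is a stand-in, not the real PostISelFolding:

    #include <cstdio>
    #include <vector>

    // Stand-in for one folding pass: collapse the first adjacent duplicate.
    static bool foldOnce(std::vector<int> &Nodes) {
      for (size_t i = 0; i + 1 < Nodes.size(); ++i) {
        if (Nodes[i] == Nodes[i + 1]) {
          Nodes.erase(Nodes.begin() + i + 1);
          return true;                  // something changed; caller rescans
        }
      }
      return false;                     // fixed point reached
    }

    int main() {
      std::vector<int> Nodes = {1, 1, 2, 2, 2, 3};
      bool IsModified;
      do {
        IsModified = foldOnce(Nodes);   // one pass over all nodes
      } while (IsModified);             // repeat until a pass changes nothing
      for (int N : Nodes)
        std::printf("%d ", N);          // prints: 1 2 3
      std::printf("\n");
      return 0;
    }
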
diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp
index efd2756..c4d75ff 100644
--- a/lib/Target/R600/AMDGPUISelLowering.cpp
+++ b/lib/Target/R600/AMDGPUISelLowering.cpp
@@ -15,6 +15,7 @@
#include "AMDGPUISelLowering.h"
#include "AMDGPU.h"
+#include "AMDGPUFrameLowering.h"
#include "AMDGPURegisterInfo.h"
#include "AMDGPUSubtarget.h"
#include "AMDILIntrinsicInfo.h"
@@ -28,6 +29,14 @@
#include "llvm/IR/DataLayout.h"
using namespace llvm;
+static bool allocateStack(unsigned ValNo, MVT ValVT, MVT LocVT,
+ CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags, CCState &State) {
+ unsigned Offset = State.AllocateStack(ValVT.getSizeInBits() / 8, ArgFlags.getOrigAlign());
+ State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
+
+ return true;
+}
#include "AMDGPUGenCallingConv.inc"
@@ -49,6 +58,7 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
setOperationAction(ISD::FABS, MVT::f32, Legal);
setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
setOperationAction(ISD::FRINT, MVT::f32, Legal);
+ setOperationAction(ISD::FROUND, MVT::f32, Legal);
// The hardware supports ROTR, but not ROTL
setOperationAction(ISD::ROTL, MVT::i32, Expand);
@@ -64,9 +74,29 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
setOperationAction(ISD::STORE, MVT::v4f32, Promote);
AddPromotedToType(ISD::STORE, MVT::v4f32, MVT::v4i32);
+ setOperationAction(ISD::STORE, MVT::v8f32, Promote);
+ AddPromotedToType(ISD::STORE, MVT::v8f32, MVT::v8i32);
+
+ setOperationAction(ISD::STORE, MVT::v16f32, Promote);
+ AddPromotedToType(ISD::STORE, MVT::v16f32, MVT::v16i32);
+
setOperationAction(ISD::STORE, MVT::f64, Promote);
AddPromotedToType(ISD::STORE, MVT::f64, MVT::i64);
+ // Custom lowering of vector stores is required for local address space
+ // stores.
+ setOperationAction(ISD::STORE, MVT::v4i32, Custom);
+ // XXX: Native v2i32 local address space stores are possible, but not
+ // currently implemented.
+ setOperationAction(ISD::STORE, MVT::v2i32, Custom);
+
+ setTruncStoreAction(MVT::v2i32, MVT::v2i16, Custom);
+ setTruncStoreAction(MVT::v2i32, MVT::v2i8, Custom);
+ setTruncStoreAction(MVT::v4i32, MVT::v4i8, Custom);
+ // XXX: This can be changed to Custom, once ExpandVectorStores can
+ // handle 64-bit stores.
+ setTruncStoreAction(MVT::v4i32, MVT::v4i16, Expand);
+
setOperationAction(ISD::LOAD, MVT::f32, Promote);
AddPromotedToType(ISD::LOAD, MVT::f32, MVT::i32);
@@ -76,15 +106,38 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
setOperationAction(ISD::LOAD, MVT::v4f32, Promote);
AddPromotedToType(ISD::LOAD, MVT::v4f32, MVT::v4i32);
+ setOperationAction(ISD::LOAD, MVT::v8f32, Promote);
+ AddPromotedToType(ISD::LOAD, MVT::v8f32, MVT::v8i32);
+
+ setOperationAction(ISD::LOAD, MVT::v16f32, Promote);
+ AddPromotedToType(ISD::LOAD, MVT::v16f32, MVT::v16i32);
+
setOperationAction(ISD::LOAD, MVT::f64, Promote);
AddPromotedToType(ISD::LOAD, MVT::f64, MVT::i64);
- setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2i32, Expand);
- setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2f32, Expand);
+ setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i32, Custom);
+ setOperationAction(ISD::CONCAT_VECTORS, MVT::v4f32, Custom);
+ setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2i32, Custom);
+ setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2f32, Custom);
+
+ setLoadExtAction(ISD::EXTLOAD, MVT::v2i8, Expand);
+ setLoadExtAction(ISD::SEXTLOAD, MVT::v2i8, Expand);
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i8, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::v4i8, Expand);
+ setLoadExtAction(ISD::SEXTLOAD, MVT::v4i8, Expand);
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i8, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::v2i16, Expand);
+ setLoadExtAction(ISD::SEXTLOAD, MVT::v2i16, Expand);
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i16, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::v4i16, Expand);
+ setLoadExtAction(ISD::SEXTLOAD, MVT::v4i16, Expand);
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i16, Expand);
setOperationAction(ISD::FNEG, MVT::v2f32, Expand);
setOperationAction(ISD::FNEG, MVT::v4f32, Expand);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
+
setOperationAction(ISD::MUL, MVT::i64, Expand);
setOperationAction(ISD::UDIV, MVT::i32, Expand);
@@ -93,14 +146,13 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
setOperationAction(ISD::VSELECT, MVT::v2f32, Expand);
setOperationAction(ISD::VSELECT, MVT::v4f32, Expand);
- static const int types[] = {
- (int)MVT::v2i32,
- (int)MVT::v4i32
+ static const MVT::SimpleValueType IntTypes[] = {
+ MVT::v2i32, MVT::v4i32
};
- const size_t NumTypes = array_lengthof(types);
+ const size_t NumIntTypes = array_lengthof(IntTypes);
- for (unsigned int x = 0; x < NumTypes; ++x) {
- MVT::SimpleValueType VT = (MVT::SimpleValueType)types[x];
+ for (unsigned int x = 0; x < NumIntTypes; ++x) {
+ MVT::SimpleValueType VT = IntTypes[x];
//Expand the following operations for the current type by default
setOperationAction(ISD::ADD, VT, Expand);
setOperationAction(ISD::AND, VT, Expand);
@@ -119,6 +171,23 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
setOperationAction(ISD::VSELECT, VT, Expand);
setOperationAction(ISD::XOR, VT, Expand);
}
+
+ static const MVT::SimpleValueType FloatTypes[] = {
+ MVT::v2f32, MVT::v4f32
+ };
+ const size_t NumFloatTypes = array_lengthof(FloatTypes);
+
+ for (unsigned int x = 0; x < NumFloatTypes; ++x) {
+ MVT::SimpleValueType VT = FloatTypes[x];
+ setOperationAction(ISD::FABS, VT, Expand);
+ setOperationAction(ISD::FADD, VT, Expand);
+ setOperationAction(ISD::FDIV, VT, Expand);
+ setOperationAction(ISD::FFLOOR, VT, Expand);
+ setOperationAction(ISD::FMUL, VT, Expand);
+ setOperationAction(ISD::FRINT, VT, Expand);
+ setOperationAction(ISD::FSQRT, VT, Expand);
+ setOperationAction(ISD::FSUB, VT, Expand);
+ }
}
//===----------------------------------------------------------------------===//
@@ -129,6 +198,18 @@ MVT AMDGPUTargetLowering::getVectorIdxTy() const {
return MVT::i32;
}
+bool AMDGPUTargetLowering::isLoadBitCastBeneficial(EVT LoadTy,
+ EVT CastTy) const {
+ if (LoadTy.getSizeInBits() != CastTy.getSizeInBits())
+ return true;
+
+ unsigned LScalarSize = LoadTy.getScalarType().getSizeInBits();
+ unsigned CastScalarSize = CastTy.getScalarType().getSizeInBits();
+
+ return ((LScalarSize <= CastScalarSize) ||
+ (CastScalarSize >= 32) ||
+ (LScalarSize < 32));
+}
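
For reference, a standalone probe of the equal-width branch of the heuristic above: the only shape it rejects is splitting 32-bit-or-wider scalar elements into sub-32-bit ones (e.g. bitcasting an i32 load to v4i8), which would turn one dword load into several narrow loads. The types in the comments are illustrative:

    #include <cstdio>

    static bool beneficial(unsigned LScalarSize, unsigned CastScalarSize) {
      return (LScalarSize <= CastScalarSize) ||
             (CastScalarSize >= 32) ||
             (LScalarSize < 32);
    }

    int main() {
      std::printf("i32  -> v4i8: %d\n", beneficial(32, 8));  // 0 (rejected)
      std::printf("v4i8 -> i32 : %d\n", beneficial(8, 32));  // 1
      std::printf("v2i32-> i64 : %d\n", beneficial(32, 64)); // 1
      return 0;
    }
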
//===---------------------------------------------------------------------===//
// Target Properties
@@ -182,8 +263,12 @@ SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
case ISD::SIGN_EXTEND_INREG: return LowerSIGN_EXTEND_INREG(Op, DAG);
case ISD::BRCOND: return LowerBRCOND(Op, DAG);
// AMDGPU DAG lowering
+ case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG);
+ case ISD::EXTRACT_SUBVECTOR: return LowerEXTRACT_SUBVECTOR(Op, DAG);
+ case ISD::FrameIndex: return LowerFrameIndex(Op, DAG);
case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
case ISD::UDIVREM: return LowerUDIVREM(Op, DAG);
+ case ISD::UINT_TO_FP: return LowerUINT_TO_FP(Op, DAG);
}
return Op;
}
@@ -194,18 +279,82 @@ SDValue AMDGPUTargetLowering::LowerGlobalAddress(AMDGPUMachineFunction* MFI,
const DataLayout *TD = getTargetMachine().getDataLayout();
GlobalAddressSDNode *G = cast<GlobalAddressSDNode>(Op);
+
+ assert(G->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS);
// XXX: What does the value of G->getOffset() mean?
assert(G->getOffset() == 0 &&
"Do not know what to do with an non-zero offset");
- unsigned Offset = MFI->LDSSize;
const GlobalValue *GV = G->getGlobal();
- uint64_t Size = TD->getTypeAllocSize(GV->getType()->getElementType());
- // XXX: Account for alignment?
- MFI->LDSSize += Size;
+ unsigned Offset;
+ if (MFI->LocalMemoryObjects.count(GV) == 0) {
+ uint64_t Size = TD->getTypeAllocSize(GV->getType()->getElementType());
+ Offset = MFI->LDSSize;
+ MFI->LocalMemoryObjects[GV] = Offset;
+ // XXX: Account for alignment?
+ MFI->LDSSize += Size;
+ } else {
+ Offset = MFI->LocalMemoryObjects[GV];
+ }
+
+ return DAG.getConstant(Offset, getPointerTy(G->getAddressSpace()));
+}
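
For reference, a minimal standalone sketch of the bookkeeping introduced above: an offset is assigned to each global the first time it is lowered and cached in a map, so later lowerings of the same global reuse the same LDS slot instead of growing LDSSize again. The string keys stand in for GlobalValue pointers:

    #include <cstdio>
    #include <map>
    #include <string>

    int main() {
      std::map<std::string, unsigned> LocalMemoryObjects; // GV -> LDS offset
      unsigned LDSSize = 0;
      auto lower = [&](const std::string &GV, unsigned Size) {
        unsigned Offset;
        if (LocalMemoryObjects.count(GV) == 0) {
          Offset = LDSSize;                 // first use: allocate at the end
          LocalMemoryObjects[GV] = Offset;
          LDSSize += Size;
        } else {
          Offset = LocalMemoryObjects[GV];  // later uses: reuse cached offset
        }
        return Offset;
      };
      std::printf("a: %u\n", lower("a", 16));  // a: 0
      std::printf("b: %u\n", lower("b", 8));   // b: 16
      std::printf("a: %u\n", lower("a", 16));  // a: 0 (cached)
      return 0;
    }
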
+
+void AMDGPUTargetLowering::ExtractVectorElements(SDValue Op, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &Args,
+ unsigned Start,
+ unsigned Count) const {
+ EVT VT = Op.getValueType();
+ for (unsigned i = Start, e = Start + Count; i != e; ++i) {
+ Args.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op),
+ VT.getVectorElementType(),
+ Op, DAG.getConstant(i, MVT::i32)));
+ }
+}
+
+SDValue AMDGPUTargetLowering::LowerCONCAT_VECTORS(SDValue Op,
+ SelectionDAG &DAG) const {
+ SmallVector<SDValue, 8> Args;
+ SDValue A = Op.getOperand(0);
+ SDValue B = Op.getOperand(1);
+
+ ExtractVectorElements(A, DAG, Args, 0,
+ A.getValueType().getVectorNumElements());
+ ExtractVectorElements(B, DAG, Args, 0,
+ B.getValueType().getVectorNumElements());
+
+ return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(Op), Op.getValueType(),
+ &Args[0], Args.size());
+}
+
+SDValue AMDGPUTargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op,
+ SelectionDAG &DAG) const {
+
+ SmallVector<SDValue, 8> Args;
+ EVT VT = Op.getValueType();
+ unsigned Start = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+ ExtractVectorElements(Op.getOperand(0), DAG, Args, Start,
+ VT.getVectorNumElements());
+
+ return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(Op), Op.getValueType(),
+ &Args[0], Args.size());
+}
+
+SDValue AMDGPUTargetLowering::LowerFrameIndex(SDValue Op,
+ SelectionDAG &DAG) const {
- return DAG.getConstant(Offset, TD->getPointerSize() == 8 ? MVT::i64 : MVT::i32);
+ MachineFunction &MF = DAG.getMachineFunction();
+ const AMDGPUFrameLowering *TFL =
+ static_cast<const AMDGPUFrameLowering*>(getTargetMachine().getFrameLowering());
+
+ FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Op);
+ assert(FIN);
+
+ unsigned FrameIndex = FIN->getIndex();
+ unsigned Offset = TFL->getFrameIndexOffset(MF, FrameIndex);
+ return DAG.getConstant(Offset * 4 * TFL->getStackWidth(MF),
+ Op.getValueType());
}
SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
@@ -335,7 +484,122 @@ SDValue AMDGPUTargetLowering::LowerMinMax(SDValue Op,
return Op;
}
+SDValue AMDGPUTargetLowering::SplitVectorLoad(const SDValue &Op,
+ SelectionDAG &DAG) const {
+ LoadSDNode *Load = dyn_cast<LoadSDNode>(Op);
+ EVT MemEltVT = Load->getMemoryVT().getVectorElementType();
+ EVT EltVT = Op.getValueType().getVectorElementType();
+ EVT PtrVT = Load->getBasePtr().getValueType();
+ unsigned NumElts = Load->getMemoryVT().getVectorNumElements();
+ SmallVector<SDValue, 8> Loads;
+ SDLoc SL(Op);
+
+ for (unsigned i = 0, e = NumElts; i != e; ++i) {
+ SDValue Ptr = DAG.getNode(ISD::ADD, SL, PtrVT, Load->getBasePtr(),
+ DAG.getConstant(i * (MemEltVT.getSizeInBits() / 8), PtrVT));
+ Loads.push_back(DAG.getExtLoad(Load->getExtensionType(), SL, EltVT,
+ Load->getChain(), Ptr,
+ MachinePointerInfo(Load->getMemOperand()->getValue()),
+ MemEltVT, Load->isVolatile(), Load->isNonTemporal(),
+ Load->getAlignment()));
+ }
+ return DAG.getNode(ISD::BUILD_VECTOR, SL, Op.getValueType(), &Loads[0],
+ Loads.size());
+}
+
+SDValue AMDGPUTargetLowering::MergeVectorStore(const SDValue &Op,
+ SelectionDAG &DAG) const {
+ StoreSDNode *Store = dyn_cast<StoreSDNode>(Op);
+ EVT MemVT = Store->getMemoryVT();
+ unsigned MemBits = MemVT.getSizeInBits();
+
+ // Byte stores are really expensive, so if possible, try to pack
+ // 32-bit vector truncating store into an i32 store.
+ // XXX: We could also optimize other vector bitwidths
+ if (!MemVT.isVector() || MemBits > 32) {
+ return SDValue();
+ }
+
+ SDLoc DL(Op);
+ const SDValue &Value = Store->getValue();
+ EVT VT = Value.getValueType();
+ const SDValue &Ptr = Store->getBasePtr();
+ EVT MemEltVT = MemVT.getVectorElementType();
+ unsigned MemEltBits = MemEltVT.getSizeInBits();
+ unsigned MemNumElements = MemVT.getVectorNumElements();
+ EVT PackedVT = EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits());
+ SDValue Mask;
+ switch(MemEltBits) {
+ case 8:
+ Mask = DAG.getConstant(0xFF, PackedVT);
+ break;
+ case 16:
+ Mask = DAG.getConstant(0xFFFF, PackedVT);
+ break;
+ default:
+ llvm_unreachable("Cannot lower this vector store");
+ }
+ SDValue PackedValue;
+ for (unsigned i = 0; i < MemNumElements; ++i) {
+ EVT ElemVT = VT.getVectorElementType();
+ SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElemVT, Value,
+ DAG.getConstant(i, MVT::i32));
+ Elt = DAG.getZExtOrTrunc(Elt, DL, PackedVT);
+ Elt = DAG.getNode(ISD::AND, DL, PackedVT, Elt, Mask);
+ SDValue Shift = DAG.getConstant(MemEltBits * i, PackedVT);
+ Elt = DAG.getNode(ISD::SHL, DL, PackedVT, Elt, Shift);
+ if (i == 0) {
+ PackedValue = Elt;
+ } else {
+ PackedValue = DAG.getNode(ISD::OR, DL, PackedVT, PackedValue, Elt);
+ }
+ }
+ return DAG.getStore(Store->getChain(), DL, PackedValue, Ptr,
+ MachinePointerInfo(Store->getMemOperand()->getValue()),
+ Store->isVolatile(), Store->isNonTemporal(),
+ Store->getAlignment());
+}
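
For reference, a standalone sketch of the packing MergeVectorStore performs: each element is masked, shifted by MemEltBits * i, and OR'd into one 32-bit value, so a single i32 store replaces four byte stores. The values are illustrative:

    #include <cstdint>
    #include <cstdio>

    int main() {
      const uint8_t Elts[4] = {0x11, 0x22, 0x33, 0x44}; // v4i8 store value
      uint32_t Packed = 0;
      for (unsigned i = 0; i < 4; ++i) {
        uint32_t Elt = Elts[i] & 0xFF;     // the AND with the element mask
        Packed |= Elt << (8 * i);          // SHL by MemEltBits * i, then OR
      }
      std::printf("packed = 0x%08X\n", Packed); // 0x44332211
      return 0;
    }
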
+
+SDValue AMDGPUTargetLowering::SplitVectorStore(SDValue Op,
+ SelectionDAG &DAG) const {
+ StoreSDNode *Store = cast<StoreSDNode>(Op);
+ EVT MemEltVT = Store->getMemoryVT().getVectorElementType();
+ EVT EltVT = Store->getValue().getValueType().getVectorElementType();
+ EVT PtrVT = Store->getBasePtr().getValueType();
+ unsigned NumElts = Store->getMemoryVT().getVectorNumElements();
+ SDLoc SL(Op);
+
+ SmallVector<SDValue, 8> Chains;
+
+ for (unsigned i = 0, e = NumElts; i != e; ++i) {
+ SDValue Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, EltVT,
+ Store->getValue(), DAG.getConstant(i, MVT::i32));
+ SDValue Ptr = DAG.getNode(ISD::ADD, SL, PtrVT,
+ Store->getBasePtr(),
+ DAG.getConstant(i * (MemEltVT.getSizeInBits() / 8),
+ PtrVT));
+ Chains.push_back(DAG.getTruncStore(Store->getChain(), SL, Val, Ptr,
+ MachinePointerInfo(Store->getMemOperand()->getValue()),
+ MemEltVT, Store->isVolatile(), Store->isNonTemporal(),
+ Store->getAlignment()));
+ }
+ return DAG.getNode(ISD::TokenFactor, SL, MVT::Other, &Chains[0], NumElts);
+}
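
For reference, the per-element address arithmetic SplitVectorStore (and SplitVectorLoad above) emits reduces to BasePtr + i * sizeof(MemEltVT); a standalone sketch with illustrative numbers:

    #include <cstdio>

    int main() {
      const unsigned MemEltBits = 16;      // e.g. a v4i16 truncating store
      const unsigned BasePtr = 0x1000;     // illustrative base address
      for (unsigned i = 0; i < 4; ++i)
        std::printf("elt %u -> 0x%X\n", i, BasePtr + i * (MemEltBits / 8));
      return 0;
    }
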
+SDValue AMDGPUTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
+ SDValue Result = AMDGPUTargetLowering::MergeVectorStore(Op, DAG);
+ if (Result.getNode()) {
+ return Result;
+ }
+
+ StoreSDNode *Store = cast<StoreSDNode>(Op);
+ if ((Store->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ||
+ Store->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS) &&
+ Store->getValue().getValueType().isVector()) {
+ return SplitVectorStore(Op, DAG);
+ }
+ return SDValue();
+}
SDValue AMDGPUTargetLowering::LowerUDIVREM(SDValue Op,
SelectionDAG &DAG) const {
@@ -392,13 +656,13 @@ SDValue AMDGPUTargetLowering::LowerUDIVREM(SDValue Op,
SDValue Remainder_GE_Den = DAG.getSelectCC(DL, Remainder, Den,
DAG.getConstant(-1, VT),
DAG.getConstant(0, VT),
- ISD::SETGE);
- // Remainder_GE_Zero = (Remainder >= 0 ? -1 : 0)
- SDValue Remainder_GE_Zero = DAG.getSelectCC(DL, Remainder,
- DAG.getConstant(0, VT),
+ ISD::SETUGE);
+ // Remainder_GE_Zero = (Num >= Num_S_Remainder ? -1 : 0)
+ SDValue Remainder_GE_Zero = DAG.getSelectCC(DL, Num,
+ Num_S_Remainder,
DAG.getConstant(-1, VT),
DAG.getConstant(0, VT),
- ISD::SETGE);
+ ISD::SETUGE);
// Tmp1 = Remainder_GE_Den & Remainder_GE_Zero
SDValue Tmp1 = DAG.getNode(ISD::AND, DL, VT, Remainder_GE_Den,
Remainder_GE_Zero);
@@ -442,10 +706,62 @@ SDValue AMDGPUTargetLowering::LowerUDIVREM(SDValue Op,
return DAG.getMergeValues(Ops, 2, DL);
}
+SDValue AMDGPUTargetLowering::LowerUINT_TO_FP(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDValue S0 = Op.getOperand(0);
+ SDLoc DL(Op);
+ if (Op.getValueType() != MVT::f32 || S0.getValueType() != MVT::i64)
+ return SDValue();
+
+ // f32 uint_to_fp i64
+ SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, S0,
+ DAG.getConstant(0, MVT::i32));
+ SDValue FloatLo = DAG.getNode(ISD::UINT_TO_FP, DL, MVT::f32, Lo);
+ SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, S0,
+ DAG.getConstant(1, MVT::i32));
+ SDValue FloatHi = DAG.getNode(ISD::UINT_TO_FP, DL, MVT::f32, Hi);
+ FloatHi = DAG.getNode(ISD::FMUL, DL, MVT::f32, FloatHi,
+ DAG.getConstantFP(4294967296.0f, MVT::f32)); // 2^32
+ return DAG.getNode(ISD::FADD, DL, MVT::f32, FloatLo, FloatHi);
+
+}
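
For reference, a standalone check of the arithmetic LowerUINT_TO_FP emits as FMUL/FADD nodes above: convert the two 32-bit halves separately and recombine as lo + hi * 2^32:

    #include <cstdint>
    #include <cstdio>

    int main() {
      uint64_t X = 0x123456789ULL;
      float Lo = (float)(uint32_t)(X & 0xFFFFFFFFu); // low 32 bits
      float Hi = (float)(uint32_t)(X >> 32);         // high 32 bits
      float Result = Lo + Hi * 4294967296.0f;        // lo + hi * 2^32
      std::printf("%f vs %f\n", Result, (float)X);   // same value (mod rounding)
      return 0;
    }
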
+
//===----------------------------------------------------------------------===//
// Helper functions
//===----------------------------------------------------------------------===//
+void AMDGPUTargetLowering::getOriginalFunctionArgs(
+ SelectionDAG &DAG,
+ const Function *F,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ SmallVectorImpl<ISD::InputArg> &OrigIns) const {
+
+ for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
+ if (Ins[i].ArgVT == Ins[i].VT) {
+ OrigIns.push_back(Ins[i]);
+ continue;
+ }
+
+ EVT VT;
+ if (Ins[i].ArgVT.isVector() && !Ins[i].VT.isVector()) {
+ // Vector has been split into scalars.
+ VT = Ins[i].ArgVT.getVectorElementType();
+ } else if (Ins[i].VT.isVector() && Ins[i].ArgVT.isVector() &&
+ Ins[i].ArgVT.getVectorElementType() !=
+ Ins[i].VT.getVectorElementType()) {
+ // Vector elements have been promoted
+ VT = Ins[i].ArgVT;
+ } else {
+ // Vector has been split into smaller vectors.
+ VT = Ins[i].VT;
+ }
+
+ ISD::InputArg Arg(Ins[i].Flags, VT, VT, Ins[i].Used,
+ Ins[i].OrigArgIndex, Ins[i].PartOffset);
+ OrigIns.push_back(Arg);
+ }
+}
+
bool AMDGPUTargetLowering::isHWTrueValue(SDValue Op) const {
if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
return CFP->isExactlyValue(1.0);
@@ -507,5 +823,13 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(CONST_ADDRESS)
NODE_NAME_CASE(REGISTER_LOAD)
NODE_NAME_CASE(REGISTER_STORE)
+ NODE_NAME_CASE(LOAD_CONSTANT)
+ NODE_NAME_CASE(LOAD_INPUT)
+ NODE_NAME_CASE(SAMPLE)
+ NODE_NAME_CASE(SAMPLEB)
+ NODE_NAME_CASE(SAMPLED)
+ NODE_NAME_CASE(SAMPLEL)
+ NODE_NAME_CASE(STORE_MSKOR)
+ NODE_NAME_CASE(TBUFFER_STORE_FORMAT)
}
}
diff --git a/lib/Target/R600/AMDGPUISelLowering.h b/lib/Target/R600/AMDGPUISelLowering.h
index f614e23..2dfd3cf 100644
--- a/lib/Target/R600/AMDGPUISelLowering.h
+++ b/lib/Target/R600/AMDGPUISelLowering.h
@@ -25,8 +25,20 @@ class MachineRegisterInfo;
class AMDGPUTargetLowering : public TargetLowering {
private:
+ void ExtractVectorElements(SDValue Op, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &Args,
+ unsigned Start, unsigned Count) const;
+ SDValue LowerFrameIndex(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
+ /// \brief Lower vector stores by merging the vector elements into an integer
+ /// of the same bitwidth.
+ SDValue MergeVectorStore(const SDValue &Op, SelectionDAG &DAG) const;
+ /// \brief Split a vector store into multiple scalar stores.
+ /// \returns The resulting chain.
SDValue LowerUDIVREM(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
protected:
@@ -39,10 +51,23 @@ protected:
unsigned Reg, EVT VT) const;
SDValue LowerGlobalAddress(AMDGPUMachineFunction *MFI, SDValue Op,
SelectionDAG &DAG) const;
-
+ /// \brief Split a vector load into multiple scalar loads.
+ SDValue SplitVectorLoad(const SDValue &Op, SelectionDAG &DAG) const;
+ SDValue SplitVectorStore(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
bool isHWTrueValue(SDValue Op) const;
bool isHWFalseValue(SDValue Op) const;
+ /// The SelectionDAGBuilder will automatically promote function arguments
+ /// with illegal types. However, this does not work for the AMDGPU targets
+ /// since the function arguments are stored in memory as these illegal types.
+ /// In order to handle this properly, we need to get the original type sizes
+ /// from the LLVM IR Function and fix up the ISD::InputArg values before
+ /// passing them to AnalyzeFormalArguments().
+ void getOriginalFunctionArgs(SelectionDAG &DAG,
+ const Function *F,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ SmallVectorImpl<ISD::InputArg> &OrigIns) const;
void AnalyzeFormalArguments(CCState &State,
const SmallVectorImpl<ISD::InputArg> &Ins) const;
@@ -52,6 +77,7 @@ public:
virtual bool isFAbsFree(EVT VT) const;
virtual bool isFNegFree(EVT VT) const;
virtual MVT getVectorIdxTy() const;
+ virtual bool isLoadBitCastBeneficial(EVT, EVT) const LLVM_OVERRIDE;
virtual SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv,
bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
@@ -139,6 +165,15 @@ enum {
CONST_ADDRESS,
REGISTER_LOAD,
REGISTER_STORE,
+ LOAD_INPUT,
+ SAMPLE,
+ SAMPLEB,
+ SAMPLED,
+ SAMPLEL,
+ FIRST_MEM_OPCODE_NUMBER = ISD::FIRST_TARGET_MEMORY_OPCODE,
+ STORE_MSKOR,
+ LOAD_CONSTANT,
+ TBUFFER_STORE_FORMAT,
LAST_AMDGPU_ISD_NUMBER
};
diff --git a/lib/Target/R600/AMDGPUIndirectAddressing.cpp b/lib/Target/R600/AMDGPUIndirectAddressing.cpp
deleted file mode 100644
index 3ce3ecf..0000000
--- a/lib/Target/R600/AMDGPUIndirectAddressing.cpp
+++ /dev/null
@@ -1,345 +0,0 @@
-//===-- AMDGPUIndirectAddressing.cpp - Indirect Adressing Support ---------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-/// \file
-///
-/// Instructions can use indirect addressing to index the register file as if it
-/// were memory. This pass lowers RegisterLoad and RegisterStore instructions
-/// to either a COPY or a MOV that uses indirect addressing.
-//
-//===----------------------------------------------------------------------===//
-
-#include "AMDGPU.h"
-#include "R600InstrInfo.h"
-#include "R600MachineFunctionInfo.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Support/Debug.h"
-
-using namespace llvm;
-
-namespace {
-
-class AMDGPUIndirectAddressingPass : public MachineFunctionPass {
-
-private:
- static char ID;
- const AMDGPUInstrInfo *TII;
-
- bool regHasExplicitDef(MachineRegisterInfo &MRI, unsigned Reg) const;
-
-public:
- AMDGPUIndirectAddressingPass(TargetMachine &tm) :
- MachineFunctionPass(ID),
- TII(0)
- { }
-
- virtual bool runOnMachineFunction(MachineFunction &MF);
-
- const char *getPassName() const { return "R600 Handle indirect addressing"; }
-
-};
-
-} // End anonymous namespace
-
-char AMDGPUIndirectAddressingPass::ID = 0;
-
-FunctionPass *llvm::createAMDGPUIndirectAddressingPass(TargetMachine &tm) {
- return new AMDGPUIndirectAddressingPass(tm);
-}
-
-bool AMDGPUIndirectAddressingPass::runOnMachineFunction(MachineFunction &MF) {
- MachineRegisterInfo &MRI = MF.getRegInfo();
-
- TII = static_cast<const AMDGPUInstrInfo*>(MF.getTarget().getInstrInfo());
-
- int IndirectBegin = TII->getIndirectIndexBegin(MF);
- int IndirectEnd = TII->getIndirectIndexEnd(MF);
-
- if (IndirectBegin == -1) {
- // No indirect addressing, we can skip this pass
- assert(IndirectEnd == -1);
- return false;
- }
-
- // The map keeps track of the indirect address that is represented by
- // each virtual register. The key is the register and the value is the
- // indirect address it uses.
- std::map<unsigned, unsigned> RegisterAddressMap;
-
- // First pass - Lower all of the RegisterStore instructions and track which
- // registers are live.
- for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
- BB != BB_E; ++BB) {
- // This map keeps track of the current live indirect registers.
- // The key is the address and the value is the register
- std::map<unsigned, unsigned> LiveAddressRegisterMap;
- MachineBasicBlock &MBB = *BB;
-
- for (MachineBasicBlock::iterator I = MBB.begin(), Next = llvm::next(I);
- I != MBB.end(); I = Next) {
- Next = llvm::next(I);
- MachineInstr &MI = *I;
-
- if (!TII->isRegisterStore(MI)) {
- continue;
- }
-
- // Lower RegisterStore
-
- unsigned RegIndex = MI.getOperand(2).getImm();
- unsigned Channel = MI.getOperand(3).getImm();
- unsigned Address = TII->calculateIndirectAddress(RegIndex, Channel);
- const TargetRegisterClass *IndirectStoreRegClass =
- TII->getIndirectAddrStoreRegClass(MI.getOperand(0).getReg());
-
- if (MI.getOperand(1).getReg() == AMDGPU::INDIRECT_BASE_ADDR) {
- // Direct register access.
- unsigned DstReg = MRI.createVirtualRegister(IndirectStoreRegClass);
-
- BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::COPY), DstReg)
- .addOperand(MI.getOperand(0));
-
- RegisterAddressMap[DstReg] = Address;
- LiveAddressRegisterMap[Address] = DstReg;
- } else {
- // Indirect register access.
- MachineInstrBuilder MOV = TII->buildIndirectWrite(BB, I,
- MI.getOperand(0).getReg(), // Value
- Address,
- MI.getOperand(1).getReg()); // Offset
- for (int i = IndirectBegin; i <= IndirectEnd; ++i) {
- unsigned Addr = TII->calculateIndirectAddress(i, Channel);
- unsigned DstReg = MRI.createVirtualRegister(IndirectStoreRegClass);
- MOV.addReg(DstReg, RegState::Define | RegState::Implicit);
- RegisterAddressMap[DstReg] = Addr;
- LiveAddressRegisterMap[Addr] = DstReg;
- }
- }
- MI.eraseFromParent();
- }
-
- // Update the live-ins of the succesor blocks
- for (MachineBasicBlock::succ_iterator Succ = MBB.succ_begin(),
- SuccEnd = MBB.succ_end();
- SuccEnd != Succ; ++Succ) {
- std::map<unsigned, unsigned>::const_iterator Key, KeyEnd;
- for (Key = LiveAddressRegisterMap.begin(),
- KeyEnd = LiveAddressRegisterMap.end(); KeyEnd != Key; ++Key) {
- (*Succ)->addLiveIn(Key->second);
- }
- }
- }
-
- // Second pass - Lower the RegisterLoad instructions
- for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
- BB != BB_E; ++BB) {
- // Key is the address and the value is the register
- std::map<unsigned, unsigned> LiveAddressRegisterMap;
- MachineBasicBlock &MBB = *BB;
-
- MachineBasicBlock::livein_iterator LI = MBB.livein_begin();
- while (LI != MBB.livein_end()) {
- std::vector<unsigned> PhiRegisters;
-
- // Make sure this live in is used for indirect addressing
- if (RegisterAddressMap.find(*LI) == RegisterAddressMap.end()) {
- ++LI;
- continue;
- }
-
- unsigned Address = RegisterAddressMap[*LI];
- LiveAddressRegisterMap[Address] = *LI;
- PhiRegisters.push_back(*LI);
-
- // Check if there are other live in registers which map to the same
- // indirect address.
- for (MachineBasicBlock::livein_iterator LJ = llvm::next(LI),
- LE = MBB.livein_end();
- LJ != LE; ++LJ) {
- unsigned Reg = *LJ;
- if (RegisterAddressMap.find(Reg) == RegisterAddressMap.end()) {
- continue;
- }
-
- if (RegisterAddressMap[Reg] == Address) {
- PhiRegisters.push_back(Reg);
- }
- }
-
- if (PhiRegisters.size() == 1) {
- // We don't need to insert a Phi instruction, so we can just add the
- // registers to the live list for the block.
- LiveAddressRegisterMap[Address] = *LI;
- MBB.removeLiveIn(*LI);
- } else {
- // We need to insert a PHI, because we have the same address being
- // written in multiple predecessor blocks.
- const TargetRegisterClass *PhiDstClass =
- TII->getIndirectAddrStoreRegClass(*(PhiRegisters.begin()));
- unsigned PhiDstReg = MRI.createVirtualRegister(PhiDstClass);
- MachineInstrBuilder Phi = BuildMI(MBB, MBB.begin(),
- MBB.findDebugLoc(MBB.begin()),
- TII->get(AMDGPU::PHI), PhiDstReg);
-
- for (std::vector<unsigned>::const_iterator RI = PhiRegisters.begin(),
- RE = PhiRegisters.end();
- RI != RE; ++RI) {
- unsigned Reg = *RI;
- MachineInstr *DefInst = MRI.getVRegDef(Reg);
- assert(DefInst);
- MachineBasicBlock *RegBlock = DefInst->getParent();
- Phi.addReg(Reg);
- Phi.addMBB(RegBlock);
- MBB.removeLiveIn(Reg);
- }
- RegisterAddressMap[PhiDstReg] = Address;
- LiveAddressRegisterMap[Address] = PhiDstReg;
- }
- LI = MBB.livein_begin();
- }
-
- for (MachineBasicBlock::iterator I = MBB.begin(), Next = llvm::next(I);
- I != MBB.end(); I = Next) {
- Next = llvm::next(I);
- MachineInstr &MI = *I;
-
- if (!TII->isRegisterLoad(MI)) {
- if (MI.getOpcode() == AMDGPU::PHI) {
- continue;
- }
- // Check for indirect register defs
- for (unsigned OpIdx = 0, NumOperands = MI.getNumOperands();
- OpIdx < NumOperands; ++OpIdx) {
- MachineOperand &MO = MI.getOperand(OpIdx);
- if (MO.isReg() && MO.isDef() &&
- RegisterAddressMap.find(MO.getReg()) != RegisterAddressMap.end()) {
- unsigned Reg = MO.getReg();
- unsigned LiveAddress = RegisterAddressMap[Reg];
- // Chain the live-ins
- if (LiveAddressRegisterMap.find(LiveAddress) !=
- LiveAddressRegisterMap.end()) {
- MI.addOperand(MachineOperand::CreateReg(
- LiveAddressRegisterMap[LiveAddress],
- false, // isDef
- true, // isImp
- true)); // isKill
- }
- LiveAddressRegisterMap[LiveAddress] = Reg;
- }
- }
- continue;
- }
-
- const TargetRegisterClass *SuperIndirectRegClass =
- TII->getSuperIndirectRegClass();
- const TargetRegisterClass *IndirectLoadRegClass =
- TII->getIndirectAddrLoadRegClass();
- unsigned IndirectReg = MRI.createVirtualRegister(SuperIndirectRegClass);
-
- unsigned RegIndex = MI.getOperand(2).getImm();
- unsigned Channel = MI.getOperand(3).getImm();
- unsigned Address = TII->calculateIndirectAddress(RegIndex, Channel);
-
- if (MI.getOperand(1).getReg() == AMDGPU::INDIRECT_BASE_ADDR) {
- // Direct register access
- unsigned Reg = LiveAddressRegisterMap[Address];
- unsigned AddrReg = IndirectLoadRegClass->getRegister(Address);
-
- if (regHasExplicitDef(MRI, Reg)) {
- // If the register we are reading from has an explicit def, then that
- // means it was written via a direct register access (i.e. COPY
- // or other instruction that doesn't use indirect addressing). In
- // this case we know where the value has been stored, so we can just
- // issue a copy.
- BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::COPY),
- MI.getOperand(0).getReg())
- .addReg(Reg);
- } else {
- // If the register we are reading has an implicit def, then that
- // means it was written by an indirect register access (i.e. An
- // instruction that uses indirect addressing.
- BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::COPY),
- MI.getOperand(0).getReg())
- .addReg(AddrReg)
- .addReg(Reg, RegState::Implicit);
- }
- } else {
- // Indirect register access
-
- // Note on REQ_SEQUENCE instructons: You can't actually use the register
- // it defines unless you have an instruction that takes the defined
- // register class as an operand.
-
- MachineInstrBuilder Sequence = BuildMI(MBB, I, MBB.findDebugLoc(I),
- TII->get(AMDGPU::REG_SEQUENCE),
- IndirectReg);
- for (int i = IndirectBegin; i <= IndirectEnd; ++i) {
- unsigned Addr = TII->calculateIndirectAddress(i, Channel);
- if (LiveAddressRegisterMap.find(Addr) == LiveAddressRegisterMap.end()) {
- continue;
- }
- unsigned Reg = LiveAddressRegisterMap[Addr];
-
- // We only need to use REG_SEQUENCE for explicit defs, since the
- // register coalescer won't do anything with the implicit defs.
- if (!regHasExplicitDef(MRI, Reg)) {
- continue;
- }
-
- // Insert a REQ_SEQUENCE instruction to force the register allocator
- // to allocate the virtual register to the correct physical register.
- Sequence.addReg(LiveAddressRegisterMap[Addr]);
- Sequence.addImm(TII->getRegisterInfo().getIndirectSubReg(Addr));
- }
- MachineInstrBuilder Mov = TII->buildIndirectRead(BB, I,
- MI.getOperand(0).getReg(), // Value
- Address,
- MI.getOperand(1).getReg()); // Offset
-
-
-
- Mov.addReg(IndirectReg, RegState::Implicit | RegState::Kill);
- Mov.addReg(LiveAddressRegisterMap[Address], RegState::Implicit);
-
- }
- MI.eraseFromParent();
- }
- }
- return false;
-}
-
-bool AMDGPUIndirectAddressingPass::regHasExplicitDef(MachineRegisterInfo &MRI,
- unsigned Reg) const {
- MachineInstr *DefInstr = MRI.getVRegDef(Reg);
-
- if (!DefInstr) {
- return false;
- }
-
- if (DefInstr->getOpcode() == AMDGPU::PHI) {
- bool Explicit = false;
- for (MachineInstr::const_mop_iterator I = DefInstr->operands_begin(),
- E = DefInstr->operands_end();
- I != E; ++I) {
- const MachineOperand &MO = *I;
- if (!MO.isReg() || MO.isDef()) {
- continue;
- }
-
- Explicit = Explicit || regHasExplicitDef(MRI, MO.getReg());
- }
- return Explicit;
- }
-
- return DefInstr->getOperand(0).isReg() &&
- DefInstr->getOperand(0).getReg() == Reg;
-}
diff --git a/lib/Target/R600/AMDGPUInstrInfo.cpp b/lib/Target/R600/AMDGPUInstrInfo.cpp
index 61437e9..4f7084b 100644
--- a/lib/Target/R600/AMDGPUInstrInfo.cpp
+++ b/lib/Target/R600/AMDGPUInstrInfo.cpp
@@ -20,15 +20,19 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#define GET_INSTRINFO_CTOR
+#define GET_INSTRINFO_CTOR_DTOR
#define GET_INSTRINFO_NAMED_OPS
#define GET_INSTRMAP_INFO
#include "AMDGPUGenInstrInfo.inc"
using namespace llvm;
+
+// Pin the vtable to this file.
+void AMDGPUInstrInfo::anchor() {}
+
AMDGPUInstrInfo::AMDGPUInstrInfo(TargetMachine &tm)
- : AMDGPUGenInstrInfo(0,0), RI(tm), TM(tm) { }
+ : AMDGPUGenInstrInfo(-1,-1), RI(tm), TM(tm) { }
const AMDGPURegisterInfo &AMDGPUInstrInfo::getRegisterInfo() const {
return RI;
@@ -118,6 +122,55 @@ AMDGPUInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
assert(!"Not Implemented");
}
+bool AMDGPUInstrInfo::expandPostRAPseudo (MachineBasicBlock::iterator MI) const {
+ MachineBasicBlock *MBB = MI->getParent();
+ int OffsetOpIdx =
+ AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::addr);
+ // addr is a custom operand with multiple MI operands, and only the
+ // first MI operand is given a name.
+ int RegOpIdx = OffsetOpIdx + 1;
+ int ChanOpIdx =
+ AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::chan);
+
+ if (isRegisterLoad(*MI)) {
+ int DstOpIdx =
+ AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::dst);
+ unsigned RegIndex = MI->getOperand(RegOpIdx).getImm();
+ unsigned Channel = MI->getOperand(ChanOpIdx).getImm();
+ unsigned Address = calculateIndirectAddress(RegIndex, Channel);
+ unsigned OffsetReg = MI->getOperand(OffsetOpIdx).getReg();
+ if (OffsetReg == AMDGPU::INDIRECT_BASE_ADDR) {
+ buildMovInstr(MBB, MI, MI->getOperand(DstOpIdx).getReg(),
+ getIndirectAddrRegClass()->getRegister(Address));
+ } else {
+ buildIndirectRead(MBB, MI, MI->getOperand(DstOpIdx).getReg(),
+ Address, OffsetReg);
+ }
+ } else if (isRegisterStore(*MI)) {
+ int ValOpIdx =
+ AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::val);
+ AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::dst);
+ unsigned RegIndex = MI->getOperand(RegOpIdx).getImm();
+ unsigned Channel = MI->getOperand(ChanOpIdx).getImm();
+ unsigned Address = calculateIndirectAddress(RegIndex, Channel);
+ unsigned OffsetReg = MI->getOperand(OffsetOpIdx).getReg();
+ if (OffsetReg == AMDGPU::INDIRECT_BASE_ADDR) {
+ buildMovInstr(MBB, MI, getIndirectAddrRegClass()->getRegister(Address),
+ MI->getOperand(ValOpIdx).getReg());
+ } else {
+ buildIndirectWrite(MBB, MI, MI->getOperand(ValOpIdx).getReg(),
+ calculateIndirectAddress(RegIndex, Channel),
+ OffsetReg);
+ }
+ } else {
+ return false;
+ }
+
+ MBB->erase(MI);
+ return true;
+}
+
+
MachineInstr *
AMDGPUInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
MachineInstr *MI,
@@ -223,6 +276,57 @@ bool AMDGPUInstrInfo::isRegisterLoad(const MachineInstr &MI) const {
return get(MI.getOpcode()).TSFlags & AMDGPU_FLAG_REGISTER_LOAD;
}
+int AMDGPUInstrInfo::getIndirectIndexBegin(const MachineFunction &MF) const {
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ int Offset = -1;
+
+ if (MFI->getNumObjects() == 0) {
+ return -1;
+ }
+
+ if (MRI.livein_empty()) {
+ return 0;
+ }
+
+ const TargetRegisterClass *IndirectRC = getIndirectAddrRegClass();
+ for (MachineRegisterInfo::livein_iterator LI = MRI.livein_begin(),
+ LE = MRI.livein_end();
+ LI != LE; ++LI) {
+ unsigned Reg = LI->first;
+ if (TargetRegisterInfo::isVirtualRegister(Reg) ||
+ !IndirectRC->contains(Reg))
+ continue;
+
+ unsigned RegIndex;
+ unsigned RegEnd;
+ for (RegIndex = 0, RegEnd = IndirectRC->getNumRegs(); RegIndex != RegEnd;
+ ++RegIndex) {
+ if (IndirectRC->getRegister(RegIndex) == Reg)
+ break;
+ }
+ Offset = std::max(Offset, (int)RegIndex);
+ }
+
+ return Offset + 1;
+}
+
+int AMDGPUInstrInfo::getIndirectIndexEnd(const MachineFunction &MF) const {
+ int Offset = 0;
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+
+ // Variable sized objects are not supported
+ assert(!MFI->hasVarSizedObjects());
+
+ if (MFI->getNumObjects() == 0) {
+ return -1;
+ }
+
+ Offset = TM.getFrameLowering()->getFrameIndexOffset(MF, -1);
+
+ return getIndirectIndexBegin(MF) + Offset;
+}
+
void AMDGPUInstrInfo::convertToISA(MachineInstr & MI, MachineFunction &MF,
DebugLoc DL) const {
@@ -244,3 +348,12 @@ void AMDGPUInstrInfo::convertToISA(MachineInstr & MI, MachineFunction &MF,
}
}
}
+
+int AMDGPUInstrInfo::getMaskedMIMGOp(uint16_t Opcode, unsigned Channels) const {
+ switch (Channels) {
+ default: return Opcode;
+ case 1: return AMDGPU::getMaskedMIMGOp(Opcode, AMDGPU::Channels_1);
+ case 2: return AMDGPU::getMaskedMIMGOp(Opcode, AMDGPU::Channels_2);
+ case 3: return AMDGPU::getMaskedMIMGOp(Opcode, AMDGPU::Channels_3);
+ }
+}
diff --git a/lib/Target/R600/AMDGPUInstrInfo.h b/lib/Target/R600/AMDGPUInstrInfo.h
index 306f467..ce5b58c 100644
--- a/lib/Target/R600/AMDGPUInstrInfo.h
+++ b/lib/Target/R600/AMDGPUInstrInfo.h
@@ -43,6 +43,7 @@ private:
const AMDGPURegisterInfo RI;
bool getNextBranchInstr(MachineBasicBlock::iterator &iter,
MachineBasicBlock &MBB) const;
+ virtual void anchor();
protected:
TargetMachine &TM;
public:
@@ -87,6 +88,8 @@ public:
unsigned DestReg, int FrameIndex,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const;
+ virtual bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const;
+
protected:
MachineInstr *foldMemoryOperandImpl(MachineFunction &MF,
@@ -97,6 +100,14 @@ protected:
MachineInstr *MI,
const SmallVectorImpl<unsigned> &Ops,
MachineInstr *LoadMI) const;
+ /// \returns the smallest register index that will be accessed by an indirect
+ /// read or write or -1 if indirect addressing is not used by this program.
+ virtual int getIndirectIndexBegin(const MachineFunction &MF) const;
+
+ /// \returns the largest register index that will be accessed by an indirect
+ /// read or write or -1 if indirect addressing is not used by this program.
+ virtual int getIndirectIndexEnd(const MachineFunction &MF) const;
+
public:
bool canFoldMemoryOperand(const MachineInstr *MI,
const SmallVectorImpl<unsigned> &Ops) const;
@@ -139,19 +150,9 @@ public:
// Pure virtual functions to be implemented by sub-classes.
//===---------------------------------------------------------------------===//
- virtual MachineInstr* getMovImmInstr(MachineFunction *MF, unsigned DstReg,
- int64_t Imm) const = 0;
virtual unsigned getIEQOpcode() const = 0;
virtual bool isMov(unsigned opcode) const = 0;
- /// \returns the smallest register index that will be accessed by an indirect
- /// read or write or -1 if indirect addressing is not used by this program.
- virtual int getIndirectIndexBegin(const MachineFunction &MF) const = 0;
-
- /// \returns the largest register index that will be accessed by an indirect
- /// read or write or -1 if indirect addressing is not used by this program.
- virtual int getIndirectIndexEnd(const MachineFunction &MF) const = 0;
-
/// \brief Calculate the "Indirect Address" for the given \p RegIndex and
/// \p Channel
///
@@ -162,14 +163,9 @@ public:
virtual unsigned calculateIndirectAddress(unsigned RegIndex,
unsigned Channel) const = 0;
- /// \returns The register class to be used for storing values to an
- /// "Indirect Address" .
- virtual const TargetRegisterClass *getIndirectAddrStoreRegClass(
- unsigned SourceReg) const = 0;
-
- /// \returns The register class to be used for loading values from
- /// an "Indirect Address" .
- virtual const TargetRegisterClass *getIndirectAddrLoadRegClass() const = 0;
+ /// \returns The register class to be used for loading and storing values
+ /// from an "Indirect Address" .
+ virtual const TargetRegisterClass *getIndirectAddrRegClass() const = 0;
/// \brief Build instruction(s) for an indirect register write.
///
@@ -187,16 +183,21 @@ public:
unsigned ValueReg, unsigned Address,
unsigned OffsetReg) const = 0;
- /// \returns the register class whose sub registers are the set of all
- /// possible registers that can be used for indirect addressing.
- virtual const TargetRegisterClass *getSuperIndirectRegClass() const = 0;
-
/// \brief Convert the AMDIL MachineInstr to a supported ISA
/// MachineInstr
virtual void convertToISA(MachineInstr & MI, MachineFunction &MF,
DebugLoc DL) const;
+ /// \brief Build a MOV instruction.
+ virtual MachineInstr *buildMovInstr(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator I,
+ unsigned DstReg, unsigned SrcReg) const = 0;
+
+ /// \brief Given a MIMG \p Opcode that writes all 4 channels, return the
+ /// equivalent opcode that writes \p Channels Channels.
+ int getMaskedMIMGOp(uint16_t Opcode, unsigned Channels) const;
+
};
namespace AMDGPU {
diff --git a/lib/Target/R600/AMDGPUInstrInfo.td b/lib/Target/R600/AMDGPUInstrInfo.td
index 48d89dd..fccede0 100644
--- a/lib/Target/R600/AMDGPUInstrInfo.td
+++ b/lib/Target/R600/AMDGPUInstrInfo.td
@@ -72,3 +72,17 @@ def AMDGPUregister_load : SDNode<"AMDGPUISD::REGISTER_LOAD",
def AMDGPUregister_store : SDNode<"AMDGPUISD::REGISTER_STORE",
SDTypeProfile<0, 3, [SDTCisPtrTy<1>, SDTCisInt<2>]>,
[SDNPHasChain, SDNPMayStore]>;
+
+// MSKOR instructions are atomic memory instructions used mainly for storing
+// 8-bit and 16-bit values. The definition is:
+//
+// MSKOR(dst, mask, src) MEM[dst] = ((MEM[dst] & ~mask) | src)
+//
+// src0: vec4(src, 0, 0, mask)
+// src1: dst - rat offset (aka pointer) in dwords
+def AMDGPUstore_mskor : SDNode<"AMDGPUISD::STORE_MSKOR",
+ SDTypeProfile<0, 2, []>,
+ [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
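
For reference, a standalone model of the MSKOR update documented above, showing a single byte stored into a dword without disturbing its neighbours, which is why MSKOR can back 8-bit and 16-bit stores. The values are illustrative:

    #include <cstdint>
    #include <cstdio>

    int main() {
      uint32_t Mem  = 0xAABBCCDD;          // existing dword at dst
      uint32_t Mask = 0xFF << 8;           // select byte 1
      uint32_t Src  = (uint32_t)0x42 << 8; // new byte, pre-shifted into lane
      Mem = (Mem & ~Mask) | Src;           // MEM[dst] = (MEM[dst] & ~mask) | src
      std::printf("0x%08X\n", Mem);        // 0xAABB42DD
      return 0;
    }
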
+
+def AMDGPUround : SDNode<"ISD::FROUND",
+ SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisSameAs<0,1>]>>;
diff --git a/lib/Target/R600/AMDGPUInstructions.td b/lib/Target/R600/AMDGPUInstructions.td
index d6a7759..3c5375d 100644
--- a/lib/Target/R600/AMDGPUInstructions.td
+++ b/lib/Target/R600/AMDGPUInstructions.td
@@ -35,46 +35,75 @@ class AMDGPUShaderInst <dag outs, dag ins, string asm, list<dag> pattern>
}
def InstFlag : OperandWithDefaultOps <i32, (ops (i32 0))>;
+def ADDRIndirect : ComplexPattern<iPTR, 2, "SelectADDRIndirect", [], []>;
-def COND_EQ : PatLeaf <
+//===----------------------------------------------------------------------===//
+// PatLeafs for floating-point comparisons
+//===----------------------------------------------------------------------===//
+
+def COND_OEQ : PatLeaf <
(cond),
- [{switch(N->get()){{default: return false;
- case ISD::SETOEQ: case ISD::SETUEQ:
- case ISD::SETEQ: return true;}}}]
+ [{return N->get() == ISD::SETOEQ || N->get() == ISD::SETEQ;}]
>;
-def COND_NE : PatLeaf <
+def COND_OGT : PatLeaf <
+ (cond),
+ [{return N->get() == ISD::SETOGT || N->get() == ISD::SETGT;}]
+>;
+
+def COND_OGE : PatLeaf <
+ (cond),
+ [{return N->get() == ISD::SETOGE || N->get() == ISD::SETGE;}]
+>;
+
+def COND_OLT : PatLeaf <
(cond),
- [{switch(N->get()){{default: return false;
- case ISD::SETONE: case ISD::SETUNE:
- case ISD::SETNE: return true;}}}]
+ [{return N->get() == ISD::SETOLT || N->get() == ISD::SETLT;}]
>;
-def COND_GT : PatLeaf <
+
+def COND_OLE : PatLeaf <
(cond),
- [{switch(N->get()){{default: return false;
- case ISD::SETOGT: case ISD::SETUGT:
- case ISD::SETGT: return true;}}}]
+ [{return N->get() == ISD::SETOLE || N->get() == ISD::SETLE;}]
>;
-def COND_GE : PatLeaf <
+def COND_UNE : PatLeaf <
(cond),
- [{switch(N->get()){{default: return false;
- case ISD::SETOGE: case ISD::SETUGE:
- case ISD::SETGE: return true;}}}]
+ [{return N->get() == ISD::SETUNE || N->get() == ISD::SETNE;}]
>;
-def COND_LT : PatLeaf <
+def COND_O : PatLeaf <(cond), [{return N->get() == ISD::SETO;}]>;
+def COND_UO : PatLeaf <(cond), [{return N->get() == ISD::SETUO;}]>;
+
+//===----------------------------------------------------------------------===//
+// PatLeafs for unsigned comparisons
+//===----------------------------------------------------------------------===//
+
+def COND_UGT : PatLeaf <(cond), [{return N->get() == ISD::SETUGT;}]>;
+def COND_UGE : PatLeaf <(cond), [{return N->get() == ISD::SETUGE;}]>;
+def COND_ULT : PatLeaf <(cond), [{return N->get() == ISD::SETULT;}]>;
+def COND_ULE : PatLeaf <(cond), [{return N->get() == ISD::SETULE;}]>;
+
+//===----------------------------------------------------------------------===//
+// PatLeafs for signed comparisons
+//===----------------------------------------------------------------------===//
+
+def COND_SGT : PatLeaf <(cond), [{return N->get() == ISD::SETGT;}]>;
+def COND_SGE : PatLeaf <(cond), [{return N->get() == ISD::SETGE;}]>;
+def COND_SLT : PatLeaf <(cond), [{return N->get() == ISD::SETLT;}]>;
+def COND_SLE : PatLeaf <(cond), [{return N->get() == ISD::SETLE;}]>;
+
+//===----------------------------------------------------------------------===//
+// PatLeafs for integer equality
+//===----------------------------------------------------------------------===//
+
+def COND_EQ : PatLeaf <
(cond),
- [{switch(N->get()){{default: return false;
- case ISD::SETOLT: case ISD::SETULT:
- case ISD::SETLT: return true;}}}]
+ [{return N->get() == ISD::SETEQ || N->get() == ISD::SETUEQ;}]
>;
-def COND_LE : PatLeaf <
+def COND_NE : PatLeaf <
(cond),
- [{switch(N->get()){{default: return false;
- case ISD::SETOLE: case ISD::SETULE:
- case ISD::SETLE: return true;}}}]
+ [{return N->get() == ISD::SETNE || N->get() == ISD::SETUNE;}]
>;
def COND_NULL : PatLeaf <
@@ -96,6 +125,10 @@ def az_extloadi8 : PatFrag<(ops node:$ptr), (az_extload node:$ptr), [{
return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i8;
}]>;
+def az_extloadi8_global : PatFrag<(ops node:$ptr), (az_extloadi8 node:$ptr), [{
+ return isGlobalLoad(dyn_cast<LoadSDNode>(N));
+}]>;
+
def sextloadi8_global : PatFrag<(ops node:$ptr), (sextloadi8 node:$ptr), [{
return isGlobalLoad(dyn_cast<LoadSDNode>(N));
}]>;
@@ -108,8 +141,12 @@ def sextloadi8_constant : PatFrag<(ops node:$ptr), (sextloadi8 node:$ptr), [{
return isConstantLoad(dyn_cast<LoadSDNode>(N), -1);
}]>;
-def az_extloadi8_global : PatFrag<(ops node:$ptr), (az_extloadi8 node:$ptr), [{
- return isGlobalLoad(dyn_cast<LoadSDNode>(N));
+def az_extloadi8_local : PatFrag<(ops node:$ptr), (az_extloadi8 node:$ptr), [{
+ return isLocalLoad(dyn_cast<LoadSDNode>(N));
+}]>;
+
+def sextloadi8_local : PatFrag<(ops node:$ptr), (sextloadi8 node:$ptr), [{
+ return isLocalLoad(dyn_cast<LoadSDNode>(N));
}]>;
def az_extloadi16 : PatFrag<(ops node:$ptr), (az_extload node:$ptr), [{
@@ -132,6 +169,14 @@ def sextloadi16_constant : PatFrag<(ops node:$ptr), (sextloadi16 node:$ptr), [{
return isConstantLoad(dyn_cast<LoadSDNode>(N), -1);
}]>;
+def az_extloadi16_local : PatFrag<(ops node:$ptr), (az_extloadi16 node:$ptr), [{
+ return isLocalLoad(dyn_cast<LoadSDNode>(N));
+}]>;
+
+def sextloadi16_local : PatFrag<(ops node:$ptr), (sextloadi16 node:$ptr), [{
+ return isLocalLoad(dyn_cast<LoadSDNode>(N));
+}]>;
+
def az_extloadi32 : PatFrag<(ops node:$ptr), (az_extload node:$ptr), [{
return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i32;
}]>;
@@ -146,20 +191,55 @@ def az_extloadi32_constant : PatFrag<(ops node:$ptr),
return isConstantLoad(dyn_cast<LoadSDNode>(N), -1);
}]>;
-def local_load : PatFrag<(ops node:$ptr), (load node:$ptr), [{
- return isLocalLoad(dyn_cast<LoadSDNode>(N));
+def truncstorei8_global : PatFrag<(ops node:$val, node:$ptr),
+ (truncstorei8 node:$val, node:$ptr), [{
+ return isGlobalStore(dyn_cast<StoreSDNode>(N));
+}]>;
+
+def truncstorei16_global : PatFrag<(ops node:$val, node:$ptr),
+ (truncstorei16 node:$val, node:$ptr), [{
+ return isGlobalStore(dyn_cast<StoreSDNode>(N));
}]>;
def local_store : PatFrag<(ops node:$val, node:$ptr),
(store node:$val, node:$ptr), [{
- return isLocalStore(dyn_cast<StoreSDNode>(N));
+ return isLocalStore(dyn_cast<StoreSDNode>(N));
+}]>;
+
+def truncstorei8_local : PatFrag<(ops node:$val, node:$ptr),
+ (truncstorei8 node:$val, node:$ptr), [{
+ return isLocalStore(dyn_cast<StoreSDNode>(N));
+}]>;
+
+def truncstorei16_local : PatFrag<(ops node:$val, node:$ptr),
+ (truncstorei16 node:$val, node:$ptr), [{
+ return isLocalStore(dyn_cast<StoreSDNode>(N));
+}]>;
+
+def local_load : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+ return isLocalLoad(dyn_cast<LoadSDNode>(N));
+}]>;
+
+def atomic_load_add_local : PatFrag<(ops node:$ptr, node:$value),
+ (atomic_load_add node:$ptr, node:$value), [{
+ return dyn_cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
+}]>;
+
+def atomic_load_sub_local : PatFrag<(ops node:$ptr, node:$value),
+ (atomic_load_sub node:$ptr, node:$value), [{
+ return dyn_cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
+}]>;
+
+def mskor_global : PatFrag<(ops node:$val, node:$ptr),
+ (AMDGPUstore_mskor node:$val, node:$ptr), [{
+ return dyn_cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
}]>;
class Constants {
int TWO_PI = 0x40c90fdb;
int PI = 0x40490fdb;
int TWO_PI_INV = 0x3e22f983;
-int FP_UINT_MAX_PLUS_1 = 0x4f800000; // 1 << 32 in floating point encoding
+int FP_UINT_MAX_PLUS_1 = 0x4f800000; // 1 << 32 in floating point encoding
}
def CONST : Constants;
@@ -205,6 +285,8 @@ class FNEG <RegisterClass rc> : AMDGPUShaderInst <
multiclass RegisterLoadStore <RegisterClass dstClass, Operand addrClass,
ComplexPattern addrPat> {
+let UseNamedOperandTable = 1 in {
+
def RegisterLoad : AMDGPUShaderInst <
(outs dstClass:$dst),
(ins addrClass:$addr, i32imm:$chan),
@@ -223,6 +305,7 @@ multiclass RegisterLoadStore <RegisterClass dstClass, Operand addrClass,
let isRegisterStore = 1;
}
}
+}
} // End isCodeGenOnly = 1, isPseudo = 1
@@ -254,61 +337,12 @@ class Insert_Element <ValueType elem_type, ValueType vec_type,
(INSERT_SUBREG $vec, $elem, sub_reg)
>;
-// Vector Build pattern
-class Vector1_Build <ValueType vecType, ValueType elemType,
- RegisterClass rc> : Pat <
- (vecType (build_vector elemType:$src)),
- (vecType (COPY_TO_REGCLASS $src, rc))
->;
-
-class Vector2_Build <ValueType vecType, ValueType elemType> : Pat <
- (vecType (build_vector elemType:$sub0, elemType:$sub1)),
- (INSERT_SUBREG (INSERT_SUBREG
- (vecType (IMPLICIT_DEF)), $sub0, sub0), $sub1, sub1)
->;
-
class Vector4_Build <ValueType vecType, ValueType elemType> : Pat <
(vecType (build_vector elemType:$x, elemType:$y, elemType:$z, elemType:$w)),
(INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG
(vecType (IMPLICIT_DEF)), $x, sub0), $y, sub1), $z, sub2), $w, sub3)
>;
-class Vector8_Build <ValueType vecType, ValueType elemType> : Pat <
- (vecType (build_vector elemType:$sub0, elemType:$sub1,
- elemType:$sub2, elemType:$sub3,
- elemType:$sub4, elemType:$sub5,
- elemType:$sub6, elemType:$sub7)),
- (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG
- (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG
- (vecType (IMPLICIT_DEF)), $sub0, sub0), $sub1, sub1),
- $sub2, sub2), $sub3, sub3),
- $sub4, sub4), $sub5, sub5),
- $sub6, sub6), $sub7, sub7)
->;
-
-class Vector16_Build <ValueType vecType, ValueType elemType> : Pat <
- (vecType (build_vector elemType:$sub0, elemType:$sub1,
- elemType:$sub2, elemType:$sub3,
- elemType:$sub4, elemType:$sub5,
- elemType:$sub6, elemType:$sub7,
- elemType:$sub8, elemType:$sub9,
- elemType:$sub10, elemType:$sub11,
- elemType:$sub12, elemType:$sub13,
- elemType:$sub14, elemType:$sub15)),
- (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG
- (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG
- (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG
- (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG
- (vecType (IMPLICIT_DEF)), $sub0, sub0), $sub1, sub1),
- $sub2, sub2), $sub3, sub3),
- $sub4, sub4), $sub5, sub5),
- $sub6, sub6), $sub7, sub7),
- $sub8, sub8), $sub9, sub9),
- $sub10, sub10), $sub11, sub11),
- $sub12, sub12), $sub13, sub13),
- $sub14, sub14), $sub15, sub15)
->;
-
// XXX: Convert to new syntax and use COPY_TO_REG, once the DFAPacketizer
// can handle COPY instructions.
// bitconvert pattern
diff --git a/lib/Target/R600/AMDGPUMCInstLower.cpp b/lib/Target/R600/AMDGPUMCInstLower.cpp
index 1dc1c65..0ed598e 100644
--- a/lib/Target/R600/AMDGPUMCInstLower.cpp
+++ b/lib/Target/R600/AMDGPUMCInstLower.cpp
@@ -15,14 +15,19 @@
#include "AMDGPUMCInstLower.h"
#include "AMDGPUAsmPrinter.h"
+#include "InstPrinter/AMDGPUInstPrinter.h"
#include "R600InstrInfo.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/IR/Constants.h"
+#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCObjectStreamer.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Format.h"
+#include <algorithm>
using namespace llvm;
@@ -69,15 +74,45 @@ void AMDGPUAsmPrinter::EmitInstruction(const MachineInstr *MI) {
MachineBasicBlock::const_instr_iterator I = MI;
++I;
while (I != MBB->end() && I->isInsideBundle()) {
- MCInst MCBundleInst;
- const MachineInstr *BundledInst = I;
- MCInstLowering.lower(BundledInst, MCBundleInst);
- OutStreamer.EmitInstruction(MCBundleInst);
+ EmitInstruction(I);
++I;
}
} else {
MCInst TmpInst;
MCInstLowering.lower(MI, TmpInst);
OutStreamer.EmitInstruction(TmpInst);
+
+ if (DisasmEnabled) {
+ // Disassemble instruction/operands to text.
+ DisasmLines.resize(DisasmLines.size() + 1);
+ std::string &DisasmLine = DisasmLines.back();
+ raw_string_ostream DisasmStream(DisasmLine);
+
+ AMDGPUInstPrinter InstPrinter(*TM.getMCAsmInfo(), *TM.getInstrInfo(),
+ *TM.getRegisterInfo());
+ InstPrinter.printInst(&TmpInst, DisasmStream, StringRef());
+
+ // Disassemble instruction/operands to hex representation.
+ SmallVector<MCFixup, 4> Fixups;
+ SmallVector<char, 16> CodeBytes;
+ raw_svector_ostream CodeStream(CodeBytes);
+
+ MCObjectStreamer &ObjStreamer = (MCObjectStreamer &)OutStreamer;
+ MCCodeEmitter &InstEmitter = ObjStreamer.getAssembler().getEmitter();
+ InstEmitter.EncodeInstruction(TmpInst, CodeStream, Fixups);
+ CodeStream.flush();
+
+ HexLines.resize(HexLines.size() + 1);
+ std::string &HexLine = HexLines.back();
+ raw_string_ostream HexStream(HexLine);
+
+ for (size_t i = 0; i < CodeBytes.size(); i += 4) {
+ unsigned int CodeDWord = *(unsigned int *)&CodeBytes[i];
+ HexStream << format("%s%08X", (i > 0 ? " " : ""), CodeDWord);
+ }
+
+ DisasmStream.flush();
+ DisasmLineMaxLen = std::max(DisasmLineMaxLen, DisasmLine.size());
+ }
}
}
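// [Editor's note: illustrative sketch, not part of this patch.] The hex dump
// above reinterprets each group of 4 encoding bytes as a host-endian dword,
// so on a little-endian host the bytes 01 00 80 BF print as "BF800001". A
// standalone model of that formatting (std::memcpy performs the same
// reinterpretation without the aliasing cast used in the patch):
#include <cstdint>
#include <cstdio>
#include <cstring>
int main() {
  const unsigned char CodeBytes[4] = {0x01, 0x00, 0x80, 0xBF};
  uint32_t CodeDWord;
  std::memcpy(&CodeDWord, CodeBytes, sizeof(CodeDWord));
  std::printf("%08X\n", CodeDWord); // little-endian host: BF800001
  return 0;
}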
diff --git a/lib/Target/R600/AMDGPUMachineFunction.cpp b/lib/Target/R600/AMDGPUMachineFunction.cpp
index f2342b0..14171f4 100644
--- a/lib/Target/R600/AMDGPUMachineFunction.cpp
+++ b/lib/Target/R600/AMDGPUMachineFunction.cpp
@@ -6,6 +6,9 @@ using namespace llvm;
static const char *const ShaderTypeAttribute = "ShaderType";
+// Pin the vtable to this file.
+void AMDGPUMachineFunction::anchor() {}
+
AMDGPUMachineFunction::AMDGPUMachineFunction(const MachineFunction &MF) :
MachineFunctionInfo() {
ShaderType = ShaderType::COMPUTE;
diff --git a/lib/Target/R600/AMDGPUMachineFunction.h b/lib/Target/R600/AMDGPUMachineFunction.h
index 789b96a..fea0b39 100644
--- a/lib/Target/R600/AMDGPUMachineFunction.h
+++ b/lib/Target/R600/AMDGPUMachineFunction.h
@@ -14,13 +14,18 @@
#define AMDGPUMACHINEFUNCTION_H
#include "llvm/CodeGen/MachineFunction.h"
+#include <map>
namespace llvm {
class AMDGPUMachineFunction : public MachineFunctionInfo {
+ virtual void anchor();
public:
AMDGPUMachineFunction(const MachineFunction &MF);
unsigned ShaderType;
+ /// A map to keep track of local memory objects and their offsets within
+ /// the local memory space.
+ std::map<const GlobalValue *, unsigned> LocalMemoryObjects;
/// Number of bytes in the LDS that are being used.
unsigned LDSSize;
};
diff --git a/lib/Target/R600/AMDGPURegisterInfo.cpp b/lib/Target/R600/AMDGPURegisterInfo.cpp
index 3402092..47617a7 100644
--- a/lib/Target/R600/AMDGPURegisterInfo.cpp
+++ b/lib/Target/R600/AMDGPURegisterInfo.cpp
@@ -46,27 +46,21 @@ unsigned AMDGPURegisterInfo::getFrameRegister(const MachineFunction &MF) const {
return 0;
}
+unsigned AMDGPURegisterInfo::getSubRegFromChannel(unsigned Channel) const {
+ static const unsigned SubRegs[] = {
+ AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3, AMDGPU::sub4,
+ AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7, AMDGPU::sub8, AMDGPU::sub9,
+ AMDGPU::sub10, AMDGPU::sub11, AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14,
+ AMDGPU::sub15
+ };
+
+ assert(Channel < array_lengthof(SubRegs));
+ return SubRegs[Channel];
+}
+
unsigned AMDGPURegisterInfo::getIndirectSubReg(unsigned IndirectIndex) const {
- switch(IndirectIndex) {
- case 0: return AMDGPU::sub0;
- case 1: return AMDGPU::sub1;
- case 2: return AMDGPU::sub2;
- case 3: return AMDGPU::sub3;
- case 4: return AMDGPU::sub4;
- case 5: return AMDGPU::sub5;
- case 6: return AMDGPU::sub6;
- case 7: return AMDGPU::sub7;
- case 8: return AMDGPU::sub8;
- case 9: return AMDGPU::sub9;
- case 10: return AMDGPU::sub10;
- case 11: return AMDGPU::sub11;
- case 12: return AMDGPU::sub12;
- case 13: return AMDGPU::sub13;
- case 14: return AMDGPU::sub14;
- case 15: return AMDGPU::sub15;
- default: llvm_unreachable("indirect index out of range");
- }
+ return getSubRegFromChannel(IndirectIndex);
}
#define GET_REGINFO_TARGET_DESC
diff --git a/lib/Target/R600/AMDGPURegisterInfo.h b/lib/Target/R600/AMDGPURegisterInfo.h
index 7cbd34b..688e1a0 100644
--- a/lib/Target/R600/AMDGPURegisterInfo.h
+++ b/lib/Target/R600/AMDGPURegisterInfo.h
@@ -50,6 +50,14 @@ struct AMDGPURegisterInfo : public AMDGPUGenRegisterInfo {
assert(!"Unimplemented"); return NULL;
}
+ virtual unsigned getHWRegIndex(unsigned Reg) const {
+ assert(!"Unimplemented"); return 0;
+ }
+
+ /// \returns the sub reg enum value for the given \p Channel
+ /// (e.g. getSubRegFromChannel(0) -> AMDGPU::sub0)
+ unsigned getSubRegFromChannel(unsigned Channel) const;
+
const uint16_t* getCalleeSavedRegs(const MachineFunction *MF) const;
void eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj,
unsigned FIOperandNum,
diff --git a/lib/Target/R600/AMDGPUSubtarget.cpp b/lib/Target/R600/AMDGPUSubtarget.cpp
index 8ed5a74..061793a 100644
--- a/lib/Target/R600/AMDGPUSubtarget.cpp
+++ b/lib/Target/R600/AMDGPUSubtarget.cpp
@@ -13,7 +13,6 @@
//===----------------------------------------------------------------------===//
#include "AMDGPUSubtarget.h"
-#include <stdio.h>
using namespace llvm;
@@ -37,6 +36,8 @@ AMDGPUSubtarget::AMDGPUSubtarget(StringRef TT, StringRef CPU, StringRef FS) :
Gen = AMDGPUSubtarget::R600;
FP64 = false;
CaymanISA = false;
+ EnableIRStructurizer = true;
+ EnableIfCvt = true;
ParseSubtargetFeatures(GPU, FS);
DevName = GPU;
}
@@ -66,6 +67,14 @@ AMDGPUSubtarget::hasCaymanISA() const {
return CaymanISA;
}
bool
+AMDGPUSubtarget::IsIRStructurizerEnabled() const {
+ return EnableIRStructurizer;
+}
+bool
+AMDGPUSubtarget::isIfCvtEnabled() const {
+ return EnableIfCvt;
+}
+bool
AMDGPUSubtarget::isTargetELF() const {
return false;
}
@@ -98,6 +107,10 @@ AMDGPUSubtarget::getDataLayout() const {
DataLayout.append("-p:32:32:32");
}
+ if (Gen >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
+ DataLayout.append("-p3:32:32:32");
+ }
+
return DataLayout;
}
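// [Editor's note: illustration, not part of this patch; the triple string is
// assumed and the check is a hypothetical unit test.] For a Southern Islands
// subtarget the string returned above now also declares 32-bit pointers for
// address space 3 (local/LDS):
//   AMDGPUSubtarget ST("r600--", "verde", "");
//   assert(ST.getDataLayout().find("-p3:32:32:32") != std::string::npos);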
diff --git a/lib/Target/R600/AMDGPUSubtarget.h b/lib/Target/R600/AMDGPUSubtarget.h
index 8c65096..4288d27 100644
--- a/lib/Target/R600/AMDGPUSubtarget.h
+++ b/lib/Target/R600/AMDGPUSubtarget.h
@@ -33,7 +33,8 @@ public:
R700,
EVERGREEN,
NORTHERN_ISLANDS,
- SOUTHERN_ISLANDS
+ SOUTHERN_ISLANDS,
+ SEA_ISLANDS
};
private:
@@ -48,6 +49,8 @@ private:
enum Generation Gen;
bool FP64;
bool CaymanISA;
+ bool EnableIRStructurizer;
+ bool EnableIfCvt;
InstrItineraryData InstrItins;
@@ -63,6 +66,12 @@ public:
enum Generation getGeneration() const;
bool hasHWFP64() const;
bool hasCaymanISA() const;
+ bool IsIRStructurizerEnabled() const;
+ bool isIfCvtEnabled() const;
+
+ virtual bool enableMachineScheduler() const {
+ return getGeneration() <= NORTHERN_ISLANDS;
+ }
// Helper functions to simplify if statements
bool isTargetELF() const;
diff --git a/lib/Target/R600/AMDGPUTargetMachine.cpp b/lib/Target/R600/AMDGPUTargetMachine.cpp
index 5ebc5f2..bc4f5d7 100644
--- a/lib/Target/R600/AMDGPUTargetMachine.cpp
+++ b/lib/Target/R600/AMDGPUTargetMachine.cpp
@@ -33,6 +33,7 @@
#include "llvm/Transforms/Scalar.h"
#include <llvm/CodeGen/Passes.h>
+
using namespace llvm;
extern "C" void LLVMInitializeR600Target() {
@@ -58,8 +59,9 @@ AMDGPUTargetMachine::AMDGPUTargetMachine(const Target &T, StringRef TT,
LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OptLevel),
Subtarget(TT, CPU, FS),
Layout(Subtarget.getDataLayout()),
- FrameLowering(TargetFrameLowering::StackGrowsUp, 16 // Stack Alignment
- , 0),
+ FrameLowering(TargetFrameLowering::StackGrowsUp,
+ 64 * 16 // Maximum stack alignment (long16)
+ , 0),
IntrinsicInfo(this),
InstrItins(&Subtarget.getInstrItineraryData()) {
// TLInfo uses InstrInfo so it must be initialized after.
@@ -80,17 +82,20 @@ namespace {
class AMDGPUPassConfig : public TargetPassConfig {
public:
AMDGPUPassConfig(AMDGPUTargetMachine *TM, PassManagerBase &PM)
- : TargetPassConfig(TM, PM) {
- const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
- if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
- enablePass(&MachineSchedulerID);
- MachineSchedRegistry::setDefault(createR600MachineScheduler);
- }
- }
+ : TargetPassConfig(TM, PM) {}
AMDGPUTargetMachine &getAMDGPUTargetMachine() const {
return getTM<AMDGPUTargetMachine>();
}
+
+ virtual ScheduleDAGInstrs *
+ createMachineScheduler(MachineSchedContext *C) const {
+ const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
+ if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS)
+ return createR600MachineScheduler(C);
+ return 0;
+ }
+
virtual bool addPreISel();
virtual bool addInstSelector();
virtual bool addPreRegAlloc();
@@ -120,8 +125,11 @@ bool
AMDGPUPassConfig::addPreISel() {
const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
addPass(createFlattenCFGPass());
- if (ST.getGeneration() > AMDGPUSubtarget::NORTHERN_ISLANDS) {
+ if (ST.IsIRStructurizerEnabled())
addPass(createStructurizeCFGPass());
+ if (ST.getGeneration() > AMDGPUSubtarget::NORTHERN_ISLANDS) {
+ addPass(createSinkingPass());
+ addPass(createSITypeRewriter());
addPass(createSIAnnotateControlFlowPass());
} else {
addPass(createR600TextureIntrinsicsReplacer());
@@ -131,12 +139,6 @@ AMDGPUPassConfig::addPreISel() {
bool AMDGPUPassConfig::addInstSelector() {
addPass(createAMDGPUISelDag(getAMDGPUTargetMachine()));
-
- const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
- if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
- // This callbacks this pass uses are not implemented yet on SI.
- addPass(createAMDGPUIndirectAddressingPass(*TM));
- }
return false;
}
@@ -164,10 +166,12 @@ bool AMDGPUPassConfig::addPostRegAlloc() {
bool AMDGPUPassConfig::addPreSched2() {
const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
- if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
+ if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS)
addPass(createR600EmitClauseMarkers(*TM));
- }
- addPass(&IfConverterID);
+ if (ST.isIfCvtEnabled())
+ addPass(&IfConverterID);
+ if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS)
+ addPass(createR600ClauseMergePass(*TM));
return false;
}
@@ -185,4 +189,3 @@ bool AMDGPUPassConfig::addPreEmitPass() {
return false;
}
-
diff --git a/lib/Target/R600/AMDILCFGStructurizer.cpp b/lib/Target/R600/AMDILCFGStructurizer.cpp
index 687eadb..507570f 100644
--- a/lib/Target/R600/AMDILCFGStructurizer.cpp
+++ b/lib/Target/R600/AMDILCFGStructurizer.cpp
@@ -1005,13 +1005,14 @@ int AMDGPUCFGStructurizer::ifPatternMatch(MachineBasicBlock *MBB) {
return 0;
assert(isCondBranch(BranchMI));
+ int NumMatch = 0;
MachineBasicBlock *TrueMBB = getTrueBranch(BranchMI);
- serialPatternMatch(TrueMBB);
- ifPatternMatch(TrueMBB);
+ NumMatch += serialPatternMatch(TrueMBB);
+ NumMatch += ifPatternMatch(TrueMBB);
MachineBasicBlock *FalseMBB = getFalseBranch(MBB, BranchMI);
- serialPatternMatch(FalseMBB);
- ifPatternMatch(FalseMBB);
+ NumMatch += serialPatternMatch(FalseMBB);
+ NumMatch += ifPatternMatch(FalseMBB);
MachineBasicBlock *LandBlk;
int Cloned = 0;
@@ -1040,7 +1041,7 @@ int AMDGPUCFGStructurizer::ifPatternMatch(MachineBasicBlock *MBB) {
&& isSameloopDetachedContbreak(FalseMBB, TrueMBB)) {
LandBlk = *TrueMBB->succ_begin();
} else {
- return handleJumpintoIf(MBB, TrueMBB, FalseMBB);
+ return NumMatch + handleJumpintoIf(MBB, TrueMBB, FalseMBB);
}
// improveSimpleJumpintoIf can handle the case where landBlk == NULL but the
@@ -1068,7 +1069,7 @@ int AMDGPUCFGStructurizer::ifPatternMatch(MachineBasicBlock *MBB) {
numClonedBlock += Cloned;
- return 1 + Cloned;
+ return 1 + Cloned + NumMatch;
}
int AMDGPUCFGStructurizer::loopendPatternMatch() {
@@ -1233,7 +1234,7 @@ int AMDGPUCFGStructurizer::handleJumpintoIfImp(MachineBasicBlock *HeadMBB,
numClonedBlock += Num;
Num += serialPatternMatch(*HeadMBB->succ_begin());
- Num += serialPatternMatch(*(++HeadMBB->succ_begin()));
+ Num += serialPatternMatch(*llvm::next(HeadMBB->succ_begin()));
Num += ifPatternMatch(HeadMBB);
assert(Num > 0);
@@ -1335,32 +1336,77 @@ int AMDGPUCFGStructurizer::improveSimpleJumpintoIf(MachineBasicBlock *HeadMBB,
// add initReg = initVal to headBlk
const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
- unsigned InitReg =
- HeadMBB->getParent()->getRegInfo().createVirtualRegister(I32RC);
- if (!MigrateTrue || !MigrateFalse)
- llvm_unreachable("Extra register needed to handle CFG");
+ if (!MigrateTrue || !MigrateFalse) {
+ // XXX: We have an opportunity here to optimize the "branch into if" case
+ // here. Branch into if looks like this:
+ // entry
+ // / |
+ // diamond_head branch_from
+ // / \ |
+ // diamond_false diamond_true
+ // \ /
+ // done
+ //
+ // The diamond_head block begins the "if" and the diamond_true block
+ // is the block being "branched into".
+ //
+ // If MigrateTrue is true, then TrueBB is the block being "branched into"
+ // and if MigrateFalse is true, then FalseBB is the block being
+ // "branched into"
+ //
+ // Here is the pseudo code for how I think the optimization should work:
+ // 1. Insert MOV GPR0, 0 before the branch instruction in diamond_head.
+ // 2. Insert MOV GPR0, 1 before the branch instruction in branch_from.
+ // 3. Move the branch instruction from diamond_head into its own basic
+ // block (new_block).
+ // 4. Add an unconditional branch from diamond_head to new_block
+ // 5. Replace the branch instruction in branch_from with an unconditional
+ // branch to new_block. If branch_from has multiple predecessors, then
+ // we need to replace the True/False block in the branch
+ // instruction instead of replacing it.
+ // 6. Change the condition of the branch instruction in new_block from
+ // COND to (COND || GPR0)
+ //
+ // In order to insert these MOV instructions, we will need to use the
+ // RegisterScavenger. Usually liveness stops being tracked during
+ // the late machine optimization passes, however if we implement
+ // bool TargetRegisterInfo::requiresRegisterScavenging(
+ // const MachineFunction &MF)
+ // and have it return true, liveness will be tracked correctly
+ // by generic optimization passes. We will also need to make sure that
+ // all of our target-specific passes that run after regalloc and before
+ // the CFGStructurizer track liveness and we will need to modify this pass
+ // to correctly track liveness.
+ //
+ // After the above changes, the new CFG should look like this:
+ // entry
+ // / |
+ // diamond_head branch_from
+ // \ /
+ // new_block
+ // / |
+ // diamond_false diamond_true
+ // \ /
+ // done
+ //
+ // Without this optimization, we are forced to duplicate the diamond_true
+ // block and we will end up with a CFG like this:
+ //
+ // entry
+ // / |
+ // diamond_head branch_from
+ // / \ |
+ // diamond_false diamond_true diamond_true (duplicate)
+ // \ / |
+ // done --------------------|
+ //
+ // Duplicating diamond_true can be very costly especially if it has a
+ // lot of instructions.
+ return 0;
+ }
int NumNewBlk = 0;
- if (!LandBlk) {
- LandBlk = HeadMBB->getParent()->CreateMachineBasicBlock();
- HeadMBB->getParent()->push_back(LandBlk); //insert to function
-
- if (TrueMBB) {
- TrueMBB->addSuccessor(LandBlk);
- } else {
- HeadMBB->addSuccessor(LandBlk);
- }
-
- if (FalseMBB) {
- FalseMBB->addSuccessor(LandBlk);
- } else {
- HeadMBB->addSuccessor(LandBlk);
- }
-
- NumNewBlk ++;
- }
-
bool LandBlkHasOtherPred = (LandBlk->pred_size() > 2);
//insert AMDGPU::ENDIF to avoid special case "input landBlk == NULL"
@@ -1375,6 +1421,10 @@ int AMDGPUCFGStructurizer::improveSimpleJumpintoIf(MachineBasicBlock *HeadMBB,
CmpResReg, DebugLoc());
}
+ // XXX: We are running this after RA, so creating virtual registers will
+ // cause an assertion failure in the PostRA scheduling pass.
+ unsigned InitReg =
+ HeadMBB->getParent()->getRegInfo().createVirtualRegister(I32RC);
insertCondBranchBefore(LandBlk, I, AMDGPU::IF_PREDICATE_SET, InitReg,
DebugLoc());
@@ -1713,7 +1763,7 @@ void AMDGPUCFGStructurizer::removeRedundantConditionalBranch(
if (MBB->succ_size() != 2)
return;
MachineBasicBlock *MBB1 = *MBB->succ_begin();
- MachineBasicBlock *MBB2 = *(++MBB->succ_begin());
+ MachineBasicBlock *MBB2 = *llvm::next(MBB->succ_begin());
if (MBB1 != MBB2)
return;
diff --git a/lib/Target/R600/AMDILInstrInfo.td b/lib/Target/R600/AMDILInstrInfo.td
index f7d0bd5..0f0c88d 100644
--- a/lib/Target/R600/AMDILInstrInfo.td
+++ b/lib/Target/R600/AMDILInstrInfo.td
@@ -118,15 +118,15 @@ class ILFormat<dag outs, dag ins, string asmstr, list<dag> pattern>
// Multiclass Instruction formats
//===--------------------------------------------------------------------===//
// Multiclass that handles branch instructions
-multiclass BranchConditional<SDNode Op> {
+multiclass BranchConditional<SDNode Op, RegisterClass rci, RegisterClass rcf> {
def _i32 : ILFormat<(outs),
- (ins brtarget:$target, GPRI32:$src0),
+ (ins brtarget:$target, rci:$src0),
"; i32 Pseudo branch instruction",
- [(Op bb:$target, GPRI32:$src0)]>;
+ [(Op bb:$target, (i32 rci:$src0))]>;
def _f32 : ILFormat<(outs),
- (ins brtarget:$target, GPRF32:$src0),
+ (ins brtarget:$target, rcf:$src0),
"; f32 Pseudo branch instruction",
- [(Op bb:$target, GPRF32:$src0)]>;
+ [(Op bb:$target, (f32 rcf:$src0))]>;
}
// Only scalar types should generate flow control
diff --git a/lib/Target/R600/CMakeLists.txt b/lib/Target/R600/CMakeLists.txt
index fde187b..9f8f6a8 100644
--- a/lib/Target/R600/CMakeLists.txt
+++ b/lib/Target/R600/CMakeLists.txt
@@ -17,7 +17,6 @@ add_llvm_target(R600CodeGen
AMDILISelLowering.cpp
AMDGPUAsmPrinter.cpp
AMDGPUFrameLowering.cpp
- AMDGPUIndirectAddressing.cpp
AMDGPUISelDAGToDAG.cpp
AMDGPUMCInstLower.cpp
AMDGPUMachineFunction.cpp
@@ -28,6 +27,7 @@ add_llvm_target(R600CodeGen
AMDGPUConvertToISA.cpp
AMDGPUInstrInfo.cpp
AMDGPURegisterInfo.cpp
+ R600ClauseMergePass.cpp
R600ControlFlowFinalizer.cpp
R600EmitClauseMarkers.cpp
R600ExpandSpecialInstrs.cpp
@@ -47,6 +47,7 @@ add_llvm_target(R600CodeGen
SILowerControlFlow.cpp
SIMachineFunctionInfo.cpp
SIRegisterInfo.cpp
+ SITypeRewriter.cpp
)
add_dependencies(LLVMR600CodeGen AMDGPUCommonTableGen intrinsics_gen)
diff --git a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp
index fac3c39..99e1377 100644
--- a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp
+++ b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp
@@ -23,6 +23,63 @@ void AMDGPUInstPrinter::printInst(const MCInst *MI, raw_ostream &OS,
printAnnotation(OS, Annot);
}
+void AMDGPUInstPrinter::printRegOperand(unsigned reg, raw_ostream &O) {
+ switch (reg) {
+ case AMDGPU::VCC:
+ O << "vcc";
+ return;
+ case AMDGPU::SCC:
+ O << "scc";
+ return;
+ case AMDGPU::EXEC:
+ O << "exec";
+ return;
+ case AMDGPU::M0:
+ O << "m0";
+ return;
+ default:
+ break;
+ }
+
+ // It seems there's no way to use SIRegisterInfo here, and dealing with the
+ // giant enum of all the different shifted sets of registers is pretty
+ // unmanageable, so parse the name and reformat it to be prettier.
+ StringRef Name(getRegisterName(reg));
+
+ std::pair<StringRef, StringRef> Split = Name.split('_');
+ StringRef SubRegName = Split.first;
+ StringRef Rest = Split.second;
+
+ if (SubRegName.size() <= 4) { // Must be longer than "SGPR"/"VGPR" to carry an index.
+ O << Name;
+ return;
+ }
+
+ unsigned RegIndex;
+ StringRef RegIndexStr = SubRegName.drop_front(4);
+
+ if (RegIndexStr.getAsInteger(10, RegIndex)) {
+ O << Name;
+ return;
+ }
+
+ if (SubRegName.front() == 'V')
+ O << 'v';
+ else if (SubRegName.front() == 'S')
+ O << 's';
+ else {
+ O << Name;
+ return;
+ }
+
+ if (Rest.empty()) // Only 1 32-bit register
+ O << RegIndex;
+ else {
+ unsigned NumReg = Rest.count('_') + 2;
+ O << '[' << RegIndex << ':' << (RegIndex + NumReg - 1) << ']';
+ }
+}
+
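// [Editor's note: illustration, not part of this patch.] Sample mappings
// produced by printRegOperand above, given TableGen register names:
//   VGPR7              -> v7       (Rest empty: a single 32-bit register)
//   SGPR0_SGPR1        -> s[0:1]   (Rest = "SGPR1", NumReg = 0 + 2)
//   VGPR4_VGPR5_VGPR6  -> v[4:6]   (Rest has one '_', NumReg = 1 + 2)
//   EXEC               -> exec     (special-cased before any parsing)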
void AMDGPUInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
raw_ostream &O) {
@@ -30,8 +87,12 @@ void AMDGPUInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
if (Op.isReg()) {
switch (Op.getReg()) {
// This is the default predicate state, so we don't need to print it.
- case AMDGPU::PRED_SEL_OFF: break;
- default: O << getRegisterName(Op.getReg()); break;
+ case AMDGPU::PRED_SEL_OFF:
+ break;
+
+ default:
+ printRegOperand(Op.getReg(), O);
+ break;
}
} else if (Op.isImm()) {
O << Op.getImm();
@@ -255,4 +316,21 @@ void AMDGPUInstPrinter::printKCache(const MCInst *MI, unsigned OpNo,
}
}
+void AMDGPUInstPrinter::printWaitFlag(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ // Note: Mask values are taken from SIInsertWaits.cpp, not from the ISA docs.
+ // SIInsertWaits.cpp's bit usage does not match the ISA docs' description,
+ // but it works, so the mismatch may be a misprint in the docs.
+ unsigned SImm16 = MI->getOperand(OpNo).getImm();
+ unsigned Vmcnt = SImm16 & 0xF;
+ unsigned Expcnt = (SImm16 >> 4) & 0xF;
+ unsigned Lgkmcnt = (SImm16 >> 8) & 0xF;
+ if (Vmcnt != 0xF)
+ O << "vmcnt(" << Vmcnt << ") ";
+ if (Expcnt != 0x7)
+ O << "expcnt(" << Expcnt << ") ";
+ if (Lgkmcnt != 0x7)
+ O << "lgkmcnt(" << Lgkmcnt << ")";
+}
+
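// [Editor's note: illustration, not part of this patch.] Decoding, say,
// SImm16 = 0x0172 with the masks above gives Vmcnt = 2, Expcnt = 7 and
// Lgkmcnt = 1, which prints as "vmcnt(2) lgkmcnt(1)"; Expcnt == 0x7 is the
// no-wait default and is suppressed.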
#include "AMDGPUGenAsmWriter.inc"
diff --git a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h
index 4c1dfa6..77af942 100644
--- a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h
+++ b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h
@@ -32,6 +32,7 @@ public:
virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot);
private:
+ void printRegOperand(unsigned RegNo, raw_ostream &O);
void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printInterpSlot(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printMemOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
@@ -52,6 +53,7 @@ private:
void printRSel(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printCT(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printKCache(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printWaitFlag(const MCInst *MI, unsigned OpNo, raw_ostream &O);
};
} // End namespace llvm
diff --git a/lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp b/lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp
index 9a36903..29d0acf 100644
--- a/lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp
+++ b/lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp
@@ -95,7 +95,9 @@ public:
} // end anonymous namespace
-MCAsmBackend *llvm::createAMDGPUAsmBackend(const Target &T, StringRef TT,
+MCAsmBackend *llvm::createAMDGPUAsmBackend(const Target &T,
+ const MCRegisterInfo &MRI,
+ StringRef TT,
StringRef CPU) {
return new ELFAMDGPUAsmBackend(T);
}
diff --git a/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp b/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp
index 59136f3..4a8e1b0 100644
--- a/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp
+++ b/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp
@@ -21,7 +21,6 @@ AMDGPUMCAsmInfo::AMDGPUMCAsmInfo(StringRef &TT) : MCAsmInfo() {
HasStaticCtorDtorReferenceInStaticMode = false;
LinkerRequiresNonEmptyDwarfLines = true;
MaxInstLength = 16;
- PCSymbol = "$";
SeparatorString = "\n";
CommentColumn = 40;
CommentString = ";";
@@ -32,9 +31,6 @@ AMDGPUMCAsmInfo::AMDGPUMCAsmInfo(StringRef &TT) : MCAsmInfo() {
InlineAsmStart = ";#ASMSTART";
InlineAsmEnd = ";#ASMEND";
AssemblerDialect = 0;
- AllowQuotesInName = false;
- AllowNameToStartWithDigit = false;
- AllowPeriodsInName = false;
//===--- Data Emission Directives -------------------------------------===//
ZeroDirective = ".zero";
@@ -56,13 +52,11 @@ AMDGPUMCAsmInfo::AMDGPUMCAsmInfo(StringRef &TT) : MCAsmInfo() {
//===--- Global Variable Emission Directives --------------------------===//
GlobalDirective = ".global";
- ExternDirective = ".extern";
HasSetDirective = false;
HasAggressiveSymbolFolding = true;
COMMDirectiveAlignmentIsInBytes = false;
HasDotTypeDotSizeDirective = false;
HasNoDeadStrip = true;
- HasSymbolResolver = false;
WeakRefDirective = ".weakref\t";
LinkOnceDirective = 0;
//===--- Dwarf Emission Directives -----------------------------------===//
diff --git a/lib/Target/R600/MCTargetDesc/AMDGPUMCCodeEmitter.cpp b/lib/Target/R600/MCTargetDesc/AMDGPUMCCodeEmitter.cpp
new file mode 100644
index 0000000..521b3b3
--- /dev/null
+++ b/lib/Target/R600/MCTargetDesc/AMDGPUMCCodeEmitter.cpp
@@ -0,0 +1,21 @@
+//===-- AMDGPUMCCodeEmitter.cpp - AMDGPU Code Emitter interface -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief CodeEmitter interface for R600 and SI codegen.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPUMCCodeEmitter.h"
+
+using namespace llvm;
+
+// Pin the vtable to this file.
+void AMDGPUMCCodeEmitter::anchor() {}
+
diff --git a/lib/Target/R600/MCTargetDesc/AMDGPUMCCodeEmitter.h b/lib/Target/R600/MCTargetDesc/AMDGPUMCCodeEmitter.h
index cd3a7ce..d8cf64a 100644
--- a/lib/Target/R600/MCTargetDesc/AMDGPUMCCodeEmitter.h
+++ b/lib/Target/R600/MCTargetDesc/AMDGPUMCCodeEmitter.h
@@ -24,6 +24,7 @@ class MCInst;
class MCOperand;
class AMDGPUMCCodeEmitter : public MCCodeEmitter {
+ virtual void anchor();
public:
uint64_t getBinaryCodeForInstr(const MCInst &MI,
diff --git a/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.cpp b/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.cpp
index 61d70bb..a1bec28 100644
--- a/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.cpp
+++ b/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.cpp
@@ -88,7 +88,7 @@ static MCStreamer *createMCStreamer(const Target &T, StringRef TT,
MCCodeEmitter *_Emitter,
bool RelaxAll,
bool NoExecStack) {
- return createELFStreamer(Ctx, MAB, _OS, _Emitter, false, false);
+ return createELFStreamer(Ctx, 0, MAB, _OS, _Emitter, false, false);
}
extern "C" void LLVMInitializeR600TargetMC() {
diff --git a/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.h b/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.h
index abb0320..f6b3376 100644
--- a/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.h
+++ b/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.h
@@ -40,8 +40,8 @@ MCCodeEmitter *createSIMCCodeEmitter(const MCInstrInfo &MCII,
const MCSubtargetInfo &STI,
MCContext &Ctx);
-MCAsmBackend *createAMDGPUAsmBackend(const Target &T, StringRef TT,
- StringRef CPU);
+MCAsmBackend *createAMDGPUAsmBackend(const Target &T, const MCRegisterInfo &MRI,
+ StringRef TT, StringRef CPU);
MCObjectWriter *createAMDGPUELFObjectWriter(raw_ostream &OS);
} // End llvm namespace
diff --git a/lib/Target/R600/MCTargetDesc/CMakeLists.txt b/lib/Target/R600/MCTargetDesc/CMakeLists.txt
index 3ccdf42..98f6925 100644
--- a/lib/Target/R600/MCTargetDesc/CMakeLists.txt
+++ b/lib/Target/R600/MCTargetDesc/CMakeLists.txt
@@ -2,6 +2,7 @@
add_llvm_library(LLVMR600Desc
AMDGPUAsmBackend.cpp
AMDGPUELFObjectWriter.cpp
+ AMDGPUMCCodeEmitter.cpp
AMDGPUMCTargetDesc.cpp
AMDGPUMCAsmInfo.cpp
R600MCCodeEmitter.cpp
diff --git a/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp b/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp
index f470783..dd8df65 100644
--- a/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp
+++ b/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp
@@ -24,7 +24,6 @@
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/raw_ostream.h"
-#include <stdio.h>
using namespace llvm;
diff --git a/lib/Target/R600/Processors.td b/lib/Target/R600/Processors.td
index 4631c04..ee190e4 100644
--- a/lib/Target/R600/Processors.td
+++ b/lib/Target/R600/Processors.td
@@ -48,6 +48,7 @@ def : Proc<"pitcairn", SI_Itin, [FeatureSouthernIslands]>;
def : Proc<"verde", SI_Itin, [FeatureSouthernIslands]>;
def : Proc<"oland", SI_Itin, [FeatureSouthernIslands]>;
def : Proc<"hainan", SI_Itin, [FeatureSouthernIslands]>;
-def : Proc<"bonaire", SI_Itin, [FeatureSouthernIslands]>;
-def : Proc<"kabini", SI_Itin, [FeatureSouthernIslands]>;
-def : Proc<"kaveri", SI_Itin, [FeatureSouthernIslands]>;
+def : Proc<"bonaire", SI_Itin, [FeatureSeaIslands]>;
+def : Proc<"kabini", SI_Itin, [FeatureSeaIslands]>;
+def : Proc<"kaveri", SI_Itin, [FeatureSeaIslands]>;
+def : Proc<"hawaii", SI_Itin, [FeatureSeaIslands]>;
diff --git a/lib/Target/R600/R600ClauseMergePass.cpp b/lib/Target/R600/R600ClauseMergePass.cpp
new file mode 100644
index 0000000..33d2ca3
--- /dev/null
+++ b/lib/Target/R600/R600ClauseMergePass.cpp
@@ -0,0 +1,204 @@
+//===-- R600ClauseMergePass - Merge consecutive CF_ALU -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// The R600EmitClauseMarkers pass emits CF_ALU instructions in a conservative
+/// manner; this pass merges consecutive CF_ALU clauses where applicable.
+/// It needs to be called after IfCvt for best results.
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "r600mergeclause"
+#include "AMDGPU.h"
+#include "R600Defines.h"
+#include "R600InstrInfo.h"
+#include "R600MachineFunctionInfo.h"
+#include "R600RegisterInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+namespace {
+
+static bool isCFAlu(const MachineInstr *MI) {
+ switch (MI->getOpcode()) {
+ case AMDGPU::CF_ALU:
+ case AMDGPU::CF_ALU_PUSH_BEFORE:
+ return true;
+ default:
+ return false;
+ }
+}
+
+class R600ClauseMergePass : public MachineFunctionPass {
+
+private:
+ static char ID;
+ const R600InstrInfo *TII;
+
+ unsigned getCFAluSize(const MachineInstr *MI) const;
+ bool isCFAluEnabled(const MachineInstr *MI) const;
+
+ /// The IfCvt pass can generate "disabled" ALU clause markers that need to
+ /// be removed, with their contents merged into the previous ALU clause.
+ /// This function parses the instructions following CFAlu until it finds
+ /// either a disabled CFAlu, whose contents it merges, or an enabled CFAlu.
+ void cleanPotentialDisabledCFAlu(MachineInstr *CFAlu) const;
+
+ /// Check whether LatrCFAlu can be merged into RootCFAlu and do so if
+ /// possible.
+ bool mergeIfPossible(MachineInstr *RootCFAlu, const MachineInstr *LatrCFAlu)
+ const;
+
+public:
+ R600ClauseMergePass(TargetMachine &tm) : MachineFunctionPass(ID) { }
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ const char *getPassName() const;
+};
+
+char R600ClauseMergePass::ID = 0;
+
+unsigned R600ClauseMergePass::getCFAluSize(const MachineInstr *MI) const {
+ assert(isCFAlu(MI));
+ return MI->getOperand(
+ TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::COUNT)).getImm();
+}
+
+bool R600ClauseMergePass::isCFAluEnabled(const MachineInstr *MI) const {
+ assert(isCFAlu(MI));
+ return MI->getOperand(
+ TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::Enabled)).getImm();
+}
+
+void R600ClauseMergePass::cleanPotentialDisabledCFAlu(MachineInstr *CFAlu)
+ const {
+ int CntIdx = TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::COUNT);
+ MachineBasicBlock::iterator I = CFAlu, E = CFAlu->getParent()->end();
+ I++;
+ do {
+ while (I != E && !isCFAlu(I))
+ I++;
+ if (I == E)
+ return;
+ MachineInstr *MI = I++;
+ if (isCFAluEnabled(MI))
+ break;
+ CFAlu->getOperand(CntIdx).setImm(getCFAluSize(CFAlu) + getCFAluSize(MI));
+ MI->eraseFromParent();
+ } while (I != E);
+}
+
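// [Editor's note: illustration, not part of this patch.] Given the sequence
//   CF_ALU (Enabled = 1, COUNT = 3)   <- the CFAlu argument
//   ... non-CFAlu instructions ...
//   CF_ALU (Enabled = 0, COUNT = 2)   <- disabled marker left by IfCvt
// the loop above erases the disabled marker and folds its count in, leaving
//   CF_ALU (Enabled = 1, COUNT = 5)
// and stops as soon as it meets an enabled CF_ALU instead.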
+bool R600ClauseMergePass::mergeIfPossible(MachineInstr *RootCFAlu,
+ const MachineInstr *LatrCFAlu) const {
+ assert(isCFAlu(RootCFAlu) && isCFAlu(LatrCFAlu));
+ int CntIdx = TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::COUNT);
+ unsigned RootInstCount = getCFAluSize(RootCFAlu),
+ LaterInstCount = getCFAluSize(LatrCFAlu);
+ unsigned CumuledInsts = RootInstCount + LaterInstCount;
+ if (CumuledInsts >= TII->getMaxAlusPerClause()) {
+ DEBUG(dbgs() << "Excess inst counts\n");
+ return false;
+ }
+ if (RootCFAlu->getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE)
+ return false;
+ // Is KCache Bank 0 compatible?
+ int Mode0Idx =
+ TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_MODE0);
+ int KBank0Idx =
+ TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_BANK0);
+ int KBank0LineIdx =
+ TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_ADDR0);
+ if (LatrCFAlu->getOperand(Mode0Idx).getImm() &&
+ RootCFAlu->getOperand(Mode0Idx).getImm() &&
+ (LatrCFAlu->getOperand(KBank0Idx).getImm() !=
+ RootCFAlu->getOperand(KBank0Idx).getImm() ||
+ LatrCFAlu->getOperand(KBank0LineIdx).getImm() !=
+ RootCFAlu->getOperand(KBank0LineIdx).getImm())) {
+ DEBUG(dbgs() << "Wrong KC0\n");
+ return false;
+ }
+ // Is KCache Bank 1 compatible?
+ int Mode1Idx =
+ TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_MODE1);
+ int KBank1Idx =
+ TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_BANK1);
+ int KBank1LineIdx =
+ TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_ADDR1);
+ if (LatrCFAlu->getOperand(Mode1Idx).getImm() &&
+ RootCFAlu->getOperand(Mode1Idx).getImm() &&
+ (LatrCFAlu->getOperand(KBank1Idx).getImm() !=
+ RootCFAlu->getOperand(KBank1Idx).getImm() ||
+ LatrCFAlu->getOperand(KBank1LineIdx).getImm() !=
+ RootCFAlu->getOperand(KBank1LineIdx).getImm())) {
+ DEBUG(dbgs() << "Wrong KC1\n");
+ return false;
+ }
+ if (LatrCFAlu->getOperand(Mode0Idx).getImm()) {
+ RootCFAlu->getOperand(Mode0Idx).setImm(
+ LatrCFAlu->getOperand(Mode0Idx).getImm());
+ RootCFAlu->getOperand(KBank0Idx).setImm(
+ LatrCFAlu->getOperand(KBank0Idx).getImm());
+ RootCFAlu->getOperand(KBank0LineIdx).setImm(
+ LatrCFAlu->getOperand(KBank0LineIdx).getImm());
+ }
+ if (LatrCFAlu->getOperand(Mode1Idx).getImm()) {
+ RootCFAlu->getOperand(Mode1Idx).setImm(
+ LatrCFAlu->getOperand(Mode1Idx).getImm());
+ RootCFAlu->getOperand(KBank1Idx).setImm(
+ LatrCFAlu->getOperand(KBank1Idx).getImm());
+ RootCFAlu->getOperand(KBank1LineIdx).setImm(
+ LatrCFAlu->getOperand(KBank1LineIdx).getImm());
+ }
+ RootCFAlu->getOperand(CntIdx).setImm(CumuledInsts);
+ RootCFAlu->setDesc(TII->get(LatrCFAlu->getOpcode()));
+ return true;
+}
+
+bool R600ClauseMergePass::runOnMachineFunction(MachineFunction &MF) {
+ TII = static_cast<const R600InstrInfo *>(MF.getTarget().getInstrInfo());
+ for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
+ BB != BB_E; ++BB) {
+ MachineBasicBlock &MBB = *BB;
+ MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
+ MachineBasicBlock::iterator LatestCFAlu = E;
+ while (I != E) {
+ MachineInstr *MI = I++;
+ if ((!TII->canBeConsideredALU(MI) && !isCFAlu(MI)) ||
+ TII->mustBeLastInClause(MI->getOpcode()))
+ LatestCFAlu = E;
+ if (!isCFAlu(MI))
+ continue;
+ cleanPotentialDisabledCFAlu(MI);
+
+ if (LatestCFAlu != E && mergeIfPossible(LatestCFAlu, MI)) {
+ MI->eraseFromParent();
+ } else {
+ assert(MI->getOperand(8).getImm() && "CF ALU instruction disabled");
+ LatestCFAlu = MI;
+ }
+ }
+ }
+ return false;
+}
+
+const char *R600ClauseMergePass::getPassName() const {
+ return "R600 Merge Clause Markers Pass";
+}
+
+} // end anonymous namespace
+
+
+llvm::FunctionPass *llvm::createR600ClauseMergePass(TargetMachine &TM) {
+ return new R600ClauseMergePass(TM);
+}
diff --git a/lib/Target/R600/R600ControlFlowFinalizer.cpp b/lib/Target/R600/R600ControlFlowFinalizer.cpp
index 715be37..ac3d8f6 100644
--- a/lib/Target/R600/R600ControlFlowFinalizer.cpp
+++ b/lib/Target/R600/R600ControlFlowFinalizer.cpp
@@ -373,15 +373,6 @@ public:
case AMDGPU::CF_ALU:
I = MI;
AluClauses.push_back(MakeALUClause(MBB, I));
- case AMDGPU::EG_ExportBuf:
- case AMDGPU::EG_ExportSwz:
- case AMDGPU::R600_ExportBuf:
- case AMDGPU::R600_ExportSwz:
- case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
- case AMDGPU::RAT_WRITE_CACHELESS_64_eg:
- case AMDGPU::RAT_WRITE_CACHELESS_128_eg:
- case AMDGPU::RAT_STORE_DWORD32_cm:
- case AMDGPU::RAT_STORE_DWORD64_cm:
DEBUG(dbgs() << CfCount << ":"; MI->dump(););
CfCount++;
break;
@@ -491,6 +482,10 @@ public:
EmitALUClause(I, AluClauses[i], CfCount);
}
default:
+ if (TII->isExport(MI->getOpcode())) {
+ DEBUG(dbgs() << CfCount << ":"; MI->dump(););
+ CfCount++;
+ }
break;
}
}
diff --git a/lib/Target/R600/R600Defines.h b/lib/Target/R600/R600Defines.h
index 90fc29c..1781f2a 100644
--- a/lib/Target/R600/R600Defines.h
+++ b/lib/Target/R600/R600Defines.h
@@ -44,7 +44,9 @@ namespace R600_InstFlag {
TEX_INST = (1 << 13),
ALU_INST = (1 << 14),
LDS_1A = (1 << 15),
- LDS_1A1D = (1 << 16)
+ LDS_1A1D = (1 << 16),
+ IS_EXPORT = (1 << 17),
+ LDS_1A2D = (1 << 18)
};
}
diff --git a/lib/Target/R600/R600EmitClauseMarkers.cpp b/lib/Target/R600/R600EmitClauseMarkers.cpp
index fac2b47..1bbfd2b 100644
--- a/lib/Target/R600/R600EmitClauseMarkers.cpp
+++ b/lib/Target/R600/R600EmitClauseMarkers.cpp
@@ -47,6 +47,11 @@ private:
break;
}
+ // These will be expanded to two ALU instructions in the
+ // ExpandSpecialInstructions pass.
+ if (TII->isLDSRetInstr(MI->getOpcode()))
+ return 2;
+
if(TII->isVector(*MI) ||
TII->isCubeOp(MI->getOpcode()) ||
TII->isReductionOp(MI->getOpcode()))
@@ -84,6 +89,7 @@ private:
switch (MI->getOpcode()) {
case AMDGPU::KILL:
case AMDGPU::RETURN:
+ case AMDGPU::IMPLICIT_DEF:
return true;
default:
return false;
@@ -105,8 +111,13 @@ private:
}
bool SubstituteKCacheBank(MachineInstr *MI,
- std::vector<std::pair<unsigned, unsigned> > &CachedConsts) const {
+ std::vector<std::pair<unsigned, unsigned> > &CachedConsts,
+ bool UpdateInstr = true) const {
std::vector<std::pair<unsigned, unsigned> > UsedKCache;
+
+ if (!TII->isALUInstr(MI->getOpcode()) && MI->getOpcode() != AMDGPU::DOT_4)
+ return true;
+
const SmallVectorImpl<std::pair<MachineOperand *, int64_t> > &Consts =
TII->getSrcs(MI);
assert((TII->isALUInstr(MI->getOpcode()) ||
@@ -139,6 +150,9 @@ private:
return false;
}
+ if (!UpdateInstr)
+ return true;
+
for (unsigned i = 0, j = 0, n = Consts.size(); i < n; ++i) {
if (Consts[i].first->getReg() != AMDGPU::ALU_CONST)
continue;
@@ -159,6 +173,52 @@ private:
return true;
}
+ bool canClauseLocalKillFitInClause(
+ unsigned AluInstCount,
+ std::vector<std::pair<unsigned, unsigned> > KCacheBanks,
+ MachineBasicBlock::iterator Def,
+ MachineBasicBlock::iterator BBEnd) {
+ const R600RegisterInfo &TRI = TII->getRegisterInfo();
+ for (MachineInstr::const_mop_iterator
+ MOI = Def->operands_begin(),
+ MOE = Def->operands_end(); MOI != MOE; ++MOI) {
+ if (!MOI->isReg() || !MOI->isDef() ||
+ TRI.isPhysRegLiveAcrossClauses(MOI->getReg()))
+ continue;
+
+ // Def defines a clause local register, so check that its use will fit
+ // in the clause.
+ unsigned LastUseCount = 0;
+ for (MachineBasicBlock::iterator UseI = Def; UseI != BBEnd; ++UseI) {
+ AluInstCount += OccupiedDwords(UseI);
+ // Make sure we won't need to end the clause due to KCache limitations.
+ if (!SubstituteKCacheBank(UseI, KCacheBanks, false))
+ return false;
+
+ // We have reached the maximum instruction limit before finding the
+ // use that kills this register, so we cannot use this def in the
+ // current clause.
+ if (AluInstCount >= TII->getMaxAlusPerClause())
+ return false;
+
+ // Register kill flags have been cleared by the time we get to this
+ // pass, but it is safe to assume that all uses of this register
+ // occur in the same basic block as its definition, because
+ // it is illegal for the scheduler to schedule them in
+ // different blocks.
+ if (UseI->findRegisterUseOperandIdx(MOI->getReg()))
+ LastUseCount = AluInstCount;
+
+ if (UseI != Def && UseI->findRegisterDefOperandIdx(MOI->getReg()) != -1)
+ break;
+ }
+ if (LastUseCount)
+ return LastUseCount <= TII->getMaxAlusPerClause();
+ llvm_unreachable("Clause local register live at end of clause.");
+ }
+ return true;
+ }
+
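// [Editor's note: illustrative walk-through, not part of this patch.]
// Suppose Def writes a clause-local register when AluInstCount is already
// 120 and the killing use only occurs a dozen instructions later: the scan
// above reaches TII->getMaxAlusPerClause() before the kill and returns
// false, so MakeALUClause ends the clause before Def rather than splitting
// the def/use pair across a clause boundary.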
MachineBasicBlock::iterator
MakeALUClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator I) {
MachineBasicBlock::iterator ClauseHead = I;
@@ -173,6 +233,14 @@ private:
if (AluInstCount > TII->getMaxAlusPerClause())
break;
if (I->getOpcode() == AMDGPU::PRED_X) {
+ // We put PRED_X in its own clause to ensure that ifcvt won't create
+ // clauses with more than 128 insts.
+ // IfCvt checks that the "then" and "else" branches of an if statement
+ // have fewer than ~60 insts, so converted clauses can't be bigger than
+ // ~121 insts (the predicate setter needs to be in the same clause as
+ // the predicated ALUs).
+ if (AluInstCount > 0)
+ break;
if (TII->getFlagOp(I).getImm() & MO_FLAG_PUSH)
PushBeforeModifier = true;
AluInstCount ++;
@@ -189,11 +257,13 @@ private:
I++;
break;
}
- if (TII->isALUInstr(I->getOpcode()) &&
- !SubstituteKCacheBank(I, KCacheBanks))
+
+ // If this instruction defines a clause local register, make sure
+ // its use can fit in this clause.
+ if (!canClauseLocalKillFitInClause(AluInstCount, KCacheBanks, I, E))
break;
- if (I->getOpcode() == AMDGPU::DOT_4 &&
- !SubstituteKCacheBank(I, KCacheBanks))
+
+ if (!SubstituteKCacheBank(I, KCacheBanks))
break;
AluInstCount += OccupiedDwords(I);
}
diff --git a/lib/Target/R600/R600ExpandSpecialInstrs.cpp b/lib/Target/R600/R600ExpandSpecialInstrs.cpp
index 67b42d7..aeee4aa 100644
--- a/lib/Target/R600/R600ExpandSpecialInstrs.cpp
+++ b/lib/Target/R600/R600ExpandSpecialInstrs.cpp
@@ -68,6 +68,23 @@ bool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) {
MachineInstr &MI = *I;
I = llvm::next(I);
+ // Expand LDS_*_RET instructions
+ if (TII->isLDSRetInstr(MI.getOpcode())) {
+ int DstIdx = TII->getOperandIdx(MI.getOpcode(), AMDGPU::OpName::dst);
+ assert(DstIdx != -1);
+ MachineOperand &DstOp = MI.getOperand(DstIdx);
+ MachineInstr *Mov = TII->buildMovInstr(&MBB, I,
+ DstOp.getReg(), AMDGPU::OQAP);
+ DstOp.setReg(AMDGPU::OQAP);
+ int LDSPredSelIdx = TII->getOperandIdx(MI.getOpcode(),
+ AMDGPU::OpName::pred_sel);
+ int MovPredSelIdx = TII->getOperandIdx(Mov->getOpcode(),
+ AMDGPU::OpName::pred_sel);
+ // Copy the pred_sel bit
+ Mov->getOperand(MovPredSelIdx).setReg(
+ MI.getOperand(LDSPredSelIdx).getReg());
+ }
+
switch (MI.getOpcode()) {
default: break;
// Expand PRED_X to one of the PRED_SET instructions.
diff --git a/lib/Target/R600/R600ISelLowering.cpp b/lib/Target/R600/R600ISelLowering.cpp
index ce6ac89..0fcb488 100644
--- a/lib/Target/R600/R600ISelLowering.cpp
+++ b/lib/Target/R600/R600ISelLowering.cpp
@@ -38,14 +38,24 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
computeRegisterProperties();
- setOperationAction(ISD::FADD, MVT::v4f32, Expand);
- setOperationAction(ISD::FADD, MVT::v2f32, Expand);
- setOperationAction(ISD::FMUL, MVT::v4f32, Expand);
- setOperationAction(ISD::FMUL, MVT::v2f32, Expand);
- setOperationAction(ISD::FDIV, MVT::v4f32, Expand);
- setOperationAction(ISD::FDIV, MVT::v2f32, Expand);
- setOperationAction(ISD::FSUB, MVT::v4f32, Expand);
- setOperationAction(ISD::FSUB, MVT::v2f32, Expand);
+ // Set condition code actions
+ setCondCodeAction(ISD::SETO, MVT::f32, Expand);
+ setCondCodeAction(ISD::SETUO, MVT::f32, Expand);
+ setCondCodeAction(ISD::SETLT, MVT::f32, Expand);
+ setCondCodeAction(ISD::SETLE, MVT::f32, Expand);
+ setCondCodeAction(ISD::SETOLT, MVT::f32, Expand);
+ setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
+ setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
+ setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
+ setCondCodeAction(ISD::SETUGE, MVT::f32, Expand);
+ setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
+ setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
+ setCondCodeAction(ISD::SETULE, MVT::f32, Expand);
+
+ setCondCodeAction(ISD::SETLE, MVT::i32, Expand);
+ setCondCodeAction(ISD::SETLT, MVT::i32, Expand);
+ setCondCodeAction(ISD::SETULE, MVT::i32, Expand);
+ setCondCodeAction(ISD::SETULT, MVT::i32, Expand);
setOperationAction(ISD::FCOS, MVT::f32, Custom);
setOperationAction(ISD::FSIN, MVT::f32, Custom);
@@ -69,21 +79,33 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
setOperationAction(ISD::SETCC, MVT::f32, Expand);
setOperationAction(ISD::FP_TO_UINT, MVT::i1, Custom);
- setOperationAction(ISD::SELECT, MVT::i32, Custom);
- setOperationAction(ISD::SELECT, MVT::f32, Custom);
+ setOperationAction(ISD::SELECT, MVT::i32, Expand);
+ setOperationAction(ISD::SELECT, MVT::f32, Expand);
+ setOperationAction(ISD::SELECT, MVT::v2i32, Expand);
+ setOperationAction(ISD::SELECT, MVT::v2f32, Expand);
+ setOperationAction(ISD::SELECT, MVT::v4i32, Expand);
+ setOperationAction(ISD::SELECT, MVT::v4f32, Expand);
// Legalize loads and stores to the private address space.
setOperationAction(ISD::LOAD, MVT::i32, Custom);
setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
+
+ // EXTLOAD should be the same as ZEXTLOAD. It is legal for some address
+ // spaces, so it is custom lowered to handle those where it isn't.
setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Custom);
setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Custom);
setLoadExtAction(ISD::ZEXTLOAD, MVT::i8, Custom);
setLoadExtAction(ISD::ZEXTLOAD, MVT::i16, Custom);
+ setLoadExtAction(ISD::EXTLOAD, MVT::i8, Custom);
+ setLoadExtAction(ISD::EXTLOAD, MVT::i16, Custom);
+
setOperationAction(ISD::STORE, MVT::i8, Custom);
setOperationAction(ISD::STORE, MVT::i32, Custom);
setOperationAction(ISD::STORE, MVT::v2i32, Custom);
setOperationAction(ISD::STORE, MVT::v4i32, Custom);
+ setTruncStoreAction(MVT::i32, MVT::i8, Custom);
+ setTruncStoreAction(MVT::i32, MVT::i16, Custom);
setOperationAction(ISD::LOAD, MVT::i32, Custom);
setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
@@ -99,7 +121,7 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
setBooleanContents(ZeroOrNegativeOneBooleanContent);
setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
- setSchedulingPreference(Sched::VLIW);
+ setSchedulingPreference(Sched::Source);
}
MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
@@ -111,7 +133,25 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
static_cast<const R600InstrInfo*>(MF->getTarget().getInstrInfo());
switch (MI->getOpcode()) {
- default: return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
+ default:
+ // Replace LDS_*_RET instruction that don't have any uses with the
+ // equivalent LDS_*_NORET instruction.
+ if (TII->isLDSRetInstr(MI->getOpcode())) {
+ int DstIdx = TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::dst);
+ assert(DstIdx != -1);
+ MachineInstrBuilder NewMI;
+ if (!MRI.use_empty(MI->getOperand(DstIdx).getReg()))
+ return BB;
+
+ NewMI = BuildMI(*BB, I, BB->findDebugLoc(I),
+ TII->get(AMDGPU::getLDSNoRetOp(MI->getOpcode())));
+ for (unsigned i = 1, e = MI->getNumOperands(); i < e; ++i) {
+ NewMI.addOperand(MI->getOperand(i));
+ }
+ } else {
+ return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
+ }
+ break;
case AMDGPU::CLAMP_R600: {
MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
AMDGPU::MOV,
@@ -147,19 +187,6 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
break;
}
- case AMDGPU::LDS_READ_RET: {
- MachineInstrBuilder NewMI = BuildMI(*BB, I, BB->findDebugLoc(I),
- TII->get(MI->getOpcode()),
- AMDGPU::OQAP);
- for (unsigned i = 1, e = MI->getNumOperands(); i < e; ++i) {
- NewMI.addOperand(MI->getOperand(i));
- }
- TII->buildDefaultInstruction(*BB, I, AMDGPU::MOV,
- MI->getOperand(0).getReg(),
- AMDGPU::OQAP);
- break;
- }
-
case AMDGPU::MOV_IMM_F32:
TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
MI->getOperand(1).getFPImm()->getValueAPF()
@@ -486,10 +513,8 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
case ISD::FCOS:
case ISD::FSIN: return LowerTrig(Op, DAG);
case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
- case ISD::SELECT: return LowerSELECT(Op, DAG);
case ISD::STORE: return LowerSTORE(Op, DAG);
case ISD::LOAD: return LowerLOAD(Op, DAG);
- case ISD::FrameIndex: return LowerFrameIndex(Op, DAG);
case ISD::GlobalAddress: return LowerGlobalAddress(MFI, Op, DAG);
case ISD::INTRINSIC_VOID: {
SDValue Chain = Op.getOperand(0);
@@ -554,7 +579,6 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
TII->getRegisterInfo().getSubRegFromChannel(slot % 4),
DL, MVT::f32, SDValue(interp, 0));
}
-
MachineFunction &MF = DAG.getMachineFunction();
MachineRegisterInfo &MRI = MF.getRegInfo();
unsigned RegisterI = AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb);
@@ -576,6 +600,24 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
RegisterJNode, RegisterINode);
return SDValue(interp, slot % 2);
}
+ case AMDGPUIntrinsic::R600_interp_xy:
+ case AMDGPUIntrinsic::R600_interp_zw: {
+ int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+ MachineSDNode *interp;
+ SDValue RegisterINode = Op.getOperand(2);
+ SDValue RegisterJNode = Op.getOperand(3);
+
+ if (IntrinsicID == AMDGPUIntrinsic::R600_interp_xy)
+ interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_XY, DL,
+ MVT::f32, MVT::f32, DAG.getTargetConstant(slot, MVT::i32),
+ RegisterJNode, RegisterINode);
+ else
+ interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_ZW, DL,
+ MVT::f32, MVT::f32, DAG.getTargetConstant(slot, MVT::i32),
+ RegisterJNode, RegisterINode);
+ return DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v2f32,
+ SDValue(interp, 0), SDValue(interp, 1));
+ }
case AMDGPUIntrinsic::R600_tex:
case AMDGPUIntrinsic::R600_texc:
case AMDGPUIntrinsic::R600_txl:
@@ -585,7 +627,8 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
case AMDGPUIntrinsic::R600_txf:
case AMDGPUIntrinsic::R600_txq:
case AMDGPUIntrinsic::R600_ddx:
- case AMDGPUIntrinsic::R600_ddy: {
+ case AMDGPUIntrinsic::R600_ddy:
+ case AMDGPUIntrinsic::R600_ldptr: {
unsigned TextureOp;
switch (IntrinsicID) {
case AMDGPUIntrinsic::R600_tex:
@@ -618,6 +661,9 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
case AMDGPUIntrinsic::R600_ddy:
TextureOp = 9;
break;
+ case AMDGPUIntrinsic::R600_ldptr:
+ TextureOp = 10;
+ break;
default:
llvm_unreachable("Unknow Texture Operation");
}
@@ -792,20 +838,6 @@ SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
false, false, false, 0);
}
-SDValue R600TargetLowering::LowerFrameIndex(SDValue Op, SelectionDAG &DAG) const {
-
- MachineFunction &MF = DAG.getMachineFunction();
- const AMDGPUFrameLowering *TFL =
- static_cast<const AMDGPUFrameLowering*>(getTargetMachine().getFrameLowering());
-
- FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Op);
- assert(FIN);
-
- unsigned FrameIndex = FIN->getIndex();
- unsigned Offset = TFL->getFrameIndexOffset(MF, FrameIndex);
- return DAG.getConstant(Offset * 4 * TFL->getStackWidth(MF), MVT::i32);
-}
-
bool R600TargetLowering::isZero(SDValue Op) const {
if(ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
return Cst->isNullValue();
@@ -836,16 +868,27 @@ SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const
//
// SET* can match the following patterns:
//
- // select_cc f32, f32, -1, 0, cc_any
- // select_cc f32, f32, 1.0f, 0.0f, cc_any
- // select_cc i32, i32, -1, 0, cc_any
+ // select_cc f32, f32, -1, 0, cc_supported
+ // select_cc f32, f32, 1.0f, 0.0f, cc_supported
+ // select_cc i32, i32, -1, 0, cc_supported
//
// Move hardware True/False values to the correct operand.
+ ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
+ ISD::CondCode InverseCC =
+ ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
if (isHWTrueValue(False) && isHWFalseValue(True)) {
- ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
- std::swap(False, True);
- CC = DAG.getCondCode(ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32));
+ if (isCondCodeLegal(InverseCC, CompareVT.getSimpleVT())) {
+ std::swap(False, True);
+ CC = DAG.getCondCode(InverseCC);
+ } else {
+ ISD::CondCode SwapInvCC = ISD::getSetCCSwappedOperands(InverseCC);
+ if (isCondCodeLegal(SwapInvCC, CompareVT.getSimpleVT())) {
+ std::swap(False, True);
+ std::swap(LHS, RHS);
+ CC = DAG.getCondCode(SwapInvCC);
+ }
+ }
}
if (isHWTrueValue(True) && isHWFalseValue(False) &&
@@ -858,14 +901,34 @@ SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const
//
// CND* can match the following patterns:
//
- // select_cc f32, 0.0, f32, f32, cc_any
- // select_cc f32, 0.0, i32, i32, cc_any
- // select_cc i32, 0, f32, f32, cc_any
- // select_cc i32, 0, i32, i32, cc_any
+ // select_cc f32, 0.0, f32, f32, cc_supported
+ // select_cc f32, 0.0, i32, i32, cc_supported
+ // select_cc i32, 0, f32, f32, cc_supported
+ // select_cc i32, 0, i32, i32, cc_supported
//
- if (isZero(LHS) || isZero(RHS)) {
- SDValue Cond = (isZero(LHS) ? RHS : LHS);
- SDValue Zero = (isZero(LHS) ? LHS : RHS);
+
+ // Try to move the zero value to the RHS
+ if (isZero(LHS)) {
+ ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
+ // Try swapping the operands
+ ISD::CondCode CCSwapped = ISD::getSetCCSwappedOperands(CCOpcode);
+ if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
+ std::swap(LHS, RHS);
+ CC = DAG.getCondCode(CCSwapped);
+ } else {
+ // Try inverting the condition and then swapping the operands
+ ISD::CondCode CCInv = ISD::getSetCCInverse(CCOpcode, CompareVT.isInteger());
+ CCSwapped = ISD::getSetCCSwappedOperands(CCInv);
+ if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
+ std::swap(True, False);
+ std::swap(LHS, RHS);
+ CC = DAG.getCondCode(CCSwapped);
+ }
+ }
+ }
+ if (isZero(RHS)) {
+ SDValue Cond = LHS;
+ SDValue Zero = RHS;
ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
if (CompareVT != VT) {
// Bitcast True / False to the correct types. This will end up being
@@ -875,20 +938,11 @@ SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const
True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True);
False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False);
}
- if (isZero(LHS)) {
- CCOpcode = ISD::getSetCCSwappedOperands(CCOpcode);
- }
switch (CCOpcode) {
case ISD::SETONE:
case ISD::SETUNE:
case ISD::SETNE:
- case ISD::SETULE:
- case ISD::SETULT:
- case ISD::SETOLE:
- case ISD::SETOLT:
- case ISD::SETLE:
- case ISD::SETLT:
CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
Temp = True;
True = False;
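A standalone sketch of the condition-code rewrites used above, assuming a made-up native set {EQ, NE, GT, GE}: an unsupported code is first repaired by swapping the compare operands, and failing that by inverting the code, which swaps the select arms. All names below are hypothetical stand-ins for the ISD helpers.

#include <algorithm>
#include <cassert>
#include <cstdio>

enum Cond { EQ, NE, LT, LE, GT, GE };

static bool isLegal(Cond C) { return C == EQ || C == NE || C == GT || C == GE; }

static Cond swapped(Cond C) {      // a C b  <=>  b swapped(C) a
  switch (C) {
  case LT: return GT; case LE: return GE;
  case GT: return LT; case GE: return LE;
  default: return C;               // EQ and NE are symmetric
  }
}

static Cond inverse(Cond C) {      // a C b  <=>  !(a inverse(C) b)
  switch (C) {
  case EQ: return NE; case NE: return EQ;
  case LT: return GE; case LE: return GT;
  case GT: return LE; case GE: return LT;
  }
  return EQ;                       // unreachable
}

// Rewrite select_cc(LHS, RHS, True, False, C) to use only legal codes.
static void legalize(int &LHS, int &RHS, int &True, int &False, Cond &C) {
  if (isLegal(C))
    return;
  if (isLegal(swapped(C))) {       // try swapping the operands
    std::swap(LHS, RHS);
    C = swapped(C);
    return;
  }
  Cond Inv = inverse(C);           // else invert, which swaps the arms
  if (isLegal(Inv)) {
    std::swap(True, False);
    C = Inv;
  }
}

int main() {
  int LHS = 1, RHS = 2, True = 10, False = 20;
  Cond C = LT;                     // illegal in this model
  legalize(LHS, RHS, True, False, C);
  assert(C == GT && LHS == 2 && RHS == 1);
  std::printf("select_cc(%d GT %d ? %d : %d)\n", LHS, RHS, True, False);
  return 0;
}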
@@ -936,17 +990,6 @@ SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const
DAG.getCondCode(ISD::SETNE));
}
-SDValue R600TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
- return DAG.getNode(ISD::SELECT_CC,
- SDLoc(Op),
- Op.getValueType(),
- Op.getOperand(0),
- DAG.getConstant(0, MVT::i32),
- Op.getOperand(1),
- Op.getOperand(2),
- DAG.getCondCode(ISD::SETNE));
-}
-
/// LLVM generates byte-addressed pointers. For indirect addressing, we need to
/// convert these pointers to a register index. Each register holds
/// 16 bytes (4 x 32-bit sub-registers), but we need to take into account the
@@ -1009,19 +1052,59 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
SDValue Value = Op.getOperand(1);
SDValue Ptr = Op.getOperand(2);
- if (StoreNode->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS &&
- Ptr->getOpcode() != AMDGPUISD::DWORDADDR) {
- // Convert pointer from byte address to dword address.
- Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, Ptr.getValueType(),
- DAG.getNode(ISD::SRL, DL, Ptr.getValueType(),
- Ptr, DAG.getConstant(2, MVT::i32)));
+ SDValue Result = AMDGPUTargetLowering::LowerSTORE(Op, DAG);
+ if (Result.getNode()) {
+ return Result;
+ }
- if (StoreNode->isTruncatingStore() || StoreNode->isIndexed()) {
- assert(!"Truncated and indexed stores not supported yet");
- } else {
- Chain = DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
+ if (StoreNode->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS) {
+ if (StoreNode->isTruncatingStore()) {
+ EVT VT = Value.getValueType();
+ assert(VT.bitsLE(MVT::i32));
+ EVT MemVT = StoreNode->getMemoryVT();
+ SDValue MaskConstant;
+ if (MemVT == MVT::i8) {
+ MaskConstant = DAG.getConstant(0xFF, MVT::i32);
+ } else {
+ assert(MemVT == MVT::i16);
+ MaskConstant = DAG.getConstant(0xFFFF, MVT::i32);
+ }
+ SDValue DWordAddr = DAG.getNode(ISD::SRL, DL, VT, Ptr,
+ DAG.getConstant(2, MVT::i32));
+ SDValue ByteIndex = DAG.getNode(ISD::AND, DL, Ptr.getValueType(), Ptr,
+ DAG.getConstant(0x00000003, VT));
+ SDValue TruncValue = DAG.getNode(ISD::AND, DL, VT, Value, MaskConstant);
+ SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, ByteIndex,
+ DAG.getConstant(3, VT));
+ SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, VT, TruncValue, Shift);
+ SDValue Mask = DAG.getNode(ISD::SHL, DL, VT, MaskConstant, Shift);
+ // XXX: If we add a 64-bit ZW register class, then we could use a 2 x i32
+ // vector instead.
+ SDValue Src[4] = {
+ ShiftedValue,
+ DAG.getConstant(0, MVT::i32),
+ DAG.getConstant(0, MVT::i32),
+ Mask
+ };
+ SDValue Input = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v4i32, Src, 4);
+ SDValue Args[3] = { Chain, Input, DWordAddr };
+ return DAG.getMemIntrinsicNode(AMDGPUISD::STORE_MSKOR, DL,
+ Op->getVTList(), Args, 3, MemVT,
+ StoreNode->getMemOperand());
+ } else if (Ptr->getOpcode() != AMDGPUISD::DWORDADDR &&
+ Value.getValueType().bitsGE(MVT::i32)) {
+ // Convert pointer from byte address to dword address.
+ Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, Ptr.getValueType(),
+ DAG.getNode(ISD::SRL, DL, Ptr.getValueType(),
+ Ptr, DAG.getConstant(2, MVT::i32)));
+
+ if (StoreNode->isTruncatingStore() || StoreNode->isIndexed()) {
+ assert(!"Truncated and indexed stores not supported yet");
+ } else {
+ Chain = DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
+ }
+ return Chain;
}
- return Chain;
}
EVT ValueVT = Value.getValueType();
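A scalar model of the sub-dword global store built above; DWordAddr, ByteIndex, Shift and Mask mirror the SDNodes in this hunk, and the final line assumes MSKOR performs the read-modify-write Dst = (Dst & ~Mask) | Value on the addressed dword.

#include <cstdint>
#include <cstdio>

static uint32_t storeSubDword(uint32_t OldDWord, uint32_t Value,
                              uint32_t BytePtr, uint32_t MaskConstant) {
  uint32_t DWordAddr = BytePtr >> 2;           // which dword is touched
  uint32_t ByteIndex = BytePtr & 0x3;          // position inside the dword
  uint32_t Shift = ByteIndex << 3;             // bytes -> bits
  uint32_t TruncValue = Value & MaskConstant;  // 0xFF for i8, 0xFFFF for i16
  uint32_t ShiftedValue = TruncValue << Shift;
  uint32_t Mask = MaskConstant << Shift;
  (void)DWordAddr;                             // addressing is elided here
  return (OldDWord & ~Mask) | ShiftedValue;    // assumed MSKOR semantics
}

int main() {
  // Store byte 0xAB at byte address 6: bits 23-16 of dword 1 are replaced.
  std::printf("0x%08X\n", storeSubDword(0x11223344u, 0xABu, 6, 0xFFu));
  return 0;
}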
@@ -1121,12 +1204,22 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
SDValue Ptr = Op.getOperand(1);
SDValue LoweredLoad;
+ if (LoadNode->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS && VT.isVector()) {
+ SDValue MergedValues[2] = {
+ SplitVectorLoad(Op, DAG),
+ Chain
+ };
+ return DAG.getMergeValues(MergedValues, 2, DL);
+ }
+
int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace());
- if (ConstantBlock > -1) {
+ if (ConstantBlock > -1 &&
+ ((LoadNode->getExtensionType() == ISD::NON_EXTLOAD) ||
+ (LoadNode->getExtensionType() == ISD::ZEXTLOAD))) {
SDValue Result;
- if (dyn_cast<ConstantExpr>(LoadNode->getSrcValue()) ||
- dyn_cast<Constant>(LoadNode->getSrcValue()) ||
- dyn_cast<ConstantSDNode>(Ptr)) {
+ if (isa<ConstantExpr>(LoadNode->getSrcValue()) ||
+ isa<Constant>(LoadNode->getSrcValue()) ||
+ isa<ConstantSDNode>(Ptr)) {
SDValue Slots[4];
for (unsigned i = 0; i < 4; i++) {
// We want Const position encoded with the following formula :
@@ -1166,13 +1259,13 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
return DAG.getMergeValues(MergedValues, 2, DL);
}
- // For most operations returning SDValue() will result int he node being
- // expanded by the DAG Legalizer. This is not the case for ISD::LOAD, so
- // we need to manually expand loads that may be legal in some address spaces
- // and illegal in others. SEXT loads from CONSTANT_BUFFER_0 are supported
- // for compute shaders, since the data is sign extended when it is uploaded
- // to the buffer. Howerver SEXT loads from other addresspaces are not
- // supported, so we need to expand them here.
+ // For most operations returning SDValue() will result in the node being
+ // expanded by the DAG Legalizer. This is not the case for ISD::LOAD, so we
+ // need to manually expand loads that may be legal in some address spaces and
+ // illegal in others. SEXT loads from CONSTANT_BUFFER_0 are supported for
+ // compute shaders, since the data is sign extended when it is uploaded to the
+ // buffer. However SEXT loads from other address spaces are not supported, so
+ // we need to expand them here.
if (LoadNode->getExtensionType() == ISD::SEXTLOAD) {
EVT MemVT = LoadNode->getMemoryVT();
assert(!MemVT.isVector() && (MemVT == MVT::i16 || MemVT == MVT::i8));
@@ -1252,23 +1345,39 @@ SDValue R600TargetLowering::LowerFormalArguments(
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
getTargetMachine(), ArgLocs, *DAG.getContext());
+ MachineFunction &MF = DAG.getMachineFunction();
+ unsigned ShaderType = MF.getInfo<R600MachineFunctionInfo>()->ShaderType;
+
+ SmallVector<ISD::InputArg, 8> LocalIns;
+
+ getOriginalFunctionArgs(DAG, DAG.getMachineFunction().getFunction(), Ins,
+ LocalIns);
- AnalyzeFormalArguments(CCInfo, Ins);
+ AnalyzeFormalArguments(CCInfo, LocalIns);
for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
CCValAssign &VA = ArgLocs[i];
- EVT VT = VA.getLocVT();
+ EVT VT = Ins[i].VT;
+ EVT MemVT = LocalIns[i].VT;
+
+ if (ShaderType != ShaderType::COMPUTE) {
+ unsigned Reg = MF.addLiveIn(VA.getLocReg(), &AMDGPU::R600_Reg128RegClass);
+ SDValue Register = DAG.getCopyFromReg(Chain, DL, Reg, VT);
+ InVals.push_back(Register);
+ continue;
+ }
PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
AMDGPUAS::CONSTANT_BUFFER_0);
// The first 36 bytes of the input buffer contains information about
// thread group and global sizes.
- SDValue Arg = DAG.getLoad(VT, DL, Chain,
- DAG.getConstant(36 + VA.getLocMemOffset(), MVT::i32),
- MachinePointerInfo(UndefValue::get(PtrTy)), false,
- false, false, 4); // 4 is the prefered alignment for
- // the CONSTANT memory space.
+ SDValue Arg = DAG.getExtLoad(ISD::SEXTLOAD, DL, VT, Chain,
+ DAG.getConstant(36 + VA.getLocMemOffset(), MVT::i32),
+ MachinePointerInfo(UndefValue::get(PtrTy)),
+ MemVT, false, false, 4);
+ // 4 is the preferred alignment for
+ // the CONSTANT memory space.
InVals.push_back(Arg);
}
return Chain;
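A toy model of the compute-shader argument fetch above: each argument loads from CONSTANT_BUFFER_0 at byte 36 + LocMemOffset, past the 36-byte header of thread-group and global sizes. The argument names and offsets are invented for the demo.

#include <cstdio>

struct Arg { const char *Name; unsigned LocMemOffset; };

int main() {
  const Arg Args[] = {{"a", 0}, {"b", 4}, {"c", 8}};  // hypothetical layout
  for (const Arg &A : Args)
    std::printf("%s loads from CONSTANT_BUFFER_0 + %u\n", A.Name,
                36 + A.LocMemOffset);                 // skip the header
  return 0;
}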
@@ -1292,6 +1401,11 @@ CompactSwizzlableVector(SelectionDAG &DAG, SDValue VectorEntry,
};
for (unsigned i = 0; i < 4; i++) {
+ if (NewBldVec[i].getOpcode() == ISD::UNDEF)
+ // We mask the write here to teach later passes that the ith element of this
+ // vector is undef. Thus we can use it to reduce 128-bit register usage,
+ // break false dependencies and additionally make assembly easier to read.
+ RemapSwizzle[i] = 7; // SEL_MASK_WRITE
if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(NewBldVec[i])) {
if (C->isZero()) {
RemapSwizzle[i] = 4; // SEL_0
@@ -1335,12 +1449,16 @@ static SDValue ReorganizeVector(SelectionDAG &DAG, SDValue VectorEntry,
if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
->getZExtValue();
- if (!isUnmovable[Idx]) {
- // Swap i and Idx
- std::swap(NewBldVec[Idx], NewBldVec[i]);
- std::swap(RemapSwizzle[RemapSwizzle[Idx]], RemapSwizzle[RemapSwizzle[i]]);
+ if (i == Idx) {
+ isUnmovable[Idx] = true;
+ continue;
}
- isUnmovable[Idx] = true;
+ if (isUnmovable[Idx])
+ continue;
+ // Swap i and Idx
+ std::swap(NewBldVec[Idx], NewBldVec[i]);
+ std::swap(RemapSwizzle[i], RemapSwizzle[Idx]);
+ break;
}
}
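A toy version of the reordering loop above, with plain ints standing in for the extract_vector_elt operands: one mismatched lane is moved toward the element index it extracts, the swap is recorded in RemapSwizzle, and lanes already in place are pinned so they cannot be displaced.

#include <algorithm>
#include <cstdio>

int main() {
  int NewBldVec[4] = {2, 0, 1, 3};       // lane i extracts element NewBldVec[i]
  unsigned RemapSwizzle[4] = {0, 1, 2, 3};
  bool isUnmovable[4] = {};
  for (unsigned i = 0; i < 4; ++i) {
    unsigned Idx = NewBldVec[i];
    if (i == Idx) {
      isUnmovable[Idx] = true;
      continue;
    }
    if (isUnmovable[Idx])
      continue;
    std::swap(NewBldVec[Idx], NewBldVec[i]);
    std::swap(RemapSwizzle[i], RemapSwizzle[Idx]);
    break;                               // one swap per pass, as above
  }
  for (unsigned i = 0; i < 4; ++i)
    std::printf("lane %u: elt %d (swizzle %u)\n", i, NewBldVec[i],
                RemapSwizzle[i]);
  return 0;
}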
@@ -1422,8 +1540,8 @@ SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
break;
}
- // insert_vector_elt (build_vector elt0, …, eltN), NewEltIdx, idx
- // => build_vector elt0, …, NewEltIdx, …, eltN
+ // insert_vector_elt (build_vector elt0, ... , eltN), NewEltIdx, idx
+ // => build_vector elt0, ... , NewEltIdx, ... , eltN
case ISD::INSERT_VECTOR_ELT: {
SDValue InVec = N->getOperand(0);
SDValue InVal = N->getOperand(1);
@@ -1525,15 +1643,20 @@ SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
ISD::CondCode LHSCC = cast<CondCodeSDNode>(LHS.getOperand(4))->get();
LHSCC = ISD::getSetCCInverse(LHSCC,
LHS.getOperand(0).getValueType().isInteger());
- return DAG.getSelectCC(SDLoc(N),
- LHS.getOperand(0),
- LHS.getOperand(1),
- LHS.getOperand(2),
- LHS.getOperand(3),
- LHSCC);
+ if (DCI.isBeforeLegalizeOps() ||
+ isCondCodeLegal(LHSCC, LHS.getOperand(0).getSimpleValueType()))
+ return DAG.getSelectCC(SDLoc(N),
+ LHS.getOperand(0),
+ LHS.getOperand(1),
+ LHS.getOperand(2),
+ LHS.getOperand(3),
+ LHSCC);
+ break;
}
}
+ return SDValue();
}
+
case AMDGPUISD::EXPORT: {
SDValue Arg = N->getOperand(1);
if (Arg.getOpcode() != ISD::BUILD_VECTOR)
@@ -1586,3 +1709,253 @@ SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
}
return SDValue();
}
+
+static bool
+FoldOperand(SDNode *ParentNode, unsigned SrcIdx, SDValue &Src, SDValue &Neg,
+ SDValue &Abs, SDValue &Sel, SDValue &Imm, SelectionDAG &DAG) {
+ const R600InstrInfo *TII =
+ static_cast<const R600InstrInfo *>(DAG.getTarget().getInstrInfo());
+ if (!Src.isMachineOpcode())
+ return false;
+ switch (Src.getMachineOpcode()) {
+ case AMDGPU::FNEG_R600:
+ if (!Neg.getNode())
+ return false;
+ Src = Src.getOperand(0);
+ Neg = DAG.getTargetConstant(1, MVT::i32);
+ return true;
+ case AMDGPU::FABS_R600:
+ if (!Abs.getNode())
+ return false;
+ Src = Src.getOperand(0);
+ Abs = DAG.getTargetConstant(1, MVT::i32);
+ return true;
+ case AMDGPU::CONST_COPY: {
+ unsigned Opcode = ParentNode->getMachineOpcode();
+ bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
+
+ if (!Sel.getNode())
+ return false;
+
+ SDValue CstOffset = Src.getOperand(0);
+ if (ParentNode->getValueType(0).isVector())
+ return false;
+
+ // Gather constant values
+ int SrcIndices[] = {
+ TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
+ TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
+ TII->getOperandIdx(Opcode, AMDGPU::OpName::src2),
+ TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
+ TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
+ TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
+ TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
+ TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
+ TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
+ TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
+ TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
+ };
+ std::vector<unsigned> Consts;
+ for (unsigned i = 0; i < sizeof(SrcIndices) / sizeof(int); i++) {
+ int OtherSrcIdx = SrcIndices[i];
+ int OtherSelIdx = TII->getSelIdx(Opcode, OtherSrcIdx);
+ if (OtherSrcIdx < 0 || OtherSelIdx < 0)
+ continue;
+ if (HasDst) {
+ OtherSrcIdx--;
+ OtherSelIdx--;
+ }
+ if (RegisterSDNode *Reg =
+ dyn_cast<RegisterSDNode>(ParentNode->getOperand(OtherSrcIdx))) {
+ if (Reg->getReg() == AMDGPU::ALU_CONST) {
+ ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(
+ ParentNode->getOperand(OtherSelIdx));
+ Consts.push_back(Cst->getZExtValue());
+ }
+ }
+ }
+
+ ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(CstOffset);
+ Consts.push_back(Cst->getZExtValue());
+ if (!TII->fitsConstReadLimitations(Consts)) {
+ return false;
+ }
+
+ Sel = CstOffset;
+ Src = DAG.getRegister(AMDGPU::ALU_CONST, MVT::f32);
+ return true;
+ }
+ case AMDGPU::MOV_IMM_I32:
+ case AMDGPU::MOV_IMM_F32: {
+ unsigned ImmReg = AMDGPU::ALU_LITERAL_X;
+ uint64_t ImmValue = 0;
+
+ if (Src.getMachineOpcode() == AMDGPU::MOV_IMM_F32) {
+ ConstantFPSDNode *FPC = dyn_cast<ConstantFPSDNode>(Src.getOperand(0));
+ float FloatValue = FPC->getValueAPF().convertToFloat();
+ if (FloatValue == 0.0) {
+ ImmReg = AMDGPU::ZERO;
+ } else if (FloatValue == 0.5) {
+ ImmReg = AMDGPU::HALF;
+ } else if (FloatValue == 1.0) {
+ ImmReg = AMDGPU::ONE;
+ } else {
+ ImmValue = FPC->getValueAPF().bitcastToAPInt().getZExtValue();
+ }
+ } else {
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(Src.getOperand(0));
+ uint64_t Value = C->getZExtValue();
+ if (Value == 0) {
+ ImmReg = AMDGPU::ZERO;
+ } else if (Value == 1) {
+ ImmReg = AMDGPU::ONE_INT;
+ } else {
+ ImmValue = Value;
+ }
+ }
+
+ // Check that we aren't already using an immediate.
+ // XXX: It's possible for an instruction to have more than one
+ // immediate operand, but this is not supported yet.
+ if (ImmReg == AMDGPU::ALU_LITERAL_X) {
+ if (!Imm.getNode())
+ return false;
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(Imm);
+ assert(C);
+ if (C->getZExtValue())
+ return false;
+ Imm = DAG.getTargetConstant(ImmValue, MVT::i32);
+ }
+ Src = DAG.getRegister(ImmReg, MVT::i32);
+ return true;
+ }
+ default:
+ return false;
+ }
+}
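A standalone sketch of the immediate classification above: a few distinguished constants map to dedicated inline registers (the enumerators mirror the snippet's ZERO/HALF/ONE/ONE_INT), and everything else must occupy the single ALU_LITERAL_X slot, which is why at most one distinct literal can be folded per instruction.

#include <cstdint>
#include <cstdio>

enum ImmReg { ALU_LITERAL_X, ZERO, HALF, ONE, ONE_INT };

static ImmReg classifyFloat(float F) {
  if (F == 0.0f) return ZERO;
  if (F == 0.5f) return HALF;
  if (F == 1.0f) return ONE;
  return ALU_LITERAL_X;            // needs the literal slot
}

static ImmReg classifyInt(uint64_t V) {
  if (V == 0) return ZERO;
  if (V == 1) return ONE_INT;
  return ALU_LITERAL_X;
}

int main() {
  std::printf("%d %d %d %d\n", classifyFloat(0.5f), classifyFloat(2.0f),
              classifyInt(1), classifyInt(42));
  return 0;
}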
+
+
+/// \brief Fold the instructions after selecting them
+SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node,
+ SelectionDAG &DAG) const {
+ const R600InstrInfo *TII =
+ static_cast<const R600InstrInfo *>(DAG.getTarget().getInstrInfo());
+ if (!Node->isMachineOpcode())
+ return Node;
+ unsigned Opcode = Node->getMachineOpcode();
+ SDValue FakeOp;
+
+ std::vector<SDValue> Ops;
+ for(SDNode::op_iterator I = Node->op_begin(), E = Node->op_end();
+ I != E; ++I)
+ Ops.push_back(*I);
+
+ if (Opcode == AMDGPU::DOT_4) {
+ int OperandIdx[] = {
+ TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
+ TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
+ TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
+ TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
+ TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
+ TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
+ TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
+ TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
+ };
+ int NegIdx[] = {
+ TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_X),
+ TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Y),
+ TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Z),
+ TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_W),
+ TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_X),
+ TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Y),
+ TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Z),
+ TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_W)
+ };
+ int AbsIdx[] = {
+ TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_X),
+ TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Y),
+ TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Z),
+ TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_W),
+ TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_X),
+ TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Y),
+ TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Z),
+ TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_W)
+ };
+ for (unsigned i = 0; i < 8; i++) {
+ if (OperandIdx[i] < 0)
+ return Node;
+ SDValue &Src = Ops[OperandIdx[i] - 1];
+ SDValue &Neg = Ops[NegIdx[i] - 1];
+ SDValue &Abs = Ops[AbsIdx[i] - 1];
+ bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
+ int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
+ if (HasDst)
+ SelIdx--;
+ SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
+ if (FoldOperand(Node, i, Src, Neg, Abs, Sel, FakeOp, DAG))
+ return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
+ }
+ } else if (Opcode == AMDGPU::REG_SEQUENCE) {
+ for (unsigned i = 1, e = Node->getNumOperands(); i < e; i += 2) {
+ SDValue &Src = Ops[i];
+ if (FoldOperand(Node, i, Src, FakeOp, FakeOp, FakeOp, FakeOp, DAG))
+ return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
+ }
+ } else if (Opcode == AMDGPU::CLAMP_R600) {
+ SDValue Src = Node->getOperand(0);
+ if (!Src.isMachineOpcode() ||
+ !TII->hasInstrModifiers(Src.getMachineOpcode()))
+ return Node;
+ int ClampIdx = TII->getOperandIdx(Src.getMachineOpcode(),
+ AMDGPU::OpName::clamp);
+ if (ClampIdx < 0)
+ return Node;
+ std::vector<SDValue> Ops;
+ unsigned NumOp = Src.getNumOperands();
+ for(unsigned i = 0; i < NumOp; ++i)
+ Ops.push_back(Src.getOperand(i));
+ Ops[ClampIdx - 1] = DAG.getTargetConstant(1, MVT::i32);
+ return DAG.getMachineNode(Src.getMachineOpcode(), SDLoc(Node),
+ Node->getVTList(), Ops);
+ } else {
+ if (!TII->hasInstrModifiers(Opcode))
+ return Node;
+ int OperandIdx[] = {
+ TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
+ TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
+ TII->getOperandIdx(Opcode, AMDGPU::OpName::src2)
+ };
+ int NegIdx[] = {
+ TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg),
+ TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg),
+ TII->getOperandIdx(Opcode, AMDGPU::OpName::src2_neg)
+ };
+ int AbsIdx[] = {
+ TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs),
+ TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs),
+ -1
+ };
+ for (unsigned i = 0; i < 3; i++) {
+ if (OperandIdx[i] < 0)
+ return Node;
+ SDValue &Src = Ops[OperandIdx[i] - 1];
+ SDValue &Neg = Ops[NegIdx[i] - 1];
+ SDValue FakeAbs;
+ SDValue &Abs = (AbsIdx[i] > -1) ? Ops[AbsIdx[i] - 1] : FakeAbs;
+ bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
+ int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
+ int ImmIdx = TII->getOperandIdx(Opcode, AMDGPU::OpName::literal);
+ if (HasDst) {
+ SelIdx--;
+ ImmIdx--;
+ }
+ SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
+ SDValue &Imm = Ops[ImmIdx];
+ if (FoldOperand(Node, i, Src, Neg, Abs, Sel, Imm, DAG))
+ return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
+ }
+ }
+
+ return Node;
+}
diff --git a/lib/Target/R600/R600ISelLowering.h b/lib/Target/R600/R600ISelLowering.h
index a033fcb..c10257e 100644
--- a/lib/Target/R600/R600ISelLowering.h
+++ b/lib/Target/R600/R600ISelLowering.h
@@ -56,11 +56,9 @@ private:
SDValue LowerROTL(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerFrameIndex(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerTrig(SDValue Op, SelectionDAG &DAG) const;
SDValue stackPtrToRegIndex(SDValue Ptr, unsigned StackWidth,
@@ -68,6 +66,7 @@ private:
void getStackAddress(unsigned StackWidth, unsigned ElemIdx,
unsigned &Channel, unsigned &PtrIncr) const;
bool isZero(SDValue Op) const;
+ virtual SDNode *PostISelFolding(MachineSDNode *N, SelectionDAG &DAG) const;
};
} // End namespace llvm;
diff --git a/lib/Target/R600/R600InstrFormats.td b/lib/Target/R600/R600InstrFormats.td
index 2d72404..9428bab 100644
--- a/lib/Target/R600/R600InstrFormats.td
+++ b/lib/Target/R600/R600InstrFormats.td
@@ -16,7 +16,6 @@ class InstR600 <dag outs, dag ins, string asm, list<dag> pattern,
: AMDGPUInst <outs, ins, asm, pattern> {
field bits<64> Inst;
- bit TransOnly = 0;
bit Trig = 0;
bit Op3 = 0;
bit isVector = 0;
@@ -29,6 +28,8 @@ class InstR600 <dag outs, dag ins, string asm, list<dag> pattern,
bit VTXInst = 0;
bit TEXInst = 0;
bit ALUInst = 0;
+ bit IsExport = 0;
+ bit LDS_1A2D = 0;
let Namespace = "AMDGPU";
let OutOperandList = outs;
@@ -37,7 +38,6 @@ class InstR600 <dag outs, dag ins, string asm, list<dag> pattern,
let Pattern = pattern;
let Itinerary = itin;
- let TSFlags{0} = TransOnly;
let TSFlags{4} = Trig;
let TSFlags{5} = Op3;
@@ -53,6 +53,8 @@ class InstR600 <dag outs, dag ins, string asm, list<dag> pattern,
let TSFlags{14} = ALUInst;
let TSFlags{15} = LDS_1A;
let TSFlags{16} = LDS_1A1D;
+ let TSFlags{17} = IsExport;
+ let TSFlags{18} = LDS_1A2D;
}
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/R600/R600InstrInfo.cpp b/lib/Target/R600/R600InstrInfo.cpp
index 4e7eff9..c0827fc 100644
--- a/lib/Target/R600/R600InstrInfo.cpp
+++ b/lib/Target/R600/R600InstrInfo.cpp
@@ -23,7 +23,7 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#define GET_INSTRINFO_CTOR
+#define GET_INSTRINFO_CTOR_DTOR
#include "AMDGPUGenDFAPacketizer.inc"
using namespace llvm;
@@ -77,16 +77,16 @@ R600InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
}
}
-MachineInstr * R600InstrInfo::getMovImmInstr(MachineFunction *MF,
- unsigned DstReg, int64_t Imm) const {
- MachineInstr * MI = MF->CreateMachineInstr(get(AMDGPU::MOV), DebugLoc());
- MachineInstrBuilder MIB(*MF, MI);
- MIB.addReg(DstReg, RegState::Define);
- MIB.addReg(AMDGPU::ALU_LITERAL_X);
- MIB.addImm(Imm);
- MIB.addReg(0); // PREDICATE_BIT
-
- return MI;
+/// \returns true if \p MBBI can be moved into a new basic block.
+bool R600InstrInfo::isLegalToSplitMBBAt(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI) const {
+ for (MachineInstr::const_mop_iterator I = MBBI->operands_begin(),
+ E = MBBI->operands_end(); I != E; ++I) {
+ if (I->isReg() && !TargetRegisterInfo::isVirtualRegister(I->getReg()) &&
+ I->isUse() && RI.isPhysRegLiveAcrossClauses(I->getReg()))
+ return false;
+ }
+ return true;
}
unsigned R600InstrInfo::getIEQOpcode() const {
@@ -149,17 +149,58 @@ bool R600InstrInfo::isLDSInstr(unsigned Opcode) const {
unsigned TargetFlags = get(Opcode).TSFlags;
return ((TargetFlags & R600_InstFlag::LDS_1A) |
- (TargetFlags & R600_InstFlag::LDS_1A1D));
+ (TargetFlags & R600_InstFlag::LDS_1A1D) |
+ (TargetFlags & R600_InstFlag::LDS_1A2D));
+}
+
+bool R600InstrInfo::isLDSNoRetInstr(unsigned Opcode) const {
+ return isLDSInstr(Opcode) && getOperandIdx(Opcode, AMDGPU::OpName::dst) == -1;
+}
+
+bool R600InstrInfo::isLDSRetInstr(unsigned Opcode) const {
+ return isLDSInstr(Opcode) && getOperandIdx(Opcode, AMDGPU::OpName::dst) != -1;
+}
+
+bool R600InstrInfo::canBeConsideredALU(const MachineInstr *MI) const {
+ if (isALUInstr(MI->getOpcode()))
+ return true;
+ if (isVector(*MI) || isCubeOp(MI->getOpcode()))
+ return true;
+ switch (MI->getOpcode()) {
+ case AMDGPU::PRED_X:
+ case AMDGPU::INTERP_PAIR_XY:
+ case AMDGPU::INTERP_PAIR_ZW:
+ case AMDGPU::INTERP_VEC_LOAD:
+ case AMDGPU::COPY:
+ case AMDGPU::DOT_4:
+ return true;
+ default:
+ return false;
+ }
}
bool R600InstrInfo::isTransOnly(unsigned Opcode) const {
- return (get(Opcode).TSFlags & R600_InstFlag::TRANS_ONLY);
+ if (ST.hasCaymanISA())
+ return false;
+ return (get(Opcode).getSchedClass() == AMDGPU::Sched::TransALU);
}
bool R600InstrInfo::isTransOnly(const MachineInstr *MI) const {
return isTransOnly(MI->getOpcode());
}
+bool R600InstrInfo::isVectorOnly(unsigned Opcode) const {
+ return (get(Opcode).getSchedClass() == AMDGPU::Sched::VecALU);
+}
+
+bool R600InstrInfo::isVectorOnly(const MachineInstr *MI) const {
+ return isVectorOnly(MI->getOpcode());
+}
+
+bool R600InstrInfo::isExport(unsigned Opcode) const {
+ return (get(Opcode).TSFlags & R600_InstFlag::IS_EXPORT);
+}
+
bool R600InstrInfo::usesVertexCache(unsigned Opcode) const {
return ST.hasVertexCache() && IS_VTX(get(Opcode));
}
@@ -189,6 +230,30 @@ bool R600InstrInfo::mustBeLastInClause(unsigned Opcode) const {
}
}
+bool R600InstrInfo::usesAddressRegister(MachineInstr *MI) const {
+ return MI->findRegisterUseOperandIdx(AMDGPU::AR_X) != -1;
+}
+
+bool R600InstrInfo::definesAddressRegister(MachineInstr *MI) const {
+ return MI->findRegisterDefOperandIdx(AMDGPU::AR_X) != -1;
+}
+
+bool R600InstrInfo::readsLDSSrcReg(const MachineInstr *MI) const {
+ if (!isALUInstr(MI->getOpcode())) {
+ return false;
+ }
+ for (MachineInstr::const_mop_iterator I = MI->operands_begin(),
+ E = MI->operands_end(); I != E; ++I) {
+ if (!I->isReg() || !I->isUse() ||
+ TargetRegisterInfo::isVirtualRegister(I->getReg()))
+ continue;
+
+ if (AMDGPU::R600_LDS_SRC_REGRegClass.contains(I->getReg()))
+ return true;
+ }
+ return false;
+}
+
int R600InstrInfo::getSrcIdx(unsigned Opcode, unsigned SrcNum) const {
static const unsigned OpTable[] = {
AMDGPU::OpName::src0,
@@ -321,6 +386,8 @@ R600InstrInfo::ExtractSrcs(MachineInstr *MI,
static std::vector<std::pair<int, unsigned> >
Swizzle(std::vector<std::pair<int, unsigned> > Src,
R600InstrInfo::BankSwizzle Swz) {
+ if (Src[0] == Src[1])
+ Src[1].first = -1;
switch (Swz) {
case R600InstrInfo::ALU_VEC_012_SCL_210:
break;
@@ -462,6 +529,9 @@ static bool
isConstCompatible(R600InstrInfo::BankSwizzle TransSwz,
const std::vector<std::pair<int, unsigned> > &TransOps,
unsigned ConstCount) {
+ // TransALU can't read more than 2 constants
+ if (ConstCount > 2)
+ return false;
for (unsigned i = 0, e = TransOps.size(); i < e; ++i) {
const std::pair<int, unsigned> &Src = TransOps[i];
unsigned Cycle = getTransSwizzle(TransSwz, i);
@@ -615,6 +685,11 @@ bool isJump(unsigned Opcode) {
return Opcode == AMDGPU::JUMP || Opcode == AMDGPU::JUMP_COND;
}
+static bool isBranch(unsigned Opcode) {
+ return Opcode == AMDGPU::BRANCH || Opcode == AMDGPU::BRANCH_COND_i32 ||
+ Opcode == AMDGPU::BRANCH_COND_f32;
+}
+
bool
R600InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
MachineBasicBlock *&TBB,
@@ -633,6 +708,10 @@ R600InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
return false;
--I;
}
+ // AMDGPU::BRANCH* instructions are only available after isel and are not
+ // handled here.
+ if (isBranch(I->getOpcode()))
+ return true;
if (!isJump(static_cast<MachineInstr *>(I)->getOpcode())) {
return false;
}
@@ -942,6 +1021,20 @@ R600InstrInfo::PredicateInstruction(MachineInstr *MI,
return true;
}
+ if (MI->getOpcode() == AMDGPU::DOT_4) {
+ MI->getOperand(getOperandIdx(*MI, AMDGPU::OpName::pred_sel_X))
+ .setReg(Pred[2].getReg());
+ MI->getOperand(getOperandIdx(*MI, AMDGPU::OpName::pred_sel_Y))
+ .setReg(Pred[2].getReg());
+ MI->getOperand(getOperandIdx(*MI, AMDGPU::OpName::pred_sel_Z))
+ .setReg(Pred[2].getReg());
+ MI->getOperand(getOperandIdx(*MI, AMDGPU::OpName::pred_sel_W))
+ .setReg(Pred[2].getReg());
+ MachineInstrBuilder MIB(*MI->getParent()->getParent(), MI);
+ MIB.addReg(AMDGPU::PREDICATE_BIT, RegState::Implicit);
+ return true;
+ }
+
if (PIdx != -1) {
MachineOperand &PMO = MI->getOperand(PIdx);
PMO.setReg(Pred[2].getReg());
@@ -953,6 +1046,10 @@ R600InstrInfo::PredicateInstruction(MachineInstr *MI,
return false;
}
+unsigned int R600InstrInfo::getPredicationCost(const MachineInstr *) const {
+ return 2;
+}
+
unsigned int R600InstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
const MachineInstr *MI,
unsigned *PredCost) const {
@@ -961,67 +1058,25 @@ unsigned int R600InstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
return 2;
}
-int R600InstrInfo::getIndirectIndexBegin(const MachineFunction &MF) const {
- const MachineRegisterInfo &MRI = MF.getRegInfo();
- const MachineFrameInfo *MFI = MF.getFrameInfo();
- int Offset = 0;
-
- if (MFI->getNumObjects() == 0) {
- return -1;
- }
-
- if (MRI.livein_empty()) {
- return 0;
- }
-
- for (MachineRegisterInfo::livein_iterator LI = MRI.livein_begin(),
- LE = MRI.livein_end();
- LI != LE; ++LI) {
- Offset = std::max(Offset,
- GET_REG_INDEX(RI.getEncodingValue(LI->first)));
- }
-
- return Offset + 1;
-}
-
-int R600InstrInfo::getIndirectIndexEnd(const MachineFunction &MF) const {
- int Offset = 0;
- const MachineFrameInfo *MFI = MF.getFrameInfo();
-
- // Variable sized objects are not supported
- assert(!MFI->hasVarSizedObjects());
-
- if (MFI->getNumObjects() == 0) {
- return -1;
- }
-
- Offset = TM.getFrameLowering()->getFrameIndexOffset(MF, -1);
-
- return getIndirectIndexBegin(MF) + Offset;
-}
-
-std::vector<unsigned> R600InstrInfo::getIndirectReservedRegs(
+void R600InstrInfo::reserveIndirectRegisters(BitVector &Reserved,
const MachineFunction &MF) const {
const AMDGPUFrameLowering *TFL =
static_cast<const AMDGPUFrameLowering*>(TM.getFrameLowering());
- std::vector<unsigned> Regs;
unsigned StackWidth = TFL->getStackWidth(MF);
int End = getIndirectIndexEnd(MF);
- if (End == -1) {
- return Regs;
- }
+ if (End == -1)
+ return;
for (int Index = getIndirectIndexBegin(MF); Index <= End; ++Index) {
unsigned SuperReg = AMDGPU::R600_Reg128RegClass.getRegister(Index);
- Regs.push_back(SuperReg);
+ Reserved.set(SuperReg);
for (unsigned Chan = 0; Chan < StackWidth; ++Chan) {
unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister((4 * Index) + Chan);
- Regs.push_back(Reg);
+ Reserved.set(Reg);
}
}
- return Regs;
}
unsigned R600InstrInfo::calculateIndirectAddress(unsigned RegIndex,
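A toy rendering of the reservation arithmetic above, assuming stack slot Index claims 128-bit super-register Index plus the 32-bit channel registers numbered 4 * Index + Chan; the Begin, End and StackWidth values are made up.

#include <cstdio>

int main() {
  const int Begin = 2, End = 3;
  const int StackWidth = 2;              // channels used per slot
  for (int Index = Begin; Index <= End; ++Index) {
    std::printf("reserve Reg128 #%d\n", Index);
    for (int Chan = 0; Chan < StackWidth; ++Chan)
      std::printf("reserve TReg32 #%d\n", 4 * Index + Chan);
  }
  return 0;
}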
@@ -1031,13 +1086,8 @@ unsigned R600InstrInfo::calculateIndirectAddress(unsigned RegIndex,
return RegIndex;
}
-const TargetRegisterClass * R600InstrInfo::getIndirectAddrStoreRegClass(
- unsigned SourceReg) const {
- return &AMDGPU::R600_TReg32RegClass;
-}
-
-const TargetRegisterClass *R600InstrInfo::getIndirectAddrLoadRegClass() const {
- return &AMDGPU::TRegMemRegClass;
+const TargetRegisterClass *R600InstrInfo::getIndirectAddrRegClass() const {
+ return &AMDGPU::R600_TReg32_XRegClass;
}
MachineInstrBuilder R600InstrInfo::buildIndirectWrite(MachineBasicBlock *MBB,
@@ -1076,10 +1126,6 @@ MachineInstrBuilder R600InstrInfo::buildIndirectRead(MachineBasicBlock *MBB,
return Mov;
}
-const TargetRegisterClass *R600InstrInfo::getSuperIndirectRegClass() const {
- return &AMDGPU::IndirectRegRegClass;
-}
-
unsigned R600InstrInfo::getMaxAlusPerClause() const {
return 115;
}
@@ -1197,6 +1243,11 @@ MachineInstr *R600InstrInfo::buildSlotOfVectorInstruction(
AMDGPU::OpName::src1_sel,
};
+ MachineOperand &MO = MI->getOperand(getOperandIdx(MI->getOpcode(),
+ getSlotedOps(AMDGPU::OpName::pred_sel, Slot)));
+ MIB->getOperand(getOperandIdx(Opcode, AMDGPU::OpName::pred_sel))
+ .setReg(MO.getReg());
+
for (unsigned i = 0; i < 14; i++) {
MachineOperand &MO = MI->getOperand(
getOperandIdx(MI->getOpcode(), getSlotedOps(Operands[i], Slot)));
@@ -1217,6 +1268,12 @@ MachineInstr *R600InstrInfo::buildMovImm(MachineBasicBlock &BB,
return MovImm;
}
+MachineInstr *R600InstrInfo::buildMovInstr(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator I,
+ unsigned DstReg, unsigned SrcReg) const {
+ return buildDefaultInstruction(*MBB, I, AMDGPU::MOV, DstReg, SrcReg);
+}
+
int R600InstrInfo::getOperandIdx(const MachineInstr &MI, unsigned Op) const {
return getOperandIdx(MI.getOpcode(), Op);
}
diff --git a/lib/Target/R600/R600InstrInfo.h b/lib/Target/R600/R600InstrInfo.h
index cdaa2fb..13d9810 100644
--- a/lib/Target/R600/R600InstrInfo.h
+++ b/lib/Target/R600/R600InstrInfo.h
@@ -55,6 +55,8 @@ namespace llvm {
MachineBasicBlock::iterator MI, DebugLoc DL,
unsigned DestReg, unsigned SrcReg,
bool KillSrc) const;
+ bool isLegalToSplitMBBAt(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI) const;
bool isTrig(const MachineInstr &MI) const;
bool isPlaceHolderOpcode(unsigned opcode) const;
@@ -65,9 +67,18 @@ namespace llvm {
bool isALUInstr(unsigned Opcode) const;
bool hasInstrModifiers(unsigned Opcode) const;
bool isLDSInstr(unsigned Opcode) const;
+ bool isLDSNoRetInstr(unsigned Opcode) const;
+ bool isLDSRetInstr(unsigned Opcode) const;
+
+ /// \returns true if \p MI is an ALU instruction or an instruction
+ /// that will be lowered in the ExpandSpecialInstrs pass.
+ bool canBeConsideredALU(const MachineInstr *MI) const;
bool isTransOnly(unsigned Opcode) const;
bool isTransOnly(const MachineInstr *MI) const;
+ bool isVectorOnly(unsigned Opcode) const;
+ bool isVectorOnly(const MachineInstr *MI) const;
+ bool isExport(unsigned Opcode) const;
bool usesVertexCache(unsigned Opcode) const;
bool usesVertexCache(const MachineInstr *MI) const;
@@ -75,6 +86,9 @@ namespace llvm {
bool usesTextureCache(const MachineInstr *MI) const;
bool mustBeLastInClause(unsigned Opcode) const;
+ bool usesAddressRegister(MachineInstr *MI) const;
+ bool definesAddressRegister(MachineInstr *MI) const;
+ bool readsLDSSrcReg(const MachineInstr *MI) const;
/// \returns The operand index for the given source number. Legal values
/// for SrcNum are 0, 1, and 2.
@@ -128,9 +142,6 @@ namespace llvm {
/// instruction slots within an instruction group.
bool isVector(const MachineInstr &MI) const;
- virtual MachineInstr * getMovImmInstr(MachineFunction *MF, unsigned DstReg,
- int64_t Imm) const;
-
virtual unsigned getIEQOpcode() const;
virtual bool isMov(unsigned Opcode) const;
@@ -177,6 +188,8 @@ namespace llvm {
bool PredicateInstruction(MachineInstr *MI,
const SmallVectorImpl<MachineOperand> &Pred) const;
+ unsigned int getPredicationCost(const MachineInstr *) const;
+
unsigned int getInstrLatency(const InstrItineraryData *ItinData,
const MachineInstr *MI,
unsigned *PredCost = 0) const;
@@ -184,22 +197,14 @@ namespace llvm {
virtual int getInstrLatency(const InstrItineraryData *ItinData,
SDNode *Node) const { return 1;}
- /// \returns a list of all the registers that may be accesed using indirect
- /// addressing.
- std::vector<unsigned> getIndirectReservedRegs(const MachineFunction &MF) const;
-
- virtual int getIndirectIndexBegin(const MachineFunction &MF) const;
-
- virtual int getIndirectIndexEnd(const MachineFunction &MF) const;
-
+ /// \brief Reserve the registers that may be accessed using indirect addressing.
+ void reserveIndirectRegisters(BitVector &Reserved,
+ const MachineFunction &MF) const;
virtual unsigned calculateIndirectAddress(unsigned RegIndex,
unsigned Channel) const;
- virtual const TargetRegisterClass *getIndirectAddrStoreRegClass(
- unsigned SourceReg) const;
-
- virtual const TargetRegisterClass *getIndirectAddrLoadRegClass() const;
+ virtual const TargetRegisterClass *getIndirectAddrRegClass() const;
virtual MachineInstrBuilder buildIndirectWrite(MachineBasicBlock *MBB,
MachineBasicBlock::iterator I,
@@ -211,8 +216,6 @@ namespace llvm {
unsigned ValueReg, unsigned Address,
unsigned OffsetReg) const;
- virtual const TargetRegisterClass *getSuperIndirectRegClass() const;
-
unsigned getMaxAlusPerClause() const;
///buildDefaultInstruction - This function returns a MachineInstr with
@@ -239,6 +242,10 @@ namespace llvm {
unsigned DstReg,
uint64_t Imm) const;
+ MachineInstr *buildMovInstr(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator I,
+ unsigned DstReg, unsigned SrcReg) const;
+
/// \brief Get the index of Op in the MachineInstr.
///
/// \returns -1 if the Instruction does not contain the specified \p Op.
@@ -272,6 +279,12 @@ namespace llvm {
void clearFlag(MachineInstr *MI, unsigned Operand, unsigned Flag) const;
};
+namespace AMDGPU {
+
+int getLDSNoRetOp(uint16_t Opcode);
+
+} // End namespace AMDGPU
+
} // End llvm namespace
#endif // R600INSTRINFO_H_
diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td
index 7e61b18..0346e24 100644
--- a/lib/Target/R600/R600Instructions.td
+++ b/lib/Target/R600/R600Instructions.td
@@ -75,7 +75,6 @@ def ADDRDWord : ComplexPattern<i32, 1, "SelectADDRDWord", [], []>;
def ADDRVTX_READ : ComplexPattern<i32, 2, "SelectADDRVTX_READ", [], []>;
def ADDRGA_CONST_OFFSET : ComplexPattern<i32, 1, "SelectGlobalValueConstantOffset", [], []>;
def ADDRGA_VAR_OFFSET : ComplexPattern<i32, 2, "SelectGlobalValueVariableOffset", [], []>;
-def ADDRIndirect : ComplexPattern<iPTR, 2, "SelectADDRIndirect", [], []>;
def R600_Pred : PredicateOperand<i32, (ops R600_Predicate),
@@ -230,7 +229,7 @@ def TEX_RECT : PatLeaf<
def TEX_ARRAY : PatLeaf<
(imm),
[{uint32_t TType = (uint32_t)N->getZExtValue();
- return TType == 9 || TType == 10 || TType == 15 || TType == 16;
+ return TType == 9 || TType == 10 || TType == 16;
}]
>;
@@ -241,12 +240,26 @@ def TEX_SHADOW_ARRAY : PatLeaf<
}]
>;
-class EG_CF_RAT <bits <8> cfinst, bits <6> ratinst, bits<4> mask, dag outs,
- dag ins, string asm, list<dag> pattern> :
+def TEX_MSAA : PatLeaf<
+ (imm),
+ [{uint32_t TType = (uint32_t)N->getZExtValue();
+ return TType == 14;
+ }]
+>;
+
+def TEX_ARRAY_MSAA : PatLeaf<
+ (imm),
+ [{uint32_t TType = (uint32_t)N->getZExtValue();
+ return TType == 15;
+ }]
+>;
+
+class EG_CF_RAT <bits <8> cfinst, bits <6> ratinst, bits<4> ratid, bits<4> mask,
+ dag outs, dag ins, string asm, list<dag> pattern> :
InstR600ISA <outs, ins, asm, pattern>,
CF_ALLOC_EXPORT_WORD0_RAT, CF_ALLOC_EXPORT_WORD1_BUF {
- let rat_id = 0;
+ let rat_id = ratid;
let rat_inst = ratinst;
let rim = 0;
// XXX: Have a separate instruction for non-indexed writes.
@@ -264,6 +277,7 @@ class EG_CF_RAT <bits <8> cfinst, bits <6> ratinst, bits<4> mask, dag outs,
let Inst{31-0} = Word0;
let Inst{63-32} = Word1;
+ let IsExport = 1;
}
@@ -403,7 +417,7 @@ def INTERP_VEC_LOAD : AMDGPUShaderInst <
(outs R600_Reg128:$dst),
(ins i32imm:$src0),
"INTERP_LOAD $src0 : $dst",
- []>;
+ [(set R600_Reg128:$dst, (int_R600_interp_const imm:$src0))]>;
def INTERP_XY : R600_2OP <0xD6, "INTERP_XY", []> {
let bank_swizzle = 5;
@@ -537,6 +551,7 @@ class ExportSwzInst : InstR600ISA<(
let elem_size = 3;
let Inst{31-0} = Word0;
let Inst{63-32} = Word1;
+ let IsExport = 1;
}
} // End usesCustomInserter = 1
@@ -550,6 +565,7 @@ class ExportBufInst : InstR600ISA<(
let elem_size = 0;
let Inst{31-0} = Word0;
let Inst{63-32} = Word1;
+ let IsExport = 1;
}
//===----------------------------------------------------------------------===//
@@ -573,6 +589,7 @@ i32imm:$COUNT, i32imm:$Enabled),
let ALT_CONST = 0;
let WHOLE_QUAD_MODE = 0;
let BARRIER = 1;
+ let UseNamedOperandTable = 1;
let Inst{31-0} = Word0;
let Inst{63-32} = Word1;
@@ -672,42 +689,42 @@ def MIN : R600_2OP_Helper <0x4, "MIN", AMDGPUfmin>;
// XXX: Use the defs in TargetSelectionDAG.td instead of intrinsics.
def SETE : R600_2OP <
0x08, "SETE",
- [(set f32:$dst, (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_EQ))]
+ [(set f32:$dst, (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_OEQ))]
>;
def SGT : R600_2OP <
0x09, "SETGT",
- [(set f32:$dst, (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_GT))]
+ [(set f32:$dst, (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_OGT))]
>;
def SGE : R600_2OP <
0xA, "SETGE",
- [(set f32:$dst, (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_GE))]
+ [(set f32:$dst, (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_OGE))]
>;
def SNE : R600_2OP <
0xB, "SETNE",
- [(set f32:$dst, (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_NE))]
+ [(set f32:$dst, (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_UNE))]
>;
def SETE_DX10 : R600_2OP <
0xC, "SETE_DX10",
- [(set i32:$dst, (selectcc f32:$src0, f32:$src1, -1, 0, COND_EQ))]
+ [(set i32:$dst, (selectcc f32:$src0, f32:$src1, -1, 0, COND_OEQ))]
>;
def SETGT_DX10 : R600_2OP <
0xD, "SETGT_DX10",
- [(set i32:$dst, (selectcc f32:$src0, f32:$src1, -1, 0, COND_GT))]
+ [(set i32:$dst, (selectcc f32:$src0, f32:$src1, -1, 0, COND_OGT))]
>;
def SETGE_DX10 : R600_2OP <
0xE, "SETGE_DX10",
- [(set i32:$dst, (selectcc f32:$src0, f32:$src1, -1, 0, COND_GE))]
+ [(set i32:$dst, (selectcc f32:$src0, f32:$src1, -1, 0, COND_OGE))]
>;
def SETNE_DX10 : R600_2OP <
0xF, "SETNE_DX10",
- [(set i32:$dst, (selectcc f32:$src0, f32:$src1, -1, 0, COND_NE))]
+ [(set i32:$dst, (selectcc f32:$src0, f32:$src1, -1, 0, COND_UNE))]
>;
def FRACT : R600_1OP_Helper <0x10, "FRACT", AMDGPUfract>;
@@ -805,12 +822,12 @@ def CNDE_INT : R600_3OP <
def CNDGE_INT : R600_3OP <
0x1E, "CNDGE_INT",
- [(set i32:$dst, (selectcc i32:$src0, 0, i32:$src1, i32:$src2, COND_GE))]
+ [(set i32:$dst, (selectcc i32:$src0, 0, i32:$src1, i32:$src2, COND_SGE))]
>;
def CNDGT_INT : R600_3OP <
0x1D, "CNDGT_INT",
- [(set i32:$dst, (selectcc i32:$src0, 0, i32:$src1, i32:$src2, COND_GT))]
+ [(set i32:$dst, (selectcc i32:$src0, 0, i32:$src1, i32:$src2, COND_SGT))]
>;
//===----------------------------------------------------------------------===//
@@ -863,6 +880,9 @@ def TEX_SAMPLE_C_L : R600_TEX <0x19, "TEX_SAMPLE_C_L">;
def TEX_SAMPLE_LB : R600_TEX <0x12, "TEX_SAMPLE_LB">;
def TEX_SAMPLE_C_LB : R600_TEX <0x1A, "TEX_SAMPLE_C_LB">;
def TEX_LD : R600_TEX <0x03, "TEX_LD">;
+def TEX_LDPTR : R600_TEX <0x03, "TEX_LDPTR"> {
+ let INST_MOD = 1;
+}
def TEX_GET_TEXTURE_RESINFO : R600_TEX <0x04, "TEX_GET_TEXTURE_RESINFO">;
def TEX_GET_GRADIENTS_H : R600_TEX <0x07, "TEX_GET_GRADIENTS_H">;
def TEX_GET_GRADIENTS_V : R600_TEX <0x08, "TEX_GET_GRADIENTS_V">;
@@ -881,6 +901,7 @@ defm : TexPattern<6, TEX_LD, v4i32>;
defm : TexPattern<7, TEX_GET_TEXTURE_RESINFO, v4i32>;
defm : TexPattern<8, TEX_GET_GRADIENTS_H>;
defm : TexPattern<9, TEX_GET_GRADIENTS_V>;
+defm : TexPattern<10, TEX_LDPTR, v4i32>;
//===----------------------------------------------------------------------===//
// Helper classes for common instructions
@@ -903,18 +924,22 @@ class MULADD_IEEE_Common <bits<5> inst> : R600_3OP <
class CNDE_Common <bits<5> inst> : R600_3OP <
inst, "CNDE",
- [(set f32:$dst, (selectcc f32:$src0, FP_ZERO, f32:$src1, f32:$src2, COND_EQ))]
+ [(set f32:$dst, (selectcc f32:$src0, FP_ZERO, f32:$src1, f32:$src2, COND_OEQ))]
>;
class CNDGT_Common <bits<5> inst> : R600_3OP <
inst, "CNDGT",
- [(set f32:$dst, (selectcc f32:$src0, FP_ZERO, f32:$src1, f32:$src2, COND_GT))]
->;
+ [(set f32:$dst, (selectcc f32:$src0, FP_ZERO, f32:$src1, f32:$src2, COND_OGT))]
+> {
+ let Itinerary = VecALU;
+}
class CNDGE_Common <bits<5> inst> : R600_3OP <
inst, "CNDGE",
- [(set f32:$dst, (selectcc f32:$src0, FP_ZERO, f32:$src1, f32:$src2, COND_GE))]
->;
+ [(set f32:$dst, (selectcc f32:$src0, FP_ZERO, f32:$src1, f32:$src2, COND_OGE))]
+> {
+ let Itinerary = VecALU;
+}
let isCodeGenOnly = 1, isPseudo = 1, Namespace = "AMDGPU" in {
@@ -984,35 +1009,30 @@ multiclass CUBE_Common <bits<11> inst> {
class EXP_IEEE_Common <bits<11> inst> : R600_1OP_Helper <
inst, "EXP_IEEE", fexp2
> {
- let TransOnly = 1;
let Itinerary = TransALU;
}
class FLT_TO_INT_Common <bits<11> inst> : R600_1OP_Helper <
inst, "FLT_TO_INT", fp_to_sint
> {
- let TransOnly = 1;
let Itinerary = TransALU;
}
class INT_TO_FLT_Common <bits<11> inst> : R600_1OP_Helper <
inst, "INT_TO_FLT", sint_to_fp
> {
- let TransOnly = 1;
let Itinerary = TransALU;
}
class FLT_TO_UINT_Common <bits<11> inst> : R600_1OP_Helper <
inst, "FLT_TO_UINT", fp_to_uint
> {
- let TransOnly = 1;
let Itinerary = TransALU;
}
class UINT_TO_FLT_Common <bits<11> inst> : R600_1OP_Helper <
inst, "UINT_TO_FLT", uint_to_fp
> {
- let TransOnly = 1;
let Itinerary = TransALU;
}
@@ -1023,7 +1043,6 @@ class LOG_CLAMPED_Common <bits<11> inst> : R600_1OP <
class LOG_IEEE_Common <bits<11> inst> : R600_1OP_Helper <
inst, "LOG_IEEE", flog2
> {
- let TransOnly = 1;
let Itinerary = TransALU;
}
@@ -1033,75 +1052,68 @@ class ASHR_Common <bits<11> inst> : R600_2OP_Helper <inst, "ASHR", sra>;
class MULHI_INT_Common <bits<11> inst> : R600_2OP_Helper <
inst, "MULHI_INT", mulhs
> {
- let TransOnly = 1;
let Itinerary = TransALU;
}
class MULHI_UINT_Common <bits<11> inst> : R600_2OP_Helper <
inst, "MULHI", mulhu
> {
- let TransOnly = 1;
let Itinerary = TransALU;
}
class MULLO_INT_Common <bits<11> inst> : R600_2OP_Helper <
inst, "MULLO_INT", mul
> {
- let TransOnly = 1;
let Itinerary = TransALU;
}
class MULLO_UINT_Common <bits<11> inst> : R600_2OP <inst, "MULLO_UINT", []> {
- let TransOnly = 1;
let Itinerary = TransALU;
}
class RECIP_CLAMPED_Common <bits<11> inst> : R600_1OP <
inst, "RECIP_CLAMPED", []
> {
- let TransOnly = 1;
let Itinerary = TransALU;
}
class RECIP_IEEE_Common <bits<11> inst> : R600_1OP <
inst, "RECIP_IEEE", [(set f32:$dst, (fdiv FP_ONE, f32:$src0))]
> {
- let TransOnly = 1;
let Itinerary = TransALU;
}
class RECIP_UINT_Common <bits<11> inst> : R600_1OP_Helper <
inst, "RECIP_UINT", AMDGPUurecip
> {
- let TransOnly = 1;
let Itinerary = TransALU;
}
class RECIPSQRT_CLAMPED_Common <bits<11> inst> : R600_1OP_Helper <
inst, "RECIPSQRT_CLAMPED", int_AMDGPU_rsq
> {
- let TransOnly = 1;
let Itinerary = TransALU;
}
class RECIPSQRT_IEEE_Common <bits<11> inst> : R600_1OP <
inst, "RECIPSQRT_IEEE", []
> {
- let TransOnly = 1;
let Itinerary = TransALU;
}
class SIN_Common <bits<11> inst> : R600_1OP <
inst, "SIN", [(set f32:$dst, (SIN_HW f32:$src0))]>{
let Trig = 1;
- let TransOnly = 1;
let Itinerary = TransALU;
}
class COS_Common <bits<11> inst> : R600_1OP <
inst, "COS", [(set f32:$dst, (COS_HW f32:$src0))]> {
let Trig = 1;
- let TransOnly = 1;
let Itinerary = TransALU;
}
+def CLAMP_R600 : CLAMP <R600_Reg32>;
+def FABS_R600 : FABS<R600_Reg32>;
+def FNEG_R600 : FNEG<R600_Reg32>;
+
//===----------------------------------------------------------------------===//
// Helper patterns for complex intrinsics
//===----------------------------------------------------------------------===//
@@ -1124,6 +1136,13 @@ class TGSI_LIT_Z_Common <InstR600 mul_lit, InstR600 log_clamped, InstR600 exp_ie
(exp_ieee (mul_lit (log_clamped (MAX $src_y, (f32 ZERO))), $src_w, $src_x))
>;
+// FROUND pattern
+class FROUNDPat<Instruction CNDGE> : Pat <
+ (AMDGPUround f32:$x),
+ (CNDGE (ADD (FNEG_R600 (f32 HALF)), (FRACT $x)), (CEIL $x), (FLOOR $x))
+>;
+
+
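A scalar check of the FROUND expansion above, assuming FRACT(x) = x - floor(x) in [0,1) and CNDGE(c, a, b) = (c >= 0) ? a : b. Note that under these assumptions the expansion rounds half-way cases upward (e.g. -1.5 -> -1).

#include <cmath>
#include <cstdio>

static float cndge(float C, float A, float B) { return C >= 0.0f ? A : B; }
static float fract(float X) { return X - std::floor(X); }

// CNDGE (ADD (FNEG HALF), (FRACT $x)), (CEIL $x), (FLOOR $x)
static float emulatedRound(float X) {
  return cndge(fract(X) + (-0.5f), std::ceil(X), std::floor(X));
}

int main() {
  const float Tests[] = {1.25f, 1.5f, -1.25f, -1.5f, 2.75f};
  for (float X : Tests)
    std::printf("round(%+.2f) -> %+.2f\n", X, emulatedRound(X));
  return 0;
}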
//===----------------------------------------------------------------------===//
// R600 / R700 Instructions
//===----------------------------------------------------------------------===//
@@ -1165,11 +1184,12 @@ let Predicates = [isR600] in {
def TGSI_LIT_Z_r600 : TGSI_LIT_Z_Common<MUL_LIT_r600, LOG_CLAMPED_r600, EXP_IEEE_r600>;
def : Pat<(fsqrt f32:$src), (MUL $src, (RECIPSQRT_CLAMPED_r600 $src))>;
+ def : FROUNDPat <CNDGE_r600>;
def R600_ExportSwz : ExportSwzInst {
let Word1{20-17} = 0; // BURST_COUNT
let Word1{21} = eop;
- let Word1{22} = 1; // VALID_PIXEL_MODE
+ let Word1{22} = 0; // VALID_PIXEL_MODE
let Word1{30-23} = inst;
let Word1{31} = 1; // BARRIER
}
@@ -1178,7 +1198,7 @@ let Predicates = [isR600] in {
def R600_ExportBuf : ExportBufInst {
let Word1{20-17} = 0; // BURST_COUNT
let Word1{21} = eop;
- let Word1{22} = 1; // VALID_PIXEL_MODE
+ let Word1{22} = 0; // VALID_PIXEL_MODE
let Word1{30-23} = inst;
let Word1{31} = 1; // BARRIER
}
@@ -1247,6 +1267,33 @@ let Predicates = [isR700] in {
}
//===----------------------------------------------------------------------===//
+// Evergreen / Cayman store instructions
+//===----------------------------------------------------------------------===//
+
+let Predicates = [isEGorCayman] in {
+
+class CF_MEM_RAT_CACHELESS <bits<6> rat_inst, bits<4> rat_id, bits<4> mask, dag ins,
+ string name, list<dag> pattern>
+ : EG_CF_RAT <0x57, rat_inst, rat_id, mask, (outs), ins,
+ "MEM_RAT_CACHELESS "#name, pattern>;
+
+class CF_MEM_RAT <bits<6> rat_inst, bits<4> rat_id, dag ins, string name,
+ list<dag> pattern>
+ : EG_CF_RAT <0x56, rat_inst, rat_id, 0xf /* mask */, (outs), ins,
+ "MEM_RAT "#name, pattern>;
+
+def RAT_MSKOR : CF_MEM_RAT <0x11, 0,
+ (ins R600_Reg128:$rw_gpr, R600_TReg32_X:$index_gpr),
+ "MSKOR $rw_gpr.XW, $index_gpr",
+ [(mskor_global v4i32:$rw_gpr, i32:$index_gpr)]
+> {
+ let eop = 0;
+}
+
+} // End Predicates = [isEGorCayman]
+
+
+//===----------------------------------------------------------------------===//
// Evergreen Only instructions
//===----------------------------------------------------------------------===//
@@ -1274,36 +1321,32 @@ def : Pat<(fsqrt f32:$src), (MUL $src, (RECIPSQRT_CLAMPED_eg $src))>;
//===----------------------------------------------------------------------===//
// Memory read/write instructions
//===----------------------------------------------------------------------===//
-let usesCustomInserter = 1 in {
-class RAT_WRITE_CACHELESS_eg <dag ins, bits<4> mask, string name,
- list<dag> pattern>
- : EG_CF_RAT <0x57, 0x2, mask, (outs), ins, name, pattern> {
-}
-
-} // End usesCustomInserter = 1
+let usesCustomInserter = 1 in {
// 32-bit store
-def RAT_WRITE_CACHELESS_32_eg : RAT_WRITE_CACHELESS_eg <
+def RAT_WRITE_CACHELESS_32_eg : CF_MEM_RAT_CACHELESS <0x2, 0, 0x1,
(ins R600_TReg32_X:$rw_gpr, R600_TReg32_X:$index_gpr, InstFlag:$eop),
- 0x1, "RAT_WRITE_CACHELESS_32_eg $rw_gpr, $index_gpr, $eop",
+ "STORE_RAW $rw_gpr, $index_gpr, $eop",
[(global_store i32:$rw_gpr, i32:$index_gpr)]
>;
// 64-bit store
-def RAT_WRITE_CACHELESS_64_eg : RAT_WRITE_CACHELESS_eg <
+def RAT_WRITE_CACHELESS_64_eg : CF_MEM_RAT_CACHELESS <0x2, 0, 0x3,
(ins R600_Reg64:$rw_gpr, R600_TReg32_X:$index_gpr, InstFlag:$eop),
- 0x3, "RAT_WRITE_CACHELESS_64_eg $rw_gpr.XY, $index_gpr, $eop",
+ "STORE_RAW $rw_gpr.XY, $index_gpr, $eop",
[(global_store v2i32:$rw_gpr, i32:$index_gpr)]
>;
//128-bit store
-def RAT_WRITE_CACHELESS_128_eg : RAT_WRITE_CACHELESS_eg <
+def RAT_WRITE_CACHELESS_128_eg : CF_MEM_RAT_CACHELESS <0x2, 0, 0xf,
(ins R600_Reg128:$rw_gpr, R600_TReg32_X:$index_gpr, InstFlag:$eop),
- 0xf, "RAT_WRITE_CACHELESS_128 $rw_gpr.XYZW, $index_gpr, $eop",
+ "STORE_RAW $rw_gpr.XYZW, $index_gpr, $eop",
[(global_store v4i32:$rw_gpr, i32:$index_gpr)]
>;
+} // End usesCustomInserter = 1
+
class VTX_READ_eg <string name, bits<8> buffer_id, dag outs, list<dag> pattern>
: VTX_WORD0_eg, VTX_READ<name, buffer_id, outs, pattern> {
@@ -1508,7 +1551,6 @@ let hasSideEffects = 1 in {
def FLT_TO_INT_eg : FLT_TO_INT_Common<0x50> {
let Pattern = [];
- let TransOnly = 0;
let Itinerary = AnyALU;
}
@@ -1600,29 +1642,83 @@ class R600_LDS_1A <bits<6> lds_op, string name, list<dag> pattern> : R600_LDS <
let DisableEncoding = "$dst";
}
-class R600_LDS_1A1D <bits<6> lds_op, string name, list<dag> pattern> :
+class R600_LDS_1A1D <bits<6> lds_op, dag outs, string name, list<dag> pattern,
+ string dst =""> :
R600_LDS <
- lds_op,
- (outs),
+ lds_op, outs,
(ins R600_Reg32:$src0, REL:$src0_rel, SEL:$src0_sel,
R600_Reg32:$src1, REL:$src1_rel, SEL:$src1_sel,
LAST:$last, R600_Pred:$pred_sel,
BANK_SWIZZLE:$bank_swizzle),
- " "#name#" $last $src0$src0_rel, $src1$src1_rel, $pred_sel",
+ " "#name#" $last "#dst#"$src0$src0_rel, $src1$src1_rel, $pred_sel",
pattern
> {
+ field string BaseOp;
+
let src2 = 0;
let src2_rel = 0;
let LDS_1A1D = 1;
}
+class R600_LDS_1A1D_NORET <bits<6> lds_op, string name, list<dag> pattern> :
+ R600_LDS_1A1D <lds_op, (outs), name, pattern> {
+ let BaseOp = name;
+}
+
+class R600_LDS_1A1D_RET <bits<6> lds_op, string name, list<dag> pattern> :
+ R600_LDS_1A1D <lds_op, (outs R600_Reg32:$dst), name##"_RET", pattern, "OQAP, "> {
+
+ let BaseOp = name;
+ let usesCustomInserter = 1;
+ let DisableEncoding = "$dst";
+ let Defs = [OQAP];
+}
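+
+// Note on the _RET forms: the result comes back through the OQAP output
+// queue register (hence Defs = [OQAP] and the "OQAP, " asm prefix); the
+// custom inserter is expected to emit the MOV that pops the queued value
+// into $dst.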
+
+class R600_LDS_1A2D <bits<6> lds_op, string name, list<dag> pattern> :
+ R600_LDS <
+ lds_op,
+ (outs),
+ (ins R600_Reg32:$src0, REL:$src0_rel, SEL:$src0_sel,
+ R600_Reg32:$src1, REL:$src1_rel, SEL:$src1_sel,
+ R600_Reg32:$src2, REL:$src2_rel, SEL:$src2_sel,
+ LAST:$last, R600_Pred:$pred_sel, BANK_SWIZZLE:$bank_swizzle),
+ " "#name# "$last $src0$src0_rel, $src1$src1_rel, $src2$src2_rel, $pred_sel",
+ pattern> {
+ let LDS_1A2D = 1;
+}
+
+def LDS_ADD : R600_LDS_1A1D_NORET <0x0, "LDS_ADD", [] >;
+def LDS_SUB : R600_LDS_1A1D_NORET <0x1, "LDS_SUB", [] >;
+def LDS_WRITE : R600_LDS_1A1D_NORET <0xD, "LDS_WRITE",
+ [(local_store (i32 R600_Reg32:$src1), R600_Reg32:$src0)]
+>;
+def LDS_BYTE_WRITE : R600_LDS_1A1D_NORET<0x12, "LDS_BYTE_WRITE",
+ [(truncstorei8_local i32:$src1, i32:$src0)]
+>;
+def LDS_SHORT_WRITE : R600_LDS_1A1D_NORET<0x13, "LDS_SHORT_WRITE",
+ [(truncstorei16_local i32:$src1, i32:$src0)]
+>;
+def LDS_ADD_RET : R600_LDS_1A1D_RET <0x20, "LDS_ADD",
+ [(set i32:$dst, (atomic_load_add_local i32:$src0, i32:$src1))]
+>;
+def LDS_SUB_RET : R600_LDS_1A1D_RET <0x21, "LDS_SUB",
+ [(set i32:$dst, (atomic_load_sub_local i32:$src0, i32:$src1))]
+>;
def LDS_READ_RET : R600_LDS_1A <0x32, "LDS_READ_RET",
[(set (i32 R600_Reg32:$dst), (local_load R600_Reg32:$src0))]
>;
-
-def LDS_WRITE : R600_LDS_1A1D <0xD, "LDS_WRITE",
- [(local_store (i32 R600_Reg32:$src1), R600_Reg32:$src0)]
+def LDS_BYTE_READ_RET : R600_LDS_1A <0x36, "LDS_BYTE_READ_RET",
+ [(set i32:$dst, (sextloadi8_local i32:$src0))]
+>;
+def LDS_UBYTE_READ_RET : R600_LDS_1A <0x37, "LDS_UBYTE_READ_RET",
+ [(set i32:$dst, (az_extloadi8_local i32:$src0))]
+>;
+def LDS_SHORT_READ_RET : R600_LDS_1A <0x38, "LDS_SHORT_READ_RET",
+ [(set i32:$dst, (sextloadi16_local i32:$src0))]
+>;
+def LDS_USHORT_READ_RET : R600_LDS_1A <0x39, "LDS_USHORT_READ_RET",
+ [(set i32:$dst, (az_extloadi16_local i32:$src0))]
>;
// TRUNC is used for the FLT_TO_INT instructions to work around a
@@ -1642,9 +1738,11 @@ def LDS_WRITE : R600_LDS_1A1D <0xD, "LDS_WRITE",
// SHA-256 Patterns
def : SHA256MaPattern <BFI_INT_eg, XOR_INT>;
+ def : FROUNDPat <CNDGE_eg>;
+
def EG_ExportSwz : ExportSwzInst {
let Word1{19-16} = 0; // BURST_COUNT
- let Word1{20} = 1; // VALID_PIXEL_MODE
+ let Word1{20} = 0; // VALID_PIXEL_MODE
let Word1{21} = eop;
let Word1{29-22} = inst;
let Word1{30} = 0; // MARK
@@ -1654,7 +1752,7 @@ def LDS_WRITE : R600_LDS_1A1D <0xD, "LDS_WRITE",
def EG_ExportBuf : ExportBufInst {
let Word1{19-16} = 0; // BURST_COUNT
- let Word1{20} = 1; // VALID_PIXEL_MODE
+ let Word1{20} = 0; // VALID_PIXEL_MODE
let Word1{21} = eop;
let Word1{29-22} = inst;
let Word1{30} = 0; // MARK
@@ -1771,23 +1869,17 @@ def : Pat <
def : Pat<(fsqrt f32:$src), (MUL R600_Reg32:$src, (RECIPSQRT_CLAMPED_cm $src))>;
-
-class RAT_STORE_DWORD_cm <bits<4> mask, dag ins, list<dag> pat> : EG_CF_RAT <
- 0x57, 0x14, mask, (outs), ins,
- "EXPORT_RAT_INST_STORE_DWORD $rw_gpr, $index_gpr", pat
-> {
+class RAT_STORE_DWORD <RegisterClass rc, ValueType vt, bits<4> mask> :
+ CF_MEM_RAT_CACHELESS <0x14, 0, mask,
+ (ins rc:$rw_gpr, R600_TReg32_X:$index_gpr),
+ "STORE_DWORD $rw_gpr, $index_gpr",
+ [(global_store vt:$rw_gpr, i32:$index_gpr)]> {
let eop = 0; // This bit is not used on Cayman.
}
-def RAT_STORE_DWORD32_cm : RAT_STORE_DWORD_cm <0x1,
- (ins R600_TReg32_X:$rw_gpr, R600_TReg32_X:$index_gpr),
- [(global_store i32:$rw_gpr, i32:$index_gpr)]
->;
-
-def RAT_STORE_DWORD64_cm : RAT_STORE_DWORD_cm <0x3,
- (ins R600_Reg64:$rw_gpr, R600_TReg32_X:$index_gpr),
- [(global_store v2i32:$rw_gpr, i32:$index_gpr)]
->;
+def RAT_STORE_DWORD32 : RAT_STORE_DWORD <R600_TReg32_X, i32, 0x1>;
+def RAT_STORE_DWORD64 : RAT_STORE_DWORD <R600_Reg64, v2i32, 0x3>;
+def RAT_STORE_DWORD128 : RAT_STORE_DWORD <R600_Reg128, v4i32, 0xf>;
class VTX_READ_cm <string name, bits<8> buffer_id, dag outs, list<dag> pattern>
: VTX_WORD0_cm, VTX_READ<name, buffer_id, outs, pattern> {
@@ -2012,10 +2104,6 @@ def TXD_SHADOW: InstR600 <
} // End isPseudo = 1
} // End usesCustomInserter = 1
-def CLAMP_R600 : CLAMP <R600_Reg32>;
-def FABS_R600 : FABS<R600_Reg32>;
-def FNEG_R600 : FNEG<R600_Reg32>;
-
//===---------------------------------------------------------------------===//
// Return instruction
//===---------------------------------------------------------------------===//
@@ -2164,7 +2252,7 @@ let isTerminator = 1, usesCustomInserter = 1, isBranch = 1, isBarrier = 1 in {
def BRANCH : ILFormat<(outs), (ins brtarget:$target),
"; Pseudo unconditional branch instruction",
[(br bb:$target)]>;
- defm BRANCH_COND : BranchConditional<IL_brcond>;
+ defm BRANCH_COND : BranchConditional<IL_brcond, R600_Reg32, R600_Reg32>;
}
//===---------------------------------------------------------------------===//
@@ -2235,7 +2323,7 @@ def : CND_INT_f32 <CNDGE_INT, SETGE>;
//CNDGE_INT extra pattern
def : Pat <
- (selectcc i32:$src0, -1, i32:$src1, i32:$src2, COND_GT),
+ (selectcc i32:$src0, -1, i32:$src1, i32:$src2, COND_SGT),
(CNDGE_INT $src0, $src1, $src2)
>;
@@ -2250,86 +2338,6 @@ def KIL : Pat <
(MASK_WRITE (KILLGT (f32 ZERO), $src0))
>;
-// SGT Reverse args
-def : Pat <
- (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_LT),
- (SGT $src1, $src0)
->;
-
-// SGE Reverse args
-def : Pat <
- (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_LE),
- (SGE $src1, $src0)
->;
-
-// SETGT_DX10 reverse args
-def : Pat <
- (selectcc f32:$src0, f32:$src1, -1, 0, COND_LT),
- (SETGT_DX10 $src1, $src0)
->;
-
-// SETGE_DX10 reverse args
-def : Pat <
- (selectcc f32:$src0, f32:$src1, -1, 0, COND_LE),
- (SETGE_DX10 $src1, $src0)
->;
-
-// SETGT_INT reverse args
-def : Pat <
- (selectcc i32:$src0, i32:$src1, -1, 0, SETLT),
- (SETGT_INT $src1, $src0)
->;
-
-// SETGE_INT reverse args
-def : Pat <
- (selectcc i32:$src0, i32:$src1, -1, 0, SETLE),
- (SETGE_INT $src1, $src0)
->;
-
-// SETGT_UINT reverse args
-def : Pat <
- (selectcc i32:$src0, i32:$src1, -1, 0, SETULT),
- (SETGT_UINT $src1, $src0)
->;
-
-// SETGE_UINT reverse args
-def : Pat <
- (selectcc i32:$src0, i32:$src1, -1, 0, SETULE),
- (SETGE_UINT $src1, $src0)
->;
-
-// The next two patterns are special cases for handling 'true if ordered' and
-// 'true if unordered' conditionals. The assumption here is that the behavior of
-// SETE and SNE conforms to the Direct3D 10 rules for floating point values
-// described here:
-// http://msdn.microsoft.com/en-us/library/windows/desktop/cc308050.aspx#alpha_32_bit
-// We assume that SETE returns false when one of the operands is NAN and
-// SNE returns true when on of the operands is NAN
-
-//SETE - 'true if ordered'
-def : Pat <
- (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, SETO),
- (SETE $src0, $src1)
->;
-
-//SETE_DX10 - 'true if ordered'
-def : Pat <
- (selectcc f32:$src0, f32:$src1, -1, 0, SETO),
- (SETE_DX10 $src0, $src1)
->;
-
-//SNE - 'true if unordered'
-def : Pat <
- (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, SETUO),
- (SNE $src0, $src1)
->;
-
-//SETNE_DX10 - 'true if ordered'
-def : Pat <
- (selectcc f32:$src0, f32:$src1, -1, 0, SETUO),
- (SETNE_DX10 $src0, $src1)
->;
-
def : Extract_Element <f32, v4f32, 0, sub0>;
def : Extract_Element <f32, v4f32, 1, sub1>;
def : Extract_Element <f32, v4f32, 2, sub2>;
@@ -2378,3 +2386,11 @@ def : BitConvert <v4i32, v4f32, R600_Reg128>;
def : DwordAddrPat <i32, R600_Reg32>;
} // End isR600toCayman Predicate
+
+def getLDSNoRetOp : InstrMapping {
+ let FilterClass = "R600_LDS_1A1D";
+ let RowFields = ["BaseOp"];
+ let ColFields = ["DisableEncoding"];
+ let KeyCol = ["$dst"];
+ let ValueCols = [[""]];
+}
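+
+// Sketch of how this table is meant to be consumed (assuming TableGen's
+// standard InstrMapping output): the generated instruction-info include
+// provides an AMDGPU::getLDSNoRetOp(uint16_t Opcode) lookup that maps a _RET
+// opcode (DisableEncoding == "$dst") to its no-return sibling in the same
+// BaseOp row (DisableEncoding == "").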
diff --git a/lib/Target/R600/R600Intrinsics.td b/lib/Target/R600/R600Intrinsics.td
index 58d86b6..9681747 100644
--- a/lib/Target/R600/R600Intrinsics.td
+++ b/lib/Target/R600/R600Intrinsics.td
@@ -43,6 +43,12 @@ let TargetPrefix = "R600", isTarget = 1 in {
Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
def int_R600_interp_input :
Intrinsic<[llvm_float_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_R600_interp_const :
+ Intrinsic<[llvm_v4f32_ty], [llvm_i32_ty], [IntrNoMem]>;
+def int_R600_interp_xy :
+ Intrinsic<[llvm_v2f32_ty], [llvm_i32_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
+def int_R600_interp_zw :
+ Intrinsic<[llvm_v2f32_ty], [llvm_i32_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
def int_R600_load_texbuf :
Intrinsic<[llvm_v4f32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
def int_R600_tex : TextureIntrinsicFloatInput;
@@ -52,6 +58,7 @@ let TargetPrefix = "R600", isTarget = 1 in {
def int_R600_txb : TextureIntrinsicFloatInput;
def int_R600_txbc : TextureIntrinsicFloatInput;
def int_R600_txf : TextureIntrinsicInt32Input;
+ def int_R600_ldptr : TextureIntrinsicInt32Input;
def int_R600_txq : TextureIntrinsicInt32Input;
def int_R600_ddx : TextureIntrinsicFloatInput;
def int_R600_ddy : TextureIntrinsicFloatInput;
diff --git a/lib/Target/R600/R600MachineFunctionInfo.cpp b/lib/Target/R600/R600MachineFunctionInfo.cpp
index 018b403..01105c6 100644
--- a/lib/Target/R600/R600MachineFunctionInfo.cpp
+++ b/lib/Target/R600/R600MachineFunctionInfo.cpp
@@ -12,7 +12,9 @@
using namespace llvm;
-R600MachineFunctionInfo::R600MachineFunctionInfo(const MachineFunction &MF)
- : AMDGPUMachineFunction(MF) { }
+// Pin the vtable to this file.
+void R600MachineFunctionInfo::anchor() {}
+R600MachineFunctionInfo::R600MachineFunctionInfo(const MachineFunction &MF)
+ : AMDGPUMachineFunction(MF) { }
diff --git a/lib/Target/R600/R600MachineFunctionInfo.h b/lib/Target/R600/R600MachineFunctionInfo.h
index f23d9b7..c1bec0a 100644
--- a/lib/Target/R600/R600MachineFunctionInfo.h
+++ b/lib/Target/R600/R600MachineFunctionInfo.h
@@ -21,6 +21,7 @@
namespace llvm {
class R600MachineFunctionInfo : public AMDGPUMachineFunction {
+ virtual void anchor();
public:
R600MachineFunctionInfo(const MachineFunction &MF);
SmallVector<unsigned, 4> LiveOuts;
diff --git a/lib/Target/R600/R600MachineScheduler.cpp b/lib/Target/R600/R600MachineScheduler.cpp
index 0dc0365..da2a4d8 100644
--- a/lib/Target/R600/R600MachineScheduler.cpp
+++ b/lib/Target/R600/R600MachineScheduler.cpp
@@ -9,7 +9,6 @@
//
/// \file
/// \brief R600 Machine Scheduler interface
-// TODO: Scheduling is optimised for VLIW4 arch, modify it to support TRANS slot
//
//===----------------------------------------------------------------------===//
@@ -29,6 +28,7 @@ void R600SchedStrategy::initialize(ScheduleDAGMI *dag) {
DAG = dag;
TII = static_cast<const R600InstrInfo*>(DAG->TII);
TRI = static_cast<const R600RegisterInfo*>(DAG->TRI);
+ VLIW5 = !DAG->MF.getTarget().getSubtarget<AMDGPUSubtarget>().hasCaymanISA();
MRI = &DAG->MRI;
CurInstKind = IDOther;
CurEmitted = 0;
@@ -92,15 +92,6 @@ SUnit* R600SchedStrategy::pickNode(bool &IsTopNode) {
AllowSwitchFromAlu = true;
}
-
- // We want to scheduled AR defs as soon as possible to make sure they aren't
- // put in a different ALU clause from their uses.
- if (!SU && !UnscheduledARDefs.empty()) {
- SU = UnscheduledARDefs[0];
- UnscheduledARDefs.erase(UnscheduledARDefs.begin());
- NextInstKind = IDAlu;
- }
-
if (!SU && ((AllowSwitchToAlu && CurInstKind != IDAlu) ||
(!AllowSwitchFromAlu && CurInstKind == IDAlu))) {
// try to pick ALU
@@ -130,15 +121,6 @@ SUnit* R600SchedStrategy::pickNode(bool &IsTopNode) {
NextInstKind = IDOther;
}
- // We want to schedule the AR uses as late as possible to make sure that
- // the AR defs have been released.
- if (!SU && !UnscheduledARUses.empty()) {
- SU = UnscheduledARUses[0];
- UnscheduledARUses.erase(UnscheduledARUses.begin());
- NextInstKind = IDAlu;
- }
-
-
DEBUG(
if (SU) {
dbgs() << " ** Pick node **\n";
@@ -217,20 +199,6 @@ void R600SchedStrategy::releaseBottomNode(SUnit *SU) {
int IK = getInstKind(SU);
- // Check for AR register defines
- for (MachineInstr::const_mop_iterator I = SU->getInstr()->operands_begin(),
- E = SU->getInstr()->operands_end();
- I != E; ++I) {
- if (I->isReg() && I->getReg() == AMDGPU::AR_X) {
- if (I->isDef()) {
- UnscheduledARDefs.push_back(SU);
- } else {
- UnscheduledARUses.push_back(SU);
- }
- return;
- }
- }
-
// There is no export clause; we can schedule one as soon as it's ready
if (IK == IDOther)
Available[IDOther].push_back(SU);
@@ -314,6 +282,10 @@ R600SchedStrategy::AluKind R600SchedStrategy::getAluKind(SUnit *SU) const {
if (regBelongsToClass(DestReg, &AMDGPU::R600_Reg128RegClass))
return AluT_XYZW;
+ // LDS src registers cannot be used in the Trans slot.
+ if (TII->readsLDSSrcReg(MI))
+ return AluT_XYZW;
+
return AluAny;
}
@@ -342,14 +314,16 @@ int R600SchedStrategy::getInstKind(SUnit* SU) {
}
}
-SUnit *R600SchedStrategy::PopInst(std::vector<SUnit *> &Q) {
+SUnit *R600SchedStrategy::PopInst(std::vector<SUnit *> &Q, bool AnyALU) {
if (Q.empty())
return NULL;
for (std::vector<SUnit *>::reverse_iterator It = Q.rbegin(), E = Q.rend();
It != E; ++It) {
SUnit *SU = *It;
InstructionsGroupCandidate.push_back(SU->getInstr());
- if (TII->fitsConstReadLimitations(InstructionsGroupCandidate)) {
+ if (TII->fitsConstReadLimitations(InstructionsGroupCandidate)
+ && (!AnyALU || !TII->isVectorOnly(SU->getInstr()))
+ ) {
InstructionsGroupCandidate.pop_back();
Q.erase((It + 1).base());
return SU;
@@ -373,6 +347,8 @@ void R600SchedStrategy::PrepareNextSlot() {
DEBUG(dbgs() << "New Slot\n");
assert (OccupedSlotsMask && "Slot wasn't filled");
OccupedSlotsMask = 0;
+// if (HwGen == AMDGPUSubtarget::NORTHERN_ISLANDS)
+// OccupedSlotsMask |= 16;
InstructionsGroupCandidate.clear();
LoadAlu();
}
@@ -409,12 +385,12 @@ void R600SchedStrategy::AssignSlot(MachineInstr* MI, unsigned Slot) {
}
}
-SUnit *R600SchedStrategy::AttemptFillSlot(unsigned Slot) {
+SUnit *R600SchedStrategy::AttemptFillSlot(unsigned Slot, bool AnyAlu) {
static const AluKind IndexToID[] = {AluT_X, AluT_Y, AluT_Z, AluT_W};
- SUnit *SlotedSU = PopInst(AvailableAlus[IndexToID[Slot]]);
+ SUnit *SlotedSU = PopInst(AvailableAlus[IndexToID[Slot]], AnyAlu);
if (SlotedSU)
return SlotedSU;
- SUnit *UnslotedSU = PopInst(AvailableAlus[AluAny]);
+ SUnit *UnslotedSU = PopInst(AvailableAlus[AluAny], AnyAlu);
if (UnslotedSU)
AssignSlot(UnslotedSU->getInstr(), Slot);
return UnslotedSU;
@@ -434,30 +410,35 @@ SUnit* R600SchedStrategy::pickAlu() {
// Bottom up scheduling : predX must comes first
if (!AvailableAlus[AluPredX].empty()) {
OccupedSlotsMask |= 31;
- return PopInst(AvailableAlus[AluPredX]);
+ return PopInst(AvailableAlus[AluPredX], false);
}
// Flush physical reg copies (RA will discard them)
if (!AvailableAlus[AluDiscarded].empty()) {
OccupedSlotsMask |= 31;
- return PopInst(AvailableAlus[AluDiscarded]);
+ return PopInst(AvailableAlus[AluDiscarded], false);
}
// If there is a T_XYZW alu available, use it
if (!AvailableAlus[AluT_XYZW].empty()) {
OccupedSlotsMask |= 15;
- return PopInst(AvailableAlus[AluT_XYZW]);
+ return PopInst(AvailableAlus[AluT_XYZW], false);
}
}
bool TransSlotOccuped = OccupedSlotsMask & 16;
- if (!TransSlotOccuped) {
+ if (!TransSlotOccuped && VLIW5) {
if (!AvailableAlus[AluTrans].empty()) {
OccupedSlotsMask |= 16;
- return PopInst(AvailableAlus[AluTrans]);
+ return PopInst(AvailableAlus[AluTrans], false);
+ }
+ SUnit *SU = AttemptFillSlot(3, true);
+ if (SU) {
+ OccupedSlotsMask |= 16;
+ return SU;
}
}
for (int Chan = 3; Chan > -1; --Chan) {
bool isOccupied = OccupedSlotsMask & (1 << Chan);
if (!isOccupied) {
- SUnit *SU = AttemptFillSlot(Chan);
+ SUnit *SU = AttemptFillSlot(Chan, false);
if (SU) {
OccupedSlotsMask |= (1 << Chan);
InstructionsGroupCandidate.push_back(SU->getInstr());
diff --git a/lib/Target/R600/R600MachineScheduler.h b/lib/Target/R600/R600MachineScheduler.h
index f8965d8..97c8cde 100644
--- a/lib/Target/R600/R600MachineScheduler.h
+++ b/lib/Target/R600/R600MachineScheduler.h
@@ -53,8 +53,6 @@ class R600SchedStrategy : public MachineSchedStrategy {
std::vector<SUnit *> Available[IDLast], Pending[IDLast];
std::vector<SUnit *> AvailableAlus[AluLast];
- std::vector<SUnit *> UnscheduledARDefs;
- std::vector<SUnit *> UnscheduledARUses;
std::vector<SUnit *> PhysicalRegCopy;
InstKind CurInstKind;
@@ -84,15 +82,16 @@ public:
private:
std::vector<MachineInstr *> InstructionsGroupCandidate;
+ bool VLIW5;
int getInstKind(SUnit *SU);
bool regBelongsToClass(unsigned Reg, const TargetRegisterClass *RC) const;
AluKind getAluKind(SUnit *SU) const;
void LoadAlu();
unsigned AvailablesAluCount() const;
- SUnit *AttemptFillSlot (unsigned Slot);
+ SUnit *AttemptFillSlot (unsigned Slot, bool AnyAlu);
void PrepareNextSlot();
- SUnit *PopInst(std::vector<SUnit*> &Q);
+ SUnit *PopInst(std::vector<SUnit*> &Q, bool AnyALU);
void AssignSlot(MachineInstr *MI, unsigned Slot);
SUnit* pickAlu();
diff --git a/lib/Target/R600/R600OptimizeVectorRegisters.cpp b/lib/Target/R600/R600OptimizeVectorRegisters.cpp
index acacffa..cf719c0 100644
--- a/lib/Target/R600/R600OptimizeVectorRegisters.cpp
+++ b/lib/Target/R600/R600OptimizeVectorRegisters.cpp
@@ -50,6 +50,9 @@ isImplicitlyDef(MachineRegisterInfo &MRI, unsigned Reg) {
E = MRI.def_end(); It != E; ++It) {
return (*It).isImplicitDef();
}
+ if (MRI.isReserved(Reg)) {
+ return false;
+ }
llvm_unreachable("Reg without a def");
return false;
}
diff --git a/lib/Target/R600/R600Packetizer.cpp b/lib/Target/R600/R600Packetizer.cpp
index 5cf1fd3..cd9b6ea 100644
--- a/lib/Target/R600/R600Packetizer.cpp
+++ b/lib/Target/R600/R600Packetizer.cpp
@@ -58,6 +58,8 @@ class R600PacketizerList : public VLIWPacketizerList {
private:
const R600InstrInfo *TII;
const R600RegisterInfo &TRI;
+ bool VLIW5;
+ bool ConsideredInstUsesAlreadyWrittenVectorElement;
unsigned getSlot(const MachineInstr *MI) const {
return TRI.getHWRegChan(MI->getOperand(0).getReg());
@@ -74,7 +76,13 @@ private:
MachineBasicBlock::instr_iterator BI = I.getInstrIterator();
if (I->isBundle())
BI++;
+ int LastDstChan = -1;
do {
+ bool isTrans = false;
+ int BISlot = getSlot(BI);
+ if (LastDstChan >= BISlot)
+ isTrans = true;
+ LastDstChan = BISlot;
if (TII->isPredicated(BI))
continue;
int OperandIdx = TII->getOperandIdx(BI->getOpcode(), AMDGPU::OpName::write);
@@ -85,7 +93,7 @@ private:
continue;
}
unsigned Dst = BI->getOperand(DstIdx).getReg();
- if (TII->isTransOnly(BI)) {
+ if (isTrans || TII->isTransOnly(BI)) {
Result[Dst] = AMDGPU::PS;
continue;
}
@@ -142,10 +150,14 @@ public:
MachineDominatorTree &MDT)
: VLIWPacketizerList(MF, MLI, MDT, true),
TII (static_cast<const R600InstrInfo *>(MF.getTarget().getInstrInfo())),
- TRI(TII->getRegisterInfo()) { }
+ TRI(TII->getRegisterInfo()) {
+ VLIW5 = !MF.getTarget().getSubtarget<AMDGPUSubtarget>().hasCaymanISA();
+ }
// initPacketizerState - initialize some internal flags.
- void initPacketizerState() { }
+ void initPacketizerState() {
+ ConsideredInstUsesAlreadyWrittenVectorElement = false;
+ }
// ignorePseudoInstruction - Ignore bundling of pseudo instructions.
bool ignorePseudoInstruction(MachineInstr *MI, MachineBasicBlock *MBB) {
@@ -172,8 +184,8 @@ public:
// together.
bool isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) {
MachineInstr *MII = SUI->getInstr(), *MIJ = SUJ->getInstr();
- if (getSlot(MII) <= getSlot(MIJ) && !TII->isTransOnly(MII))
- return false;
+ if (getSlot(MII) == getSlot(MIJ))
+ ConsideredInstUsesAlreadyWrittenVectorElement = true;
// Do MII and MIJ share the same pred_sel?
int OpI = TII->getOperandIdx(MII->getOpcode(), AMDGPU::OpName::pred_sel),
OpJ = TII->getOperandIdx(MIJ->getOpcode(), AMDGPU::OpName::pred_sel);
@@ -194,6 +206,14 @@ public:
return false;
}
}
+
+ bool ARDef = TII->definesAddressRegister(MII) ||
+ TII->definesAddressRegister(MIJ);
+ bool ARUse = TII->usesAddressRegister(MII) ||
+ TII->usesAddressRegister(MIJ);
+ if (ARDef && ARUse)
+ return false;
+
return true;
}
@@ -211,6 +231,20 @@ public:
std::vector<R600InstrInfo::BankSwizzle> &BS,
bool &isTransSlot) {
isTransSlot = TII->isTransOnly(MI);
+ assert (!isTransSlot || VLIW5);
+
+ // Is the dst reg sequence legal?
+ if (!isTransSlot && !CurrentPacketMIs.empty()) {
+ if (getSlot(MI) <= getSlot(CurrentPacketMIs.back())) {
+ if (ConsideredInstUsesAlreadyWrittenVectorElement &&
+ !TII->isVectorOnly(MI) && VLIW5) {
+ isTransSlot = true;
+ DEBUG(dbgs() << "Considering as Trans Inst :"; MI->dump(););
+ }
+ else
+ return false;
+ }
+ }
// Are the constant read limitations met?
CurrentPacketMIs.push_back(MI);
@@ -246,6 +280,10 @@ public:
return false;
}
+ // We cannot read LDS source registers from the Trans slot.
+ if (isTransSlot && TII->readsLDSSrcReg(MI))
+ return false;
+
CurrentPacketMIs.pop_back();
return true;
}
@@ -278,6 +316,8 @@ public:
return It;
}
endPacket(MI->getParent(), MI);
+ if (TII->isTransOnly(MI))
+ return MI;
return VLIWPacketizerList::addToPacket(MI);
}
};
@@ -308,7 +348,7 @@ bool R600Packetizer::runOnMachineFunction(MachineFunction &Fn) {
MachineBasicBlock::iterator End = MBB->end();
MachineBasicBlock::iterator MI = MBB->begin();
while (MI != End) {
- if (MI->isKill() ||
+ if (MI->isKill() || MI->getOpcode() == AMDGPU::IMPLICIT_DEF ||
(MI->getOpcode() == AMDGPU::CF_ALU && !MI->getOperand(8).getImm())) {
MachineBasicBlock::iterator DeleteMI = MI;
++MI;
diff --git a/lib/Target/R600/R600RegisterInfo.cpp b/lib/Target/R600/R600RegisterInfo.cpp
index a42043b..f3bb88b 100644
--- a/lib/Target/R600/R600RegisterInfo.cpp
+++ b/lib/Target/R600/R600RegisterInfo.cpp
@@ -28,6 +28,8 @@ R600RegisterInfo::R600RegisterInfo(AMDGPUTargetMachine &tm)
BitVector R600RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
BitVector Reserved(getNumRegs());
+ const R600InstrInfo *TII = static_cast<const R600InstrInfo*>(TM.getInstrInfo());
+
Reserved.set(AMDGPU::ZERO);
Reserved.set(AMDGPU::HALF);
Reserved.set(AMDGPU::ONE);
@@ -41,26 +43,15 @@ BitVector R600RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
Reserved.set(AMDGPU::PRED_SEL_OFF);
Reserved.set(AMDGPU::PRED_SEL_ZERO);
Reserved.set(AMDGPU::PRED_SEL_ONE);
+ Reserved.set(AMDGPU::INDIRECT_BASE_ADDR);
for (TargetRegisterClass::iterator I = AMDGPU::R600_AddrRegClass.begin(),
E = AMDGPU::R600_AddrRegClass.end(); I != E; ++I) {
Reserved.set(*I);
}
- for (TargetRegisterClass::iterator I = AMDGPU::TRegMemRegClass.begin(),
- E = AMDGPU::TRegMemRegClass.end();
- I != E; ++I) {
- Reserved.set(*I);
- }
+ TII->reserveIndirectRegisters(Reserved, MF);
- const R600InstrInfo *RII =
- static_cast<const R600InstrInfo*>(TM.getInstrInfo());
- std::vector<unsigned> IndirectRegs = RII->getIndirectReservedRegs(MF);
- for (std::vector<unsigned>::iterator I = IndirectRegs.begin(),
- E = IndirectRegs.end();
- I != E; ++I) {
- Reserved.set(*I);
- }
return Reserved;
}
@@ -78,6 +69,10 @@ unsigned R600RegisterInfo::getHWRegChan(unsigned reg) const {
return this->getEncodingValue(reg) >> HW_CHAN_SHIFT;
}
+unsigned R600RegisterInfo::getHWRegIndex(unsigned Reg) const {
+ return GET_REG_INDEX(getEncodingValue(Reg));
+}
+
const TargetRegisterClass * R600RegisterInfo::getCFGStructurizerRegClass(
MVT VT) const {
switch(VT.SimpleTy) {
@@ -86,17 +81,20 @@ const TargetRegisterClass * R600RegisterInfo::getCFGStructurizerRegClass(
}
}
-unsigned R600RegisterInfo::getSubRegFromChannel(unsigned Channel) const {
- switch (Channel) {
- default: assert(!"Invalid channel index"); return 0;
- case 0: return AMDGPU::sub0;
- case 1: return AMDGPU::sub1;
- case 2: return AMDGPU::sub2;
- case 3: return AMDGPU::sub3;
- }
-}
-
const RegClassWeight &R600RegisterInfo::getRegClassWeight(
const TargetRegisterClass *RC) const {
return RCW;
}
+
+bool R600RegisterInfo::isPhysRegLiveAcrossClauses(unsigned Reg) const {
+ assert(!TargetRegisterInfo::isVirtualRegister(Reg));
+
+ switch (Reg) {
+ case AMDGPU::OQAP:
+ case AMDGPU::OQBP:
+ case AMDGPU::AR_X:
+ return false;
+ default:
+ return true;
+ }
+}
diff --git a/lib/Target/R600/R600RegisterInfo.h b/lib/Target/R600/R600RegisterInfo.h
index 9b286ee..c74c49e 100644
--- a/lib/Target/R600/R600RegisterInfo.h
+++ b/lib/Target/R600/R600RegisterInfo.h
@@ -39,16 +39,16 @@ struct R600RegisterInfo : public AMDGPURegisterInfo {
/// \brief get the HW encoding for a register's channel.
unsigned getHWRegChan(unsigned reg) const;
+ virtual unsigned getHWRegIndex(unsigned Reg) const;
+
/// \brief get the register class of the specified type to use in the
/// CFGStructurizer
virtual const TargetRegisterClass * getCFGStructurizerRegClass(MVT VT) const;
- /// \returns the sub reg enum value for the given \p Channel
- /// (e.g. getSubRegFromChannel(0) -> AMDGPU::sel_x)
- unsigned getSubRegFromChannel(unsigned Channel) const;
-
virtual const RegClassWeight &getRegClassWeight(const TargetRegisterClass *RC) const;
+ // \returns true if \p Reg can be defined in one ALU clause and used in another.
+ virtual bool isPhysRegLiveAcrossClauses(unsigned Reg) const;
};
} // End namespace llvm
diff --git a/lib/Target/R600/R600RegisterInfo.td b/lib/Target/R600/R600RegisterInfo.td
index fa987cf..68bcd20 100644
--- a/lib/Target/R600/R600RegisterInfo.td
+++ b/lib/Target/R600/R600RegisterInfo.td
@@ -39,8 +39,6 @@ foreach Index = 0-127 in {
// Indirect addressing offset registers
def Addr#Index#_#Chan : R600RegWithChan <"T("#Index#" + AR.x)."#Chan,
Index, Chan>;
- def TRegMem#Index#_#Chan : R600RegWithChan <"T"#Index#"."#Chan, Index,
- Chan>;
}
// 128-bit Temporary Registers
def T#Index#_XYZW : R600Reg_128 <"T"#Index#"",
@@ -95,6 +93,12 @@ foreach Index = 448-480 in {
// Special Registers
+def OQA : R600Reg<"OQA", 219>;
+def OQB : R600Reg<"OQB", 220>;
+def OQAP : R600Reg<"OQAP", 221>;
+def OQBP : R600Reg<"OQBP", 222>;
+def LDS_DIRECT_A : R600Reg<"LDS_DIRECT_A", 223>;
+def LDS_DIRECT_B : R600Reg<"LDS_DIRECT_B", 224>;
def ZERO : R600Reg<"0.0", 248>;
def ONE : R600Reg<"1.0", 249>;
def NEG_ONE : R600Reg<"-1.0", 249>;
@@ -115,7 +119,6 @@ def PRED_SEL_OFF: R600Reg<"Pred_sel_off", 0>;
def PRED_SEL_ZERO : R600Reg<"Pred_sel_zero", 2>;
def PRED_SEL_ONE : R600Reg<"Pred_sel_one", 3>;
def AR_X : R600Reg<"AR.x", 0>;
-def OQAP : R600Reg<"OQAP", 221>;
def R600_ArrayBase : RegisterClass <"AMDGPU", [f32, i32], 32,
(add (sequence "ArrayBase%u", 448, 480))>;
@@ -130,7 +133,8 @@ let isAllocatable = 0 in {
// XXX: Only use the X channel, until we support wider stack widths
def R600_Addr : RegisterClass <"AMDGPU", [i32], 127, (add (sequence "Addr%u_X", 0, 127))>;
-} // End isAllocatable = 0
+def R600_LDS_SRC_REG : RegisterClass<"AMDGPU", [i32], 32,
+ (add OQA, OQB, OQAP, OQBP, LDS_DIRECT_A, LDS_DIRECT_B)>;
def R600_KC0_X : RegisterClass <"AMDGPU", [f32, i32], 32,
(add (sequence "KC0_%u_X", 128, 159))>;
@@ -164,6 +168,8 @@ def R600_KC1 : RegisterClass <"AMDGPU", [f32, i32], 32,
(interleave R600_KC1_X, R600_KC1_Y,
R600_KC1_Z, R600_KC1_W)>;
+} // End isAllocatable = 0
+
def R600_TReg32_X : RegisterClass <"AMDGPU", [f32, i32], 32,
(add (sequence "T%u_X", 0, 127), AR_X)>;
@@ -184,6 +190,7 @@ def R600_Reg32 : RegisterClass <"AMDGPU", [f32, i32], 32, (add
R600_TReg32,
R600_ArrayBase,
R600_Addr,
+ R600_KC0, R600_KC1,
ZERO, HALF, ONE, ONE_INT, PV_X, ALU_LITERAL_X, NEG_ONE, NEG_HALF,
ALU_CONST, ALU_PARAM, OQAP
)>;
@@ -201,33 +208,3 @@ def R600_Reg128 : RegisterClass<"AMDGPU", [v4f32, v4i32], 128,
def R600_Reg64 : RegisterClass<"AMDGPU", [v2f32, v2i32], 64,
(add (sequence "T%u_XY", 0, 63))>;
-
-//===----------------------------------------------------------------------===//
-// Register classes for indirect addressing
-//===----------------------------------------------------------------------===//
-
-// Super register for all the Indirect Registers. This register class is used
-// by the REG_SEQUENCE instruction to specify the registers to use for direct
-// reads / writes which may be written / read by an indirect address.
-class IndirectSuper<string n, list<Register> subregs> :
- RegisterWithSubRegs<n, subregs> {
- let Namespace = "AMDGPU";
- let SubRegIndices =
- [sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7,
- sub8, sub9, sub10, sub11, sub12, sub13, sub14, sub15];
-}
-
-def IndirectSuperReg : IndirectSuper<"Indirect",
- [TRegMem0_X, TRegMem1_X, TRegMem2_X, TRegMem3_X, TRegMem4_X, TRegMem5_X,
- TRegMem6_X, TRegMem7_X, TRegMem8_X, TRegMem9_X, TRegMem10_X, TRegMem11_X,
- TRegMem12_X, TRegMem13_X, TRegMem14_X, TRegMem15_X]
->;
-
-def IndirectReg : RegisterClass<"AMDGPU", [f32, i32], 32, (add IndirectSuperReg)>;
-
-// This register class defines the registers that are the storage units for
-// the "Indirect Addressing" pseudo memory space.
-// XXX: Only use the X channel, until we support wider stack widths
-def TRegMem : RegisterClass<"AMDGPU", [f32, i32], 32,
- (add (sequence "TRegMem%u_X", 0, 16))
->;
diff --git a/lib/Target/R600/R600TextureIntrinsicsReplacer.cpp b/lib/Target/R600/R600TextureIntrinsicsReplacer.cpp
index 3768ba0..3258894 100644
--- a/lib/Target/R600/R600TextureIntrinsicsReplacer.cpp
+++ b/lib/Target/R600/R600TextureIntrinsicsReplacer.cpp
@@ -35,9 +35,9 @@ class R600TextureIntrinsicsReplacer :
FunctionType *TexSign;
FunctionType *TexQSign;
- void getAdjustementFromTextureTarget(unsigned TextureType, bool hasLOD,
- unsigned SrcSelect[4], unsigned CT[4],
- bool &useShadowVariant) {
+ void getAdjustmentFromTextureTarget(unsigned TextureType, bool hasLOD,
+ unsigned SrcSelect[4], unsigned CT[4],
+ bool &useShadowVariant) {
enum TextureTypes {
TEXTURE_1D = 1,
TEXTURE_2D,
@@ -60,6 +60,7 @@ class R600TextureIntrinsicsReplacer :
switch (TextureType) {
case 0:
+ useShadowVariant = false;
return;
case TEXTURE_RECT:
case TEXTURE_1D:
@@ -93,9 +94,8 @@ class R600TextureIntrinsicsReplacer :
}
if (TextureType == TEXTURE_CUBE_ARRAY ||
- TextureType == TEXTURE_SHADOWCUBE_ARRAY) {
+ TextureType == TEXTURE_SHADOWCUBE_ARRAY)
CT[2] = 0;
- }
if (TextureType == TEXTURE_1D_ARRAY ||
TextureType == TEXTURE_SHADOW1D_ARRAY) {
@@ -114,9 +114,8 @@ class R600TextureIntrinsicsReplacer :
TextureType == TEXTURE_SHADOW2D ||
TextureType == TEXTURE_SHADOWRECT ||
TextureType == TEXTURE_SHADOW1D_ARRAY) &&
- !(hasLOD && useShadowVariant)) {
+ !(hasLOD && useShadowVariant))
SrcSelect[3] = 2;
- }
}
void ReplaceCallInst(CallInst &I, FunctionType *FT, const char *Name,
@@ -174,8 +173,8 @@ class R600TextureIntrinsicsReplacer :
};
bool useShadowVariant;
- getAdjustementFromTextureTarget(TextureType, hasLOD, SrcSelect, CT,
- useShadowVariant);
+ getAdjustmentFromTextureTarget(TextureType, hasLOD, SrcSelect, CT,
+ useShadowVariant);
ReplaceCallInst(I, FT, useShadowVariant?ShadowInt:VanillaInt, SrcSelect,
Offset, ResourceId, SamplerId, CT, Coord);
@@ -198,8 +197,8 @@ class R600TextureIntrinsicsReplacer :
};
bool useShadowVariant;
- getAdjustementFromTextureTarget(TextureType, false, SrcSelect, CT,
- useShadowVariant);
+ getAdjustmentFromTextureTarget(TextureType, false, SrcSelect, CT,
+ useShadowVariant);
ReplaceCallInst(I, TexQSign, "llvm.R600.txf", SrcSelect,
Offset, ResourceId, SamplerId, CT, Coord);
@@ -259,6 +258,9 @@ public:
}
void visitCallInst(CallInst &I) {
+ if (!I.getCalledFunction())
+ return;
+
StringRef Name = I.getCalledFunction()->getName();
if (Name == "llvm.AMDGPU.tex") {
ReplaceTexIntrinsic(I, false, TexSign, "llvm.R600.tex", "llvm.R600.texc");
diff --git a/lib/Target/R600/SIDefines.h b/lib/Target/R600/SIDefines.h
index 147578c..2cbce28 100644
--- a/lib/Target/R600/SIDefines.h
+++ b/lib/Target/R600/SIDefines.h
@@ -11,6 +11,18 @@
#ifndef SIDEFINES_H_
#define SIDEFINES_H_
+namespace SIInstrFlags {
+enum {
+ MIMG = 1 << 3,
+ SMRD = 1 << 4,
+ VOP1 = 1 << 5,
+ VOP2 = 1 << 6,
+ VOP3 = 1 << 7,
+ VOPC = 1 << 8,
+ SALU = 1 << 9
+};
+}
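+
+// A minimal usage sketch (assuming the usual TSFlags convention): each
+// enumerator above is a single bit ORed into an instruction's TSFlags by the
+// .td encodings and is queried along the lines of
+//   bool isVOP1(uint16_t Opcode) const {
+//     return get(Opcode).TSFlags & SIInstrFlags::VOP1;
+//   }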
+
#define R_00B028_SPI_SHADER_PGM_RSRC1_PS 0x00B028
#define R_00B02C_SPI_SHADER_PGM_RSRC2_PS 0x00B02C
#define S_00B02C_EXTRA_LDS_SIZE(x) (((x) & 0xFF) << 8)
diff --git a/lib/Target/R600/SIFixSGPRCopies.cpp b/lib/Target/R600/SIFixSGPRCopies.cpp
index 435172a..3370c79 100644
--- a/lib/Target/R600/SIFixSGPRCopies.cpp
+++ b/lib/Target/R600/SIFixSGPRCopies.cpp
@@ -23,9 +23,9 @@
/// %vreg3 <vsrc> = COPY %vreg2 <vgpr>
/// BB2:
/// %vreg4 <vsrc> = PHI %vreg1 <vsrc>, <BB#0>, %vreg3 <vrsc>, <BB#1>
-/// %vreg5 <vgpr> = VECTOR_INST %vreg4 <vsrc>
+/// %vreg5 <vgpr> = VECTOR_INST %vreg4 <vsrc>
+///
///
-///
/// The coalescer will begin at BB0 and eliminate its copy, then the resulting
/// code will look like this:
///
@@ -43,7 +43,7 @@
/// Now that the result of the PHI instruction is an SGPR, the register
/// allocator is now forced to constrain the register class of %vreg3 to
/// <sgpr> so we end up with final code like this:
-///
+///
/// BB0:
/// %vreg0 <sgpr> = SCALAR_INST
/// ...
@@ -55,7 +55,7 @@
/// %vreg4 <sgpr> = PHI %vreg0 <sgpr>, <BB#0>, %vreg3 <sgpr>, <BB#1>
/// %vreg5 <vgpr> = VECTOR_INST %vreg4 <sgpr>
///
-/// Now this code contains an illegal copy from a VGPR to an SGPR.
+/// Now this code contains an illegal copy from a VGPR to an SGPR.
///
/// In order to avoid this problem, this pass searches for PHI instructions
/// which define a <vsrc> register and constrains its definition class to
@@ -65,10 +65,14 @@
/// ultimately led to the creation of an illegal COPY.
//===----------------------------------------------------------------------===//
+#define DEBUG_TYPE "sgpr-copies"
#include "AMDGPU.h"
#include "SIInstrInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
using namespace llvm;
@@ -79,9 +83,16 @@ class SIFixSGPRCopies : public MachineFunctionPass {
private:
static char ID;
- const TargetRegisterClass *inferRegClass(const TargetRegisterInfo *TRI,
+ const TargetRegisterClass *inferRegClassFromUses(const SIRegisterInfo *TRI,
const MachineRegisterInfo &MRI,
- unsigned Reg) const;
+ unsigned Reg,
+ unsigned SubReg) const;
+ const TargetRegisterClass *inferRegClassFromDef(const SIRegisterInfo *TRI,
+ const MachineRegisterInfo &MRI,
+ unsigned Reg,
+ unsigned SubReg) const;
+ bool isVGPRToSGPRCopy(const MachineInstr &Copy, const SIRegisterInfo *TRI,
+ const MachineRegisterInfo &MRI) const;
public:
SIFixSGPRCopies(TargetMachine &tm) : MachineFunctionPass(ID) { }
@@ -102,25 +113,41 @@ FunctionPass *llvm::createSIFixSGPRCopiesPass(TargetMachine &tm) {
return new SIFixSGPRCopies(tm);
}
-/// This functions walks the use/def chains starting with the definition of
-/// \p Reg until it finds an Instruction that isn't a COPY returns
-/// the register class of that instruction.
-const TargetRegisterClass *SIFixSGPRCopies::inferRegClass(
- const TargetRegisterInfo *TRI,
+static bool hasVGPROperands(const MachineInstr &MI, const SIRegisterInfo *TRI) {
+ const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ if (!MI.getOperand(i).isReg() ||
+ !TargetRegisterInfo::isVirtualRegister(MI.getOperand(i).getReg()))
+ continue;
+
+ if (TRI->hasVGPRs(MRI.getRegClass(MI.getOperand(i).getReg())))
+ return true;
+ }
+ return false;
+}
+
+/// This function walks the use list of \p Reg until it finds an Instruction
+/// that isn't a COPY and returns the register class of that instruction.
+/// \return The register class expected by the first non-COPY use of \p Reg.
+const TargetRegisterClass *SIFixSGPRCopies::inferRegClassFromUses(
+ const SIRegisterInfo *TRI,
const MachineRegisterInfo &MRI,
- unsigned Reg) const {
+ unsigned Reg,
+ unsigned SubReg) const {
// The Reg parameter to the function must always be defined by either a PHI
// or a COPY, therefore it cannot be a physical register.
assert(TargetRegisterInfo::isVirtualRegister(Reg) &&
"Reg cannot be a physical register");
const TargetRegisterClass *RC = MRI.getRegClass(Reg);
+ RC = TRI->getSubRegClass(RC, SubReg);
for (MachineRegisterInfo::use_iterator I = MRI.use_begin(Reg),
E = MRI.use_end(); I != E; ++I) {
switch (I->getOpcode()) {
case AMDGPU::COPY:
- RC = TRI->getCommonSubClass(RC, inferRegClass(TRI, MRI,
- I->getOperand(0).getReg()));
+ RC = TRI->getCommonSubClass(RC, inferRegClassFromUses(TRI, MRI,
+ I->getOperand(0).getReg(),
+ I->getOperand(0).getSubReg()));
break;
}
}
@@ -128,9 +155,48 @@ const TargetRegisterClass *SIFixSGPRCopies::inferRegClass(
return RC;
}
+const TargetRegisterClass *SIFixSGPRCopies::inferRegClassFromDef(
+ const SIRegisterInfo *TRI,
+ const MachineRegisterInfo &MRI,
+ unsigned Reg,
+ unsigned SubReg) const {
+ if (!TargetRegisterInfo::isVirtualRegister(Reg)) {
+ const TargetRegisterClass *RC = TRI->getPhysRegClass(Reg);
+ return TRI->getSubRegClass(RC, SubReg);
+ }
+ MachineInstr *Def = MRI.getVRegDef(Reg);
+ if (Def->getOpcode() != AMDGPU::COPY) {
+ return TRI->getSubRegClass(MRI.getRegClass(Reg), SubReg);
+ }
+
+ return inferRegClassFromDef(TRI, MRI, Def->getOperand(1).getReg(),
+ Def->getOperand(1).getSubReg());
+}
+
+bool SIFixSGPRCopies::isVGPRToSGPRCopy(const MachineInstr &Copy,
+ const SIRegisterInfo *TRI,
+ const MachineRegisterInfo &MRI) const {
+
+ unsigned DstReg = Copy.getOperand(0).getReg();
+ unsigned SrcReg = Copy.getOperand(1).getReg();
+ unsigned SrcSubReg = Copy.getOperand(1).getSubReg();
+ const TargetRegisterClass *DstRC = MRI.getRegClass(DstReg);
+ const TargetRegisterClass *SrcRC;
+
+ if (!TargetRegisterInfo::isVirtualRegister(SrcReg) ||
+ DstRC == &AMDGPU::M0RegRegClass)
+ return false;
+
+ SrcRC = inferRegClassFromDef(TRI, MRI, SrcReg, SrcSubReg);
+ return TRI->isSGPRClass(DstRC) && TRI->hasVGPRs(SrcRC);
+}
+
bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
MachineRegisterInfo &MRI = MF.getRegInfo();
- const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo();
+ const SIRegisterInfo *TRI = static_cast<const SIRegisterInfo *>(
+ MF.getTarget().getRegisterInfo());
+ const SIInstrInfo *TII = static_cast<const SIInstrInfo *>(
+ MF.getTarget().getInstrInfo());
for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
BI != BE; ++BI) {
@@ -138,13 +204,58 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
I != E; ++I) {
MachineInstr &MI = *I;
- if (MI.getOpcode() != AMDGPU::PHI) {
- continue;
+ if (MI.getOpcode() == AMDGPU::COPY && isVGPRToSGPRCopy(MI, TRI, MRI)) {
+ DEBUG(dbgs() << "Fixing VGPR -> SGPR copy:\n");
+ DEBUG(MI.print(dbgs()));
+ TII->moveToVALU(MI);
+
+ }
+
+ switch (MI.getOpcode()) {
+ default: continue;
+ case AMDGPU::PHI: {
+ DEBUG(dbgs() << " Fixing PHI:\n");
+ DEBUG(MI.print(dbgs()));
+
+ for (unsigned i = 1; i < MI.getNumOperands(); i+=2) {
+ unsigned Reg = MI.getOperand(i).getReg();
+ const TargetRegisterClass *RC = inferRegClassFromDef(TRI, MRI, Reg,
+ MI.getOperand(0).getSubReg());
+ MRI.constrainRegClass(Reg, RC);
+ }
+ unsigned Reg = MI.getOperand(0).getReg();
+ const TargetRegisterClass *RC = inferRegClassFromUses(TRI, MRI, Reg,
+ MI.getOperand(0).getSubReg());
+ if (TRI->getCommonSubClass(RC, &AMDGPU::VReg_32RegClass)) {
+ MRI.constrainRegClass(Reg, &AMDGPU::VReg_32RegClass);
+ }
+
+ if (!TRI->isSGPRClass(MRI.getRegClass(Reg)))
+ break;
+
+ // If a PHI node defines an SGPR and any of its operands are VGPRs,
+ // then we need to move it to the VALU.
+ for (unsigned i = 1; i < MI.getNumOperands(); i+=2) {
+ unsigned Reg = MI.getOperand(i).getReg();
+ if (TRI->hasVGPRs(MRI.getRegClass(Reg))) {
+ TII->moveToVALU(MI);
+ break;
+ }
+ }
+
+ break;
+ }
+ case AMDGPU::REG_SEQUENCE: {
+ if (TRI->hasVGPRs(TII->getOpRegClass(MI, 0)) ||
+ !hasVGPROperands(MI, TRI))
+ continue;
+
+ DEBUG(dbgs() << "Fixing REG_SEQUENCE:\n");
+ DEBUG(MI.print(dbgs()));
+
+ TII->moveToVALU(MI);
+ break;
}
- unsigned Reg = MI.getOperand(0).getReg();
- const TargetRegisterClass *RC = inferRegClass(TRI, MRI, Reg);
- if (RC == &AMDGPU::VSrc_32RegClass) {
- MRI.constrainRegClass(Reg, &AMDGPU::VReg_32RegClass);
}
}
}
diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp
index c64027f..d5d2b68 100644
--- a/lib/Target/R600/SIISelLowering.cpp
+++ b/lib/Target/R600/SIISelLowering.cpp
@@ -34,18 +34,12 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
addRegisterClass(MVT::i1, &AMDGPU::SReg_64RegClass);
addRegisterClass(MVT::i64, &AMDGPU::VSrc_64RegClass);
- addRegisterClass(MVT::v2i1, &AMDGPU::VReg_64RegClass);
- addRegisterClass(MVT::v4i1, &AMDGPU::VReg_128RegClass);
-
- addRegisterClass(MVT::v16i8, &AMDGPU::SReg_128RegClass);
addRegisterClass(MVT::v32i8, &AMDGPU::SReg_256RegClass);
addRegisterClass(MVT::v64i8, &AMDGPU::SReg_512RegClass);
addRegisterClass(MVT::i32, &AMDGPU::VSrc_32RegClass);
addRegisterClass(MVT::f32, &AMDGPU::VSrc_32RegClass);
- addRegisterClass(MVT::v1i32, &AMDGPU::VSrc_32RegClass);
-
addRegisterClass(MVT::f64, &AMDGPU::VSrc_64RegClass);
addRegisterClass(MVT::v2i32, &AMDGPU::VSrc_64RegClass);
addRegisterClass(MVT::v2f32, &AMDGPU::VSrc_64RegClass);
@@ -62,6 +56,21 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
computeRegisterProperties();
+ // Condition Codes
+ setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
+ setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
+ setCondCodeAction(ISD::SETUGE, MVT::f32, Expand);
+ setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
+ setCondCodeAction(ISD::SETULE, MVT::f32, Expand);
+ setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
+
+ setCondCodeAction(ISD::SETONE, MVT::f64, Expand);
+ setCondCodeAction(ISD::SETUEQ, MVT::f64, Expand);
+ setCondCodeAction(ISD::SETUGE, MVT::f64, Expand);
+ setCondCodeAction(ISD::SETUGT, MVT::f64, Expand);
+ setCondCodeAction(ISD::SETULE, MVT::f64, Expand);
+ setCondCodeAction(ISD::SETULT, MVT::f64, Expand);
+
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i32, Expand);
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8f32, Expand);
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i32, Expand);
@@ -69,6 +78,32 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
setOperationAction(ISD::ADD, MVT::i64, Legal);
setOperationAction(ISD::ADD, MVT::i32, Legal);
+ setOperationAction(ISD::ADDC, MVT::i32, Legal);
+ setOperationAction(ISD::ADDE, MVT::i32, Legal);
+
+ setOperationAction(ISD::BITCAST, MVT::i128, Legal);
+
+ // We need to custom lower vector stores from local memory
+ setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
+ setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
+ setOperationAction(ISD::LOAD, MVT::v8i32, Custom);
+ setOperationAction(ISD::LOAD, MVT::v16i32, Custom);
+
+ setOperationAction(ISD::STORE, MVT::v8i32, Custom);
+ setOperationAction(ISD::STORE, MVT::v16i32, Custom);
+
+ // We need to custom lower loads/stores from private memory
+ setOperationAction(ISD::LOAD, MVT::i32, Custom);
+ setOperationAction(ISD::LOAD, MVT::i64, Custom);
+ setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
+ setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
+
+ setOperationAction(ISD::STORE, MVT::i32, Custom);
+ setOperationAction(ISD::STORE, MVT::i64, Custom);
+ setOperationAction(ISD::STORE, MVT::i128, Custom);
+ setOperationAction(ISD::STORE, MVT::v2i32, Custom);
+ setOperationAction(ISD::STORE, MVT::v4i32, Custom);
+
setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
@@ -78,14 +113,31 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
setOperationAction(ISD::SETCC, MVT::v2i1, Expand);
setOperationAction(ISD::SETCC, MVT::v4i1, Expand);
+ setOperationAction(ISD::ANY_EXTEND, MVT::i64, Custom);
setOperationAction(ISD::SIGN_EXTEND, MVT::i64, Custom);
setOperationAction(ISD::ZERO_EXTEND, MVT::i64, Custom);
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
+ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::f32, Custom);
+ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v16i8, Custom);
+ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v4f32, Custom);
+
+ setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
setLoadExtAction(ISD::SEXTLOAD, MVT::i32, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::i32, Expand);
+ setLoadExtAction(ISD::SEXTLOAD, MVT::v8i16, Expand);
+ setLoadExtAction(ISD::SEXTLOAD, MVT::v16i16, Expand);
- setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
+ setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
+ setTruncStoreAction(MVT::f64, MVT::f32, Expand);
+ setTruncStoreAction(MVT::i64, MVT::i32, Expand);
+ setTruncStoreAction(MVT::i128, MVT::i64, Expand);
+ setTruncStoreAction(MVT::v8i32, MVT::v8i16, Expand);
+ setTruncStoreAction(MVT::v16i32, MVT::v16i16, Expand);
+
+ setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
+ setOperationAction(ISD::FrameIndex, MVT::i64, Custom);
setTargetDAGCombine(ISD::SELECT_CC);
@@ -102,24 +154,28 @@ bool SITargetLowering::allowsUnalignedMemoryAccesses(EVT VT,
bool *IsFast) const {
// XXX: This depends on the address space, and we may also want to revisit
// the alignment values we specify in the DataLayout.
+ if (!VT.isSimple() || VT == MVT::Other)
+ return false;
return VT.bitsGT(MVT::i32);
}
+bool SITargetLowering::shouldSplitVectorElementType(EVT VT) const {
+ return VT.bitsLE(MVT::i16);
+}
-SDValue SITargetLowering::LowerParameter(SelectionDAG &DAG, EVT VT,
+SDValue SITargetLowering::LowerParameter(SelectionDAG &DAG, EVT VT, EVT MemVT,
SDLoc DL, SDValue Chain,
unsigned Offset) const {
MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
AMDGPUAS::CONSTANT_ADDRESS);
- EVT ArgVT = MVT::getIntegerVT(VT.getSizeInBits());
SDValue BasePtr = DAG.getCopyFromReg(Chain, DL,
MRI.getLiveInVirtReg(AMDGPU::SGPR0_SGPR1), MVT::i64);
SDValue Ptr = DAG.getNode(ISD::ADD, DL, MVT::i64, BasePtr,
DAG.getConstant(Offset, MVT::i64));
- return DAG.getLoad(VT, DL, Chain, Ptr,
- MachinePointerInfo(UndefValue::get(PtrTy)),
- false, false, false, ArgVT.getSizeInBits() >> 3);
+ return DAG.getExtLoad(ISD::SEXTLOAD, DL, VT, Chain, Ptr,
+ MachinePointerInfo(UndefValue::get(PtrTy)), MemVT,
+ false, false, MemVT.getSizeInBits() >> 3);
}
@@ -146,7 +202,8 @@ SDValue SITargetLowering::LowerFormalArguments(
const ISD::InputArg &Arg = Ins[i];
// First check if it's a PS input addr
- if (Info->ShaderType == ShaderType::PIXEL && !Arg.Flags.isInReg()) {
+ if (Info->ShaderType == ShaderType::PIXEL && !Arg.Flags.isInReg() &&
+ !Arg.Flags.isByVal()) {
assert((PSInputNum <= 15) && "Too many PS inputs!");
@@ -177,7 +234,7 @@ SDValue SITargetLowering::LowerFormalArguments(
NewArg.PartOffset += NewArg.VT.getStoreSize();
}
- } else {
+ } else if (Info->ShaderType != ShaderType::COMPUTE) {
Splits.push_back(Arg);
}
}
@@ -200,6 +257,11 @@ SDValue SITargetLowering::LowerFormalArguments(
MF.addLiveIn(AMDGPU::SGPR0_SGPR1, &AMDGPU::SReg_64RegClass);
}
+ if (Info->ShaderType == ShaderType::COMPUTE) {
+ getOriginalFunctionArgs(DAG, DAG.getMachineFunction().getFunction(), Ins,
+ Splits);
+ }
+
AnalyzeFormalArguments(CCInfo, Splits);
for (unsigned i = 0, e = Ins.size(), ArgIdx = 0; i != e; ++i) {
@@ -214,9 +276,11 @@ SDValue SITargetLowering::LowerFormalArguments(
EVT VT = VA.getLocVT();
if (VA.isMemLoc()) {
+ VT = Ins[i].VT;
+ EVT MemVT = Splits[i].VT;
// The first 36 bytes of the input buffer contain information about
// thread group and global sizes.
- SDValue Arg = LowerParameter(DAG, VT, DL, DAG.getRoot(),
+ SDValue Arg = LowerParameter(DAG, VT, MemVT, DL, DAG.getRoot(),
36 + VA.getLocMemOffset());
InVals.push_back(Arg);
continue;
@@ -320,6 +384,19 @@ MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter(
MI->eraseFromParent();
break;
}
+ case AMDGPU::SI_RegisterStorePseudo: {
+ MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
+ const SIInstrInfo *TII =
+ static_cast<const SIInstrInfo*>(getTargetMachine().getInstrInfo());
+ unsigned Reg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
+ MachineInstrBuilder MIB =
+ BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::SI_RegisterStore),
+ Reg);
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i)
+ MIB.addOperand(MI->getOperand(i));
+
+ MI->eraseFromParent();
+ }
}
return BB;
}
@@ -335,6 +412,24 @@ MVT SITargetLowering::getScalarShiftAmountTy(EVT VT) const {
return MVT::i32;
}
+bool SITargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
+ VT = VT.getScalarType();
+
+ if (!VT.isSimple())
+ return false;
+
+ switch (VT.getSimpleVT().SimpleTy) {
+ case MVT::f32:
+ return false; /* There is V_MAD_F32 for f32 */
+ case MVT::f64:
+ return true;
+ default:
+ break;
+ }
+
+ return false;
+}
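+
+// Roughly: with this hook the DAG combiner may fuse (fadd (fmul a, b), c)
+// into an FMA node for f64, while f32 is left unfused so it can still be
+// matched by the V_MAD_F32 pattern.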
+
//===----------------------------------------------------------------------===//
// Custom DAG Lowering Operations
//===----------------------------------------------------------------------===//
@@ -344,9 +439,27 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
switch (Op.getOpcode()) {
default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
+ case ISD::ADD: return LowerADD(Op, DAG);
case ISD::BRCOND: return LowerBRCOND(Op, DAG);
+ case ISD::LOAD: {
+ LoadSDNode *Load = dyn_cast<LoadSDNode>(Op);
+ if ((Load->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ||
+ Load->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS) &&
+ Op.getValueType().isVector()) {
+ SDValue MergedValues[2] = {
+ SplitVectorLoad(Op, DAG),
+ Load->getChain()
+ };
+ return DAG.getMergeValues(MergedValues, 2, SDLoc(Op));
+ } else {
+ return LowerLOAD(Op, DAG);
+ }
+ }
+
case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
case ISD::SIGN_EXTEND: return LowerSIGN_EXTEND(Op, DAG);
+ case ISD::STORE: return LowerSTORE(Op, DAG);
+ case ISD::ANY_EXTEND: // Fall-through
case ISD::ZERO_EXTEND: return LowerZERO_EXTEND(Op, DAG);
case ISD::GlobalAddress: return LowerGlobalAddress(MFI, Op, DAG);
case ISD::INTRINSIC_WO_CHAIN: {
@@ -359,23 +472,23 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
switch (IntrinsicID) {
default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
case Intrinsic::r600_read_ngroups_x:
- return LowerParameter(DAG, VT, DL, DAG.getEntryNode(), 0);
+ return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 0);
case Intrinsic::r600_read_ngroups_y:
- return LowerParameter(DAG, VT, DL, DAG.getEntryNode(), 4);
+ return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 4);
case Intrinsic::r600_read_ngroups_z:
- return LowerParameter(DAG, VT, DL, DAG.getEntryNode(), 8);
+ return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 8);
case Intrinsic::r600_read_global_size_x:
- return LowerParameter(DAG, VT, DL, DAG.getEntryNode(), 12);
+ return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 12);
case Intrinsic::r600_read_global_size_y:
- return LowerParameter(DAG, VT, DL, DAG.getEntryNode(), 16);
+ return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 16);
case Intrinsic::r600_read_global_size_z:
- return LowerParameter(DAG, VT, DL, DAG.getEntryNode(), 20);
+ return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 20);
case Intrinsic::r600_read_local_size_x:
- return LowerParameter(DAG, VT, DL, DAG.getEntryNode(), 24);
+ return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 24);
case Intrinsic::r600_read_local_size_y:
- return LowerParameter(DAG, VT, DL, DAG.getEntryNode(), 28);
+ return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 28);
case Intrinsic::r600_read_local_size_z:
- return LowerParameter(DAG, VT, DL, DAG.getEntryNode(), 32);
+ return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 32);
case Intrinsic::r600_read_tgid_x:
return CreateLiveInRegister(DAG, &AMDGPU::SReg_32RegClass,
AMDGPU::SReg_32RegClass.getRegister(NumUserSGPRs + 0), VT);
@@ -394,13 +507,102 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case Intrinsic::r600_read_tidig_z:
return CreateLiveInRegister(DAG, &AMDGPU::VReg_32RegClass,
AMDGPU::VGPR2, VT);
-
+ case AMDGPUIntrinsic::SI_load_const: {
+ SDValue Ops [] = {
+ ResourceDescriptorToi128(Op.getOperand(1), DAG),
+ Op.getOperand(2)
+ };
+
+ MachineMemOperand *MMO = MF.getMachineMemOperand(
+ MachinePointerInfo(),
+ MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant,
+ VT.getSizeInBits() / 8, 4);
+ return DAG.getMemIntrinsicNode(AMDGPUISD::LOAD_CONSTANT, DL,
+ Op->getVTList(), Ops, 2, VT, MMO);
+ }
+ case AMDGPUIntrinsic::SI_sample:
+ return LowerSampleIntrinsic(AMDGPUISD::SAMPLE, Op, DAG);
+ case AMDGPUIntrinsic::SI_sampleb:
+ return LowerSampleIntrinsic(AMDGPUISD::SAMPLEB, Op, DAG);
+ case AMDGPUIntrinsic::SI_sampled:
+ return LowerSampleIntrinsic(AMDGPUISD::SAMPLED, Op, DAG);
+ case AMDGPUIntrinsic::SI_samplel:
+ return LowerSampleIntrinsic(AMDGPUISD::SAMPLEL, Op, DAG);
+ case AMDGPUIntrinsic::SI_vs_load_input:
+ return DAG.getNode(AMDGPUISD::LOAD_INPUT, DL, VT,
+ ResourceDescriptorToi128(Op.getOperand(1), DAG),
+ Op.getOperand(2),
+ Op.getOperand(3));
}
}
+
+ case ISD::INTRINSIC_VOID:
+ SDValue Chain = Op.getOperand(0);
+ unsigned IntrinsicID = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+
+ switch (IntrinsicID) {
+ case AMDGPUIntrinsic::SI_tbuffer_store: {
+ SDLoc DL(Op);
+ SDValue Ops [] = {
+ Chain,
+ ResourceDescriptorToi128(Op.getOperand(2), DAG),
+ Op.getOperand(3),
+ Op.getOperand(4),
+ Op.getOperand(5),
+ Op.getOperand(6),
+ Op.getOperand(7),
+ Op.getOperand(8),
+ Op.getOperand(9),
+ Op.getOperand(10),
+ Op.getOperand(11),
+ Op.getOperand(12),
+ Op.getOperand(13),
+ Op.getOperand(14)
+ };
+ EVT VT = Op.getOperand(3).getValueType();
+
+ MachineMemOperand *MMO = MF.getMachineMemOperand(
+ MachinePointerInfo(),
+ MachineMemOperand::MOStore,
+ VT.getSizeInBits() / 8, 4);
+ return DAG.getMemIntrinsicNode(AMDGPUISD::TBUFFER_STORE_FORMAT, DL,
+ Op->getVTList(), Ops,
+ sizeof(Ops)/sizeof(Ops[0]), VT, MMO);
+ }
+ default:
+ break;
+ }
}
return SDValue();
}
+SDValue SITargetLowering::LowerADD(SDValue Op,
+ SelectionDAG &DAG) const {
+ if (Op.getValueType() != MVT::i64)
+ return SDValue();
+
+ SDLoc DL(Op);
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+
+ SDValue Zero = DAG.getConstant(0, MVT::i32);
+ SDValue One = DAG.getConstant(1, MVT::i32);
+
+ SDValue Lo0 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, LHS, Zero);
+ SDValue Hi0 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, LHS, One);
+
+ SDValue Lo1 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, RHS, Zero);
+ SDValue Hi1 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, RHS, One);
+
+ SDVTList VTList = DAG.getVTList(MVT::i32, MVT::Glue);
+
+ SDValue AddLo = DAG.getNode(ISD::ADDC, DL, VTList, Lo0, Lo1);
+ SDValue Carry = AddLo.getValue(1);
+ SDValue AddHi = DAG.getNode(ISD::ADDE, DL, VTList, Hi0, Hi1, Carry);
+
+ return DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, AddLo, AddHi.getValue(0));
+}
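+
+// Worked example of the carry chain above: for LHS = 0x00000001FFFFFFFF and
+// RHS = 1, ADDC yields Lo = 0 with the carry glue set, ADDE then yields
+// Hi = 2, and BUILD_PAIR reassembles the i64 result 0x0000000200000000.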
+
/// \brief Helper function for LowerBRCOND
static SDNode *findUser(SDValue Value, unsigned Opcode) {
@@ -495,6 +697,53 @@ SDValue SITargetLowering::LowerBRCOND(SDValue BRCOND,
return Chain;
}
+SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ LoadSDNode *Load = cast<LoadSDNode>(Op);
+
+ if (Load->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS)
+ return SDValue();
+
+ SDValue TruncPtr = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32,
+ Load->getBasePtr(), DAG.getConstant(0, MVT::i32));
+ SDValue Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, TruncPtr,
+ DAG.getConstant(2, MVT::i32));
+
+ SDValue Ret = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, Op.getValueType(),
+ Load->getChain(), Ptr,
+ DAG.getTargetConstant(0, MVT::i32),
+ Op.getOperand(2));
+ SDValue MergedValues[2] = {
+ Ret,
+ Load->getChain()
+ };
+ return DAG.getMergeValues(MergedValues, 2, DL);
+
+}
+
+SDValue SITargetLowering::ResourceDescriptorToi128(SDValue Op,
+ SelectionDAG &DAG) const {
+
+ if (Op.getValueType() == MVT::i128) {
+ return Op;
+ }
+
+ assert(Op.getOpcode() == ISD::UNDEF);
+
+ return DAG.getNode(ISD::BUILD_PAIR, SDLoc(Op), MVT::i128,
+ DAG.getConstant(0, MVT::i64),
+ DAG.getConstant(0, MVT::i64));
+}
+
+SDValue SITargetLowering::LowerSampleIntrinsic(unsigned Opcode,
+ const SDValue &Op,
+ SelectionDAG &DAG) const {
+ return DAG.getNode(Opcode, SDLoc(Op), Op.getValueType(), Op.getOperand(1),
+ Op.getOperand(2),
+ ResourceDescriptorToi128(Op.getOperand(3), DAG),
+ Op.getOperand(4));
+}
+
SDValue SITargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
SDValue LHS = Op.getOperand(0);
SDValue RHS = Op.getOperand(1);
@@ -529,6 +778,56 @@ SDValue SITargetLowering::LowerSIGN_EXTEND(SDValue Op,
return DAG.getNode(ISD::BUILD_PAIR, DL, VT, Op.getOperand(0), Hi);
}
+SDValue SITargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ StoreSDNode *Store = cast<StoreSDNode>(Op);
+ EVT VT = Store->getMemoryVT();
+
+ SDValue Ret = AMDGPUTargetLowering::LowerSTORE(Op, DAG);
+ if (Ret.getNode())
+ return Ret;
+
+ if (VT.isVector() && VT.getVectorNumElements() >= 8)
+ return SplitVectorStore(Op, DAG);
+
+ if (Store->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS)
+ return SDValue();
+
+ SDValue TruncPtr = DAG.getZExtOrTrunc(Store->getBasePtr(), DL, MVT::i32);
+ SDValue Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, TruncPtr,
+ DAG.getConstant(2, MVT::i32));
+ SDValue Chain = Store->getChain();
+ SmallVector<SDValue, 8> Values;
+
+ if (VT == MVT::i64) {
+ for (unsigned i = 0; i < 2; ++i) {
+ Values.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32,
+ Store->getValue(), DAG.getConstant(i, MVT::i32)));
+ }
+ } else if (VT == MVT::i128) {
+ for (unsigned i = 0; i < 2; ++i) {
+ for (unsigned j = 0; j < 2; ++j) {
+ Values.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32,
+ DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64,
+ Store->getValue(), DAG.getConstant(i, MVT::i32)),
+ DAG.getConstant(j, MVT::i32)));
+ }
+ }
+ } else {
+ Values.push_back(Store->getValue());
+ }
+
+ for (unsigned i = 0; i < Values.size(); ++i) {
+ SDValue PartPtr = DAG.getNode(ISD::ADD, DL, MVT::i32,
+ Ptr, DAG.getConstant(i, MVT::i32));
+ Chain = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other,
+ Chain, Values[i], PartPtr,
+ DAG.getTargetConstant(0, MVT::i32));
+ }
+ return Chain;
+}
+
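A hedged sketch of the private-address path above, modelling the register file as a flat array of dwords (names are illustrative): the byte pointer becomes a dword index via the SRL by 2, and each 32-bit piece of the value is written at consecutive indices, which is what the REGISTER_STORE loop emits.

    #include <cstdint>

    // Conceptual model only; the real lowering emits AMDGPUISD::REGISTER_STORE.
    void storeI64Private(uint32_t Regs[], uint32_t BytePtr, uint64_t Value) {
      uint32_t Dword = BytePtr >> 2;           // the SRL by 2 above
      Regs[Dword]     = uint32_t(Value);       // Values[0], low half
      Regs[Dword + 1] = uint32_t(Value >> 32); // Values[1], high half
    }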
SDValue SITargetLowering::LowerZERO_EXTEND(SDValue Op,
SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
@@ -555,7 +854,6 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
switch (N->getOpcode()) {
default: break;
case ISD::SELECT_CC: {
- N->dump();
ConstantSDNode *True, *False;
// i1 selectcc(l, r, -1, 0, cc) -> i1 setcc(l, r, cc)
if ((True = dyn_cast<ConstantSDNode>(N->getOperand(2)))
@@ -759,8 +1057,8 @@ void SITargetLowering::ensureSRegLimit(SelectionDAG &DAG, SDValue &Operand,
return;
}
- // This is a conservative aproach, it is possible that we can't determine
- // the correct register class and copy too often, but better save than sorry.
+ // This is a conservative approach. It is possible that we can't determine the
+ // correct register class and copy too often, but better safe than sorry.
SDValue RC = DAG.getTargetConstant(RegClass, MVT::i32);
SDNode *Node = DAG.getMachineNode(TargetOpcode::COPY_TO_REGCLASS, SDLoc(),
Operand.getValueType(), Operand, RC);
@@ -937,7 +1235,9 @@ static unsigned SubIdx2Lane(unsigned Idx) {
void SITargetLowering::adjustWritemask(MachineSDNode *&Node,
SelectionDAG &DAG) const {
SDNode *Users[4] = { };
- unsigned Writemask = 0, Lane = 0;
+ unsigned Lane = 0;
+ unsigned OldDmask = Node->getConstantOperandVal(0);
+ unsigned NewDmask = 0;
// Try to figure out the used register components
for (SDNode::use_iterator I = Node->use_begin(), E = Node->use_end();
@@ -948,29 +1248,42 @@ void SITargetLowering::adjustWritemask(MachineSDNode *&Node,
I->getMachineOpcode() != TargetOpcode::EXTRACT_SUBREG)
return;
+ // Lane means which subreg of %VGPRa_VGPRb_VGPRc_VGPRd is used.
+ // Note that subregs are packed, i.e. Lane==0 is the first bit set
+ // in OldDmask, so it can be any of X,Y,Z,W; Lane==1 is the second bit
+ // set, etc.
Lane = SubIdx2Lane(I->getConstantOperandVal(1));
+ // Set which texture component corresponds to the lane.
+ unsigned Comp;
+ for (unsigned i = 0, Dmask = OldDmask; i <= Lane; i++) {
+ assert(Dmask);
+ Comp = countTrailingZeros(Dmask);
+ Dmask &= ~(1 << Comp);
+ }
+
// Abort if we have more than one user per component
if (Users[Lane])
return;
Users[Lane] = *I;
- Writemask |= 1 << Lane;
+ NewDmask |= 1 << Comp;
}
- // Abort if all components are used
- if (Writemask == 0xf)
+ // Abort if there's no change
+ if (NewDmask == OldDmask)
return;
// Adjust the writemask in the node
std::vector<SDValue> Ops;
- Ops.push_back(DAG.getTargetConstant(Writemask, MVT::i32));
+ Ops.push_back(DAG.getTargetConstant(NewDmask, MVT::i32));
for (unsigned i = 1, e = Node->getNumOperands(); i != e; ++i)
Ops.push_back(Node->getOperand(i));
Node = (MachineSDNode*)DAG.UpdateNodeOperands(Node, Ops.data(), Ops.size());
// If we only got one lane, replace it with a copy
- if (Writemask == (1U << Lane)) {
+ // (if NewDmask has only one bit set...)
+ if (NewDmask && (NewDmask & (NewDmask-1)) == 0) {
SDValue RC = DAG.getTargetConstant(AMDGPU::VReg_32RegClassID, MVT::i32);
SDNode *Copy = DAG.getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
SDLoc(), Users[Lane]->getValueType(0),
@@ -1001,9 +1314,11 @@ void SITargetLowering::adjustWritemask(MachineSDNode *&Node,
/// \brief Fold the instructions after selecting them
SDNode *SITargetLowering::PostISelFolding(MachineSDNode *Node,
SelectionDAG &DAG) const {
+ const SIInstrInfo *TII =
+ static_cast<const SIInstrInfo*>(getTargetMachine().getInstrInfo());
Node = AdjustRegClass(Node, DAG);
- if (AMDGPU::isMIMG(Node->getMachineOpcode()) != -1)
+ if (TII->isMIMG(Node->getMachineOpcode()))
adjustWritemask(Node, DAG);
return foldOperands(Node, DAG);
@@ -1013,7 +1328,9 @@ SDNode *SITargetLowering::PostISelFolding(MachineSDNode *Node,
/// bits set in the writemask
void SITargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI,
SDNode *Node) const {
- if (AMDGPU::isMIMG(MI->getOpcode()) == -1)
+ const SIInstrInfo *TII =
+ static_cast<const SIInstrInfo*>(getTargetMachine().getInstrInfo());
+ if (!TII->isMIMG(MI->getOpcode()))
return;
unsigned VReg = MI->getOperand(0).getReg();
@@ -1030,6 +1347,8 @@ void SITargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI,
case 3: RC = &AMDGPU::VReg_96RegClass; break;
}
+ unsigned NewOpcode = TII->getMaskedMIMGOp(MI->getOpcode(), BitsSet);
+ MI->setDesc(TII->get(NewOpcode));
MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
MRI.setRegClass(VReg, RC);
}
diff --git a/lib/Target/R600/SIISelLowering.h b/lib/Target/R600/SIISelLowering.h
index b4202c4..9933ece 100644
--- a/lib/Target/R600/SIISelLowering.h
+++ b/lib/Target/R600/SIISelLowering.h
@@ -21,13 +21,19 @@
namespace llvm {
class SITargetLowering : public AMDGPUTargetLowering {
- SDValue LowerParameter(SelectionDAG &DAG, EVT VT, SDLoc DL,
+ SDValue LowerParameter(SelectionDAG &DAG, EVT VT, EVT MemVT, SDLoc DL,
SDValue Chain, unsigned Offset) const;
+ SDValue LowerSampleIntrinsic(unsigned Opcode, const SDValue &Op,
+ SelectionDAG &DAG) const;
+ SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSIGN_EXTEND(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerZERO_EXTEND(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerADD(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
+ SDValue ResourceDescriptorToi128(SDValue Op, SelectionDAG &DAG) const;
bool foldImm(SDValue &Operand, int32_t &Immediate,
bool &ScalarSlotUsed) const;
const TargetRegisterClass *getRegClassForNode(SelectionDAG &DAG,
@@ -44,6 +50,7 @@ class SITargetLowering : public AMDGPUTargetLowering {
public:
SITargetLowering(TargetMachine &tm);
bool allowsUnalignedMemoryAccesses(EVT VT, bool *IsFast) const;
+ virtual bool shouldSplitVectorElementType(EVT VT) const;
SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,
bool isVarArg,
@@ -55,6 +62,7 @@ public:
MachineBasicBlock * BB) const;
virtual EVT getSetCCResultType(LLVMContext &Context, EVT VT) const;
virtual MVT getScalarShiftAmountTy(EVT VT) const;
+ virtual bool isFMAFasterThanFMulAndFAdd(EVT VT) const;
virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
virtual SDNode *PostISelFolding(MachineSDNode *N, SelectionDAG &DAG) const;
diff --git a/lib/Target/R600/SIInsertWaits.cpp b/lib/Target/R600/SIInsertWaits.cpp
index ba202e3..7ef662e 100644
--- a/lib/Target/R600/SIInsertWaits.cpp
+++ b/lib/Target/R600/SIInsertWaits.cpp
@@ -134,14 +134,19 @@ Counters SIInsertWaits::getHwCounts(MachineInstr &MI) {
// LGKM may use larger values
if (TSFlags & SIInstrFlags::LGKM_CNT) {
- MachineOperand &Op = MI.getOperand(0);
- if (!Op.isReg())
- Op = MI.getOperand(1);
- assert(Op.isReg() && "First LGKM operand must be a register!");
+ if (TII->isSMRD(MI.getOpcode())) {
- unsigned Reg = Op.getReg();
- unsigned Size = TRI->getMinimalPhysRegClass(Reg)->getSize();
- Result.Named.LGKM = Size > 4 ? 2 : 1;
+ MachineOperand &Op = MI.getOperand(0);
+ assert(Op.isReg() && "First LGKM operand must be a register!");
+
+ unsigned Reg = Op.getReg();
+ unsigned Size = TRI->getMinimalPhysRegClass(Reg)->getSize();
+ Result.Named.LGKM = Size > 4 ? 2 : 1;
+
+ } else {
+ // DS
+ Result.Named.LGKM = 1;
+ }
} else {
Result.Named.LGKM = 0;
@@ -181,7 +186,7 @@ bool SIInsertWaits::isOpRelevant(MachineOperand &Op) {
RegInterval SIInsertWaits::getRegInterval(MachineOperand &Op) {
- if (!Op.isReg())
+ if (!Op.isReg() || !TRI->isInAllocatableClass(Op.getReg()))
return std::make_pair(0, 0);
unsigned Reg = Op.getReg();
diff --git a/lib/Target/R600/SIInstrFormats.td b/lib/Target/R600/SIInstrFormats.td
index 434aa7e..53ebaaf 100644
--- a/lib/Target/R600/SIInstrFormats.td
+++ b/lib/Target/R600/SIInstrFormats.td
@@ -17,10 +17,24 @@ class InstSI <dag outs, dag ins, string asm, list<dag> pattern> :
field bits<1> VM_CNT = 0;
field bits<1> EXP_CNT = 0;
field bits<1> LGKM_CNT = 0;
+ field bits<1> MIMG = 0;
+ field bits<1> SMRD = 0;
+ field bits<1> VOP1 = 0;
+ field bits<1> VOP2 = 0;
+ field bits<1> VOP3 = 0;
+ field bits<1> VOPC = 0;
+ field bits<1> SALU = 0;
let TSFlags{0} = VM_CNT;
let TSFlags{1} = EXP_CNT;
let TSFlags{2} = LGKM_CNT;
+ let TSFlags{3} = MIMG;
+ let TSFlags{4} = SMRD;
+ let TSFlags{5} = VOP1;
+ let TSFlags{6} = VOP2;
+ let TSFlags{7} = VOP3;
+ let TSFlags{8} = VOPC;
+ let TSFlags{9} = SALU;
}
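These bits are queried from C++ through SIInstrFlags (see the SIInstrInfo.cpp hunk below, which includes SIDefines.h). A hedged sketch of the matching enum, assuming the C++-side flag values mirror the TSFlags bit positions declared here:

    namespace SIInstrFlags {
      enum {
        VM_CNT   = 1 << 0, EXP_CNT = 1 << 1, LGKM_CNT = 1 << 2,
        MIMG     = 1 << 3, SMRD    = 1 << 4, VOP1     = 1 << 5,
        VOP2     = 1 << 6, VOP3    = 1 << 7, VOPC     = 1 << 8,
        SALU     = 1 << 9
      };
    }
    // e.g. the query added later in this patch:
    //   return get(Opcode).TSFlags & SIInstrFlags::MIMG;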
class Enc32 <dag outs, dag ins, string asm, list<dag> pattern> :
@@ -55,6 +69,7 @@ class SOP1 <bits<8> op, dag outs, dag ins, string asm, list<dag> pattern> :
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
+ let SALU = 1;
}
class SOP2 <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
@@ -73,6 +88,7 @@ class SOP2 <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
+ let SALU = 1;
}
class SOPC <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
@@ -90,6 +106,7 @@ class SOPC <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
+ let SALU = 1;
}
class SOPK <bits<5> op, dag outs, dag ins, string asm, list<dag> pattern> :
@@ -106,6 +123,7 @@ class SOPK <bits<5> op, dag outs, dag ins, string asm, list<dag> pattern> :
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
+ let SALU = 1;
}
class SOPP <bits<7> op, dag ins, string asm, list<dag> pattern> : Enc32 <
@@ -123,6 +141,7 @@ class SOPP <bits<7> op, dag ins, string asm, list<dag> pattern> : Enc32 <
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
+ let SALU = 1;
}
class SMRD <bits<5> op, bits<1> imm, dag outs, dag ins, string asm,
@@ -140,6 +159,7 @@ class SMRD <bits<5> op, bits<1> imm, dag outs, dag ins, string asm,
let Inst{31-27} = 0x18; //encoding
let LGKM_CNT = 1;
+ let SMRD = 1;
}
//===----------------------------------------------------------------------===//
@@ -162,6 +182,8 @@ class VOP1 <bits<8> op, dag outs, dag ins, string asm, list<dag> pattern> :
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
+ let UseNamedOperandTable = 1;
+ let VOP1 = 1;
}
class VOP2 <bits<6> op, dag outs, dag ins, string asm, list<dag> pattern> :
@@ -180,6 +202,8 @@ class VOP2 <bits<6> op, dag outs, dag ins, string asm, list<dag> pattern> :
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
+ let UseNamedOperandTable = 1;
+ let VOP2 = 1;
}
class VOP3 <bits<9> op, dag outs, dag ins, string asm, list<dag> pattern> :
@@ -208,6 +232,8 @@ class VOP3 <bits<9> op, dag outs, dag ins, string asm, list<dag> pattern> :
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
+ let UseNamedOperandTable = 1;
+ let VOP3 = 1;
}
class VOP3b <bits<9> op, dag outs, dag ins, string asm, list<dag> pattern> :
@@ -234,6 +260,8 @@ class VOP3b <bits<9> op, dag outs, dag ins, string asm, list<dag> pattern> :
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
+ let UseNamedOperandTable = 1;
+ let VOP3 = 1;
}
class VOPC <bits<8> op, dag ins, string asm, list<dag> pattern> :
@@ -251,6 +279,7 @@ class VOPC <bits<8> op, dag ins, string asm, list<dag> pattern> :
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
+ let VOPC = 1;
}
class VINTRP <bits <2> op, dag outs, dag ins, string asm, list<dag> pattern> :
@@ -414,6 +443,7 @@ class MIMG <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
let VM_CNT = 1;
let EXP_CNT = 1;
+ let MIMG = 1;
}
def EXP : Enc64<
diff --git a/lib/Target/R600/SIInstrInfo.cpp b/lib/Target/R600/SIInstrInfo.cpp
index 551ae86..ab55c1b 100644
--- a/lib/Target/R600/SIInstrInfo.cpp
+++ b/lib/Target/R600/SIInstrInfo.cpp
@@ -15,10 +15,10 @@
#include "SIInstrInfo.h"
#include "AMDGPUTargetMachine.h"
+#include "SIDefines.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/MC/MCInstrDesc.h"
-#include <stdio.h>
using namespace llvm;
@@ -31,6 +31,10 @@ const SIRegisterInfo &SIInstrInfo::getRegisterInfo() const {
return RI;
}
+//===----------------------------------------------------------------------===//
+// TargetInstrInfo callbacks
+//===----------------------------------------------------------------------===//
+
void
SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI, DebugLoc DL,
@@ -118,14 +122,14 @@ SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
} else if (AMDGPU::VReg_32RegClass.contains(DestReg)) {
assert(AMDGPU::VReg_32RegClass.contains(SrcReg) ||
- AMDGPU::SReg_32RegClass.contains(SrcReg));
+ AMDGPU::SReg_32RegClass.contains(SrcReg));
BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DestReg)
.addReg(SrcReg, getKillRegState(KillSrc));
return;
} else if (AMDGPU::VReg_64RegClass.contains(DestReg)) {
assert(AMDGPU::VReg_64RegClass.contains(SrcReg) ||
- AMDGPU::SReg_64RegClass.contains(SrcReg));
+ AMDGPU::SReg_64RegClass.contains(SrcReg));
Opcode = AMDGPU::V_MOV_B32_e32;
SubIndices = Sub0_1;
@@ -136,19 +140,19 @@ SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
} else if (AMDGPU::VReg_128RegClass.contains(DestReg)) {
assert(AMDGPU::VReg_128RegClass.contains(SrcReg) ||
- AMDGPU::SReg_128RegClass.contains(SrcReg));
+ AMDGPU::SReg_128RegClass.contains(SrcReg));
Opcode = AMDGPU::V_MOV_B32_e32;
SubIndices = Sub0_3;
} else if (AMDGPU::VReg_256RegClass.contains(DestReg)) {
assert(AMDGPU::VReg_256RegClass.contains(SrcReg) ||
- AMDGPU::SReg_256RegClass.contains(SrcReg));
+ AMDGPU::SReg_256RegClass.contains(SrcReg));
Opcode = AMDGPU::V_MOV_B32_e32;
SubIndices = Sub0_7;
} else if (AMDGPU::VReg_512RegClass.contains(DestReg)) {
assert(AMDGPU::VReg_512RegClass.contains(SrcReg) ||
- AMDGPU::SReg_512RegClass.contains(SrcReg));
+ AMDGPU::SReg_512RegClass.contains(SrcReg));
Opcode = AMDGPU::V_MOV_B32_e32;
SubIndices = Sub0_15;
@@ -168,7 +172,6 @@ SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
}
unsigned SIInstrInfo::commuteOpcode(unsigned Opcode) const {
-
int NewOpc;
// Try to map original to commuted opcode
@@ -185,11 +188,36 @@ unsigned SIInstrInfo::commuteOpcode(unsigned Opcode) const {
MachineInstr *SIInstrInfo::commuteInstruction(MachineInstr *MI,
bool NewMI) const {
- if (MI->getNumOperands() < 3 || !MI->getOperand(1).isReg() ||
- !MI->getOperand(2).isReg())
+ MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
+ if (MI->getNumOperands() < 3 || !MI->getOperand(1).isReg())
return 0;
- MI = TargetInstrInfo::commuteInstruction(MI, NewMI);
+ // Cannot commute VOP2 if src0 is SGPR.
+ if (isVOP2(MI->getOpcode()) && MI->getOperand(1).isReg() &&
+ RI.isSGPRClass(MRI.getRegClass(MI->getOperand(1).getReg())))
+ return 0;
+
+ if (!MI->getOperand(2).isReg()) {
+ // XXX: Commute instructions with FPImm operands
+ if (NewMI || MI->getOperand(2).isFPImm() ||
+ (!isVOP2(MI->getOpcode()) && !isVOP3(MI->getOpcode()))) {
+ return 0;
+ }
+
+ // XXX: Commute VOP3 instructions with abs and neg set.
+ if (isVOP3(MI->getOpcode()) &&
+ (MI->getOperand(AMDGPU::getNamedOperandIdx(MI->getOpcode(),
+ AMDGPU::OpName::abs)).getImm() ||
+ MI->getOperand(AMDGPU::getNamedOperandIdx(MI->getOpcode(),
+ AMDGPU::OpName::neg)).getImm()))
+ return 0;
+
+ unsigned Reg = MI->getOperand(1).getReg();
+ MI->getOperand(1).ChangeToImmediate(MI->getOperand(2).getImm());
+ MI->getOperand(2).ChangeToRegister(Reg, false);
+ } else {
+ MI = TargetInstrInfo::commuteInstruction(MI, NewMI);
+ }
if (MI)
MI->setDesc(get(commuteOpcode(MI->getOpcode())));
@@ -197,15 +225,12 @@ MachineInstr *SIInstrInfo::commuteInstruction(MachineInstr *MI,
return MI;
}
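A hedged illustration of the new immediate path (register names are made up): when src1 holds an immediate on a commutable VOP2/VOP3 instruction, the operands are swapped in place instead of going through TargetInstrInfo, and the opcode is then remapped with commuteOpcode():

    //   before: V_ADD_I32_e32 %vdst, %vgpr1, 42  ; literal in src1 (illegal for VOP2)
    //   after:  V_ADD_I32_e32 %vdst, 42, %vgpr1  ; literal commuted into src0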
-MachineInstr * SIInstrInfo::getMovImmInstr(MachineFunction *MF, unsigned DstReg,
- int64_t Imm) const {
- MachineInstr * MI = MF->CreateMachineInstr(get(AMDGPU::V_MOV_B32_e32), DebugLoc());
- MachineInstrBuilder MIB(*MF, MI);
- MIB.addReg(DstReg, RegState::Define);
- MIB.addImm(Imm);
-
- return MI;
-
+MachineInstr *SIInstrInfo::buildMovInstr(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator I,
+ unsigned DstReg,
+ unsigned SrcReg) const {
+ return BuildMI(*MBB, I, MBB->findDebugLoc(I), get(AMDGPU::V_MOV_B32_e32),
+ DstReg).addReg(SrcReg);
}
bool SIInstrInfo::isMov(unsigned Opcode) const {
@@ -224,32 +249,397 @@ SIInstrInfo::isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const {
return RC != &AMDGPU::EXECRegRegClass;
}
-//===----------------------------------------------------------------------===//
-// Indirect addressing callbacks
-//===----------------------------------------------------------------------===//
+int SIInstrInfo::isMIMG(uint16_t Opcode) const {
+ return get(Opcode).TSFlags & SIInstrFlags::MIMG;
+}
-unsigned SIInstrInfo::calculateIndirectAddress(unsigned RegIndex,
- unsigned Channel) const {
- assert(Channel == 0);
- return RegIndex;
+int SIInstrInfo::isSMRD(uint16_t Opcode) const {
+ return get(Opcode).TSFlags & SIInstrFlags::SMRD;
+}
+
+bool SIInstrInfo::isVOP1(uint16_t Opcode) const {
+ return get(Opcode).TSFlags & SIInstrFlags::VOP1;
+}
+
+bool SIInstrInfo::isVOP2(uint16_t Opcode) const {
+ return get(Opcode).TSFlags & SIInstrFlags::VOP2;
+}
+
+bool SIInstrInfo::isVOP3(uint16_t Opcode) const {
+ return get(Opcode).TSFlags & SIInstrFlags::VOP3;
+}
+
+bool SIInstrInfo::isVOPC(uint16_t Opcode) const {
+ return get(Opcode).TSFlags & SIInstrFlags::VOPC;
+}
+
+bool SIInstrInfo::isSALUInstr(const MachineInstr &MI) const {
+ return get(MI.getOpcode()).TSFlags & SIInstrFlags::SALU;
+}
+
+bool SIInstrInfo::isInlineConstant(const MachineOperand &MO) const {
+ if (MO.isImm()) {
+ return MO.getImm() >= -16 && MO.getImm() <= 64;
+ }
+ if (MO.isFPImm()) {
+ return MO.getFPImm()->isExactlyValue(0.0) ||
+ MO.getFPImm()->isExactlyValue(0.5) ||
+ MO.getFPImm()->isExactlyValue(-0.5) ||
+ MO.getFPImm()->isExactlyValue(1.0) ||
+ MO.getFPImm()->isExactlyValue(-1.0) ||
+ MO.getFPImm()->isExactlyValue(2.0) ||
+ MO.getFPImm()->isExactlyValue(-2.0) ||
+ MO.getFPImm()->isExactlyValue(4.0) ||
+ MO.getFPImm()->isExactlyValue(-4.0);
+ }
+ return false;
+}
+
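For example, following the checks above (integers outside [-16, 64], and FP values outside the listed set, must be encoded as 32-bit literals and are classified by isLiteralConstant below):

    //   isInlineConstant(64)  -> true     isInlineConstant(65)   -> false
    //   isInlineConstant(-16) -> true     isInlineConstant(-17)  -> false
    //   isInlineConstant(0.5) -> true     isInlineConstant(0.25) -> false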
+bool SIInstrInfo::isLiteralConstant(const MachineOperand &MO) const {
+ return (MO.isImm() || MO.isFPImm()) && !isInlineConstant(MO);
+}
+
+bool SIInstrInfo::verifyInstruction(const MachineInstr *MI,
+ StringRef &ErrInfo) const {
+ uint16_t Opcode = MI->getOpcode();
+ int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
+ int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
+ int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
+
+ // Verify VOP*
+ if (isVOP1(Opcode) || isVOP2(Opcode) || isVOP3(Opcode) || isVOPC(Opcode)) {
+ unsigned ConstantBusCount = 0;
+ unsigned SGPRUsed = AMDGPU::NoRegister;
+ for (int i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.isUse() &&
+ !TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
+
+ // EXEC register uses the constant bus.
+ if (!MO.isImplicit() && MO.getReg() == AMDGPU::EXEC)
+ ++ConstantBusCount;
+
+ // SGPRs use the constant bus.
+ if (MO.getReg() == AMDGPU::M0 || MO.getReg() == AMDGPU::VCC ||
+ (!MO.isImplicit() &&
+ (AMDGPU::SGPR_32RegClass.contains(MO.getReg()) ||
+ AMDGPU::SGPR_64RegClass.contains(MO.getReg())))) {
+ if (SGPRUsed != MO.getReg()) {
+ ++ConstantBusCount;
+ SGPRUsed = MO.getReg();
+ }
+ }
+ }
+ // Literal constants use the constant bus.
+ if (isLiteralConstant(MO))
+ ++ConstantBusCount;
+ }
+ if (ConstantBusCount > 1) {
+ ErrInfo = "VOP* instruction uses the constant bus more than once";
+ return false;
+ }
+ }
+
+ // Verify SRC1 for VOP2 and VOPC
+ if (Src1Idx != -1 && (isVOP2(Opcode) || isVOPC(Opcode))) {
+ const MachineOperand &Src1 = MI->getOperand(Src1Idx);
+ if (Src1.isImm() || Src1.isFPImm()) {
+ ErrInfo = "VOP[2C] src1 cannot be an immediate.";
+ return false;
+ }
+ }
+
+ // Verify VOP3
+ if (isVOP3(Opcode)) {
+ if (Src0Idx != -1 && isLiteralConstant(MI->getOperand(Src0Idx))) {
+ ErrInfo = "VOP3 src0 cannot be a literal constant.";
+ return false;
+ }
+ if (Src1Idx != -1 && isLiteralConstant(MI->getOperand(Src1Idx))) {
+ ErrInfo = "VOP3 src1 cannot be a literal constant.";
+ return false;
+ }
+ if (Src2Idx != -1 && isLiteralConstant(MI->getOperand(Src2Idx))) {
+ ErrInfo = "VOP3 src2 cannot be a literal constant.";
+ return false;
+ }
+ }
+ return true;
+}
+
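A hedged example of the constant-bus rule enforced above (registers are illustrative): a VALU instruction can read at most one scalar value per issue, so two distinct SGPR sources fail verification while reuse of the same SGPR is fine:

    //   V_MAD_F32 %v0, %s0, %v1, %v2   ; one SGPR        -> OK
    //   V_MAD_F32 %v0, %s0, %s0, %v2   ; same SGPR twice -> OK (counted once)
    //   V_MAD_F32 %v0, %s0, %s1, %v2   ; two SGPRs       -> rejected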
+unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) {
+ switch (MI.getOpcode()) {
+ default: return AMDGPU::INSTRUCTION_LIST_END;
+ case AMDGPU::REG_SEQUENCE: return AMDGPU::REG_SEQUENCE;
+ case AMDGPU::COPY: return AMDGPU::COPY;
+ case AMDGPU::PHI: return AMDGPU::PHI;
+ case AMDGPU::S_ADD_I32: return AMDGPU::V_ADD_I32_e32;
+ case AMDGPU::S_ADDC_U32: return AMDGPU::V_ADDC_U32_e32;
+ case AMDGPU::S_SUB_I32: return AMDGPU::V_SUB_I32_e32;
+ case AMDGPU::S_SUBB_U32: return AMDGPU::V_SUBB_U32_e32;
+ case AMDGPU::S_ASHR_I32: return AMDGPU::V_ASHR_I32_e32;
+ case AMDGPU::S_ASHR_I64: return AMDGPU::V_ASHR_I64;
+ case AMDGPU::S_LSHL_B32: return AMDGPU::V_LSHL_B32_e32;
+ case AMDGPU::S_LSHL_B64: return AMDGPU::V_LSHL_B64;
+ case AMDGPU::S_LSHR_B32: return AMDGPU::V_LSHR_B32_e32;
+ case AMDGPU::S_LSHR_B64: return AMDGPU::V_LSHR_B64;
+ }
+}
+
+bool SIInstrInfo::isSALUOpSupportedOnVALU(const MachineInstr &MI) const {
+ return getVALUOp(MI) != AMDGPU::INSTRUCTION_LIST_END;
+}
+
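A hedged usage sketch, assuming a fix-up pass that walks SALU instructions whose results leak into VALU code (such a pass is not part of this hunk):

    // if (TII->isSALUInstr(*MI) && TII->isSALUOpSupportedOnVALU(*MI))
    //   TII->moveToVALU(*MI); // rewrites MI and any VALU-incapable users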
+const TargetRegisterClass *SIInstrInfo::getOpRegClass(const MachineInstr &MI,
+ unsigned OpNo) const {
+ const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
+ const MCInstrDesc &Desc = get(MI.getOpcode());
+ if (MI.isVariadic() || OpNo >= Desc.getNumOperands() ||
+ Desc.OpInfo[OpNo].RegClass == -1)
+ return MRI.getRegClass(MI.getOperand(OpNo).getReg());
+
+ unsigned RCID = Desc.OpInfo[OpNo].RegClass;
+ return RI.getRegClass(RCID);
+}
+
+bool SIInstrInfo::canReadVGPR(const MachineInstr &MI, unsigned OpNo) const {
+ switch (MI.getOpcode()) {
+ case AMDGPU::COPY:
+ case AMDGPU::REG_SEQUENCE:
+ return RI.hasVGPRs(getOpRegClass(MI, 0));
+ default:
+ return RI.hasVGPRs(getOpRegClass(MI, OpNo));
+ }
}
+void SIInstrInfo::legalizeOpWithMove(MachineInstr *MI, unsigned OpIdx) const {
+ MachineBasicBlock::iterator I = MI;
+ MachineOperand &MO = MI->getOperand(OpIdx);
+ MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
+ unsigned RCID = get(MI->getOpcode()).OpInfo[OpIdx].RegClass;
+ const TargetRegisterClass *RC = RI.getRegClass(RCID);
+ unsigned Opcode = AMDGPU::V_MOV_B32_e32;
+ if (MO.isReg()) {
+ Opcode = AMDGPU::COPY;
+ } else if (RI.isSGPRClass(RC)) {
+ Opcode = AMDGPU::S_MOV_B32;
+ }
+
+ const TargetRegisterClass *VRC = RI.getEquivalentVGPRClass(RC);
+ unsigned Reg = MRI.createVirtualRegister(VRC);
+ BuildMI(*MI->getParent(), I, MI->getParent()->findDebugLoc(I), get(Opcode),
+ Reg).addOperand(MO);
+ MO.ChangeToRegister(Reg, false);
+}
+
+void SIInstrInfo::legalizeOperands(MachineInstr *MI) const {
+ MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
+ int Src0Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
+ AMDGPU::OpName::src0);
+ int Src1Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
+ AMDGPU::OpName::src1);
+ int Src2Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
+ AMDGPU::OpName::src2);
+
+ // Legalize VOP2
+ if (isVOP2(MI->getOpcode()) && Src1Idx != -1) {
+ MachineOperand &Src0 = MI->getOperand(Src0Idx);
+ MachineOperand &Src1 = MI->getOperand(Src1Idx);
+
+ // If the instruction implicitly reads VCC, we can't have any SGPR operands;
+ // move any such operands to VGPRs.
+ bool ReadsVCC = MI->readsRegister(AMDGPU::VCC, &RI);
+ if (ReadsVCC && Src0.isReg() &&
+ RI.isSGPRClass(MRI.getRegClass(Src0.getReg()))) {
+ legalizeOpWithMove(MI, Src0Idx);
+ return;
+ }
+
+ if (ReadsVCC && Src1.isReg() &&
+ RI.isSGPRClass(MRI.getRegClass(Src1.getReg()))) {
+ legalizeOpWithMove(MI, Src1Idx);
+ return;
+ }
+
+ // Legalize VOP2 instructions where src1 is not a VGPR. An SGPR input must
+ // be the first operand, and there can only be one.
+ if (Src1.isImm() || Src1.isFPImm() ||
+ (Src1.isReg() && RI.isSGPRClass(MRI.getRegClass(Src1.getReg())))) {
+ if (MI->isCommutable()) {
+ if (commuteInstruction(MI))
+ return;
+ }
+ legalizeOpWithMove(MI, Src1Idx);
+ }
+ }
+
+ // XXX - Do any VOP3 instructions read VCC?
+ // Legalize VOP3
+ if (isVOP3(MI->getOpcode())) {
+ int VOP3Idx[3] = {Src0Idx, Src1Idx, Src2Idx};
+ unsigned SGPRReg = AMDGPU::NoRegister;
+ for (unsigned i = 0; i < 3; ++i) {
+ int Idx = VOP3Idx[i];
+ if (Idx == -1)
+ continue;
+ MachineOperand &MO = MI->getOperand(Idx);
+
+ if (MO.isReg()) {
+ if (!RI.isSGPRClass(MRI.getRegClass(MO.getReg())))
+ continue; // VGPRs are legal
+
+ assert(MO.getReg() != AMDGPU::SCC && "SCC operand to VOP3 instruction");
+
+ if (SGPRReg == AMDGPU::NoRegister || SGPRReg == MO.getReg()) {
+ SGPRReg = MO.getReg();
+ // We can use one SGPR in each VOP3 instruction.
+ continue;
+ }
+ } else if (!isLiteralConstant(MO)) {
+ // If it is not a register and not a literal constant, then it must be
+ // an inline constant which is always legal.
+ continue;
+ }
+ // If we make it this far, then the operand is not legal and we must
+ // legalize it.
+ legalizeOpWithMove(MI, Idx);
+ }
+ }
+
+ // Legalize REG_SEQUENCE
+ // The register class of the operands must be the same type as the register
+ // class of the output.
+ if (MI->getOpcode() == AMDGPU::REG_SEQUENCE) {
+ const TargetRegisterClass *RC = NULL, *SRC = NULL, *VRC = NULL;
+ for (unsigned i = 1, e = MI->getNumOperands(); i != e; i+=2) {
+ if (!MI->getOperand(i).isReg() ||
+ !TargetRegisterInfo::isVirtualRegister(MI->getOperand(i).getReg()))
+ continue;
+ const TargetRegisterClass *OpRC =
+ MRI.getRegClass(MI->getOperand(i).getReg());
+ if (RI.hasVGPRs(OpRC)) {
+ VRC = OpRC;
+ } else {
+ SRC = OpRC;
+ }
+ }
+
+ // If any of the operands are VGPR registers, then they all must be VGPRs,
+ // otherwise we will create illegal VGPR->SGPR copies when legalizing them.
+ if (VRC || !RI.isSGPRClass(getOpRegClass(*MI, 0))) {
+ if (!VRC) {
+ assert(SRC);
+ VRC = RI.getEquivalentVGPRClass(SRC);
+ }
+ RC = VRC;
+ } else {
+ RC = SRC;
+ }
-int SIInstrInfo::getIndirectIndexBegin(const MachineFunction &MF) const {
- llvm_unreachable("Unimplemented");
+ // Update all the operands so they have the same type.
+ for (unsigned i = 1, e = MI->getNumOperands(); i != e; i+=2) {
+ if (!MI->getOperand(i).isReg() ||
+ !TargetRegisterInfo::isVirtualRegister(MI->getOperand(i).getReg()))
+ continue;
+ unsigned DstReg = MRI.createVirtualRegister(RC);
+ BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
+ get(AMDGPU::COPY), DstReg)
+ .addOperand(MI->getOperand(i));
+ MI->getOperand(i).setReg(DstReg);
+ }
+ }
}
-int SIInstrInfo::getIndirectIndexEnd(const MachineFunction &MF) const {
- llvm_unreachable("Unimplemented");
+void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {
+ SmallVector<MachineInstr *, 128> Worklist;
+ Worklist.push_back(&TopInst);
+
+ while (!Worklist.empty()) {
+ MachineInstr *Inst = Worklist.pop_back_val();
+ unsigned NewOpcode = getVALUOp(*Inst);
+ if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END)
+ continue;
+
+ MachineRegisterInfo &MRI = Inst->getParent()->getParent()->getRegInfo();
+
+ // Use the new VALU Opcode.
+ const MCInstrDesc &NewDesc = get(NewOpcode);
+ Inst->setDesc(NewDesc);
+
+ // Remove any references to SCC. Vector instructions can't read from it,
+ // and we're just about to add the implicit uses and defs of VCC; we don't
+ // want both.
+ for (unsigned i = Inst->getNumOperands() - 1; i > 0; --i) {
+ MachineOperand &Op = Inst->getOperand(i);
+ if (Op.isReg() && Op.getReg() == AMDGPU::SCC)
+ Inst->RemoveOperand(i);
+ }
+
+ // Add the implicit register uses and definitions.
+ if (NewDesc.ImplicitUses) {
+ for (unsigned i = 0; NewDesc.ImplicitUses[i]; ++i) {
+ unsigned Reg = NewDesc.ImplicitUses[i];
+ Inst->addOperand(MachineOperand::CreateReg(Reg, false, true));
+ }
+ }
+
+ if (NewDesc.ImplicitDefs) {
+ for (unsigned i = 0; NewDesc.ImplicitDefs[i]; ++i) {
+ unsigned Reg = NewDesc.ImplicitDefs[i];
+ Inst->addOperand(MachineOperand::CreateReg(Reg, true, true));
+ }
+ }
+
+ legalizeOperands(Inst);
+
+ // Update the destination register class.
+ const TargetRegisterClass *NewDstRC = getOpRegClass(*Inst, 0);
+
+ switch (Inst->getOpcode()) {
+ // For target instructions, getOpRegClass just returns the virtual
+ // register class associated with the operand, so we need to find an
+ // equivalent VGPR register class in order to move the instruction to the
+ // VALU.
+ case AMDGPU::COPY:
+ case AMDGPU::PHI:
+ case AMDGPU::REG_SEQUENCE:
+ if (RI.hasVGPRs(NewDstRC))
+ continue;
+ NewDstRC = RI.getEquivalentVGPRClass(NewDstRC);
+ if (!NewDstRC)
+ continue;
+ break;
+ default:
+ break;
+ }
+
+ unsigned DstReg = Inst->getOperand(0).getReg();
+ unsigned NewDstReg = MRI.createVirtualRegister(NewDstRC);
+ MRI.replaceRegWith(DstReg, NewDstReg);
+
+ for (MachineRegisterInfo::use_iterator I = MRI.use_begin(NewDstReg),
+ E = MRI.use_end(); I != E; ++I) {
+ MachineInstr &UseMI = *I;
+ if (!canReadVGPR(UseMI, I.getOperandNo())) {
+ Worklist.push_back(&UseMI);
+ }
+ }
+ }
}
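A hedged before/after sketch of one worklist step (virtual register numbers are made up):

    //   before: %3 = S_ADD_I32 %1, %2     ; SALU, implicitly defines SCC
    //   after:  %4 = V_ADD_I32_e32 %1, %2 ; SCC refs dropped, implicit VCC added
    // All uses of %3 are rewritten to %4; any user that cannot read a VGPR is
    // pushed onto the worklist and converted the same way.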
-const TargetRegisterClass *SIInstrInfo::getIndirectAddrStoreRegClass(
- unsigned SourceReg) const {
- llvm_unreachable("Unimplemented");
+//===----------------------------------------------------------------------===//
+// Indirect addressing callbacks
+//===----------------------------------------------------------------------===//
+
+unsigned SIInstrInfo::calculateIndirectAddress(unsigned RegIndex,
+ unsigned Channel) const {
+ assert(Channel == 0);
+ return RegIndex;
}
-const TargetRegisterClass *SIInstrInfo::getIndirectAddrLoadRegClass() const {
- llvm_unreachable("Unimplemented");
+const TargetRegisterClass *SIInstrInfo::getIndirectAddrRegClass() const {
+ return &AMDGPU::VReg_32RegClass;
}
MachineInstrBuilder SIInstrInfo::buildIndirectWrite(
@@ -257,7 +647,17 @@ MachineInstrBuilder SIInstrInfo::buildIndirectWrite(
MachineBasicBlock::iterator I,
unsigned ValueReg,
unsigned Address, unsigned OffsetReg) const {
- llvm_unreachable("Unimplemented");
+ const DebugLoc &DL = MBB->findDebugLoc(I);
+ unsigned IndirectBaseReg = AMDGPU::VReg_32RegClass.getRegister(
+ getIndirectIndexBegin(*MBB->getParent()));
+
+ return BuildMI(*MBB, I, DL, get(AMDGPU::SI_INDIRECT_DST_V1))
+ .addReg(IndirectBaseReg, RegState::Define)
+ .addOperand(I->getOperand(0))
+ .addReg(IndirectBaseReg)
+ .addReg(OffsetReg)
+ .addImm(0)
+ .addReg(ValueReg);
}
MachineInstrBuilder SIInstrInfo::buildIndirectRead(
@@ -265,9 +665,43 @@ MachineInstrBuilder SIInstrInfo::buildIndirectRead(
MachineBasicBlock::iterator I,
unsigned ValueReg,
unsigned Address, unsigned OffsetReg) const {
- llvm_unreachable("Unimplemented");
+ const DebugLoc &DL = MBB->findDebugLoc(I);
+ unsigned IndirectBaseReg = AMDGPU::VReg_32RegClass.getRegister(
+ getIndirectIndexBegin(*MBB->getParent()));
+
+ return BuildMI(*MBB, I, DL, get(AMDGPU::SI_INDIRECT_SRC))
+ .addOperand(I->getOperand(0))
+ .addOperand(I->getOperand(1))
+ .addReg(IndirectBaseReg)
+ .addReg(OffsetReg)
+ .addImm(0);
}
-const TargetRegisterClass *SIInstrInfo::getSuperIndirectRegClass() const {
- llvm_unreachable("Unimplemented");
+void SIInstrInfo::reserveIndirectRegisters(BitVector &Reserved,
+ const MachineFunction &MF) const {
+ int End = getIndirectIndexEnd(MF);
+ int Begin = getIndirectIndexBegin(MF);
+
+ if (End == -1)
+ return;
+
+ for (int Index = Begin; Index <= End; ++Index)
+ Reserved.set(AMDGPU::VReg_32RegClass.getRegister(Index));
+
+ for (int Index = std::max(0, Begin - 1); Index <= End; ++Index)
+ Reserved.set(AMDGPU::VReg_64RegClass.getRegister(Index));
+
+ for (int Index = std::max(0, Begin - 2); Index <= End; ++Index)
+ Reserved.set(AMDGPU::VReg_96RegClass.getRegister(Index));
+
+ for (int Index = std::max(0, Begin - 3); Index <= End; ++Index)
+ Reserved.set(AMDGPU::VReg_128RegClass.getRegister(Index));
+
+ for (int Index = std::max(0, Begin - 7); Index <= End; ++Index)
+ Reserved.set(AMDGPU::VReg_256RegClass.getRegister(Index));
+
+ for (int Index = std::max(0, Begin - 15); Index <= End; ++Index)
+ Reserved.set(AMDGPU::VReg_512RegClass.getRegister(Index));
}
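The staggered lower bounds above account for register width: an N-dword register starting at index Begin-(N-1) still covers index Begin, so it too must be reserved. A hedged helper expressing the same overlap test:

    // True if an N-dword register starting at Start overlaps [Begin, End].
    static bool overlapsIndirectRange(int Start, int NumDwords,
                                      int Begin, int End) {
      return Start + NumDwords - 1 >= Begin && Start <= End;
    }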
diff --git a/lib/Target/R600/SIInstrInfo.h b/lib/Target/R600/SIInstrInfo.h
index 87eff4d..4af6348 100644
--- a/lib/Target/R600/SIInstrInfo.h
+++ b/lib/Target/R600/SIInstrInfo.h
@@ -1,4 +1,4 @@
-//===-- SIInstrInfo.h - SI Instruction Info Interface ---------------------===//
+//===-- SIInstrInfo.h - SI Instruction Info Interface -----------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -25,6 +25,14 @@ class SIInstrInfo : public AMDGPUInstrInfo {
private:
const SIRegisterInfo RI;
+ MachineInstrBuilder buildIndirectIndexLoop(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned OffsetVGPR,
+ unsigned MovRelOp,
+ unsigned Dst,
+ unsigned Src0) const;
+ // If you add or remove instructions from this function, you will
+
public:
explicit SIInstrInfo(AMDGPUTargetMachine &tm);
@@ -40,25 +48,65 @@ public:
virtual MachineInstr *commuteInstruction(MachineInstr *MI,
bool NewMI=false) const;
- virtual MachineInstr * getMovImmInstr(MachineFunction *MF, unsigned DstReg,
- int64_t Imm) const;
-
virtual unsigned getIEQOpcode() const { assert(!"Implement"); return 0;}
+ MachineInstr *buildMovInstr(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator I,
+ unsigned DstReg, unsigned SrcReg) const;
virtual bool isMov(unsigned Opcode) const;
virtual bool isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const;
-
- virtual int getIndirectIndexBegin(const MachineFunction &MF) const;
-
- virtual int getIndirectIndexEnd(const MachineFunction &MF) const;
+ int isMIMG(uint16_t Opcode) const;
+ int isSMRD(uint16_t Opcode) const;
+ bool isVOP1(uint16_t Opcode) const;
+ bool isVOP3(uint16_t Opcode) const;
+ bool isVOPC(uint16_t Opcode) const;
+ bool isInlineConstant(const MachineOperand &MO) const;
+ bool isLiteralConstant(const MachineOperand &MO) const;
+
+ virtual bool verifyInstruction(const MachineInstr *MI,
+ StringRef &ErrInfo) const;
+
+ bool isSALUInstr(const MachineInstr &MI) const;
+ static unsigned getVALUOp(const MachineInstr &MI);
+ bool isSALUOpSupportedOnVALU(const MachineInstr &MI) const;
+
+ /// \brief Return the correct register class for \p OpNo. For target-specific
+ /// instructions, this will return the register class that has been defined
+ /// in tablegen. For generic instructions, like REG_SEQUENCE, it will return
+ /// the register class of its machine operand, which can then be used to
+ /// infer the correct register class based on the other operands.
+ const TargetRegisterClass *getOpRegClass(const MachineInstr &MI,
+ unsigned OpNo) const;
+
+ /// \returns true if it is legal for the operand at index \p OpNo
+ /// to read a VGPR.
+ bool canReadVGPR(const MachineInstr &MI, unsigned OpNo) const;
+
+ /// \brief Legalize the \p OpIndex operand of this instruction by inserting
+ /// a MOV. For example:
+ /// ADD_I32_e32 VGPR0, 15
+ /// to
+ /// MOV VGPR1, 15
+ /// ADD_I32_e32 VGPR0, VGPR1
+ ///
+ /// If the operand being legalized is a register, then a COPY will be used
+ /// instead of MOV.
+ void legalizeOpWithMove(MachineInstr *MI, unsigned OpIdx) const;
+
+ /// \brief Legalize all operands in this instruction. This function may
+ /// create new instructions and insert them before \p MI.
+ void legalizeOperands(MachineInstr *MI) const;
+
+ /// \brief Replace this instruction's opcode with the equivalent VALU
+ /// opcode. This function will also move the users of \p MI to the
+ /// VALU if necessary.
+ void moveToVALU(MachineInstr &MI) const;
virtual unsigned calculateIndirectAddress(unsigned RegIndex,
unsigned Channel) const;
- virtual const TargetRegisterClass *getIndirectAddrStoreRegClass(
- unsigned SourceReg) const;
-
- virtual const TargetRegisterClass *getIndirectAddrLoadRegClass() const;
+ virtual const TargetRegisterClass *getIndirectAddrRegClass() const;
virtual MachineInstrBuilder buildIndirectWrite(MachineBasicBlock *MBB,
MachineBasicBlock::iterator I,
@@ -71,16 +119,18 @@ public:
unsigned ValueReg,
unsigned Address,
unsigned OffsetReg) const;
+ void reserveIndirectRegisters(BitVector &Reserved,
+ const MachineFunction &MF) const;
- virtual const TargetRegisterClass *getSuperIndirectRegClass() const;
- };
+ void LoadM0(MachineInstr *MoveRel, MachineBasicBlock::iterator I,
+ unsigned SavReg, unsigned IndexReg) const;
+};
namespace AMDGPU {
int getVOPe64(uint16_t Opcode);
int getCommuteRev(uint16_t Opcode);
int getCommuteOrig(uint16_t Opcode);
- int isMIMG(uint16_t Opcode);
} // End namespace AMDGPU
diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td
index 52af79c..4cd0daa 100644
--- a/lib/Target/R600/SIInstrInfo.td
+++ b/lib/Target/R600/SIInstrInfo.td
@@ -16,6 +16,45 @@ def SIadd64bit32bit : SDNode<"ISD::ADD",
SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>, SDTCisVT<0, i64>, SDTCisVT<2, i32>]>
>;
+def SIload_constant : SDNode<"AMDGPUISD::LOAD_CONSTANT",
+ SDTypeProfile<1, 2, [SDTCisVT<0, f32>, SDTCisVT<1, i128>, SDTCisVT<2, i32>]>,
+ [SDNPMayLoad, SDNPMemOperand]
+>;
+
+def SItbuffer_store : SDNode<"AMDGPUISD::TBUFFER_STORE_FORMAT",
+ SDTypeProfile<0, 13,
+ [SDTCisVT<0, i128>, // rsrc(SGPR)
+ SDTCisVT<1, iAny>, // vdata(VGPR)
+ SDTCisVT<2, i32>, // num_channels(imm)
+ SDTCisVT<3, i32>, // vaddr(VGPR)
+ SDTCisVT<4, i32>, // soffset(SGPR)
+ SDTCisVT<5, i32>, // inst_offset(imm)
+ SDTCisVT<6, i32>, // dfmt(imm)
+ SDTCisVT<7, i32>, // nfmt(imm)
+ SDTCisVT<8, i32>, // offen(imm)
+ SDTCisVT<9, i32>, // idxen(imm)
+ SDTCisVT<10, i32>, // glc(imm)
+ SDTCisVT<11, i32>, // slc(imm)
+ SDTCisVT<12, i32> // tfe(imm)
+ ]>,
+ [SDNPMayStore, SDNPMemOperand, SDNPHasChain]
+>;
+
+def SIload_input : SDNode<"AMDGPUISD::LOAD_INPUT",
+ SDTypeProfile<1, 3, [SDTCisVT<0, v4f32>, SDTCisVT<1, i128>, SDTCisVT<2, i16>,
+ SDTCisVT<3, i32>]>
+>;
+
+class SDSample<string opcode> : SDNode <opcode,
+ SDTypeProfile<1, 4, [SDTCisVT<0, v4f32>, SDTCisVT<2, v32i8>,
+ SDTCisVT<3, i128>, SDTCisVT<4, i32>]>
+>;
+
+def SIsample : SDSample<"AMDGPUISD::SAMPLE">;
+def SIsampleb : SDSample<"AMDGPUISD::SAMPLEB">;
+def SIsampled : SDSample<"AMDGPUISD::SAMPLED">;
+def SIsamplel : SDSample<"AMDGPUISD::SAMPLEL">;
+
// Transformation function, extract the lower 32bit of a 64bit immediate
def LO32 : SDNodeXForm<imm, [{
return CurDAG->getTargetConstant(N->getZExtValue() & 0xffffffff, MVT::i32);
@@ -45,6 +84,14 @@ def IMM8bitDWORD : ImmLeaf <
}]>
>;
+def as_i1imm : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(N->getZExtValue(), MVT::i1);
+}]>;
+
+def as_i8imm : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(N->getZExtValue(), MVT::i8);
+}]>;
+
def as_i16imm : SDNodeXForm<imm, [{
return CurDAG->getTargetConstant(N->getSExtValue(), MVT::i16);
}]>;
@@ -58,6 +105,26 @@ class InlineImm <ValueType vt> : PatLeaf <(vt imm), [{
(*(const SITargetLowering *)getTargetLowering()).analyzeImmediate(N) == 0;
}]>;
+class SGPRImm <dag frag> : PatLeaf<frag, [{
+ if (TM.getSubtarget<AMDGPUSubtarget>().getGeneration() <
+ AMDGPUSubtarget::SOUTHERN_ISLANDS) {
+ return false;
+ }
+ const SIRegisterInfo *SIRI =
+ static_cast<const SIRegisterInfo*>(TM.getRegisterInfo());
+ for (SDNode::use_iterator U = N->use_begin(), E = SDNode::use_end();
+ U != E; ++U) {
+ if (SIRI->isSGPRClass(getOperandRegClass(*U, U.getOperandNo()))) {
+ return true;
+ }
+ }
+ return false;
+}]>;
+
+def FRAMEri64 : Operand<iPTR> {
+ let MIOperandInfo = (ops SReg_32:$ptr, i32imm:$index);
+}
+
//===----------------------------------------------------------------------===//
// SI assembler operands
//===----------------------------------------------------------------------===//
@@ -109,6 +176,11 @@ class SOP2_64 <bits<7> op, string opName, list<dag> pattern> : SOP2 <
opName#" $dst, $src0, $src1", pattern
>;
+class SOP2_SHIFT_64 <bits<7> op, string opName, list<dag> pattern> : SOP2 <
+ op, (outs SReg_64:$dst), (ins SSrc_64:$src0, SSrc_32:$src1),
+ opName#" $dst, $src0, $src1", pattern
+>;
+
class SOPC_32 <bits<7> op, string opName, list<dag> pattern> : SOPC <
op, (outs SCCReg:$dst), (ins SSrc_32:$src0, SSrc_32:$src1),
opName#" $dst, $src0, $src1", pattern
@@ -184,6 +256,12 @@ multiclass VOP1_32 <bits<8> op, string opName, list<dag> pattern>
multiclass VOP1_64 <bits<8> op, string opName, list<dag> pattern>
: VOP1_Helper <op, VReg_64, VSrc_64, opName, pattern>;
+multiclass VOP1_32_64 <bits<8> op, string opName, list<dag> pattern>
+ : VOP1_Helper <op, VReg_32, VSrc_64, opName, pattern>;
+
+multiclass VOP1_64_32 <bits<8> op, string opName, list<dag> pattern>
+ : VOP1_Helper <op, VReg_64, VSrc_32, opName, pattern>;
+
multiclass VOP2_Helper <bits<6> op, RegisterClass vrc, RegisterClass arc,
string opName, list<dag> pattern, string revOp> {
def _e32 : VOP2 <
@@ -320,6 +398,18 @@ class DS_Store_Helper <bits<8> op, string asm, RegisterClass regClass> : DS <
let vdst = 0;
}
+class DS_1A1D_RET <bits<8> op, string asm, RegisterClass rc> : DS <
+ op,
+ (outs rc:$vdst),
+ (ins i1imm:$gds, VReg_32:$addr, VReg_32:$data0, i8imm:$offset0,
+ i8imm:$offset1),
+ asm#" $gds, $vdst, $addr, $data0, $offset0, $offset1, [M0]",
+ []> {
+ let mayStore = 1;
+ let mayLoad = 1;
+ let data1 = 0;
+}
+
class MTBUF_Store_Helper <bits<3> op, string asm, RegisterClass regClass> : MTBUF <
op,
(outs),
@@ -358,9 +448,9 @@ multiclass MUBUF_Load_Helper <bits<7> op, string asm, RegisterClass regClass> {
}
}
-class MUBUF_Store_Helper <bits<7> op, string name, RegisterClass vdataClass,
- ValueType VT> :
- MUBUF <op, (outs), (ins vdataClass:$vdata, SReg_128:$srsrc, VReg_64:$vaddr, i16imm:$offset),
+class MUBUF_Store_Helper <bits<7> op, string name, RegisterClass vdataClass> :
+ MUBUF <op, (outs), (ins vdataClass:$vdata, SReg_128:$srsrc, VReg_64:$vaddr,
+ i16imm:$offset),
name#" $vdata, $srsrc + $vaddr + $offset",
[]> {
@@ -391,11 +481,18 @@ class MTBUF_Load_Helper <bits<3> op, string asm, RegisterClass regClass> : MTBUF
let mayStore = 0;
}
-class MIMG_NoSampler_Helper <bits<7> op, string asm> : MIMG <
+class MIMG_Mask <string op, int channels> {
+ string Op = op;
+ int Channels = channels;
+}
+
+class MIMG_NoSampler_Helper <bits<7> op, string asm,
+ RegisterClass dst_rc,
+ RegisterClass src_rc> : MIMG <
op,
- (outs VReg_128:$vdata),
+ (outs dst_rc:$vdata),
(ins i32imm:$dmask, i1imm:$unorm, i1imm:$glc, i1imm:$da, i1imm:$r128,
- i1imm:$tfe, i1imm:$lwe, i1imm:$slc, unknown:$vaddr,
+ i1imm:$tfe, i1imm:$lwe, i1imm:$slc, src_rc:$vaddr,
SReg_256:$srsrc),
asm#" $vdata, $dmask, $unorm, $glc, $da, $r128,"
#" $tfe, $lwe, $slc, $vaddr, $srsrc",
@@ -406,11 +503,31 @@ class MIMG_NoSampler_Helper <bits<7> op, string asm> : MIMG <
let hasPostISelHook = 1;
}
-class MIMG_Sampler_Helper <bits<7> op, string asm> : MIMG <
+multiclass MIMG_NoSampler_Src_Helper <bits<7> op, string asm,
+ RegisterClass dst_rc,
+ int channels> {
+ def _V1 : MIMG_NoSampler_Helper <op, asm, dst_rc, VReg_32>,
+ MIMG_Mask<asm#"_V1", channels>;
+ def _V2 : MIMG_NoSampler_Helper <op, asm, dst_rc, VReg_64>,
+ MIMG_Mask<asm#"_V2", channels>;
+ def _V4 : MIMG_NoSampler_Helper <op, asm, dst_rc, VReg_128>,
+ MIMG_Mask<asm#"_V4", channels>;
+}
+
+multiclass MIMG_NoSampler <bits<7> op, string asm> {
+ defm _V1 : MIMG_NoSampler_Src_Helper <op, asm, VReg_32, 1>;
+ defm _V2 : MIMG_NoSampler_Src_Helper <op, asm, VReg_64, 2>;
+ defm _V3 : MIMG_NoSampler_Src_Helper <op, asm, VReg_96, 3>;
+ defm _V4 : MIMG_NoSampler_Src_Helper <op, asm, VReg_128, 4>;
+}
+
+class MIMG_Sampler_Helper <bits<7> op, string asm,
+ RegisterClass dst_rc,
+ RegisterClass src_rc> : MIMG <
op,
- (outs VReg_128:$vdata),
+ (outs dst_rc:$vdata),
(ins i32imm:$dmask, i1imm:$unorm, i1imm:$glc, i1imm:$da, i1imm:$r128,
- i1imm:$tfe, i1imm:$lwe, i1imm:$slc, unknown:$vaddr,
+ i1imm:$tfe, i1imm:$lwe, i1imm:$slc, src_rc:$vaddr,
SReg_256:$srsrc, SReg_128:$ssamp),
asm#" $vdata, $dmask, $unorm, $glc, $da, $r128,"
#" $tfe, $lwe, $slc, $vaddr, $srsrc, $ssamp",
@@ -420,6 +537,28 @@ class MIMG_Sampler_Helper <bits<7> op, string asm> : MIMG <
let hasPostISelHook = 1;
}
+multiclass MIMG_Sampler_Src_Helper <bits<7> op, string asm,
+ RegisterClass dst_rc,
+ int channels> {
+ def _V1 : MIMG_Sampler_Helper <op, asm, dst_rc, VReg_32>,
+ MIMG_Mask<asm#"_V1", channels>;
+ def _V2 : MIMG_Sampler_Helper <op, asm, dst_rc, VReg_64>,
+ MIMG_Mask<asm#"_V2", channels>;
+ def _V4 : MIMG_Sampler_Helper <op, asm, dst_rc, VReg_128>,
+ MIMG_Mask<asm#"_V4", channels>;
+ def _V8 : MIMG_Sampler_Helper <op, asm, dst_rc, VReg_256>,
+ MIMG_Mask<asm#"_V8", channels>;
+ def _V16 : MIMG_Sampler_Helper <op, asm, dst_rc, VReg_512>,
+ MIMG_Mask<asm#"_V16", channels>;
+}
+
+multiclass MIMG_Sampler <bits<7> op, string asm> {
+ defm _V1 : MIMG_Sampler_Src_Helper<op, asm, VReg_32, 1>;
+ defm _V2 : MIMG_Sampler_Src_Helper<op, asm, VReg_64, 2>;
+ defm _V3 : MIMG_Sampler_Src_Helper<op, asm, VReg_96, 3>;
+ defm _V4 : MIMG_Sampler_Src_Helper<op, asm, VReg_128, 4>;
+}
+
//===----------------------------------------------------------------------===//
// Vector instruction mappings
//===----------------------------------------------------------------------===//
@@ -442,6 +581,14 @@ def getCommuteRev : InstrMapping {
let ValueCols = [["0"]];
}
+def getMaskedMIMGOp : InstrMapping {
+ let FilterClass = "MIMG_Mask";
+ let RowFields = ["Op"];
+ let ColFields = ["Channels"];
+ let KeyCol = ["4"];
+ let ValueCols = [["1"], ["2"], ["3"] ];
+}
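This generated table is what AdjustInstrPostInstrSelection, earlier in this patch, consults to shrink an image opcode to the number of writemask bits actually set:

    unsigned NewOpcode = TII->getMaskedMIMGOp(MI->getOpcode(), BitsSet);
    MI->setDesc(TII->get(NewOpcode));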
+
// Maps a commuted opcode to its original version
def getCommuteOrig : InstrMapping {
let FilterClass = "VOP2_REV";
@@ -451,13 +598,4 @@ def getCommuteOrig : InstrMapping {
let ValueCols = [["1"]];
}
-// Test if the supplied opcode is an MIMG instruction
-def isMIMG : InstrMapping {
- let FilterClass = "MIMG";
- let RowFields = ["Inst"];
- let ColFields = ["Size"];
- let KeyCol = ["8"];
- let ValueCols = [["8"]];
-}
-
include "SIInstructions.td"
diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td
index 500d15e..76f05eb 100644
--- a/lib/Target/R600/SIInstructions.td
+++ b/lib/Target/R600/SIInstructions.td
@@ -23,7 +23,9 @@ def InterpSlot : Operand<i32> {
}
def isSI : Predicate<"Subtarget.getGeneration() "
- "== AMDGPUSubtarget::SOUTHERN_ISLANDS">;
+ ">= AMDGPUSubtarget::SOUTHERN_ISLANDS">;
+
+def WAIT_FLAG : InstFlag<"printWaitFlag">;
let Predicates = [isSI] in {
@@ -126,8 +128,11 @@ def S_CMPK_LT_U32 : SOPK_32 <0x0000000d, "S_CMPK_LT_U32", []>;
def S_CMPK_LE_U32 : SOPK_32 <0x0000000e, "S_CMPK_LE_U32", []>;
} // End isCompare = 1
-def S_ADDK_I32 : SOPK_32 <0x0000000f, "S_ADDK_I32", []>;
-def S_MULK_I32 : SOPK_32 <0x00000010, "S_MULK_I32", []>;
+let Defs = [SCC], isCommutable = 1 in {
+ def S_ADDK_I32 : SOPK_32 <0x0000000f, "S_ADDK_I32", []>;
+ def S_MULK_I32 : SOPK_32 <0x00000010, "S_MULK_I32", []>;
+}
+
//def S_CBRANCH_I_FORK : SOPK_ <0x00000011, "S_CBRANCH_I_FORK", []>;
def S_GETREG_B32 : SOPK_32 <0x00000012, "S_GETREG_B32", []>;
def S_SETREG_B32 : SOPK_32 <0x00000013, "S_SETREG_B32", []>;
@@ -138,19 +143,19 @@ def S_GETREG_REGRD_B32 : SOPK_32 <0x00000014, "S_GETREG_REGRD_B32", []>;
let isCompare = 1 in {
defm V_CMP_F_F32 : VOPC_32 <0x00000000, "V_CMP_F_F32">;
-defm V_CMP_LT_F32 : VOPC_32 <0x00000001, "V_CMP_LT_F32", f32, COND_LT>;
-defm V_CMP_EQ_F32 : VOPC_32 <0x00000002, "V_CMP_EQ_F32", f32, COND_EQ>;
-defm V_CMP_LE_F32 : VOPC_32 <0x00000003, "V_CMP_LE_F32", f32, COND_LE>;
-defm V_CMP_GT_F32 : VOPC_32 <0x00000004, "V_CMP_GT_F32", f32, COND_GT>;
-defm V_CMP_LG_F32 : VOPC_32 <0x00000005, "V_CMP_LG_F32", f32, COND_NE>;
-defm V_CMP_GE_F32 : VOPC_32 <0x00000006, "V_CMP_GE_F32", f32, COND_GE>;
-defm V_CMP_O_F32 : VOPC_32 <0x00000007, "V_CMP_O_F32">;
-defm V_CMP_U_F32 : VOPC_32 <0x00000008, "V_CMP_U_F32">;
+defm V_CMP_LT_F32 : VOPC_32 <0x00000001, "V_CMP_LT_F32", f32, COND_OLT>;
+defm V_CMP_EQ_F32 : VOPC_32 <0x00000002, "V_CMP_EQ_F32", f32, COND_OEQ>;
+defm V_CMP_LE_F32 : VOPC_32 <0x00000003, "V_CMP_LE_F32", f32, COND_OLE>;
+defm V_CMP_GT_F32 : VOPC_32 <0x00000004, "V_CMP_GT_F32", f32, COND_OGT>;
+defm V_CMP_LG_F32 : VOPC_32 <0x00000005, "V_CMP_LG_F32">;
+defm V_CMP_GE_F32 : VOPC_32 <0x00000006, "V_CMP_GE_F32", f32, COND_OGE>;
+defm V_CMP_O_F32 : VOPC_32 <0x00000007, "V_CMP_O_F32", f32, COND_O>;
+defm V_CMP_U_F32 : VOPC_32 <0x00000008, "V_CMP_U_F32", f32, COND_UO>;
defm V_CMP_NGE_F32 : VOPC_32 <0x00000009, "V_CMP_NGE_F32">;
defm V_CMP_NLG_F32 : VOPC_32 <0x0000000a, "V_CMP_NLG_F32">;
defm V_CMP_NGT_F32 : VOPC_32 <0x0000000b, "V_CMP_NGT_F32">;
defm V_CMP_NLE_F32 : VOPC_32 <0x0000000c, "V_CMP_NLE_F32">;
-defm V_CMP_NEQ_F32 : VOPC_32 <0x0000000d, "V_CMP_NEQ_F32", f32, COND_NE>;
+defm V_CMP_NEQ_F32 : VOPC_32 <0x0000000d, "V_CMP_NEQ_F32", f32, COND_UNE>;
defm V_CMP_NLT_F32 : VOPC_32 <0x0000000e, "V_CMP_NLT_F32">;
defm V_CMP_TRU_F32 : VOPC_32 <0x0000000f, "V_CMP_TRU_F32">;
@@ -176,19 +181,19 @@ defm V_CMPX_TRU_F32 : VOPC_32 <0x0000001f, "V_CMPX_TRU_F32">;
} // End hasSideEffects = 1, Defs = [EXEC]
defm V_CMP_F_F64 : VOPC_64 <0x00000020, "V_CMP_F_F64">;
-defm V_CMP_LT_F64 : VOPC_64 <0x00000021, "V_CMP_LT_F64", f64, COND_LT>;
-defm V_CMP_EQ_F64 : VOPC_64 <0x00000022, "V_CMP_EQ_F64", f64, COND_EQ>;
-defm V_CMP_LE_F64 : VOPC_64 <0x00000023, "V_CMP_LE_F64", f64, COND_LE>;
-defm V_CMP_GT_F64 : VOPC_64 <0x00000024, "V_CMP_GT_F64", f64, COND_GT>;
+defm V_CMP_LT_F64 : VOPC_64 <0x00000021, "V_CMP_LT_F64", f64, COND_OLT>;
+defm V_CMP_EQ_F64 : VOPC_64 <0x00000022, "V_CMP_EQ_F64", f64, COND_OEQ>;
+defm V_CMP_LE_F64 : VOPC_64 <0x00000023, "V_CMP_LE_F64", f64, COND_OLE>;
+defm V_CMP_GT_F64 : VOPC_64 <0x00000024, "V_CMP_GT_F64", f64, COND_OGT>;
defm V_CMP_LG_F64 : VOPC_64 <0x00000025, "V_CMP_LG_F64">;
-defm V_CMP_GE_F64 : VOPC_64 <0x00000026, "V_CMP_GE_F64", f64, COND_GE>;
-defm V_CMP_O_F64 : VOPC_64 <0x00000027, "V_CMP_O_F64">;
-defm V_CMP_U_F64 : VOPC_64 <0x00000028, "V_CMP_U_F64">;
+defm V_CMP_GE_F64 : VOPC_64 <0x00000026, "V_CMP_GE_F64", f64, COND_OGE>;
+defm V_CMP_O_F64 : VOPC_64 <0x00000027, "V_CMP_O_F64", f64, COND_O>;
+defm V_CMP_U_F64 : VOPC_64 <0x00000028, "V_CMP_U_F64", f64, COND_UO>;
defm V_CMP_NGE_F64 : VOPC_64 <0x00000029, "V_CMP_NGE_F64">;
defm V_CMP_NLG_F64 : VOPC_64 <0x0000002a, "V_CMP_NLG_F64">;
defm V_CMP_NGT_F64 : VOPC_64 <0x0000002b, "V_CMP_NGT_F64">;
defm V_CMP_NLE_F64 : VOPC_64 <0x0000002c, "V_CMP_NLE_F64">;
-defm V_CMP_NEQ_F64 : VOPC_64 <0x0000002d, "V_CMP_NEQ_F64", f64, COND_NE>;
+defm V_CMP_NEQ_F64 : VOPC_64 <0x0000002d, "V_CMP_NEQ_F64", f64, COND_UNE>;
defm V_CMP_NLT_F64 : VOPC_64 <0x0000002e, "V_CMP_NLT_F64">;
defm V_CMP_TRU_F64 : VOPC_64 <0x0000002f, "V_CMP_TRU_F64">;
@@ -290,12 +295,12 @@ defm V_CMPSX_TRU_F64 : VOPC_64 <0x0000007f, "V_CMPSX_TRU_F64">;
} // End hasSideEffects = 1, Defs = [EXEC]
defm V_CMP_F_I32 : VOPC_32 <0x00000080, "V_CMP_F_I32">;
-defm V_CMP_LT_I32 : VOPC_32 <0x00000081, "V_CMP_LT_I32", i32, COND_LT>;
+defm V_CMP_LT_I32 : VOPC_32 <0x00000081, "V_CMP_LT_I32", i32, COND_SLT>;
defm V_CMP_EQ_I32 : VOPC_32 <0x00000082, "V_CMP_EQ_I32", i32, COND_EQ>;
-defm V_CMP_LE_I32 : VOPC_32 <0x00000083, "V_CMP_LE_I32", i32, COND_LE>;
-defm V_CMP_GT_I32 : VOPC_32 <0x00000084, "V_CMP_GT_I32", i32, COND_GT>;
+defm V_CMP_LE_I32 : VOPC_32 <0x00000083, "V_CMP_LE_I32", i32, COND_SLE>;
+defm V_CMP_GT_I32 : VOPC_32 <0x00000084, "V_CMP_GT_I32", i32, COND_SGT>;
defm V_CMP_NE_I32 : VOPC_32 <0x00000085, "V_CMP_NE_I32", i32, COND_NE>;
-defm V_CMP_GE_I32 : VOPC_32 <0x00000086, "V_CMP_GE_I32", i32, COND_GE>;
+defm V_CMP_GE_I32 : VOPC_32 <0x00000086, "V_CMP_GE_I32", i32, COND_SGE>;
defm V_CMP_T_I32 : VOPC_32 <0x00000087, "V_CMP_T_I32">;
let hasSideEffects = 1, Defs = [EXEC] in {
@@ -312,12 +317,12 @@ defm V_CMPX_T_I32 : VOPC_32 <0x00000097, "V_CMPX_T_I32">;
} // End hasSideEffects = 1, Defs = [EXEC]
defm V_CMP_F_I64 : VOPC_64 <0x000000a0, "V_CMP_F_I64">;
-defm V_CMP_LT_I64 : VOPC_64 <0x000000a1, "V_CMP_LT_I64">;
-defm V_CMP_EQ_I64 : VOPC_64 <0x000000a2, "V_CMP_EQ_I64">;
-defm V_CMP_LE_I64 : VOPC_64 <0x000000a3, "V_CMP_LE_I64">;
-defm V_CMP_GT_I64 : VOPC_64 <0x000000a4, "V_CMP_GT_I64">;
-defm V_CMP_NE_I64 : VOPC_64 <0x000000a5, "V_CMP_NE_I64">;
-defm V_CMP_GE_I64 : VOPC_64 <0x000000a6, "V_CMP_GE_I64">;
+defm V_CMP_LT_I64 : VOPC_64 <0x000000a1, "V_CMP_LT_I64", i64, COND_SLT>;
+defm V_CMP_EQ_I64 : VOPC_64 <0x000000a2, "V_CMP_EQ_I64", i64, COND_EQ>;
+defm V_CMP_LE_I64 : VOPC_64 <0x000000a3, "V_CMP_LE_I64", i64, COND_SLE>;
+defm V_CMP_GT_I64 : VOPC_64 <0x000000a4, "V_CMP_GT_I64", i64, COND_SGT>;
+defm V_CMP_NE_I64 : VOPC_64 <0x000000a5, "V_CMP_NE_I64", i64, COND_NE>;
+defm V_CMP_GE_I64 : VOPC_64 <0x000000a6, "V_CMP_GE_I64", i64, COND_SGE>;
defm V_CMP_T_I64 : VOPC_64 <0x000000a7, "V_CMP_T_I64">;
let hasSideEffects = 1, Defs = [EXEC] in {
@@ -334,12 +339,12 @@ defm V_CMPX_T_I64 : VOPC_64 <0x000000b7, "V_CMPX_T_I64">;
} // End hasSideEffects = 1, Defs = [EXEC]
defm V_CMP_F_U32 : VOPC_32 <0x000000c0, "V_CMP_F_U32">;
-defm V_CMP_LT_U32 : VOPC_32 <0x000000c1, "V_CMP_LT_U32">;
-defm V_CMP_EQ_U32 : VOPC_32 <0x000000c2, "V_CMP_EQ_U32">;
-defm V_CMP_LE_U32 : VOPC_32 <0x000000c3, "V_CMP_LE_U32">;
-defm V_CMP_GT_U32 : VOPC_32 <0x000000c4, "V_CMP_GT_U32">;
-defm V_CMP_NE_U32 : VOPC_32 <0x000000c5, "V_CMP_NE_U32">;
-defm V_CMP_GE_U32 : VOPC_32 <0x000000c6, "V_CMP_GE_U32">;
+defm V_CMP_LT_U32 : VOPC_32 <0x000000c1, "V_CMP_LT_U32", i32, COND_ULT>;
+defm V_CMP_EQ_U32 : VOPC_32 <0x000000c2, "V_CMP_EQ_U32", i32, COND_EQ>;
+defm V_CMP_LE_U32 : VOPC_32 <0x000000c3, "V_CMP_LE_U32", i32, COND_ULE>;
+defm V_CMP_GT_U32 : VOPC_32 <0x000000c4, "V_CMP_GT_U32", i32, COND_UGT>;
+defm V_CMP_NE_U32 : VOPC_32 <0x000000c5, "V_CMP_NE_U32", i32, COND_NE>;
+defm V_CMP_GE_U32 : VOPC_32 <0x000000c6, "V_CMP_GE_U32", i32, COND_UGE>;
defm V_CMP_T_U32 : VOPC_32 <0x000000c7, "V_CMP_T_U32">;
let hasSideEffects = 1, Defs = [EXEC] in {
@@ -356,12 +361,12 @@ defm V_CMPX_T_U32 : VOPC_32 <0x000000d7, "V_CMPX_T_U32">;
} // End hasSideEffects = 1, Defs = [EXEC]
defm V_CMP_F_U64 : VOPC_64 <0x000000e0, "V_CMP_F_U64">;
-defm V_CMP_LT_U64 : VOPC_64 <0x000000e1, "V_CMP_LT_U64">;
-defm V_CMP_EQ_U64 : VOPC_64 <0x000000e2, "V_CMP_EQ_U64">;
-defm V_CMP_LE_U64 : VOPC_64 <0x000000e3, "V_CMP_LE_U64">;
-defm V_CMP_GT_U64 : VOPC_64 <0x000000e4, "V_CMP_GT_U64">;
-defm V_CMP_NE_U64 : VOPC_64 <0x000000e5, "V_CMP_NE_U64">;
-defm V_CMP_GE_U64 : VOPC_64 <0x000000e6, "V_CMP_GE_U64">;
+defm V_CMP_LT_U64 : VOPC_64 <0x000000e1, "V_CMP_LT_U64", i64, COND_ULT>;
+defm V_CMP_EQ_U64 : VOPC_64 <0x000000e2, "V_CMP_EQ_U64", i64, COND_EQ>;
+defm V_CMP_LE_U64 : VOPC_64 <0x000000e3, "V_CMP_LE_U64", i64, COND_ULE>;
+defm V_CMP_GT_U64 : VOPC_64 <0x000000e4, "V_CMP_GT_U64", i64, COND_UGT>;
+defm V_CMP_NE_U64 : VOPC_64 <0x000000e5, "V_CMP_NE_U64", i64, COND_NE>;
+defm V_CMP_GE_U64 : VOPC_64 <0x000000e6, "V_CMP_GE_U64", i64, COND_UGE>;
defm V_CMP_T_U64 : VOPC_64 <0x000000e7, "V_CMP_T_U64">;
let hasSideEffects = 1, Defs = [EXEC] in {
@@ -391,8 +396,16 @@ defm V_CMPX_CLASS_F64 : VOPC_64 <0x000000b8, "V_CMPX_CLASS_F64">;
} // End isCompare = 1
+def DS_ADD_U32_RTN : DS_1A1D_RET <0x20, "DS_ADD_U32_RTN", VReg_32>;
+def DS_SUB_U32_RTN : DS_1A1D_RET <0x21, "DS_SUB_U32_RTN", VReg_32>;
def DS_WRITE_B32 : DS_Store_Helper <0x0000000d, "DS_WRITE_B32", VReg_32>;
+def DS_WRITE_B8 : DS_Store_Helper <0x00000001e, "DS_WRITE_B8", VReg_32>;
+def DS_WRITE_B16 : DS_Store_Helper <0x00000001f, "DS_WRITE_B16", VReg_32>;
def DS_READ_B32 : DS_Load_Helper <0x00000036, "DS_READ_B32", VReg_32>;
+def DS_READ_I8 : DS_Load_Helper <0x00000039, "DS_READ_I8", VReg_32>;
+def DS_READ_U8 : DS_Load_Helper <0x0000003a, "DS_READ_U8", VReg_32>;
+def DS_READ_I16 : DS_Load_Helper <0x0000003b, "DS_READ_I16", VReg_32>;
+def DS_READ_U16 : DS_Load_Helper <0x0000003c, "DS_READ_U16", VReg_32>;
//def BUFFER_LOAD_FORMAT_X : MUBUF_ <0x00000000, "BUFFER_LOAD_FORMAT_X", []>;
//def BUFFER_LOAD_FORMAT_XY : MUBUF_ <0x00000001, "BUFFER_LOAD_FORMAT_XY", []>;
@@ -409,19 +422,25 @@ defm BUFFER_LOAD_SSHORT : MUBUF_Load_Helper <0x0000000b, "BUFFER_LOAD_SSHORT", V
defm BUFFER_LOAD_DWORD : MUBUF_Load_Helper <0x0000000c, "BUFFER_LOAD_DWORD", VReg_32>;
defm BUFFER_LOAD_DWORDX2 : MUBUF_Load_Helper <0x0000000d, "BUFFER_LOAD_DWORDX2", VReg_64>;
defm BUFFER_LOAD_DWORDX4 : MUBUF_Load_Helper <0x0000000e, "BUFFER_LOAD_DWORDX4", VReg_128>;
-//def BUFFER_STORE_BYTE : MUBUF_ <0x00000018, "BUFFER_STORE_BYTE", []>;
-//def BUFFER_STORE_SHORT : MUBUF_ <0x0000001a, "BUFFER_STORE_SHORT", []>;
+
+def BUFFER_STORE_BYTE : MUBUF_Store_Helper <
+ 0x00000018, "BUFFER_STORE_BYTE", VReg_32
+>;
+
+def BUFFER_STORE_SHORT : MUBUF_Store_Helper <
+ 0x0000001a, "BUFFER_STORE_SHORT", VReg_32
+>;
def BUFFER_STORE_DWORD : MUBUF_Store_Helper <
- 0x0000001c, "BUFFER_STORE_DWORD", VReg_32, i32
+ 0x0000001c, "BUFFER_STORE_DWORD", VReg_32
>;
def BUFFER_STORE_DWORDX2 : MUBUF_Store_Helper <
- 0x0000001d, "BUFFER_STORE_DWORDX2", VReg_64, i64
+ 0x0000001d, "BUFFER_STORE_DWORDX2", VReg_64
>;
def BUFFER_STORE_DWORDX4 : MUBUF_Store_Helper <
- 0x0000001e, "BUFFER_STORE_DWORDX4", VReg_128, v4i32
+ 0x0000001e, "BUFFER_STORE_DWORDX4", VReg_128
>;
//def BUFFER_ATOMIC_SWAP : MUBUF_ <0x00000030, "BUFFER_ATOMIC_SWAP", []>;
//def BUFFER_ATOMIC_CMPSWAP : MUBUF_ <0x00000031, "BUFFER_ATOMIC_CMPSWAP", []>;
@@ -463,21 +482,24 @@ def BUFFER_STORE_DWORDX4 : MUBUF_Store_Helper <
//def TBUFFER_LOAD_FORMAT_XY : MTBUF_ <0x00000001, "TBUFFER_LOAD_FORMAT_XY", []>;
//def TBUFFER_LOAD_FORMAT_XYZ : MTBUF_ <0x00000002, "TBUFFER_LOAD_FORMAT_XYZ", []>;
def TBUFFER_LOAD_FORMAT_XYZW : MTBUF_Load_Helper <0x00000003, "TBUFFER_LOAD_FORMAT_XYZW", VReg_128>;
-//def TBUFFER_STORE_FORMAT_X : MTBUF_ <0x00000004, "TBUFFER_STORE_FORMAT_X", []>;
-//def TBUFFER_STORE_FORMAT_XY : MTBUF_ <0x00000005, "TBUFFER_STORE_FORMAT_XY", []>;
-//def TBUFFER_STORE_FORMAT_XYZ : MTBUF_ <0x00000006, "TBUFFER_STORE_FORMAT_XYZ", []>;
-//def TBUFFER_STORE_FORMAT_XYZW : MTBUF_ <0x00000007, "TBUFFER_STORE_FORMAT_XYZW", []>;
+def TBUFFER_STORE_FORMAT_X : MTBUF_Store_Helper <0x00000004, "TBUFFER_STORE_FORMAT_X", VReg_32>;
+def TBUFFER_STORE_FORMAT_XY : MTBUF_Store_Helper <0x00000005, "TBUFFER_STORE_FORMAT_XY", VReg_64>;
+def TBUFFER_STORE_FORMAT_XYZ : MTBUF_Store_Helper <0x00000006, "TBUFFER_STORE_FORMAT_XYZ", VReg_128>;
+def TBUFFER_STORE_FORMAT_XYZW : MTBUF_Store_Helper <0x00000007, "TBUFFER_STORE_FORMAT_XYZW", VReg_128>;
let mayLoad = 1 in {
-defm S_LOAD_DWORD : SMRD_Helper <0x00, "S_LOAD_DWORD", SReg_64, SReg_32>;
+// We are using the SGPR_32 and not the SReg_32 register class for 32-bit
+// SMRD instructions, because the SGPR_32 register class does not include M0
+// and writing to M0 from an SMRD instruction will hang the GPU.
+defm S_LOAD_DWORD : SMRD_Helper <0x00, "S_LOAD_DWORD", SReg_64, SGPR_32>;
defm S_LOAD_DWORDX2 : SMRD_Helper <0x01, "S_LOAD_DWORDX2", SReg_64, SReg_64>;
defm S_LOAD_DWORDX4 : SMRD_Helper <0x02, "S_LOAD_DWORDX4", SReg_64, SReg_128>;
defm S_LOAD_DWORDX8 : SMRD_Helper <0x03, "S_LOAD_DWORDX8", SReg_64, SReg_256>;
defm S_LOAD_DWORDX16 : SMRD_Helper <0x04, "S_LOAD_DWORDX16", SReg_64, SReg_512>;
defm S_BUFFER_LOAD_DWORD : SMRD_Helper <
- 0x08, "S_BUFFER_LOAD_DWORD", SReg_128, SReg_32
+ 0x08, "S_BUFFER_LOAD_DWORD", SReg_128, SGPR_32
>;
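
The register-class distinction behind the comment above is visible in the SIRegisterInfo.td hunk later in this patch: SReg_32 is defined as SGPR_32 plus M0Reg, so narrowing the SMRD destination class to SGPR_32 removes exactly the register these loads must never write:

def SReg_32 : RegisterClass<"AMDGPU", [f32, i32], 32,
  (add SGPR_32, M0Reg)
>;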
defm S_BUFFER_LOAD_DWORDX2 : SMRD_Helper <
@@ -500,8 +522,8 @@ defm S_BUFFER_LOAD_DWORDX16 : SMRD_Helper <
//def S_MEMTIME : SMRD_ <0x0000001e, "S_MEMTIME", []>;
//def S_DCACHE_INV : SMRD_ <0x0000001f, "S_DCACHE_INV", []>;
-//def IMAGE_LOAD : MIMG_NoPattern_ <"IMAGE_LOAD", 0x00000000>;
-def IMAGE_LOAD_MIP : MIMG_NoSampler_Helper <0x00000001, "IMAGE_LOAD_MIP">;
+defm IMAGE_LOAD : MIMG_NoSampler <0x00000000, "IMAGE_LOAD">;
+defm IMAGE_LOAD_MIP : MIMG_NoSampler <0x00000001, "IMAGE_LOAD_MIP">;
//def IMAGE_LOAD_PCK : MIMG_NoPattern_ <"IMAGE_LOAD_PCK", 0x00000002>;
//def IMAGE_LOAD_PCK_SGN : MIMG_NoPattern_ <"IMAGE_LOAD_PCK_SGN", 0x00000003>;
//def IMAGE_LOAD_MIP_PCK : MIMG_NoPattern_ <"IMAGE_LOAD_MIP_PCK", 0x00000004>;
@@ -510,7 +532,7 @@ def IMAGE_LOAD_MIP : MIMG_NoSampler_Helper <0x00000001, "IMAGE_LOAD_MIP">;
//def IMAGE_STORE_MIP : MIMG_NoPattern_ <"IMAGE_STORE_MIP", 0x00000009>;
//def IMAGE_STORE_PCK : MIMG_NoPattern_ <"IMAGE_STORE_PCK", 0x0000000a>;
//def IMAGE_STORE_MIP_PCK : MIMG_NoPattern_ <"IMAGE_STORE_MIP_PCK", 0x0000000b>;
-def IMAGE_GET_RESINFO : MIMG_NoSampler_Helper <0x0000000e, "IMAGE_GET_RESINFO">;
+defm IMAGE_GET_RESINFO : MIMG_NoSampler <0x0000000e, "IMAGE_GET_RESINFO">;
//def IMAGE_ATOMIC_SWAP : MIMG_NoPattern_ <"IMAGE_ATOMIC_SWAP", 0x0000000f>;
//def IMAGE_ATOMIC_CMPSWAP : MIMG_NoPattern_ <"IMAGE_ATOMIC_CMPSWAP", 0x00000010>;
//def IMAGE_ATOMIC_ADD : MIMG_NoPattern_ <"IMAGE_ATOMIC_ADD", 0x00000011>;
@@ -528,20 +550,20 @@ def IMAGE_GET_RESINFO : MIMG_NoSampler_Helper <0x0000000e, "IMAGE_GET_RESINFO">;
//def IMAGE_ATOMIC_FCMPSWAP : MIMG_NoPattern_ <"IMAGE_ATOMIC_FCMPSWAP", 0x0000001d>;
//def IMAGE_ATOMIC_FMIN : MIMG_NoPattern_ <"IMAGE_ATOMIC_FMIN", 0x0000001e>;
//def IMAGE_ATOMIC_FMAX : MIMG_NoPattern_ <"IMAGE_ATOMIC_FMAX", 0x0000001f>;
-def IMAGE_SAMPLE : MIMG_Sampler_Helper <0x00000020, "IMAGE_SAMPLE">;
+defm IMAGE_SAMPLE : MIMG_Sampler <0x00000020, "IMAGE_SAMPLE">;
//def IMAGE_SAMPLE_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_CL", 0x00000021>;
-def IMAGE_SAMPLE_D : MIMG_Sampler_Helper <0x00000022, "IMAGE_SAMPLE_D">;
+defm IMAGE_SAMPLE_D : MIMG_Sampler <0x00000022, "IMAGE_SAMPLE_D">;
//def IMAGE_SAMPLE_D_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_D_CL", 0x00000023>;
-def IMAGE_SAMPLE_L : MIMG_Sampler_Helper <0x00000024, "IMAGE_SAMPLE_L">;
-def IMAGE_SAMPLE_B : MIMG_Sampler_Helper <0x00000025, "IMAGE_SAMPLE_B">;
+defm IMAGE_SAMPLE_L : MIMG_Sampler <0x00000024, "IMAGE_SAMPLE_L">;
+defm IMAGE_SAMPLE_B : MIMG_Sampler <0x00000025, "IMAGE_SAMPLE_B">;
//def IMAGE_SAMPLE_B_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_B_CL", 0x00000026>;
//def IMAGE_SAMPLE_LZ : MIMG_NoPattern_ <"IMAGE_SAMPLE_LZ", 0x00000027>;
-def IMAGE_SAMPLE_C : MIMG_Sampler_Helper <0x00000028, "IMAGE_SAMPLE_C">;
+defm IMAGE_SAMPLE_C : MIMG_Sampler <0x00000028, "IMAGE_SAMPLE_C">;
//def IMAGE_SAMPLE_C_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_CL", 0x00000029>;
-def IMAGE_SAMPLE_C_D : MIMG_Sampler_Helper <0x0000002a, "IMAGE_SAMPLE_C_D">;
+defm IMAGE_SAMPLE_C_D : MIMG_Sampler <0x0000002a, "IMAGE_SAMPLE_C_D">;
//def IMAGE_SAMPLE_C_D_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_D_CL", 0x0000002b>;
-def IMAGE_SAMPLE_C_L : MIMG_Sampler_Helper <0x0000002c, "IMAGE_SAMPLE_C_L">;
-def IMAGE_SAMPLE_C_B : MIMG_Sampler_Helper <0x0000002d, "IMAGE_SAMPLE_C_B">;
+defm IMAGE_SAMPLE_C_L : MIMG_Sampler <0x0000002c, "IMAGE_SAMPLE_C_L">;
+defm IMAGE_SAMPLE_C_B : MIMG_Sampler <0x0000002d, "IMAGE_SAMPLE_C_B">;
//def IMAGE_SAMPLE_C_B_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_B_CL", 0x0000002e>;
//def IMAGE_SAMPLE_C_LZ : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_LZ", 0x0000002f>;
//def IMAGE_SAMPLE_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_O", 0x00000030>;
@@ -603,15 +625,21 @@ defm V_MOV_B32 : VOP1_32 <0x00000001, "V_MOV_B32", []>;
} // End neverHasSideEffects = 1, isMoveImm = 1
defm V_READFIRSTLANE_B32 : VOP1_32 <0x00000002, "V_READFIRSTLANE_B32", []>;
-//defm V_CVT_I32_F64 : VOP1_32 <0x00000003, "V_CVT_I32_F64", []>;
-//defm V_CVT_F64_I32 : VOP1_64 <0x00000004, "V_CVT_F64_I32", []>;
+defm V_CVT_I32_F64 : VOP1_32_64 <0x00000003, "V_CVT_I32_F64",
+ [(set i32:$dst, (fp_to_sint f64:$src0))]
+>;
+defm V_CVT_F64_I32 : VOP1_64_32 <0x00000004, "V_CVT_F64_I32",
+ [(set f64:$dst, (sint_to_fp i32:$src0))]
+>;
defm V_CVT_F32_I32 : VOP1_32 <0x00000005, "V_CVT_F32_I32",
[(set f32:$dst, (sint_to_fp i32:$src0))]
>;
defm V_CVT_F32_U32 : VOP1_32 <0x00000006, "V_CVT_F32_U32",
[(set f32:$dst, (uint_to_fp i32:$src0))]
>;
-defm V_CVT_U32_F32 : VOP1_32 <0x00000007, "V_CVT_U32_F32", []>;
+defm V_CVT_U32_F32 : VOP1_32 <0x00000007, "V_CVT_U32_F32",
+ [(set i32:$dst, (fp_to_uint f32:$src0))]
+>;
defm V_CVT_I32_F32 : VOP1_32 <0x00000008, "V_CVT_I32_F32",
[(set i32:$dst, (fp_to_sint f32:$src0))]
>;
@@ -621,8 +649,12 @@ defm V_MOV_FED_B32 : VOP1_32 <0x00000009, "V_MOV_FED_B32", []>;
//defm V_CVT_RPI_I32_F32 : VOP1_32 <0x0000000c, "V_CVT_RPI_I32_F32", []>;
//defm V_CVT_FLR_I32_F32 : VOP1_32 <0x0000000d, "V_CVT_FLR_I32_F32", []>;
//defm V_CVT_OFF_F32_I4 : VOP1_32 <0x0000000e, "V_CVT_OFF_F32_I4", []>;
-//defm V_CVT_F32_F64 : VOP1_32 <0x0000000f, "V_CVT_F32_F64", []>;
-//defm V_CVT_F64_F32 : VOP1_64 <0x00000010, "V_CVT_F64_F32", []>;
+defm V_CVT_F32_F64 : VOP1_32_64 <0x0000000f, "V_CVT_F32_F64",
+ [(set f32:$dst, (fround f64:$src0))]
+>;
+defm V_CVT_F64_F32 : VOP1_64_32 <0x00000010, "V_CVT_F64_F32",
+ [(set f64:$dst, (fextend f32:$src0))]
+>;
//defm V_CVT_F32_UBYTE0 : VOP1_32 <0x00000011, "V_CVT_F32_UBYTE0", []>;
//defm V_CVT_F32_UBYTE1 : VOP1_32 <0x00000012, "V_CVT_F32_UBYTE1", []>;
//defm V_CVT_F32_UBYTE2 : VOP1_32 <0x00000013, "V_CVT_F32_UBYTE2", []>;
@@ -791,7 +823,7 @@ def S_BARRIER : SOPP <0x0000000a, (ins), "S_BARRIER",
let mayStore = 1;
}
-def S_WAITCNT : SOPP <0x0000000c, (ins i32imm:$simm16), "S_WAITCNT $simm16",
+def S_WAITCNT : SOPP <0x0000000c, (ins WAIT_FLAG:$simm16), "S_WAITCNT $simm16",
[]
>;
} // End hasSideEffects
@@ -827,6 +859,11 @@ def : Pat <
(V_CNDMASK_B32_e64 $src0, $src1, $src2)
>;
+def : Pat <
+ (i32 (trunc i64:$val)),
+ (EXTRACT_SUBREG $val, sub0)
+>;
+
// Use two V_CNDMASK_B32_e64 instructions for f64
def : Pat <
(f64 (select i1:$src2, f64:$src1, f64:$src0)),
@@ -910,9 +947,13 @@ defm V_ASHR_I32 : VOP2_32 <0x00000017, "V_ASHR_I32",
>;
defm V_ASHRREV_I32 : VOP2_32 <0x00000018, "V_ASHRREV_I32", [], "V_ASHR_I32">;
+let hasPostISelHook = 1 in {
+
defm V_LSHL_B32 : VOP2_32 <0x00000019, "V_LSHL_B32",
[(set i32:$dst, (shl i32:$src0, i32:$src1))]
>;
+
+}
defm V_LSHLREV_B32 : VOP2_32 <0x0000001a, "V_LSHLREV_B32", [], "V_LSHL_B32">;
defm V_AND_B32 : VOP2_32 <0x0000001b, "V_AND_B32",
@@ -936,16 +977,13 @@ defm V_MBCNT_LO_U32_B32 : VOP2_32 <0x00000023, "V_MBCNT_LO_U32_B32", []>;
defm V_MBCNT_HI_U32_B32 : VOP2_32 <0x00000024, "V_MBCNT_HI_U32_B32", []>;
let isCommutable = 1, Defs = [VCC] in { // Carry-out goes to VCC
-defm V_ADD_I32 : VOP2b_32 <0x00000025, "V_ADD_I32",
- [(set i32:$dst, (add (i32 VSrc_32:$src0), (i32 VReg_32:$src1)))]
->;
-
-defm V_SUB_I32 : VOP2b_32 <0x00000026, "V_SUB_I32",
- [(set i32:$dst, (sub i32:$src0, i32:$src1))]
->;
+// No patterns, so that the scalar instructions are always selected.
+// The scalar versions will be replaced with the vector versions when needed.
+defm V_ADD_I32 : VOP2b_32 <0x00000025, "V_ADD_I32", []>;
+defm V_SUB_I32 : VOP2b_32 <0x00000026, "V_SUB_I32", []>;
defm V_SUBREV_I32 : VOP2b_32 <0x00000027, "V_SUBREV_I32", [], "V_SUB_I32">;
-let Uses = [VCC] in { // Carry-out comes from VCC
+let Uses = [VCC] in { // Carry-in comes from VCC
defm V_ADDC_U32 : VOP2b_32 <0x00000028, "V_ADDC_U32", []>;
defm V_SUBB_U32 : VOP2b_32 <0x00000029, "V_SUBB_U32", []>;
defm V_SUBBREV_U32 : VOP2b_32 <0x0000002a, "V_SUBBREV_U32", [], "V_SUBB_U32">;
@@ -999,8 +1037,12 @@ def V_BFE_U32 : VOP3_32 <0x00000148, "V_BFE_U32", []>;
def V_BFE_I32 : VOP3_32 <0x00000149, "V_BFE_I32", []>;
def V_BFI_B32 : VOP3_32 <0x0000014a, "V_BFI_B32", []>;
defm : BFIPatterns <V_BFI_B32>;
-def V_FMA_F32 : VOP3_32 <0x0000014b, "V_FMA_F32", []>;
-def V_FMA_F64 : VOP3_64 <0x0000014c, "V_FMA_F64", []>;
+def V_FMA_F32 : VOP3_32 <0x0000014b, "V_FMA_F32",
+ [(set f32:$dst, (fma f32:$src0, f32:$src1, f32:$src2))]
+>;
+def V_FMA_F64 : VOP3_64 <0x0000014c, "V_FMA_F64",
+ [(set f64:$dst, (fma f64:$src0, f64:$src1, f64:$src2))]
+>;
//def V_LERP_U8 : VOP3_U8 <0x0000014d, "V_LERP_U8", []>;
def V_ALIGNBIT_B32 : VOP3_32 <0x0000014e, "V_ALIGNBIT_B32", []>;
def : ROTRPattern <V_ALIGNBIT_B32>;
@@ -1087,12 +1129,31 @@ def V_DIV_FMAS_F64 : VOP3_64 <0x00000170, "V_DIV_FMAS_F64", []>;
//def V_QSAD_U8 : VOP3_U8 <0x00000172, "V_QSAD_U8", []>;
//def V_MQSAD_U8 : VOP3_U8 <0x00000173, "V_MQSAD_U8", []>;
def V_TRIG_PREOP_F64 : VOP3_64 <0x00000174, "V_TRIG_PREOP_F64", []>;
+
+let Defs = [SCC] in { // Carry out goes to SCC
+let isCommutable = 1 in {
def S_ADD_U32 : SOP2_32 <0x00000000, "S_ADD_U32", []>;
+def S_ADD_I32 : SOP2_32 <0x00000002, "S_ADD_I32",
+ [(set i32:$dst, (add SSrc_32:$src0, SSrc_32:$src1))]
+>;
+} // End isCommutable = 1
+
def S_SUB_U32 : SOP2_32 <0x00000001, "S_SUB_U32", []>;
-def S_ADD_I32 : SOP2_32 <0x00000002, "S_ADD_I32", []>;
-def S_SUB_I32 : SOP2_32 <0x00000003, "S_SUB_I32", []>;
-def S_ADDC_U32 : SOP2_32 <0x00000004, "S_ADDC_U32", []>;
-def S_SUBB_U32 : SOP2_32 <0x00000005, "S_SUBB_U32", []>;
+def S_SUB_I32 : SOP2_32 <0x00000003, "S_SUB_I32",
+ [(set i32:$dst, (sub SSrc_32:$src0, SSrc_32:$src1))]
+>;
+
+let Uses = [SCC] in { // Carry in comes from SCC
+let isCommutable = 1 in {
+def S_ADDC_U32 : SOP2_32 <0x00000004, "S_ADDC_U32",
+ [(set i32:$dst, (adde (i32 SSrc_32:$src0), (i32 SSrc_32:$src1)))]>;
+} // End isCommutable = 1
+
+def S_SUBB_U32 : SOP2_32 <0x00000005, "S_SUBB_U32",
+ [(set i32:$dst, (sube (i32 SSrc_32:$src0), (i32 SSrc_32:$src1)))]>;
+} // End Uses = [SCC]
+} // End Defs = [SCC]
+
def S_MIN_I32 : SOP2_32 <0x00000006, "S_MIN_I32", []>;
def S_MIN_U32 : SOP2_32 <0x00000007, "S_MIN_U32", []>;
def S_MAX_I32 : SOP2_32 <0x00000008, "S_MAX_I32", []>;
@@ -1124,7 +1185,9 @@ def : Pat <
(S_OR_B64 $src0, $src1)
>;
def S_XOR_B32 : SOP2_32 <0x00000012, "S_XOR_B32", []>;
-def S_XOR_B64 : SOP2_64 <0x00000013, "S_XOR_B64", []>;
+def S_XOR_B64 : SOP2_64 <0x00000013, "S_XOR_B64",
+ [(set i1:$dst, (xor i1:$src0, i1:$src1))]
+>;
def S_ANDN2_B32 : SOP2_32 <0x00000014, "S_ANDN2_B32", []>;
def S_ANDN2_B64 : SOP2_64 <0x00000015, "S_ANDN2_B64", []>;
def S_ORN2_B32 : SOP2_32 <0x00000016, "S_ORN2_B32", []>;
@@ -1135,12 +1198,31 @@ def S_NOR_B32 : SOP2_32 <0x0000001a, "S_NOR_B32", []>;
def S_NOR_B64 : SOP2_64 <0x0000001b, "S_NOR_B64", []>;
def S_XNOR_B32 : SOP2_32 <0x0000001c, "S_XNOR_B32", []>;
def S_XNOR_B64 : SOP2_64 <0x0000001d, "S_XNOR_B64", []>;
-def S_LSHL_B32 : SOP2_32 <0x0000001e, "S_LSHL_B32", []>;
-def S_LSHL_B64 : SOP2_64 <0x0000001f, "S_LSHL_B64", []>;
-def S_LSHR_B32 : SOP2_32 <0x00000020, "S_LSHR_B32", []>;
-def S_LSHR_B64 : SOP2_64 <0x00000021, "S_LSHR_B64", []>;
-def S_ASHR_I32 : SOP2_32 <0x00000022, "S_ASHR_I32", []>;
-def S_ASHR_I64 : SOP2_64 <0x00000023, "S_ASHR_I64", []>;
+
+// Use added complexity so these patterns are preferred to the VALU patterns.
+let AddedComplexity = 1 in {
+
+def S_LSHL_B32 : SOP2_32 <0x0000001e, "S_LSHL_B32",
+ [(set i32:$dst, (shl i32:$src0, i32:$src1))]
+>;
+def S_LSHL_B64 : SOP2_SHIFT_64 <0x0000001f, "S_LSHL_B64",
+ [(set i64:$dst, (shl i64:$src0, i32:$src1))]
+>;
+def S_LSHR_B32 : SOP2_32 <0x00000020, "S_LSHR_B32",
+ [(set i32:$dst, (srl i32:$src0, i32:$src1))]
+>;
+def S_LSHR_B64 : SOP2_SHIFT_64 <0x00000021, "S_LSHR_B64",
+ [(set i64:$dst, (srl i64:$src0, i32:$src1))]
+>;
+def S_ASHR_I32 : SOP2_32 <0x00000022, "S_ASHR_I32",
+ [(set i32:$dst, (sra i32:$src0, i32:$src1))]
+>;
+def S_ASHR_I64 : SOP2_SHIFT_64 <0x00000023, "S_ASHR_I64",
+ [(set i64:$dst, (sra i64:$src0, i32:$src1))]
+>;
+
+} // End AddedComplexity = 1
+
def S_BFM_B32 : SOP2_32 <0x00000024, "S_BFM_B32", []>;
def S_BFM_B64 : SOP2_64 <0x00000025, "S_BFM_B64", []>;
def S_MUL_I32 : SOP2_32 <0x00000026, "S_MUL_I32", []>;
@@ -1160,7 +1242,7 @@ def LOAD_CONST : AMDGPUShaderInst <
[(set GPRF32:$dst, (int_AMDGPU_load_const imm:$src))]
>;
-// SI Psuedo instructions. These are used by the CFG structurizer pass
+// SI pseudo instructions. These are used by the CFG structurizer pass
// and should be lowered to ISA instructions prior to codegen.
let mayLoad = 1, mayStore = 1, hasSideEffects = 1,
@@ -1233,6 +1315,36 @@ def SI_KILL : InstSI <
let Uses = [EXEC], Defs = [EXEC,VCC,M0] in {
+//defm SI_ : RegisterLoadStore <VReg_32, FRAMEri64, ADDRIndirect>;
+
+let UseNamedOperandTable = 1 in {
+
+def SI_RegisterLoad : AMDGPUShaderInst <
+ (outs VReg_32:$dst, SReg_64:$temp),
+ (ins FRAMEri64:$addr, i32imm:$chan),
+ "", []
+> {
+ let isRegisterLoad = 1;
+ let mayLoad = 1;
+}
+
+class SIRegStore<dag outs> : AMDGPUShaderInst <
+ outs,
+ (ins VReg_32:$val, FRAMEri64:$addr, i32imm:$chan),
+ "", []
+> {
+ let isRegisterStore = 1;
+ let mayStore = 1;
+}
+
+let usesCustomInserter = 1 in {
+def SI_RegisterStorePseudo : SIRegStore<(outs)>;
+} // End usesCustomInserter = 1
+def SI_RegisterStore : SIRegStore<(outs SReg_64:$temp)>;
+
+
+} // End UseNamedOperandTable = 1
+
def SI_INDIRECT_SRC : InstSI <
(outs VReg_32:$dst, SReg_64:$temp),
(ins unknown:$src, VSrc_32:$idx, i32imm:$off),
@@ -1249,6 +1361,7 @@ class SI_INDIRECT_DST<RegisterClass rc> : InstSI <
let Constraints = "$src = $dst";
}
+def SI_INDIRECT_DST_V1 : SI_INDIRECT_DST<VReg_32>;
def SI_INDIRECT_DST_V2 : SI_INDIRECT_DST<VReg_64>;
def SI_INDIRECT_DST_V4 : SI_INDIRECT_DST<VReg_128>;
def SI_INDIRECT_DST_V8 : SI_INDIRECT_DST<VReg_256>;
@@ -1258,7 +1371,7 @@ def SI_INDIRECT_DST_V16 : SI_INDIRECT_DST<VReg_512>;
let usesCustomInserter = 1 in {
-// This psuedo instruction takes a pointer as input and outputs a resource
+// This pseudo instruction takes a pointer as input and outputs a resource
// constant that can be used with the ADDR64 MUBUF instructions.
def SI_ADDR64_RSRC : InstSI <
(outs SReg_128:$srsrc),
@@ -1289,7 +1402,7 @@ def : Pat <
/* int_SI_vs_load_input */
def : Pat<
- (int_SI_vs_load_input v16i8:$tlst, IMM12bit:$attr_offset, i32:$buf_idx_vgpr),
+ (SIload_input i128:$tlst, IMM12bit:$attr_offset, i32:$buf_idx_vgpr),
(BUFFER_LOAD_FORMAT_XYZW_IDXEN $tlst, $buf_idx_vgpr, imm:$attr_offset)
>;
@@ -1310,67 +1423,85 @@ def : Pat <
/********** Image sampling patterns **********/
/********** ======================= **********/
-/* int_SI_sample for simple 1D texture lookup */
+/* SIsample for simple 1D texture lookup */
def : Pat <
- (int_SI_sample v1i32:$addr, v32i8:$rsrc, v16i8:$sampler, imm),
- (IMAGE_SAMPLE 0xf, 0, 0, 0, 0, 0, 0, 0, $addr, $rsrc, $sampler)
+ (SIsample i32:$addr, v32i8:$rsrc, i128:$sampler, imm),
+ (IMAGE_SAMPLE_V4_V1 0xf, 0, 0, 0, 0, 0, 0, 0, $addr, $rsrc, $sampler)
>;
-class SamplePattern<Intrinsic name, MIMG opcode, ValueType vt> : Pat <
- (name vt:$addr, v32i8:$rsrc, v16i8:$sampler, imm),
+class SamplePattern<SDNode name, MIMG opcode, ValueType vt> : Pat <
+ (name vt:$addr, v32i8:$rsrc, i128:$sampler, imm),
(opcode 0xf, 0, 0, 0, 0, 0, 0, 0, $addr, $rsrc, $sampler)
>;
-class SampleRectPattern<Intrinsic name, MIMG opcode, ValueType vt> : Pat <
- (name vt:$addr, v32i8:$rsrc, v16i8:$sampler, TEX_RECT),
+class SampleRectPattern<SDNode name, MIMG opcode, ValueType vt> : Pat <
+ (name vt:$addr, v32i8:$rsrc, i128:$sampler, TEX_RECT),
(opcode 0xf, 1, 0, 0, 0, 0, 0, 0, $addr, $rsrc, $sampler)
>;
-class SampleArrayPattern<Intrinsic name, MIMG opcode, ValueType vt> : Pat <
- (name vt:$addr, v32i8:$rsrc, v16i8:$sampler, TEX_ARRAY),
+class SampleArrayPattern<SDNode name, MIMG opcode, ValueType vt> : Pat <
+ (name vt:$addr, v32i8:$rsrc, i128:$sampler, TEX_ARRAY),
(opcode 0xf, 0, 0, 1, 0, 0, 0, 0, $addr, $rsrc, $sampler)
>;
-class SampleShadowPattern<Intrinsic name, MIMG opcode,
+class SampleShadowPattern<SDNode name, MIMG opcode,
ValueType vt> : Pat <
- (name vt:$addr, v32i8:$rsrc, v16i8:$sampler, TEX_SHADOW),
+ (name vt:$addr, v32i8:$rsrc, i128:$sampler, TEX_SHADOW),
(opcode 0xf, 0, 0, 0, 0, 0, 0, 0, $addr, $rsrc, $sampler)
>;
-class SampleShadowArrayPattern<Intrinsic name, MIMG opcode,
+class SampleShadowArrayPattern<SDNode name, MIMG opcode,
ValueType vt> : Pat <
- (name vt:$addr, v32i8:$rsrc, v16i8:$sampler, TEX_SHADOW_ARRAY),
+ (name vt:$addr, v32i8:$rsrc, i128:$sampler, TEX_SHADOW_ARRAY),
(opcode 0xf, 0, 0, 1, 0, 0, 0, 0, $addr, $rsrc, $sampler)
>;
-/* int_SI_sample* for texture lookups consuming more address parameters */
-multiclass SamplePatterns<ValueType addr_type> {
- def : SamplePattern <int_SI_sample, IMAGE_SAMPLE, addr_type>;
- def : SampleRectPattern <int_SI_sample, IMAGE_SAMPLE, addr_type>;
- def : SampleArrayPattern <int_SI_sample, IMAGE_SAMPLE, addr_type>;
- def : SampleShadowPattern <int_SI_sample, IMAGE_SAMPLE_C, addr_type>;
- def : SampleShadowArrayPattern <int_SI_sample, IMAGE_SAMPLE_C, addr_type>;
-
- def : SamplePattern <int_SI_samplel, IMAGE_SAMPLE_L, addr_type>;
- def : SampleArrayPattern <int_SI_samplel, IMAGE_SAMPLE_L, addr_type>;
- def : SampleShadowPattern <int_SI_samplel, IMAGE_SAMPLE_C_L, addr_type>;
- def : SampleShadowArrayPattern <int_SI_samplel, IMAGE_SAMPLE_C_L, addr_type>;
-
- def : SamplePattern <int_SI_sampleb, IMAGE_SAMPLE_B, addr_type>;
- def : SampleArrayPattern <int_SI_sampleb, IMAGE_SAMPLE_B, addr_type>;
- def : SampleShadowPattern <int_SI_sampleb, IMAGE_SAMPLE_C_B, addr_type>;
- def : SampleShadowArrayPattern <int_SI_sampleb, IMAGE_SAMPLE_C_B, addr_type>;
-
- def : SamplePattern <int_SI_sampled, IMAGE_SAMPLE_D, addr_type>;
- def : SampleArrayPattern <int_SI_sampled, IMAGE_SAMPLE_D, addr_type>;
- def : SampleShadowPattern <int_SI_sampled, IMAGE_SAMPLE_C_D, addr_type>;
- def : SampleShadowArrayPattern <int_SI_sampled, IMAGE_SAMPLE_C_D, addr_type>;
+/* SIsample* for texture lookups consuming more address parameters */
+multiclass SamplePatterns<MIMG sample, MIMG sample_c, MIMG sample_l,
+ MIMG sample_c_l, MIMG sample_b, MIMG sample_c_b,
+ MIMG sample_d, MIMG sample_c_d, ValueType addr_type> {
+ def : SamplePattern <SIsample, sample, addr_type>;
+ def : SampleRectPattern <SIsample, sample, addr_type>;
+ def : SampleArrayPattern <SIsample, sample, addr_type>;
+ def : SampleShadowPattern <SIsample, sample_c, addr_type>;
+ def : SampleShadowArrayPattern <SIsample, sample_c, addr_type>;
+
+ def : SamplePattern <SIsamplel, sample_l, addr_type>;
+ def : SampleArrayPattern <SIsamplel, sample_l, addr_type>;
+ def : SampleShadowPattern <SIsamplel, sample_c_l, addr_type>;
+ def : SampleShadowArrayPattern <SIsamplel, sample_c_l, addr_type>;
+
+ def : SamplePattern <SIsampleb, sample_b, addr_type>;
+ def : SampleArrayPattern <SIsampleb, sample_b, addr_type>;
+ def : SampleShadowPattern <SIsampleb, sample_c_b, addr_type>;
+ def : SampleShadowArrayPattern <SIsampleb, sample_c_b, addr_type>;
+
+ def : SamplePattern <SIsampled, sample_d, addr_type>;
+ def : SampleArrayPattern <SIsampled, sample_d, addr_type>;
+ def : SampleShadowPattern <SIsampled, sample_c_d, addr_type>;
+ def : SampleShadowArrayPattern <SIsampled, sample_c_d, addr_type>;
}
-defm : SamplePatterns<v2i32>;
-defm : SamplePatterns<v4i32>;
-defm : SamplePatterns<v8i32>;
-defm : SamplePatterns<v16i32>;
+defm : SamplePatterns<IMAGE_SAMPLE_V4_V2, IMAGE_SAMPLE_C_V4_V2,
+ IMAGE_SAMPLE_L_V4_V2, IMAGE_SAMPLE_C_L_V4_V2,
+ IMAGE_SAMPLE_B_V4_V2, IMAGE_SAMPLE_C_B_V4_V2,
+ IMAGE_SAMPLE_D_V4_V2, IMAGE_SAMPLE_C_D_V4_V2,
+ v2i32>;
+defm : SamplePatterns<IMAGE_SAMPLE_V4_V4, IMAGE_SAMPLE_C_V4_V4,
+ IMAGE_SAMPLE_L_V4_V4, IMAGE_SAMPLE_C_L_V4_V4,
+ IMAGE_SAMPLE_B_V4_V4, IMAGE_SAMPLE_C_B_V4_V4,
+ IMAGE_SAMPLE_D_V4_V4, IMAGE_SAMPLE_C_D_V4_V4,
+ v4i32>;
+defm : SamplePatterns<IMAGE_SAMPLE_V4_V8, IMAGE_SAMPLE_C_V4_V8,
+ IMAGE_SAMPLE_L_V4_V8, IMAGE_SAMPLE_C_L_V4_V8,
+ IMAGE_SAMPLE_B_V4_V8, IMAGE_SAMPLE_C_B_V4_V8,
+ IMAGE_SAMPLE_D_V4_V8, IMAGE_SAMPLE_C_D_V4_V8,
+ v8i32>;
+defm : SamplePatterns<IMAGE_SAMPLE_V4_V16, IMAGE_SAMPLE_C_V4_V16,
+ IMAGE_SAMPLE_L_V4_V16, IMAGE_SAMPLE_C_L_V4_V16,
+ IMAGE_SAMPLE_B_V4_V16, IMAGE_SAMPLE_C_B_V4_V16,
+ IMAGE_SAMPLE_D_V4_V16, IMAGE_SAMPLE_C_D_V4_V16,
+ v16i32>;
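
Each SamplePatterns instantiation above stamps out the whole sample/rect/array/shadow set for one address width. For reference, the first def produced by the v2i32 instantiation is equivalent to this hand-written pattern:

def : Pat <
  (SIsample v2i32:$addr, v32i8:$rsrc, i128:$sampler, imm),
  (IMAGE_SAMPLE_V4_V2 0xf, 0, 0, 0, 0, 0, 0, 0, $addr, $rsrc, $sampler)
>;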
/* int_SI_imageload for texture fetches consuming varying address parameters */
class ImageLoadPattern<Intrinsic name, MIMG opcode, ValueType addr_type> : Pat <
@@ -1383,23 +1514,46 @@ class ImageLoadArrayPattern<Intrinsic name, MIMG opcode, ValueType addr_type> :
(opcode 0xf, 0, 0, 1, 0, 0, 0, 0, $addr, $rsrc)
>;
-multiclass ImageLoadPatterns<ValueType addr_type> {
- def : ImageLoadPattern <int_SI_imageload, IMAGE_LOAD_MIP, addr_type>;
- def : ImageLoadArrayPattern <int_SI_imageload, IMAGE_LOAD_MIP, addr_type>;
+class ImageLoadMSAAPattern<Intrinsic name, MIMG opcode, ValueType addr_type> : Pat <
+ (name addr_type:$addr, v32i8:$rsrc, TEX_MSAA),
+ (opcode 0xf, 0, 0, 0, 0, 0, 0, 0, $addr, $rsrc)
+>;
+
+class ImageLoadArrayMSAAPattern<Intrinsic name, MIMG opcode, ValueType addr_type> : Pat <
+ (name addr_type:$addr, v32i8:$rsrc, TEX_ARRAY_MSAA),
+ (opcode 0xf, 0, 0, 1, 0, 0, 0, 0, $addr, $rsrc)
+>;
+
+multiclass ImageLoadPatterns<MIMG opcode, ValueType addr_type> {
+ def : ImageLoadPattern <int_SI_imageload, opcode, addr_type>;
+ def : ImageLoadArrayPattern <int_SI_imageload, opcode, addr_type>;
+}
+
+multiclass ImageLoadMSAAPatterns<MIMG opcode, ValueType addr_type> {
+ def : ImageLoadMSAAPattern <int_SI_imageload, opcode, addr_type>;
+ def : ImageLoadArrayMSAAPattern <int_SI_imageload, opcode, addr_type>;
}
-defm : ImageLoadPatterns<v2i32>;
-defm : ImageLoadPatterns<v4i32>;
+defm : ImageLoadPatterns<IMAGE_LOAD_MIP_V4_V2, v2i32>;
+defm : ImageLoadPatterns<IMAGE_LOAD_MIP_V4_V4, v4i32>;
+
+defm : ImageLoadMSAAPatterns<IMAGE_LOAD_V4_V2, v2i32>;
+defm : ImageLoadMSAAPatterns<IMAGE_LOAD_V4_V4, v4i32>;
/* Image resource information */
def : Pat <
(int_SI_resinfo i32:$mipid, v32i8:$rsrc, imm),
- (IMAGE_GET_RESINFO 0xf, 0, 0, 0, 0, 0, 0, 0, (V_MOV_B32_e32 $mipid), $rsrc)
+ (IMAGE_GET_RESINFO_V4_V1 0xf, 0, 0, 0, 0, 0, 0, 0, (V_MOV_B32_e32 $mipid), $rsrc)
>;
def : Pat <
(int_SI_resinfo i32:$mipid, v32i8:$rsrc, TEX_ARRAY),
- (IMAGE_GET_RESINFO 0xf, 0, 0, 1, 0, 0, 0, 0, (V_MOV_B32_e32 $mipid), $rsrc)
+ (IMAGE_GET_RESINFO_V4_V1 0xf, 0, 0, 1, 0, 0, 0, 0, (V_MOV_B32_e32 $mipid), $rsrc)
+>;
+
+def : Pat <
+ (int_SI_resinfo i32:$mipid, v32i8:$rsrc, TEX_ARRAY_MSAA),
+ (IMAGE_GET_RESINFO_V4_V1 0xf, 0, 0, 1, 0, 0, 0, 0, (V_MOV_B32_e32 $mipid), $rsrc)
>;
/********** ============================================ **********/
@@ -1470,16 +1624,6 @@ foreach Index = 0-15 in {
>;
}
-def : Vector1_Build <v1i32, i32, VReg_32>;
-def : Vector2_Build <v2i32, i32>;
-def : Vector2_Build <v2f32, f32>;
-def : Vector4_Build <v4i32, i32>;
-def : Vector4_Build <v4f32, f32>;
-def : Vector8_Build <v8i32, i32>;
-def : Vector8_Build <v8f32, f32>;
-def : Vector16_Build <v16i32, i32>;
-def : Vector16_Build <v16f32, f32>;
-
def : BitConvert <i32, f32, SReg_32>;
def : BitConvert <i32, f32, VReg_32>;
@@ -1492,9 +1636,17 @@ def : BitConvert <f64, i64, VReg_64>;
def : BitConvert <v2f32, v2i32, VReg_64>;
def : BitConvert <v2i32, v2f32, VReg_64>;
+def : BitConvert <v2i32, i64, VReg_64>;
def : BitConvert <v4f32, v4i32, VReg_128>;
def : BitConvert <v4i32, v4f32, VReg_128>;
+def : BitConvert <v4i32, i128, VReg_128>;
+def : BitConvert <i128, v4i32, VReg_128>;
+
+def : BitConvert <v8i32, v32i8, SReg_256>;
+def : BitConvert <v32i8, v8i32, SReg_256>;
+def : BitConvert <v8i32, v32i8, VReg_256>;
+def : BitConvert <v32i8, v8i32, VReg_256>;
/********** =================== **********/
/********** Src & Dst modifiers **********/
@@ -1523,6 +1675,16 @@ def : Pat <
/********** ================== **********/
def : Pat <
+ (SGPRImm<(i32 imm)>:$imm),
+ (S_MOV_B32 imm:$imm)
+>;
+
+def : Pat <
+ (SGPRImm<(f32 fpimm)>:$imm),
+ (S_MOV_B32 fpimm:$imm)
+>;
+
+def : Pat <
(i32 imm:$imm),
(V_MOV_B32_e32 imm:$imm)
>;
@@ -1634,19 +1796,19 @@ def : Pat <
// 1. Offset as 8bit DWORD immediate
def : Pat <
- (int_SI_load_const v16i8:$sbase, IMM8bitDWORD:$offset),
+ (SIload_constant i128:$sbase, IMM8bitDWORD:$offset),
(S_BUFFER_LOAD_DWORD_IMM $sbase, IMM8bitDWORD:$offset)
>;
// 2. Offset loaded in a 32-bit SGPR
def : Pat <
- (int_SI_load_const v16i8:$sbase, imm:$offset),
+ (SIload_constant i128:$sbase, imm:$offset),
(S_BUFFER_LOAD_DWORD_SGPR $sbase, (S_MOV_B32 imm:$offset))
>;
// 3. Offset in a 32-bit VGPR
def : Pat <
- (int_SI_load_const v16i8:$sbase, i32:$voff),
+ (SIload_constant i128:$sbase, i32:$voff),
(BUFFER_LOAD_DWORD_OFFEN $sbase, $voff)
>;
@@ -1677,17 +1839,36 @@ def : Pat <
/********** Load/Store Patterns **********/
/********** ======================= **********/
-def : Pat <
- (local_load i64:$src0),
- (i32 (DS_READ_B32 0, (EXTRACT_SUBREG $src0, sub0),
- (EXTRACT_SUBREG $src0, sub0), (EXTRACT_SUBREG $src0, sub0), 0, 0))
+class DSReadPat <DS inst, ValueType vt, PatFrag frag> : Pat <
+ (frag i32:$src0),
+ (vt (inst 0, $src0, $src0, $src0, 0, 0))
>;
+def : DSReadPat <DS_READ_I8, i32, sextloadi8_local>;
+def : DSReadPat <DS_READ_U8, i32, az_extloadi8_local>;
+def : DSReadPat <DS_READ_I16, i32, sextloadi16_local>;
+def : DSReadPat <DS_READ_U16, i32, az_extloadi16_local>;
+def : DSReadPat <DS_READ_B32, i32, local_load>;
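
Each DSReadPat instantiation is shorthand for an ordinary anonymous pattern; written out by hand, the first one reads:

def : Pat <
  (sextloadi8_local i32:$src0),
  (i32 (DS_READ_I8 0, $src0, $src0, $src0, 0, 0))
>;

(The explicit local_load pattern kept just below is the same expansion for DS_READ_B32.)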
def : Pat <
- (local_store i32:$src1, i64:$src0),
- (DS_WRITE_B32 0, (EXTRACT_SUBREG $src0, sub0), $src1, $src1, 0, 0)
+ (local_load i32:$src0),
+ (i32 (DS_READ_B32 0, $src0, $src0, $src0, 0, 0))
+>;
+
+class DSWritePat <DS inst, ValueType vt, PatFrag frag> : Pat <
+ (frag i32:$src1, i32:$src0),
+ (inst 0, $src0, $src1, $src1, 0, 0)
>;
+def : DSWritePat <DS_WRITE_B8, i32, truncstorei8_local>;
+def : DSWritePat <DS_WRITE_B16, i32, truncstorei16_local>;
+def : DSWritePat <DS_WRITE_B32, i32, local_store>;
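
DSWritePat expands the same way. Note the operand order in the source pattern: LLVM store nodes take the stored value first ($src1) and the address second ($src0), so for the byte store the expansion is:

def : Pat <
  (truncstorei8_local i32:$src1, i32:$src0),
  (DS_WRITE_B8 0, $src0, $src1, $src1, 0, 0)
>;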
+
+def : Pat <(atomic_load_add_local i32:$ptr, i32:$val),
+ (DS_ADD_U32_RTN 0, $ptr, $val, 0, 0)>;
+
+def : Pat <(atomic_load_sub_local i32:$ptr, i32:$val),
+ (DS_SUB_U32_RTN 0, $ptr, $val, 0, 0)>;
+
/********** ================== **********/
/********** SMRD Patterns **********/
/********** ================== **********/
@@ -1717,8 +1898,11 @@ defm : SMRD_Pattern <S_LOAD_DWORD_IMM, S_LOAD_DWORD_SGPR, f32>;
defm : SMRD_Pattern <S_LOAD_DWORD_IMM, S_LOAD_DWORD_SGPR, i32>;
defm : SMRD_Pattern <S_LOAD_DWORDX2_IMM, S_LOAD_DWORDX2_SGPR, i64>;
defm : SMRD_Pattern <S_LOAD_DWORDX2_IMM, S_LOAD_DWORDX2_SGPR, v2i32>;
-defm : SMRD_Pattern <S_LOAD_DWORDX4_IMM, S_LOAD_DWORDX4_SGPR, v16i8>;
+defm : SMRD_Pattern <S_LOAD_DWORDX4_IMM, S_LOAD_DWORDX4_SGPR, i128>;
+defm : SMRD_Pattern <S_LOAD_DWORDX4_IMM, S_LOAD_DWORDX4_SGPR, v4i32>;
defm : SMRD_Pattern <S_LOAD_DWORDX8_IMM, S_LOAD_DWORDX8_SGPR, v32i8>;
+defm : SMRD_Pattern <S_LOAD_DWORDX8_IMM, S_LOAD_DWORDX8_SGPR, v8i32>;
+defm : SMRD_Pattern <S_LOAD_DWORDX16_IMM, S_LOAD_DWORDX16_SGPR, v16i32>;
//===----------------------------------------------------------------------===//
// MUBUF Patterns
@@ -1766,23 +1950,46 @@ defm : MUBUFLoad_Pattern <BUFFER_LOAD_DWORDX2_ADDR64, v2i32,
defm : MUBUFLoad_Pattern <BUFFER_LOAD_DWORDX4_ADDR64, v4i32,
global_load, constant_load>;
-multiclass MUBUFStore_Pattern <MUBUF Instr, ValueType vt> {
+multiclass MUBUFStore_Pattern <MUBUF Instr, ValueType vt, PatFrag st> {
def : Pat <
- (global_store vt:$value, i64:$ptr),
+ (st vt:$value, i64:$ptr),
(Instr $value, (SI_ADDR64_RSRC (i64 0)), $ptr, 0)
>;
def : Pat <
- (global_store vt:$value, (add i64:$ptr, i64:$offset)),
+ (st vt:$value, (add i64:$ptr, i64:$offset)),
(Instr $value, (SI_ADDR64_RSRC $ptr), $offset, 0)
>;
}
-defm : MUBUFStore_Pattern <BUFFER_STORE_DWORD, i32>;
-defm : MUBUFStore_Pattern <BUFFER_STORE_DWORDX2, i64>;
-defm : MUBUFStore_Pattern <BUFFER_STORE_DWORDX2, v2i32>;
-defm : MUBUFStore_Pattern <BUFFER_STORE_DWORDX4, v4i32>;
+defm : MUBUFStore_Pattern <BUFFER_STORE_BYTE, i32, truncstorei8_global>;
+defm : MUBUFStore_Pattern <BUFFER_STORE_SHORT, i32, truncstorei16_global>;
+defm : MUBUFStore_Pattern <BUFFER_STORE_DWORD, i32, global_store>;
+defm : MUBUFStore_Pattern <BUFFER_STORE_DWORDX2, i64, global_store>;
+defm : MUBUFStore_Pattern <BUFFER_STORE_DWORDX2, v2i32, global_store>;
+defm : MUBUFStore_Pattern <BUFFER_STORE_DWORDX4, v4i32, global_store>;
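
Each MUBUFStore_Pattern line expands to two anonymous patterns, one for a bare 64-bit pointer and one for the pointer-plus-offset form. The first pattern of the byte-store instantiation, written out by hand, is:

def : Pat <
  (truncstorei8_global i32:$value, i64:$ptr),
  (BUFFER_STORE_BYTE $value, (SI_ADDR64_RSRC (i64 0)), $ptr, 0)
>;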
+
+//===----------------------------------------------------------------------===//
+// MTBUF Patterns
+//===----------------------------------------------------------------------===//
+
+// TBUFFER_STORE_FORMAT_*, addr64=0
+class MTBUF_StoreResource <ValueType vt, int num_channels, MTBUF opcode> : Pat<
+ (SItbuffer_store i128:$rsrc, vt:$vdata, num_channels, i32:$vaddr,
+ i32:$soffset, imm:$inst_offset, imm:$dfmt,
+ imm:$nfmt, imm:$offen, imm:$idxen,
+ imm:$glc, imm:$slc, imm:$tfe),
+ (opcode
+ $vdata, (as_i16imm $inst_offset), (as_i1imm $offen), (as_i1imm $idxen),
+ (as_i1imm $glc), 0, (as_i8imm $dfmt), (as_i8imm $nfmt), $vaddr, $rsrc,
+ (as_i1imm $slc), (as_i1imm $tfe), $soffset)
+>;
+
+def : MTBUF_StoreResource <i32, 1, TBUFFER_STORE_FORMAT_X>;
+def : MTBUF_StoreResource <v2i32, 2, TBUFFER_STORE_FORMAT_XY>;
+def : MTBUF_StoreResource <v4i32, 3, TBUFFER_STORE_FORMAT_XYZ>;
+def : MTBUF_StoreResource <v4i32, 4, TBUFFER_STORE_FORMAT_XYZW>;
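
Again these are plain pattern instantiations; the single-channel def reduces to the following (a mechanical expansion of the class above, shown for reference):

def : Pat <
  (SItbuffer_store i128:$rsrc, i32:$vdata, 1, i32:$vaddr,
                   i32:$soffset, imm:$inst_offset, imm:$dfmt,
                   imm:$nfmt, imm:$offen, imm:$idxen,
                   imm:$glc, imm:$slc, imm:$tfe),
  (TBUFFER_STORE_FORMAT_X
    $vdata, (as_i16imm $inst_offset), (as_i1imm $offen), (as_i1imm $idxen),
    (as_i1imm $glc), 0, (as_i8imm $dfmt), (as_i8imm $nfmt), $vaddr, $rsrc,
    (as_i1imm $slc), (as_i1imm $tfe), $soffset)
>;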
/********** ====================== **********/
/********** Indirect addressing **********/
@@ -1834,6 +2041,37 @@ def : Pat<
(V_CMP_U_F32_e64 $src0, $src1)
>;
+//===----------------------------------------------------------------------===//
+// Miscellaneous Patterns
+//===----------------------------------------------------------------------===//
+
+def : Pat <
+ (i64 (trunc i128:$x)),
+ (INSERT_SUBREG (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
+ (i32 (EXTRACT_SUBREG $x, sub0)), sub0),
+ (i32 (EXTRACT_SUBREG $x, sub1)), sub1)
+>;
+
+def : Pat <
+ (i32 (trunc i64:$a)),
+ (EXTRACT_SUBREG $a, sub0)
+>;
+
+// V_ADD_I32_e32/S_ADD_I32 produce a carry in VCC/SCC, respectively. For the
+// vector case, the sgpr-copies pass will fix this to use the vector version.
+def : Pat <
+ (i32 (addc i32:$src0, i32:$src1)),
+ (S_ADD_I32 $src0, $src1)
+>;
+
+def : Pat <
+ (or i64:$a, i64:$b),
+ (INSERT_SUBREG
+ (INSERT_SUBREG (IMPLICIT_DEF),
+ (V_OR_B32_e32 (EXTRACT_SUBREG $a, sub0), (EXTRACT_SUBREG $b, sub0)), sub0),
+ (V_OR_B32_e32 (EXTRACT_SUBREG $a, sub1), (EXTRACT_SUBREG $b, sub1)), sub1)
+>;
+
//============================================================================//
// Miscellaneous Optimization Patterns
//============================================================================//
diff --git a/lib/Target/R600/SIIntrinsics.td b/lib/Target/R600/SIIntrinsics.td
index 2fa073e..7fcc964 100644
--- a/lib/Target/R600/SIIntrinsics.td
+++ b/lib/Target/R600/SIIntrinsics.td
@@ -17,10 +17,28 @@ let TargetPrefix = "SI", isTarget = 1 in {
def int_SI_tid : Intrinsic <[llvm_i32_ty], [], [IntrNoMem]>;
def int_SI_packf16 : Intrinsic <[llvm_i32_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
def int_SI_export : Intrinsic <[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], []>;
- def int_SI_load_const : Intrinsic <[llvm_float_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_SI_vs_load_input : Intrinsic <[llvm_v4f32_ty], [llvm_v16i8_ty, llvm_i16_ty, llvm_i32_ty], [IntrNoMem]> ;
+ def int_SI_load_const : Intrinsic <[llvm_float_ty], [llvm_anyint_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_SI_vs_load_input : Intrinsic <[llvm_v4f32_ty], [llvm_anyint_ty, llvm_i16_ty, llvm_i32_ty], [IntrNoMem]> ;
- class Sample : Intrinsic <[llvm_v4f32_ty], [llvm_anyvector_ty, llvm_v32i8_ty, llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>;
+ // Fully-flexible TBUFFER_STORE_FORMAT_* except for the ADDR64 bit, which is not exposed
+ def int_SI_tbuffer_store : Intrinsic <
+ [],
+ [llvm_anyint_ty, // rsrc(SGPR)
+ llvm_anyint_ty, // vdata(VGPR), overloaded for types i32, v2i32, v4i32
+ llvm_i32_ty, // num_channels(imm), selects opcode suffix: 1=X, 2=XY, 3=XYZ, 4=XYZW
+ llvm_i32_ty, // vaddr(VGPR)
+ llvm_i32_ty, // soffset(SGPR)
+ llvm_i32_ty, // inst_offset(imm)
+ llvm_i32_ty, // dfmt(imm)
+ llvm_i32_ty, // nfmt(imm)
+ llvm_i32_ty, // offen(imm)
+ llvm_i32_ty, // idxen(imm)
+ llvm_i32_ty, // glc(imm)
+ llvm_i32_ty, // slc(imm)
+ llvm_i32_ty], // tfe(imm)
+ []>;
+
+ class Sample : Intrinsic <[llvm_v4f32_ty], [llvm_anyvector_ty, llvm_v32i8_ty, llvm_anyint_ty, llvm_i32_ty], [IntrNoMem]>;
def int_SI_sample : Sample;
def int_SI_sampleb : Sample;
diff --git a/lib/Target/R600/SILowerControlFlow.cpp b/lib/Target/R600/SILowerControlFlow.cpp
index c2e8f02..958763d 100644
--- a/lib/Target/R600/SILowerControlFlow.cpp
+++ b/lib/Target/R600/SILowerControlFlow.cpp
@@ -377,10 +377,13 @@ void SILowerControlFlowPass::IndirectSrc(MachineInstr &MI) {
unsigned Dst = MI.getOperand(0).getReg();
unsigned Vec = MI.getOperand(2).getReg();
unsigned Off = MI.getOperand(4).getImm();
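+ // A plain 32-bit register (the new v1 indirect case) has no sub0
+ // sub-register; getSubReg then returns 0 and the register itself is used.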
+ unsigned SubReg = TRI->getSubReg(Vec, AMDGPU::sub0);
+ if (!SubReg)
+ SubReg = Vec;
- MachineInstr *MovRel =
+ MachineInstr *MovRel =
BuildMI(*MBB.getParent(), DL, TII->get(AMDGPU::V_MOVRELS_B32_e32), Dst)
- .addReg(TRI->getSubReg(Vec, AMDGPU::sub0) + Off)
+ .addReg(SubReg + Off)
.addReg(AMDGPU::M0, RegState::Implicit)
.addReg(Vec, RegState::Implicit);
@@ -395,10 +398,13 @@ void SILowerControlFlowPass::IndirectDst(MachineInstr &MI) {
unsigned Dst = MI.getOperand(0).getReg();
unsigned Off = MI.getOperand(4).getImm();
unsigned Val = MI.getOperand(5).getReg();
+ unsigned SubReg = TRI->getSubReg(Dst, AMDGPU::sub0);
+ if (!SubReg)
+ SubReg = Dst;
MachineInstr *MovRel =
BuildMI(*MBB.getParent(), DL, TII->get(AMDGPU::V_MOVRELD_B32_e32))
- .addReg(TRI->getSubReg(Dst, AMDGPU::sub0) + Off, RegState::Define)
+ .addReg(SubReg + Off, RegState::Define)
.addReg(Val)
.addReg(AMDGPU::M0, RegState::Implicit)
.addReg(Dst, RegState::Implicit);
@@ -409,6 +415,7 @@ void SILowerControlFlowPass::IndirectDst(MachineInstr &MI) {
bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {
TII = MF.getTarget().getInstrInfo();
TRI = MF.getTarget().getRegisterInfo();
+ SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
bool HaveKill = false;
bool NeedM0 = false;
@@ -476,6 +483,7 @@ bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {
IndirectSrc(MI);
break;
+ case AMDGPU::SI_INDIRECT_DST_V1:
case AMDGPU::SI_INDIRECT_DST_V2:
case AMDGPU::SI_INDIRECT_DST_V4:
case AMDGPU::SI_INDIRECT_DST_V8:
@@ -487,6 +495,7 @@ bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {
NeedWQM = true;
// Fall through
case AMDGPU::DS_WRITE_B32:
+ case AMDGPU::DS_ADD_U32_RTN:
NeedM0 = true;
break;
@@ -508,7 +517,7 @@ bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {
AMDGPU::M0).addImm(0xffffffff);
}
- if (NeedWQM) {
+ if (NeedWQM && MFI->ShaderType != ShaderType::COMPUTE) {
MachineBasicBlock &MBB = MF.front();
BuildMI(MBB, MBB.getFirstNonPHI(), DebugLoc(), TII->get(AMDGPU::S_WQM_B64),
AMDGPU::EXEC).addReg(AMDGPU::EXEC);
diff --git a/lib/Target/R600/SIMachineFunctionInfo.cpp b/lib/Target/R600/SIMachineFunctionInfo.cpp
index ee0e307..071f9fa 100644
--- a/lib/Target/R600/SIMachineFunctionInfo.cpp
+++ b/lib/Target/R600/SIMachineFunctionInfo.cpp
@@ -13,6 +13,10 @@
using namespace llvm;
+
+// Pin the vtable to this file.
+void SIMachineFunctionInfo::anchor() {}
+
SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
: AMDGPUMachineFunction(MF),
PSInputAddr(0) { }
diff --git a/lib/Target/R600/SIMachineFunctionInfo.h b/lib/Target/R600/SIMachineFunctionInfo.h
index 6da9f7f..2f1961c 100644
--- a/lib/Target/R600/SIMachineFunctionInfo.h
+++ b/lib/Target/R600/SIMachineFunctionInfo.h
@@ -22,6 +22,7 @@ namespace llvm {
/// This class keeps track of the SPI_SP_INPUT_ADDR config register, which
/// tells the hardware which interpolation parameters to load.
class SIMachineFunctionInfo : public AMDGPUMachineFunction {
+ virtual void anchor();
public:
SIMachineFunctionInfo(const MachineFunction &MF);
unsigned PSInputAddr;
diff --git a/lib/Target/R600/SIRegisterInfo.cpp b/lib/Target/R600/SIRegisterInfo.cpp
index 50fd4c7..ed0bbaf 100644
--- a/lib/Target/R600/SIRegisterInfo.cpp
+++ b/lib/Target/R600/SIRegisterInfo.cpp
@@ -15,6 +15,7 @@
#include "SIRegisterInfo.h"
#include "AMDGPUTargetMachine.h"
+#include "SIInstrInfo.h"
using namespace llvm;
@@ -25,6 +26,10 @@ SIRegisterInfo::SIRegisterInfo(AMDGPUTargetMachine &tm)
BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
BitVector Reserved(getNumRegs());
+ Reserved.set(AMDGPU::EXEC);
+ Reserved.set(AMDGPU::INDIRECT_BASE_ADDR);
+ const SIInstrInfo *TII = static_cast<const SIInstrInfo*>(TM.getInstrInfo());
+ TII->reserveIndirectRegisters(Reserved, MF);
return Reserved;
}
@@ -50,6 +55,10 @@ const TargetRegisterClass * SIRegisterInfo::getCFGStructurizerRegClass(
}
}
+unsigned SIRegisterInfo::getHWRegIndex(unsigned Reg) const {
+ return getEncodingValue(Reg);
+}
+
const TargetRegisterClass *SIRegisterInfo::getPhysRegClass(unsigned Reg) const {
assert(!TargetRegisterInfo::isVirtualRegister(Reg));
@@ -70,3 +79,53 @@ const TargetRegisterClass *SIRegisterInfo::getPhysRegClass(unsigned Reg) const {
}
return NULL;
}
+
+bool SIRegisterInfo::isSGPRClass(const TargetRegisterClass *RC) const {
+ if (!RC) {
+ return false;
+ }
+ return !hasVGPRs(RC);
+}
+
+bool SIRegisterInfo::hasVGPRs(const TargetRegisterClass *RC) const {
+ return getCommonSubClass(&AMDGPU::VReg_32RegClass, RC) ||
+ getCommonSubClass(&AMDGPU::VReg_64RegClass, RC) ||
+ getCommonSubClass(&AMDGPU::VReg_96RegClass, RC) ||
+ getCommonSubClass(&AMDGPU::VReg_128RegClass, RC) ||
+ getCommonSubClass(&AMDGPU::VReg_256RegClass, RC) ||
+ getCommonSubClass(&AMDGPU::VReg_512RegClass, RC);
+}
+
+const TargetRegisterClass *SIRegisterInfo::getEquivalentVGPRClass(
+ const TargetRegisterClass *SRC) const {
+ if (hasVGPRs(SRC)) {
+ return SRC;
+ } else if (SRC == &AMDGPU::SCCRegRegClass) {
+ return &AMDGPU::VCCRegRegClass;
+ } else if (getCommonSubClass(SRC, &AMDGPU::SGPR_32RegClass)) {
+ return &AMDGPU::VReg_32RegClass;
+ } else if (getCommonSubClass(SRC, &AMDGPU::SGPR_64RegClass)) {
+ return &AMDGPU::VReg_64RegClass;
+ } else if (getCommonSubClass(SRC, &AMDGPU::SReg_128RegClass)) {
+ return &AMDGPU::VReg_128RegClass;
+ } else if (getCommonSubClass(SRC, &AMDGPU::SReg_256RegClass)) {
+ return &AMDGPU::VReg_256RegClass;
+ } else if (getCommonSubClass(SRC, &AMDGPU::SReg_512RegClass)) {
+ return &AMDGPU::VReg_512RegClass;
+ }
+ return NULL;
+}
+
+const TargetRegisterClass *SIRegisterInfo::getSubRegClass(
+ const TargetRegisterClass *RC, unsigned SubIdx) const {
+ if (SubIdx == AMDGPU::NoSubRegister)
+ return RC;
+
+ // If this register has a sub-register, we can safely assume it is a 32-bit
+ // register, because all of SI's sub-registers are 32-bit.
+ if (isSGPRClass(RC)) {
+ return &AMDGPU::SGPR_32RegClass;
+ } else {
+ return &AMDGPU::VGPR_32RegClass;
+ }
+}
diff --git a/lib/Target/R600/SIRegisterInfo.h b/lib/Target/R600/SIRegisterInfo.h
index d0df4f9..8148f7f 100644
--- a/lib/Target/R600/SIRegisterInfo.h
+++ b/lib/Target/R600/SIRegisterInfo.h
@@ -42,9 +42,27 @@ struct SIRegisterInfo : public AMDGPURegisterInfo {
/// CFGStructurizer
virtual const TargetRegisterClass * getCFGStructurizerRegClass(MVT VT) const;
+ virtual unsigned getHWRegIndex(unsigned Reg) const;
+
/// \brief Return the 'base' register class for this register.
/// e.g. SGPR0 => SReg_32, VGPR0 => VReg_32, SGPR0_SGPR1 => SReg_64, etc.
const TargetRegisterClass *getPhysRegClass(unsigned Reg) const;
+
+ /// \returns true if this class contains only SGPR registers
+ bool isSGPRClass(const TargetRegisterClass *RC) const;
+
+ /// \returns true if this class contains VGPR registers.
+ bool hasVGPRs(const TargetRegisterClass *RC) const;
+
+ /// \returns A VGPR reg class with the same width as \p SRC
+ const TargetRegisterClass *getEquivalentVGPRClass(
+ const TargetRegisterClass *SRC) const;
+
+ /// \returns The register class that is used for a sub-register of \p RC for
+ /// the given \p SubIdx. If \p SubIdx equals NoSubRegister, \p RC will
+ /// be returned.
+ const TargetRegisterClass *getSubRegClass(const TargetRegisterClass *RC,
+ unsigned SubIdx) const;
};
} // End namespace llvm
diff --git a/lib/Target/R600/SIRegisterInfo.td b/lib/Target/R600/SIRegisterInfo.td
index 292b9d2..49bdbc9 100644
--- a/lib/Target/R600/SIRegisterInfo.td
+++ b/lib/Target/R600/SIRegisterInfo.td
@@ -43,7 +43,7 @@ def SGPR_32 : RegisterClass<"AMDGPU", [f32, i32], 32,
(add (sequence "SGPR%u", 0, 101))>;
// SGPR 64-bit registers
-def SGPR_64 : RegisterTuples<[sub0, sub1],
+def SGPR_64Regs : RegisterTuples<[sub0, sub1],
[(add (decimate (trunc SGPR_32, 101), 2)),
(add (decimate (shl SGPR_32, 1), 2))]>;
@@ -153,15 +153,17 @@ def SReg_32 : RegisterClass<"AMDGPU", [f32, i32], 32,
(add SGPR_32, M0Reg)
>;
+def SGPR_64 : RegisterClass<"AMDGPU", [v2i32, i64], 64, (add SGPR_64Regs)>;
+
def SReg_64 : RegisterClass<"AMDGPU", [v2i32, i64, i1], 64,
- (add SGPR_64, VCCReg, EXECReg)
+ (add SGPR_64Regs, VCCReg, EXECReg)
>;
-def SReg_128 : RegisterClass<"AMDGPU", [v16i8, i128], 128, (add SGPR_128)>;
+def SReg_128 : RegisterClass<"AMDGPU", [i128, v4i32], 128, (add SGPR_128)>;
-def SReg_256 : RegisterClass<"AMDGPU", [v32i8], 256, (add SGPR_256)>;
+def SReg_256 : RegisterClass<"AMDGPU", [v32i8, v8i32, v8f32], 256, (add SGPR_256)>;
-def SReg_512 : RegisterClass<"AMDGPU", [v64i8], 512, (add SGPR_512)>;
+def SReg_512 : RegisterClass<"AMDGPU", [v64i8, v16i32], 512, (add SGPR_512)>;
// Register class for all vector registers (VGPRs + Interpolation Registers)
def VReg_32 : RegisterClass<"AMDGPU", [i32, f32, v1i32], 32, (add VGPR_32)>;
@@ -172,9 +174,9 @@ def VReg_96 : RegisterClass<"AMDGPU", [untyped], 96, (add VGPR_96)> {
let Size = 96;
}
-def VReg_128 : RegisterClass<"AMDGPU", [v4i32, v4f32], 128, (add VGPR_128)>;
+def VReg_128 : RegisterClass<"AMDGPU", [v4i32, v4f32, i128], 128, (add VGPR_128)>;
-def VReg_256 : RegisterClass<"AMDGPU", [v8i32, v8f32], 256, (add VGPR_256)>;
+def VReg_256 : RegisterClass<"AMDGPU", [v32i8, v8i32, v8f32], 256, (add VGPR_256)>;
def VReg_512 : RegisterClass<"AMDGPU", [v16i32, v16f32], 512, (add VGPR_512)>;
diff --git a/lib/Target/R600/SITypeRewriter.cpp b/lib/Target/R600/SITypeRewriter.cpp
new file mode 100644
index 0000000..f194d8b
--- /dev/null
+++ b/lib/Target/R600/SITypeRewriter.cpp
@@ -0,0 +1,162 @@
+//===-- SITypeRewriter.cpp - Remove unwanted types ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// This pass performs the following type substitutions on all
+/// non-compute shaders:
+///
+/// v16i8 => i128
+/// - v16i8 is used for constant memory resource descriptors. This type is
+/// legal for some compute APIs, and we don't want to declare it as legal
+/// in the backend, because we want the legalizer to expand all v16i8
+/// operations.
+/// v1* => *
+/// - Having v1* types complicates the legalizer and we can easily replace
+///   them with the element type.
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/InstVisitor.h"
+
+using namespace llvm;
+
+namespace {
+
+class SITypeRewriter : public FunctionPass,
+ public InstVisitor<SITypeRewriter> {
+
+ static char ID;
+ Module *Mod;
+ Type *v16i8;
+ Type *i128;
+
+public:
+ SITypeRewriter() : FunctionPass(ID) { }
+ virtual bool doInitialization(Module &M);
+ virtual bool runOnFunction(Function &F);
+ virtual const char *getPassName() const {
+ return "SI Type Rewriter";
+ }
+ void visitLoadInst(LoadInst &I);
+ void visitCallInst(CallInst &I);
+ void visitBitCast(BitCastInst &I);
+};
+
+} // End anonymous namespace
+
+char SITypeRewriter::ID = 0;
+
+bool SITypeRewriter::doInitialization(Module &M) {
+ Mod = &M;
+ v16i8 = VectorType::get(Type::getInt8Ty(M.getContext()), 16);
+ i128 = Type::getIntNTy(M.getContext(), 128);
+ return false;
+}
+
+bool SITypeRewriter::runOnFunction(Function &F) {
+ AttributeSet Set = F.getAttributes();
+ Attribute A = Set.getAttribute(AttributeSet::FunctionIndex, "ShaderType");
+
+ unsigned ShaderType = ShaderType::COMPUTE;
+ if (A.isStringAttribute()) {
+ StringRef Str = A.getValueAsString();
+ Str.getAsInteger(0, ShaderType);
+ }
+ if (ShaderType != ShaderType::COMPUTE) {
+ visit(F);
+ }
+
+ return false;
+}
+
+void SITypeRewriter::visitLoadInst(LoadInst &I) {
+ Value *Ptr = I.getPointerOperand();
+ Type *PtrTy = Ptr->getType();
+ Type *ElemTy = PtrTy->getPointerElementType();
+ IRBuilder<> Builder(&I);
+ if (ElemTy == v16i8) {
+ Value *BitCast = Builder.CreateBitCast(Ptr, Type::getIntNPtrTy(I.getContext(), 128, 2));
+ LoadInst *Load = Builder.CreateLoad(BitCast);
+ SmallVector <std::pair<unsigned, MDNode*>, 8> MD;
+ I.getAllMetadataOtherThanDebugLoc(MD);
+ for (unsigned i = 0, e = MD.size(); i != e; ++i) {
+ Load->setMetadata(MD[i].first, MD[i].second);
+ }
+ Value *BitCastLoad = Builder.CreateBitCast(Load, I.getType());
+ I.replaceAllUsesWith(BitCastLoad);
+ I.eraseFromParent();
+ }
+}
+
+void SITypeRewriter::visitCallInst(CallInst &I) {
+ IRBuilder<> Builder(&I);
+ SmallVector <Value*, 8> Args;
+ SmallVector <Type*, 8> Types;
+ bool NeedToReplace = false;
+ Function *F = I.getCalledFunction();
+ std::string Name = F->getName().str();
+ for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) {
+ Value *Arg = I.getArgOperand(i);
+ if (Arg->getType() == v16i8) {
+ Args.push_back(Builder.CreateBitCast(Arg, i128));
+ Types.push_back(i128);
+ NeedToReplace = true;
+ Name = Name + ".i128";
+ } else if (Arg->getType()->isVectorTy() &&
+ Arg->getType()->getVectorNumElements() == 1 &&
+ Arg->getType()->getVectorElementType() ==
+ Type::getInt32Ty(I.getContext())){
+ Type *ElementTy = Arg->getType()->getVectorElementType();
+ std::string TypeName = "i32";
+ InsertElementInst *Def = dyn_cast<InsertElementInst>(Arg);
+ assert(Def);
+ Args.push_back(Def->getOperand(1));
+ Types.push_back(ElementTy);
+ std::string VecTypeName = "v1" + TypeName;
+ Name = Name.replace(Name.find(VecTypeName), VecTypeName.length(), TypeName);
+ NeedToReplace = true;
+ } else {
+ Args.push_back(Arg);
+ Types.push_back(Arg->getType());
+ }
+ }
+
+ if (!NeedToReplace) {
+ return;
+ }
+ Function *NewF = Mod->getFunction(Name);
+ if (!NewF) {
+ NewF = Function::Create(FunctionType::get(F->getReturnType(), Types, false), GlobalValue::ExternalLinkage, Name, Mod);
+ NewF->setAttributes(F->getAttributes());
+ }
+ I.replaceAllUsesWith(Builder.CreateCall(NewF, Args));
+ I.eraseFromParent();
+}
+
+void SITypeRewriter::visitBitCast(BitCastInst &I) {
+ IRBuilder<> Builder(&I);
+ if (I.getDestTy() != i128) {
+ return;
+ }
+
+ if (BitCastInst *Op = dyn_cast<BitCastInst>(I.getOperand(0))) {
+ if (Op->getSrcTy() == i128) {
+ I.replaceAllUsesWith(Op->getOperand(0));
+ I.eraseFromParent();
+ }
+ }
+}
+
+FunctionPass *llvm::createSITypeRewriter() {
+ return new SITypeRewriter();
+}
diff --git a/lib/Target/Sparc/CMakeLists.txt b/lib/Target/Sparc/CMakeLists.txt
index acf7496..6339394 100644
--- a/lib/Target/Sparc/CMakeLists.txt
+++ b/lib/Target/Sparc/CMakeLists.txt
@@ -2,6 +2,7 @@ set(LLVM_TARGET_DEFINITIONS Sparc.td)
tablegen(LLVM SparcGenRegisterInfo.inc -gen-register-info)
tablegen(LLVM SparcGenInstrInfo.inc -gen-instr-info)
+tablegen(LLVM SparcGenCodeEmitter.inc -gen-emitter)
tablegen(LLVM SparcGenAsmWriter.inc -gen-asm-writer)
tablegen(LLVM SparcGenDAGISel.inc -gen-dag-isel)
tablegen(LLVM SparcGenSubtargetInfo.inc -gen-subtarget)
@@ -20,6 +21,8 @@ add_llvm_target(SparcCodeGen
SparcSubtarget.cpp
SparcTargetMachine.cpp
SparcSelectionDAGInfo.cpp
+ SparcJITInfo.cpp
+ SparcCodeEmitter.cpp
)
add_dependencies(LLVMSparcCodeGen SparcCommonTableGen intrinsics_gen)
diff --git a/lib/Target/Sparc/DelaySlotFiller.cpp b/lib/Target/Sparc/DelaySlotFiller.cpp
index b101751..9a0466a 100644
--- a/lib/Target/Sparc/DelaySlotFiller.cpp
+++ b/lib/Target/Sparc/DelaySlotFiller.cpp
@@ -14,6 +14,7 @@
#define DEBUG_TYPE "delay-slot-filler"
#include "Sparc.h"
+#include "SparcSubtarget.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
@@ -39,10 +40,13 @@ namespace {
/// layout, etc.
///
TargetMachine &TM;
+ const SparcSubtarget *Subtarget;
static char ID;
Filler(TargetMachine &tm)
- : MachineFunctionPass(ID), TM(tm) { }
+ : MachineFunctionPass(ID), TM(tm),
+ Subtarget(&TM.getSubtarget<SparcSubtarget>()) {
+ }
virtual const char *getPassName() const {
return "SPARC Delay Slot Filler";
@@ -102,6 +106,8 @@ FunctionPass *llvm::createSparcDelaySlotFillerPass(TargetMachine &tm) {
bool Filler::runOnMachineBasicBlock(MachineBasicBlock &MBB) {
bool Changed = false;
+ const TargetInstrInfo *TII = TM.getInstrInfo();
+
for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ) {
MachineBasicBlock::iterator MI = I;
++I;
@@ -114,6 +120,14 @@ bool Filler::runOnMachineBasicBlock(MachineBasicBlock &MBB) {
continue;
}
+ if (!Subtarget->isV9() &&
+ (MI->getOpcode() == SP::FCMPS || MI->getOpcode() == SP::FCMPD
+ || MI->getOpcode() == SP::FCMPQ)) {
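+ // SPARC-V8 requires at least one instruction between an FCMP* and a branch
+ // reading the floating-point condition codes, so conservatively pad every
+ // FCMP* with a NOP.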
+ BuildMI(MBB, I, MI->getDebugLoc(), TII->get(SP::NOP));
+ Changed = true;
+ continue;
+ }
+
// If MI has no delay slot, skip.
if (!MI->hasDelaySlot())
continue;
@@ -126,7 +140,6 @@ bool Filler::runOnMachineBasicBlock(MachineBasicBlock &MBB) {
++FilledSlots;
Changed = true;
- const TargetInstrInfo *TII = TM.getInstrInfo();
if (D == MBB.end())
BuildMI(MBB, I, MI->getDebugLoc(), TII->get(SP::NOP));
else
@@ -156,7 +169,7 @@ Filler::findDelayInstr(MachineBasicBlock &MBB,
if (slot == MBB.begin())
return MBB.end();
- if (slot->getOpcode() == SP::RET)
+ if (slot->getOpcode() == SP::RET || slot->getOpcode() == SP::TLS_CALL)
return MBB.end();
if (slot->getOpcode() == SP::RETL) {
@@ -342,6 +355,7 @@ bool Filler::needsUnimp(MachineBasicBlock::iterator I, unsigned &StructSize)
case SP::CALL: structSizeOpNum = 1; break;
case SP::JMPLrr:
case SP::JMPLri: structSizeOpNum = 2; break;
+ case SP::TLS_CALL: return false;
}
const MachineOperand &MO = I->getOperand(structSizeOpNum);
diff --git a/lib/Target/Sparc/LLVMBuild.txt b/lib/Target/Sparc/LLVMBuild.txt
index 7d54d32..fd8e5d9 100644
--- a/lib/Target/Sparc/LLVMBuild.txt
+++ b/lib/Target/Sparc/LLVMBuild.txt
@@ -23,6 +23,7 @@ type = TargetGroup
name = Sparc
parent = Target
has_asmprinter = 1
+has_jit = 1
[component_1]
type = Library
diff --git a/lib/Target/Sparc/MCTargetDesc/SparcBaseInfo.h b/lib/Target/Sparc/MCTargetDesc/SparcBaseInfo.h
index aac0e8d..f3caeaa 100644
--- a/lib/Target/Sparc/MCTargetDesc/SparcBaseInfo.h
+++ b/lib/Target/Sparc/MCTargetDesc/SparcBaseInfo.h
@@ -53,7 +53,27 @@ enum TOF {
// Extract bits 41-32 of an address.
// Assembler: %hm(addr)
- MO_HM
+ MO_HM,
+
+ // TargetFlags for Thread Local Storage.
+ MO_TLS_GD_HI22,
+ MO_TLS_GD_LO10,
+ MO_TLS_GD_ADD,
+ MO_TLS_GD_CALL,
+ MO_TLS_LDM_HI22,
+ MO_TLS_LDM_LO10,
+ MO_TLS_LDM_ADD,
+ MO_TLS_LDM_CALL,
+ MO_TLS_LDO_HIX22,
+ MO_TLS_LDO_LOX10,
+ MO_TLS_LDO_ADD,
+ MO_TLS_IE_HI22,
+ MO_TLS_IE_LO10,
+ MO_TLS_IE_LD,
+ MO_TLS_IE_LDX,
+ MO_TLS_IE_ADD,
+ MO_TLS_LE_HIX22,
+ MO_TLS_LE_LOX10
};
} // end namespace SPII
diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp b/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp
index 5a52abe..baac36b 100644
--- a/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp
+++ b/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp
@@ -21,23 +21,26 @@ void SparcELFMCAsmInfo::anchor() { }
SparcELFMCAsmInfo::SparcELFMCAsmInfo(StringRef TT) {
IsLittleEndian = false;
Triple TheTriple(TT);
- if (TheTriple.getArch() == Triple::sparcv9) {
+ bool isV9 = (TheTriple.getArch() == Triple::sparcv9);
+
+ if (isV9) {
PointerSize = CalleeSaveStackSlotSize = 8;
}
Data16bitsDirective = "\t.half\t";
Data32bitsDirective = "\t.word\t";
- Data64bitsDirective = 0; // .xword is only supported by V9.
+ // .xword is only supported by V9.
+ Data64bitsDirective = (isV9) ? "\t.xword\t" : 0;
ZeroDirective = "\t.skip\t";
CommentString = "!";
HasLEB128 = true;
SupportsDebugInformation = true;
-
+
+ ExceptionsType = ExceptionHandling::DwarfCFI;
+
SunStyleELFSectionSwitchSyntax = true;
UsesELFSectionDirectiveForBSS = true;
- WeakRefDirective = "\t.weak\t";
-
PrivateGlobalPrefix = ".L";
}
diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.h b/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.h
index 621e8ff..1e58e37 100644
--- a/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.h
+++ b/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.h
@@ -14,12 +14,12 @@
#ifndef SPARCTARGETASMINFO_H
#define SPARCTARGETASMINFO_H
-#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCAsmInfoELF.h"
namespace llvm {
class StringRef;
- class SparcELFMCAsmInfo : public MCAsmInfo {
+ class SparcELFMCAsmInfo : public MCAsmInfoELF {
virtual void anchor();
public:
explicit SparcELFMCAsmInfo(StringRef TT);
diff --git a/lib/Target/Sparc/Makefile b/lib/Target/Sparc/Makefile
index 4b81ada..c171db7 100644
--- a/lib/Target/Sparc/Makefile
+++ b/lib/Target/Sparc/Makefile
@@ -14,7 +14,8 @@ TARGET = Sparc
# Make sure that tblgen is run, first thing.
BUILT_SOURCES = SparcGenRegisterInfo.inc SparcGenInstrInfo.inc \
SparcGenAsmWriter.inc SparcGenDAGISel.inc \
- SparcGenSubtargetInfo.inc SparcGenCallingConv.inc
+ SparcGenSubtargetInfo.inc SparcGenCallingConv.inc \
+ SparcGenCodeEmitter.inc
DIRS = TargetInfo MCTargetDesc
diff --git a/lib/Target/Sparc/Sparc.h b/lib/Target/Sparc/Sparc.h
index 98563db..f44b604 100644
--- a/lib/Target/Sparc/Sparc.h
+++ b/lib/Target/Sparc/Sparc.h
@@ -26,6 +26,8 @@ namespace llvm {
FunctionPass *createSparcISelDag(SparcTargetMachine &TM);
FunctionPass *createSparcDelaySlotFillerPass(TargetMachine &TM);
+ FunctionPass *createSparcJITCodeEmitterPass(SparcTargetMachine &TM,
+ JITCodeEmitter &JCE);
} // end namespace llvm;
@@ -103,5 +105,22 @@ namespace llvm {
}
llvm_unreachable("Invalid cond code");
}
+
+ inline static unsigned HI22(int64_t imm) {
+ return (unsigned)((imm >> 10) & ((1 << 22)-1));
+ }
+
+ inline static unsigned LO10(int64_t imm) {
+ return (unsigned)(imm & 0x3FF);
+ }
+
+ inline static unsigned HIX22(int64_t imm) {
+ return HI22(~imm);
+ }
+
+ inline static unsigned LOX10(int64_t imm) {
+ return ~LO10(~imm);
+ }
+
} // end namespace llvm
#endif
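
The new HI22/LO10 helpers split a constant for the classic sethi/or pair, while HIX22/LOX10 produce the complemented pair used with sethi/xor so that negative constants come out correctly sign-extended on 64-bit. A minimal standalone check of both identities; the four helpers are copied from the hunk above, and the driver plus the 13-bit sign-extension model of xor's immediate are illustrative:

#include <cassert>
#include <cstdint>
#include <initializer_list>

static unsigned HI22(int64_t imm) { return (unsigned)((imm >> 10) & ((1 << 22) - 1)); }
static unsigned LO10(int64_t imm) { return (unsigned)(imm & 0x3FF); }
static unsigned HIX22(int64_t imm) { return HI22(~imm); }
static unsigned LOX10(int64_t imm) { return ~LO10(~imm); }

int main() {
  // sethi %hi, %g1 ; or %g1, %lo, %g1  => zero-extended constant.
  for (int64_t imm : {0L, 1L, 4096L, 0x12345678L}) {
    uint32_t v = (HI22(imm) << 10) | LO10(imm);
    assert((int64_t)v == imm);
  }
  // sethi %hix, %g1 ; xor %g1, %lox, %g1  => sign-extended constant.
  // xor's simm13 operand is sign-extended, so model that explicitly.
  for (int64_t imm : {-1L, -4097L, -8192L, -0x12345678L}) {
    int64_t sethi = (int64_t)(HIX22(imm) << 10);        // low 10 bits zero
    int64_t lox = (int32_t)(LOX10(imm) << 19) >> 19;    // sign-extend 13 bits
    assert((sethi ^ lox) == imm);
  }
  return 0;
}
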
diff --git a/lib/Target/Sparc/Sparc.td b/lib/Target/Sparc/Sparc.td
index d42c40f..0df48f6 100644
--- a/lib/Target/Sparc/Sparc.td
+++ b/lib/Target/Sparc/Sparc.td
@@ -30,6 +30,10 @@ def FeatureVIS
: SubtargetFeature<"vis", "IsVIS", "true",
"Enable UltraSPARC Visual Instruction Set extensions">;
+def FeatureHardQuad
+ : SubtargetFeature<"hard-quad-float", "HasHardQuad", "true",
+ "Enable quad-word floating point instructions">;
+
//===----------------------------------------------------------------------===//
// Register File, Calling Conv, Instruction Descriptions
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/Sparc/SparcAsmPrinter.cpp b/lib/Target/Sparc/SparcAsmPrinter.cpp
index 3fe2b44..d06c894 100644
--- a/lib/Target/Sparc/SparcAsmPrinter.cpp
+++ b/lib/Target/Sparc/SparcAsmPrinter.cpp
@@ -20,6 +20,7 @@
#include "llvm/ADT/SmallString.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
@@ -43,6 +44,7 @@ namespace {
const char *Modifier = 0);
void printCCOperand(const MachineInstr *MI, int opNum, raw_ostream &OS);
+ virtual void EmitFunctionBodyStart();
virtual void EmitInstruction(const MachineInstr *MI) {
SmallString<128> Str;
raw_svector_ostream OS(Str);
@@ -63,11 +65,35 @@ namespace {
virtual bool isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB)
const;
+ void EmitGlobalRegisterDecl(unsigned reg) {
+ SmallString<128> Str;
+ raw_svector_ostream OS(Str);
+ OS << "\t.register "
+ << "%" << StringRef(getRegisterName(reg)).lower()
+ << ", "
+ << ((reg == SP::G6 || reg == SP::G7)? "#ignore" : "#scratch");
+ OutStreamer.EmitRawText(OS.str());
+ }
+
};
} // end of anonymous namespace
#include "SparcGenAsmWriter.inc"
+void SparcAsmPrinter::EmitFunctionBodyStart() {
+ if (!TM.getSubtarget<SparcSubtarget>().is64Bit())
+ return;
+
+ const MachineRegisterInfo &MRI = MF->getRegInfo();
+ const unsigned globalRegs[] = { SP::G2, SP::G3, SP::G6, SP::G7, 0 };
+ for (unsigned i = 0; globalRegs[i] != 0; ++i) {
+ unsigned reg = globalRegs[i];
+ if (MRI.use_empty(reg))
+ continue;
+ EmitGlobalRegisterDecl(reg);
+ }
+}
+
void SparcAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
raw_ostream &O) {
const MachineOperand &MO = MI->getOperand (opNum);
@@ -79,11 +105,37 @@ void SparcAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
assert(TF == SPII::MO_NO_FLAG &&
"Cannot handle target flags on call address");
else if (MI->getOpcode() == SP::SETHIi)
- assert((TF == SPII::MO_HI || TF == SPII::MO_H44 || TF == SPII::MO_HH) &&
+ assert((TF == SPII::MO_HI || TF == SPII::MO_H44 || TF == SPII::MO_HH
+ || TF == SPII::MO_TLS_GD_HI22
+ || TF == SPII::MO_TLS_LDM_HI22
+ || TF == SPII::MO_TLS_LDO_HIX22
+ || TF == SPII::MO_TLS_IE_HI22
+ || TF == SPII::MO_TLS_LE_HIX22) &&
"Invalid target flags for address operand on sethi");
+ else if (MI->getOpcode() == SP::TLS_CALL)
+ assert((TF == SPII::MO_NO_FLAG
+ || TF == SPII::MO_TLS_GD_CALL
+ || TF == SPII::MO_TLS_LDM_CALL) &&
+ "Cannot handle target flags on tls call address");
+ else if (MI->getOpcode() == SP::TLS_ADDrr)
+ assert((TF == SPII::MO_TLS_GD_ADD || TF == SPII::MO_TLS_LDM_ADD
+ || TF == SPII::MO_TLS_LDO_ADD || TF == SPII::MO_TLS_IE_ADD) &&
+ "Cannot handle target flags on add for TLS");
+ else if (MI->getOpcode() == SP::TLS_LDrr)
+ assert(TF == SPII::MO_TLS_IE_LD &&
+ "Cannot handle target flags on ld for TLS");
+ else if (MI->getOpcode() == SP::TLS_LDXrr)
+ assert(TF == SPII::MO_TLS_IE_LDX &&
+ "Cannot handle target flags on ldx for TLS");
+ else if (MI->getOpcode() == SP::XORri)
+ assert((TF == SPII::MO_TLS_LDO_LOX10 || TF == SPII::MO_TLS_LE_LOX10) &&
+ "Cannot handle target flags on xor for TLS");
else
- assert((TF == SPII::MO_LO || TF == SPII::MO_M44 || TF == SPII::MO_L44 ||
- TF == SPII::MO_HM) &&
+ assert((TF == SPII::MO_LO || TF == SPII::MO_M44 || TF == SPII::MO_L44
+ || TF == SPII::MO_HM
+ || TF == SPII::MO_TLS_GD_LO10
+ || TF == SPII::MO_TLS_LDM_LO10
+ || TF == SPII::MO_TLS_IE_LO10 ) &&
"Invalid target flags for small address operand");
}
#endif
@@ -102,6 +154,24 @@ void SparcAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
case SPII::MO_L44: O << "%l44("; break;
case SPII::MO_HH: O << "%hh("; break;
case SPII::MO_HM: O << "%hm("; break;
+ case SPII::MO_TLS_GD_HI22: O << "%tgd_hi22("; break;
+ case SPII::MO_TLS_GD_LO10: O << "%tgd_lo10("; break;
+ case SPII::MO_TLS_GD_ADD: O << "%tgd_add("; break;
+ case SPII::MO_TLS_GD_CALL: O << "%tgd_call("; break;
+ case SPII::MO_TLS_LDM_HI22: O << "%tldm_hi22("; break;
+ case SPII::MO_TLS_LDM_LO10: O << "%tldm_lo10("; break;
+ case SPII::MO_TLS_LDM_ADD: O << "%tldm_add("; break;
+ case SPII::MO_TLS_LDM_CALL: O << "%tldm_call("; break;
+ case SPII::MO_TLS_LDO_HIX22: O << "%tldo_hix22("; break;
+ case SPII::MO_TLS_LDO_LOX10: O << "%tldo_lox10("; break;
+ case SPII::MO_TLS_LDO_ADD: O << "%tldo_add("; break;
+ case SPII::MO_TLS_IE_HI22: O << "%tie_hi22("; break;
+ case SPII::MO_TLS_IE_LO10: O << "%tie_lo10("; break;
+ case SPII::MO_TLS_IE_LD: O << "%tie_ld("; break;
+ case SPII::MO_TLS_IE_LDX: O << "%tie_ldx("; break;
+ case SPII::MO_TLS_IE_ADD: O << "%tie_add("; break;
+ case SPII::MO_TLS_LE_HIX22: O << "%tle_hix22("; break;
+ case SPII::MO_TLS_LE_LOX10: O << "%tle_lox10("; break;
}
switch (MO.getType()) {
@@ -116,7 +186,7 @@ void SparcAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
O << *MO.getMBB()->getSymbol();
return;
case MachineOperand::MO_GlobalAddress:
- O << *Mang->getSymbol(MO.getGlobal());
+ O << *getSymbol(MO.getGlobal());
break;
case MachineOperand::MO_BlockAddress:
O << GetBlockAddressSymbol(MO.getBlockAddress())->getName();
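
EmitFunctionBodyStart above fires only for 64-bit subtargets: the V9 ABI expects a .register directive before %g2/%g3 (application registers) or the system-reserved %g6/%g7 appear in code. A small sketch of the exact strings EmitGlobalRegisterDecl produces; the register names are hard-coded here for illustration instead of going through the generated getRegisterName:

#include <cstdio>

int main() {
  // Mirrors EmitGlobalRegisterDecl: the system-reserved %g6/%g7 get
  // "#ignore", the application registers %g2/%g3 get "#scratch".
  struct Reg { const char *Name; bool Reserved; } Regs[] = {
      {"g2", false}, {"g3", false}, {"g6", true}, {"g7", true}};
  for (const Reg &R : Regs)
    std::printf("\t.register %%%s, %s\n", R.Name,
                R.Reserved ? "#ignore" : "#scratch");
  return 0;
}
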
diff --git a/lib/Target/Sparc/SparcCallingConv.td b/lib/Target/Sparc/SparcCallingConv.td
index a181bcf..acd4ec2 100644
--- a/lib/Target/Sparc/SparcCallingConv.td
+++ b/lib/Target/Sparc/SparcCallingConv.td
@@ -117,3 +117,14 @@ def CC_Sparc64 : CallingConv<[
// arguments whether they are passed in registers or not.
CCCustom<"CC_Sparc64_Full">
]>;
+
+// Callee-saved registers are handled by the register window mechanism.
+def CSR : CalleeSavedRegs<(add)> {
+ let OtherPreserved = (add (sequence "I%u", 0, 7),
+ (sequence "L%u", 0, 7));
+}
+
+// Callee-saved registers for calls with ReturnsTwice attribute.
+def RTCSR : CalleeSavedRegs<(add)> {
+ let OtherPreserved = (add I6, I7);
+}
diff --git a/lib/Target/Sparc/SparcCodeEmitter.cpp b/lib/Target/Sparc/SparcCodeEmitter.cpp
new file mode 100644
index 0000000..9bfe31f
--- /dev/null
+++ b/lib/Target/Sparc/SparcCodeEmitter.cpp
@@ -0,0 +1,245 @@
+//===-- Sparc/SparcCodeEmitter.cpp - Convert Sparc Code to Machine Code ---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===---------------------------------------------------------------------===//
+//
+// This file contains the pass that transforms the Sparc machine instructions
+// into relocatable machine code.
+//
+//===---------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "jit"
+#include "Sparc.h"
+#include "MCTargetDesc/SparcBaseInfo.h"
+#include "SparcRelocations.h"
+#include "SparcTargetMachine.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/JITCodeEmitter.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+
+STATISTIC(NumEmitted, "Number of machine instructions emitted");
+
+namespace {
+
+class SparcCodeEmitter : public MachineFunctionPass {
+ SparcJITInfo *JTI;
+ const SparcInstrInfo *II;
+ const DataLayout *TD;
+ const SparcSubtarget *Subtarget;
+ TargetMachine &TM;
+ JITCodeEmitter &MCE;
+ const std::vector<MachineConstantPoolEntry> *MCPEs;
+ bool IsPIC;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<MachineModuleInfo> ();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ static char ID;
+
+public:
+ SparcCodeEmitter(TargetMachine &tm, JITCodeEmitter &mce)
+ : MachineFunctionPass(ID), JTI(0), II(0), TD(0),
+ TM(tm), MCE(mce), MCPEs(0),
+ IsPIC(TM.getRelocationModel() == Reloc::PIC_) {}
+
+ bool runOnMachineFunction(MachineFunction &MF);
+
+ virtual const char *getPassName() const {
+ return "Sparc Machine Code Emitter";
+ }
+
+ /// getBinaryCodeForInstr - This function, generated by the
+ /// CodeEmitterGenerator using TableGen, produces the binary encoding for
+ /// machine instructions.
+ uint64_t getBinaryCodeForInstr(const MachineInstr &MI) const;
+
+ void emitInstruction(MachineBasicBlock::instr_iterator MI,
+ MachineBasicBlock &MBB);
+
+private:
+ /// getMachineOpValue - Return binary encoding of operand. If the machine
+ /// operand requires relocation, record the relocation and return zero.
+ unsigned getMachineOpValue(const MachineInstr &MI,
+ const MachineOperand &MO) const;
+
+ void emitWord(unsigned Word);
+
+ unsigned getRelocation(const MachineInstr &MI,
+ const MachineOperand &MO) const;
+
+ void emitGlobalAddress(const GlobalValue *GV, unsigned Reloc) const;
+ void emitExternalSymbolAddress(const char *ES, unsigned Reloc) const;
+ void emitConstPoolAddress(unsigned CPI, unsigned Reloc) const;
+ void emitMachineBasicBlock(MachineBasicBlock *BB, unsigned Reloc) const;
+};
+} // end anonymous namespace.
+
+char SparcCodeEmitter::ID = 0;
+
+bool SparcCodeEmitter::runOnMachineFunction(MachineFunction &MF) {
+ SparcTargetMachine &Target = static_cast<SparcTargetMachine &>(
+ const_cast<TargetMachine &>(MF.getTarget()));
+
+ JTI = Target.getJITInfo();
+ II = Target.getInstrInfo();
+ TD = Target.getDataLayout();
+ Subtarget = &TM.getSubtarget<SparcSubtarget> ();
+ MCPEs = &MF.getConstantPool()->getConstants();
+ JTI->Initialize(MF, IsPIC);
+ MCE.setModuleInfo(&getAnalysis<MachineModuleInfo> ());
+
+ do {
+ DEBUG(errs() << "JITTing function '"
+ << MF.getName() << "'\n");
+ MCE.startFunction(MF);
+
+ for (MachineFunction::iterator MBB = MF.begin(), E = MF.end();
+ MBB != E; ++MBB){
+ MCE.StartMachineBasicBlock(MBB);
+ for (MachineBasicBlock::instr_iterator I = MBB->instr_begin(),
+ E = MBB->instr_end(); I != E;)
+ emitInstruction(*I++, *MBB);
+ }
+ } while (MCE.finishFunction(MF));
+
+ return false;
+}
+
+void SparcCodeEmitter::emitInstruction(MachineBasicBlock::instr_iterator MI,
+ MachineBasicBlock &MBB) {
+ DEBUG(errs() << "JIT: " << (void*)MCE.getCurrentPCValue() << ":\t" << *MI);
+
+ MCE.processDebugLoc(MI->getDebugLoc(), true);
+
+ ++NumEmitted;
+
+ switch (MI->getOpcode()) {
+ default: {
+ emitWord(getBinaryCodeForInstr(*MI));
+ break;
+ }
+ case TargetOpcode::INLINEASM: {
+ // We allow inline assembler nodes with empty bodies - they can
+ // implicitly define registers, which is ok for JIT.
+ if (MI->getOperand(0).getSymbolName()[0]) {
+ report_fatal_error("JIT does not support inline asm!");
+ }
+ break;
+ }
+ case TargetOpcode::PROLOG_LABEL:
+ case TargetOpcode::EH_LABEL: {
+ MCE.emitLabel(MI->getOperand(0).getMCSymbol());
+ break;
+ }
+ case TargetOpcode::IMPLICIT_DEF:
+ case TargetOpcode::KILL: {
+ // Do nothing.
+ break;
+ }
+ case SP::GETPCX: {
+ report_fatal_error("JIT does not support pseudo instruction GETPCX yet!");
+ break;
+ }
+ }
+
+ MCE.processDebugLoc(MI->getDebugLoc(), false);
+}
+
+void SparcCodeEmitter::emitWord(unsigned Word) {
+ DEBUG(errs() << " 0x";
+ errs().write_hex(Word) << "\n");
+ MCE.emitWordBE(Word);
+}
+
+/// getMachineOpValue - Return binary encoding of operand. If the machine
+/// operand requires relocation, record the relocation and return zero.
+unsigned SparcCodeEmitter::getMachineOpValue(const MachineInstr &MI,
+ const MachineOperand &MO) const {
+ if (MO.isReg())
+ return TM.getRegisterInfo()->getEncodingValue(MO.getReg());
+ else if (MO.isImm())
+ return static_cast<unsigned>(MO.getImm());
+ else if (MO.isGlobal())
+ emitGlobalAddress(MO.getGlobal(), getRelocation(MI, MO));
+ else if (MO.isSymbol())
+ emitExternalSymbolAddress(MO.getSymbolName(), getRelocation(MI, MO));
+ else if (MO.isCPI())
+ emitConstPoolAddress(MO.getIndex(), getRelocation(MI, MO));
+ else if (MO.isMBB())
+ emitMachineBasicBlock(MO.getMBB(), getRelocation(MI, MO));
+ else
+ llvm_unreachable("Unable to encode MachineOperand!");
+ return 0;
+}
+unsigned SparcCodeEmitter::getRelocation(const MachineInstr &MI,
+ const MachineOperand &MO) const {
+
+ unsigned TF = MO.getTargetFlags();
+ switch (TF) {
+ default:
+ case SPII::MO_NO_FLAG: break;
+ case SPII::MO_LO: return SP::reloc_sparc_lo;
+ case SPII::MO_HI: return SP::reloc_sparc_hi;
+ case SPII::MO_H44:
+ case SPII::MO_M44:
+ case SPII::MO_L44:
+ case SPII::MO_HH:
+ case SPII::MO_HM: assert(0 && "FIXME: Implement Medium/Large code model.");
+ }
+
+ unsigned Opc = MI.getOpcode();
+ switch (Opc) {
+ default: break;
+ case SP::CALL: return SP::reloc_sparc_pc30;
+ case SP::BA:
+ case SP::BCOND:
+ case SP::FBCOND: return SP::reloc_sparc_pc22;
+ case SP::BPXCC: return SP::reloc_sparc_pc19;
+ }
+ llvm_unreachable("unknown reloc!");
+}
+
+void SparcCodeEmitter::emitGlobalAddress(const GlobalValue *GV,
+ unsigned Reloc) const {
+ MCE.addRelocation(MachineRelocation::getGV(MCE.getCurrentPCOffset(), Reloc,
+ const_cast<GlobalValue *>(GV), 0,
+ true));
+}
+
+void SparcCodeEmitter::
+emitExternalSymbolAddress(const char *ES, unsigned Reloc) const {
+ MCE.addRelocation(MachineRelocation::getExtSym(MCE.getCurrentPCOffset(),
+ Reloc, ES, 0, 0));
+}
+
+void SparcCodeEmitter::
+emitConstPoolAddress(unsigned CPI, unsigned Reloc) const {
+ MCE.addRelocation(MachineRelocation::getConstPool(MCE.getCurrentPCOffset(),
+ Reloc, CPI, 0, false));
+}
+
+void SparcCodeEmitter::emitMachineBasicBlock(MachineBasicBlock *BB,
+ unsigned Reloc) const {
+ MCE.addRelocation(MachineRelocation::getBB(MCE.getCurrentPCOffset(),
+ Reloc, BB));
+}
+
+
+/// createSparcJITCodeEmitterPass - Return a pass that emits the collected Sparc
+/// code to the specified MCE object.
+FunctionPass *llvm::createSparcJITCodeEmitterPass(SparcTargetMachine &TM,
+ JITCodeEmitter &JCE) {
+ return new SparcCodeEmitter(TM, JCE);
+}
+
+#include "SparcGenCodeEmitter.inc"
diff --git a/lib/Target/Sparc/SparcFrameLowering.cpp b/lib/Target/Sparc/SparcFrameLowering.cpp
index 536e466..c75998a 100644
--- a/lib/Target/Sparc/SparcFrameLowering.cpp
+++ b/lib/Target/Sparc/SparcFrameLowering.cpp
@@ -33,6 +33,51 @@ DisableLeafProc("disable-sparc-leaf-proc",
cl::Hidden);
+void SparcFrameLowering::emitSPAdjustment(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ int NumBytes,
+ unsigned ADDrr,
+ unsigned ADDri) const {
+
+ DebugLoc dl = (MBBI != MBB.end()) ? MBBI->getDebugLoc() : DebugLoc();
+ const SparcInstrInfo &TII =
+ *static_cast<const SparcInstrInfo*>(MF.getTarget().getInstrInfo());
+
+ if (NumBytes >= -4096 && NumBytes < 4096) {
+ BuildMI(MBB, MBBI, dl, TII.get(ADDri), SP::O6)
+ .addReg(SP::O6).addImm(NumBytes);
+ return;
+ }
+
+ // Emit this the hard way. This clobbers G1 which we always know is
+ // available here.
+ if (NumBytes >= 0) {
+ // Emit nonnegative numbers with sethi + or.
+ // sethi %hi(NumBytes), %g1
+ // or %g1, %lo(NumBytes), %g1
+ // add %sp, %g1, %sp
+ BuildMI(MBB, MBBI, dl, TII.get(SP::SETHIi), SP::G1)
+ .addImm(HI22(NumBytes));
+ BuildMI(MBB, MBBI, dl, TII.get(SP::ORri), SP::G1)
+ .addReg(SP::G1).addImm(LO10(NumBytes));
+ BuildMI(MBB, MBBI, dl, TII.get(ADDrr), SP::O6)
+ .addReg(SP::O6).addReg(SP::G1);
+ return;
+ }
+
+ // Emit negative numbers with sethi + xor.
+ // sethi %hix(NumBytes), %g1
+ // xor %g1, %lox(NumBytes), %g1
+ // add %sp, %g1, %sp
+ BuildMI(MBB, MBBI, dl, TII.get(SP::SETHIi), SP::G1)
+ .addImm(HIX22(NumBytes));
+ BuildMI(MBB, MBBI, dl, TII.get(SP::XORri), SP::G1)
+ .addReg(SP::G1).addImm(LOX10(NumBytes));
+ BuildMI(MBB, MBBI, dl, TII.get(ADDrr), SP::O6)
+ .addReg(SP::O6).addReg(SP::G1);
+}
+
void SparcFrameLowering::emitPrologue(MachineFunction &MF) const {
SparcMachineFunctionInfo *FuncInfo = MF.getInfo<SparcMachineFunctionInfo>();
@@ -55,21 +100,27 @@ void SparcFrameLowering::emitPrologue(MachineFunction &MF) const {
SAVErr = SP::ADDrr;
}
NumBytes = - SubTarget.getAdjustedFrameSize(NumBytes);
-
- if (NumBytes >= -4096) {
- BuildMI(MBB, MBBI, dl, TII.get(SAVEri), SP::O6)
- .addReg(SP::O6).addImm(NumBytes);
- } else {
- // Emit this the hard way. This clobbers G1 which we always know is
- // available here.
- unsigned OffHi = (unsigned)NumBytes >> 10U;
- BuildMI(MBB, MBBI, dl, TII.get(SP::SETHIi), SP::G1).addImm(OffHi);
- // Emit G1 = G1 + I6
- BuildMI(MBB, MBBI, dl, TII.get(SP::ORri), SP::G1)
- .addReg(SP::G1).addImm(NumBytes & ((1 << 10)-1));
- BuildMI(MBB, MBBI, dl, TII.get(SAVErr), SP::O6)
- .addReg(SP::O6).addReg(SP::G1);
- }
+ emitSPAdjustment(MF, MBB, MBBI, NumBytes, SAVErr, SAVEri);
+
+ MachineModuleInfo &MMI = MF.getMMI();
+ const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
+ MCSymbol *FrameLabel = MMI.getContext().CreateTempSymbol();
+ BuildMI(MBB, MBBI, dl, TII.get(SP::PROLOG_LABEL)).addSym(FrameLabel);
+
+ unsigned regFP = MRI->getDwarfRegNum(SP::I6, true);
+
+ // Emit ".cfi_def_cfa_register 30".
+ MMI.addFrameInst(MCCFIInstruction::createDefCfaRegister(FrameLabel,
+ regFP));
+ // Emit ".cfi_window_save".
+ MMI.addFrameInst(MCCFIInstruction::createWindowSave(FrameLabel));
+
+ unsigned regInRA = MRI->getDwarfRegNum(SP::I7, true);
+ unsigned regOutRA = MRI->getDwarfRegNum(SP::O7, true);
+ // Emit ".cfi_register 15, 31".
+ MMI.addFrameInst(MCCFIInstruction::createRegister(FrameLabel,
+ regOutRA,
+ regInRA));
}
void SparcFrameLowering::
@@ -77,15 +128,12 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const {
if (!hasReservedCallFrame(MF)) {
MachineInstr &MI = *I;
- DebugLoc DL = MI.getDebugLoc();
int Size = MI.getOperand(0).getImm();
if (MI.getOpcode() == SP::ADJCALLSTACKDOWN)
Size = -Size;
- const SparcInstrInfo &TII =
- *static_cast<const SparcInstrInfo*>(MF.getTarget().getInstrInfo());
+
if (Size)
- BuildMI(MBB, I, DL, TII.get(SP::ADDri), SP::O6).addReg(SP::O6)
- .addImm(Size);
+ emitSPAdjustment(MF, MBB, I, Size, SP::ADDrr, SP::ADDri);
}
MBB.erase(I);
}
@@ -112,21 +160,7 @@ void SparcFrameLowering::emitEpilogue(MachineFunction &MF,
return;
NumBytes = SubTarget.getAdjustedFrameSize(NumBytes);
-
- if (NumBytes < 4096) {
- BuildMI(MBB, MBBI, dl, TII.get(SP::ADDri), SP::O6)
- .addReg(SP::O6).addImm(NumBytes);
- } else {
- // Emit this the hard way. This clobbers G1 which we always know is
- // available here.
- unsigned OffHi = (unsigned)NumBytes >> 10U;
- BuildMI(MBB, MBBI, dl, TII.get(SP::SETHIi), SP::G1).addImm(OffHi);
- // Emit G1 = G1 + I6
- BuildMI(MBB, MBBI, dl, TII.get(SP::ORri), SP::G1)
- .addReg(SP::G1).addImm(NumBytes & ((1 << 10)-1));
- BuildMI(MBB, MBBI, dl, TII.get(SP::ADDrr), SP::O6)
- .addReg(SP::O6).addReg(SP::G1);
- }
+ emitSPAdjustment(MF, MBB, MBBI, NumBytes, SP::ADDrr, SP::ADDri);
}
bool SparcFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
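
With emitSPAdjustment factored out, the prologue, the epilogue, and call-frame pseudo elimination all share one decision: a single add for adjustments that fit a signed 13-bit immediate, sethi+or for larger nonnegative values, and sethi+xor for large negative values so the constant sign-extends on V9. A standalone sketch of that selection with one worked negative case; the text output is illustrative, the real code builds MachineInstrs:

#include <cstdint>
#include <cstdio>

// Which of the three shapes emitSPAdjustment picks for a given adjustment.
static const char *spAdjustShape(int64_t NumBytes) {
  if (NumBytes >= -4096 && NumBytes < 4096)   // fits the simm13 field
    return "add %sp, NumBytes, %sp";
  if (NumBytes >= 0)                          // zero-extending pair
    return "sethi %hi + or %lo + add";
  return "sethi %hix + xor %lox + add";       // sign-extending pair
}

int main() {
  // Worked case NumBytes = -8192: HIX22(-8192) = HI22(8191) = 7, so sethi
  // leaves 7 << 10 = 0x1c00 in %g1; LOX10(-8192) sign-extends to -1024, and
  // 0x1c00 ^ -1024 = -8192, the desired negative adjustment.
  std::printf("%d: %s\n", -4096, spAdjustShape(-4096));
  std::printf("%d: %s\n", 8000, spAdjustShape(8000));
  std::printf("%d: %s\n", -8192, spAdjustShape(-8192));
  return 0;
}
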
diff --git a/lib/Target/Sparc/SparcFrameLowering.h b/lib/Target/Sparc/SparcFrameLowering.h
index 8eaef59..072fde3 100644
--- a/lib/Target/Sparc/SparcFrameLowering.h
+++ b/lib/Target/Sparc/SparcFrameLowering.h
@@ -49,6 +49,14 @@ private:
// Returns true if MF is a leaf procedure.
bool isLeafProc(MachineFunction &MF) const;
+
+
+ // Emits code for adjusting SP in function prologue/epilogue.
+ void emitSPAdjustment(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ int NumBytes, unsigned ADDrr, unsigned ADDri) const;
+
};
} // End llvm namespace
diff --git a/lib/Target/Sparc/SparcISelDAGToDAG.cpp b/lib/Target/Sparc/SparcISelDAGToDAG.cpp
index db62151..b012bfd 100644
--- a/lib/Target/Sparc/SparcISelDAGToDAG.cpp
+++ b/lib/Target/Sparc/SparcISelDAGToDAG.cpp
@@ -80,7 +80,8 @@ bool SparcDAGToDAGISel::SelectADDRri(SDValue Addr,
return true;
}
if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
- Addr.getOpcode() == ISD::TargetGlobalAddress)
+ Addr.getOpcode() == ISD::TargetGlobalAddress ||
+ Addr.getOpcode() == ISD::TargetGlobalTLSAddress)
return false; // direct calls.
if (Addr.getOpcode() == ISD::ADD) {
@@ -117,7 +118,8 @@ bool SparcDAGToDAGISel::SelectADDRri(SDValue Addr,
bool SparcDAGToDAGISel::SelectADDRrr(SDValue Addr, SDValue &R1, SDValue &R2) {
if (Addr.getOpcode() == ISD::FrameIndex) return false;
if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
- Addr.getOpcode() == ISD::TargetGlobalAddress)
+ Addr.getOpcode() == ISD::TargetGlobalAddress ||
+ Addr.getOpcode() == ISD::TargetGlobalTLSAddress)
return false; // direct calls.
if (Addr.getOpcode() == ISD::ADD) {
@@ -139,8 +141,10 @@ bool SparcDAGToDAGISel::SelectADDRrr(SDValue Addr, SDValue &R1, SDValue &R2) {
SDNode *SparcDAGToDAGISel::Select(SDNode *N) {
SDLoc dl(N);
- if (N->isMachineOpcode())
+ if (N->isMachineOpcode()) {
+ N->setNodeId(-1);
return NULL; // Already selected.
+ }
switch (N->getOpcode()) {
default: break;
diff --git a/lib/Target/Sparc/SparcISelLowering.cpp b/lib/Target/Sparc/SparcISelLowering.cpp
index 4b0fa67..64625f7 100644
--- a/lib/Target/Sparc/SparcISelLowering.cpp
+++ b/lib/Target/Sparc/SparcISelLowering.cpp
@@ -14,6 +14,7 @@
#include "SparcISelLowering.h"
#include "SparcMachineFunctionInfo.h"
+#include "SparcRegisterInfo.h"
#include "SparcTargetMachine.h"
#include "MCTargetDesc/SparcBaseInfo.h"
#include "llvm/CodeGen/CallingConvLower.h"
@@ -648,6 +649,27 @@ SparcTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
return LowerCall_32(CLI, InVals);
}
+static bool hasReturnsTwiceAttr(SelectionDAG &DAG, SDValue Callee,
+ ImmutableCallSite *CS) {
+ if (CS)
+ return CS->hasFnAttr(Attribute::ReturnsTwice);
+
+ const Function *CalleeFn = 0;
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
+ CalleeFn = dyn_cast<Function>(G->getGlobal());
+ } else if (ExternalSymbolSDNode *E =
+ dyn_cast<ExternalSymbolSDNode>(Callee)) {
+ const Function *Fn = DAG.getMachineFunction().getFunction();
+ const Module *M = Fn->getParent();
+ const char *CalleeName = E->getSymbol();
+ CalleeFn = M->getFunction(CalleeName);
+ }
+
+ if (!CalleeFn)
+ return false;
+ return CalleeFn->hasFnAttribute(Attribute::ReturnsTwice);
+}
+
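
This helper feeds the register-mask selection added further down: a callee that may return twice cannot be trusted to leave the usual window registers intact on the second return, so such calls get the restricted RTCSR mask from SparcCallingConv.td (only %i6/%i7) instead of the normal CSR one. The setjmp family is the canonical source of the returns_twice attribute, as in this illustrative caller; user code, not part of the patch:

#include <csetjmp>
#include <cstdio>

static std::jmp_buf Buf;

int main() {
  // setjmp is declared returns_twice, so the lowering above would use the
  // RTCSR call-preserved mask for this call.
  if (setjmp(Buf) == 0) {
    std::printf("first return\n");
    std::longjmp(Buf, 1); // transfers control back into setjmp
  }
  std::printf("second return\n");
  return 0;
}
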
// Lower a call for the 32-bit ABI.
SDValue
SparcTargetLowering::LowerCall_32(TargetLowering::CallLoweringInfo &CLI,
@@ -861,6 +883,7 @@ SparcTargetLowering::LowerCall_32(TargetLowering::CallLoweringInfo &CLI,
}
unsigned SRetArgSize = (hasStructRetAttr)? getSRetArgSize(DAG, Callee):0;
+ bool hasReturnsTwice = hasReturnsTwiceAttr(DAG, Callee, CLI.CS);
// If the callee is a GlobalAddress node (quite common, every direct call is)
// turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
@@ -880,6 +903,16 @@ SparcTargetLowering::LowerCall_32(TargetLowering::CallLoweringInfo &CLI,
for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
Ops.push_back(DAG.getRegister(toCallerWindow(RegsToPass[i].first),
RegsToPass[i].second.getValueType()));
+
+ // Add a register mask operand representing the call-preserved registers.
+ const SparcRegisterInfo *TRI =
+ ((const SparcTargetMachine&)getTargetMachine()).getRegisterInfo();
+ const uint32_t *Mask = ((hasReturnsTwice)
+ ? TRI->getRTCallPreservedMask(CallConv)
+ : TRI->getCallPreservedMask(CallConv));
+ assert(Mask && "Missing call preserved mask for calling convention");
+ Ops.push_back(DAG.getRegisterMask(Mask));
+
if (InFlag.getNode())
Ops.push_back(InFlag);
@@ -908,6 +941,23 @@ SparcTargetLowering::LowerCall_32(TargetLowering::CallLoweringInfo &CLI,
return Chain;
}
+// This function returns true if CalleeName is an ABI function that returns
+// a long double (fp128).
+static bool isFP128ABICall(const char *CalleeName)
+{
+ static const char *const ABICalls[] =
+ { "_Q_add", "_Q_sub", "_Q_mul", "_Q_div",
+ "_Q_sqrt", "_Q_neg",
+ "_Q_itoq", "_Q_stoq", "_Q_dtoq", "_Q_utoq",
+ "_Q_lltoq", "_Q_ulltoq",
+ 0
+ };
+ for (const char * const *I = ABICalls; *I != 0; ++I)
+ if (strcmp(CalleeName, *I) == 0)
+ return true;
+ return false;
+}
+
unsigned
SparcTargetLowering::getSRetArgSize(SelectionDAG &DAG, SDValue Callee) const
{
@@ -918,7 +968,10 @@ SparcTargetLowering::getSRetArgSize(SelectionDAG &DAG, SDValue Callee) const
dyn_cast<ExternalSymbolSDNode>(Callee)) {
const Function *Fn = DAG.getMachineFunction().getFunction();
const Module *M = Fn->getParent();
- CalleeFn = M->getFunction(E->getSymbol());
+ const char *CalleeName = E->getSymbol();
+ CalleeFn = M->getFunction(CalleeName);
+ if (!CalleeFn && isFP128ABICall(CalleeName))
+ return 16; // Return sizeof(fp128)
}
if (!CalleeFn)
@@ -983,6 +1036,9 @@ SparcTargetLowering::LowerCall_64(TargetLowering::CallLoweringInfo &CLI,
SDLoc DL = CLI.DL;
SDValue Chain = CLI.Chain;
+ // The Sparc target does not yet support tail call optimization.
+ CLI.IsTailCall = false;
+
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CLI.CallConv, CLI.IsVarArg, DAG.getMachineFunction(),
@@ -1099,6 +1155,7 @@ SparcTargetLowering::LowerCall_64(TargetLowering::CallLoweringInfo &CLI,
// turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
// Likewise ExternalSymbol -> TargetExternalSymbol.
SDValue Callee = CLI.Callee;
+ bool hasReturnsTwice = hasReturnsTwiceAttr(DAG, Callee, CLI.CS);
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, getPointerTy());
else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Callee))
@@ -1112,6 +1169,15 @@ SparcTargetLowering::LowerCall_64(TargetLowering::CallLoweringInfo &CLI,
Ops.push_back(DAG.getRegister(RegsToPass[i].first,
RegsToPass[i].second.getValueType()));
+ // Add a register mask operand representing the call-preserved registers.
+ const SparcRegisterInfo *TRI =
+ ((const SparcTargetMachine&)getTargetMachine()).getRegisterInfo();
+ const uint32_t *Mask = ((hasReturnsTwice)
+ ? TRI->getRTCallPreservedMask(CLI.CallConv)
+ : TRI->getCallPreservedMask(CLI.CallConv));
+ assert(Mask && "Missing call preserved mask for calling convention");
+ Ops.push_back(DAG.getRegisterMask(Mask));
+
// Make sure the CopyToReg nodes are glued to the call instruction which
// consumes the registers.
if (InGlue.getNode())
@@ -1244,15 +1310,21 @@ SparcTargetLowering::SparcTargetLowering(TargetMachine &TM)
addRegisterClass(MVT::i32, &SP::IntRegsRegClass);
addRegisterClass(MVT::f32, &SP::FPRegsRegClass);
addRegisterClass(MVT::f64, &SP::DFPRegsRegClass);
+ addRegisterClass(MVT::f128, &SP::QFPRegsRegClass);
if (Subtarget->is64Bit())
addRegisterClass(MVT::i64, &SP::I64RegsRegClass);
// Turn FP extload into load/fextend
setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::f64, Expand);
+
// Sparc doesn't have i1 sign extending load
setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
+
// Turn FP truncstore into trunc + store.
setTruncStoreAction(MVT::f64, MVT::f32, Expand);
+ setTruncStoreAction(MVT::f128, MVT::f32, Expand);
+ setTruncStoreAction(MVT::f128, MVT::f64, Expand);
// Custom legalize GlobalAddress nodes into LO/HI parts.
setOperationAction(ISD::GlobalAddress, getPointerTy(), Custom);
@@ -1271,13 +1343,25 @@ SparcTargetLowering::SparcTargetLowering(TargetMachine &TM)
setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
+ // ... nor does SparcV9.
+ if (Subtarget->is64Bit()) {
+ setOperationAction(ISD::UREM, MVT::i64, Expand);
+ setOperationAction(ISD::SREM, MVT::i64, Expand);
+ setOperationAction(ISD::SDIVREM, MVT::i64, Expand);
+ setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
+ }
+
// Custom expand fp<->sint
setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
+ setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
- // Expand fp<->uint
- setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
- setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);
+ // Custom expand fp<->uint
+ setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
setOperationAction(ISD::BITCAST, MVT::f32, Expand);
setOperationAction(ISD::BITCAST, MVT::i32, Expand);
@@ -1286,9 +1370,12 @@ SparcTargetLowering::SparcTargetLowering(TargetMachine &TM)
setOperationAction(ISD::SELECT, MVT::i32, Expand);
setOperationAction(ISD::SELECT, MVT::f32, Expand);
setOperationAction(ISD::SELECT, MVT::f64, Expand);
+ setOperationAction(ISD::SELECT, MVT::f128, Expand);
+
setOperationAction(ISD::SETCC, MVT::i32, Expand);
setOperationAction(ISD::SETCC, MVT::f32, Expand);
setOperationAction(ISD::SETCC, MVT::f64, Expand);
+ setOperationAction(ISD::SETCC, MVT::f128, Expand);
// Sparc doesn't have BRCOND either, it has BR_CC.
setOperationAction(ISD::BRCOND, MVT::Other, Expand);
@@ -1297,18 +1384,34 @@ SparcTargetLowering::SparcTargetLowering(TargetMachine &TM)
setOperationAction(ISD::BR_CC, MVT::i32, Custom);
setOperationAction(ISD::BR_CC, MVT::f32, Custom);
setOperationAction(ISD::BR_CC, MVT::f64, Custom);
+ setOperationAction(ISD::BR_CC, MVT::f128, Custom);
setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
+ setOperationAction(ISD::SELECT_CC, MVT::f128, Custom);
if (Subtarget->is64Bit()) {
+ setOperationAction(ISD::ADDC, MVT::i64, Custom);
+ setOperationAction(ISD::ADDE, MVT::i64, Custom);
+ setOperationAction(ISD::SUBC, MVT::i64, Custom);
+ setOperationAction(ISD::SUBE, MVT::i64, Custom);
setOperationAction(ISD::BITCAST, MVT::f64, Expand);
setOperationAction(ISD::BITCAST, MVT::i64, Expand);
setOperationAction(ISD::SELECT, MVT::i64, Expand);
setOperationAction(ISD::SETCC, MVT::i64, Expand);
setOperationAction(ISD::BR_CC, MVT::i64, Custom);
setOperationAction(ISD::SELECT_CC, MVT::i64, Custom);
+
+ setOperationAction(ISD::CTPOP, MVT::i64, Legal);
+ setOperationAction(ISD::CTTZ , MVT::i64, Expand);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand);
+ setOperationAction(ISD::CTLZ , MVT::i64, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Expand);
+ setOperationAction(ISD::BSWAP, MVT::i64, Expand);
+ setOperationAction(ISD::ROTL , MVT::i64, Expand);
+ setOperationAction(ISD::ROTR , MVT::i64, Expand);
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom);
}
// FIXME: There are instructions available for ATOMIC_FENCE
@@ -1321,6 +1424,11 @@ SparcTargetLowering::SparcTargetLowering(TargetMachine &TM)
setOperationAction(ISD::FABS, MVT::f64, Custom);
}
+ setOperationAction(ISD::FSIN , MVT::f128, Expand);
+ setOperationAction(ISD::FCOS , MVT::f128, Expand);
+ setOperationAction(ISD::FSINCOS, MVT::f128, Expand);
+ setOperationAction(ISD::FREM , MVT::f128, Expand);
+ setOperationAction(ISD::FMA , MVT::f128, Expand);
setOperationAction(ISD::FSIN , MVT::f64, Expand);
setOperationAction(ISD::FCOS , MVT::f64, Expand);
setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
@@ -1339,8 +1447,10 @@ SparcTargetLowering::SparcTargetLowering(TargetMachine &TM)
setOperationAction(ISD::ROTL , MVT::i32, Expand);
setOperationAction(ISD::ROTR , MVT::i32, Expand);
setOperationAction(ISD::BSWAP, MVT::i32, Expand);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f128, Expand);
setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
+ setOperationAction(ISD::FPOW , MVT::f128, Expand);
setOperationAction(ISD::FPOW , MVT::f64, Expand);
setOperationAction(ISD::FPOW , MVT::f32, Expand);
@@ -1352,7 +1462,12 @@ SparcTargetLowering::SparcTargetLowering(TargetMachine &TM)
setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
- setOperationAction(ISD::EH_LABEL, MVT::Other, Expand);
+ if (Subtarget->is64Bit()) {
+ setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
+ setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
+ setOperationAction(ISD::MULHU, MVT::i64, Expand);
+ setOperationAction(ISD::MULHS, MVT::i64, Expand);
+ }
// VASTART needs to be custom lowered to use the VarArgsFrameIndex.
setOperationAction(ISD::VASTART , MVT::Other, Custom);
@@ -1366,14 +1481,100 @@ SparcTargetLowering::SparcTargetLowering(TargetMachine &TM)
setOperationAction(ISD::STACKRESTORE , MVT::Other, Expand);
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Custom);
- // No debug info support yet.
- setOperationAction(ISD::EH_LABEL, MVT::Other, Expand);
+ setExceptionPointerRegister(SP::I0);
+ setExceptionSelectorRegister(SP::I1);
setStackPointerRegisterToSaveRestore(SP::O6);
if (Subtarget->isV9())
setOperationAction(ISD::CTPOP, MVT::i32, Legal);
+ if (Subtarget->isV9() && Subtarget->hasHardQuad()) {
+ setOperationAction(ISD::LOAD, MVT::f128, Legal);
+ setOperationAction(ISD::STORE, MVT::f128, Legal);
+ } else {
+ setOperationAction(ISD::LOAD, MVT::f128, Custom);
+ setOperationAction(ISD::STORE, MVT::f128, Custom);
+ }
+
+ if (Subtarget->hasHardQuad()) {
+ setOperationAction(ISD::FADD, MVT::f128, Legal);
+ setOperationAction(ISD::FSUB, MVT::f128, Legal);
+ setOperationAction(ISD::FMUL, MVT::f128, Legal);
+ setOperationAction(ISD::FDIV, MVT::f128, Legal);
+ setOperationAction(ISD::FSQRT, MVT::f128, Legal);
+ setOperationAction(ISD::FP_EXTEND, MVT::f128, Legal);
+ setOperationAction(ISD::FP_ROUND, MVT::f64, Legal);
+ if (Subtarget->isV9()) {
+ setOperationAction(ISD::FNEG, MVT::f128, Legal);
+ setOperationAction(ISD::FABS, MVT::f128, Legal);
+ } else {
+ setOperationAction(ISD::FNEG, MVT::f128, Custom);
+ setOperationAction(ISD::FABS, MVT::f128, Custom);
+ }
+
+ if (!Subtarget->is64Bit()) {
+ setLibcallName(RTLIB::FPTOSINT_F128_I64, "_Q_qtoll");
+ setLibcallName(RTLIB::FPTOUINT_F128_I64, "_Q_qtoull");
+ setLibcallName(RTLIB::SINTTOFP_I64_F128, "_Q_lltoq");
+ setLibcallName(RTLIB::UINTTOFP_I64_F128, "_Q_ulltoq");
+ }
+
+ } else {
+ // Custom legalize f128 operations.
+
+ setOperationAction(ISD::FADD, MVT::f128, Custom);
+ setOperationAction(ISD::FSUB, MVT::f128, Custom);
+ setOperationAction(ISD::FMUL, MVT::f128, Custom);
+ setOperationAction(ISD::FDIV, MVT::f128, Custom);
+ setOperationAction(ISD::FSQRT, MVT::f128, Custom);
+ setOperationAction(ISD::FNEG, MVT::f128, Custom);
+ setOperationAction(ISD::FABS, MVT::f128, Custom);
+
+ setOperationAction(ISD::FP_EXTEND, MVT::f128, Custom);
+ setOperationAction(ISD::FP_ROUND, MVT::f64, Custom);
+ setOperationAction(ISD::FP_ROUND, MVT::f32, Custom);
+
+ // Set up runtime library names.
+ if (Subtarget->is64Bit()) {
+ setLibcallName(RTLIB::ADD_F128, "_Qp_add");
+ setLibcallName(RTLIB::SUB_F128, "_Qp_sub");
+ setLibcallName(RTLIB::MUL_F128, "_Qp_mul");
+ setLibcallName(RTLIB::DIV_F128, "_Qp_div");
+ setLibcallName(RTLIB::SQRT_F128, "_Qp_sqrt");
+ setLibcallName(RTLIB::FPTOSINT_F128_I32, "_Qp_qtoi");
+ setLibcallName(RTLIB::FPTOUINT_F128_I32, "_Qp_qtoui");
+ setLibcallName(RTLIB::SINTTOFP_I32_F128, "_Qp_itoq");
+ setLibcallName(RTLIB::UINTTOFP_I32_F128, "_Qp_uitoq");
+ setLibcallName(RTLIB::FPTOSINT_F128_I64, "_Qp_qtox");
+ setLibcallName(RTLIB::FPTOUINT_F128_I64, "_Qp_qtoux");
+ setLibcallName(RTLIB::SINTTOFP_I64_F128, "_Qp_xtoq");
+ setLibcallName(RTLIB::UINTTOFP_I64_F128, "_Qp_uxtoq");
+ setLibcallName(RTLIB::FPEXT_F32_F128, "_Qp_stoq");
+ setLibcallName(RTLIB::FPEXT_F64_F128, "_Qp_dtoq");
+ setLibcallName(RTLIB::FPROUND_F128_F32, "_Qp_qtos");
+ setLibcallName(RTLIB::FPROUND_F128_F64, "_Qp_qtod");
+ } else {
+ setLibcallName(RTLIB::ADD_F128, "_Q_add");
+ setLibcallName(RTLIB::SUB_F128, "_Q_sub");
+ setLibcallName(RTLIB::MUL_F128, "_Q_mul");
+ setLibcallName(RTLIB::DIV_F128, "_Q_div");
+ setLibcallName(RTLIB::SQRT_F128, "_Q_sqrt");
+ setLibcallName(RTLIB::FPTOSINT_F128_I32, "_Q_qtoi");
+ setLibcallName(RTLIB::FPTOUINT_F128_I32, "_Q_qtou");
+ setLibcallName(RTLIB::SINTTOFP_I32_F128, "_Q_itoq");
+ setLibcallName(RTLIB::UINTTOFP_I32_F128, "_Q_utoq");
+ setLibcallName(RTLIB::FPTOSINT_F128_I64, "_Q_qtoll");
+ setLibcallName(RTLIB::FPTOUINT_F128_I64, "_Q_qtoull");
+ setLibcallName(RTLIB::SINTTOFP_I64_F128, "_Q_lltoq");
+ setLibcallName(RTLIB::UINTTOFP_I64_F128, "_Q_ulltoq");
+ setLibcallName(RTLIB::FPEXT_F32_F128, "_Q_stoq");
+ setLibcallName(RTLIB::FPEXT_F64_F128, "_Q_dtoq");
+ setLibcallName(RTLIB::FPROUND_F128_F32, "_Q_qtos");
+ setLibcallName(RTLIB::FPROUND_F128_F64, "_Q_qtod");
+ }
+ }
+
setMinFunctionAlignment(2);
computeRegisterProperties();
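
The _Q_* (V8) and _Qp_* (V9) names registered above belong to the SPARC soft quad-float ABI, in which f128 values travel by reference; LowerF128Op and LowerF128_LibCallArg below implement exactly that by passing stack-slot pointers and, on 32-bit, an sret pointer for the result. As a reading aid, the effective C-level shapes this lowering assumes are roughly the following, reconstructed from the lowering code rather than quoted from any header:

// Reconstructed, non-authoritative prototypes; "long double" stands for the
// 128-bit IEEE quad these calls operate on.
extern "C" {
// V9 flavor: explicit result pointer, operands by reference.
void _Qp_add(long double *Res, const long double *X, const long double *Y);
// V8 flavor: the f128 result comes back through a hidden sret pointer.
long double _Q_add(const long double *X, const long double *Y);
// Comparisons return a small integer instead of an f128.
int _Qp_feq(const long double *X, const long double *Y);
}
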
@@ -1394,13 +1595,24 @@ const char *SparcTargetLowering::getTargetNodeName(unsigned Opcode) const {
case SPISD::Lo: return "SPISD::Lo";
case SPISD::FTOI: return "SPISD::FTOI";
case SPISD::ITOF: return "SPISD::ITOF";
+ case SPISD::FTOX: return "SPISD::FTOX";
+ case SPISD::XTOF: return "SPISD::XTOF";
case SPISD::CALL: return "SPISD::CALL";
case SPISD::RET_FLAG: return "SPISD::RET_FLAG";
case SPISD::GLOBAL_BASE_REG: return "SPISD::GLOBAL_BASE_REG";
case SPISD::FLUSHW: return "SPISD::FLUSHW";
+ case SPISD::TLS_ADD: return "SPISD::TLS_ADD";
+ case SPISD::TLS_LD: return "SPISD::TLS_LD";
+ case SPISD::TLS_CALL: return "SPISD::TLS_CALL";
}
}
+EVT SparcTargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
+ if (!VT.isVector())
+ return MVT::i32;
+ return VT.changeVectorElementTypeToInteger();
+}
+
/// isMaskedValueZeroForTargetNode - Return true if 'Op & Mask' is known to
/// be zero. Op is expected to be a target specific node. Used by DAG
/// combiner.
@@ -1505,6 +1717,10 @@ SDValue SparcTargetLowering::makeAddress(SDValue Op, SelectionDAG &DAG) const {
SDValue HiLo = makeHiLoPair(Op, SPII::MO_HI, SPII::MO_LO, DAG);
SDValue GlobalBase = DAG.getNode(SPISD::GLOBAL_BASE_REG, DL, VT);
SDValue AbsAddr = DAG.getNode(ISD::ADD, DL, VT, GlobalBase, HiLo);
+ // GLOBAL_BASE_REG is codegen'ed with a call instruction. Inform MFI that
+ // this function has calls.
+ MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+ MFI->setHasCalls(true);
return DAG.getLoad(VT, DL, DAG.getEntryNode(), AbsAddr,
MachinePointerInfo::getGOT(), false, false, false, 0);
}
@@ -1513,6 +1729,7 @@ SDValue SparcTargetLowering::makeAddress(SDValue Op, SelectionDAG &DAG) const {
switch(getTargetMachine().getCodeModel()) {
default:
llvm_unreachable("Unsupported absolute code model");
+ case CodeModel::JITDefault:
case CodeModel::Small:
// abs32.
return makeHiLoPair(Op, SPII::MO_HI, SPII::MO_LO, DAG);
@@ -1549,23 +1766,430 @@ SDValue SparcTargetLowering::LowerBlockAddress(SDValue Op,
return makeAddress(Op, DAG);
}
-static SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) {
+SDValue SparcTargetLowering::LowerGlobalTLSAddress(SDValue Op,
+ SelectionDAG &DAG) const {
+
+ GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
+ SDLoc DL(GA);
+ const GlobalValue *GV = GA->getGlobal();
+ EVT PtrVT = getPointerTy();
+
+ TLSModel::Model model = getTargetMachine().getTLSModel(GV);
+
+ if (model == TLSModel::GeneralDynamic || model == TLSModel::LocalDynamic) {
+ unsigned HiTF = ((model == TLSModel::GeneralDynamic)? SPII::MO_TLS_GD_HI22
+ : SPII::MO_TLS_LDM_HI22);
+ unsigned LoTF = ((model == TLSModel::GeneralDynamic)? SPII::MO_TLS_GD_LO10
+ : SPII::MO_TLS_LDM_LO10);
+ unsigned addTF = ((model == TLSModel::GeneralDynamic)? SPII::MO_TLS_GD_ADD
+ : SPII::MO_TLS_LDM_ADD);
+ unsigned callTF = ((model == TLSModel::GeneralDynamic)? SPII::MO_TLS_GD_CALL
+ : SPII::MO_TLS_LDM_CALL);
+
+ SDValue HiLo = makeHiLoPair(Op, HiTF, LoTF, DAG);
+ SDValue Base = DAG.getNode(SPISD::GLOBAL_BASE_REG, DL, PtrVT);
+ SDValue Argument = DAG.getNode(SPISD::TLS_ADD, DL, PtrVT, Base, HiLo,
+ withTargetFlags(Op, addTF, DAG));
+
+ SDValue Chain = DAG.getEntryNode();
+ SDValue InFlag;
+
+ Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(1, true), DL);
+ Chain = DAG.getCopyToReg(Chain, DL, SP::O0, Argument, InFlag);
+ InFlag = Chain.getValue(1);
+ SDValue Callee = DAG.getTargetExternalSymbol("__tls_get_addr", PtrVT);
+ SDValue Symbol = withTargetFlags(Op, callTF, DAG);
+
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+ SmallVector<SDValue, 4> Ops;
+ Ops.push_back(Chain);
+ Ops.push_back(Callee);
+ Ops.push_back(Symbol);
+ Ops.push_back(DAG.getRegister(SP::O0, PtrVT));
+ const uint32_t *Mask = getTargetMachine()
+ .getRegisterInfo()->getCallPreservedMask(CallingConv::C);
+ assert(Mask && "Missing call preserved mask for calling convention");
+ Ops.push_back(DAG.getRegisterMask(Mask));
+ Ops.push_back(InFlag);
+ Chain = DAG.getNode(SPISD::TLS_CALL, DL, NodeTys, &Ops[0], Ops.size());
+ InFlag = Chain.getValue(1);
+ Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(1, true),
+ DAG.getIntPtrConstant(0, true), InFlag, DL);
+ InFlag = Chain.getValue(1);
+ SDValue Ret = DAG.getCopyFromReg(Chain, DL, SP::O0, PtrVT, InFlag);
+
+ if (model != TLSModel::LocalDynamic)
+ return Ret;
+
+ SDValue Hi = DAG.getNode(SPISD::Hi, DL, PtrVT,
+ withTargetFlags(Op, SPII::MO_TLS_LDO_HIX22, DAG));
+ SDValue Lo = DAG.getNode(SPISD::Lo, DL, PtrVT,
+ withTargetFlags(Op, SPII::MO_TLS_LDO_LOX10, DAG));
+ HiLo = DAG.getNode(ISD::XOR, DL, PtrVT, Hi, Lo);
+ return DAG.getNode(SPISD::TLS_ADD, DL, PtrVT, Ret, HiLo,
+ withTargetFlags(Op, SPII::MO_TLS_LDO_ADD, DAG));
+ }
+
+ if (model == TLSModel::InitialExec) {
+ unsigned ldTF = ((PtrVT == MVT::i64)? SPII::MO_TLS_IE_LDX
+ : SPII::MO_TLS_IE_LD);
+
+ SDValue Base = DAG.getNode(SPISD::GLOBAL_BASE_REG, DL, PtrVT);
+
+ // GLOBAL_BASE_REG is codegen'ed with a call instruction. Inform MFI that
+ // this function has calls.
+ MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+ MFI->setHasCalls(true);
+
+ SDValue TGA = makeHiLoPair(Op,
+ SPII::MO_TLS_IE_HI22, SPII::MO_TLS_IE_LO10, DAG);
+ SDValue Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, Base, TGA);
+ SDValue Offset = DAG.getNode(SPISD::TLS_LD,
+ DL, PtrVT, Ptr,
+ withTargetFlags(Op, ldTF, DAG));
+ return DAG.getNode(SPISD::TLS_ADD, DL, PtrVT,
+ DAG.getRegister(SP::G7, PtrVT), Offset,
+ withTargetFlags(Op, SPII::MO_TLS_IE_ADD, DAG));
+ }
+
+ assert(model == TLSModel::LocalExec);
+ SDValue Hi = DAG.getNode(SPISD::Hi, DL, PtrVT,
+ withTargetFlags(Op, SPII::MO_TLS_LE_HIX22, DAG));
+ SDValue Lo = DAG.getNode(SPISD::Lo, DL, PtrVT,
+ withTargetFlags(Op, SPII::MO_TLS_LE_LOX10, DAG));
+ SDValue Offset = DAG.getNode(ISD::XOR, DL, PtrVT, Hi, Lo);
+
+ return DAG.getNode(ISD::ADD, DL, PtrVT,
+ DAG.getRegister(SP::G7, PtrVT), Offset);
+}
+
+SDValue
+SparcTargetLowering::LowerF128_LibCallArg(SDValue Chain, ArgListTy &Args,
+ SDValue Arg, SDLoc DL,
+ SelectionDAG &DAG) const {
+ MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+ EVT ArgVT = Arg.getValueType();
+ Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
+
+ ArgListEntry Entry;
+ Entry.Node = Arg;
+ Entry.Ty = ArgTy;
+
+ if (ArgTy->isFP128Ty()) {
+ // Create a stack object and pass the pointer to the library function.
+ int FI = MFI->CreateStackObject(16, 8, false);
+ SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy());
+ Chain = DAG.getStore(Chain,
+ DL,
+ Entry.Node,
+ FIPtr,
+ MachinePointerInfo(),
+ false,
+ false,
+ 8);
+
+ Entry.Node = FIPtr;
+ Entry.Ty = PointerType::getUnqual(ArgTy);
+ }
+ Args.push_back(Entry);
+ return Chain;
+}
+
+SDValue
+SparcTargetLowering::LowerF128Op(SDValue Op, SelectionDAG &DAG,
+ const char *LibFuncName,
+ unsigned numArgs) const {
+
+ ArgListTy Args;
+
+ MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+
+ SDValue Callee = DAG.getExternalSymbol(LibFuncName, getPointerTy());
+ Type *RetTy = Op.getValueType().getTypeForEVT(*DAG.getContext());
+ Type *RetTyABI = RetTy;
+ SDValue Chain = DAG.getEntryNode();
+ SDValue RetPtr;
+
+ if (RetTy->isFP128Ty()) {
+ // Create a Stack Object to receive the return value of type f128.
+ ArgListEntry Entry;
+ int RetFI = MFI->CreateStackObject(16, 8, false);
+ RetPtr = DAG.getFrameIndex(RetFI, getPointerTy());
+ Entry.Node = RetPtr;
+ Entry.Ty = PointerType::getUnqual(RetTy);
+ if (!Subtarget->is64Bit())
+ Entry.isSRet = true;
+ Entry.isReturned = false;
+ Args.push_back(Entry);
+ RetTyABI = Type::getVoidTy(*DAG.getContext());
+ }
+
+ assert(Op->getNumOperands() >= numArgs && "Not enough operands!");
+ for (unsigned i = 0, e = numArgs; i != e; ++i) {
+ Chain = LowerF128_LibCallArg(Chain, Args, Op.getOperand(i), SDLoc(Op), DAG);
+ }
+ TargetLowering::
+ CallLoweringInfo CLI(Chain,
+ RetTyABI,
+ false, false, false, false,
+ 0, CallingConv::C,
+ false, false, true,
+ Callee, Args, DAG, SDLoc(Op));
+ std::pair<SDValue, SDValue> CallInfo = LowerCallTo(CLI);
+
+ // The call's chain is in the second result.
+ if (RetTyABI == RetTy)
+ return CallInfo.first;
+
+ assert(RetTy->isFP128Ty() && "Unexpected return type!");
+
+ Chain = CallInfo.second;
+
+ // Load RetPtr to get the return value.
+ return DAG.getLoad(Op.getValueType(),
+ SDLoc(Op),
+ Chain,
+ RetPtr,
+ MachinePointerInfo(),
+ false, false, false, 8);
+}
+
+SDValue
+SparcTargetLowering::LowerF128Compare(SDValue LHS, SDValue RHS,
+ unsigned &SPCC,
+ SDLoc DL,
+ SelectionDAG &DAG) const {
+
+ const char *LibCall = 0;
+ bool is64Bit = Subtarget->is64Bit();
+ switch(SPCC) {
+ default: llvm_unreachable("Unhandled conditional code!");
+ case SPCC::FCC_E : LibCall = is64Bit? "_Qp_feq" : "_Q_feq"; break;
+ case SPCC::FCC_NE : LibCall = is64Bit? "_Qp_fne" : "_Q_fne"; break;
+ case SPCC::FCC_L : LibCall = is64Bit? "_Qp_flt" : "_Q_flt"; break;
+ case SPCC::FCC_G : LibCall = is64Bit? "_Qp_fgt" : "_Q_fgt"; break;
+ case SPCC::FCC_LE : LibCall = is64Bit? "_Qp_fle" : "_Q_fle"; break;
+ case SPCC::FCC_GE : LibCall = is64Bit? "_Qp_fge" : "_Q_fge"; break;
+ case SPCC::FCC_UL :
+ case SPCC::FCC_ULE:
+ case SPCC::FCC_UG :
+ case SPCC::FCC_UGE:
+ case SPCC::FCC_U :
+ case SPCC::FCC_O :
+ case SPCC::FCC_LG :
+ case SPCC::FCC_UE : LibCall = is64Bit? "_Qp_cmp" : "_Q_cmp"; break;
+ }
+
+ SDValue Callee = DAG.getExternalSymbol(LibCall, getPointerTy());
+ Type *RetTy = Type::getInt32Ty(*DAG.getContext());
+ ArgListTy Args;
+ SDValue Chain = DAG.getEntryNode();
+ Chain = LowerF128_LibCallArg(Chain, Args, LHS, DL, DAG);
+ Chain = LowerF128_LibCallArg(Chain, Args, RHS, DL, DAG);
+
+ TargetLowering::
+ CallLoweringInfo CLI(Chain,
+ RetTy,
+ false, false, false, false,
+ 0, CallingConv::C,
+ false, false, true,
+ Callee, Args, DAG, DL);
+
+ std::pair<SDValue, SDValue> CallInfo = LowerCallTo(CLI);
+
+ // The result is in the first value, and the chain is in the second.
+ SDValue Result = CallInfo.first;
+
+ switch(SPCC) {
+ default: {
+ SDValue RHS = DAG.getTargetConstant(0, Result.getValueType());
+ SPCC = SPCC::ICC_NE;
+ return DAG.getNode(SPISD::CMPICC, DL, MVT::Glue, Result, RHS);
+ }
+ case SPCC::FCC_UL : {
+ SDValue Mask = DAG.getTargetConstant(1, Result.getValueType());
+ Result = DAG.getNode(ISD::AND, DL, Result.getValueType(), Result, Mask);
+ SDValue RHS = DAG.getTargetConstant(0, Result.getValueType());
+ SPCC = SPCC::ICC_NE;
+ return DAG.getNode(SPISD::CMPICC, DL, MVT::Glue, Result, RHS);
+ }
+ case SPCC::FCC_ULE: {
+ SDValue RHS = DAG.getTargetConstant(2, Result.getValueType());
+ SPCC = SPCC::ICC_NE;
+ return DAG.getNode(SPISD::CMPICC, DL, MVT::Glue, Result, RHS);
+ }
+ case SPCC::FCC_UG : {
+ SDValue RHS = DAG.getTargetConstant(1, Result.getValueType());
+ SPCC = SPCC::ICC_G;
+ return DAG.getNode(SPISD::CMPICC, DL, MVT::Glue, Result, RHS);
+ }
+ case SPCC::FCC_UGE: {
+ SDValue RHS = DAG.getTargetConstant(1, Result.getValueType());
+ SPCC = SPCC::ICC_NE;
+ return DAG.getNode(SPISD::CMPICC, DL, MVT::Glue, Result, RHS);
+ }
+
+ case SPCC::FCC_U : {
+ SDValue RHS = DAG.getTargetConstant(3, Result.getValueType());
+ SPCC = SPCC::ICC_E;
+ return DAG.getNode(SPISD::CMPICC, DL, MVT::Glue, Result, RHS);
+ }
+ case SPCC::FCC_O : {
+ SDValue RHS = DAG.getTargetConstant(3, Result.getValueType());
+ SPCC = SPCC::ICC_NE;
+ return DAG.getNode(SPISD::CMPICC, DL, MVT::Glue, Result, RHS);
+ }
+ case SPCC::FCC_LG : {
+ SDValue Mask = DAG.getTargetConstant(3, Result.getValueType());
+ Result = DAG.getNode(ISD::AND, DL, Result.getValueType(), Result, Mask);
+ SDValue RHS = DAG.getTargetConstant(0, Result.getValueType());
+ SPCC = SPCC::ICC_NE;
+ return DAG.getNode(SPISD::CMPICC, DL, MVT::Glue, Result, RHS);
+ }
+ case SPCC::FCC_UE : {
+ SDValue Mask = DAG.getTargetConstant(3, Result.getValueType());
+ Result = DAG.getNode(ISD::AND, DL, Result.getValueType(), Result, Mask);
+ SDValue RHS = DAG.getTargetConstant(0, Result.getValueType());
+ SPCC = SPCC::ICC_E;
+ return DAG.getNode(SPISD::CMPICC, DL, MVT::Glue, Result, RHS);
+ }
+ }
+}
+
+static SDValue
+LowerF128_FPEXTEND(SDValue Op, SelectionDAG &DAG,
+ const SparcTargetLowering &TLI) {
+
+ if (Op.getOperand(0).getValueType() == MVT::f64)
+ return TLI.LowerF128Op(Op, DAG,
+ TLI.getLibcallName(RTLIB::FPEXT_F64_F128), 1);
+
+ if (Op.getOperand(0).getValueType() == MVT::f32)
+ return TLI.LowerF128Op(Op, DAG,
+ TLI.getLibcallName(RTLIB::FPEXT_F32_F128), 1);
+
+ llvm_unreachable("fpextend with non-float operand!");
+ return SDValue(0, 0);
+}
+
+static SDValue
+LowerF128_FPROUND(SDValue Op, SelectionDAG &DAG,
+ const SparcTargetLowering &TLI) {
+ // FP_ROUND on f64 and f32 are legal.
+ if (Op.getOperand(0).getValueType() != MVT::f128)
+ return Op;
+
+ if (Op.getValueType() == MVT::f64)
+ return TLI.LowerF128Op(Op, DAG,
+ TLI.getLibcallName(RTLIB::FPROUND_F128_F64), 1);
+ if (Op.getValueType() == MVT::f32)
+ return TLI.LowerF128Op(Op, DAG,
+ TLI.getLibcallName(RTLIB::FPROUND_F128_F32), 1);
+
+ llvm_unreachable("fpround to non-float!");
+ return SDValue(0, 0);
+}
+
+static SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG,
+ const SparcTargetLowering &TLI,
+ bool hasHardQuad) {
+ SDLoc dl(Op);
+ EVT VT = Op.getValueType();
+ assert(VT == MVT::i32 || VT == MVT::i64);
+
+ // Expand f128 operations to fp128 ABI calls.
+ if (Op.getOperand(0).getValueType() == MVT::f128
+ && (!hasHardQuad || !TLI.isTypeLegal(VT))) {
+ const char *libName = TLI.getLibcallName(VT == MVT::i32
+ ? RTLIB::FPTOSINT_F128_I32
+ : RTLIB::FPTOSINT_F128_I64);
+ return TLI.LowerF128Op(Op, DAG, libName, 1);
+ }
+
+ // Expand if the resulting type is illegal.
+ if (!TLI.isTypeLegal(VT))
+ return SDValue(0, 0);
+
+ // Otherwise, convert the fp value to an integer in an FP register.
+ if (VT == MVT::i32)
+ Op = DAG.getNode(SPISD::FTOI, dl, MVT::f32, Op.getOperand(0));
+ else
+ Op = DAG.getNode(SPISD::FTOX, dl, MVT::f64, Op.getOperand(0));
+
+ return DAG.getNode(ISD::BITCAST, dl, VT, Op);
+}
+
+static SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG,
+ const SparcTargetLowering &TLI,
+ bool hasHardQuad) {
+ SDLoc dl(Op);
+ EVT OpVT = Op.getOperand(0).getValueType();
+ assert(OpVT == MVT::i32 || (OpVT == MVT::i64));
+
+ EVT floatVT = (OpVT == MVT::i32) ? MVT::f32 : MVT::f64;
+
+ // Expand f128 operations to fp128 ABI calls.
+ if (Op.getValueType() == MVT::f128
+ && (!hasHardQuad || !TLI.isTypeLegal(OpVT))) {
+ const char *libName = TLI.getLibcallName(OpVT == MVT::i32
+ ? RTLIB::SINTTOFP_I32_F128
+ : RTLIB::SINTTOFP_I64_F128);
+ return TLI.LowerF128Op(Op, DAG, libName, 1);
+ }
+
+ // Expand if the operand type is illegal.
+ if (!TLI.isTypeLegal(OpVT))
+ return SDValue(0, 0);
+
+ // Otherwise, convert the int value to FP in an FP register.
+ SDValue Tmp = DAG.getNode(ISD::BITCAST, dl, floatVT, Op.getOperand(0));
+ unsigned opcode = (OpVT == MVT::i32)? SPISD::ITOF : SPISD::XTOF;
+ return DAG.getNode(opcode, dl, Op.getValueType(), Tmp);
+}
+
+static SDValue LowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG,
+ const SparcTargetLowering &TLI,
+ bool hasHardQuad) {
SDLoc dl(Op);
- // Convert the fp value to integer in an FP register.
- assert(Op.getValueType() == MVT::i32);
- Op = DAG.getNode(SPISD::FTOI, dl, MVT::f32, Op.getOperand(0));
- return DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
+ EVT VT = Op.getValueType();
+
+ // Expand if this does not involve f128, or if the target supports quad
+ // floating point instructions and the resulting type is legal.
+ if (Op.getOperand(0).getValueType() != MVT::f128 ||
+ (hasHardQuad && TLI.isTypeLegal(VT)))
+ return SDValue(0, 0);
+
+ assert(VT == MVT::i32 || VT == MVT::i64);
+
+ return TLI.LowerF128Op(Op, DAG,
+ TLI.getLibcallName(VT == MVT::i32
+ ? RTLIB::FPTOUINT_F128_I32
+ : RTLIB::FPTOUINT_F128_I64),
+ 1);
}
-static SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
+static SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG,
+ const SparcTargetLowering &TLI,
+ bool hasHardQuad) {
SDLoc dl(Op);
- assert(Op.getOperand(0).getValueType() == MVT::i32);
- SDValue Tmp = DAG.getNode(ISD::BITCAST, dl, MVT::f32, Op.getOperand(0));
- // Convert the int value to FP in an FP register.
- return DAG.getNode(SPISD::ITOF, dl, Op.getValueType(), Tmp);
+ EVT OpVT = Op.getOperand(0).getValueType();
+ assert(OpVT == MVT::i32 || OpVT == MVT::i64);
+
+ // Expand if this does not involve f128, or if the target supports quad
+ // floating point instructions and the operand type is legal.
+ if (Op.getValueType() != MVT::f128 || (hasHardQuad && TLI.isTypeLegal(OpVT)))
+ return SDValue(0, 0);
+
+ return TLI.LowerF128Op(Op, DAG,
+ TLI.getLibcallName(OpVT == MVT::i32
+ ? RTLIB::UINTTOFP_I32_F128
+ : RTLIB::UINTTOFP_I64_F128),
+ 1);
}
-static SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) {
+static SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG,
+ const SparcTargetLowering &TLI,
+ bool hasHardQuad) {
SDValue Chain = Op.getOperand(0);
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
SDValue LHS = Op.getOperand(2);
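
LowerF128Compare above handles the ordered predicates with dedicated _Q_f*/_Qp_f* calls and funnels every unordered-aware predicate through _Q_cmp/_Qp_cmp, whose integer result is then re-tested with integer condition codes. The switch is easiest to read against the comparison-result encoding it implies: 0 equal, 1 less, 2 greater, 3 unordered. Treat those exact values as an assumption of this sketch, inferred from the switch rather than quoted from the ABI document:

// Sketch of the result encoding LowerF128Compare appears to test against;
// the enum values are an assumption, as noted above.
#include <cassert>
#include <initializer_list>

enum QCmpResult { QEQ = 0, QLT = 1, QGT = 2, QUO = 3 };

int main() {
  for (int r : {QEQ, QLT, QGT, QUO}) {
    // FCC_UG: "greater than 1" is exactly {greater, unordered}.
    assert((r > 1) == (r == QGT || r == QUO));
    // FCC_UL: "(result & 1) != 0" is exactly {less, unordered}.
    assert(((r & 1) != 0) == (r == QLT || r == QUO));
    // FCC_ULE: "result != 2" is everything except greater.
    assert((r != QGT) == (r == QEQ || r == QLT || r == QUO));
  }
  return 0;
}
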
@@ -1586,15 +2210,23 @@ static SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) {
// 32-bit compares use the icc flags, 64-bit uses the xcc flags.
Opc = LHS.getValueType() == MVT::i32 ? SPISD::BRICC : SPISD::BRXCC;
} else {
- CompareFlag = DAG.getNode(SPISD::CMPFCC, dl, MVT::Glue, LHS, RHS);
- if (SPCC == ~0U) SPCC = FPCondCCodeToFCC(CC);
- Opc = SPISD::BRFCC;
+ if (!hasHardQuad && LHS.getValueType() == MVT::f128) {
+ if (SPCC == ~0U) SPCC = FPCondCCodeToFCC(CC);
+ CompareFlag = TLI.LowerF128Compare(LHS, RHS, SPCC, dl, DAG);
+ Opc = SPISD::BRICC;
+ } else {
+ CompareFlag = DAG.getNode(SPISD::CMPFCC, dl, MVT::Glue, LHS, RHS);
+ if (SPCC == ~0U) SPCC = FPCondCCodeToFCC(CC);
+ Opc = SPISD::BRFCC;
+ }
}
return DAG.getNode(Opc, dl, MVT::Other, Chain, Dest,
DAG.getConstant(SPCC, MVT::i32), CompareFlag);
}
-static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) {
+static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG,
+ const SparcTargetLowering &TLI,
+ bool hasHardQuad) {
SDValue LHS = Op.getOperand(0);
SDValue RHS = Op.getOperand(1);
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
@@ -1614,9 +2246,15 @@ static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) {
SPISD::SELECT_ICC : SPISD::SELECT_XCC;
if (SPCC == ~0U) SPCC = IntCondCCodeToICC(CC);
} else {
- CompareFlag = DAG.getNode(SPISD::CMPFCC, dl, MVT::Glue, LHS, RHS);
- Opc = SPISD::SELECT_FCC;
- if (SPCC == ~0U) SPCC = FPCondCCodeToFCC(CC);
+ if (!hasHardQuad && LHS.getValueType() == MVT::f128) {
+ if (SPCC == ~0U) SPCC = FPCondCCodeToFCC(CC);
+ CompareFlag = TLI.LowerF128Compare(LHS, RHS, SPCC, dl, DAG);
+ Opc = SPISD::SELECT_ICC;
+ } else {
+ CompareFlag = DAG.getNode(SPISD::CMPFCC, dl, MVT::Glue, LHS, RHS);
+ Opc = SPISD::SELECT_FCC;
+ if (SPCC == ~0U) SPCC = FPCondCCodeToFCC(CC);
+ }
}
return DAG.getNode(Opc, dl, TrueVal.getValueType(), TrueVal, FalseVal,
DAG.getConstant(SPCC, MVT::i32), CompareFlag);
@@ -1665,20 +2303,25 @@ static SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) {
std::min(PtrVT.getSizeInBits(), VT.getSizeInBits())/8);
}
-static SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) {
+static SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG,
+ const SparcSubtarget *Subtarget) {
SDValue Chain = Op.getOperand(0); // Legalize the chain.
SDValue Size = Op.getOperand(1); // Legalize the size.
+ EVT VT = Size->getValueType(0);
SDLoc dl(Op);
unsigned SPReg = SP::O6;
- SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, MVT::i32);
- SDValue NewSP = DAG.getNode(ISD::SUB, dl, MVT::i32, SP, Size); // Value
+ SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, VT);
+ SDValue NewSP = DAG.getNode(ISD::SUB, dl, VT, SP, Size); // Value
Chain = DAG.getCopyToReg(SP.getValue(1), dl, SPReg, NewSP); // Output chain
// The resultant pointer is actually 16 words from the bottom of the stack,
// to provide a register spill area.
- SDValue NewVal = DAG.getNode(ISD::ADD, dl, MVT::i32, NewSP,
- DAG.getConstant(96, MVT::i32));
+ unsigned regSpillArea = Subtarget->is64Bit() ? 128 : 96;
+ regSpillArea += Subtarget->getStackPointerBias();
+
+ SDValue NewVal = DAG.getNode(ISD::ADD, dl, VT, NewSP,
+ DAG.getConstant(regSpillArea, VT));
SDValue Ops[2] = { NewVal, Chain };
return DAG.getMergeValues(Ops, 2, dl);
}
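
LowerDYNAMIC_STACKALLOC now accounts for both ABIs: the returned pointer must skip the register-save area at the bottom of the frame, 96 bytes on 32-bit and 128 on 64-bit per the hunk above, plus the V9 stack-pointer bias. A tiny sketch of the offset computation; the 2047 bias value is standard V9 ABI knowledge assumed here, not taken from this hunk:

#include <cassert>

// Offset added to the new SP so callers get usable memory above the
// register-save area. The 2047 V9 stack bias is an assumption of this sketch.
unsigned dynAllocOffset(bool Is64Bit) {
  unsigned RegSpillArea = Is64Bit ? 128 : 96; // save area at frame bottom
  unsigned StackBias = Is64Bit ? 2047 : 0;    // %sp is biased on V9
  return RegSpillArea + StackBias;
}

int main() {
  assert(dynAllocOffset(false) == 96);
  assert(dynAllocOffset(true) == 2175);
  return 0;
}
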
@@ -1759,12 +2402,12 @@ static SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG,
return RetAddr;
}
-static SDValue LowerF64Op(SDValue Op, SelectionDAG &DAG)
+static SDValue LowerF64Op(SDValue Op, SelectionDAG &DAG, unsigned opcode)
{
SDLoc dl(Op);
assert(Op.getValueType() == MVT::f64 && "LowerF64Op called on non-double!");
- assert(Op.getOpcode() == ISD::FNEG || Op.getOpcode() == ISD::FABS);
+ assert(opcode == ISD::FNEG || opcode == ISD::FABS);
// Lower fneg/fabs on f64 to fneg/fabs on f32.
// fneg f64 => fneg f32:sub_even, fmov f32:sub_odd.
@@ -1776,7 +2419,7 @@ static SDValue LowerF64Op(SDValue Op, SelectionDAG &DAG)
SDValue Lo32 = DAG.getTargetExtractSubreg(SP::sub_odd, dl, MVT::f32,
SrcReg64);
- Hi32 = DAG.getNode(Op.getOpcode(), dl, MVT::f32, Hi32);
+ Hi32 = DAG.getNode(opcode, dl, MVT::f32, Hi32);
SDValue DstReg64 = SDValue(DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF,
dl, MVT::f64), 0);
@@ -1787,28 +2430,244 @@ static SDValue LowerF64Op(SDValue Op, SelectionDAG &DAG)
return DstReg64;
}
+// Lower an f128 load into two f64 loads.
+static SDValue LowerF128Load(SDValue Op, SelectionDAG &DAG)
+{
+ SDLoc dl(Op);
+ LoadSDNode *LdNode = dyn_cast<LoadSDNode>(Op.getNode());
+ assert(LdNode && LdNode->getOffset().getOpcode() == ISD::UNDEF
+ && "Unexpected node type");
+
+ unsigned alignment = LdNode->getAlignment();
+ if (alignment > 8)
+ alignment = 8;
+
+ SDValue Hi64 = DAG.getLoad(MVT::f64,
+ dl,
+ LdNode->getChain(),
+ LdNode->getBasePtr(),
+ LdNode->getPointerInfo(),
+ false, false, false, alignment);
+ EVT addrVT = LdNode->getBasePtr().getValueType();
+ SDValue LoPtr = DAG.getNode(ISD::ADD, dl, addrVT,
+ LdNode->getBasePtr(),
+ DAG.getConstant(8, addrVT));
+ SDValue Lo64 = DAG.getLoad(MVT::f64,
+ dl,
+ LdNode->getChain(),
+ LoPtr,
+ LdNode->getPointerInfo(),
+ false, false, false, alignment);
+
+ SDValue SubRegEven = DAG.getTargetConstant(SP::sub_even64, MVT::i32);
+ SDValue SubRegOdd = DAG.getTargetConstant(SP::sub_odd64, MVT::i32);
+
+ SDNode *InFP128 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF,
+ dl, MVT::f128);
+ InFP128 = DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, dl,
+ MVT::f128,
+ SDValue(InFP128, 0),
+ Hi64,
+ SubRegEven);
+ InFP128 = DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, dl,
+ MVT::f128,
+ SDValue(InFP128, 0),
+ Lo64,
+ SubRegOdd);
+ SDValue OutChains[2] = { SDValue(Hi64.getNode(), 1),
+ SDValue(Lo64.getNode(), 1) };
+ SDValue OutChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &OutChains[0], 2);
+ SDValue Ops[2] = {SDValue(InFP128,0), OutChain};
+ return DAG.getMergeValues(Ops, 2, dl);
+}
+
+// Lower an f128 store into two f64 stores.
+static SDValue LowerF128Store(SDValue Op, SelectionDAG &DAG) {
+ SDLoc dl(Op);
+ StoreSDNode *StNode = dyn_cast<StoreSDNode>(Op.getNode());
+ assert(StNode && StNode->getOffset().getOpcode() == ISD::UNDEF
+ && "Unexpected node type");
+ SDValue SubRegEven = DAG.getTargetConstant(SP::sub_even64, MVT::i32);
+ SDValue SubRegOdd = DAG.getTargetConstant(SP::sub_odd64, MVT::i32);
+
+ SDNode *Hi64 = DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG,
+ dl,
+ MVT::f64,
+ StNode->getValue(),
+ SubRegEven);
+ SDNode *Lo64 = DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG,
+ dl,
+ MVT::f64,
+ StNode->getValue(),
+ SubRegOdd);
+
+ unsigned alignment = StNode->getAlignment();
+ if (alignment > 8)
+ alignment = 8;
+
+ SDValue OutChains[2];
+ OutChains[0] = DAG.getStore(StNode->getChain(),
+ dl,
+ SDValue(Hi64, 0),
+ StNode->getBasePtr(),
+ MachinePointerInfo(),
+ false, false, alignment);
+ EVT addrVT = StNode->getBasePtr().getValueType();
+ SDValue LoPtr = DAG.getNode(ISD::ADD, dl, addrVT,
+ StNode->getBasePtr(),
+ DAG.getConstant(8, addrVT));
+ OutChains[1] = DAG.getStore(StNode->getChain(),
+ dl,
+ SDValue(Lo64, 0),
+ LoPtr,
+ MachinePointerInfo(),
+ false, false, alignment);
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &OutChains[0], 2);
+}
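+
Both helpers assume the big-endian SPARC layout: the sub_even64 half is the high-order eight bytes at offset 0 and the sub_odd64 half follows at offset 8. A host-side sketch of the same split (my own illustration, not patch code):

    #include <cstdint>
    #include <cstring>

    struct F128Halves { uint64_t hi, lo; };

    F128Halves splitF128(const unsigned char *p) {
      F128Halves h;
      std::memcpy(&h.hi, p, 8);     // sub_even64 half, offset 0
      std::memcpy(&h.lo, p + 8, 8); // sub_odd64 half, offset 8
      return h;
    }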
+
+static SDValue LowerFNEG(SDValue Op, SelectionDAG &DAG,
+ const SparcTargetLowering &TLI,
+ bool is64Bit) {
+ if (Op.getValueType() == MVT::f64)
+ return LowerF64Op(Op, DAG, ISD::FNEG);
+ if (Op.getValueType() == MVT::f128)
+ return TLI.LowerF128Op(Op, DAG, ((is64Bit) ? "_Qp_neg" : "_Q_neg"), 1);
+ return Op;
+}
+
+static SDValue LowerFABS(SDValue Op, SelectionDAG &DAG, bool isV9) {
+ if (Op.getValueType() == MVT::f64)
+ return LowerF64Op(Op, DAG, ISD::FABS);
+ if (Op.getValueType() != MVT::f128)
+ return Op;
+
+  // Lower fabs on f128 to fabs on f64.
+  // fabs f128 => fabs f64:sub_even64, fmov f64:sub_odd64.
+
+ SDLoc dl(Op);
+ SDValue SrcReg128 = Op.getOperand(0);
+ SDValue Hi64 = DAG.getTargetExtractSubreg(SP::sub_even64, dl, MVT::f64,
+ SrcReg128);
+ SDValue Lo64 = DAG.getTargetExtractSubreg(SP::sub_odd64, dl, MVT::f64,
+ SrcReg128);
+ if (isV9)
+ Hi64 = DAG.getNode(Op.getOpcode(), dl, MVT::f64, Hi64);
+ else
+ Hi64 = LowerF64Op(Hi64, DAG, ISD::FABS);
+
+ SDValue DstReg128 = SDValue(DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF,
+ dl, MVT::f128), 0);
+ DstReg128 = DAG.getTargetInsertSubreg(SP::sub_even64, dl, MVT::f128,
+ DstReg128, Hi64);
+ DstReg128 = DAG.getTargetInsertSubreg(SP::sub_odd64, dl, MVT::f128,
+ DstReg128, Lo64);
+ return DstReg128;
+}
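
The asymmetry above is deliberate: IEEE-754 binary128 keeps its sign bit in the most significant word, which on big-endian SPARC is the sub_even64 half, so fabs only needs to rewrite Hi64 while Lo64 is moved through unchanged. In bit terms (a sketch):

    #include <cstdint>

    void fabsF128InPlace(uint64_t half[2]) {
      half[0] &= ~(uint64_t(1) << 63); // even/high half: clear the sign bit
      // half[1] (odd/low half) is untouched, matching the plain move of Lo64.
    }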
+
+static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) {
+
+ if (Op.getValueType() != MVT::i64)
+ return Op;
+
+ SDLoc dl(Op);
+ SDValue Src1 = Op.getOperand(0);
+ SDValue Src1Lo = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Src1);
+ SDValue Src1Hi = DAG.getNode(ISD::SRL, dl, MVT::i64, Src1,
+ DAG.getConstant(32, MVT::i64));
+ Src1Hi = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Src1Hi);
+
+ SDValue Src2 = Op.getOperand(1);
+ SDValue Src2Lo = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Src2);
+ SDValue Src2Hi = DAG.getNode(ISD::SRL, dl, MVT::i64, Src2,
+ DAG.getConstant(32, MVT::i64));
+ Src2Hi = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Src2Hi);
+
+ bool hasChain = false;
+ unsigned hiOpc = Op.getOpcode();
+ switch (Op.getOpcode()) {
+ default: llvm_unreachable("Invalid opcode");
+ case ISD::ADDC: hiOpc = ISD::ADDE; break;
+ case ISD::ADDE: hasChain = true; break;
+ case ISD::SUBC: hiOpc = ISD::SUBE; break;
+ case ISD::SUBE: hasChain = true; break;
+ }
+ SDValue Lo;
+ SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Glue);
+ if (hasChain) {
+ Lo = DAG.getNode(Op.getOpcode(), dl, VTs, Src1Lo, Src2Lo,
+ Op.getOperand(2));
+ } else {
+ Lo = DAG.getNode(Op.getOpcode(), dl, VTs, Src1Lo, Src2Lo);
+ }
+ SDValue Hi = DAG.getNode(hiOpc, dl, VTs, Src1Hi, Src2Hi, Lo.getValue(1));
+ SDValue Carry = Hi.getValue(1);
+
+ Lo = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, Lo);
+ Hi = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, Hi);
+ Hi = DAG.getNode(ISD::SHL, dl, MVT::i64, Hi,
+ DAG.getConstant(32, MVT::i64));
+
+ SDValue Dst = DAG.getNode(ISD::OR, dl, MVT::i64, Hi, Lo);
+ SDValue Ops[2] = { Dst, Carry };
+ return DAG.getMergeValues(Ops, 2, dl);
+}
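
The same split written out by hand may make the glue threading clearer: the low 32-bit op produces the carry that the high op consumes, and the i64 result is rebuilt with zext/shl/or exactly as above (a sketch):

    #include <cstdint>

    uint64_t add64Via32(uint64_t a, uint64_t b, unsigned &carryOut) {
      uint32_t lo = uint32_t(a) + uint32_t(b);
      unsigned carry = lo < uint32_t(a);                  // carry out of Lo
      uint64_t hi = uint64_t(uint32_t(a >> 32)) +
                    uint64_t(uint32_t(b >> 32)) + carry;  // Hi consumes it
      carryOut = unsigned(hi >> 32);                      // Hi's carry-out
      return (uint64_t(uint32_t(hi)) << 32) | lo;         // OR(SHL(Hi), Lo)
    }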
+
SDValue SparcTargetLowering::
LowerOperation(SDValue Op, SelectionDAG &DAG) const {
+
+ bool hasHardQuad = Subtarget->hasHardQuad();
+ bool is64Bit = Subtarget->is64Bit();
+ bool isV9 = Subtarget->isV9();
+
switch (Op.getOpcode()) {
default: llvm_unreachable("Should not custom lower this!");
- case ISD::FNEG:
- case ISD::FABS: return LowerF64Op(Op, DAG);
-
case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG, *this);
case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
- case ISD::GlobalTLSAddress:
- llvm_unreachable("TLS not implemented for Sparc.");
+ case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
- case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG);
- case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG);
- case ISD::BR_CC: return LowerBR_CC(Op, DAG);
- case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
+ case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG, *this,
+ hasHardQuad);
+ case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG, *this,
+ hasHardQuad);
+ case ISD::FP_TO_UINT: return LowerFP_TO_UINT(Op, DAG, *this,
+ hasHardQuad);
+ case ISD::UINT_TO_FP: return LowerUINT_TO_FP(Op, DAG, *this,
+ hasHardQuad);
+ case ISD::BR_CC: return LowerBR_CC(Op, DAG, *this,
+ hasHardQuad);
+ case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG, *this,
+ hasHardQuad);
case ISD::VASTART: return LowerVASTART(Op, DAG, *this);
case ISD::VAARG: return LowerVAARG(Op, DAG);
- case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
+ case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG,
+ Subtarget);
+
+ case ISD::LOAD: return LowerF128Load(Op, DAG);
+ case ISD::STORE: return LowerF128Store(Op, DAG);
+ case ISD::FADD: return LowerF128Op(Op, DAG,
+ getLibcallName(RTLIB::ADD_F128), 2);
+ case ISD::FSUB: return LowerF128Op(Op, DAG,
+ getLibcallName(RTLIB::SUB_F128), 2);
+ case ISD::FMUL: return LowerF128Op(Op, DAG,
+ getLibcallName(RTLIB::MUL_F128), 2);
+ case ISD::FDIV: return LowerF128Op(Op, DAG,
+ getLibcallName(RTLIB::DIV_F128), 2);
+ case ISD::FSQRT: return LowerF128Op(Op, DAG,
+ getLibcallName(RTLIB::SQRT_F128),1);
+ case ISD::FNEG: return LowerFNEG(Op, DAG, *this, is64Bit);
+ case ISD::FABS: return LowerFABS(Op, DAG, isV9);
+ case ISD::FP_EXTEND: return LowerF128_FPEXTEND(Op, DAG, *this);
+ case ISD::FP_ROUND: return LowerF128_FPROUND(Op, DAG, *this);
+ case ISD::ADDC:
+ case ISD::ADDE:
+ case ISD::SUBC:
+ case ISD::SUBE: return LowerADDC_ADDE_SUBC_SUBE(Op, DAG);
}
}
@@ -1825,11 +2684,13 @@ SparcTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
case SP::SELECT_CC_Int_ICC:
case SP::SELECT_CC_FP_ICC:
case SP::SELECT_CC_DFP_ICC:
+ case SP::SELECT_CC_QFP_ICC:
BROpcode = SP::BCOND;
break;
case SP::SELECT_CC_Int_FCC:
case SP::SELECT_CC_FP_FCC:
case SP::SELECT_CC_DFP_FCC:
+ case SP::SELECT_CC_QFP_FCC:
BROpcode = SP::FBCOND;
break;
}
@@ -1924,3 +2785,50 @@ SparcTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
// The Sparc target isn't yet aware of offsets.
return false;
}
+
+void SparcTargetLowering::ReplaceNodeResults(SDNode *N,
+ SmallVectorImpl<SDValue>& Results,
+ SelectionDAG &DAG) const {
+
+ SDLoc dl(N);
+
+ RTLIB::Libcall libCall = RTLIB::UNKNOWN_LIBCALL;
+
+ switch (N->getOpcode()) {
+ default:
+ llvm_unreachable("Do not know how to custom type legalize this operation!");
+
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT:
+    // Custom lower only f128 -> i64 conversions.
+ if (N->getOperand(0).getValueType() != MVT::f128
+ || N->getValueType(0) != MVT::i64)
+ return;
+ libCall = ((N->getOpcode() == ISD::FP_TO_SINT)
+ ? RTLIB::FPTOSINT_F128_I64
+ : RTLIB::FPTOUINT_F128_I64);
+
+ Results.push_back(LowerF128Op(SDValue(N, 0),
+ DAG,
+ getLibcallName(libCall),
+ 1));
+ return;
+
+ case ISD::SINT_TO_FP:
+ case ISD::UINT_TO_FP:
+    // Custom lower only i64 -> f128 conversions.
+ if (N->getValueType(0) != MVT::f128
+ || N->getOperand(0).getValueType() != MVT::i64)
+ return;
+
+ libCall = ((N->getOpcode() == ISD::SINT_TO_FP)
+ ? RTLIB::SINTTOFP_I64_F128
+ : RTLIB::UINTTOFP_I64_F128);
+
+ Results.push_back(LowerF128Op(SDValue(N, 0),
+ DAG,
+ getLibcallName(libCall),
+ 1));
+ return;
+ }
+}
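
At the C level this legalization turns each f128 <-> i64 conversion into a single runtime call; only the RTLIB enums appear in the patch, so the helper below is a stand-in name (a sketch):

    extern "C" long long fptosiF128ToI64(long double x); // hypothetical libcall

    long long toInt64(long double q) {
      return fptosiF128ToI64(q); // what RTLIB::FPTOSINT_F128_I64 resolves to
    }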
diff --git a/lib/Target/Sparc/SparcISelLowering.h b/lib/Target/Sparc/SparcISelLowering.h
index 261c25a..2659fc8 100644
--- a/lib/Target/Sparc/SparcISelLowering.h
+++ b/lib/Target/Sparc/SparcISelLowering.h
@@ -37,11 +37,17 @@ namespace llvm {
FTOI, // FP to Int within a FP register.
ITOF, // Int to FP within a FP register.
+ FTOX, // FP to Int64 within a FP register.
+ XTOF, // Int64 to FP within a FP register.
CALL, // A call instruction.
RET_FLAG, // Return with a flag operand.
- GLOBAL_BASE_REG, // Global base reg for PIC
- FLUSHW // FLUSH register windows to stack
+ GLOBAL_BASE_REG, // Global base reg for PIC.
+ FLUSHW, // FLUSH register windows to stack.
+
+ TLS_ADD, // For Thread Local Storage (TLS).
+ TLS_LD,
+ TLS_CALL
};
}
@@ -73,6 +79,9 @@ namespace llvm {
virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const;
virtual MVT getScalarShiftAmountTy(EVT LHSTy) const { return MVT::i32; }
+ /// getSetCCResultType - Return the ISD::SETCC ValueType
+ virtual EVT getSetCCResultType(LLVMContext &Context, EVT VT) const;
+
virtual SDValue
LowerFormalArguments(SDValue Chain,
CallingConv::ID CallConv,
@@ -119,6 +128,7 @@ namespace llvm {
SDLoc DL, SelectionDAG &DAG) const;
SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
@@ -127,6 +137,27 @@ namespace llvm {
SDValue makeHiLoPair(SDValue Op, unsigned HiTF, unsigned LoTF,
SelectionDAG &DAG) const;
SDValue makeAddress(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue LowerF128_LibCallArg(SDValue Chain, ArgListTy &Args,
+ SDValue Arg, SDLoc DL,
+ SelectionDAG &DAG) const;
+ SDValue LowerF128Op(SDValue Op, SelectionDAG &DAG,
+ const char *LibFuncName,
+ unsigned numArgs) const;
+ SDValue LowerF128Compare(SDValue LHS, SDValue RHS,
+ unsigned &SPCC,
+ SDLoc DL,
+ SelectionDAG &DAG) const;
+
+ bool ShouldShrinkFPConstant(EVT VT) const {
+ // Do not shrink FP constpool if VT == MVT::f128.
+ // (ldd, call _Q_fdtoq) is more expensive than two ldds.
+ return VT != MVT::f128;
+ }
+
+ virtual void ReplaceNodeResults(SDNode *N,
+ SmallVectorImpl<SDValue>& Results,
+ SelectionDAG &DAG) const;
};
} // end namespace llvm
diff --git a/lib/Target/Sparc/SparcInstr64Bit.td b/lib/Target/Sparc/SparcInstr64Bit.td
index 47658ee..8656de5 100644
--- a/lib/Target/Sparc/SparcInstr64Bit.td
+++ b/lib/Target/Sparc/SparcInstr64Bit.td
@@ -153,15 +153,11 @@ def : Pat<(xor i64:$a, (not i64:$b)), (XNORrr $a, $b)>;
def : Pat<(add i64:$a, i64:$b), (ADDrr $a, $b)>;
def : Pat<(sub i64:$a, i64:$b), (SUBrr $a, $b)>;
-// Add/sub with carry were renamed to addc/subc in SPARC v9.
-def : Pat<(adde i64:$a, i64:$b), (ADDXrr $a, $b)>;
-def : Pat<(sube i64:$a, i64:$b), (SUBXrr $a, $b)>;
-
-def : Pat<(addc i64:$a, i64:$b), (ADDCCrr $a, $b)>;
-def : Pat<(subc i64:$a, i64:$b), (SUBCCrr $a, $b)>;
-
def : Pat<(SPcmpicc i64:$a, i64:$b), (CMPrr $a, $b)>;
+def : Pat<(tlsadd i64:$a, i64:$b, tglobaltlsaddr:$sym),
+ (TLS_ADDrr $a, $b, $sym)>;
+
// Register-immediate instructions.
def : Pat<(and i64:$a, (i64 simm13:$b)), (ANDri $a, (as_i32imm $b))>;
@@ -173,6 +169,14 @@ def : Pat<(sub i64:$a, (i64 simm13:$b)), (SUBri $a, (as_i32imm $b))>;
def : Pat<(SPcmpicc i64:$a, (i64 simm13:$b)), (CMPri $a, (as_i32imm $b))>;
+def : Pat<(ctpop i64:$src), (POPCrr $src)>;
+
+// "LEA" form of add
+def LEAX_ADDri : F3_2<2, 0b000000,
+ (outs I64Regs:$dst), (ins MEMri:$addr),
+ "add ${addr:arith}, $dst",
+ [(set iPTR:$dst, ADDRri:$addr)]>;
+
} // Predicates = [Is64Bit]
@@ -237,6 +241,12 @@ def LDXri : F3_2<3, 0b001011,
(outs I64Regs:$dst), (ins MEMri:$addr),
"ldx [$addr], $dst",
[(set i64:$dst, (load ADDRri:$addr))]>;
+let mayLoad = 1 in
+ def TLS_LDXrr : F3_1<3, 0b001011,
+ (outs IntRegs:$dst), (ins MEMrr:$addr, TLSSym:$sym),
+ "ldx [$addr], $dst, $sym",
+ [(set i64:$dst,
+ (tlsld ADDRrr:$addr, tglobaltlsaddr:$sym))]>;
// Extending loads to i64.
def : Pat<(i64 (zextloadi1 ADDRrr:$addr)), (LDUBrr ADDRrr:$addr)>;
@@ -312,9 +322,9 @@ def : Pat<(store (i64 0), ADDRri:$dst), (STXri ADDRri:$dst, (i64 G0))>;
let Predicates = [Is64Bit] in {
let Uses = [ICC] in
-def BPXCC : BranchSP<0, (ins brtarget:$dst, CCOp:$cc),
- "b$cc %xcc, $dst",
- [(SPbrxcc bb:$dst, imm:$cc)]>;
+def BPXCC : BranchSP<(ins brtarget:$imm22, CCOp:$cond),
+ "b$cond %xcc, $imm22",
+ [(SPbrxcc bb:$imm22, imm:$cond)]>;
// Conditional moves on %xcc.
let Uses = [ICC], Constraints = "$f = $rd" in {
@@ -340,6 +350,42 @@ def FMOVD_XCC : Pseudo<(outs DFPRegs:$rd),
(SPselectxcc f64:$rs2, f64:$f, imm:$cond))]>;
} // Uses, Constraints
+//===----------------------------------------------------------------------===//
+// 64-bit Floating Point Conversions.
+//===----------------------------------------------------------------------===//
+
+let Predicates = [Is64Bit] in {
+
+def FXTOS : F3_3u<2, 0b110100, 0b010000100,
+ (outs FPRegs:$dst), (ins DFPRegs:$src),
+ "fxtos $src, $dst",
+ [(set FPRegs:$dst, (SPxtof DFPRegs:$src))]>;
+def FXTOD : F3_3u<2, 0b110100, 0b010001000,
+ (outs DFPRegs:$dst), (ins DFPRegs:$src),
+ "fxtod $src, $dst",
+ [(set DFPRegs:$dst, (SPxtof DFPRegs:$src))]>;
+def FXTOQ : F3_3u<2, 0b110100, 0b010001100,
+ (outs QFPRegs:$dst), (ins DFPRegs:$src),
+ "fxtoq $src, $dst",
+ [(set QFPRegs:$dst, (SPxtof DFPRegs:$src))]>,
+ Requires<[HasHardQuad]>;
+
+def FSTOX : F3_3u<2, 0b110100, 0b010000001,
+ (outs DFPRegs:$dst), (ins FPRegs:$src),
+ "fstox $src, $dst",
+ [(set DFPRegs:$dst, (SPftox FPRegs:$src))]>;
+def FDTOX : F3_3u<2, 0b110100, 0b010000010,
+ (outs DFPRegs:$dst), (ins DFPRegs:$src),
+ "fdtox $src, $dst",
+ [(set DFPRegs:$dst, (SPftox DFPRegs:$src))]>;
+def FQTOX : F3_3u<2, 0b110100, 0b010000011,
+ (outs DFPRegs:$dst), (ins QFPRegs:$src),
+ "fqtox $src, $dst",
+ [(set DFPRegs:$dst, (SPftox QFPRegs:$src))]>,
+ Requires<[HasHardQuad]>;
+
+} // Predicates = [Is64Bit]
+
def : Pat<(SPselectxcc i64:$t, i64:$f, imm:$cond),
(MOVXCCrr $t, $f, imm:$cond)>;
def : Pat<(SPselectxcc (i64 simm11:$t), i64:$f, imm:$cond),
diff --git a/lib/Target/Sparc/SparcInstrFormats.td b/lib/Target/Sparc/SparcInstrFormats.td
index 6cdf6bc..afa2874 100644
--- a/lib/Target/Sparc/SparcInstrFormats.td
+++ b/lib/Target/Sparc/SparcInstrFormats.td
@@ -47,12 +47,11 @@ class F2_1<bits<3> op2Val, dag outs, dag ins, string asmstr, list<dag> pattern>
let Inst{29-25} = rd;
}
-class F2_2<bits<4> condVal, bits<3> op2Val, dag outs, dag ins, string asmstr,
+class F2_2<bits<3> op2Val, dag outs, dag ins, string asmstr,
list<dag> pattern> : F2<outs, ins, asmstr, pattern> {
bits<4> cond;
bit annul = 0; // currently unused
- let cond = condVal;
let op2 = op2Val;
let Inst{29} = annul;
@@ -112,6 +111,32 @@ class F3_3<bits<2> opVal, bits<6> op3val, bits<9> opfval, dag outs, dag ins,
let Inst{4-0} = rs2;
}
+// Floating-point unary operations.
+class F3_3u<bits<2> opVal, bits<6> op3val, bits<9> opfval, dag outs, dag ins,
+ string asmstr, list<dag> pattern> : F3<outs, ins, asmstr, pattern> {
+ bits<5> rs2;
+
+ let op = opVal;
+ let op3 = op3val;
+ let rs1 = 0;
+
+ let Inst{13-5} = opfval; // fp opcode
+ let Inst{4-0} = rs2;
+}
+
+// Floating-point compares.
+class F3_3c<bits<2> opVal, bits<6> op3val, bits<9> opfval, dag outs, dag ins,
+ string asmstr, list<dag> pattern> : F3<outs, ins, asmstr, pattern> {
+ bits<5> rs2;
+
+ let op = opVal;
+ let op3 = op3val;
+ let rd = 0;
+
+ let Inst{13-5} = opfval; // fp opcode
+ let Inst{4-0} = rs2;
+}
+
// Shift by register rs2.
class F3_Sr<bits<2> opVal, bits<6> op3val, bit xVal, dag outs, dag ins,
string asmstr, list<dag> pattern> : F3<outs, ins, asmstr, pattern> {
@@ -150,3 +175,59 @@ multiclass F3_S<string OpcStr, bits<6> Op3Val, bit XVal, SDNode OpNode,
!strconcat(OpcStr, " $rs, $shcnt, $rd"),
[(set VT:$rd, (OpNode VT:$rs, (i32 imm:$shcnt)))]>;
}
+
+class F4<bits<6> op3, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSP<outs, ins, asmstr, pattern> {
+ bits<5> rd;
+
+ let op = 2;
+ let Inst{29-25} = rd;
+ let Inst{24-19} = op3;
+}
+
+
+class F4_1<bits<6> op3, dag outs, dag ins,
+ string asmstr, list<dag> pattern>
+ : F4<op3, outs, ins, asmstr, pattern> {
+
+ bits<3> cc;
+ bits<4> cond;
+ bits<5> rs2;
+
+ let Inst{4-0} = rs2;
+ let Inst{11} = cc{0};
+ let Inst{12} = cc{1};
+ let Inst{13} = 0;
+ let Inst{17-14} = cond;
+ let Inst{18} = cc{2};
+
+}
+
+class F4_2<bits<6> op3, dag outs, dag ins,
+ string asmstr, list<dag> pattern>
+ : F4<op3, outs, ins, asmstr, pattern> {
+ bits<3> cc;
+ bits<4> cond;
+ bits<11> simm11;
+
+ let Inst{10-0} = simm11;
+ let Inst{11} = cc{0};
+ let Inst{12} = cc{1};
+ let Inst{13} = 1;
+ let Inst{17-14} = cond;
+ let Inst{18} = cc{2};
+}
+
+class F4_3<bits<6> op3, bits<6> opf_low, dag outs, dag ins,
+ string asmstr, list<dag> pattern>
+ : F4<op3, outs, ins, asmstr, pattern> {
+ bits<4> cond;
+ bits<3> opf_cc;
+ bits<5> rs2;
+
+ let Inst{18} = 0;
+ let Inst{17-14} = cond;
+ let Inst{13-11} = opf_cc;
+ let Inst{10-5} = opf_low;
+ let Inst{4-0} = rs2;
+}
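
The F4_1 field placement reads naturally as a bit-packing function; the helper below is my own restatement of the `let` assignments above, not patch code:

    #include <cstdint>

    constexpr uint32_t encodeF4_1(unsigned rd, unsigned op3, unsigned cc,
                                  unsigned cond, unsigned rs2) {
      return (2u << 30)                  // op         Inst{31-30}
           | (uint32_t(rd)   << 25)      // rd         Inst{29-25}
           | (uint32_t(op3)  << 19)      // op3        Inst{24-19}
           | (((cc >> 2) & 1u) << 18)    // cc{2}      Inst{18}
           | (uint32_t(cond) << 14)      // cond       Inst{17-14}
                                         // Inst{13} = 0 (register form)
           | (((cc >> 1) & 1u) << 12)    // cc{1}      Inst{12}
           | ((cc & 1u) << 11)           // cc{0}      Inst{11}
           | (rs2 & 0x1fu);              // rs2        Inst{4-0}
    }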
diff --git a/lib/Target/Sparc/SparcInstrInfo.cpp b/lib/Target/Sparc/SparcInstrInfo.cpp
index 6c14bc9..c10b5b3 100644
--- a/lib/Target/Sparc/SparcInstrInfo.cpp
+++ b/lib/Target/Sparc/SparcInstrInfo.cpp
@@ -24,11 +24,15 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetRegistry.h"
-#define GET_INSTRINFO_CTOR
+#define GET_INSTRINFO_CTOR_DTOR
#include "SparcGenInstrInfo.inc"
using namespace llvm;
+
+// Pin the vtable to this file.
+void SparcInstrInfo::anchor() {}
+
SparcInstrInfo::SparcInstrInfo(SparcSubtarget &ST)
: SparcGenInstrInfo(SP::ADJCALLSTACKDOWN, SP::ADJCALLSTACKUP),
RI(ST), Subtarget(ST) {
@@ -44,7 +48,8 @@ unsigned SparcInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
if (MI->getOpcode() == SP::LDri ||
MI->getOpcode() == SP::LDXri ||
MI->getOpcode() == SP::LDFri ||
- MI->getOpcode() == SP::LDDFri) {
+ MI->getOpcode() == SP::LDDFri ||
+ MI->getOpcode() == SP::LDQFri) {
if (MI->getOperand(1).isFI() && MI->getOperand(2).isImm() &&
MI->getOperand(2).getImm() == 0) {
FrameIndex = MI->getOperand(1).getIndex();
@@ -64,7 +69,8 @@ unsigned SparcInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
if (MI->getOpcode() == SP::STri ||
MI->getOpcode() == SP::STXri ||
MI->getOpcode() == SP::STFri ||
- MI->getOpcode() == SP::STDFri) {
+ MI->getOpcode() == SP::STDFri ||
+ MI->getOpcode() == SP::STQFri) {
if (MI->getOperand(0).isFI() && MI->getOperand(1).isImm() &&
MI->getOperand(1).getImm() == 0) {
FrameIndex = MI->getOperand(0).getIndex();
@@ -100,14 +106,14 @@ static SPCC::CondCodes GetOppositeBranchCondition(SPCC::CondCodes CC)
case SPCC::FCC_U: return SPCC::FCC_O;
case SPCC::FCC_O: return SPCC::FCC_U;
- case SPCC::FCC_G: return SPCC::FCC_LE;
- case SPCC::FCC_LE: return SPCC::FCC_G;
- case SPCC::FCC_UG: return SPCC::FCC_ULE;
- case SPCC::FCC_ULE: return SPCC::FCC_UG;
- case SPCC::FCC_L: return SPCC::FCC_GE;
- case SPCC::FCC_GE: return SPCC::FCC_L;
- case SPCC::FCC_UL: return SPCC::FCC_UGE;
- case SPCC::FCC_UGE: return SPCC::FCC_UL;
+ case SPCC::FCC_G: return SPCC::FCC_ULE;
+ case SPCC::FCC_LE: return SPCC::FCC_UG;
+ case SPCC::FCC_UG: return SPCC::FCC_LE;
+ case SPCC::FCC_ULE: return SPCC::FCC_G;
+ case SPCC::FCC_L: return SPCC::FCC_UGE;
+ case SPCC::FCC_GE: return SPCC::FCC_UL;
+ case SPCC::FCC_UL: return SPCC::FCC_GE;
+ case SPCC::FCC_UGE: return SPCC::FCC_L;
case SPCC::FCC_LG: return SPCC::FCC_UE;
case SPCC::FCC_UE: return SPCC::FCC_LG;
case SPCC::FCC_NE: return SPCC::FCC_E;
@@ -273,6 +279,16 @@ void SparcInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I, DebugLoc DL,
unsigned DestReg, unsigned SrcReg,
bool KillSrc) const {
+ unsigned numSubRegs = 0;
+ unsigned movOpc = 0;
+ const unsigned *subRegIdx = 0;
+
+ const unsigned DFP_FP_SubRegsIdx[] = { SP::sub_even, SP::sub_odd };
+ const unsigned QFP_DFP_SubRegsIdx[] = { SP::sub_even64, SP::sub_odd64 };
+ const unsigned QFP_FP_SubRegsIdx[] = { SP::sub_even, SP::sub_odd,
+ SP::sub_odd64_then_sub_even,
+ SP::sub_odd64_then_sub_odd };
+
if (SP::IntRegsRegClass.contains(DestReg, SrcReg))
BuildMI(MBB, I, DL, get(SP::ORrr), DestReg).addReg(SP::G0)
.addReg(SrcReg, getKillRegState(KillSrc));
@@ -285,23 +301,47 @@ void SparcInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
.addReg(SrcReg, getKillRegState(KillSrc));
} else {
// Use two FMOVS instructions.
- const TargetRegisterInfo *TRI = &getRegisterInfo();
- MachineInstr *MovMI = 0;
- unsigned subRegIdx[] = {SP::sub_even, SP::sub_odd};
- for (unsigned i = 0; i != 2; ++i) {
- unsigned Dst = TRI->getSubReg(DestReg, subRegIdx[i]);
- unsigned Src = TRI->getSubReg(SrcReg, subRegIdx[i]);
- assert(Dst && Src && "Bad sub-register");
-
- MovMI = BuildMI(MBB, I, DL, get(SP::FMOVS), Dst).addReg(Src);
+ subRegIdx = DFP_FP_SubRegsIdx;
+ numSubRegs = 2;
+ movOpc = SP::FMOVS;
+ }
+ } else if (SP::QFPRegsRegClass.contains(DestReg, SrcReg)) {
+ if (Subtarget.isV9()) {
+ if (Subtarget.hasHardQuad()) {
+ BuildMI(MBB, I, DL, get(SP::FMOVQ), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ } else {
+ // Use two FMOVD instructions.
+ subRegIdx = QFP_DFP_SubRegsIdx;
+ numSubRegs = 2;
+ movOpc = SP::FMOVD;
}
- // Add implicit super-register defs and kills to the last MovMI.
- MovMI->addRegisterDefined(DestReg, TRI);
- if (KillSrc)
- MovMI->addRegisterKilled(SrcReg, TRI);
+ } else {
+ // Use four FMOVS instructions.
+ subRegIdx = QFP_FP_SubRegsIdx;
+ numSubRegs = 4;
+ movOpc = SP::FMOVS;
}
} else
llvm_unreachable("Impossible reg-to-reg copy");
+
+ if (numSubRegs == 0 || subRegIdx == 0 || movOpc == 0)
+ return;
+
+ const TargetRegisterInfo *TRI = &getRegisterInfo();
+ MachineInstr *MovMI = 0;
+
+ for (unsigned i = 0; i != numSubRegs; ++i) {
+ unsigned Dst = TRI->getSubReg(DestReg, subRegIdx[i]);
+ unsigned Src = TRI->getSubReg(SrcReg, subRegIdx[i]);
+ assert(Dst && Src && "Bad sub-register");
+
+ MovMI = BuildMI(MBB, I, DL, get(movOpc), Dst).addReg(Src);
+ }
+ // Add implicit super-register defs and kills to the last MovMI.
+ MovMI->addRegisterDefined(DestReg, TRI);
+ if (KillSrc)
+ MovMI->addRegisterKilled(SrcReg, TRI);
}
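
Spelled out for a %f0..%f3 -> %f4..%f7 copy (the register choice is illustrative), the three fan-out strategies above emit:

    // Hard quad on V9: one instruction.
    const char *kQfpCopyHardQuad = "fmovq %f0, %f4\n";
    // V9 without hard quad: two FMOVD over sub_even64/sub_odd64.
    const char *kQfpCopyV9 = "fmovd %f0, %f4\n"
                             "fmovd %f2, %f6\n";
    // Pre-V9: four FMOVS over the f32 sub-registers.
    const char *kQfpCopyV8 = "fmovs %f0, %f4\n"
                             "fmovs %f1, %f5\n"
                             "fmovs %f2, %f6\n"
                             "fmovs %f3, %f7\n";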
void SparcInstrInfo::
@@ -321,7 +361,7 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
MFI.getObjectAlignment(FI));
// On the order of operands here: think "[FrameIdx + 0] = SrcReg".
- if (RC == &SP::I64RegsRegClass)
+ if (RC == &SP::I64RegsRegClass)
BuildMI(MBB, I, DL, get(SP::STXri)).addFrameIndex(FI).addImm(0)
.addReg(SrcReg, getKillRegState(isKill)).addMemOperand(MMO);
else if (RC == &SP::IntRegsRegClass)
@@ -330,9 +370,14 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
else if (RC == &SP::FPRegsRegClass)
BuildMI(MBB, I, DL, get(SP::STFri)).addFrameIndex(FI).addImm(0)
.addReg(SrcReg, getKillRegState(isKill)).addMemOperand(MMO);
- else if (RC == &SP::DFPRegsRegClass)
+ else if (SP::DFPRegsRegClass.hasSubClassEq(RC))
BuildMI(MBB, I, DL, get(SP::STDFri)).addFrameIndex(FI).addImm(0)
.addReg(SrcReg, getKillRegState(isKill)).addMemOperand(MMO);
+ else if (SP::QFPRegsRegClass.hasSubClassEq(RC))
+ // Use STQFri irrespective of its legality. If STQ is not legal, it will be
+ // lowered into two STDs in eliminateFrameIndex.
+ BuildMI(MBB, I, DL, get(SP::STQFri)).addFrameIndex(FI).addImm(0)
+ .addReg(SrcReg, getKillRegState(isKill)).addMemOperand(MMO);
else
llvm_unreachable("Can't store this register to stack slot");
}
@@ -362,9 +407,14 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
else if (RC == &SP::FPRegsRegClass)
BuildMI(MBB, I, DL, get(SP::LDFri), DestReg).addFrameIndex(FI).addImm(0)
.addMemOperand(MMO);
- else if (RC == &SP::DFPRegsRegClass)
+ else if (SP::DFPRegsRegClass.hasSubClassEq(RC))
BuildMI(MBB, I, DL, get(SP::LDDFri), DestReg).addFrameIndex(FI).addImm(0)
.addMemOperand(MMO);
+ else if (SP::QFPRegsRegClass.hasSubClassEq(RC))
+ // Use LDQFri irrespective of its legality. If LDQ is not legal, it will be
+ // lowered into two LDDs in eliminateFrameIndex.
+ BuildMI(MBB, I, DL, get(SP::LDQFri), DestReg).addFrameIndex(FI).addImm(0)
+ .addMemOperand(MMO);
else
llvm_unreachable("Can't load this register from stack slot");
}
diff --git a/lib/Target/Sparc/SparcInstrInfo.h b/lib/Target/Sparc/SparcInstrInfo.h
index d0b220b..a86cbcb 100644
--- a/lib/Target/Sparc/SparcInstrInfo.h
+++ b/lib/Target/Sparc/SparcInstrInfo.h
@@ -37,6 +37,7 @@ namespace SPII {
class SparcInstrInfo : public SparcGenInstrInfo {
const SparcRegisterInfo RI;
const SparcSubtarget& Subtarget;
+ virtual void anchor();
public:
explicit SparcInstrInfo(SparcSubtarget &ST);
diff --git a/lib/Target/Sparc/SparcInstrInfo.td b/lib/Target/Sparc/SparcInstrInfo.td
index d4cac4d..ef7a114 100644
--- a/lib/Target/Sparc/SparcInstrInfo.td
+++ b/lib/Target/Sparc/SparcInstrInfo.td
@@ -39,6 +39,10 @@ def HasNoV9 : Predicate<"!Subtarget.isV9()">;
// HasVIS - This is true when the target processor has VIS extensions.
def HasVIS : Predicate<"Subtarget.isVIS()">;
+// HasHardQuad - This is true when the target processor supports quad floating
+// point instructions.
+def HasHardQuad : Predicate<"Subtarget.hasHardQuad()">;
+
// UseDeprecatedInsts - This predicate is true when the target processor is a
// V8, or when it is V9 but the V8 deprecated instructions are efficient enough
// to use when appropriate. In either of these cases, the instruction selector
@@ -81,6 +85,8 @@ def MEMri : Operand<iPTR> {
let MIOperandInfo = (ops ptr_rc, i32imm);
}
+def TLSSym : Operand<iPTR>;
+
// Branch targets have OtherVT type.
def brtarget : Operand<OtherVT>;
def calltarget : Operand<i32>;
@@ -101,6 +107,15 @@ def SDTSPFTOI :
SDTypeProfile<1, 1, [SDTCisVT<0, f32>, SDTCisFP<1>]>;
def SDTSPITOF :
SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVT<1, f32>]>;
+def SDTSPFTOX :
+SDTypeProfile<1, 1, [SDTCisVT<0, f64>, SDTCisFP<1>]>;
+def SDTSPXTOF :
+SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVT<1, f64>]>;
+
+def SDTSPtlsadd :
+SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;
+def SDTSPtlsld :
+SDTypeProfile<1, 2, [SDTCisPtrTy<0>, SDTCisPtrTy<1>]>;
def SPcmpicc : SDNode<"SPISD::CMPICC", SDTSPcmpicc, [SDNPOutGlue]>;
def SPcmpfcc : SDNode<"SPISD::CMPFCC", SDTSPcmpfcc, [SDNPOutGlue]>;
@@ -113,6 +128,8 @@ def SPlo : SDNode<"SPISD::Lo", SDTIntUnaryOp>;
def SPftoi : SDNode<"SPISD::FTOI", SDTSPFTOI>;
def SPitof : SDNode<"SPISD::ITOF", SDTSPITOF>;
+def SPftox : SDNode<"SPISD::FTOX", SDTSPFTOX>;
+def SPxtof : SDNode<"SPISD::XTOF", SDTSPXTOF>;
def SPselecticc : SDNode<"SPISD::SELECT_ICC", SDTSPselectcc, [SDNPInGlue]>;
def SPselectxcc : SDNode<"SPISD::SELECT_XCC", SDTSPselectcc, [SDNPInGlue]>;
@@ -140,6 +157,12 @@ def retflag : SDNode<"SPISD::RET_FLAG", SDT_SPRet,
def flushw : SDNode<"SPISD::FLUSHW", SDTNone,
[SDNPHasChain, SDNPSideEffect, SDNPMayStore]>;
+def tlsadd : SDNode<"SPISD::TLS_ADD", SDTSPtlsadd>;
+def tlsld : SDNode<"SPISD::TLS_LD", SDTSPtlsld>;
+def tlscall : SDNode<"SPISD::TLS_CALL", SDT_SPCall,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
+ SDNPVariadic]>;
+
def getPCX : Operand<i32> {
let PrintMethod = "printGetPCX";
}
@@ -242,8 +265,9 @@ let hasSideEffects = 1, mayStore = 1 in {
[(flushw)]>;
}
-def UNIMP : F2_1<0b000, (outs), (ins i32imm:$val),
- "unimp $val", []>;
+let rd = 0 in
+ def UNIMP : F2_1<0b000, (outs), (ins i32imm:$val),
+ "unimp $val", []>;
// SELECT_CC_* - Used to implement the SELECT_CC DAG operation. Expanded after
// instruction selection into a branch sequence. This has to handle all
@@ -263,6 +287,11 @@ let Uses = [ICC], usesCustomInserter = 1 in {
: Pseudo<(outs DFPRegs:$dst), (ins DFPRegs:$T, DFPRegs:$F, i32imm:$Cond),
"; SELECT_CC_DFP_ICC PSEUDO!",
[(set f64:$dst, (SPselecticc f64:$T, f64:$F, imm:$Cond))]>;
+
+ def SELECT_CC_QFP_ICC
+ : Pseudo<(outs QFPRegs:$dst), (ins QFPRegs:$T, QFPRegs:$F, i32imm:$Cond),
+ "; SELECT_CC_QFP_ICC PSEUDO!",
+ [(set f128:$dst, (SPselecticc f128:$T, f128:$F, imm:$Cond))]>;
}
let usesCustomInserter = 1, Uses = [FCC] in {
@@ -280,17 +309,21 @@ let usesCustomInserter = 1, Uses = [FCC] in {
: Pseudo<(outs DFPRegs:$dst), (ins DFPRegs:$T, DFPRegs:$F, i32imm:$Cond),
"; SELECT_CC_DFP_FCC PSEUDO!",
[(set f64:$dst, (SPselectfcc f64:$T, f64:$F, imm:$Cond))]>;
+ def SELECT_CC_QFP_FCC
+ : Pseudo<(outs QFPRegs:$dst), (ins QFPRegs:$T, QFPRegs:$F, i32imm:$Cond),
+ "; SELECT_CC_QFP_FCC PSEUDO!",
+ [(set f128:$dst, (SPselectfcc f128:$T, f128:$F, imm:$Cond))]>;
}
// Section A.3 - Synthetic Instructions, p. 85
// special cases of JMPL:
let isReturn = 1, isTerminator = 1, hasDelaySlot = 1, isBarrier = 1 in {
- let rd = O7.Num, rs1 = G0.Num in
+ let rd = 0, rs1 = 15 in
def RETL: F3_2<2, 0b111000, (outs), (ins i32imm:$val),
"jmp %o7+$val", [(retflag simm13:$val)]>;
- let rd = I7.Num, rs1 = G0.Num in
+ let rd = 0, rs1 = 31 in
def RET: F3_2<2, 0b111000, (outs), (ins i32imm:$val),
"jmp %i7+$val", []>;
}
@@ -354,56 +387,76 @@ def LDDFri : F3_2<3, 0b100011,
(outs DFPRegs:$dst), (ins MEMri:$addr),
"ldd [$addr], $dst",
[(set f64:$dst, (load ADDRri:$addr))]>;
+def LDQFrr : F3_1<3, 0b100010,
+ (outs QFPRegs:$dst), (ins MEMrr:$addr),
+ "ldq [$addr], $dst",
+ [(set f128:$dst, (load ADDRrr:$addr))]>,
+ Requires<[HasV9, HasHardQuad]>;
+def LDQFri : F3_2<3, 0b100010,
+ (outs QFPRegs:$dst), (ins MEMri:$addr),
+ "ldq [$addr], $dst",
+ [(set f128:$dst, (load ADDRri:$addr))]>,
+ Requires<[HasV9, HasHardQuad]>;
// Section B.4 - Store Integer Instructions, p. 95
def STBrr : F3_1<3, 0b000101,
- (outs), (ins MEMrr:$addr, IntRegs:$src),
- "stb $src, [$addr]",
- [(truncstorei8 i32:$src, ADDRrr:$addr)]>;
+ (outs), (ins MEMrr:$addr, IntRegs:$rd),
+ "stb $rd, [$addr]",
+ [(truncstorei8 i32:$rd, ADDRrr:$addr)]>;
def STBri : F3_2<3, 0b000101,
- (outs), (ins MEMri:$addr, IntRegs:$src),
- "stb $src, [$addr]",
- [(truncstorei8 i32:$src, ADDRri:$addr)]>;
+ (outs), (ins MEMri:$addr, IntRegs:$rd),
+ "stb $rd, [$addr]",
+ [(truncstorei8 i32:$rd, ADDRri:$addr)]>;
def STHrr : F3_1<3, 0b000110,
- (outs), (ins MEMrr:$addr, IntRegs:$src),
- "sth $src, [$addr]",
- [(truncstorei16 i32:$src, ADDRrr:$addr)]>;
+ (outs), (ins MEMrr:$addr, IntRegs:$rd),
+ "sth $rd, [$addr]",
+ [(truncstorei16 i32:$rd, ADDRrr:$addr)]>;
def STHri : F3_2<3, 0b000110,
- (outs), (ins MEMri:$addr, IntRegs:$src),
- "sth $src, [$addr]",
- [(truncstorei16 i32:$src, ADDRri:$addr)]>;
+ (outs), (ins MEMri:$addr, IntRegs:$rd),
+ "sth $rd, [$addr]",
+ [(truncstorei16 i32:$rd, ADDRri:$addr)]>;
def STrr : F3_1<3, 0b000100,
- (outs), (ins MEMrr:$addr, IntRegs:$src),
- "st $src, [$addr]",
- [(store i32:$src, ADDRrr:$addr)]>;
+ (outs), (ins MEMrr:$addr, IntRegs:$rd),
+ "st $rd, [$addr]",
+ [(store i32:$rd, ADDRrr:$addr)]>;
def STri : F3_2<3, 0b000100,
- (outs), (ins MEMri:$addr, IntRegs:$src),
- "st $src, [$addr]",
- [(store i32:$src, ADDRri:$addr)]>;
+ (outs), (ins MEMri:$addr, IntRegs:$rd),
+ "st $rd, [$addr]",
+ [(store i32:$rd, ADDRri:$addr)]>;
// Section B.5 - Store Floating-point Instructions, p. 97
def STFrr : F3_1<3, 0b100100,
- (outs), (ins MEMrr:$addr, FPRegs:$src),
- "st $src, [$addr]",
- [(store f32:$src, ADDRrr:$addr)]>;
+ (outs), (ins MEMrr:$addr, FPRegs:$rd),
+ "st $rd, [$addr]",
+ [(store f32:$rd, ADDRrr:$addr)]>;
def STFri : F3_2<3, 0b100100,
- (outs), (ins MEMri:$addr, FPRegs:$src),
- "st $src, [$addr]",
- [(store f32:$src, ADDRri:$addr)]>;
+ (outs), (ins MEMri:$addr, FPRegs:$rd),
+ "st $rd, [$addr]",
+ [(store f32:$rd, ADDRri:$addr)]>;
def STDFrr : F3_1<3, 0b100111,
- (outs), (ins MEMrr:$addr, DFPRegs:$src),
- "std $src, [$addr]",
- [(store f64:$src, ADDRrr:$addr)]>;
+ (outs), (ins MEMrr:$addr, DFPRegs:$rd),
+ "std $rd, [$addr]",
+ [(store f64:$rd, ADDRrr:$addr)]>;
def STDFri : F3_2<3, 0b100111,
- (outs), (ins MEMri:$addr, DFPRegs:$src),
- "std $src, [$addr]",
- [(store f64:$src, ADDRri:$addr)]>;
+ (outs), (ins MEMri:$addr, DFPRegs:$rd),
+ "std $rd, [$addr]",
+ [(store f64:$rd, ADDRri:$addr)]>;
+def STQFrr : F3_1<3, 0b100110,
+ (outs), (ins MEMrr:$addr, QFPRegs:$rd),
+ "stq $rd, [$addr]",
+ [(store f128:$rd, ADDRrr:$addr)]>,
+ Requires<[HasV9, HasHardQuad]>;
+def STQFri : F3_2<3, 0b100110,
+ (outs), (ins MEMri:$addr, QFPRegs:$rd),
+ "stq $rd, [$addr]",
+ [(store f128:$rd, ADDRri:$addr)]>,
+ Requires<[HasV9, HasHardQuad]>;
// Section B.9 - SETHI Instruction, p. 104
def SETHIi: F2_1<0b100,
- (outs IntRegs:$dst), (ins i32imm:$src),
- "sethi $src, $dst",
- [(set i32:$dst, SETHIimm:$src)]>;
+ (outs IntRegs:$rd), (ins i32imm:$imm22),
+ "sethi $imm22, $rd",
+ [(set i32:$rd, SETHIimm:$imm22)]>;
// Section B.10 - NOP Instruction, p. 105
// (It's a special case of SETHI)
@@ -449,30 +502,32 @@ defm SRA : F3_12<"sra", 0b100111, sra>;
defm ADD : F3_12<"add", 0b000000, add>;
// "LEA" forms of add (patterns to make tblgen happy)
-def LEA_ADDri : F3_2<2, 0b000000,
- (outs IntRegs:$dst), (ins MEMri:$addr),
- "add ${addr:arith}, $dst",
- [(set iPTR:$dst, ADDRri:$addr)]>;
+let Predicates = [Is32Bit] in
+ def LEA_ADDri : F3_2<2, 0b000000,
+ (outs IntRegs:$dst), (ins MEMri:$addr),
+ "add ${addr:arith}, $dst",
+ [(set iPTR:$dst, ADDRri:$addr)]>;
let Defs = [ICC] in
defm ADDCC : F3_12<"addcc", 0b010000, addc>;
-let Uses = [ICC] in
- defm ADDX : F3_12<"addx", 0b001000, adde>;
+let Uses = [ICC], Defs = [ICC] in
+ defm ADDX : F3_12<"addxcc", 0b011000, adde>;
// Section B.15 - Subtract Instructions, p. 110
defm SUB : F3_12 <"sub" , 0b000100, sub>;
-let Uses = [ICC] in
- defm SUBX : F3_12 <"subx" , 0b001100, sube>;
+let Uses = [ICC], Defs = [ICC] in
+ defm SUBX : F3_12 <"subxcc" , 0b011100, sube>;
-let Defs = [ICC] in {
+let Defs = [ICC] in
defm SUBCC : F3_12 <"subcc", 0b010100, subc>;
+let Defs = [ICC], rd = 0 in {
def CMPrr : F3_1<2, 0b010100,
(outs), (ins IntRegs:$b, IntRegs:$c),
"cmp $b, $c",
[(SPcmpicc i32:$b, i32:$c)]>;
- def CMPri : F3_1<2, 0b010100,
+ def CMPri : F3_2<2, 0b010100,
(outs), (ins IntRegs:$b, i32imm:$c),
"cmp $b, $c",
[(SPcmpicc i32:$b, (i32 simm13:$c))]>;
@@ -502,23 +557,30 @@ defm RESTORE : F3_12np<"restore", 0b111101>;
// Section B.21 - Branch on Integer Condition Codes Instructions, p. 119
+// unconditional branch class.
+class BranchAlways<dag ins, string asmstr, list<dag> pattern>
+ : F2_2<0b010, (outs), ins, asmstr, pattern> {
+ let isBranch = 1;
+ let isTerminator = 1;
+ let hasDelaySlot = 1;
+ let isBarrier = 1;
+}
+
+let cond = 8 in
+ def BA : BranchAlways<(ins brtarget:$imm22), "ba $imm22", [(br bb:$imm22)]>;
+
// conditional branch class:
-class BranchSP<bits<4> cc, dag ins, string asmstr, list<dag> pattern>
- : F2_2<cc, 0b010, (outs), ins, asmstr, pattern> {
+class BranchSP<dag ins, string asmstr, list<dag> pattern>
+ : F2_2<0b010, (outs), ins, asmstr, pattern> {
let isBranch = 1;
let isTerminator = 1;
let hasDelaySlot = 1;
}
-let isBarrier = 1 in
- def BA : BranchSP<0b1000, (ins brtarget:$dst),
- "ba $dst",
- [(br bb:$dst)]>;
-
// Indirect branch instructions.
let isTerminator = 1, isBarrier = 1,
hasDelaySlot = 1, isBranch =1,
- isIndirectBranch = 1 in {
+ isIndirectBranch = 1, rd = 0 in {
def BINDrr : F3_1<2, 0b111000,
(outs), (ins MEMrr:$ptr),
"jmp $ptr",
@@ -529,37 +591,31 @@ let isTerminator = 1, isBarrier = 1,
[(brind ADDRri:$ptr)]>;
}
-// FIXME: the encoding for the JIT should look at the condition field.
let Uses = [ICC] in
- def BCOND : BranchSP<0, (ins brtarget:$dst, CCOp:$cc),
- "b$cc $dst",
- [(SPbricc bb:$dst, imm:$cc)]>;
-
+ def BCOND : BranchSP<(ins brtarget:$imm22, CCOp:$cond),
+ "b$cond $imm22",
+ [(SPbricc bb:$imm22, imm:$cond)]>;
// Section B.22 - Branch on Floating-point Condition Codes Instructions, p. 121
// floating-point conditional branch class:
-class FPBranchSP<bits<4> cc, dag ins, string asmstr, list<dag> pattern>
- : F2_2<cc, 0b110, (outs), ins, asmstr, pattern> {
+class FPBranchSP<dag ins, string asmstr, list<dag> pattern>
+ : F2_2<0b110, (outs), ins, asmstr, pattern> {
let isBranch = 1;
let isTerminator = 1;
let hasDelaySlot = 1;
}
-// FIXME: the encoding for the JIT should look at the condition field.
let Uses = [FCC] in
- def FBCOND : FPBranchSP<0, (ins brtarget:$dst, CCOp:$cc),
- "fb$cc $dst",
- [(SPbrfcc bb:$dst, imm:$cc)]>;
+ def FBCOND : FPBranchSP<(ins brtarget:$imm22, CCOp:$cond),
+ "fb$cond $imm22",
+ [(SPbrfcc bb:$imm22, imm:$cond)]>;
// Section B.24 - Call and Link Instruction, p. 125
// This is the only Format 1 instruction
let Uses = [O6],
- hasDelaySlot = 1, isCall = 1,
- Defs = [O0, O1, O2, O3, O4, O5, O7, G1, G2, G3, G4, G5, G6, G7,
- D0, D1, D2, D3, D4, D5, D6, D7, D8, D9, D10, D11, D12, D13, D14, D15,
- ICC, FCC, Y] in {
+ hasDelaySlot = 1, isCall = 1 in {
def CALL : InstSP<(outs), (ins calltarget:$dst, variable_ops),
"call $dst", []> {
bits<30> disp;
@@ -571,21 +627,21 @@ let Uses = [O6],
def JMPLrr : F3_1<2, 0b111000,
(outs), (ins MEMrr:$ptr, variable_ops),
"call $ptr",
- [(call ADDRrr:$ptr)]>;
+ [(call ADDRrr:$ptr)]> { let rd = 15; }
def JMPLri : F3_2<2, 0b111000,
(outs), (ins MEMri:$ptr, variable_ops),
"call $ptr",
- [(call ADDRri:$ptr)]>;
+ [(call ADDRri:$ptr)]> { let rd = 15; }
}
// Section B.28 - Read State Register Instructions
-let Uses = [Y] in
+let Uses = [Y], rs1 = 0, rs2 = 0 in
def RDY : F3_1<2, 0b101000,
(outs IntRegs:$dst), (ins),
"rd %y, $dst", []>;
// Section B.29 - Write State Register Instructions
-let Defs = [Y] in {
+let Defs = [Y], rd = 0 in {
def WRYrr : F3_1<2, 0b110000,
(outs), (ins IntRegs:$b, IntRegs:$c),
"wr $b, $c, %y", []>;
@@ -594,58 +650,93 @@ let Defs = [Y] in {
"wr $b, $c, %y", []>;
}
// Convert Integer to Floating-point Instructions, p. 141
-def FITOS : F3_3<2, 0b110100, 0b011000100,
+def FITOS : F3_3u<2, 0b110100, 0b011000100,
(outs FPRegs:$dst), (ins FPRegs:$src),
"fitos $src, $dst",
[(set FPRegs:$dst, (SPitof FPRegs:$src))]>;
-def FITOD : F3_3<2, 0b110100, 0b011001000,
+def FITOD : F3_3u<2, 0b110100, 0b011001000,
(outs DFPRegs:$dst), (ins FPRegs:$src),
"fitod $src, $dst",
[(set DFPRegs:$dst, (SPitof FPRegs:$src))]>;
+def FITOQ : F3_3u<2, 0b110100, 0b011001100,
+ (outs QFPRegs:$dst), (ins FPRegs:$src),
+ "fitoq $src, $dst",
+ [(set QFPRegs:$dst, (SPitof FPRegs:$src))]>,
+ Requires<[HasHardQuad]>;
// Convert Floating-point to Integer Instructions, p. 142
-def FSTOI : F3_3<2, 0b110100, 0b011010001,
+def FSTOI : F3_3u<2, 0b110100, 0b011010001,
(outs FPRegs:$dst), (ins FPRegs:$src),
"fstoi $src, $dst",
[(set FPRegs:$dst, (SPftoi FPRegs:$src))]>;
-def FDTOI : F3_3<2, 0b110100, 0b011010010,
+def FDTOI : F3_3u<2, 0b110100, 0b011010010,
(outs FPRegs:$dst), (ins DFPRegs:$src),
"fdtoi $src, $dst",
[(set FPRegs:$dst, (SPftoi DFPRegs:$src))]>;
+def FQTOI : F3_3u<2, 0b110100, 0b011010011,
+ (outs FPRegs:$dst), (ins QFPRegs:$src),
+ "fqtoi $src, $dst",
+ [(set FPRegs:$dst, (SPftoi QFPRegs:$src))]>,
+ Requires<[HasHardQuad]>;
// Convert between Floating-point Formats Instructions, p. 143
-def FSTOD : F3_3<2, 0b110100, 0b011001001,
+def FSTOD : F3_3u<2, 0b110100, 0b011001001,
(outs DFPRegs:$dst), (ins FPRegs:$src),
"fstod $src, $dst",
[(set f64:$dst, (fextend f32:$src))]>;
-def FDTOS : F3_3<2, 0b110100, 0b011000110,
+def FSTOQ : F3_3u<2, 0b110100, 0b011001101,
+ (outs QFPRegs:$dst), (ins FPRegs:$src),
+ "fstoq $src, $dst",
+ [(set f128:$dst, (fextend f32:$src))]>,
+ Requires<[HasHardQuad]>;
+def FDTOS : F3_3u<2, 0b110100, 0b011000110,
(outs FPRegs:$dst), (ins DFPRegs:$src),
"fdtos $src, $dst",
[(set f32:$dst, (fround f64:$src))]>;
+def FDTOQ : F3_3u<2, 0b110100, 0b011001110,
+ (outs QFPRegs:$dst), (ins DFPRegs:$src),
+ "fdtoq $src, $dst",
+ [(set f128:$dst, (fextend f64:$src))]>,
+ Requires<[HasHardQuad]>;
+def FQTOS : F3_3u<2, 0b110100, 0b011000111,
+ (outs FPRegs:$dst), (ins QFPRegs:$src),
+ "fqtos $src, $dst",
+ [(set f32:$dst, (fround f128:$src))]>,
+ Requires<[HasHardQuad]>;
+def FQTOD : F3_3u<2, 0b110100, 0b011001011,
+ (outs DFPRegs:$dst), (ins QFPRegs:$src),
+ "fqtod $src, $dst",
+ [(set f64:$dst, (fround f128:$src))]>,
+ Requires<[HasHardQuad]>;
// Floating-point Move Instructions, p. 144
-def FMOVS : F3_3<2, 0b110100, 0b000000001,
+def FMOVS : F3_3u<2, 0b110100, 0b000000001,
(outs FPRegs:$dst), (ins FPRegs:$src),
"fmovs $src, $dst", []>;
-def FNEGS : F3_3<2, 0b110100, 0b000000101,
+def FNEGS : F3_3u<2, 0b110100, 0b000000101,
(outs FPRegs:$dst), (ins FPRegs:$src),
"fnegs $src, $dst",
[(set f32:$dst, (fneg f32:$src))]>;
-def FABSS : F3_3<2, 0b110100, 0b000001001,
+def FABSS : F3_3u<2, 0b110100, 0b000001001,
(outs FPRegs:$dst), (ins FPRegs:$src),
"fabss $src, $dst",
[(set f32:$dst, (fabs f32:$src))]>;
// Floating-point Square Root Instructions, p.145
-def FSQRTS : F3_3<2, 0b110100, 0b000101001,
+def FSQRTS : F3_3u<2, 0b110100, 0b000101001,
(outs FPRegs:$dst), (ins FPRegs:$src),
"fsqrts $src, $dst",
[(set f32:$dst, (fsqrt f32:$src))]>;
-def FSQRTD : F3_3<2, 0b110100, 0b000101010,
+def FSQRTD : F3_3u<2, 0b110100, 0b000101010,
(outs DFPRegs:$dst), (ins DFPRegs:$src),
"fsqrtd $src, $dst",
[(set f64:$dst, (fsqrt f64:$src))]>;
+def FSQRTQ : F3_3u<2, 0b110100, 0b000101011,
+ (outs QFPRegs:$dst), (ins QFPRegs:$src),
+ "fsqrtq $src, $dst",
+ [(set f128:$dst, (fsqrt f128:$src))]>,
+ Requires<[HasHardQuad]>;
@@ -658,6 +749,12 @@ def FADDD : F3_3<2, 0b110100, 0b001000010,
(outs DFPRegs:$dst), (ins DFPRegs:$src1, DFPRegs:$src2),
"faddd $src1, $src2, $dst",
[(set f64:$dst, (fadd f64:$src1, f64:$src2))]>;
+def FADDQ : F3_3<2, 0b110100, 0b001000011,
+ (outs QFPRegs:$dst), (ins QFPRegs:$src1, QFPRegs:$src2),
+ "faddq $src1, $src2, $dst",
+ [(set f128:$dst, (fadd f128:$src1, f128:$src2))]>,
+ Requires<[HasHardQuad]>;
+
def FSUBS : F3_3<2, 0b110100, 0b001000101,
(outs FPRegs:$dst), (ins FPRegs:$src1, FPRegs:$src2),
"fsubs $src1, $src2, $dst",
@@ -666,6 +763,12 @@ def FSUBD : F3_3<2, 0b110100, 0b001000110,
(outs DFPRegs:$dst), (ins DFPRegs:$src1, DFPRegs:$src2),
"fsubd $src1, $src2, $dst",
[(set f64:$dst, (fsub f64:$src1, f64:$src2))]>;
+def FSUBQ : F3_3<2, 0b110100, 0b001000111,
+ (outs QFPRegs:$dst), (ins QFPRegs:$src1, QFPRegs:$src2),
+ "fsubq $src1, $src2, $dst",
+ [(set f128:$dst, (fsub f128:$src1, f128:$src2))]>,
+ Requires<[HasHardQuad]>;
+
// Floating-point Multiply and Divide Instructions, p. 147
def FMULS : F3_3<2, 0b110100, 0b001001001,
@@ -676,11 +779,24 @@ def FMULD : F3_3<2, 0b110100, 0b001001010,
(outs DFPRegs:$dst), (ins DFPRegs:$src1, DFPRegs:$src2),
"fmuld $src1, $src2, $dst",
[(set f64:$dst, (fmul f64:$src1, f64:$src2))]>;
+def FMULQ : F3_3<2, 0b110100, 0b001001011,
+ (outs QFPRegs:$dst), (ins QFPRegs:$src1, QFPRegs:$src2),
+ "fmulq $src1, $src2, $dst",
+ [(set f128:$dst, (fmul f128:$src1, f128:$src2))]>,
+ Requires<[HasHardQuad]>;
+
def FSMULD : F3_3<2, 0b110100, 0b001101001,
(outs DFPRegs:$dst), (ins FPRegs:$src1, FPRegs:$src2),
"fsmuld $src1, $src2, $dst",
[(set f64:$dst, (fmul (fextend f32:$src1),
(fextend f32:$src2)))]>;
+def FDMULQ : F3_3<2, 0b110100, 0b001101110,
+ (outs QFPRegs:$dst), (ins DFPRegs:$src1, DFPRegs:$src2),
+ "fdmulq $src1, $src2, $dst",
+ [(set f128:$dst, (fmul (fextend f64:$src1),
+ (fextend f64:$src2)))]>,
+ Requires<[HasHardQuad]>;
+
def FDIVS : F3_3<2, 0b110100, 0b001001101,
(outs FPRegs:$dst), (ins FPRegs:$src1, FPRegs:$src2),
"fdivs $src1, $src2, $dst",
@@ -689,21 +805,61 @@ def FDIVD : F3_3<2, 0b110100, 0b001001110,
(outs DFPRegs:$dst), (ins DFPRegs:$src1, DFPRegs:$src2),
"fdivd $src1, $src2, $dst",
[(set f64:$dst, (fdiv f64:$src1, f64:$src2))]>;
+def FDIVQ : F3_3<2, 0b110100, 0b001001111,
+ (outs QFPRegs:$dst), (ins QFPRegs:$src1, QFPRegs:$src2),
+ "fdivq $src1, $src2, $dst",
+ [(set f128:$dst, (fdiv f128:$src1, f128:$src2))]>,
+ Requires<[HasHardQuad]>;
// Floating-point Compare Instructions, p. 148
// Note: the 2nd template arg is different for these guys.
// Note 2: the result of a FCMP is not available until the 2nd cycle
-// after the instr is retired, but there is no interlock. This behavior
-// is modelled with a forced noop after the instruction.
+// after the instr is retired, but there is no interlock in Sparc V8.
+// This behavior is modeled with a forced noop after the instruction in
+// DelaySlotFiller.
+
let Defs = [FCC] in {
- def FCMPS : F3_3<2, 0b110101, 0b001010001,
+ def FCMPS : F3_3c<2, 0b110101, 0b001010001,
(outs), (ins FPRegs:$src1, FPRegs:$src2),
- "fcmps $src1, $src2\n\tnop",
+ "fcmps $src1, $src2",
[(SPcmpfcc f32:$src1, f32:$src2)]>;
- def FCMPD : F3_3<2, 0b110101, 0b001010010,
+ def FCMPD : F3_3c<2, 0b110101, 0b001010010,
(outs), (ins DFPRegs:$src1, DFPRegs:$src2),
- "fcmpd $src1, $src2\n\tnop",
+ "fcmpd $src1, $src2",
[(SPcmpfcc f64:$src1, f64:$src2)]>;
+ def FCMPQ : F3_3c<2, 0b110101, 0b001010011,
+ (outs), (ins QFPRegs:$src1, QFPRegs:$src2),
+ "fcmpq $src1, $src2",
+ [(SPcmpfcc f128:$src1, f128:$src2)]>,
+ Requires<[HasHardQuad]>;
+}
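
Concretely, the DelaySlotFiller guarantee described above yields V8 sequences like the following (a sketch; the label name is illustrative):

    const char *kFcmpSequence =
        "fcmps %f1, %f2\n"
        "nop\n"              // V8: FCC is not readable in the next cycle
        "fbne  .LBB0_2\n"
        " nop\n";            // ordinary branch delay slot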
+
+//===----------------------------------------------------------------------===//
+// Instructions for Thread Local Storage(TLS).
+//===----------------------------------------------------------------------===//
+
+def TLS_ADDrr : F3_1<2, 0b000000,
+ (outs IntRegs:$rd),
+ (ins IntRegs:$rs1, IntRegs:$rs2, TLSSym:$sym),
+ "add $rs1, $rs2, $rd, $sym",
+ [(set i32:$rd,
+ (tlsadd i32:$rs1, i32:$rs2, tglobaltlsaddr:$sym))]>;
+
+let mayLoad = 1 in
+ def TLS_LDrr : F3_1<3, 0b000000,
+ (outs IntRegs:$dst), (ins MEMrr:$addr, TLSSym:$sym),
+ "ld [$addr], $dst, $sym",
+ [(set i32:$dst,
+ (tlsld ADDRrr:$addr, tglobaltlsaddr:$sym))]>;
+
+let Uses = [O6], isCall = 1, hasDelaySlot = 1 in
+ def TLS_CALL : InstSP<(outs),
+ (ins calltarget:$disp, TLSSym:$sym, variable_ops),
+ "call $disp, $sym",
+ [(tlscall texternalsym:$disp, tglobaltlsaddr:$sym)]> {
+ bits<30> disp;
+ let op = 1;
+ let Inst{29-0} = disp;
}
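
For orientation, these nodes are meant to print the standard SPARC general-dynamic TLS sequence; the %tgd_* operand spellings below follow the SPARC TLS ABI and are quoted from memory, not from this patch:

    const char *kTlsGeneralDynamicSketch =
        "sethi %tgd_hi22(sym), %o1\n"            // SPhi on tglobaltlsaddr
        "add   %o1, %tgd_lo10(sym), %o1\n"       // SPlo on tglobaltlsaddr
        "add   %l7, %o1, %o0, %tgd_add(sym)\n"   // TLS_ADDrr
        "call  __tls_get_addr, %tgd_call(sym)\n" // TLS_CALL
        " nop\n";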
//===----------------------------------------------------------------------===//
@@ -713,73 +869,108 @@ let Defs = [FCC] in {
// V9 Conditional Moves.
let Predicates = [HasV9], Constraints = "$f = $rd" in {
// Move Integer Register on Condition (MOVcc) p. 194 of the V9 manual.
- // FIXME: Add instruction encodings for the JIT some day.
- let Uses = [ICC] in {
+ let Uses = [ICC], cc = 0b100 in {
def MOVICCrr
- : Pseudo<(outs IntRegs:$rd), (ins IntRegs:$rs2, IntRegs:$f, CCOp:$cc),
- "mov$cc %icc, $rs2, $rd",
- [(set i32:$rd, (SPselecticc i32:$rs2, i32:$f, imm:$cc))]>;
+ : F4_1<0b101100, (outs IntRegs:$rd),
+ (ins IntRegs:$rs2, IntRegs:$f, CCOp:$cond),
+ "mov$cond %icc, $rs2, $rd",
+ [(set i32:$rd, (SPselecticc i32:$rs2, i32:$f, imm:$cond))]>;
+
def MOVICCri
- : Pseudo<(outs IntRegs:$rd), (ins i32imm:$i, IntRegs:$f, CCOp:$cc),
- "mov$cc %icc, $i, $rd",
- [(set i32:$rd, (SPselecticc simm11:$i, i32:$f, imm:$cc))]>;
+ : F4_2<0b101100, (outs IntRegs:$rd),
+ (ins i32imm:$simm11, IntRegs:$f, CCOp:$cond),
+ "mov$cond %icc, $simm11, $rd",
+ [(set i32:$rd,
+ (SPselecticc simm11:$simm11, i32:$f, imm:$cond))]>;
}
- let Uses = [FCC] in {
+ let Uses = [FCC], cc = 0b000 in {
def MOVFCCrr
- : Pseudo<(outs IntRegs:$rd), (ins IntRegs:$rs2, IntRegs:$f, CCOp:$cc),
- "mov$cc %fcc0, $rs2, $rd",
- [(set i32:$rd, (SPselectfcc i32:$rs2, i32:$f, imm:$cc))]>;
+ : F4_1<0b101100, (outs IntRegs:$rd),
+ (ins IntRegs:$rs2, IntRegs:$f, CCOp:$cond),
+ "mov$cond %fcc0, $rs2, $rd",
+ [(set i32:$rd, (SPselectfcc i32:$rs2, i32:$f, imm:$cond))]>;
def MOVFCCri
- : Pseudo<(outs IntRegs:$rd), (ins i32imm:$i, IntRegs:$f, CCOp:$cc),
- "mov$cc %fcc0, $i, $rd",
- [(set i32:$rd, (SPselectfcc simm11:$i, i32:$f, imm:$cc))]>;
+ : F4_2<0b101100, (outs IntRegs:$rd),
+ (ins i32imm:$simm11, IntRegs:$f, CCOp:$cond),
+ "mov$cond %fcc0, $simm11, $rd",
+ [(set i32:$rd,
+ (SPselectfcc simm11:$simm11, i32:$f, imm:$cond))]>;
}
- let Uses = [ICC] in {
+ let Uses = [ICC], opf_cc = 0b100 in {
def FMOVS_ICC
- : Pseudo<(outs FPRegs:$rd), (ins FPRegs:$rs2, FPRegs:$f, CCOp:$cc),
- "fmovs$cc %icc, $rs2, $rd",
- [(set f32:$rd, (SPselecticc f32:$rs2, f32:$f, imm:$cc))]>;
+ : F4_3<0b110101, 0b000001, (outs FPRegs:$rd),
+ (ins FPRegs:$rs2, FPRegs:$f, CCOp:$cond),
+ "fmovs$cond %icc, $rs2, $rd",
+ [(set f32:$rd, (SPselecticc f32:$rs2, f32:$f, imm:$cond))]>;
def FMOVD_ICC
- : Pseudo<(outs DFPRegs:$rd), (ins DFPRegs:$rs2, DFPRegs:$f, CCOp:$cc),
- "fmovd$cc %icc, $rs2, $rd",
- [(set f64:$rd, (SPselecticc f64:$rs2, f64:$f, imm:$cc))]>;
+ : F4_3<0b110101, 0b000010, (outs DFPRegs:$rd),
+ (ins DFPRegs:$rs2, DFPRegs:$f, CCOp:$cond),
+ "fmovd$cond %icc, $rs2, $rd",
+ [(set f64:$rd, (SPselecticc f64:$rs2, f64:$f, imm:$cond))]>;
+ def FMOVQ_ICC
+ : F4_3<0b110101, 0b000011, (outs QFPRegs:$rd),
+ (ins QFPRegs:$rs2, QFPRegs:$f, CCOp:$cond),
+               "fmovq$cond %icc, $rs2, $rd",
+ [(set f128:$rd, (SPselecticc f128:$rs2, f128:$f, imm:$cond))]>;
}
- let Uses = [FCC] in {
+ let Uses = [FCC], opf_cc = 0b000 in {
def FMOVS_FCC
- : Pseudo<(outs FPRegs:$rd), (ins FPRegs:$rs2, FPRegs:$f, CCOp:$cc),
- "fmovs$cc %fcc0, $rs2, $rd",
- [(set f32:$rd, (SPselectfcc f32:$rs2, f32:$f, imm:$cc))]>;
+ : F4_3<0b110101, 0b000001, (outs FPRegs:$rd),
+ (ins FPRegs:$rs2, FPRegs:$f, CCOp:$cond),
+ "fmovs$cond %fcc0, $rs2, $rd",
+ [(set f32:$rd, (SPselectfcc f32:$rs2, f32:$f, imm:$cond))]>;
def FMOVD_FCC
- : Pseudo<(outs DFPRegs:$rd), (ins DFPRegs:$rs2, DFPRegs:$f, CCOp:$cc),
- "fmovd$cc %fcc0, $rs2, $rd",
- [(set f64:$rd, (SPselectfcc f64:$rs2, f64:$f, imm:$cc))]>;
+ : F4_3<0b110101, 0b000010, (outs DFPRegs:$rd),
+ (ins DFPRegs:$rs2, DFPRegs:$f, CCOp:$cond),
+ "fmovd$cond %fcc0, $rs2, $rd",
+ [(set f64:$rd, (SPselectfcc f64:$rs2, f64:$f, imm:$cond))]>;
+ def FMOVQ_FCC
+ : F4_3<0b110101, 0b000011, (outs QFPRegs:$rd),
+ (ins QFPRegs:$rs2, QFPRegs:$f, CCOp:$cond),
+               "fmovq$cond %fcc0, $rs2, $rd",
+ [(set f128:$rd, (SPselectfcc f128:$rs2, f128:$f, imm:$cond))]>;
}
}
// Floating-Point Move Instructions, p. 164 of the V9 manual.
let Predicates = [HasV9] in {
- def FMOVD : F3_3<2, 0b110100, 0b000000010,
+ def FMOVD : F3_3u<2, 0b110100, 0b000000010,
(outs DFPRegs:$dst), (ins DFPRegs:$src),
"fmovd $src, $dst", []>;
- def FNEGD : F3_3<2, 0b110100, 0b000000110,
+ def FMOVQ : F3_3u<2, 0b110100, 0b000000011,
+ (outs QFPRegs:$dst), (ins QFPRegs:$src),
+ "fmovq $src, $dst", []>,
+ Requires<[HasHardQuad]>;
+ def FNEGD : F3_3u<2, 0b110100, 0b000000110,
(outs DFPRegs:$dst), (ins DFPRegs:$src),
"fnegd $src, $dst",
[(set f64:$dst, (fneg f64:$src))]>;
- def FABSD : F3_3<2, 0b110100, 0b000001010,
+ def FNEGQ : F3_3u<2, 0b110100, 0b000000111,
+ (outs QFPRegs:$dst), (ins QFPRegs:$src),
+ "fnegq $src, $dst",
+ [(set f128:$dst, (fneg f128:$src))]>,
+ Requires<[HasHardQuad]>;
+ def FABSD : F3_3u<2, 0b110100, 0b000001010,
(outs DFPRegs:$dst), (ins DFPRegs:$src),
"fabsd $src, $dst",
[(set f64:$dst, (fabs f64:$src))]>;
+ def FABSQ : F3_3u<2, 0b110100, 0b000001011,
+ (outs QFPRegs:$dst), (ins QFPRegs:$src),
+ "fabsq $src, $dst",
+ [(set f128:$dst, (fabs f128:$src))]>,
+ Requires<[HasHardQuad]>;
}
// POPCrr - This does a ctpop of a 64-bit register. As such, we have to clear
// the top 32-bits before using it. To do this clearing, we use a SLLri X,0.
-def POPCrr : F3_1<2, 0b101110,
- (outs IntRegs:$dst), (ins IntRegs:$src),
- "popc $src, $dst", []>, Requires<[HasV9]>;
+let rs1 = 0 in
+ def POPCrr : F3_1<2, 0b101110,
+ (outs IntRegs:$dst), (ins IntRegs:$src),
+ "popc $src, $dst", []>, Requires<[HasV9]>;
def : Pat<(ctpop i32:$src),
(POPCrr (SLLri $src, 0))>;
@@ -801,6 +992,14 @@ def : Pat<(SPlo tglobaladdr:$in), (ORri (i32 G0), tglobaladdr:$in)>;
def : Pat<(SPhi tconstpool:$in), (SETHIi tconstpool:$in)>;
def : Pat<(SPlo tconstpool:$in), (ORri (i32 G0), tconstpool:$in)>;
+// GlobalTLS addresses
+def : Pat<(SPhi tglobaltlsaddr:$in), (SETHIi tglobaltlsaddr:$in)>;
+def : Pat<(SPlo tglobaltlsaddr:$in), (ORri (i32 G0), tglobaltlsaddr:$in)>;
+def : Pat<(add (SPhi tglobaltlsaddr:$in1), (SPlo tglobaltlsaddr:$in2)),
+ (ADDri (SETHIi tglobaltlsaddr:$in1), (tglobaltlsaddr:$in2))>;
+def : Pat<(xor (SPhi tglobaltlsaddr:$in1), (SPlo tglobaltlsaddr:$in2)),
+ (XORri (SETHIi tglobaltlsaddr:$in1), (tglobaltlsaddr:$in2))>;
+
// Blockaddress
def : Pat<(SPhi tblockaddress:$in), (SETHIi tblockaddress:$in)>;
def : Pat<(SPlo tblockaddress:$in), (ORri (i32 G0), tblockaddress:$in)>;
diff --git a/lib/Target/Sparc/SparcJITInfo.cpp b/lib/Target/Sparc/SparcJITInfo.cpp
new file mode 100644
index 0000000..6493c7d
--- /dev/null
+++ b/lib/Target/Sparc/SparcJITInfo.cpp
@@ -0,0 +1,165 @@
+//===-- SparcJITInfo.cpp - Implement the Sparc JIT Interface --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the JIT interfaces for the Sparc target.
+//
+//===----------------------------------------------------------------------===//
+#define DEBUG_TYPE "jit"
+#include "SparcJITInfo.h"
+#include "SparcRelocations.h"
+
+#include "llvm/CodeGen/JITCodeEmitter.h"
+#include "llvm/Support/Memory.h"
+
+using namespace llvm;
+
+/// JITCompilerFunction - This contains the address of the JIT function used to
+/// compile a function lazily.
+static TargetJITInfo::JITCompilerFn JITCompilerFunction;
+
+extern "C" void SparcCompilationCallback();
+
+extern "C" {
+#if defined (__sparc__)
+ asm(
+ ".text\n"
+ "\t.align 4\n"
+ "\t.global SparcCompilationCallback\n"
+ "\t.type SparcCompilationCallback, #function\n"
+ "SparcCompilationCallback:\n"
+ // Save current register window.
+ "\tsave %sp, -192, %sp\n"
+ // stubaddr+4 is in %g1.
+ "\tcall SparcCompilationCallbackC\n"
+ "\t sub %g1, 4, %o0\n"
+ // restore original register window and
+ // copy %o0 to %g1
+ "\t restore %o0, 0, %g1\n"
+ // call the new stub
+ "\tjmp %g1\n"
+ "\t nop\n"
+ "\t.size SparcCompilationCallback, .-SparcCompilationCallback"
+ );
+
+#else
+ void SparcCompilationCallback() {
+ llvm_unreachable(
+ "Cannot call SparcCompilationCallback() on a non-sparc arch!");
+ }
+#endif
+}
+
+#define HI(Val) (((unsigned)(Val)) >> 10)
+#define LO(Val) (((unsigned)(Val)) & 0x3FF)
+
+#define SETHI_INST(imm, rd) (0x01000000 | ((rd) << 25) | ((imm) & 0x3FFFFF))
+#define JMP_INST(rs1, imm, rd) (0x80000000 | ((rd) << 25) | (0x38 << 19) \
+ | ((rs1) << 14) | (1 << 13) | ((imm) & 0x1FFF))
+#define NOP_INST SETHI_INST(0, 0)
+
+extern "C" void *SparcCompilationCallbackC(intptr_t StubAddr) {
+ // Get the address of the compiled code for this function.
+ intptr_t NewVal = (intptr_t) JITCompilerFunction((void*) StubAddr);
+
+ // Rewrite the function stub so that we don't end up here every time we
+ // execute the call. We're replacing the first three instructions of the
+ // stub with code that jumps to the compiled function:
+ // sethi %hi(NewVal), %g1
+ // jmp %g1+%lo(NewVal)
+ // nop
+
+  *(uint32_t *)(StubAddr)     = SETHI_INST(HI(NewVal), 1);
+  *(uint32_t *)(StubAddr + 4) = JMP_INST(1, LO(NewVal), 0);
+  *(uint32_t *)(StubAddr + 8) = NOP_INST;
+
+ sys::Memory::InvalidateInstructionCache((void*) StubAddr, 12);
+ return (void*)StubAddr;
+}
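To see why the rewritten stub reaches NewVal, here is a minimal sketch
(assuming only the HI/LO macros above and ordinary %g1 arithmetic):

    #include <cassert>
    #include <cstdint>

    // Sketch only: sethi puts HI(NewVal) into bits 31:10 of %g1, and the
    // jmp's 13-bit immediate adds LO(NewVal) back in.
    int main() {
      uint32_t NewVal = 0x00412344;                    // hypothetical code address
      uint32_t G1 = ((NewVal >> 10) & 0x3FFFFF) << 10; // sethi %hi(NewVal), %g1
      uint32_t Dest = G1 + (NewVal & 0x3FF);           // jmp %g1 + %lo(NewVal)
      assert(Dest == NewVal);
      return 0;
    }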
+
+void SparcJITInfo::replaceMachineCodeForFunction(void *Old, void *New) {
+ assert(0 && "FIXME: Implement SparcJITInfo::replaceMachineCodeForFunction");
+}
+
+
+TargetJITInfo::StubLayout SparcJITInfo::getStubLayout() {
+  // The stub contains three 4-byte instructions, aligned at 4 bytes. See
+ // emitFunctionStub for details.
+
+ StubLayout Result = { 3*4, 4 };
+ return Result;
+}
+
+void *SparcJITInfo::emitFunctionStub(const Function *F, void *Fn,
+ JITCodeEmitter &JCE)
+{
+ JCE.emitAlignment(4);
+ void *Addr = (void*) (JCE.getCurrentPCValue());
+ if (!sys::Memory::setRangeWritable(Addr, 12))
+ llvm_unreachable("ERROR: Unable to mark stub writable.");
+
+ intptr_t EmittedAddr;
+ if (Fn != (void*)(intptr_t)SparcCompilationCallback)
+ EmittedAddr = (intptr_t)Fn;
+ else
+ EmittedAddr = (intptr_t)SparcCompilationCallback;
+
+ // sethi %hi(EmittedAddr), %g1
+ // jmp %g1+%lo(EmittedAddr), %g1
+ // nop
+
+ JCE.emitWordBE(SETHI_INST(HI(EmittedAddr), 1));
+ JCE.emitWordBE(JMP_INST(1, LO(EmittedAddr), 1));
+ JCE.emitWordBE(NOP_INST);
+
+ sys::Memory::InvalidateInstructionCache(Addr, 12);
+ if (!sys::Memory::setRangeExecutable(Addr, 12))
+ llvm_unreachable("ERROR: Unable to mark stub executable.");
+
+ return Addr;
+}
+
+TargetJITInfo::LazyResolverFn
+SparcJITInfo::getLazyResolverFunction(JITCompilerFn F) {
+ JITCompilerFunction = F;
+ return SparcCompilationCallback;
+}
+
+/// relocate - Before the JIT can run a block of code that has been emitted,
+/// it must rewrite the code to contain the actual addresses of any
+/// referenced global symbols.
+void SparcJITInfo::relocate(void *Function, MachineRelocation *MR,
+ unsigned NumRelocs, unsigned char *GOTBase) {
+ for (unsigned i = 0; i != NumRelocs; ++i, ++MR) {
+ void *RelocPos = (char*) Function + MR->getMachineCodeOffset();
+ intptr_t ResultPtr = (intptr_t) MR->getResultPointer();
+
+ switch ((SP::RelocationType) MR->getRelocationType()) {
+ case SP::reloc_sparc_hi:
+ ResultPtr = (ResultPtr >> 10) & 0x3fffff;
+ break;
+
+ case SP::reloc_sparc_lo:
+ ResultPtr = (ResultPtr & 0x3ff);
+ break;
+
+ case SP::reloc_sparc_pc30:
+ ResultPtr = ((ResultPtr - (intptr_t)RelocPos) >> 2) & 0x3fffffff;
+ break;
+
+ case SP::reloc_sparc_pc22:
+ ResultPtr = ((ResultPtr - (intptr_t)RelocPos) >> 2) & 0x3fffff;
+ break;
+
+ case SP::reloc_sparc_pc19:
+ ResultPtr = ((ResultPtr - (intptr_t)RelocPos) >> 2) & 0x7ffff;
+ break;
+ }
+ *((unsigned*) RelocPos) |= (unsigned) ResultPtr;
+ }
+}
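For orientation, a minimal sketch (not part of the patch) of how the hi/lo
pair computed above re-forms an absolute address once both fixups have been
OR'd into their zero-filled instruction fields:

    #include <cassert>
    #include <cstdint>

    // Sketch only: reloc_sparc_hi keeps bits 31:10, reloc_sparc_lo bits 9:0.
    int main() {
      uint32_t Target = 0x12345678;
      uint32_t Hi = (Target >> 10) & 0x3fffff;  // reloc_sparc_hi
      uint32_t Lo = Target & 0x3ff;             // reloc_sparc_lo
      assert(((Hi << 10) | Lo) == Target);
      return 0;
    }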
diff --git a/lib/Target/Sparc/SparcJITInfo.h b/lib/Target/Sparc/SparcJITInfo.h
new file mode 100644
index 0000000..9c6e488
--- /dev/null
+++ b/lib/Target/Sparc/SparcJITInfo.h
@@ -0,0 +1,67 @@
+//==- SparcJITInfo.h - Sparc Implementation of the JIT Interface -*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the SparcJITInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SPARCJITINFO_H
+#define SPARCJITINFO_H
+
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/Target/TargetJITInfo.h"
+
+namespace llvm {
+class SparcTargetMachine;
+
+class SparcJITInfo : public TargetJITInfo {
+
+ bool IsPIC;
+
+ public:
+ explicit SparcJITInfo()
+ : IsPIC(false) {}
+
+ /// replaceMachineCodeForFunction - Make it so that calling the function
+ /// whose machine code is at OLD turns into a call to NEW, perhaps by
+ /// overwriting OLD with a branch to NEW. This is used for self-modifying
+ /// code.
+ ///
+ virtual void replaceMachineCodeForFunction(void *Old, void *New);
+
+ // getStubLayout - Returns the size and alignment of the largest call stub
+ // on Sparc.
+ virtual StubLayout getStubLayout();
+
+
+ /// emitFunctionStub - Use the specified JITCodeEmitter object to emit a
+ /// small native function that simply calls the function at the specified
+ /// address.
+ virtual void *emitFunctionStub(const Function *F, void *Fn,
+ JITCodeEmitter &JCE);
+
+ /// getLazyResolverFunction - Expose the lazy resolver to the JIT.
+ virtual LazyResolverFn getLazyResolverFunction(JITCompilerFn);
+
+ /// relocate - Before the JIT can run a block of code that has been emitted,
+ /// it must rewrite the code to contain the actual addresses of any
+ /// referenced global symbols.
+ virtual void relocate(void *Function, MachineRelocation *MR,
+ unsigned NumRelocs, unsigned char *GOTBase);
+
+  /// Initialize - Initialize internal state for the function being JITted.
+ void Initialize(const MachineFunction &MF, bool isPIC) {
+ IsPIC = isPIC;
+ }
+
+};
+}
+
+#endif
diff --git a/lib/Target/Sparc/SparcRegisterInfo.cpp b/lib/Target/Sparc/SparcRegisterInfo.cpp
index dc97f06..c98613a 100644
--- a/lib/Target/Sparc/SparcRegisterInfo.cpp
+++ b/lib/Target/Sparc/SparcRegisterInfo.cpp
@@ -40,8 +40,17 @@ SparcRegisterInfo::SparcRegisterInfo(SparcSubtarget &st)
const uint16_t* SparcRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF)
const {
- static const uint16_t CalleeSavedRegs[] = { 0 };
- return CalleeSavedRegs;
+ return CSR_SaveList;
+}
+
+const uint32_t*
+SparcRegisterInfo::getCallPreservedMask(CallingConv::ID CC) const {
+ return CSR_RegMask;
+}
+
+const uint32_t*
+SparcRegisterInfo::getRTCallPreservedMask(CallingConv::ID CC) const {
+ return RTCSR_RegMask;
}
BitVector SparcRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
@@ -65,6 +74,15 @@ BitVector SparcRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
Reserved.set(SP::G0);
Reserved.set(SP::G6);
Reserved.set(SP::G7);
+
+ // Unaliased double registers are not available in non-V9 targets.
+ if (!Subtarget.isV9()) {
+ for (unsigned n = 0; n != 16; ++n) {
+ for (MCRegAliasIterator AI(SP::D16 + n, this, true); AI.isValid(); ++AI)
+ Reserved.set(*AI);
+ }
+ }
+
return Reserved;
}
@@ -74,6 +92,62 @@ SparcRegisterInfo::getPointerRegClass(const MachineFunction &MF,
return Subtarget.is64Bit() ? &SP::I64RegsRegClass : &SP::IntRegsRegClass;
}
+static void replaceFI(MachineFunction &MF,
+ MachineBasicBlock::iterator II,
+ MachineInstr &MI,
+ DebugLoc dl,
+ unsigned FIOperandNum, int Offset,
+ unsigned FramePtr)
+{
+ // Replace frame index with a frame pointer reference.
+ if (Offset >= -4096 && Offset <= 4095) {
+ // If the offset is small enough to fit in the immediate field, directly
+ // encode it.
+ MI.getOperand(FIOperandNum).ChangeToRegister(FramePtr, false);
+ MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset);
+ return;
+ }
+
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+
+ // FIXME: it would be better to scavenge a register here instead of
+ // reserving G1 all of the time.
+ if (Offset >= 0) {
+    // Emit nonnegative immediates with sethi + add:
+ // sethi %hi(Offset), %g1
+ // add %g1, %fp, %g1
+ // Insert G1+%lo(offset) into the user.
+ BuildMI(*MI.getParent(), II, dl, TII.get(SP::SETHIi), SP::G1)
+ .addImm(HI22(Offset));
+
+
+ // Emit G1 = G1 + I6
+ BuildMI(*MI.getParent(), II, dl, TII.get(SP::ADDrr), SP::G1).addReg(SP::G1)
+ .addReg(FramePtr);
+ // Insert: G1+%lo(offset) into the user.
+ MI.getOperand(FIOperandNum).ChangeToRegister(SP::G1, false);
+ MI.getOperand(FIOperandNum + 1).ChangeToImmediate(LO10(Offset));
+ return;
+ }
+
+  // Emit negative numbers with sethi + xor:
+ // sethi %hix(Offset), %g1
+ // xor %g1, %lox(offset), %g1
+ // add %g1, %fp, %g1
+ // Insert: G1 + 0 into the user.
+ BuildMI(*MI.getParent(), II, dl, TII.get(SP::SETHIi), SP::G1)
+ .addImm(HIX22(Offset));
+ BuildMI(*MI.getParent(), II, dl, TII.get(SP::XORri), SP::G1)
+ .addReg(SP::G1).addImm(LOX10(Offset));
+
+ BuildMI(*MI.getParent(), II, dl, TII.get(SP::ADDrr), SP::G1).addReg(SP::G1)
+ .addReg(FramePtr);
+  // Insert: G1 + 0 into the user.
+ MI.getOperand(FIOperandNum).ChangeToRegister(SP::G1, false);
+ MI.getOperand(FIOperandNum + 1).ChangeToImmediate(0);
+}
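The sethi+xor sequence above works because %hix/%lox are complement-based.
A minimal sketch, with hypothetical restatements of the HI22/HIX22/LOX10
helpers the code calls (the real definitions live elsewhere in the backend):

    #include <cassert>
    #include <cstdint>

    // Sketch only: check that sethi %hix + xor %lox rebuilds a negative
    // offset that does not fit in a 13-bit signed immediate.
    static uint32_t HI22(uint32_t V)  { return (V >> 10) & 0x3fffff; }
    static uint32_t HIX22(uint32_t V) { return HI22(~V); }
    static int32_t  LOX10(uint32_t V) { return int32_t(~(~V & 0x3ff)); }

    int main() {
      int32_t Offset = -70000;                     // too big for simm13
      uint32_t G1 = HIX22(uint32_t(Offset)) << 10; // sethi %hix(Offset), %g1
      G1 ^= uint32_t(LOX10(uint32_t(Offset)));     // xor %g1, %lox(Offset), %g1
      assert(int32_t(G1) == Offset);               // then add %fp; user sees G1 + 0
      return 0;
    }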
+
+
void
SparcRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
int SPAdj, unsigned FIOperandNum,
@@ -98,35 +172,40 @@ SparcRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
Offset += (stackSize) ? Subtarget.getAdjustedFrameSize(stackSize) : 0 ;
}
- // Replace frame index with a frame pointer reference.
- if (Offset >= -4096 && Offset <= 4095) {
- // If the offset is small enough to fit in the immediate field, directly
- // encode it.
- MI.getOperand(FIOperandNum).ChangeToRegister(FramePtr, false);
- MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset);
- } else {
- // Otherwise, emit a G1 = SETHI %hi(offset). FIXME: it would be better to
- // scavenge a register here instead of reserving G1 all of the time.
- const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
- unsigned OffHi = (unsigned)Offset >> 10U;
- BuildMI(*MI.getParent(), II, dl, TII.get(SP::SETHIi), SP::G1).addImm(OffHi);
- // Emit G1 = G1 + I6
- BuildMI(*MI.getParent(), II, dl, TII.get(SP::ADDrr), SP::G1).addReg(SP::G1)
- .addReg(FramePtr);
- // Insert: G1+%lo(offset) into the user.
- MI.getOperand(FIOperandNum).ChangeToRegister(SP::G1, false);
- MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset & ((1 << 10)-1));
+ if (!Subtarget.isV9() || !Subtarget.hasHardQuad()) {
+ if (MI.getOpcode() == SP::STQFri) {
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ unsigned SrcReg = MI.getOperand(2).getReg();
+ unsigned SrcEvenReg = getSubReg(SrcReg, SP::sub_even64);
+ unsigned SrcOddReg = getSubReg(SrcReg, SP::sub_odd64);
+ MachineInstr *StMI =
+ BuildMI(*MI.getParent(), II, dl, TII.get(SP::STDFri))
+ .addReg(FramePtr).addImm(0).addReg(SrcEvenReg);
+ replaceFI(MF, II, *StMI, dl, 0, Offset, FramePtr);
+ MI.setDesc(TII.get(SP::STDFri));
+ MI.getOperand(2).setReg(SrcOddReg);
+ Offset += 8;
+ } else if (MI.getOpcode() == SP::LDQFri) {
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ unsigned DestReg = MI.getOperand(0).getReg();
+ unsigned DestEvenReg = getSubReg(DestReg, SP::sub_even64);
+ unsigned DestOddReg = getSubReg(DestReg, SP::sub_odd64);
+ MachineInstr *StMI =
+ BuildMI(*MI.getParent(), II, dl, TII.get(SP::LDDFri), DestEvenReg)
+ .addReg(FramePtr).addImm(0);
+ replaceFI(MF, II, *StMI, dl, 1, Offset, FramePtr);
+
+ MI.setDesc(TII.get(SP::LDDFri));
+ MI.getOperand(0).setReg(DestOddReg);
+ Offset += 8;
+ }
}
+
+ replaceFI(MF, II, MI, dl, FIOperandNum, Offset, FramePtr);
+
}
unsigned SparcRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
return SP::I6;
}
-unsigned SparcRegisterInfo::getEHExceptionRegister() const {
- llvm_unreachable("What is the exception register");
-}
-
-unsigned SparcRegisterInfo::getEHHandlerRegister() const {
- llvm_unreachable("What is the exception handler register");
-}
diff --git a/lib/Target/Sparc/SparcRegisterInfo.h b/lib/Target/Sparc/SparcRegisterInfo.h
index 6b77d4e..00b5a98 100644
--- a/lib/Target/Sparc/SparcRegisterInfo.h
+++ b/lib/Target/Sparc/SparcRegisterInfo.h
@@ -32,6 +32,9 @@ struct SparcRegisterInfo : public SparcGenRegisterInfo {
/// Code Generation virtual methods...
const uint16_t *getCalleeSavedRegs(const MachineFunction *MF = 0) const;
+ const uint32_t* getCallPreservedMask(CallingConv::ID CC) const;
+
+ const uint32_t* getRTCallPreservedMask(CallingConv::ID CC) const;
BitVector getReservedRegs(const MachineFunction &MF) const;
@@ -47,10 +50,6 @@ struct SparcRegisterInfo : public SparcGenRegisterInfo {
// Debug information queries.
unsigned getFrameRegister(const MachineFunction &MF) const;
-
- // Exception handling queries.
- unsigned getEHExceptionRegister() const;
- unsigned getEHHandlerRegister() const;
};
} // end namespace llvm
diff --git a/lib/Target/Sparc/SparcRegisterInfo.td b/lib/Target/Sparc/SparcRegisterInfo.td
index a59c442..2a575c0 100644
--- a/lib/Target/Sparc/SparcRegisterInfo.td
+++ b/lib/Target/Sparc/SparcRegisterInfo.td
@@ -11,8 +11,8 @@
// Declarations that describe the Sparc register file
//===----------------------------------------------------------------------===//
-class SparcReg<string n> : Register<n> {
- field bits<5> Num;
+class SparcReg<bits<16> Enc, string n> : Register<n> {
+ let HWEncoding = Enc;
let Namespace = "SP";
}
@@ -23,25 +23,31 @@ class SparcCtrlReg<string n>: Register<n> {
let Namespace = "SP" in {
def sub_even : SubRegIndex<32>;
def sub_odd : SubRegIndex<32, 32>;
+def sub_even64 : SubRegIndex<64>;
+def sub_odd64 : SubRegIndex<64, 64>;
}
// Registers are identified with 5-bit ID numbers.
// Ri - 32-bit integer registers
-class Ri<bits<5> num, string n> : SparcReg<n> {
- let Num = num;
-}
+class Ri<bits<16> Enc, string n> : SparcReg<Enc, n>;
+
// Rf - 32-bit floating-point registers
-class Rf<bits<5> num, string n> : SparcReg<n> {
- let Num = num;
-}
+class Rf<bits<16> Enc, string n> : SparcReg<Enc, n>;
+
// Rd - Slots in the FP register file for 64-bit floating-point values.
-class Rd<bits<5> num, string n, list<Register> subregs> : SparcReg<n> {
- let Num = num;
+class Rd<bits<16> Enc, string n, list<Register> subregs> : SparcReg<Enc, n> {
let SubRegs = subregs;
let SubRegIndices = [sub_even, sub_odd];
let CoveredBySubRegs = 1;
}
+// Rq - Slots in the FP register file for 128-bit floating-point values.
+class Rq<bits<16> Enc, string n, list<Register> subregs> : SparcReg<Enc, n> {
+ let SubRegs = subregs;
+ let SubRegIndices = [sub_even64, sub_odd64];
+ let CoveredBySubRegs = 1;
+}
+
// Control Registers
def ICC : SparcCtrlReg<"ICC">; // This represents icc and xcc in 64-bit code.
def FCC : SparcCtrlReg<"FCC">;
@@ -135,6 +141,43 @@ def D13 : Rd<26, "F26", [F26, F27]>, DwarfRegNum<[85]>;
def D14 : Rd<28, "F28", [F28, F29]>, DwarfRegNum<[86]>;
def D15 : Rd<30, "F30", [F30, F31]>, DwarfRegNum<[87]>;
+// Unaliased double precision floating point registers.
+// FIXME: Define DwarfRegNum for these registers.
+def D16 : SparcReg< 1, "F32">;
+def D17 : SparcReg< 3, "F34">;
+def D18 : SparcReg< 5, "F36">;
+def D19 : SparcReg< 7, "F38">;
+def D20 : SparcReg< 9, "F40">;
+def D21 : SparcReg<11, "F42">;
+def D22 : SparcReg<13, "F44">;
+def D23 : SparcReg<15, "F46">;
+def D24 : SparcReg<17, "F48">;
+def D25 : SparcReg<19, "F50">;
+def D26 : SparcReg<21, "F52">;
+def D27 : SparcReg<23, "F54">;
+def D28 : SparcReg<25, "F56">;
+def D29 : SparcReg<27, "F58">;
+def D30 : SparcReg<29, "F60">;
+def D31 : SparcReg<31, "F62">;
+
+// Aliases of the F* registers used to hold 128-bit values (long doubles).
+def Q0 : Rq< 0, "F0", [D0, D1]>;
+def Q1 : Rq< 4, "F4", [D2, D3]>;
+def Q2 : Rq< 8, "F8", [D4, D5]>;
+def Q3 : Rq<12, "F12", [D6, D7]>;
+def Q4 : Rq<16, "F16", [D8, D9]>;
+def Q5 : Rq<20, "F20", [D10, D11]>;
+def Q6 : Rq<24, "F24", [D12, D13]>;
+def Q7 : Rq<28, "F28", [D14, D15]>;
+def Q8 : Rq< 1, "F32", [D16, D17]>;
+def Q9 : Rq< 5, "F36", [D18, D19]>;
+def Q10 : Rq< 9, "F40", [D20, D21]>;
+def Q11 : Rq<13, "F44", [D22, D23]>;
+def Q12 : Rq<17, "F48", [D24, D25]>;
+def Q13 : Rq<21, "F52", [D26, D27]>;
+def Q14 : Rq<25, "F56", [D28, D29]>;
+def Q15 : Rq<29, "F60", [D30, D31]>;
+
// Register classes.
//
// FIXME: the register order should be defined in terms of the preferred
@@ -158,4 +201,6 @@ def I64Regs : RegisterClass<"SP", [i64], 64, (add IntRegs)>;
// Floating point register classes.
def FPRegs : RegisterClass<"SP", [f32], 32, (sequence "F%u", 0, 31)>;
-def DFPRegs : RegisterClass<"SP", [f64], 64, (sequence "D%u", 0, 15)>;
+def DFPRegs : RegisterClass<"SP", [f64], 64, (sequence "D%u", 0, 31)>;
+
+def QFPRegs : RegisterClass<"SP", [f128], 128, (sequence "Q%u", 0, 15)>;
diff --git a/lib/Target/Sparc/SparcRelocations.h b/lib/Target/Sparc/SparcRelocations.h
new file mode 100644
index 0000000..388cfe7
--- /dev/null
+++ b/lib/Target/Sparc/SparcRelocations.h
@@ -0,0 +1,41 @@
+//===-- SparcRelocations.h - Sparc Code Relocations -------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the Sparc target-specific relocation types
+// (for relocation-model=static).
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SPARC_RELOCATIONS_H
+#define SPARC_RELOCATIONS_H
+
+#include "llvm/CodeGen/MachineRelocation.h"
+
+namespace llvm {
+ namespace SP {
+ enum RelocationType {
+ // reloc_sparc_hi - upper 22 bits
+ reloc_sparc_hi = 1,
+
+ // reloc_sparc_lo - lower 10 bits
+ reloc_sparc_lo = 2,
+
+ // reloc_sparc_pc30 - pc rel. 30 bits for call
+ reloc_sparc_pc30 = 3,
+
+ // reloc_sparc_pc22 - pc rel. 22 bits for branch
+ reloc_sparc_pc22 = 4,
+
+      // reloc_sparc_pc19 - pc rel. 19 bits for branch with icc/xcc
+ reloc_sparc_pc19 = 5
+ };
+ }
+}
+
+#endif
diff --git a/lib/Target/Sparc/SparcSubtarget.cpp b/lib/Target/Sparc/SparcSubtarget.cpp
index f9ce098..7d09d0e 100644
--- a/lib/Target/Sparc/SparcSubtarget.cpp
+++ b/lib/Target/Sparc/SparcSubtarget.cpp
@@ -30,7 +30,8 @@ SparcSubtarget::SparcSubtarget(const std::string &TT, const std::string &CPU,
IsV9(false),
V8DeprecatedInsts(false),
IsVIS(false),
- Is64Bit(is64Bit) {
+ Is64Bit(is64Bit),
+ HasHardQuad(false) {
// Determine default and user specified characteristics
std::string CPUName = CPU;
diff --git a/lib/Target/Sparc/SparcSubtarget.h b/lib/Target/Sparc/SparcSubtarget.h
index 2bf599d..0f81cc9 100644
--- a/lib/Target/Sparc/SparcSubtarget.h
+++ b/lib/Target/Sparc/SparcSubtarget.h
@@ -29,6 +29,7 @@ class SparcSubtarget : public SparcGenSubtargetInfo {
bool V8DeprecatedInsts;
bool IsVIS;
bool Is64Bit;
+ bool HasHardQuad;
public:
SparcSubtarget(const std::string &TT, const std::string &CPU,
@@ -37,6 +38,7 @@ public:
bool isV9() const { return IsV9; }
bool isVIS() const { return IsVIS; }
bool useDeprecatedV8Instructions() const { return V8DeprecatedInsts; }
+ bool hasHardQuad() const { return HasHardQuad; }
/// ParseSubtargetFeatures - Parses features string setting specified
/// subtarget options. Definition of function is auto generated by tblgen.
diff --git a/lib/Target/Sparc/SparcTargetMachine.cpp b/lib/Target/Sparc/SparcTargetMachine.cpp
index a7355f4..0f93674 100644
--- a/lib/Target/Sparc/SparcTargetMachine.cpp
+++ b/lib/Target/Sparc/SparcTargetMachine.cpp
@@ -65,6 +65,13 @@ bool SparcPassConfig::addInstSelector() {
return false;
}
+bool SparcTargetMachine::addCodeEmitter(PassManagerBase &PM,
+ JITCodeEmitter &JCE) {
+ // Machine code emitter pass for Sparc.
+ PM.add(createSparcJITCodeEmitterPass(*this, JCE));
+ return false;
+}
+
/// addPreEmitPass - This pass may be implemented by targets that want to run
/// passes immediately before machine code is emitted. This should return
/// true if -print-machineinstrs should print out the code after the passes.
diff --git a/lib/Target/Sparc/SparcTargetMachine.h b/lib/Target/Sparc/SparcTargetMachine.h
index 081075d..8c9bcd3 100644
--- a/lib/Target/Sparc/SparcTargetMachine.h
+++ b/lib/Target/Sparc/SparcTargetMachine.h
@@ -17,6 +17,7 @@
#include "SparcFrameLowering.h"
#include "SparcISelLowering.h"
#include "SparcInstrInfo.h"
+#include "SparcJITInfo.h"
#include "SparcSelectionDAGInfo.h"
#include "SparcSubtarget.h"
#include "llvm/IR/DataLayout.h"
@@ -32,6 +33,7 @@ class SparcTargetMachine : public LLVMTargetMachine {
SparcTargetLowering TLInfo;
SparcSelectionDAGInfo TSInfo;
SparcFrameLowering FrameLowering;
+ SparcJITInfo JITInfo;
public:
SparcTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS, const TargetOptions &Options,
@@ -52,10 +54,14 @@ public:
virtual const SparcSelectionDAGInfo* getSelectionDAGInfo() const {
return &TSInfo;
}
+ virtual SparcJITInfo *getJITInfo() {
+ return &JITInfo;
+ }
virtual const DataLayout *getDataLayout() const { return &DL; }
// Pass Pipeline Configuration
virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);
+ virtual bool addCodeEmitter(PassManagerBase &PM, JITCodeEmitter &JCE);
};
/// SparcV8TargetMachine - Sparc 32-bit target machine
diff --git a/lib/Target/Sparc/TargetInfo/SparcTargetInfo.cpp b/lib/Target/Sparc/TargetInfo/SparcTargetInfo.cpp
index bb71463..4eea163 100644
--- a/lib/Target/Sparc/TargetInfo/SparcTargetInfo.cpp
+++ b/lib/Target/Sparc/TargetInfo/SparcTargetInfo.cpp
@@ -15,7 +15,9 @@ using namespace llvm;
Target llvm::TheSparcTarget;
Target llvm::TheSparcV9Target;
-extern "C" void LLVMInitializeSparcTargetInfo() {
- RegisterTarget<Triple::sparc> X(TheSparcTarget, "sparc", "Sparc");
- RegisterTarget<Triple::sparcv9> Y(TheSparcV9Target, "sparcv9", "Sparc V9");
+extern "C" void LLVMInitializeSparcTargetInfo() {
+ RegisterTarget<Triple::sparc, /*HasJIT=*/ true>
+ X(TheSparcTarget, "sparc", "Sparc");
+ RegisterTarget<Triple::sparcv9, /*HasJIT=*/ true>
+ Y(TheSparcV9Target, "sparcv9", "Sparc V9");
}
diff --git a/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp b/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp
index 58af2c4..763f40c 100644
--- a/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp
+++ b/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp
@@ -32,6 +32,7 @@ static bool inRange(const MCExpr *Expr, int64_t MinValue, int64_t MaxValue) {
namespace {
enum RegisterKind {
GR32Reg,
+ GRH32Reg,
GR64Reg,
GR128Reg,
ADDR32Reg,
@@ -262,6 +263,8 @@ public:
// Used by the TableGen code to check for particular operand types.
bool isGR32() const { return isReg(GR32Reg); }
+ bool isGRH32() const { return isReg(GRH32Reg); }
+ bool isGRX32() const { return false; }
bool isGR64() const { return isReg(GR64Reg); }
bool isGR128() const { return isReg(GR128Reg); }
bool isADDR32() const { return isReg(ADDR32Reg); }
@@ -327,8 +330,9 @@ private:
StringRef Mnemonic);
public:
- SystemZAsmParser(MCSubtargetInfo &sti, MCAsmParser &parser)
- : MCTargetAsmParser(), STI(sti), Parser(parser) {
+ SystemZAsmParser(MCSubtargetInfo &sti, MCAsmParser &parser,
+ const MCInstrInfo &MII)
+ : MCTargetAsmParser(), STI(sti), Parser(parser) {
MCAsmParserExtension::Initialize(Parser);
// Initialize the set of available features.
@@ -355,6 +359,14 @@ public:
return parseRegister(Operands, RegGR, SystemZMC::GR32Regs, GR32Reg);
}
OperandMatchResultTy
+ parseGRH32(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ return parseRegister(Operands, RegGR, SystemZMC::GRH32Regs, GRH32Reg);
+ }
+ OperandMatchResultTy
+ parseGRX32(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ llvm_unreachable("GRX32 should only be used for pseudo instructions");
+ }
+ OperandMatchResultTy
parseGR64(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
return parseRegister(Operands, RegGR, SystemZMC::GR64Regs, GR64Reg);
}
diff --git a/lib/Target/SystemZ/CMakeLists.txt b/lib/Target/SystemZ/CMakeLists.txt
index ab657f6..d21c0a8 100644
--- a/lib/Target/SystemZ/CMakeLists.txt
+++ b/lib/Target/SystemZ/CMakeLists.txt
@@ -21,9 +21,11 @@ add_llvm_target(SystemZCodeGen
SystemZISelLowering.cpp
SystemZInstrInfo.cpp
SystemZLongBranch.cpp
+ SystemZMachineFunctionInfo.cpp
SystemZMCInstLower.cpp
SystemZRegisterInfo.cpp
SystemZSelectionDAGInfo.cpp
+ SystemZShortenInst.cpp
SystemZSubtarget.cpp
SystemZTargetMachine.cpp
)
diff --git a/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp b/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp
index 79469b6..fc3c38d 100644
--- a/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp
+++ b/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp
@@ -48,14 +48,11 @@ extern "C" void LLVMInitializeSystemZDisassembler() {
}
static DecodeStatus decodeRegisterClass(MCInst &Inst, uint64_t RegNo,
- const unsigned *Regs,
- bool isAddress = false) {
+ const unsigned *Regs) {
assert(RegNo < 16 && "Invalid register");
- if (!isAddress || RegNo) {
- RegNo = Regs[RegNo];
- if (RegNo == 0)
- return MCDisassembler::Fail;
- }
+ RegNo = Regs[RegNo];
+ if (RegNo == 0)
+ return MCDisassembler::Fail;
Inst.addOperand(MCOperand::CreateReg(RegNo));
return MCDisassembler::Success;
}
@@ -66,6 +63,12 @@ static DecodeStatus DecodeGR32BitRegisterClass(MCInst &Inst, uint64_t RegNo,
return decodeRegisterClass(Inst, RegNo, SystemZMC::GR32Regs);
}
+static DecodeStatus DecodeGRH32BitRegisterClass(MCInst &Inst, uint64_t RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ return decodeRegisterClass(Inst, RegNo, SystemZMC::GRH32Regs);
+}
+
static DecodeStatus DecodeGR64BitRegisterClass(MCInst &Inst, uint64_t RegNo,
uint64_t Address,
const void *Decoder) {
@@ -81,7 +84,7 @@ static DecodeStatus DecodeGR128BitRegisterClass(MCInst &Inst, uint64_t RegNo,
static DecodeStatus DecodeADDR64BitRegisterClass(MCInst &Inst, uint64_t RegNo,
uint64_t Address,
const void *Decoder) {
- return decodeRegisterClass(Inst, RegNo, SystemZMC::GR64Regs, true);
+ return decodeRegisterClass(Inst, RegNo, SystemZMC::GR64Regs);
}
static DecodeStatus DecodeFP32BitRegisterClass(MCInst &Inst, uint64_t RegNo,
diff --git a/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp b/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp
index 37ebff3..e1e64d3 100644
--- a/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp
+++ b/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp
@@ -124,18 +124,6 @@ void SystemZInstPrinter::printPCRelOperand(const MCInst *MI, int OpNum,
O << *MO.getExpr();
}
-void SystemZInstPrinter::printCallOperand(const MCInst *MI, int OpNum,
- raw_ostream &O) {
- const MCOperand &MO = MI->getOperand(OpNum);
- if (MO.isImm()) {
- O << "0x";
- O.write_hex(MO.getImm());
- } else {
- O << *MO.getExpr();
- O << "@PLT";
- }
-}
-
void SystemZInstPrinter::printOperand(const MCInst *MI, int OpNum,
raw_ostream &O) {
printOperand(MI->getOperand(OpNum), O);
diff --git a/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h b/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h
index 30cdee5..734ecf0 100644
--- a/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h
+++ b/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h
@@ -58,7 +58,6 @@ private:
void printS32ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O);
void printU32ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O);
void printPCRelOperand(const MCInst *MI, int OpNum, raw_ostream &O);
- void printCallOperand(const MCInst *MI, int OpNum, raw_ostream &O);
void printAccessRegOperand(const MCInst *MI, int OpNum, raw_ostream &O);
// Print the mnemonic for a condition-code mask ("ne", "lh", etc.)
diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp
index 027db44..26a8fae 100644
--- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp
+++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp
@@ -35,15 +35,6 @@ static uint64_t extractBitsForFixup(MCFixupKind Kind, uint64_t Value) {
llvm_unreachable("Unknown fixup kind!");
}
-// If Opcode is a relaxable interprocedural reference, return the relaxed form,
-// otherwise return 0.
-static unsigned getRelaxedOpcode(unsigned Opcode) {
- switch (Opcode) {
- case SystemZ::BRAS: return SystemZ::BRASL;
- }
- return 0;
-}
-
namespace {
class SystemZMCAsmBackend : public MCAsmBackend {
uint8_t OSABI;
@@ -59,14 +50,20 @@ public:
LLVM_OVERRIDE;
virtual void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
uint64_t Value) const LLVM_OVERRIDE;
- virtual bool mayNeedRelaxation(const MCInst &Inst) const LLVM_OVERRIDE;
+ virtual bool mayNeedRelaxation(const MCInst &Inst) const LLVM_OVERRIDE {
+ return false;
+ }
virtual bool fixupNeedsRelaxation(const MCFixup &Fixup,
uint64_t Value,
const MCRelaxableFragment *Fragment,
const MCAsmLayout &Layout) const
- LLVM_OVERRIDE;
+ LLVM_OVERRIDE {
+ return false;
+ }
virtual void relaxInstruction(const MCInst &Inst,
- MCInst &Res) const LLVM_OVERRIDE;
+ MCInst &Res) const LLVM_OVERRIDE {
+ llvm_unreachable("SystemZ does do not have assembler relaxation");
+ }
virtual bool writeNopData(uint64_t Count,
MCObjectWriter *OW) const LLVM_OVERRIDE;
virtual MCObjectWriter *createObjectWriter(raw_ostream &OS) const
@@ -114,28 +111,6 @@ void SystemZMCAsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
}
}
-bool SystemZMCAsmBackend::mayNeedRelaxation(const MCInst &Inst) const {
- return getRelaxedOpcode(Inst.getOpcode()) != 0;
-}
-
-bool
-SystemZMCAsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup,
- uint64_t Value,
- const MCRelaxableFragment *Fragment,
- const MCAsmLayout &Layout) const {
- // At the moment we just need to relax 16-bit fields to wider fields.
- Value = extractBitsForFixup(Fixup.getKind(), Value);
- return (int16_t)Value != (int64_t)Value;
-}
-
-void SystemZMCAsmBackend::relaxInstruction(const MCInst &Inst,
- MCInst &Res) const {
- unsigned Opcode = getRelaxedOpcode(Inst.getOpcode());
- assert(Opcode && "Unexpected insn to relax");
- Res = Inst;
- Res.setOpcode(Opcode);
-}
-
bool SystemZMCAsmBackend::writeNopData(uint64_t Count,
MCObjectWriter *OW) const {
for (uint64_t I = 0; I != Count; ++I)
@@ -143,8 +118,9 @@ bool SystemZMCAsmBackend::writeNopData(uint64_t Count,
return true;
}
-MCAsmBackend *llvm::createSystemZMCAsmBackend(const Target &T, StringRef TT,
- StringRef CPU) {
+MCAsmBackend *llvm::createSystemZMCAsmBackend(const Target &T,
+ const MCRegisterInfo &MRI,
+ StringRef TT, StringRef CPU) {
uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(Triple(TT).getOS());
return new SystemZMCAsmBackend(OSABI);
}
diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp
index 9e27aa0..965c41e 100644
--- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp
+++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp
@@ -19,10 +19,8 @@ SystemZMCAsmInfo::SystemZMCAsmInfo(StringRef TT) {
IsLittleEndian = false;
CommentString = "#";
- PCSymbol = ".";
GlobalPrefix = "";
PrivateGlobalPrefix = ".L";
- WeakRefDirective = "\t.weak\t";
ZeroDirective = "\t.space\t";
Data64bitsDirective = "\t.quad\t";
UsesELFSectionDirectiveForBSS = true;
diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.h b/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.h
index d440787..b9ac92a 100644
--- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.h
+++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.h
@@ -10,13 +10,13 @@
#ifndef SystemZTARGETASMINFO_H
#define SystemZTARGETASMINFO_H
-#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCAsmInfoELF.h"
#include "llvm/Support/Compiler.h"
namespace llvm {
class StringRef;
-class SystemZMCAsmInfo : public MCAsmInfo {
+class SystemZMCAsmInfo : public MCAsmInfoELF {
public:
explicit SystemZMCAsmInfo(StringRef TT);
diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp
index bda7714..f07ea7b 100644
--- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp
+++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp
@@ -79,14 +79,6 @@ private:
SmallVectorImpl<MCFixup> &Fixups) const {
return getPCRelEncoding(MI, OpNum, Fixups, SystemZ::FK_390_PC32DBL, 2);
}
- uint64_t getPLT16DBLEncoding(const MCInst &MI, unsigned OpNum,
- SmallVectorImpl<MCFixup> &Fixups) const {
- return getPCRelEncoding(MI, OpNum, Fixups, SystemZ::FK_390_PLT16DBL, 2);
- }
- uint64_t getPLT32DBLEncoding(const MCInst &MI, unsigned OpNum,
- SmallVectorImpl<MCFixup> &Fixups) const {
- return getPCRelEncoding(MI, OpNum, Fixups, SystemZ::FK_390_PLT32DBL, 2);
- }
};
}
diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp
index 3653192..9e1296b 100644
--- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp
+++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp
@@ -28,10 +28,17 @@
using namespace llvm;
const unsigned SystemZMC::GR32Regs[16] = {
- SystemZ::R0W, SystemZ::R1W, SystemZ::R2W, SystemZ::R3W,
- SystemZ::R4W, SystemZ::R5W, SystemZ::R6W, SystemZ::R7W,
- SystemZ::R8W, SystemZ::R9W, SystemZ::R10W, SystemZ::R11W,
- SystemZ::R12W, SystemZ::R13W, SystemZ::R14W, SystemZ::R15W
+ SystemZ::R0L, SystemZ::R1L, SystemZ::R2L, SystemZ::R3L,
+ SystemZ::R4L, SystemZ::R5L, SystemZ::R6L, SystemZ::R7L,
+ SystemZ::R8L, SystemZ::R9L, SystemZ::R10L, SystemZ::R11L,
+ SystemZ::R12L, SystemZ::R13L, SystemZ::R14L, SystemZ::R15L
+};
+
+const unsigned SystemZMC::GRH32Regs[16] = {
+ SystemZ::R0H, SystemZ::R1H, SystemZ::R2H, SystemZ::R3H,
+ SystemZ::R4H, SystemZ::R5H, SystemZ::R6H, SystemZ::R7H,
+ SystemZ::R8H, SystemZ::R9H, SystemZ::R10H, SystemZ::R11H,
+ SystemZ::R12H, SystemZ::R13H, SystemZ::R14H, SystemZ::R15H
};
const unsigned SystemZMC::GR64Regs[16] = {
@@ -69,6 +76,24 @@ const unsigned SystemZMC::FP128Regs[16] = {
SystemZ::F12Q, SystemZ::F13Q, 0, 0
};
+unsigned SystemZMC::getFirstReg(unsigned Reg) {
+ static unsigned Map[SystemZ::NUM_TARGET_REGS];
+ static bool Initialized = false;
+ if (!Initialized) {
+ for (unsigned I = 0; I < 16; ++I) {
+ Map[GR32Regs[I]] = I;
+ Map[GRH32Regs[I]] = I;
+ Map[GR64Regs[I]] = I;
+ Map[GR128Regs[I]] = I;
+ Map[FP32Regs[I]] = I;
+ Map[FP64Regs[I]] = I;
+ Map[FP128Regs[I]] = I;
+    }
+    Initialized = true;
+  }
+ assert(Reg < SystemZ::NUM_TARGET_REGS);
+ return Map[Reg];
+}
+
static MCAsmInfo *createSystemZMCAsmInfo(const MCRegisterInfo &MRI,
StringRef TT) {
MCAsmInfo *MAI = new SystemZMCAsmInfo(TT);
@@ -162,7 +187,7 @@ static MCStreamer *createSystemZMCObjectStreamer(const Target &T, StringRef TT,
MCCodeEmitter *Emitter,
bool RelaxAll,
bool NoExecStack) {
- return createELFStreamer(Ctx, MAB, OS, Emitter, RelaxAll, NoExecStack);
+ return createELFStreamer(Ctx, 0, MAB, OS, Emitter, RelaxAll, NoExecStack);
}
extern "C" void LLVMInitializeSystemZTargetMC() {
diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h b/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h
index 3c9f0cb..97e325b 100644
--- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h
+++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h
@@ -42,11 +42,31 @@ namespace SystemZMC {
// as %r0-%r15. It seems better to provide the same interface for
// all classes though.
extern const unsigned GR32Regs[16];
+ extern const unsigned GRH32Regs[16];
extern const unsigned GR64Regs[16];
extern const unsigned GR128Regs[16];
extern const unsigned FP32Regs[16];
extern const unsigned FP64Regs[16];
extern const unsigned FP128Regs[16];
+
+ // Return the 0-based number of the first architectural register that
+ // contains the given LLVM register. E.g. R1D -> 1.
+ unsigned getFirstReg(unsigned Reg);
+
+ // Return the given register as a GR64.
+ inline unsigned getRegAsGR64(unsigned Reg) {
+ return GR64Regs[getFirstReg(Reg)];
+ }
+
+ // Return the given register as a low GR32.
+ inline unsigned getRegAsGR32(unsigned Reg) {
+ return GR32Regs[getFirstReg(Reg)];
+ }
+
+ // Return the given register as a high GR32.
+ inline unsigned getRegAsGRH32(unsigned Reg) {
+ return GRH32Regs[getFirstReg(Reg)];
+ }
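A hypothetical usage sketch (the register enum values are tblgen-generated;
the names below are shown only for illustration):

    // Map a 64-bit GPR onto its 0-based number and its 32-bit halves:
    //   SystemZMC::getFirstReg(SystemZ::R3D)   == 3
    //   SystemZMC::getRegAsGR32(SystemZ::R3D)  == SystemZ::R3L  (low half)
    //   SystemZMC::getRegAsGRH32(SystemZ::R3D) == SystemZ::R3H  (high half)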
}
MCCodeEmitter *createSystemZMCCodeEmitter(const MCInstrInfo &MCII,
@@ -54,8 +74,9 @@ MCCodeEmitter *createSystemZMCCodeEmitter(const MCInstrInfo &MCII,
const MCSubtargetInfo &STI,
MCContext &Ctx);
-MCAsmBackend *createSystemZMCAsmBackend(const Target &T, StringRef TT,
- StringRef CPU);
+MCAsmBackend *createSystemZMCAsmBackend(const Target &T,
+ const MCRegisterInfo &MRI,
+ StringRef TT, StringRef CPU);
MCObjectWriter *createSystemZObjectWriter(raw_ostream &OS, uint8_t OSABI);
} // end namespace llvm
diff --git a/lib/Target/SystemZ/README.txt b/lib/Target/SystemZ/README.txt
index 2782b63..afa6cf0 100644
--- a/lib/Target/SystemZ/README.txt
+++ b/lib/Target/SystemZ/README.txt
@@ -35,19 +35,11 @@ performance measurements.
--
-We don't support tail calls at present.
-
---
-
-We don't support prefetching yet.
-
---
-
There is no scheduling support.
--
-We don't use the BRANCH ON COUNT or BRANCH ON INDEX families of instruction.
+We don't use the BRANCH ON INDEX instructions.
--
@@ -56,18 +48,7 @@ and conditional returns.
--
-We don't use the condition code results of anything except comparisons.
-
-Implementing this may need something more finely grained than the z_cmp
-and z_ucmp that we have now. It might (or might not) also be useful to
-have a mask of "don't care" values in conditional branches. For example,
-integer comparisons never set CC to 3, so the bottom bit of the CC mask
-isn't particularly relevant. JNLH and JE are equally good for testing
-equality after an integer comparison, etc.
-
---
-
-We don't use the LOAD AND TEST or TEST DATA CLASS instructions.
+We don't use the TEST DATA CLASS instructions.
--
@@ -77,20 +58,16 @@ condition codes. For example, we could use LCDFR instead of LCDBR.
--
-We don't optimize block memory operations.
+We only use MVC, XC and CLC for constant-length block operations.
+We could extend them to variable-length operations too,
+using EXECUTE RELATIVE LONG.
-It's definitely worth using things like MVC, CLC, NC, XC and OC with
-constant lengths. MVCIN may be worthwhile too.
-
-We should probably implement things like memcpy using MVC with EXECUTE.
-Likewise memcmp and CLC. MVCLE and CLCLE could be useful too.
+MVCIN, MVCLE and CLCLE may be worthwhile too.
--
-We don't optimize string operations.
-
-MVST, CLST, SRST and CUSE could be useful here. Some of the TRANSLATE
-family might be too, although they are probably more difficult to exploit.
+We don't use CUSE or the TRANSLATE family of instructions for string
+operations. The TRANSLATE ones are probably more difficult to exploit.
--
@@ -113,14 +90,7 @@ We don't use the halfword forms of LOAD REVERSED and STORE REVERSED
--
-We could take advantage of the various ... UNDER MASK instructions,
-such as ICM and STCM.
-
---
-
-DAGCombiner can detect integer absolute, but there's not yet an associated
-ISD opcode. We could add one and implement it using LOAD POSITIVE.
-Negated absolutes could use LOAD NEGATIVE.
+We don't use ICM or STCM.
--
diff --git a/lib/Target/SystemZ/SystemZ.h b/lib/Target/SystemZ/SystemZ.h
index eccc2aa..dcebbad 100644
--- a/lib/Target/SystemZ/SystemZ.h
+++ b/lib/Target/SystemZ/SystemZ.h
@@ -52,6 +52,29 @@ namespace llvm {
const unsigned CCMASK_CS_NE = CCMASK_1;
const unsigned CCMASK_CS = CCMASK_0 | CCMASK_1;
+ // Condition-code mask assignments for a completed SRST loop.
+ const unsigned CCMASK_SRST_FOUND = CCMASK_1;
+ const unsigned CCMASK_SRST_NOTFOUND = CCMASK_2;
+ const unsigned CCMASK_SRST = CCMASK_1 | CCMASK_2;
+
+ // Condition-code mask assignments for TEST UNDER MASK.
+ const unsigned CCMASK_TM_ALL_0 = CCMASK_0;
+ const unsigned CCMASK_TM_MIXED_MSB_0 = CCMASK_1;
+ const unsigned CCMASK_TM_MIXED_MSB_1 = CCMASK_2;
+ const unsigned CCMASK_TM_ALL_1 = CCMASK_3;
+ const unsigned CCMASK_TM_SOME_0 = CCMASK_TM_ALL_1 ^ CCMASK_ANY;
+ const unsigned CCMASK_TM_SOME_1 = CCMASK_TM_ALL_0 ^ CCMASK_ANY;
+ const unsigned CCMASK_TM_MSB_0 = CCMASK_0 | CCMASK_1;
+ const unsigned CCMASK_TM_MSB_1 = CCMASK_2 | CCMASK_3;
+ const unsigned CCMASK_TM = CCMASK_ANY;
+
+ // The position of the low CC bit in an IPM result.
+ const unsigned IPM_CC = 28;
+
+ // Mask assignments for PFD.
+ const unsigned PFD_READ = 1;
+ const unsigned PFD_WRITE = 2;
+
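For orientation (a sketch, not part of the patch): IPM deposits the 2-bit
condition code in bits 29:28 of its result register, so it can be recovered
with a shift and mask built on the IPM_CC constant above:

    // Hypothetical helper; not part of the patch.
    static inline unsigned ccFromIPM(unsigned IPMResult) {
      return (IPMResult >> SystemZ::IPM_CC) & 3;  // CC value 0..3
    }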
// Return true if Val fits an LLILL operand.
static inline bool isImmLL(uint64_t Val) {
return (Val & ~0x000000000000ffffULL) == 0;
@@ -86,6 +109,7 @@ namespace llvm {
FunctionPass *createSystemZISelDag(SystemZTargetMachine &TM,
CodeGenOpt::Level OptLevel);
FunctionPass *createSystemZElimComparePass(SystemZTargetMachine &TM);
+ FunctionPass *createSystemZShortenInstPass(SystemZTargetMachine &TM);
FunctionPass *createSystemZLongBranchPass(SystemZTargetMachine &TM);
} // end namespace llvm;
#endif
diff --git a/lib/Target/SystemZ/SystemZAsmPrinter.cpp b/lib/Target/SystemZ/SystemZAsmPrinter.cpp
index 3a57ea0..75cbda4 100644
--- a/lib/Target/SystemZ/SystemZAsmPrinter.cpp
+++ b/lib/Target/SystemZ/SystemZAsmPrinter.cpp
@@ -19,16 +19,142 @@
#include "llvm/CodeGen/MachineModuleInfoImpls.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInstBuilder.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Target/Mangler.h"
using namespace llvm;
+// Return an RI instruction like MI with opcode Opcode, but with the
+// GR64 register operands turned into GR32s.
+static MCInst lowerRILow(const MachineInstr *MI, unsigned Opcode) {
+ if (MI->isCompare())
+ return MCInstBuilder(Opcode)
+ .addReg(SystemZMC::getRegAsGR32(MI->getOperand(0).getReg()))
+ .addImm(MI->getOperand(1).getImm());
+ else
+ return MCInstBuilder(Opcode)
+ .addReg(SystemZMC::getRegAsGR32(MI->getOperand(0).getReg()))
+ .addReg(SystemZMC::getRegAsGR32(MI->getOperand(1).getReg()))
+ .addImm(MI->getOperand(2).getImm());
+}
+
+// Return an RI instruction like MI with opcode Opcode, but with the
+// GR64 register operands turned into GRH32s.
+static MCInst lowerRIHigh(const MachineInstr *MI, unsigned Opcode) {
+ if (MI->isCompare())
+ return MCInstBuilder(Opcode)
+ .addReg(SystemZMC::getRegAsGRH32(MI->getOperand(0).getReg()))
+ .addImm(MI->getOperand(1).getImm());
+ else
+ return MCInstBuilder(Opcode)
+ .addReg(SystemZMC::getRegAsGRH32(MI->getOperand(0).getReg()))
+ .addReg(SystemZMC::getRegAsGRH32(MI->getOperand(1).getReg()))
+ .addImm(MI->getOperand(2).getImm());
+}
+
+// Return an RI instruction like MI with opcode Opcode, but with the
+// R2 register turned into a GR64.
+static MCInst lowerRIEfLow(const MachineInstr *MI, unsigned Opcode) {
+ return MCInstBuilder(Opcode)
+ .addReg(MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(1).getReg())
+ .addReg(SystemZMC::getRegAsGR64(MI->getOperand(2).getReg()))
+ .addImm(MI->getOperand(3).getImm())
+ .addImm(MI->getOperand(4).getImm())
+ .addImm(MI->getOperand(5).getImm());
+}
+
void SystemZAsmPrinter::EmitInstruction(const MachineInstr *MI) {
- SystemZMCInstLower Lower(Mang, MF->getContext(), *this);
+ SystemZMCInstLower Lower(MF->getContext(), *this);
MCInst LoweredMI;
- Lower.lower(MI, LoweredMI);
+ switch (MI->getOpcode()) {
+ case SystemZ::Return:
+ LoweredMI = MCInstBuilder(SystemZ::BR).addReg(SystemZ::R14D);
+ break;
+
+ case SystemZ::CallBRASL:
+ LoweredMI = MCInstBuilder(SystemZ::BRASL)
+ .addReg(SystemZ::R14D)
+ .addExpr(Lower.getExpr(MI->getOperand(0), MCSymbolRefExpr::VK_PLT));
+ break;
+
+ case SystemZ::CallBASR:
+ LoweredMI = MCInstBuilder(SystemZ::BASR)
+ .addReg(SystemZ::R14D)
+ .addReg(MI->getOperand(0).getReg());
+ break;
+
+ case SystemZ::CallJG:
+ LoweredMI = MCInstBuilder(SystemZ::JG)
+ .addExpr(Lower.getExpr(MI->getOperand(0), MCSymbolRefExpr::VK_PLT));
+ break;
+
+ case SystemZ::CallBR:
+ LoweredMI = MCInstBuilder(SystemZ::BR).addReg(SystemZ::R1D);
+ break;
+
+ case SystemZ::IILF64:
+ LoweredMI = MCInstBuilder(SystemZ::IILF)
+ .addReg(SystemZMC::getRegAsGR32(MI->getOperand(0).getReg()))
+ .addImm(MI->getOperand(2).getImm());
+ break;
+
+ case SystemZ::IIHF64:
+ LoweredMI = MCInstBuilder(SystemZ::IIHF)
+ .addReg(SystemZMC::getRegAsGRH32(MI->getOperand(0).getReg()))
+ .addImm(MI->getOperand(2).getImm());
+ break;
+
+ case SystemZ::RISBHH:
+ case SystemZ::RISBHL:
+ LoweredMI = lowerRIEfLow(MI, SystemZ::RISBHG);
+ break;
+
+ case SystemZ::RISBLH:
+ case SystemZ::RISBLL:
+ LoweredMI = lowerRIEfLow(MI, SystemZ::RISBLG);
+ break;
+
+#define LOWER_LOW(NAME) \
+ case SystemZ::NAME##64: LoweredMI = lowerRILow(MI, SystemZ::NAME); break
+
+ LOWER_LOW(IILL);
+ LOWER_LOW(IILH);
+ LOWER_LOW(TMLL);
+ LOWER_LOW(TMLH);
+ LOWER_LOW(NILL);
+ LOWER_LOW(NILH);
+ LOWER_LOW(NILF);
+ LOWER_LOW(OILL);
+ LOWER_LOW(OILH);
+ LOWER_LOW(OILF);
+ LOWER_LOW(XILF);
+
+#undef LOWER_LOW
+
+#define LOWER_HIGH(NAME) \
+ case SystemZ::NAME##64: LoweredMI = lowerRIHigh(MI, SystemZ::NAME); break
+
+ LOWER_HIGH(IIHL);
+ LOWER_HIGH(IIHH);
+ LOWER_HIGH(TMHL);
+ LOWER_HIGH(TMHH);
+ LOWER_HIGH(NIHL);
+ LOWER_HIGH(NIHH);
+ LOWER_HIGH(NIHF);
+ LOWER_HIGH(OIHL);
+ LOWER_HIGH(OIHH);
+ LOWER_HIGH(OIHF);
+ LOWER_HIGH(XIHF);
+
+#undef LOWER_HIGH
+
+ default:
+ Lower.lower(MI, LoweredMI);
+ break;
+ }
OutStreamer.EmitInstruction(LoweredMI);
}
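For readability, here is what one LOWER_LOW expansion above amounts to after
preprocessing (a sketch of the generated case label):

    // LOWER_LOW(IILL) expands to:
    //   case SystemZ::IILL64: LoweredMI = lowerRILow(MI, SystemZ::IILL); break;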
@@ -48,7 +174,7 @@ EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) {
static_cast<SystemZConstantPoolValue*>(MCPV);
const MCExpr *Expr =
- MCSymbolRefExpr::Create(Mang->getSymbol(ZCPV->getGlobalValue()),
+ MCSymbolRefExpr::Create(getSymbol(ZCPV->getGlobalValue()),
getModifierVariantKind(ZCPV->getModifier()),
OutContext);
uint64_t Size = TM.getDataLayout()->getTypeAllocSize(ZCPV->getType());
@@ -66,7 +192,7 @@ bool SystemZAsmPrinter::PrintAsmOperand(const MachineInstr *MI,
return true;
OS << -int64_t(MI->getOperand(OpNo).getImm());
} else {
- SystemZMCInstLower Lower(Mang, MF->getContext(), *this);
+ SystemZMCInstLower Lower(MF->getContext(), *this);
MCOperand MO(Lower.lowerOperand(MI->getOperand(OpNo)));
SystemZInstPrinter::printOperand(MO, OS);
}
diff --git a/lib/Target/SystemZ/SystemZCallingConv.td b/lib/Target/SystemZ/SystemZCallingConv.td
index c2d727f..c4f641e 100644
--- a/lib/Target/SystemZ/SystemZCallingConv.td
+++ b/lib/Target/SystemZ/SystemZCallingConv.td
@@ -23,7 +23,7 @@ def RetCC_SystemZ : CallingConv<[
// call-clobbered argument registers available for code that doesn't
// care about the ABI. (R6 is an argument register too, but is
// call-saved and therefore not suitable for return values.)
- CCIfType<[i32], CCAssignToReg<[R2W, R3W, R4W, R5W]>>,
+ CCIfType<[i32], CCAssignToReg<[R2L, R3L, R4L, R5L]>>,
CCIfType<[i64], CCAssignToReg<[R2D, R3D, R4D, R5D]>>,
  // ABI-compliant code returns float and double in F0. Make the
@@ -53,7 +53,7 @@ def CC_SystemZ : CallingConv<[
// The first 5 integer arguments are passed in R2-R6. Note that R6
// is call-saved.
- CCIfType<[i32], CCAssignToReg<[R2W, R3W, R4W, R5W, R6W]>>,
+ CCIfType<[i32], CCAssignToReg<[R2L, R3L, R4L, R5L, R6L]>>,
CCIfType<[i64], CCAssignToReg<[R2D, R3D, R4D, R5D, R6D]>>,
// The first 4 float and double arguments are passed in even registers F0-F6.
diff --git a/lib/Target/SystemZ/SystemZConstantPoolValue.cpp b/lib/Target/SystemZ/SystemZConstantPoolValue.cpp
index e9c4f6d..6c70811 100644
--- a/lib/Target/SystemZ/SystemZConstantPoolValue.cpp
+++ b/lib/Target/SystemZ/SystemZConstantPoolValue.cpp
@@ -39,7 +39,7 @@ unsigned SystemZConstantPoolValue::getRelocationInfo() const {
int SystemZConstantPoolValue::
getExistingMachineCPValue(MachineConstantPool *CP, unsigned Alignment) {
unsigned AlignMask = Alignment - 1;
- const std::vector<MachineConstantPoolEntry> Constants = CP->getConstants();
+ const std::vector<MachineConstantPoolEntry> &Constants = CP->getConstants();
for (unsigned I = 0, E = Constants.size(); I != E; ++I) {
if (Constants[I].isMachineConstantPoolEntry() &&
(Constants[I].getAlignment() & AlignMask) == 0) {
diff --git a/lib/Target/SystemZ/SystemZFrameLowering.cpp b/lib/Target/SystemZ/SystemZFrameLowering.cpp
index a58da90..acfb491 100644
--- a/lib/Target/SystemZ/SystemZFrameLowering.cpp
+++ b/lib/Target/SystemZ/SystemZFrameLowering.cpp
@@ -111,7 +111,7 @@ static void addSavedGPR(MachineBasicBlock &MBB, MachineInstrBuilder &MIB,
const SystemZTargetMachine &TM,
unsigned GPR64, bool IsImplicit) {
const SystemZRegisterInfo *RI = TM.getRegisterInfo();
- unsigned GPR32 = RI->getSubReg(GPR64, SystemZ::subreg_32bit);
+ unsigned GPR32 = RI->getSubReg(GPR64, SystemZ::subreg_l32);
bool IsLive = MBB.isLiveIn(GPR64) || MBB.isLiveIn(GPR32);
if (!IsLive || !IsImplicit) {
MIB.addReg(GPR64, getImplRegState(IsImplicit) | getKillRegState(!IsLive));
@@ -420,8 +420,7 @@ void SystemZFrameLowering::emitEpilogue(MachineFunction &MF,
SystemZMachineFunctionInfo *ZFI = MF.getInfo<SystemZMachineFunctionInfo>();
// Skip the return instruction.
- assert(MBBI->getOpcode() == SystemZ::RET &&
- "Can only insert epilogue into returning blocks");
+ assert(MBBI->isReturn() && "Can only insert epilogue into returning blocks");
uint64_t StackSize = getAllocatedStackSize(MF);
if (ZFI->getLowSavedGPR()) {
diff --git a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
index d9794b1..f4a2773 100644
--- a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
+++ b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
@@ -107,7 +107,8 @@ static uint64_t allOnes(unsigned int Count) {
//
// (and (rotl Input, Rotate), Mask)
//
-// otherwise. The value has BitSize bits.
+// otherwise. The output value has BitSize bits, although Input may be
+// narrower (in which case the upper bits are don't care).
struct RxSBGOperands {
RxSBGOperands(unsigned Op, SDValue N)
: Opcode(Op), BitSize(N.getValueType().getSizeInBits()),
@@ -128,7 +129,7 @@ class SystemZDAGToDAGISel : public SelectionDAGISel {
const SystemZSubtarget &Subtarget;
// Used by SystemZOperands.td to create integer constants.
- inline SDValue getImm(const SDNode *Node, uint64_t Imm) {
+ inline SDValue getImm(const SDNode *Node, uint64_t Imm) const {
return CurDAG->getTargetConstant(Imm, Node->getValueType(0));
}
@@ -142,33 +143,39 @@ class SystemZDAGToDAGISel : public SelectionDAGISel {
// Try to fold more of the base or index of AM into AM, where IsBase
// selects between the base and index.
- bool expandAddress(SystemZAddressingMode &AM, bool IsBase);
+ bool expandAddress(SystemZAddressingMode &AM, bool IsBase) const;
// Try to describe N in AM, returning true on success.
- bool selectAddress(SDValue N, SystemZAddressingMode &AM);
+ bool selectAddress(SDValue N, SystemZAddressingMode &AM) const;
// Extract individual target operands from matched address AM.
void getAddressOperands(const SystemZAddressingMode &AM, EVT VT,
- SDValue &Base, SDValue &Disp);
+ SDValue &Base, SDValue &Disp) const;
void getAddressOperands(const SystemZAddressingMode &AM, EVT VT,
- SDValue &Base, SDValue &Disp, SDValue &Index);
+ SDValue &Base, SDValue &Disp, SDValue &Index) const;
// Try to match Addr as a FormBD address with displacement type DR.
// Return true on success, storing the base and displacement in
// Base and Disp respectively.
bool selectBDAddr(SystemZAddressingMode::DispRange DR, SDValue Addr,
- SDValue &Base, SDValue &Disp);
+ SDValue &Base, SDValue &Disp) const;
+
+ // Try to match Addr as a FormBDX address with displacement type DR.
+ // Return true on success and if the result had no index. Store the
+ // base and displacement in Base and Disp respectively.
+ bool selectMVIAddr(SystemZAddressingMode::DispRange DR, SDValue Addr,
+ SDValue &Base, SDValue &Disp) const;
// Try to match Addr as a FormBDX* address of form Form with
// displacement type DR. Return true on success, storing the base,
// displacement and index in Base, Disp and Index respectively.
bool selectBDXAddr(SystemZAddressingMode::AddrForm Form,
SystemZAddressingMode::DispRange DR, SDValue Addr,
- SDValue &Base, SDValue &Disp, SDValue &Index);
+ SDValue &Base, SDValue &Disp, SDValue &Index) const;
// PC-relative address matching routines used by SystemZOperands.td.
- bool selectPCRelAddress(SDValue Addr, SDValue &Target) {
- if (Addr.getOpcode() == SystemZISD::PCREL_WRAPPER) {
+ bool selectPCRelAddress(SDValue Addr, SDValue &Target) const {
+ if (SystemZISD::isPCREL(Addr.getOpcode())) {
Target = Addr.getOperand(0);
return true;
}
@@ -176,64 +183,72 @@ class SystemZDAGToDAGISel : public SelectionDAGISel {
}
// BD matching routines used by SystemZOperands.td.
- bool selectBDAddr12Only(SDValue Addr, SDValue &Base, SDValue &Disp) {
+ bool selectBDAddr12Only(SDValue Addr, SDValue &Base, SDValue &Disp) const {
return selectBDAddr(SystemZAddressingMode::Disp12Only, Addr, Base, Disp);
}
- bool selectBDAddr12Pair(SDValue Addr, SDValue &Base, SDValue &Disp) {
+ bool selectBDAddr12Pair(SDValue Addr, SDValue &Base, SDValue &Disp) const {
return selectBDAddr(SystemZAddressingMode::Disp12Pair, Addr, Base, Disp);
}
- bool selectBDAddr20Only(SDValue Addr, SDValue &Base, SDValue &Disp) {
+ bool selectBDAddr20Only(SDValue Addr, SDValue &Base, SDValue &Disp) const {
return selectBDAddr(SystemZAddressingMode::Disp20Only, Addr, Base, Disp);
}
- bool selectBDAddr20Pair(SDValue Addr, SDValue &Base, SDValue &Disp) {
+ bool selectBDAddr20Pair(SDValue Addr, SDValue &Base, SDValue &Disp) const {
return selectBDAddr(SystemZAddressingMode::Disp20Pair, Addr, Base, Disp);
}
+ // MVI matching routines used by SystemZOperands.td.
+ bool selectMVIAddr12Pair(SDValue Addr, SDValue &Base, SDValue &Disp) const {
+ return selectMVIAddr(SystemZAddressingMode::Disp12Pair, Addr, Base, Disp);
+ }
+ bool selectMVIAddr20Pair(SDValue Addr, SDValue &Base, SDValue &Disp) const {
+ return selectMVIAddr(SystemZAddressingMode::Disp20Pair, Addr, Base, Disp);
+ }
+
// BDX matching routines used by SystemZOperands.td.
bool selectBDXAddr12Only(SDValue Addr, SDValue &Base, SDValue &Disp,
- SDValue &Index) {
+ SDValue &Index) const {
return selectBDXAddr(SystemZAddressingMode::FormBDXNormal,
SystemZAddressingMode::Disp12Only,
Addr, Base, Disp, Index);
}
bool selectBDXAddr12Pair(SDValue Addr, SDValue &Base, SDValue &Disp,
- SDValue &Index) {
+ SDValue &Index) const {
return selectBDXAddr(SystemZAddressingMode::FormBDXNormal,
SystemZAddressingMode::Disp12Pair,
Addr, Base, Disp, Index);
}
bool selectDynAlloc12Only(SDValue Addr, SDValue &Base, SDValue &Disp,
- SDValue &Index) {
+ SDValue &Index) const {
return selectBDXAddr(SystemZAddressingMode::FormBDXDynAlloc,
SystemZAddressingMode::Disp12Only,
Addr, Base, Disp, Index);
}
bool selectBDXAddr20Only(SDValue Addr, SDValue &Base, SDValue &Disp,
- SDValue &Index) {
+ SDValue &Index) const {
return selectBDXAddr(SystemZAddressingMode::FormBDXNormal,
SystemZAddressingMode::Disp20Only,
Addr, Base, Disp, Index);
}
bool selectBDXAddr20Only128(SDValue Addr, SDValue &Base, SDValue &Disp,
- SDValue &Index) {
+ SDValue &Index) const {
return selectBDXAddr(SystemZAddressingMode::FormBDXNormal,
SystemZAddressingMode::Disp20Only128,
Addr, Base, Disp, Index);
}
bool selectBDXAddr20Pair(SDValue Addr, SDValue &Base, SDValue &Disp,
- SDValue &Index) {
+ SDValue &Index) const {
return selectBDXAddr(SystemZAddressingMode::FormBDXNormal,
SystemZAddressingMode::Disp20Pair,
Addr, Base, Disp, Index);
}
bool selectLAAddr12Pair(SDValue Addr, SDValue &Base, SDValue &Disp,
- SDValue &Index) {
+ SDValue &Index) const {
return selectBDXAddr(SystemZAddressingMode::FormBDXLA,
SystemZAddressingMode::Disp12Pair,
Addr, Base, Disp, Index);
}
bool selectLAAddr20Pair(SDValue Addr, SDValue &Base, SDValue &Disp,
- SDValue &Index) {
+ SDValue &Index) const {
return selectBDXAddr(SystemZAddressingMode::FormBDXLA,
SystemZAddressingMode::Disp20Pair,
Addr, Base, Disp, Index);
@@ -242,21 +257,21 @@ class SystemZDAGToDAGISel : public SelectionDAGISel {
// Check whether (or Op (and X InsertMask)) is effectively an insertion
// of X into bits InsertMask of some Y != Op. Return true if so and
// set Op to that Y.
- bool detectOrAndInsertion(SDValue &Op, uint64_t InsertMask);
+ bool detectOrAndInsertion(SDValue &Op, uint64_t InsertMask) const;
// Try to update RxSBG so that only the bits of RxSBG.Input in Mask are used.
// Return true on success.
- bool refineRxSBGMask(RxSBGOperands &RxSBG, uint64_t Mask);
+ bool refineRxSBGMask(RxSBGOperands &RxSBG, uint64_t Mask) const;
// Try to fold some of RxSBG.Input into other fields of RxSBG.
// Return true on success.
- bool expandRxSBG(RxSBGOperands &RxSBG);
+ bool expandRxSBG(RxSBGOperands &RxSBG) const;
- // Return an undefined i64 value.
- SDValue getUNDEF64(SDLoc DL);
+ // Return an undefined value of type VT.
+ SDValue getUNDEF(SDLoc DL, EVT VT) const;
// Convert N to VT, if it isn't already.
- SDValue convertTo(SDLoc DL, EVT VT, SDValue N);
+ SDValue convertTo(SDLoc DL, EVT VT, SDValue N) const;
// Try to implement AND or shift node N using RISBG with the zero flag set.
// Return the selected node on success, otherwise return null.
@@ -276,8 +291,26 @@ class SystemZDAGToDAGISel : public SelectionDAGISel {
SDNode *splitLargeImmediate(unsigned Opcode, SDNode *Node, SDValue Op0,
uint64_t UpperVal, uint64_t LowerVal);
+ // Return true if Load and Store are loads and stores of the same size
+ // and are guaranteed not to overlap. Such operations can be implemented
+ // using block (SS-format) instructions.
+ //
+ // Partial overlap would lead to incorrect code, since the block operations
+ // are logically bytewise, even though they have a fast path for the
+ // non-overlapping case. We also need to avoid full overlap (i.e. two
+ // addresses that might be equal at run time) because although that case
+ // would be handled correctly, it might be implemented by millicode.
+ bool canUseBlockOperation(StoreSDNode *Store, LoadSDNode *Load) const;
+
+ // N is a (store (load Y), X) pattern. Return true if it can use an MVC
+ // from Y to X.
bool storeLoadCanUseMVC(SDNode *N) const;
+ // N is a (store (op (load A[0]), (load A[1])), X) pattern. Return true
+ // if A[1 - I] == X and if N can use a block operation like NC from A[I]
+ // to X.
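+  // For example, (store (and (load X), (load Y)), X) can use an NC
+  // from Y to X when I selects the (load Y) operand, since NC performs
+  // a bytewise X &= Y.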
+ bool storeLoadCanUseBlockBinary(SDNode *N, unsigned I) const;
+
public:
SystemZDAGToDAGISel(SystemZTargetMachine &TM, CodeGenOpt::Level OptLevel)
: SelectionDAGISel(TM, OptLevel),
@@ -363,9 +396,9 @@ static bool expandIndex(SystemZAddressingMode &AM, SDValue Base,
// The base or index of AM is equivalent to Op0 + Op1, where IsBase selects
// between the base and index. Try to fold Op1 into AM's displacement.
static bool expandDisp(SystemZAddressingMode &AM, bool IsBase,
- SDValue Op0, ConstantSDNode *Op1) {
+ SDValue Op0, uint64_t Op1) {
// First try adjusting the displacement.
- int64_t TestDisp = AM.Disp + Op1->getSExtValue();
+ int64_t TestDisp = AM.Disp + Op1;
if (selectDisp(AM.DR, TestDisp)) {
changeComponent(AM, IsBase, Op0);
AM.Disp = TestDisp;
@@ -378,7 +411,7 @@ static bool expandDisp(SystemZAddressingMode &AM, bool IsBase,
}
bool SystemZDAGToDAGISel::expandAddress(SystemZAddressingMode &AM,
- bool IsBase) {
+ bool IsBase) const {
SDValue N = IsBase ? AM.Base : AM.Index;
unsigned Opcode = N.getOpcode();
if (Opcode == ISD::TRUNCATE) {
@@ -398,13 +431,23 @@ bool SystemZDAGToDAGISel::expandAddress(SystemZAddressingMode &AM,
return expandAdjDynAlloc(AM, IsBase, Op0);
if (Op0Code == ISD::Constant)
- return expandDisp(AM, IsBase, Op1, cast<ConstantSDNode>(Op0));
+ return expandDisp(AM, IsBase, Op1,
+ cast<ConstantSDNode>(Op0)->getSExtValue());
if (Op1Code == ISD::Constant)
- return expandDisp(AM, IsBase, Op0, cast<ConstantSDNode>(Op1));
+ return expandDisp(AM, IsBase, Op0,
+ cast<ConstantSDNode>(Op1)->getSExtValue());
if (IsBase && expandIndex(AM, Op0, Op1))
return true;
}
+ if (Opcode == SystemZISD::PCREL_OFFSET) {
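+    // PCREL_OFFSET pairs a full address (operand 0) with the wrapped
+    // anchor it was offset from (operand 1); the difference between
+    // the two offsets can be folded into the displacement.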
+ SDValue Full = N.getOperand(0);
+ SDValue Base = N.getOperand(1);
+ SDValue Anchor = Base.getOperand(0);
+ uint64_t Offset = (cast<GlobalAddressSDNode>(Full)->getOffset() -
+ cast<GlobalAddressSDNode>(Anchor)->getOffset());
+ return expandDisp(AM, IsBase, Base, Offset);
+ }
return false;
}
@@ -483,14 +526,15 @@ static bool shouldUseLA(SDNode *Base, int64_t Disp, SDNode *Index) {
// Return true if Addr is suitable for AM, updating AM if so.
bool SystemZDAGToDAGISel::selectAddress(SDValue Addr,
- SystemZAddressingMode &AM) {
+ SystemZAddressingMode &AM) const {
// Start out assuming that the address will need to be loaded separately,
// then try to extend it as much as we can.
AM.Base = Addr;
// First try treating the address as a constant.
if (Addr.getOpcode() == ISD::Constant &&
- expandDisp(AM, true, SDValue(), cast<ConstantSDNode>(Addr)))
+ expandDisp(AM, true, SDValue(),
+ cast<ConstantSDNode>(Addr)->getSExtValue()))
;
else
// Otherwise try expanding each component.
@@ -530,7 +574,7 @@ static void insertDAGNode(SelectionDAG *DAG, SDNode *Pos, SDValue N) {
void SystemZDAGToDAGISel::getAddressOperands(const SystemZAddressingMode &AM,
EVT VT, SDValue &Base,
- SDValue &Disp) {
+ SDValue &Disp) const {
Base = AM.Base;
if (!Base.getNode())
// Register 0 means "no base". This is mostly useful for shifts.
@@ -555,7 +599,8 @@ void SystemZDAGToDAGISel::getAddressOperands(const SystemZAddressingMode &AM,
void SystemZDAGToDAGISel::getAddressOperands(const SystemZAddressingMode &AM,
EVT VT, SDValue &Base,
- SDValue &Disp, SDValue &Index) {
+ SDValue &Disp,
+ SDValue &Index) const {
getAddressOperands(AM, VT, Base, Disp);
Index = AM.Index;
@@ -566,7 +611,7 @@ void SystemZDAGToDAGISel::getAddressOperands(const SystemZAddressingMode &AM,
bool SystemZDAGToDAGISel::selectBDAddr(SystemZAddressingMode::DispRange DR,
SDValue Addr, SDValue &Base,
- SDValue &Disp) {
+ SDValue &Disp) const {
SystemZAddressingMode AM(SystemZAddressingMode::FormBD, DR);
if (!selectAddress(Addr, AM))
return false;
@@ -575,10 +620,21 @@ bool SystemZDAGToDAGISel::selectBDAddr(SystemZAddressingMode::DispRange DR,
return true;
}
+bool SystemZDAGToDAGISel::selectMVIAddr(SystemZAddressingMode::DispRange DR,
+ SDValue Addr, SDValue &Base,
+ SDValue &Disp) const {
+ SystemZAddressingMode AM(SystemZAddressingMode::FormBDXNormal, DR);
+ if (!selectAddress(Addr, AM) || AM.Index.getNode())
+ return false;
+
+ getAddressOperands(AM, Addr.getValueType(), Base, Disp);
+ return true;
+}
+
bool SystemZDAGToDAGISel::selectBDXAddr(SystemZAddressingMode::AddrForm Form,
SystemZAddressingMode::DispRange DR,
SDValue Addr, SDValue &Base,
- SDValue &Disp, SDValue &Index) {
+ SDValue &Disp, SDValue &Index) const {
SystemZAddressingMode AM(Form, DR);
if (!selectAddress(Addr, AM))
return false;
@@ -588,7 +644,7 @@ bool SystemZDAGToDAGISel::selectBDXAddr(SystemZAddressingMode::AddrForm Form,
}
bool SystemZDAGToDAGISel::detectOrAndInsertion(SDValue &Op,
- uint64_t InsertMask) {
+ uint64_t InsertMask) const {
// We're only interested in cases where the insertion is into some operand
// of Op, rather than into Op itself. The only useful case is an AND.
if (Op.getOpcode() != ISD::AND)
@@ -619,7 +675,8 @@ bool SystemZDAGToDAGISel::detectOrAndInsertion(SDValue &Op,
return true;
}
-bool SystemZDAGToDAGISel::refineRxSBGMask(RxSBGOperands &RxSBG, uint64_t Mask) {
+bool SystemZDAGToDAGISel::refineRxSBGMask(RxSBGOperands &RxSBG,
+ uint64_t Mask) const {
const SystemZInstrInfo *TII = getInstrInfo();
if (RxSBG.Rotate != 0)
Mask = (Mask << RxSBG.Rotate) | (Mask >> (64 - RxSBG.Rotate));
@@ -631,26 +688,15 @@ bool SystemZDAGToDAGISel::refineRxSBGMask(RxSBGOperands &RxSBG, uint64_t Mask) {
return false;
}
-// RxSBG.Input is a shift of Count bits in the direction given by IsLeft.
-// Return true if the result depends on the signs or zeros that are
-// shifted in.
-static bool shiftedInBitsMatter(RxSBGOperands &RxSBG, uint64_t Count,
- bool IsLeft) {
- // Work out which bits of the shift result are zeros or sign copies.
- uint64_t ShiftedIn = allOnes(Count);
- if (!IsLeft)
- ShiftedIn <<= RxSBG.BitSize - Count;
-
- // Rotate that mask in the same way as RxSBG.Input is rotated.
+// Return true if any bits of (RxSBG.Input & Mask) are significant.
+static bool maskMatters(RxSBGOperands &RxSBG, uint64_t Mask) {
+ // Rotate the mask in the same way as RxSBG.Input is rotated.
if (RxSBG.Rotate != 0)
- ShiftedIn = ((ShiftedIn << RxSBG.Rotate) |
- (ShiftedIn >> (64 - RxSBG.Rotate)));
-
- // Fail if any of the zero or sign bits are used.
- return (ShiftedIn & RxSBG.Mask) != 0;
+ Mask = ((Mask << RxSBG.Rotate) | (Mask >> (64 - RxSBG.Rotate)));
+ return (Mask & RxSBG.Mask) != 0;
}
-bool SystemZDAGToDAGISel::expandRxSBG(RxSBGOperands &RxSBG) {
+bool SystemZDAGToDAGISel::expandRxSBG(RxSBGOperands &RxSBG) const {
SDValue N = RxSBG.Input;
unsigned Opcode = N.getOpcode();
switch (Opcode) {
@@ -706,7 +752,7 @@ bool SystemZDAGToDAGISel::expandRxSBG(RxSBGOperands &RxSBG) {
case ISD::ROTL: {
// Any 64-bit rotate left can be merged into the RxSBG.
- if (RxSBG.BitSize != 64)
+ if (RxSBG.BitSize != 64 || N.getValueType() != MVT::i64)
return false;
ConstantSDNode *CountNode
= dyn_cast<ConstantSDNode>(N.getOperand(1).getNode());
@@ -718,6 +764,19 @@ bool SystemZDAGToDAGISel::expandRxSBG(RxSBGOperands &RxSBG) {
return true;
}
+ case ISD::SIGN_EXTEND:
+ case ISD::ZERO_EXTEND:
+ case ISD::ANY_EXTEND: {
+ // Check that the extension bits are don't-care (i.e. are masked out
+ // by the final mask).
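+    // For example, in (and (anyext i32 X to i64), 0xffff) only the low
+    // 16 bits of the extension result are used, so the input can be
+    // replaced by X itself.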
+ unsigned InnerBitSize = N.getOperand(0).getValueType().getSizeInBits();
+ if (maskMatters(RxSBG, allOnes(RxSBG.BitSize) - allOnes(InnerBitSize)))
+ return false;
+
+ RxSBG.Input = N.getOperand(0);
+ return true;
+ }
+
case ISD::SHL: {
ConstantSDNode *CountNode =
dyn_cast<ConstantSDNode>(N.getOperand(1).getNode());
@@ -725,17 +784,18 @@ bool SystemZDAGToDAGISel::expandRxSBG(RxSBGOperands &RxSBG) {
return false;
uint64_t Count = CountNode->getZExtValue();
- if (Count < 1 || Count >= RxSBG.BitSize)
+ unsigned BitSize = N.getValueType().getSizeInBits();
+ if (Count < 1 || Count >= BitSize)
return false;
if (RxSBG.Opcode == SystemZ::RNSBG) {
// Treat (shl X, count) as (rotl X, size-count) as long as the bottom
// count bits from RxSBG.Input are ignored.
- if (shiftedInBitsMatter(RxSBG, Count, true))
+ if (maskMatters(RxSBG, allOnes(Count)))
return false;
} else {
// Treat (shl X, count) as (and (rotl X, count), ~0<<count).
- if (!refineRxSBGMask(RxSBG, allOnes(RxSBG.BitSize - Count) << Count))
+ if (!refineRxSBGMask(RxSBG, allOnes(BitSize - Count) << Count))
return false;
}
@@ -752,18 +812,19 @@ bool SystemZDAGToDAGISel::expandRxSBG(RxSBGOperands &RxSBG) {
return false;
uint64_t Count = CountNode->getZExtValue();
- if (Count < 1 || Count >= RxSBG.BitSize)
+ unsigned BitSize = N.getValueType().getSizeInBits();
+ if (Count < 1 || Count >= BitSize)
return false;
if (RxSBG.Opcode == SystemZ::RNSBG || Opcode == ISD::SRA) {
// Treat (srl|sra X, count) as (rotl X, size-count) as long as the top
// count bits from RxSBG.Input are ignored.
- if (shiftedInBitsMatter(RxSBG, Count, false))
+ if (maskMatters(RxSBG, allOnes(Count) << (BitSize - Count)))
return false;
} else {
      // Treat (srl X, count) as (and (rotl X, size-count), ~0>>count),
// which is similar to SLL above.
- if (!refineRxSBGMask(RxSBG, allOnes(RxSBG.BitSize - Count)))
+ if (!refineRxSBGMask(RxSBG, allOnes(BitSize - Count)))
return false;
}
@@ -776,24 +837,17 @@ bool SystemZDAGToDAGISel::expandRxSBG(RxSBGOperands &RxSBG) {
}
}
-SDValue SystemZDAGToDAGISel::getUNDEF64(SDLoc DL) {
- SDNode *N = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::i64);
+SDValue SystemZDAGToDAGISel::getUNDEF(SDLoc DL, EVT VT) const {
+ SDNode *N = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT);
return SDValue(N, 0);
}
-SDValue SystemZDAGToDAGISel::convertTo(SDLoc DL, EVT VT, SDValue N) {
- if (N.getValueType() == MVT::i32 && VT == MVT::i64) {
- SDValue Index = CurDAG->getTargetConstant(SystemZ::subreg_32bit, MVT::i64);
- SDNode *Insert = CurDAG->getMachineNode(TargetOpcode::INSERT_SUBREG,
- DL, VT, getUNDEF64(DL), N, Index);
- return SDValue(Insert, 0);
- }
- if (N.getValueType() == MVT::i64 && VT == MVT::i32) {
- SDValue Index = CurDAG->getTargetConstant(SystemZ::subreg_32bit, MVT::i64);
- SDNode *Extract = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
- DL, VT, N, Index);
- return SDValue(Extract, 0);
- }
+SDValue SystemZDAGToDAGISel::convertTo(SDLoc DL, EVT VT, SDValue N) const {
+ if (N.getValueType() == MVT::i32 && VT == MVT::i64)
+ return CurDAG->getTargetInsertSubreg(SystemZ::subreg_l32,
+ DL, VT, getUNDEF(DL, MVT::i64), N);
+ if (N.getValueType() == MVT::i64 && VT == MVT::i32)
+ return CurDAG->getTargetExtractSubreg(SystemZ::subreg_l32, DL, VT, N);
assert(N.getValueType() == VT && "Unexpected value types");
return N;
}
@@ -803,7 +857,8 @@ SDNode *SystemZDAGToDAGISel::tryRISBGZero(SDNode *N) {
RxSBGOperands RISBG(SystemZ::RISBG, SDValue(N, 0));
unsigned Count = 0;
while (expandRxSBG(RISBG))
- Count += 1;
+ if (RISBG.Input.getOpcode() != ISD::ANY_EXTEND)
+ Count += 1;
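+  // (Stripping an ANY_EXTEND produces no code, so it is not counted as
+  // a simplification that would make RISBG worthwhile.)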
if (Count == 0)
return 0;
if (Count == 1) {
@@ -831,14 +886,22 @@ SDNode *SystemZDAGToDAGISel::tryRISBGZero(SDNode *N) {
}
}
+ unsigned Opcode = SystemZ::RISBG;
+ EVT OpcodeVT = MVT::i64;
+ if (VT == MVT::i32 && Subtarget.hasHighWord()) {
+ Opcode = SystemZ::RISBMux;
+ OpcodeVT = MVT::i32;
+ RISBG.Start &= 31;
+ RISBG.End &= 31;
+ }
SDValue Ops[5] = {
- getUNDEF64(SDLoc(N)),
- convertTo(SDLoc(N), MVT::i64, RISBG.Input),
+ getUNDEF(SDLoc(N), OpcodeVT),
+ convertTo(SDLoc(N), OpcodeVT, RISBG.Input),
CurDAG->getTargetConstant(RISBG.Start, MVT::i32),
CurDAG->getTargetConstant(RISBG.End | 128, MVT::i32),
CurDAG->getTargetConstant(RISBG.Rotate, MVT::i32)
};
- N = CurDAG->getMachineNode(SystemZ::RISBG, SDLoc(N), MVT::i64, Ops);
+ N = CurDAG->getMachineNode(Opcode, SDLoc(N), OpcodeVT, Ops);
return convertTo(SDLoc(N), VT, SDValue(N, 0)).getNode();
}
@@ -852,7 +915,8 @@ SDNode *SystemZDAGToDAGISel::tryRxSBG(SDNode *N, unsigned Opcode) {
unsigned Count[] = { 0, 0 };
for (unsigned I = 0; I < 2; ++I)
while (expandRxSBG(RxSBG[I]))
- Count[I] += 1;
+ if (RxSBG[I].Input.getOpcode() != ISD::ANY_EXTEND)
+ Count[I] += 1;
// Do nothing if neither operand is suitable.
if (Count[0] == 0 && Count[1] == 0)
@@ -900,49 +964,64 @@ SDNode *SystemZDAGToDAGISel::splitLargeImmediate(unsigned Opcode, SDNode *Node,
return Or.getNode();
}
-// N is a (store (load ...), ...) pattern. Return true if it can use MVC.
-bool SystemZDAGToDAGISel::storeLoadCanUseMVC(SDNode *N) const {
- StoreSDNode *Store = cast<StoreSDNode>(N);
- LoadSDNode *Load = cast<LoadSDNode>(Store->getValue().getNode());
+bool SystemZDAGToDAGISel::canUseBlockOperation(StoreSDNode *Store,
+ LoadSDNode *Load) const {
+ // Check that the two memory operands have the same size.
+ if (Load->getMemoryVT() != Store->getMemoryVT())
+ return false;
- // MVC is logically a bytewise copy, so can't be used for volatile accesses.
+ // Volatility stops an access from being decomposed.
if (Load->isVolatile() || Store->isVolatile())
return false;
- // Prefer not to use MVC if either address can use ... RELATIVE LONG
- // instructions.
- assert(Load->getMemoryVT() == Store->getMemoryVT() &&
- "Should already have checked that the types match");
- uint64_t Size = Load->getMemoryVT().getStoreSize();
- if (Size > 1 && Size <= 8) {
- // Prefer LHRL, LRL and LGRL.
- if (Load->getBasePtr().getOpcode() == SystemZISD::PCREL_WRAPPER)
- return false;
- // Prefer STHRL, STRL and STGRL.
- if (Store->getBasePtr().getOpcode() == SystemZISD::PCREL_WRAPPER)
- return false;
- }
-
// There's no chance of overlap if the load is invariant.
if (Load->isInvariant())
return true;
- // If both operands are aligned, they must be equal or not overlap.
- if (Load->getAlignment() >= Size && Store->getAlignment() >= Size)
- return true;
-
// Otherwise we need to check whether there's an alias.
const Value *V1 = Load->getSrcValue();
const Value *V2 = Store->getSrcValue();
if (!V1 || !V2)
return false;
+ // Reject equality.
+ uint64_t Size = Load->getMemoryVT().getStoreSize();
int64_t End1 = Load->getSrcValueOffset() + Size;
int64_t End2 = Store->getSrcValueOffset() + Size;
+ if (V1 == V2 && End1 == End2)
+ return false;
+
return !AA->alias(AliasAnalysis::Location(V1, End1, Load->getTBAAInfo()),
AliasAnalysis::Location(V2, End2, Store->getTBAAInfo()));
}
+bool SystemZDAGToDAGISel::storeLoadCanUseMVC(SDNode *N) const {
+ StoreSDNode *Store = cast<StoreSDNode>(N);
+ LoadSDNode *Load = cast<LoadSDNode>(Store->getValue());
+
+ // Prefer not to use MVC if either address can use ... RELATIVE LONG
+ // instructions.
+ uint64_t Size = Load->getMemoryVT().getStoreSize();
+ if (Size > 1 && Size <= 8) {
+ // Prefer LHRL, LRL and LGRL.
+ if (SystemZISD::isPCREL(Load->getBasePtr().getOpcode()))
+ return false;
+ // Prefer STHRL, STRL and STGRL.
+ if (SystemZISD::isPCREL(Store->getBasePtr().getOpcode()))
+ return false;
+ }
+
+ return canUseBlockOperation(Store, Load);
+}
+
+bool SystemZDAGToDAGISel::storeLoadCanUseBlockBinary(SDNode *N,
+ unsigned I) const {
+ StoreSDNode *StoreA = cast<StoreSDNode>(N);
+ LoadSDNode *LoadA = cast<LoadSDNode>(StoreA->getValue().getOperand(1 - I));
+ LoadSDNode *LoadB = cast<LoadSDNode>(StoreA->getValue().getOperand(I));
+ return !LoadA->isVolatile() && canUseBlockOperation(StoreA, LoadB);
+}
+
SDNode *SystemZDAGToDAGISel::Select(SDNode *Node) {
// Dump information about the Node being selected
DEBUG(errs() << "Selecting: "; Node->dump(CurDAG); errs() << "\n");
@@ -950,6 +1029,7 @@ SDNode *SystemZDAGToDAGISel::Select(SDNode *Node) {
// If we have a custom node, we already have selected!
if (Node->isMachineOpcode()) {
DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n");
+ Node->setNodeId(-1);
return 0;
}
diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp
index 6acdcd4..f6e1853 100644
--- a/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -23,8 +23,23 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include <cctype>
+
using namespace llvm;
+namespace {
+// Represents a sequence for extracting a 0/1 value from an IPM result:
+// (((X ^ XORValue) + AddValue) >> Bit)
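+//
+// For example, with the condition code in bits 29:28 of X and the upper
+// two bits known to be zero, CC==0 can be tested using XORValue=0,
+// AddValue=-(1 << 28) and Bit=31: the addition makes the result negative
+// exactly when CC is zero.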
+struct IPMConversion {
+ IPMConversion(unsigned xorValue, int64_t addValue, unsigned bit)
+ : XORValue(xorValue), AddValue(addValue), Bit(bit) {}
+
+ int64_t XORValue;
+ int64_t AddValue;
+ unsigned Bit;
+};
+}
+
// Classify VT as either 32 or 64 bit.
static bool is32Bit(EVT VT) {
switch (VT.getSimpleVT().SimpleTy) {
@@ -51,7 +66,10 @@ SystemZTargetLowering::SystemZTargetLowering(SystemZTargetMachine &tm)
MVT PtrVT = getPointerTy();
// Set up the register classes.
- addRegisterClass(MVT::i32, &SystemZ::GR32BitRegClass);
+ if (Subtarget.hasHighWord())
+ addRegisterClass(MVT::i32, &SystemZ::GRX32BitRegClass);
+ else
+ addRegisterClass(MVT::i32, &SystemZ::GR32BitRegClass);
addRegisterClass(MVT::i64, &SystemZ::GR64BitRegClass);
addRegisterClass(MVT::f32, &SystemZ::FP32BitRegClass);
addRegisterClass(MVT::f64, &SystemZ::FP64BitRegClass);
@@ -83,8 +101,8 @@ SystemZTargetLowering::SystemZTargetLowering(SystemZTargetMachine &tm)
++I) {
MVT VT = MVT::SimpleValueType(I);
if (isTypeLegal(VT)) {
- // Expand SETCC(X, Y, COND) into SELECT_CC(X, Y, 1, 0, COND).
- setOperationAction(ISD::SETCC, VT, Expand);
+ // Lower SET_CC into an IPM-based sequence.
+ setOperationAction(ISD::SETCC, VT, Custom);
// Expand SELECT(C, A, B) into SELECT_CC(X, 0, A, B, NE).
setOperationAction(ISD::SELECT, VT, Expand);
@@ -128,9 +146,11 @@ SystemZTargetLowering::SystemZTargetLowering(SystemZTargetMachine &tm)
setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand);
setOperationAction(ISD::ROTR, VT, Expand);
- // Use *MUL_LOHI where possible and a wider multiplication otherwise.
+ // Use *MUL_LOHI where possible instead of MULH*.
setOperationAction(ISD::MULHS, VT, Expand);
setOperationAction(ISD::MULHU, VT, Expand);
+ setOperationAction(ISD::SMUL_LOHI, VT, Custom);
+ setOperationAction(ISD::UMUL_LOHI, VT, Custom);
// We have instructions for signed but not unsigned FP conversion.
setOperationAction(ISD::FP_TO_UINT, VT, Expand);
@@ -165,14 +185,6 @@ SystemZTargetLowering::SystemZTargetLowering(SystemZTargetMachine &tm)
// Give LowerOperation the chance to replace 64-bit ORs with subregs.
setOperationAction(ISD::OR, MVT::i64, Custom);
- // The architecture has 32-bit SMUL_LOHI and UMUL_LOHI (MR and MLR),
- // but they aren't really worth using. There is no 64-bit SMUL_LOHI,
- // but there is a 64-bit UMUL_LOHI: MLGR.
- setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
- setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
- setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
- setOperationAction(ISD::UMUL_LOHI, MVT::i64, Custom);
-
// FIXME: Can we support these natively?
setOperationAction(ISD::SRL_PARTS, MVT::i64, Expand);
setOperationAction(ISD::SHL_PARTS, MVT::i64, Expand);
@@ -200,6 +212,9 @@ SystemZTargetLowering::SystemZTargetLowering(SystemZTargetMachine &tm)
setOperationAction(ISD::STACKSAVE, MVT::Other, Custom);
setOperationAction(ISD::STACKRESTORE, MVT::Other, Custom);
+ // Handle prefetches with PFD or PFDRL.
+ setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
+
// Handle floating-point types.
for (unsigned I = MVT::FIRST_FP_VALUETYPE;
I <= MVT::LAST_FP_VALUETYPE;
@@ -209,6 +224,15 @@ SystemZTargetLowering::SystemZTargetLowering(SystemZTargetMachine &tm)
// We can use FI for FRINT.
setOperationAction(ISD::FRINT, VT, Legal);
+ // We can use the extended form of FI for other rounding operations.
+ if (Subtarget.hasFPExtension()) {
+ setOperationAction(ISD::FNEARBYINT, VT, Legal);
+ setOperationAction(ISD::FFLOOR, VT, Legal);
+ setOperationAction(ISD::FCEIL, VT, Legal);
+ setOperationAction(ISD::FTRUNC, VT, Legal);
+ setOperationAction(ISD::FROUND, VT, Legal);
+ }
+
// No special instructions for these.
setOperationAction(ISD::FSIN, VT, Expand);
setOperationAction(ISD::FCOS, VT, Expand);
@@ -255,8 +279,13 @@ SystemZTargetLowering::SystemZTargetLowering(SystemZTargetMachine &tm)
MaxStoresPerMemsetOptSize = 0;
}
-bool
-SystemZTargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
+EVT SystemZTargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
+ if (!VT.isVector())
+ return MVT::i32;
+ return VT.changeVectorElementTypeToInteger();
+}
+
+bool SystemZTargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
VT = VT.getScalarType();
if (!VT.isSimple())
@@ -305,6 +334,22 @@ bool SystemZTargetLowering::isLegalAddressingMode(const AddrMode &AM,
return AM.Scale == 0 || AM.Scale == 1;
}
+bool SystemZTargetLowering::isTruncateFree(Type *FromType, Type *ToType) const {
+ if (!FromType->isIntegerTy() || !ToType->isIntegerTy())
+ return false;
+ unsigned FromBits = FromType->getPrimitiveSizeInBits();
+ unsigned ToBits = ToType->getPrimitiveSizeInBits();
+ return FromBits > ToBits;
+}
+
+bool SystemZTargetLowering::isTruncateFree(EVT FromVT, EVT ToVT) const {
+ if (!FromVT.isInteger() || !ToVT.isInteger())
+ return false;
+ unsigned FromBits = FromVT.getSizeInBits();
+ unsigned ToBits = ToVT.getSizeInBits();
+ return FromBits > ToBits;
+}
+
//===----------------------------------------------------------------------===//
// Inline asm support
//===----------------------------------------------------------------------===//
@@ -316,6 +361,7 @@ SystemZTargetLowering::getConstraintType(const std::string &Constraint) const {
case 'a': // Address register
case 'd': // Data register (equivalent to 'r')
case 'f': // Floating-point register
+ case 'h': // High-part register
case 'r': // General-purpose register
return C_RegisterClass;
@@ -358,6 +404,7 @@ getSingleConstraintMatchWeight(AsmOperandInfo &info,
case 'a': // Address register
case 'd': // Data register (equivalent to 'r')
+ case 'h': // High-part register
case 'r': // General-purpose register
if (CallOperandVal->getType()->isIntegerTy())
weight = CW_Register;
@@ -438,6 +485,9 @@ getRegForInlineAsmConstraint(const std::string &Constraint, MVT VT) const {
return std::make_pair(0U, &SystemZ::ADDR128BitRegClass);
return std::make_pair(0U, &SystemZ::ADDR32BitRegClass);
+ case 'h': // High-part register (an LLVM extension)
+ return std::make_pair(0U, &SystemZ::GRH32BitRegClass);
+
case 'f': // Floating-point register
if (VT == MVT::f64)
return std::make_pair(0U, &SystemZ::FP64BitRegClass);
@@ -527,6 +577,17 @@ LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint,
#include "SystemZGenCallingConv.inc"
+bool SystemZTargetLowering::allowTruncateForTailCall(Type *FromType,
+ Type *ToType) const {
+ return isTruncateFree(FromType, ToType);
+}
+
+bool SystemZTargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const {
+ if (!CI->isTailCall())
+ return false;
+ return true;
+}
+
// Value is a value that has been passed to us in the location described by VA
// (and so has type VA.getLocVT()). Convert Value to VA.getValVT(), chaining
// any loads onto Chain.
@@ -689,6 +750,23 @@ LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
return Chain;
}
+static bool canUseSiblingCall(CCState ArgCCInfo,
+ SmallVectorImpl<CCValAssign> &ArgLocs) {
+ // Punt if there are any indirect or stack arguments, or if the call
+ // needs the call-saved argument register R6.
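+  // (R6 is the only argument register that is also call-saved, so a
+  // sibling call could not both pass a value in it and preserve the
+  // caller's copy.)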
+ for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
+ CCValAssign &VA = ArgLocs[I];
+ if (VA.getLocInfo() == CCValAssign::Indirect)
+ return false;
+ if (!VA.isRegLoc())
+ return false;
+ unsigned Reg = VA.getLocReg();
+ if (Reg == SystemZ::R6H || Reg == SystemZ::R6L || Reg == SystemZ::R6D)
+ return false;
+ }
+ return true;
+}
+
SDValue
SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
SmallVectorImpl<SDValue> &InVals) const {
@@ -699,26 +777,29 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
SDValue Chain = CLI.Chain;
SDValue Callee = CLI.Callee;
- bool &isTailCall = CLI.IsTailCall;
+ bool &IsTailCall = CLI.IsTailCall;
CallingConv::ID CallConv = CLI.CallConv;
bool IsVarArg = CLI.IsVarArg;
MachineFunction &MF = DAG.getMachineFunction();
EVT PtrVT = getPointerTy();
- // SystemZ target does not yet support tail call optimization.
- isTailCall = false;
-
// Analyze the operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ArgLocs;
CCState ArgCCInfo(CallConv, IsVarArg, MF, TM, ArgLocs, *DAG.getContext());
ArgCCInfo.AnalyzeCallOperands(Outs, CC_SystemZ);
+ // We don't support GuaranteedTailCallOpt, only automatically-detected
+ // sibling calls.
+ if (IsTailCall && !canUseSiblingCall(ArgCCInfo, ArgLocs))
+ IsTailCall = false;
+
// Get a count of how many bytes are to be pushed on the stack.
unsigned NumBytes = ArgCCInfo.getNextStackOffset();
// Mark the start of the call.
- Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(NumBytes, PtrVT, true),
- DL);
+ if (!IsTailCall)
+ Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(NumBytes, PtrVT, true),
+ DL);
// Copy argument values to their designated locations.
SmallVector<std::pair<unsigned, SDValue>, 9> RegsToPass;
@@ -767,22 +848,27 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
&MemOpChains[0], MemOpChains.size());
- // Build a sequence of copy-to-reg nodes, chained and glued together.
- SDValue Glue;
- for (unsigned I = 0, E = RegsToPass.size(); I != E; ++I) {
- Chain = DAG.getCopyToReg(Chain, DL, RegsToPass[I].first,
- RegsToPass[I].second, Glue);
- Glue = Chain.getValue(1);
- }
-
// Accept direct calls by converting symbolic call addresses to the
- // associated Target* opcodes.
+ // associated Target* opcodes. Force %r1 to be used for indirect
+ // tail calls.
+ SDValue Glue;
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, PtrVT);
Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee);
} else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Callee)) {
Callee = DAG.getTargetExternalSymbol(E->getSymbol(), PtrVT);
Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee);
+ } else if (IsTailCall) {
+ Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R1D, Callee, Glue);
+ Glue = Chain.getValue(1);
+ Callee = DAG.getRegister(SystemZ::R1D, Callee.getValueType());
+ }
+
+ // Build a sequence of copy-to-reg nodes, chained and glued together.
+ for (unsigned I = 0, E = RegsToPass.size(); I != E; ++I) {
+ Chain = DAG.getCopyToReg(Chain, DL, RegsToPass[I].first,
+ RegsToPass[I].second, Glue);
+ Glue = Chain.getValue(1);
}
// The first call operand is the chain and the second is the target address.
@@ -802,6 +888,8 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
// Emit the call.
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+ if (IsTailCall)
+ return DAG.getNode(SystemZISD::SIBCALL, DL, NodeTys, &Ops[0], Ops.size());
Chain = DAG.getNode(SystemZISD::CALL, DL, NodeTys, &Ops[0], Ops.size());
Glue = Chain.getValue(1);
@@ -910,6 +998,73 @@ static unsigned CCMaskForCondCode(ISD::CondCode CC) {
#undef CONV
}
+// Return a sequence for getting a 1 from an IPM result when CC has a
+// value in CCMask and a 0 when CC has a value in CCValid & ~CCMask.
+// The handling of CC values outside CCValid doesn't matter.
+static IPMConversion getIPMConversion(unsigned CCValid, unsigned CCMask) {
+ // Deal with cases where the result can be taken directly from a bit
+ // of the IPM result.
+ if (CCMask == (CCValid & (SystemZ::CCMASK_1 | SystemZ::CCMASK_3)))
+ return IPMConversion(0, 0, SystemZ::IPM_CC);
+ if (CCMask == (CCValid & (SystemZ::CCMASK_2 | SystemZ::CCMASK_3)))
+ return IPMConversion(0, 0, SystemZ::IPM_CC + 1);
+
+ // Deal with cases where we can add a value to force the sign bit
+ // to contain the right value. Putting the bit in 31 means we can
+ // use SRL rather than RISBG(L), and also makes it easier to get a
+ // 0/-1 value, so it has priority over the other tests below.
+ //
+ // These sequences rely on the fact that the upper two bits of the
+ // IPM result are zero.
+ uint64_t TopBit = uint64_t(1) << 31;
+ if (CCMask == (CCValid & SystemZ::CCMASK_0))
+ return IPMConversion(0, -(1 << SystemZ::IPM_CC), 31);
+ if (CCMask == (CCValid & (SystemZ::CCMASK_0 | SystemZ::CCMASK_1)))
+ return IPMConversion(0, -(2 << SystemZ::IPM_CC), 31);
+ if (CCMask == (CCValid & (SystemZ::CCMASK_0
+ | SystemZ::CCMASK_1
+ | SystemZ::CCMASK_2)))
+ return IPMConversion(0, -(3 << SystemZ::IPM_CC), 31);
+ if (CCMask == (CCValid & SystemZ::CCMASK_3))
+ return IPMConversion(0, TopBit - (3 << SystemZ::IPM_CC), 31);
+ if (CCMask == (CCValid & (SystemZ::CCMASK_1
+ | SystemZ::CCMASK_2
+ | SystemZ::CCMASK_3)))
+ return IPMConversion(0, TopBit - (1 << SystemZ::IPM_CC), 31);
+
+ // Next try inverting the value and testing a bit. 0/1 could be
+ // handled this way too, but we dealt with that case above.
+ if (CCMask == (CCValid & (SystemZ::CCMASK_0 | SystemZ::CCMASK_2)))
+ return IPMConversion(-1, 0, SystemZ::IPM_CC);
+
+ // Handle cases where adding a value forces a non-sign bit to contain
+ // the right value.
+ if (CCMask == (CCValid & (SystemZ::CCMASK_1 | SystemZ::CCMASK_2)))
+ return IPMConversion(0, 1 << SystemZ::IPM_CC, SystemZ::IPM_CC + 1);
+ if (CCMask == (CCValid & (SystemZ::CCMASK_0 | SystemZ::CCMASK_3)))
+ return IPMConversion(0, -(1 << SystemZ::IPM_CC), SystemZ::IPM_CC + 1);
+
+  // The remaining cases are 1, 2, 0/1/3 and 0/2/3. All of these can
+  // be done by inverting the low CC bit and applying one of the
+ // sign-based extractions above.
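+  // For example, XORing with 1 << SystemZ::IPM_CC turns CC==1 into
+  // CC==0, after which the sign-bit extraction for CC==0 applies
+  // unchanged.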
+ if (CCMask == (CCValid & SystemZ::CCMASK_1))
+ return IPMConversion(1 << SystemZ::IPM_CC, -(1 << SystemZ::IPM_CC), 31);
+ if (CCMask == (CCValid & SystemZ::CCMASK_2))
+ return IPMConversion(1 << SystemZ::IPM_CC,
+ TopBit - (3 << SystemZ::IPM_CC), 31);
+ if (CCMask == (CCValid & (SystemZ::CCMASK_0
+ | SystemZ::CCMASK_1
+ | SystemZ::CCMASK_3)))
+ return IPMConversion(1 << SystemZ::IPM_CC, -(3 << SystemZ::IPM_CC), 31);
+ if (CCMask == (CCValid & (SystemZ::CCMASK_0
+ | SystemZ::CCMASK_2
+ | SystemZ::CCMASK_3)))
+ return IPMConversion(1 << SystemZ::IPM_CC,
+ TopBit - (1 << SystemZ::IPM_CC), 31);
+
+ llvm_unreachable("Unexpected CC combination");
+}
+
// If a comparison described by IsUnsigned, CCMask, CmpOp0 and CmpOp1
// can be converted to a comparison against zero, adjust the operands
// as necessary.
@@ -1009,74 +1164,309 @@ static void adjustSubwordCmp(SelectionDAG &DAG, bool &IsUnsigned,
CmpOp1 = DAG.getConstant(Value, MVT::i32);
}
-// Return true if a comparison described by CCMask, CmpOp0 and CmpOp1
-// is an equality comparison that is better implemented using unsigned
-// rather than signed comparison instructions.
-static bool preferUnsignedComparison(SelectionDAG &DAG, SDValue CmpOp0,
- SDValue CmpOp1, unsigned CCMask) {
- // The test must be for equality or inequality.
- if (CCMask != SystemZ::CCMASK_CMP_EQ && CCMask != SystemZ::CCMASK_CMP_NE)
+// Return true if Op is either an unextended load, or a load suitable
+// for integer register-memory comparisons of type ICmpType.
+static bool isNaturalMemoryOperand(SDValue Op, unsigned ICmpType) {
+ LoadSDNode *Load = dyn_cast<LoadSDNode>(Op.getNode());
+ if (Load) {
+ // There are no instructions to compare a register with a memory byte.
+ if (Load->getMemoryVT() == MVT::i8)
+ return false;
+ // Otherwise decide on extension type.
+ switch (Load->getExtensionType()) {
+ case ISD::NON_EXTLOAD:
+ return true;
+ case ISD::SEXTLOAD:
+ return ICmpType != SystemZICMP::UnsignedOnly;
+ case ISD::ZEXTLOAD:
+ return ICmpType != SystemZICMP::SignedOnly;
+ default:
+ break;
+ }
+ }
+ return false;
+}
+
+// Return true if it is better to swap comparison operands Op0 and Op1.
+// ICmpType is the type of an integer comparison.
+static bool shouldSwapCmpOperands(SDValue Op0, SDValue Op1,
+ unsigned ICmpType) {
+ // Leave f128 comparisons alone, since they have no memory forms.
+ if (Op0.getValueType() == MVT::f128)
return false;
- if (CmpOp1.getOpcode() == ISD::Constant) {
- uint64_t Value = cast<ConstantSDNode>(CmpOp1)->getSExtValue();
+ // Always keep a floating-point constant second, since comparisons with
+ // zero can use LOAD TEST and comparisons with other constants make a
+ // natural memory operand.
+ if (isa<ConstantFPSDNode>(Op1))
+ return false;
- // If we're comparing with memory, prefer unsigned comparisons for
- // values that are in the unsigned 16-bit range but not the signed
- // 16-bit range. We want to use CLFHSI and CLGHSI.
- if (CmpOp0.hasOneUse() &&
- ISD::isNormalLoad(CmpOp0.getNode()) &&
- (Value >= 32768 && Value < 65536))
- return true;
+ // Never swap comparisons with zero since there are many ways to optimize
+ // those later.
+ ConstantSDNode *COp1 = dyn_cast<ConstantSDNode>(Op1);
+ if (COp1 && COp1->getZExtValue() == 0)
+ return false;
- // Use unsigned comparisons for values that are in the CLGFI range
- // but not in the CGFI range.
- if (CmpOp0.getValueType() == MVT::i64 && (Value >> 31) == 1)
+  // Look for cases where Op0 is a single-use load and Op1 isn't.
+ // In that case we generally prefer the memory to be second.
+ if ((isNaturalMemoryOperand(Op0, ICmpType) && Op0.hasOneUse()) &&
+ !(isNaturalMemoryOperand(Op1, ICmpType) && Op1.hasOneUse())) {
+ // The only exceptions are when the second operand is a constant and
+ // we can use things like CHHSI.
+ if (!COp1)
return true;
+ // The unsigned memory-immediate instructions can handle 16-bit
+ // unsigned integers.
+ if (ICmpType != SystemZICMP::SignedOnly &&
+ isUInt<16>(COp1->getZExtValue()))
+ return false;
+ // The signed memory-immediate instructions can handle 16-bit
+ // signed integers.
+ if (ICmpType != SystemZICMP::UnsignedOnly &&
+ isInt<16>(COp1->getSExtValue()))
+ return false;
+ return true;
+ }
+ return false;
+}
+
+// Return true if shift operation N has an in-range constant shift value.
+// Store it in ShiftVal if so.
+static bool isSimpleShift(SDValue N, unsigned &ShiftVal) {
+ ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(N.getOperand(1));
+ if (!Shift)
+ return false;
+ uint64_t Amount = Shift->getZExtValue();
+ if (Amount >= N.getValueType().getSizeInBits())
return false;
+
+ ShiftVal = Amount;
+ return true;
+}
+
+// Check whether an AND with Mask is suitable for a TEST UNDER MASK
+// instruction and whether the CC value is descriptive enough to handle
+// a comparison of type ICmpType between the AND result and CmpVal.
+// CCMask says which comparison result is being tested and BitSize is
+// the number of bits in the operands. If TEST UNDER MASK can be used,
+// return the corresponding CC mask, otherwise return 0.
+static unsigned getTestUnderMaskCond(unsigned BitSize, unsigned CCMask,
+ uint64_t Mask, uint64_t CmpVal,
+ unsigned ICmpType) {
+ assert(Mask != 0 && "ANDs with zero should have been removed by now");
+
+ // Check whether the mask is suitable for TMHH, TMHL, TMLH or TMLL.
+ if (!SystemZ::isImmLL(Mask) && !SystemZ::isImmLH(Mask) &&
+ !SystemZ::isImmHL(Mask) && !SystemZ::isImmHH(Mask))
+ return 0;
+
+ // Work out the masks for the lowest and highest bits.
+ unsigned HighShift = 63 - countLeadingZeros(Mask);
+ uint64_t High = uint64_t(1) << HighShift;
+ uint64_t Low = uint64_t(1) << countTrailingZeros(Mask);
+
+ // Signed ordered comparisons are effectively unsigned if the sign
+ // bit is dropped.
+ bool EffectivelyUnsigned = (ICmpType != SystemZICMP::SignedOnly);
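+  // For example, with Mask == 0xF0 (Low == 0x10), an unsigned test
+  // (X & Mask) < CmpVal with 0 < CmpVal <= Low holds exactly when all
+  // the masked bits are zero, which is what CCMASK_TM_ALL_0 tests.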
+
+ // Check for equality comparisons with 0, or the equivalent.
+ if (CmpVal == 0) {
+ if (CCMask == SystemZ::CCMASK_CMP_EQ)
+ return SystemZ::CCMASK_TM_ALL_0;
+ if (CCMask == SystemZ::CCMASK_CMP_NE)
+ return SystemZ::CCMASK_TM_SOME_1;
+ }
+ if (EffectivelyUnsigned && CmpVal <= Low) {
+ if (CCMask == SystemZ::CCMASK_CMP_LT)
+ return SystemZ::CCMASK_TM_ALL_0;
+ if (CCMask == SystemZ::CCMASK_CMP_GE)
+ return SystemZ::CCMASK_TM_SOME_1;
+ }
+ if (EffectivelyUnsigned && CmpVal < Low) {
+ if (CCMask == SystemZ::CCMASK_CMP_LE)
+ return SystemZ::CCMASK_TM_ALL_0;
+ if (CCMask == SystemZ::CCMASK_CMP_GT)
+ return SystemZ::CCMASK_TM_SOME_1;
}
- // Prefer CL for zero-extended loads.
- if (CmpOp1.getOpcode() == ISD::ZERO_EXTEND ||
- ISD::isZEXTLoad(CmpOp1.getNode()))
- return true;
+ // Check for equality comparisons with the mask, or the equivalent.
+ if (CmpVal == Mask) {
+ if (CCMask == SystemZ::CCMASK_CMP_EQ)
+ return SystemZ::CCMASK_TM_ALL_1;
+ if (CCMask == SystemZ::CCMASK_CMP_NE)
+ return SystemZ::CCMASK_TM_SOME_0;
+ }
+ if (EffectivelyUnsigned && CmpVal >= Mask - Low && CmpVal < Mask) {
+ if (CCMask == SystemZ::CCMASK_CMP_GT)
+ return SystemZ::CCMASK_TM_ALL_1;
+ if (CCMask == SystemZ::CCMASK_CMP_LE)
+ return SystemZ::CCMASK_TM_SOME_0;
+ }
+ if (EffectivelyUnsigned && CmpVal > Mask - Low && CmpVal <= Mask) {
+ if (CCMask == SystemZ::CCMASK_CMP_GE)
+ return SystemZ::CCMASK_TM_ALL_1;
+ if (CCMask == SystemZ::CCMASK_CMP_LT)
+ return SystemZ::CCMASK_TM_SOME_0;
+ }
- // ...and for "in-register" zero extensions.
- if (CmpOp1.getOpcode() == ISD::AND && CmpOp1.getValueType() == MVT::i64) {
- SDValue Mask = CmpOp1.getOperand(1);
- if (Mask.getOpcode() == ISD::Constant &&
- cast<ConstantSDNode>(Mask)->getZExtValue() == 0xffffffff)
- return true;
+ // Check for ordered comparisons with the top bit.
+ if (EffectivelyUnsigned && CmpVal >= Mask - High && CmpVal < High) {
+ if (CCMask == SystemZ::CCMASK_CMP_LE)
+ return SystemZ::CCMASK_TM_MSB_0;
+ if (CCMask == SystemZ::CCMASK_CMP_GT)
+ return SystemZ::CCMASK_TM_MSB_1;
+ }
+ if (EffectivelyUnsigned && CmpVal > Mask - High && CmpVal <= High) {
+ if (CCMask == SystemZ::CCMASK_CMP_LT)
+ return SystemZ::CCMASK_TM_MSB_0;
+ if (CCMask == SystemZ::CCMASK_CMP_GE)
+ return SystemZ::CCMASK_TM_MSB_1;
}
- return false;
+ // If there are just two bits, we can do equality checks for Low and High
+ // as well.
+ if (Mask == Low + High) {
+ if (CCMask == SystemZ::CCMASK_CMP_EQ && CmpVal == Low)
+ return SystemZ::CCMASK_TM_MIXED_MSB_0;
+ if (CCMask == SystemZ::CCMASK_CMP_NE && CmpVal == Low)
+ return SystemZ::CCMASK_TM_MIXED_MSB_0 ^ SystemZ::CCMASK_ANY;
+ if (CCMask == SystemZ::CCMASK_CMP_EQ && CmpVal == High)
+ return SystemZ::CCMASK_TM_MIXED_MSB_1;
+ if (CCMask == SystemZ::CCMASK_CMP_NE && CmpVal == High)
+ return SystemZ::CCMASK_TM_MIXED_MSB_1 ^ SystemZ::CCMASK_ANY;
+ }
+
+ // Looks like we've exhausted our options.
+ return 0;
+}
+
+// See whether the comparison (Opcode CmpOp0, CmpOp1, ICmpType) can be
+// implemented as a TEST UNDER MASK instruction when the condition being
+// tested is as described by CCValid and CCMask. Update the arguments
+// with the TM version if so.
+static void adjustForTestUnderMask(SelectionDAG &DAG, unsigned &Opcode,
+ SDValue &CmpOp0, SDValue &CmpOp1,
+ unsigned &CCValid, unsigned &CCMask,
+ unsigned &ICmpType) {
+ // Check that we have a comparison with a constant.
+ ConstantSDNode *ConstCmpOp1 = dyn_cast<ConstantSDNode>(CmpOp1);
+ if (!ConstCmpOp1)
+ return;
+ uint64_t CmpVal = ConstCmpOp1->getZExtValue();
+
+ // Check whether the nonconstant input is an AND with a constant mask.
+ if (CmpOp0.getOpcode() != ISD::AND)
+ return;
+ SDValue AndOp0 = CmpOp0.getOperand(0);
+ SDValue AndOp1 = CmpOp0.getOperand(1);
+ ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(AndOp1.getNode());
+ if (!Mask)
+ return;
+ uint64_t MaskVal = Mask->getZExtValue();
+
+ // Check whether the combination of mask, comparison value and comparison
+ // type are suitable.
+ unsigned BitSize = CmpOp0.getValueType().getSizeInBits();
+ unsigned NewCCMask, ShiftVal;
+ if (ICmpType != SystemZICMP::SignedOnly &&
+ AndOp0.getOpcode() == ISD::SHL &&
+ isSimpleShift(AndOp0, ShiftVal) &&
+ (NewCCMask = getTestUnderMaskCond(BitSize, CCMask, MaskVal >> ShiftVal,
+ CmpVal >> ShiftVal,
+ SystemZICMP::Any))) {
+ AndOp0 = AndOp0.getOperand(0);
+ AndOp1 = DAG.getConstant(MaskVal >> ShiftVal, AndOp0.getValueType());
+ } else if (ICmpType != SystemZICMP::SignedOnly &&
+ AndOp0.getOpcode() == ISD::SRL &&
+ isSimpleShift(AndOp0, ShiftVal) &&
+ (NewCCMask = getTestUnderMaskCond(BitSize, CCMask,
+ MaskVal << ShiftVal,
+ CmpVal << ShiftVal,
+ SystemZICMP::UnsignedOnly))) {
+ AndOp0 = AndOp0.getOperand(0);
+ AndOp1 = DAG.getConstant(MaskVal << ShiftVal, AndOp0.getValueType());
+ } else {
+ NewCCMask = getTestUnderMaskCond(BitSize, CCMask, MaskVal, CmpVal,
+ ICmpType);
+ if (!NewCCMask)
+ return;
+ }
+
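+  // For example, ((X << 2) & 0x30) == 0 is handled here as a test of
+  // (X & 0xC) == 0, folding the shift into the mask.
+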
+ // Go ahead and make the change.
+ Opcode = SystemZISD::TM;
+ CmpOp0 = AndOp0;
+ CmpOp1 = AndOp1;
+ ICmpType = (bool(NewCCMask & SystemZ::CCMASK_TM_MIXED_MSB_0) !=
+ bool(NewCCMask & SystemZ::CCMASK_TM_MIXED_MSB_1));
+ CCValid = SystemZ::CCMASK_TM;
+ CCMask = NewCCMask;
}
// Return a target node that compares CmpOp0 with CmpOp1 and stores a
// 2-bit result in CC. Set CCValid to the CCMASK_* of all possible
// 2-bit results and CCMask to the subset of those results that are
// associated with Cond.
-static SDValue emitCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1,
+static SDValue emitCmp(const SystemZTargetMachine &TM, SelectionDAG &DAG,
+ SDLoc DL, SDValue CmpOp0, SDValue CmpOp1,
ISD::CondCode Cond, unsigned &CCValid,
unsigned &CCMask) {
bool IsUnsigned = false;
CCMask = CCMaskForCondCode(Cond);
- if (CmpOp0.getValueType().isFloatingPoint())
+ unsigned Opcode, ICmpType = 0;
+ if (CmpOp0.getValueType().isFloatingPoint()) {
CCValid = SystemZ::CCMASK_FCMP;
- else {
+ Opcode = SystemZISD::FCMP;
+ } else {
IsUnsigned = CCMask & SystemZ::CCMASK_CMP_UO;
CCValid = SystemZ::CCMASK_ICMP;
CCMask &= CCValid;
adjustZeroCmp(DAG, IsUnsigned, CmpOp0, CmpOp1, CCMask);
adjustSubwordCmp(DAG, IsUnsigned, CmpOp0, CmpOp1, CCMask);
- if (preferUnsignedComparison(DAG, CmpOp0, CmpOp1, CCMask))
- IsUnsigned = true;
+ Opcode = SystemZISD::ICMP;
+ // Choose the type of comparison. Equality and inequality tests can
+ // use either signed or unsigned comparisons. The choice also doesn't
+ // matter if both sign bits are known to be clear. In those cases we
+ // want to give the main isel code the freedom to choose whichever
+ // form fits best.
+ if (CCMask == SystemZ::CCMASK_CMP_EQ ||
+ CCMask == SystemZ::CCMASK_CMP_NE ||
+ (DAG.SignBitIsZero(CmpOp0) && DAG.SignBitIsZero(CmpOp1)))
+ ICmpType = SystemZICMP::Any;
+ else if (IsUnsigned)
+ ICmpType = SystemZICMP::UnsignedOnly;
+ else
+ ICmpType = SystemZICMP::SignedOnly;
}
- SDLoc DL(CmpOp0);
- return DAG.getNode((IsUnsigned ? SystemZISD::UCMP : SystemZISD::CMP),
- DL, MVT::Glue, CmpOp0, CmpOp1);
+ if (shouldSwapCmpOperands(CmpOp0, CmpOp1, ICmpType)) {
+ std::swap(CmpOp0, CmpOp1);
+ CCMask = ((CCMask & SystemZ::CCMASK_CMP_EQ) |
+ (CCMask & SystemZ::CCMASK_CMP_GT ? SystemZ::CCMASK_CMP_LT : 0) |
+ (CCMask & SystemZ::CCMASK_CMP_LT ? SystemZ::CCMASK_CMP_GT : 0) |
+ (CCMask & SystemZ::CCMASK_CMP_UO));
+ }
+
+ adjustForTestUnderMask(DAG, Opcode, CmpOp0, CmpOp1, CCValid, CCMask,
+ ICmpType);
+ if (Opcode == SystemZISD::ICMP || Opcode == SystemZISD::TM)
+ return DAG.getNode(Opcode, DL, MVT::Glue, CmpOp0, CmpOp1,
+ DAG.getConstant(ICmpType, MVT::i32));
+ return DAG.getNode(Opcode, DL, MVT::Glue, CmpOp0, CmpOp1);
+}
+
+// Implement a 32-bit *MUL_LOHI operation by extending both operands to
+// 64 bits. Extend is the extension type to use. Store the high part
+// in Hi and the low part in Lo.
+static void lowerMUL_LOHI32(SelectionDAG &DAG, SDLoc DL,
+ unsigned Extend, SDValue Op0, SDValue Op1,
+ SDValue &Hi, SDValue &Lo) {
+ Op0 = DAG.getNode(Extend, DL, MVT::i64, Op0);
+ Op1 = DAG.getNode(Extend, DL, MVT::i64, Op1);
+ SDValue Mul = DAG.getNode(ISD::MUL, DL, MVT::i64, Op0, Op1);
+ Hi = DAG.getNode(ISD::SRL, DL, MVT::i64, Mul, DAG.getConstant(32, MVT::i64));
+ Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Hi);
+ Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Mul);
}
// Lower a binary operation that produces two VT results, one in each
@@ -1092,14 +1482,38 @@ static void lowerGR128Binary(SelectionDAG &DAG, SDLoc DL, EVT VT,
SDValue Result = DAG.getNode(Opcode, DL, MVT::Untyped,
SDValue(In128, 0), Op1);
bool Is32Bit = is32Bit(VT);
- SDValue SubReg0 = DAG.getTargetConstant(SystemZ::even128(Is32Bit), VT);
- SDValue SubReg1 = DAG.getTargetConstant(SystemZ::odd128(Is32Bit), VT);
- SDNode *Reg0 = DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL,
- VT, Result, SubReg0);
- SDNode *Reg1 = DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL,
- VT, Result, SubReg1);
- Even = SDValue(Reg0, 0);
- Odd = SDValue(Reg1, 0);
+ Even = DAG.getTargetExtractSubreg(SystemZ::even128(Is32Bit), DL, VT, Result);
+ Odd = DAG.getTargetExtractSubreg(SystemZ::odd128(Is32Bit), DL, VT, Result);
+}
+
+SDValue SystemZTargetLowering::lowerSETCC(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDValue CmpOp0 = Op.getOperand(0);
+ SDValue CmpOp1 = Op.getOperand(1);
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
+ SDLoc DL(Op);
+
+ unsigned CCValid, CCMask;
+ SDValue Glue = emitCmp(TM, DAG, DL, CmpOp0, CmpOp1, CC, CCValid, CCMask);
+
+ IPMConversion Conversion = getIPMConversion(CCValid, CCMask);
+ SDValue Result = DAG.getNode(SystemZISD::IPM, DL, MVT::i32, Glue);
+
+ if (Conversion.XORValue)
+ Result = DAG.getNode(ISD::XOR, DL, MVT::i32, Result,
+ DAG.getConstant(Conversion.XORValue, MVT::i32));
+
+ if (Conversion.AddValue)
+ Result = DAG.getNode(ISD::ADD, DL, MVT::i32, Result,
+ DAG.getConstant(Conversion.AddValue, MVT::i32));
+
+ // The SHR/AND sequence should get optimized to an RISBG.
+ Result = DAG.getNode(ISD::SRL, DL, MVT::i32, Result,
+ DAG.getConstant(Conversion.Bit, MVT::i32));
+ if (Conversion.Bit != 31)
+ Result = DAG.getNode(ISD::AND, DL, MVT::i32, Result,
+ DAG.getConstant(1, MVT::i32));
+ return Result;
}
SDValue SystemZTargetLowering::lowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
@@ -1111,7 +1525,7 @@ SDValue SystemZTargetLowering::lowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
SDLoc DL(Op);
unsigned CCValid, CCMask;
- SDValue Flags = emitCmp(DAG, CmpOp0, CmpOp1, CC, CCValid, CCMask);
+ SDValue Flags = emitCmp(TM, DAG, DL, CmpOp0, CmpOp1, CC, CCValid, CCMask);
return DAG.getNode(SystemZISD::BR_CCMASK, DL, Op.getValueType(),
Chain, DAG.getConstant(CCValid, MVT::i32),
DAG.getConstant(CCMask, MVT::i32), Dest, Flags);
@@ -1127,7 +1541,7 @@ SDValue SystemZTargetLowering::lowerSELECT_CC(SDValue Op,
SDLoc DL(Op);
unsigned CCValid, CCMask;
- SDValue Flags = emitCmp(DAG, CmpOp0, CmpOp1, CC, CCValid, CCMask);
+ SDValue Flags = emitCmp(TM, DAG, DL, CmpOp0, CmpOp1, CC, CCValid, CCMask);
SmallVector<SDValue, 5> Ops;
Ops.push_back(TrueOp);
@@ -1151,18 +1565,18 @@ SDValue SystemZTargetLowering::lowerGlobalAddress(GlobalAddressSDNode *Node,
SDValue Result;
if (Subtarget.isPC32DBLSymbol(GV, RM, CM)) {
- // Make sure that the offset is aligned to a halfword. If it isn't,
- // create an "anchor" at the previous 12-bit boundary.
- // FIXME check whether there is a better way of handling this.
- if (Offset & 1) {
- Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT,
- Offset & ~uint64_t(0xfff));
- Offset &= 0xfff;
- } else {
- Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, Offset);
+ // Assign anchors at 1<<12 byte boundaries.
+ uint64_t Anchor = Offset & ~uint64_t(0xfff);
+ Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, Anchor);
+ Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
+
+ // The offset can be folded into the address if it is aligned to a halfword.
+ Offset -= Anchor;
+ if (Offset != 0 && (Offset & 1) == 0) {
+ SDValue Full = DAG.getTargetGlobalAddress(GV, DL, PtrVT, Anchor + Offset);
+ Result = DAG.getNode(SystemZISD::PCREL_OFFSET, DL, PtrVT, Full, Result);
Offset = 0;
}
- Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
} else {
Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, SystemZII::MO_GOT);
Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
@@ -1264,24 +1678,33 @@ SDValue SystemZTargetLowering::lowerBITCAST(SDValue Op,
EVT InVT = In.getValueType();
EVT ResVT = Op.getValueType();
- SDValue SubReg32 = DAG.getTargetConstant(SystemZ::subreg_32bit, MVT::i64);
- SDValue Shift32 = DAG.getConstant(32, MVT::i64);
if (InVT == MVT::i32 && ResVT == MVT::f32) {
- SDValue In64 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, In);
- SDValue Shift = DAG.getNode(ISD::SHL, DL, MVT::i64, In64, Shift32);
- SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::f64, Shift);
- SDNode *Out = DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL,
- MVT::f32, Out64, SubReg32);
- return SDValue(Out, 0);
+ SDValue In64;
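+    // A 32-bit float lives in the high half of a 64-bit FP register,
+    // so the integer value needs to end up in the high 32 bits of In64
+    // before the bitcast to f64.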
+ if (Subtarget.hasHighWord()) {
+ SDNode *U64 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL,
+ MVT::i64);
+ In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_h32, DL,
+ MVT::i64, SDValue(U64, 0), In);
+ } else {
+ In64 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, In);
+ In64 = DAG.getNode(ISD::SHL, DL, MVT::i64, In64,
+ DAG.getConstant(32, MVT::i64));
+ }
+ SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::f64, In64);
+ return DAG.getTargetExtractSubreg(SystemZ::subreg_h32,
+ DL, MVT::f32, Out64);
}
if (InVT == MVT::f32 && ResVT == MVT::i32) {
SDNode *U64 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::f64);
- SDNode *In64 = DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, DL,
- MVT::f64, SDValue(U64, 0), In, SubReg32);
- SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::i64, SDValue(In64, 0));
- SDValue Shift = DAG.getNode(ISD::SRL, DL, MVT::i64, Out64, Shift32);
- SDValue Out = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Shift);
- return Out;
+ SDValue In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_h32, DL,
+ MVT::f64, SDValue(U64, 0), In);
+ SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::i64, In64);
+ if (Subtarget.hasHighWord())
+ return DAG.getTargetExtractSubreg(SystemZ::subreg_h32, DL,
+ MVT::i32, Out64);
+ SDValue Shift = DAG.getNode(ISD::SRL, DL, MVT::i64, Out64,
+ DAG.getConstant(32, MVT::i64));
+ return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Shift);
}
llvm_unreachable("Unexpected bitcast combination");
}
@@ -1364,18 +1787,64 @@ lowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const {
return DAG.getMergeValues(Ops, 2, DL);
}
-SDValue SystemZTargetLowering::lowerUMUL_LOHI(SDValue Op,
+SDValue SystemZTargetLowering::lowerSMUL_LOHI(SDValue Op,
SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
SDLoc DL(Op);
- assert(!is32Bit(VT) && "Only support 64-bit UMUL_LOHI");
+ SDValue Ops[2];
+ if (is32Bit(VT))
+ // Just do a normal 64-bit multiplication and extract the results.
+ // We define this so that it can be used for constant division.
+ lowerMUL_LOHI32(DAG, DL, ISD::SIGN_EXTEND, Op.getOperand(0),
+ Op.getOperand(1), Ops[1], Ops[0]);
+ else {
+ // Do a full 128-bit multiplication based on UMUL_LOHI64:
+ //
+ // (ll * rl) + ((lh * rl) << 64) + ((ll * rh) << 64)
+ //
+ // but using the fact that the upper halves are either all zeros
+ // or all ones:
+ //
+ // (ll * rl) - ((lh & rl) << 64) - ((ll & rh) << 64)
+ //
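+    // (When lh is all ones, lh * rl == -rl == -(lh & rl), and
+    // similarly for rh; when lh is all zeros, both sides are zero.)
+    //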
+    // and grouping the two right-hand terms together, since they are
+    // quicker to compute than the multiplication:
+ //
+ // (ll * rl) - (((lh & rl) + (ll & rh)) << 64)
+ SDValue C63 = DAG.getConstant(63, MVT::i64);
+ SDValue LL = Op.getOperand(0);
+ SDValue RL = Op.getOperand(1);
+ SDValue LH = DAG.getNode(ISD::SRA, DL, VT, LL, C63);
+ SDValue RH = DAG.getNode(ISD::SRA, DL, VT, RL, C63);
+ // UMUL_LOHI64 returns the low result in the odd register and the high
+ // result in the even register. SMUL_LOHI is defined to return the
+ // low half first, so the results are in reverse order.
+ lowerGR128Binary(DAG, DL, VT, SystemZ::AEXT128_64, SystemZISD::UMUL_LOHI64,
+ LL, RL, Ops[1], Ops[0]);
+ SDValue NegLLTimesRH = DAG.getNode(ISD::AND, DL, VT, LL, RH);
+ SDValue NegLHTimesRL = DAG.getNode(ISD::AND, DL, VT, LH, RL);
+ SDValue NegSum = DAG.getNode(ISD::ADD, DL, VT, NegLLTimesRH, NegLHTimesRL);
+ Ops[1] = DAG.getNode(ISD::SUB, DL, VT, Ops[1], NegSum);
+ }
+ return DAG.getMergeValues(Ops, 2, DL);
+}
- // UMUL_LOHI64 returns the low result in the odd register and the high
- // result in the even register. UMUL_LOHI is defined to return the
- // low half first, so the results are in reverse order.
+SDValue SystemZTargetLowering::lowerUMUL_LOHI(SDValue Op,
+ SelectionDAG &DAG) const {
+ EVT VT = Op.getValueType();
+ SDLoc DL(Op);
SDValue Ops[2];
- lowerGR128Binary(DAG, DL, VT, SystemZ::AEXT128_64, SystemZISD::UMUL_LOHI64,
- Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
+ if (is32Bit(VT))
+ // Just do a normal 64-bit multiplication and extract the results.
+ // We define this so that it can be used for constant division.
+ lowerMUL_LOHI32(DAG, DL, ISD::ZERO_EXTEND, Op.getOperand(0),
+ Op.getOperand(1), Ops[1], Ops[0]);
+ else
+ // UMUL_LOHI64 returns the low result in the odd register and the high
+ // result in the even register. UMUL_LOHI is defined to return the
+ // low half first, so the results are in reverse order.
+ lowerGR128Binary(DAG, DL, VT, SystemZ::AEXT128_64, SystemZISD::UMUL_LOHI64,
+ Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
return DAG.getMergeValues(Ops, 2, DL);
}
@@ -1464,10 +1933,10 @@ SDValue SystemZTargetLowering::lowerOR(SDValue Op, SelectionDAG &DAG) const {
// high 32 bits and just masks out low bits. We can skip it if so.
if (HighOp.getOpcode() == ISD::AND &&
HighOp.getOperand(1).getOpcode() == ISD::Constant) {
- ConstantSDNode *MaskNode = cast<ConstantSDNode>(HighOp.getOperand(1));
- uint64_t Mask = MaskNode->getZExtValue() | Masks[High];
- if ((Mask >> 32) == 0xffffffff)
- HighOp = HighOp.getOperand(0);
+ SDValue HighOp0 = HighOp.getOperand(0);
+ uint64_t Mask = cast<ConstantSDNode>(HighOp.getOperand(1))->getZExtValue();
+ if (DAG.MaskedValueIsZero(HighOp0, APInt(64, ~(Mask | 0xffffffff))))
+ HighOp = HighOp0;
}
// Take advantage of the fact that all GR32 operations only change the
@@ -1476,10 +1945,8 @@ SDValue SystemZTargetLowering::lowerOR(SDValue Op, SelectionDAG &DAG) const {
// can be folded.
SDLoc DL(Op);
SDValue Low32 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, LowOp);
- SDValue SubReg32 = DAG.getTargetConstant(SystemZ::subreg_32bit, MVT::i64);
- SDNode *Result = DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, DL,
- MVT::i64, HighOp, Low32, SubReg32);
- return SDValue(Result, 0);
+ return DAG.getTargetInsertSubreg(SystemZ::subreg_l32, DL,
+ MVT::i64, HighOp, Low32);
}
// Op is an 8-, 16-bit or 32-bit ATOMIC_LOAD_* operation. Lower the first
@@ -1618,6 +2085,26 @@ SDValue SystemZTargetLowering::lowerSTACKRESTORE(SDValue Op,
SystemZ::R15D, Op.getOperand(1));
}
+SDValue SystemZTargetLowering::lowerPREFETCH(SDValue Op,
+ SelectionDAG &DAG) const {
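+  // The PREFETCH operands are (chain, address, rw, locality, cache
+  // type), where a cache type of 1 requests a data prefetch and 0 an
+  // instruction prefetch.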
+ bool IsData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
+ if (!IsData)
+ // Just preserve the chain.
+ return Op.getOperand(0);
+
+ bool IsWrite = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
+ unsigned Code = IsWrite ? SystemZ::PFD_WRITE : SystemZ::PFD_READ;
+ MemIntrinsicSDNode *Node = cast<MemIntrinsicSDNode>(Op.getNode());
+ SDValue Ops[] = {
+ Op.getOperand(0),
+ DAG.getConstant(Code, MVT::i32),
+ Op.getOperand(1)
+ };
+ return DAG.getMemIntrinsicNode(SystemZISD::PREFETCH, SDLoc(Op),
+ Node->getVTList(), Ops, array_lengthof(Ops),
+ Node->getMemoryVT(), Node->getMemOperand());
+}
+
SDValue SystemZTargetLowering::LowerOperation(SDValue Op,
SelectionDAG &DAG) const {
switch (Op.getOpcode()) {
@@ -1625,6 +2112,8 @@ SDValue SystemZTargetLowering::LowerOperation(SDValue Op,
return lowerBR_CC(Op, DAG);
case ISD::SELECT_CC:
return lowerSELECT_CC(Op, DAG);
+ case ISD::SETCC:
+ return lowerSETCC(Op, DAG);
case ISD::GlobalAddress:
return lowerGlobalAddress(cast<GlobalAddressSDNode>(Op), DAG);
case ISD::GlobalTLSAddress:
@@ -1643,6 +2132,8 @@ SDValue SystemZTargetLowering::LowerOperation(SDValue Op,
return lowerVACOPY(Op, DAG);
case ISD::DYNAMIC_STACKALLOC:
return lowerDYNAMIC_STACKALLOC(Op, DAG);
+ case ISD::SMUL_LOHI:
+ return lowerSMUL_LOHI(Op, DAG);
case ISD::UMUL_LOHI:
return lowerUMUL_LOHI(Op, DAG);
case ISD::SDIVREM:
@@ -1679,6 +2170,8 @@ SDValue SystemZTargetLowering::LowerOperation(SDValue Op,
return lowerSTACKSAVE(Op, DAG);
case ISD::STACKRESTORE:
return lowerSTACKRESTORE(Op, DAG);
+ case ISD::PREFETCH:
+ return lowerPREFETCH(Op, DAG);
default:
llvm_unreachable("Unexpected node to lower");
}
@@ -1689,9 +2182,12 @@ const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const {
switch (Opcode) {
OPCODE(RET_FLAG);
OPCODE(CALL);
+ OPCODE(SIBCALL);
OPCODE(PCREL_WRAPPER);
- OPCODE(CMP);
- OPCODE(UCMP);
+ OPCODE(PCREL_OFFSET);
+ OPCODE(ICMP);
+ OPCODE(FCMP);
+ OPCODE(TM);
OPCODE(BR_CCMASK);
OPCODE(SELECT_CCMASK);
OPCODE(ADJDYNALLOC);
@@ -1701,6 +2197,19 @@ const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const {
OPCODE(UDIVREM32);
OPCODE(UDIVREM64);
OPCODE(MVC);
+ OPCODE(MVC_LOOP);
+ OPCODE(NC);
+ OPCODE(NC_LOOP);
+ OPCODE(OC);
+ OPCODE(OC_LOOP);
+ OPCODE(XC);
+ OPCODE(XC_LOOP);
+ OPCODE(CLC);
+ OPCODE(CLC_LOOP);
+ OPCODE(STRCMP);
+ OPCODE(STPCPY);
+ OPCODE(SEARCH_STRING);
+ OPCODE(IPM);
OPCODE(ATOMIC_SWAPW);
OPCODE(ATOMIC_LOADW_ADD);
OPCODE(ATOMIC_LOADW_SUB);
@@ -1713,6 +2222,7 @@ const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const {
OPCODE(ATOMIC_LOADW_UMIN);
OPCODE(ATOMIC_LOADW_UMAX);
OPCODE(ATOMIC_CMP_SWAPW);
+ OPCODE(PREFETCH);
}
return NULL;
#undef OPCODE
@@ -1742,6 +2252,31 @@ static MachineBasicBlock *splitBlockAfter(MachineInstr *MI,
return NewMBB;
}
+// Split MBB before MI and return the new block (the one that contains MI).
+static MachineBasicBlock *splitBlockBefore(MachineInstr *MI,
+ MachineBasicBlock *MBB) {
+ MachineBasicBlock *NewMBB = emitBlockAfter(MBB);
+ NewMBB->splice(NewMBB->begin(), MBB, MI, MBB->end());
+ NewMBB->transferSuccessorsAndUpdatePHIs(MBB);
+ return NewMBB;
+}
+
+// Force base value Base into a register before MI. Return the register.
+static unsigned forceReg(MachineInstr *MI, MachineOperand &Base,
+ const SystemZInstrInfo *TII) {
+ if (Base.isReg())
+ return Base.getReg();
+
+ MachineBasicBlock *MBB = MI->getParent();
+ MachineFunction &MF = *MBB->getParent();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+
+ unsigned Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
+ BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(SystemZ::LA), Reg)
+ .addOperand(Base).addImm(0).addReg(0);
+ return Reg;
+}
+
// Implement EmitInstrWithCustomInserter for pseudo Select* instruction MI.
MachineBasicBlock *
SystemZTargetLowering::emitSelect(MachineInstr *MI,
@@ -1756,7 +2291,7 @@ SystemZTargetLowering::emitSelect(MachineInstr *MI,
DebugLoc DL = MI->getDebugLoc();
MachineBasicBlock *StartMBB = MBB;
- MachineBasicBlock *JoinMBB = splitBlockAfter(MI, MBB);
+ MachineBasicBlock *JoinMBB = splitBlockBefore(MI, MBB);
MachineBasicBlock *FalseMBB = emitBlockAfter(StartMBB);
// StartMBB:
@@ -1777,7 +2312,7 @@ SystemZTargetLowering::emitSelect(MachineInstr *MI,
// %Result = phi [ %FalseReg, FalseMBB ], [ %TrueReg, StartMBB ]
// ...
MBB = JoinMBB;
- BuildMI(*MBB, MBB->begin(), DL, TII->get(SystemZ::PHI), DestReg)
+ BuildMI(*MBB, MI, DL, TII->get(SystemZ::PHI), DestReg)
.addReg(TrueReg).addMBB(StartMBB)
.addReg(FalseReg).addMBB(FalseMBB);
@@ -1824,7 +2359,7 @@ SystemZTargetLowering::emitCondStore(MachineInstr *MI,
CCMask ^= CCValid;
MachineBasicBlock *StartMBB = MBB;
- MachineBasicBlock *JoinMBB = splitBlockAfter(MI, MBB);
+ MachineBasicBlock *JoinMBB = splitBlockBefore(MI, MBB);
MachineBasicBlock *FalseMBB = emitBlockAfter(StartMBB);
// StartMBB:
@@ -1900,7 +2435,7 @@ SystemZTargetLowering::emitAtomicLoadBinary(MachineInstr *MI,
// Insert a basic block for the main loop.
MachineBasicBlock *StartMBB = MBB;
- MachineBasicBlock *DoneMBB = splitBlockAfter(MI, MBB);
+ MachineBasicBlock *DoneMBB = splitBlockBefore(MI, MBB);
MachineBasicBlock *LoopMBB = emitBlockAfter(StartMBB);
// StartMBB:
@@ -1934,11 +2469,11 @@ SystemZTargetLowering::emitAtomicLoadBinary(MachineInstr *MI,
.addReg(RotatedOldVal).addOperand(Src2);
if (BitSize < 32)
// XILF with the upper BitSize bits set.
- BuildMI(MBB, DL, TII->get(SystemZ::XILF32), RotatedNewVal)
+ BuildMI(MBB, DL, TII->get(SystemZ::XILF), RotatedNewVal)
.addReg(Tmp).addImm(uint32_t(~0 << (32 - BitSize)));
else if (BitSize == 32)
// XILF with every bit set.
- BuildMI(MBB, DL, TII->get(SystemZ::XILF32), RotatedNewVal)
+ BuildMI(MBB, DL, TII->get(SystemZ::XILF), RotatedNewVal)
.addReg(Tmp).addImm(~uint32_t(0));
else {
// Use LCGR and add -1 to the result, which is more compact than
@@ -2022,7 +2557,7 @@ SystemZTargetLowering::emitAtomicLoadMinMax(MachineInstr *MI,
// Insert 3 basic blocks for the loop.
MachineBasicBlock *StartMBB = MBB;
- MachineBasicBlock *DoneMBB = splitBlockAfter(MI, MBB);
+ MachineBasicBlock *DoneMBB = splitBlockBefore(MI, MBB);
MachineBasicBlock *LoopMBB = emitBlockAfter(StartMBB);
MachineBasicBlock *UseAltMBB = emitBlockAfter(LoopMBB);
MachineBasicBlock *UpdateMBB = emitBlockAfter(UseAltMBB);
@@ -2129,7 +2664,7 @@ SystemZTargetLowering::emitAtomicCmpSwapW(MachineInstr *MI,
// Insert 2 basic blocks for the loop.
MachineBasicBlock *StartMBB = MBB;
- MachineBasicBlock *DoneMBB = splitBlockAfter(MI, MBB);
+ MachineBasicBlock *DoneMBB = splitBlockBefore(MI, MBB);
MachineBasicBlock *LoopMBB = emitBlockAfter(StartMBB);
MachineBasicBlock *SetMBB = emitBlockAfter(LoopMBB);
@@ -2205,8 +2740,8 @@ SystemZTargetLowering::emitAtomicCmpSwapW(MachineInstr *MI,
// Emit an extension from a GR32 or GR64 to a GR128. ClearEven is true
// if the high register of the GR128 value must be cleared or false if
-// it's "don't care". SubReg is subreg_odd32 when extending a GR32
-// and subreg_odd when extending a GR64.
+// it's "don't care". SubReg is subreg_l32 when extending a GR32
+// and subreg_l64 when extending a GR64.
MachineBasicBlock *
SystemZTargetLowering::emitExt128(MachineInstr *MI,
MachineBasicBlock *MBB,
@@ -2228,7 +2763,7 @@ SystemZTargetLowering::emitExt128(MachineInstr *MI,
BuildMI(*MBB, MI, DL, TII->get(SystemZ::LLILL), Zero64)
.addImm(0);
BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), NewIn128)
- .addReg(In128).addReg(Zero64).addImm(SystemZ::subreg_high);
+ .addReg(In128).addReg(Zero64).addImm(SystemZ::subreg_h64);
In128 = NewIn128;
}
BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), Dest)
@@ -2239,28 +2774,237 @@ SystemZTargetLowering::emitExt128(MachineInstr *MI,
}
MachineBasicBlock *
-SystemZTargetLowering::emitMVCWrapper(MachineInstr *MI,
- MachineBasicBlock *MBB) const {
+SystemZTargetLowering::emitMemMemWrapper(MachineInstr *MI,
+ MachineBasicBlock *MBB,
+ unsigned Opcode) const {
const SystemZInstrInfo *TII = TM.getInstrInfo();
+ MachineFunction &MF = *MBB->getParent();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
DebugLoc DL = MI->getDebugLoc();
- MachineOperand DestBase = MI->getOperand(0);
+ MachineOperand DestBase = earlyUseOperand(MI->getOperand(0));
uint64_t DestDisp = MI->getOperand(1).getImm();
- MachineOperand SrcBase = MI->getOperand(2);
+ MachineOperand SrcBase = earlyUseOperand(MI->getOperand(2));
uint64_t SrcDisp = MI->getOperand(3).getImm();
uint64_t Length = MI->getOperand(4).getImm();
- BuildMI(*MBB, MI, DL, TII->get(SystemZ::MVC))
- .addOperand(DestBase).addImm(DestDisp).addImm(Length)
- .addOperand(SrcBase).addImm(SrcDisp);
+ // When generating more than one CLC, all but the last will need to
+ // branch to the end when a difference is found.
+ MachineBasicBlock *EndMBB = (Length > 256 && Opcode == SystemZ::CLC ?
+ splitBlockAfter(MI, MBB) : 0);
+
+ // Check for the loop form, in which operand 5 is the trip count.
+ if (MI->getNumExplicitOperands() > 5) {
+ bool HaveSingleBase = DestBase.isIdenticalTo(SrcBase);
+
+ uint64_t StartCountReg = MI->getOperand(5).getReg();
+ uint64_t StartSrcReg = forceReg(MI, SrcBase, TII);
+ uint64_t StartDestReg = (HaveSingleBase ? StartSrcReg :
+ forceReg(MI, DestBase, TII));
+
+ const TargetRegisterClass *RC = &SystemZ::ADDR64BitRegClass;
+ uint64_t ThisSrcReg = MRI.createVirtualRegister(RC);
+ uint64_t ThisDestReg = (HaveSingleBase ? ThisSrcReg :
+ MRI.createVirtualRegister(RC));
+ uint64_t NextSrcReg = MRI.createVirtualRegister(RC);
+ uint64_t NextDestReg = (HaveSingleBase ? NextSrcReg :
+ MRI.createVirtualRegister(RC));
+
+ RC = &SystemZ::GR64BitRegClass;
+ uint64_t ThisCountReg = MRI.createVirtualRegister(RC);
+ uint64_t NextCountReg = MRI.createVirtualRegister(RC);
+
+ MachineBasicBlock *StartMBB = MBB;
+ MachineBasicBlock *DoneMBB = splitBlockBefore(MI, MBB);
+ MachineBasicBlock *LoopMBB = emitBlockAfter(StartMBB);
+ MachineBasicBlock *NextMBB = (EndMBB ? emitBlockAfter(LoopMBB) : LoopMBB);
+
+ // StartMBB:
+ // # fall through to LoopMBB
+ MBB->addSuccessor(LoopMBB);
+
+ // LoopMBB:
+ // %ThisDestReg = phi [ %StartDestReg, StartMBB ],
+ // [ %NextDestReg, NextMBB ]
+ // %ThisSrcReg = phi [ %StartSrcReg, StartMBB ],
+ // [ %NextSrcReg, NextMBB ]
+ // %ThisCountReg = phi [ %StartCountReg, StartMBB ],
+ // [ %NextCountReg, NextMBB ]
+ // ( PFD 2, 768+DestDisp(%ThisDestReg) )
+ // Opcode DestDisp(256,%ThisDestReg), SrcDisp(%ThisSrcReg)
+ // ( JLH EndMBB )
+ //
+ // The prefetch is used only for MVC. The JLH is used only for CLC.
+ MBB = LoopMBB;
+
+ BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisDestReg)
+ .addReg(StartDestReg).addMBB(StartMBB)
+ .addReg(NextDestReg).addMBB(NextMBB);
+ if (!HaveSingleBase)
+ BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisSrcReg)
+ .addReg(StartSrcReg).addMBB(StartMBB)
+ .addReg(NextSrcReg).addMBB(NextMBB);
+ BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisCountReg)
+ .addReg(StartCountReg).addMBB(StartMBB)
+ .addReg(NextCountReg).addMBB(NextMBB);
+ if (Opcode == SystemZ::MVC)
+ BuildMI(MBB, DL, TII->get(SystemZ::PFD))
+ .addImm(SystemZ::PFD_WRITE)
+ .addReg(ThisDestReg).addImm(DestDisp + 768).addReg(0);
+ BuildMI(MBB, DL, TII->get(Opcode))
+ .addReg(ThisDestReg).addImm(DestDisp).addImm(256)
+ .addReg(ThisSrcReg).addImm(SrcDisp);
+ if (EndMBB) {
+ BuildMI(MBB, DL, TII->get(SystemZ::BRC))
+ .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE)
+ .addMBB(EndMBB);
+ MBB->addSuccessor(EndMBB);
+ MBB->addSuccessor(NextMBB);
+ }
+
+ // NextMBB:
+ // %NextDestReg = LA 256(%ThisDestReg)
+ // %NextSrcReg = LA 256(%ThisSrcReg)
+ // %NextCountReg = AGHI %ThisCountReg, -1
+ // CGHI %NextCountReg, 0
+ // JLH LoopMBB
+ // # fall through to DoneMBB
+ //
+ // The AGHI, CGHI and JLH should be converted to BRCTG by later passes.
+ MBB = NextMBB;
+
+ BuildMI(MBB, DL, TII->get(SystemZ::LA), NextDestReg)
+ .addReg(ThisDestReg).addImm(256).addReg(0);
+ if (!HaveSingleBase)
+ BuildMI(MBB, DL, TII->get(SystemZ::LA), NextSrcReg)
+ .addReg(ThisSrcReg).addImm(256).addReg(0);
+ BuildMI(MBB, DL, TII->get(SystemZ::AGHI), NextCountReg)
+ .addReg(ThisCountReg).addImm(-1);
+ BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
+ .addReg(NextCountReg).addImm(0);
+ BuildMI(MBB, DL, TII->get(SystemZ::BRC))
+ .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE)
+ .addMBB(LoopMBB);
+ MBB->addSuccessor(LoopMBB);
+ MBB->addSuccessor(DoneMBB);
+
+ DestBase = MachineOperand::CreateReg(NextDestReg, false);
+ SrcBase = MachineOperand::CreateReg(NextSrcReg, false);
+ Length &= 255;
+ MBB = DoneMBB;
+ }
+ // Handle any remaining bytes with straight-line code.
+ while (Length > 0) {
+ uint64_t ThisLength = std::min(Length, uint64_t(256));
+ // The previous iteration might have created out-of-range displacements.
+ // Apply them using LAY if so.
+ if (!isUInt<12>(DestDisp)) {
+ unsigned Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
+ BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(SystemZ::LAY), Reg)
+ .addOperand(DestBase).addImm(DestDisp).addReg(0);
+ DestBase = MachineOperand::CreateReg(Reg, false);
+ DestDisp = 0;
+ }
+ if (!isUInt<12>(SrcDisp)) {
+ unsigned Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
+ BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(SystemZ::LAY), Reg)
+ .addOperand(SrcBase).addImm(SrcDisp).addReg(0);
+ SrcBase = MachineOperand::CreateReg(Reg, false);
+ SrcDisp = 0;
+ }
+ BuildMI(*MBB, MI, DL, TII->get(Opcode))
+ .addOperand(DestBase).addImm(DestDisp).addImm(ThisLength)
+ .addOperand(SrcBase).addImm(SrcDisp);
+ DestDisp += ThisLength;
+ SrcDisp += ThisLength;
+ Length -= ThisLength;
+ // If there's another CLC to go, branch to the end if a difference
+ // was found.
+ if (EndMBB && Length > 0) {
+ MachineBasicBlock *NextMBB = splitBlockBefore(MI, MBB);
+ BuildMI(MBB, DL, TII->get(SystemZ::BRC))
+ .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE)
+ .addMBB(EndMBB);
+ MBB->addSuccessor(EndMBB);
+ MBB->addSuccessor(NextMBB);
+ MBB = NextMBB;
+ }
+ }
+ if (EndMBB) {
+ MBB->addSuccessor(EndMBB);
+ MBB = EndMBB;
+ MBB->addLiveIn(SystemZ::CC);
+ }
MI->eraseFromParent();
return MBB;
}
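A minimal sketch of the length split this inserter relies on, assuming the DAG lowering passes Length/256 as the operand-5 trip count (the names below are illustrative):

    #include <cassert>
    #include <cstdint>

    int main() {
      uint64_t Length = 700;             // e.g. a 700-byte MVC-based copy
      uint64_t Count256 = Length / 256;  // loop iterations (operand 5)
      uint64_t Tail = Length & 255;      // handled by straight-line code
      assert(Count256 == 2 && Tail == 188);
      return 0;
    }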
+// Decompose string pseudo-instruction MI into a loop that continually performs
+// Opcode until CC != 3.
+MachineBasicBlock *
+SystemZTargetLowering::emitStringWrapper(MachineInstr *MI,
+ MachineBasicBlock *MBB,
+ unsigned Opcode) const {
+ const SystemZInstrInfo *TII = TM.getInstrInfo();
+ MachineFunction &MF = *MBB->getParent();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ DebugLoc DL = MI->getDebugLoc();
+
+ uint64_t End1Reg = MI->getOperand(0).getReg();
+ uint64_t Start1Reg = MI->getOperand(1).getReg();
+ uint64_t Start2Reg = MI->getOperand(2).getReg();
+ uint64_t CharReg = MI->getOperand(3).getReg();
+
+ const TargetRegisterClass *RC = &SystemZ::GR64BitRegClass;
+ uint64_t This1Reg = MRI.createVirtualRegister(RC);
+ uint64_t This2Reg = MRI.createVirtualRegister(RC);
+ uint64_t End2Reg = MRI.createVirtualRegister(RC);
+
+ MachineBasicBlock *StartMBB = MBB;
+ MachineBasicBlock *DoneMBB = splitBlockBefore(MI, MBB);
+ MachineBasicBlock *LoopMBB = emitBlockAfter(StartMBB);
+
+ // StartMBB:
+ // # fall through to LoopMBB
+ MBB->addSuccessor(LoopMBB);
+
+ // LoopMBB:
+ // %This1Reg = phi [ %Start1Reg, StartMBB ], [ %End1Reg, LoopMBB ]
+ // %This2Reg = phi [ %Start2Reg, StartMBB ], [ %End2Reg, LoopMBB ]
+ // R0L = %CharReg
+ // %End1Reg, %End2Reg = CLST %This1Reg, %This2Reg -- uses R0L
+ // JO LoopMBB
+ // # fall through to DoneMBB
+ //
+ // The load of R0L can be hoisted by post-RA LICM.
+ MBB = LoopMBB;
+
+ BuildMI(MBB, DL, TII->get(SystemZ::PHI), This1Reg)
+ .addReg(Start1Reg).addMBB(StartMBB)
+ .addReg(End1Reg).addMBB(LoopMBB);
+ BuildMI(MBB, DL, TII->get(SystemZ::PHI), This2Reg)
+ .addReg(Start2Reg).addMBB(StartMBB)
+ .addReg(End2Reg).addMBB(LoopMBB);
+ BuildMI(MBB, DL, TII->get(TargetOpcode::COPY), SystemZ::R0L).addReg(CharReg);
+ BuildMI(MBB, DL, TII->get(Opcode))
+ .addReg(End1Reg, RegState::Define).addReg(End2Reg, RegState::Define)
+ .addReg(This1Reg).addReg(This2Reg);
+ BuildMI(MBB, DL, TII->get(SystemZ::BRC))
+ .addImm(SystemZ::CCMASK_ANY).addImm(SystemZ::CCMASK_3).addMBB(LoopMBB);
+ MBB->addSuccessor(LoopMBB);
+ MBB->addSuccessor(DoneMBB);
+
+ DoneMBB->addLiveIn(SystemZ::CC);
+
+ MI->eraseFromParent();
+ return DoneMBB;
+}
+
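The loop built here is the expansion of the CLSTLoop pseudo; as a hedged illustration, the kind of call that ends up selecting it, with R0 holding the terminator (0 for C strings):

    #include <cstring>

    int order(const char *A, const char *B) {
      return std::strcmp(A, B);  // recognized and lowered via CLSTLoop
    }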
MachineBasicBlock *SystemZTargetLowering::
EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const {
switch (MI->getOpcode()) {
+ case SystemZ::Select32Mux:
case SystemZ::Select32:
case SystemZ::SelectF32:
case SystemZ::Select64:
@@ -2268,18 +3012,14 @@ EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const {
case SystemZ::SelectF128:
return emitSelect(MI, MBB);
- case SystemZ::CondStore8_32:
- return emitCondStore(MI, MBB, SystemZ::STC32, 0, false);
- case SystemZ::CondStore8_32Inv:
- return emitCondStore(MI, MBB, SystemZ::STC32, 0, true);
- case SystemZ::CondStore16_32:
- return emitCondStore(MI, MBB, SystemZ::STH32, 0, false);
- case SystemZ::CondStore16_32Inv:
- return emitCondStore(MI, MBB, SystemZ::STH32, 0, true);
- case SystemZ::CondStore32_32:
- return emitCondStore(MI, MBB, SystemZ::ST32, SystemZ::STOC32, false);
- case SystemZ::CondStore32_32Inv:
- return emitCondStore(MI, MBB, SystemZ::ST32, SystemZ::STOC32, true);
+ case SystemZ::CondStore8Mux:
+ return emitCondStore(MI, MBB, SystemZ::STCMux, 0, false);
+ case SystemZ::CondStore8MuxInv:
+ return emitCondStore(MI, MBB, SystemZ::STCMux, 0, true);
+ case SystemZ::CondStore16Mux:
+ return emitCondStore(MI, MBB, SystemZ::STHMux, 0, false);
+ case SystemZ::CondStore16MuxInv:
+ return emitCondStore(MI, MBB, SystemZ::STHMux, 0, true);
case SystemZ::CondStore8:
return emitCondStore(MI, MBB, SystemZ::STC, 0, false);
case SystemZ::CondStore8Inv:
@@ -2306,11 +3046,11 @@ EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const {
return emitCondStore(MI, MBB, SystemZ::STD, 0, true);
case SystemZ::AEXT128_64:
- return emitExt128(MI, MBB, false, SystemZ::subreg_low);
+ return emitExt128(MI, MBB, false, SystemZ::subreg_l64);
case SystemZ::ZEXT128_32:
- return emitExt128(MI, MBB, true, SystemZ::subreg_low32);
+ return emitExt128(MI, MBB, true, SystemZ::subreg_l32);
case SystemZ::ZEXT128_64:
- return emitExt128(MI, MBB, true, SystemZ::subreg_low);
+ return emitExt128(MI, MBB, true, SystemZ::subreg_l64);
case SystemZ::ATOMIC_SWAPW:
return emitAtomicLoadBinary(MI, MBB, 0, 0);
@@ -2346,98 +3086,98 @@ EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const {
case SystemZ::ATOMIC_LOADW_NR:
return emitAtomicLoadBinary(MI, MBB, SystemZ::NR, 0);
case SystemZ::ATOMIC_LOADW_NILH:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH32, 0);
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH, 0);
case SystemZ::ATOMIC_LOAD_NR:
return emitAtomicLoadBinary(MI, MBB, SystemZ::NR, 32);
- case SystemZ::ATOMIC_LOAD_NILL32:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::NILL32, 32);
- case SystemZ::ATOMIC_LOAD_NILH32:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH32, 32);
- case SystemZ::ATOMIC_LOAD_NILF32:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::NILF32, 32);
- case SystemZ::ATOMIC_LOAD_NGR:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::NGR, 64);
case SystemZ::ATOMIC_LOAD_NILL:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::NILL, 64);
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::NILL, 32);
case SystemZ::ATOMIC_LOAD_NILH:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH, 64);
- case SystemZ::ATOMIC_LOAD_NIHL:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHL, 64);
- case SystemZ::ATOMIC_LOAD_NIHH:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHH, 64);
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH, 32);
case SystemZ::ATOMIC_LOAD_NILF:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::NILF, 64);
- case SystemZ::ATOMIC_LOAD_NIHF:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHF, 64);
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::NILF, 32);
+ case SystemZ::ATOMIC_LOAD_NGR:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::NGR, 64);
+ case SystemZ::ATOMIC_LOAD_NILL64:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::NILL64, 64);
+ case SystemZ::ATOMIC_LOAD_NILH64:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH64, 64);
+ case SystemZ::ATOMIC_LOAD_NIHL64:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHL64, 64);
+ case SystemZ::ATOMIC_LOAD_NIHH64:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHH64, 64);
+ case SystemZ::ATOMIC_LOAD_NILF64:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::NILF64, 64);
+ case SystemZ::ATOMIC_LOAD_NIHF64:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHF64, 64);
case SystemZ::ATOMIC_LOADW_OR:
return emitAtomicLoadBinary(MI, MBB, SystemZ::OR, 0);
case SystemZ::ATOMIC_LOADW_OILH:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::OILH32, 0);
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::OILH, 0);
case SystemZ::ATOMIC_LOAD_OR:
return emitAtomicLoadBinary(MI, MBB, SystemZ::OR, 32);
- case SystemZ::ATOMIC_LOAD_OILL32:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::OILL32, 32);
- case SystemZ::ATOMIC_LOAD_OILH32:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::OILH32, 32);
- case SystemZ::ATOMIC_LOAD_OILF32:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::OILF32, 32);
- case SystemZ::ATOMIC_LOAD_OGR:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::OGR, 64);
case SystemZ::ATOMIC_LOAD_OILL:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::OILL, 64);
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::OILL, 32);
case SystemZ::ATOMIC_LOAD_OILH:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::OILH, 64);
- case SystemZ::ATOMIC_LOAD_OIHL:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::OIHL, 64);
- case SystemZ::ATOMIC_LOAD_OIHH:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::OIHH, 64);
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::OILH, 32);
case SystemZ::ATOMIC_LOAD_OILF:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::OILF, 64);
- case SystemZ::ATOMIC_LOAD_OIHF:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::OIHF, 64);
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::OILF, 32);
+ case SystemZ::ATOMIC_LOAD_OGR:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::OGR, 64);
+ case SystemZ::ATOMIC_LOAD_OILL64:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::OILL64, 64);
+ case SystemZ::ATOMIC_LOAD_OILH64:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::OILH64, 64);
+ case SystemZ::ATOMIC_LOAD_OIHL64:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::OIHL64, 64);
+ case SystemZ::ATOMIC_LOAD_OIHH64:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::OIHH64, 64);
+ case SystemZ::ATOMIC_LOAD_OILF64:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::OILF64, 64);
+ case SystemZ::ATOMIC_LOAD_OIHF64:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::OIHF64, 64);
case SystemZ::ATOMIC_LOADW_XR:
return emitAtomicLoadBinary(MI, MBB, SystemZ::XR, 0);
case SystemZ::ATOMIC_LOADW_XILF:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::XILF32, 0);
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::XILF, 0);
case SystemZ::ATOMIC_LOAD_XR:
return emitAtomicLoadBinary(MI, MBB, SystemZ::XR, 32);
- case SystemZ::ATOMIC_LOAD_XILF32:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::XILF32, 32);
+ case SystemZ::ATOMIC_LOAD_XILF:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::XILF, 32);
case SystemZ::ATOMIC_LOAD_XGR:
return emitAtomicLoadBinary(MI, MBB, SystemZ::XGR, 64);
- case SystemZ::ATOMIC_LOAD_XILF:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::XILF, 64);
- case SystemZ::ATOMIC_LOAD_XIHF:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::XIHF, 64);
+ case SystemZ::ATOMIC_LOAD_XILF64:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::XILF64, 64);
+ case SystemZ::ATOMIC_LOAD_XIHF64:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::XIHF64, 64);
case SystemZ::ATOMIC_LOADW_NRi:
return emitAtomicLoadBinary(MI, MBB, SystemZ::NR, 0, true);
case SystemZ::ATOMIC_LOADW_NILHi:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH32, 0, true);
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH, 0, true);
case SystemZ::ATOMIC_LOAD_NRi:
return emitAtomicLoadBinary(MI, MBB, SystemZ::NR, 32, true);
- case SystemZ::ATOMIC_LOAD_NILL32i:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::NILL32, 32, true);
- case SystemZ::ATOMIC_LOAD_NILH32i:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH32, 32, true);
- case SystemZ::ATOMIC_LOAD_NILF32i:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::NILF32, 32, true);
- case SystemZ::ATOMIC_LOAD_NGRi:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::NGR, 64, true);
case SystemZ::ATOMIC_LOAD_NILLi:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::NILL, 64, true);
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::NILL, 32, true);
case SystemZ::ATOMIC_LOAD_NILHi:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH, 64, true);
- case SystemZ::ATOMIC_LOAD_NIHLi:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHL, 64, true);
- case SystemZ::ATOMIC_LOAD_NIHHi:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHH, 64, true);
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH, 32, true);
case SystemZ::ATOMIC_LOAD_NILFi:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::NILF, 64, true);
- case SystemZ::ATOMIC_LOAD_NIHFi:
- return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHF, 64, true);
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::NILF, 32, true);
+ case SystemZ::ATOMIC_LOAD_NGRi:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::NGR, 64, true);
+ case SystemZ::ATOMIC_LOAD_NILL64i:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::NILL64, 64, true);
+ case SystemZ::ATOMIC_LOAD_NILH64i:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH64, 64, true);
+ case SystemZ::ATOMIC_LOAD_NIHL64i:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHL64, 64, true);
+ case SystemZ::ATOMIC_LOAD_NIHH64i:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHH64, 64, true);
+ case SystemZ::ATOMIC_LOAD_NILF64i:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::NILF64, 64, true);
+ case SystemZ::ATOMIC_LOAD_NIHF64i:
+ return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHF64, 64, true);
case SystemZ::ATOMIC_LOADW_MIN:
return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR,
@@ -2481,8 +3221,27 @@ EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const {
case SystemZ::ATOMIC_CMP_SWAPW:
return emitAtomicCmpSwapW(MI, MBB);
- case SystemZ::MVCWrapper:
- return emitMVCWrapper(MI, MBB);
+ case SystemZ::MVCSequence:
+ case SystemZ::MVCLoop:
+ return emitMemMemWrapper(MI, MBB, SystemZ::MVC);
+ case SystemZ::NCSequence:
+ case SystemZ::NCLoop:
+ return emitMemMemWrapper(MI, MBB, SystemZ::NC);
+ case SystemZ::OCSequence:
+ case SystemZ::OCLoop:
+ return emitMemMemWrapper(MI, MBB, SystemZ::OC);
+ case SystemZ::XCSequence:
+ case SystemZ::XCLoop:
+ return emitMemMemWrapper(MI, MBB, SystemZ::XC);
+ case SystemZ::CLCSequence:
+ case SystemZ::CLCLoop:
+ return emitMemMemWrapper(MI, MBB, SystemZ::CLC);
+ case SystemZ::CLSTLoop:
+ return emitStringWrapper(MI, MBB, SystemZ::CLST);
+ case SystemZ::MVSTLoop:
+ return emitStringWrapper(MI, MBB, SystemZ::MVST);
+ case SystemZ::SRSTLoop:
+ return emitStringWrapper(MI, MBB, SystemZ::SRST);
default:
llvm_unreachable("Unexpected instr type to insert");
}
diff --git a/lib/Target/SystemZ/SystemZISelLowering.h b/lib/Target/SystemZ/SystemZISelLowering.h
index c0dbe49..c6dcca6 100644
--- a/lib/Target/SystemZ/SystemZISelLowering.h
+++ b/lib/Target/SystemZ/SystemZISelLowering.h
@@ -32,17 +32,32 @@ namespace SystemZISD {
// is the target address. The arguments start at operand 2.
// There is an optional glue operand at the end.
CALL,
+ SIBCALL,
// Wraps a TargetGlobalAddress that should be loaded using PC-relative
// accesses (LARL). Operand 0 is the address.
PCREL_WRAPPER,
- // Signed integer and floating-point comparisons. The operands are the
- // two values to compare.
- CMP,
+ // Used in cases where an offset is applied to a TargetGlobalAddress.
+ // Operand 0 is the full TargetGlobalAddress and operand 1 is a
+ // PCREL_WRAPPER for an anchor point. This is used so that we can
+ // cheaply refer to either the full address or the anchor point
+ // as a register base.
+ PCREL_OFFSET,
- // Likewise unsigned integer comparison.
- UCMP,
+ // Integer comparisons. There are three operands: the two values
+ // to compare, and an integer of type SystemZICMP.
+ ICMP,
+
+ // Floating-point comparisons. The two operands are the values to compare.
+ FCMP,
+
+ // Test under mask. The first operand is ANDed with the second operand
+ // and the condition codes are set on the result. The third operand is
+ // a boolean that is true if the condition codes need to distinguish
+ // between CCMASK_TM_MIXED_MSB_0 and CCMASK_TM_MIXED_MSB_1 (which the
+ // register forms do but the memory forms don't).
+ TM,
// Branches if a condition is true. Operand 0 is the chain operand;
// operand 1 is the 4-bit condition-code mask, with bit N in
@@ -73,13 +88,50 @@ namespace SystemZISD {
UDIVREM32,
UDIVREM64,
- // Use MVC to copy bytes from one memory location to another.
- // The first operand is the target address, the second operand is the
- // source address, and the third operand is the constant length.
+ // Use a series of MVCs to copy bytes from one memory location to another.
+ // The operands are:
+ // - the target address
+ // - the source address
+ // - the constant length
+ //
// This isn't a memory opcode because we'd need to attach two
// MachineMemOperands rather than one.
MVC,
+ // Like MVC, but implemented as a loop that handles X*256 bytes
+ // followed by straight-line code to handle the rest (if any).
+ // The value of X is passed as an additional operand.
+ MVC_LOOP,
+
+ // Similar to MVC and MVC_LOOP, but for logic operations (AND, OR, XOR).
+ NC,
+ NC_LOOP,
+ OC,
+ OC_LOOP,
+ XC,
+ XC_LOOP,
+
+ // Use CLC to compare two blocks of memory, with the same comments
+ // as for MVC and MVC_LOOP.
+ CLC,
+ CLC_LOOP,
+
+ // Use an MVST-based sequence to implement stpcpy().
+ STPCPY,
+
+ // Use a CLST-based sequence to implement strcmp(). The two input operands
+ // are the addresses of the strings to compare.
+ STRCMP,
+
+ // Use an SRST-based sequence to search a block of memory. The first
+ // operand is the end address, the second is the start, and the third
+ // is the character to search for. CC is set to 1 on success and 2
+ // on failure.
+ SEARCH_STRING,
+
+ // Store the CC value in bits 29 and 28 of an integer.
+ IPM,
+
// Wrappers around the inner loop of an 8- or 16-bit ATOMIC_SWAP or
// ATOMIC_LOAD_<op>.
//
@@ -111,7 +163,27 @@ namespace SystemZISD {
// operand into the high bits
// Operand 4: the negative of operand 2, for rotating the other way
// Operand 5: the width of the field in bits (8 or 16)
- ATOMIC_CMP_SWAPW
+ ATOMIC_CMP_SWAPW,
+
+ // Prefetch from the second operand using the 4-bit control code in
+ // the first operand. The code is 1 for a load prefetch and 2 for
+ // a store prefetch.
+ PREFETCH
+ };
+
+ // Return true if OPCODE is some kind of PC-relative address.
+ inline bool isPCREL(unsigned Opcode) {
+ return Opcode == PCREL_WRAPPER || Opcode == PCREL_OFFSET;
+ }
+}
+
+namespace SystemZICMP {
+ // Describes whether an integer comparison needs to be signed or unsigned,
+ // or whether either type is OK.
+ enum {
+ Any,
+ UnsignedOnly,
+ SignedOnly
};
}
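A hedged illustration of why ICMP carries a SystemZICMP operand: equality tests can be implemented with either the signed or the unsigned compare family, while ordered tests must match the signedness of the operands:

    bool eq(unsigned A, unsigned B)  { return A == B; }  // SystemZICMP::Any
    bool slt(int A, int B)           { return A < B;  }  // SignedOnly
    bool ult(unsigned A, unsigned B) { return A < B;  }  // UnsignedOnly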
@@ -126,15 +198,15 @@ public:
virtual MVT getScalarShiftAmountTy(EVT LHSTy) const LLVM_OVERRIDE {
return MVT::i32;
}
- virtual EVT getSetCCResultType(LLVMContext &, EVT) const LLVM_OVERRIDE {
- return MVT::i32;
- }
+ virtual EVT getSetCCResultType(LLVMContext &, EVT) const LLVM_OVERRIDE;
virtual bool isFMAFasterThanFMulAndFAdd(EVT VT) const LLVM_OVERRIDE;
virtual bool isFPImmLegal(const APFloat &Imm, EVT VT) const LLVM_OVERRIDE;
virtual bool isLegalAddressingMode(const AddrMode &AM, Type *Ty) const
LLVM_OVERRIDE;
virtual bool allowsUnalignedMemoryAccesses(EVT VT, bool *Fast) const
LLVM_OVERRIDE;
+ virtual bool isTruncateFree(Type *, Type *) const LLVM_OVERRIDE;
+ virtual bool isTruncateFree(EVT, EVT) const LLVM_OVERRIDE;
virtual const char *getTargetNodeName(unsigned Opcode) const LLVM_OVERRIDE;
virtual std::pair<unsigned, const TargetRegisterClass *>
getRegForInlineAsmConstraint(const std::string &Constraint,
@@ -154,6 +226,8 @@ public:
MachineBasicBlock *BB) const LLVM_OVERRIDE;
virtual SDValue LowerOperation(SDValue Op,
SelectionDAG &DAG) const LLVM_OVERRIDE;
+ virtual bool allowTruncateForTailCall(Type *, Type *) const LLVM_OVERRIDE;
+ virtual bool mayBeEmittedAsTailCall(CallInst *CI) const LLVM_OVERRIDE;
virtual SDValue
LowerFormalArguments(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
@@ -176,6 +250,7 @@ private:
const SystemZTargetMachine &TM;
// Implement LowerOperation for individual opcodes.
+ SDValue lowerSETCC(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerGlobalAddress(GlobalAddressSDNode *Node,
@@ -189,6 +264,7 @@ private:
SDValue lowerVASTART(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerVACOPY(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerSMUL_LOHI(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerUMUL_LOHI(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerSDIVREM(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerUDIVREM(SDValue Op, SelectionDAG &DAG) const;
@@ -199,6 +275,7 @@ private:
SDValue lowerATOMIC_CMP_SWAP(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerSTACKSAVE(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerPREFETCH(SDValue Op, SelectionDAG &DAG) const;
// If the last instruction before MBBI in MBB was some form of COMPARE,
// try to replace it with a COMPARE AND BRANCH just before MBBI.
@@ -230,8 +307,12 @@ private:
unsigned BitSize) const;
MachineBasicBlock *emitAtomicCmpSwapW(MachineInstr *MI,
MachineBasicBlock *BB) const;
- MachineBasicBlock *emitMVCWrapper(MachineInstr *MI,
- MachineBasicBlock *BB) const;
+ MachineBasicBlock *emitMemMemWrapper(MachineInstr *MI,
+ MachineBasicBlock *BB,
+ unsigned Opcode) const;
+ MachineBasicBlock *emitStringWrapper(MachineInstr *MI,
+ MachineBasicBlock *BB,
+ unsigned Opcode) const;
};
} // end namespace llvm
diff --git a/lib/Target/SystemZ/SystemZInstrFP.td b/lib/Target/SystemZ/SystemZInstrFP.td
index b903b51..6080046 100644
--- a/lib/Target/SystemZ/SystemZInstrFP.td
+++ b/lib/Target/SystemZ/SystemZInstrFP.td
@@ -27,9 +27,9 @@ defm CondStoreF64 : CondStores<FP64, nonvolatile_store,
// Load zero.
let neverHasSideEffects = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in {
- def LZER : InherentRRE<"lze", 0xB374, FP32, (fpimm0)>;
- def LZDR : InherentRRE<"lzd", 0xB375, FP64, (fpimm0)>;
- def LZXR : InherentRRE<"lzx", 0xB376, FP128, (fpimm0)>;
+ def LZER : InherentRRE<"lzer", 0xB374, FP32, (fpimm0)>;
+ def LZDR : InherentRRE<"lzdr", 0xB375, FP64, (fpimm0)>;
+ def LZXR : InherentRRE<"lzxr", 0xB376, FP128, (fpimm0)>;
}
// Moves between two floating-point registers.
@@ -62,7 +62,7 @@ let isCodeGenOnly = 1 in {
// The sign of an FP128 is in the high register.
def : Pat<(fcopysign FP32:$src1, FP128:$src2),
- (CPSDRsd FP32:$src1, (EXTRACT_SUBREG FP128:$src2, subreg_high))>;
+ (CPSDRsd FP32:$src1, (EXTRACT_SUBREG FP128:$src2, subreg_h64))>;
// fcopysign with an FP64 result.
let isCodeGenOnly = 1 in
@@ -71,24 +71,24 @@ def CPSDRdd : BinaryRRF<"cpsd", 0xB372, fcopysign, FP64, FP64>;
// The sign of an FP128 is in the high register.
def : Pat<(fcopysign FP64:$src1, FP128:$src2),
- (CPSDRdd FP64:$src1, (EXTRACT_SUBREG FP128:$src2, subreg_high))>;
+ (CPSDRdd FP64:$src1, (EXTRACT_SUBREG FP128:$src2, subreg_h64))>;
// fcopysign with an FP128 result. Use "upper" as the high half and leave
// the low half as-is.
class CopySign128<RegisterOperand cls, dag upper>
: Pat<(fcopysign FP128:$src1, cls:$src2),
- (INSERT_SUBREG FP128:$src1, upper, subreg_high)>;
+ (INSERT_SUBREG FP128:$src1, upper, subreg_h64)>;
-def : CopySign128<FP32, (CPSDRds (EXTRACT_SUBREG FP128:$src1, subreg_high),
+def : CopySign128<FP32, (CPSDRds (EXTRACT_SUBREG FP128:$src1, subreg_h64),
FP32:$src2)>;
-def : CopySign128<FP64, (CPSDRdd (EXTRACT_SUBREG FP128:$src1, subreg_high),
+def : CopySign128<FP64, (CPSDRdd (EXTRACT_SUBREG FP128:$src1, subreg_h64),
FP64:$src2)>;
-def : CopySign128<FP128, (CPSDRdd (EXTRACT_SUBREG FP128:$src1, subreg_high),
- (EXTRACT_SUBREG FP128:$src2, subreg_high))>;
+def : CopySign128<FP128, (CPSDRdd (EXTRACT_SUBREG FP128:$src1, subreg_h64),
+ (EXTRACT_SUBREG FP128:$src2, subreg_h64))>;
-defm LoadStoreF32 : MVCLoadStore<load, store, f32, MVCWrapper, 4>;
-defm LoadStoreF64 : MVCLoadStore<load, store, f64, MVCWrapper, 8>;
-defm LoadStoreF128 : MVCLoadStore<load, store, f128, MVCWrapper, 16>;
+defm LoadStoreF32 : MVCLoadStore<load, f32, MVCSequence, 4>;
+defm LoadStoreF64 : MVCLoadStore<load, f64, MVCSequence, 8>;
+defm LoadStoreF128 : MVCLoadStore<load, f128, MVCSequence, 16>;
//===----------------------------------------------------------------------===//
// Load instructions
@@ -134,9 +134,9 @@ def LEXBR : UnaryRRE<"lexb", 0xB346, null_frag, FP128, FP128>;
def LDXBR : UnaryRRE<"ldxb", 0xB345, null_frag, FP128, FP128>;
def : Pat<(f32 (fround FP128:$src)),
- (EXTRACT_SUBREG (LEXBR FP128:$src), subreg_32bit)>;
+ (EXTRACT_SUBREG (LEXBR FP128:$src), subreg_hh32)>;
def : Pat<(f64 (fround FP128:$src)),
- (EXTRACT_SUBREG (LDXBR FP128:$src), subreg_high)>;
+ (EXTRACT_SUBREG (LDXBR FP128:$src), subreg_h64)>;
// Extend register floating-point values to wider representations.
def LDEBR : UnaryRRE<"ldeb", 0xB304, fextend, FP64, FP32>;
@@ -212,21 +212,56 @@ def SQEB : UnaryRXE<"sqeb", 0xED14, loadu<fsqrt>, FP32, 4>;
def SQDB : UnaryRXE<"sqdb", 0xED15, loadu<fsqrt>, FP64, 8>;
// Round to an integer, with the second operand (modifier M3) specifying
-// the rounding mode.
-//
-// These forms always check for inexact conditions. z196 added versions
-// that allow this to suppressed (as for fnearbyint), but we don't yet
-// support -march=z196.
+// the rounding mode. These forms always check for inexact conditions.
def FIEBR : UnaryRRF<"fieb", 0xB357, FP32, FP32>;
def FIDBR : UnaryRRF<"fidb", 0xB35F, FP64, FP64>;
def FIXBR : UnaryRRF<"fixb", 0xB347, FP128, FP128>;
+// Extended forms of the previous three instructions. M4 can be set to 4
+// to suppress detection of inexact conditions.
+def FIEBRA : UnaryRRF4<"fiebra", 0xB357, FP32, FP32>,
+ Requires<[FeatureFPExtension]>;
+def FIDBRA : UnaryRRF4<"fidbra", 0xB35F, FP64, FP64>,
+ Requires<[FeatureFPExtension]>;
+def FIXBRA : UnaryRRF4<"fixbra", 0xB347, FP128, FP128>,
+ Requires<[FeatureFPExtension]>;
+
// frint rounds according to the current mode (modifier 0) and detects
// inexact conditions.
def : Pat<(frint FP32:$src), (FIEBR 0, FP32:$src)>;
def : Pat<(frint FP64:$src), (FIDBR 0, FP64:$src)>;
def : Pat<(frint FP128:$src), (FIXBR 0, FP128:$src)>;
+let Predicates = [FeatureFPExtension] in {
+ // fnearbyint is like frint but does not detect inexact conditions.
+ def : Pat<(fnearbyint FP32:$src), (FIEBRA 0, FP32:$src, 4)>;
+ def : Pat<(fnearbyint FP64:$src), (FIDBRA 0, FP64:$src, 4)>;
+ def : Pat<(fnearbyint FP128:$src), (FIXBRA 0, FP128:$src, 4)>;
+
+ // floor is no longer allowed to raise an inexact condition,
+ // so restrict it to the cases where the condition can be suppressed.
+ // Mode 7 is round towards -inf.
+ def : Pat<(ffloor FP32:$src), (FIEBRA 7, FP32:$src, 4)>;
+ def : Pat<(ffloor FP64:$src), (FIDBRA 7, FP64:$src, 4)>;
+ def : Pat<(ffloor FP128:$src), (FIXBRA 7, FP128:$src, 4)>;
+
+ // Same idea for ceil, where mode 6 is round towards +inf.
+ def : Pat<(fceil FP32:$src), (FIEBRA 6, FP32:$src, 4)>;
+ def : Pat<(fceil FP64:$src), (FIDBRA 6, FP64:$src, 4)>;
+ def : Pat<(fceil FP128:$src), (FIXBRA 6, FP128:$src, 4)>;
+
+ // Same idea for trunc, where mode 5 is round towards zero.
+ def : Pat<(ftrunc FP32:$src), (FIEBRA 5, FP32:$src, 4)>;
+ def : Pat<(ftrunc FP64:$src), (FIDBRA 5, FP64:$src, 4)>;
+ def : Pat<(ftrunc FP128:$src), (FIXBRA 5, FP128:$src, 4)>;
+
+ // Same idea for round, where mode 1 is round towards nearest with
+ // ties away from zero.
+ def : Pat<(frnd FP32:$src), (FIEBRA 1, FP32:$src, 4)>;
+ def : Pat<(frnd FP64:$src), (FIDBRA 1, FP64:$src, 4)>;
+ def : Pat<(frnd FP128:$src), (FIXBRA 1, FP128:$src, 4)>;
+}
+
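For reference, the source-level operations these patterns correspond to, assuming a FeatureFPExtension target and a frontend that emits the matching ISD nodes (e.g. with math errno disabled):

    #include <cmath>

    double roundings(double X) {
      double A = std::nearbyint(X);  // FIDBRA 0, X, 4 (current mode)
      double B = std::floor(X);      // FIDBRA 7, X, 4 (towards -inf)
      double C = std::ceil(X);       // FIDBRA 6, X, 4 (towards +inf)
      double D = std::trunc(X);      // FIDBRA 5, X, 4 (towards zero)
      double E = std::round(X);      // FIDBRA 1, X, 4 (ties away from zero)
      return A + B + C + D + E;
    }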
//===----------------------------------------------------------------------===//
// Binary arithmetic
//===----------------------------------------------------------------------===//
@@ -265,26 +300,26 @@ def MDB : BinaryRXE<"mdb", 0xED1C, fmul, FP64, load, 8>;
def MDEBR : BinaryRRE<"mdeb", 0xB30C, null_frag, FP64, FP32>;
def : Pat<(fmul (f64 (fextend FP32:$src1)), (f64 (fextend FP32:$src2))),
(MDEBR (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
- FP32:$src1, subreg_32bit), FP32:$src2)>;
+ FP32:$src1, subreg_h32), FP32:$src2)>;
// f64 multiplication of an FP32 register and an f32 memory.
def MDEB : BinaryRXE<"mdeb", 0xED0C, null_frag, FP64, load, 4>;
def : Pat<(fmul (f64 (fextend FP32:$src1)),
(f64 (extloadf32 bdxaddr12only:$addr))),
- (MDEB (INSERT_SUBREG (f64 (IMPLICIT_DEF)), FP32:$src1, subreg_32bit),
+ (MDEB (INSERT_SUBREG (f64 (IMPLICIT_DEF)), FP32:$src1, subreg_h32),
bdxaddr12only:$addr)>;
// f128 multiplication of two FP64 registers.
def MXDBR : BinaryRRE<"mxdb", 0xB307, null_frag, FP128, FP64>;
def : Pat<(fmul (f128 (fextend FP64:$src1)), (f128 (fextend FP64:$src2))),
(MXDBR (INSERT_SUBREG (f128 (IMPLICIT_DEF)),
- FP64:$src1, subreg_high), FP64:$src2)>;
+ FP64:$src1, subreg_h64), FP64:$src2)>;
// f128 multiplication of an FP64 register and an f64 memory.
def MXDB : BinaryRXE<"mxdb", 0xED07, null_frag, FP128, load, 8>;
def : Pat<(fmul (f128 (fextend FP64:$src1)),
(f128 (extloadf64 bdxaddr12only:$addr))),
- (MXDB (INSERT_SUBREG (f128 (IMPLICIT_DEF)), FP64:$src1, subreg_high),
+ (MXDB (INSERT_SUBREG (f128 (IMPLICIT_DEF)), FP64:$src1, subreg_h64),
bdxaddr12only:$addr)>;
// Fused multiply-add.
@@ -314,12 +349,12 @@ def DDB : BinaryRXE<"ddb", 0xED1D, fdiv, FP64, load, 8>;
//===----------------------------------------------------------------------===//
let Defs = [CC], CCValues = 0xF in {
- def CEBR : CompareRRE<"ceb", 0xB309, z_cmp, FP32, FP32>;
- def CDBR : CompareRRE<"cdb", 0xB319, z_cmp, FP64, FP64>;
- def CXBR : CompareRRE<"cxb", 0xB349, z_cmp, FP128, FP128>;
+ def CEBR : CompareRRE<"ceb", 0xB309, z_fcmp, FP32, FP32>;
+ def CDBR : CompareRRE<"cdb", 0xB319, z_fcmp, FP64, FP64>;
+ def CXBR : CompareRRE<"cxb", 0xB349, z_fcmp, FP128, FP128>;
- def CEB : CompareRXE<"ceb", 0xED09, z_cmp, FP32, load, 4>;
- def CDB : CompareRXE<"cdb", 0xED19, z_cmp, FP64, load, 8>;
+ def CEB : CompareRXE<"ceb", 0xED09, z_fcmp, FP32, load, 4>;
+ def CDB : CompareRXE<"cdb", 0xED19, z_fcmp, FP64, load, 8>;
}
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/SystemZ/SystemZInstrFormats.td b/lib/Target/SystemZ/SystemZInstrFormats.td
index 954df11..a8efe16 100644
--- a/lib/Target/SystemZ/SystemZInstrFormats.td
+++ b/lib/Target/SystemZ/SystemZInstrFormats.td
@@ -308,10 +308,11 @@ class InstRRF<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
bits<4> R1;
bits<4> R2;
bits<4> R3;
+ bits<4> R4;
let Inst{31-16} = op;
let Inst{15-12} = R3;
- let Inst{11-8} = 0;
+ let Inst{11-8} = R4;
let Inst{7-4} = R1;
let Inst{3-0} = R2;
}
@@ -539,6 +540,10 @@ class InstSS<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern>
// One output operand and five input operands. The first two operands
// are registers and the other three are immediates.
//
+// Prefetch:
+// One 4-bit immediate operand and one address operand. The immediate
+// operand is 1 for a load prefetch and 2 for a store prefetch.
+//
// The format determines which input operands are tied to output operands,
// and also determines the shape of any address operand.
//
@@ -552,7 +557,7 @@ class InstSS<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern>
class InherentRRE<string mnemonic, bits<16> opcode, RegisterOperand cls,
dag src>
: InstRRE<opcode, (outs cls:$R1), (ins),
- mnemonic#"r\t$R1",
+ mnemonic#"\t$R1",
[(set cls:$R1, src)]> {
let R2 = 0;
}
@@ -626,27 +631,33 @@ class StoreMultipleRSY<string mnemonic, bits<16> opcode, RegisterOperand cls>
let mayStore = 1;
}
+// StoreSI* instructions are used to store an integer to memory, but the
+// addresses are more restricted than for normal stores. If we have to
+// force either the address or the constant into a register, it's usually
+// better to force the constant.
+// We therefore match the address in the same way as a normal store and
+// only use the StoreSI* instruction if the matched address is suitable.
class StoreSI<string mnemonic, bits<8> opcode, SDPatternOperator operator,
- Immediate imm, AddressingMode mode = bdaddr12only>
- : InstSI<opcode, (outs), (ins mode:$BD1, imm:$I2),
+ Immediate imm>
+ : InstSI<opcode, (outs), (ins mviaddr12pair:$BD1, imm:$I2),
mnemonic#"\t$BD1, $I2",
- [(operator imm:$I2, mode:$BD1)]> {
+ [(operator imm:$I2, mviaddr12pair:$BD1)]> {
let mayStore = 1;
}
class StoreSIY<string mnemonic, bits<16> opcode, SDPatternOperator operator,
- Immediate imm, AddressingMode mode = bdaddr20only>
- : InstSIY<opcode, (outs), (ins mode:$BD1, imm:$I2),
+ Immediate imm>
+ : InstSIY<opcode, (outs), (ins mviaddr20pair:$BD1, imm:$I2),
mnemonic#"\t$BD1, $I2",
- [(operator imm:$I2, mode:$BD1)]> {
+ [(operator imm:$I2, mviaddr20pair:$BD1)]> {
let mayStore = 1;
}
class StoreSIL<string mnemonic, bits<16> opcode, SDPatternOperator operator,
Immediate imm>
- : InstSIL<opcode, (outs), (ins bdaddr12only:$BD1, imm:$I2),
+ : InstSIL<opcode, (outs), (ins mviaddr12pair:$BD1, imm:$I2),
mnemonic#"\t$BD1, $I2",
- [(operator imm:$I2, bdaddr12only:$BD1)]> {
+ [(operator imm:$I2, mviaddr12pair:$BD1)]> {
let mayStore = 1;
}
@@ -654,9 +665,9 @@ multiclass StoreSIPair<string mnemonic, bits<8> siOpcode, bits<16> siyOpcode,
SDPatternOperator operator, Immediate imm> {
let DispKey = mnemonic in {
let DispSize = "12" in
- def "" : StoreSI<mnemonic, siOpcode, operator, imm, bdaddr12pair>;
+ def "" : StoreSI<mnemonic, siOpcode, operator, imm>;
let DispSize = "20" in
- def Y : StoreSIY<mnemonic#"y", siyOpcode, operator, imm, bdaddr20pair>;
+ def Y : StoreSIY<mnemonic#"y", siyOpcode, operator, imm>;
}
}
@@ -719,8 +730,14 @@ class UnaryRRF<string mnemonic, bits<16> opcode, RegisterOperand cls1,
mnemonic#"r\t$R1, $R3, $R2", []> {
let OpKey = mnemonic ## cls1;
let OpType = "reg";
+ let R4 = 0;
}
+class UnaryRRF4<string mnemonic, bits<16> opcode, RegisterOperand cls1,
+ RegisterOperand cls2>
+ : InstRRF<opcode, (outs cls1:$R1), (ins uimm8zx4:$R3, cls2:$R2, uimm8zx4:$R4),
+ mnemonic#"\t$R1, $R3, $R2, $R4", []>;
+
// These instructions are generated by if conversion. The old value of R1
// is added as an implicit use.
class CondUnaryRRF<string mnemonic, bits<16> opcode, RegisterOperand cls1,
@@ -729,6 +746,7 @@ class CondUnaryRRF<string mnemonic, bits<16> opcode, RegisterOperand cls1,
mnemonic#"r$R3\t$R1, $R2", []>,
Requires<[FeatureLoadStoreOnCond]> {
let CCMaskLast = 1;
+ let R4 = 0;
}
// Like CondUnaryRRF, but used for the raw assembly form. The condition-code
@@ -740,6 +758,7 @@ class AsmCondUnaryRRF<string mnemonic, bits<16> opcode, RegisterOperand cls1,
Requires<[FeatureLoadStoreOnCond]> {
let Constraints = "$R1 = $R1src";
let DisableEncoding = "$R1src";
+ let R4 = 0;
}
// Like CondUnaryRRF, but with a fixed CC mask.
@@ -751,6 +770,7 @@ class FixedCondUnaryRRF<string mnemonic, bits<16> opcode, RegisterOperand cls1,
let Constraints = "$R1 = $R1src";
let DisableEncoding = "$R1src";
let R3 = ccmask;
+ let R4 = 0;
}
class UnaryRI<string mnemonic, bits<12> opcode, SDPatternOperator operator,
@@ -898,13 +918,16 @@ class BinaryRRF<string mnemonic, bits<16> opcode, SDPatternOperator operator,
[(set cls1:$R1, (operator cls1:$R3, cls2:$R2))]> {
let OpKey = mnemonic ## cls1;
let OpType = "reg";
+ let R4 = 0;
}
class BinaryRRFK<string mnemonic, bits<16> opcode, SDPatternOperator operator,
RegisterOperand cls1, RegisterOperand cls2>
: InstRRF<opcode, (outs cls1:$R1), (ins cls1:$R2, cls2:$R3),
mnemonic#"rk\t$R1, $R2, $R3",
- [(set cls1:$R1, (operator cls1:$R2, cls2:$R3))]>;
+ [(set cls1:$R1, (operator cls1:$R2, cls2:$R3))]> {
+ let R4 = 0;
+}
multiclass BinaryRRAndK<string mnemonic, bits<8> opcode1, bits<16> opcode2,
SDPatternOperator operator, RegisterOperand cls1,
@@ -1285,6 +1308,22 @@ class RotateSelectRIEf<string mnemonic, bits<16> opcode, RegisterOperand cls1,
let DisableEncoding = "$R1src";
}
+class PrefetchRXY<string mnemonic, bits<16> opcode, SDPatternOperator operator>
+ : InstRXY<opcode, (outs), (ins uimm8zx4:$R1, bdxaddr20only:$XBD2),
+ mnemonic##"\t$R1, $XBD2",
+ [(operator uimm8zx4:$R1, bdxaddr20only:$XBD2)]>;
+
+class PrefetchRILPC<string mnemonic, bits<12> opcode,
+ SDPatternOperator operator>
+ : InstRIL<opcode, (outs), (ins uimm8zx4:$R1, pcrel32:$I2),
+ mnemonic##"\t$R1, $I2",
+ [(operator uimm8zx4:$R1, pcrel32:$I2)]> {
+ // We want PC-relative addresses to be tried ahead of BD and BDX addresses.
+ // However, BDXs have two extra operands and are therefore 6 units more
+ // complex.
+ let AddedComplexity = 7;
+}
+
// A floating-point load-and-test operation. Create both a normal unary
// operation and one that acts as a comparison against zero.
multiclass LoadAndTestRRE<string mnemonic, bits<16> opcode,
@@ -1310,6 +1349,100 @@ class Pseudo<dag outs, dag ins, list<dag> pattern>
let isCodeGenOnly = 1;
}
+// Like UnaryRI, but expanded after RA depending on the choice of register.
+class UnaryRIPseudo<SDPatternOperator operator, RegisterOperand cls,
+ Immediate imm>
+ : Pseudo<(outs cls:$R1), (ins imm:$I2),
+ [(set cls:$R1, (operator imm:$I2))]>;
+
+// Like UnaryRXY, but expanded after RA depending on the choice of register.
+class UnaryRXYPseudo<string key, SDPatternOperator operator,
+ RegisterOperand cls, bits<5> bytes,
+ AddressingMode mode = bdxaddr20only>
+ : Pseudo<(outs cls:$R1), (ins mode:$XBD2),
+ [(set cls:$R1, (operator mode:$XBD2))]> {
+ let OpKey = key ## cls;
+ let OpType = "mem";
+ let mayLoad = 1;
+ let Has20BitOffset = 1;
+ let HasIndex = 1;
+ let AccessBytes = bytes;
+}
+
+// Like UnaryRR, but expanded after RA depending on the choice of registers.
+class UnaryRRPseudo<string key, SDPatternOperator operator,
+ RegisterOperand cls1, RegisterOperand cls2>
+ : Pseudo<(outs cls1:$R1), (ins cls2:$R2),
+ [(set cls1:$R1, (operator cls2:$R2))]> {
+ let OpKey = key ## cls1;
+ let OpType = "reg";
+}
+
+// Like BinaryRI, but expanded after RA depending on the choice of register.
+class BinaryRIPseudo<SDPatternOperator operator, RegisterOperand cls,
+ Immediate imm>
+ : Pseudo<(outs cls:$R1), (ins cls:$R1src, imm:$I2),
+ [(set cls:$R1, (operator cls:$R1src, imm:$I2))]> {
+ let Constraints = "$R1 = $R1src";
+}
+
+// Like BinaryRIE, but expanded after RA depending on the choice of register.
+class BinaryRIEPseudo<SDPatternOperator operator, RegisterOperand cls,
+ Immediate imm>
+ : Pseudo<(outs cls:$R1), (ins cls:$R3, imm:$I2),
+ [(set cls:$R1, (operator cls:$R3, imm:$I2))]>;
+
+// Like BinaryRIAndK, but expanded after RA depending on the choice of register.
+multiclass BinaryRIAndKPseudo<string key, SDPatternOperator operator,
+ RegisterOperand cls, Immediate imm> {
+ let NumOpsKey = key in {
+ let NumOpsValue = "3" in
+ def K : BinaryRIEPseudo<null_frag, cls, imm>,
+ Requires<[FeatureHighWord, FeatureDistinctOps]>;
+ let NumOpsValue = "2", isConvertibleToThreeAddress = 1 in
+ def "" : BinaryRIPseudo<operator, cls, imm>,
+ Requires<[FeatureHighWord]>;
+ }
+}
+
+// Like CompareRI, but expanded after RA depending on the choice of register.
+class CompareRIPseudo<SDPatternOperator operator, RegisterOperand cls,
+ Immediate imm>
+ : Pseudo<(outs), (ins cls:$R1, imm:$I2), [(operator cls:$R1, imm:$I2)]>;
+
+// Like CompareRXY, but expanded after RA depending on the choice of register.
+class CompareRXYPseudo<SDPatternOperator operator, RegisterOperand cls,
+ SDPatternOperator load, bits<5> bytes,
+ AddressingMode mode = bdxaddr20only>
+ : Pseudo<(outs), (ins cls:$R1, mode:$XBD2),
+ [(operator cls:$R1, (load mode:$XBD2))]> {
+ let mayLoad = 1;
+ let Has20BitOffset = 1;
+ let HasIndex = 1;
+ let AccessBytes = bytes;
+}
+
+// Like StoreRXY, but expanded after RA depending on the choice of register.
+class StoreRXYPseudo<SDPatternOperator operator, RegisterOperand cls,
+ bits<5> bytes, AddressingMode mode = bdxaddr20only>
+ : Pseudo<(outs), (ins cls:$R1, mode:$XBD2),
+ [(operator cls:$R1, mode:$XBD2)]> {
+ let mayStore = 1;
+ let Has20BitOffset = 1;
+ let HasIndex = 1;
+ let AccessBytes = bytes;
+}
+
+// Like RotateSelectRIEf, but expanded after RA depending on the choice
+// of registers.
+class RotateSelectRIEfPseudo<RegisterOperand cls1, RegisterOperand cls2>
+ : Pseudo<(outs cls1:$R1),
+ (ins cls1:$R1src, cls2:$R2, uimm8:$I3, uimm8:$I4, uimm8zx6:$I5),
+ []> {
+ let Constraints = "$R1 = $R1src";
+ let DisableEncoding = "$R1src";
+}
+
// Implements "$dst = $cc & (8 >> CC) ? $src1 : $src2", where CC is
// the value of the PSW's 2-bit condition code field.
class SelectWrapper<RegisterOperand cls>
@@ -1386,3 +1519,85 @@ class AtomicLoadWBinaryReg<SDPatternOperator operator>
: AtomicLoadWBinary<operator, (i32 GR32:$src2), GR32>;
class AtomicLoadWBinaryImm<SDPatternOperator operator, Immediate imm>
: AtomicLoadWBinary<operator, (i32 imm:$src2), imm>;
+
+// Define an instruction that operates on two fixed-length blocks of memory,
+// and associated pseudo instructions for operating on blocks of any size.
+// The Sequence form uses a straight-line sequence of instructions and
+// the Loop form uses a loop of length-256 instructions followed by
+// another instruction to handle the excess.
+multiclass MemorySS<string mnemonic, bits<8> opcode,
+ SDPatternOperator sequence, SDPatternOperator loop> {
+ def "" : InstSS<opcode, (outs), (ins bdladdr12onlylen8:$BDL1,
+ bdaddr12only:$BD2),
+ mnemonic##"\t$BDL1, $BD2", []>;
+ let usesCustomInserter = 1 in {
+ def Sequence : Pseudo<(outs), (ins bdaddr12only:$dest, bdaddr12only:$src,
+ imm64:$length),
+ [(sequence bdaddr12only:$dest, bdaddr12only:$src,
+ imm64:$length)]>;
+ def Loop : Pseudo<(outs), (ins bdaddr12only:$dest, bdaddr12only:$src,
+ imm64:$length, GR64:$count256),
+ [(loop bdaddr12only:$dest, bdaddr12only:$src,
+ imm64:$length, GR64:$count256)]>;
+ }
+}
+
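As a hedged example of the Sequence/Loop split, assuming constant-length zeroing is lowered through XC (the traditional SystemZ idiom; whether a given memset takes this path depends on the DAG lowering):

    #include <cstring>

    void clear(void *P, void *Q) {
      std::memset(P, 0, 32);   // XCSequence: a single 32-byte XC
      std::memset(Q, 0, 600);  // XCLoop: two 256-byte XCs plus an 88-byte tail
    }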
+// Define an instruction that operates on two strings, both terminated
+// by the character in R0. The instruction processes a CPU-determined
+// number of bytes at a time and sets CC to 3 if the instruction needs
+// to be repeated. Also define a pseudo instruction that represents
+// the full loop (the main instruction plus the branch on CC==3).
+multiclass StringRRE<string mnemonic, bits<16> opcode,
+ SDPatternOperator operator> {
+ def "" : InstRRE<opcode, (outs GR64:$R1, GR64:$R2),
+ (ins GR64:$R1src, GR64:$R2src),
+ mnemonic#"\t$R1, $R2", []> {
+ let Constraints = "$R1 = $R1src, $R2 = $R2src";
+ let DisableEncoding = "$R1src, $R2src";
+ }
+ let usesCustomInserter = 1 in
+ def Loop : Pseudo<(outs GR64:$end),
+ (ins GR64:$start1, GR64:$start2, GR32:$char),
+ [(set GR64:$end, (operator GR64:$start1, GR64:$start2,
+ GR32:$char))]>;
+}
+
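A hedged sketch of the libcalls these Loop pseudos implement, per the opcode comments earlier in the patch: MVST for stpcpy, CLST for strcmp, SRST for searching:

    #include <string.h>  // stpcpy is POSIX, not standard C++

    char *copyEnd(char *Dst, const char *Src) {
      return stpcpy(Dst, Src);  // becomes an MVSTLoop pseudo, R0 = 0
    }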
+// A pseudo instruction that is a direct alias of a real instruction.
+// These aliases are used in cases where a particular register operand is
+// fixed or where the same instruction is used with different register sizes.
+// The size parameter is the size in bytes of the associated real instruction.
+class Alias<int size, dag outs, dag ins, list<dag> pattern>
+ : InstSystemZ<size, outs, ins, "", pattern> {
+ let isPseudo = 1;
+ let isCodeGenOnly = 1;
+}
+
+// An alias of a BinaryRI, but with different register sizes.
+class BinaryAliasRI<SDPatternOperator operator, RegisterOperand cls,
+ Immediate imm>
+ : Alias<4, (outs cls:$R1), (ins cls:$R1src, imm:$I2),
+ [(set cls:$R1, (operator cls:$R1src, imm:$I2))]> {
+ let Constraints = "$R1 = $R1src";
+}
+
+// An alias of a BinaryRIL, but with different register sizes.
+class BinaryAliasRIL<SDPatternOperator operator, RegisterOperand cls,
+ Immediate imm>
+ : Alias<6, (outs cls:$R1), (ins cls:$R1src, imm:$I2),
+ [(set cls:$R1, (operator cls:$R1src, imm:$I2))]> {
+ let Constraints = "$R1 = $R1src";
+}
+
+// An alias of a CompareRI, but with different register sizes.
+class CompareAliasRI<SDPatternOperator operator, RegisterOperand cls,
+ Immediate imm>
+ : Alias<4, (outs), (ins cls:$R1, imm:$I2), [(operator cls:$R1, imm:$I2)]> {
+ let isCompare = 1;
+}
+
+// An alias of a RotateSelectRIEf, but with different register sizes.
+class RotateSelectAliasRIEf<RegisterOperand cls1, RegisterOperand cls2>
+ : Alias<6, (outs cls1:$R1),
+ (ins cls1:$R1src, cls2:$R2, uimm8:$I3, uimm8:$I4, uimm8zx6:$I5), []> {
+ let Constraints = "$R1 = $R1src";
+}
diff --git a/lib/Target/SystemZ/SystemZInstrInfo.cpp b/lib/Target/SystemZ/SystemZInstrInfo.cpp
index 9ee60aa..acfeed8 100644
--- a/lib/Target/SystemZ/SystemZInstrInfo.cpp
+++ b/lib/Target/SystemZ/SystemZInstrInfo.cpp
@@ -17,7 +17,7 @@
#include "llvm/CodeGen/LiveVariables.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#define GET_INSTRINFO_CTOR
+#define GET_INSTRINFO_CTOR_DTOR
#define GET_INSTRMAP_INFO
#include "SystemZGenInstrInfo.inc"
@@ -28,6 +28,18 @@ static uint64_t allOnes(unsigned int Count) {
return Count == 0 ? 0 : (uint64_t(1) << (Count - 1) << 1) - 1;
}
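The double shift in allOnes is deliberate; a minimal standalone check (restating the helper) showing it stays well defined for Count == 64, where a single 64-bit shift would be undefined behaviour:

    #include <cassert>
    #include <cstdint>

    static uint64_t allOnes(unsigned Count) {
      return Count == 0 ? 0 : (uint64_t(1) << (Count - 1) << 1) - 1;
    }

    int main() {
      assert(allOnes(8) == 0xFF);
      assert(allOnes(64) == ~uint64_t(0));  // defined, unlike 1 << 64
      return 0;
    }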
+// Reg should be a 32-bit GPR. Return true if it is a high register rather
+// than a low register.
+static bool isHighReg(unsigned int Reg) {
+ if (SystemZ::GRH32BitRegClass.contains(Reg))
+ return true;
+ assert(SystemZ::GR32BitRegClass.contains(Reg) && "Invalid GRX32");
+ return false;
+}
+
+// Pin the vtable to this file.
+void SystemZInstrInfo::anchor() {}
+
SystemZInstrInfo::SystemZInstrInfo(SystemZTargetMachine &tm)
: SystemZGenInstrInfo(SystemZ::ADJCALLSTACKDOWN, SystemZ::ADJCALLSTACKUP),
RI(tm), TM(tm) {
@@ -48,8 +60,8 @@ void SystemZInstrInfo::splitMove(MachineBasicBlock::iterator MI,
// Set up the two 64-bit registers.
MachineOperand &HighRegOp = EarlierMI->getOperand(0);
MachineOperand &LowRegOp = MI->getOperand(0);
- HighRegOp.setReg(RI.getSubReg(HighRegOp.getReg(), SystemZ::subreg_high));
- LowRegOp.setReg(RI.getSubReg(LowRegOp.getReg(), SystemZ::subreg_low));
+ HighRegOp.setReg(RI.getSubReg(HighRegOp.getReg(), SystemZ::subreg_h64));
+ LowRegOp.setReg(RI.getSubReg(LowRegOp.getReg(), SystemZ::subreg_l64));
// The address in the first (high) instruction is already correct.
// Adjust the offset in the second (low) instruction.
@@ -82,6 +94,97 @@ void SystemZInstrInfo::splitAdjDynAlloc(MachineBasicBlock::iterator MI) const {
OffsetMO.setImm(Offset);
}
+// MI is an RI-style pseudo instruction. Replace it with LowOpcode
+// if the first operand is a low GR32 and HighOpcode if the first operand
+// is a high GR32. ConvertHigh is true if LowOpcode takes a signed operand
+// and HighOpcode takes an unsigned 32-bit operand. In those cases,
+// MI has the same kind of operand as LowOpcode, so needs to be converted
+// if HighOpcode is used.
+void SystemZInstrInfo::expandRIPseudo(MachineInstr *MI, unsigned LowOpcode,
+ unsigned HighOpcode,
+ bool ConvertHigh) const {
+ unsigned Reg = MI->getOperand(0).getReg();
+ bool IsHigh = isHighReg(Reg);
+ MI->setDesc(get(IsHigh ? HighOpcode : LowOpcode));
+ if (IsHigh && ConvertHigh)
+ MI->getOperand(1).setImm(uint32_t(MI->getOperand(1).getImm()));
+}
+
+// MI is a three-operand RIE-style pseudo instruction. Replace it with
+// LowOpcode3 if the registers are both low GR32s, otherwise use a move
+// followed by HighOpcode or LowOpcode, depending on whether the target
+// is a high or low GR32.
+void SystemZInstrInfo::expandRIEPseudo(MachineInstr *MI, unsigned LowOpcode,
+ unsigned LowOpcodeK,
+ unsigned HighOpcode) const {
+ unsigned DestReg = MI->getOperand(0).getReg();
+ unsigned SrcReg = MI->getOperand(1).getReg();
+ bool DestIsHigh = isHighReg(DestReg);
+ bool SrcIsHigh = isHighReg(SrcReg);
+ if (!DestIsHigh && !SrcIsHigh)
+ MI->setDesc(get(LowOpcodeK));
+ else {
+ emitGRX32Move(*MI->getParent(), MI, MI->getDebugLoc(),
+ DestReg, SrcReg, SystemZ::LR, 32,
+ MI->getOperand(1).isKill());
+ MI->setDesc(get(DestIsHigh ? HighOpcode : LowOpcode));
+ MI->getOperand(1).setReg(DestReg);
+ }
+}
+
+// MI is an RXY-style pseudo instruction. Replace it with LowOpcode
+// if the first operand is a low GR32 and HighOpcode if the first operand
+// is a high GR32.
+void SystemZInstrInfo::expandRXYPseudo(MachineInstr *MI, unsigned LowOpcode,
+ unsigned HighOpcode) const {
+ unsigned Reg = MI->getOperand(0).getReg();
+ unsigned Opcode = getOpcodeForOffset(isHighReg(Reg) ? HighOpcode : LowOpcode,
+ MI->getOperand(2).getImm());
+ MI->setDesc(get(Opcode));
+}
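The extra getOpcodeForOffset step matters because the opcodes passed in are
the short forms; when the displacement does not fit, the instruction is
switched to its long-displacement variant. A sketch of the two displacement
ranges involved (an assumption matching the z/Architecture RX and RXY
formats; getOpcodeForOffset itself maps opcodes, not booleans):

    #include <cstdint>

    // RX-style instructions such as L take a 12-bit unsigned displacement.
    bool fitsShortForm(int64_t D) { return D >= 0 && D < (1 << 12); }

    // RXY-style instructions such as LY take a 20-bit signed displacement.
    bool fitsLongForm(int64_t D) { return D >= -(1 << 19) && D < (1 << 19); }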
+
+// MI is an RR-style pseudo instruction that zero-extends the low Size bits
+// of one GRX32 into another. Replace it with LowOpcode if both operands
+// are low registers, otherwise use RISB[LH]G.
+void SystemZInstrInfo::expandZExtPseudo(MachineInstr *MI, unsigned LowOpcode,
+ unsigned Size) const {
+ emitGRX32Move(*MI->getParent(), MI, MI->getDebugLoc(),
+ MI->getOperand(0).getReg(), MI->getOperand(1).getReg(),
+ LowOpcode, Size, MI->getOperand(1).isKill());
+ MI->eraseFromParent();
+}
+
+// Emit a zero-extending move from 32-bit GPR SrcReg to 32-bit GPR
+// DestReg before MBBI in MBB. Use LowLowOpcode when both DestReg and SrcReg
+// are low registers, otherwise use RISB[LH]G. Size is the number of bits
+// taken from the low end of SrcReg (8 for LLCR, 16 for LLHR and 32 for LR).
+// KillSrc is true if this move is the last use of SrcReg.
+void SystemZInstrInfo::emitGRX32Move(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ DebugLoc DL, unsigned DestReg,
+ unsigned SrcReg, unsigned LowLowOpcode,
+ unsigned Size, bool KillSrc) const {
+ unsigned Opcode;
+ bool DestIsHigh = isHighReg(DestReg);
+ bool SrcIsHigh = isHighReg(SrcReg);
+ if (DestIsHigh && SrcIsHigh)
+ Opcode = SystemZ::RISBHH;
+ else if (DestIsHigh && !SrcIsHigh)
+ Opcode = SystemZ::RISBHL;
+ else if (!DestIsHigh && SrcIsHigh)
+ Opcode = SystemZ::RISBLH;
+ else {
+ BuildMI(MBB, MBBI, DL, get(LowLowOpcode), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ return;
+ }
+ unsigned Rotate = (DestIsHigh != SrcIsHigh ? 32 : 0);
+ BuildMI(MBB, MBBI, DL, get(Opcode), DestReg)
+ .addReg(DestReg, RegState::Undef)
+ .addReg(SrcReg, getKillRegState(KillSrc))
+ .addImm(32 - Size).addImm(128 + 31).addImm(Rotate);
+}
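A worked example of the RISB operand selection above: an 8-bit
zero-extending move (Size == 8) from a low source into a high destination
picks RISBHL, keeps bits 24 to 31 of the rotated value, sets the 128 flag
in I4 to zero everything else, and rotates by 32 to lift the low word into
the high word. A standalone computation of the three immediates
(illustration only):

    #include <cstdio>

    int main() {
      unsigned Size = 8;
      bool DestIsHigh = true, SrcIsHigh = false;         // the RISBHL case
      unsigned I3 = 32 - Size;                           // first bit kept: 24
      unsigned I4 = 128 + 31;                            // last bit kept: 31,
                                                         // plus "zero the rest"
      unsigned I5 = (DestIsHigh != SrcIsHigh) ? 32 : 0;  // cross-half rotate
      printf("I3=%u I4=%u I5=%u\n", I3, I4, I5);         // I3=24 I4=159 I5=32
    }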
+
// If MI is a simple load or store for a frame object, return the register
// it loads or stores and set FrameIndex to the index of the frame object.
// Return 0 otherwise.
@@ -293,6 +396,103 @@ SystemZInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
return Count;
}
+bool SystemZInstrInfo::analyzeCompare(const MachineInstr *MI,
+ unsigned &SrcReg, unsigned &SrcReg2,
+ int &Mask, int &Value) const {
+ assert(MI->isCompare() && "Caller should have checked for a comparison");
+
+ if (MI->getNumExplicitOperands() == 2 &&
+ MI->getOperand(0).isReg() &&
+ MI->getOperand(1).isImm()) {
+ SrcReg = MI->getOperand(0).getReg();
+ SrcReg2 = 0;
+ Value = MI->getOperand(1).getImm();
+ Mask = ~0;
+ return true;
+ }
+
+ return false;
+}
+
+// If Reg is a virtual register, return its definition, otherwise return null.
+static MachineInstr *getDef(unsigned Reg,
+ const MachineRegisterInfo *MRI) {
+ if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ return 0;
+ return MRI->getUniqueVRegDef(Reg);
+}
+
+// Return true if MI is a shift of type Opcode by Imm bits.
+static bool isShift(MachineInstr *MI, int Opcode, int64_t Imm) {
+ return (MI->getOpcode() == Opcode &&
+ !MI->getOperand(2).getReg() &&
+ MI->getOperand(3).getImm() == Imm);
+}
+
+// If the destination of MI has no non-debug uses, delete MI as dead.
+static void eraseIfDead(MachineInstr *MI, const MachineRegisterInfo *MRI) {
+ if (MRI->use_nodbg_empty(MI->getOperand(0).getReg()))
+ MI->eraseFromParent();
+}
+
+// Compare compares SrcReg against zero. Check whether SrcReg contains
+// the result of an IPM sequence whose input CC survives until Compare,
+// and whether Compare is therefore redundant. Delete it and return
+// true if so.
+static bool removeIPMBasedCompare(MachineInstr *Compare, unsigned SrcReg,
+ const MachineRegisterInfo *MRI,
+ const TargetRegisterInfo *TRI) {
+ MachineInstr *LGFR = 0;
+ MachineInstr *RLL = getDef(SrcReg, MRI);
+ if (RLL && RLL->getOpcode() == SystemZ::LGFR) {
+ LGFR = RLL;
+ RLL = getDef(LGFR->getOperand(1).getReg(), MRI);
+ }
+ if (!RLL || !isShift(RLL, SystemZ::RLL, 31))
+ return false;
+
+ MachineInstr *SRL = getDef(RLL->getOperand(1).getReg(), MRI);
+ if (!SRL || !isShift(SRL, SystemZ::SRL, SystemZ::IPM_CC))
+ return false;
+
+ MachineInstr *IPM = getDef(SRL->getOperand(1).getReg(), MRI);
+ if (!IPM || IPM->getOpcode() != SystemZ::IPM)
+ return false;
+
+ // Check that there are no assignments to CC between the IPM and Compare.
+ if (IPM->getParent() != Compare->getParent())
+ return false;
+ MachineBasicBlock::iterator MBBI = IPM, MBBE = Compare;
+ for (++MBBI; MBBI != MBBE; ++MBBI) {
+ MachineInstr *MI = MBBI;
+ if (MI->modifiesRegister(SystemZ::CC, TRI))
+ return false;
+ }
+
+ Compare->eraseFromParent();
+ if (LGFR)
+ eraseIfDead(LGFR, MRI);
+ eraseIfDead(RLL, MRI);
+ eraseIfDead(SRL, MRI);
+ eraseIfDead(IPM, MRI);
+
+ return true;
+}
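The sequence being matched is IPM, then SRL by SystemZ::IPM_CC (28) to
bring the condition code down to bits 0 and 1, then RLL by 31, which is a
rotate right by one, so that the low CC bit lands in the sign bit, possibly
widened by LGFR. A small C++ simulation of why a signed compare of the
result against zero only re-tests CC (a sketch assuming IPM_CC == 28):

    #include <cassert>
    #include <cstdint>

    static uint32_t rotl32(uint32_t V, unsigned N) {
      return (V << N) | (V >> (32 - N));
    }

    static int32_t ipmSrlRll(unsigned CC) {   // CC is 0..3
      uint32_t V = CC << 28;                  // ipm: CC in bits 28-29
      V >>= 28;                               // srl %r1, 28
      return int32_t(rotl32(V, 31));          // rll %r1, %r1, 31
    }

    int main() {
      // The sign of the result is exactly the low bit of CC.
      assert(ipmSrlRll(0) >= 0 && ipmSrlRll(2) >= 0);
      assert(ipmSrlRll(1) < 0 && ipmSrlRll(3) < 0);
    }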
+
+bool
+SystemZInstrInfo::optimizeCompareInstr(MachineInstr *Compare,
+ unsigned SrcReg, unsigned SrcReg2,
+ int Mask, int Value,
+ const MachineRegisterInfo *MRI) const {
+ assert(!SrcReg2 && "Only optimizing constant comparisons so far");
+ bool IsLogical = (Compare->getDesc().TSFlags & SystemZII::IsLogical) != 0;
+ if (Value == 0 &&
+ !IsLogical &&
+ removeIPMBasedCompare(Compare, SrcReg, MRI, TM.getRegisterInfo()))
+ return true;
+ return false;
+}
+
// If Opcode is a move that has a conditional variant, return that variant,
// otherwise return 0.
static unsigned getConditionalMove(unsigned Opcode) {
@@ -356,18 +556,21 @@ SystemZInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
bool KillSrc) const {
// Split 128-bit GPR moves into two 64-bit moves. This handles ADDR128 too.
if (SystemZ::GR128BitRegClass.contains(DestReg, SrcReg)) {
- copyPhysReg(MBB, MBBI, DL, RI.getSubReg(DestReg, SystemZ::subreg_high),
- RI.getSubReg(SrcReg, SystemZ::subreg_high), KillSrc);
- copyPhysReg(MBB, MBBI, DL, RI.getSubReg(DestReg, SystemZ::subreg_low),
- RI.getSubReg(SrcReg, SystemZ::subreg_low), KillSrc);
+ copyPhysReg(MBB, MBBI, DL, RI.getSubReg(DestReg, SystemZ::subreg_h64),
+ RI.getSubReg(SrcReg, SystemZ::subreg_h64), KillSrc);
+ copyPhysReg(MBB, MBBI, DL, RI.getSubReg(DestReg, SystemZ::subreg_l64),
+ RI.getSubReg(SrcReg, SystemZ::subreg_l64), KillSrc);
+ return;
+ }
+
+ if (SystemZ::GRX32BitRegClass.contains(DestReg, SrcReg)) {
+ emitGRX32Move(MBB, MBBI, DL, DestReg, SrcReg, SystemZ::LR, 32, KillSrc);
return;
}
// Everything else needs only one instruction.
unsigned Opcode;
- if (SystemZ::GR32BitRegClass.contains(DestReg, SrcReg))
- Opcode = SystemZ::LR;
- else if (SystemZ::GR64BitRegClass.contains(DestReg, SrcReg))
+ if (SystemZ::GR64BitRegClass.contains(DestReg, SrcReg))
Opcode = SystemZ::LGR;
else if (SystemZ::FP32BitRegClass.contains(DestReg, SrcReg))
Opcode = SystemZ::LER;
@@ -438,15 +641,15 @@ namespace {
static LogicOp interpretAndImmediate(unsigned Opcode) {
switch (Opcode) {
- case SystemZ::NILL32: return LogicOp(32, 0, 16);
- case SystemZ::NILH32: return LogicOp(32, 16, 16);
- case SystemZ::NILL: return LogicOp(64, 0, 16);
- case SystemZ::NILH: return LogicOp(64, 16, 16);
- case SystemZ::NIHL: return LogicOp(64, 32, 16);
- case SystemZ::NIHH: return LogicOp(64, 48, 16);
- case SystemZ::NILF32: return LogicOp(32, 0, 32);
- case SystemZ::NILF: return LogicOp(64, 0, 32);
- case SystemZ::NIHF: return LogicOp(64, 32, 32);
+ case SystemZ::NILMux: return LogicOp(32, 0, 16);
+ case SystemZ::NIHMux: return LogicOp(32, 16, 16);
+ case SystemZ::NILL64: return LogicOp(64, 0, 16);
+ case SystemZ::NILH64: return LogicOp(64, 16, 16);
+ case SystemZ::NIHL64: return LogicOp(64, 32, 16);
+ case SystemZ::NIHH64: return LogicOp(64, 48, 16);
+ case SystemZ::NIFMux: return LogicOp(32, 0, 32);
+ case SystemZ::NILF64: return LogicOp(64, 0, 32);
+ case SystemZ::NIHF64: return LogicOp(64, 32, 32);
default: return LogicOp();
}
}
@@ -473,6 +676,7 @@ SystemZInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
LiveVariables *LV) const {
MachineInstr *MI = MBBI;
MachineBasicBlock *MBB = MI->getParent();
+ MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
unsigned Opcode = MI->getOpcode();
unsigned NumOps = MI->getNumOperands();
@@ -482,10 +686,23 @@ SystemZInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
// because it tends to be shorter and because some instructions
// have memory forms that can be used during spilling.
if (TM.getSubtargetImpl()->hasDistinctOps()) {
+ MachineOperand &Dest = MI->getOperand(0);
+ MachineOperand &Src = MI->getOperand(1);
+ unsigned DestReg = Dest.getReg();
+ unsigned SrcReg = Src.getReg();
+ // AHIMux is only really a three-operand instruction when both operands
+ // are low registers. Try to constrain both operands to be low if
+ // possible.
+ if (Opcode == SystemZ::AHIMux &&
+ TargetRegisterInfo::isVirtualRegister(DestReg) &&
+ TargetRegisterInfo::isVirtualRegister(SrcReg) &&
+ MRI.getRegClass(DestReg)->contains(SystemZ::R1L) &&
+ MRI.getRegClass(SrcReg)->contains(SystemZ::R1L)) {
+ MRI.constrainRegClass(DestReg, &SystemZ::GR32BitRegClass);
+ MRI.constrainRegClass(SrcReg, &SystemZ::GR32BitRegClass);
+ }
int ThreeOperandOpcode = SystemZ::getThreeOperandOpcode(Opcode);
if (ThreeOperandOpcode >= 0) {
- MachineOperand &Dest = MI->getOperand(0);
- MachineOperand &Src = MI->getOperand(1);
MachineInstrBuilder MIB =
BuildMI(*MBB, MBBI, MI->getDebugLoc(), get(ThreeOperandOpcode))
.addOperand(Dest);
@@ -500,34 +717,27 @@ SystemZInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
// Try to convert an AND into an RISBG-type instruction.
if (LogicOp And = interpretAndImmediate(Opcode)) {
- unsigned NewOpcode;
- if (And.RegSize == 64)
- NewOpcode = SystemZ::RISBG;
- else if (TM.getSubtargetImpl()->hasHighWord())
- NewOpcode = SystemZ::RISBLG32;
- else
- // We can't use RISBG for 32-bit operations because it clobbers the
- // high word of the destination too.
- NewOpcode = 0;
- if (NewOpcode) {
- uint64_t Imm = MI->getOperand(2).getImm() << And.ImmLSB;
- // AND IMMEDIATE leaves the other bits of the register unchanged.
- Imm |= allOnes(And.RegSize) & ~(allOnes(And.ImmSize) << And.ImmLSB);
- unsigned Start, End;
- if (isRxSBGMask(Imm, And.RegSize, Start, End)) {
- if (NewOpcode == SystemZ::RISBLG32) {
- Start &= 31;
- End &= 31;
- }
- MachineOperand &Dest = MI->getOperand(0);
- MachineOperand &Src = MI->getOperand(1);
- MachineInstrBuilder MIB =
- BuildMI(*MBB, MI, MI->getDebugLoc(), get(NewOpcode))
- .addOperand(Dest).addReg(0)
- .addReg(Src.getReg(), getKillRegState(Src.isKill()), Src.getSubReg())
- .addImm(Start).addImm(End + 128).addImm(0);
- return finishConvertToThreeAddress(MI, MIB, LV);
+ uint64_t Imm = MI->getOperand(2).getImm() << And.ImmLSB;
+ // AND IMMEDIATE leaves the other bits of the register unchanged.
+ Imm |= allOnes(And.RegSize) & ~(allOnes(And.ImmSize) << And.ImmLSB);
+ unsigned Start, End;
+ if (isRxSBGMask(Imm, And.RegSize, Start, End)) {
+ unsigned NewOpcode;
+ if (And.RegSize == 64)
+ NewOpcode = SystemZ::RISBG;
+ else {
+ NewOpcode = SystemZ::RISBMux;
+ Start &= 31;
+ End &= 31;
}
+ MachineOperand &Dest = MI->getOperand(0);
+ MachineOperand &Src = MI->getOperand(1);
+ MachineInstrBuilder MIB =
+ BuildMI(*MBB, MI, MI->getDebugLoc(), get(NewOpcode))
+ .addOperand(Dest).addReg(0)
+ .addReg(Src.getReg(), getKillRegState(Src.isKill()), Src.getSubReg())
+ .addImm(Start).addImm(End + 128).addImm(0);
+ return finishConvertToThreeAddress(MI, MIB, LV);
}
}
return 0;
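A worked example of the mask construction in the AND conversion above:
NILL64 with immediate 0xff00 is LogicOp(64, 0, 16), and since AND IMMEDIATE
leaves the bits outside its 16-bit field untouched, those bits are treated
as ones when building the full mask. A standalone computation (the
big-endian start/end values are an assumption matching isRxSBGMask's
conventions):

    #include <cstdint>
    #include <cstdio>

    static uint64_t allOnes(unsigned Count) {
      return Count == 0 ? 0 : (uint64_t(1) << (Count - 1) << 1) - 1;
    }

    int main() {
      unsigned RegSize = 64, ImmLSB = 0, ImmSize = 16;   // LogicOp for NILL64
      uint64_t Imm = uint64_t(0xff00) << ImmLSB;
      Imm |= allOnes(RegSize) & ~(allOnes(ImmSize) << ImmLSB);
      printf("effective mask: %#llx\n", (unsigned long long)Imm);
      // => 0xffffffffffffff00: one contiguous run of ones, so RISBG can
      // select it with start 0 and end 55 in big-endian bit numbering.
    }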
@@ -540,8 +750,21 @@ SystemZInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
int FrameIndex) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
unsigned Size = MFI->getObjectSize(FrameIndex);
+ unsigned Opcode = MI->getOpcode();
+
+ if (Ops.size() == 2 && Ops[0] == 0 && Ops[1] == 1) {
+ if ((Opcode == SystemZ::LA || Opcode == SystemZ::LAY) &&
+ isInt<8>(MI->getOperand(2).getImm()) &&
+ !MI->getOperand(3).getReg()) {
+ // LA(Y) %reg, CONST(%reg) -> AGSI %mem, CONST
+ return BuildMI(MF, MI->getDebugLoc(), get(SystemZ::AGSI))
+ .addFrameIndex(FrameIndex).addImm(0)
+ .addImm(MI->getOperand(2).getImm());
+ }
+ return 0;
+ }
- // Eary exit for cases we don't care about
+ // All other cases require a single operand.
if (Ops.size() != 1)
return 0;
@@ -550,7 +773,16 @@ SystemZInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
.getRegClass(MI->getOperand(OpNum).getReg())->getSize() &&
"Invalid size combination");
- unsigned Opcode = MI->getOpcode();
+ if ((Opcode == SystemZ::AHI || Opcode == SystemZ::AGHI) &&
+ OpNum == 0 &&
+ isInt<8>(MI->getOperand(2).getImm())) {
+ // A(G)HI %reg, CONST -> A(G)SI %mem, CONST
+ Opcode = (Opcode == SystemZ::AHI ? SystemZ::ASI : SystemZ::AGSI);
+ return BuildMI(MF, MI->getDebugLoc(), get(Opcode))
+ .addFrameIndex(FrameIndex).addImm(0)
+ .addImm(MI->getOperand(2).getImm());
+ }
+
if (Opcode == SystemZ::LGDR || Opcode == SystemZ::LDGR) {
bool Op0IsGPR = (Opcode == SystemZ::LGDR);
bool Op1IsGPR = (Opcode == SystemZ::LDGR);
@@ -577,10 +809,14 @@ SystemZInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
//
// Although MVC is in practice a fast choice in these cases, it is still
// logically a bytewise copy. This means that we cannot use it if the
- // load or store is volatile. It also means that the transformation is
- // not valid in cases where the two memories partially overlap; however,
- // that is not a problem here, because we know that one of the memories
- // is a full frame index.
+ // load or store is volatile. We also wouldn't be able to use MVC if
+ // the two memories partially overlap, but that case cannot occur here,
+ // because we know that one of the memories is a full frame index.
+ //
+ // For performance reasons, we also want to avoid using MVC if the addresses
+ // might be equal. We don't worry about that case here, because spill slot
+ // coloring happens later, and because we have special code to remove
+ // MVCs that turn out to be redundant.
if (OpNum == 0 && MI->hasOneMemOperand()) {
MachineMemOperand *MMO = *MI->memoperands_begin();
if (MMO->getSize() == Size && !MMO->isVolatile()) {
@@ -651,6 +887,138 @@ SystemZInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
splitMove(MI, SystemZ::STD);
return true;
+ case SystemZ::LBMux:
+ expandRXYPseudo(MI, SystemZ::LB, SystemZ::LBH);
+ return true;
+
+ case SystemZ::LHMux:
+ expandRXYPseudo(MI, SystemZ::LH, SystemZ::LHH);
+ return true;
+
+ case SystemZ::LLCRMux:
+ expandZExtPseudo(MI, SystemZ::LLCR, 8);
+ return true;
+
+ case SystemZ::LLHRMux:
+ expandZExtPseudo(MI, SystemZ::LLHR, 16);
+ return true;
+
+ case SystemZ::LLCMux:
+ expandRXYPseudo(MI, SystemZ::LLC, SystemZ::LLCH);
+ return true;
+
+ case SystemZ::LLHMux:
+ expandRXYPseudo(MI, SystemZ::LLH, SystemZ::LLHH);
+ return true;
+
+ case SystemZ::LMux:
+ expandRXYPseudo(MI, SystemZ::L, SystemZ::LFH);
+ return true;
+
+ case SystemZ::STCMux:
+ expandRXYPseudo(MI, SystemZ::STC, SystemZ::STCH);
+ return true;
+
+ case SystemZ::STHMux:
+ expandRXYPseudo(MI, SystemZ::STH, SystemZ::STHH);
+ return true;
+
+ case SystemZ::STMux:
+ expandRXYPseudo(MI, SystemZ::ST, SystemZ::STFH);
+ return true;
+
+ case SystemZ::LHIMux:
+ expandRIPseudo(MI, SystemZ::LHI, SystemZ::IIHF, true);
+ return true;
+
+ case SystemZ::IIFMux:
+ expandRIPseudo(MI, SystemZ::IILF, SystemZ::IIHF, false);
+ return true;
+
+ case SystemZ::IILMux:
+ expandRIPseudo(MI, SystemZ::IILL, SystemZ::IIHL, false);
+ return true;
+
+ case SystemZ::IIHMux:
+ expandRIPseudo(MI, SystemZ::IILH, SystemZ::IIHH, false);
+ return true;
+
+ case SystemZ::NIFMux:
+ expandRIPseudo(MI, SystemZ::NILF, SystemZ::NIHF, false);
+ return true;
+
+ case SystemZ::NILMux:
+ expandRIPseudo(MI, SystemZ::NILL, SystemZ::NIHL, false);
+ return true;
+
+ case SystemZ::NIHMux:
+ expandRIPseudo(MI, SystemZ::NILH, SystemZ::NIHH, false);
+ return true;
+
+ case SystemZ::OIFMux:
+ expandRIPseudo(MI, SystemZ::OILF, SystemZ::OIHF, false);
+ return true;
+
+ case SystemZ::OILMux:
+ expandRIPseudo(MI, SystemZ::OILL, SystemZ::OIHL, false);
+ return true;
+
+ case SystemZ::OIHMux:
+ expandRIPseudo(MI, SystemZ::OILH, SystemZ::OIHH, false);
+ return true;
+
+ case SystemZ::XIFMux:
+ expandRIPseudo(MI, SystemZ::XILF, SystemZ::XIHF, false);
+ return true;
+
+ case SystemZ::TMLMux:
+ expandRIPseudo(MI, SystemZ::TMLL, SystemZ::TMHL, false);
+ return true;
+
+ case SystemZ::TMHMux:
+ expandRIPseudo(MI, SystemZ::TMLH, SystemZ::TMHH, false);
+ return true;
+
+ case SystemZ::AHIMux:
+ expandRIPseudo(MI, SystemZ::AHI, SystemZ::AIH, false);
+ return true;
+
+ case SystemZ::AHIMuxK:
+ expandRIEPseudo(MI, SystemZ::AHI, SystemZ::AHIK, SystemZ::AIH);
+ return true;
+
+ case SystemZ::AFIMux:
+ expandRIPseudo(MI, SystemZ::AFI, SystemZ::AIH, false);
+ return true;
+
+ case SystemZ::CFIMux:
+ expandRIPseudo(MI, SystemZ::CFI, SystemZ::CIH, false);
+ return true;
+
+ case SystemZ::CLFIMux:
+ expandRIPseudo(MI, SystemZ::CLFI, SystemZ::CLIH, false);
+ return true;
+
+ case SystemZ::CMux:
+ expandRXYPseudo(MI, SystemZ::C, SystemZ::CHF);
+ return true;
+
+ case SystemZ::CLMux:
+ expandRXYPseudo(MI, SystemZ::CL, SystemZ::CLHF);
+ return true;
+
+ case SystemZ::RISBMux: {
+ bool DestIsHigh = isHighReg(MI->getOperand(0).getReg());
+ bool SrcIsHigh = isHighReg(MI->getOperand(2).getReg());
+ if (SrcIsHigh == DestIsHigh)
+ MI->setDesc(get(DestIsHigh ? SystemZ::RISBHH : SystemZ::RISBLL));
+ else {
+ MI->setDesc(get(DestIsHigh ? SystemZ::RISBHL : SystemZ::RISBLH));
+ MI->getOperand(5).setImm(MI->getOperand(5).getImm() ^ 32);
+ }
+ return true;
+ }
+
case SystemZ::ADJDYNALLOC:
splitAdjDynAlloc(MI);
return true;
@@ -697,11 +1065,21 @@ SystemZInstrInfo::getBranchInfo(const MachineInstr *MI) const {
return SystemZII::Branch(SystemZII::BranchC, SystemZ::CCMASK_ICMP,
MI->getOperand(2).getImm(), &MI->getOperand(3));
+ case SystemZ::CLIJ:
+ case SystemZ::CLRJ:
+ return SystemZII::Branch(SystemZII::BranchCL, SystemZ::CCMASK_ICMP,
+ MI->getOperand(2).getImm(), &MI->getOperand(3));
+
case SystemZ::CGIJ:
case SystemZ::CGRJ:
return SystemZII::Branch(SystemZII::BranchCG, SystemZ::CCMASK_ICMP,
MI->getOperand(2).getImm(), &MI->getOperand(3));
+ case SystemZ::CLGIJ:
+ case SystemZ::CLGRJ:
+ return SystemZII::Branch(SystemZII::BranchCLG, SystemZ::CCMASK_ICMP,
+ MI->getOperand(2).getImm(), &MI->getOperand(3));
+
default:
llvm_unreachable("Unrecognized branch opcode");
}
@@ -712,7 +1090,13 @@ void SystemZInstrInfo::getLoadStoreOpcodes(const TargetRegisterClass *RC,
unsigned &StoreOpcode) const {
if (RC == &SystemZ::GR32BitRegClass || RC == &SystemZ::ADDR32BitRegClass) {
LoadOpcode = SystemZ::L;
- StoreOpcode = SystemZ::ST32;
+ StoreOpcode = SystemZ::ST;
+ } else if (RC == &SystemZ::GRH32BitRegClass) {
+ LoadOpcode = SystemZ::LFH;
+ StoreOpcode = SystemZ::STFH;
+ } else if (RC == &SystemZ::GRX32BitRegClass) {
+ LoadOpcode = SystemZ::LMux;
+ StoreOpcode = SystemZ::STMux;
} else if (RC == &SystemZ::GR64BitRegClass ||
RC == &SystemZ::ADDR64BitRegClass) {
LoadOpcode = SystemZ::LG;
@@ -830,6 +1214,14 @@ unsigned SystemZInstrInfo::getCompareAndBranch(unsigned Opcode,
return MI && isInt<8>(MI->getOperand(1).getImm()) ? SystemZ::CIJ : 0;
case SystemZ::CGHI:
return MI && isInt<8>(MI->getOperand(1).getImm()) ? SystemZ::CGIJ : 0;
+ case SystemZ::CLR:
+ return SystemZ::CLRJ;
+ case SystemZ::CLGR:
+ return SystemZ::CLGRJ;
+ case SystemZ::CLFI:
+ return MI && isUInt<8>(MI->getOperand(1).getImm()) ? SystemZ::CLIJ : 0;
+ case SystemZ::CLGFI:
+ return MI && isUInt<8>(MI->getOperand(1).getImm()) ? SystemZ::CLGIJ : 0;
default:
return 0;
}
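The isInt<8>/isUInt<8> guards above reflect the 8-bit immediate field of
the compare-and-branch instructions: signed for CIJ and CGIJ, unsigned for
the new CLIJ and CLGIJ. A sketch of the two range checks (standalone
equivalents of the LLVM templates):

    #include <cstdint>

    bool fitsCIJ(int64_t I)  { return I >= -128 && I <= 127; }  // isInt<8>
    bool fitsCLIJ(int64_t I) { return I >= 0 && I <= 255; }     // isUInt<8>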
diff --git a/lib/Target/SystemZ/SystemZInstrInfo.h b/lib/Target/SystemZ/SystemZInstrInfo.h
index 276fd3b..be4c8fe 100644
--- a/lib/Target/SystemZ/SystemZInstrInfo.h
+++ b/lib/Target/SystemZ/SystemZInstrInfo.h
@@ -70,10 +70,18 @@ namespace SystemZII {
// on the result.
BranchC,
+ // An instruction that performs a 32-bit unsigned comparison and branches
+ // on the result.
+ BranchCL,
+
// An instruction that performs a 64-bit signed comparison and branches
// on the result.
BranchCG,
+ // An instruction that performs a 64-bit unsigned comparison and branches
+ // on the result.
+ BranchCLG,
+
// An instruction that decrements a 32-bit register and branches if
// the result is nonzero.
BranchCT,
@@ -108,7 +116,19 @@ class SystemZInstrInfo : public SystemZGenInstrInfo {
void splitMove(MachineBasicBlock::iterator MI, unsigned NewOpcode) const;
void splitAdjDynAlloc(MachineBasicBlock::iterator MI) const;
-
+ void expandRIPseudo(MachineInstr *MI, unsigned LowOpcode,
+ unsigned HighOpcode, bool ConvertHigh) const;
+ void expandRIEPseudo(MachineInstr *MI, unsigned LowOpcode,
+ unsigned LowOpcodeK, unsigned HighOpcode) const;
+ void expandRXYPseudo(MachineInstr *MI, unsigned LowOpcode,
+ unsigned HighOpcode) const;
+ void expandZExtPseudo(MachineInstr *MI, unsigned LowOpcode,
+ unsigned Size) const;
+ void emitGRX32Move(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+ DebugLoc DL, unsigned DestReg, unsigned SrcReg,
+ unsigned LowLowOpcode, unsigned Size, bool KillSrc) const;
+ virtual void anchor();
+
public:
explicit SystemZInstrInfo(SystemZTargetMachine &TM);
@@ -129,6 +149,12 @@ public:
MachineBasicBlock *FBB,
const SmallVectorImpl<MachineOperand> &Cond,
DebugLoc DL) const LLVM_OVERRIDE;
+ bool analyzeCompare(const MachineInstr *MI, unsigned &SrcReg,
+ unsigned &SrcReg2, int &Mask, int &Value) const
+ LLVM_OVERRIDE;
+ bool optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg,
+ unsigned SrcReg2, int Mask, int Value,
+ const MachineRegisterInfo *MRI) const LLVM_OVERRIDE;
virtual bool isPredicable(MachineInstr *MI) const LLVM_OVERRIDE;
virtual bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumCycles,
unsigned ExtraPredCycles,
diff --git a/lib/Target/SystemZ/SystemZInstrInfo.td b/lib/Target/SystemZ/SystemZInstrInfo.td
index b318d67..6524e44 100644
--- a/lib/Target/SystemZ/SystemZInstrInfo.td
+++ b/lib/Target/SystemZ/SystemZInstrInfo.td
@@ -32,12 +32,9 @@ let neverHasSideEffects = 1 in {
// Control flow instructions
//===----------------------------------------------------------------------===//
-// A return instruction. R1 is the condition-code mask (all 1s)
-// and R2 is the target address, which is always stored in %r14.
-let isReturn = 1, isTerminator = 1, isBarrier = 1, hasCtrlDep = 1,
- R1 = 15, R2 = 14, isCodeGenOnly = 1 in {
- def RET : InstRR<0x07, (outs), (ins), "br\t%r14", [(z_retflag)]>;
-}
+// A return instruction (br %r14).
+let isReturn = 1, isTerminator = 1, isBarrier = 1, hasCtrlDep = 1 in
+ def Return : Alias<2, (outs), (ins), [(z_retflag)]>;
// Unconditional branches. R1 is the condition-code mask (all 1s).
let isBranch = 1, isTerminator = 1, isBarrier = 1, R1 = 15 in {
@@ -70,6 +67,8 @@ let isBranch = 1, isTerminator = 1, Uses = [CC] in {
"brc\t$R1, $I2", []>;
def AsmBRCL : InstRIL<0xC04, (outs), (ins uimm8zx4:$R1, brtarget32:$I2),
"brcl\t$R1, $I2", []>;
+ def AsmBCR : InstRR<0x07, (outs), (ins uimm8zx4:$R1, GR64:$R2),
+ "bcr\t$R1, $R2", []>;
}
// Fused compare-and-branch instructions. As for normal branches,
@@ -94,6 +93,18 @@ multiclass CompareBranches<Operand ccmask, string pos1, string pos2> {
def GIJ : InstRIEc<0xEC7C, (outs), (ins GR64:$R1, imm64sx8:$I2, ccmask:$M3,
brtarget16:$RI4),
"cgij"##pos1##"\t$R1, $I2, "##pos2##"$RI4", []>;
+ def LRJ : InstRIEb<0xEC77, (outs), (ins GR32:$R1, GR32:$R2, ccmask:$M3,
+ brtarget16:$RI4),
+ "clrj"##pos1##"\t$R1, $R2, "##pos2##"$RI4", []>;
+ def LGRJ : InstRIEb<0xEC65, (outs), (ins GR64:$R1, GR64:$R2, ccmask:$M3,
+ brtarget16:$RI4),
+ "clgrj"##pos1##"\t$R1, $R2, "##pos2##"$RI4", []>;
+ def LIJ : InstRIEc<0xEC7F, (outs), (ins GR32:$R1, imm32zx8:$I2, ccmask:$M3,
+ brtarget16:$RI4),
+ "clij"##pos1##"\t$R1, $I2, "##pos2##"$RI4", []>;
+ def LGIJ : InstRIEc<0xEC7D, (outs), (ins GR64:$R1, imm64zx8:$I2, ccmask:$M3,
+ brtarget16:$RI4),
+ "clgij"##pos1##"\t$R1, $I2, "##pos2##"$RI4", []>;
}
}
let isCodeGenOnly = 1 in
@@ -108,6 +119,7 @@ multiclass CondExtendedMnemonic<bits<4> ccmask, string name> {
"j"##name##"\t$I2", []>;
def JG : InstRIL<0xC04, (outs), (ins brtarget32:$I2),
"jg"##name##"\t$I2", []>;
+ def BR : InstRR<0x07, (outs), (ins ADDR64:$R2), "b"##name##"r\t$R2", []>;
}
def LOCR : FixedCondUnaryRRF<"locr"##name, 0xB9F2, GR32, GR32, ccmask>;
def LOCGR : FixedCondUnaryRRF<"locgr"##name, 0xB9E2, GR64, GR64, ccmask>;
@@ -152,6 +164,18 @@ multiclass IntCondExtendedMnemonicA<bits<4> ccmask, string name> {
def CGI : InstRIEc<0xEC7C, (outs), (ins GR64:$R1, imm64sx8:$I2,
brtarget16:$RI4),
"cgij"##name##"\t$R1, $I2, $RI4", []>;
+ def CLR : InstRIEb<0xEC77, (outs), (ins GR32:$R1, GR32:$R2,
+ brtarget16:$RI4),
+ "clrj"##name##"\t$R1, $R2, $RI4", []>;
+ def CLGR : InstRIEb<0xEC65, (outs), (ins GR64:$R1, GR64:$R2,
+ brtarget16:$RI4),
+ "clgrj"##name##"\t$R1, $R2, $RI4", []>;
+ def CLI : InstRIEc<0xEC7F, (outs), (ins GR32:$R1, imm32zx8:$I2,
+ brtarget16:$RI4),
+ "clij"##name##"\t$R1, $I2, $RI4", []>;
+ def CLGI : InstRIEc<0xEC7D, (outs), (ins GR64:$R1, imm64zx8:$I2,
+ brtarget16:$RI4),
+ "clgij"##name##"\t$R1, $I2, $RI4", []>;
}
}
multiclass IntCondExtendedMnemonic<bits<4> ccmask, string name1, string name2>
@@ -177,22 +201,31 @@ let Defs = [CC] in {
// Select instructions
//===----------------------------------------------------------------------===//
-def Select32 : SelectWrapper<GR32>;
-def Select64 : SelectWrapper<GR64>;
-
-defm CondStore8_32 : CondStores<GR32, nonvolatile_truncstorei8,
+def Select32Mux : SelectWrapper<GRX32>, Requires<[FeatureHighWord]>;
+def Select32 : SelectWrapper<GR32>;
+def Select64 : SelectWrapper<GR64>;
+
+// We don't define 32-bit Mux stores because the low-only STOC should
+// always be used if possible.
+defm CondStore8Mux : CondStores<GRX32, nonvolatile_truncstorei8,
+ nonvolatile_anyextloadi8, bdxaddr20only>,
+ Requires<[FeatureHighWord]>;
+defm CondStore16Mux : CondStores<GRX32, nonvolatile_truncstorei16,
+ nonvolatile_anyextloadi16, bdxaddr20only>,
+ Requires<[FeatureHighWord]>;
+defm CondStore8 : CondStores<GR32, nonvolatile_truncstorei8,
nonvolatile_anyextloadi8, bdxaddr20only>;
-defm CondStore16_32 : CondStores<GR32, nonvolatile_truncstorei16,
+defm CondStore16 : CondStores<GR32, nonvolatile_truncstorei16,
nonvolatile_anyextloadi16, bdxaddr20only>;
-defm CondStore32_32 : CondStores<GR32, nonvolatile_store,
+defm CondStore32 : CondStores<GR32, nonvolatile_store,
nonvolatile_load, bdxaddr20only>;
-defm CondStore8 : CondStores<GR64, nonvolatile_truncstorei8,
- nonvolatile_anyextloadi8, bdxaddr20only>;
-defm CondStore16 : CondStores<GR64, nonvolatile_truncstorei16,
- nonvolatile_anyextloadi16, bdxaddr20only>;
-defm CondStore32 : CondStores<GR64, nonvolatile_truncstorei32,
- nonvolatile_anyextloadi32, bdxaddr20only>;
+defm : CondStores64<CondStore8, CondStore8Inv, nonvolatile_truncstorei8,
+ nonvolatile_anyextloadi8, bdxaddr20only>;
+defm : CondStores64<CondStore16, CondStore16Inv, nonvolatile_truncstorei16,
+ nonvolatile_anyextloadi16, bdxaddr20only>;
+defm : CondStores64<CondStore32, CondStore32Inv, nonvolatile_truncstorei32,
+ nonvolatile_anyextloadi32, bdxaddr20only>;
defm CondStore64 : CondStores<GR64, nonvolatile_store,
nonvolatile_load, bdxaddr20only>;
@@ -202,24 +235,30 @@ defm CondStore64 : CondStores<GR64, nonvolatile_store,
// The definitions here are for the call-clobbered registers.
let isCall = 1, Defs = [R0D, R1D, R2D, R3D, R4D, R5D, R14D,
- F0D, F1D, F2D, F3D, F4D, F5D, F6D, F7D, CC],
- R1 = 14, isCodeGenOnly = 1 in {
- def BRAS : InstRI<0xA75, (outs), (ins pcrel16call:$I2, variable_ops),
- "bras\t%r14, $I2", []>;
- def BRASL : InstRIL<0xC05, (outs), (ins pcrel32call:$I2, variable_ops),
- "brasl\t%r14, $I2", [(z_call pcrel32call:$I2)]>;
- def BASR : InstRR<0x0D, (outs), (ins ADDR64:$R2, variable_ops),
- "basr\t%r14, $R2", [(z_call ADDR64:$R2)]>;
+ F0D, F1D, F2D, F3D, F4D, F5D, F6D, F7D, CC] in {
+ def CallBRASL : Alias<6, (outs), (ins pcrel32:$I2, variable_ops),
+ [(z_call pcrel32:$I2)]>;
+ def CallBASR : Alias<2, (outs), (ins ADDR64:$R2, variable_ops),
+ [(z_call ADDR64:$R2)]>;
+}
+
+// Sibling calls. Indirect sibling calls must be via R1, since R2 upwards
+// are argument registers and since branching to R0 is a no-op.
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in {
+ def CallJG : Alias<6, (outs), (ins pcrel32:$I2),
+ [(z_sibcall pcrel32:$I2)]>;
+ let Uses = [R1D] in
+ def CallBR : Alias<2, (outs), (ins), [(z_sibcall R1D)]>;
}
// Define the general form of the call instructions for the asm parser.
// These instructions don't hard-code %r14 as the return address register.
-def AsmBRAS : InstRI<0xA75, (outs), (ins GR64:$R1, brtarget16:$I2),
- "bras\t$R1, $I2", []>;
-def AsmBRASL : InstRIL<0xC05, (outs), (ins GR64:$R1, brtarget32:$I2),
- "brasl\t$R1, $I2", []>;
-def AsmBASR : InstRR<0x0D, (outs), (ins GR64:$R1, ADDR64:$R2),
- "basr\t$R1, $R2", []>;
+def BRAS : InstRI<0xA75, (outs), (ins GR64:$R1, brtarget16:$I2),
+ "bras\t$R1, $I2", []>;
+def BRASL : InstRIL<0xC05, (outs), (ins GR64:$R1, brtarget32:$I2),
+ "brasl\t$R1, $I2", []>;
+def BASR : InstRR<0x0D, (outs), (ins GR64:$R1, ADDR64:$R2),
+ "basr\t$R1, $R2", []>;
//===----------------------------------------------------------------------===//
// Move instructions
@@ -227,6 +266,9 @@ def AsmBASR : InstRR<0x0D, (outs), (ins GR64:$R1, ADDR64:$R2),
// Register moves.
let neverHasSideEffects = 1 in {
+ // Expands to LR, RISBHG or RISBLG, depending on the choice of registers.
+ def LRMux : UnaryRRPseudo<"l", null_frag, GRX32, GRX32>,
+ Requires<[FeatureHighWord]>;
def LR : UnaryRR <"l", 0x18, null_frag, GR32, GR32>;
def LGR : UnaryRRE<"lg", 0xB904, null_frag, GR64, GR64>;
}
@@ -248,7 +290,10 @@ let Uses = [CC] in {
// Immediate moves.
let neverHasSideEffects = 1, isAsCheapAsAMove = 1, isMoveImm = 1,
isReMaterializable = 1 in {
- // 16-bit sign-extended immediates.
+ // 16-bit sign-extended immediates. LHIMux expands to LHI or IIHF,
+ // depending on the choice of register.
+ def LHIMux : UnaryRIPseudo<bitconvert, GRX32, imm32sx16>,
+ Requires<[FeatureHighWord]>;
def LHI : UnaryRI<"lhi", 0xA78, bitconvert, GR32, imm32sx16>;
def LGHI : UnaryRI<"lghi", 0xA79, bitconvert, GR64, imm64sx16>;
@@ -266,7 +311,12 @@ let neverHasSideEffects = 1, isAsCheapAsAMove = 1, isMoveImm = 1,
// Register loads.
let canFoldAsLoad = 1, SimpleBDXLoad = 1 in {
+ // Expands to L, LY or LFH, depending on the choice of register.
+ def LMux : UnaryRXYPseudo<"l", load, GRX32, 4>,
+ Requires<[FeatureHighWord]>;
defm L : UnaryRXPair<"l", 0x58, 0xE358, load, GR32, 4>;
+ def LFH : UnaryRXY<"lfh", 0xE3CA, load, GRH32, 4>,
+ Requires<[FeatureHighWord]>;
def LG : UnaryRXY<"lg", 0xE304, load, GR64, 8>;
// These instructions are split after register allocation, so we don't
@@ -298,8 +348,12 @@ let Uses = [CC] in {
// Register stores.
let SimpleBDXStore = 1 in {
- let isCodeGenOnly = 1 in
- defm ST32 : StoreRXPair<"st", 0x50, 0xE350, store, GR32, 4>;
+ // Expands to ST, STY or STFH, depending on the choice of register.
+ def STMux : StoreRXYPseudo<store, GRX32, 4>,
+ Requires<[FeatureHighWord]>;
+ defm ST : StoreRXPair<"st", 0x50, 0xE350, store, GR32, 4>;
+ def STFH : StoreRXY<"stfh", 0xE3CB, store, GRH32, 4>,
+ Requires<[FeatureHighWord]>;
def STG : StoreRXY<"stg", 0xE324, store, GR64, 8>;
// These instructions are split after register allocation, so we don't
@@ -309,15 +363,13 @@ let SimpleBDXStore = 1 in {
[(store GR128:$src, bdxaddr20only128:$dst)]>;
}
}
-let isCodeGenOnly = 1 in
- def STRL32 : StoreRILPC<"strl", 0xC4F, aligned_store, GR32>;
+def STRL : StoreRILPC<"strl", 0xC4F, aligned_store, GR32>;
def STGRL : StoreRILPC<"stgrl", 0xC4B, aligned_store, GR64>;
// Store on condition.
let isCodeGenOnly = 1, Uses = [CC] in {
- def STOC32 : CondStoreRSY<"stoc", 0xEBF3, GR32, 4>;
- def STOC : CondStoreRSY<"stoc", 0xEBF3, GR64, 4>;
- def STOCG : CondStoreRSY<"stocg", 0xEBE3, GR64, 8>;
+ def STOC : CondStoreRSY<"stoc", 0xEBF3, GR32, 4>;
+ def STOCG : CondStoreRSY<"stocg", 0xEBE3, GR64, 8>;
}
let Uses = [CC] in {
def AsmSTOC : AsmCondStoreRSY<"stoc", 0xEBF3, GR32, 4>;
@@ -334,33 +386,22 @@ def MVGHI : StoreSIL<"mvghi", 0xE548, store, imm64sx16>;
// Memory-to-memory moves.
let mayLoad = 1, mayStore = 1 in
- def MVC : InstSS<0xD2, (outs), (ins bdladdr12onlylen8:$BDL1,
- bdaddr12only:$BD2),
- "mvc\t$BDL1, $BD2", []>;
-
-let mayLoad = 1, mayStore = 1, usesCustomInserter = 1 in
- def MVCWrapper : Pseudo<(outs), (ins bdaddr12only:$dest, bdaddr12only:$src,
- imm32len8:$length),
- [(z_mvc bdaddr12only:$dest, bdaddr12only:$src,
- imm32len8:$length)]>;
-
-defm LoadStore8_32 : MVCLoadStore<anyextloadi8, truncstorei8, i32,
- MVCWrapper, 1>;
-defm LoadStore16_32 : MVCLoadStore<anyextloadi16, truncstorei16, i32,
- MVCWrapper, 2>;
-defm LoadStore32_32 : MVCLoadStore<load, store, i32, MVCWrapper, 4>;
-
-defm LoadStore8 : MVCLoadStore<anyextloadi8, truncstorei8, i64,
- MVCWrapper, 1>;
-defm LoadStore16 : MVCLoadStore<anyextloadi16, truncstorei16, i64,
- MVCWrapper, 2>;
-defm LoadStore32 : MVCLoadStore<anyextloadi32, truncstorei32, i64,
- MVCWrapper, 4>;
-defm LoadStore64 : MVCLoadStore<load, store, i64, MVCWrapper, 8>;
+ defm MVC : MemorySS<"mvc", 0xD2, z_mvc, z_mvc_loop>;
+
+// String moves.
+let mayLoad = 1, mayStore = 1, Defs = [CC], Uses = [R0L] in
+ defm MVST : StringRRE<"mvst", 0xB255, z_stpcpy>;
//===----------------------------------------------------------------------===//
// Sign extensions
//===----------------------------------------------------------------------===//
+//
+// Note that putting these before zero extensions means that we will prefer
+// them for anyextload*. There's not really much to choose between the two
+// either way, but sign-extending loads have a short LH and a long LHY,
+// while zero-extending loads have only the long LLH.
+//
+//===----------------------------------------------------------------------===//
// 32-bit extensions from registers.
let neverHasSideEffects = 1 in {
@@ -380,40 +421,33 @@ let Defs = [CC], CCValues = 0xE, CompareZeroCCMask = 0xE in
// Match 32-to-64-bit sign extensions in which the source is already
// in a 64-bit register.
def : Pat<(sext_inreg GR64:$src, i32),
- (LGFR (EXTRACT_SUBREG GR64:$src, subreg_32bit))>;
-
-// 32-bit extensions from memory.
-def LB : UnaryRXY<"lb", 0xE376, sextloadi8, GR32, 1>;
-defm LH : UnaryRXPair<"lh", 0x48, 0xE378, sextloadi16, GR32, 2>;
-def LHRL : UnaryRILPC<"lhrl", 0xC45, aligned_sextloadi16, GR32>;
+ (LGFR (EXTRACT_SUBREG GR64:$src, subreg_l32))>;
+
+// 32-bit extensions from 8-bit memory. LBMux expands to LB or LBH,
+// depending on the choice of register.
+def LBMux : UnaryRXYPseudo<"lb", asextloadi8, GRX32, 1>,
+ Requires<[FeatureHighWord]>;
+def LB : UnaryRXY<"lb", 0xE376, asextloadi8, GR32, 1>;
+def LBH : UnaryRXY<"lbh", 0xE3C0, asextloadi8, GRH32, 1>,
+ Requires<[FeatureHighWord]>;
+
+// 32-bit extensions from 16-bit memory. LHMux expands to LH or LHH,
+// depending on the choice of register.
+def LHMux : UnaryRXYPseudo<"lh", asextloadi16, GRX32, 2>,
+ Requires<[FeatureHighWord]>;
+defm LH : UnaryRXPair<"lh", 0x48, 0xE378, asextloadi16, GR32, 2>;
+def LHH : UnaryRXY<"lhh", 0xE3C4, asextloadi16, GRH32, 2>,
+ Requires<[FeatureHighWord]>;
+def LHRL : UnaryRILPC<"lhrl", 0xC45, aligned_asextloadi16, GR32>;
// 64-bit extensions from memory.
-def LGB : UnaryRXY<"lgb", 0xE377, sextloadi8, GR64, 1>;
-def LGH : UnaryRXY<"lgh", 0xE315, sextloadi16, GR64, 2>;
-def LGF : UnaryRXY<"lgf", 0xE314, sextloadi32, GR64, 4>;
-def LGHRL : UnaryRILPC<"lghrl", 0xC44, aligned_sextloadi16, GR64>;
-def LGFRL : UnaryRILPC<"lgfrl", 0xC4C, aligned_sextloadi32, GR64>;
+def LGB : UnaryRXY<"lgb", 0xE377, asextloadi8, GR64, 1>;
+def LGH : UnaryRXY<"lgh", 0xE315, asextloadi16, GR64, 2>;
+def LGF : UnaryRXY<"lgf", 0xE314, asextloadi32, GR64, 4>;
+def LGHRL : UnaryRILPC<"lghrl", 0xC44, aligned_asextloadi16, GR64>;
+def LGFRL : UnaryRILPC<"lgfrl", 0xC4C, aligned_asextloadi32, GR64>;
let Defs = [CC], CCValues = 0xE, CompareZeroCCMask = 0xE in
- def LTGF : UnaryRXY<"ltgf", 0xE332, sextloadi32, GR64, 4>;
-
-// If the sign of a load-extend operation doesn't matter, use the signed ones.
-// There's not really much to choose between the sign and zero extensions,
-// but LH is more compact than LLH for small offsets.
-def : Pat<(i32 (extloadi8 bdxaddr20only:$src)), (LB bdxaddr20only:$src)>;
-def : Pat<(i32 (extloadi16 bdxaddr12pair:$src)), (LH bdxaddr12pair:$src)>;
-def : Pat<(i32 (extloadi16 bdxaddr20pair:$src)), (LHY bdxaddr20pair:$src)>;
-
-def : Pat<(i64 (extloadi8 bdxaddr20only:$src)), (LGB bdxaddr20only:$src)>;
-def : Pat<(i64 (extloadi16 bdxaddr20only:$src)), (LGH bdxaddr20only:$src)>;
-def : Pat<(i64 (extloadi32 bdxaddr20only:$src)), (LGF bdxaddr20only:$src)>;
-
-// We want PC-relative addresses to be tried ahead of BD and BDX addresses.
-// However, BDXs have two extra operands and are therefore 6 units more
-// complex.
-let AddedComplexity = 7 in {
- def : Pat<(i32 (extloadi16 pcrel32:$src)), (LHRL pcrel32:$src)>;
- def : Pat<(i64 (extloadi16 pcrel32:$src)), (LGHRL pcrel32:$src)>;
-}
+ def LTGF : UnaryRXY<"ltgf", 0xE332, asextloadi32, GR64, 4>;
//===----------------------------------------------------------------------===//
// Zero extensions
@@ -421,8 +455,14 @@ let AddedComplexity = 7 in {
// 32-bit extensions from registers.
let neverHasSideEffects = 1 in {
- def LLCR : UnaryRRE<"llc", 0xB994, zext8, GR32, GR32>;
- def LLHR : UnaryRRE<"llh", 0xB995, zext16, GR32, GR32>;
+ // Expands to LLCR or RISB[LH]G, depending on the choice of registers.
+ def LLCRMux : UnaryRRPseudo<"llc", zext8, GRX32, GRX32>,
+ Requires<[FeatureHighWord]>;
+ def LLCR : UnaryRRE<"llc", 0xB994, zext8, GR32, GR32>;
+ // Expands to LLHR or RISB[LH]G, depending on the choice of registers.
+ def LLHRMux : UnaryRRPseudo<"llh", zext16, GRX32, GRX32>,
+ Requires<[FeatureHighWord]>;
+ def LLHR : UnaryRRE<"llh", 0xB995, zext16, GR32, GR32>;
}
// 64-bit extensions from registers.
@@ -435,19 +475,31 @@ let neverHasSideEffects = 1 in {
// Match 32-to-64-bit zero extensions in which the source is already
// in a 64-bit register.
def : Pat<(and GR64:$src, 0xffffffff),
- (LLGFR (EXTRACT_SUBREG GR64:$src, subreg_32bit))>;
+ (LLGFR (EXTRACT_SUBREG GR64:$src, subreg_l32))>;
-// 32-bit extensions from memory.
-def LLC : UnaryRXY<"llc", 0xE394, zextloadi8, GR32, 1>;
-def LLH : UnaryRXY<"llh", 0xE395, zextloadi16, GR32, 2>;
-def LLHRL : UnaryRILPC<"llhrl", 0xC42, aligned_zextloadi16, GR32>;
+// 32-bit extensions from 8-bit memory. LLCMux expands to LLC or LLCH,
+// depending on the choice of register.
+def LLCMux : UnaryRXYPseudo<"llc", azextloadi8, GRX32, 1>,
+ Requires<[FeatureHighWord]>;
+def LLC : UnaryRXY<"llc", 0xE394, azextloadi8, GR32, 1>;
+def LLCH : UnaryRXY<"llch", 0xE3C2, azextloadi8, GRH32, 1>,
+ Requires<[FeatureHighWord]>;
+
+// 32-bit extensions from 16-bit memory. LLHMux expands to LLH or LLHH,
+// depending on the choice of register.
+def LLHMux : UnaryRXYPseudo<"llh", azextloadi16, GRX32, 2>,
+ Requires<[FeatureHighWord]>;
+def LLH : UnaryRXY<"llh", 0xE395, azextloadi16, GR32, 2>;
+def LLHH : UnaryRXY<"llhh", 0xE3C6, azextloadi16, GRH32, 2>,
+ Requires<[FeatureHighWord]>;
+def LLHRL : UnaryRILPC<"llhrl", 0xC42, aligned_azextloadi16, GR32>;
// 64-bit extensions from memory.
-def LLGC : UnaryRXY<"llgc", 0xE390, zextloadi8, GR64, 1>;
-def LLGH : UnaryRXY<"llgh", 0xE391, zextloadi16, GR64, 2>;
-def LLGF : UnaryRXY<"llgf", 0xE316, zextloadi32, GR64, 4>;
-def LLGHRL : UnaryRILPC<"llghrl", 0xC46, aligned_zextloadi16, GR64>;
-def LLGFRL : UnaryRILPC<"llgfrl", 0xC4E, aligned_zextloadi32, GR64>;
+def LLGC : UnaryRXY<"llgc", 0xE390, azextloadi8, GR64, 1>;
+def LLGH : UnaryRXY<"llgh", 0xE391, azextloadi16, GR64, 2>;
+def LLGF : UnaryRXY<"llgf", 0xE316, azextloadi32, GR64, 4>;
+def LLGHRL : UnaryRILPC<"llghrl", 0xC46, aligned_azextloadi16, GR64>;
+def LLGFRL : UnaryRILPC<"llgfrl", 0xC4E, aligned_azextloadi32, GR64>;
//===----------------------------------------------------------------------===//
// Truncations
@@ -455,21 +507,31 @@ def LLGFRL : UnaryRILPC<"llgfrl", 0xC4E, aligned_zextloadi32, GR64>;
// Truncations of 64-bit registers to 32-bit registers.
def : Pat<(i32 (trunc GR64:$src)),
- (EXTRACT_SUBREG GR64:$src, subreg_32bit)>;
+ (EXTRACT_SUBREG GR64:$src, subreg_l32)>;
-// Truncations of 32-bit registers to memory.
-let isCodeGenOnly = 1 in {
- defm STC32 : StoreRXPair<"stc", 0x42, 0xE372, truncstorei8, GR32, 1>;
- defm STH32 : StoreRXPair<"sth", 0x40, 0xE370, truncstorei16, GR32, 2>;
- def STHRL32 : StoreRILPC<"sthrl", 0xC47, aligned_truncstorei16, GR32>;
-}
+// Truncations of 32-bit registers to 8-bit memory. STCMux expands to
+// STC, STCY or STCH, depending on the choice of register.
+def STCMux : StoreRXYPseudo<truncstorei8, GRX32, 1>,
+ Requires<[FeatureHighWord]>;
+defm STC : StoreRXPair<"stc", 0x42, 0xE372, truncstorei8, GR32, 1>;
+def STCH : StoreRXY<"stch", 0xE3C3, truncstorei8, GRH32, 1>,
+ Requires<[FeatureHighWord]>;
+
+// Truncations of 32-bit registers to 16-bit memory. STHMux expands to
+// STH, STHY or STHH, depending on the choice of register.
+def STHMux : StoreRXYPseudo<truncstorei16, GRX32, 2>,
+ Requires<[FeatureHighWord]>;
+defm STH : StoreRXPair<"sth", 0x40, 0xE370, truncstorei16, GR32, 2>;
+def STHH : StoreRXY<"sthh", 0xE3C7, truncstorei16, GRH32, 2>,
+ Requires<[FeatureHighWord]>;
+def STHRL : StoreRILPC<"sthrl", 0xC47, aligned_truncstorei16, GR32>;
// Truncations of 64-bit registers to memory.
-defm STC : StoreRXPair<"stc", 0x42, 0xE372, truncstorei8, GR64, 1>;
-defm STH : StoreRXPair<"sth", 0x40, 0xE370, truncstorei16, GR64, 2>;
-def STHRL : StoreRILPC<"sthrl", 0xC47, aligned_truncstorei16, GR64>;
-defm ST : StoreRXPair<"st", 0x50, 0xE350, truncstorei32, GR64, 4>;
-def STRL : StoreRILPC<"strl", 0xC4F, aligned_truncstorei32, GR64>;
+defm : StoreGR64Pair<STC, STCY, truncstorei8>;
+defm : StoreGR64Pair<STH, STHY, truncstorei16>;
+def : StoreGR64PC<STHRL, aligned_truncstorei16>;
+defm : StoreGR64Pair<ST, STY, truncstorei32>;
+def : StoreGR64PC<STRL, aligned_truncstorei32>;
//===----------------------------------------------------------------------===//
// Multi-register moves
@@ -528,11 +590,31 @@ let neverHasSideEffects = 1, isAsCheapAsAMove = 1, isMoveImm = 1,
}
//===----------------------------------------------------------------------===//
-// Negation
+// Absolute value and negation
//===----------------------------------------------------------------------===//
let Defs = [CC] in {
let CCValues = 0xF, CompareZeroCCMask = 0x8 in {
+ def LPR : UnaryRR <"lp", 0x10, z_iabs32, GR32, GR32>;
+ def LPGR : UnaryRRE<"lpg", 0xB900, z_iabs64, GR64, GR64>;
+ }
+ let CCValues = 0xE, CompareZeroCCMask = 0xE in
+ def LPGFR : UnaryRRE<"lpgf", 0xB910, null_frag, GR64, GR32>;
+}
+defm : SXU<z_iabs64, LPGFR>;
+
+let Defs = [CC] in {
+ let CCValues = 0xF, CompareZeroCCMask = 0x8 in {
+ def LNR : UnaryRR <"ln", 0x11, z_inegabs32, GR32, GR32>;
+ def LNGR : UnaryRRE<"lng", 0xB901, z_inegabs64, GR64, GR64>;
+ }
+ let CCValues = 0xE, CompareZeroCCMask = 0xE in
+ def LNGFR : UnaryRRE<"lngf", 0xB911, null_frag, GR64, GR32>;
+}
+defm : SXU<z_inegabs64, LNGFR>;
+
+let Defs = [CC] in {
+ let CCValues = 0xF, CompareZeroCCMask = 0x8 in {
def LCR : UnaryRR <"lc", 0x13, ineg, GR32, GR32>;
def LCGR : UnaryRRE<"lcg", 0xB903, ineg, GR64, GR64>;
}
@@ -546,43 +628,51 @@ defm : SXU<ineg, LCGFR>;
//===----------------------------------------------------------------------===//
let isCodeGenOnly = 1 in
- defm IC32 : BinaryRXPair<"ic", 0x43, 0xE373, inserti8, GR32, zextloadi8, 1>;
-defm IC : BinaryRXPair<"ic", 0x43, 0xE373, inserti8, GR64, zextloadi8, 1>;
+ defm IC32 : BinaryRXPair<"ic", 0x43, 0xE373, inserti8, GR32, azextloadi8, 1>;
+defm IC : BinaryRXPair<"ic", 0x43, 0xE373, inserti8, GR64, azextloadi8, 1>;
-defm : InsertMem<"inserti8", IC32, GR32, zextloadi8, bdxaddr12pair>;
-defm : InsertMem<"inserti8", IC32Y, GR32, zextloadi8, bdxaddr20pair>;
+defm : InsertMem<"inserti8", IC32, GR32, azextloadi8, bdxaddr12pair>;
+defm : InsertMem<"inserti8", IC32Y, GR32, azextloadi8, bdxaddr20pair>;
-defm : InsertMem<"inserti8", IC, GR64, zextloadi8, bdxaddr12pair>;
-defm : InsertMem<"inserti8", ICY, GR64, zextloadi8, bdxaddr20pair>;
+defm : InsertMem<"inserti8", IC, GR64, azextloadi8, bdxaddr12pair>;
+defm : InsertMem<"inserti8", ICY, GR64, azextloadi8, bdxaddr20pair>;
// Insertions of a 16-bit immediate, leaving other bits unaffected.
// We don't have or_as_insert equivalents of these operations because
// OI is available instead.
-let isCodeGenOnly = 1 in {
- def IILL32 : BinaryRI<"iill", 0xA53, insertll, GR32, imm32ll16>;
- def IILH32 : BinaryRI<"iilh", 0xA52, insertlh, GR32, imm32lh16>;
-}
-def IILL : BinaryRI<"iill", 0xA53, insertll, GR64, imm64ll16>;
-def IILH : BinaryRI<"iilh", 0xA52, insertlh, GR64, imm64lh16>;
-def IIHL : BinaryRI<"iihl", 0xA51, inserthl, GR64, imm64hl16>;
-def IIHH : BinaryRI<"iihh", 0xA50, inserthh, GR64, imm64hh16>;
+//
+// IIxMux expands to II[LH]x, depending on the choice of register.
+def IILMux : BinaryRIPseudo<insertll, GRX32, imm32ll16>,
+ Requires<[FeatureHighWord]>;
+def IIHMux : BinaryRIPseudo<insertlh, GRX32, imm32lh16>,
+ Requires<[FeatureHighWord]>;
+def IILL : BinaryRI<"iill", 0xA53, insertll, GR32, imm32ll16>;
+def IILH : BinaryRI<"iilh", 0xA52, insertlh, GR32, imm32lh16>;
+def IIHL : BinaryRI<"iihl", 0xA51, insertll, GRH32, imm32ll16>;
+def IIHH : BinaryRI<"iihh", 0xA50, insertlh, GRH32, imm32lh16>;
+def IILL64 : BinaryAliasRI<insertll, GR64, imm64ll16>;
+def IILH64 : BinaryAliasRI<insertlh, GR64, imm64lh16>;
+def IIHL64 : BinaryAliasRI<inserthl, GR64, imm64hl16>;
+def IIHH64 : BinaryAliasRI<inserthh, GR64, imm64hh16>;
// ...likewise for 32-bit immediates. For GR32s this is a general
// full-width move. (We use IILF rather than something like LLILF
// for 32-bit moves because IILF leaves the upper 32 bits of the
// GR64 unchanged.)
-let isCodeGenOnly = 1, isAsCheapAsAMove = 1, isMoveImm = 1,
- isReMaterializable = 1 in {
- def IILF32 : UnaryRIL<"iilf", 0xC09, bitconvert, GR32, uimm32>;
+let isAsCheapAsAMove = 1, isMoveImm = 1, isReMaterializable = 1 in {
+ def IIFMux : UnaryRIPseudo<bitconvert, GRX32, uimm32>,
+ Requires<[FeatureHighWord]>;
+ def IILF : UnaryRIL<"iilf", 0xC09, bitconvert, GR32, uimm32>;
+ def IIHF : UnaryRIL<"iihf", 0xC08, bitconvert, GRH32, uimm32>;
}
-def IILF : BinaryRIL<"iilf", 0xC09, insertlf, GR64, imm64lf32>;
-def IIHF : BinaryRIL<"iihf", 0xC08, inserthf, GR64, imm64hf32>;
+def IILF64 : BinaryAliasRIL<insertlf, GR64, imm64lf32>;
+def IIHF64 : BinaryAliasRIL<inserthf, GR64, imm64hf32>;
// An alternative model of inserthf, with the first operand being
// a zero-extended value.
def : Pat<(or (zext32 GR32:$src), imm64hf32:$imm),
- (IIHF (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR32:$src, subreg_32bit),
- imm64hf32:$imm)>;
+ (IIHF64 (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR32:$src, subreg_l32),
+ imm64hf32:$imm)>;
//===----------------------------------------------------------------------===//
// Addition
@@ -598,17 +688,22 @@ let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0x8 in {
def AGFR : BinaryRRE<"agf", 0xB918, null_frag, GR64, GR32>;
// Addition of signed 16-bit immediates.
+ defm AHIMux : BinaryRIAndKPseudo<"ahimux", add, GRX32, imm32sx16>;
defm AHI : BinaryRIAndK<"ahi", 0xA7A, 0xECD8, add, GR32, imm32sx16>;
defm AGHI : BinaryRIAndK<"aghi", 0xA7B, 0xECD9, add, GR64, imm64sx16>;
// Addition of signed 32-bit immediates.
+ def AFIMux : BinaryRIPseudo<add, GRX32, simm32>,
+ Requires<[FeatureHighWord]>;
def AFI : BinaryRIL<"afi", 0xC29, add, GR32, simm32>;
+ def AIH : BinaryRIL<"aih", 0xCC8, add, GRH32, simm32>,
+ Requires<[FeatureHighWord]>;
def AGFI : BinaryRIL<"agfi", 0xC28, add, GR64, imm64sx32>;
// Addition of memory.
- defm AH : BinaryRXPair<"ah", 0x4A, 0xE37A, add, GR32, sextloadi16, 2>;
+ defm AH : BinaryRXPair<"ah", 0x4A, 0xE37A, add, GR32, asextloadi16, 2>;
defm A : BinaryRXPair<"a", 0x5A, 0xE35A, add, GR32, load, 4>;
- def AGF : BinaryRXY<"agf", 0xE318, add, GR64, sextloadi32, 4>;
+ def AGF : BinaryRXY<"agf", 0xE318, add, GR64, asextloadi32, 4>;
def AG : BinaryRXY<"ag", 0xE308, add, GR64, load, 8>;
// Addition to memory.
@@ -638,7 +733,7 @@ let Defs = [CC] in {
// Addition of memory.
defm AL : BinaryRXPair<"al", 0x5E, 0xE35E, addc, GR32, load, 4>;
- def ALGF : BinaryRXY<"algf", 0xE31A, addc, GR64, zextloadi32, 4>;
+ def ALGF : BinaryRXY<"algf", 0xE31A, addc, GR64, azextloadi32, 4>;
def ALG : BinaryRXY<"alg", 0xE30A, addc, GR64, load, 8>;
}
defm : ZXB<addc, GR64, ALGFR>;
@@ -667,9 +762,9 @@ let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0x8 in {
defm SGR : BinaryRREAndK<"sg", 0xB909, 0xB9E9, sub, GR64, GR64>;
// Subtraction of memory.
- defm SH : BinaryRXPair<"sh", 0x4B, 0xE37B, sub, GR32, sextloadi16, 2>;
+ defm SH : BinaryRXPair<"sh", 0x4B, 0xE37B, sub, GR32, asextloadi16, 2>;
defm S : BinaryRXPair<"s", 0x5B, 0xE35B, sub, GR32, load, 4>;
- def SGF : BinaryRXY<"sgf", 0xE319, sub, GR64, sextloadi32, 4>;
+ def SGF : BinaryRXY<"sgf", 0xE319, sub, GR64, asextloadi32, 4>;
def SG : BinaryRXY<"sg", 0xE309, sub, GR64, load, 8>;
}
defm : SXB<sub, GR64, SGFR>;
@@ -688,7 +783,7 @@ let Defs = [CC] in {
// Subtraction of memory.
defm SL : BinaryRXPair<"sl", 0x5F, 0xE35F, subc, GR32, load, 4>;
- def SLGF : BinaryRXY<"slgf", 0xE31B, subc, GR64, zextloadi32, 4>;
+ def SLGF : BinaryRXY<"slgf", 0xE31B, subc, GR64, azextloadi32, 4>;
def SLG : BinaryRXY<"slg", 0xE30B, subc, GR64, load, 8>;
}
defm : ZXB<subc, GR64, SLGFR>;
@@ -718,22 +813,33 @@ let Defs = [CC] in {
let isConvertibleToThreeAddress = 1 in {
// ANDs of a 16-bit immediate, leaving other bits unaffected.
// The CC result only reflects the 16-bit field, not the full register.
- let isCodeGenOnly = 1 in {
- def NILL32 : BinaryRI<"nill", 0xA57, and, GR32, imm32ll16c>;
- def NILH32 : BinaryRI<"nilh", 0xA56, and, GR32, imm32lh16c>;
- }
- def NILL : BinaryRI<"nill", 0xA57, and, GR64, imm64ll16c>;
- def NILH : BinaryRI<"nilh", 0xA56, and, GR64, imm64lh16c>;
- def NIHL : BinaryRI<"nihl", 0xA55, and, GR64, imm64hl16c>;
- def NIHH : BinaryRI<"nihh", 0xA54, and, GR64, imm64hh16c>;
+ //
+ // NIxMux expands to NI[LH]x, depending on the choice of register.
+ def NILMux : BinaryRIPseudo<and, GRX32, imm32ll16c>,
+ Requires<[FeatureHighWord]>;
+ def NIHMux : BinaryRIPseudo<and, GRX32, imm32lh16c>,
+ Requires<[FeatureHighWord]>;
+ def NILL : BinaryRI<"nill", 0xA57, and, GR32, imm32ll16c>;
+ def NILH : BinaryRI<"nilh", 0xA56, and, GR32, imm32lh16c>;
+ def NIHL : BinaryRI<"nihl", 0xA55, and, GRH32, imm32ll16c>;
+ def NIHH : BinaryRI<"nihh", 0xA54, and, GRH32, imm32lh16c>;
+ def NILL64 : BinaryAliasRI<and, GR64, imm64ll16c>;
+ def NILH64 : BinaryAliasRI<and, GR64, imm64lh16c>;
+ def NIHL64 : BinaryAliasRI<and, GR64, imm64hl16c>;
+ def NIHH64 : BinaryAliasRI<and, GR64, imm64hh16c>;
// ANDs of a 32-bit immediate, leaving other bits unaffected.
// The CC result only reflects the 32-bit field, which means we can
// use it as a zero indicator for i32 operations but not otherwise.
- let isCodeGenOnly = 1, CCValues = 0xC, CompareZeroCCMask = 0x8 in
- def NILF32 : BinaryRIL<"nilf", 0xC0B, and, GR32, uimm32>;
- def NILF : BinaryRIL<"nilf", 0xC0B, and, GR64, imm64lf32c>;
- def NIHF : BinaryRIL<"nihf", 0xC0A, and, GR64, imm64hf32c>;
+ let CCValues = 0xC, CompareZeroCCMask = 0x8 in {
+ // Expands to NILF or NIHF, depending on the choice of register.
+ def NIFMux : BinaryRIPseudo<and, GRX32, uimm32>,
+ Requires<[FeatureHighWord]>;
+ def NILF : BinaryRIL<"nilf", 0xC0B, and, GR32, uimm32>;
+ def NIHF : BinaryRIL<"nihf", 0xC0A, and, GRH32, uimm32>;
+ }
+ def NILF64 : BinaryAliasRIL<and, GR64, imm64lf32c>;
+ def NIHF64 : BinaryAliasRIL<and, GR64, imm64hf32c>;
}
// ANDs of memory.
@@ -744,6 +850,10 @@ let Defs = [CC] in {
// AND to memory
defm NI : BinarySIPair<"ni", 0x94, 0xEB54, null_frag, uimm8>;
+
+ // Block AND.
+ let mayLoad = 1, mayStore = 1 in
+ defm NC : MemorySS<"nc", 0xD4, z_nc, z_nc_loop>;
}
defm : RMWIByte<and, bdaddr12pair, NI>;
defm : RMWIByte<and, bdaddr20pair, NIY>;
@@ -761,22 +871,33 @@ let Defs = [CC] in {
// ORs of a 16-bit immediate, leaving other bits unaffected.
// The CC result only reflects the 16-bit field, not the full register.
- let isCodeGenOnly = 1 in {
- def OILL32 : BinaryRI<"oill", 0xA5B, or, GR32, imm32ll16>;
- def OILH32 : BinaryRI<"oilh", 0xA5A, or, GR32, imm32lh16>;
- }
- def OILL : BinaryRI<"oill", 0xA5B, or, GR64, imm64ll16>;
- def OILH : BinaryRI<"oilh", 0xA5A, or, GR64, imm64lh16>;
- def OIHL : BinaryRI<"oihl", 0xA59, or, GR64, imm64hl16>;
- def OIHH : BinaryRI<"oihh", 0xA58, or, GR64, imm64hh16>;
+ //
+ // OIxMux expands to OI[LH]x, depending on the choice of register.
+ def OILMux : BinaryRIPseudo<or, GRX32, imm32ll16>,
+ Requires<[FeatureHighWord]>;
+ def OIHMux : BinaryRIPseudo<or, GRX32, imm32lh16>,
+ Requires<[FeatureHighWord]>;
+ def OILL : BinaryRI<"oill", 0xA5B, or, GR32, imm32ll16>;
+ def OILH : BinaryRI<"oilh", 0xA5A, or, GR32, imm32lh16>;
+ def OIHL : BinaryRI<"oihl", 0xA59, or, GRH32, imm32ll16>;
+ def OIHH : BinaryRI<"oihh", 0xA58, or, GRH32, imm32lh16>;
+ def OILL64 : BinaryAliasRI<or, GR64, imm64ll16>;
+ def OILH64 : BinaryAliasRI<or, GR64, imm64lh16>;
+ def OIHL64 : BinaryAliasRI<or, GR64, imm64hl16>;
+ def OIHH64 : BinaryAliasRI<or, GR64, imm64hh16>;
// ORs of a 32-bit immediate, leaving other bits unaffected.
// The CC result only reflects the 32-bit field, which means we can
// use it as a zero indicator for i32 operations but not otherwise.
- let isCodeGenOnly = 1, CCValues = 0xC, CompareZeroCCMask = 0x8 in
- def OILF32 : BinaryRIL<"oilf", 0xC0D, or, GR32, uimm32>;
- def OILF : BinaryRIL<"oilf", 0xC0D, or, GR64, imm64lf32>;
- def OIHF : BinaryRIL<"oihf", 0xC0C, or, GR64, imm64hf32>;
+ let CCValues = 0xC, CompareZeroCCMask = 0x8 in {
+ // Expands to OILF or OIHF, depending on the choice of register.
+ def OIFMux : BinaryRIPseudo<or, GRX32, uimm32>,
+ Requires<[FeatureHighWord]>;
+ def OILF : BinaryRIL<"oilf", 0xC0D, or, GR32, uimm32>;
+ def OIHF : BinaryRIL<"oihf", 0xC0C, or, GRH32, uimm32>;
+ }
+ def OILF64 : BinaryAliasRIL<or, GR64, imm64lf32>;
+ def OIHF64 : BinaryAliasRIL<or, GR64, imm64hf32>;
// ORs of memory.
let CCValues = 0xC, CompareZeroCCMask = 0x8 in {
@@ -786,6 +907,10 @@ let Defs = [CC] in {
// OR to memory
defm OI : BinarySIPair<"oi", 0x96, 0xEB56, null_frag, uimm8>;
+
+ // Block OR.
+ let mayLoad = 1, mayStore = 1 in
+ defm OC : MemorySS<"oc", 0xD6, z_oc, z_oc_loop>;
}
defm : RMWIByte<or, bdaddr12pair, OI>;
defm : RMWIByte<or, bdaddr20pair, OIY>;
@@ -804,10 +929,15 @@ let Defs = [CC] in {
// XORs of a 32-bit immediate, leaving other bits unaffected.
// The CC result only reflects the 32-bit field, which means we can
// use it as a zero indicator for i32 operations but not otherwise.
- let isCodeGenOnly = 1, CCValues = 0xC, CompareZeroCCMask = 0x8 in
- def XILF32 : BinaryRIL<"xilf", 0xC07, xor, GR32, uimm32>;
- def XILF : BinaryRIL<"xilf", 0xC07, xor, GR64, imm64lf32>;
- def XIHF : BinaryRIL<"xihf", 0xC06, xor, GR64, imm64hf32>;
+ let CCValues = 0xC, CompareZeroCCMask = 0x8 in {
+ // Expands to XILF or XIHF, depending on the choice of register.
+ def XIFMux : BinaryRIPseudo<xor, GRX32, uimm32>,
+ Requires<[FeatureHighWord]>;
+ def XILF : BinaryRIL<"xilf", 0xC07, xor, GR32, uimm32>;
+ def XIHF : BinaryRIL<"xihf", 0xC06, xor, GRH32, uimm32>;
+ }
+ def XILF64 : BinaryAliasRIL<xor, GR64, imm64lf32>;
+ def XIHF64 : BinaryAliasRIL<xor, GR64, imm64hf32>;
// XORs of memory.
let CCValues = 0xC, CompareZeroCCMask = 0x8 in {
@@ -817,6 +947,10 @@ let Defs = [CC] in {
// XOR to memory
defm XI : BinarySIPair<"xi", 0x97, 0xEB57, null_frag, uimm8>;
+
+ // Block XOR.
+ let mayLoad = 1, mayStore = 1 in
+ defm XC : MemorySS<"xc", 0xD7, z_xc, z_xc_loop>;
}
defm : RMWIByte<xor, bdaddr12pair, XI>;
defm : RMWIByte<xor, bdaddr20pair, XIY>;
@@ -842,9 +976,9 @@ def MSFI : BinaryRIL<"msfi", 0xC21, mul, GR32, simm32>;
def MSGFI : BinaryRIL<"msgfi", 0xC20, mul, GR64, imm64sx32>;
// Multiplication of memory.
-defm MH : BinaryRXPair<"mh", 0x4C, 0xE37C, mul, GR32, sextloadi16, 2>;
+defm MH : BinaryRXPair<"mh", 0x4C, 0xE37C, mul, GR32, asextloadi16, 2>;
defm MS : BinaryRXPair<"ms", 0x71, 0xE351, mul, GR32, load, 4>;
-def MSGF : BinaryRXY<"msgf", 0xE31C, mul, GR64, sextloadi32, 4>;
+def MSGF : BinaryRXY<"msgf", 0xE31C, mul, GR64, asextloadi32, 4>;
def MSG : BinaryRXY<"msg", 0xE30C, mul, GR64, load, 8>;
// Multiplication of a register, producing two results.
@@ -909,13 +1043,15 @@ let Defs = [CC] in {
// Forms of RISBG that only affect one word of the destination register.
// They do not set CC.
-let isCodeGenOnly = 1 in
- def RISBLG32 : RotateSelectRIEf<"risblg", 0xEC51, GR32, GR32>,
- Requires<[FeatureHighWord]>;
-def RISBHG : RotateSelectRIEf<"risbhg", 0xEC5D, GR64, GR64>,
- Requires<[FeatureHighWord]>;
-def RISBLG : RotateSelectRIEf<"risblg", 0xEC51, GR64, GR64>,
- Requires<[FeatureHighWord]>;
+def RISBMux : RotateSelectRIEfPseudo<GRX32, GRX32>, Requires<[FeatureHighWord]>;
+def RISBLL : RotateSelectAliasRIEf<GR32, GR32>, Requires<[FeatureHighWord]>;
+def RISBLH : RotateSelectAliasRIEf<GR32, GRH32>, Requires<[FeatureHighWord]>;
+def RISBHL : RotateSelectAliasRIEf<GRH32, GR32>, Requires<[FeatureHighWord]>;
+def RISBHH : RotateSelectAliasRIEf<GRH32, GRH32>, Requires<[FeatureHighWord]>;
+def RISBLG : RotateSelectRIEf<"risblg", 0xEC51, GR32, GR64>,
+ Requires<[FeatureHighWord]>;
+def RISBHG : RotateSelectRIEf<"risbhg", 0xEC5D, GRH32, GR64>,
+ Requires<[FeatureHighWord]>;
// Rotate second operand left and perform a logical operation with selected
// bits of the first operand. The CC result only describes the selected bits,
@@ -930,39 +1066,50 @@ let Defs = [CC] in {
// Comparison
//===----------------------------------------------------------------------===//
-// Signed comparisons.
+// Signed comparisons. We put these before the unsigned comparisons because
+// some of the signed forms have COMPARE AND BRANCH equivalents whereas none
+// of the unsigned forms do.
let Defs = [CC], CCValues = 0xE in {
// Comparison with a register.
- def CR : CompareRR <"c", 0x19, z_cmp, GR32, GR32>;
+ def CR : CompareRR <"c", 0x19, z_scmp, GR32, GR32>;
def CGFR : CompareRRE<"cgf", 0xB930, null_frag, GR64, GR32>;
- def CGR : CompareRRE<"cg", 0xB920, z_cmp, GR64, GR64>;
+ def CGR : CompareRRE<"cg", 0xB920, z_scmp, GR64, GR64>;
// Comparison with a signed 16-bit immediate.
- def CHI : CompareRI<"chi", 0xA7E, z_cmp, GR32, imm32sx16>;
- def CGHI : CompareRI<"cghi", 0xA7F, z_cmp, GR64, imm64sx16>;
-
- // Comparison with a signed 32-bit immediate.
- def CFI : CompareRIL<"cfi", 0xC2D, z_cmp, GR32, simm32>;
- def CGFI : CompareRIL<"cgfi", 0xC2C, z_cmp, GR64, imm64sx32>;
+ def CHI : CompareRI<"chi", 0xA7E, z_scmp, GR32, imm32sx16>;
+ def CGHI : CompareRI<"cghi", 0xA7F, z_scmp, GR64, imm64sx16>;
+
+ // Comparison with a signed 32-bit immediate. CFIMux expands to CFI or CIH,
+ // depending on the choice of register.
+ def CFIMux : CompareRIPseudo<z_scmp, GRX32, simm32>,
+ Requires<[FeatureHighWord]>;
+ def CFI : CompareRIL<"cfi", 0xC2D, z_scmp, GR32, simm32>;
+ def CIH : CompareRIL<"cih", 0xCCD, z_scmp, GRH32, simm32>,
+ Requires<[FeatureHighWord]>;
+ def CGFI : CompareRIL<"cgfi", 0xC2C, z_scmp, GR64, imm64sx32>;
// Comparison with memory.
- defm CH : CompareRXPair<"ch", 0x49, 0xE379, z_cmp, GR32, sextloadi16, 2>;
- defm C : CompareRXPair<"c", 0x59, 0xE359, z_cmp, GR32, load, 4>;
- def CGH : CompareRXY<"cgh", 0xE334, z_cmp, GR64, sextloadi16, 2>;
- def CGF : CompareRXY<"cgf", 0xE330, z_cmp, GR64, sextloadi32, 4>;
- def CG : CompareRXY<"cg", 0xE320, z_cmp, GR64, load, 8>;
- def CHRL : CompareRILPC<"chrl", 0xC65, z_cmp, GR32, aligned_sextloadi16>;
- def CRL : CompareRILPC<"crl", 0xC6D, z_cmp, GR32, aligned_load>;
- def CGHRL : CompareRILPC<"cghrl", 0xC64, z_cmp, GR64, aligned_sextloadi16>;
- def CGFRL : CompareRILPC<"cgfrl", 0xC6C, z_cmp, GR64, aligned_sextloadi32>;
- def CGRL : CompareRILPC<"cgrl", 0xC68, z_cmp, GR64, aligned_load>;
+ defm CH : CompareRXPair<"ch", 0x49, 0xE379, z_scmp, GR32, asextloadi16, 2>;
+ def CMux : CompareRXYPseudo<z_scmp, GRX32, load, 4>,
+ Requires<[FeatureHighWord]>;
+ defm C : CompareRXPair<"c", 0x59, 0xE359, z_scmp, GR32, load, 4>;
+ def CHF : CompareRXY<"chf", 0xE3CD, z_scmp, GRH32, load, 4>,
+ Requires<[FeatureHighWord]>;
+ def CGH : CompareRXY<"cgh", 0xE334, z_scmp, GR64, asextloadi16, 2>;
+ def CGF : CompareRXY<"cgf", 0xE330, z_scmp, GR64, asextloadi32, 4>;
+ def CG : CompareRXY<"cg", 0xE320, z_scmp, GR64, load, 8>;
+ def CHRL : CompareRILPC<"chrl", 0xC65, z_scmp, GR32, aligned_asextloadi16>;
+ def CRL : CompareRILPC<"crl", 0xC6D, z_scmp, GR32, aligned_load>;
+ def CGHRL : CompareRILPC<"cghrl", 0xC64, z_scmp, GR64, aligned_asextloadi16>;
+ def CGFRL : CompareRILPC<"cgfrl", 0xC6C, z_scmp, GR64, aligned_asextloadi32>;
+ def CGRL : CompareRILPC<"cgrl", 0xC68, z_scmp, GR64, aligned_load>;
// Comparison between memory and a signed 16-bit immediate.
- def CHHSI : CompareSIL<"chhsi", 0xE554, z_cmp, sextloadi16, imm32sx16>;
- def CHSI : CompareSIL<"chsi", 0xE55C, z_cmp, load, imm32sx16>;
- def CGHSI : CompareSIL<"cghsi", 0xE558, z_cmp, load, imm64sx16>;
+ def CHHSI : CompareSIL<"chhsi", 0xE554, z_scmp, asextloadi16, imm32sx16>;
+ def CHSI : CompareSIL<"chsi", 0xE55C, z_scmp, load, imm32sx16>;
+ def CGHSI : CompareSIL<"cghsi", 0xE558, z_scmp, load, imm64sx16>;
}
-defm : SXB<z_cmp, GR64, CGFR>;
+defm : SXB<z_scmp, GR64, CGFR>;
// Unsigned comparisons.
let Defs = [CC], CCValues = 0xE, IsLogical = 1 in {
@@ -971,35 +1118,79 @@ let Defs = [CC], CCValues = 0xE, IsLogical = 1 in {
def CLGFR : CompareRRE<"clgf", 0xB931, null_frag, GR64, GR32>;
def CLGR : CompareRRE<"clg", 0xB921, z_ucmp, GR64, GR64>;
- // Comparison with a signed 32-bit immediate.
+ // Comparison with an unsigned 32-bit immediate. CLFIMux expands to CLFI
+ // or CLIH, depending on the choice of register.
+ def CLFIMux : CompareRIPseudo<z_ucmp, GRX32, uimm32>,
+ Requires<[FeatureHighWord]>;
def CLFI : CompareRIL<"clfi", 0xC2F, z_ucmp, GR32, uimm32>;
+  def CLIH  : CompareRIL<"clih",  0xCCF, z_ucmp, GRH32, uimm32>,
+ Requires<[FeatureHighWord]>;
def CLGFI : CompareRIL<"clgfi", 0xC2E, z_ucmp, GR64, imm64zx32>;
// Comparison with memory.
+ def CLMux : CompareRXYPseudo<z_ucmp, GRX32, load, 4>,
+ Requires<[FeatureHighWord]>;
defm CL : CompareRXPair<"cl", 0x55, 0xE355, z_ucmp, GR32, load, 4>;
- def CLGF : CompareRXY<"clgf", 0xE331, z_ucmp, GR64, zextloadi32, 4>;
+ def CLHF : CompareRXY<"clhf", 0xE3CF, z_ucmp, GRH32, load, 4>,
+ Requires<[FeatureHighWord]>;
+ def CLGF : CompareRXY<"clgf", 0xE331, z_ucmp, GR64, azextloadi32, 4>;
def CLG : CompareRXY<"clg", 0xE321, z_ucmp, GR64, load, 8>;
def CLHRL : CompareRILPC<"clhrl", 0xC67, z_ucmp, GR32,
- aligned_zextloadi16>;
+ aligned_azextloadi16>;
def CLRL : CompareRILPC<"clrl", 0xC6F, z_ucmp, GR32,
aligned_load>;
def CLGHRL : CompareRILPC<"clghrl", 0xC66, z_ucmp, GR64,
- aligned_zextloadi16>;
+ aligned_azextloadi16>;
def CLGFRL : CompareRILPC<"clgfrl", 0xC6E, z_ucmp, GR64,
- aligned_zextloadi32>;
+ aligned_azextloadi32>;
def CLGRL : CompareRILPC<"clgrl", 0xC6A, z_ucmp, GR64,
aligned_load>;
// Comparison between memory and an unsigned 8-bit immediate.
- defm CLI : CompareSIPair<"cli", 0x95, 0xEB55, z_ucmp, zextloadi8, imm32zx8>;
+ defm CLI : CompareSIPair<"cli", 0x95, 0xEB55, z_ucmp, azextloadi8, imm32zx8>;
// Comparison between memory and an unsigned 16-bit immediate.
- def CLHHSI : CompareSIL<"clhhsi", 0xE555, z_ucmp, zextloadi16, imm32zx16>;
- def CLFHSI : CompareSIL<"clfhsi", 0xE55D, z_ucmp, load, imm32zx16>;
- def CLGHSI : CompareSIL<"clghsi", 0xE559, z_ucmp, load, imm64zx16>;
+ def CLHHSI : CompareSIL<"clhhsi", 0xE555, z_ucmp, azextloadi16, imm32zx16>;
+ def CLFHSI : CompareSIL<"clfhsi", 0xE55D, z_ucmp, load, imm32zx16>;
+ def CLGHSI : CompareSIL<"clghsi", 0xE559, z_ucmp, load, imm64zx16>;
}
defm : ZXB<z_ucmp, GR64, CLGFR>;
+// Memory-to-memory comparison.
+let mayLoad = 1, Defs = [CC] in
+ defm CLC : MemorySS<"clc", 0xD5, z_clc, z_clc_loop>;
+
+// String comparison.
+let mayLoad = 1, Defs = [CC], Uses = [R0L] in
+ defm CLST : StringRRE<"clst", 0xB25D, z_strcmp>;
+
+// Test under mask.
+let Defs = [CC] in {
+ // TMxMux expands to TM[LH]x, depending on the choice of register.
+ def TMLMux : CompareRIPseudo<z_tm_reg, GRX32, imm32ll16>,
+ Requires<[FeatureHighWord]>;
+ def TMHMux : CompareRIPseudo<z_tm_reg, GRX32, imm32lh16>,
+ Requires<[FeatureHighWord]>;
+ def TMLL : CompareRI<"tmll", 0xA71, z_tm_reg, GR32, imm32ll16>;
+ def TMLH : CompareRI<"tmlh", 0xA70, z_tm_reg, GR32, imm32lh16>;
+ def TMHL : CompareRI<"tmhl", 0xA73, z_tm_reg, GRH32, imm32ll16>;
+ def TMHH : CompareRI<"tmhh", 0xA72, z_tm_reg, GRH32, imm32lh16>;
+
+ def TMLL64 : CompareAliasRI<z_tm_reg, GR64, imm64ll16>;
+ def TMLH64 : CompareAliasRI<z_tm_reg, GR64, imm64lh16>;
+ def TMHL64 : CompareAliasRI<z_tm_reg, GR64, imm64hl16>;
+ def TMHH64 : CompareAliasRI<z_tm_reg, GR64, imm64hh16>;
+
+ defm TM : CompareSIPair<"tm", 0x91, 0xEB51, z_tm_mem, anyextloadi8, imm32zx8>;
+}
+
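For reference, the CC that TEST UNDER MASK produces distinguishes "all selected bits zero", "mixed" and "all selected bits one", with the mixed cases split on the leftmost selected bit. A scalar C++ sketch of the TMLL case:

    #include <cstdint>

    // CC for a TMLL-style test: Mask selects bits of the low 16 bits of Reg.
    int tmCC(uint32_t Reg, uint16_t Mask) {
      uint32_t Bits = Reg & Mask;
      if (Bits == 0)    return 0;                    // all selected bits zero
      if (Bits == Mask) return 3;                    // all selected bits one
      uint32_t Leftmost = 1u << (31 - __builtin_clz(Mask));
      return (Bits & Leftmost) ? 2 : 1;              // mixed cases
    }
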
+//===----------------------------------------------------------------------===//
+// Prefetch
+//===----------------------------------------------------------------------===//
+
+def PFD : PrefetchRXY<"pfd", 0xE336, z_prefetch>;
+def PFDRL : PrefetchRILPC<"pfdrl", 0xC62, z_prefetch>;
+
//===----------------------------------------------------------------------===//
// Atomic operations
//===----------------------------------------------------------------------===//
@@ -1024,60 +1215,60 @@ def ATOMIC_LOAD_SGR : AtomicLoadBinaryReg64<atomic_load_sub_64>;
def ATOMIC_LOADW_NR : AtomicLoadWBinaryReg<z_atomic_loadw_and>;
def ATOMIC_LOADW_NILH : AtomicLoadWBinaryImm<z_atomic_loadw_and, imm32lh16c>;
def ATOMIC_LOAD_NR : AtomicLoadBinaryReg32<atomic_load_and_32>;
-def ATOMIC_LOAD_NILL32 : AtomicLoadBinaryImm32<atomic_load_and_32, imm32ll16c>;
-def ATOMIC_LOAD_NILH32 : AtomicLoadBinaryImm32<atomic_load_and_32, imm32lh16c>;
-def ATOMIC_LOAD_NILF32 : AtomicLoadBinaryImm32<atomic_load_and_32, uimm32>;
+def ATOMIC_LOAD_NILL : AtomicLoadBinaryImm32<atomic_load_and_32, imm32ll16c>;
+def ATOMIC_LOAD_NILH : AtomicLoadBinaryImm32<atomic_load_and_32, imm32lh16c>;
+def ATOMIC_LOAD_NILF : AtomicLoadBinaryImm32<atomic_load_and_32, uimm32>;
def ATOMIC_LOAD_NGR : AtomicLoadBinaryReg64<atomic_load_and_64>;
-def ATOMIC_LOAD_NILL : AtomicLoadBinaryImm64<atomic_load_and_64, imm64ll16c>;
-def ATOMIC_LOAD_NILH : AtomicLoadBinaryImm64<atomic_load_and_64, imm64lh16c>;
-def ATOMIC_LOAD_NIHL : AtomicLoadBinaryImm64<atomic_load_and_64, imm64hl16c>;
-def ATOMIC_LOAD_NIHH : AtomicLoadBinaryImm64<atomic_load_and_64, imm64hh16c>;
-def ATOMIC_LOAD_NILF : AtomicLoadBinaryImm64<atomic_load_and_64, imm64lf32c>;
-def ATOMIC_LOAD_NIHF : AtomicLoadBinaryImm64<atomic_load_and_64, imm64hf32c>;
+def ATOMIC_LOAD_NILL64 : AtomicLoadBinaryImm64<atomic_load_and_64, imm64ll16c>;
+def ATOMIC_LOAD_NILH64 : AtomicLoadBinaryImm64<atomic_load_and_64, imm64lh16c>;
+def ATOMIC_LOAD_NIHL64 : AtomicLoadBinaryImm64<atomic_load_and_64, imm64hl16c>;
+def ATOMIC_LOAD_NIHH64 : AtomicLoadBinaryImm64<atomic_load_and_64, imm64hh16c>;
+def ATOMIC_LOAD_NILF64 : AtomicLoadBinaryImm64<atomic_load_and_64, imm64lf32c>;
+def ATOMIC_LOAD_NIHF64 : AtomicLoadBinaryImm64<atomic_load_and_64, imm64hf32c>;
def ATOMIC_LOADW_OR : AtomicLoadWBinaryReg<z_atomic_loadw_or>;
def ATOMIC_LOADW_OILH : AtomicLoadWBinaryImm<z_atomic_loadw_or, imm32lh16>;
def ATOMIC_LOAD_OR : AtomicLoadBinaryReg32<atomic_load_or_32>;
-def ATOMIC_LOAD_OILL32 : AtomicLoadBinaryImm32<atomic_load_or_32, imm32ll16>;
-def ATOMIC_LOAD_OILH32 : AtomicLoadBinaryImm32<atomic_load_or_32, imm32lh16>;
-def ATOMIC_LOAD_OILF32 : AtomicLoadBinaryImm32<atomic_load_or_32, uimm32>;
+def ATOMIC_LOAD_OILL : AtomicLoadBinaryImm32<atomic_load_or_32, imm32ll16>;
+def ATOMIC_LOAD_OILH : AtomicLoadBinaryImm32<atomic_load_or_32, imm32lh16>;
+def ATOMIC_LOAD_OILF : AtomicLoadBinaryImm32<atomic_load_or_32, uimm32>;
def ATOMIC_LOAD_OGR : AtomicLoadBinaryReg64<atomic_load_or_64>;
-def ATOMIC_LOAD_OILL : AtomicLoadBinaryImm64<atomic_load_or_64, imm64ll16>;
-def ATOMIC_LOAD_OILH : AtomicLoadBinaryImm64<atomic_load_or_64, imm64lh16>;
-def ATOMIC_LOAD_OIHL : AtomicLoadBinaryImm64<atomic_load_or_64, imm64hl16>;
-def ATOMIC_LOAD_OIHH : AtomicLoadBinaryImm64<atomic_load_or_64, imm64hh16>;
-def ATOMIC_LOAD_OILF : AtomicLoadBinaryImm64<atomic_load_or_64, imm64lf32>;
-def ATOMIC_LOAD_OIHF : AtomicLoadBinaryImm64<atomic_load_or_64, imm64hf32>;
+def ATOMIC_LOAD_OILL64 : AtomicLoadBinaryImm64<atomic_load_or_64, imm64ll16>;
+def ATOMIC_LOAD_OILH64 : AtomicLoadBinaryImm64<atomic_load_or_64, imm64lh16>;
+def ATOMIC_LOAD_OIHL64 : AtomicLoadBinaryImm64<atomic_load_or_64, imm64hl16>;
+def ATOMIC_LOAD_OIHH64 : AtomicLoadBinaryImm64<atomic_load_or_64, imm64hh16>;
+def ATOMIC_LOAD_OILF64 : AtomicLoadBinaryImm64<atomic_load_or_64, imm64lf32>;
+def ATOMIC_LOAD_OIHF64 : AtomicLoadBinaryImm64<atomic_load_or_64, imm64hf32>;
def ATOMIC_LOADW_XR : AtomicLoadWBinaryReg<z_atomic_loadw_xor>;
def ATOMIC_LOADW_XILF : AtomicLoadWBinaryImm<z_atomic_loadw_xor, uimm32>;
def ATOMIC_LOAD_XR : AtomicLoadBinaryReg32<atomic_load_xor_32>;
-def ATOMIC_LOAD_XILF32 : AtomicLoadBinaryImm32<atomic_load_xor_32, uimm32>;
+def ATOMIC_LOAD_XILF : AtomicLoadBinaryImm32<atomic_load_xor_32, uimm32>;
def ATOMIC_LOAD_XGR : AtomicLoadBinaryReg64<atomic_load_xor_64>;
-def ATOMIC_LOAD_XILF : AtomicLoadBinaryImm64<atomic_load_xor_64, imm64lf32>;
-def ATOMIC_LOAD_XIHF : AtomicLoadBinaryImm64<atomic_load_xor_64, imm64hf32>;
+def ATOMIC_LOAD_XILF64 : AtomicLoadBinaryImm64<atomic_load_xor_64, imm64lf32>;
+def ATOMIC_LOAD_XIHF64 : AtomicLoadBinaryImm64<atomic_load_xor_64, imm64hf32>;
def ATOMIC_LOADW_NRi : AtomicLoadWBinaryReg<z_atomic_loadw_nand>;
def ATOMIC_LOADW_NILHi : AtomicLoadWBinaryImm<z_atomic_loadw_nand,
imm32lh16c>;
def ATOMIC_LOAD_NRi : AtomicLoadBinaryReg32<atomic_load_nand_32>;
-def ATOMIC_LOAD_NILL32i : AtomicLoadBinaryImm32<atomic_load_nand_32,
+def ATOMIC_LOAD_NILLi : AtomicLoadBinaryImm32<atomic_load_nand_32,
imm32ll16c>;
-def ATOMIC_LOAD_NILH32i : AtomicLoadBinaryImm32<atomic_load_nand_32,
+def ATOMIC_LOAD_NILHi : AtomicLoadBinaryImm32<atomic_load_nand_32,
imm32lh16c>;
-def ATOMIC_LOAD_NILF32i : AtomicLoadBinaryImm32<atomic_load_nand_32, uimm32>;
+def ATOMIC_LOAD_NILFi : AtomicLoadBinaryImm32<atomic_load_nand_32, uimm32>;
def ATOMIC_LOAD_NGRi : AtomicLoadBinaryReg64<atomic_load_nand_64>;
-def ATOMIC_LOAD_NILLi : AtomicLoadBinaryImm64<atomic_load_nand_64,
+def ATOMIC_LOAD_NILL64i : AtomicLoadBinaryImm64<atomic_load_nand_64,
imm64ll16c>;
-def ATOMIC_LOAD_NILHi : AtomicLoadBinaryImm64<atomic_load_nand_64,
+def ATOMIC_LOAD_NILH64i : AtomicLoadBinaryImm64<atomic_load_nand_64,
imm64lh16c>;
-def ATOMIC_LOAD_NIHLi : AtomicLoadBinaryImm64<atomic_load_nand_64,
+def ATOMIC_LOAD_NIHL64i : AtomicLoadBinaryImm64<atomic_load_nand_64,
imm64hl16c>;
-def ATOMIC_LOAD_NIHHi : AtomicLoadBinaryImm64<atomic_load_nand_64,
+def ATOMIC_LOAD_NIHH64i : AtomicLoadBinaryImm64<atomic_load_nand_64,
imm64hh16c>;
-def ATOMIC_LOAD_NILFi : AtomicLoadBinaryImm64<atomic_load_nand_64,
+def ATOMIC_LOAD_NILF64i : AtomicLoadBinaryImm64<atomic_load_nand_64,
imm64lf32c>;
-def ATOMIC_LOAD_NIHFi : AtomicLoadBinaryImm64<atomic_load_nand_64,
+def ATOMIC_LOAD_NIHF64i : AtomicLoadBinaryImm64<atomic_load_nand_64,
imm64hf32c>;
def ATOMIC_LOADW_MIN : AtomicLoadWBinaryReg<z_atomic_loadw_min>;
@@ -1119,6 +1310,10 @@ let Defs = [CC] in {
// Miscellaneous Instructions.
//===----------------------------------------------------------------------===//
+// Extract CC into bits 29 and 28 of a register.
+let Uses = [CC] in
+ def IPM : InherentRRE<"ipm", 0xB222, GR32, (z_ipm)>;
+
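Concretely, the two-bit condition code can be read back out of the IPM result with a shift and mask:

    #include <cstdint>

    // CC sits in bits 29 and 28 of the IPM result (bit 0 = LSB).
    unsigned ccFromIPM(uint32_t IPMResult) { return (IPMResult >> 28) & 3; }
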
// Read a 32-bit access register into a GR32. As with all GR32 operations,
// the upper 32 bits of the enclosing GR64 remain unchanged, which is useful
// when a 64-bit address is stored in a pair of access registers.
@@ -1134,19 +1329,11 @@ let Defs = [CC] in {
def FLOGR : UnaryRRE<"flog", 0xB983, null_frag, GR128, GR64>;
}
def : Pat<(ctlz GR64:$src),
- (EXTRACT_SUBREG (FLOGR GR64:$src), subreg_high)>;
+ (EXTRACT_SUBREG (FLOGR GR64:$src), subreg_h64)>;
// Use subregs to populate the "don't care" bits in a 32-bit to 64-bit anyext.
def : Pat<(i64 (anyext GR32:$src)),
- (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR32:$src, subreg_32bit)>;
-
-// There are no 32-bit equivalents of LLILL and LLILH, so use a full
-// 64-bit move followed by a subreg. This preserves the invariant that
-// all GR32 operations only modify the low 32 bits.
-def : Pat<(i32 imm32ll16:$src),
- (EXTRACT_SUBREG (LLILL (LL16 imm:$src)), subreg_32bit)>;
-def : Pat<(i32 imm32lh16:$src),
- (EXTRACT_SUBREG (LLILH (LH16 imm:$src)), subreg_32bit)>;
+ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR32:$src, subreg_l32)>;
// Extend GR32s and GR64s to GR128s.
let usesCustomInserter = 1 in {
@@ -1155,6 +1342,10 @@ let usesCustomInserter = 1 in {
def ZEXT128_64 : Pseudo<(outs GR128:$dst), (ins GR64:$src), []>;
}
+// Search a block of memory for a character.
+let mayLoad = 1, Defs = [CC], Uses = [R0L] in
+  defm SRST : StringRRE<"srst", 0xB25E, z_search_string>;
+
//===----------------------------------------------------------------------===//
// Peepholes.
//===----------------------------------------------------------------------===//
@@ -1163,14 +1354,14 @@ let usesCustomInserter = 1 in {
defm : ZXB<add, GR64, ALGFR>;
def : Pat<(add GR64:$src1, imm64zx32:$src2),
(ALGFI GR64:$src1, imm64zx32:$src2)>;
-def : Pat<(add GR64:$src1, (zextloadi32 bdxaddr20only:$addr)),
+def : Pat<(add GR64:$src1, (azextloadi32 bdxaddr20only:$addr)),
(ALGF GR64:$src1, bdxaddr20only:$addr)>;
// Use SL* for GR64 subtractions of unsigned 32-bit values.
defm : ZXB<sub, GR64, SLGFR>;
def : Pat<(add GR64:$src1, imm64zx32n:$src2),
(SLGFI GR64:$src1, imm64zx32n:$src2)>;
-def : Pat<(sub GR64:$src1, (zextloadi32 bdxaddr20only:$addr)),
+def : Pat<(sub GR64:$src1, (azextloadi32 bdxaddr20only:$addr)),
(SLGF GR64:$src1, bdxaddr20only:$addr)>;
// Optimize sign-extended 1/0 selects to -1/0 selects. This is important
@@ -1184,3 +1375,19 @@ def : Pat<(sra (shl (i64 (anyext (i32 (z_select_ccmask 1, 0, uimm8zx4:$valid,
(i32 63)),
(i32 63)),
(Select64 (LGHI -1), (LGHI 0), uimm8zx4:$valid, uimm8zx4:$cc)>;
+
+// Peepholes for turning scalar operations into block operations.
+defm : BlockLoadStore<anyextloadi8, i32, MVCSequence, NCSequence, OCSequence,
+ XCSequence, 1>;
+defm : BlockLoadStore<anyextloadi16, i32, MVCSequence, NCSequence, OCSequence,
+ XCSequence, 2>;
+defm : BlockLoadStore<load, i32, MVCSequence, NCSequence, OCSequence,
+ XCSequence, 4>;
+defm : BlockLoadStore<anyextloadi8, i64, MVCSequence, NCSequence,
+ OCSequence, XCSequence, 1>;
+defm : BlockLoadStore<anyextloadi16, i64, MVCSequence, NCSequence, OCSequence,
+ XCSequence, 2>;
+defm : BlockLoadStore<anyextloadi32, i64, MVCSequence, NCSequence, OCSequence,
+ XCSequence, 4>;
+defm : BlockLoadStore<load, i64, MVCSequence, NCSequence, OCSequence,
+ XCSequence, 8>;
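
The scalar shape these peepholes recognize is a read-modify-write in which both operands live in memory; for example, a one-byte update that the block_or fragments (defined in SystemZOperators.td) allow to be emitted as a single OC, subject to the storeLoadCanUseBlockBinary checks:

    // *Dst = *Dst | *Src over one byte: a load, an OR and a store of the
    // same address, which the peephole collapses into a block OC.
    void orByte(unsigned char *Dst, const unsigned char *Src) {
      *Dst |= *Src;
    }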
diff --git a/lib/Target/SystemZ/SystemZLongBranch.cpp b/lib/Target/SystemZ/SystemZLongBranch.cpp
index 114f74e..ba027d4 100644
--- a/lib/Target/SystemZ/SystemZLongBranch.cpp
+++ b/lib/Target/SystemZ/SystemZLongBranch.cpp
@@ -225,11 +225,13 @@ TerminatorInfo SystemZLongBranch::describeTerminator(MachineInstr *MI) {
Terminator.ExtraRelaxSize = 6;
break;
case SystemZ::CRJ:
- // Relaxes to a CR/BRCL sequence, which is 2 bytes longer.
+ case SystemZ::CLRJ:
+ // Relaxes to a C(L)R/BRCL sequence, which is 2 bytes longer.
Terminator.ExtraRelaxSize = 2;
break;
case SystemZ::CGRJ:
- // Relaxes to a CGR/BRCL sequence, which is 4 bytes longer.
+ case SystemZ::CLGRJ:
+ // Relaxes to a C(L)GR/BRCL sequence, which is 4 bytes longer.
Terminator.ExtraRelaxSize = 4;
break;
case SystemZ::CIJ:
@@ -237,6 +239,11 @@ TerminatorInfo SystemZLongBranch::describeTerminator(MachineInstr *MI) {
// Relaxes to a C(G)HI/BRCL sequence, which is 4 bytes longer.
Terminator.ExtraRelaxSize = 4;
break;
+ case SystemZ::CLIJ:
+ case SystemZ::CLGIJ:
+ // Relaxes to a CL(G)FI/BRCL sequence, which is 6 bytes longer.
+ Terminator.ExtraRelaxSize = 6;
+ break;
default:
llvm_unreachable("Unrecognized branch instruction");
}
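
The ExtraRelaxSize values fall out of the encodings: the fused compare-and-branch forms are 6 bytes, RR compares 2 bytes, RRE compares 4, RIL immediate compares 6, and BRCL 6. In short:

    //   CRJ/CLRJ   (6) -> CR/CLR   (2) + BRCL (6) = 8,  delta 2
    //   CGRJ/CLGRJ (6) -> CGR/CLGR (4) + BRCL (6) = 10, delta 4
    //   CIJ/CGIJ   (6) -> C(G)HI   (4) + BRCL (6) = 10, delta 4
    //   CLIJ/CLGIJ (6) -> CL(G)FI  (6) + BRCL (6) = 12, delta 6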
@@ -401,6 +408,18 @@ void SystemZLongBranch::relaxBranch(TerminatorInfo &Terminator) {
case SystemZ::CGIJ:
splitCompareBranch(Branch, SystemZ::CGHI);
break;
+ case SystemZ::CLRJ:
+ splitCompareBranch(Branch, SystemZ::CLR);
+ break;
+ case SystemZ::CLGRJ:
+ splitCompareBranch(Branch, SystemZ::CLGR);
+ break;
+ case SystemZ::CLIJ:
+ splitCompareBranch(Branch, SystemZ::CLFI);
+ break;
+ case SystemZ::CLGIJ:
+ splitCompareBranch(Branch, SystemZ::CLGFI);
+ break;
default:
llvm_unreachable("Unrecognized branch");
}
diff --git a/lib/Target/SystemZ/SystemZMCInstLower.cpp b/lib/Target/SystemZ/SystemZMCInstLower.cpp
index 432a0d3..ff9a6c0 100644
--- a/lib/Target/SystemZ/SystemZMCInstLower.cpp
+++ b/lib/Target/SystemZ/SystemZMCInstLower.cpp
@@ -15,15 +15,6 @@
using namespace llvm;
-// If Opcode is an interprocedural reference that can be shortened,
-// return the short form, otherwise return 0.
-static unsigned getShortenedInstr(unsigned Opcode) {
- switch (Opcode) {
- case SystemZ::BRASL: return SystemZ::BRAS;
- }
- return Opcode;
-}
-
// Return the VK_* enumeration for MachineOperand target flags Flags.
static MCSymbolRefExpr::VariantKind getVariantKind(unsigned Flags) {
switch (Flags & SystemZII::MO_SYMBOL_MODIFIER) {
@@ -35,70 +26,71 @@ static MCSymbolRefExpr::VariantKind getVariantKind(unsigned Flags) {
llvm_unreachable("Unrecognised MO_ACCESS_MODEL");
}
-SystemZMCInstLower::SystemZMCInstLower(Mangler *mang, MCContext &ctx,
+SystemZMCInstLower::SystemZMCInstLower(MCContext &ctx,
SystemZAsmPrinter &asmprinter)
- : Mang(mang), Ctx(ctx), AsmPrinter(asmprinter) {}
+ : Ctx(ctx), AsmPrinter(asmprinter) {}
-MCOperand SystemZMCInstLower::lowerSymbolOperand(const MachineOperand &MO,
- const MCSymbol *Symbol,
- int64_t Offset) const {
- MCSymbolRefExpr::VariantKind Kind = getVariantKind(MO.getTargetFlags());
- const MCExpr *Expr = MCSymbolRefExpr::Create(Symbol, Kind, Ctx);
- if (Offset) {
- const MCExpr *OffsetExpr = MCConstantExpr::Create(Offset, Ctx);
- Expr = MCBinaryExpr::CreateAdd(Expr, OffsetExpr, Ctx);
+const MCExpr *
+SystemZMCInstLower::getExpr(const MachineOperand &MO,
+ MCSymbolRefExpr::VariantKind Kind) const {
+ const MCSymbol *Symbol;
+ bool HasOffset = true;
+ switch (MO.getType()) {
+ case MachineOperand::MO_MachineBasicBlock:
+ Symbol = MO.getMBB()->getSymbol();
+ HasOffset = false;
+ break;
+
+ case MachineOperand::MO_GlobalAddress:
+ Symbol = AsmPrinter.getSymbol(MO.getGlobal());
+ break;
+
+ case MachineOperand::MO_ExternalSymbol:
+ Symbol = AsmPrinter.GetExternalSymbolSymbol(MO.getSymbolName());
+ break;
+
+ case MachineOperand::MO_JumpTableIndex:
+ Symbol = AsmPrinter.GetJTISymbol(MO.getIndex());
+ HasOffset = false;
+ break;
+
+ case MachineOperand::MO_ConstantPoolIndex:
+ Symbol = AsmPrinter.GetCPISymbol(MO.getIndex());
+ break;
+
+ case MachineOperand::MO_BlockAddress:
+ Symbol = AsmPrinter.GetBlockAddressSymbol(MO.getBlockAddress());
+ break;
+
+ default:
+ llvm_unreachable("unknown operand type");
}
- return MCOperand::CreateExpr(Expr);
+ const MCExpr *Expr = MCSymbolRefExpr::Create(Symbol, Kind, Ctx);
+ if (HasOffset)
+ if (int64_t Offset = MO.getOffset()) {
+ const MCExpr *OffsetExpr = MCConstantExpr::Create(Offset, Ctx);
+ Expr = MCBinaryExpr::CreateAdd(Expr, OffsetExpr, Ctx);
+ }
+ return Expr;
}
MCOperand SystemZMCInstLower::lowerOperand(const MachineOperand &MO) const {
switch (MO.getType()) {
- default:
- llvm_unreachable("unknown operand type");
-
case MachineOperand::MO_Register:
return MCOperand::CreateReg(MO.getReg());
case MachineOperand::MO_Immediate:
return MCOperand::CreateImm(MO.getImm());
- case MachineOperand::MO_MachineBasicBlock:
- return lowerSymbolOperand(MO, MO.getMBB()->getSymbol(),
- /* MO has no offset field */0);
-
- case MachineOperand::MO_GlobalAddress:
- return lowerSymbolOperand(MO, Mang->getSymbol(MO.getGlobal()),
- MO.getOffset());
-
- case MachineOperand::MO_ExternalSymbol: {
- StringRef Name = MO.getSymbolName();
- return lowerSymbolOperand(MO, AsmPrinter.GetExternalSymbolSymbol(Name),
- MO.getOffset());
- }
-
- case MachineOperand::MO_JumpTableIndex:
- return lowerSymbolOperand(MO, AsmPrinter.GetJTISymbol(MO.getIndex()),
- /* MO has no offset field */0);
-
- case MachineOperand::MO_ConstantPoolIndex:
- return lowerSymbolOperand(MO, AsmPrinter.GetCPISymbol(MO.getIndex()),
- MO.getOffset());
-
- case MachineOperand::MO_BlockAddress: {
- const BlockAddress *BA = MO.getBlockAddress();
- return lowerSymbolOperand(MO, AsmPrinter.GetBlockAddressSymbol(BA),
- MO.getOffset());
+ default: {
+ MCSymbolRefExpr::VariantKind Kind = getVariantKind(MO.getTargetFlags());
+ return MCOperand::CreateExpr(getExpr(MO, Kind));
}
}
}
void SystemZMCInstLower::lower(const MachineInstr *MI, MCInst &OutMI) const {
- unsigned Opcode = MI->getOpcode();
- // When emitting binary code, start with the shortest form of an instruction
- // and then relax it where necessary.
- if (!AsmPrinter.OutStreamer.hasRawTextSupport())
- Opcode = getShortenedInstr(Opcode);
- OutMI.setOpcode(Opcode);
+ OutMI.setOpcode(MI->getOpcode());
for (unsigned I = 0, E = MI->getNumOperands(); I != E; ++I) {
const MachineOperand &MO = MI->getOperand(I);
// Ignore all implicit register operands.
diff --git a/lib/Target/SystemZ/SystemZMCInstLower.h b/lib/Target/SystemZ/SystemZMCInstLower.h
index db5bdb0..f6d5ac8 100644
--- a/lib/Target/SystemZ/SystemZMCInstLower.h
+++ b/lib/Target/SystemZ/SystemZMCInstLower.h
@@ -10,27 +10,24 @@
#ifndef LLVM_SYSTEMZMCINSTLOWER_H
#define LLVM_SYSTEMZMCINSTLOWER_H
+#include "llvm/MC/MCExpr.h"
#include "llvm/Support/DataTypes.h"
#include "llvm/Support/Compiler.h"
namespace llvm {
-class MCContext;
class MCInst;
class MCOperand;
-class MCSymbol;
class MachineInstr;
class MachineOperand;
class Mangler;
class SystemZAsmPrinter;
class LLVM_LIBRARY_VISIBILITY SystemZMCInstLower {
- Mangler *Mang;
MCContext &Ctx;
SystemZAsmPrinter &AsmPrinter;
public:
- SystemZMCInstLower(Mangler *mang, MCContext &ctx,
- SystemZAsmPrinter &asmPrinter);
+ SystemZMCInstLower(MCContext &ctx, SystemZAsmPrinter &asmPrinter);
// Lower MachineInstr MI to MCInst OutMI.
void lower(const MachineInstr *MI, MCInst &OutMI) const;
@@ -38,9 +35,9 @@ public:
// Return an MCOperand for MO.
MCOperand lowerOperand(const MachineOperand& MO) const;
- // Return an MCOperand for MO, given that it equals Symbol + Offset.
- MCOperand lowerSymbolOperand(const MachineOperand &MO,
- const MCSymbol *Symbol, int64_t Offset) const;
+ // Return an MCExpr for symbolic operand MO with variant kind Kind.
+ const MCExpr *getExpr(const MachineOperand &MO,
+ MCSymbolRefExpr::VariantKind Kind) const;
};
} // end namespace llvm
diff --git a/lib/Target/SystemZ/SystemZMachineFunctionInfo.cpp b/lib/Target/SystemZ/SystemZMachineFunctionInfo.cpp
new file mode 100644
index 0000000..00572d0
--- /dev/null
+++ b/lib/Target/SystemZ/SystemZMachineFunctionInfo.cpp
@@ -0,0 +1,17 @@
+//== SystemZMachineFunctionInfo.cpp - SystemZ machine function info -*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SystemZMachineFunctionInfo.h"
+
+using namespace llvm;
+
+
+// Pin the vtable to this file.
+void SystemZMachineFunctionInfo::anchor() {}
+
diff --git a/lib/Target/SystemZ/SystemZMachineFunctionInfo.h b/lib/Target/SystemZ/SystemZMachineFunctionInfo.h
index 69c2691..845291f 100644
--- a/lib/Target/SystemZ/SystemZMachineFunctionInfo.h
+++ b/lib/Target/SystemZ/SystemZMachineFunctionInfo.h
@@ -15,6 +15,7 @@
namespace llvm {
class SystemZMachineFunctionInfo : public MachineFunctionInfo {
+ virtual void anchor();
unsigned LowSavedGPR;
unsigned HighSavedGPR;
unsigned VarArgsFirstGPR;
diff --git a/lib/Target/SystemZ/SystemZOperands.td b/lib/Target/SystemZ/SystemZOperands.td
index 9d79439..3ad146c 100644
--- a/lib/Target/SystemZ/SystemZOperands.td
+++ b/lib/Target/SystemZ/SystemZOperands.td
@@ -46,7 +46,8 @@ class PCRelOperand<ValueType vt, AsmOperandClass asmop> : Operand<vt> {
// address with address size VT. SELF is the name of the operand and
// ASMOP is the associated asm operand.
class PCRelAddress<ValueType vt, string self, AsmOperandClass asmop>
- : ComplexPattern<vt, 1, "selectPCRelAddress", [z_pcrel_wrapper]>,
+ : ComplexPattern<vt, 1, "selectPCRelAddress",
+ [z_pcrel_wrapper, z_pcrel_offset]>,
PCRelOperand<vt, asmop> {
let MIOperandInfo = (ops !cast<Operand>(self));
}
@@ -219,11 +220,6 @@ def uimm8 : Immediate<i8, [{}], UIMM8, "U8Imm">;
// i32 immediates
//===----------------------------------------------------------------------===//
-// Immediates for 8-bit lengths.
-def imm32len8 : Immediate<i32, [{
- return isUInt<8>(N->getZExtValue() - 1);
-}], NOOP_SDNodeXForm, "U32Imm">;
-
// Immediates for the lower and upper 16 bits of an i32, with the other
// bits of the i32 being zero.
def imm32ll16 : Immediate<i32, [{
@@ -338,6 +334,10 @@ def imm64sx8 : Immediate<i64, [{
return isInt<8>(N->getSExtValue());
}], SIMM8, "S8Imm">;
+def imm64zx8 : Immediate<i64, [{
+ return isUInt<8>(N->getSExtValue());
+}], UIMM8, "U8Imm">;
+
def imm64sx16 : Immediate<i64, [{
return isInt<16>(N->getSExtValue());
}], SIMM16, "S16Imm">;
@@ -358,7 +358,7 @@ def imm64zx32n : Immediate<i64, [{
return isUInt<32>(-N->getSExtValue());
}], NEGIMM32, "U32Imm">;
-def imm64 : ImmLeaf<i64, [{}]>;
+def imm64 : ImmLeaf<i64, [{}]>, Operand<i64>;
//===----------------------------------------------------------------------===//
// Floating-point immediates
@@ -396,19 +396,6 @@ def pcrel32 : PCRelAddress<i64, "pcrel32", PCRel32> {
let DecoderMethod = "decodePC32DBLOperand";
}
-// A PC-relative offset of a global value when the value is used as a
-// call target. The offset is sign-extended and multiplied by 2.
-def pcrel16call : PCRelAddress<i64, "pcrel16call", PCRel16> {
- let PrintMethod = "printCallOperand";
- let EncoderMethod = "getPLT16DBLEncoding";
- let DecoderMethod = "decodePC16DBLOperand";
-}
-def pcrel32call : PCRelAddress<i64, "pcrel32call", PCRel32> {
- let PrintMethod = "printCallOperand";
- let EncoderMethod = "getPLT32DBLEncoding";
- let DecoderMethod = "decodePC32DBLOperand";
-}
-
//===----------------------------------------------------------------------===//
// Addressing modes
//===----------------------------------------------------------------------===//
@@ -435,6 +422,7 @@ def BDLAddr64Disp12Len8 : AddressAsmOperand<"BDLAddr", "64", "12", "Len8">;
// <type> is one of:
// shift : base + displacement (32-bit)
// bdaddr : base + displacement
+// mviaddr : like bdaddr, but reject cases with a natural index
// bdxaddr : base + displacement + index
// laaddr : like bdxaddr, but used for Load Address operations
// dynalloc : base + displacement + index + ADJDYNALLOC
@@ -460,6 +448,8 @@ def bdaddr12only : BDMode <"BDAddr", "64", "12", "Only">;
def bdaddr12pair : BDMode <"BDAddr", "64", "12", "Pair">;
def bdaddr20only : BDMode <"BDAddr", "64", "20", "Only">;
def bdaddr20pair : BDMode <"BDAddr", "64", "20", "Pair">;
+def mviaddr12pair : BDMode <"MVIAddr", "64", "12", "Pair">;
+def mviaddr20pair : BDMode <"MVIAddr", "64", "20", "Pair">;
def bdxaddr12only : BDXMode<"BDXAddr", "64", "12", "Only">;
def bdxaddr12pair : BDXMode<"BDXAddr", "64", "12", "Pair">;
def bdxaddr20only : BDXMode<"BDXAddr", "64", "20", "Only">;
diff --git a/lib/Target/SystemZ/SystemZOperators.td b/lib/Target/SystemZ/SystemZOperators.td
index 6a3af2b..31cabaa 100644
--- a/lib/Target/SystemZ/SystemZOperators.td
+++ b/lib/Target/SystemZ/SystemZOperators.td
@@ -15,6 +15,9 @@ def SDT_CallSeqEnd : SDCallSeqEnd<[SDTCisVT<0, i64>,
SDTCisVT<1, i64>]>;
def SDT_ZCall : SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>;
def SDT_ZCmp : SDTypeProfile<0, 2, [SDTCisSameAs<0, 1>]>;
+def SDT_ZICmp : SDTypeProfile<0, 3,
+ [SDTCisSameAs<0, 1>,
+ SDTCisVT<2, i32>]>;
def SDT_ZBRCCMask : SDTypeProfile<0, 3,
[SDTCisVT<0, i8>,
SDTCisVT<1, i8>,
@@ -27,6 +30,10 @@ def SDT_ZSelectCCMask : SDTypeProfile<1, 4,
def SDT_ZWrapPtr : SDTypeProfile<1, 1,
[SDTCisSameAs<0, 1>,
SDTCisPtrTy<0>]>;
+def SDT_ZWrapOffset : SDTypeProfile<1, 2,
+ [SDTCisSameAs<0, 1>,
+ SDTCisSameAs<0, 2>,
+ SDTCisPtrTy<0>]>;
def SDT_ZAdjDynAlloc : SDTypeProfile<1, 0, [SDTCisVT<0, i64>]>;
def SDT_ZExtractAccess : SDTypeProfile<1, 1,
[SDTCisVT<0, i32>,
@@ -54,10 +61,24 @@ def SDT_ZAtomicCmpSwapW : SDTypeProfile<1, 6,
SDTCisVT<4, i32>,
SDTCisVT<5, i32>,
SDTCisVT<6, i32>]>;
-def SDT_ZCopy : SDTypeProfile<0, 3,
+def SDT_ZMemMemLength : SDTypeProfile<0, 3,
[SDTCisPtrTy<0>,
SDTCisPtrTy<1>,
- SDTCisVT<2, i32>]>;
+ SDTCisVT<2, i64>]>;
+def SDT_ZMemMemLoop : SDTypeProfile<0, 4,
+ [SDTCisPtrTy<0>,
+ SDTCisPtrTy<1>,
+ SDTCisVT<2, i64>,
+ SDTCisVT<3, i64>]>;
+def SDT_ZString : SDTypeProfile<1, 3,
+ [SDTCisPtrTy<0>,
+ SDTCisPtrTy<1>,
+ SDTCisPtrTy<2>,
+ SDTCisVT<3, i32>]>;
+def SDT_ZI32Intrinsic : SDTypeProfile<1, 0, [SDTCisVT<0, i32>]>;
+def SDT_ZPrefetch : SDTypeProfile<0, 2,
+ [SDTCisVT<0, i8>,
+ SDTCisPtrTy<1>]>;
//===----------------------------------------------------------------------===//
// Node definitions
@@ -76,9 +97,15 @@ def z_retflag : SDNode<"SystemZISD::RET_FLAG", SDTNone,
def z_call : SDNode<"SystemZISD::CALL", SDT_ZCall,
[SDNPHasChain, SDNPOutGlue, SDNPOptInGlue,
SDNPVariadic]>;
+def z_sibcall : SDNode<"SystemZISD::SIBCALL", SDT_ZCall,
+ [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue,
+ SDNPVariadic]>;
def z_pcrel_wrapper : SDNode<"SystemZISD::PCREL_WRAPPER", SDT_ZWrapPtr, []>;
-def z_cmp : SDNode<"SystemZISD::CMP", SDT_ZCmp, [SDNPOutGlue]>;
-def z_ucmp : SDNode<"SystemZISD::UCMP", SDT_ZCmp, [SDNPOutGlue]>;
+def z_pcrel_offset : SDNode<"SystemZISD::PCREL_OFFSET",
+ SDT_ZWrapOffset, []>;
+def z_icmp : SDNode<"SystemZISD::ICMP", SDT_ZICmp, [SDNPOutGlue]>;
+def z_fcmp : SDNode<"SystemZISD::FCMP", SDT_ZCmp, [SDNPOutGlue]>;
+def z_tm : SDNode<"SystemZISD::TM", SDT_ZICmp, [SDNPOutGlue]>;
def z_br_ccmask : SDNode<"SystemZISD::BR_CCMASK", SDT_ZBRCCMask,
[SDNPHasChain, SDNPInGlue]>;
def z_select_ccmask : SDNode<"SystemZISD::SELECT_CCMASK", SDT_ZSelectCCMask,
@@ -109,13 +136,56 @@ def z_atomic_loadw_umin : AtomicWOp<"ATOMIC_LOADW_UMIN">;
def z_atomic_loadw_umax : AtomicWOp<"ATOMIC_LOADW_UMAX">;
def z_atomic_cmp_swapw : AtomicWOp<"ATOMIC_CMP_SWAPW", SDT_ZAtomicCmpSwapW>;
-def z_mvc : SDNode<"SystemZISD::MVC", SDT_ZCopy,
+def z_mvc : SDNode<"SystemZISD::MVC", SDT_ZMemMemLength,
+ [SDNPHasChain, SDNPMayStore, SDNPMayLoad]>;
+def z_mvc_loop : SDNode<"SystemZISD::MVC_LOOP", SDT_ZMemMemLoop,
+ [SDNPHasChain, SDNPMayStore, SDNPMayLoad]>;
+def z_nc : SDNode<"SystemZISD::NC", SDT_ZMemMemLength,
+ [SDNPHasChain, SDNPMayStore, SDNPMayLoad]>;
+def z_nc_loop : SDNode<"SystemZISD::NC_LOOP", SDT_ZMemMemLoop,
+ [SDNPHasChain, SDNPMayStore, SDNPMayLoad]>;
+def z_oc : SDNode<"SystemZISD::OC", SDT_ZMemMemLength,
+ [SDNPHasChain, SDNPMayStore, SDNPMayLoad]>;
+def z_oc_loop : SDNode<"SystemZISD::OC_LOOP", SDT_ZMemMemLoop,
+ [SDNPHasChain, SDNPMayStore, SDNPMayLoad]>;
+def z_xc : SDNode<"SystemZISD::XC", SDT_ZMemMemLength,
+ [SDNPHasChain, SDNPMayStore, SDNPMayLoad]>;
+def z_xc_loop : SDNode<"SystemZISD::XC_LOOP", SDT_ZMemMemLoop,
+ [SDNPHasChain, SDNPMayStore, SDNPMayLoad]>;
+def z_clc : SDNode<"SystemZISD::CLC", SDT_ZMemMemLength,
+ [SDNPHasChain, SDNPOutGlue, SDNPMayLoad]>;
+def z_clc_loop : SDNode<"SystemZISD::CLC_LOOP", SDT_ZMemMemLoop,
+ [SDNPHasChain, SDNPOutGlue, SDNPMayLoad]>;
+def z_strcmp : SDNode<"SystemZISD::STRCMP", SDT_ZString,
+ [SDNPHasChain, SDNPOutGlue, SDNPMayLoad]>;
+def z_stpcpy : SDNode<"SystemZISD::STPCPY", SDT_ZString,
[SDNPHasChain, SDNPMayStore, SDNPMayLoad]>;
+def z_search_string : SDNode<"SystemZISD::SEARCH_STRING", SDT_ZString,
+ [SDNPHasChain, SDNPOutGlue, SDNPMayLoad]>;
+def z_ipm : SDNode<"SystemZISD::IPM", SDT_ZI32Intrinsic,
+ [SDNPInGlue]>;
+def z_prefetch : SDNode<"SystemZISD::PREFETCH", SDT_ZPrefetch,
+ [SDNPHasChain, SDNPMayLoad, SDNPMayStore,
+ SDNPMemOperand]>;
//===----------------------------------------------------------------------===//
// Pattern fragments
//===----------------------------------------------------------------------===//
+// Signed and unsigned comparisons.
+def z_scmp : PatFrag<(ops node:$a, node:$b), (z_icmp node:$a, node:$b, imm), [{
+ unsigned Type = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
+ return Type != SystemZICMP::UnsignedOnly;
+}]>;
+def z_ucmp : PatFrag<(ops node:$a, node:$b), (z_icmp node:$a, node:$b, imm), [{
+ unsigned Type = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
+ return Type != SystemZICMP::SignedOnly;
+}]>;
+
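Both fragments test the extra i32 operand of z_icmp, which records how the comparison may legally be implemented. The enumeration lives on the C++ side of the backend; its shape, inferred from the uses above (the Any spelling is an assumption):

    namespace SystemZICMP {
      // How an integer comparison is allowed to be implemented.
      enum { Any, UnsignedOnly, SignedOnly };
    }

An equality-style comparison would be emitted as Any and so matches either fragment, while order comparisons pin it to one of the two.
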
+// Register- and memory-based TEST UNDER MASK.
+def z_tm_reg : PatFrag<(ops node:$a, node:$b), (z_tm node:$a, node:$b, imm)>;
+def z_tm_mem : PatFrag<(ops node:$a, node:$b), (z_tm node:$a, node:$b, 0)>;
+
// Register sign-extend operations. Sub-32-bit values are represented as i32s.
def sext8 : PatFrag<(ops node:$src), (sext_inreg node:$src, i8)>;
def sext16 : PatFrag<(ops node:$src), (sext_inreg node:$src, i16)>;
@@ -130,6 +200,36 @@ def zext32 : PatFrag<(ops node:$src), (zext (i32 node:$src))>;
def loadf32 : PatFrag<(ops node:$src), (f32 (load node:$src))>;
def loadf64 : PatFrag<(ops node:$src), (f64 (load node:$src))>;
+// Extending loads in which the extension type can be signed.
+def asextload : PatFrag<(ops node:$ptr), (unindexedload node:$ptr), [{
+ unsigned Type = cast<LoadSDNode>(N)->getExtensionType();
+ return Type == ISD::EXTLOAD || Type == ISD::SEXTLOAD;
+}]>;
+def asextloadi8 : PatFrag<(ops node:$ptr), (asextload node:$ptr), [{
+ return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i8;
+}]>;
+def asextloadi16 : PatFrag<(ops node:$ptr), (asextload node:$ptr), [{
+ return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i16;
+}]>;
+def asextloadi32 : PatFrag<(ops node:$ptr), (asextload node:$ptr), [{
+ return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i32;
+}]>;
+
+// Extending loads in which the extension type can be unsigned.
+def azextload : PatFrag<(ops node:$ptr), (unindexedload node:$ptr), [{
+ unsigned Type = cast<LoadSDNode>(N)->getExtensionType();
+ return Type == ISD::EXTLOAD || Type == ISD::ZEXTLOAD;
+}]>;
+def azextloadi8 : PatFrag<(ops node:$ptr), (azextload node:$ptr), [{
+ return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i8;
+}]>;
+def azextloadi16 : PatFrag<(ops node:$ptr), (azextload node:$ptr), [{
+ return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i16;
+}]>;
+def azextloadi32 : PatFrag<(ops node:$ptr), (azextload node:$ptr), [{
+ return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i32;
+}]>;
+
// Extending loads in which the extension type doesn't matter.
def anyextload : PatFrag<(ops node:$ptr), (unindexedload node:$ptr), [{
return cast<LoadSDNode>(N)->getExtensionType() != ISD::NON_EXTLOAD;
@@ -150,11 +250,11 @@ class AlignedLoad<SDPatternOperator load>
LoadSDNode *Load = cast<LoadSDNode>(N);
return Load->getAlignment() >= Load->getMemoryVT().getStoreSize();
}]>;
-def aligned_load : AlignedLoad<load>;
-def aligned_sextloadi16 : AlignedLoad<sextloadi16>;
-def aligned_sextloadi32 : AlignedLoad<sextloadi32>;
-def aligned_zextloadi16 : AlignedLoad<zextloadi16>;
-def aligned_zextloadi32 : AlignedLoad<zextloadi32>;
+def aligned_load : AlignedLoad<load>;
+def aligned_asextloadi16 : AlignedLoad<asextloadi16>;
+def aligned_asextloadi32 : AlignedLoad<asextloadi32>;
+def aligned_azextloadi16 : AlignedLoad<azextloadi16>;
+def aligned_azextloadi32 : AlignedLoad<azextloadi32>;
// Aligned stores.
class AlignedStore<SDPatternOperator store>
@@ -189,6 +289,31 @@ def nonvolatile_truncstorei8 : NonvolatileStore<truncstorei8>;
def nonvolatile_truncstorei16 : NonvolatileStore<truncstorei16>;
def nonvolatile_truncstorei32 : NonvolatileStore<truncstorei32>;
+// A store of a load that can be implemented using MVC.
+def mvc_store : PatFrag<(ops node:$value, node:$addr),
+ (unindexedstore node:$value, node:$addr),
+ [{ return storeLoadCanUseMVC(N); }]>;
+
+// Binary read-modify-write operations on memory in which the other
+// operand is also memory and for which block operations like NC can
+// be used. There are two patterns for each operator, depending on
+// which operand contains the "other" load.
+multiclass block_op<SDPatternOperator operator> {
+ def "1" : PatFrag<(ops node:$value, node:$addr),
+ (unindexedstore (operator node:$value,
+ (unindexedload node:$addr)),
+ node:$addr),
+ [{ return storeLoadCanUseBlockBinary(N, 0); }]>;
+ def "2" : PatFrag<(ops node:$value, node:$addr),
+ (unindexedstore (operator (unindexedload node:$addr),
+ node:$value),
+ node:$addr),
+ [{ return storeLoadCanUseBlockBinary(N, 1); }]>;
+}
+defm block_and : block_op<and>;
+defm block_or : block_op<or>;
+defm block_xor : block_op<xor>;
+
// Insertions.
def inserti8 : PatFrag<(ops node:$src1, node:$src2),
(or (and node:$src1, -256), node:$src2)>;
@@ -221,6 +346,16 @@ def or_as_revinserti8 : PatFrag<(ops node:$src1, node:$src2),
APInt::getLowBitsSet(BitWidth, 8));
}]>;
+// Integer absolute, matching the canonical form generated by DAGCombiner.
+def z_iabs32 : PatFrag<(ops node:$src),
+ (xor (add node:$src, (sra node:$src, (i32 31))),
+ (sra node:$src, (i32 31)))>;
+def z_iabs64 : PatFrag<(ops node:$src),
+ (xor (add node:$src, (sra node:$src, (i32 63))),
+ (sra node:$src, (i32 63)))>;
+def z_inegabs32 : PatFrag<(ops node:$src), (ineg (z_iabs32 node:$src))>;
+def z_inegabs64 : PatFrag<(ops node:$src), (ineg (z_iabs64 node:$src))>;
+
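The form matched here is the classic branch-free absolute value that DAGCombiner canonicalizes to; in scalar C++ terms (32-bit case):

    #include <cstdint>

    int32_t iabs32(int32_t X) {
      int32_t M = X >> 31;  // 0 when X >= 0, -1 when X < 0 (arithmetic shift)
      return (X + M) ^ M;   // identity for M == 0, negation for M == -1
    }
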
// Fused multiply-add and multiply-subtract, but with the order of the
// operands matching SystemZ's MA and MS instructions.
def z_fma : PatFrag<(ops node:$src1, node:$src2, node:$src3),
diff --git a/lib/Target/SystemZ/SystemZPatterns.td b/lib/Target/SystemZ/SystemZPatterns.td
index c442ae0..7706351 100644
--- a/lib/Target/SystemZ/SystemZPatterns.td
+++ b/lib/Target/SystemZ/SystemZPatterns.td
@@ -13,7 +13,7 @@ multiclass SXU<SDPatternOperator operator, Instruction insn> {
def : Pat<(operator (sext (i32 GR32:$src))),
(insn GR32:$src)>;
def : Pat<(operator (sext_inreg GR64:$src, i32)),
- (insn (EXTRACT_SUBREG GR64:$src, subreg_32bit))>;
+ (insn (EXTRACT_SUBREG GR64:$src, subreg_l32))>;
}
// Record that INSN performs a 64-bit version of binary operator OPERATOR
@@ -24,7 +24,7 @@ multiclass SXB<SDPatternOperator operator, RegisterOperand cls,
def : Pat<(operator cls:$src1, (sext GR32:$src2)),
(insn cls:$src1, GR32:$src2)>;
def : Pat<(operator cls:$src1, (sext_inreg GR64:$src2, i32)),
- (insn cls:$src1, (EXTRACT_SUBREG GR64:$src2, subreg_32bit))>;
+ (insn cls:$src1, (EXTRACT_SUBREG GR64:$src2, subreg_l32))>;
}
// Like SXB, but for zero extension.
@@ -33,7 +33,7 @@ multiclass ZXB<SDPatternOperator operator, RegisterOperand cls,
def : Pat<(operator cls:$src1, (zext GR32:$src2)),
(insn cls:$src1, GR32:$src2)>;
def : Pat<(operator cls:$src1, (and GR64:$src2, 0xffffffff)),
- (insn cls:$src1, (EXTRACT_SUBREG GR64:$src2, subreg_32bit))>;
+ (insn cls:$src1, (EXTRACT_SUBREG GR64:$src2, subreg_l32))>;
}
// Record that INSN performs a binary read-modify-write operation,
@@ -66,22 +66,87 @@ multiclass InsertMem<string type, Instruction insn, RegisterOperand cls,
(insn cls:$src1, mode:$src2)>;
}
-// Use MVC instruction INSN for a load of type LOAD followed by a store
-// of type STORE. VT is the type of the intermediate register and LENGTH
-// is the number of bytes to copy (which may be smaller than VT).
-multiclass MVCLoadStore<SDPatternOperator load, SDPatternOperator store,
- ValueType vt, Instruction insn, bits<5> length> {
- def Pat : PatFrag<(ops node:$dest, node:$src),
- (store (vt (load node:$src)), node:$dest),
- [{ return storeLoadCanUseMVC(N); }]>;
+// INSN stores the low 32 bits of a GPR to memory with addressing mode MODE.
+// Record that it is equivalent to using OPERATOR to store a GR64.
+class StoreGR64<Instruction insn, SDPatternOperator operator,
+ AddressingMode mode>
+ : Pat<(operator GR64:$R1, mode:$XBD2),
+ (insn (EXTRACT_SUBREG GR64:$R1, subreg_l32), mode:$XBD2)>;
- def : Pat<(!cast<SDPatternOperator>(NAME##"Pat") bdaddr12only:$dest,
- bdaddr12only:$src),
+// INSN and INSNY are an RX/RXY pair of instructions that store the low
+// 32 bits of a GPR to memory. Record that they are equivalent to using
+// OPERATOR to store a GR64.
+multiclass StoreGR64Pair<Instruction insn, Instruction insny,
+ SDPatternOperator operator> {
+ def : StoreGR64<insn, operator, bdxaddr12pair>;
+ def : StoreGR64<insny, operator, bdxaddr20pair>;
+}
+
+// INSN stores the low 32 bits of a GPR using PC-relative addressing.
+// Record that it is equivalent to using OPERATOR to store a GR64.
+class StoreGR64PC<Instruction insn, SDPatternOperator operator>
+ : Pat<(operator GR64:$R1, pcrel32:$XBD2),
+ (insn (EXTRACT_SUBREG GR64:$R1, subreg_l32), pcrel32:$XBD2)> {
+ // We want PC-relative addresses to be tried ahead of BD and BDX addresses.
+ // However, BDXs have two extra operands and are therefore 6 units more
+ // complex.
+ let AddedComplexity = 7;
+}
+
+// INSN and INSNINV conditionally store the low 32 bits of a GPR to memory,
+// with INSN storing when the condition is true and INSNINV storing when the
+// condition is false. Record that they are equivalent to a LOAD/select/STORE
+// sequence for GR64s.
+multiclass CondStores64<Instruction insn, Instruction insninv,
+ SDPatternOperator store, SDPatternOperator load,
+ AddressingMode mode> {
+ def : Pat<(store (z_select_ccmask GR64:$new, (load mode:$addr),
+ uimm8zx4:$valid, uimm8zx4:$cc),
+ mode:$addr),
+ (insn (EXTRACT_SUBREG GR64:$new, subreg_l32), mode:$addr,
+ uimm8zx4:$valid, uimm8zx4:$cc)>;
+ def : Pat<(store (z_select_ccmask (load mode:$addr), GR64:$new,
+ uimm8zx4:$valid, uimm8zx4:$cc),
+ mode:$addr),
+ (insninv (EXTRACT_SUBREG GR64:$new, subreg_l32), mode:$addr,
+ uimm8zx4:$valid, uimm8zx4:$cc)>;
+}
+
+// Try to use MVC instruction INSN for a load of type LOAD followed by a store
+// of the same size. VT is the type of the intermediate (legalized) value and
+// LENGTH is the number of bytes loaded by LOAD.
+multiclass MVCLoadStore<SDPatternOperator load, ValueType vt, Instruction insn,
+ bits<5> length> {
+ def : Pat<(mvc_store (vt (load bdaddr12only:$src)), bdaddr12only:$dest),
(insn bdaddr12only:$dest, bdaddr12only:$src, length)>;
}
+// Use NC-like instruction INSN for block_op operation OPERATOR.
+// The other operand is a load of type LOAD, which accesses LENGTH bytes.
+// VT is the intermediate legalized type in which the binary operation
+// is actually done.
+multiclass BinaryLoadStore<SDPatternOperator operator, SDPatternOperator load,
+ ValueType vt, Instruction insn, bits<5> length> {
+ def : Pat<(operator (vt (load bdaddr12only:$src)), bdaddr12only:$dest),
+ (insn bdaddr12only:$dest, bdaddr12only:$src, length)>;
+}
+
+// A convenient way of generating all block peepholes for a particular
+// LOAD/VT/LENGTH combination.
+multiclass BlockLoadStore<SDPatternOperator load, ValueType vt,
+ Instruction mvc, Instruction nc, Instruction oc,
+ Instruction xc, bits<5> length> {
+ defm : MVCLoadStore<load, vt, mvc, length>;
+ defm : BinaryLoadStore<block_and1, load, vt, nc, length>;
+ defm : BinaryLoadStore<block_and2, load, vt, nc, length>;
+ defm : BinaryLoadStore<block_or1, load, vt, oc, length>;
+ defm : BinaryLoadStore<block_or2, load, vt, oc, length>;
+ defm : BinaryLoadStore<block_xor1, load, vt, xc, length>;
+ defm : BinaryLoadStore<block_xor2, load, vt, xc, length>;
+}
+
// Record that INSN is a LOAD AND TEST that can be used to compare
// registers in CLS against zero. The instruction has separate R1 and R2
// operands, but they must be the same when the instruction is used like this.
class CompareZeroFP<Instruction insn, RegisterOperand cls>
- : Pat<(z_cmp cls:$reg, (fpimm0)), (insn cls:$reg, cls:$reg)>;
+ : Pat<(z_fcmp cls:$reg, (fpimm0)), (insn cls:$reg, cls:$reg)>;
diff --git a/lib/Target/SystemZ/SystemZProcessors.td b/lib/Target/SystemZ/SystemZProcessors.td
index 7e14aa7..f241fb0 100644
--- a/lib/Target/SystemZ/SystemZProcessors.td
+++ b/lib/Target/SystemZ/SystemZProcessors.td
@@ -31,8 +31,16 @@ def FeatureHighWord : SystemZFeature<
"Assume that the high-word facility is installed"
>;
-def : Processor<"z10", NoItineraries, []>;
-def : Processor<"z196", NoItineraries,
- [FeatureDistinctOps, FeatureLoadStoreOnCond, FeatureHighWord]>;
+def FeatureFPExtension : SystemZFeature<
+ "fp-extension", "FPExtension",
+ "Assume that the floating-point extension facility is installed"
+>;
+
+def : Processor<"generic", NoItineraries, []>;
+def : Processor<"z10", NoItineraries, []>;
+def : Processor<"z196", NoItineraries,
+ [FeatureDistinctOps, FeatureLoadStoreOnCond, FeatureHighWord,
+ FeatureFPExtension]>;
def : Processor<"zEC12", NoItineraries,
- [FeatureDistinctOps, FeatureLoadStoreOnCond, FeatureHighWord]>;
+ [FeatureDistinctOps, FeatureLoadStoreOnCond, FeatureHighWord,
+ FeatureFPExtension]>;
diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.cpp b/lib/Target/SystemZ/SystemZRegisterInfo.cpp
index 8ce6d6a..b61ae88 100644
--- a/lib/Target/SystemZ/SystemZRegisterInfo.cpp
+++ b/lib/Target/SystemZ/SystemZRegisterInfo.cpp
@@ -42,13 +42,15 @@ SystemZRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
if (TFI->hasFP(MF)) {
// R11D is the frame pointer. Reserve all aliases.
Reserved.set(SystemZ::R11D);
- Reserved.set(SystemZ::R11W);
+ Reserved.set(SystemZ::R11L);
+ Reserved.set(SystemZ::R11H);
Reserved.set(SystemZ::R10Q);
}
// R15D is the stack pointer. Reserve all aliases.
Reserved.set(SystemZ::R15D);
- Reserved.set(SystemZ::R15W);
+ Reserved.set(SystemZ::R15L);
+ Reserved.set(SystemZ::R15H);
Reserved.set(SystemZ::R14Q);
return Reserved;
}
diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.h b/lib/Target/SystemZ/SystemZRegisterInfo.h
index c447e4d..13f45fa 100644
--- a/lib/Target/SystemZ/SystemZRegisterInfo.h
+++ b/lib/Target/SystemZ/SystemZRegisterInfo.h
@@ -22,10 +22,10 @@ namespace SystemZ {
// Return the subreg to use for referring to the even and odd registers
// in a GR128 pair. Is32Bit says whether we want a GR32 or GR64.
inline unsigned even128(bool Is32bit) {
- return Is32bit ? subreg_32bit : subreg_high;
+ return Is32bit ? subreg_hl32 : subreg_h64;
}
inline unsigned odd128(bool Is32bit) {
- return Is32bit ? subreg_low32 : subreg_low;
+ return Is32bit ? subreg_l32 : subreg_l64;
}
}
@@ -48,6 +48,10 @@ public:
LLVM_OVERRIDE {
return true;
}
+ virtual bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const
+ LLVM_OVERRIDE {
+ return true;
+ }
virtual const uint16_t *getCalleeSavedRegs(const MachineFunction *MF = 0)
const LLVM_OVERRIDE;
virtual BitVector getReservedRegs(const MachineFunction &MF)
diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.td b/lib/Target/SystemZ/SystemZRegisterInfo.td
index ffffe72..93d7c83 100644
--- a/lib/Target/SystemZ/SystemZRegisterInfo.td
+++ b/lib/Target/SystemZ/SystemZRegisterInfo.td
@@ -21,11 +21,12 @@ class SystemZRegWithSubregs<string n, list<Register> subregs>
}
let Namespace = "SystemZ" in {
-def subreg_32bit : SubRegIndex<32>; // could also be named "subreg_high32"
-// Indices are used in a variety of ways, so don't set an Offset.
-def subreg_high : SubRegIndex<64, -1>;
-def subreg_low : SubRegIndex<64, -1>;
-def subreg_low32 : ComposedSubRegIndex<subreg_low, subreg_32bit>;
+def subreg_l32 : SubRegIndex<32, 0>; // Also acts as subreg_ll32.
+def subreg_h32 : SubRegIndex<32, 32>; // Also acts as subreg_lh32.
+def subreg_l64 : SubRegIndex<64, 0>;
+def subreg_h64 : SubRegIndex<64, 64>;
+def subreg_hh32 : ComposedSubRegIndex<subreg_h64, subreg_h32>;
+def subreg_hl32 : ComposedSubRegIndex<subreg_h64, subreg_l32>;
}
// Define a register class that contains values of type TYPE and an
@@ -55,36 +56,49 @@ class GPR32<bits<16> num, string n> : SystemZReg<n> {
}
// One of the 16 64-bit general-purpose registers.
-class GPR64<bits<16> num, string n, GPR32 low>
- : SystemZRegWithSubregs<n, [low]> {
+class GPR64<bits<16> num, string n, GPR32 low, GPR32 high>
+ : SystemZRegWithSubregs<n, [low, high]> {
let HWEncoding = num;
- let SubRegIndices = [subreg_32bit];
+ let SubRegIndices = [subreg_l32, subreg_h32];
}
// 8 even-odd pairs of GPR64s.
-class GPR128<bits<16> num, string n, GPR64 high, GPR64 low>
- : SystemZRegWithSubregs<n, [high, low]> {
+class GPR128<bits<16> num, string n, GPR64 low, GPR64 high>
+ : SystemZRegWithSubregs<n, [low, high]> {
let HWEncoding = num;
- let SubRegIndices = [subreg_high, subreg_low];
+ let SubRegIndices = [subreg_l64, subreg_h64];
}
// General-purpose registers
foreach I = 0-15 in {
- def R#I#W : GPR32<I, "r"#I>;
- def R#I#D : GPR64<I, "r"#I, !cast<GPR32>("R"#I#"W")>, DwarfRegNum<[I]>;
+ def R#I#L : GPR32<I, "r"#I>;
+ def R#I#H : GPR32<I, "r"#I>;
+ def R#I#D : GPR64<I, "r"#I, !cast<GPR32>("R"#I#"L"), !cast<GPR32>("R"#I#"H")>,
+ DwarfRegNum<[I]>;
}
foreach I = [0, 2, 4, 6, 8, 10, 12, 14] in {
- def R#I#Q : GPR128<I, "r"#I, !cast<GPR64>("R"#I#"D"),
- !cast<GPR64>("R"#!add(I, 1)#"D")>;
+ def R#I#Q : GPR128<I, "r"#I, !cast<GPR64>("R"#!add(I, 1)#"D"),
+ !cast<GPR64>("R"#I#"D")>;
}
/// Allocate the callee-saved R6-R13 backwards. That way they can be saved
/// together with R14 and R15 in one prolog instruction.
-defm GR32 : SystemZRegClass<"GR32", i32, 32, (add (sequence "R%uW", 0, 5),
- (sequence "R%uW", 15, 6))>;
-defm GR64 : SystemZRegClass<"GR64", i64, 64, (add (sequence "R%uD", 0, 5),
- (sequence "R%uD", 15, 6))>;
+defm GR32 : SystemZRegClass<"GR32", i32, 32, (add (sequence "R%uL", 0, 5),
+ (sequence "R%uL", 15, 6))>;
+defm GRH32 : SystemZRegClass<"GRH32", i32, 32, (add (sequence "R%uH", 0, 5),
+ (sequence "R%uH", 15, 6))>;
+defm GR64 : SystemZRegClass<"GR64", i64, 64, (add (sequence "R%uD", 0, 5),
+ (sequence "R%uD", 15, 6))>;
+
+// Combine the low and high GR32s into a single class. This can only be
+// used for virtual registers if the high-word facility is available.
+defm GRX32 : SystemZRegClass<"GRX32", i32, 32,
+ (add (sequence "R%uL", 0, 5),
+ (sequence "R%uH", 0, 5),
+ R15L, R15H, R14L, R14H, R13L, R13H,
+ R12L, R12H, R11L, R11H, R10L, R10H,
+ R9L, R9H, R8L, R8H, R7L, R7H, R6L, R6H)>;
// The architecture doesn't really have any i128 support, so model the
// register pairs as untyped instead.
@@ -94,7 +108,7 @@ defm GR128 : SystemZRegClass<"GR128", untyped, 128, (add R0Q, R2Q, R4Q,
// Base and index registers. Everything except R0, which in an address
// context evaluates as 0.
-defm ADDR32 : SystemZRegClass<"ADDR32", i32, 32, (sub GR32Bit, R0W)>;
+defm ADDR32 : SystemZRegClass<"ADDR32", i32, 32, (sub GR32Bit, R0L)>;
defm ADDR64 : SystemZRegClass<"ADDR64", i64, 64, (sub GR64Bit, R0D)>;
// Not used directly, but needs to exist for ADDR32 and ADDR64 subregs
@@ -114,14 +128,14 @@ class FPR32<bits<16> num, string n> : SystemZReg<n> {
class FPR64<bits<16> num, string n, FPR32 low>
: SystemZRegWithSubregs<n, [low]> {
let HWEncoding = num;
- let SubRegIndices = [subreg_32bit];
+ let SubRegIndices = [subreg_h32];
}
// 8 pairs of FPR64s, with a one-register gap in between.
-class FPR128<bits<16> num, string n, FPR64 high, FPR64 low>
- : SystemZRegWithSubregs<n, [high, low]> {
+class FPR128<bits<16> num, string n, FPR64 low, FPR64 high>
+ : SystemZRegWithSubregs<n, [low, high]> {
let HWEncoding = num;
- let SubRegIndices = [subreg_high, subreg_low];
+ let SubRegIndices = [subreg_l64, subreg_h64];
}
// Floating-point registers
@@ -132,8 +146,8 @@ foreach I = 0-15 in {
}
foreach I = [0, 1, 4, 5, 8, 9, 12, 13] in {
- def F#I#Q : FPR128<I, "f"#I, !cast<FPR64>("F"#I#"D"),
- !cast<FPR64>("F"#!add(I, 2)#"D")>;
+ def F#I#Q : FPR128<I, "f"#I, !cast<FPR64>("F"#!add(I, 2)#"D"),
+ !cast<FPR64>("F"#I#"D")>;
}
// There's no store-multiple instruction for FPRs, so we're not fussy
diff --git a/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp b/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp
index 4ca9292..c7ebb5d 100644
--- a/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp
+++ b/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp
@@ -25,6 +25,34 @@ SystemZSelectionDAGInfo(const SystemZTargetMachine &TM)
SystemZSelectionDAGInfo::~SystemZSelectionDAGInfo() {
}
+// Decide whether it is best to use a loop or straight-line code for
+// a block operation of Size bytes with source address Src and destination
+// address Dest. Sequence is the opcode to use for straight-line code
+// (such as MVC) and Loop is the opcode to use for loops (such as MVC_LOOP).
+// Return the chain for the completed operation.
+static SDValue emitMemMem(SelectionDAG &DAG, SDLoc DL, unsigned Sequence,
+ unsigned Loop, SDValue Chain, SDValue Dst,
+ SDValue Src, uint64_t Size) {
+ EVT PtrVT = Src.getValueType();
+ // The heuristic we use is to prefer loops for anything that would
+ // require 7 or more MVCs. With these kinds of sizes there isn't
+ // much to choose between straight-line code and looping code,
+ // since the time will be dominated by the MVCs themselves.
+ // However, the loop has 4 or 5 instructions (depending on whether
+ // the base addresses can be proved equal), so there doesn't seem
+ // much point using a loop for 5 * 256 bytes or fewer. Anything in
+ // the range (5 * 256, 6 * 256) will need another instruction after
+  // much point in using a loop for 5 * 256 bytes or fewer.  Anything in
+ // The next value up, 6 * 256, can be implemented in the same
+ // number of straight-line MVCs as 6 * 256 - 1.
+ if (Size > 6 * 256)
+ return DAG.getNode(Loop, DL, MVT::Other, Chain, Dst, Src,
+ DAG.getConstant(Size, PtrVT),
+ DAG.getConstant(Size / 256, PtrVT));
+ return DAG.getNode(Sequence, DL, MVT::Other, Chain, Dst, Src,
+ DAG.getConstant(Size, PtrVT));
+}
+
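To make the cutoff concrete: each MVC moves at most 256 bytes, so straight-line code needs ceil(Size / 256) of them, and the loop form is selected exactly when that count reaches 7:

    #include <cstdint>

    // numMVCs(1536) == 6 (still straight-line), numMVCs(1537) == 7 (loop).
    unsigned numMVCs(uint64_t Size) { return (Size + 255) / 256; }
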
SDValue SystemZSelectionDAGInfo::
EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
SDValue Dst, SDValue Src, SDValue Size, unsigned Align,
@@ -34,14 +62,9 @@ EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
if (IsVolatile)
return SDValue();
- if (ConstantSDNode *CSize = dyn_cast<ConstantSDNode>(Size)) {
- uint64_t Bytes = CSize->getZExtValue();
- if (Bytes >= 1 && Bytes <= 0x100) {
- // A single MVC.
- return DAG.getNode(SystemZISD::MVC, DL, MVT::Other,
- Chain, Dst, Src, Size);
- }
- }
+ if (ConstantSDNode *CSize = dyn_cast<ConstantSDNode>(Size))
+ return emitMemMem(DAG, DL, SystemZISD::MVC, SystemZISD::MVC_LOOP,
+ Chain, Dst, Src, CSize->getZExtValue());
return SDValue();
}
@@ -65,7 +88,7 @@ EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
SDValue Dst, SDValue Byte, SDValue Size,
unsigned Align, bool IsVolatile,
MachinePointerInfo DstPtrInfo) const {
- EVT DstVT = Dst.getValueType();
+ EVT PtrVT = Dst.getValueType();
if (IsVolatile)
return SDValue();
@@ -89,8 +112,8 @@ EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
Align, DstPtrInfo);
if (Size2 == 0)
return Chain1;
- Dst = DAG.getNode(ISD::ADD, DL, DstVT, Dst,
- DAG.getConstant(Size1, DstVT));
+ Dst = DAG.getNode(ISD::ADD, DL, PtrVT, Dst,
+ DAG.getConstant(Size1, PtrVT));
DstPtrInfo = DstPtrInfo.getWithOffset(Size1);
SDValue Chain2 = memsetStore(DAG, DL, Chain, Dst, ByteVal, Size2,
std::min(Align, Size1), DstPtrInfo);
@@ -103,8 +126,8 @@ EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
false, false, Align);
if (Bytes == 1)
return Chain1;
- SDValue Dst2 = DAG.getNode(ISD::ADD, DL, DstVT, Dst,
- DAG.getConstant(1, DstVT));
+ SDValue Dst2 = DAG.getNode(ISD::ADD, DL, PtrVT, Dst,
+ DAG.getConstant(1, PtrVT));
SDValue Chain2 = DAG.getStore(Chain, DL, Byte, Dst2,
DstPtrInfo.getWithOffset(1),
false, false, 1);
@@ -112,16 +135,159 @@ EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
}
}
assert(Bytes >= 2 && "Should have dealt with 0- and 1-byte cases already");
- if (Bytes <= 0x101) {
- // Copy the byte to the first location and then use MVC to copy
- // it to the rest.
- Chain = DAG.getStore(Chain, DL, Byte, Dst, DstPtrInfo,
- false, false, Align);
- SDValue Dst2 = DAG.getNode(ISD::ADD, DL, DstVT, Dst,
- DAG.getConstant(1, DstVT));
- return DAG.getNode(SystemZISD::MVC, DL, MVT::Other, Chain, Dst2, Dst,
- DAG.getConstant(Bytes - 1, MVT::i32));
- }
+
+ // Handle the special case of a memset of 0, which can use XC.
+ ConstantSDNode *CByte = dyn_cast<ConstantSDNode>(Byte);
+ if (CByte && CByte->getZExtValue() == 0)
+ return emitMemMem(DAG, DL, SystemZISD::XC, SystemZISD::XC_LOOP,
+ Chain, Dst, Dst, Bytes);
+
+ // Copy the byte to the first location and then use MVC to copy
+ // it to the rest.
+ Chain = DAG.getStore(Chain, DL, Byte, Dst, DstPtrInfo,
+ false, false, Align);
+ SDValue DstPlus1 = DAG.getNode(ISD::ADD, DL, PtrVT, Dst,
+ DAG.getConstant(1, PtrVT));
+ return emitMemMem(DAG, DL, SystemZISD::MVC, SystemZISD::MVC_LOOP,
+ Chain, DstPlus1, Dst, Bytes - 1);
}
return SDValue();
}
+
+// Use CLC to compare [Src1, Src1 + Size) with [Src2, Src2 + Size),
+// deciding whether to use a loop or straight-line code.
+static SDValue emitCLC(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
+ SDValue Src1, SDValue Src2, uint64_t Size) {
+ SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
+ EVT PtrVT = Src1.getValueType();
+ // A two-CLC sequence is a clear win over a loop, not least because it
+ // needs only one branch. A three-CLC sequence needs the same number
+ // of branches as a loop (i.e. 2), but is shorter. That brings us to
+ // lengths greater than 768 bytes. It seems relatively likely that
+ // a difference will be found within the first 768 bytes, so we just
+ // optimize for the smallest number of branch instructions, in order
+ // to avoid polluting the prediction buffer too much. A loop only ever
+ // needs 2 branches, whereas a straight-line sequence would need 3 or more.
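+  // For example: Size = 3 * 256 = 768 is emitted as three straight-line CLCs
+  // (each CLC compares at most 256 bytes), while Size = 769 becomes a
+  // CLC_LOOP with Size / 256 = 3 as the 256-byte block count operand.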
+ if (Size > 3 * 256)
+ return DAG.getNode(SystemZISD::CLC_LOOP, DL, VTs, Chain, Src1, Src2,
+ DAG.getConstant(Size, PtrVT),
+ DAG.getConstant(Size / 256, PtrVT));
+ return DAG.getNode(SystemZISD::CLC, DL, VTs, Chain, Src1, Src2,
+ DAG.getConstant(Size, PtrVT));
+}
+
+// Convert the current CC value into an integer that is 0 if CC == 0,
+// less than zero if CC == 1 and greater than zero if CC >= 2.
+// The sequence starts with IPM, which puts CC into bits 29 and 28
+// of an integer and clears bits 30 and 31.
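+// For CC values 0, 1 and 2 the shift therefore yields 0, 1 and 2, and the
+// rotate left by 31 (equivalently, right by 1) maps those to 0, 0x80000000
+// (negative) and 1 (positive) respectively.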
+static SDValue addIPMSequence(SDLoc DL, SDValue Glue, SelectionDAG &DAG) {
+ SDValue IPM = DAG.getNode(SystemZISD::IPM, DL, MVT::i32, Glue);
+ SDValue SRL = DAG.getNode(ISD::SRL, DL, MVT::i32, IPM,
+ DAG.getConstant(SystemZ::IPM_CC, MVT::i32));
+ SDValue ROTL = DAG.getNode(ISD::ROTL, DL, MVT::i32, SRL,
+ DAG.getConstant(31, MVT::i32));
+ return ROTL;
+}
+
+std::pair<SDValue, SDValue> SystemZSelectionDAGInfo::
+EmitTargetCodeForMemcmp(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
+ SDValue Src1, SDValue Src2, SDValue Size,
+ MachinePointerInfo Op1PtrInfo,
+ MachinePointerInfo Op2PtrInfo) const {
+ if (ConstantSDNode *CSize = dyn_cast<ConstantSDNode>(Size)) {
+ uint64_t Bytes = CSize->getZExtValue();
+ assert(Bytes > 0 && "Caller should have handled 0-size case");
+ Chain = emitCLC(DAG, DL, Chain, Src1, Src2, Bytes);
+ SDValue Glue = Chain.getValue(1);
+ return std::make_pair(addIPMSequence(DL, Glue, DAG), Chain);
+ }
+ return std::make_pair(SDValue(), SDValue());
+}
+
+std::pair<SDValue, SDValue> SystemZSelectionDAGInfo::
+EmitTargetCodeForMemchr(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
+ SDValue Src, SDValue Char, SDValue Length,
+ MachinePointerInfo SrcPtrInfo) const {
+ // Use SRST to find the character. End is its address on success.
+ EVT PtrVT = Src.getValueType();
+ SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other, MVT::Glue);
+ Length = DAG.getZExtOrTrunc(Length, DL, PtrVT);
+ Char = DAG.getZExtOrTrunc(Char, DL, MVT::i32);
+ Char = DAG.getNode(ISD::AND, DL, MVT::i32, Char,
+ DAG.getConstant(255, MVT::i32));
+ SDValue Limit = DAG.getNode(ISD::ADD, DL, PtrVT, Src, Length);
+ SDValue End = DAG.getNode(SystemZISD::SEARCH_STRING, DL, VTs, Chain,
+ Limit, Src, Char);
+ Chain = End.getValue(1);
+ SDValue Glue = End.getValue(2);
+
+ // Now select between End and null, depending on whether the character
+ // was found.
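+  // (That is: keep End when the condition code set by SRST is in
+  // CCMASK_SRST_FOUND, and substitute the constant null otherwise.)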
+ SmallVector<SDValue, 5> Ops;
+ Ops.push_back(End);
+ Ops.push_back(DAG.getConstant(0, PtrVT));
+ Ops.push_back(DAG.getConstant(SystemZ::CCMASK_SRST, MVT::i32));
+ Ops.push_back(DAG.getConstant(SystemZ::CCMASK_SRST_FOUND, MVT::i32));
+ Ops.push_back(Glue);
+ VTs = DAG.getVTList(PtrVT, MVT::Glue);
+ End = DAG.getNode(SystemZISD::SELECT_CCMASK, DL, VTs, &Ops[0], Ops.size());
+ return std::make_pair(End, Chain);
+}
+
+std::pair<SDValue, SDValue> SystemZSelectionDAGInfo::
+EmitTargetCodeForStrcpy(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
+ SDValue Dest, SDValue Src,
+ MachinePointerInfo DestPtrInfo,
+ MachinePointerInfo SrcPtrInfo, bool isStpcpy) const {
+ SDVTList VTs = DAG.getVTList(Dest.getValueType(), MVT::Other);
+ SDValue EndDest = DAG.getNode(SystemZISD::STPCPY, DL, VTs, Chain, Dest, Src,
+ DAG.getConstant(0, MVT::i32));
+ return std::make_pair(isStpcpy ? EndDest : Dest, EndDest.getValue(1));
+}
+
+std::pair<SDValue, SDValue> SystemZSelectionDAGInfo::
+EmitTargetCodeForStrcmp(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
+ SDValue Src1, SDValue Src2,
+ MachinePointerInfo Op1PtrInfo,
+ MachinePointerInfo Op2PtrInfo) const {
+ SDVTList VTs = DAG.getVTList(Src1.getValueType(), MVT::Other, MVT::Glue);
+ SDValue Unused = DAG.getNode(SystemZISD::STRCMP, DL, VTs, Chain, Src1, Src2,
+ DAG.getConstant(0, MVT::i32));
+ Chain = Unused.getValue(1);
+ SDValue Glue = Chain.getValue(2);
+ return std::make_pair(addIPMSequence(DL, Glue, DAG), Chain);
+}
+
+// Search from Src for a null character, stopping once Src reaches Limit.
+// Return a pair of values, the first being the number of nonnull characters
+// and the second being the out chain.
+//
+// This can be used for strlen by setting Limit to 0.
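+// (With Limit = 0 the search is effectively unbounded: SRST then stops only
+// on the null character, since the running address never reaches 0 in
+// practice.)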
+static std::pair<SDValue, SDValue> getBoundedStrlen(SelectionDAG &DAG, SDLoc DL,
+ SDValue Chain, SDValue Src,
+ SDValue Limit) {
+ EVT PtrVT = Src.getValueType();
+ SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other, MVT::Glue);
+ SDValue End = DAG.getNode(SystemZISD::SEARCH_STRING, DL, VTs, Chain,
+ Limit, Src, DAG.getConstant(0, MVT::i32));
+ Chain = End.getValue(1);
+ SDValue Len = DAG.getNode(ISD::SUB, DL, PtrVT, End, Src);
+ return std::make_pair(Len, Chain);
+}
+
+std::pair<SDValue, SDValue> SystemZSelectionDAGInfo::
+EmitTargetCodeForStrlen(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
+ SDValue Src, MachinePointerInfo SrcPtrInfo) const {
+ EVT PtrVT = Src.getValueType();
+ return getBoundedStrlen(DAG, DL, Chain, Src, DAG.getConstant(0, PtrVT));
+}
+
+std::pair<SDValue, SDValue> SystemZSelectionDAGInfo::
+EmitTargetCodeForStrnlen(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
+ SDValue Src, SDValue MaxLength,
+ MachinePointerInfo SrcPtrInfo) const {
+ EVT PtrVT = Src.getValueType();
+ MaxLength = DAG.getZExtOrTrunc(MaxLength, DL, PtrVT);
+ SDValue Limit = DAG.getNode(ISD::ADD, DL, PtrVT, Src, MaxLength);
+ return getBoundedStrlen(DAG, DL, Chain, Src, Limit);
+}
diff --git a/lib/Target/SystemZ/SystemZSelectionDAGInfo.h b/lib/Target/SystemZ/SystemZSelectionDAGInfo.h
index 9138a9c..281d1e2 100644
--- a/lib/Target/SystemZ/SystemZSelectionDAGInfo.h
+++ b/lib/Target/SystemZ/SystemZSelectionDAGInfo.h
@@ -38,7 +38,41 @@ public:
EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc DL,
SDValue Chain, SDValue Dst, SDValue Byte,
SDValue Size, unsigned Align, bool IsVolatile,
- MachinePointerInfo DstPtrInfo) const;
+ MachinePointerInfo DstPtrInfo) const LLVM_OVERRIDE;
+
+ virtual std::pair<SDValue, SDValue>
+ EmitTargetCodeForMemcmp(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
+ SDValue Src1, SDValue Src2, SDValue Size,
+ MachinePointerInfo Op1PtrInfo,
+ MachinePointerInfo Op2PtrInfo) const LLVM_OVERRIDE;
+
+ virtual std::pair<SDValue, SDValue>
+ EmitTargetCodeForMemchr(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
+ SDValue Src, SDValue Char, SDValue Length,
+ MachinePointerInfo SrcPtrInfo) const LLVM_OVERRIDE;
+
+ virtual std::pair<SDValue, SDValue>
+ EmitTargetCodeForStrcpy(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
+ SDValue Dest, SDValue Src,
+ MachinePointerInfo DestPtrInfo,
+ MachinePointerInfo SrcPtrInfo,
+ bool isStpcpy) const LLVM_OVERRIDE;
+
+ virtual std::pair<SDValue, SDValue>
+ EmitTargetCodeForStrcmp(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
+ SDValue Src1, SDValue Src2,
+ MachinePointerInfo Op1PtrInfo,
+ MachinePointerInfo Op2PtrInfo) const LLVM_OVERRIDE;
+
+ virtual std::pair<SDValue, SDValue>
+ EmitTargetCodeForStrlen(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
+ SDValue Src, MachinePointerInfo SrcPtrInfo) const
+ LLVM_OVERRIDE;
+
+ virtual std::pair<SDValue, SDValue>
+ EmitTargetCodeForStrnlen(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
+ SDValue Src, SDValue MaxLength,
+ MachinePointerInfo SrcPtrInfo) const LLVM_OVERRIDE;
};
}
diff --git a/lib/Target/SystemZ/SystemZShortenInst.cpp b/lib/Target/SystemZ/SystemZShortenInst.cpp
new file mode 100644
index 0000000..537a545
--- /dev/null
+++ b/lib/Target/SystemZ/SystemZShortenInst.cpp
@@ -0,0 +1,163 @@
+//===-- SystemZShortenInst.cpp - Instruction-shortening pass --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass tries to replace instructions with shorter forms. For example,
+// IILF can be replaced with LLILL or LLILH if the constant fits and if the
+// other 32 bits of the GR64 destination are not live.
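+// For example, "iilf %r2, 0x1234" can become "llill %r2, 0x1234" when the
+// high word of %r2 is dead, because LLILL zeroes the rest of the register
+// while loading the low halfword.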
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "systemz-shorten-inst"
+
+#include "SystemZTargetMachine.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+
+using namespace llvm;
+
+namespace {
+ class SystemZShortenInst : public MachineFunctionPass {
+ public:
+ static char ID;
+ SystemZShortenInst(const SystemZTargetMachine &tm);
+
+ virtual const char *getPassName() const {
+ return "SystemZ Instruction Shortening";
+ }
+
+ bool processBlock(MachineBasicBlock *MBB);
+ bool runOnMachineFunction(MachineFunction &F);
+
+ private:
+ bool shortenIIF(MachineInstr &MI, unsigned *GPRMap, unsigned LiveOther,
+ unsigned LLIxL, unsigned LLIxH);
+
+ const SystemZInstrInfo *TII;
+
+ // LowGPRs[I] has bit N set if LLVM register I includes the low
+ // word of GPR N. HighGPRs is the same for the high word.
+ unsigned LowGPRs[SystemZ::NUM_TARGET_REGS];
+ unsigned HighGPRs[SystemZ::NUM_TARGET_REGS];
+ };
+
+ char SystemZShortenInst::ID = 0;
+} // end of anonymous namespace
+
+FunctionPass *llvm::createSystemZShortenInstPass(SystemZTargetMachine &TM) {
+ return new SystemZShortenInst(TM);
+}
+
+SystemZShortenInst::SystemZShortenInst(const SystemZTargetMachine &tm)
+ : MachineFunctionPass(ID), TII(0), LowGPRs(), HighGPRs() {
+ // Set up LowGPRs and HighGPRs.
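+  // For instance, the GR64 register for GPR 2 gets bit 2 set in both maps,
+  // and a 128-bit pair starting at GPR 2 covers GPRs 2 and 3, hence the
+  // "3 << I" mask below.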
+ for (unsigned I = 0; I < 16; ++I) {
+ LowGPRs[SystemZMC::GR32Regs[I]] |= 1 << I;
+ LowGPRs[SystemZMC::GR64Regs[I]] |= 1 << I;
+ HighGPRs[SystemZMC::GRH32Regs[I]] |= 1 << I;
+ HighGPRs[SystemZMC::GR64Regs[I]] |= 1 << I;
+ if (unsigned GR128 = SystemZMC::GR128Regs[I]) {
+ LowGPRs[GR128] |= 3 << I;
+ HighGPRs[GR128] |= 3 << I;
+ }
+ }
+}
+
+// MI loads one word of a GPR using an IIxF instruction and LLIxL and LLIxH
+// are the halfword immediate loads for the same word. Try to use one of them
+// instead of IIxF. If MI loads the high word, GPRMap[X] is the set of high
+// words referenced by LLVM register X while LiveOther is the mask of low
+// words that are currently live, and vice versa.
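+//
+// Two immediate shapes qualify: a value with only the low 16 bits set keeps
+// its immediate and becomes LLIxL, while a value with only bits 16-31 set
+// becomes LLIxH with the immediate shifted right by 16.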
+bool SystemZShortenInst::shortenIIF(MachineInstr &MI, unsigned *GPRMap,
+ unsigned LiveOther, unsigned LLIxL,
+ unsigned LLIxH) {
+ unsigned Reg = MI.getOperand(0).getReg();
+ assert(Reg < SystemZ::NUM_TARGET_REGS && "Invalid register number");
+ unsigned GPRs = GPRMap[Reg];
+ assert(GPRs != 0 && "Register must be a GPR");
+ if (GPRs & LiveOther)
+ return false;
+
+ uint64_t Imm = MI.getOperand(1).getImm();
+ if (SystemZ::isImmLL(Imm)) {
+ MI.setDesc(TII->get(LLIxL));
+ MI.getOperand(0).setReg(SystemZMC::getRegAsGR64(Reg));
+ return true;
+ }
+ if (SystemZ::isImmLH(Imm)) {
+ MI.setDesc(TII->get(LLIxH));
+ MI.getOperand(0).setReg(SystemZMC::getRegAsGR64(Reg));
+ MI.getOperand(1).setImm(Imm >> 16);
+ return true;
+ }
+ return false;
+}
+
+// Process all instructions in MBB. Return true if something changed.
+bool SystemZShortenInst::processBlock(MachineBasicBlock *MBB) {
+ bool Changed = false;
+
+ // Work out which words are live on exit from the block.
+ unsigned LiveLow = 0;
+ unsigned LiveHigh = 0;
+ for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
+ SE = MBB->succ_end(); SI != SE; ++SI) {
+ for (MachineBasicBlock::livein_iterator LI = (*SI)->livein_begin(),
+ LE = (*SI)->livein_end(); LI != LE; ++LI) {
+ unsigned Reg = *LI;
+ assert(Reg < SystemZ::NUM_TARGET_REGS && "Invalid register number");
+ LiveLow |= LowGPRs[Reg];
+ LiveHigh |= HighGPRs[Reg];
+ }
+ }
+
+ // Iterate backwards through the block looking for instructions to change.
+ for (MachineBasicBlock::reverse_iterator MBBI = MBB->rbegin(),
+ MBBE = MBB->rend(); MBBI != MBBE; ++MBBI) {
+ MachineInstr &MI = *MBBI;
+ unsigned Opcode = MI.getOpcode();
+ if (Opcode == SystemZ::IILF)
+ Changed |= shortenIIF(MI, LowGPRs, LiveHigh, SystemZ::LLILL,
+ SystemZ::LLILH);
+ else if (Opcode == SystemZ::IIHF)
+ Changed |= shortenIIF(MI, HighGPRs, LiveLow, SystemZ::LLIHL,
+ SystemZ::LLIHH);
+ unsigned UsedLow = 0;
+ unsigned UsedHigh = 0;
+ for (MachineInstr::mop_iterator MOI = MI.operands_begin(),
+ MOE = MI.operands_end(); MOI != MOE; ++MOI) {
+ MachineOperand &MO = *MOI;
+ if (MO.isReg()) {
+ if (unsigned Reg = MO.getReg()) {
+ assert(Reg < SystemZ::NUM_TARGET_REGS && "Invalid register number");
+ if (MO.isDef()) {
+ LiveLow &= ~LowGPRs[Reg];
+ LiveHigh &= ~HighGPRs[Reg];
+ } else if (!MO.isUndef()) {
+ UsedLow |= LowGPRs[Reg];
+ UsedHigh |= HighGPRs[Reg];
+ }
+ }
+ }
+ }
+ LiveLow |= UsedLow;
+ LiveHigh |= UsedHigh;
+ }
+
+ return Changed;
+}
+
+bool SystemZShortenInst::runOnMachineFunction(MachineFunction &F) {
+ TII = static_cast<const SystemZInstrInfo *>(F.getTarget().getInstrInfo());
+
+ bool Changed = false;
+ for (MachineFunction::iterator MFI = F.begin(), MFE = F.end();
+ MFI != MFE; ++MFI)
+ Changed |= processBlock(MFI);
+
+ return Changed;
+}
diff --git a/lib/Target/SystemZ/SystemZSubtarget.cpp b/lib/Target/SystemZ/SystemZSubtarget.cpp
index 036ec05..3971d5e 100644
--- a/lib/Target/SystemZ/SystemZSubtarget.cpp
+++ b/lib/Target/SystemZ/SystemZSubtarget.cpp
@@ -9,6 +9,7 @@
#include "SystemZSubtarget.h"
#include "llvm/IR/GlobalValue.h"
+#include "llvm/Support/Host.h"
#include "MCTargetDesc/SystemZMCTargetDesc.h"
#define GET_SUBTARGETINFO_TARGET_DESC
@@ -17,14 +18,22 @@
using namespace llvm;
+// Pin the vtable to this file.
+void SystemZSubtarget::anchor() {}
+
SystemZSubtarget::SystemZSubtarget(const std::string &TT,
const std::string &CPU,
const std::string &FS)
: SystemZGenSubtargetInfo(TT, CPU, FS), HasDistinctOps(false),
- HasLoadStoreOnCond(false), HasHighWord(false), TargetTriple(TT) {
+ HasLoadStoreOnCond(false), HasHighWord(false), HasFPExtension(false),
+ TargetTriple(TT) {
std::string CPUName = CPU;
if (CPUName.empty())
- CPUName = "z10";
+ CPUName = "generic";
+#if defined(__linux__) && defined(__s390x__)
+ if (CPUName == "generic")
+ CPUName = sys::getHostCPUName();
+#endif
// Parse features string.
ParseSubtargetFeatures(CPUName, FS);
diff --git a/lib/Target/SystemZ/SystemZSubtarget.h b/lib/Target/SystemZ/SystemZSubtarget.h
index 4efb58d..5817491 100644
--- a/lib/Target/SystemZ/SystemZSubtarget.h
+++ b/lib/Target/SystemZ/SystemZSubtarget.h
@@ -26,10 +26,12 @@ class GlobalValue;
class StringRef;
class SystemZSubtarget : public SystemZGenSubtargetInfo {
+ virtual void anchor();
protected:
bool HasDistinctOps;
bool HasLoadStoreOnCond;
bool HasHighWord;
+ bool HasFPExtension;
private:
Triple TargetTriple;
@@ -38,6 +40,9 @@ public:
SystemZSubtarget(const std::string &TT, const std::string &CPU,
const std::string &FS);
+ // This is important for reducing register pressure in vector code.
+ virtual bool useAA() const LLVM_OVERRIDE { return true; }
+
// Automatically generated by tblgen.
void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
@@ -50,6 +55,9 @@ public:
// Return true if the target has the high-word facility.
bool hasHighWord() const { return HasHighWord; }
+ // Return true if the target has the floating-point extension facility.
+ bool hasFPExtension() const { return HasFPExtension; }
+
// Return true if GV can be accessed using LARL for reloc model RM
// and code model CM.
bool isPC32DBLSymbol(const GlobalValue *GV, Reloc::Model RM,
diff --git a/lib/Target/SystemZ/SystemZTargetMachine.cpp b/lib/Target/SystemZ/SystemZTargetMachine.cpp
index 856183c..dee92e9 100644
--- a/lib/Target/SystemZ/SystemZTargetMachine.cpp
+++ b/lib/Target/SystemZ/SystemZTargetMachine.cpp
@@ -10,6 +10,7 @@
#include "SystemZTargetMachine.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Transforms/Scalar.h"
using namespace llvm;
@@ -47,12 +48,18 @@ public:
return getTM<SystemZTargetMachine>();
}
+ virtual void addIRPasses() LLVM_OVERRIDE;
virtual bool addInstSelector() LLVM_OVERRIDE;
virtual bool addPreSched2() LLVM_OVERRIDE;
virtual bool addPreEmitPass() LLVM_OVERRIDE;
};
} // end anonymous namespace
+void SystemZPassConfig::addIRPasses() {
+ TargetPassConfig::addIRPasses();
+ addPass(createPartiallyInlineLibCallsPass());
+}
+
bool SystemZPassConfig::addInstSelector() {
addPass(createSystemZISelDag(getSystemZTargetMachine(), getOptLevel()));
return false;
@@ -90,6 +97,8 @@ bool SystemZPassConfig::addPreEmitPass() {
// preventing that would be a win or not.
if (getOptLevel() != CodeGenOpt::None)
addPass(createSystemZElimComparePass(getSystemZTargetMachine()));
+ if (getOptLevel() != CodeGenOpt::None)
+ addPass(createSystemZShortenInstPass(getSystemZTargetMachine()));
addPass(createSystemZLongBranchPass(getSystemZTargetMachine()));
return true;
}
diff --git a/lib/Target/Target.cpp b/lib/Target/Target.cpp
index 3d92f29..2190198 100644
--- a/lib/Target/Target.cpp
+++ b/lib/Target/Target.cpp
@@ -88,6 +88,14 @@ LLVMTypeRef LLVMIntPtrTypeForAS(LLVMTargetDataRef TD, unsigned AS) {
return wrap(unwrap(TD)->getIntPtrType(getGlobalContext(), AS));
}
+LLVMTypeRef LLVMIntPtrTypeInContext(LLVMContextRef C, LLVMTargetDataRef TD) {
+ return wrap(unwrap(TD)->getIntPtrType(*unwrap(C)));
+}
+
+LLVMTypeRef LLVMIntPtrTypeForASInContext(LLVMContextRef C,
+                                         LLVMTargetDataRef TD, unsigned AS) {
+ return wrap(unwrap(TD)->getIntPtrType(*unwrap(C), AS));
+}
+
unsigned long long LLVMSizeOfTypeInBits(LLVMTargetDataRef TD, LLVMTypeRef Ty) {
return unwrap(TD)->getTypeSizeInBits(unwrap(Ty));
}
diff --git a/lib/Target/TargetLibraryInfo.cpp b/lib/Target/TargetLibraryInfo.cpp
index 8696b57..3e68fe1 100644
--- a/lib/Target/TargetLibraryInfo.cpp
+++ b/lib/Target/TargetLibraryInfo.cpp
@@ -38,6 +38,8 @@ const char* TargetLibraryInfo::StandardNames[LibFunc::NumLibFuncs] =
"_ZnwjRKSt9nothrow_t",
"_Znwm",
"_ZnwmRKSt9nothrow_t",
+ "__cospi",
+ "__cospif",
"__cxa_atexit",
"__cxa_guard_abort",
"__cxa_guard_acquire",
@@ -45,6 +47,10 @@ const char* TargetLibraryInfo::StandardNames[LibFunc::NumLibFuncs] =
"__isoc99_scanf",
"__isoc99_sscanf",
"__memcpy_chk",
+ "__sincospi_stret",
+ "__sincospi_stretf",
+ "__sinpi",
+ "__sinpif",
"__sqrt_finite",
"__sqrtf_finite",
"__sqrtl_finite",
@@ -331,6 +337,24 @@ const char* TargetLibraryInfo::StandardNames[LibFunc::NumLibFuncs] =
"write"
};
+static bool hasSinCosPiStret(const Triple &T) {
+ // Only Darwin variants have _stret versions of combined trig functions.
+ if (!T.isMacOSX() && T.getOS() != Triple::IOS)
+ return false;
+
+ // The ABI is rather complicated on x86, so don't do anything special there.
+ if (T.getArch() == Triple::x86)
+ return false;
+
+ if (T.isMacOSX() && T.isMacOSXVersionLT(10, 9))
+ return false;
+
+ if (T.getOS() == Triple::IOS && T.isOSVersionLT(7, 0))
+ return false;
+
+ return true;
+}
+
/// initialize - Initialize the set of available library functions based on the
/// specified target triple. This should be carefully written so that a missing
/// target triple gets a sane set of defaults.
@@ -350,13 +374,22 @@ static void initialize(TargetLibraryInfo &TLI, const Triple &T,
if (T.isMacOSX()) {
if (T.isMacOSXVersionLT(10, 5))
TLI.setUnavailable(LibFunc::memset_pattern16);
- } else if (T.getOS() == Triple::IOS) {
+ } else if (T.isiOS()) {
if (T.isOSVersionLT(3, 0))
TLI.setUnavailable(LibFunc::memset_pattern16);
} else {
TLI.setUnavailable(LibFunc::memset_pattern16);
}
+ if (!hasSinCosPiStret(T)) {
+ TLI.setUnavailable(LibFunc::sinpi);
+ TLI.setUnavailable(LibFunc::sinpif);
+ TLI.setUnavailable(LibFunc::cospi);
+ TLI.setUnavailable(LibFunc::cospif);
+ TLI.setUnavailable(LibFunc::sincospi_stret);
+ TLI.setUnavailable(LibFunc::sincospi_stretf);
+ }
+
if (T.isMacOSX() && T.getArch() == Triple::x86 &&
!T.isMacOSXVersionLT(10, 7)) {
// x86-32 OSX has a scheme where fwrite and fputs (and some other functions
@@ -562,7 +595,7 @@ static void initialize(TargetLibraryInfo &TLI, const Triple &T,
}
// The following functions are available on at least Linux:
- if (T.getOS() != Triple::Linux) {
+ if (!T.isOSLinux()) {
TLI.setUnavailable(LibFunc::dunder_strdup);
TLI.setUnavailable(LibFunc::dunder_strtok_r);
TLI.setUnavailable(LibFunc::dunder_isoc99_scanf);
diff --git a/lib/Target/TargetLoweringObjectFile.cpp b/lib/Target/TargetLoweringObjectFile.cpp
index cd810b6..7b8d110 100644
--- a/lib/Target/TargetLoweringObjectFile.cpp
+++ b/lib/Target/TargetLoweringObjectFile.cpp
@@ -97,10 +97,20 @@ static bool IsNullTerminatedString(const Constant *C) {
return false;
}
+/// Return the MCSymbol for the specified global value. This
+/// symbol is the main label representing the address of the global.
+MCSymbol *TargetLoweringObjectFile::getSymbol(Mangler &M,
+ const GlobalValue *GV) const {
+ SmallString<60> NameStr;
+ M.getNameWithPrefix(NameStr, GV, false);
+ return Ctx->GetOrCreateSymbol(NameStr.str());
+}
+
+
MCSymbol *TargetLoweringObjectFile::
getCFIPersonalitySymbol(const GlobalValue *GV, Mangler *Mang,
MachineModuleInfo *MMI) const {
- return Mang->getSymbol(GV);
+ return getSymbol(*Mang, GV);
}
void TargetLoweringObjectFile::emitPersonalityValue(MCStreamer &Streamer,
@@ -293,7 +303,7 @@ getTTypeGlobalReference(const GlobalValue *GV, Mangler *Mang,
MachineModuleInfo *MMI, unsigned Encoding,
MCStreamer &Streamer) const {
const MCSymbolRefExpr *Ref =
- MCSymbolRefExpr::Create(Mang->getSymbol(GV), getContext());
+ MCSymbolRefExpr::Create(getSymbol(*Mang, GV), getContext());
return getTTypeReference(Ref, Encoding, Streamer);
}
diff --git a/lib/Target/TargetMachine.cpp b/lib/Target/TargetMachine.cpp
index df4a03c..cb42e83 100644
--- a/lib/Target/TargetMachine.cpp
+++ b/lib/Target/TargetMachine.cpp
@@ -164,6 +164,11 @@ CodeGenOpt::Level TargetMachine::getOptLevel() const {
return CodeGenInfo->getOptLevel();
}
+void TargetMachine::setOptLevel(CodeGenOpt::Level Level) const {
+ if (CodeGenInfo)
+ CodeGenInfo->setOptLevel(Level);
+}
+
bool TargetMachine::getAsmVerbosityDefault() {
return AsmVerbosityDefault;
}
diff --git a/lib/Target/TargetMachineC.cpp b/lib/Target/TargetMachineC.cpp
index 7419122..3d5f827 100644
--- a/lib/Target/TargetMachineC.cpp
+++ b/lib/Target/TargetMachineC.cpp
@@ -21,6 +21,7 @@
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Host.h"
#include "llvm/Target/TargetMachine.h"
#include <cassert>
#include <cstdlib>
@@ -60,13 +61,44 @@ inline LLVMTargetRef wrap(const Target * P) {
}
LLVMTargetRef LLVMGetFirstTarget() {
- const Target* target = &*TargetRegistry::begin();
- return wrap(target);
+ if(TargetRegistry::begin() == TargetRegistry::end()) {
+ return NULL;
+ }
+
+ const Target* target = &*TargetRegistry::begin();
+ return wrap(target);
}
LLVMTargetRef LLVMGetNextTarget(LLVMTargetRef T) {
return wrap(unwrap(T)->getNext());
}
+LLVMTargetRef LLVMGetTargetFromName(const char *Name) {
+ StringRef NameRef = Name;
+ for (TargetRegistry::iterator IT = TargetRegistry::begin(),
+ IE = TargetRegistry::end(); IT != IE; ++IT) {
+ if (IT->getName() == NameRef)
+ return wrap(&*IT);
+ }
+
+ return NULL;
+}
+
+LLVMBool LLVMGetTargetFromTriple(const char* TripleStr, LLVMTargetRef *T,
+ char **ErrorMessage) {
+ std::string Error;
+
+ *T = wrap(TargetRegistry::lookupTarget(TripleStr, Error));
+
+ if (!*T) {
+ if (ErrorMessage)
+ *ErrorMessage = strdup(Error.c_str());
+
+ return 1;
+ }
+
+ return 0;
+}
+
const char * LLVMGetTargetName(LLVMTargetRef T) {
return unwrap(T)->getName();
}
@@ -87,9 +119,10 @@ LLVMBool LLVMTargetHasAsmBackend(LLVMTargetRef T) {
return unwrap(T)->hasMCAsmBackend();
}
-LLVMTargetMachineRef LLVMCreateTargetMachine(LLVMTargetRef T, char* Triple,
- char* CPU, char* Features, LLVMCodeGenOptLevel Level, LLVMRelocMode Reloc,
- LLVMCodeModel CodeModel) {
+LLVMTargetMachineRef LLVMCreateTargetMachine(LLVMTargetRef T,
+ const char* Triple, const char* CPU, const char* Features,
+ LLVMCodeGenOptLevel Level, LLVMRelocMode Reloc,
+ LLVMCodeModel CodeModel) {
Reloc::Model RM;
switch (Reloc){
case LLVMRelocStatic:
@@ -158,6 +191,11 @@ LLVMTargetDataRef LLVMGetTargetMachineData(LLVMTargetMachineRef T) {
return wrap(unwrap(T)->getDataLayout());
}
+void LLVMSetTargetMachineAsmVerbosity(LLVMTargetMachineRef T,
+ LLVMBool VerboseAsm) {
+ unwrap(T)->setAsmVerbosityDefault(VerboseAsm);
+}
+
static LLVMBool LLVMTargetMachineEmit(LLVMTargetMachineRef T, LLVMModuleRef M,
formatted_raw_ostream &OS, LLVMCodeGenFileType codegen, char **ErrorMessage) {
TargetMachine* TM = unwrap(T);
@@ -201,11 +239,11 @@ LLVMBool LLVMTargetMachineEmitToFile(LLVMTargetMachineRef T, LLVMModuleRef M,
char* Filename, LLVMCodeGenFileType codegen, char** ErrorMessage) {
std::string error;
raw_fd_ostream dest(Filename, error, sys::fs::F_Binary);
- formatted_raw_ostream destf(dest);
if (!error.empty()) {
*ErrorMessage = strdup(error.c_str());
return true;
}
+ formatted_raw_ostream destf(dest);
bool Result = LLVMTargetMachineEmit(T, M, destf, codegen, ErrorMessage);
dest.flush();
return Result;
@@ -225,3 +263,7 @@ LLVMBool LLVMTargetMachineEmitToMemoryBuffer(LLVMTargetMachineRef T,
Data.length(), "");
return Result;
}
+
+char *LLVMGetDefaultTargetTriple(void) {
+ return strdup(sys::getDefaultTargetTriple().c_str());
+}
diff --git a/lib/Target/TargetSubtargetInfo.cpp b/lib/Target/TargetSubtargetInfo.cpp
index af0cef6..10e8db5 100644
--- a/lib/Target/TargetSubtargetInfo.cpp
+++ b/lib/Target/TargetSubtargetInfo.cpp
@@ -11,6 +11,7 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/Support/CommandLine.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include "llvm/ADT/SmallVector.h"
using namespace llvm;
@@ -22,6 +23,21 @@ TargetSubtargetInfo::TargetSubtargetInfo() {}
TargetSubtargetInfo::~TargetSubtargetInfo() {}
+// Temporary option to compare overall performance change when moving from the
+// SD scheduler to the MachineScheduler pass pipeline. It should be removed
+// before 3.4. The normal way to enable/disable the MachineScheduling pass
+// itself is by using -enable-misched. For targets that already use MI sched
+// (via MySubTarget::enableMachineScheduler()), -misched-bench=false negates
+// the subtarget hook.
+static cl::opt<bool> BenchMachineSched("misched-bench", cl::Hidden,
+ cl::desc("Migrate from the target's default SD scheduler to MI scheduler"));
+
+bool TargetSubtargetInfo::useMachineScheduler() const {
+ if (BenchMachineSched.getNumOccurrences())
+ return BenchMachineSched;
+ return enableMachineScheduler();
+}
+
bool TargetSubtargetInfo::enableMachineScheduler() const {
return false;
}
@@ -35,3 +51,6 @@ bool TargetSubtargetInfo::enablePostRAScheduler(
return false;
}
+bool TargetSubtargetInfo::useAA() const {
+ return false;
+}
diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp
index ad83d97..bc8f367 100644
--- a/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -80,7 +80,7 @@ private:
PostfixStack.push_back(std::make_pair(Op, Val));
}
- void popOperator() { InfixOperatorStack.pop_back_val(); }
+ void popOperator() { InfixOperatorStack.pop_back(); }
void pushOperator(InfixCalculatorTok Op) {
// Push the new operator if the stack is empty.
if (InfixOperatorStack.empty()) {
@@ -118,12 +118,12 @@ private:
if (StackOp == IC_RPAREN) {
++ParenCount;
- InfixOperatorStack.pop_back_val();
+ InfixOperatorStack.pop_back();
} else if (StackOp == IC_LPAREN) {
--ParenCount;
- InfixOperatorStack.pop_back_val();
+ InfixOperatorStack.pop_back();
} else {
- InfixOperatorStack.pop_back_val();
+ InfixOperatorStack.pop_back();
PostfixStack.push_back(std::make_pair(StackOp, 0));
}
}
@@ -495,16 +495,17 @@ private:
X86Operand *ParseATTOperand();
X86Operand *ParseIntelOperand();
X86Operand *ParseIntelOffsetOfOperator();
- X86Operand *ParseIntelDotOperator(const MCExpr *Disp, const MCExpr *&NewDisp);
+ bool ParseIntelDotOperator(const MCExpr *Disp, const MCExpr *&NewDisp);
X86Operand *ParseIntelOperator(unsigned OpKind);
- X86Operand *ParseIntelMemOperand(unsigned SegReg, int64_t ImmDisp,
- SMLoc StartLoc);
- X86Operand *ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End);
+  X86Operand *ParseIntelSegmentOverride(unsigned SegReg, SMLoc Start,
+                                        unsigned Size);
+ X86Operand *ParseIntelMemOperand(int64_t ImmDisp, SMLoc StartLoc,
+ unsigned Size);
+ bool ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End);
X86Operand *ParseIntelBracExpression(unsigned SegReg, SMLoc Start,
int64_t ImmDisp, unsigned Size);
- X86Operand *ParseIntelIdentifier(const MCExpr *&Val, StringRef &Identifier,
- InlineAsmIdentifierInfo &Info,
- bool IsUnevaluatedOperand, SMLoc &End);
+ bool ParseIntelIdentifier(const MCExpr *&Val, StringRef &Identifier,
+ InlineAsmIdentifierInfo &Info,
+ bool IsUnevaluatedOperand, SMLoc &End);
X86Operand *ParseMemOperand(unsigned SegReg, SMLoc StartLoc);
@@ -555,8 +556,9 @@ private:
/// }
public:
- X86AsmParser(MCSubtargetInfo &sti, MCAsmParser &parser)
- : MCTargetAsmParser(), STI(sti), Parser(parser), InstInfo(0) {
+ X86AsmParser(MCSubtargetInfo &sti, MCAsmParser &parser,
+ const MCInstrInfo &MII)
+ : MCTargetAsmParser(), STI(sti), Parser(parser), InstInfo(0) {
// Initialize the set of available features.
setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
@@ -814,6 +816,9 @@ struct X86Operand : public MCParsedAsmOperand {
bool isMem256() const {
return Kind == Memory && (!Mem.Size || Mem.Size == 256);
}
+ bool isMem512() const {
+ return Kind == Memory && (!Mem.Size || Mem.Size == 512);
+ }
bool isMemVX32() const {
return Kind == Memory && (!Mem.Size || Mem.Size == 32) &&
@@ -840,17 +845,36 @@ struct X86Operand : public MCParsedAsmOperand {
getMemIndexReg() >= X86::ZMM0 && getMemIndexReg() <= X86::ZMM31;
}
- bool isMem512() const {
- return Kind == Memory && (!Mem.Size || Mem.Size == 512);
- }
-
bool isAbsMem() const {
return Kind == Memory && !getMemSegReg() && !getMemBaseReg() &&
!getMemIndexReg() && getMemScale() == 1;
}
+ bool isMemOffs8() const {
+ return Kind == Memory && !getMemSegReg() && !getMemBaseReg() &&
+ !getMemIndexReg() && getMemScale() == 1 && (!Mem.Size || Mem.Size == 8);
+ }
+ bool isMemOffs16() const {
+ return Kind == Memory && !getMemSegReg() && !getMemBaseReg() &&
+ !getMemIndexReg() && getMemScale() == 1 && (!Mem.Size || Mem.Size == 16);
+ }
+ bool isMemOffs32() const {
+ return Kind == Memory && !getMemSegReg() && !getMemBaseReg() &&
+ !getMemIndexReg() && getMemScale() == 1 && (!Mem.Size || Mem.Size == 32);
+ }
+ bool isMemOffs64() const {
+ return Kind == Memory && !getMemSegReg() && !getMemBaseReg() &&
+ !getMemIndexReg() && getMemScale() == 1 && (!Mem.Size || Mem.Size == 64);
+ }
+
bool isReg() const { return Kind == Register; }
+ bool isGR32orGR64() const {
+ return Kind == Register &&
+ (X86MCRegisterClasses[X86::GR32RegClassID].contains(getReg()) ||
+ X86MCRegisterClasses[X86::GR64RegClassID].contains(getReg()));
+ }
+
void addExpr(MCInst &Inst, const MCExpr *Expr) const {
// Add as immediates when possible.
if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr))
@@ -864,53 +888,40 @@ struct X86Operand : public MCParsedAsmOperand {
Inst.addOperand(MCOperand::CreateReg(getReg()));
}
- void addImmOperands(MCInst &Inst, unsigned N) const {
- assert(N == 1 && "Invalid number of operands!");
- addExpr(Inst, getImm());
+ static unsigned getGR32FromGR64(unsigned RegNo) {
+ switch (RegNo) {
+ default: llvm_unreachable("Unexpected register");
+ case X86::RAX: return X86::EAX;
+ case X86::RCX: return X86::ECX;
+ case X86::RDX: return X86::EDX;
+ case X86::RBX: return X86::EBX;
+ case X86::RBP: return X86::EBP;
+ case X86::RSP: return X86::ESP;
+ case X86::RSI: return X86::ESI;
+ case X86::RDI: return X86::EDI;
+ case X86::R8: return X86::R8D;
+ case X86::R9: return X86::R9D;
+ case X86::R10: return X86::R10D;
+ case X86::R11: return X86::R11D;
+ case X86::R12: return X86::R12D;
+ case X86::R13: return X86::R13D;
+ case X86::R14: return X86::R14D;
+ case X86::R15: return X86::R15D;
+ case X86::RIP: return X86::EIP;
+ }
}
- void addMem8Operands(MCInst &Inst, unsigned N) const {
- addMemOperands(Inst, N);
- }
- void addMem16Operands(MCInst &Inst, unsigned N) const {
- addMemOperands(Inst, N);
- }
- void addMem32Operands(MCInst &Inst, unsigned N) const {
- addMemOperands(Inst, N);
- }
- void addMem64Operands(MCInst &Inst, unsigned N) const {
- addMemOperands(Inst, N);
- }
- void addMem80Operands(MCInst &Inst, unsigned N) const {
- addMemOperands(Inst, N);
- }
- void addMem128Operands(MCInst &Inst, unsigned N) const {
- addMemOperands(Inst, N);
- }
- void addMem256Operands(MCInst &Inst, unsigned N) const {
- addMemOperands(Inst, N);
- }
- void addMemVX32Operands(MCInst &Inst, unsigned N) const {
- addMemOperands(Inst, N);
- }
- void addMemVY32Operands(MCInst &Inst, unsigned N) const {
- addMemOperands(Inst, N);
- }
- void addMemVX64Operands(MCInst &Inst, unsigned N) const {
- addMemOperands(Inst, N);
- }
- void addMemVY64Operands(MCInst &Inst, unsigned N) const {
- addMemOperands(Inst, N);
+ void addGR32orGR64Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ unsigned RegNo = getReg();
+ if (X86MCRegisterClasses[X86::GR64RegClassID].contains(RegNo))
+ RegNo = getGR32FromGR64(RegNo);
+ Inst.addOperand(MCOperand::CreateReg(RegNo));
}
- void addMemVZ32Operands(MCInst &Inst, unsigned N) const {
- addMemOperands(Inst, N);
- }
- void addMemVZ64Operands(MCInst &Inst, unsigned N) const {
- addMemOperands(Inst, N);
- }
- void addMem512Operands(MCInst &Inst, unsigned N) const {
- addMemOperands(Inst, N);
+ void addImmOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ addExpr(Inst, getImm());
}
void addMemOperands(MCInst &Inst, unsigned N) const {
@@ -931,6 +942,15 @@ struct X86Operand : public MCParsedAsmOperand {
Inst.addOperand(MCOperand::CreateExpr(getMemDisp()));
}
+ void addMemOffsOperands(MCInst &Inst, unsigned N) const {
+ assert((N == 1) && "Invalid number of operands!");
+ // Add as immediates when possible.
+ if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getMemDisp()))
+ Inst.addOperand(MCOperand::CreateImm(CE->getValue()));
+ else
+ Inst.addOperand(MCOperand::CreateExpr(getMemDisp()));
+ }
+
static X86Operand *CreateToken(StringRef Str, SMLoc Loc) {
SMLoc EndLoc = SMLoc::getFromPointer(Loc.getPointer() + Str.size());
X86Operand *Res = new X86Operand(Token, Loc, EndLoc);
@@ -1249,8 +1269,7 @@ RewriteIntelBracExpression(SmallVectorImpl<AsmRewrite> *AsmRewrites,
}
}
-X86Operand *
-X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
+bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
const AsmToken &Tok = Parser.getTok();
bool Done = false;
@@ -1272,7 +1291,7 @@ X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
Done = true;
break;
}
- return ErrorOperand(Tok.getLoc(), "Unexpected token!");
+ return Error(Tok.getLoc(), "unknown token in expression");
}
case AsmToken::EndOfStatement: {
Done = true;
@@ -1291,18 +1310,18 @@ X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
} else {
if (!isParsingInlineAsm()) {
if (getParser().parsePrimaryExpr(Val, End))
- return ErrorOperand(Tok.getLoc(), "Unexpected identifier!");
+ return Error(Tok.getLoc(), "Unexpected identifier!");
} else {
InlineAsmIdentifierInfo &Info = SM.getIdentifierInfo();
- if (X86Operand *Err = ParseIntelIdentifier(Val, Identifier, Info,
- /*Unevaluated*/ false, End))
- return Err;
+ if (ParseIntelIdentifier(Val, Identifier, Info,
+ /*Unevaluated=*/false, End))
+ return true;
}
SM.onIdentifierExpr(Val, Identifier);
UpdateLocLex = false;
break;
}
- return ErrorOperand(Tok.getLoc(), "Unexpected identifier!");
+ return Error(Tok.getLoc(), "Unexpected identifier!");
}
case AsmToken::Integer:
if (isParsingInlineAsm() && SM.getAddImmPrefix())
@@ -1320,14 +1339,14 @@ X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
case AsmToken::RParen: SM.onRParen(); break;
}
if (SM.hadError())
- return ErrorOperand(Tok.getLoc(), "Unexpected token!");
+ return Error(Tok.getLoc(), "unknown token in expression");
if (!Done && UpdateLocLex) {
End = Tok.getLoc();
Parser.Lex(); // Consume the token.
}
}
- return 0;
+ return false;
}
X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg, SMLoc Start,
@@ -1344,8 +1363,8 @@ X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg, SMLoc Start,
// may have already parsed an immediate displacement before the bracketed
// expression.
IntelExprStateMachine SM(ImmDisp, /*StopOnLBrac=*/false, /*AddImmPrefix=*/true);
- if (X86Operand *Err = ParseIntelExpression(SM, End))
- return Err;
+ if (ParseIntelExpression(SM, End))
+ return 0;
const MCExpr *Disp;
if (const MCExpr *Sym = SM.getSym()) {
@@ -1363,8 +1382,8 @@ X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg, SMLoc Start,
// Parse the dot operator (e.g., [ebx].foo.bar).
if (Tok.getString().startswith(".")) {
const MCExpr *NewDisp;
- if (X86Operand *Err = ParseIntelDotOperator(Disp, NewDisp))
- return Err;
+ if (ParseIntelDotOperator(Disp, NewDisp))
+ return 0;
End = Tok.getEndLoc();
Parser.Lex(); // Eat the field.
@@ -1392,11 +1411,10 @@ X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg, SMLoc Start,
}
// Inline assembly may use variable names with namespace alias qualifiers.
-X86Operand *X86AsmParser::ParseIntelIdentifier(const MCExpr *&Val,
- StringRef &Identifier,
- InlineAsmIdentifierInfo &Info,
- bool IsUnevaluatedOperand,
- SMLoc &End) {
+bool X86AsmParser::ParseIntelIdentifier(const MCExpr *&Val,
+ StringRef &Identifier,
+ InlineAsmIdentifierInfo &Info,
+ bool IsUnevaluatedOperand, SMLoc &End) {
assert (isParsingInlineAsm() && "Expected to be parsing inline assembly.");
Val = 0;
@@ -1421,68 +1439,89 @@ X86Operand *X86AsmParser::ParseIntelIdentifier(const MCExpr *&Val,
MCSymbol *Sym = getContext().GetOrCreateSymbol(Identifier);
MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
Val = MCSymbolRefExpr::Create(Sym, Variant, getParser().getContext());
- return 0;
+ return false;
}
-/// ParseIntelMemOperand - Parse intel style memory operand.
-X86Operand *X86AsmParser::ParseIntelMemOperand(unsigned SegReg,
- int64_t ImmDisp,
- SMLoc Start) {
- const AsmToken &Tok = Parser.getTok();
- SMLoc End;
-
- unsigned Size = getIntelMemOperandSize(Tok.getString());
- if (Size) {
- Parser.Lex(); // Eat operand size (e.g., byte, word).
- if (Tok.getString() != "PTR" && Tok.getString() != "ptr")
- return ErrorOperand(Start, "Expected 'PTR' or 'ptr' token!");
- Parser.Lex(); // Eat ptr.
- }
-
- // Parse ImmDisp [ BaseReg + Scale*IndexReg + Disp ].
+/// \brief Parse intel style segment override.
+X86Operand *X86AsmParser::ParseIntelSegmentOverride(unsigned SegReg,
+ SMLoc Start,
+ unsigned Size) {
+ assert(SegReg != 0 && "Tried to parse a segment override without a segment!");
+  const AsmToken &Tok = Parser.getTok(); // Peek at the colon.
+ if (Tok.isNot(AsmToken::Colon))
+ return ErrorOperand(Tok.getLoc(), "Expected ':' token!");
+ Parser.Lex(); // Eat ':'
+
+ int64_t ImmDisp = 0;
if (getLexer().is(AsmToken::Integer)) {
+ ImmDisp = Tok.getIntVal();
+ AsmToken ImmDispToken = Parser.Lex(); // Eat the integer.
+
if (isParsingInlineAsm())
- InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_ImmPrefix,
- Tok.getLoc()));
- int64_t ImmDisp = Tok.getIntVal();
- Parser.Lex(); // Eat the integer.
- if (getLexer().isNot(AsmToken::LBrac))
- return ErrorOperand(Start, "Expected '[' token!");
- return ParseIntelBracExpression(SegReg, Start, ImmDisp, Size);
+ InstInfo->AsmRewrites->push_back(
+ AsmRewrite(AOK_ImmPrefix, ImmDispToken.getLoc()));
+
+ if (getLexer().isNot(AsmToken::LBrac)) {
+      // An immediate following a segment register and colon token sequence can
+      // be followed by a bracketed expression. If it isn't, we know we have our
+      // final segment override.
+ const MCExpr *Disp = MCConstantExpr::Create(ImmDisp, getContext());
+ return X86Operand::CreateMem(SegReg, Disp, /*BaseReg=*/0, /*IndexReg=*/0,
+ /*Scale=*/1, Start, ImmDispToken.getEndLoc(),
+ Size);
+ }
}
if (getLexer().is(AsmToken::LBrac))
return ParseIntelBracExpression(SegReg, Start, ImmDisp, Size);
- if (!ParseRegister(SegReg, Start, End)) {
- // Handel SegReg : [ ... ]
- if (getLexer().isNot(AsmToken::Colon))
- return ErrorOperand(Start, "Expected ':' token!");
- Parser.Lex(); // Eat :
- if (getLexer().isNot(AsmToken::LBrac))
- return ErrorOperand(Start, "Expected '[' token!");
- return ParseIntelBracExpression(SegReg, Start, ImmDisp, Size);
+ const MCExpr *Val;
+ SMLoc End;
+ if (!isParsingInlineAsm()) {
+ if (getParser().parsePrimaryExpr(Val, End))
+ return ErrorOperand(Tok.getLoc(), "unknown token in expression");
+
+ return X86Operand::CreateMem(Val, Start, End, Size);
}
+ InlineAsmIdentifierInfo Info;
+ StringRef Identifier = Tok.getString();
+ if (ParseIntelIdentifier(Val, Identifier, Info,
+ /*Unevaluated=*/false, End))
+ return 0;
+ return CreateMemForInlineAsm(/*SegReg=*/0, Val, /*BaseReg=*/0,/*IndexReg=*/0,
+ /*Scale=*/1, Start, End, Size, Identifier, Info);
+}
+
+/// ParseIntelMemOperand - Parse intel style memory operand.
+X86Operand *X86AsmParser::ParseIntelMemOperand(int64_t ImmDisp, SMLoc Start,
+ unsigned Size) {
+ const AsmToken &Tok = Parser.getTok();
+ SMLoc End;
+
+ // Parse ImmDisp [ BaseReg + Scale*IndexReg + Disp ].
+ if (getLexer().is(AsmToken::LBrac))
+ return ParseIntelBracExpression(/*SegReg=*/0, Start, ImmDisp, Size);
+
const MCExpr *Val;
if (!isParsingInlineAsm()) {
if (getParser().parsePrimaryExpr(Val, End))
- return ErrorOperand(Tok.getLoc(), "Unexpected token!");
+ return ErrorOperand(Tok.getLoc(), "unknown token in expression");
return X86Operand::CreateMem(Val, Start, End, Size);
}
InlineAsmIdentifierInfo Info;
StringRef Identifier = Tok.getString();
- if (X86Operand *Err = ParseIntelIdentifier(Val, Identifier, Info,
- /*Unevaluated*/ false, End))
- return Err;
- return CreateMemForInlineAsm(/*SegReg=*/0, Val, /*BaseReg=*/0,/*IndexReg=*/0,
+ if (ParseIntelIdentifier(Val, Identifier, Info,
+ /*Unevaluated=*/false, End))
+ return 0;
+ return CreateMemForInlineAsm(/*SegReg=*/0, Val, /*BaseReg=*/0, /*IndexReg=*/0,
/*Scale=*/1, Start, End, Size, Identifier, Info);
}
/// Parse the '.' operator.
-X86Operand *X86AsmParser::ParseIntelDotOperator(const MCExpr *Disp,
+bool X86AsmParser::ParseIntelDotOperator(const MCExpr *Disp,
const MCExpr *&NewDisp) {
const AsmToken &Tok = Parser.getTok();
int64_t OrigDispVal, DotDispVal;
@@ -1491,7 +1530,7 @@ X86Operand *X86AsmParser::ParseIntelDotOperator(const MCExpr *Disp,
if (const MCConstantExpr *OrigDisp = dyn_cast<MCConstantExpr>(Disp))
OrigDispVal = OrigDisp->getValue();
else
- return ErrorOperand(Tok.getLoc(), "Non-constant offsets are not supported!");
+ return Error(Tok.getLoc(), "Non-constant offsets are not supported!");
// Drop the '.'.
StringRef DotDispStr = Tok.getString().drop_front(1);
@@ -1506,10 +1545,10 @@ X86Operand *X86AsmParser::ParseIntelDotOperator(const MCExpr *Disp,
std::pair<StringRef, StringRef> BaseMember = DotDispStr.split('.');
if (SemaCallback->LookupInlineAsmField(BaseMember.first, BaseMember.second,
DotDisp))
- return ErrorOperand(Tok.getLoc(), "Unable to lookup field reference!");
+ return Error(Tok.getLoc(), "Unable to lookup field reference!");
DotDispVal = DotDisp;
} else
- return ErrorOperand(Tok.getLoc(), "Unexpected token type!");
+ return Error(Tok.getLoc(), "Unexpected token type!");
if (isParsingInlineAsm() && Tok.is(AsmToken::Identifier)) {
SMLoc Loc = SMLoc::getFromPointer(DotDispStr.data());
@@ -1520,7 +1559,7 @@ X86Operand *X86AsmParser::ParseIntelDotOperator(const MCExpr *Disp,
}
NewDisp = MCConstantExpr::Create(OrigDispVal + DotDispVal, getContext());
- return 0;
+ return false;
}
/// Parse the 'offset' operator. This operator is used to specify the
@@ -1534,9 +1573,9 @@ X86Operand *X86AsmParser::ParseIntelOffsetOfOperator() {
InlineAsmIdentifierInfo Info;
SMLoc Start = Tok.getLoc(), End;
StringRef Identifier = Tok.getString();
- if (X86Operand *Err = ParseIntelIdentifier(Val, Identifier, Info,
- /*Unevaluated*/ false, End))
- return Err;
+ if (ParseIntelIdentifier(Val, Identifier, Info,
+ /*Unevaluated=*/false, End))
+ return 0;
// Don't emit the offset operator.
InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Skip, OffsetOfLoc, 7));
@@ -1570,9 +1609,12 @@ X86Operand *X86AsmParser::ParseIntelOperator(unsigned OpKind) {
InlineAsmIdentifierInfo Info;
SMLoc Start = Tok.getLoc(), End;
StringRef Identifier = Tok.getString();
- if (X86Operand *Err = ParseIntelIdentifier(Val, Identifier, Info,
- /*Unevaluated*/ true, End))
- return Err;
+ if (ParseIntelIdentifier(Val, Identifier, Info,
+ /*Unevaluated=*/true, End))
+ return 0;
+
+ if (!Info.OpDecl)
+ return ErrorOperand(Start, "unable to lookup expression");
unsigned CVal = 0;
switch(OpKind) {
@@ -1593,7 +1635,7 @@ X86Operand *X86AsmParser::ParseIntelOperator(unsigned OpKind) {
X86Operand *X86AsmParser::ParseIntelOperand() {
const AsmToken &Tok = Parser.getTok();
- SMLoc Start = Tok.getLoc(), End;
+ SMLoc Start, End;
// Offset, length, type and size operators.
if (isParsingInlineAsm()) {
@@ -1608,14 +1650,23 @@ X86Operand *X86AsmParser::ParseIntelOperand() {
return ParseIntelOperator(IOK_TYPE);
}
+ unsigned Size = getIntelMemOperandSize(Tok.getString());
+ if (Size) {
+ Parser.Lex(); // Eat operand size (e.g., byte, word).
+ if (Tok.getString() != "PTR" && Tok.getString() != "ptr")
+      return ErrorOperand(Tok.getLoc(), "Expected 'PTR' or 'ptr' token!");
+ Parser.Lex(); // Eat ptr.
+ }
+ Start = Tok.getLoc();
+
// Immediate.
if (getLexer().is(AsmToken::Integer) || getLexer().is(AsmToken::Minus) ||
getLexer().is(AsmToken::LParen)) {
AsmToken StartTok = Tok;
IntelExprStateMachine SM(/*Imm=*/0, /*StopOnLBrac=*/true,
/*AddImmPrefix=*/false);
- if (X86Operand *Err = ParseIntelExpression(SM, End))
- return Err;
+ if (ParseIntelExpression(SM, End))
+ return 0;
int64_t Imm = SM.getImm();
if (isParsingInlineAsm()) {
@@ -1639,23 +1690,22 @@ X86Operand *X86AsmParser::ParseIntelOperand() {
"before bracketed expr.");
// Parse ImmDisp [ BaseReg + Scale*IndexReg + Disp ].
- return ParseIntelMemOperand(/*SegReg=*/0, Imm, Start);
+ return ParseIntelMemOperand(Imm, Start, Size);
}
// Register.
unsigned RegNo = 0;
if (!ParseRegister(RegNo, Start, End)) {
// If this is a segment register followed by a ':', then this is the start
- // of a memory reference, otherwise this is a normal register reference.
+ // of a segment override, otherwise this is a normal register reference.
if (getLexer().isNot(AsmToken::Colon))
return X86Operand::CreateReg(RegNo, Start, End);
- getParser().Lex(); // Eat the colon.
- return ParseIntelMemOperand(/*SegReg=*/RegNo, /*Disp=*/0, Start);
+ return ParseIntelSegmentOverride(/*SegReg=*/RegNo, Start, Size);
}
// Memory operand.
- return ParseIntelMemOperand(/*SegReg=*/0, /*Disp=*/0, Start);
+ return ParseIntelMemOperand(/*Disp=*/0, Start, Size);
}
X86Operand *X86AsmParser::ParseATTOperand() {
@@ -1967,6 +2017,47 @@ ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc,
}
}
+ if (STI.getFeatureBits() & X86::FeatureAVX512) {
+ // Parse mask register {%k1}
+ if (getLexer().is(AsmToken::LCurly)) {
+ SMLoc Loc = Parser.getTok().getLoc();
+ Operands.push_back(X86Operand::CreateToken("{", Loc));
+ Parser.Lex(); // Eat the {
+ if (X86Operand *Op = ParseOperand()) {
+ Operands.push_back(Op);
+ if (!getLexer().is(AsmToken::RCurly)) {
+ SMLoc Loc = getLexer().getLoc();
+ Parser.eatToEndOfStatement();
+ return Error(Loc, "Expected } at this point");
+ }
+ Loc = Parser.getTok().getLoc();
+ Operands.push_back(X86Operand::CreateToken("}", Loc));
+ Parser.Lex(); // Eat the }
+ } else {
+ Parser.eatToEndOfStatement();
+ return true;
+ }
+ }
+ // Parse "zeroing non-masked" semantic {z}
+ if (getLexer().is(AsmToken::LCurly)) {
+ SMLoc Loc = Parser.getTok().getLoc();
+ Operands.push_back(X86Operand::CreateToken("{z}", Loc));
+ Parser.Lex(); // Eat the {
+      if (!getLexer().is(AsmToken::Identifier) ||
+          getLexer().getTok().getIdentifier() != "z") {
+ SMLoc Loc = getLexer().getLoc();
+ Parser.eatToEndOfStatement();
+ return Error(Loc, "Expected z at this point");
+ }
+ Parser.Lex(); // Eat the z
+ if (!getLexer().is(AsmToken::RCurly)) {
+ SMLoc Loc = getLexer().getLoc();
+ Parser.eatToEndOfStatement();
+ return Error(Loc, "Expected } at this point");
+ }
+ Parser.Lex(); // Eat the }
+ }
+ }
+
if (getLexer().isNot(AsmToken::EndOfStatement)) {
SMLoc Loc = getLexer().getLoc();
Parser.eatToEndOfStatement();
@@ -2218,6 +2309,55 @@ processInstruction(MCInst &Inst,
case X86::SBB16i16: return convert16i16to16ri8(Inst, X86::SBB16ri8);
case X86::SBB32i32: return convert32i32to32ri8(Inst, X86::SBB32ri8);
case X86::SBB64i32: return convert64i32to64ri8(Inst, X86::SBB64ri8);
+ case X86::VMOVAPDrr:
+ case X86::VMOVAPDYrr:
+ case X86::VMOVAPSrr:
+ case X86::VMOVAPSYrr:
+ case X86::VMOVDQArr:
+ case X86::VMOVDQAYrr:
+ case X86::VMOVDQUrr:
+ case X86::VMOVDQUYrr:
+ case X86::VMOVUPDrr:
+ case X86::VMOVUPDYrr:
+ case X86::VMOVUPSrr:
+ case X86::VMOVUPSYrr: {
+ if (X86II::isX86_64ExtendedReg(Inst.getOperand(0).getReg()) ||
+ !X86II::isX86_64ExtendedReg(Inst.getOperand(1).getReg()))
+ return false;
+
+ unsigned NewOpc;
+ switch (Inst.getOpcode()) {
+ default: llvm_unreachable("Invalid opcode");
+ case X86::VMOVAPDrr: NewOpc = X86::VMOVAPDrr_REV; break;
+ case X86::VMOVAPDYrr: NewOpc = X86::VMOVAPDYrr_REV; break;
+ case X86::VMOVAPSrr: NewOpc = X86::VMOVAPSrr_REV; break;
+ case X86::VMOVAPSYrr: NewOpc = X86::VMOVAPSYrr_REV; break;
+ case X86::VMOVDQArr: NewOpc = X86::VMOVDQArr_REV; break;
+ case X86::VMOVDQAYrr: NewOpc = X86::VMOVDQAYrr_REV; break;
+ case X86::VMOVDQUrr: NewOpc = X86::VMOVDQUrr_REV; break;
+ case X86::VMOVDQUYrr: NewOpc = X86::VMOVDQUYrr_REV; break;
+ case X86::VMOVUPDrr: NewOpc = X86::VMOVUPDrr_REV; break;
+ case X86::VMOVUPDYrr: NewOpc = X86::VMOVUPDYrr_REV; break;
+ case X86::VMOVUPSrr: NewOpc = X86::VMOVUPSrr_REV; break;
+ case X86::VMOVUPSYrr: NewOpc = X86::VMOVUPSYrr_REV; break;
+ }
+ Inst.setOpcode(NewOpc);
+ return true;
+ }
+ case X86::VMOVSDrr:
+ case X86::VMOVSSrr: {
+ if (X86II::isX86_64ExtendedReg(Inst.getOperand(0).getReg()) ||
+ !X86II::isX86_64ExtendedReg(Inst.getOperand(2).getReg()))
+ return false;
+ unsigned NewOpc;
+ switch (Inst.getOpcode()) {
+ default: llvm_unreachable("Invalid opcode");
+ case X86::VMOVSDrr: NewOpc = X86::VMOVSDrr_REV; break;
+ case X86::VMOVSSrr: NewOpc = X86::VMOVSSrr_REV; break;
+ }
+ Inst.setOpcode(NewOpc);
+ return true;
+ }
}
}
diff --git a/lib/Target/X86/Disassembler/X86Disassembler.cpp b/lib/Target/X86/Disassembler/X86Disassembler.cpp
index 82af6fa..903e36c 100644
--- a/lib/Target/X86/Disassembler/X86Disassembler.cpp
+++ b/lib/Target/X86/Disassembler/X86Disassembler.cpp
@@ -231,16 +231,18 @@ static void translateImmediate(MCInst &mcInst, uint64_t immediate,
default:
break;
case 1:
- type = TYPE_MOFFS8;
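+    // e.g. a one-byte immediate of 0x80 sign-extends to 0xffffffffffffff80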
+ if(immediate & 0x80)
+ immediate |= ~(0xffull);
break;
case 2:
- type = TYPE_MOFFS16;
+ if(immediate & 0x8000)
+ immediate |= ~(0xffffull);
break;
case 4:
- type = TYPE_MOFFS32;
+ if(immediate & 0x80000000)
+ immediate |= ~(0xffffffffull);
break;
case 8:
- type = TYPE_MOFFS64;
break;
}
}
@@ -263,16 +265,18 @@ static void translateImmediate(MCInst &mcInst, uint64_t immediate,
Opcode != X86::VMPSADBWrri && Opcode != X86::VDPPSYrri &&
Opcode != X86::VDPPSYrmi && Opcode != X86::VDPPDrri &&
Opcode != X86::VINSERTPSrr)
- type = TYPE_MOFFS8;
+ if(immediate & 0x80)
+ immediate |= ~(0xffull);
break;
case ENCODING_IW:
- type = TYPE_MOFFS16;
+ if(immediate & 0x8000)
+ immediate |= ~(0xffffull);
break;
case ENCODING_ID:
- type = TYPE_MOFFS32;
+ if(immediate & 0x80000000)
+ immediate |= ~(0xffffffffull);
break;
case ENCODING_IO:
- type = TYPE_MOFFS64;
break;
}
}
@@ -292,30 +296,21 @@ static void translateImmediate(MCInst &mcInst, uint64_t immediate,
case TYPE_REL8:
isBranch = true;
pcrel = insn.startLocation + insn.immediateOffset + insn.immediateSize;
- // fall through to sign extend the immediate if needed.
- case TYPE_MOFFS8:
if(immediate & 0x80)
immediate |= ~(0xffull);
break;
- case TYPE_MOFFS16:
- if(immediate & 0x8000)
- immediate |= ~(0xffffull);
- break;
case TYPE_REL32:
case TYPE_REL64:
isBranch = true;
pcrel = insn.startLocation + insn.immediateOffset + insn.immediateSize;
- // fall through to sign extend the immediate if needed.
- case TYPE_MOFFS32:
if(immediate & 0x80000000)
immediate |= ~(0xffffffffull);
break;
- case TYPE_MOFFS64:
default:
// operand is 64 bits wide. Do nothing.
break;
}
-
+
if(!tryAddingSymbolicOperand(immediate + pcrel, isBranch, insn.startLocation,
insn.immediateOffset, insn.immediateSize,
mcInst, Dis))
diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c
index bb195ee..c81a857 100644
--- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c
+++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c
@@ -25,8 +25,6 @@
#define TRUE 1
#define FALSE 0
-typedef int8_t bool;
-
#ifndef NDEBUG
#define debug(s) do { x86DisassemblerDebug(__FILE__, __LINE__, s); } while (0)
#else
@@ -81,6 +79,15 @@ static int modRMRequired(OpcodeType type,
case THREEBYTE_A7:
decision = &THREEBYTEA7_SYM;
break;
+ case XOP8_MAP:
+ decision = &XOP8_MAP_SYM;
+ break;
+ case XOP9_MAP:
+ decision = &XOP9_MAP_SYM;
+ break;
+ case XOPA_MAP:
+ decision = &XOPA_MAP_SYM;
+ break;
}
return decision->opcodeDecisions[insnContext].modRMDecisions[opcode].
@@ -122,6 +129,15 @@ static InstrUID decode(OpcodeType type,
case THREEBYTE_A7:
dec = &THREEBYTEA7_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
break;
+ case XOP8_MAP:
+ dec = &XOP8_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
+ break;
+ case XOP9_MAP:
+ dec = &XOP9_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
+ break;
+ case XOPA_MAP:
+ dec = &XOPA_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
+ break;
}
switch (dec->modrm_type) {
@@ -305,6 +321,7 @@ static int readPrefixes(struct InternalInstruction* insn) {
BOOL prefixGroups[4] = { FALSE };
uint64_t prefixLocation;
uint8_t byte = 0;
+ uint8_t nextByte;
BOOL hasAdSize = FALSE;
BOOL hasOpSize = FALSE;
@@ -314,20 +331,21 @@ static int readPrefixes(struct InternalInstruction* insn) {
while (isPrefix) {
prefixLocation = insn->readerCursor;
+ /* If we fail reading prefixes, just stop here and let the opcode reader deal with it */
if (consumeByte(insn, &byte))
- return -1;
+ break;
/*
* If the byte is a LOCK/REP/REPNE prefix and not a part of the opcode, then
* break and let it be disassembled as a normal "instruction".
*/
+ if (insn->readerCursor - 1 == insn->startLocation && byte == 0xf0)
+ break;
+
if (insn->readerCursor - 1 == insn->startLocation
- && (byte == 0xf0 || byte == 0xf2 || byte == 0xf3)) {
- uint8_t nextByte;
- if (byte == 0xf0)
- break;
- if (lookAtByte(insn, &nextByte))
- return -1;
+ && (byte == 0xf2 || byte == 0xf3)
+ && !lookAtByte(insn, &nextByte))
+ {
/*
* If the byte is 0xf2 or 0xf3, and any of the following conditions are
* met:
@@ -426,7 +444,7 @@ static int readPrefixes(struct InternalInstruction* insn) {
dbgprintf(insn, "Found prefix 0x%hhx", byte);
}
- insn->vexSize = 0;
+ insn->vexXopType = TYPE_NO_VEX_XOP;
if (byte == 0xc4) {
uint8_t byte1;
@@ -437,7 +455,7 @@ static int readPrefixes(struct InternalInstruction* insn) {
}
if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) {
- insn->vexSize = 3;
+ insn->vexXopType = TYPE_VEX_3B;
insn->necessaryPrefixLocation = insn->readerCursor - 1;
}
else {
@@ -445,22 +463,22 @@ static int readPrefixes(struct InternalInstruction* insn) {
insn->necessaryPrefixLocation = insn->readerCursor - 1;
}
- if (insn->vexSize == 3) {
- insn->vexPrefix[0] = byte;
- consumeByte(insn, &insn->vexPrefix[1]);
- consumeByte(insn, &insn->vexPrefix[2]);
+ if (insn->vexXopType == TYPE_VEX_3B) {
+ insn->vexXopPrefix[0] = byte;
+ consumeByte(insn, &insn->vexXopPrefix[1]);
+ consumeByte(insn, &insn->vexXopPrefix[2]);
/* We simulate the REX prefix for simplicity's sake */
if (insn->mode == MODE_64BIT) {
insn->rexPrefix = 0x40
- | (wFromVEX3of3(insn->vexPrefix[2]) << 3)
- | (rFromVEX2of3(insn->vexPrefix[1]) << 2)
- | (xFromVEX2of3(insn->vexPrefix[1]) << 1)
- | (bFromVEX2of3(insn->vexPrefix[1]) << 0);
+ | (wFromVEX3of3(insn->vexXopPrefix[2]) << 3)
+ | (rFromVEX2of3(insn->vexXopPrefix[1]) << 2)
+ | (xFromVEX2of3(insn->vexXopPrefix[1]) << 1)
+ | (bFromVEX2of3(insn->vexXopPrefix[1]) << 0);
}
- switch (ppFromVEX3of3(insn->vexPrefix[2]))
+ switch (ppFromVEX3of3(insn->vexXopPrefix[2]))
{
default:
break;
@@ -469,7 +487,9 @@ static int readPrefixes(struct InternalInstruction* insn) {
break;
}
- dbgprintf(insn, "Found VEX prefix 0x%hhx 0x%hhx 0x%hhx", insn->vexPrefix[0], insn->vexPrefix[1], insn->vexPrefix[2]);
+ dbgprintf(insn, "Found VEX prefix 0x%hhx 0x%hhx 0x%hhx",
+ insn->vexXopPrefix[0], insn->vexXopPrefix[1],
+ insn->vexXopPrefix[2]);
}
}
else if (byte == 0xc5) {
@@ -481,22 +501,22 @@ static int readPrefixes(struct InternalInstruction* insn) {
}
if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) {
- insn->vexSize = 2;
+ insn->vexXopType = TYPE_VEX_2B;
}
else {
unconsumeByte(insn);
}
- if (insn->vexSize == 2) {
- insn->vexPrefix[0] = byte;
- consumeByte(insn, &insn->vexPrefix[1]);
+ if (insn->vexXopType == TYPE_VEX_2B) {
+ insn->vexXopPrefix[0] = byte;
+ consumeByte(insn, &insn->vexXopPrefix[1]);
if (insn->mode == MODE_64BIT) {
insn->rexPrefix = 0x40
- | (rFromVEX2of2(insn->vexPrefix[1]) << 2);
+ | (rFromVEX2of2(insn->vexXopPrefix[1]) << 2);
}
- switch (ppFromVEX2of2(insn->vexPrefix[1]))
+ switch (ppFromVEX2of2(insn->vexXopPrefix[1]))
{
default:
break;
@@ -505,7 +525,53 @@ static int readPrefixes(struct InternalInstruction* insn) {
break;
}
- dbgprintf(insn, "Found VEX prefix 0x%hhx 0x%hhx", insn->vexPrefix[0], insn->vexPrefix[1]);
+ dbgprintf(insn, "Found VEX prefix 0x%hhx 0x%hhx", insn->vexXopPrefix[0], insn->vexXopPrefix[1]);
+ }
+ }
+ else if (byte == 0x8f) {
+ uint8_t byte1;
+
+ if (lookAtByte(insn, &byte1)) {
+ dbgprintf(insn, "Couldn't read second byte of XOP");
+ return -1;
+ }
+
+ if ((byte1 & 0x38) != 0x0) { /* 0 in these 3 bits is a POP instruction. */
+ insn->vexXopType = TYPE_XOP;
+ insn->necessaryPrefixLocation = insn->readerCursor - 1;
+ }
+ else {
+ unconsumeByte(insn);
+ insn->necessaryPrefixLocation = insn->readerCursor - 1;
+ }
+
+ if (insn->vexXopType == TYPE_XOP) {
+ insn->vexXopPrefix[0] = byte;
+ consumeByte(insn, &insn->vexXopPrefix[1]);
+ consumeByte(insn, &insn->vexXopPrefix[2]);
+
+ /* We simulate the REX prefix for simplicity's sake */
+
+ if (insn->mode == MODE_64BIT) {
+ insn->rexPrefix = 0x40
+ | (wFromXOP3of3(insn->vexXopPrefix[2]) << 3)
+ | (rFromXOP2of3(insn->vexXopPrefix[1]) << 2)
+ | (xFromXOP2of3(insn->vexXopPrefix[1]) << 1)
+ | (bFromXOP2of3(insn->vexXopPrefix[1]) << 0);
+ }
+
+ switch (ppFromXOP3of3(insn->vexXopPrefix[2]))
+ {
+ default:
+ break;
+ case VEX_PREFIX_66:
+ hasOpSize = TRUE;
+ break;
+ }
+
+ dbgprintf(insn, "Found XOP prefix 0x%hhx 0x%hhx 0x%hhx",
+ insn->vexXopPrefix[0], insn->vexXopPrefix[1],
+ insn->vexXopPrefix[2]);
}
}
else {
@@ -580,37 +646,49 @@ static int readOpcode(struct InternalInstruction* insn) {
insn->opcodeType = ONEBYTE;
- if (insn->vexSize == 3)
+ if (insn->vexXopType == TYPE_VEX_3B)
{
- switch (mmmmmFromVEX2of3(insn->vexPrefix[1]))
+ switch (mmmmmFromVEX2of3(insn->vexXopPrefix[1]))
{
default:
- dbgprintf(insn, "Unhandled m-mmmm field for instruction (0x%hhx)", mmmmmFromVEX2of3(insn->vexPrefix[1]));
+ dbgprintf(insn, "Unhandled m-mmmm field for instruction (0x%hhx)",
+ mmmmmFromVEX2of3(insn->vexXopPrefix[1]));
return -1;
- case 0:
- break;
case VEX_LOB_0F:
- insn->twoByteEscape = 0x0f;
insn->opcodeType = TWOBYTE;
return consumeByte(insn, &insn->opcode);
case VEX_LOB_0F38:
- insn->twoByteEscape = 0x0f;
- insn->threeByteEscape = 0x38;
insn->opcodeType = THREEBYTE_38;
return consumeByte(insn, &insn->opcode);
case VEX_LOB_0F3A:
- insn->twoByteEscape = 0x0f;
- insn->threeByteEscape = 0x3a;
insn->opcodeType = THREEBYTE_3A;
return consumeByte(insn, &insn->opcode);
}
}
- else if (insn->vexSize == 2)
+ else if (insn->vexXopType == TYPE_VEX_2B)
{
- insn->twoByteEscape = 0x0f;
insn->opcodeType = TWOBYTE;
return consumeByte(insn, &insn->opcode);
}
+ else if (insn->vexXopType == TYPE_XOP)
+ {
+ switch (mmmmmFromXOP2of3(insn->vexXopPrefix[1]))
+ {
+ default:
+ dbgprintf(insn, "Unhandled m-mmmm field for instruction (0x%hhx)",
+                mmmmmFromXOP2of3(insn->vexXopPrefix[1]));
+ return -1;
+ case XOP_MAP_SELECT_8:
+ insn->opcodeType = XOP8_MAP;
+ return consumeByte(insn, &insn->opcode);
+ case XOP_MAP_SELECT_9:
+ insn->opcodeType = XOP9_MAP;
+ return consumeByte(insn, &insn->opcode);
+ case XOP_MAP_SELECT_A:
+ insn->opcodeType = XOPA_MAP;
+ return consumeByte(insn, &insn->opcode);
+ }
+ }
if (consumeByte(insn, &current))
return -1;
@@ -618,16 +696,12 @@ static int readOpcode(struct InternalInstruction* insn) {
if (current == 0x0f) {
dbgprintf(insn, "Found a two-byte escape prefix (0x%hhx)", current);
- insn->twoByteEscape = current;
-
if (consumeByte(insn, &current))
return -1;
if (current == 0x38) {
dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current);
- insn->threeByteEscape = current;
-
if (consumeByte(insn, &current))
return -1;
@@ -635,8 +709,6 @@ static int readOpcode(struct InternalInstruction* insn) {
} else if (current == 0x3a) {
dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current);
- insn->threeByteEscape = current;
-
if (consumeByte(insn, &current))
return -1;
@@ -644,8 +716,6 @@ static int readOpcode(struct InternalInstruction* insn) {
} else if (current == 0xa6) {
dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current);
- insn->threeByteEscape = current;
-
if (consumeByte(insn, &current))
return -1;
@@ -653,8 +723,6 @@ static int readOpcode(struct InternalInstruction* insn) {
} else if (current == 0xa7) {
dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current);
- insn->threeByteEscape = current;
-
if (consumeByte(insn, &current))
return -1;
@@ -768,11 +836,27 @@ static int getID(struct InternalInstruction* insn, const void *miiArg) {
if (insn->mode == MODE_64BIT)
attrMask |= ATTR_64BIT;
- if (insn->vexSize) {
+ if (insn->vexXopType != TYPE_NO_VEX_XOP) {
attrMask |= ATTR_VEX;
- if (insn->vexSize == 3) {
- switch (ppFromVEX3of3(insn->vexPrefix[2])) {
+ if (insn->vexXopType == TYPE_VEX_3B) {
+ switch (ppFromVEX3of3(insn->vexXopPrefix[2])) {
+ case VEX_PREFIX_66:
+ attrMask |= ATTR_OPSIZE;
+ break;
+ case VEX_PREFIX_F3:
+ attrMask |= ATTR_XS;
+ break;
+ case VEX_PREFIX_F2:
+ attrMask |= ATTR_XD;
+ break;
+ }
+
+ if (lFromVEX3of3(insn->vexXopPrefix[2]))
+ attrMask |= ATTR_VEXL;
+ }
+ else if (insn->vexXopType == TYPE_VEX_2B) {
+ switch (ppFromVEX2of2(insn->vexXopPrefix[1])) {
case VEX_PREFIX_66:
attrMask |= ATTR_OPSIZE;
break;
@@ -784,11 +868,11 @@ static int getID(struct InternalInstruction* insn, const void *miiArg) {
break;
}
- if (lFromVEX3of3(insn->vexPrefix[2]))
+ if (lFromVEX2of2(insn->vexXopPrefix[1]))
attrMask |= ATTR_VEXL;
}
- else if (insn->vexSize == 2) {
- switch (ppFromVEX2of2(insn->vexPrefix[1])) {
+ else if (insn->vexXopType == TYPE_XOP) {
+ switch (ppFromXOP3of3(insn->vexXopPrefix[2])) {
case VEX_PREFIX_66:
attrMask |= ATTR_OPSIZE;
break;
@@ -800,7 +884,7 @@ static int getID(struct InternalInstruction* insn, const void *miiArg) {
break;
}
- if (lFromVEX2of2(insn->vexPrefix[1]))
+ if (lFromXOP3of3(insn->vexXopPrefix[2]))
attrMask |= ATTR_VEXL;
}
else {
@@ -826,42 +910,6 @@ static int getID(struct InternalInstruction* insn, const void *miiArg) {
/* The following clauses compensate for limitations of the tables. */
- if ((attrMask & ATTR_VEXL) && (attrMask & ATTR_REXW) &&
- !(attrMask & ATTR_OPSIZE)) {
- /*
- * Some VEX instructions ignore the L-bit, but use the W-bit. Normally L-bit
- * has precedence since there are no L-bit with W-bit entries in the tables.
- * So if the L-bit isn't significant we should use the W-bit instead.
- * We only need to do this if the instruction doesn't specify OpSize since
- * there is a VEX_L_W_OPSIZE table.
- */
-
- const struct InstructionSpecifier *spec;
- uint16_t instructionIDWithWBit;
- const struct InstructionSpecifier *specWithWBit;
-
- spec = specifierForUID(instructionID);
-
- if (getIDWithAttrMask(&instructionIDWithWBit,
- insn,
- (attrMask & (~ATTR_VEXL)) | ATTR_REXW)) {
- insn->instructionID = instructionID;
- insn->spec = spec;
- return 0;
- }
-
- specWithWBit = specifierForUID(instructionIDWithWBit);
-
- if (instructionID != instructionIDWithWBit) {
- insn->instructionID = instructionIDWithWBit;
- insn->spec = specWithWBit;
- } else {
- insn->instructionID = instructionID;
- insn->spec = spec;
- }
- return 0;
- }
-
if (insn->prefixPresent[0x66] && !(attrMask & ATTR_OPSIZE)) {
/*
* The instruction tables make no distinction between instructions that
@@ -1502,10 +1550,12 @@ static int readImmediate(struct InternalInstruction* insn, uint8_t size) {
static int readVVVV(struct InternalInstruction* insn) {
dbgprintf(insn, "readVVVV()");
- if (insn->vexSize == 3)
- insn->vvvv = vvvvFromVEX3of3(insn->vexPrefix[2]);
- else if (insn->vexSize == 2)
- insn->vvvv = vvvvFromVEX2of2(insn->vexPrefix[1]);
+ if (insn->vexXopType == TYPE_VEX_3B)
+ insn->vvvv = vvvvFromVEX3of3(insn->vexXopPrefix[2]);
+ else if (insn->vexXopType == TYPE_VEX_2B)
+ insn->vvvv = vvvvFromVEX2of2(insn->vexXopPrefix[1]);
+ else if (insn->vexXopType == TYPE_XOP)
+ insn->vvvv = vvvvFromXOP3of3(insn->vexXopPrefix[2]);
else
return -1;
diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h
index dcb6aad..6d03d5c 100644
--- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h
+++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h
@@ -59,6 +59,15 @@ extern "C" {
#define lFromVEX2of2(vex) (((vex) & 0x4) >> 2)
#define ppFromVEX2of2(vex) ((vex) & 0x3)
+#define rFromXOP2of3(xop) (((~(xop)) & 0x80) >> 7)
+#define xFromXOP2of3(xop) (((~(xop)) & 0x40) >> 6)
+#define bFromXOP2of3(xop) (((~(xop)) & 0x20) >> 5)
+#define mmmmmFromXOP2of3(xop) ((xop) & 0x1f)
+#define wFromXOP3of3(xop) (((xop) & 0x80) >> 7)
+#define vvvvFromXOP3of3(xop)    (((~(xop)) & 0x78) >> 3)
+#define lFromXOP3of3(xop) (((xop) & 0x4) >> 2)
+#define ppFromXOP3of3(xop) ((xop) & 0x3)
+
/*
* These enums represent Intel registers for use by the decoder.
*/
@@ -447,6 +456,12 @@ typedef enum {
VEX_LOB_0F3A = 0x3
} VEXLeadingOpcodeByte;
+typedef enum {
+ XOP_MAP_SELECT_8 = 0x8,
+ XOP_MAP_SELECT_9 = 0x9,
+ XOP_MAP_SELECT_A = 0xA
+} XOPMapSelect;
+
/*
* VEXPrefixCode - Possible values for the VEX.pp field
*/
@@ -458,6 +473,13 @@ typedef enum {
VEX_PREFIX_F2 = 0x3
} VEXPrefixCode;
+typedef enum {
+ TYPE_NO_VEX_XOP = 0x0,
+ TYPE_VEX_2B = 0x1,
+ TYPE_VEX_3B = 0x2,
+ TYPE_XOP = 0x3
+} VEXXOPType;
+
typedef uint8_t BOOL;
/*
@@ -514,10 +536,10 @@ struct InternalInstruction {
uint8_t prefixPresent[0x100];
/* contains the location (for use with the reader) of the prefix byte */
uint64_t prefixLocations[0x100];
- /* The value of the VEX prefix, if present */
- uint8_t vexPrefix[3];
+ /* The value of the VEX/XOP prefix, if present */
+ uint8_t vexXopPrefix[3];
/* The length of the VEX prefix (0 if not present) */
- uint8_t vexSize;
+ VEXXOPType vexXopType;
/* The value of the REX prefix, if present */
uint8_t rexPrefix;
/* The location where a mandatory prefix would have to be (i.e., right before
@@ -541,10 +563,6 @@ struct InternalInstruction {
/* opcode state */
- /* The value of the two-byte escape prefix (usually 0x0f) */
- uint8_t twoByteEscape;
- /* The value of the three-byte escape prefix (usually 0x38 or 0x3a) */
- uint8_t threeByteEscape;
/* The last byte of the opcode, not counting any ModR/M extension */
uint8_t opcode;
/* The ModR/M byte of the instruction, if it is an opcode extension */
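
As the new *FromXOP* macros show, the XOP payload mirrors 3-byte VEX: R, X, B and vvvv are stored inverted, W, L and pp are stored directly, and the low five bits of the second byte select the opcode map. A self-contained sketch of the REX simulation performed in readPrefixes (the byte values below are illustrative only):

    #include <cstdint>
    #include <cstdio>

    // Rebuild the simulated REX prefix from the second and third XOP bytes,
    // matching rFromXOP2of3/xFromXOP2of3/bFromXOP2of3/wFromXOP3of3.
    static uint8_t simulatedREX(uint8_t B1, uint8_t B2) {
      return 0x40 | (((B2 & 0x80) >> 7) << 3)   // W, stored directly
                  | (((~B1 & 0x80) >> 7) << 2)  // R, stored inverted
                  | (((~B1 & 0x40) >> 6) << 1)  // X, stored inverted
                  | (((~B1 & 0x20) >> 5) << 0); // B, stored inverted
    }

    int main() {
      // Hypothetical XOP prefix bytes: map select 9, extension bits clear.
      uint8_t B1 = 0xe9, B2 = 0x78;
      printf("map=%u rex=0x%02x vvvv=%u\n",
             B1 & 0x1f,             // mmmmmFromXOP2of3 -> XOP9_MAP here
             simulatedREX(B1, B2),
             (~B2 & 0x78) >> 3);    // vvvvFromXOP3of3, also inverted
      return 0;
    }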
diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h b/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h
index d291441..dd1719c 100644
--- a/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h
+++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h
@@ -32,6 +32,9 @@
#define THREEBYTE3A_SYM x86DisassemblerThreeByte3AOpcodes
#define THREEBYTEA6_SYM x86DisassemblerThreeByteA6Opcodes
#define THREEBYTEA7_SYM x86DisassemblerThreeByteA7Opcodes
+#define XOP8_MAP_SYM x86DisassemblerXOP8Opcodes
+#define XOP9_MAP_SYM x86DisassemblerXOP9Opcodes
+#define XOPA_MAP_SYM x86DisassemblerXOPAOpcodes
#define INSTRUCTIONS_STR "x86DisassemblerInstrSpecifiers"
#define CONTEXTS_STR "x86DisassemblerContexts"
@@ -41,6 +44,9 @@
#define THREEBYTE3A_STR "x86DisassemblerThreeByte3AOpcodes"
#define THREEBYTEA6_STR "x86DisassemblerThreeByteA6Opcodes"
#define THREEBYTEA7_STR "x86DisassemblerThreeByteA7Opcodes"
+#define XOP8_MAP_STR "x86DisassemblerXOP8Opcodes"
+#define XOP9_MAP_STR "x86DisassemblerXOP9Opcodes"
+#define XOPA_MAP_STR "x86DisassemblerXOPAOpcodes"
/*
* Attributes of an instruction that must be known before the opcode can be
@@ -116,10 +122,10 @@ enum attributeBits {
ENUM_ENTRY(IC_VEX_L_XS, 4, "requires VEX and the L and XS prefix")\
ENUM_ENTRY(IC_VEX_L_XD, 4, "requires VEX and the L and XD prefix")\
ENUM_ENTRY(IC_VEX_L_OPSIZE, 4, "requires VEX, L, and OpSize") \
- ENUM_ENTRY(IC_VEX_L_W, 3, "requires VEX, L and W") \
- ENUM_ENTRY(IC_VEX_L_W_XS, 4, "requires VEX, L, W and XS prefix") \
- ENUM_ENTRY(IC_VEX_L_W_XD, 4, "requires VEX, L, W and XD prefix") \
- ENUM_ENTRY(IC_VEX_L_W_OPSIZE, 4, "requires VEX, L, W and OpSize") \
+ ENUM_ENTRY(IC_VEX_L_W, 4, "requires VEX, L and W") \
+ ENUM_ENTRY(IC_VEX_L_W_XS, 5, "requires VEX, L, W and XS prefix") \
+ ENUM_ENTRY(IC_VEX_L_W_XD, 5, "requires VEX, L, W and XD prefix") \
+ ENUM_ENTRY(IC_VEX_L_W_OPSIZE, 5, "requires VEX, L, W and OpSize") \
ENUM_ENTRY(IC_EVEX, 1, "requires an EVEX prefix") \
ENUM_ENTRY(IC_EVEX_XS, 2, "requires EVEX and the XS prefix") \
ENUM_ENTRY(IC_EVEX_XD, 2, "requires EVEX and the XD prefix") \
@@ -215,7 +221,55 @@ enum attributeBits {
ENUM_ENTRY(IC_EVEX_L2_W_K_B, 3, "requires EVEX_B, EVEX_K, L2 and W") \
ENUM_ENTRY(IC_EVEX_L2_W_XS_K_B, 4, "requires EVEX_B, EVEX_K, L2, W and XS prefix") \
ENUM_ENTRY(IC_EVEX_L2_W_XD_K_B, 4, "requires EVEX_B, EVEX_K, L2, W and XD prefix") \
- ENUM_ENTRY(IC_EVEX_L2_W_OPSIZE_K_B, 4, "requires EVEX_B, EVEX_K, L2, W and OpSize")
+ ENUM_ENTRY(IC_EVEX_L2_W_OPSIZE_K_B, 4, "requires EVEX_B, EVEX_K, L2, W and OpSize") \
+ ENUM_ENTRY(IC_EVEX_KZ_B, 1, "requires EVEX_B and EVEX_KZ prefix") \
+ ENUM_ENTRY(IC_EVEX_XS_KZ_B, 2, "requires EVEX_B, EVEX_KZ and the XS prefix") \
+ ENUM_ENTRY(IC_EVEX_XD_KZ_B, 2, "requires EVEX_B, EVEX_KZ and the XD prefix") \
+ ENUM_ENTRY(IC_EVEX_OPSIZE_KZ_B, 2, "requires EVEX_B, EVEX_KZ and the OpSize prefix") \
+ ENUM_ENTRY(IC_EVEX_W_KZ_B, 3, "requires EVEX_B, EVEX_KZ and the W prefix") \
+ ENUM_ENTRY(IC_EVEX_W_XS_KZ_B, 4, "requires EVEX_B, EVEX_KZ, W, and XS prefix") \
+ ENUM_ENTRY(IC_EVEX_W_XD_KZ_B, 4, "requires EVEX_B, EVEX_KZ, W, and XD prefix") \
+ ENUM_ENTRY(IC_EVEX_W_OPSIZE_KZ_B, 4, "requires EVEX_B, EVEX_KZ, W, and OpSize") \
+ ENUM_ENTRY(IC_EVEX_L_KZ_B, 3, "requires EVEX_B, EVEX_KZ and the L prefix") \
+ ENUM_ENTRY(IC_EVEX_L_XS_KZ_B, 4, "requires EVEX_B, EVEX_KZ and the L and XS prefix")\
+ ENUM_ENTRY(IC_EVEX_L_XD_KZ_B, 4, "requires EVEX_B, EVEX_KZ and the L and XD prefix")\
+ ENUM_ENTRY(IC_EVEX_L_OPSIZE_KZ_B, 4, "requires EVEX_B, EVEX_KZ, L, and OpSize") \
+ ENUM_ENTRY(IC_EVEX_L_W_KZ_B, 3, "requires EVEX_B, EVEX_KZ, L and W") \
+ ENUM_ENTRY(IC_EVEX_L_W_XS_KZ_B, 4, "requires EVEX_B, EVEX_KZ, L, W and XS prefix") \
+ ENUM_ENTRY(IC_EVEX_L_W_XD_KZ_B, 4, "requires EVEX_B, EVEX_KZ, L, W and XD prefix") \
+ ENUM_ENTRY(IC_EVEX_L_W_OPSIZE_KZ_B, 4, "requires EVEX_B, EVEX_KZ, L, W and OpSize") \
+ ENUM_ENTRY(IC_EVEX_L2_KZ_B, 3, "requires EVEX_B, EVEX_KZ and the L2 prefix") \
+ ENUM_ENTRY(IC_EVEX_L2_XS_KZ_B, 4, "requires EVEX_B, EVEX_KZ and the L2 and XS prefix")\
+ ENUM_ENTRY(IC_EVEX_L2_XD_KZ_B, 4, "requires EVEX_B, EVEX_KZ and the L2 and XD prefix")\
+ ENUM_ENTRY(IC_EVEX_L2_OPSIZE_KZ_B, 4, "requires EVEX_B, EVEX_KZ, L2, and OpSize") \
+ ENUM_ENTRY(IC_EVEX_L2_W_KZ_B, 3, "requires EVEX_B, EVEX_KZ, L2 and W") \
+ ENUM_ENTRY(IC_EVEX_L2_W_XS_KZ_B, 4, "requires EVEX_B, EVEX_KZ, L2, W and XS prefix") \
+ ENUM_ENTRY(IC_EVEX_L2_W_XD_KZ_B, 4, "requires EVEX_B, EVEX_KZ, L2, W and XD prefix") \
+ ENUM_ENTRY(IC_EVEX_L2_W_OPSIZE_KZ_B, 4, "requires EVEX_B, EVEX_KZ, L2, W and OpSize") \
+ ENUM_ENTRY(IC_EVEX_KZ, 1, "requires an EVEX_KZ prefix") \
+ ENUM_ENTRY(IC_EVEX_XS_KZ, 2, "requires EVEX_KZ and the XS prefix") \
+ ENUM_ENTRY(IC_EVEX_XD_KZ, 2, "requires EVEX_KZ and the XD prefix") \
+ ENUM_ENTRY(IC_EVEX_OPSIZE_KZ, 2, "requires EVEX_KZ and the OpSize prefix") \
+ ENUM_ENTRY(IC_EVEX_W_KZ, 3, "requires EVEX_KZ and the W prefix") \
+ ENUM_ENTRY(IC_EVEX_W_XS_KZ, 4, "requires EVEX_KZ, W, and XS prefix") \
+ ENUM_ENTRY(IC_EVEX_W_XD_KZ, 4, "requires EVEX_KZ, W, and XD prefix") \
+ ENUM_ENTRY(IC_EVEX_W_OPSIZE_KZ, 4, "requires EVEX_KZ, W, and OpSize") \
+ ENUM_ENTRY(IC_EVEX_L_KZ, 3, "requires EVEX_KZ and the L prefix") \
+ ENUM_ENTRY(IC_EVEX_L_XS_KZ, 4, "requires EVEX_KZ and the L and XS prefix")\
+ ENUM_ENTRY(IC_EVEX_L_XD_KZ, 4, "requires EVEX_KZ and the L and XD prefix")\
+ ENUM_ENTRY(IC_EVEX_L_OPSIZE_KZ, 4, "requires EVEX_KZ, L, and OpSize") \
+ ENUM_ENTRY(IC_EVEX_L_W_KZ, 3, "requires EVEX_KZ, L and W") \
+ ENUM_ENTRY(IC_EVEX_L_W_XS_KZ, 4, "requires EVEX_KZ, L, W and XS prefix") \
+ ENUM_ENTRY(IC_EVEX_L_W_XD_KZ, 4, "requires EVEX_KZ, L, W and XD prefix") \
+ ENUM_ENTRY(IC_EVEX_L_W_OPSIZE_KZ, 4, "requires EVEX_KZ, L, W and OpSize") \
+ ENUM_ENTRY(IC_EVEX_L2_KZ, 3, "requires EVEX_KZ and the L2 prefix") \
+ ENUM_ENTRY(IC_EVEX_L2_XS_KZ, 4, "requires EVEX_KZ and the L2 and XS prefix")\
+ ENUM_ENTRY(IC_EVEX_L2_XD_KZ, 4, "requires EVEX_KZ and the L2 and XD prefix")\
+ ENUM_ENTRY(IC_EVEX_L2_OPSIZE_KZ, 4, "requires EVEX_KZ, L2, and OpSize") \
+ ENUM_ENTRY(IC_EVEX_L2_W_KZ, 3, "requires EVEX_KZ, L2 and W") \
+ ENUM_ENTRY(IC_EVEX_L2_W_XS_KZ, 4, "requires EVEX_KZ, L2, W and XS prefix") \
+ ENUM_ENTRY(IC_EVEX_L2_W_XD_KZ, 4, "requires EVEX_KZ, L2, W and XD prefix") \
+ ENUM_ENTRY(IC_EVEX_L2_W_OPSIZE_KZ, 4, "requires EVEX_KZ, L2, W and OpSize")
#define ENUM_ENTRY(n, r, d) n,
typedef enum {
@@ -234,7 +288,10 @@ typedef enum {
THREEBYTE_38 = 2,
THREEBYTE_3A = 3,
THREEBYTE_A6 = 4,
- THREEBYTE_A7 = 5
+ THREEBYTE_A7 = 5,
+ XOP8_MAP = 6,
+ XOP9_MAP = 7,
+ XOPA_MAP = 8
} OpcodeType;
/*
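
The priority bumps on IC_VEX_L_W* appear to be what make the removed getID() workaround (the manual re-query with ATTR_REXW in X86DisassemblerDecoder.c above) unnecessary: the table generator uses these numbers to rank candidate contexts, so an L+W entry now outranks a plain L entry directly. A toy model of that resolution, with hypothetical names, assuming the generator's higher-rank-wins ordering:

    #include <cstdio>

    // Toy model: pick the matching context with the highest table priority,
    // the way the generated disassembler tables resolve overlapping entries.
    struct Ctx { const char *Name; unsigned Priority; bool Matches; };

    int main() {
      Ctx Cands[] = {
        {"IC_VEX_L",   3, true},
        {"IC_VEX_L_W", 4, true},   // raised from 3 in this change
      };
      const Ctx *Best = nullptr;
      for (const Ctx &C : Cands)
        if (C.Matches && (!Best || C.Priority > Best->Priority))
          Best = &C;
      printf("%s\n", Best->Name);  // IC_VEX_L_W
      return 0;
    }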
diff --git a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp
index b9d0082..4439311 100644
--- a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp
+++ b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp
@@ -215,3 +215,19 @@ void X86ATTInstPrinter::printMemReference(const MCInst *MI, unsigned Op,
O << markup(">");
}
+
+void X86ATTInstPrinter::printMemOffset(const MCInst *MI, unsigned Op,
+ raw_ostream &O) {
+ const MCOperand &DispSpec = MI->getOperand(Op);
+
+ O << markup("<mem:");
+
+ if (DispSpec.isImm()) {
+ O << formatImm(DispSpec.getImm());
+ } else {
+ assert(DispSpec.isExpr() && "non-immediate displacement?");
+ O << *DispSpec.getExpr();
+ }
+
+ O << markup(">");
+}
diff --git a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h
index 8d05256..a8fab72 100644
--- a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h
+++ b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h
@@ -42,7 +42,8 @@ public:
void printSSECC(const MCInst *MI, unsigned Op, raw_ostream &OS);
void printAVXCC(const MCInst *MI, unsigned Op, raw_ostream &OS);
void printPCRelImm(const MCInst *MI, unsigned OpNo, raw_ostream &OS);
-
+ void printMemOffset(const MCInst *MI, unsigned OpNo, raw_ostream &OS);
+
void printopaquemem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
printMemReference(MI, OpNo, O);
}
@@ -86,6 +87,19 @@ public:
void printf512mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
printMemReference(MI, OpNo, O);
}
+
+ void printMemOffs8(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+ printMemOffset(MI, OpNo, O);
+ }
+ void printMemOffs16(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+ printMemOffset(MI, OpNo, O);
+ }
+ void printMemOffs32(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+ printMemOffset(MI, OpNo, O);
+ }
+ void printMemOffs64(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+ printMemOffset(MI, OpNo, O);
+ }
};
}
diff --git a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp
index 9dfc9a9..e7e7b15 100644
--- a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp
+++ b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp
@@ -200,3 +200,19 @@ void X86IntelInstPrinter::printMemReference(const MCInst *MI, unsigned Op,
O << ']';
}
+
+void X86IntelInstPrinter::printMemOffset(const MCInst *MI, unsigned Op,
+ raw_ostream &O) {
+ const MCOperand &DispSpec = MI->getOperand(Op);
+
+ O << '[';
+
+ if (DispSpec.isImm()) {
+ O << formatImm(DispSpec.getImm());
+ } else {
+ assert(DispSpec.isExpr() && "non-immediate displacement?");
+ O << *DispSpec.getExpr();
+ }
+
+ O << ']';
+}
diff --git a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h
index 45beeda..590bf68 100644
--- a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h
+++ b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h
@@ -39,7 +39,8 @@ public:
void printSSECC(const MCInst *MI, unsigned Op, raw_ostream &O);
void printAVXCC(const MCInst *MI, unsigned Op, raw_ostream &O);
void printPCRelImm(const MCInst *MI, unsigned OpNo, raw_ostream &O);
-
+ void printMemOffset(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+
void printopaquemem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
O << "opaque ptr ";
printMemReference(MI, OpNo, O);
@@ -97,6 +98,23 @@ public:
O << "zmmword ptr ";
printMemReference(MI, OpNo, O);
}
+
+ void printMemOffs8(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+ O << "byte ptr ";
+ printMemOffset(MI, OpNo, O);
+ }
+ void printMemOffs16(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+ O << "word ptr ";
+ printMemOffset(MI, OpNo, O);
+ }
+ void printMemOffs32(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+ O << "dword ptr ";
+ printMemOffset(MI, OpNo, O);
+ }
+ void printMemOffs64(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+ O << "qword ptr ";
+ printMemOffset(MI, OpNo, O);
+ }
};
}
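
Taken together, the two printer changes give the absolute-offset (moffs) MOV forms distinct rendering per dialect: AT&T emits the bare displacement inside the mem markup, while Intel wraps it in brackets behind a size annotation. A rough sketch of the resulting strings (hand-built here, not produced through the LLVM printers; formatImm's exact radix depends on printer settings):

    #include <cstdio>

    // Approximate what printMemOffs8 produces for a displacement of 0x1234
    // in each dialect.
    int main() {
      long long Disp = 0x1234;
      printf("Intel: byte ptr [%lld]\n", Disp);  // X86IntelInstPrinter
      printf("AT&T:  %lld\n", Disp);             // X86ATTInstPrinter, inside <mem:...>
      return 0;
    }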
diff --git a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
index 598ddee..f8e359b 100644
--- a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
@@ -9,6 +9,7 @@
#include "MCTargetDesc/X86BaseInfo.h"
#include "MCTargetDesc/X86FixupKinds.h"
+#include "llvm/ADT/StringSwitch.h"
#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCELFObjectWriter.h"
@@ -19,10 +20,10 @@
#include "llvm/MC/MCSectionCOFF.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCSectionMachO.h"
-#include "llvm/Object/MachOFormat.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ELF.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MachO.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -67,9 +68,16 @@ public:
class X86AsmBackend : public MCAsmBackend {
StringRef CPU;
+ bool HasNopl;
public:
X86AsmBackend(const Target &T, StringRef _CPU)
- : MCAsmBackend(), CPU(_CPU) {}
+ : MCAsmBackend(), CPU(_CPU) {
+ HasNopl = CPU != "generic" && CPU != "i386" && CPU != "i486" &&
+ CPU != "i586" && CPU != "pentium" && CPU != "pentium-mmx" &&
+ CPU != "i686" && CPU != "k6" && CPU != "k6-2" && CPU != "k6-3" &&
+ CPU != "geode" && CPU != "winchip-c6" && CPU != "winchip2" &&
+ CPU != "c3" && CPU != "c3-2";
+ }
unsigned getNumFixupKinds() const {
return X86::NumTargetFixupKinds;
@@ -308,8 +316,8 @@ bool X86AsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const {
  // This CPU doesn't support long nops. If needed, add more.
// FIXME: Can we get this from the subtarget somehow?
- if (CPU == "generic" || CPU == "i386" || CPU == "i486" || CPU == "i586" ||
- CPU == "pentium" || CPU == "pentium-mmx" || CPU == "geode") {
+  // FIXME: We could generate something better than plain 0x90.
+ if (!HasNopl) {
for (uint64_t i = 0; i < Count; ++i)
OW->Write8(0x90);
return true;
@@ -334,6 +342,7 @@ bool X86AsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const {
/* *** */
namespace {
+
class ELFX86AsmBackend : public X86AsmBackend {
public:
uint8_t OSABI;
@@ -382,35 +391,368 @@ public:
}
};
+namespace CU {
+
+ /// Compact unwind encoding values.
+ enum CompactUnwindEncodings {
+ /// [RE]BP based frame where [RE]BP is pused on the stack immediately after
+ /// the return address, then [RE]SP is moved to [RE]BP.
+    /// [RE]BP based frame where [RE]BP is pushed on the stack immediately after
+
+ /// A frameless function with a small constant stack size.
+ UNWIND_MODE_STACK_IMMD = 0x02000000,
+
+ /// A frameless function with a large constant stack size.
+ UNWIND_MODE_STACK_IND = 0x03000000,
+
+ /// No compact unwind encoding is available.
+ UNWIND_MODE_DWARF = 0x04000000,
+
+ /// Mask for encoding the frame registers.
+ UNWIND_BP_FRAME_REGISTERS = 0x00007FFF,
+
+ /// Mask for encoding the frameless registers.
+ UNWIND_FRAMELESS_STACK_REG_PERMUTATION = 0x000003FF
+ };
+
+} // end CU namespace
+
class DarwinX86AsmBackend : public X86AsmBackend {
+ const MCRegisterInfo &MRI;
+
+ /// \brief Number of registers that can be saved in a compact unwind encoding.
+ enum { CU_NUM_SAVED_REGS = 6 };
+
+ mutable unsigned SavedRegs[CU_NUM_SAVED_REGS];
+ bool Is64Bit;
+
+  unsigned OffsetSize;                   ///< Size of the stack slot a "push" occupies.
+  unsigned PushInstrSize;                ///< Size of a "push" instruction.
+  unsigned MoveInstrSize;                ///< Size of a "move" instruction.
+  unsigned StackDivide;                  ///< Amount to divide stack sizes by.
+protected:
+  /// \brief Implementation of the algorithm to generate the compact unwind
+ /// for the CFI instructions.
+ uint32_t
+ generateCompactUnwindEncodingImpl(ArrayRef<MCCFIInstruction> Instrs) const {
+ if (Instrs.empty()) return 0;
+
+ // Reset the saved registers.
+ unsigned SavedRegIdx = 0;
+ memset(SavedRegs, 0, sizeof(SavedRegs));
+
+ bool HasFP = false;
+
+ // Encode that we are using EBP/RBP as the frame pointer.
+ uint32_t CompactUnwindEncoding = 0;
+
+ unsigned SubtractInstrIdx = Is64Bit ? 3 : 2;
+ unsigned InstrOffset = 0;
+ unsigned StackAdjust = 0;
+ unsigned StackSize = 0;
+ unsigned PrevStackSize = 0;
+ unsigned NumDefCFAOffsets = 0;
+
+ for (unsigned i = 0, e = Instrs.size(); i != e; ++i) {
+ const MCCFIInstruction &Inst = Instrs[i];
+
+ switch (Inst.getOperation()) {
+ default:
+ // Any other CFI directives indicate a frame that we aren't prepared
+ // to represent via compact unwind, so just bail out.
+ return 0;
+ case MCCFIInstruction::OpDefCfaRegister: {
+ // Defines a frame pointer. E.g.
+ //
+ // movq %rsp, %rbp
+ // L0:
+ // .cfi_def_cfa_register %rbp
+ //
+ HasFP = true;
+ assert(MRI.getLLVMRegNum(Inst.getRegister(), true) ==
+ (Is64Bit ? X86::RBP : X86::EBP) && "Invalid frame pointer!");
+
+ // Reset the counts.
+ memset(SavedRegs, 0, sizeof(SavedRegs));
+ StackAdjust = 0;
+ SavedRegIdx = 0;
+ InstrOffset += MoveInstrSize;
+ break;
+ }
+ case MCCFIInstruction::OpDefCfaOffset: {
+ // Defines a new offset for the CFA. E.g.
+ //
+ // With frame:
+ //
+ // pushq %rbp
+ // L0:
+ // .cfi_def_cfa_offset 16
+ //
+ // Without frame:
+ //
+ // subq $72, %rsp
+ // L0:
+ // .cfi_def_cfa_offset 80
+ //
+ PrevStackSize = StackSize;
+ StackSize = std::abs(Inst.getOffset()) / StackDivide;
+ ++NumDefCFAOffsets;
+ break;
+ }
+ case MCCFIInstruction::OpOffset: {
+ // Defines a "push" of a callee-saved register. E.g.
+ //
+ // pushq %r15
+ // pushq %r14
+ // pushq %rbx
+ // L0:
+ // subq $120, %rsp
+ // L1:
+ // .cfi_offset %rbx, -40
+ // .cfi_offset %r14, -32
+ // .cfi_offset %r15, -24
+ //
+ if (SavedRegIdx == CU_NUM_SAVED_REGS)
+ // If there are too many saved registers, we cannot use a compact
+ // unwind encoding.
+ return CU::UNWIND_MODE_DWARF;
+
+ unsigned Reg = MRI.getLLVMRegNum(Inst.getRegister(), true);
+ SavedRegs[SavedRegIdx++] = Reg;
+ StackAdjust += OffsetSize;
+ InstrOffset += PushInstrSize;
+ break;
+ }
+ }
+ }
+
+ StackAdjust /= StackDivide;
+
+ if (HasFP) {
+ if ((StackAdjust & 0xFF) != StackAdjust)
+ // Offset was too big for a compact unwind encoding.
+ return CU::UNWIND_MODE_DWARF;
+
+ // Get the encoding of the saved registers when we have a frame pointer.
+ uint32_t RegEnc = encodeCompactUnwindRegistersWithFrame();
+ if (RegEnc == ~0U) return CU::UNWIND_MODE_DWARF;
+
+ CompactUnwindEncoding |= CU::UNWIND_MODE_BP_FRAME;
+ CompactUnwindEncoding |= (StackAdjust & 0xFF) << 16;
+ CompactUnwindEncoding |= RegEnc & CU::UNWIND_BP_FRAME_REGISTERS;
+ } else {
+      // If the amount of the stack allocation is the size of a register, the
+      // compiler "pushes" RAX/EAX onto the stack instead of adjusting the
+      // stack pointer with a SUB instruction. Compact unwind cannot represent
+      // that push, so detect the situation here and fall back to DWARF.
+ if ((NumDefCFAOffsets == SavedRegIdx + 1 &&
+ StackSize - PrevStackSize == 1) ||
+ (Instrs.size() == 1 && NumDefCFAOffsets == 1 && StackSize == 2))
+ return CU::UNWIND_MODE_DWARF;
+
+ SubtractInstrIdx += InstrOffset;
+ ++StackAdjust;
+
+ if ((StackSize & 0xFF) == StackSize) {
+ // Frameless stack with a small stack size.
+ CompactUnwindEncoding |= CU::UNWIND_MODE_STACK_IMMD;
+
+ // Encode the stack size.
+ CompactUnwindEncoding |= (StackSize & 0xFF) << 16;
+ } else {
+ if ((StackAdjust & 0x7) != StackAdjust)
+ // The extra stack adjustments are too big for us to handle.
+ return CU::UNWIND_MODE_DWARF;
+
+ // Frameless stack with an offset too large for us to encode compactly.
+ CompactUnwindEncoding |= CU::UNWIND_MODE_STACK_IND;
+
+ // Encode the offset to the nnnnnn value in the 'subl $nnnnnn, ESP'
+ // instruction.
+ CompactUnwindEncoding |= (SubtractInstrIdx & 0xFF) << 16;
+
+        // Encode any extra stack adjustments (done via push
+ // instructions).
+ CompactUnwindEncoding |= (StackAdjust & 0x7) << 13;
+ }
+
+ // Encode the number of registers saved. (Reverse the list first.)
+ std::reverse(&SavedRegs[0], &SavedRegs[SavedRegIdx]);
+ CompactUnwindEncoding |= (SavedRegIdx & 0x7) << 10;
+
+ // Get the encoding of the saved registers when we don't have a frame
+ // pointer.
+ uint32_t RegEnc = encodeCompactUnwindRegistersWithoutFrame(SavedRegIdx);
+ if (RegEnc == ~0U) return CU::UNWIND_MODE_DWARF;
+
+ // Encode the register encoding.
+ CompactUnwindEncoding |=
+ RegEnc & CU::UNWIND_FRAMELESS_STACK_REG_PERMUTATION;
+ }
+
+ return CompactUnwindEncoding;
+ }
+
+private:
+ /// \brief Get the compact unwind number for a given register. The number
+ /// corresponds to the enum lists in compact_unwind_encoding.h.
+ int getCompactUnwindRegNum(unsigned Reg) const {
+ static const uint16_t CU32BitRegs[7] = {
+ X86::EBX, X86::ECX, X86::EDX, X86::EDI, X86::ESI, X86::EBP, 0
+ };
+ static const uint16_t CU64BitRegs[] = {
+ X86::RBX, X86::R12, X86::R13, X86::R14, X86::R15, X86::RBP, 0
+ };
+ const uint16_t *CURegs = Is64Bit ? CU64BitRegs : CU32BitRegs;
+ for (int Idx = 1; *CURegs; ++CURegs, ++Idx)
+ if (*CURegs == Reg)
+ return Idx;
+
+ return -1;
+ }
+
+ /// \brief Return the registers encoded for a compact encoding with a frame
+ /// pointer.
+ uint32_t encodeCompactUnwindRegistersWithFrame() const {
+    // Encode the registers in the order they were saved, 3 bits per
+    // register. The list of saved registers is assumed to be in reverse
+    // order. The registers are numbered from 1 to CU_NUM_SAVED_REGS.
+ uint32_t RegEnc = 0;
+ for (int i = 0, Idx = 0; i != CU_NUM_SAVED_REGS; ++i) {
+ unsigned Reg = SavedRegs[i];
+ if (Reg == 0) break;
+
+ int CURegNum = getCompactUnwindRegNum(Reg);
+ if (CURegNum == -1) return ~0U;
+
+      // Encode each 3-bit register number in order, advancing 3 bits per
+      // register.
+ RegEnc |= (CURegNum & 0x7) << (Idx++ * 3);
+ }
+
+ assert((RegEnc & 0x3FFFF) == RegEnc &&
+ "Invalid compact register encoding!");
+ return RegEnc;
+ }
+
+ /// \brief Create the permutation encoding used with frameless stacks. It is
+ /// passed the number of registers to be saved and an array of the registers
+ /// saved.
+ uint32_t encodeCompactUnwindRegistersWithoutFrame(unsigned RegCount) const {
+ // The saved registers are numbered from 1 to 6. In order to encode the
+ // order in which they were saved, we re-number them according to their
+ // place in the register order. The re-numbering is relative to the last
+ // re-numbered register. E.g., if we have registers {6, 2, 4, 5} saved in
+ // that order:
+ //
+ // Orig Re-Num
+ // ---- ------
+ // 6 6
+ // 2 2
+ // 4 3
+ // 5 3
+ //
+ for (unsigned i = 0; i != CU_NUM_SAVED_REGS; ++i) {
+ int CUReg = getCompactUnwindRegNum(SavedRegs[i]);
+ if (CUReg == -1) return ~0U;
+ SavedRegs[i] = CUReg;
+ }
+
+ // Reverse the list.
+ std::reverse(&SavedRegs[0], &SavedRegs[CU_NUM_SAVED_REGS]);
+
+ uint32_t RenumRegs[CU_NUM_SAVED_REGS];
+ for (unsigned i = CU_NUM_SAVED_REGS - RegCount; i < CU_NUM_SAVED_REGS; ++i){
+ unsigned Countless = 0;
+ for (unsigned j = CU_NUM_SAVED_REGS - RegCount; j < i; ++j)
+ if (SavedRegs[j] < SavedRegs[i])
+ ++Countless;
+
+ RenumRegs[i] = SavedRegs[i] - Countless - 1;
+ }
+
+ // Take the renumbered values and encode them into a 10-bit number.
+ uint32_t permutationEncoding = 0;
+ switch (RegCount) {
+ case 6:
+ permutationEncoding |= 120 * RenumRegs[0] + 24 * RenumRegs[1]
+ + 6 * RenumRegs[2] + 2 * RenumRegs[3]
+ + RenumRegs[4];
+ break;
+ case 5:
+ permutationEncoding |= 120 * RenumRegs[1] + 24 * RenumRegs[2]
+ + 6 * RenumRegs[3] + 2 * RenumRegs[4]
+ + RenumRegs[5];
+ break;
+ case 4:
+ permutationEncoding |= 60 * RenumRegs[2] + 12 * RenumRegs[3]
+ + 3 * RenumRegs[4] + RenumRegs[5];
+ break;
+ case 3:
+ permutationEncoding |= 20 * RenumRegs[3] + 4 * RenumRegs[4]
+ + RenumRegs[5];
+ break;
+ case 2:
+ permutationEncoding |= 5 * RenumRegs[4] + RenumRegs[5];
+ break;
+ case 1:
+ permutationEncoding |= RenumRegs[5];
+ break;
+ }
+
+ assert((permutationEncoding & 0x3FF) == permutationEncoding &&
+ "Invalid compact register encoding!");
+ return permutationEncoding;
+ }
+
public:
- DarwinX86AsmBackend(const Target &T, StringRef CPU)
- : X86AsmBackend(T, CPU) { }
+ DarwinX86AsmBackend(const Target &T, const MCRegisterInfo &MRI, StringRef CPU,
+ bool Is64Bit)
+ : X86AsmBackend(T, CPU), MRI(MRI), Is64Bit(Is64Bit) {
+ memset(SavedRegs, 0, sizeof(SavedRegs));
+ OffsetSize = Is64Bit ? 8 : 4;
+ MoveInstrSize = Is64Bit ? 3 : 2;
+ StackDivide = Is64Bit ? 8 : 4;
+ PushInstrSize = 1;
+ }
};
class DarwinX86_32AsmBackend : public DarwinX86AsmBackend {
+ bool SupportsCU;
public:
- DarwinX86_32AsmBackend(const Target &T, StringRef CPU)
- : DarwinX86AsmBackend(T, CPU) {}
+ DarwinX86_32AsmBackend(const Target &T, const MCRegisterInfo &MRI,
+ StringRef CPU, bool SupportsCU)
+ : DarwinX86AsmBackend(T, MRI, CPU, false), SupportsCU(SupportsCU) {}
MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
return createX86MachObjectWriter(OS, /*Is64Bit=*/false,
- object::mach::CTM_i386,
- object::mach::CSX86_ALL);
+ MachO::CPU_TYPE_I386,
+ MachO::CPU_SUBTYPE_I386_ALL);
+ }
+
+ /// \brief Generate the compact unwind encoding for the CFI instructions.
+ virtual uint32_t
+ generateCompactUnwindEncoding(ArrayRef<MCCFIInstruction> Instrs) const {
+ return SupportsCU ? generateCompactUnwindEncodingImpl(Instrs) : 0;
}
};
class DarwinX86_64AsmBackend : public DarwinX86AsmBackend {
+ bool SupportsCU;
+ const MachO::CPUSubTypeX86 Subtype;
public:
- DarwinX86_64AsmBackend(const Target &T, StringRef CPU)
- : DarwinX86AsmBackend(T, CPU) {
+ DarwinX86_64AsmBackend(const Target &T, const MCRegisterInfo &MRI,
+ StringRef CPU, bool SupportsCU,
+ MachO::CPUSubTypeX86 st)
+ : DarwinX86AsmBackend(T, MRI, CPU, true), SupportsCU(SupportsCU),
+ Subtype(st) {
HasReliableSymbolDifference = true;
}
MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
return createX86MachObjectWriter(OS, /*Is64Bit=*/true,
- object::mach::CTM_x86_64,
- object::mach::CSX86_ALL);
+ MachO::CPU_TYPE_X86_64, Subtype);
}
virtual bool doesSectionRequireSymbols(const MCSection &Section) const {
@@ -445,15 +787,26 @@ public:
return false;
}
}
+
+ /// \brief Generate the compact unwind encoding for the CFI instructions.
+ virtual uint32_t
+ generateCompactUnwindEncoding(ArrayRef<MCCFIInstruction> Instrs) const {
+ return SupportsCU ? generateCompactUnwindEncodingImpl(Instrs) : 0;
+ }
};
} // end anonymous namespace
-MCAsmBackend *llvm::createX86_32AsmBackend(const Target &T, StringRef TT, StringRef CPU) {
+MCAsmBackend *llvm::createX86_32AsmBackend(const Target &T,
+ const MCRegisterInfo &MRI,
+ StringRef TT,
+ StringRef CPU) {
Triple TheTriple(TT);
if (TheTriple.isOSDarwin() || TheTriple.getEnvironment() == Triple::MachO)
- return new DarwinX86_32AsmBackend(T, CPU);
+ return new DarwinX86_32AsmBackend(T, MRI, CPU,
+ TheTriple.isMacOSX() &&
+ !TheTriple.isMacOSXVersionLT(10, 7));
if (TheTriple.isOSWindows() && TheTriple.getEnvironment() != Triple::ELF)
return new WindowsX86AsmBackend(T, false, CPU);
@@ -462,11 +815,21 @@ MCAsmBackend *llvm::createX86_32AsmBackend(const Target &T, StringRef TT, String
return new ELFX86_32AsmBackend(T, OSABI, CPU);
}
-MCAsmBackend *llvm::createX86_64AsmBackend(const Target &T, StringRef TT, StringRef CPU) {
+MCAsmBackend *llvm::createX86_64AsmBackend(const Target &T,
+ const MCRegisterInfo &MRI,
+ StringRef TT,
+ StringRef CPU) {
Triple TheTriple(TT);
- if (TheTriple.isOSDarwin() || TheTriple.getEnvironment() == Triple::MachO)
- return new DarwinX86_64AsmBackend(T, CPU);
+ if (TheTriple.isOSDarwin() || TheTriple.getEnvironment() == Triple::MachO) {
+ MachO::CPUSubTypeX86 CS =
+ StringSwitch<MachO::CPUSubTypeX86>(TheTriple.getArchName())
+ .Case("x86_64h", MachO::CPU_SUBTYPE_X86_64_H)
+ .Default(MachO::CPU_SUBTYPE_X86_64_ALL);
+ return new DarwinX86_64AsmBackend(T, MRI, CPU,
+ TheTriple.isMacOSX() &&
+ !TheTriple.isMacOSXVersionLT(10, 7), CS);
+ }
if (TheTriple.isOSWindows() && TheTriple.getEnvironment() != Triple::ELF)
return new WindowsX86AsmBackend(T, true, CPU);
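
The frameless encoding is the subtle part of the new compact unwind support: saved registers are renumbered Lehmer-style (each value reduced by the count of smaller values already seen), packed with mixed-radix weights into ten bits, and combined with the mode, stack size and register count. A simplified standalone rendering, operating on an already-ordered three-register list rather than the fixed six-slot array the backend reverses in place; the register numbers and stack size below are hypothetical:

    #include <cstdint>
    #include <cstdio>
    #include <vector>

    enum : uint32_t { UNWIND_MODE_STACK_IMMD = 0x02000000 };

    // Renumber the CU register numbers (1-6) relative to those already
    // emitted, then pack with the RegCount == 3 weights (20, 4, 1) from the
    // switch in encodeCompactUnwindRegistersWithoutFrame.
    static uint32_t permutationEncode3(const std::vector<unsigned> &Regs) {
      std::vector<unsigned> Renum;
      for (size_t I = 0; I < Regs.size(); ++I) {
        unsigned Countless = 0;
        for (size_t J = 0; J < I; ++J)
          if (Regs[J] < Regs[I])
            ++Countless;
        Renum.push_back(Regs[I] - Countless - 1);
      }
      return 20 * Renum[0] + 4 * Renum[1] + Renum[2];
    }

    int main() {
      // Hypothetical frameless function: stack size 0x28 (in StackDivide
      // units), three saved registers with CU numbers {1, 3, 4}.
      std::vector<unsigned> Regs = {1, 3, 4};
      uint32_t Enc = UNWIND_MODE_STACK_IMMD
                   | ((0x28u & 0xFF) << 16)              // stack size
                   | ((3u & 0x7) << 10)                  // register count
                   | (permutationEncode3(Regs) & 0x3FF); // permutation
      printf("0x%08x\n", Enc);  // 0x02280c05
      return 0;
    }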
diff --git a/lib/Target/X86/MCTargetDesc/X86BaseInfo.h b/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
index 25d1af3..1ef9814 100644
--- a/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
+++ b/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
@@ -354,6 +354,9 @@ namespace X86II {
// XOP9 - Prefix to exclude use of imm byte.
XOP9 = 21 << Op0Shift,
+ // XOPA - Prefix to encode 0xA in VEX.MMMM of XOP instructions.
+ XOPA = 22 << Op0Shift,
+
//===------------------------------------------------------------------===//
// REX_W - REX prefixes are instruction prefixes used in 64-bit mode.
// They are used to specify GPRs and SSE registers, 64-bit operand size,
diff --git a/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp b/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp
index b400b87..3ddd865 100644
--- a/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp
@@ -108,6 +108,15 @@ unsigned X86ELFObjectWriter::GetRelocType(const MCValue &Target,
case MCSymbolRefExpr::VK_None:
Type = ELF::R_X86_64_64;
break;
+ case MCSymbolRefExpr::VK_GOT:
+ Type = ELF::R_X86_64_GOT64;
+ break;
+ case MCSymbolRefExpr::VK_GOTOFF:
+ Type = ELF::R_X86_64_GOTOFF64;
+ break;
+ case MCSymbolRefExpr::VK_TPOFF:
+ Type = ELF::R_X86_64_TPOFF64;
+ break;
case MCSymbolRefExpr::VK_DTPOFF:
Type = ELF::R_X86_64_DTPOFF64;
break;
diff --git a/lib/Target/X86/MCTargetDesc/X86ELFRelocationInfo.cpp b/lib/Target/X86/MCTargetDesc/X86ELFRelocationInfo.cpp
index 8f4ab46..a3eb4fb 100644
--- a/lib/Target/X86/MCTargetDesc/X86ELFRelocationInfo.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86ELFRelocationInfo.cpp
@@ -13,7 +13,7 @@
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCRelocationInfo.h"
-#include "llvm/Object/ELF.h"
+#include "llvm/Object/ELFObjectFile.h"
#include "llvm/Support/ELF.h"
using namespace llvm;
diff --git a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
index 7815ae9..3861e1c 100644
--- a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
@@ -59,10 +59,8 @@ X86MCAsmInfoDarwin::X86MCAsmInfoDarwin(const Triple &T) {
// for .S files on other systems. Perhaps this is because the file system
// wasn't always case preserving or something.
CommentString = "##";
- PCSymbol = ".";
SupportsDebugInformation = true;
- DwarfUsesInlineInfoSection = true;
UseDataRegionDirectives = MarkedJTDataRegions;
// Exceptions handling
@@ -92,8 +90,6 @@ X86ELFMCAsmInfo::X86ELFMCAsmInfo(const Triple &T) {
TextAlignFillValue = 0x90;
PrivateGlobalPrefix = ".L";
- WeakRefDirective = "\t.weak\t";
- PCSymbol = ".";
// Set up DWARF directives
HasLEB128 = true; // Target asm supports leb128 directives (little-endian)
@@ -139,6 +135,8 @@ X86MCAsmInfoMicrosoft::X86MCAsmInfoMicrosoft(const Triple &Triple) {
AssemblerDialect = AsmWriterFlavor;
TextAlignFillValue = 0x90;
+
+ AllowAtInName = true;
}
void X86MCAsmInfoGNUCOFF::anchor() { }
diff --git a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.h b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.h
index b6b70fd..80979dd 100644
--- a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.h
+++ b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.h
@@ -17,6 +17,7 @@
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCAsmInfoCOFF.h"
#include "llvm/MC/MCAsmInfoDarwin.h"
+#include "llvm/MC/MCAsmInfoELF.h"
namespace llvm {
class Triple;
@@ -35,7 +36,7 @@ namespace llvm {
MCStreamer &Streamer) const;
};
- class X86ELFMCAsmInfo : public MCAsmInfo {
+ class X86ELFMCAsmInfo : public MCAsmInfoELF {
virtual void anchor();
public:
explicit X86ELFMCAsmInfo(const Triple &Triple);
diff --git a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
index 8515879..7952607 100644
--- a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
@@ -564,7 +564,7 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
unsigned char VEX_W = 0;
// XOP: Use XOP prefix byte 0x8f instead of VEX.
- unsigned char XOP = 0;
+ bool XOP = false;
// VEX_5M (VEX m-mmmmm field):
//
@@ -574,7 +574,8 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
// 0b00011: implied 0F 3A leading opcode bytes
// 0b00100-0b11111: Reserved for future use
// 0b01000: XOP map select - 08h instructions with imm byte
- // 0b10001: XOP map select - 09h instructions with no imm byte
+ // 0b01001: XOP map select - 09h instructions with no imm byte
+ // 0b01010: XOP map select - 0Ah instructions with imm dword
unsigned char VEX_5M = 0x1;
// VEX_4V (VEX vvvv field): a register specifier
@@ -620,7 +621,7 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
VEX_W = 1;
if ((TSFlags >> X86II::VEXShift) & X86II::XOP)
- XOP = 1;
+ XOP = true;
if ((TSFlags >> X86II::VEXShift) & X86II::VEX_L)
VEX_L = 1;
@@ -665,11 +666,11 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
case X86II::XOP9:
VEX_5M = 0x9;
break;
- case X86II::A6: // Bypass: Not used by VEX
- case X86II::A7: // Bypass: Not used by VEX
- case X86II::TB: // Bypass: Not used by VEX
- case 0:
- break; // No prefix!
+ case X86II::XOPA:
+ VEX_5M = 0xA;
+ break;
+ case X86II::TB: // VEX_5M/VEX_PP already correct
+ break;
}
@@ -786,8 +787,8 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
VEX_4V = getVEXRegisterEncoding(MI, CurOp);
if (HasEVEX && X86II::is32ExtendedReg(MI.getOperand(CurOp).getReg()))
EVEX_V2 = 0x0;
+ CurOp++;
}
- CurOp++;
if (HasEVEX_K)
EVEX_aaa = getWriteMaskRegisterEncoding(MI, CurOp++);
@@ -868,11 +869,12 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
case X86II::MRM6r: case X86II::MRM7r:
// MRM0r-MRM7r instructions forms:
// dst(VEX_4V), src(ModR/M), imm8
- VEX_4V = getVEXRegisterEncoding(MI, CurOp);
- if (HasEVEX && X86II::is32ExtendedReg(MI.getOperand(CurOp).getReg()))
- EVEX_V2 = 0x0;
- CurOp++;
-
+ if (HasVEX_4V) {
+ VEX_4V = getVEXRegisterEncoding(MI, CurOp);
+ if (HasEVEX && X86II::is32ExtendedReg(MI.getOperand(CurOp).getReg()))
+ EVEX_V2 = 0x0;
+ CurOp++;
+ }
if (HasEVEX_K)
EVEX_aaa = getWriteMaskRegisterEncoding(MI, CurOp++);
diff --git a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
index bd23ce4..1cbdafd 100644
--- a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
@@ -368,7 +368,7 @@ static MCStreamer *createMCStreamer(const Target &T, StringRef TT,
if (TheTriple.isOSWindows() && TheTriple.getEnvironment() != Triple::ELF)
return createWinCOFFStreamer(Ctx, MAB, *_Emitter, _OS, RelaxAll);
- return createELFStreamer(Ctx, MAB, _OS, _Emitter, RelaxAll, NoExecStack);
+ return createELFStreamer(Ctx, 0, MAB, _OS, _Emitter, RelaxAll, NoExecStack);
}
static MCInstPrinter *createX86MCInstPrinter(const Target &T,
diff --git a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h
index 2f459b4..41ae435 100644
--- a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h
+++ b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h
@@ -79,8 +79,10 @@ MCCodeEmitter *createX86MCCodeEmitter(const MCInstrInfo &MCII,
const MCSubtargetInfo &STI,
MCContext &Ctx);
-MCAsmBackend *createX86_32AsmBackend(const Target &T, StringRef TT, StringRef CPU);
-MCAsmBackend *createX86_64AsmBackend(const Target &T, StringRef TT, StringRef CPU);
+MCAsmBackend *createX86_32AsmBackend(const Target &T, const MCRegisterInfo &MRI,
+ StringRef TT, StringRef CPU);
+MCAsmBackend *createX86_64AsmBackend(const Target &T, const MCRegisterInfo &MRI,
+ StringRef TT, StringRef CPU);
/// createX86MachObjectWriter - Construct an X86 Mach-O object writer.
MCObjectWriter *createX86MachObjectWriter(raw_ostream &OS,
diff --git a/lib/Target/X86/MCTargetDesc/X86MachORelocationInfo.cpp b/lib/Target/X86/MCTargetDesc/X86MachORelocationInfo.cpp
index 75b5acf..209b1d0 100644
--- a/lib/Target/X86/MCTargetDesc/X86MachORelocationInfo.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MachORelocationInfo.cpp
@@ -17,7 +17,7 @@
using namespace llvm;
using namespace object;
-using namespace macho;
+using namespace MachO;
namespace {
class X86_64MachORelocationInfo : public MCRelocationInfo {
@@ -33,7 +33,7 @@ public:
StringRef SymName; SymI->getName(SymName);
uint64_t SymAddr; SymI->getAddress(SymAddr);
- RelocationEntry RE = Obj->getRelocation(Rel.getRawDataRefImpl());
+ any_relocation_info RE = Obj->getRelocation(Rel.getRawDataRefImpl());
bool isPCRel = Obj->getAnyRelocationPCRel(RE);
MCSymbol *Sym = Ctx.GetOrCreateSymbol(SymName);
@@ -43,44 +43,44 @@ public:
const MCExpr *Expr = 0;
switch(RelType) {
- case RIT_X86_64_TLV:
+ case X86_64_RELOC_TLV:
Expr = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_TLVP, Ctx);
break;
- case RIT_X86_64_Signed4:
+ case X86_64_RELOC_SIGNED_4:
Expr = MCBinaryExpr::CreateAdd(MCSymbolRefExpr::Create(Sym, Ctx),
MCConstantExpr::Create(4, Ctx),
Ctx);
break;
- case RIT_X86_64_Signed2:
+ case X86_64_RELOC_SIGNED_2:
Expr = MCBinaryExpr::CreateAdd(MCSymbolRefExpr::Create(Sym, Ctx),
MCConstantExpr::Create(2, Ctx),
Ctx);
break;
- case RIT_X86_64_Signed1:
+ case X86_64_RELOC_SIGNED_1:
Expr = MCBinaryExpr::CreateAdd(MCSymbolRefExpr::Create(Sym, Ctx),
MCConstantExpr::Create(1, Ctx),
Ctx);
break;
- case RIT_X86_64_GOTLoad:
+ case X86_64_RELOC_GOT_LOAD:
Expr = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_GOTPCREL, Ctx);
break;
- case RIT_X86_64_GOT:
+ case X86_64_RELOC_GOT:
Expr = MCSymbolRefExpr::Create(Sym, isPCRel ?
MCSymbolRefExpr::VK_GOTPCREL :
MCSymbolRefExpr::VK_GOT,
Ctx);
break;
- case RIT_X86_64_Subtractor:
+ case X86_64_RELOC_SUBTRACTOR:
{
RelocationRef RelNext;
Obj->getRelocationNext(Rel.getRawDataRefImpl(), RelNext);
- RelocationEntry RENext = Obj->getRelocation(RelNext.getRawDataRefImpl());
+ any_relocation_info RENext = Obj->getRelocation(RelNext.getRawDataRefImpl());
// X86_64_SUBTRACTOR must be followed by a relocation of type
- // X86_64_RELOC_UNSIGNED .
+ // X86_64_RELOC_UNSIGNED.
// NOTE: Scattered relocations don't exist on x86_64.
unsigned RType = Obj->getAnyRelocationType(RENext);
- if (RType != RIT_X86_64_Unsigned)
+ if (RType != X86_64_RELOC_UNSIGNED)
report_fatal_error("Expected X86_64_RELOC_UNSIGNED after "
"X86_64_RELOC_SUBTRACTOR.");
diff --git a/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp b/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp
index 64f005c..eb7c0b1 100644
--- a/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp
@@ -16,12 +16,11 @@
#include "llvm/MC/MCMachObjectWriter.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCValue.h"
-#include "llvm/Object/MachOFormat.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
+#include "llvm/Support/MachO.h"
using namespace llvm;
-using namespace llvm::object;
namespace {
class X86MachObjectWriter : public MCMachObjectTargetWriter {
@@ -132,7 +131,7 @@ void X86MachObjectWriter::RecordX86_64Relocation(MachObjectWriter *Writer,
if (Target.isAbsolute()) { // constant
// SymbolNum of 0 indicates the absolute section.
- Type = macho::RIT_X86_64_Unsigned;
+ Type = MachO::X86_64_RELOC_UNSIGNED;
Index = 0;
// FIXME: I believe this is broken, I don't think the linker can understand
@@ -141,26 +140,31 @@ void X86MachObjectWriter::RecordX86_64Relocation(MachObjectWriter *Writer,
// is to use an absolute symbol (which we don't support yet).
if (IsPCRel) {
IsExtern = 1;
- Type = macho::RIT_X86_64_Branch;
+ Type = MachO::X86_64_RELOC_BRANCH;
}
} else if (Target.getSymB()) { // A - B + constant
const MCSymbol *A = &Target.getSymA()->getSymbol();
+ if (A->isTemporary())
+ A = &A->AliasedSymbol();
MCSymbolData &A_SD = Asm.getSymbolData(*A);
const MCSymbolData *A_Base = Asm.getAtom(&A_SD);
const MCSymbol *B = &Target.getSymB()->getSymbol();
+ if (B->isTemporary())
+ B = &B->AliasedSymbol();
MCSymbolData &B_SD = Asm.getSymbolData(*B);
const MCSymbolData *B_Base = Asm.getAtom(&B_SD);
// Neither symbol can be modified.
if (Target.getSymA()->getKind() != MCSymbolRefExpr::VK_None ||
Target.getSymB()->getKind() != MCSymbolRefExpr::VK_None)
- report_fatal_error("unsupported relocation of modified symbol");
+ report_fatal_error("unsupported relocation of modified symbol", false);
// We don't support PCrel relocations of differences. Darwin 'as' doesn't
// implement most of these correctly.
if (IsPCRel)
- report_fatal_error("unsupported pc-relative relocation of difference");
+ report_fatal_error("unsupported pc-relative relocation of difference",
+ false);
// The support for the situation where one or both of the symbols would
// require a local relocation is handled just like if the symbols were
@@ -173,7 +177,13 @@ void X86MachObjectWriter::RecordX86_64Relocation(MachObjectWriter *Writer,
// single SIGNED relocation); reject it for now. Except the case where both
// symbols don't have a base, equal but both NULL.
if (A_Base == B_Base && A_Base)
- report_fatal_error("unsupported relocation with identical base");
+ report_fatal_error("unsupported relocation with identical base", false);
+
+ // A subtraction expression where both symbols are undefined is a
+ // non-relocatable expression.
+ if (A->isUndefined() && B->isUndefined())
+ report_fatal_error("unsupported relocation with subtraction expression",
+ false);
Value += Writer->getSymbolAddress(&A_SD, Layout) -
(A_Base == NULL ? 0 : Writer->getSymbolAddress(A_Base, Layout));
@@ -188,15 +198,15 @@ void X86MachObjectWriter::RecordX86_64Relocation(MachObjectWriter *Writer,
Index = A_SD.getFragment()->getParent()->getOrdinal() + 1;
IsExtern = 0;
}
- Type = macho::RIT_X86_64_Unsigned;
-
- macho::RelocationEntry MRE;
- MRE.Word0 = FixupOffset;
- MRE.Word1 = ((Index << 0) |
- (IsPCRel << 24) |
- (Log2Size << 25) |
- (IsExtern << 27) |
- (Type << 28));
+ Type = MachO::X86_64_RELOC_UNSIGNED;
+
+ MachO::any_relocation_info MRE;
+ MRE.r_word0 = FixupOffset;
+ MRE.r_word1 = ((Index << 0) |
+ (IsPCRel << 24) |
+ (Log2Size << 25) |
+ (IsExtern << 27) |
+ (Type << 28));
Writer->addRelocation(Fragment->getParent(), MRE);
if (B_Base) {
@@ -207,7 +217,7 @@ void X86MachObjectWriter::RecordX86_64Relocation(MachObjectWriter *Writer,
Index = B_SD.getFragment()->getParent()->getOrdinal() + 1;
IsExtern = 0;
}
- Type = macho::RIT_X86_64_Subtractor;
+ Type = MachO::X86_64_RELOC_SUBTRACTOR;
} else {
const MCSymbol *Symbol = &Target.getSymA()->getSymbol();
MCSymbolData &SD = Asm.getSymbolData(*Symbol);
@@ -252,11 +262,11 @@ void X86MachObjectWriter::RecordX86_64Relocation(MachObjectWriter *Writer,
return;
} else {
report_fatal_error("unsupported relocation of variable '" +
- Symbol->getName() + "'");
+ Symbol->getName() + "'", false);
}
} else {
report_fatal_error("unsupported relocation of undefined symbol '" +
- Symbol->getName() + "'");
+ Symbol->getName() + "'", false);
}
MCSymbolRefExpr::VariantKind Modifier = Target.getSymA()->getKind();
@@ -267,15 +277,16 @@ void X86MachObjectWriter::RecordX86_64Relocation(MachObjectWriter *Writer,
  // rewrite the movq to a leaq at link time if the symbol ends up in
// the same linkage unit.
if (unsigned(Fixup.getKind()) == X86::reloc_riprel_4byte_movq_load)
- Type = macho::RIT_X86_64_GOTLoad;
+ Type = MachO::X86_64_RELOC_GOT_LOAD;
else
- Type = macho::RIT_X86_64_GOT;
+ Type = MachO::X86_64_RELOC_GOT;
} else if (Modifier == MCSymbolRefExpr::VK_TLVP) {
- Type = macho::RIT_X86_64_TLV;
+ Type = MachO::X86_64_RELOC_TLV;
} else if (Modifier != MCSymbolRefExpr::VK_None) {
- report_fatal_error("unsupported symbol modifier in relocation");
+ report_fatal_error("unsupported symbol modifier in relocation",
+ false);
} else {
- Type = macho::RIT_X86_64_Signed;
+ Type = MachO::X86_64_RELOC_SIGNED;
// The Darwin x86_64 relocation format has a problem where it cannot
// encode an address (L<foo> + <constant>) which is outside the atom
@@ -292,34 +303,40 @@ void X86MachObjectWriter::RecordX86_64Relocation(MachObjectWriter *Writer,
// (the additional bias), but instead appear to just look at the final
// offset.
switch (-(Target.getConstant() + (1LL << Log2Size))) {
- case 1: Type = macho::RIT_X86_64_Signed1; break;
- case 2: Type = macho::RIT_X86_64_Signed2; break;
- case 4: Type = macho::RIT_X86_64_Signed4; break;
+ case 1: Type = MachO::X86_64_RELOC_SIGNED_1; break;
+ case 2: Type = MachO::X86_64_RELOC_SIGNED_2; break;
+ case 4: Type = MachO::X86_64_RELOC_SIGNED_4; break;
}
}
} else {
if (Modifier != MCSymbolRefExpr::VK_None)
report_fatal_error("unsupported symbol modifier in branch "
- "relocation");
+ "relocation", false);
- Type = macho::RIT_X86_64_Branch;
+ Type = MachO::X86_64_RELOC_BRANCH;
}
} else {
if (Modifier == MCSymbolRefExpr::VK_GOT) {
- Type = macho::RIT_X86_64_GOT;
+ Type = MachO::X86_64_RELOC_GOT;
} else if (Modifier == MCSymbolRefExpr::VK_GOTPCREL) {
// GOTPCREL is allowed as a modifier on non-PCrel instructions, in which
// case all we do is set the PCrel bit in the relocation entry; this is
// used with exception handling, for example. The source is required to
// include any necessary offset directly.
- Type = macho::RIT_X86_64_GOT;
+ Type = MachO::X86_64_RELOC_GOT;
IsPCRel = 1;
} else if (Modifier == MCSymbolRefExpr::VK_TLVP) {
- report_fatal_error("TLVP symbol modifier should have been rip-rel");
+ report_fatal_error("TLVP symbol modifier should have been rip-rel",
+ false);
} else if (Modifier != MCSymbolRefExpr::VK_None)
- report_fatal_error("unsupported symbol modifier in relocation");
- else
- Type = macho::RIT_X86_64_Unsigned;
+ report_fatal_error("unsupported symbol modifier in relocation", false);
+ else {
+ Type = MachO::X86_64_RELOC_UNSIGNED;
+ unsigned Kind = Fixup.getKind();
+ if (Kind == X86::reloc_signed_4byte)
+ report_fatal_error("32-bit absolute addressing is not supported in "
+ "64-bit mode", false);
+ }
}
}
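
A gloss on the SIGNED_1/2/4 selection above: for a PC-relative x86_64 fixup the linker assumes the target lies `(1 << Log2Size)` bytes past the fixup, so when extra immediate bytes follow the displacement the bias has to be carried in the relocation type itself. A minimal sketch lifted from the switch above (helper name illustrative, not LLVM API):

```cpp
#include "llvm/Support/MachO.h"

// Pick the x86_64 Mach-O SIGNED relocation variant from the addend and
// the log2 of the fixup size, mirroring the switch in the patch.
static unsigned pickSignedRelocType(int64_t Constant, unsigned Log2Size) {
  switch (-(Constant + (1LL << Log2Size))) {
  case 1:  return llvm::MachO::X86_64_RELOC_SIGNED_1; // 1 imm byte follows
  case 2:  return llvm::MachO::X86_64_RELOC_SIGNED_2; // 2 imm bytes follow
  case 4:  return llvm::MachO::X86_64_RELOC_SIGNED_4; // 4 imm bytes follow
  default: return llvm::MachO::X86_64_RELOC_SIGNED;
  }
}
```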
@@ -327,13 +344,13 @@ void X86MachObjectWriter::RecordX86_64Relocation(MachObjectWriter *Writer,
FixedValue = Value;
// struct relocation_info (8 bytes)
- macho::RelocationEntry MRE;
- MRE.Word0 = FixupOffset;
- MRE.Word1 = ((Index << 0) |
- (IsPCRel << 24) |
- (Log2Size << 25) |
- (IsExtern << 27) |
- (Type << 28));
+ MachO::any_relocation_info MRE;
+ MRE.r_word0 = FixupOffset;
+ MRE.r_word1 = ((Index << 0) |
+ (IsPCRel << 24) |
+ (Log2Size << 25) |
+ (IsExtern << 27) |
+ (Type << 28));
Writer->addRelocation(Fragment->getParent(), MRE);
}
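
The `r_word1` expressions repeated throughout these hunks all encode the same bitfield layout of `struct relocation_info` from `<mach-o/reloc.h>`. A sketch of the packing (helper name illustrative, not LLVM API):

```cpp
#include <cstdint>

// Plain (non-scattered) relocation_info layout: a 24-bit symbol/section
// index, then 1-bit pcrel, 2-bit length (log2 of the fixup size),
// 1-bit extern, and a 4-bit type in the top nibble.
static uint32_t packRelocationWord1(uint32_t Index, bool IsPCRel,
                                    unsigned Log2Size, bool IsExtern,
                                    unsigned Type) {
  return (Index << 0) | (uint32_t(IsPCRel) << 24) | (Log2Size << 25) |
         (uint32_t(IsExtern) << 27) | (Type << 28);
}
```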
@@ -347,7 +364,7 @@ bool X86MachObjectWriter::RecordScatteredRelocation(MachObjectWriter *Writer,
uint64_t &FixedValue) {
uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset();
unsigned IsPCRel = Writer->isFixupKindPCRel(Asm, Fixup.getKind());
- unsigned Type = macho::RIT_Vanilla;
+ unsigned Type = MachO::GENERIC_RELOC_VANILLA;
// See <reloc.h>.
const MCSymbol *A = &Target.getSymA()->getSymbol();
@@ -355,7 +372,8 @@ bool X86MachObjectWriter::RecordScatteredRelocation(MachObjectWriter *Writer,
if (!A_SD->getFragment())
report_fatal_error("symbol '" + A->getName() +
- "' can not be undefined in a subtraction expression");
+ "' can not be undefined in a subtraction expression",
+ false);
uint32_t Value = Writer->getSymbolAddress(A_SD, Layout);
uint64_t SecAddr = Writer->getSectionAddress(A_SD->getFragment()->getParent());
@@ -367,22 +385,23 @@ bool X86MachObjectWriter::RecordScatteredRelocation(MachObjectWriter *Writer,
if (!B_SD->getFragment())
report_fatal_error("symbol '" + B->getSymbol().getName() +
- "' can not be undefined in a subtraction expression");
+ "' can not be undefined in a subtraction expression",
+ false);
// Select the appropriate difference relocation type.
//
// Note that there is no longer any semantic difference between these two
// relocation types from the linker's point of view; this is done solely
// for pedantic compatibility with 'as'.
- Type = A_SD->isExternal() ? (unsigned)macho::RIT_Difference :
- (unsigned)macho::RIT_Generic_LocalDifference;
+ Type = A_SD->isExternal() ? (unsigned)MachO::GENERIC_RELOC_SECTDIFF :
+ (unsigned)MachO::GENERIC_RELOC_LOCAL_SECTDIFF;
Value2 = Writer->getSymbolAddress(B_SD, Layout);
FixedValue -= Writer->getSectionAddress(B_SD->getFragment()->getParent());
}
// Relocations are written out in reverse order, so the PAIR comes first.
- if (Type == macho::RIT_Difference ||
- Type == macho::RIT_Generic_LocalDifference) {
+ if (Type == MachO::GENERIC_RELOC_SECTDIFF ||
+ Type == MachO::GENERIC_RELOC_LOCAL_SECTDIFF) {
// If the offset is too large to fit in a scattered relocation,
// we're hosed. It's an unfortunate limitation of the MachO format.
if (FixupOffset > 0xffffff) {
@@ -396,13 +415,13 @@ bool X86MachObjectWriter::RecordScatteredRelocation(MachObjectWriter *Writer,
llvm_unreachable("fatal error returned?!");
}
- macho::RelocationEntry MRE;
- MRE.Word0 = ((0 << 0) |
- (macho::RIT_Pair << 24) |
- (Log2Size << 28) |
- (IsPCRel << 30) |
- macho::RF_Scattered);
- MRE.Word1 = Value2;
+ MachO::any_relocation_info MRE;
+ MRE.r_word0 = ((0 << 0) | // r_address
+ (MachO::GENERIC_RELOC_PAIR << 24) | // r_type
+ (Log2Size << 28) |
+ (IsPCRel << 30) |
+ MachO::R_SCATTERED);
+ MRE.r_word1 = Value2;
Writer->addRelocation(Fragment->getParent(), MRE);
} else {
// If the offset is more than 24-bits, it won't fit in a scattered
@@ -416,13 +435,13 @@ bool X86MachObjectWriter::RecordScatteredRelocation(MachObjectWriter *Writer,
return false;
}
- macho::RelocationEntry MRE;
- MRE.Word0 = ((FixupOffset << 0) |
- (Type << 24) |
- (Log2Size << 28) |
- (IsPCRel << 30) |
- macho::RF_Scattered);
- MRE.Word1 = Value;
+ MachO::any_relocation_info MRE;
+ MRE.r_word0 = ((FixupOffset << 0) |
+ (Type << 24) |
+ (Log2Size << 28) |
+ (IsPCRel << 30) |
+ MachO::R_SCATTERED);
+ MRE.r_word1 = Value;
Writer->addRelocation(Fragment->getParent(), MRE);
return true;
}
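
Scattered relocations pack their fields differently from the plain form: the address lives in the low 24 bits of `r_word0`, which is exactly why the code above rejects a `FixupOffset` greater than 0xffffff. A sketch (helper name illustrative):

```cpp
#include <cassert>
#include <cstdint>
#include "llvm/Support/MachO.h"

// Scattered relocation word0: 24-bit address, 4-bit type at bit 24,
// 2-bit length at bit 28, pcrel at bit 30, and the R_SCATTERED high bit.
static uint32_t packScatteredWord0(uint32_t Address, unsigned Type,
                                   unsigned Log2Size, bool IsPCRel) {
  assert(Address <= 0xffffff && "offset too large for a scattered reloc");
  return (Address << 0) | (Type << 24) | (Log2Size << 28) |
         (uint32_t(IsPCRel) << 30) | llvm::MachO::R_SCATTERED;
}
```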
@@ -464,13 +483,13 @@ void X86MachObjectWriter::RecordTLVPRelocation(MachObjectWriter *Writer,
}
// struct relocation_info (8 bytes)
- macho::RelocationEntry MRE;
- MRE.Word0 = Value;
- MRE.Word1 = ((Index << 0) |
- (IsPCRel << 24) |
- (Log2Size << 25) |
- (1 << 27) | // Extern
- (macho::RIT_Generic_TLV << 28)); // Type
+ MachO::any_relocation_info MRE;
+ MRE.r_word0 = Value;
+ MRE.r_word1 = ((Index << 0) |
+ (IsPCRel << 24) |
+ (Log2Size << 25) |
+ (1 << 27) | // r_extern
+ (MachO::GENERIC_RELOC_TLV << 28)); // r_type
Writer->addRelocation(Fragment->getParent(), MRE);
}
@@ -530,7 +549,7 @@ void X86MachObjectWriter::RecordX86Relocation(MachObjectWriter *Writer,
//
// FIXME: Currently, these are never generated (see code below). I cannot
// find a case where they are actually emitted.
- Type = macho::RIT_Vanilla;
+ Type = MachO::GENERIC_RELOC_VANILLA;
} else {
// Resolve constant variables.
if (SD->getSymbol().isVariable()) {
@@ -561,17 +580,17 @@ void X86MachObjectWriter::RecordX86Relocation(MachObjectWriter *Writer,
if (IsPCRel)
FixedValue -= Writer->getSectionAddress(Fragment->getParent());
- Type = macho::RIT_Vanilla;
+ Type = MachO::GENERIC_RELOC_VANILLA;
}
// struct relocation_info (8 bytes)
- macho::RelocationEntry MRE;
- MRE.Word0 = FixupOffset;
- MRE.Word1 = ((Index << 0) |
- (IsPCRel << 24) |
- (Log2Size << 25) |
- (IsExtern << 27) |
- (Type << 28));
+ MachO::any_relocation_info MRE;
+ MRE.r_word0 = FixupOffset;
+ MRE.r_word1 = ((Index << 0) |
+ (IsPCRel << 24) |
+ (Log2Size << 25) |
+ (IsExtern << 27) |
+ (Type << 28));
Writer->addRelocation(Fragment->getParent(), MRE);
}
diff --git a/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp b/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp
index ed64a32..6da4142 100644
--- a/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp
@@ -27,7 +27,7 @@ namespace {
public:
X86WinCOFFObjectWriter(bool Is64Bit_);
- ~X86WinCOFFObjectWriter();
+ virtual ~X86WinCOFFObjectWriter();
virtual unsigned getRelocType(const MCValue &Target,
const MCFixup &Fixup,
diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td
index 461ea9b..65c5552 100644
--- a/lib/Target/X86/X86.td
+++ b/lib/Target/X86/X86.td
@@ -50,10 +50,10 @@ def FeatureSSE3 : SubtargetFeature<"sse3", "X86SSELevel", "SSE3",
def FeatureSSSE3 : SubtargetFeature<"ssse3", "X86SSELevel", "SSSE3",
"Enable SSSE3 instructions",
[FeatureSSE3]>;
-def FeatureSSE41 : SubtargetFeature<"sse41", "X86SSELevel", "SSE41",
+def FeatureSSE41 : SubtargetFeature<"sse4.1", "X86SSELevel", "SSE41",
"Enable SSE 4.1 instructions",
[FeatureSSSE3]>;
-def FeatureSSE42 : SubtargetFeature<"sse42", "X86SSELevel", "SSE42",
+def FeatureSSE42 : SubtargetFeature<"sse4.2", "X86SSELevel", "SSE42",
"Enable SSE 4.2 instructions",
[FeatureSSE41]>;
def Feature3DNow : SubtargetFeature<"3dnow", "X863DNowLevel", "ThreeDNow",
@@ -68,7 +68,7 @@ def Feature3DNowA : SubtargetFeature<"3dnowa", "X863DNowLevel", "ThreeDNowA",
def Feature64Bit : SubtargetFeature<"64bit", "HasX86_64", "true",
"Support 64-bit instructions",
[FeatureCMOV]>;
-def FeatureCMPXCHG16B : SubtargetFeature<"cmpxchg16b", "HasCmpxchg16b", "true",
+def FeatureCMPXCHG16B : SubtargetFeature<"cx16", "HasCmpxchg16b", "true",
"64-bit with cmpxchg16b",
[Feature64Bit]>;
def FeatureSlowBTMem : SubtargetFeature<"slow-bt-mem", "IsBTMemSlow", "true",
@@ -86,16 +86,16 @@ def FeatureAVX : SubtargetFeature<"avx", "X86SSELevel", "AVX",
def FeatureAVX2 : SubtargetFeature<"avx2", "X86SSELevel", "AVX2",
"Enable AVX2 instructions",
[FeatureAVX]>;
-def FeatureAVX512 : SubtargetFeature<"avx-512", "X86SSELevel", "AVX512",
+def FeatureAVX512 : SubtargetFeature<"avx512f", "X86SSELevel", "AVX512F",
"Enable AVX-512 instructions",
[FeatureAVX2]>;
-def FeatureERI : SubtargetFeature<"avx-512-eri", "HasERI", "true",
+def FeatureERI : SubtargetFeature<"avx512er", "HasERI", "true",
"Enable AVX-512 Exponential and Reciprocal Instructions",
[FeatureAVX512]>;
-def FeatureCDI : SubtargetFeature<"avx-512-cdi", "HasCDI", "true",
+def FeatureCDI : SubtargetFeature<"avx512cd", "HasCDI", "true",
"Enable AVX-512 Conflict Detection Instructions",
[FeatureAVX512]>;
-def FeaturePFI : SubtargetFeature<"avx-512-pfi", "HasPFI", "true",
+def FeaturePFI : SubtargetFeature<"avx512pf", "HasPFI", "true",
"Enable AVX-512 PreFetch Instructions",
[FeatureAVX512]>;
@@ -117,12 +117,15 @@ def FeatureVectorUAMem : SubtargetFeature<"vector-unaligned-mem",
def FeatureAES : SubtargetFeature<"aes", "HasAES", "true",
"Enable AES instructions",
[FeatureSSE2]>;
+def FeatureTBM : SubtargetFeature<"tbm", "HasTBM", "true",
+ "Enable TBM instructions">;
def FeatureMOVBE : SubtargetFeature<"movbe", "HasMOVBE", "true",
"Support MOVBE instruction">;
-def FeatureRDRAND : SubtargetFeature<"rdrand", "HasRDRAND", "true",
+def FeatureRDRAND : SubtargetFeature<"rdrnd", "HasRDRAND", "true",
"Support RDRAND instruction">;
def FeatureF16C : SubtargetFeature<"f16c", "HasF16C", "true",
- "Support 16-bit floating point conversion instructions">;
+ "Support 16-bit floating point conversion instructions",
+ [FeatureAVX]>;
def FeatureFSGSBase : SubtargetFeature<"fsgsbase", "HasFSGSBase", "true",
"Support FS/GS Base instructions">;
def FeatureLZCNT : SubtargetFeature<"lzcnt", "HasLZCNT", "true",
@@ -137,6 +140,9 @@ def FeatureHLE : SubtargetFeature<"hle", "HasHLE", "true",
"Support HLE">;
def FeatureADX : SubtargetFeature<"adx", "HasADX", "true",
"Support ADX instructions">;
+def FeatureSHA : SubtargetFeature<"sha", "HasSHA", "true",
+ "Enable SHA instructions",
+ [FeatureSSE2]>;
def FeaturePRFCHW : SubtargetFeature<"prfchw", "HasPRFCHW", "true",
"Support PRFCHW instructions">;
def FeatureRDSEED : SubtargetFeature<"rdseed", "HasRDSEED", "true",
@@ -163,6 +169,8 @@ include "X86Schedule.td"
def ProcIntelAtom : SubtargetFeature<"atom", "X86ProcFamily", "IntelAtom",
"Intel Atom processors">;
+def ProcIntelSLM : SubtargetFeature<"slm", "X86ProcFamily", "IntelSLM",
+ "Intel Silvermont processors">;
class Proc<string Name, list<SubtargetFeature> Features>
: ProcessorModel<Name, GenericModel, Features>;
@@ -206,6 +214,14 @@ def : ProcessorModel<"atom", AtomModel,
FeatureLEAUsesAG,
FeaturePadShortFunctions]>;
+// Atom Silvermont.
+def : ProcessorModel<"slm", SLMModel, [ProcIntelSLM,
+ FeatureSSE42, FeatureCMPXCHG16B,
+ FeatureMOVBE, FeaturePOPCNT,
+ FeaturePCLMUL, FeatureAES,
+ FeatureCallRegIndirect,
+ FeaturePRFCHW,
+ FeatureSlowBTMem]>;
// "Arrandale" along with corei3 and corei5
def : ProcessorModel<"corei7", SandyBridgeModel,
[FeatureSSE42, FeatureCMPXCHG16B, FeatureSlowBTMem,
@@ -276,21 +292,30 @@ def : Proc<"amdfam10", [FeatureSSE4A,
FeaturePOPCNT, FeatureSlowBTMem]>;
// Bobcat
def : Proc<"btver1", [FeatureSSSE3, FeatureSSE4A, FeatureCMPXCHG16B,
- FeatureLZCNT, FeaturePOPCNT]>;
+ FeaturePRFCHW, FeatureLZCNT, FeaturePOPCNT]>;
// Jaguar
def : Proc<"btver2", [FeatureAVX, FeatureSSE4A, FeatureCMPXCHG16B,
- FeatureAES, FeaturePCLMUL, FeatureBMI,
- FeatureF16C, FeatureMOVBE, FeatureLZCNT,
- FeaturePOPCNT]>;
+ FeaturePRFCHW, FeatureAES, FeaturePCLMUL,
+ FeatureBMI, FeatureF16C, FeatureMOVBE,
+ FeatureLZCNT, FeaturePOPCNT]>;
// Bulldozer
def : Proc<"bdver1", [FeatureXOP, FeatureFMA4, FeatureCMPXCHG16B,
- FeatureAES, FeaturePCLMUL,
+ FeatureAES, FeaturePRFCHW, FeaturePCLMUL,
FeatureLZCNT, FeaturePOPCNT]>;
// Piledriver
def : Proc<"bdver2", [FeatureXOP, FeatureFMA4, FeatureCMPXCHG16B,
- FeatureAES, FeaturePCLMUL,
+ FeatureAES, FeaturePRFCHW, FeaturePCLMUL,
FeatureF16C, FeatureLZCNT,
- FeaturePOPCNT, FeatureBMI, FeatureFMA]>;
+ FeaturePOPCNT, FeatureBMI, FeatureTBM,
+ FeatureFMA]>;
+
+// Steamroller
+def : Proc<"bdver3", [FeatureXOP, FeatureFMA4, FeatureCMPXCHG16B,
+ FeatureAES, FeaturePRFCHW, FeaturePCLMUL,
+ FeatureF16C, FeatureLZCNT,
+ FeaturePOPCNT, FeatureBMI, FeatureTBM,
+ FeatureFMA, FeatureFSGSBase]>;
+
def : Proc<"geode", [Feature3DNowA]>;
def : Proc<"winchip-c6", [FeatureMMX]>;
diff --git a/lib/Target/X86/X86AsmPrinter.cpp b/lib/Target/X86/X86AsmPrinter.cpp
index 9e0ab82..1258441 100644
--- a/lib/Target/X86/X86AsmPrinter.cpp
+++ b/lib/Target/X86/X86AsmPrinter.cpp
@@ -96,7 +96,7 @@ void X86AsmPrinter::printSymbolOperand(const MachineOperand &MO,
MO.getTargetFlags() == X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE)
GVSym = GetSymbolWithGlobalValueBase(GV, "$non_lazy_ptr");
else
- GVSym = Mang->getSymbol(GV);
+ GVSym = getSymbol(GV);
// Handle dllimport linkage.
if (MO.getTargetFlags() == X86II::MO_DLLIMPORT)
@@ -109,21 +109,21 @@ void X86AsmPrinter::printSymbolOperand(const MachineOperand &MO,
MMI->getObjFileInfo<MachineModuleInfoMachO>().getGVStubEntry(Sym);
if (StubSym.getPointer() == 0)
StubSym = MachineModuleInfoImpl::
- StubValueTy(Mang->getSymbol(GV), !GV->hasInternalLinkage());
+ StubValueTy(getSymbol(GV), !GV->hasInternalLinkage());
} else if (MO.getTargetFlags() == X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE){
MCSymbol *Sym = GetSymbolWithGlobalValueBase(GV, "$non_lazy_ptr");
MachineModuleInfoImpl::StubValueTy &StubSym =
MMI->getObjFileInfo<MachineModuleInfoMachO>().getHiddenGVStubEntry(Sym);
if (StubSym.getPointer() == 0)
StubSym = MachineModuleInfoImpl::
- StubValueTy(Mang->getSymbol(GV), !GV->hasInternalLinkage());
+ StubValueTy(getSymbol(GV), !GV->hasInternalLinkage());
} else if (MO.getTargetFlags() == X86II::MO_DARWIN_STUB) {
MCSymbol *Sym = GetSymbolWithGlobalValueBase(GV, "$stub");
MachineModuleInfoImpl::StubValueTy &StubSym =
MMI->getObjFileInfo<MachineModuleInfoMachO>().getFnStubEntry(Sym);
if (StubSym.getPointer() == 0)
StubSym = MachineModuleInfoImpl::
- StubValueTy(Mang->getSymbol(GV), !GV->hasInternalLinkage());
+ StubValueTy(getSymbol(GV), !GV->hasInternalLinkage());
}
// If the name begins with a dollar-sign, enclose it in parens. We do this
@@ -333,21 +333,21 @@ void X86AsmPrinter::printIntelMemReference(const MachineInstr *MI, unsigned Op,
const MachineOperand &IndexReg = MI->getOperand(Op+2);
const MachineOperand &DispSpec = MI->getOperand(Op+3);
const MachineOperand &SegReg = MI->getOperand(Op+4);
-
+
// If this has a segment register, print it.
if (SegReg.getReg()) {
printOperand(MI, Op+4, O, Modifier, AsmVariant);
O << ':';
}
-
+
O << '[';
-
+
bool NeedPlus = false;
if (BaseReg.getReg()) {
printOperand(MI, Op, O, Modifier, AsmVariant);
NeedPlus = true;
}
-
+
if (IndexReg.getReg()) {
if (NeedPlus) O << " + ";
if (ScaleVal != 1)
@@ -394,6 +394,7 @@ bool X86AsmPrinter::printAsmMRegister(const MachineOperand &MO, char Mode,
Reg = getX86SubSuperRegister(Reg, MVT::i32);
break;
case 'q': // Print DImode register
+ // FIXME: gcc will actually print e instead of r for 32-bit.
Reg = getX86SubSuperRegister(Reg, MVT::i64);
break;
}
@@ -518,6 +519,27 @@ bool X86AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
void X86AsmPrinter::EmitStartOfAsmFile(Module &M) {
if (Subtarget->isTargetEnvMacho())
OutStreamer.SwitchSection(getObjFileLowering().getTextSection());
+
+ if (Subtarget->isTargetCOFF()) {
+ // Emit an absolute @feat.00 symbol. This appears to be some kind of
+ // compiler features bitfield read by link.exe.
+ if (!Subtarget->is64Bit()) {
+ MCSymbol *S = MMI->getContext().GetOrCreateSymbol(StringRef("@feat.00"));
+ OutStreamer.BeginCOFFSymbolDef(S);
+ OutStreamer.EmitCOFFSymbolStorageClass(COFF::IMAGE_SYM_CLASS_STATIC);
+ OutStreamer.EmitCOFFSymbolType(COFF::IMAGE_SYM_DTYPE_NULL);
+ OutStreamer.EndCOFFSymbolDef();
+ // According to the PE-COFF spec, the LSB of this value marks the object
+ // for "registered SEH". This means that all SEH handler entry points
+ // must be registered in .sxdata. Use of any unregistered handlers will
+ // cause the process to terminate immediately. LLVM does not know how to
+ // register any SEH handlers, so its object files should be safe.
+ S->setAbsolute();
+ OutStreamer.EmitSymbolAttribute(S, MCSA_Global);
+ OutStreamer.EmitAssignment(
+ S, MCConstantExpr::Create(int64_t(1), MMI->getContext()));
+ }
+ }
}
@@ -606,6 +628,8 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) {
OutStreamer.AddBlankLine();
}
+ SM.serializeToStackMapSection();
+
// Funny Darwin hack: This flag tells the linker that no global symbols
// contain code that falls through to other global symbols (e.g. the obvious
// implementation of multiple entry points). If this doesn't occur, the
@@ -645,12 +669,12 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) {
for (Module::const_iterator I = M.begin(), E = M.end(); I != E; ++I)
if (I->hasDLLExportLinkage())
- DLLExportedFns.push_back(Mang->getSymbol(I));
+ DLLExportedFns.push_back(getSymbol(I));
for (Module::const_global_iterator I = M.global_begin(),
E = M.global_end(); I != E; ++I)
if (I->hasDLLExportLinkage())
- DLLExportedGlobals.push_back(Mang->getSymbol(I));
+ DLLExportedGlobals.push_back(getSymbol(I));
// Output linker support code for dllexported globals on windows.
if (!DLLExportedGlobals.empty() || !DLLExportedFns.empty()) {
diff --git a/lib/Target/X86/X86AsmPrinter.h b/lib/Target/X86/X86AsmPrinter.h
index 6eed5ce..24a768b 100644
--- a/lib/Target/X86/X86AsmPrinter.h
+++ b/lib/Target/X86/X86AsmPrinter.h
@@ -16,6 +16,7 @@
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/CodeGen/StackMaps.h"
#include "llvm/Support/Compiler.h"
namespace llvm {
@@ -24,9 +25,21 @@ class MCStreamer;
class LLVM_LIBRARY_VISIBILITY X86AsmPrinter : public AsmPrinter {
const X86Subtarget *Subtarget;
+ StackMaps SM;
+
+ // Parses operands of PATCHPOINT and STACKMAP to produce stack map Location
+ // structures. Returns a result location and an iterator to the operand
+ // immediately following the operands consumed.
+ //
+ // This method is implemented in X86MCInstLower.cpp.
+ static std::pair<StackMaps::Location, MachineInstr::const_mop_iterator>
+ stackmapOperandParser(MachineInstr::const_mop_iterator MOI,
+ MachineInstr::const_mop_iterator MOE,
+ const TargetMachine &TM);
+
public:
explicit X86AsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
- : AsmPrinter(TM, Streamer) {
+ : AsmPrinter(TM, Streamer), SM(*this, stackmapOperandParser) {
Subtarget = &TM.getSubtarget<X86Subtarget>();
}
diff --git a/lib/Target/X86/X86CallingConv.h b/lib/Target/X86/X86CallingConv.h
new file mode 100644
index 0000000..e76f9fd
--- /dev/null
+++ b/lib/Target/X86/X86CallingConv.h
@@ -0,0 +1,35 @@
+//=== X86CallingConv.h - X86 Custom Calling Convention Routines -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the custom routines for the X86 Calling Convention that
+// aren't done by tablegen.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86CALLINGCONV_H
+#define X86CALLINGCONV_H
+
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/IR/CallingConv.h"
+
+namespace llvm {
+
+inline bool CC_X86_AnyReg_Error(unsigned &, MVT &, MVT &,
+ CCValAssign::LocInfo &, ISD::ArgFlagsTy &,
+ CCState &) {
+ llvm_unreachable("The AnyReg calling convention is only supported by the " \
+ "stackmap and patchpoint intrinsics.");
+ // gracefully fallback to X86 C calling convention on Release builds.
+ return false;
+}
+
+} // End llvm namespace
+
+#endif
+
diff --git a/lib/Target/X86/X86CallingConv.td b/lib/Target/X86/X86CallingConv.td
index 38e2591..a78b5c0 100644
--- a/lib/Target/X86/X86CallingConv.td
+++ b/lib/Target/X86/X86CallingConv.td
@@ -151,6 +151,26 @@ def RetCC_X86_64_HiPE : CallingConv<[
CCIfType<[i64], CCAssignToReg<[R15, RBP, RAX, RDX]>>
]>;
+// X86-64 WebKit_JS return-value convention.
+def RetCC_X86_64_WebKit_JS : CallingConv<[
+ // Promote all types to i64
+ CCIfType<[i8, i16, i32], CCPromoteToType<i64>>,
+
+ // Return: RAX
+ CCIfType<[i64], CCAssignToReg<[RAX]>>
+]>;
+
+// X86-64 AnyReg return-value convention. No explicit register is specified for
+// the return-value. The register allocator is allowed and expected to choose
+// any free register.
+//
+// This calling convention is currently only supported by the stackmap and
+// patchpoint intrinsics. All other uses will result in an assert on Debug
+ // builds. On Release builds we fall back to the X86 C calling convention.
+def RetCC_X86_64_AnyReg : CallingConv<[
+ CCCustom<"CC_X86_AnyReg_Error">
+]>;
+
// This is the root return-value convention for the X86-32 backend.
def RetCC_X86_32 : CallingConv<[
// If FastCC, use RetCC_X86_32_Fast.
@@ -167,6 +187,10 @@ def RetCC_X86_64 : CallingConv<[
// HiPE uses RetCC_X86_64_HiPE
CCIfCC<"CallingConv::HiPE", CCDelegateTo<RetCC_X86_64_HiPE>>,
+ // Handle JavaScript calls.
+ CCIfCC<"CallingConv::WebKit_JS", CCDelegateTo<RetCC_X86_64_WebKit_JS>>,
+ CCIfCC<"CallingConv::AnyReg", CCDelegateTo<RetCC_X86_64_AnyReg>>,
+
// Handle explicit CC selection
CCIfCC<"CallingConv::X86_64_Win64", CCDelegateTo<RetCC_X86_Win64_C>>,
CCIfCC<"CallingConv::X86_64_SysV", CCDelegateTo<RetCC_X86_64_C>>,
@@ -279,10 +303,10 @@ def CC_X86_Win64_C : CallingConv<[
// The first 4 integer arguments are passed in integer registers.
CCIfType<[i32], CCAssignToRegWithShadow<[ECX , EDX , R8D , R9D ],
[XMM0, XMM1, XMM2, XMM3]>>,
-
+
// Do not pass the sret argument in RCX, the Win64 thiscall calling
- // convention requires "this" to be passed in RCX.
- CCIfCC<"CallingConv::X86_ThisCall",
+ // convention requires "this" to be passed in RCX.
+ CCIfCC<"CallingConv::X86_ThisCall",
CCIfSRet<CCIfType<[i64], CCAssignToRegWithShadow<[RDX , R8 , R9 ],
[XMM1, XMM2, XMM3]>>>>,
@@ -329,6 +353,25 @@ def CC_X86_64_HiPE : CallingConv<[
CCIfType<[i32, i64, f32, f64], CCAssignToStack<8, 8>>
]>;
+def CC_X86_64_WebKit_JS : CallingConv<[
+ // Promote i8/i16 arguments to i32.
+ CCIfType<[i8, i16], CCPromoteToType<i32>>,
+
+ // Integer/FP values are always stored in stack slots that are 8 bytes in size
+ // and 8-byte aligned.
+ CCIfType<[i32, i64, f32, f64], CCAssignToStack<8, 8>>
+]>;
+
+// No explicit register is specified for the AnyReg calling convention. The
+// register allocator may assign the arguments to any free register.
+//
+// This calling convention is currently only supported by the stackmap and
+// patchpoint intrinsics. All other uses will result in an assert on Debug
+ // builds. On Release builds we fall back to the X86 C calling convention.
+def CC_X86_64_AnyReg : CallingConv<[
+ CCCustom<"CC_X86_AnyReg_Error">
+]>;
+
//===----------------------------------------------------------------------===//
// X86 C Calling Convention
//===----------------------------------------------------------------------===//
@@ -354,7 +397,7 @@ def CC_X86_32_Common : CallingConv<[
// Integer/Float values get stored in stack slots that are 4 bytes in
// size and 4-byte aligned.
CCIfType<[i32, f32], CCAssignToStack<4, 4>>,
-
+
// Doubles get 8-byte slots that are 4-byte aligned.
CCIfType<[f64], CCAssignToStack<8, 4>>,
@@ -520,6 +563,8 @@ def CC_X86_32 : CallingConv<[
def CC_X86_64 : CallingConv<[
CCIfCC<"CallingConv::GHC", CCDelegateTo<CC_X86_64_GHC>>,
CCIfCC<"CallingConv::HiPE", CCDelegateTo<CC_X86_64_HiPE>>,
+ CCIfCC<"CallingConv::WebKit_JS", CCDelegateTo<CC_X86_64_WebKit_JS>>,
+ CCIfCC<"CallingConv::AnyReg", CCDelegateTo<CC_X86_64_AnyReg>>,
CCIfCC<"CallingConv::X86_64_Win64", CCDelegateTo<CC_X86_Win64_C>>,
CCIfCC<"CallingConv::X86_64_SysV", CCDelegateTo<CC_X86_64_C>>,
@@ -558,11 +603,11 @@ def CSR_MostRegs_64 : CalleeSavedRegs<(add RBX, RCX, RDX, RSI, RDI, R8, R9, R10,
// Standard C + YMM6-15
def CSR_Win64_Intel_OCL_BI_AVX : CalleeSavedRegs<(add RBX, RBP, RDI, RSI, R12,
- R13, R14, R15,
+ R13, R14, R15,
(sequence "YMM%u", 6, 15))>;
def CSR_Win64_Intel_OCL_BI_AVX512 : CalleeSavedRegs<(add RBX, RBP, RDI, RSI,
- R12, R13, R14, R15,
+ R12, R13, R14, R15,
(sequence "ZMM%u", 6, 21),
K4, K5, K6, K7)>;
//Standard C + XMM 8-15
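
TableGen lowers each CCIfCC/CCDelegateTo pair above into an if-chain in the generated X86GenCallingConv.inc. A rough sketch of the resulting shape (illustrative, not the actual generated file; per the CCAssignFn convention, returning true signals failure to assign):

```cpp
// Approximate shape of the generated dispatcher after this change.
static bool RetCC_X86_64(unsigned ValNo, MVT ValVT, MVT LocVT,
                         CCValAssign::LocInfo LocInfo,
                         ISD::ArgFlagsTy ArgFlags, CCState &State) {
  if (State.getCallingConv() == CallingConv::WebKit_JS)
    if (!RetCC_X86_64_WebKit_JS(ValNo, ValVT, LocVT, LocInfo, ArgFlags,
                                State))
      return false; // delegate assigned the value
  if (State.getCallingConv() == CallingConv::AnyReg)
    if (!RetCC_X86_64_AnyReg(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State))
      return false;
  // ... remaining conventions elided ...
  return true; // nothing matched: failure
}
```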
diff --git a/lib/Target/X86/X86CodeEmitter.cpp b/lib/Target/X86/X86CodeEmitter.cpp
index 5d72b44..14385ed 100644
--- a/lib/Target/X86/X86CodeEmitter.cpp
+++ b/lib/Target/X86/X86CodeEmitter.cpp
@@ -840,7 +840,7 @@ void Emitter<CodeEmitter>::emitVEXOpcodePrefix(uint64_t TSFlags,
unsigned char VEX_W = 0;
// XOP: Use XOP prefix byte 0x8f instead of VEX.
- unsigned char XOP = 0;
+ bool XOP = false;
// VEX_5M (VEX m-mmmmm field):
//
@@ -850,7 +850,8 @@ void Emitter<CodeEmitter>::emitVEXOpcodePrefix(uint64_t TSFlags,
// 0b00011: implied 0F 3A leading opcode bytes
// 0b00100-0b11111: Reserved for future use
// 0b01000: XOP map select - 08h instructions with imm byte
- // 0b10001: XOP map select - 09h instructions with no imm byte
+ // 0b01001: XOP map select - 09h instructions with no imm byte
+ // 0b01010: XOP map select - 0Ah instructions with imm dword
unsigned char VEX_5M = 0x1;
// VEX_4V (VEX vvvv field): a register specifier
@@ -882,7 +883,7 @@ void Emitter<CodeEmitter>::emitVEXOpcodePrefix(uint64_t TSFlags,
VEX_W = 1;
if ((TSFlags >> X86II::VEXShift) & X86II::XOP)
- XOP = 1;
+ XOP = true;
if ((TSFlags >> X86II::VEXShift) & X86II::VEX_L)
VEX_L = 1;
@@ -919,11 +920,11 @@ void Emitter<CodeEmitter>::emitVEXOpcodePrefix(uint64_t TSFlags,
case X86II::XOP9:
VEX_5M = 0x9;
break;
- case X86II::A6: // Bypass: Not used by VEX
- case X86II::A7: // Bypass: Not used by VEX
- case X86II::TB: // Bypass: Not used by VEX
- case 0:
- break; // No prefix!
+ case X86II::XOPA:
+ VEX_5M = 0xA;
+ break;
+ case X86II::TB: // VEX_5M/VEX_PP already correct
+ break;
}
@@ -982,11 +983,14 @@ void Emitter<CodeEmitter>::emitVEXOpcodePrefix(uint64_t TSFlags,
// FMA4:
// dst(ModR/M.reg), src1(VEX_4V), src2(ModR/M), src3(VEX_I8IMM)
// dst(ModR/M.reg), src1(VEX_4V), src2(VEX_I8IMM), src3(ModR/M),
- if (X86II::isX86_64ExtendedReg(MI.getOperand(0).getReg()))
+ if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg()))
VEX_R = 0x0;
+ CurOp++;
- if (HasVEX_4V)
- VEX_4V = getVEXRegisterEncoding(MI, 1);
+ if (HasVEX_4V) {
+ VEX_4V = getVEXRegisterEncoding(MI, CurOp);
+ CurOp++;
+ }
if (X86II::isX86_64ExtendedReg(
MI.getOperand(MemOperand+X86::AddrBaseReg).getReg()))
@@ -996,7 +1000,7 @@ void Emitter<CodeEmitter>::emitVEXOpcodePrefix(uint64_t TSFlags,
VEX_X = 0x0;
if (HasVEX_4VOp3)
- VEX_4V = getVEXRegisterEncoding(MI, X86::AddrNumOperands+1);
+ VEX_4V = getVEXRegisterEncoding(MI, CurOp+X86::AddrNumOperands);
break;
case X86II::MRM0m: case X86II::MRM1m:
case X86II::MRM2m: case X86II::MRM3m:
@@ -1006,7 +1010,7 @@ void Emitter<CodeEmitter>::emitVEXOpcodePrefix(uint64_t TSFlags,
// MemAddr
// src1(VEX_4V), MemAddr
if (HasVEX_4V)
- VEX_4V = getVEXRegisterEncoding(MI, 0);
+ VEX_4V = getVEXRegisterEncoding(MI, CurOp++);
if (X86II::isX86_64ExtendedReg(
MI.getOperand(MemOperand+X86::AddrBaseReg).getReg()))
@@ -1059,8 +1063,10 @@ void Emitter<CodeEmitter>::emitVEXOpcodePrefix(uint64_t TSFlags,
case X86II::MRM6r: case X86II::MRM7r:
// MRM0r-MRM7r instructions forms:
// dst(VEX_4V), src(ModR/M), imm8
- VEX_4V = getVEXRegisterEncoding(MI, 0);
- if (X86II::isX86_64ExtendedReg(MI.getOperand(1).getReg()))
+ VEX_4V = getVEXRegisterEncoding(MI, CurOp);
+ CurOp++;
+
+ if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg()))
VEX_B = 0x0;
break;
default: // RawFrm
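
The VEX_R/VEX_X/VEX_B/VEX_5M/VEX_4V fields being tracked above end up packed into the two- or three-byte VEX prefix, or the 0x8F XOP form selected by the XOP flag. A sketch of the three-byte form, assuming (as in the code above) that R/X/B and vvvv are already held in their inverted one's-complement encodings (helper is illustrative, not LLVM API):

```cpp
#include "llvm/ADT/SmallVector.h"
#include <cstdint>

// Three-byte VEX/XOP prefix:
//   byte0: 0xC4 (VEX) or 0x8F (XOP)
//   byte1: R|X|B in the top three bits, mmmmm (VEX_5M) below
//   byte2: W, vvvv (VEX_4V), L, pp
static void emitVex3(bool XOP, uint8_t VEX_R, uint8_t VEX_X, uint8_t VEX_B,
                     uint8_t VEX_5M, uint8_t VEX_W, uint8_t VEX_4V,
                     uint8_t VEX_L, uint8_t VEX_PP,
                     llvm::SmallVectorImpl<uint8_t> &Out) {
  Out.push_back(XOP ? 0x8F : 0xC4);
  Out.push_back(uint8_t((VEX_R << 7) | (VEX_X << 6) | (VEX_B << 5) | VEX_5M));
  Out.push_back(uint8_t((VEX_W << 7) | (VEX_4V << 3) | (VEX_L << 2) | VEX_PP));
}
```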
diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp
index 5bc3420..97f96ab 100644
--- a/lib/Target/X86/X86FastISel.cpp
+++ b/lib/Target/X86/X86FastISel.cpp
@@ -14,6 +14,7 @@
//===----------------------------------------------------------------------===//
#include "X86.h"
+#include "X86CallingConv.h"
#include "X86ISelLowering.h"
#include "X86InstrBuilder.h"
#include "X86RegisterInfo.h"
@@ -125,6 +126,8 @@ private:
return static_cast<const X86TargetMachine *>(&TM);
}
+ bool handleConstantAddresses(const Value *V, X86AddressMode &AM);
+
unsigned TargetMaterializeConstant(const Constant *C);
unsigned TargetMaterializeAlloca(const AllocaInst *C);
@@ -344,9 +347,126 @@ bool X86FastISel::X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT,
return true;
}
+bool X86FastISel::handleConstantAddresses(const Value *V, X86AddressMode &AM) {
+ // Handle constant address.
+ if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
+ // Can't handle alternate code models yet.
+ if (TM.getCodeModel() != CodeModel::Small)
+ return false;
+
+ // Can't handle TLS yet.
+ if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
+ if (GVar->isThreadLocal())
+ return false;
+
+ // Can't handle TLS yet, part 2 (this is slightly crazy, but this is how
+ // it works...).
+ if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
+ if (const GlobalVariable *GVar =
+ dyn_cast_or_null<GlobalVariable>(GA->resolveAliasedGlobal(false)))
+ if (GVar->isThreadLocal())
+ return false;
+
+ // RIP-relative addresses can't have additional register operands, so if
+ // we've already folded stuff into the addressing mode, just force the
+ // global value into its own register, which we can use as the basereg.
+ if (!Subtarget->isPICStyleRIPRel() ||
+ (AM.Base.Reg == 0 && AM.IndexReg == 0)) {
+ // Okay, we've committed to selecting this global. Set up the address.
+ AM.GV = GV;
+
+ // Allow the subtarget to classify the global.
+ unsigned char GVFlags = Subtarget->ClassifyGlobalReference(GV, TM);
+
+ // If this reference is relative to the pic base, set it now.
+ if (isGlobalRelativeToPICBase(GVFlags)) {
+ // FIXME: How do we know Base.Reg is free??
+ AM.Base.Reg = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
+ }
+
+ // Unless the ABI requires an extra load, return a direct reference to
+ // the global.
+ if (!isGlobalStubReference(GVFlags)) {
+ if (Subtarget->isPICStyleRIPRel()) {
+ // Use rip-relative addressing if we can. Above we verified that the
+ // base and index registers are unused.
+ assert(AM.Base.Reg == 0 && AM.IndexReg == 0);
+ AM.Base.Reg = X86::RIP;
+ }
+ AM.GVOpFlags = GVFlags;
+ return true;
+ }
+
+ // Ok, we need to do a load from a stub. If we've already loaded from
+ // this stub, reuse the loaded pointer, otherwise emit the load now.
+ DenseMap<const Value*, unsigned>::iterator I = LocalValueMap.find(V);
+ unsigned LoadReg;
+ if (I != LocalValueMap.end() && I->second != 0) {
+ LoadReg = I->second;
+ } else {
+ // Issue load from stub.
+ unsigned Opc = 0;
+ const TargetRegisterClass *RC = NULL;
+ X86AddressMode StubAM;
+ StubAM.Base.Reg = AM.Base.Reg;
+ StubAM.GV = GV;
+ StubAM.GVOpFlags = GVFlags;
+
+ // Prepare for inserting code in the local-value area.
+ SavePoint SaveInsertPt = enterLocalValueArea();
+
+ if (TLI.getPointerTy() == MVT::i64) {
+ Opc = X86::MOV64rm;
+ RC = &X86::GR64RegClass;
+
+ if (Subtarget->isPICStyleRIPRel())
+ StubAM.Base.Reg = X86::RIP;
+ } else {
+ Opc = X86::MOV32rm;
+ RC = &X86::GR32RegClass;
+ }
+
+ LoadReg = createResultReg(RC);
+ MachineInstrBuilder LoadMI =
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), LoadReg);
+ addFullAddress(LoadMI, StubAM);
+
+ // Ok, back to normal mode.
+ leaveLocalValueArea(SaveInsertPt);
+
+ // Prevent loading GV stub multiple times in same MBB.
+ LocalValueMap[V] = LoadReg;
+ }
+
+ // Now construct the final address. Note that the Disp, Scale,
+ // and Index values may already be set here.
+ AM.Base.Reg = LoadReg;
+ AM.GV = 0;
+ return true;
+ }
+ }
+
+ // If all else fails, try to materialize the value in a register.
+ if (!AM.GV || !Subtarget->isPICStyleRIPRel()) {
+ if (AM.Base.Reg == 0) {
+ AM.Base.Reg = getRegForValue(V);
+ return AM.Base.Reg != 0;
+ }
+ if (AM.IndexReg == 0) {
+ assert(AM.Scale == 1 && "Scale with no index!");
+ AM.IndexReg = getRegForValue(V);
+ return AM.IndexReg != 0;
+ }
+ }
+
+ return false;
+}
+
/// X86SelectAddress - Attempt to fill in an address from the given value.
///
bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) {
+ SmallVector<const Value *, 32> GEPs;
+redo_gep:
const User *U = NULL;
unsigned Opcode = Instruction::UserOp1;
if (const Instruction *I = dyn_cast<Instruction>(V)) {
@@ -441,13 +561,8 @@ bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) {
Disp += CI->getSExtValue() * S;
break;
}
- if (isa<AddOperator>(Op) &&
- (!isa<Instruction>(Op) ||
- FuncInfo.MBBMap[cast<Instruction>(Op)->getParent()]
- == FuncInfo.MBB) &&
- isa<ConstantInt>(cast<AddOperator>(Op)->getOperand(1))) {
- // An add (in the same block) with a constant operand. Fold the
- // constant.
+ if (canFoldAddIntoGEP(U, Op)) {
+ // A compatible add with a constant operand. Fold the constant.
ConstantInt *CI =
cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
Disp += CI->getSExtValue() * S;
@@ -469,139 +584,43 @@ bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) {
goto unsupported_gep;
}
}
+
// Check for displacement overflow.
if (!isInt<32>(Disp))
break;
- // Ok, the GEP indices were covered by constant-offset and scaled-index
- // addressing. Update the address state and move on to examining the base.
+
AM.IndexReg = IndexReg;
AM.Scale = Scale;
AM.Disp = (uint32_t)Disp;
- if (X86SelectAddress(U->getOperand(0), AM))
+ GEPs.push_back(V);
+
+ if (const GetElementPtrInst *GEP =
+ dyn_cast<GetElementPtrInst>(U->getOperand(0))) {
+ // Ok, the GEP indices were covered by constant-offset and scaled-index
+ // addressing. Update the address state and move on to examining the base.
+ V = GEP;
+ goto redo_gep;
+ } else if (X86SelectAddress(U->getOperand(0), AM)) {
return true;
+ }
// If we couldn't merge the gep value into this addr mode, revert back to
// our address and just match the value instead of completely failing.
AM = SavedAM;
- break;
- unsupported_gep:
- // Ok, the GEP indices weren't all covered.
- break;
- }
- }
-
- // Handle constant address.
- if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
- // Can't handle alternate code models yet.
- if (TM.getCodeModel() != CodeModel::Small)
- return false;
- // Can't handle TLS yet.
- if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
- if (GVar->isThreadLocal())
- return false;
-
- // Can't handle TLS yet, part 2 (this is slightly crazy, but this is how
- // it works...).
- if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
- if (const GlobalVariable *GVar =
- dyn_cast_or_null<GlobalVariable>(GA->resolveAliasedGlobal(false)))
- if (GVar->isThreadLocal())
- return false;
-
- // RIP-relative addresses can't have additional register operands, so if
- // we've already folded stuff into the addressing mode, just force the
- // global value into its own register, which we can use as the basereg.
- if (!Subtarget->isPICStyleRIPRel() ||
- (AM.Base.Reg == 0 && AM.IndexReg == 0)) {
- // Okay, we've committed to selecting this global. Set up the address.
- AM.GV = GV;
-
- // Allow the subtarget to classify the global.
- unsigned char GVFlags = Subtarget->ClassifyGlobalReference(GV, TM);
-
- // If this reference is relative to the pic base, set it now.
- if (isGlobalRelativeToPICBase(GVFlags)) {
- // FIXME: How do we know Base.Reg is free??
- AM.Base.Reg = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
- }
-
- // Unless the ABI requires an extra load, return a direct reference to
- // the global.
- if (!isGlobalStubReference(GVFlags)) {
- if (Subtarget->isPICStyleRIPRel()) {
- // Use rip-relative addressing if we can. Above we verified that the
- // base and index registers are unused.
- assert(AM.Base.Reg == 0 && AM.IndexReg == 0);
- AM.Base.Reg = X86::RIP;
- }
- AM.GVOpFlags = GVFlags;
+ for (SmallVectorImpl<const Value *>::reverse_iterator
+ I = GEPs.rbegin(), E = GEPs.rend(); I != E; ++I)
+ if (handleConstantAddresses(*I, AM))
return true;
- }
-
- // Ok, we need to do a load from a stub. If we've already loaded from
- // this stub, reuse the loaded pointer, otherwise emit the load now.
- DenseMap<const Value*, unsigned>::iterator I = LocalValueMap.find(V);
- unsigned LoadReg;
- if (I != LocalValueMap.end() && I->second != 0) {
- LoadReg = I->second;
- } else {
- // Issue load from stub.
- unsigned Opc = 0;
- const TargetRegisterClass *RC = NULL;
- X86AddressMode StubAM;
- StubAM.Base.Reg = AM.Base.Reg;
- StubAM.GV = GV;
- StubAM.GVOpFlags = GVFlags;
-
- // Prepare for inserting code in the local-value area.
- SavePoint SaveInsertPt = enterLocalValueArea();
-
- if (TLI.getPointerTy() == MVT::i64) {
- Opc = X86::MOV64rm;
- RC = &X86::GR64RegClass;
- if (Subtarget->isPICStyleRIPRel())
- StubAM.Base.Reg = X86::RIP;
- } else {
- Opc = X86::MOV32rm;
- RC = &X86::GR32RegClass;
- }
-
- LoadReg = createResultReg(RC);
- MachineInstrBuilder LoadMI =
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), LoadReg);
- addFullAddress(LoadMI, StubAM);
-
- // Ok, back to normal mode.
- leaveLocalValueArea(SaveInsertPt);
-
- // Prevent loading GV stub multiple times in same MBB.
- LocalValueMap[V] = LoadReg;
- }
-
- // Now construct the final address. Note that the Disp, Scale,
- // and Index values may already be set here.
- AM.Base.Reg = LoadReg;
- AM.GV = 0;
- return true;
- }
+ return false;
+ unsupported_gep:
+ // Ok, the GEP indices weren't all covered.
+ break;
}
-
- // If all else fails, try to materialize the value in a register.
- if (!AM.GV || !Subtarget->isPICStyleRIPRel()) {
- if (AM.Base.Reg == 0) {
- AM.Base.Reg = getRegForValue(V);
- return AM.Base.Reg != 0;
- }
- if (AM.IndexReg == 0) {
- assert(AM.Scale == 1 && "Scale with no index!");
- AM.IndexReg = getRegForValue(V);
- return AM.IndexReg != 0;
- }
}
- return false;
+ return handleConstantAddresses(V, AM);
}
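
The rewritten X86SelectAddress replaces recursion over nested GEPs with the redo_gep loop plus a worklist, then retries the collected GEPs innermost-first through handleConstantAddresses when the base fails to match. A generic restatement of the peeling step (illustrative helper, not part of the patch):

```cpp
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Instructions.h"

// Peel nested GEPs into a worklist iteratively, mirroring the redo_gep
// pattern above; the caller can then walk GEPs in reverse to retry each
// level against the addressing mode.
static void collectGEPChain(const llvm::Value *V,
                            llvm::SmallVectorImpl<const llvm::Value *> &GEPs) {
  using namespace llvm;
  while (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(V)) {
    GEPs.push_back(V);      // remember this GEP level
    V = GEP->getOperand(0); // descend into the base pointer
  }
}
```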
/// X86SelectCallAddress - Attempt to fill in an address from the given value.
@@ -609,9 +628,35 @@ bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) {
bool X86FastISel::X86SelectCallAddress(const Value *V, X86AddressMode &AM) {
const User *U = NULL;
unsigned Opcode = Instruction::UserOp1;
- if (const Instruction *I = dyn_cast<Instruction>(V)) {
+ const Instruction *I = dyn_cast<Instruction>(V);
+ // Record if the value is defined in the same basic block.
+ //
+ // This information is crucial to know whether or not folding an
+ // operand is valid.
+ // Indeed, FastISel generates or reuses a virtual register for all
+ // operands of all instructions it selects. Obviously, the definition and
+ // its uses must use the same virtual register, otherwise the produced
+ // code is incorrect.
+ // Before instruction selection, FunctionLoweringInfo::set sets the virtual
+ // registers for values that are alive across basic blocks. This ensures
+ // that the values are set consistently across basic blocks, even if
+ // different instruction selection mechanisms are used (e.g., a mix of
+ // SDISel and FastISel).
+ // For values local to a basic block, the instruction selection process
+ // generates these virtual registers with whatever method is appropriate
+ // for its needs. In particular, FastISel and SDISel do not share the way
+ // local virtual registers are set.
+ // Therefore, it is impossible (or at least unsafe) to share values
+ // between basic blocks unless they use the same instruction selection
+ // method, which is not guaranteed for X86.
+ // Moreover, things like hasOneUse cannot be used accurately if we allow
+ // references to values across basic blocks when they were not originally
+ // alive across basic blocks.
+ bool InMBB = true;
+ if (I) {
Opcode = I->getOpcode();
U = I;
+ InMBB = I->getParent() == FuncInfo.MBB->getBasicBlock();
} else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(V)) {
Opcode = C->getOpcode();
U = C;
@@ -620,18 +665,22 @@ bool X86FastISel::X86SelectCallAddress(const Value *V, X86AddressMode &AM) {
switch (Opcode) {
default: break;
case Instruction::BitCast:
- // Look past bitcasts.
- return X86SelectCallAddress(U->getOperand(0), AM);
+ // Look past bitcasts if the operand is in the same BB.
+ if (InMBB)
+ return X86SelectCallAddress(U->getOperand(0), AM);
+ break;
case Instruction::IntToPtr:
- // Look past no-op inttoptrs.
- if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
+ // Look past no-op inttoptrs if the operand is in the same BB.
+ if (InMBB &&
+ TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
return X86SelectCallAddress(U->getOperand(0), AM);
break;
case Instruction::PtrToInt:
- // Look past no-op ptrtoints.
- if (TLI.getValueType(U->getType()) == TLI.getPointerTy())
+ // Look past no-op ptrtoints if the operand is in the same BB.
+ if (InMBB &&
+ TLI.getValueType(U->getType()) == TLI.getPointerTy())
return X86SelectCallAddress(U->getOperand(0), AM);
break;
}
@@ -1026,7 +1075,7 @@ bool X86FastISel::X86SelectZExt(const Instruction *I) {
return false;
// Handle zero-extension from i1 to i8, which is common.
- MVT SrcVT = TLI.getValueType(I->getOperand(0)->getType()).getSimpleVT();
+ MVT SrcVT = TLI.getSimpleValueType(I->getOperand(0)->getType());
if (SrcVT.SimpleTy == MVT::i1) {
// Set the high bits to zero.
ResultReg = FastEmitZExtFromI1(MVT::i8, ResultReg, /*TODO: Kill=*/false);
diff --git a/lib/Target/X86/X86FixupLEAs.cpp b/lib/Target/X86/X86FixupLEAs.cpp
index d21cb8a..38a8351 100644
--- a/lib/Target/X86/X86FixupLEAs.cpp
+++ b/lib/Target/X86/X86FixupLEAs.cpp
@@ -125,6 +125,15 @@ FixupLEAPass::postRAConvertToLEA(MachineFunction::iterator &MFI,
// which requires isImm() to be true
return 0;
}
+ break;
+ case X86::ADD16rr:
+ case X86::ADD16rr_DB:
+ if (MI->getOperand(1).getReg() != MI->getOperand(2).getReg()) {
+ // If src1 != src2, then convertToThreeAddress will need to create a
+ // virtual register, which we cannot do after register allocation.
+ return 0;
+ }
}
return TII->convertToThreeAddress(MFI, MBBI, 0);
}
diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp
index b994e67..a06ba9d 100644
--- a/lib/Target/X86/X86FrameLowering.cpp
+++ b/lib/Target/X86/X86FrameLowering.cpp
@@ -365,270 +365,13 @@ void X86FrameLowering::emitCalleeSavedFrameMoves(MachineFunction &MF,
}
}
-/// getCompactUnwindRegNum - Get the compact unwind number for a given
-/// register. The number corresponds to the enum lists in
-/// compact_unwind_encoding.h.
-static int getCompactUnwindRegNum(unsigned Reg, bool is64Bit) {
- static const uint16_t CU32BitRegs[] = {
- X86::EBX, X86::ECX, X86::EDX, X86::EDI, X86::ESI, X86::EBP, 0
- };
- static const uint16_t CU64BitRegs[] = {
- X86::RBX, X86::R12, X86::R13, X86::R14, X86::R15, X86::RBP, 0
- };
- const uint16_t *CURegs = is64Bit ? CU64BitRegs : CU32BitRegs;
- for (int Idx = 1; *CURegs; ++CURegs, ++Idx)
- if (*CURegs == Reg)
- return Idx;
-
- return -1;
-}
-
-// Number of registers that can be saved in a compact unwind encoding.
-#define CU_NUM_SAVED_REGS 6
-
-/// encodeCompactUnwindRegistersWithoutFrame - Create the permutation encoding
-/// used with frameless stacks. It is passed the number of registers to be saved
-/// and an array of the registers saved.
-static uint32_t
-encodeCompactUnwindRegistersWithoutFrame(unsigned SavedRegs[CU_NUM_SAVED_REGS],
- unsigned RegCount, bool Is64Bit) {
- // The saved registers are numbered from 1 to 6. In order to encode the order
- // in which they were saved, we re-number them according to their place in the
- // register order. The re-numbering is relative to the last re-numbered
- // register. E.g., if we have registers {6, 2, 4, 5} saved in that order:
- //
- // Orig Re-Num
- // ---- ------
- // 6 6
- // 2 2
- // 4 3
- // 5 3
- //
- for (unsigned i = 0; i != CU_NUM_SAVED_REGS; ++i) {
- int CUReg = getCompactUnwindRegNum(SavedRegs[i], Is64Bit);
- if (CUReg == -1) return ~0U;
- SavedRegs[i] = CUReg;
- }
-
- // Reverse the list.
- std::swap(SavedRegs[0], SavedRegs[5]);
- std::swap(SavedRegs[1], SavedRegs[4]);
- std::swap(SavedRegs[2], SavedRegs[3]);
-
- uint32_t RenumRegs[CU_NUM_SAVED_REGS];
- for (unsigned i = CU_NUM_SAVED_REGS - RegCount; i < CU_NUM_SAVED_REGS; ++i) {
- unsigned Countless = 0;
- for (unsigned j = CU_NUM_SAVED_REGS - RegCount; j < i; ++j)
- if (SavedRegs[j] < SavedRegs[i])
- ++Countless;
-
- RenumRegs[i] = SavedRegs[i] - Countless - 1;
- }
-
- // Take the renumbered values and encode them into a 10-bit number.
- uint32_t permutationEncoding = 0;
- switch (RegCount) {
- case 6:
- permutationEncoding |= 120 * RenumRegs[0] + 24 * RenumRegs[1]
- + 6 * RenumRegs[2] + 2 * RenumRegs[3]
- + RenumRegs[4];
- break;
- case 5:
- permutationEncoding |= 120 * RenumRegs[1] + 24 * RenumRegs[2]
- + 6 * RenumRegs[3] + 2 * RenumRegs[4]
- + RenumRegs[5];
- break;
- case 4:
- permutationEncoding |= 60 * RenumRegs[2] + 12 * RenumRegs[3]
- + 3 * RenumRegs[4] + RenumRegs[5];
- break;
- case 3:
- permutationEncoding |= 20 * RenumRegs[3] + 4 * RenumRegs[4]
- + RenumRegs[5];
- break;
- case 2:
- permutationEncoding |= 5 * RenumRegs[4] + RenumRegs[5];
- break;
- case 1:
- permutationEncoding |= RenumRegs[5];
- break;
- }
-
- assert((permutationEncoding & 0x3FF) == permutationEncoding &&
- "Invalid compact register encoding!");
- return permutationEncoding;
-}
-
-/// encodeCompactUnwindRegistersWithFrame - Return the registers encoded for a
-/// compact encoding with a frame pointer.
-static uint32_t
-encodeCompactUnwindRegistersWithFrame(unsigned SavedRegs[CU_NUM_SAVED_REGS],
- bool Is64Bit) {
- // Encode the registers in the order they were saved, 3-bits per register. The
- // registers are numbered from 1 to CU_NUM_SAVED_REGS.
- uint32_t RegEnc = 0;
- for (int I = CU_NUM_SAVED_REGS - 1, Idx = 0; I != -1; --I) {
- unsigned Reg = SavedRegs[I];
- if (Reg == 0) continue;
-
- int CURegNum = getCompactUnwindRegNum(Reg, Is64Bit);
- if (CURegNum == -1) return ~0U;
-
- // Encode the 3-bit register number in order, skipping over 3-bits for each
- // register.
- RegEnc |= (CURegNum & 0x7) << (Idx++ * 3);
- }
-
- assert((RegEnc & 0x3FFFF) == RegEnc && "Invalid compact register encoding!");
- return RegEnc;
-}
-
-uint32_t X86FrameLowering::getCompactUnwindEncoding(MachineFunction &MF) const {
- const X86RegisterInfo *RegInfo = TM.getRegisterInfo();
- unsigned FramePtr = RegInfo->getFrameRegister(MF);
- unsigned StackPtr = RegInfo->getStackRegister();
-
- bool Is64Bit = STI.is64Bit();
- bool HasFP = hasFP(MF);
-
- unsigned SavedRegs[CU_NUM_SAVED_REGS] = { 0, 0, 0, 0, 0, 0 };
- unsigned SavedRegIdx = 0;
-
- unsigned OffsetSize = (Is64Bit ? 8 : 4);
-
- unsigned PushInstr = (Is64Bit ? X86::PUSH64r : X86::PUSH32r);
- unsigned PushInstrSize = 1;
- unsigned MoveInstr = (Is64Bit ? X86::MOV64rr : X86::MOV32rr);
- unsigned MoveInstrSize = (Is64Bit ? 3 : 2);
- unsigned SubtractInstrIdx = (Is64Bit ? 3 : 2);
-
- unsigned StackDivide = (Is64Bit ? 8 : 4);
-
- unsigned InstrOffset = 0;
- unsigned StackAdjust = 0;
- unsigned StackSize = 0;
-
- MachineBasicBlock &MBB = MF.front(); // Prologue is in entry BB.
- bool ExpectEnd = false;
- for (MachineBasicBlock::iterator
- MBBI = MBB.begin(), MBBE = MBB.end(); MBBI != MBBE; ++MBBI) {
- MachineInstr &MI = *MBBI;
- unsigned Opc = MI.getOpcode();
- if (Opc == X86::PROLOG_LABEL) continue;
- if (!MI.getFlag(MachineInstr::FrameSetup)) break;
-
- // We don't exect any more prolog instructions.
- if (ExpectEnd) return CU::UNWIND_MODE_DWARF;
-
- if (Opc == PushInstr) {
- // If there are too many saved registers, we cannot use compact encoding.
- if (SavedRegIdx >= CU_NUM_SAVED_REGS) return CU::UNWIND_MODE_DWARF;
-
- unsigned Reg = MI.getOperand(0).getReg();
- if (Reg == (Is64Bit ? X86::RAX : X86::EAX)) {
- ExpectEnd = true;
- continue;
- }
-
- SavedRegs[SavedRegIdx++] = MI.getOperand(0).getReg();
- StackAdjust += OffsetSize;
- InstrOffset += PushInstrSize;
- } else if (Opc == MoveInstr) {
- unsigned SrcReg = MI.getOperand(1).getReg();
- unsigned DstReg = MI.getOperand(0).getReg();
-
- if (DstReg != FramePtr || SrcReg != StackPtr)
- return CU::UNWIND_MODE_DWARF;
-
- StackAdjust = 0;
- memset(SavedRegs, 0, sizeof(SavedRegs));
- SavedRegIdx = 0;
- InstrOffset += MoveInstrSize;
- } else if (Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 ||
- Opc == X86::SUB32ri || Opc == X86::SUB32ri8) {
- if (StackSize)
- // We already have a stack size.
- return CU::UNWIND_MODE_DWARF;
-
- if (!MI.getOperand(0).isReg() ||
- MI.getOperand(0).getReg() != MI.getOperand(1).getReg() ||
- MI.getOperand(0).getReg() != StackPtr || !MI.getOperand(2).isImm())
- // We need this to be a stack adjustment pointer. Something like:
- //
- // %RSP<def> = SUB64ri8 %RSP, 48
- return CU::UNWIND_MODE_DWARF;
-
- StackSize = MI.getOperand(2).getImm() / StackDivide;
- SubtractInstrIdx += InstrOffset;
- ExpectEnd = true;
- }
- }
-
- // Encode that we are using EBP/RBP as the frame pointer.
- uint32_t CompactUnwindEncoding = 0;
- StackAdjust /= StackDivide;
- if (HasFP) {
- if ((StackAdjust & 0xFF) != StackAdjust)
- // Offset was too big for compact encoding.
- return CU::UNWIND_MODE_DWARF;
-
- // Get the encoding of the saved registers when we have a frame pointer.
- uint32_t RegEnc = encodeCompactUnwindRegistersWithFrame(SavedRegs, Is64Bit);
- if (RegEnc == ~0U) return CU::UNWIND_MODE_DWARF;
-
- CompactUnwindEncoding |= CU::UNWIND_MODE_BP_FRAME;
- CompactUnwindEncoding |= (StackAdjust & 0xFF) << 16;
- CompactUnwindEncoding |= RegEnc & CU::UNWIND_BP_FRAME_REGISTERS;
- } else {
- ++StackAdjust;
- uint32_t TotalStackSize = StackAdjust + StackSize;
- if ((TotalStackSize & 0xFF) == TotalStackSize) {
- // Frameless stack with a small stack size.
- CompactUnwindEncoding |= CU::UNWIND_MODE_STACK_IMMD;
-
- // Encode the stack size.
- CompactUnwindEncoding |= (TotalStackSize & 0xFF) << 16;
- } else {
- if ((StackAdjust & 0x7) != StackAdjust)
- // The extra stack adjustments are too big for us to handle.
- return CU::UNWIND_MODE_DWARF;
-
- // Frameless stack with an offset too large for us to encode compactly.
- CompactUnwindEncoding |= CU::UNWIND_MODE_STACK_IND;
-
- // Encode the offset to the nnnnnn value in the 'subl $nnnnnn, ESP'
- // instruction.
- CompactUnwindEncoding |= (SubtractInstrIdx & 0xFF) << 16;
-
- // Encode any extra stack stack adjustments (done via push instructions).
- CompactUnwindEncoding |= (StackAdjust & 0x7) << 13;
- }
-
- // Encode the number of registers saved.
- CompactUnwindEncoding |= (SavedRegIdx & 0x7) << 10;
-
- // Get the encoding of the saved registers when we don't have a frame
- // pointer.
- uint32_t RegEnc =
- encodeCompactUnwindRegistersWithoutFrame(SavedRegs, SavedRegIdx,
- Is64Bit);
- if (RegEnc == ~0U) return CU::UNWIND_MODE_DWARF;
-
- // Encode the register encoding.
- CompactUnwindEncoding |=
- RegEnc & CU::UNWIND_FRAMELESS_STACK_REG_PERMUTATION;
- }
-
- return CompactUnwindEncoding;
-}
-
/// usesTheStack - This function checks if any of the users of EFLAGS
/// copies the EFLAGS. We know that the code that lowers COPY of EFLAGS has
/// to use the stack, and if we don't adjust the stack we clobber the first
/// frame index.
/// See X86InstrInfo::copyPhysReg.
-static bool usesTheStack(MachineFunction &MF) {
- MachineRegisterInfo &MRI = MF.getRegInfo();
+static bool usesTheStack(const MachineFunction &MF) {
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
for (MachineRegisterInfo::reg_iterator ri = MRI.reg_begin(X86::EFLAGS),
re = MRI.reg_end(); ri != re; ++ri)
@@ -863,7 +606,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
// responsible for adjusting the stack pointer. Touching the stack at 4K
// increments is necessary to ensure that the guard pages used by the OS
// virtual memory manager are allocated in correct sequence.
- if (NumBytes >= 4096 && STI.isTargetCOFF() && !STI.isTargetEnvMacho()) {
+ if (NumBytes >= 4096 && STI.isOSWindows() && !STI.isTargetEnvMacho()) {
const char *StackProbeSymbol;
bool isSPUpdateNeeded = false;
@@ -964,11 +707,6 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
if (PushedRegs)
emitCalleeSavedFrameMoves(MF, Label, HasFP ? FramePtr : StackPtr);
}
-
- // Darwin 10.7 and greater has support for compact unwind encoding.
- if (STI.getTargetTriple().isMacOSX() &&
- !STI.getTargetTriple().isMacOSXVersionLT(10, 7))
- MMI.setCompactUnwindEncoding(getCompactUnwindEncoding(MF));
}
void X86FrameLowering::emitEpilogue(MachineFunction &MF,
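
The deleted compact-unwind machinery is not lost; in this release it appears to have been reimplemented in the MC layer (the Darwin x86 asm backend) rather than in frame lowering. For reference, the frameless register permutation it computed is a Lehmer-style code; a standalone restatement of the deleted encodeCompactUnwindRegistersWithoutFrame core:

```cpp
#include <cassert>
#include <cstdint>

// Input is the already CU-numbered, already reversed register array from
// the deleted code above. Each register is renumbered by how many smaller
// registers precede it, then the digits are mixed into a 10-bit value.
static uint32_t encodePermutation(const unsigned Regs[6], unsigned Count) {
  unsigned Renum[6] = {0};
  for (unsigned i = 6 - Count; i < 6; ++i) {
    unsigned Smaller = 0;
    for (unsigned j = 6 - Count; j < i; ++j)
      if (Regs[j] < Regs[i])
        ++Smaller;
    Renum[i] = Regs[i] - Smaller - 1;
  }
  uint32_t Enc = 0;
  switch (Count) {
  case 6: Enc = 120*Renum[0] + 24*Renum[1] + 6*Renum[2] + 2*Renum[3]
                + Renum[4]; break;
  case 5: Enc = 120*Renum[1] + 24*Renum[2] + 6*Renum[3] + 2*Renum[4]
                + Renum[5]; break;
  case 4: Enc = 60*Renum[2] + 12*Renum[3] + 3*Renum[4] + Renum[5]; break;
  case 3: Enc = 20*Renum[3] + 4*Renum[4] + Renum[5]; break;
  case 2: Enc = 5*Renum[4] + Renum[5]; break;
  case 1: Enc = Renum[5]; break;
  }
  assert((Enc & 0x3FF) == Enc && "must fit in 10 bits");
  return Enc;
}
```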
diff --git a/lib/Target/X86/X86FrameLowering.h b/lib/Target/X86/X86FrameLowering.h
index 6e309d8..3d3b011 100644
--- a/lib/Target/X86/X86FrameLowering.h
+++ b/lib/Target/X86/X86FrameLowering.h
@@ -20,32 +20,6 @@
namespace llvm {
-namespace CU {
-
- /// Compact unwind encoding values.
- enum CompactUnwindEncodings {
- /// [RE]BP based frame where [RE]BP is pused on the stack immediately after
- /// the return address, then [RE]SP is moved to [RE]BP.
- UNWIND_MODE_BP_FRAME = 0x01000000,
-
- /// A frameless function with a small constant stack size.
- UNWIND_MODE_STACK_IMMD = 0x02000000,
-
- /// A frameless function with a large constant stack size.
- UNWIND_MODE_STACK_IND = 0x03000000,
-
- /// No compact unwind encoding is available.
- UNWIND_MODE_DWARF = 0x04000000,
-
- /// Mask for encoding the frame registers.
- UNWIND_BP_FRAME_REGISTERS = 0x00007FFF,
-
- /// Mask for encoding the frameless registers.
- UNWIND_FRAMELESS_STACK_REG_PERMUTATION = 0x000003FF
- };
-
-} // end CU namespace
-
class MCSymbol;
class X86TargetMachine;
@@ -91,7 +65,6 @@ public:
int getFrameIndexOffset(const MachineFunction &MF, int FI) const;
int getFrameIndexReference(const MachineFunction &MF, int FI,
unsigned &FrameReg) const;
- uint32_t getCompactUnwindEncoding(MachineFunction &MF) const;
void eliminateCallFramePseudoInstr(MachineFunction &MF,
MachineBasicBlock &MBB,
diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp
index 9465420..36d1690 100644
--- a/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -79,7 +79,8 @@ namespace {
}
bool hasBaseOrIndexReg() const {
- return IndexReg.getNode() != 0 || Base_Reg.getNode() != 0;
+ return BaseType == FrameIndexBase ||
+ IndexReg.getNode() != 0 || Base_Reg.getNode() != 0;
}
/// isRIPRelative - Return true if this addressing mode is already RIP
@@ -183,7 +184,7 @@ namespace {
SDNode *Select(SDNode *N);
SDNode *SelectGather(SDNode *N, unsigned Opc);
SDNode *SelectAtomic64(SDNode *Node, unsigned Opc);
- SDNode *SelectAtomicLoadArith(SDNode *Node, EVT NVT);
+ SDNode *SelectAtomicLoadArith(SDNode *Node, MVT NVT);
bool FoldOffsetIntoAddress(uint64_t Offset, X86ISelAddressMode &AM);
bool MatchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM);
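
The EVT-to-MVT signature changes running through this file rest on one invariant: by the time instruction selection runs, every value type is a "simple" machine type, so the extended-type-capable EVT wrapper adds nothing. The getSimpleValueType() calls below assert exactly that. A one-liner sketch of the conversion:

```cpp
#include "llvm/CodeGen/ValueTypes.h"
#include <cassert>

// EVT can describe arbitrary (extended) types; MVT only the fixed simple
// machine types. getSimpleVT() asserts the type is simple.
static llvm::MVT toSimple(llvm::EVT VT) {
  assert(VT.isSimple() && "extended type survived into selection?");
  return VT.getSimpleVT();
}
```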
@@ -491,8 +492,8 @@ void X86DAGToDAGISel::PreprocessISelDAG() {
if (N->getOpcode() != ISD::FP_ROUND && N->getOpcode() != ISD::FP_EXTEND)
continue;
- EVT SrcVT = N->getOperand(0).getValueType();
- EVT DstVT = N->getValueType(0);
+ MVT SrcVT = N->getOperand(0).getSimpleValueType();
+ MVT DstVT = N->getSimpleValueType(0);
// If any of the sources are vectors, no fp stack involved.
if (SrcVT.isVector() || DstVT.isVector())
@@ -519,7 +520,7 @@ void X86DAGToDAGISel::PreprocessISelDAG() {
// Here we could have an FP stack truncation or an FPStack <-> SSE convert.
// FPStack has extload and truncstore. SSE can fold direct loads into other
// operations. Based on this, decide what we want to do.
- EVT MemVT;
+ MVT MemVT;
if (N->getOpcode() == ISD::FP_ROUND)
MemVT = DstVT; // FP_ROUND must use DstVT, we can't do a 'trunc load'.
else
@@ -783,7 +784,7 @@ static bool FoldMaskAndShiftToExtract(SelectionDAG &DAG, SDValue N,
Mask != (0xffu << ScaleLog))
return true;
- EVT VT = N.getValueType();
+ MVT VT = N.getSimpleValueType();
SDLoc DL(N);
SDValue Eight = DAG.getConstant(8, MVT::i8);
SDValue NewMask = DAG.getConstant(0xff, VT);
@@ -831,7 +832,7 @@ static bool FoldMaskedShiftToScaledMask(SelectionDAG &DAG, SDValue N,
if (ShiftAmt != 1 && ShiftAmt != 2 && ShiftAmt != 3)
return true;
- EVT VT = N.getValueType();
+ MVT VT = N.getSimpleValueType();
SDLoc DL(N);
SDValue NewMask = DAG.getConstant(Mask >> ShiftAmt, VT);
SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, X, NewMask);
@@ -904,7 +905,7 @@ static bool FoldMaskAndShiftToScale(SelectionDAG &DAG, SDValue N,
// Scale the leading zero count down based on the actual size of the value.
// Also scale it down based on the size of the shift.
- MaskLZ -= (64 - X.getValueSizeInBits()) + ShiftAmt;
+ MaskLZ -= (64 - X.getSimpleValueType().getSizeInBits()) + ShiftAmt;
// The final check is to ensure that any masked out high bits of X are
// already known to be zero. Otherwise, the mask has a semantic impact
@@ -914,23 +915,23 @@ static bool FoldMaskAndShiftToScale(SelectionDAG &DAG, SDValue N,
// replace them with zero extensions cheaply if necessary.
bool ReplacingAnyExtend = false;
if (X.getOpcode() == ISD::ANY_EXTEND) {
- unsigned ExtendBits =
- X.getValueSizeInBits() - X.getOperand(0).getValueSizeInBits();
+ unsigned ExtendBits = X.getSimpleValueType().getSizeInBits() -
+ X.getOperand(0).getSimpleValueType().getSizeInBits();
// Assume that we'll replace the any-extend with a zero-extend, and
// narrow the search to the extended value.
X = X.getOperand(0);
MaskLZ = ExtendBits > MaskLZ ? 0 : MaskLZ - ExtendBits;
ReplacingAnyExtend = true;
}
- APInt MaskedHighBits = APInt::getHighBitsSet(X.getValueSizeInBits(),
- MaskLZ);
+ APInt MaskedHighBits =
+ APInt::getHighBitsSet(X.getSimpleValueType().getSizeInBits(), MaskLZ);
APInt KnownZero, KnownOne;
DAG.ComputeMaskedBits(X, KnownZero, KnownOne);
if (MaskedHighBits != KnownZero) return true;
// We've identified a pattern that can be transformed into a single shift
// and an addressing mode. Make it so.
- EVT VT = N.getValueType();
+ MVT VT = N.getSimpleValueType();
if (ReplacingAnyExtend) {
assert(X.getValueType() != VT);
// We looked through an ANY_EXTEND node, insert a ZERO_EXTEND.
@@ -1059,7 +1060,7 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
// We only handle up to 64-bit values here as those are what matter for
// addressing mode optimizations.
- if (X.getValueSizeInBits() > 64) break;
+ if (X.getSimpleValueType().getSizeInBits() > 64) break;
// The mask used for the transform is expected to be post-shift, but we
// found the shift first so just apply the shift to the mask before passing
@@ -1244,7 +1245,7 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
// We only handle up to 64-bit values here as those are what matter for
// addressing mode optimizations.
- if (X.getValueSizeInBits() > 64) break;
+ if (X.getSimpleValueType().getSizeInBits() > 64) break;
if (!isa<ConstantSDNode>(N.getOperand(1)))
break;
@@ -1323,7 +1324,7 @@ bool X86DAGToDAGISel::SelectAddr(SDNode *Parent, SDValue N, SDValue &Base,
if (MatchAddress(N, AM))
return false;
- EVT VT = N.getValueType();
+ MVT VT = N.getSimpleValueType();
if (AM.BaseType == X86ISelAddressMode::RegBase) {
if (!AM.Base_Reg.getNode())
AM.Base_Reg = CurDAG->getRegister(0, VT);
@@ -1465,7 +1466,7 @@ bool X86DAGToDAGISel::SelectLEAAddr(SDValue N,
assert (T == AM.Segment);
AM.Segment = Copy;
- EVT VT = N.getValueType();
+ MVT VT = N.getSimpleValueType();
unsigned Complexity = 0;
if (AM.BaseType == X86ISelAddressMode::RegBase)
if (AM.Base_Reg.getNode())
@@ -1706,7 +1707,7 @@ static const uint16_t AtomicOpcTbl[AtomicOpcEnd][AtomicSzEnd] = {
// + non-empty, otherwise.
static SDValue getAtomicLoadArithTargetConstant(SelectionDAG *CurDAG,
SDLoc dl,
- enum AtomicOpc &Op, EVT NVT,
+ enum AtomicOpc &Op, MVT NVT,
SDValue Val) {
if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Val)) {
int64_t CNVal = CN->getSExtValue();
@@ -1753,7 +1754,7 @@ static SDValue getAtomicLoadArithTargetConstant(SelectionDAG *CurDAG,
return Val;
}
-SDNode *X86DAGToDAGISel::SelectAtomicLoadArith(SDNode *Node, EVT NVT) {
+SDNode *X86DAGToDAGISel::SelectAtomicLoadArith(SDNode *Node, MVT NVT) {
if (Node->hasAnyUseOfValue(0))
return 0;
@@ -1793,7 +1794,7 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadArith(SDNode *Node, EVT NVT) {
bool isCN = Val.getNode() && (Val.getOpcode() == ISD::TargetConstant);
unsigned Opc = 0;
- switch (NVT.getSimpleVT().SimpleTy) {
+ switch (NVT.SimpleTy) {
default: return 0;
case MVT::i8:
if (isCN)
@@ -2047,7 +2048,7 @@ SDNode *X86DAGToDAGISel::SelectGather(SDNode *Node, unsigned Opc) {
}
SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
- EVT NVT = Node->getValueType(0);
+ MVT NVT = Node->getSimpleValueType(0);
unsigned Opc, MOpc;
unsigned Opcode = Node->getOpcode();
SDLoc dl(Node);
@@ -2056,6 +2057,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
if (Node->isMachineOpcode()) {
DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << '\n');
+ Node->setNodeId(-1);
return NULL; // Already selected.
}
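
The added setNodeId(-1) stamps an already-selected machine node so later DAG updates can recognize and skip it; -1 is the conventional "already processed" marker. A sketch of the pattern (toy node type, not the real SDNode):

#include <vector>

struct NodeToy {
  bool IsMachineOpcode = false;
  int Id = 0;                 // models SDNode::getNodeId()
};

void selectAll(std::vector<NodeToy> &Nodes) {
  for (NodeToy &N : Nodes) {
    if (N.IsMachineOpcode) {  // already selected earlier
      N.Id = -1;              // mark it so nobody revisits it
      continue;
    }
    // ... select N into a machine node ...
    N.IsMachineOpcode = true;
  }
}

int main() {
  std::vector<NodeToy> Nodes(3);
  Nodes[1].IsMachineOpcode = true;
  selectAll(Nodes);
  return Nodes[1].Id == -1 ? 0 : 1;
}
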
@@ -2187,7 +2189,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
break;
unsigned ShlOp, Op;
- EVT CstVT = NVT;
+ MVT CstVT = NVT;
// Check the minimum bitwidth for the new constant.
// TODO: AND32ri is the same as AND64ri32 with zext imm.
@@ -2202,7 +2204,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
if (NVT == CstVT)
break;
- switch (NVT.getSimpleVT().SimpleTy) {
+ switch (NVT.SimpleTy) {
default: llvm_unreachable("Unsupported VT!");
case MVT::i32:
assert(CstVT == MVT::i8);
@@ -2239,7 +2241,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
SDValue N1 = Node->getOperand(1);
unsigned LoReg;
- switch (NVT.getSimpleVT().SimpleTy) {
+ switch (NVT.SimpleTy) {
default: llvm_unreachable("Unsupported VT!");
case MVT::i8: LoReg = X86::AL; Opc = X86::MUL8r; break;
case MVT::i16: LoReg = X86::AX; Opc = X86::MUL16r; break;
@@ -2268,7 +2270,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
bool isSigned = Opcode == ISD::SMUL_LOHI;
bool hasBMI2 = Subtarget->hasBMI2();
if (!isSigned) {
- switch (NVT.getSimpleVT().SimpleTy) {
+ switch (NVT.SimpleTy) {
default: llvm_unreachable("Unsupported VT!");
case MVT::i8: Opc = X86::MUL8r; MOpc = X86::MUL8m; break;
case MVT::i16: Opc = X86::MUL16r; MOpc = X86::MUL16m; break;
@@ -2278,7 +2280,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
MOpc = hasBMI2 ? X86::MULX64rm : X86::MUL64m; break;
}
} else {
- switch (NVT.getSimpleVT().SimpleTy) {
+ switch (NVT.SimpleTy) {
default: llvm_unreachable("Unsupported VT!");
case MVT::i8: Opc = X86::IMUL8r; MOpc = X86::IMUL8m; break;
case MVT::i16: Opc = X86::IMUL16r; MOpc = X86::IMUL16m; break;
@@ -2415,7 +2417,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
bool isSigned = Opcode == ISD::SDIVREM;
if (!isSigned) {
- switch (NVT.getSimpleVT().SimpleTy) {
+ switch (NVT.SimpleTy) {
default: llvm_unreachable("Unsupported VT!");
case MVT::i8: Opc = X86::DIV8r; MOpc = X86::DIV8m; break;
case MVT::i16: Opc = X86::DIV16r; MOpc = X86::DIV16m; break;
@@ -2423,7 +2425,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
case MVT::i64: Opc = X86::DIV64r; MOpc = X86::DIV64m; break;
}
} else {
- switch (NVT.getSimpleVT().SimpleTy) {
+ switch (NVT.SimpleTy) {
default: llvm_unreachable("Unsupported VT!");
case MVT::i8: Opc = X86::IDIV8r; MOpc = X86::IDIV8m; break;
case MVT::i16: Opc = X86::IDIV16r; MOpc = X86::IDIV16m; break;
@@ -2434,7 +2436,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
unsigned LoReg, HiReg, ClrReg;
unsigned SExtOpcode;
- switch (NVT.getSimpleVT().SimpleTy) {
+ switch (NVT.SimpleTy) {
default: llvm_unreachable("Unsupported VT!");
case MVT::i8:
LoReg = X86::AL; ClrReg = HiReg = X86::AH;
@@ -2489,7 +2491,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
} else {
// Zero out the high part, effectively zero extending the input.
SDValue ClrNode = SDValue(CurDAG->getMachineNode(X86::MOV32r0, dl, NVT), 0);
- switch (NVT.getSimpleVT().SimpleTy) {
+ switch (NVT.SimpleTy) {
case MVT::i16:
ClrNode =
SDValue(CurDAG->getMachineNode(
@@ -2609,7 +2611,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
// On x86-32, only the ABCD registers have 8-bit subregisters.
if (!Subtarget->is64Bit()) {
const TargetRegisterClass *TRC;
- switch (N0.getValueType().getSimpleVT().SimpleTy) {
+ switch (N0.getSimpleValueType().SimpleTy) {
case MVT::i32: TRC = &X86::GR32_ABCDRegClass; break;
case MVT::i16: TRC = &X86::GR16_ABCDRegClass; break;
default: llvm_unreachable("Unsupported TEST operand type!");
@@ -2644,7 +2646,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
// Put the value in an ABCD register.
const TargetRegisterClass *TRC;
- switch (N0.getValueType().getSimpleVT().SimpleTy) {
+ switch (N0.getSimpleValueType().SimpleTy) {
case MVT::i64: TRC = &X86::GR64_ABCDRegClass; break;
case MVT::i32: TRC = &X86::GR32_ABCDRegClass; break;
case MVT::i16: TRC = &X86::GR16_ABCDRegClass; break;
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 9ffe29f..081c558 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -16,6 +16,7 @@
#include "X86ISelLowering.h"
#include "Utils/X86ShuffleDecode.h"
#include "X86.h"
+#include "X86CallingConv.h"
#include "X86InstrBuilder.h"
#include "X86TargetMachine.h"
#include "X86TargetObjectFile.h"
@@ -91,7 +92,7 @@ static SDValue ExtractSubVector(SDValue Vec, unsigned IdxVal,
VecIdx);
return Result;
-
+
}
/// Generate a DAG to grab 128-bits from a vector > 128 bits. This
/// sets things up to match to an AVX VEXTRACTF128 / VEXTRACTI128
@@ -631,7 +632,7 @@ void X86TargetLowering::resetOperationActions() {
setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
- if (Subtarget->isTargetCOFF() && !Subtarget->isTargetEnvMacho())
+ if (Subtarget->isOSWindows() && !Subtarget->isTargetEnvMacho())
setOperationAction(ISD::DYNAMIC_STACKALLOC, Subtarget->is64Bit() ?
MVT::i64 : MVT::i32, Custom);
else if (TM.Options.EnableSegmentedStacks)
@@ -1150,9 +1151,6 @@ void X86TargetLowering::resetOperationActions() {
setOperationAction(ISD::FNEG, MVT::v4f64, Custom);
setOperationAction(ISD::FABS, MVT::v4f64, Custom);
- setOperationAction(ISD::TRUNCATE, MVT::v8i16, Custom);
- setOperationAction(ISD::TRUNCATE, MVT::v4i32, Custom);
-
setOperationAction(ISD::FP_TO_SINT, MVT::v8i16, Custom);
setOperationAction(ISD::FP_TO_SINT, MVT::v8i32, Legal);
@@ -1160,7 +1158,6 @@ void X86TargetLowering::resetOperationActions() {
setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Legal);
setOperationAction(ISD::FP_ROUND, MVT::v4f32, Legal);
- setOperationAction(ISD::ZERO_EXTEND, MVT::v8i32, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v8i8, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v8i16, Custom);
@@ -1193,10 +1190,16 @@ void X86TargetLowering::resetOperationActions() {
setOperationAction(ISD::SIGN_EXTEND, MVT::v4i64, Custom);
setOperationAction(ISD::SIGN_EXTEND, MVT::v8i32, Custom);
+ setOperationAction(ISD::SIGN_EXTEND, MVT::v16i16, Custom);
setOperationAction(ISD::ZERO_EXTEND, MVT::v4i64, Custom);
setOperationAction(ISD::ZERO_EXTEND, MVT::v8i32, Custom);
+ setOperationAction(ISD::ZERO_EXTEND, MVT::v16i16, Custom);
setOperationAction(ISD::ANY_EXTEND, MVT::v4i64, Custom);
setOperationAction(ISD::ANY_EXTEND, MVT::v8i32, Custom);
+ setOperationAction(ISD::ANY_EXTEND, MVT::v16i16, Custom);
+ setOperationAction(ISD::TRUNCATE, MVT::v16i8, Custom);
+ setOperationAction(ISD::TRUNCATE, MVT::v8i16, Custom);
+ setOperationAction(ISD::TRUNCATE, MVT::v4i32, Custom);
if (Subtarget->hasFMA() || Subtarget->hasFMA4()) {
setOperationAction(ISD::FMA, MVT::v8f32, Legal);
@@ -1330,7 +1333,16 @@ void X86TargetLowering::resetOperationActions() {
setOperationAction(ISD::FMA, MVT::v16f32, Legal);
setOperationAction(ISD::SDIV, MVT::v16i32, Custom);
-
+ setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);
+ if (Subtarget->is64Bit()) {
+ setOperationAction(ISD::FP_TO_UINT, MVT::i64, Legal);
+ setOperationAction(ISD::FP_TO_SINT, MVT::i64, Legal);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i64, Legal);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i64, Legal);
+ }
setOperationAction(ISD::FP_TO_SINT, MVT::v16i32, Legal);
setOperationAction(ISD::FP_TO_UINT, MVT::v16i32, Legal);
setOperationAction(ISD::FP_TO_UINT, MVT::v8i32, Legal);
@@ -1390,6 +1402,9 @@ void X86TargetLowering::resetOperationActions() {
setOperationAction(ISD::AND, MVT::v8i64, Legal);
setOperationAction(ISD::OR, MVT::v8i64, Legal);
setOperationAction(ISD::XOR, MVT::v8i64, Legal);
+ setOperationAction(ISD::AND, MVT::v16i32, Legal);
+ setOperationAction(ISD::OR, MVT::v16i32, Legal);
+ setOperationAction(ISD::XOR, MVT::v16i32, Legal);
// Custom lower several nodes.
for (int i = MVT::FIRST_VECTOR_VALUETYPE;
@@ -1409,14 +1424,6 @@ void X86TargetLowering::resetOperationActions() {
if (!VT.is512BitVector())
continue;
- if (VT != MVT::v8i64) {
- setOperationAction(ISD::XOR, VT, Promote);
- AddPromotedToType (ISD::XOR, VT, MVT::v8i64);
- setOperationAction(ISD::OR, VT, Promote);
- AddPromotedToType (ISD::OR, VT, MVT::v8i64);
- setOperationAction(ISD::AND, VT, Promote);
- AddPromotedToType (ISD::AND, VT, MVT::v8i64);
- }
if (EltSize >= 32) {
setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
@@ -1434,8 +1441,6 @@ void X86TargetLowering::resetOperationActions() {
if (!VT.is512BitVector())
continue;
- setOperationAction(ISD::LOAD, VT, Promote);
- AddPromotedToType (ISD::LOAD, VT, MVT::v8i64);
setOperationAction(ISD::SELECT, VT, Promote);
AddPromotedToType (ISD::SELECT, VT, MVT::v8i64);
}
@@ -1452,6 +1457,7 @@ void X86TargetLowering::resetOperationActions() {
// We want to custom lower some of our intrinsics.
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
+ setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
// Only custom-lower 64-bit SADDO and friends on 64-bit because we don't
// handle type legalization for these operations here.
@@ -1541,7 +1547,16 @@ void X86TargetLowering::resetOperationActions() {
}
EVT X86TargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
- if (!VT.isVector()) return MVT::i8;
+ if (!VT.isVector())
+ return MVT::i8;
+
+ const TargetMachine &TM = getTargetMachine();
+ if (!TM.Options.UseSoftFloat && Subtarget->hasAVX512())
+ switch(VT.getVectorNumElements()) {
+ case 8: return MVT::v8i1;
+ case 16: return MVT::v16i1;
+ }
+
return VT.changeVectorElementTypeToInteger();
}
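
Under AVX-512, vector compares naturally produce a bit-per-element mask in a k-register, so an 8- or 16-element compare should yield v8i1/v16i1 rather than a vector of element-sized integers. A small sketch of the dispatch (string names stand in for MVTs; the AVX-512 and soft-float flags are plain bools here):

#include <cassert>
#include <cstring>

const char *setCCResultTypeFor(bool HasAVX512, bool SoftFloat,
                               unsigned NumElts) {
  if (HasAVX512 && !SoftFloat) {
    switch (NumElts) {
    case 8:  return "v8i1";   // one k-register bit per lane
    case 16: return "v16i1";
    }
  }
  return "int-vector";         // fallback: element-sized integer vector
}

int main() {
  assert(!std::strcmp(setCCResultTypeFor(true, false, 16), "v16i1"));
  assert(!std::strcmp(setCCResultTypeFor(false, false, 16), "int-vector"));
  return 0;
}
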
@@ -1750,6 +1765,13 @@ bool X86TargetLowering::getStackCookieLocation(unsigned &AddressSpace,
return true;
}
+bool X86TargetLowering::isNoopAddrSpaceCast(unsigned SrcAS,
+ unsigned DestAS) const {
+ assert(SrcAS != DestAS && "Expected different address spaces!");
+
+ return SrcAS < 256 && DestAS < 256;
+}
+
//===----------------------------------------------------------------------===//
// Return Value Calling Convention Implementation
//===----------------------------------------------------------------------===//
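
The new isNoopAddrSpaceCast hook encodes an x86 convention: address spaces from 256 upward are reserved for segment-relative addressing (gs/fs), while everything below 256 shares one flat layout, so a cast between two sub-256 spaces changes no bits. A standalone restatement:

#include <cassert>

// Spaces >= 256 are segment-relative on x86; a cast between two
// flat (< 256) spaces is a pure no-op.
bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DstAS) {
  assert(SrcAS != DstAS && "Expected different address spaces!");
  return SrcAS < 256 && DstAS < 256;
}

int main() {
  assert(isNoopAddrSpaceCast(0, 1));     // flat -> flat
  assert(!isNoopAddrSpaceCast(0, 257));  // flat -> segment-relative
  return 0;
}
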
@@ -1767,6 +1789,11 @@ X86TargetLowering::CanLowerReturn(CallingConv::ID CallConv,
return CCInfo.CheckReturn(Outs, RetCC_X86);
}
+const uint16_t *X86TargetLowering::getScratchRegisters(CallingConv::ID) const {
+ static const uint16_t ScratchRegs[] = { X86::R11, 0 };
+ return ScratchRegs;
+}
+
SDValue
X86TargetLowering::LowerReturn(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
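
getScratchRegisters returns a 0-terminated array, the idiom callers use to walk a register list of unknown length; R11 is a natural choice on x86-64 since no common calling convention assigns it an argument. A sketch of the consumption side (toy register encodings, not the real X86:: enums):

#include <cstdint>
#include <cstdio>

enum : uint16_t { NoRegister = 0, R11 = 11 };   // toy encodings

const uint16_t *getScratchRegisters() {
  static const uint16_t ScratchRegs[] = { R11, NoRegister };
  return ScratchRegs;
}

int main() {
  for (const uint16_t *R = getScratchRegisters(); *R; ++R)
    std::printf("scratch reg %u\n", (unsigned)*R); // walks until the 0
  return 0;
}
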
@@ -3532,7 +3559,7 @@ static bool isSequentialOrUndefInRange(ArrayRef<int> Mask,
/// isPSHUFDMask - Return true if the node specifies a shuffle of elements that
/// is suitable for input to PSHUFD or PSHUFW. That is, it doesn't reference
/// the second operand.
-static bool isPSHUFDMask(ArrayRef<int> Mask, EVT VT) {
+static bool isPSHUFDMask(ArrayRef<int> Mask, MVT VT) {
if (VT == MVT::v4f32 || VT == MVT::v4i32 )
return (Mask[0] < 4 && Mask[1] < 4 && Mask[2] < 4 && Mask[3] < 4);
if (VT == MVT::v2f64 || VT == MVT::v2i64)
@@ -3542,7 +3569,7 @@ static bool isPSHUFDMask(ArrayRef<int> Mask, EVT VT) {
/// isPSHUFHWMask - Return true if the node specifies a shuffle of elements that
/// is suitable for input to PSHUFHW.
-static bool isPSHUFHWMask(ArrayRef<int> Mask, EVT VT, bool HasInt256) {
+static bool isPSHUFHWMask(ArrayRef<int> Mask, MVT VT, bool HasInt256) {
if (VT != MVT::v8i16 && (!HasInt256 || VT != MVT::v16i16))
return false;
@@ -3571,7 +3598,7 @@ static bool isPSHUFHWMask(ArrayRef<int> Mask, EVT VT, bool HasInt256) {
/// isPSHUFLWMask - Return true if the node specifies a shuffle of elements that
/// is suitable for input to PSHUFLW.
-static bool isPSHUFLWMask(ArrayRef<int> Mask, EVT VT, bool HasInt256) {
+static bool isPSHUFLWMask(ArrayRef<int> Mask, MVT VT, bool HasInt256) {
if (VT != MVT::v8i16 && (!HasInt256 || VT != MVT::v16i16))
return false;
@@ -3600,14 +3627,14 @@ static bool isPSHUFLWMask(ArrayRef<int> Mask, EVT VT, bool HasInt256) {
/// isPALIGNRMask - Return true if the node specifies a shuffle of elements that
/// is suitable for input to PALIGNR.
-static bool isPALIGNRMask(ArrayRef<int> Mask, EVT VT,
+static bool isPALIGNRMask(ArrayRef<int> Mask, MVT VT,
const X86Subtarget *Subtarget) {
if ((VT.is128BitVector() && !Subtarget->hasSSSE3()) ||
(VT.is256BitVector() && !Subtarget->hasInt256()))
return false;
unsigned NumElts = VT.getVectorNumElements();
- unsigned NumLanes = VT.getSizeInBits()/128;
+ unsigned NumLanes = VT.is512BitVector() ? 1 : VT.getSizeInBits()/128;
unsigned NumLaneElts = NumElts/NumLanes;
// Do not handle 64-bit element shuffles with palignr.
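
PALIGNR concatenates the two sources and byte-shifts inside each 128-bit lane, so the predicate's first job is lane bookkeeping; the new code treats the 512-bit form as a single unit for this check. The arithmetic, stated standalone:

#include <cstdio>

void laneMath(unsigned SizeInBits, unsigned NumElts) {
  // 512-bit vectors are handled as one unit by this predicate;
  // smaller vectors split into 128-bit lanes.
  unsigned NumLanes = (SizeInBits == 512) ? 1 : SizeInBits / 128;
  unsigned NumLaneElts = NumElts / NumLanes;
  std::printf("%u-bit: %u lane(s) of %u elements\n",
              SizeInBits, NumLanes, NumLaneElts);
}

int main() {
  laneMath(128, 16);  // v16i8: 1 lane of 16
  laneMath(256, 32);  // v32i8: 2 lanes of 16
  return 0;
}
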
@@ -3690,10 +3717,7 @@ static void CommuteVectorShuffleMask(SmallVectorImpl<int> &Mask,
/// specifies a shuffle of elements that is suitable for input to 128/256-bit
/// SHUFPS and SHUFPD. If Commuted is true, then it checks for sources to be
/// reverse of what x86 shuffles want.
-static bool isSHUFPMask(ArrayRef<int> Mask, EVT VT, bool HasFp256,
- bool Commuted = false) {
- if (!HasFp256 && VT.is256BitVector())
- return false;
+static bool isSHUFPMask(ArrayRef<int> Mask, MVT VT, bool Commuted = false) {
unsigned NumElems = VT.getVectorNumElements();
unsigned NumLanes = VT.getSizeInBits()/128;
@@ -3702,6 +3726,10 @@ static bool isSHUFPMask(ArrayRef<int> Mask, EVT VT, bool HasFp256,
if (NumLaneElems != 2 && NumLaneElems != 4)
return false;
+ unsigned EltSize = VT.getVectorElementType().getSizeInBits();
+ bool symmetricMaskRequired =
+ (VT.getSizeInBits() >= 256) && (EltSize == 32);
+
// VSHUFPSY divides the resulting vector into 4 chunks.
// The sources are also split into 4 chunks, and each destination
// chunk must come from a different source chunk.
@@ -3721,6 +3749,7 @@ static bool isSHUFPMask(ArrayRef<int> Mask, EVT VT, bool HasFp256,
//
// DST => Y3..Y2, X3..X2, Y1..Y0, X1..X0
//
+ SmallVector<int, 4> MaskVal(NumLaneElems, -1);
unsigned HalfLaneElems = NumLaneElems/2;
for (unsigned l = 0; l != NumElems; l += NumLaneElems) {
for (unsigned i = 0; i != NumLaneElems; ++i) {
@@ -3731,9 +3760,13 @@ static bool isSHUFPMask(ArrayRef<int> Mask, EVT VT, bool HasFp256,
// For VSHUFPSY, the mask of the second half must be the same as the
// first but with the appropriate offsets. This works in the same way as
// VPERMILPS works with masks.
- if (NumElems != 8 || l == 0 || Mask[i] < 0)
+ if (!symmetricMaskRequired || Idx < 0)
continue;
- if (!isUndefOrEqual(Idx, Mask[i]+l))
+ if (MaskVal[i] < 0) {
+ MaskVal[i] = Idx - l;
+ continue;
+ }
+ if ((signed)(Idx - l) != MaskVal[i])
return false;
}
}
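
One imm8 drives every 128-bit lane of VSHUFPS, so once 32-bit elements are shuffled at 256 bits or wider, each lane must request the same lane-relative pattern; that is what the MaskVal bookkeeping above enforces. The check, extracted into a standalone helper (undef entries, -1, match anything):

#include <cassert>
#include <vector>

bool lanesAgree(const std::vector<int> &Mask, unsigned NumLaneElts) {
  std::vector<int> Pattern(NumLaneElts, -1);      // first lane's picks
  for (unsigned l = 0; l < Mask.size(); l += NumLaneElts)
    for (unsigned i = 0; i != NumLaneElts; ++i) {
      if (Mask[l + i] < 0)
        continue;                                 // undef: no constraint
      int Rel = Mask[l + i] - (int)l;             // lane-relative index
      if (Pattern[i] < 0)
        Pattern[i] = Rel;
      else if (Pattern[i] != Rel)
        return false;                             // lanes disagree
    }
  return true;
}

int main() {
  // v8f32 SHUFPS-style mask: lane 1 repeats lane 0's pattern.
  assert(lanesAgree({2, 3, 8, 9, 6, 7, 12, 13}, 4));
  assert(!lanesAgree({2, 3, 8, 9, 4, 5, 12, 13}, 4));
  return 0;
}
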
@@ -3743,7 +3776,7 @@ static bool isSHUFPMask(ArrayRef<int> Mask, EVT VT, bool HasFp256,
/// isMOVHLPSMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVHLPS.
-static bool isMOVHLPSMask(ArrayRef<int> Mask, EVT VT) {
+static bool isMOVHLPSMask(ArrayRef<int> Mask, MVT VT) {
if (!VT.is128BitVector())
return false;
@@ -3762,7 +3795,7 @@ static bool isMOVHLPSMask(ArrayRef<int> Mask, EVT VT) {
/// isMOVHLPS_v_undef_Mask - Special case of isMOVHLPSMask for canonical form
/// of vector_shuffle v, v, <2, 3, 2, 3>, i.e. vector_shuffle v, undef,
/// <2, 3, 2, 3>
-static bool isMOVHLPS_v_undef_Mask(ArrayRef<int> Mask, EVT VT) {
+static bool isMOVHLPS_v_undef_Mask(ArrayRef<int> Mask, MVT VT) {
if (!VT.is128BitVector())
return false;
@@ -3779,7 +3812,7 @@ static bool isMOVHLPS_v_undef_Mask(ArrayRef<int> Mask, EVT VT) {
/// isMOVLPMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVLP{S|D}.
-static bool isMOVLPMask(ArrayRef<int> Mask, EVT VT) {
+static bool isMOVLPMask(ArrayRef<int> Mask, MVT VT) {
if (!VT.is128BitVector())
return false;
@@ -3801,7 +3834,7 @@ static bool isMOVLPMask(ArrayRef<int> Mask, EVT VT) {
/// isMOVLHPSMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVLHPS.
-static bool isMOVLHPSMask(ArrayRef<int> Mask, EVT VT) {
+static bool isMOVLHPSMask(ArrayRef<int> Mask, MVT VT) {
if (!VT.is128BitVector())
return false;
@@ -3827,7 +3860,7 @@ static bool isMOVLHPSMask(ArrayRef<int> Mask, EVT VT) {
static
SDValue Compact8x32ShuffleNode(ShuffleVectorSDNode *SVOp,
SelectionDAG &DAG) {
- MVT VT = SVOp->getValueType(0).getSimpleVT();
+ MVT VT = SVOp->getSimpleValueType(0);
SDLoc dl(SVOp);
if (VT != MVT::v8i32 && VT != MVT::v8f32)
@@ -3870,70 +3903,92 @@ SDValue Compact8x32ShuffleNode(ShuffleVectorSDNode *SVOp,
/// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to UNPCKL.
-static bool isUNPCKLMask(ArrayRef<int> Mask, EVT VT,
+static bool isUNPCKLMask(ArrayRef<int> Mask, MVT VT,
bool HasInt256, bool V2IsSplat = false) {
- unsigned NumElts = VT.getVectorNumElements();
- assert((VT.is128BitVector() || VT.is256BitVector()) &&
- "Unsupported vector type for unpckh");
+ assert(VT.getSizeInBits() >= 128 &&
+ "Unsupported vector type for unpckl");
- if (VT.is256BitVector() && NumElts != 4 && NumElts != 8 &&
- (!HasInt256 || (NumElts != 16 && NumElts != 32)))
+ // AVX defines UNPCK* to operate independently on 128-bit lanes.
+ unsigned NumLanes;
+ unsigned NumOf256BitLanes;
+ unsigned NumElts = VT.getVectorNumElements();
+ if (VT.is256BitVector()) {
+ if (NumElts != 4 && NumElts != 8 &&
+ (!HasInt256 || (NumElts != 16 && NumElts != 32)))
return false;
+ NumLanes = 2;
+ NumOf256BitLanes = 1;
+ } else if (VT.is512BitVector()) {
+ assert(VT.getScalarType().getSizeInBits() >= 32 &&
+ "Unsupported vector type for unpckh");
+ NumLanes = 2;
+ NumOf256BitLanes = 2;
+ } else {
+ NumLanes = 1;
+ NumOf256BitLanes = 1;
+ }
- // Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate
- // independently on 128-bit lanes.
- unsigned NumLanes = VT.getSizeInBits()/128;
- unsigned NumLaneElts = NumElts/NumLanes;
+ unsigned NumEltsInStride = NumElts/NumOf256BitLanes;
+ unsigned NumLaneElts = NumEltsInStride/NumLanes;
- for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
- for (unsigned i = 0, j = l; i != NumLaneElts; i += 2, ++j) {
- int BitI = Mask[l+i];
- int BitI1 = Mask[l+i+1];
- if (!isUndefOrEqual(BitI, j))
- return false;
- if (V2IsSplat) {
- if (!isUndefOrEqual(BitI1, NumElts))
+ for (unsigned l256 = 0; l256 < NumOf256BitLanes; l256 += 1) {
+ for (unsigned l = 0; l != NumEltsInStride; l += NumLaneElts) {
+ for (unsigned i = 0, j = l; i != NumLaneElts; i += 2, ++j) {
+ int BitI = Mask[l256*NumEltsInStride+l+i];
+ int BitI1 = Mask[l256*NumEltsInStride+l+i+1];
+ if (!isUndefOrEqual(BitI, j+l256*NumElts))
return false;
- } else {
- if (!isUndefOrEqual(BitI1, j + NumElts))
+ if (V2IsSplat && !isUndefOrEqual(BitI1, NumElts))
+ return false;
+ if (!isUndefOrEqual(BitI1, j+l256*NumElts+NumEltsInStride))
return false;
}
}
}
-
return true;
}
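
For reference while reading the index juggling above: the classic UNPCKL pattern interleaves the low half of each 128-bit lane of V1 with the matching half of V2, where V2's elements are numbered NumElts..2*NumElts-1 in a shuffle mask. A generator for the pre-AVX-512 cases:

#include <cstdio>
#include <vector>

std::vector<int> unpcklMask(unsigned NumElts, unsigned NumLanes) {
  unsigned LaneElts = NumElts / NumLanes;
  std::vector<int> M;
  for (unsigned l = 0; l != NumElts; l += LaneElts)
    for (unsigned i = 0; i != LaneElts / 2; ++i) {
      M.push_back((int)(l + i));           // low half of V1's lane
      M.push_back((int)(NumElts + l + i)); // matching half of V2's lane
    }
  return M;
}

int main() {
  for (int e : unpcklMask(4, 1)) std::printf("%d ", e); // 0 4 1 5
  std::printf("\n");
  for (int e : unpcklMask(8, 2)) std::printf("%d ", e); // 0 8 1 9 4 12 5 13
  std::printf("\n");
  return 0;
}
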
/// isUNPCKHMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to UNPCKH.
-static bool isUNPCKHMask(ArrayRef<int> Mask, EVT VT,
+static bool isUNPCKHMask(ArrayRef<int> Mask, MVT VT,
bool HasInt256, bool V2IsSplat = false) {
- unsigned NumElts = VT.getVectorNumElements();
-
- assert((VT.is128BitVector() || VT.is256BitVector()) &&
+ assert(VT.getSizeInBits() >= 128 &&
"Unsupported vector type for unpckh");
- if (VT.is256BitVector() && NumElts != 4 && NumElts != 8 &&
- (!HasInt256 || (NumElts != 16 && NumElts != 32)))
+ // AVX defines UNPCK* to operate independently on 128-bit lanes.
+ unsigned NumLanes;
+ unsigned NumOf256BitLanes;
+ unsigned NumElts = VT.getVectorNumElements();
+ if (VT.is256BitVector()) {
+ if (NumElts != 4 && NumElts != 8 &&
+ (!HasInt256 || (NumElts != 16 && NumElts != 32)))
return false;
+ NumLanes = 2;
+ NumOf256BitLanes = 1;
+ } else if (VT.is512BitVector()) {
+ assert(VT.getScalarType().getSizeInBits() >= 32 &&
+ "Unsupported vector type for unpckh");
+ NumLanes = 2;
+ NumOf256BitLanes = 2;
+ } else {
+ NumLanes = 1;
+ NumOf256BitLanes = 1;
+ }
- // Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate
- // independently on 128-bit lanes.
- unsigned NumLanes = VT.getSizeInBits()/128;
- unsigned NumLaneElts = NumElts/NumLanes;
+ unsigned NumEltsInStride = NumElts/NumOf256BitLanes;
+ unsigned NumLaneElts = NumEltsInStride/NumLanes;
- for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
- for (unsigned i = 0, j = l+NumLaneElts/2; i != NumLaneElts; i += 2, ++j) {
- int BitI = Mask[l+i];
- int BitI1 = Mask[l+i+1];
- if (!isUndefOrEqual(BitI, j))
- return false;
- if (V2IsSplat) {
- if (isUndefOrEqual(BitI1, NumElts))
+ for (unsigned l256 = 0; l256 < NumOf256BitLanes; l256 += 1) {
+ for (unsigned l = 0; l != NumEltsInStride; l += NumLaneElts) {
+ for (unsigned i = 0, j = l+NumLaneElts/2; i != NumLaneElts; i += 2, ++j) {
+ int BitI = Mask[l256*NumEltsInStride+l+i];
+ int BitI1 = Mask[l256*NumEltsInStride+l+i+1];
+ if (!isUndefOrEqual(BitI, j+l256*NumElts))
return false;
- } else {
- if (!isUndefOrEqual(BitI1, j+NumElts))
+ if (V2IsSplat && !isUndefOrEqual(BitI1, NumElts))
+ return false;
+ if (!isUndefOrEqual(BitI1, j+l256*NumElts+NumEltsInStride))
return false;
}
}
@@ -3944,10 +3999,12 @@ static bool isUNPCKHMask(ArrayRef<int> Mask, EVT VT,
/// isUNPCKL_v_undef_Mask - Special case of isUNPCKLMask for canonical form
/// of vector_shuffle v, v, <0, 4, 1, 5>, i.e. vector_shuffle v, undef,
/// <0, 0, 1, 1>
-static bool isUNPCKL_v_undef_Mask(ArrayRef<int> Mask, EVT VT, bool HasInt256) {
+static bool isUNPCKL_v_undef_Mask(ArrayRef<int> Mask, MVT VT, bool HasInt256) {
unsigned NumElts = VT.getVectorNumElements();
bool Is256BitVec = VT.is256BitVector();
+ if (VT.is512BitVector())
+ return false;
assert((VT.is128BitVector() || VT.is256BitVector()) &&
"Unsupported vector type for unpckh");
@@ -3985,9 +4042,12 @@ static bool isUNPCKL_v_undef_Mask(ArrayRef<int> Mask, EVT VT, bool HasInt256) {
/// isUNPCKH_v_undef_Mask - Special case of isUNPCKHMask for canonical form
/// of vector_shuffle v, v, <2, 6, 3, 7>, i.e. vector_shuffle v, undef,
/// <2, 2, 3, 3>
-static bool isUNPCKH_v_undef_Mask(ArrayRef<int> Mask, EVT VT, bool HasInt256) {
+static bool isUNPCKH_v_undef_Mask(ArrayRef<int> Mask, MVT VT, bool HasInt256) {
unsigned NumElts = VT.getVectorNumElements();
+ if (VT.is512BitVector())
+ return false;
+
assert((VT.is128BitVector() || VT.is256BitVector()) &&
"Unsupported vector type for unpckh");
@@ -4040,7 +4100,7 @@ static bool isMOVLMask(ArrayRef<int> Mask, EVT VT) {
/// vector_shuffle <4, 5, 6, 7, 12, 13, 14, 15>
/// The first half comes from the second half of V1 and the second half from the
/// second half of V2.
-static bool isVPERM2X128Mask(ArrayRef<int> Mask, EVT VT, bool HasFp256) {
+static bool isVPERM2X128Mask(ArrayRef<int> Mask, MVT VT, bool HasFp256) {
if (!HasFp256 || !VT.is256BitVector())
return false;
@@ -4072,7 +4132,7 @@ static bool isVPERM2X128Mask(ArrayRef<int> Mask, EVT VT, bool HasFp256) {
/// getShuffleVPERM2X128Immediate - Return the appropriate immediate to shuffle
/// the specified VECTOR_SHUFFLE mask with VPERM2F128/VPERM2I128 instructions.
static unsigned getShuffleVPERM2X128Immediate(ShuffleVectorSDNode *SVOp) {
- MVT VT = SVOp->getValueType(0).getSimpleVT();
+ MVT VT = SVOp->getSimpleValueType(0);
unsigned HalfSize = VT.getVectorNumElements()/2;
@@ -4093,6 +4153,44 @@ static unsigned getShuffleVPERM2X128Immediate(ShuffleVectorSDNode *SVOp) {
return (FstHalf | (SndHalf << 4));
}
+// Symmetric in-lane mask. Each lane has 4 elements (for imm8).
+static bool isPermImmMask(ArrayRef<int> Mask, MVT VT, unsigned& Imm8) {
+ unsigned EltSize = VT.getVectorElementType().getSizeInBits();
+ if (EltSize < 32)
+ return false;
+
+ unsigned NumElts = VT.getVectorNumElements();
+ Imm8 = 0;
+ if (VT.is128BitVector() || (VT.is256BitVector() && EltSize == 64)) {
+ for (unsigned i = 0; i != NumElts; ++i) {
+ if (Mask[i] < 0)
+ continue;
+ Imm8 |= Mask[i] << (i*2);
+ }
+ return true;
+ }
+
+ unsigned LaneSize = 4;
+ SmallVector<int, 4> MaskVal(LaneSize, -1);
+
+ for (unsigned l = 0; l != NumElts; l += LaneSize) {
+ for (unsigned i = 0; i != LaneSize; ++i) {
+ if (!isUndefOrInRange(Mask[i+l], l, l+LaneSize))
+ return false;
+ if (Mask[i+l] < 0)
+ continue;
+ if (MaskVal[i] < 0) {
+ MaskVal[i] = Mask[i+l] - l;
+ Imm8 |= MaskVal[i] << (i*2);
+ continue;
+ }
+ if (Mask[i+l] != (signed)(MaskVal[i]+l))
+ return false;
+ }
+ }
+ return true;
+}
+
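
The Imm8 accumulation above packs one two-bit source index per destination element. Worked example: the v4i32 rotate mask <3,0,1,2> encodes as 3 | (0<<2) | (1<<4) | (2<<6) = 0x93. In code:

#include <cassert>

unsigned encodePermImm8(const int Mask[4]) {
  unsigned Imm8 = 0;
  for (unsigned i = 0; i != 4; ++i)
    if (Mask[i] >= 0)                       // undef lanes contribute 0
      Imm8 |= (unsigned)Mask[i] << (i * 2); // two bits per element
  return Imm8;
}

int main() {
  const int Rotate[4] = {3, 0, 1, 2};
  assert(encodePermImm8(Rotate) == 0x93);
  return 0;
}
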
/// isVPERMILPMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to VPERMILPD*.
/// Note that VPERMIL mask matching is different depending on whether the underlying
@@ -4100,38 +4198,39 @@ static unsigned getShuffleVPERM2X128Immediate(ShuffleVectorSDNode *SVOp) {
/// to the same elements of the low, but to the higher half of the source.
/// In VPERMILPD the two lanes could be shuffled independently of each other
/// with the same restriction that lanes can't be crossed. Also handles PSHUFDY.
-static bool isVPERMILPMask(ArrayRef<int> Mask, EVT VT, bool HasFp256) {
- if (!HasFp256)
+static bool isVPERMILPMask(ArrayRef<int> Mask, MVT VT) {
+ unsigned EltSize = VT.getVectorElementType().getSizeInBits();
+ if (VT.getSizeInBits() < 256 || EltSize < 32)
return false;
-
+ bool symmetricMaskRequired = (EltSize == 32);
unsigned NumElts = VT.getVectorNumElements();
- // Only match 256-bit with 32/64-bit types
- if (!VT.is256BitVector() || (NumElts != 4 && NumElts != 8))
- return false;
unsigned NumLanes = VT.getSizeInBits()/128;
unsigned LaneSize = NumElts/NumLanes;
+ // 2 or 4 elements in one lane
+
+ SmallVector<int, 4> ExpectedMaskVal(LaneSize, -1);
for (unsigned l = 0; l != NumElts; l += LaneSize) {
for (unsigned i = 0; i != LaneSize; ++i) {
if (!isUndefOrInRange(Mask[i+l], l, l+LaneSize))
return false;
- if (NumElts != 8 || l == 0)
- continue;
- // VPERMILPS handling
- if (Mask[i] < 0)
- continue;
- if (!isUndefOrEqual(Mask[i+l], Mask[i]+l))
- return false;
+ if (symmetricMaskRequired) {
+ if (ExpectedMaskVal[i] < 0 && Mask[i+l] >= 0) {
+ ExpectedMaskVal[i] = Mask[i+l] - l;
+ continue;
+ }
+ if (!isUndefOrEqual(Mask[i+l], ExpectedMaskVal[i]+l))
+ return false;
+ }
}
}
-
return true;
}
/// isCommutedMOVLMask - Returns true if the shuffle mask is the reverse of
/// what x86 movss wants: the lowest element must be the lowest element of
/// vector 2, and the remaining elements must come from vector 1 in order.
-static bool isCommutedMOVLMask(ArrayRef<int> Mask, EVT VT,
+static bool isCommutedMOVLMask(ArrayRef<int> Mask, MVT VT,
bool V2IsSplat = false, bool V2IsUndef = false) {
if (!VT.is128BitVector())
return false;
@@ -4155,7 +4254,7 @@ static bool isCommutedMOVLMask(ArrayRef<int> Mask, EVT VT,
/// isMOVSHDUPMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVSHDUP.
/// Masks to match: <1, 1, 3, 3> or <1, 1, 3, 3, 5, 5, 7, 7>
-static bool isMOVSHDUPMask(ArrayRef<int> Mask, EVT VT,
+static bool isMOVSHDUPMask(ArrayRef<int> Mask, MVT VT,
const X86Subtarget *Subtarget) {
if (!Subtarget->hasSSE3())
return false;
@@ -4163,7 +4262,8 @@ static bool isMOVSHDUPMask(ArrayRef<int> Mask, EVT VT,
unsigned NumElems = VT.getVectorNumElements();
if ((VT.is128BitVector() && NumElems != 4) ||
- (VT.is256BitVector() && NumElems != 8))
+ (VT.is256BitVector() && NumElems != 8) ||
+ (VT.is512BitVector() && NumElems != 16))
return false;
// "i+1" is the value the indexed mask element must have
@@ -4178,7 +4278,7 @@ static bool isMOVSHDUPMask(ArrayRef<int> Mask, EVT VT,
/// isMOVSLDUPMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVSLDUP.
/// Masks to match: <0, 0, 2, 2> or <0, 0, 2, 2, 4, 4, 6, 6>
-static bool isMOVSLDUPMask(ArrayRef<int> Mask, EVT VT,
+static bool isMOVSLDUPMask(ArrayRef<int> Mask, MVT VT,
const X86Subtarget *Subtarget) {
if (!Subtarget->hasSSE3())
return false;
@@ -4186,7 +4286,8 @@ static bool isMOVSLDUPMask(ArrayRef<int> Mask, EVT VT,
unsigned NumElems = VT.getVectorNumElements();
if ((VT.is128BitVector() && NumElems != 4) ||
- (VT.is256BitVector() && NumElems != 8))
+ (VT.is256BitVector() && NumElems != 8) ||
+ (VT.is512BitVector() && NumElems != 16))
return false;
// "i" is the value the indexed mask element must have
@@ -4201,7 +4302,7 @@ static bool isMOVSLDUPMask(ArrayRef<int> Mask, EVT VT,
/// isMOVDDUPYMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to 256-bit
/// version of MOVDDUP.
-static bool isMOVDDUPYMask(ArrayRef<int> Mask, EVT VT, bool HasFp256) {
+static bool isMOVDDUPYMask(ArrayRef<int> Mask, MVT VT, bool HasFp256) {
if (!HasFp256 || !VT.is256BitVector())
return false;
@@ -4221,7 +4322,7 @@ static bool isMOVDDUPYMask(ArrayRef<int> Mask, EVT VT, bool HasFp256) {
/// isMOVDDUPMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to 128-bit
/// version of MOVDDUP.
-static bool isMOVDDUPMask(ArrayRef<int> Mask, EVT VT) {
+static bool isMOVDDUPMask(ArrayRef<int> Mask, MVT VT) {
if (!VT.is128BitVector())
return false;
@@ -4247,7 +4348,7 @@ static bool isVEXTRACTIndex(SDNode *N, unsigned vecWidth) {
uint64_t Index =
cast<ConstantSDNode>(N->getOperand(1).getNode())->getZExtValue();
- MVT VT = N->getValueType(0).getSimpleVT();
+ MVT VT = N->getSimpleValueType(0);
unsigned ElSize = VT.getVectorElementType().getSizeInBits();
bool Result = (Index * ElSize) % vecWidth == 0;
@@ -4265,7 +4366,7 @@ static bool isVINSERTIndex(SDNode *N, unsigned vecWidth) {
uint64_t Index =
cast<ConstantSDNode>(N->getOperand(2).getNode())->getZExtValue();
- MVT VT = N->getValueType(0).getSimpleVT();
+ MVT VT = N->getSimpleValueType(0);
unsigned ElSize = VT.getVectorElementType().getSizeInBits();
bool Result = (Index * ElSize) % vecWidth == 0;
@@ -4292,9 +4393,9 @@ bool X86::isVEXTRACT256Index(SDNode *N) {
/// the specified VECTOR_SHUFFLE mask with PSHUF* and SHUFP* instructions.
/// Handles 128-bit and 256-bit.
static unsigned getShuffleSHUFImmediate(ShuffleVectorSDNode *N) {
- MVT VT = N->getValueType(0).getSimpleVT();
+ MVT VT = N->getSimpleValueType(0);
- assert((VT.is128BitVector() || VT.is256BitVector()) &&
+ assert((VT.getSizeInBits() >= 128) &&
"Unsupported vector type for PSHUF/SHUFP");
// Handle 128 and 256-bit vector lengths. AVX defines PSHUF/SHUFP to operate
@@ -4303,10 +4404,10 @@ static unsigned getShuffleSHUFImmediate(ShuffleVectorSDNode *N) {
unsigned NumLanes = VT.getSizeInBits()/128;
unsigned NumLaneElts = NumElts/NumLanes;
- assert((NumLaneElts == 2 || NumLaneElts == 4) &&
- "Only supports 2 or 4 elements per lane");
+ assert((NumLaneElts == 2 || NumLaneElts == 4 || NumLaneElts == 8) &&
+ "Only supports 2, 4 or 8 elements per lane");
- unsigned Shift = (NumLaneElts == 4) ? 1 : 0;
+ unsigned Shift = (NumLaneElts >= 4) ? 1 : 0;
unsigned Mask = 0;
for (unsigned i = 0; i != NumElts; ++i) {
int Elt = N->getMaskElt(i);
@@ -4322,7 +4423,7 @@ static unsigned getShuffleSHUFImmediate(ShuffleVectorSDNode *N) {
/// getShufflePSHUFHWImmediate - Return the appropriate immediate to shuffle
/// the specified VECTOR_SHUFFLE mask with the PSHUFHW instruction.
static unsigned getShufflePSHUFHWImmediate(ShuffleVectorSDNode *N) {
- MVT VT = N->getValueType(0).getSimpleVT();
+ MVT VT = N->getSimpleValueType(0);
assert((VT == MVT::v8i16 || VT == MVT::v16i16) &&
"Unsupported vector type for PSHUFHW");
@@ -4346,7 +4447,7 @@ static unsigned getShufflePSHUFHWImmediate(ShuffleVectorSDNode *N) {
/// getShufflePSHUFLWImmediate - Return the appropriate immediate to shuffle
/// the specified VECTOR_SHUFFLE mask with the PSHUFLW instruction.
static unsigned getShufflePSHUFLWImmediate(ShuffleVectorSDNode *N) {
- MVT VT = N->getValueType(0).getSimpleVT();
+ MVT VT = N->getSimpleValueType(0);
assert((VT == MVT::v8i16 || VT == MVT::v16i16) &&
"Unsupported vector type for PSHUFHW");
@@ -4370,11 +4471,12 @@ static unsigned getShufflePSHUFLWImmediate(ShuffleVectorSDNode *N) {
/// getShufflePALIGNRImmediate - Return the appropriate immediate to shuffle
/// the specified VECTOR_SHUFFLE mask with the PALIGNR instruction.
static unsigned getShufflePALIGNRImmediate(ShuffleVectorSDNode *SVOp) {
- MVT VT = SVOp->getValueType(0).getSimpleVT();
- unsigned EltSize = VT.getVectorElementType().getSizeInBits() >> 3;
+ MVT VT = SVOp->getSimpleValueType(0);
+ unsigned EltSize = VT.is512BitVector() ? 1 :
+ VT.getVectorElementType().getSizeInBits() >> 3;
unsigned NumElts = VT.getVectorNumElements();
- unsigned NumLanes = VT.getSizeInBits()/128;
+ unsigned NumLanes = VT.is512BitVector() ? 1 : VT.getSizeInBits()/128;
unsigned NumLaneElts = NumElts/NumLanes;
int Val = 0;
@@ -4399,7 +4501,7 @@ static unsigned getExtractVEXTRACTImmediate(SDNode *N, unsigned vecWidth) {
uint64_t Index =
cast<ConstantSDNode>(N->getOperand(1).getNode())->getZExtValue();
- MVT VecVT = N->getOperand(0).getValueType().getSimpleVT();
+ MVT VecVT = N->getOperand(0).getSimpleValueType();
MVT ElVT = VecVT.getVectorElementType();
unsigned NumElemsPerChunk = vecWidth / ElVT.getSizeInBits();
@@ -4414,7 +4516,7 @@ static unsigned getInsertVINSERTImmediate(SDNode *N, unsigned vecWidth) {
uint64_t Index =
cast<ConstantSDNode>(N->getOperand(2).getNode())->getZExtValue();
- MVT VecVT = N->getValueType(0).getSimpleVT();
+ MVT VecVT = N->getSimpleValueType(0);
MVT ElVT = VecVT.getVectorElementType();
unsigned NumElemsPerChunk = vecWidth / ElVT.getSizeInBits();
@@ -4449,27 +4551,6 @@ unsigned X86::getInsertVINSERT256Immediate(SDNode *N) {
return getInsertVINSERTImmediate(N, 256);
}
-/// getShuffleCLImmediate - Return the appropriate immediate to shuffle
-/// the specified VECTOR_SHUFFLE mask with VPERMQ and VPERMPD instructions.
-/// Handles 256-bit.
-static unsigned getShuffleCLImmediate(ShuffleVectorSDNode *N) {
- MVT VT = N->getValueType(0).getSimpleVT();
-
- unsigned NumElts = VT.getVectorNumElements();
-
- assert((VT.is256BitVector() && NumElts == 4) &&
- "Unsupported vector type for VPERMQ/VPERMPD");
-
- unsigned Mask = 0;
- for (unsigned i = 0; i != NumElts; ++i) {
- int Elt = N->getMaskElt(i);
- if (Elt < 0)
- continue;
- Mask |= Elt << (i*2);
- }
-
- return Mask;
-}
/// isZeroNode - Returns true if Elt is a constant zero or a floating point
/// constant +0.0.
bool X86::isZeroNode(SDValue Elt) {
@@ -4484,7 +4565,7 @@ bool X86::isZeroNode(SDValue Elt) {
/// their permute mask.
static SDValue CommuteVectorShuffle(ShuffleVectorSDNode *SVOp,
SelectionDAG &DAG) {
- MVT VT = SVOp->getValueType(0).getSimpleVT();
+ MVT VT = SVOp->getSimpleValueType(0);
unsigned NumElems = VT.getVectorNumElements();
SmallVector<int, 8> MaskVec;
@@ -4506,7 +4587,7 @@ static SDValue CommuteVectorShuffle(ShuffleVectorSDNode *SVOp,
/// match movhlps. The lower half elements should come from upper half of
/// V1 (and in order), and the upper half elements should come from the upper
/// half of V2 (and in order).
-static bool ShouldXformToMOVHLPS(ArrayRef<int> Mask, EVT VT) {
+static bool ShouldXformToMOVHLPS(ArrayRef<int> Mask, MVT VT) {
if (!VT.is128BitVector())
return false;
if (VT.getVectorNumElements() != 4)
@@ -4563,7 +4644,7 @@ static bool WillBeConstantPoolLoad(SDNode *N) {
/// half of V2 (and in order). And since V1 will become the source of the
/// MOVLP, it must be either a vector load or a scalar load to vector.
static bool ShouldXformToMOVLP(SDNode *V1, SDNode *V2,
- ArrayRef<int> Mask, EVT VT) {
+ ArrayRef<int> Mask, MVT VT) {
if (!VT.is128BitVector())
return false;
@@ -4659,6 +4740,11 @@ static SDValue getZeroVector(EVT VT, const X86Subtarget *Subtarget,
Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8f32, Ops,
array_lengthof(Ops));
}
+ } else if (VT.is512BitVector()) { // AVX-512
+ SDValue Cst = DAG.getTargetConstant(0, MVT::i32);
+ SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst,
+ Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst };
+ Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i32, Ops, 16);
} else
llvm_unreachable("Unexpected vector type");
@@ -4715,7 +4801,7 @@ static SDValue getMOVL(SelectionDAG &DAG, SDLoc dl, EVT VT, SDValue V1,
}
/// getUnpackl - Returns a vector_shuffle node for an unpackl operation.
-static SDValue getUnpackl(SelectionDAG &DAG, SDLoc dl, EVT VT, SDValue V1,
+static SDValue getUnpackl(SelectionDAG &DAG, SDLoc dl, MVT VT, SDValue V1,
SDValue V2) {
unsigned NumElems = VT.getVectorNumElements();
SmallVector<int, 8> Mask;
@@ -4727,7 +4813,7 @@ static SDValue getUnpackl(SelectionDAG &DAG, SDLoc dl, EVT VT, SDValue V1,
}
/// getUnpackh - Returns a vector_shuffle node for an unpackh operation.
-static SDValue getUnpackh(SelectionDAG &DAG, SDLoc dl, EVT VT, SDValue V1,
+static SDValue getUnpackh(SelectionDAG &DAG, SDLoc dl, MVT VT, SDValue V1,
SDValue V2) {
unsigned NumElems = VT.getVectorNumElements();
SmallVector<int, 8> Mask;
@@ -4743,7 +4829,7 @@ static SDValue getUnpackh(SelectionDAG &DAG, SDLoc dl, EVT VT, SDValue V1,
// Generate shuffles which repeat i16 and i8 several times until they can be
// represented by v4f32 and then be manipulated by target supported shuffles.
static SDValue PromoteSplati8i16(SDValue V, SelectionDAG &DAG, int &EltNo) {
- EVT VT = V.getValueType();
+ MVT VT = V.getSimpleValueType();
int NumElems = VT.getVectorNumElements();
SDLoc dl(V);
@@ -4761,7 +4847,7 @@ static SDValue PromoteSplati8i16(SDValue V, SelectionDAG &DAG, int &EltNo) {
/// getLegalSplat - Generate a legal splat with supported x86 shuffles
static SDValue getLegalSplat(SelectionDAG &DAG, SDValue V, int EltNo) {
- EVT VT = V.getValueType();
+ MVT VT = V.getSimpleValueType();
SDLoc dl(V);
if (VT.is128BitVector()) {
@@ -4787,7 +4873,7 @@ static SDValue getLegalSplat(SelectionDAG &DAG, SDValue V, int EltNo) {
/// PromoteSplat - Splat is promoted to target supported vector shuffles.
static SDValue PromoteSplat(ShuffleVectorSDNode *SV, SelectionDAG &DAG) {
- EVT SrcVT = SV->getValueType(0);
+ MVT SrcVT = SV->getSimpleValueType(0);
SDValue V1 = SV->getOperand(0);
SDLoc dl(SV);
@@ -4810,7 +4896,7 @@ static SDValue PromoteSplat(ShuffleVectorSDNode *SV, SelectionDAG &DAG) {
// instruction because the target has no such instruction. Generate shuffles
// which repeat i16 and i8 several times until they fit in i32, and then can
// be manipulated by target supported shuffles.
- EVT EltVT = SrcVT.getVectorElementType();
+ MVT EltVT = SrcVT.getVectorElementType();
if (EltVT == MVT::i8 || EltVT == MVT::i16)
V1 = PromoteSplati8i16(V1, DAG, EltNo);
@@ -4832,7 +4918,7 @@ static SDValue getShuffleVectorZeroOrUndef(SDValue V2, unsigned Idx,
bool IsZero,
const X86Subtarget *Subtarget,
SelectionDAG &DAG) {
- EVT VT = V2.getValueType();
+ MVT VT = V2.getSimpleValueType();
SDValue V1 = IsZero
? getZeroVector(VT, Subtarget, DAG, SDLoc(V2)) : DAG.getUNDEF(VT);
unsigned NumElems = VT.getVectorNumElements();
@@ -4950,7 +5036,7 @@ static SDValue getShuffleScalarElt(SDNode *N, unsigned Index, SelectionDAG &DAG,
// Recurse into target specific vector shuffles to find scalars.
if (isTargetShuffle(Opcode)) {
- MVT ShufVT = V.getValueType().getSimpleVT();
+ MVT ShufVT = V.getSimpleValueType();
unsigned NumElems = ShufVT.getVectorNumElements();
SmallVector<int, 16> ShuffleMask;
bool IsUnary;
@@ -5048,7 +5134,8 @@ bool isShuffleMaskConsecutive(ShuffleVectorSDNode *SVOp,
/// logical left shift of a vector.
static bool isVectorShiftRight(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG,
bool &isLeft, SDValue &ShVal, unsigned &ShAmt) {
- unsigned NumElems = SVOp->getValueType(0).getVectorNumElements();
+ unsigned NumElems =
+ SVOp->getSimpleValueType(0).getVectorNumElements();
unsigned NumZeros = getNumOfConsecutiveZeros(
SVOp, NumElems, false /* check zeros from right */, DAG,
SVOp->getMaskElt(0));
@@ -5082,7 +5169,8 @@ static bool isVectorShiftRight(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG,
/// logical left shift of a vector.
static bool isVectorShiftLeft(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG,
bool &isLeft, SDValue &ShVal, unsigned &ShAmt) {
- unsigned NumElems = SVOp->getValueType(0).getVectorNumElements();
+ unsigned NumElems =
+ SVOp->getSimpleValueType(0).getVectorNumElements();
unsigned NumZeros = getNumOfConsecutiveZeros(
SVOp, NumElems, true /* check zeros from left */, DAG,
NumElems - SVOp->getMaskElt(NumElems - 1) - 1);
@@ -5118,7 +5206,7 @@ static bool isVectorShift(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG,
bool &isLeft, SDValue &ShVal, unsigned &ShAmt) {
// Although the logic below supports any bitwidth, there are no
// shift instructions which handle more than 128-bit vectors.
- if (!SVOp->getValueType(0).is128BitVector())
+ if (!SVOp->getSimpleValueType(0).is128BitVector())
return false;
if (isVectorShiftLeft(SVOp, DAG, isLeft, ShVal, ShAmt) ||
@@ -5223,9 +5311,8 @@ static SDValue getVShift(bool isLeft, EVT VT, SDValue SrcOp,
TLI.getScalarShiftAmountTy(SrcOp.getValueType()))));
}
-SDValue
-X86TargetLowering::LowerAsSplatVectorLoad(SDValue SrcOp, EVT VT, SDLoc dl,
- SelectionDAG &DAG) const {
+static SDValue
+LowerAsSplatVectorLoad(SDValue SrcOp, MVT VT, SDLoc dl, SelectionDAG &DAG) {
// Check if the scalar load can be widened into a vector load. And if
// the address is "base + cst" see if the cst can be "absorbed" into
@@ -5288,7 +5375,10 @@ X86TargetLowering::LowerAsSplatVectorLoad(SDValue SrcOp, EVT VT, SDLoc dl,
LD->getPointerInfo().getWithOffset(StartOffset),
false, false, false, 0);
- SmallVector<int, 8> Mask(NumElems, EltNo);
+ SmallVector<int, 8> Mask;
+ for (unsigned i = 0; i != NumElems; ++i)
+ Mask.push_back(EltNo);
+
return DAG.getVectorShuffle(NVT, dl, V1, DAG.getUNDEF(NVT), &Mask[0]);
}
@@ -5305,7 +5395,8 @@ X86TargetLowering::LowerAsSplatVectorLoad(SDValue SrcOp, EVT VT, SDLoc dl,
/// rather than undef via VZEXT_LOAD, but we do not detect that case today.
/// There's even a handy isZeroNode for that purpose.
static SDValue EltsFromConsecutiveLoads(EVT VT, SmallVectorImpl<SDValue> &Elts,
- SDLoc &DL, SelectionDAG &DAG) {
+ SDLoc &DL, SelectionDAG &DAG,
+ bool isAfterLegalize) {
EVT EltVT = VT.getVectorElementType();
unsigned NumElems = Elts.size();
@@ -5341,7 +5432,13 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, SmallVectorImpl<SDValue> &Elts,
// load of the entire vector width starting at the base pointer. If we found
// consecutive loads for the low half, generate a vzext_load node.
if (LastLoadedElt == NumElems - 1) {
+
+ if (isAfterLegalize &&
+ !DAG.getTargetLoweringInfo().isOperationLegal(ISD::LOAD, VT))
+ return SDValue();
+
SDValue NewLd = SDValue();
+
if (DAG.InferPtrAlignment(LDBase->getBasePtr()) >= 16)
NewLd = DAG.getLoad(VT, DL, LDBase->getChain(), LDBase->getBasePtr(),
LDBase->getPointerInfo(),
@@ -5398,12 +5495,12 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, SmallVectorImpl<SDValue> &Elts,
/// a scalar load, or a constant.
/// The VBROADCAST node is returned when a pattern is found,
/// or SDValue() otherwise.
-SDValue
-X86TargetLowering::LowerVectorBroadcast(SDValue Op, SelectionDAG &DAG) const {
+static SDValue LowerVectorBroadcast(SDValue Op, const X86Subtarget* Subtarget,
+ SelectionDAG &DAG) {
if (!Subtarget->hasFp256())
return SDValue();
- MVT VT = Op.getValueType().getSimpleVT();
+ MVT VT = Op.getSimpleValueType();
SDLoc dl(Op);
assert((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector()) &&
@@ -5450,7 +5547,7 @@ X86TargetLowering::LowerVectorBroadcast(SDValue Op, SelectionDAG &DAG) const {
return SDValue();
// Use the register form of the broadcast instruction available on AVX2.
- if (VT.is256BitVector())
+ if (VT.getSizeInBits() >= 256)
Sc = Extract128BitVector(Sc, 0, DAG, dl);
return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Sc);
}
@@ -5492,7 +5589,8 @@ X86TargetLowering::LowerVectorBroadcast(SDValue Op, SelectionDAG &DAG) const {
assert(C && "Invalid constant type");
- SDValue CP = DAG.getConstantPool(C, getPointerTy());
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ SDValue CP = DAG.getConstantPool(C, TLI.getPointerTy());
unsigned Alignment = cast<ConstantPoolSDNode>(CP)->getAlignment();
Ld = DAG.getLoad(CVT, dl, DAG.getEntryNode(), CP,
MachinePointerInfo::getConstantPool(),
@@ -5528,12 +5626,12 @@ X86TargetLowering::LowerVectorBroadcast(SDValue Op, SelectionDAG &DAG) const {
return SDValue();
}
-SDValue
-X86TargetLowering::buildFromShuffleMostly(SDValue Op, SelectionDAG &DAG) const {
- EVT VT = Op.getValueType();
+static SDValue buildFromShuffleMostly(SDValue Op, SelectionDAG &DAG) {
+ MVT VT = Op.getSimpleValueType();
// Skip if insert_vec_elt is not supported.
- if (!isOperationLegalOrCustom(ISD::INSERT_VECTOR_ELT, VT))
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (!TLI.isOperationLegalOrCustom(ISD::INSERT_VECTOR_ELT, VT))
return SDValue();
SDLoc DL(Op);
@@ -5606,7 +5704,7 @@ X86TargetLowering::buildFromShuffleMostly(SDValue Op, SelectionDAG &DAG) const {
SDValue
X86TargetLowering::LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG) const {
- EVT VT = Op.getValueType();
+ MVT VT = Op.getSimpleValueType();
assert((VT.getVectorElementType() == MVT::i1) && (VT.getSizeInBits() <= 16) &&
"Unexpected type in LowerBUILD_VECTORvXi1!");
@@ -5645,13 +5743,16 @@ X86TargetLowering::LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG) const {
SDValue FullMask = DAG.getNode(ISD::BITCAST, dl, MVT::v16i1,
DAG.getConstant(Immediate, MVT::i16));
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, FullMask,
- DAG.getIntPtrConstant(0));
+ DAG.getIntPtrConstant(0));
}
- if (!isSplatVector(Op.getNode()))
- llvm_unreachable("Unsupported predicate operation");
-
+ // Splat vector (with undefs)
SDValue In = Op.getOperand(0);
+ for (unsigned i = 1, e = Op.getNumOperands(); i != e; ++i) {
+ if (Op.getOperand(i) != In && Op.getOperand(i).getOpcode() != ISD::UNDEF)
+ llvm_unreachable("Unsupported predicate operation");
+ }
+
SDValue EFLAGS, X86CC;
if (In.getOpcode() == ISD::SETCC) {
SDValue Op0 = In.getOperand(0);
@@ -5679,7 +5780,7 @@ X86TargetLowering::LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG) const {
// res = allOnes ### CMOVNE -1, %res
// else
// res = allZero
- MVT InVT = In.getValueType().getSimpleVT();
+ MVT InVT = In.getSimpleValueType();
SDValue Bit1 = DAG.getNode(ISD::AND, dl, InVT, In, DAG.getConstant(1, InVT));
EFLAGS = EmitTest(Bit1, X86::COND_NE, DAG);
X86CC = DAG.getConstant(X86::COND_NE, MVT::i8);
@@ -5708,7 +5809,7 @@ SDValue
X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
SDLoc dl(Op);
- MVT VT = Op.getValueType().getSimpleVT();
+ MVT VT = Op.getSimpleValueType();
MVT ExtVT = VT.getVectorElementType();
unsigned NumElems = Op.getNumOperands();
@@ -5720,7 +5821,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
if (ISD::isBuildVectorAllZeros(Op.getNode())) {
// Canonicalize this to <4 x i32> to 1) ensure the zero vectors are CSE'd
// and 2) ensure that i64 scalars are eliminated on x86-32 hosts.
- if (VT == MVT::v4i32 || VT == MVT::v8i32)
+ if (VT == MVT::v4i32 || VT == MVT::v8i32 || VT == MVT::v16i32)
return Op;
return getZeroVector(VT, Subtarget, DAG, dl);
@@ -5733,10 +5834,11 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
if (VT == MVT::v4i32 || (VT == MVT::v8i32 && Subtarget->hasInt256()))
return Op;
- return getOnesVector(VT, Subtarget->hasInt256(), DAG, dl);
+ if (!VT.is512BitVector())
+ return getOnesVector(VT, Subtarget->hasInt256(), DAG, dl);
}
- SDValue Broadcast = LowerVectorBroadcast(Op, DAG);
+ SDValue Broadcast = LowerVectorBroadcast(Op, Subtarget, DAG);
if (Broadcast.getNode())
return Broadcast;
@@ -5815,7 +5917,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
if (ExtVT == MVT::i32 || ExtVT == MVT::f32 || ExtVT == MVT::f64 ||
(ExtVT == MVT::i64 && Subtarget->is64Bit())) {
- if (VT.is256BitVector()) {
+ if (VT.is256BitVector() || VT.is512BitVector()) {
SDValue ZeroVec = getZeroVector(VT, Subtarget, DAG, dl);
return DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, ZeroVec,
Item, DAG.getIntPtrConstant(0));
@@ -5980,7 +6082,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
V[i] = Op.getOperand(i);
// Check for elements which are consecutive loads.
- SDValue LD = EltsFromConsecutiveLoads(VT, V, dl, DAG);
+ SDValue LD = EltsFromConsecutiveLoads(VT, V, dl, DAG, false);
if (LD.getNode())
return LD;
@@ -6044,7 +6146,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
// to create 256-bit vectors from two other 128-bit ones.
static SDValue LowerAVXCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) {
SDLoc dl(Op);
- MVT ResVT = Op.getValueType().getSimpleVT();
+ MVT ResVT = Op.getSimpleValueType();
assert((ResVT.is256BitVector() ||
ResVT.is512BitVector()) && "Value type must be 256-/512-bit wide");
@@ -6073,10 +6175,14 @@ LowerVECTOR_SHUFFLEtoBlend(ShuffleVectorSDNode *SVOp,
SDValue V1 = SVOp->getOperand(0);
SDValue V2 = SVOp->getOperand(1);
SDLoc dl(SVOp);
- MVT VT = SVOp->getValueType(0).getSimpleVT();
+ MVT VT = SVOp->getSimpleValueType(0);
MVT EltVT = VT.getVectorElementType();
unsigned NumElems = VT.getVectorNumElements();
+ // There is no blend with immediate in AVX-512.
+ if (VT.is512BitVector())
+ return SDValue();
+
if (!Subtarget->hasSSE41() || EltVT == MVT::i8)
return SDValue();
if (!Subtarget->hasInt256() && VT == MVT::v16i16)
@@ -6382,10 +6488,10 @@ LowerVECTOR_SHUFFLEv8i16(SDValue Op, const X86Subtarget *Subtarget,
// 1. [ssse3] 1 x pshufb
// 2. [ssse3] 2 x pshufb + 1 x por
// 3. [all] v8i16 shuffle + N x pextrw + rotate + pinsrw
-static
-SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp,
- SelectionDAG &DAG,
- const X86TargetLowering &TLI) {
+static SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp,
+ const X86Subtarget* Subtarget,
+ SelectionDAG &DAG) {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDValue V1 = SVOp->getOperand(0);
SDValue V2 = SVOp->getOperand(1);
SDLoc dl(SVOp);
@@ -6401,7 +6507,7 @@ SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp,
// present, fall back to case 3.
// If SSSE3, use 1 pshufb instruction per vector with elements in the result.
- if (TLI.getSubtarget()->hasSSSE3()) {
+ if (Subtarget->hasSSSE3()) {
SmallVector<SDValue,16> pshufbMask;
// If all result elements are from one input vector, then only translate
@@ -6514,7 +6620,7 @@ static
SDValue LowerVECTOR_SHUFFLEv32i8(ShuffleVectorSDNode *SVOp,
const X86Subtarget *Subtarget,
SelectionDAG &DAG) {
- MVT VT = SVOp->getValueType(0).getSimpleVT();
+ MVT VT = SVOp->getSimpleValueType(0);
SDValue V1 = SVOp->getOperand(0);
SDValue V2 = SVOp->getOperand(1);
SDLoc dl(SVOp);
@@ -6562,7 +6668,7 @@ SDValue LowerVECTOR_SHUFFLEv32i8(ShuffleVectorSDNode *SVOp,
static
SDValue RewriteAsNarrowerShuffle(ShuffleVectorSDNode *SVOp,
SelectionDAG &DAG) {
- MVT VT = SVOp->getValueType(0).getSimpleVT();
+ MVT VT = SVOp->getSimpleValueType(0);
SDLoc dl(SVOp);
unsigned NumElems = VT.getVectorNumElements();
MVT NewVT;
@@ -6599,7 +6705,7 @@ SDValue RewriteAsNarrowerShuffle(ShuffleVectorSDNode *SVOp,
/// getVZextMovL - Return a zero-extending vector move low node.
///
-static SDValue getVZextMovL(MVT VT, EVT OpVT,
+static SDValue getVZextMovL(MVT VT, MVT OpVT,
SDValue SrcOp, SelectionDAG &DAG,
const X86Subtarget *Subtarget, SDLoc dl) {
if (VT == MVT::v2f64 || VT == MVT::v4f32) {
@@ -6641,7 +6747,7 @@ LowerVECTOR_SHUFFLE_256(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) {
if (NewOp.getNode())
return NewOp;
- MVT VT = SVOp->getValueType(0).getSimpleVT();
+ MVT VT = SVOp->getSimpleValueType(0);
unsigned NumElems = VT.getVectorNumElements();
unsigned NumLaneElems = NumElems / 2;
@@ -6753,7 +6859,7 @@ LowerVECTOR_SHUFFLE_128v4(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) {
SDValue V1 = SVOp->getOperand(0);
SDValue V2 = SVOp->getOperand(1);
SDLoc dl(SVOp);
- MVT VT = SVOp->getValueType(0).getSimpleVT();
+ MVT VT = SVOp->getSimpleValueType(0);
assert(VT.is128BitVector() && "Unsupported vector size");
@@ -6904,7 +7010,7 @@ static bool MayFoldVectorLoad(SDValue V) {
static
SDValue getMOVDDup(SDValue &Op, SDLoc &dl, SDValue V1, SelectionDAG &DAG) {
- EVT VT = Op.getValueType();
+ MVT VT = Op.getSimpleValueType();
// Canonicalize to v2f64.
V1 = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, V1);
@@ -6918,7 +7024,7 @@ SDValue getMOVLowToHigh(SDValue &Op, SDLoc &dl, SelectionDAG &DAG,
bool HasSSE2) {
SDValue V1 = Op.getOperand(0);
SDValue V2 = Op.getOperand(1);
- EVT VT = Op.getValueType();
+ MVT VT = Op.getSimpleValueType();
assert(VT != MVT::v2i64 && "unsupported shuffle type");
@@ -6936,7 +7042,7 @@ static
SDValue getMOVHighToLow(SDValue &Op, SDLoc &dl, SelectionDAG &DAG) {
SDValue V1 = Op.getOperand(0);
SDValue V2 = Op.getOperand(1);
- EVT VT = Op.getValueType();
+ MVT VT = Op.getSimpleValueType();
assert((VT == MVT::v4i32 || VT == MVT::v4f32) &&
"unsupported shuffle type");
@@ -6952,7 +7058,7 @@ static
SDValue getMOVLP(SDValue &Op, SDLoc &dl, SelectionDAG &DAG, bool HasSSE2) {
SDValue V1 = Op.getOperand(0);
SDValue V2 = Op.getOperand(1);
- EVT VT = Op.getValueType();
+ MVT VT = Op.getSimpleValueType();
unsigned NumElems = VT.getVectorNumElements();
// Use MOVLPS and MOVLPD in case V1 or V2 are loads. During isel, the second
@@ -7006,13 +7112,13 @@ SDValue getMOVLP(SDValue &Op, SDLoc &dl, SelectionDAG &DAG, bool HasSSE2) {
}
// Reduce a vector shuffle to zext.
-SDValue
-X86TargetLowering::LowerVectorIntExtend(SDValue Op, SelectionDAG &DAG) const {
+static SDValue LowerVectorIntExtend(SDValue Op, const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
// PMOVZX is only available from SSE41.
if (!Subtarget->hasSSE41())
return SDValue();
- EVT VT = Op.getValueType();
+ MVT VT = Op.getSimpleValueType();
// Only AVX2 supports 256-bit vector integer extension.
if (!Subtarget->hasInt256() && VT.is256BitVector())
@@ -7051,12 +7157,11 @@ X86TargetLowering::LowerVectorIntExtend(SDValue Op, SelectionDAG &DAG) const {
return SDValue();
}
- LLVMContext *Context = DAG.getContext();
unsigned NBits = VT.getVectorElementType().getSizeInBits() << Shift;
- EVT NeVT = EVT::getIntegerVT(*Context, NBits);
- EVT NVT = EVT::getVectorVT(*Context, NeVT, NumElems >> Shift);
+ MVT NeVT = MVT::getIntegerVT(NBits);
+ MVT NVT = MVT::getVectorVT(NeVT, NumElems >> Shift);
- if (!isTypeLegal(NVT))
+ if (!DAG.getTargetLoweringInfo().isTypeLegal(NVT))
return SDValue();
// Simplify the operand as it's prepared to be fed into shuffle.
@@ -7064,8 +7169,8 @@ X86TargetLowering::LowerVectorIntExtend(SDValue Op, SelectionDAG &DAG) const {
if (V1.getOpcode() == ISD::BITCAST &&
V1.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR &&
V1.getOperand(0).getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
- V1.getOperand(0)
- .getOperand(0).getValueType().getSizeInBits() == SignificantBits) {
+ V1.getOperand(0).getOperand(0)
+ .getSimpleValueType().getSizeInBits() == SignificantBits) {
// (bitcast (sclr2vec (ext_vec_elt x))) -> (bitcast x)
SDValue V = V1.getOperand(0).getOperand(0).getOperand(0);
ConstantSDNode *CIdx =
@@ -7074,19 +7179,19 @@ X86TargetLowering::LowerVectorIntExtend(SDValue Op, SelectionDAG &DAG) const {
// selection to fold it. Otherwise, we will shorten the conversion sequence.
if (CIdx && CIdx->getZExtValue() == 0 &&
(!ISD::isNormalLoad(V.getNode()) || !V.hasOneUse())) {
- if (V.getValueSizeInBits() > V1.getValueSizeInBits()) {
+ MVT FullVT = V.getSimpleValueType();
+ MVT V1VT = V1.getSimpleValueType();
+ if (FullVT.getSizeInBits() > V1VT.getSizeInBits()) {
// The "ext_vec_elt" node is wider than the result node.
// In this case we should extract subvector from V.
// (bitcast (sclr2vec (ext_vec_elt x))) -> (bitcast (extract_subvector x)).
- unsigned Ratio = V.getValueSizeInBits() / V1.getValueSizeInBits();
- EVT FullVT = V.getValueType();
- EVT SubVecVT = EVT::getVectorVT(*Context,
- FullVT.getVectorElementType(),
+ unsigned Ratio = FullVT.getSizeInBits() / V1VT.getSizeInBits();
+ MVT SubVecVT = MVT::getVectorVT(FullVT.getVectorElementType(),
FullVT.getVectorNumElements()/Ratio);
V = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVecVT, V,
DAG.getIntPtrConstant(0));
}
- V1 = DAG.getNode(ISD::BITCAST, DL, V1.getValueType(), V);
+ V1 = DAG.getNode(ISD::BITCAST, DL, V1VT, V);
}
}
@@ -7094,10 +7199,11 @@ X86TargetLowering::LowerVectorIntExtend(SDValue Op, SelectionDAG &DAG) const {
DAG.getNode(X86ISD::VZEXT, DL, NVT, V1));
}
-SDValue
-X86TargetLowering::NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG) const {
+static SDValue
+NormalizeVectorShuffle(SDValue Op, const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
- MVT VT = Op.getValueType().getSimpleVT();
+ MVT VT = Op.getSimpleValueType();
SDLoc dl(Op);
SDValue V1 = Op.getOperand(0);
SDValue V2 = Op.getOperand(1);
@@ -7108,13 +7214,13 @@ X86TargetLowering::NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG) const {
// Handle splat operations
if (SVOp->isSplat()) {
// Use vbroadcast whenever the splat comes from a foldable load
- SDValue Broadcast = LowerVectorBroadcast(Op, DAG);
+ SDValue Broadcast = LowerVectorBroadcast(Op, Subtarget, DAG);
if (Broadcast.getNode())
return Broadcast;
}
// Check integer expanding shuffles.
- SDValue NewOp = LowerVectorIntExtend(Op, DAG);
+ SDValue NewOp = LowerVectorIntExtend(Op, Subtarget, DAG);
if (NewOp.getNode())
return NewOp;
@@ -7132,7 +7238,7 @@ X86TargetLowering::NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG) const {
if (ISD::isBuildVectorAllZeros(V2.getNode())) {
SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG);
if (NewOp.getNode()) {
- MVT NewVT = NewOp.getValueType().getSimpleVT();
+ MVT NewVT = NewOp.getSimpleValueType();
if (isCommutedMOVLMask(cast<ShuffleVectorSDNode>(NewOp)->getMask(),
NewVT, true, false))
return getVZextMovL(VT, NewVT, NewOp.getOperand(0),
@@ -7141,7 +7247,7 @@ X86TargetLowering::NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG) const {
} else if (ISD::isBuildVectorAllZeros(V1.getNode())) {
SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG);
if (NewOp.getNode()) {
- MVT NewVT = NewOp.getValueType().getSimpleVT();
+ MVT NewVT = NewOp.getSimpleValueType();
if (isMOVLMask(cast<ShuffleVectorSDNode>(NewOp)->getMask(), NewVT))
return getVZextMovL(VT, NewVT, NewOp.getOperand(1),
DAG, Subtarget, dl);
@@ -7156,7 +7262,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
SDValue V1 = Op.getOperand(0);
SDValue V2 = Op.getOperand(1);
- MVT VT = Op.getValueType().getSimpleVT();
+ MVT VT = Op.getSimpleValueType();
SDLoc dl(Op);
unsigned NumElems = VT.getVectorNumElements();
bool V1IsUndef = V1.getOpcode() == ISD::UNDEF;
@@ -7194,7 +7300,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
// Normalize the input vectors. Here splats, zeroed vectors, profitable
// narrowing and commutation of operands should be handled. The actual code
// doesn't include all of those; work in progress...
- SDValue NewOp = NormalizeVectorShuffle(Op, DAG);
+ SDValue NewOp = NormalizeVectorShuffle(Op, Subtarget, DAG);
if (NewOp.getNode())
return NewOp;
@@ -7354,7 +7460,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
}
// Normalize the node to match x86 shuffle ops if needed
- if (!V2IsUndef && (isSHUFPMask(M, VT, HasFp256, /* Commuted */ true)))
+ if (!V2IsUndef && (isSHUFPMask(M, VT, /* Commuted */ true)))
return CommuteVectorShuffle(SVOp, DAG);
// The checks below are all present in isShuffleMaskLegal, but they are
@@ -7377,7 +7483,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
getShufflePSHUFLWImmediate(SVOp),
DAG);
- if (isSHUFPMask(M, VT, HasFp256))
+ if (isSHUFPMask(M, VT))
return getTargetShuffleNode(X86ISD::SHUFP, dl, VT, V1, V2,
getShuffleSHUFImmediate(SVOp), DAG);
@@ -7396,8 +7502,8 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
return getTargetShuffleNode(X86ISD::MOVDDUP, dl, VT, V1, DAG);
// Handle VPERMILPS/D* permutations
- if (isVPERMILPMask(M, VT, HasFp256)) {
- if (HasInt256 && VT == MVT::v8i32)
+ if (isVPERMILPMask(M, VT)) {
+ if ((HasInt256 && VT == MVT::v8i32) || VT == MVT::v16i32)
return getTargetShuffleNode(X86ISD::PSHUFD, dl, VT, V1,
getShuffleSHUFImmediate(SVOp), DAG);
return getTargetShuffleNode(X86ISD::VPERMILP, dl, VT, V1,
@@ -7413,21 +7519,28 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
if (BlendOp.getNode())
return BlendOp;
- if (V2IsUndef && HasInt256 && (VT == MVT::v8i32 || VT == MVT::v8f32)) {
- SmallVector<SDValue, 8> permclMask;
- for (unsigned i = 0; i != 8; ++i) {
- permclMask.push_back(DAG.getConstant((M[i]>=0) ? M[i] : 0, MVT::i32));
+ unsigned Imm8;
+ if (V2IsUndef && HasInt256 && isPermImmMask(M, VT, Imm8))
+ return getTargetShuffleNode(X86ISD::VPERMI, dl, VT, V1, Imm8, DAG);
+
+ if ((V2IsUndef && HasInt256 && VT.is256BitVector() && NumElems == 8) ||
+ VT.is512BitVector()) {
+ MVT MaskEltVT = MVT::getIntegerVT(VT.getVectorElementType().getSizeInBits());
+ MVT MaskVectorVT = MVT::getVectorVT(MaskEltVT, NumElems);
+ SmallVector<SDValue, 16> permclMask;
+ for (unsigned i = 0; i != NumElems; ++i) {
+ permclMask.push_back(DAG.getConstant((M[i]>=0) ? M[i] : 0, MaskEltVT));
}
- SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i32,
- &permclMask[0], 8);
- // Bitcast is for VPERMPS since mask is v8i32 but node takes v8f32
- return DAG.getNode(X86ISD::VPERMV, dl, VT,
- DAG.getNode(ISD::BITCAST, dl, VT, Mask), V1);
- }
- if (V2IsUndef && HasInt256 && (VT == MVT::v4i64 || VT == MVT::v4f64))
- return getTargetShuffleNode(X86ISD::VPERMI, dl, VT, V1,
- getShuffleCLImmediate(SVOp), DAG);
+ SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, dl, MaskVectorVT,
+ &permclMask[0], NumElems);
+ if (V2IsUndef)
+ // Bitcast is for VPERMPS since mask is v8i32 but node takes v8f32
+ return DAG.getNode(X86ISD::VPERMV, dl, VT,
+ DAG.getNode(ISD::BITCAST, dl, VT, Mask), V1);
+ return DAG.getNode(X86ISD::VPERMV3, dl, VT,
+ DAG.getNode(ISD::BITCAST, dl, VT, Mask), V1, V2);
+ }
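The mask built here feeds a variable permute; in scalar terms the two-source VPERMV3 form behaves as below. A sketch for 16 x i32 lanes — vpermv3 is an illustrative name, not an LLVM symbol:

#include <cstdint>

// Mask entries in [0,16) select from V1 and entries in [16,32) select
// from V2; undef (-1) shuffle entries were rewritten to 0 when
// permclMask was built above.
static void vpermv3(uint32_t (&R)[16], const int (&M)[16],
                    const uint32_t (&V1)[16], const uint32_t (&V2)[16]) {
  for (unsigned i = 0; i != 16; ++i) {
    unsigned Sel = M[i] >= 0 ? (unsigned)M[i] : 0;
    R[i] = Sel < 16 ? V1[Sel] : V2[Sel - 16];
  }
}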
//===--------------------------------------------------------------------===//
// Since no target specific shuffle was selected for this generic one,
@@ -7443,7 +7556,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
}
if (VT == MVT::v16i8) {
- SDValue NewOp = LowerVECTOR_SHUFFLEv16i8(SVOp, DAG, *this);
+ SDValue NewOp = LowerVECTOR_SHUFFLEv16i8(SVOp, Subtarget, DAG);
if (NewOp.getNode())
return NewOp;
}
@@ -7467,10 +7580,10 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
}
static SDValue LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG) {
- MVT VT = Op.getValueType().getSimpleVT();
+ MVT VT = Op.getSimpleValueType();
SDLoc dl(Op);
- if (!Op.getOperand(0).getValueType().getSimpleVT().is128BitVector())
+ if (!Op.getOperand(0).getSimpleValueType().is128BitVector())
return SDValue();
if (VT.getSizeInBits() == 8) {
@@ -7532,21 +7645,38 @@ SDValue
X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
SelectionDAG &DAG) const {
SDLoc dl(Op);
- if (!isa<ConstantSDNode>(Op.getOperand(1)))
- return SDValue();
-
SDValue Vec = Op.getOperand(0);
- MVT VecVT = Vec.getValueType().getSimpleVT();
+ MVT VecVT = Vec.getSimpleValueType();
+ SDValue Idx = Op.getOperand(1);
+ if (!isa<ConstantSDNode>(Idx)) {
+ if (VecVT.is512BitVector() ||
+ (VecVT.is256BitVector() && Subtarget->hasInt256() &&
+ VecVT.getVectorElementType().getSizeInBits() == 32)) {
+
+ MVT MaskEltVT =
+ MVT::getIntegerVT(VecVT.getVectorElementType().getSizeInBits());
+ MVT MaskVT = MVT::getVectorVT(MaskEltVT, VecVT.getSizeInBits() /
+ MaskEltVT.getSizeInBits());
+
+ Idx = DAG.getZExtOrTrunc(Idx, dl, MaskEltVT);
+ SDValue Mask = DAG.getNode(X86ISD::VINSERT, dl, MaskVT,
+ getZeroVector(MaskVT, Subtarget, DAG, dl),
+ Idx, DAG.getConstant(0, getPointerTy()));
+ SDValue Perm = DAG.getNode(X86ISD::VPERMV, dl, VecVT, Mask, Vec);
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, Op.getValueType(),
+ Perm, DAG.getConstant(0, getPointerTy()));
+ }
+ return SDValue();
+ }
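In scalar terms, the permute-based extract above works like the sketch below: VINSERT puts the index into lane 0 of a zero mask, VPERMV routes the named element to lane 0, and the final extract reads lane 0. Shown for a 16 x i32 vector; extractVarIdx is an illustrative helper, not LLVM code:

#include <cstdint>

static uint32_t extractVarIdx(const uint32_t (&Vec)[16], uint32_t Idx) {
  uint32_t Mask[16] = { Idx };       // lane 0 = Idx, remaining lanes 0
  uint32_t Perm[16];
  for (unsigned i = 0; i != 16; ++i) // VPERMV: Perm[i] = Vec[Mask[i]]
    Perm[i] = Vec[Mask[i] & 15];
  return Perm[0];                    // EXTRACT_VECTOR_ELT of lane 0
}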
// If this is a 256-bit vector result, first extract the 128-bit vector and
// then extract the element from the 128-bit vector.
if (VecVT.is256BitVector() || VecVT.is512BitVector()) {
- SDValue Idx = Op.getOperand(1);
- unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
+ unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
// Get the 128-bit vector.
Vec = Extract128BitVector(Vec, IdxVal, DAG, dl);
- EVT EltVT = VecVT.getVectorElementType();
+ MVT EltVT = VecVT.getVectorElementType();
unsigned ElemsPerChunk = 128 / EltVT.getSizeInBits();
@@ -7565,7 +7695,7 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
return Res;
}
- MVT VT = Op.getValueType().getSimpleVT();
+ MVT VT = Op.getSimpleValueType();
// TODO: handle v16i8.
if (VT.getSizeInBits() == 16) {
SDValue Vec = Op.getOperand(0);
@@ -7592,7 +7722,7 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
// SHUFPS the element to the lowest double word, then movss.
int Mask[4] = { static_cast<int>(Idx), -1, -1, -1 };
- MVT VVT = Op.getOperand(0).getValueType().getSimpleVT();
+ MVT VVT = Op.getOperand(0).getSimpleValueType();
SDValue Vec = DAG.getVectorShuffle(VVT, dl, Op.getOperand(0),
DAG.getUNDEF(VVT), Mask);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Vec,
@@ -7611,7 +7741,7 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
// Note if the lower 64 bits of the result of the UNPCKHPD is then stored
// to a f64mem, the whole operation is folded into a single MOVHPDmr.
int Mask[2] = { 1, -1 };
- MVT VVT = Op.getOperand(0).getValueType().getSimpleVT();
+ MVT VVT = Op.getOperand(0).getSimpleValueType();
SDValue Vec = DAG.getVectorShuffle(VVT, dl, Op.getOperand(0),
DAG.getUNDEF(VVT), Mask);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Vec,
@@ -7622,7 +7752,7 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
}
static SDValue LowerINSERT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG) {
- MVT VT = Op.getValueType().getSimpleVT();
+ MVT VT = Op.getSimpleValueType();
MVT EltVT = VT.getVectorElementType();
SDLoc dl(Op);
@@ -7676,7 +7806,7 @@ static SDValue LowerINSERT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG) {
SDValue
X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const {
- MVT VT = Op.getValueType().getSimpleVT();
+ MVT VT = Op.getSimpleValueType();
MVT EltVT = VT.getVectorElementType();
SDLoc dl(Op);
@@ -7724,17 +7854,15 @@ X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const {
}
static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) {
- LLVMContext *Context = DAG.getContext();
SDLoc dl(Op);
- MVT OpVT = Op.getValueType().getSimpleVT();
+ MVT OpVT = Op.getSimpleValueType();
// If this is a 256-bit vector result, first insert into a 128-bit
// vector and then insert into the 256-bit vector.
if (!OpVT.is128BitVector()) {
// Insert into a 128-bit vector.
unsigned SizeFactor = OpVT.getSizeInBits()/128;
- EVT VT128 = EVT::getVectorVT(*Context,
- OpVT.getVectorElementType(),
+ MVT VT128 = MVT::getVectorVT(OpVT.getVectorElementType(),
OpVT.getVectorNumElements() / SizeFactor);
Op = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT128, Op.getOperand(0));
@@ -7762,8 +7890,8 @@ static SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, const X86Subtarget *Subtarget,
SDValue In = Op.getOperand(0);
SDValue Idx = Op.getOperand(1);
unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
- EVT ResVT = Op.getValueType();
- EVT InVT = In.getValueType();
+ MVT ResVT = Op.getSimpleValueType();
+ MVT InVT = In.getSimpleValueType();
if (Subtarget->hasFp256()) {
if (ResVT.is128BitVector() &&
@@ -7790,16 +7918,16 @@ static SDValue LowerINSERT_SUBVECTOR(SDValue Op, const X86Subtarget *Subtarget,
SDValue SubVec = Op.getNode()->getOperand(1);
SDValue Idx = Op.getNode()->getOperand(2);
- if ((Op.getNode()->getValueType(0).is256BitVector() ||
- Op.getNode()->getValueType(0).is512BitVector()) &&
- SubVec.getNode()->getValueType(0).is128BitVector() &&
+ if ((Op.getNode()->getSimpleValueType(0).is256BitVector() ||
+ Op.getNode()->getSimpleValueType(0).is512BitVector()) &&
+ SubVec.getNode()->getSimpleValueType(0).is128BitVector() &&
isa<ConstantSDNode>(Idx)) {
unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
return Insert128BitVector(Vec, SubVec, IdxVal, DAG, dl);
}
- if (Op.getNode()->getValueType(0).is512BitVector() &&
- SubVec.getNode()->getValueType(0).is256BitVector() &&
+ if (Op.getNode()->getSimpleValueType(0).is512BitVector() &&
+ SubVec.getNode()->getSimpleValueType(0).is256BitVector() &&
isa<ConstantSDNode>(Idx)) {
unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
return Insert256BitVector(Vec, SubVec, IdxVal, DAG, dl);
@@ -8108,10 +8236,9 @@ static SDValue LowerToTLSExecModel(GlobalAddressSDNode *GA, SelectionDAG &DAG,
Value *Ptr = Constant::getNullValue(Type::getInt8PtrTy(*DAG.getContext(),
is64Bit ? 257 : 256));
- SDValue ThreadPointer = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
- DAG.getIntPtrConstant(0),
- MachinePointerInfo(Ptr),
- false, false, false, 0);
+ SDValue ThreadPointer =
+ DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), DAG.getIntPtrConstant(0),
+ MachinePointerInfo(Ptr), false, false, false, 0);
unsigned char OperandFlags = 0;
// Most TLS accesses are not RIP relative, even on x86-64. One exception is
@@ -8133,21 +8260,20 @@ static SDValue LowerToTLSExecModel(GlobalAddressSDNode *GA, SelectionDAG &DAG,
// emit "addl x@ntpoff,%eax" (local exec)
// or "addl x@indntpoff,%eax" (initial exec)
// or "addl x@gotntpoff(%ebx) ,%eax" (initial exec, 32-bit pic)
- SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl,
- GA->getValueType(0),
- GA->getOffset(), OperandFlags);
+ SDValue TGA =
+ DAG.getTargetGlobalAddress(GA->getGlobal(), dl, GA->getValueType(0),
+ GA->getOffset(), OperandFlags);
SDValue Offset = DAG.getNode(WrapperKind, dl, PtrVT, TGA);
if (model == TLSModel::InitialExec) {
if (isPIC && !is64Bit) {
Offset = DAG.getNode(ISD::ADD, dl, PtrVT,
- DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), PtrVT),
+ DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), PtrVT),
Offset);
}
Offset = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Offset,
- MachinePointerInfo::getGOT(), false, false, false,
- 0);
+ MachinePointerInfo::getGOT(), false, false, false, 0);
}
// The address of the thread local variable is the add of the thread
@@ -8305,6 +8431,11 @@ SDValue X86TargetLowering::LowerShiftParts(SDValue Op, SelectionDAG &DAG) const{
SDValue ShOpLo = Op.getOperand(0);
SDValue ShOpHi = Op.getOperand(1);
SDValue ShAmt = Op.getOperand(2);
+ // X86ISD::SHLD and X86ISD::SHRD have defined overflow behavior but the
+ // generic ISD nodes do not. Insert an AND to be safe; it is optimized away
+ // during isel.
+ SDValue SafeShAmt = DAG.getNode(ISD::AND, dl, MVT::i8, ShAmt,
+ DAG.getConstant(VTBits - 1, MVT::i8));
SDValue Tmp1 = isSRA ? DAG.getNode(ISD::SRA, dl, VT, ShOpHi,
DAG.getConstant(VTBits - 1, MVT::i8))
: DAG.getConstant(0, VT);
@@ -8312,12 +8443,15 @@ SDValue X86TargetLowering::LowerShiftParts(SDValue Op, SelectionDAG &DAG) const{
SDValue Tmp2, Tmp3;
if (Op.getOpcode() == ISD::SHL_PARTS) {
Tmp2 = DAG.getNode(X86ISD::SHLD, dl, VT, ShOpHi, ShOpLo, ShAmt);
- Tmp3 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
+ Tmp3 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, SafeShAmt);
} else {
Tmp2 = DAG.getNode(X86ISD::SHRD, dl, VT, ShOpLo, ShOpHi, ShAmt);
- Tmp3 = DAG.getNode(isSRA ? ISD::SRA : ISD::SRL, dl, VT, ShOpHi, ShAmt);
+ Tmp3 = DAG.getNode(isSRA ? ISD::SRA : ISD::SRL, dl, VT, ShOpHi, SafeShAmt);
}
+ // If the shift amount is larger than or equal to the width of a part, we
+ // can't rely on the results of shld/shrd. Insert a test and select the
+ // appropriate values for large shift amounts.
SDValue AndNode = DAG.getNode(ISD::AND, dl, MVT::i8, ShAmt,
DAG.getConstant(VTBits, MVT::i8));
SDValue Cond = DAG.getNode(X86ISD::CMP, dl, MVT::i32,
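The AND/CMP pair above tests bit 5 of the amount; together with the SafeShAmt masking, the whole SHL_PARTS lowering reduces to the scalar computation below. A minimal sketch assuming 32-bit parts (shld32 models x86 SHLD, which masks its count to 5 bits; names are illustrative):

#include <cstdint>

static uint32_t shld32(uint32_t Hi, uint32_t Lo, unsigned Amt) {
  Amt &= 31;                                 // hardware masks the count
  return Amt ? (Hi << Amt) | (Lo >> (32 - Amt)) : Hi;
}

static uint64_t shlParts(uint32_t Lo, uint32_t Hi, unsigned Amt) {
  unsigned SafeAmt = Amt & 31;               // the SafeShAmt AND
  uint32_t Tmp2 = shld32(Hi, Lo, Amt);       // X86ISD::SHLD
  uint32_t Tmp3 = Lo << SafeAmt;             // ISD::SHL on the low part
  uint32_t OutHi = (Amt & 32) ? Tmp3 : Tmp2; // the CMOV pair: for
  uint32_t OutLo = (Amt & 32) ? 0 : Tmp3;    // Amt >= 32, lo is 0 and
  return ((uint64_t)OutHi << 32) | OutLo;    // hi is the shifted lo
}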
@@ -8750,9 +8884,9 @@ X86TargetLowering:: FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG,
static SDValue LowerAVXExtend(SDValue Op, SelectionDAG &DAG,
const X86Subtarget *Subtarget) {
- MVT VT = Op->getValueType(0).getSimpleVT();
+ MVT VT = Op->getSimpleValueType(0);
SDValue In = Op->getOperand(0);
- MVT InVT = In.getValueType().getSimpleVT();
+ MVT InVT = In.getSimpleValueType();
SDLoc dl(Op);
// Optimize vectors in AVX mode:
@@ -8768,7 +8902,8 @@ static SDValue LowerAVXExtend(SDValue Op, SelectionDAG &DAG,
// Concat upper and lower parts.
//
- if (((VT != MVT::v8i32) || (InVT != MVT::v8i16)) &&
+ if (((VT != MVT::v16i16) || (InVT != MVT::v16i8)) &&
+ ((VT != MVT::v8i32) || (InVT != MVT::v8i16)) &&
((VT != MVT::v4i64) || (InVT != MVT::v4i32)))
return SDValue();
@@ -8790,8 +8925,39 @@ static SDValue LowerAVXExtend(SDValue Op, SelectionDAG &DAG,
return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, OpLo, OpHi);
}
-SDValue X86TargetLowering::LowerANY_EXTEND(SDValue Op,
- SelectionDAG &DAG) const {
+static SDValue LowerZERO_EXTEND_AVX512(SDValue Op,
+ SelectionDAG &DAG) {
+ MVT VT = Op->getValueType(0).getSimpleVT();
+ SDValue In = Op->getOperand(0);
+ MVT InVT = In.getValueType().getSimpleVT();
+ SDLoc DL(Op);
+ unsigned int NumElts = VT.getVectorNumElements();
+ if (NumElts != 8 && NumElts != 16)
+ return SDValue();
+
+ if (VT.is512BitVector() && InVT.getVectorElementType() != MVT::i1)
+ return DAG.getNode(X86ISD::VZEXT, DL, VT, In);
+
+ EVT ExtVT = (NumElts == 8)? MVT::v8i64 : MVT::v16i32;
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ // Only the i1 mask extension remains at this point.
+ assert(InVT.getVectorElementType() == MVT::i1);
+ SDValue Cst = DAG.getTargetConstant(1, ExtVT.getScalarType());
+ const Constant *C = (dyn_cast<ConstantSDNode>(Cst))->getConstantIntValue();
+ SDValue CP = DAG.getConstantPool(C, TLI.getPointerTy());
+ unsigned Alignment = cast<ConstantPoolSDNode>(CP)->getAlignment();
+ SDValue Ld = DAG.getLoad(Cst.getValueType(), DL, DAG.getEntryNode(), CP,
+ MachinePointerInfo::getConstantPool(),
+ false, false, false, Alignment);
+
+ SDValue Brcst = DAG.getNode(X86ISD::VBROADCASTM, DL, ExtVT, In, Ld);
+ if (VT.is512BitVector())
+ return Brcst;
+ return DAG.getNode(X86ISD::VTRUNC, DL, VT, Brcst);
+}
+
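Behaviorally, the VBROADCASTM sequence above implements zero extension of a k-mask: the loaded constant 1 is broadcast into lanes whose mask bit is set, and the rest are zeroed. A sketch for <16 x i1> -> <16 x i32> (zextMask16 is an illustrative helper):

#include <array>
#include <cstdint>

static std::array<uint32_t, 16> zextMask16(uint16_t Mask) {
  std::array<uint32_t, 16> R{};      // all lanes start at zero
  for (unsigned i = 0; i != 16; ++i)
    R[i] = (Mask >> i) & 1;          // broadcast 1 under the mask
  return R;
}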
+static SDValue LowerANY_EXTEND(SDValue Op, const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
if (Subtarget->hasFp256()) {
SDValue Res = LowerAVXExtend(Op, DAG, Subtarget);
if (Res.getNode())
@@ -8800,12 +8966,16 @@ SDValue X86TargetLowering::LowerANY_EXTEND(SDValue Op,
return SDValue();
}
-SDValue X86TargetLowering::LowerZERO_EXTEND(SDValue Op,
- SelectionDAG &DAG) const {
+
+static SDValue LowerZERO_EXTEND(SDValue Op, const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
SDLoc DL(Op);
- MVT VT = Op.getValueType().getSimpleVT();
+ MVT VT = Op.getSimpleValueType();
SDValue In = Op.getOperand(0);
- MVT SVT = In.getValueType().getSimpleVT();
+ MVT SVT = In.getSimpleValueType();
+
+ if (VT.is512BitVector() || SVT.getVectorElementType() == MVT::i1)
+ return LowerZERO_EXTEND_AVX512(Op, DAG);
if (Subtarget->hasFp256()) {
SDValue Res = LowerAVXExtend(Op, DAG, Subtarget);
@@ -8813,33 +8983,44 @@ SDValue X86TargetLowering::LowerZERO_EXTEND(SDValue Op,
return Res;
}
- if (!VT.is256BitVector() || !SVT.is128BitVector() ||
- VT.getVectorNumElements() != SVT.getVectorNumElements())
- return SDValue();
-
- assert(Subtarget->hasFp256() && "256-bit vector is observed without AVX!");
-
- // AVX2 has better support of integer extending.
- if (Subtarget->hasInt256())
- return DAG.getNode(X86ISD::VZEXT, DL, VT, In);
-
- SDValue Lo = DAG.getNode(X86ISD::VZEXT, DL, MVT::v4i32, In);
- static const int Mask[] = {4, 5, 6, 7, -1, -1, -1, -1};
- SDValue Hi = DAG.getNode(X86ISD::VZEXT, DL, MVT::v4i32,
- DAG.getVectorShuffle(MVT::v8i16, DL, In,
- DAG.getUNDEF(MVT::v8i16),
- &Mask[0]));
-
- return DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i32, Lo, Hi);
+ assert(!VT.is256BitVector() || !SVT.is128BitVector() ||
+ VT.getVectorNumElements() != SVT.getVectorNumElements());
+ return SDValue();
}
SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
SDLoc DL(Op);
- MVT VT = Op.getValueType().getSimpleVT();
+ MVT VT = Op.getSimpleValueType();
SDValue In = Op.getOperand(0);
- MVT SVT = In.getValueType().getSimpleVT();
-
- if ((VT == MVT::v4i32) && (SVT == MVT::v4i64)) {
+ MVT InVT = In.getSimpleValueType();
+ assert(VT.getVectorNumElements() == InVT.getVectorNumElements() &&
+ "Invalid TRUNCATE operation");
+
+ if (InVT.is512BitVector() || VT.getVectorElementType() == MVT::i1) {
+ if (VT.getVectorElementType().getSizeInBits() >= 8)
+ return DAG.getNode(X86ISD::VTRUNC, DL, VT, In);
+
+ assert(VT.getVectorElementType() == MVT::i1 && "Unexpected vector type");
+ unsigned NumElts = InVT.getVectorNumElements();
+ assert((NumElts == 8 || NumElts == 16) && "Unexpected vector type");
+ if (InVT.getSizeInBits() < 512) {
+ MVT ExtVT = (NumElts == 16)? MVT::v16i32 : MVT::v8i64;
+ In = DAG.getNode(ISD::SIGN_EXTEND, DL, ExtVT, In);
+ InVT = ExtVT;
+ }
+ SDValue Cst = DAG.getTargetConstant(1, InVT.getVectorElementType());
+ const Constant *C = (dyn_cast<ConstantSDNode>(Cst))->getConstantIntValue();
+ SDValue CP = DAG.getConstantPool(C, getPointerTy());
+ unsigned Alignment = cast<ConstantPoolSDNode>(CP)->getAlignment();
+ SDValue Ld = DAG.getLoad(Cst.getValueType(), DL, DAG.getEntryNode(), CP,
+ MachinePointerInfo::getConstantPool(),
+ false, false, false, Alignment);
+ SDValue OneV = DAG.getNode(X86ISD::VBROADCAST, DL, InVT, Ld);
+ SDValue And = DAG.getNode(ISD::AND, DL, InVT, OneV, In);
+ return DAG.getNode(X86ISD::TESTM, DL, VT, And, And);
+ }
+
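The AND-then-TESTM sequence above computes, per lane, whether the low bit is set — which is exactly truncation to i1. In scalar form (truncToMask16 is an illustrative helper, not LLVM code):

#include <cstdint>

static uint16_t truncToMask16(const uint32_t (&V)[16]) {
  uint16_t Mask = 0;
  for (unsigned i = 0; i != 16; ++i)
    Mask |= (uint16_t)(((V[i] & 1u) != 0) << i); // TESTM bit i
  return Mask;
}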
+ if ((VT == MVT::v4i32) && (InVT == MVT::v4i64)) {
// On AVX2, v4i64 -> v4i32 becomes VPERMD.
if (Subtarget->hasInt256()) {
static const int ShufMask[] = {0, 2, 4, 6, -1, -1, -1, -1};
@@ -8870,7 +9051,7 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
return DAG.getVectorShuffle(VT, DL, OpLo, OpHi, ShufMask2);
}
- if ((VT == MVT::v8i16) && (SVT == MVT::v8i32)) {
+ if ((VT == MVT::v8i16) && (InVT == MVT::v8i32)) {
// On AVX2, v8i32 -> v8i16 becomes PSHUFB.
if (Subtarget->hasInt256()) {
In = DAG.getNode(ISD::BITCAST, DL, MVT::v32i8, In);
@@ -8928,11 +9109,9 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
}
// Handle truncation of V256 to V128 using shuffles.
- if (!VT.is128BitVector() || !SVT.is256BitVector())
+ if (!VT.is128BitVector() || !InVT.is256BitVector())
return SDValue();
- assert(VT.getVectorNumElements() != SVT.getVectorNumElements() &&
- "Invalid op");
assert(Subtarget->hasFp256() && "256-bit vector without AVX!");
unsigned NumElems = VT.getVectorNumElements();
@@ -8952,7 +9131,7 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
SDValue X86TargetLowering::LowerFP_TO_SINT(SDValue Op,
SelectionDAG &DAG) const {
- MVT VT = Op.getValueType().getSimpleVT();
+ MVT VT = Op.getSimpleValueType();
if (VT.isVector()) {
if (VT == MVT::v8i16)
return DAG.getNode(ISD::TRUNCATE, SDLoc(Op), VT,
@@ -8996,9 +9175,9 @@ SDValue X86TargetLowering::LowerFP_TO_UINT(SDValue Op,
static SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) {
SDLoc DL(Op);
- MVT VT = Op.getValueType().getSimpleVT();
+ MVT VT = Op.getSimpleValueType();
SDValue In = Op.getOperand(0);
- MVT SVT = In.getValueType().getSimpleVT();
+ MVT SVT = In.getSimpleValueType();
assert(SVT == MVT::v2f32 && "Only customize MVT::v2f32 type legalization!");
@@ -9010,7 +9189,7 @@ static SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) {
SDValue X86TargetLowering::LowerFABS(SDValue Op, SelectionDAG &DAG) const {
LLVMContext *Context = DAG.getContext();
SDLoc dl(Op);
- MVT VT = Op.getValueType().getSimpleVT();
+ MVT VT = Op.getSimpleValueType();
MVT EltVT = VT;
unsigned NumElts = VT == MVT::f64 ? 2 : 4;
if (VT.isVector()) {
@@ -9044,7 +9223,7 @@ SDValue X86TargetLowering::LowerFABS(SDValue Op, SelectionDAG &DAG) const {
SDValue X86TargetLowering::LowerFNEG(SDValue Op, SelectionDAG &DAG) const {
LLVMContext *Context = DAG.getContext();
SDLoc dl(Op);
- MVT VT = Op.getValueType().getSimpleVT();
+ MVT VT = Op.getSimpleValueType();
MVT EltVT = VT;
unsigned NumElts = VT == MVT::f64 ? 2 : 4;
if (VT.isVector()) {
@@ -9065,7 +9244,7 @@ SDValue X86TargetLowering::LowerFNEG(SDValue Op, SelectionDAG &DAG) const {
MachinePointerInfo::getConstantPool(),
false, false, false, Alignment);
if (VT.isVector()) {
- MVT XORVT = VT.is128BitVector() ? MVT::v2i64 : MVT::v4i64;
+ MVT XORVT = MVT::getVectorVT(MVT::i64, VT.getSizeInBits()/64);
return DAG.getNode(ISD::BITCAST, dl, VT,
DAG.getNode(ISD::XOR, dl, XORVT,
DAG.getNode(ISD::BITCAST, dl, XORVT,
@@ -9081,8 +9260,8 @@ SDValue X86TargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
SDLoc dl(Op);
- MVT VT = Op.getValueType().getSimpleVT();
- MVT SrcVT = Op1.getValueType().getSimpleVT();
+ MVT VT = Op.getSimpleValueType();
+ MVT SrcVT = Op1.getSimpleValueType();
// If second operand is smaller, extend it first.
if (SrcVT.bitsLT(VT)) {
@@ -9158,7 +9337,7 @@ SDValue X86TargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
static SDValue LowerFGETSIGN(SDValue Op, SelectionDAG &DAG) {
SDValue N0 = Op.getOperand(0);
SDLoc dl(Op);
- MVT VT = Op.getValueType().getSimpleVT();
+ MVT VT = Op.getSimpleValueType();
// Lower ISD::FGETSIGN to (AND (X86ISD::FGETSIGNx86 ...) 1).
SDValue xFGETSIGN = DAG.getNode(X86ISD::FGETSIGNx86, dl, VT, N0,
@@ -9168,8 +9347,8 @@ static SDValue LowerFGETSIGN(SDValue Op, SelectionDAG &DAG) {
// LowerVectorAllZeroTest - Check whether an OR'd tree is PTEST-able.
//
-SDValue X86TargetLowering::LowerVectorAllZeroTest(SDValue Op,
- SelectionDAG &DAG) const {
+static SDValue LowerVectorAllZeroTest(SDValue Op, const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
assert(Op.getOpcode() == ISD::OR && "Only check OR'd tree.");
if (!Subtarget->hasSSE41())
@@ -9294,7 +9473,7 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC,
unsigned NumOperands = 0;
// Truncate operations may prevent the merge of the SETCC instruction
- // and the arithmetic intruction before it. Attempt to truncate the operands
+ // and the arithmetic instruction before it. Attempt to truncate the operands
// of the arithmetic instruction and use a reduced bit-width instruction.
bool NeedTruncation = false;
SDValue ArithOp = Op;
@@ -9402,7 +9581,7 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC,
case ISD::AND: Opcode = X86ISD::AND; break;
case ISD::OR: {
if (!NeedTruncation && (X86CC == X86::COND_E || X86CC == X86::COND_NE)) {
- SDValue EFLAGS = LowerVectorAllZeroTest(Op, DAG);
+ SDValue EFLAGS = LowerVectorAllZeroTest(Op, Subtarget, DAG);
if (EFLAGS.getNode())
return EFLAGS;
}
@@ -9638,7 +9817,7 @@ static int translateX86FSETCC(ISD::CondCode SetCCOpcode, SDValue &Op0,
// Lower256IntVSETCC - Break a VSETCC 256-bit integer VSETCC into two new 128
// ones, and then concatenate the result back.
static SDValue Lower256IntVSETCC(SDValue Op, SelectionDAG &DAG) {
- MVT VT = Op.getValueType().getSimpleVT();
+ MVT VT = Op.getSimpleValueType();
assert(VT.is256BitVector() && Op.getOpcode() == ISD::SETCC &&
"Unsupported value type for operation");
@@ -9665,25 +9844,62 @@ static SDValue Lower256IntVSETCC(SDValue Op, SelectionDAG &DAG) {
DAG.getNode(Op.getOpcode(), dl, NewVT, LHS2, RHS2, CC));
}
+static SDValue LowerIntVSETCC_AVX512(SDValue Op, SelectionDAG &DAG) {
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+ SDValue CC = Op.getOperand(2);
+ MVT VT = Op.getSimpleValueType();
+
+ assert(Op0.getValueType().getVectorElementType().getSizeInBits() >= 32 &&
+ Op.getValueType().getScalarType() == MVT::i1 &&
+ "Cannot set masked compare for this operation");
+
+ ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
+ SDLoc dl(Op);
+
+ bool Unsigned = false;
+ unsigned SSECC;
+ switch (SetCCOpcode) {
+ default: llvm_unreachable("Unexpected SETCC condition");
+ case ISD::SETNE: SSECC = 4; break;
+ case ISD::SETEQ: SSECC = 0; break;
+ case ISD::SETUGT: Unsigned = true;
+ case ISD::SETGT: SSECC = 6; break; // NLE
+ case ISD::SETULT: Unsigned = true;
+ case ISD::SETLT: SSECC = 1; break;
+ case ISD::SETUGE: Unsigned = true;
+ case ISD::SETGE: SSECC = 5; break; // NLT
+ case ISD::SETULE: Unsigned = true;
+ case ISD::SETLE: SSECC = 2; break;
+ }
+ unsigned Opc = Unsigned ? X86ISD::CMPMU: X86ISD::CMPM;
+ return DAG.getNode(Opc, dl, VT, Op0, Op1,
+ DAG.getConstant(SSECC, MVT::i8));
+
+}
+
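For reference, the switch above restated as a standalone table. The immediates follow the SSE/AVX compare-predicate encoding (0=EQ, 1=LT, 2=LE, 4=NE, 5=NLT, 6=NLE); the enum and struct names below are illustrative, not LLVM's:

#include <cassert>

enum SetCC { SETEQ, SETNE, SETLT, SETGT, SETLE, SETGE,
             SETULT, SETUGT, SETULE, SETUGE };
struct CmpMEnc { unsigned Imm; bool IsUnsigned; };

static CmpMEnc encodeSetCC(SetCC CC) {
  switch (CC) {
  case SETNE:  return {4, false};
  case SETEQ:  return {0, false};
  case SETUGT: return {6, true};   // the fallthroughs in the original
  case SETGT:  return {6, false};  // switch reuse the signed immediate
  case SETULT: return {1, true};
  case SETLT:  return {1, false};
  case SETUGE: return {5, true};
  case SETGE:  return {5, false};
  case SETULE: return {2, true};
  case SETLE:  return {2, false};
  }
  assert(false && "unexpected condition");
  return {0, false};
}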
static SDValue LowerVSETCC(SDValue Op, const X86Subtarget *Subtarget,
SelectionDAG &DAG) {
- SDValue Cond;
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
SDValue CC = Op.getOperand(2);
- MVT VT = Op.getValueType().getSimpleVT();
+ MVT VT = Op.getSimpleValueType();
ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
- bool isFP = Op.getOperand(1).getValueType().getSimpleVT().isFloatingPoint();
+ bool isFP = Op.getOperand(1).getSimpleValueType().isFloatingPoint();
SDLoc dl(Op);
if (isFP) {
#ifndef NDEBUG
- MVT EltVT = Op0.getValueType().getVectorElementType().getSimpleVT();
+ MVT EltVT = Op0.getSimpleValueType().getVectorElementType();
assert(EltVT == MVT::f32 || EltVT == MVT::f64);
#endif
unsigned SSECC = translateX86FSETCC(SetCCOpcode, Op0, Op1);
-
+ unsigned Opc = X86ISD::CMPP;
+ if (Subtarget->hasAVX512() && VT.getVectorElementType() == MVT::i1) {
+ assert(VT.getVectorNumElements() <= 16);
+ Opc = X86ISD::CMPM;
+ }
// In the two special cases we can't handle, emit two comparisons.
if (SSECC == 8) {
unsigned CC0, CC1;
@@ -9695,14 +9911,14 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget *Subtarget,
CC0 = 7; CC1 = 4; CombineOpc = ISD::AND;
}
- SDValue Cmp0 = DAG.getNode(X86ISD::CMPP, dl, VT, Op0, Op1,
+ SDValue Cmp0 = DAG.getNode(Opc, dl, VT, Op0, Op1,
DAG.getConstant(CC0, MVT::i8));
- SDValue Cmp1 = DAG.getNode(X86ISD::CMPP, dl, VT, Op0, Op1,
+ SDValue Cmp1 = DAG.getNode(Opc, dl, VT, Op0, Op1,
DAG.getConstant(CC1, MVT::i8));
return DAG.getNode(CombineOpc, dl, VT, Cmp0, Cmp1);
}
// Handle all other FP comparisons here.
- return DAG.getNode(X86ISD::CMPP, dl, VT, Op0, Op1,
+ return DAG.getNode(Opc, dl, VT, Op0, Op1,
DAG.getConstant(SSECC, MVT::i8));
}
@@ -9710,6 +9926,24 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget *Subtarget,
if (VT.is256BitVector() && !Subtarget->hasInt256())
return Lower256IntVSETCC(Op, DAG);
+ bool MaskResult = (VT.getVectorElementType() == MVT::i1);
+ EVT OpVT = Op1.getValueType();
+ if (Subtarget->hasAVX512()) {
+ if (Op1.getValueType().is512BitVector() ||
+ (MaskResult && OpVT.getVectorElementType().getSizeInBits() >= 32))
+ return LowerIntVSETCC_AVX512(Op, DAG);
+
+ // In the AVX-512 architecture, setcc returns a mask with i1 elements,
+ // but there is no compare instruction for i8 and i16 elements.
+ // 512-bit operands are not involved here; those types are illegal.
+ if (MaskResult &&
+ (OpVT.getVectorElementType().getSizeInBits() < 32 &&
+ OpVT.getVectorElementType().getSizeInBits() >= 8))
+ return DAG.getNode(ISD::TRUNCATE, dl, VT,
+ DAG.getNode(ISD::SETCC, dl, OpVT, Op0, Op1, CC));
+ }
+
// We are handling one of the integer comparisons here. Since SSE only has
// GT and EQ comparisons for integer, swapping operands and multiple
// operations may be required for some comparisons.
@@ -9719,15 +9953,18 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget *Subtarget,
switch (SetCCOpcode) {
default: llvm_unreachable("Unexpected SETCC condition");
case ISD::SETNE: Invert = true;
- case ISD::SETEQ: Opc = X86ISD::PCMPEQ; break;
+ case ISD::SETEQ: Opc = MaskResult? X86ISD::PCMPEQM: X86ISD::PCMPEQ; break;
case ISD::SETLT: Swap = true;
- case ISD::SETGT: Opc = X86ISD::PCMPGT; break;
+ case ISD::SETGT: Opc = MaskResult? X86ISD::PCMPGTM: X86ISD::PCMPGT; break;
case ISD::SETGE: Swap = true;
- case ISD::SETLE: Opc = X86ISD::PCMPGT; Invert = true; break;
+ case ISD::SETLE: Opc = MaskResult? X86ISD::PCMPGTM: X86ISD::PCMPGT;
+ Invert = true; break;
case ISD::SETULT: Swap = true;
- case ISD::SETUGT: Opc = X86ISD::PCMPGT; FlipSigns = true; break;
+ case ISD::SETUGT: Opc = MaskResult? X86ISD::PCMPGTM: X86ISD::PCMPGT;
+ FlipSigns = true; break;
case ISD::SETUGE: Swap = true;
- case ISD::SETULE: Opc = X86ISD::PCMPGT; FlipSigns = true; Invert = true; break;
+ case ISD::SETULE: Opc = MaskResult? X86ISD::PCMPGTM: X86ISD::PCMPGT;
+ FlipSigns = true; Invert = true; break;
}
// Special case: Use min/max operations for SETULE/SETUGE
@@ -9841,7 +10078,7 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget *Subtarget,
SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
- MVT VT = Op.getValueType().getSimpleVT();
+ MVT VT = Op.getSimpleValueType();
if (VT.isVector()) return LowerVSETCC(Op, Subtarget, DAG);
@@ -9885,7 +10122,7 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
}
}
- bool isFP = Op1.getValueType().getSimpleVT().isFloatingPoint();
+ bool isFP = Op1.getSimpleValueType().isFloatingPoint();
unsigned X86CC = TranslateX86CC(CC, isFP, Op0, Op1, DAG);
if (X86CC == X86::COND_INVALID)
return SDValue();
@@ -10040,7 +10277,7 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
SDValue Cmp = Cond.getOperand(1);
unsigned Opc = Cmp.getOpcode();
- MVT VT = Op.getValueType().getSimpleVT();
+ MVT VT = Op.getSimpleValueType();
bool IllegalFPCMov = false;
if (VT.isFloatingPoint() && !VT.isVector() &&
@@ -10149,15 +10386,50 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
return DAG.getNode(X86ISD::CMOV, DL, VTs, Ops, array_lengthof(Ops));
}
-SDValue X86TargetLowering::LowerSIGN_EXTEND(SDValue Op,
- SelectionDAG &DAG) const {
- MVT VT = Op->getValueType(0).getSimpleVT();
+static SDValue LowerSIGN_EXTEND_AVX512(SDValue Op, SelectionDAG &DAG) {
+ MVT VT = Op->getSimpleValueType(0);
SDValue In = Op->getOperand(0);
- MVT InVT = In.getValueType().getSimpleVT();
+ MVT InVT = In.getSimpleValueType();
+ SDLoc dl(Op);
+
+ unsigned int NumElts = VT.getVectorNumElements();
+ if (NumElts != 8 && NumElts != 16)
+ return SDValue();
+
+ if (VT.is512BitVector() && InVT.getVectorElementType() != MVT::i1)
+ return DAG.getNode(X86ISD::VSEXT, dl, VT, In);
+
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ assert (InVT.getVectorElementType() == MVT::i1 && "Unexpected vector type");
+
+ MVT ExtVT = (NumElts == 8) ? MVT::v8i64 : MVT::v16i32;
+ Constant *C = ConstantInt::get(*DAG.getContext(),
+ APInt::getAllOnesValue(ExtVT.getScalarType().getSizeInBits()));
+
+ SDValue CP = DAG.getConstantPool(C, TLI.getPointerTy());
+ unsigned Alignment = cast<ConstantPoolSDNode>(CP)->getAlignment();
+ SDValue Ld = DAG.getLoad(ExtVT.getScalarType(), dl, DAG.getEntryNode(), CP,
+ MachinePointerInfo::getConstantPool(),
+ false, false, false, Alignment);
+ SDValue Brcst = DAG.getNode(X86ISD::VBROADCASTM, dl, ExtVT, In, Ld);
+ if (VT.is512BitVector())
+ return Brcst;
+ return DAG.getNode(X86ISD::VTRUNC, dl, VT, Brcst);
+}
+
+static SDValue LowerSIGN_EXTEND(SDValue Op, const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
+ MVT VT = Op->getSimpleValueType(0);
+ SDValue In = Op->getOperand(0);
+ MVT InVT = In.getSimpleValueType();
SDLoc dl(Op);
+ if (VT.is512BitVector() || InVT.getVectorElementType() == MVT::i1)
+ return LowerSIGN_EXTEND_AVX512(Op, DAG);
+
if ((VT != MVT::v4i64 || InVT != MVT::v4i32) &&
- (VT != MVT::v8i32 || InVT != MVT::v8i16))
+ (VT != MVT::v8i32 || InVT != MVT::v8i16) &&
+ (VT != MVT::v16i16 || InVT != MVT::v16i8))
return SDValue();
if (Subtarget->hasInt256())
@@ -10502,7 +10774,8 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
// Get the inputs.
SDValue Chain = Op.getOperand(0);
SDValue Size = Op.getOperand(1);
- // FIXME: Ensure alignment here
+ unsigned Align = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
+ EVT VT = Op.getNode()->getValueType(0);
bool Is64Bit = Subtarget->is64Bit();
EVT SPTy = Is64Bit ? MVT::i64 : MVT::i32;
@@ -10540,14 +10813,20 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
Chain = DAG.getNode(X86ISD::WIN_ALLOCA, dl, NodeTys, Chain, Flag);
- Flag = Chain.getValue(1);
const X86RegisterInfo *RegInfo =
static_cast<const X86RegisterInfo*>(getTargetMachine().getRegisterInfo());
- Chain = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(),
- SPTy).getValue(1);
+ unsigned SPReg = RegInfo->getStackRegister();
+ SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, SPTy);
+ Chain = SP.getValue(1);
- SDValue Ops1[2] = { Chain.getValue(0), Chain };
+ if (Align) {
+ SP = DAG.getNode(ISD::AND, dl, VT, SP.getValue(0),
+ DAG.getConstant(-(uint64_t)Align, VT));
+ Chain = DAG.getCopyToReg(Chain, dl, SPReg, SP);
+ }
+
+ SDValue Ops1[2] = { SP, Chain };
return DAG.getMergeValues(Ops1, 2, dl);
}
}
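The AND with -(uint64_t)Align rounds the just-allocated stack pointer down to the requested boundary, which keeps the allocation aligned since the stack grows toward lower addresses. In isolation (a sketch, assuming a power-of-two Align as LLVM alignments always are):

#include <cassert>
#include <cstdint>

static uint64_t alignSPDown(uint64_t SP, uint64_t Align) {
  assert(Align != 0 && (Align & (Align - 1)) == 0 && "power of two");
  return SP & ~(Align - 1);   // same mask as -(uint64_t)Align
}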
@@ -10698,6 +10977,26 @@ static SDValue LowerVACOPY(SDValue Op, const X86Subtarget *Subtarget,
MachinePointerInfo(DstSV), MachinePointerInfo(SrcSV));
}
+// getTargetVShiftByConstNode - Handle vector element shifts where the shift
+// amount is a constant. Takes immediate version of shift as input.
+static SDValue getTargetVShiftByConstNode(unsigned Opc, SDLoc dl, EVT VT,
+ SDValue SrcOp, uint64_t ShiftAmt,
+ SelectionDAG &DAG) {
+
+ // Check for ShiftAmt >= element width
+ if (ShiftAmt >= VT.getVectorElementType().getSizeInBits()) {
+ if (Opc == X86ISD::VSRAI)
+ ShiftAmt = VT.getVectorElementType().getSizeInBits() - 1;
+ else
+ return DAG.getConstant(0, VT);
+ }
+
+ assert((Opc == X86ISD::VSHLI || Opc == X86ISD::VSRLI || Opc == X86ISD::VSRAI)
+ && "Unknown target vector shift-by-constant node");
+
+ return DAG.getNode(Opc, dl, VT, SrcOp, DAG.getConstant(ShiftAmt, MVT::i8));
+}
+
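The clamping above matches hardware semantics for out-of-range immediate counts: arithmetic shifts keep replicating the sign bit, while logical shifts collapse to zero. Per lane, for i16 elements (an illustrative sketch; assumes the host does arithmetic right shift of signed values, as mainstream compilers do):

#include <cstdint>

static int16_t vsraiLane(int16_t X, uint64_t Amt) {
  if (Amt >= 16)
    Amt = 15;                      // VSRAI saturates to EltBits-1
  return (int16_t)(X >> Amt);
}

static uint16_t vsrliLane(uint16_t X, uint64_t Amt) {
  return Amt >= 16 ? 0 : (uint16_t)(X >> Amt);  // logical: all zeros
}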
// getTargetVShiftNode - Handle vector element shifts where the shift amount
// may or may not be a constant. Takes immediate version of shift as input.
static SDValue getTargetVShiftNode(unsigned Opc, SDLoc dl, EVT VT,
@@ -10705,18 +11004,10 @@ static SDValue getTargetVShiftNode(unsigned Opc, SDLoc dl, EVT VT,
SelectionDAG &DAG) {
assert(ShAmt.getValueType() == MVT::i32 && "ShAmt is not i32");
- if (isa<ConstantSDNode>(ShAmt)) {
- // Constant may be a TargetConstant. Use a regular constant.
- uint32_t ShiftAmt = cast<ConstantSDNode>(ShAmt)->getZExtValue();
- switch (Opc) {
- default: llvm_unreachable("Unknown target vector shift node");
- case X86ISD::VSHLI:
- case X86ISD::VSRLI:
- case X86ISD::VSRAI:
- return DAG.getNode(Opc, dl, VT, SrcOp,
- DAG.getConstant(ShiftAmt, MVT::i32));
- }
- }
+ // Catch shift-by-constant.
+ if (ConstantSDNode *CShAmt = dyn_cast<ConstantSDNode>(ShAmt))
+ return getTargetVShiftByConstNode(Opc, dl, VT, SrcOp,
+ CShAmt->getZExtValue(), DAG);
// Change opcode to non-immediate version
switch (Opc) {
@@ -10919,24 +11210,32 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) {
case Intrinsic::x86_avx2_pmaxu_b:
case Intrinsic::x86_avx2_pmaxu_w:
case Intrinsic::x86_avx2_pmaxu_d:
+ case Intrinsic::x86_avx512_pmaxu_d:
+ case Intrinsic::x86_avx512_pmaxu_q:
case Intrinsic::x86_sse2_pminu_b:
case Intrinsic::x86_sse41_pminuw:
case Intrinsic::x86_sse41_pminud:
case Intrinsic::x86_avx2_pminu_b:
case Intrinsic::x86_avx2_pminu_w:
case Intrinsic::x86_avx2_pminu_d:
+ case Intrinsic::x86_avx512_pminu_d:
+ case Intrinsic::x86_avx512_pminu_q:
case Intrinsic::x86_sse41_pmaxsb:
case Intrinsic::x86_sse2_pmaxs_w:
case Intrinsic::x86_sse41_pmaxsd:
case Intrinsic::x86_avx2_pmaxs_b:
case Intrinsic::x86_avx2_pmaxs_w:
case Intrinsic::x86_avx2_pmaxs_d:
+ case Intrinsic::x86_avx512_pmaxs_d:
+ case Intrinsic::x86_avx512_pmaxs_q:
case Intrinsic::x86_sse41_pminsb:
case Intrinsic::x86_sse2_pmins_w:
case Intrinsic::x86_sse41_pminsd:
case Intrinsic::x86_avx2_pmins_b:
case Intrinsic::x86_avx2_pmins_w:
- case Intrinsic::x86_avx2_pmins_d: {
+ case Intrinsic::x86_avx2_pmins_d:
+ case Intrinsic::x86_avx512_pmins_d:
+ case Intrinsic::x86_avx512_pmins_q: {
unsigned Opcode;
switch (IntNo) {
default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
@@ -10946,6 +11245,8 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) {
case Intrinsic::x86_avx2_pmaxu_b:
case Intrinsic::x86_avx2_pmaxu_w:
case Intrinsic::x86_avx2_pmaxu_d:
+ case Intrinsic::x86_avx512_pmaxu_d:
+ case Intrinsic::x86_avx512_pmaxu_q:
Opcode = X86ISD::UMAX;
break;
case Intrinsic::x86_sse2_pminu_b:
@@ -10954,6 +11255,8 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) {
case Intrinsic::x86_avx2_pminu_b:
case Intrinsic::x86_avx2_pminu_w:
case Intrinsic::x86_avx2_pminu_d:
+ case Intrinsic::x86_avx512_pminu_d:
+ case Intrinsic::x86_avx512_pminu_q:
Opcode = X86ISD::UMIN;
break;
case Intrinsic::x86_sse41_pmaxsb:
@@ -10962,6 +11265,8 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) {
case Intrinsic::x86_avx2_pmaxs_b:
case Intrinsic::x86_avx2_pmaxs_w:
case Intrinsic::x86_avx2_pmaxs_d:
+ case Intrinsic::x86_avx512_pmaxs_d:
+ case Intrinsic::x86_avx512_pmaxs_q:
Opcode = X86ISD::SMAX;
break;
case Intrinsic::x86_sse41_pminsb:
@@ -10970,6 +11275,8 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) {
case Intrinsic::x86_avx2_pmins_b:
case Intrinsic::x86_avx2_pmins_w:
case Intrinsic::x86_avx2_pmins_d:
+ case Intrinsic::x86_avx512_pmins_d:
+ case Intrinsic::x86_avx512_pmins_q:
Opcode = X86ISD::SMIN;
break;
}
@@ -10982,10 +11289,14 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) {
case Intrinsic::x86_sse2_max_pd:
case Intrinsic::x86_avx_max_ps_256:
case Intrinsic::x86_avx_max_pd_256:
+ case Intrinsic::x86_avx512_max_ps_512:
+ case Intrinsic::x86_avx512_max_pd_512:
case Intrinsic::x86_sse_min_ps:
case Intrinsic::x86_sse2_min_pd:
case Intrinsic::x86_avx_min_ps_256:
- case Intrinsic::x86_avx_min_pd_256: {
+ case Intrinsic::x86_avx_min_pd_256:
+ case Intrinsic::x86_avx512_min_ps_512:
+ case Intrinsic::x86_avx512_min_pd_512: {
unsigned Opcode;
switch (IntNo) {
default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
@@ -10993,12 +11304,16 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) {
case Intrinsic::x86_sse2_max_pd:
case Intrinsic::x86_avx_max_ps_256:
case Intrinsic::x86_avx_max_pd_256:
+ case Intrinsic::x86_avx512_max_ps_512:
+ case Intrinsic::x86_avx512_max_pd_512:
Opcode = X86ISD::FMAX;
break;
case Intrinsic::x86_sse_min_ps:
case Intrinsic::x86_sse2_min_pd:
case Intrinsic::x86_avx_min_ps_256:
case Intrinsic::x86_avx_min_pd_256:
+ case Intrinsic::x86_avx512_min_ps_512:
+ case Intrinsic::x86_avx512_min_pd_512:
Opcode = X86ISD::FMIN;
break;
}
@@ -11069,7 +11384,7 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) {
case Intrinsic::x86_avx2_permd:
case Intrinsic::x86_avx2_permps:
// Operands intentionally swapped. Mask is last operand to intrinsic,
- // but second operand for node/intruction.
+ // but second operand for node/instruction.
return DAG.getNode(X86ISD::VPERMV, dl, Op.getValueType(),
Op.getOperand(2), Op.getOperand(1));
@@ -11144,6 +11459,16 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) {
SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8, CC, Test);
return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC);
}
+ case Intrinsic::x86_avx512_kortestz:
+ case Intrinsic::x86_avx512_kortestc: {
+ unsigned X86CC = (IntNo == Intrinsic::x86_avx512_kortestz)? X86::COND_E: X86::COND_B;
+ SDValue LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i1, Op.getOperand(1));
+ SDValue RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i1, Op.getOperand(2));
+ SDValue CC = DAG.getConstant(X86CC, MVT::i8);
+ SDValue Test = DAG.getNode(X86ISD::KORTEST, dl, MVT::i32, LHS, RHS);
+ SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8, CC, Test);
+ return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC);
+ }
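KORTEST ORs two mask registers, setting ZF when the result is all zeros and CF when it is all ones; that is why kortestz selects COND_E and kortestc selects COND_B. As plain integer code (illustrative helpers):

#include <cstdint>

static int kortestz16(uint16_t A, uint16_t B) { return (A | B) == 0; }
static int kortestc16(uint16_t A, uint16_t B) {
  return (uint16_t)(A | B) == 0xFFFF;
}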
// SSE/AVX shift intrinsics
case Intrinsic::x86_sse2_psll_w:
@@ -11338,7 +11663,19 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) {
case Intrinsic::x86_fma_vfmaddsub_ps_256:
case Intrinsic::x86_fma_vfmaddsub_pd_256:
case Intrinsic::x86_fma_vfmsubadd_ps_256:
- case Intrinsic::x86_fma_vfmsubadd_pd_256: {
+ case Intrinsic::x86_fma_vfmsubadd_pd_256:
+ case Intrinsic::x86_fma_vfmadd_ps_512:
+ case Intrinsic::x86_fma_vfmadd_pd_512:
+ case Intrinsic::x86_fma_vfmsub_ps_512:
+ case Intrinsic::x86_fma_vfmsub_pd_512:
+ case Intrinsic::x86_fma_vfnmadd_ps_512:
+ case Intrinsic::x86_fma_vfnmadd_pd_512:
+ case Intrinsic::x86_fma_vfnmsub_ps_512:
+ case Intrinsic::x86_fma_vfnmsub_pd_512:
+ case Intrinsic::x86_fma_vfmaddsub_ps_512:
+ case Intrinsic::x86_fma_vfmaddsub_pd_512:
+ case Intrinsic::x86_fma_vfmsubadd_ps_512:
+ case Intrinsic::x86_fma_vfmsubadd_pd_512: {
unsigned Opc;
switch (IntNo) {
default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
@@ -11346,36 +11683,48 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) {
case Intrinsic::x86_fma_vfmadd_pd:
case Intrinsic::x86_fma_vfmadd_ps_256:
case Intrinsic::x86_fma_vfmadd_pd_256:
+ case Intrinsic::x86_fma_vfmadd_ps_512:
+ case Intrinsic::x86_fma_vfmadd_pd_512:
Opc = X86ISD::FMADD;
break;
case Intrinsic::x86_fma_vfmsub_ps:
case Intrinsic::x86_fma_vfmsub_pd:
case Intrinsic::x86_fma_vfmsub_ps_256:
case Intrinsic::x86_fma_vfmsub_pd_256:
+ case Intrinsic::x86_fma_vfmsub_ps_512:
+ case Intrinsic::x86_fma_vfmsub_pd_512:
Opc = X86ISD::FMSUB;
break;
case Intrinsic::x86_fma_vfnmadd_ps:
case Intrinsic::x86_fma_vfnmadd_pd:
case Intrinsic::x86_fma_vfnmadd_ps_256:
case Intrinsic::x86_fma_vfnmadd_pd_256:
+ case Intrinsic::x86_fma_vfnmadd_ps_512:
+ case Intrinsic::x86_fma_vfnmadd_pd_512:
Opc = X86ISD::FNMADD;
break;
case Intrinsic::x86_fma_vfnmsub_ps:
case Intrinsic::x86_fma_vfnmsub_pd:
case Intrinsic::x86_fma_vfnmsub_ps_256:
case Intrinsic::x86_fma_vfnmsub_pd_256:
+ case Intrinsic::x86_fma_vfnmsub_ps_512:
+ case Intrinsic::x86_fma_vfnmsub_pd_512:
Opc = X86ISD::FNMSUB;
break;
case Intrinsic::x86_fma_vfmaddsub_ps:
case Intrinsic::x86_fma_vfmaddsub_pd:
case Intrinsic::x86_fma_vfmaddsub_ps_256:
case Intrinsic::x86_fma_vfmaddsub_pd_256:
+ case Intrinsic::x86_fma_vfmaddsub_ps_512:
+ case Intrinsic::x86_fma_vfmaddsub_pd_512:
Opc = X86ISD::FMADDSUB;
break;
case Intrinsic::x86_fma_vfmsubadd_ps:
case Intrinsic::x86_fma_vfmsubadd_pd:
case Intrinsic::x86_fma_vfmsubadd_ps_256:
case Intrinsic::x86_fma_vfmsubadd_pd_256:
+ case Intrinsic::x86_fma_vfmsubadd_ps_512:
+ case Intrinsic::x86_fma_vfmsubadd_pd_512:
Opc = X86ISD::FMSUBADD;
break;
}
@@ -11386,7 +11735,87 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) {
}
}
-static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) {
+static SDValue getGatherNode(unsigned Opc, SDValue Op, SelectionDAG &DAG,
+ SDValue Base, SDValue Index,
+ SDValue ScaleOp, SDValue Chain,
+ const X86Subtarget * Subtarget) {
+ SDLoc dl(Op);
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(ScaleOp);
+ assert(C && "Invalid scale type");
+ SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), MVT::i8);
+ SDValue Src = getZeroVector(Op.getValueType(), Subtarget, DAG, dl);
+ EVT MaskVT = MVT::getVectorVT(MVT::i1,
+ Index.getValueType().getVectorNumElements());
+ SDValue MaskInReg = DAG.getConstant(~0, MaskVT);
+ SDVTList VTs = DAG.getVTList(Op.getValueType(), MaskVT, MVT::Other);
+ SDValue Disp = DAG.getTargetConstant(0, MVT::i32);
+ SDValue Segment = DAG.getRegister(0, MVT::i32);
+ SDValue Ops[] = {Src, MaskInReg, Base, Scale, Index, Disp, Segment, Chain};
+ SDNode *Res = DAG.getMachineNode(Opc, dl, VTs, Ops);
+ SDValue RetOps[] = { SDValue(Res, 0), SDValue(Res, 2) };
+ return DAG.getMergeValues(RetOps, array_lengthof(RetOps), dl);
+}
+
+static SDValue getMGatherNode(unsigned Opc, SDValue Op, SelectionDAG &DAG,
+ SDValue Src, SDValue Mask, SDValue Base,
+ SDValue Index, SDValue ScaleOp, SDValue Chain,
+ const X86Subtarget * Subtarget) {
+ SDLoc dl(Op);
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(ScaleOp);
+ assert(C && "Invalid scale type");
+ SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), MVT::i8);
+ EVT MaskVT = MVT::getVectorVT(MVT::i1,
+ Index.getValueType().getVectorNumElements());
+ SDValue MaskInReg = DAG.getNode(ISD::BITCAST, dl, MaskVT, Mask);
+ SDVTList VTs = DAG.getVTList(Op.getValueType(), MaskVT, MVT::Other);
+ SDValue Disp = DAG.getTargetConstant(0, MVT::i32);
+ SDValue Segment = DAG.getRegister(0, MVT::i32);
+ if (Src.getOpcode() == ISD::UNDEF)
+ Src = getZeroVector(Op.getValueType(), Subtarget, DAG, dl);
+ SDValue Ops[] = {Src, MaskInReg, Base, Scale, Index, Disp, Segment, Chain};
+ SDNode *Res = DAG.getMachineNode(Opc, dl, VTs, Ops);
+ SDValue RetOps[] = { SDValue(Res, 0), SDValue(Res, 2) };
+ return DAG.getMergeValues(RetOps, array_lengthof(RetOps), dl);
+}
+
+static SDValue getScatterNode(unsigned Opc, SDValue Op, SelectionDAG &DAG,
+ SDValue Src, SDValue Base, SDValue Index,
+ SDValue ScaleOp, SDValue Chain) {
+ SDLoc dl(Op);
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(ScaleOp);
+ assert(C && "Invalid scale type");
+ SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), MVT::i8);
+ SDValue Disp = DAG.getTargetConstant(0, MVT::i32);
+ SDValue Segment = DAG.getRegister(0, MVT::i32);
+ EVT MaskVT = MVT::getVectorVT(MVT::i1,
+ Index.getValueType().getVectorNumElements());
+ SDValue MaskInReg = DAG.getConstant(~0, MaskVT);
+ SDVTList VTs = DAG.getVTList(MaskVT, MVT::Other);
+ SDValue Ops[] = {Base, Scale, Index, Disp, Segment, MaskInReg, Src, Chain};
+ SDNode *Res = DAG.getMachineNode(Opc, dl, VTs, Ops);
+ return SDValue(Res, 1);
+}
+
+static SDValue getMScatterNode(unsigned Opc, SDValue Op, SelectionDAG &DAG,
+ SDValue Src, SDValue Mask, SDValue Base,
+ SDValue Index, SDValue ScaleOp, SDValue Chain) {
+ SDLoc dl(Op);
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(ScaleOp);
+ assert(C && "Invalid scale type");
+ SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), MVT::i8);
+ SDValue Disp = DAG.getTargetConstant(0, MVT::i32);
+ SDValue Segment = DAG.getRegister(0, MVT::i32);
+ EVT MaskVT = MVT::getVectorVT(MVT::i1,
+ Index.getValueType().getVectorNumElements());
+ SDValue MaskInReg = DAG.getNode(ISD::BITCAST, dl, MaskVT, Mask);
+ SDVTList VTs = DAG.getVTList(MaskVT, MVT::Other);
+ SDValue Ops[] = {Base, Scale, Index, Disp, Segment, MaskInReg, Src, Chain};
+ SDNode *Res = DAG.getMachineNode(Opc, dl, VTs, Ops);
+ return SDValue(Res, 1);
+}
+
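The helpers above assemble the machine node's memory operands (base, scale, index, disp, segment) plus a k-mask, with the unmasked variants simply using an all-ones mask. Behaviorally, the masked gather they produce acts like this sketch (i32 elements, 16 lanes; gatherD32 is an illustrative name, and suitably aligned data is assumed):

#include <cstdint>

static void gatherD32(uint32_t *Dst, const uint32_t *Src, uint16_t Mask,
                      const uint8_t *Base, const int32_t *Index,
                      unsigned Scale) {
  for (unsigned i = 0; i != 16; ++i)
    Dst[i] = (Mask >> i) & 1
                 ? *(const uint32_t *)(Base + (int64_t)Index[i] * Scale)
                 : Src[i];          // clear mask bits pass Src through
}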
+static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
SDLoc dl(Op);
unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
switch (IntNo) {
@@ -11421,7 +11850,144 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) {
return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(), Result, isValid,
SDValue(Result.getNode(), 2));
}
-
+ // int_gather(index, base, scale);
+ case Intrinsic::x86_avx512_gather_qpd_512:
+ case Intrinsic::x86_avx512_gather_qps_512:
+ case Intrinsic::x86_avx512_gather_dpd_512:
+ case Intrinsic::x86_avx512_gather_qpi_512:
+ case Intrinsic::x86_avx512_gather_qpq_512:
+ case Intrinsic::x86_avx512_gather_dpq_512:
+ case Intrinsic::x86_avx512_gather_dps_512:
+ case Intrinsic::x86_avx512_gather_dpi_512: {
+ unsigned Opc;
+ switch (IntNo) {
+ default: llvm_unreachable("Unexpected intrinsic!");
+ case Intrinsic::x86_avx512_gather_qps_512: Opc = X86::VGATHERQPSZrm; break;
+ case Intrinsic::x86_avx512_gather_qpd_512: Opc = X86::VGATHERQPDZrm; break;
+ case Intrinsic::x86_avx512_gather_dpd_512: Opc = X86::VGATHERDPDZrm; break;
+ case Intrinsic::x86_avx512_gather_dps_512: Opc = X86::VGATHERDPSZrm; break;
+ case Intrinsic::x86_avx512_gather_qpi_512: Opc = X86::VPGATHERQDZrm; break;
+ case Intrinsic::x86_avx512_gather_qpq_512: Opc = X86::VPGATHERQQZrm; break;
+ case Intrinsic::x86_avx512_gather_dpi_512: Opc = X86::VPGATHERDDZrm; break;
+ case Intrinsic::x86_avx512_gather_dpq_512: Opc = X86::VPGATHERDQZrm; break;
+ }
+ SDValue Chain = Op.getOperand(0);
+ SDValue Index = Op.getOperand(2);
+ SDValue Base = Op.getOperand(3);
+ SDValue Scale = Op.getOperand(4);
+ return getGatherNode(Opc, Op, DAG, Base, Index, Scale, Chain, Subtarget);
+ }
+ // int_gather_mask(v1, mask, index, base, scale);
+ case Intrinsic::x86_avx512_gather_qps_mask_512:
+ case Intrinsic::x86_avx512_gather_qpd_mask_512:
+ case Intrinsic::x86_avx512_gather_dpd_mask_512:
+ case Intrinsic::x86_avx512_gather_dps_mask_512:
+ case Intrinsic::x86_avx512_gather_qpi_mask_512:
+ case Intrinsic::x86_avx512_gather_qpq_mask_512:
+ case Intrinsic::x86_avx512_gather_dpi_mask_512:
+ case Intrinsic::x86_avx512_gather_dpq_mask_512: {
+ unsigned Opc;
+ switch (IntNo) {
+ default: llvm_unreachable("Unexpected intrinsic!");
+ case Intrinsic::x86_avx512_gather_qps_mask_512:
+ Opc = X86::VGATHERQPSZrm; break;
+ case Intrinsic::x86_avx512_gather_qpd_mask_512:
+ Opc = X86::VGATHERQPDZrm; break;
+ case Intrinsic::x86_avx512_gather_dpd_mask_512:
+ Opc = X86::VGATHERDPDZrm; break;
+ case Intrinsic::x86_avx512_gather_dps_mask_512:
+ Opc = X86::VGATHERDPSZrm; break;
+ case Intrinsic::x86_avx512_gather_qpi_mask_512:
+ Opc = X86::VPGATHERQDZrm; break;
+ case Intrinsic::x86_avx512_gather_qpq_mask_512:
+ Opc = X86::VPGATHERQQZrm; break;
+ case Intrinsic::x86_avx512_gather_dpi_mask_512:
+ Opc = X86::VPGATHERDDZrm; break;
+ case Intrinsic::x86_avx512_gather_dpq_mask_512:
+ Opc = X86::VPGATHERDQZrm; break;
+ }
+ SDValue Chain = Op.getOperand(0);
+ SDValue Src = Op.getOperand(2);
+ SDValue Mask = Op.getOperand(3);
+ SDValue Index = Op.getOperand(4);
+ SDValue Base = Op.getOperand(5);
+ SDValue Scale = Op.getOperand(6);
+ return getMGatherNode(Opc, Op, DAG, Src, Mask, Base, Index, Scale, Chain,
+ Subtarget);
+ }
+  // int_scatter(base, index, v1, scale);
+ case Intrinsic::x86_avx512_scatter_qpd_512:
+ case Intrinsic::x86_avx512_scatter_qps_512:
+ case Intrinsic::x86_avx512_scatter_dpd_512:
+ case Intrinsic::x86_avx512_scatter_qpi_512:
+ case Intrinsic::x86_avx512_scatter_qpq_512:
+ case Intrinsic::x86_avx512_scatter_dpq_512:
+ case Intrinsic::x86_avx512_scatter_dps_512:
+ case Intrinsic::x86_avx512_scatter_dpi_512: {
+ unsigned Opc;
+ switch (IntNo) {
+ default: llvm_unreachable("Unexpected intrinsic!");
+ case Intrinsic::x86_avx512_scatter_qpd_512:
+ Opc = X86::VSCATTERQPDZmr; break;
+ case Intrinsic::x86_avx512_scatter_qps_512:
+ Opc = X86::VSCATTERQPSZmr; break;
+ case Intrinsic::x86_avx512_scatter_dpd_512:
+ Opc = X86::VSCATTERDPDZmr; break;
+ case Intrinsic::x86_avx512_scatter_dps_512:
+ Opc = X86::VSCATTERDPSZmr; break;
+ case Intrinsic::x86_avx512_scatter_qpi_512:
+ Opc = X86::VPSCATTERQDZmr; break;
+ case Intrinsic::x86_avx512_scatter_qpq_512:
+ Opc = X86::VPSCATTERQQZmr; break;
+ case Intrinsic::x86_avx512_scatter_dpq_512:
+ Opc = X86::VPSCATTERDQZmr; break;
+ case Intrinsic::x86_avx512_scatter_dpi_512:
+ Opc = X86::VPSCATTERDDZmr; break;
+ }
+ SDValue Chain = Op.getOperand(0);
+ SDValue Base = Op.getOperand(2);
+ SDValue Index = Op.getOperand(3);
+ SDValue Src = Op.getOperand(4);
+ SDValue Scale = Op.getOperand(5);
+ return getScatterNode(Opc, Op, DAG, Src, Base, Index, Scale, Chain);
+ }
+  // int_scatter_mask(base, mask, index, v1, scale);
+ case Intrinsic::x86_avx512_scatter_qps_mask_512:
+ case Intrinsic::x86_avx512_scatter_qpd_mask_512:
+ case Intrinsic::x86_avx512_scatter_dpd_mask_512:
+ case Intrinsic::x86_avx512_scatter_dps_mask_512:
+ case Intrinsic::x86_avx512_scatter_qpi_mask_512:
+ case Intrinsic::x86_avx512_scatter_qpq_mask_512:
+ case Intrinsic::x86_avx512_scatter_dpi_mask_512:
+ case Intrinsic::x86_avx512_scatter_dpq_mask_512: {
+ unsigned Opc;
+ switch (IntNo) {
+ default: llvm_unreachable("Unexpected intrinsic!");
+ case Intrinsic::x86_avx512_scatter_qpd_mask_512:
+ Opc = X86::VSCATTERQPDZmr; break;
+ case Intrinsic::x86_avx512_scatter_qps_mask_512:
+ Opc = X86::VSCATTERQPSZmr; break;
+ case Intrinsic::x86_avx512_scatter_dpd_mask_512:
+ Opc = X86::VSCATTERDPDZmr; break;
+ case Intrinsic::x86_avx512_scatter_dps_mask_512:
+ Opc = X86::VSCATTERDPSZmr; break;
+ case Intrinsic::x86_avx512_scatter_qpi_mask_512:
+ Opc = X86::VPSCATTERQDZmr; break;
+ case Intrinsic::x86_avx512_scatter_qpq_mask_512:
+ Opc = X86::VPSCATTERQQZmr; break;
+ case Intrinsic::x86_avx512_scatter_dpq_mask_512:
+ Opc = X86::VPSCATTERDQZmr; break;
+ case Intrinsic::x86_avx512_scatter_dpi_mask_512:
+ Opc = X86::VPSCATTERDDZmr; break;
+ }
+ SDValue Chain = Op.getOperand(0);
+ SDValue Base = Op.getOperand(2);
+ SDValue Mask = Op.getOperand(3);
+ SDValue Index = Op.getOperand(4);
+ SDValue Src = Op.getOperand(5);
+ SDValue Scale = Op.getOperand(6);
+ return getMScatterNode(Opc, Op, DAG, Src, Mask, Base, Index, Scale, Chain);
+ }
// XTEST intrinsics.
case Intrinsic::x86_xtest: {
SDVTList VTs = DAG.getVTList(Op->getValueType(0), MVT::Other);
@@ -11913,8 +12479,8 @@ static SDValue LowerMUL(SDValue Op, const X86Subtarget *Subtarget,
return DAG.getVectorShuffle(VT, dl, Evens, Odds, ShufMask);
}
- assert((VT == MVT::v2i64 || VT == MVT::v4i64) &&
- "Only know how to lower V2I64/V4I64 multiply");
+ assert((VT == MVT::v2i64 || VT == MVT::v4i64 || VT == MVT::v8i64) &&
+ "Only know how to lower V2I64/V4I64/V8I64 multiply");
// Ahi = psrlqi(a, 32);
// Bhi = psrlqi(b, 32);
@@ -11927,13 +12493,12 @@ static SDValue LowerMUL(SDValue Op, const X86Subtarget *Subtarget,
// AhiBlo = psllqi(AhiBlo, 32);
// return AloBlo + AloBhi + AhiBlo;
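+  // Why this works: with A = Ahi*2^32 + Alo and B = Bhi*2^32 + Blo, the low
+  // 64 bits of A*B are Alo*Blo + ((Alo*Bhi + Ahi*Blo) << 32); the Ahi*Bhi
+  // term only contributes at bit 64 and above, so it is dropped.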
- SDValue ShAmt = DAG.getConstant(32, MVT::i32);
-
- SDValue Ahi = DAG.getNode(X86ISD::VSRLI, dl, VT, A, ShAmt);
- SDValue Bhi = DAG.getNode(X86ISD::VSRLI, dl, VT, B, ShAmt);
+ SDValue Ahi = getTargetVShiftByConstNode(X86ISD::VSRLI, dl, VT, A, 32, DAG);
+ SDValue Bhi = getTargetVShiftByConstNode(X86ISD::VSRLI, dl, VT, B, 32, DAG);
// Bit cast to 32-bit vectors for MULUDQ
- EVT MulVT = (VT == MVT::v2i64) ? MVT::v4i32 : MVT::v8i32;
+ EVT MulVT = (VT == MVT::v2i64) ? MVT::v4i32 :
+ (VT == MVT::v4i64) ? MVT::v8i32 : MVT::v16i32;
A = DAG.getNode(ISD::BITCAST, dl, MulVT, A);
B = DAG.getNode(ISD::BITCAST, dl, MulVT, B);
Ahi = DAG.getNode(ISD::BITCAST, dl, MulVT, Ahi);
@@ -11943,14 +12508,14 @@ static SDValue LowerMUL(SDValue Op, const X86Subtarget *Subtarget,
SDValue AloBhi = DAG.getNode(X86ISD::PMULUDQ, dl, VT, A, Bhi);
SDValue AhiBlo = DAG.getNode(X86ISD::PMULUDQ, dl, VT, Ahi, B);
- AloBhi = DAG.getNode(X86ISD::VSHLI, dl, VT, AloBhi, ShAmt);
- AhiBlo = DAG.getNode(X86ISD::VSHLI, dl, VT, AhiBlo, ShAmt);
+ AloBhi = getTargetVShiftByConstNode(X86ISD::VSHLI, dl, VT, AloBhi, 32, DAG);
+ AhiBlo = getTargetVShiftByConstNode(X86ISD::VSHLI, dl, VT, AhiBlo, 32, DAG);
SDValue Res = DAG.getNode(ISD::ADD, dl, VT, AloBlo, AloBhi);
return DAG.getNode(ISD::ADD, dl, VT, Res, AhiBlo);
}
-SDValue X86TargetLowering::LowerSDIV(SDValue Op, SelectionDAG &DAG) const {
+static SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG) {
EVT VT = Op.getValueType();
EVT EltTy = VT.getVectorElementType();
unsigned NumElts = VT.getVectorNumElements();
@@ -11972,16 +12537,26 @@ SDValue X86TargetLowering::LowerSDIV(SDValue Op, SelectionDAG &DAG) const {
if ((SplatValue != 0) &&
(SplatValue.isPowerOf2() || (-SplatValue).isPowerOf2())) {
- unsigned lg2 = SplatValue.countTrailingZeros();
+ unsigned Lg2 = SplatValue.countTrailingZeros();
// Splat the sign bit.
- SDValue Sz = DAG.getConstant(EltTy.getSizeInBits()-1, MVT::i32);
- SDValue SGN = getTargetVShiftNode(X86ISD::VSRAI, dl, VT, N0, Sz, DAG);
+ SmallVector<SDValue, 16> Sz(NumElts,
+ DAG.getConstant(EltTy.getSizeInBits() - 1,
+ EltTy));
+ SDValue SGN = DAG.getNode(ISD::SRA, dl, VT, N0,
+ DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Sz[0],
+ NumElts));
// Add (N0 < 0) ? abs2 - 1 : 0;
- SDValue Amt = DAG.getConstant(EltTy.getSizeInBits() - lg2, MVT::i32);
- SDValue SRL = getTargetVShiftNode(X86ISD::VSRLI, dl, VT, SGN, Amt, DAG);
+ SmallVector<SDValue, 16> Amt(NumElts,
+ DAG.getConstant(EltTy.getSizeInBits() - Lg2,
+ EltTy));
+ SDValue SRL = DAG.getNode(ISD::SRL, dl, VT, SGN,
+ DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Amt[0],
+ NumElts));
SDValue ADD = DAG.getNode(ISD::ADD, dl, VT, N0, SRL);
- SDValue Lg2Amt = DAG.getConstant(lg2, MVT::i32);
- SDValue SRA = getTargetVShiftNode(X86ISD::VSRAI, dl, VT, ADD, Lg2Amt, DAG);
+ SmallVector<SDValue, 16> Lg2Amt(NumElts, DAG.getConstant(Lg2, EltTy));
+ SDValue SRA = DAG.getNode(ISD::SRA, dl, VT, ADD,
+ DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Lg2Amt[0],
+ NumElts));
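+    // Worked example (values assumed): dividing i32 lanes by 8 (Lg2 = 3).
+    // SGN = N0 >> 31 is -1 for negative lanes; SRL = SGN >>u 29 is then 7,
+    // so SRA computes (N0 + 7) >> 3, rounding the quotient toward zero.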
// If we're dividing by a positive value, we're done. Otherwise, we must
// negate the result.
@@ -12010,23 +12585,26 @@ static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG,
if (VT == MVT::v2i64 || VT == MVT::v4i32 || VT == MVT::v8i16 ||
(Subtarget->hasInt256() &&
- (VT == MVT::v4i64 || VT == MVT::v8i32 || VT == MVT::v16i16))) {
+ (VT == MVT::v4i64 || VT == MVT::v8i32 || VT == MVT::v16i16)) ||
+ (Subtarget->hasAVX512() &&
+ (VT == MVT::v8i64 || VT == MVT::v16i32))) {
if (Op.getOpcode() == ISD::SHL)
- return DAG.getNode(X86ISD::VSHLI, dl, VT, R,
- DAG.getConstant(ShiftAmt, MVT::i32));
+ return getTargetVShiftByConstNode(X86ISD::VSHLI, dl, VT, R, ShiftAmt,
+ DAG);
if (Op.getOpcode() == ISD::SRL)
- return DAG.getNode(X86ISD::VSRLI, dl, VT, R,
- DAG.getConstant(ShiftAmt, MVT::i32));
+ return getTargetVShiftByConstNode(X86ISD::VSRLI, dl, VT, R, ShiftAmt,
+ DAG);
if (Op.getOpcode() == ISD::SRA && VT != MVT::v2i64 && VT != MVT::v4i64)
- return DAG.getNode(X86ISD::VSRAI, dl, VT, R,
- DAG.getConstant(ShiftAmt, MVT::i32));
+ return getTargetVShiftByConstNode(X86ISD::VSRAI, dl, VT, R, ShiftAmt,
+ DAG);
}
if (VT == MVT::v16i8) {
if (Op.getOpcode() == ISD::SHL) {
// Make a large shift.
- SDValue SHL = DAG.getNode(X86ISD::VSHLI, dl, MVT::v8i16, R,
- DAG.getConstant(ShiftAmt, MVT::i32));
+ SDValue SHL = getTargetVShiftByConstNode(X86ISD::VSHLI, dl,
+ MVT::v8i16, R, ShiftAmt,
+ DAG);
SHL = DAG.getNode(ISD::BITCAST, dl, VT, SHL);
// Zero out the rightmost bits.
SmallVector<SDValue, 16> V(16,
@@ -12037,8 +12615,9 @@ static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG,
}
if (Op.getOpcode() == ISD::SRL) {
// Make a large shift.
- SDValue SRL = DAG.getNode(X86ISD::VSRLI, dl, MVT::v8i16, R,
- DAG.getConstant(ShiftAmt, MVT::i32));
+ SDValue SRL = getTargetVShiftByConstNode(X86ISD::VSRLI, dl,
+ MVT::v8i16, R, ShiftAmt,
+ DAG);
SRL = DAG.getNode(ISD::BITCAST, dl, VT, SRL);
// Zero out the leftmost bits.
SmallVector<SDValue, 16> V(16,
@@ -12069,8 +12648,9 @@ static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG,
if (Subtarget->hasInt256() && VT == MVT::v32i8) {
if (Op.getOpcode() == ISD::SHL) {
// Make a large shift.
- SDValue SHL = DAG.getNode(X86ISD::VSHLI, dl, MVT::v16i16, R,
- DAG.getConstant(ShiftAmt, MVT::i32));
+ SDValue SHL = getTargetVShiftByConstNode(X86ISD::VSHLI, dl,
+ MVT::v16i16, R, ShiftAmt,
+ DAG);
SHL = DAG.getNode(ISD::BITCAST, dl, VT, SHL);
// Zero out the rightmost bits.
SmallVector<SDValue, 32> V(32,
@@ -12081,8 +12661,9 @@ static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG,
}
if (Op.getOpcode() == ISD::SRL) {
// Make a large shift.
- SDValue SRL = DAG.getNode(X86ISD::VSRLI, dl, MVT::v16i16, R,
- DAG.getConstant(ShiftAmt, MVT::i32));
+ SDValue SRL = getTargetVShiftByConstNode(X86ISD::VSRLI, dl,
+ MVT::v16i16, R, ShiftAmt,
+ DAG);
SRL = DAG.getNode(ISD::BITCAST, dl, VT, SRL);
// Zero out the leftmost bits.
SmallVector<SDValue, 32> V(32,
@@ -12147,14 +12728,14 @@ static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG,
default:
llvm_unreachable("Unknown shift opcode!");
case ISD::SHL:
- return DAG.getNode(X86ISD::VSHLI, dl, VT, R,
- DAG.getConstant(ShiftAmt, MVT::i32));
+ return getTargetVShiftByConstNode(X86ISD::VSHLI, dl, VT, R, ShiftAmt,
+ DAG);
case ISD::SRL:
- return DAG.getNode(X86ISD::VSRLI, dl, VT, R,
- DAG.getConstant(ShiftAmt, MVT::i32));
+ return getTargetVShiftByConstNode(X86ISD::VSRLI, dl, VT, R, ShiftAmt,
+ DAG);
case ISD::SRA:
- return DAG.getNode(X86ISD::VSRAI, dl, VT, R,
- DAG.getConstant(ShiftAmt, MVT::i32));
+ return getTargetVShiftByConstNode(X86ISD::VSRAI, dl, VT, R, ShiftAmt,
+ DAG);
}
}
@@ -12172,7 +12753,8 @@ static SDValue LowerScalarVariableShift(SDValue Op, SelectionDAG &DAG,
VT == MVT::v4i32 || VT == MVT::v8i16 ||
(Subtarget->hasInt256() &&
((VT == MVT::v4i64 && Op.getOpcode() != ISD::SRA) ||
- VT == MVT::v8i32 || VT == MVT::v16i16))) {
+ VT == MVT::v8i32 || VT == MVT::v16i16)) ||
+ (Subtarget->hasAVX512() && (VT == MVT::v8i64 || VT == MVT::v16i32))) {
SDValue BaseShAmt;
EVT EltVT = VT.getVectorElementType();
@@ -12240,6 +12822,8 @@ static SDValue LowerScalarVariableShift(SDValue Op, SelectionDAG &DAG,
case MVT::v4i64:
case MVT::v8i32:
case MVT::v16i16:
+ case MVT::v16i32:
+ case MVT::v8i64:
return getTargetVShiftNode(X86ISD::VSHLI, dl, VT, R, BaseShAmt, DAG);
}
case ISD::SRA:
@@ -12249,6 +12833,8 @@ static SDValue LowerScalarVariableShift(SDValue Op, SelectionDAG &DAG,
case MVT::v8i16:
case MVT::v8i32:
case MVT::v16i16:
+ case MVT::v16i32:
+ case MVT::v8i64:
return getTargetVShiftNode(X86ISD::VSRAI, dl, VT, R, BaseShAmt, DAG);
}
case ISD::SRL:
@@ -12260,6 +12846,8 @@ static SDValue LowerScalarVariableShift(SDValue Op, SelectionDAG &DAG,
case MVT::v4i64:
case MVT::v8i32:
case MVT::v16i16:
+ case MVT::v16i32:
+ case MVT::v8i64:
return getTargetVShiftNode(X86ISD::VSRLI, dl, VT, R, BaseShAmt, DAG);
}
}
@@ -12268,7 +12856,8 @@ static SDValue LowerScalarVariableShift(SDValue Op, SelectionDAG &DAG,
// Special case in 32-bit mode, where i64 is expanded into high and low parts.
if (!Subtarget->is64Bit() &&
- (VT == MVT::v2i64 || (Subtarget->hasInt256() && VT == MVT::v4i64)) &&
+ (VT == MVT::v2i64 || (Subtarget->hasInt256() && VT == MVT::v4i64) ||
+ (Subtarget->hasAVX512() && VT == MVT::v8i64)) &&
Amt.getOpcode() == ISD::BITCAST &&
Amt.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
Amt = Amt.getOperand(0);
@@ -12297,7 +12886,8 @@ static SDValue LowerScalarVariableShift(SDValue Op, SelectionDAG &DAG,
return SDValue();
}
-SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const {
+static SDValue LowerShift(SDValue Op, const X86Subtarget* Subtarget,
+ SelectionDAG &DAG) {
EVT VT = Op.getValueType();
SDLoc dl(Op);
@@ -12316,6 +12906,8 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const {
if (V.getNode())
return V;
+ if (Subtarget->hasAVX512() && (VT == MVT::v16i32 || VT == MVT::v8i64))
+ return Op;
// AVX2 has VPSLLV/VPSRAV/VPSRLV.
if (Subtarget->hasInt256()) {
if (Op.getOpcode() == ISD::SRL &&
@@ -12356,8 +12948,7 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const {
// r = VSELECT(r, psllw(r & (char16)15, 4), a);
SDValue M = DAG.getNode(ISD::AND, dl, VT, R, CM1);
- M = getTargetVShiftNode(X86ISD::VSHLI, dl, MVT::v8i16, M,
- DAG.getConstant(4, MVT::i32), DAG);
+ M = getTargetVShiftByConstNode(X86ISD::VSHLI, dl, MVT::v8i16, M, 4, DAG);
M = DAG.getNode(ISD::BITCAST, dl, VT, M);
R = DAG.getNode(ISD::VSELECT, dl, VT, OpVSel, M, R);
@@ -12368,8 +12959,7 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const {
// r = VSELECT(r, psllw(r & (char16)63, 2), a);
M = DAG.getNode(ISD::AND, dl, VT, R, CM2);
- M = getTargetVShiftNode(X86ISD::VSHLI, dl, MVT::v8i16, M,
- DAG.getConstant(2, MVT::i32), DAG);
+ M = getTargetVShiftByConstNode(X86ISD::VSHLI, dl, MVT::v8i16, M, 2, DAG);
M = DAG.getNode(ISD::BITCAST, dl, VT, M);
R = DAG.getNode(ISD::VSELECT, dl, VT, OpVSel, M, R);
@@ -12512,7 +13102,6 @@ SDValue X86TargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
unsigned BitsDiff = VT.getScalarType().getSizeInBits() -
ExtraVT.getScalarType().getSizeInBits();
- SDValue ShAmt = DAG.getConstant(BitsDiff, MVT::i32);
switch (VT.getSimpleVT().SimpleTy) {
default: return SDValue();
@@ -12546,24 +13135,34 @@ SDValue X86TargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
// fall through
case MVT::v4i32:
case MVT::v8i16: {
- // (sext (vzext x)) -> (vsext x)
SDValue Op0 = Op.getOperand(0);
SDValue Op00 = Op0.getOperand(0);
SDValue Tmp1;
// Hopefully, this VECTOR_SHUFFLE is just a VZEXT.
if (Op0.getOpcode() == ISD::BITCAST &&
- Op00.getOpcode() == ISD::VECTOR_SHUFFLE)
- Tmp1 = LowerVectorIntExtend(Op00, DAG);
- if (Tmp1.getNode()) {
- SDValue Tmp1Op0 = Tmp1.getOperand(0);
- assert(Tmp1Op0.getOpcode() == X86ISD::VZEXT &&
- "This optimization is invalid without a VZEXT.");
- return DAG.getNode(X86ISD::VSEXT, dl, VT, Tmp1Op0.getOperand(0));
+ Op00.getOpcode() == ISD::VECTOR_SHUFFLE) {
+ // (sext (vzext x)) -> (vsext x)
+ Tmp1 = LowerVectorIntExtend(Op00, Subtarget, DAG);
+ if (Tmp1.getNode()) {
+ EVT ExtraEltVT = ExtraVT.getVectorElementType();
+ // This folding is only valid when the in-reg type is a vector of i8,
+ // i16, or i32.
+ if (ExtraEltVT == MVT::i8 || ExtraEltVT == MVT::i16 ||
+ ExtraEltVT == MVT::i32) {
+ SDValue Tmp1Op0 = Tmp1.getOperand(0);
+ assert(Tmp1Op0.getOpcode() == X86ISD::VZEXT &&
+ "This optimization is invalid without a VZEXT.");
+ return DAG.getNode(X86ISD::VSEXT, dl, VT, Tmp1Op0.getOperand(0));
+ }
+ Op0 = Tmp1;
+ }
}
// If the above didn't work, then just use Shift-Left + Shift-Right.
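+    // For example, sign-extending the low 16 bits of each i32 lane: shift
+    // left by BitsDiff (16), then arithmetic-shift right by 16, which
+    // replicates bit 15 across the upper half of the lane.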
- Tmp1 = getTargetVShiftNode(X86ISD::VSHLI, dl, VT, Op0, ShAmt, DAG);
- return getTargetVShiftNode(X86ISD::VSRAI, dl, VT, Tmp1, ShAmt, DAG);
+ Tmp1 = getTargetVShiftByConstNode(X86ISD::VSHLI, dl, VT, Op0, BitsDiff,
+ DAG);
+ return getTargetVShiftByConstNode(X86ISD::VSRAI, dl, VT, Tmp1, BitsDiff,
+ DAG);
}
}
}
@@ -12655,9 +13254,10 @@ static SDValue LowerREADCYCLECOUNTER(SDValue Op, const X86Subtarget *Subtarget,
return DAG.getMergeValues(Ops, array_lengthof(Ops), dl);
}
-SDValue X86TargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const {
- EVT SrcVT = Op.getOperand(0).getValueType();
- EVT DstVT = Op.getValueType();
+static SDValue LowerBITCAST(SDValue Op, const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
+ MVT SrcVT = Op.getOperand(0).getSimpleValueType();
+ MVT DstVT = Op.getSimpleValueType();
assert(Subtarget->is64Bit() && !Subtarget->hasSSE2() &&
Subtarget->hasMMX() && "Unexpected custom BITCAST");
assert((DstVT == MVT::i64 ||
@@ -12742,7 +13342,8 @@ static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) {
Op.getOperand(1), Op.getOperand(2));
}
-SDValue X86TargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const {
+static SDValue LowerFSINCOS(SDValue Op, const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
assert(Subtarget->isTargetDarwin() && Subtarget->is64Bit());
// For MacOSX, we want to call an alternative entry point: __sincos_stret,
@@ -12753,8 +13354,8 @@ SDValue X86TargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const {
EVT ArgVT = Arg.getValueType();
Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
- ArgListTy Args;
- ArgListEntry Entry;
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
Entry.Node = Arg;
Entry.Ty = ArgTy;
@@ -12767,7 +13368,8 @@ SDValue X86TargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const {
// the small struct {f32, f32} is returned in (eax, edx). For f64,
// the results are returned via SRet in memory.
const char *LibcallName = isF64 ? "__sincos_stret" : "__sincosf_stret";
- SDValue Callee = DAG.getExternalSymbol(LibcallName, getPointerTy());
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ SDValue Callee = DAG.getExternalSymbol(LibcallName, TLI.getPointerTy());
Type *RetTy = isF64
? (Type*)StructType::get(ArgTy, ArgTy, NULL)
@@ -12778,7 +13380,7 @@ SDValue X86TargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const {
                               CallingConv::C, /*isTailCall=*/false,
/*doesNotRet=*/false, /*isReturnValueUsed*/true,
Callee, Args, DAG, dl);
- std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
+ std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI);
if (isF64)
// Returned in xmm0 and xmm1.
@@ -12822,9 +13424,9 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG);
case ISD::UINT_TO_FP: return LowerUINT_TO_FP(Op, DAG);
case ISD::TRUNCATE: return LowerTRUNCATE(Op, DAG);
- case ISD::ZERO_EXTEND: return LowerZERO_EXTEND(Op, DAG);
- case ISD::SIGN_EXTEND: return LowerSIGN_EXTEND(Op, DAG);
- case ISD::ANY_EXTEND: return LowerANY_EXTEND(Op, DAG);
+ case ISD::ZERO_EXTEND: return LowerZERO_EXTEND(Op, Subtarget, DAG);
+ case ISD::SIGN_EXTEND: return LowerSIGN_EXTEND(Op, Subtarget, DAG);
+ case ISD::ANY_EXTEND: return LowerANY_EXTEND(Op, Subtarget, DAG);
case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG);
case ISD::FP_TO_UINT: return LowerFP_TO_UINT(Op, DAG);
case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG);
@@ -12840,7 +13442,8 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::VAARG: return LowerVAARG(Op, DAG);
case ISD::VACOPY: return LowerVACOPY(Op, Subtarget, DAG);
case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
- case ISD::INTRINSIC_W_CHAIN: return LowerINTRINSIC_W_CHAIN(Op, DAG);
+ case ISD::INTRINSIC_VOID:
+ case ISD::INTRINSIC_W_CHAIN: return LowerINTRINSIC_W_CHAIN(Op, Subtarget, DAG);
case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
case ISD::FRAME_TO_ARGS_OFFSET:
@@ -12858,7 +13461,7 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::MUL: return LowerMUL(Op, Subtarget, DAG);
case ISD::SRA:
case ISD::SRL:
- case ISD::SHL: return LowerShift(Op, DAG);
+ case ISD::SHL: return LowerShift(Op, Subtarget, DAG);
case ISD::SADDO:
case ISD::UADDO:
case ISD::SSUBO:
@@ -12866,7 +13469,7 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::SMULO:
case ISD::UMULO: return LowerXALUO(Op, DAG);
case ISD::READCYCLECOUNTER: return LowerREADCYCLECOUNTER(Op, Subtarget,DAG);
- case ISD::BITCAST: return LowerBITCAST(Op, DAG);
+ case ISD::BITCAST: return LowerBITCAST(Op, Subtarget, DAG);
case ISD::ADDC:
case ISD::ADDE:
case ISD::SUBC:
@@ -12874,7 +13477,7 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::ADD: return LowerADD(Op, DAG);
case ISD::SUB: return LowerSUB(Op, DAG);
case ISD::SDIV: return LowerSDIV(Op, DAG);
- case ISD::FSINCOS: return LowerFSINCOS(Op, DAG);
+ case ISD::FSINCOS: return LowerFSINCOS(Op, Subtarget, DAG);
}
}
@@ -13128,6 +13731,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::CMP: return "X86ISD::CMP";
case X86ISD::COMI: return "X86ISD::COMI";
case X86ISD::UCOMI: return "X86ISD::UCOMI";
+ case X86ISD::CMPM: return "X86ISD::CMPM";
+ case X86ISD::CMPMU: return "X86ISD::CMPMU";
case X86ISD::SETCC: return "X86ISD::SETCC";
case X86ISD::SETCC_CARRY: return "X86ISD::SETCC_CARRY";
case X86ISD::FSETCCsd: return "X86ISD::FSETCCsd";
@@ -13187,6 +13792,9 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::VZEXT_LOAD: return "X86ISD::VZEXT_LOAD";
case X86ISD::VZEXT: return "X86ISD::VZEXT";
case X86ISD::VSEXT: return "X86ISD::VSEXT";
+ case X86ISD::VTRUNC: return "X86ISD::VTRUNC";
+ case X86ISD::VTRUNCM: return "X86ISD::VTRUNCM";
+ case X86ISD::VINSERT: return "X86ISD::VINSERT";
case X86ISD::VFPEXT: return "X86ISD::VFPEXT";
case X86ISD::VFPROUND: return "X86ISD::VFPROUND";
case X86ISD::VSHLDQ: return "X86ISD::VSHLDQ";
@@ -13200,6 +13808,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::CMPP: return "X86ISD::CMPP";
case X86ISD::PCMPEQ: return "X86ISD::PCMPEQ";
case X86ISD::PCMPGT: return "X86ISD::PCMPGT";
+ case X86ISD::PCMPEQM: return "X86ISD::PCMPEQM";
+ case X86ISD::PCMPGTM: return "X86ISD::PCMPGTM";
case X86ISD::ADD: return "X86ISD::ADD";
case X86ISD::SUB: return "X86ISD::SUB";
case X86ISD::ADC: return "X86ISD::ADC";
@@ -13214,9 +13824,14 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::BLSI: return "X86ISD::BLSI";
case X86ISD::BLSMSK: return "X86ISD::BLSMSK";
case X86ISD::BLSR: return "X86ISD::BLSR";
+ case X86ISD::BZHI: return "X86ISD::BZHI";
+ case X86ISD::BEXTR: return "X86ISD::BEXTR";
case X86ISD::MUL_IMM: return "X86ISD::MUL_IMM";
case X86ISD::PTEST: return "X86ISD::PTEST";
case X86ISD::TESTP: return "X86ISD::TESTP";
+ case X86ISD::TESTM: return "X86ISD::TESTM";
+ case X86ISD::KORTEST: return "X86ISD::KORTEST";
+ case X86ISD::KTEST: return "X86ISD::KTEST";
case X86ISD::PALIGNR: return "X86ISD::PALIGNR";
case X86ISD::PSHUFD: return "X86ISD::PSHUFD";
case X86ISD::PSHUFHW: return "X86ISD::PSHUFHW";
@@ -13239,6 +13854,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::VPERMILP: return "X86ISD::VPERMILP";
case X86ISD::VPERM2X128: return "X86ISD::VPERM2X128";
case X86ISD::VPERMV: return "X86ISD::VPERMV";
+ case X86ISD::VPERMV3: return "X86ISD::VPERMV3";
case X86ISD::VPERMI: return "X86ISD::VPERMI";
case X86ISD::PMULUDQ: return "X86ISD::PMULUDQ";
case X86ISD::VASTART_SAVE_XMM_REGS: return "X86ISD::VASTART_SAVE_XMM_REGS";
@@ -13422,37 +14038,46 @@ bool X86TargetLowering::isNarrowingProfitable(EVT VT1, EVT VT2) const {
bool
X86TargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M,
EVT VT) const {
+ if (!VT.isSimple())
+ return false;
+
+ MVT SVT = VT.getSimpleVT();
+
// Very little shuffling can be done for 64-bit vectors right now.
if (VT.getSizeInBits() == 64)
return false;
// FIXME: pshufb, blends, shifts.
- return (VT.getVectorNumElements() == 2 ||
+ return (SVT.getVectorNumElements() == 2 ||
ShuffleVectorSDNode::isSplatMask(&M[0], VT) ||
- isMOVLMask(M, VT) ||
- isSHUFPMask(M, VT, Subtarget->hasFp256()) ||
- isPSHUFDMask(M, VT) ||
- isPSHUFHWMask(M, VT, Subtarget->hasInt256()) ||
- isPSHUFLWMask(M, VT, Subtarget->hasInt256()) ||
- isPALIGNRMask(M, VT, Subtarget) ||
- isUNPCKLMask(M, VT, Subtarget->hasInt256()) ||
- isUNPCKHMask(M, VT, Subtarget->hasInt256()) ||
- isUNPCKL_v_undef_Mask(M, VT, Subtarget->hasInt256()) ||
- isUNPCKH_v_undef_Mask(M, VT, Subtarget->hasInt256()));
+ isMOVLMask(M, SVT) ||
+ isSHUFPMask(M, SVT) ||
+ isPSHUFDMask(M, SVT) ||
+ isPSHUFHWMask(M, SVT, Subtarget->hasInt256()) ||
+ isPSHUFLWMask(M, SVT, Subtarget->hasInt256()) ||
+ isPALIGNRMask(M, SVT, Subtarget) ||
+ isUNPCKLMask(M, SVT, Subtarget->hasInt256()) ||
+ isUNPCKHMask(M, SVT, Subtarget->hasInt256()) ||
+ isUNPCKL_v_undef_Mask(M, SVT, Subtarget->hasInt256()) ||
+ isUNPCKH_v_undef_Mask(M, SVT, Subtarget->hasInt256()));
}
bool
X86TargetLowering::isVectorClearMaskLegal(const SmallVectorImpl<int> &Mask,
EVT VT) const {
- unsigned NumElts = VT.getVectorNumElements();
+ if (!VT.isSimple())
+ return false;
+
+ MVT SVT = VT.getSimpleVT();
+ unsigned NumElts = SVT.getVectorNumElements();
// FIXME: This collection of masks seems suspect.
if (NumElts == 2)
return true;
- if (NumElts == 4 && VT.is128BitVector()) {
- return (isMOVLMask(Mask, VT) ||
- isCommutedMOVLMask(Mask, VT, true) ||
- isSHUFPMask(Mask, VT, Subtarget->hasFp256()) ||
- isSHUFPMask(Mask, VT, Subtarget->hasFp256(), /* Commuted */ true));
+ if (NumElts == 4 && SVT.is128BitVector()) {
+ return (isMOVLMask(Mask, SVT) ||
+ isCommutedMOVLMask(Mask, SVT, true) ||
+ isSHUFPMask(Mask, SVT) ||
+ isSHUFPMask(Mask, SVT, /* Commuted */ true));
}
return false;
}
@@ -15194,6 +15819,9 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
case X86::CMOV_V8F32:
case X86::CMOV_V4F64:
case X86::CMOV_V4I64:
+ case X86::CMOV_V16F32:
+ case X86::CMOV_V8F64:
+ case X86::CMOV_V8I64:
case X86::CMOV_GR16:
case X86::CMOV_GR32:
case X86::CMOV_RFP32:
@@ -15642,7 +16270,7 @@ static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG,
for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i)
Elts.push_back(getShuffleScalarElt(N, i, DAG, 0));
- return EltsFromConsecutiveLoads(VT, Elts, dl, DAG);
+ return EltsFromConsecutiveLoads(VT, Elts, dl, DAG, true);
}
/// PerformTruncateCombine - Converts truncate operation to
@@ -15749,6 +16377,44 @@ static SDValue XFormVExtractWithShuffleIntoLoad(SDNode *N, SelectionDAG &DAG,
EltNo);
}
+/// Extract one bit from a mask vector, such as v16i1 or v8i1.
+/// This is an AVX-512 feature.
+static SDValue ExtractBitFromMaskVector(SDNode *N, SelectionDAG &DAG) {
+ SDValue Vec = N->getOperand(0);
+ SDLoc dl(Vec);
+ MVT VecVT = Vec.getSimpleValueType();
+ SDValue Idx = N->getOperand(1);
+ MVT EltVT = N->getSimpleValueType(0);
+
+  assert(VecVT.getVectorElementType() == MVT::i1 && EltVT == MVT::i8 &&
+         "Unexpected operands in ExtractBitFromMaskVector");
+
+  // Variable index: the constant-shift trick below does not apply, so widen
+  // the mask and extract the element from the wide vector instead.
+ if (!isa<ConstantSDNode>(Idx)) {
+ MVT ExtVT = (VecVT == MVT::v8i1 ? MVT::v8i64 : MVT::v16i32);
+ SDValue Ext = DAG.getNode(ISD::ZERO_EXTEND, dl, ExtVT, Vec);
+    SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
+                              ExtVT.getVectorElementType(), Ext, Idx);
+ return DAG.getNode(ISD::TRUNCATE, dl, EltVT, Elt);
+ }
+
+ unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
+
+ MVT ScalarVT = MVT::getIntegerVT(VecVT.getSizeInBits());
+ unsigned MaxShift = VecVT.getSizeInBits() - 1;
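+  // Worked example (values assumed): for v16i1 with IdxVal = 3, the mask is
+  // bitcast to i16, shifted left by 15 - 3 = 12 to move bit 3 into the MSB,
+  // then shifted right logically by 15, leaving the bit in position 0.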
+ Vec = DAG.getNode(ISD::BITCAST, dl, ScalarVT, Vec);
+ Vec = DAG.getNode(ISD::SHL, dl, ScalarVT, Vec,
+ DAG.getConstant(MaxShift - IdxVal, ScalarVT));
+ Vec = DAG.getNode(ISD::SRL, dl, ScalarVT, Vec,
+ DAG.getConstant(MaxShift, ScalarVT));
+
+ if (VecVT == MVT::v16i1) {
+ Vec = DAG.getNode(ISD::BITCAST, dl, MVT::i16, Vec);
+ return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Vec);
+ }
+ return DAG.getNode(ISD::BITCAST, dl, MVT::i8, Vec);
+}
+
/// PerformEXTRACT_VECTOR_ELTCombine - Detect vector gather/scatter index
/// generation and convert it from being a bunch of shuffles and extracts
/// to a simple store and scalar loads to extract the elements.
@@ -15759,6 +16425,11 @@ static SDValue PerformEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,
return NewOp;
SDValue InputVector = N->getOperand(0);
+
+ if (InputVector.getValueType().getVectorElementType() == MVT::i1 &&
+ !DCI.isBeforeLegalize())
+ return ExtractBitFromMaskVector(N, DAG);
+
// Detect whether we are trying to convert from mmx to i32 and the bitcast
// from mmx to v2i32 has a single usage.
if (InputVector.getNode()->getOpcode() == llvm::ISD::BITCAST &&
@@ -15846,24 +16517,28 @@ static SDValue PerformEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,
}
-/// \brief Matches a VSELECT onto min/max or return 0 if the node doesn't match.
+/// \brief Matches a VSELECT onto min/max; returns the opcode matched (or 0)
+/// and whether the 256-bit operation must be split into two 128-bit halves.
-static unsigned matchIntegerMINMAX(SDValue Cond, EVT VT, SDValue LHS,
- SDValue RHS, SelectionDAG &DAG,
- const X86Subtarget *Subtarget) {
+static std::pair<unsigned, bool>
+matchIntegerMINMAX(SDValue Cond, EVT VT, SDValue LHS, SDValue RHS,
+ SelectionDAG &DAG, const X86Subtarget *Subtarget) {
if (!VT.isVector())
- return 0;
+ return std::make_pair(0, false);
+ bool NeedSplit = false;
switch (VT.getSimpleVT().SimpleTy) {
- default: return 0;
+ default: return std::make_pair(0, false);
case MVT::v32i8:
case MVT::v16i16:
case MVT::v8i32:
if (!Subtarget->hasAVX2())
- return 0;
+ NeedSplit = true;
+ if (!Subtarget->hasAVX())
+ return std::make_pair(0, false);
+ break;
case MVT::v16i8:
case MVT::v8i16:
case MVT::v4i32:
if (!Subtarget->hasSSE2())
- return 0;
+ return std::make_pair(0, false);
}
// SSE2 has only a small subset of the operations.
@@ -15874,6 +16549,7 @@ static unsigned matchIntegerMINMAX(SDValue Cond, EVT VT, SDValue LHS,
ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
+ unsigned Opc = 0;
// Check for x CC y ? x : y.
if (DAG.isEqualTo(LHS, Cond.getOperand(0)) &&
DAG.isEqualTo(RHS, Cond.getOperand(1))) {
@@ -15881,16 +16557,16 @@ static unsigned matchIntegerMINMAX(SDValue Cond, EVT VT, SDValue LHS,
default: break;
case ISD::SETULT:
case ISD::SETULE:
- return hasUnsigned ? X86ISD::UMIN : 0;
+ Opc = hasUnsigned ? X86ISD::UMIN : 0; break;
case ISD::SETUGT:
case ISD::SETUGE:
- return hasUnsigned ? X86ISD::UMAX : 0;
+ Opc = hasUnsigned ? X86ISD::UMAX : 0; break;
case ISD::SETLT:
case ISD::SETLE:
- return hasSigned ? X86ISD::SMIN : 0;
+ Opc = hasSigned ? X86ISD::SMIN : 0; break;
case ISD::SETGT:
case ISD::SETGE:
- return hasSigned ? X86ISD::SMAX : 0;
+ Opc = hasSigned ? X86ISD::SMAX : 0; break;
}
// Check for x CC y ? y : x -- a min/max with reversed arms.
} else if (DAG.isEqualTo(LHS, Cond.getOperand(1)) &&
@@ -15899,20 +16575,20 @@ static unsigned matchIntegerMINMAX(SDValue Cond, EVT VT, SDValue LHS,
default: break;
case ISD::SETULT:
case ISD::SETULE:
- return hasUnsigned ? X86ISD::UMAX : 0;
+ Opc = hasUnsigned ? X86ISD::UMAX : 0; break;
case ISD::SETUGT:
case ISD::SETUGE:
- return hasUnsigned ? X86ISD::UMIN : 0;
+ Opc = hasUnsigned ? X86ISD::UMIN : 0; break;
case ISD::SETLT:
case ISD::SETLE:
- return hasSigned ? X86ISD::SMAX : 0;
+ Opc = hasSigned ? X86ISD::SMAX : 0; break;
case ISD::SETGT:
case ISD::SETGE:
- return hasSigned ? X86ISD::SMIN : 0;
+ Opc = hasSigned ? X86ISD::SMIN : 0; break;
}
}
- return 0;
+ return std::make_pair(Opc, NeedSplit);
}
/// PerformSELECTCombine - Do target-specific dag combines on SELECT and VSELECT
@@ -15926,13 +16602,14 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
SDValue LHS = N->getOperand(1);
SDValue RHS = N->getOperand(2);
EVT VT = LHS.getValueType();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
// If we have SSE[12] support, try to form min/max nodes. SSE min/max
// instructions match the semantics of the common C idiom x<y?x:y but not
// x<=y?x:y, because of how they handle negative zero (which can be
// ignored in unsafe-math mode).
if (Cond.getOpcode() == ISD::SETCC && VT.isFloatingPoint() &&
- VT != MVT::f80 && DAG.getTargetLoweringInfo().isTypeLegal(VT) &&
+ VT != MVT::f80 && TLI.isTypeLegal(VT) &&
(Subtarget->hasSSE2() ||
(Subtarget->hasSSE1() && VT.getScalarType() == MVT::f32))) {
ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
@@ -16071,6 +16748,22 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
return DAG.getNode(Opcode, DL, N->getValueType(0), LHS, RHS);
}
+ EVT CondVT = Cond.getValueType();
+ if (Subtarget->hasAVX512() && VT.isVector() && CondVT.isVector() &&
+ CondVT.getVectorElementType() == MVT::i1) {
+    // v16i8 (select v16i1, v16i8, v16i8) does not have a proper
+    // lowering on AVX-512. In this case we convert it to
+    // v16i8 (select v16i8, v16i8, v16i8) and use an AVX instruction.
+    // The same applies to all 128- and 256-bit vectors of i8 and i16.
+ EVT OpVT = LHS.getValueType();
+ if ((OpVT.is128BitVector() || OpVT.is256BitVector()) &&
+ (OpVT.getVectorElementType() == MVT::i8 ||
+ OpVT.getVectorElementType() == MVT::i16)) {
+ Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, OpVT, Cond);
+ DCI.AddToWorklist(Cond.getNode());
+ return DAG.getNode(N->getOpcode(), DL, OpVT, Cond, LHS, RHS);
+ }
+ }
// If this is a select between two integer constants, try to do some
// optimizations.
if (ConstantSDNode *TrueC = dyn_cast<ConstantSDNode>(LHS)) {
@@ -16195,9 +16888,12 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
}
}
+  // Bail out early if the type is not legal; the combines below assume it is.
+ if (!TLI.isTypeLegal(VT))
+ return SDValue();
+
// Match VSELECTs into subs with unsigned saturation.
- if (!DCI.isBeforeLegalize() &&
- N->getOpcode() == ISD::VSELECT && Cond.getOpcode() == ISD::SETCC &&
+ if (N->getOpcode() == ISD::VSELECT && Cond.getOpcode() == ISD::SETCC &&
// psubus is available in SSE2 and AVX2 for i8 and i16 vectors.
((Subtarget->hasSSE2() && (VT == MVT::v16i8 || VT == MVT::v8i16)) ||
(Subtarget->hasAVX2() && (VT == MVT::v32i8 || VT == MVT::v16i16)))) {
@@ -16251,14 +16947,35 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
}
// Try to match a min/max vector operation.
- if (!DCI.isBeforeLegalize() &&
- N->getOpcode() == ISD::VSELECT && Cond.getOpcode() == ISD::SETCC)
- if (unsigned Op = matchIntegerMINMAX(Cond, VT, LHS, RHS, DAG, Subtarget))
- return DAG.getNode(Op, DL, N->getValueType(0), LHS, RHS);
+ if (N->getOpcode() == ISD::VSELECT && Cond.getOpcode() == ISD::SETCC) {
+    std::pair<unsigned, bool> Ret =
+        matchIntegerMINMAX(Cond, VT, LHS, RHS, DAG, Subtarget);
+    unsigned Opc = Ret.first;
+    bool NeedSplit = Ret.second;
+
+ if (Opc && NeedSplit) {
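+      // NeedSplit means we have AVX but not AVX2: there is no 256-bit
+      // integer min/max, so operate on the 128-bit halves and reassemble.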
+ unsigned NumElems = VT.getVectorNumElements();
+ // Extract the LHS vectors
+ SDValue LHS1 = Extract128BitVector(LHS, 0, DAG, DL);
+ SDValue LHS2 = Extract128BitVector(LHS, NumElems/2, DAG, DL);
+
+ // Extract the RHS vectors
+ SDValue RHS1 = Extract128BitVector(RHS, 0, DAG, DL);
+ SDValue RHS2 = Extract128BitVector(RHS, NumElems/2, DAG, DL);
+
+ // Create min/max for each subvector
+ LHS = DAG.getNode(Opc, DL, LHS1.getValueType(), LHS1, RHS1);
+ RHS = DAG.getNode(Opc, DL, LHS2.getValueType(), LHS2, RHS2);
+
+ // Merge the result
+ return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, LHS, RHS);
+ } else if (Opc)
+ return DAG.getNode(Opc, DL, VT, LHS, RHS);
+ }
// Simplify vector selection if the selector will be produced by CMPP*/PCMP*.
- if (!DCI.isBeforeLegalize() && N->getOpcode() == ISD::VSELECT &&
- Cond.getOpcode() == ISD::SETCC) {
+ if (N->getOpcode() == ISD::VSELECT && Cond.getOpcode() == ISD::SETCC &&
+ // Check if SETCC has already been promoted
+ TLI.getSetCCResultType(*DAG.getContext(), VT) == Cond.getValueType()) {
assert(Cond.getValueType().isVector() &&
"vector select expects a vector selector!");
@@ -16305,7 +17022,6 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
// matched by one of the SSE/AVX BLEND instructions. These instructions only
// depend on the highest bit in each word. Try to use SimplifyDemandedBits
// to simplify previous instructions.
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (N->getOpcode() == ISD::VSELECT && DCI.isBeforeLegalizeOps() &&
!DCI.isBeforeLegalize() && TLI.isOperationLegal(ISD::VSELECT, VT)) {
unsigned BitWidth = Cond.getValueType().getScalarType().getSizeInBits();
@@ -16314,6 +17030,15 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
if (BitWidth == 1)
return SDValue();
+  // Check all uses of the condition operand to see whether it will be
+  // consumed by non-BLEND instructions, which may depend on all bits being
+  // set properly.
+ for (SDNode::use_iterator I = Cond->use_begin(),
+ E = Cond->use_end(); I != E; ++I)
+ if (I->getOpcode() != ISD::VSELECT)
+ // TODO: Add other opcodes eventually lowered into BLEND.
+ return SDValue();
+
assert(BitWidth >= 8 && BitWidth <= 64 && "Invalid mask size");
APInt DemandedMask = APInt::getHighBitsSet(BitWidth, 1);
@@ -16990,33 +17715,80 @@ static SDValue PerformAndCombine(SDNode *N, SelectionDAG &DAG,
if (R.getNode())
return R;
- // Create BLSI, and BLSR instructions
+  // Create BLSI, BLSR, BZHI, and BEXTR instructions
// BLSI is X & (-X)
// BLSR is X & (X-1)
- if (Subtarget->hasBMI() && (VT == MVT::i32 || VT == MVT::i64)) {
+ // BZHI is X & ((1 << Y) - 1)
+ // BEXTR is ((X >> imm) & (2**size-1))
+ if (VT == MVT::i32 || VT == MVT::i64) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
SDLoc DL(N);
- // Check LHS for neg
- if (N0.getOpcode() == ISD::SUB && N0.getOperand(1) == N1 &&
- isZero(N0.getOperand(0)))
- return DAG.getNode(X86ISD::BLSI, DL, VT, N1);
-
- // Check RHS for neg
- if (N1.getOpcode() == ISD::SUB && N1.getOperand(1) == N0 &&
- isZero(N1.getOperand(0)))
- return DAG.getNode(X86ISD::BLSI, DL, VT, N0);
+ if (Subtarget->hasBMI()) {
+ // Check LHS for neg
+ if (N0.getOpcode() == ISD::SUB && N0.getOperand(1) == N1 &&
+ isZero(N0.getOperand(0)))
+ return DAG.getNode(X86ISD::BLSI, DL, VT, N1);
+
+ // Check RHS for neg
+ if (N1.getOpcode() == ISD::SUB && N1.getOperand(1) == N0 &&
+ isZero(N1.getOperand(0)))
+ return DAG.getNode(X86ISD::BLSI, DL, VT, N0);
+
+ // Check LHS for X-1
+ if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1 &&
+ isAllOnes(N0.getOperand(1)))
+ return DAG.getNode(X86ISD::BLSR, DL, VT, N1);
+
+ // Check RHS for X-1
+ if (N1.getOpcode() == ISD::ADD && N1.getOperand(0) == N0 &&
+ isAllOnes(N1.getOperand(1)))
+ return DAG.getNode(X86ISD::BLSR, DL, VT, N0);
+ }
+
+ if (Subtarget->hasBMI2()) {
+ // Check for (and (add (shl 1, Y), -1), X)
+ if (N0.getOpcode() == ISD::ADD && isAllOnes(N0.getOperand(1))) {
+ SDValue N00 = N0.getOperand(0);
+ if (N00.getOpcode() == ISD::SHL) {
+ SDValue N001 = N00.getOperand(1);
+ assert(N001.getValueType() == MVT::i8 && "unexpected type");
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(N00.getOperand(0));
+ if (C && C->getZExtValue() == 1)
+ return DAG.getNode(X86ISD::BZHI, DL, VT, N1, N001);
+ }
+ }
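+      // E.g. X & ((1 << Y) - 1) keeps the low Y bits of X, which is exactly
+      // the zero-high-bits operation that BZHI performs.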
- // Check LHS for X-1
- if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1 &&
- isAllOnes(N0.getOperand(1)))
- return DAG.getNode(X86ISD::BLSR, DL, VT, N1);
+ // Check for (and X, (add (shl 1, Y), -1))
+ if (N1.getOpcode() == ISD::ADD && isAllOnes(N1.getOperand(1))) {
+ SDValue N10 = N1.getOperand(0);
+ if (N10.getOpcode() == ISD::SHL) {
+ SDValue N101 = N10.getOperand(1);
+ assert(N101.getValueType() == MVT::i8 && "unexpected type");
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(N10.getOperand(0));
+ if (C && C->getZExtValue() == 1)
+ return DAG.getNode(X86ISD::BZHI, DL, VT, N0, N101);
+ }
+ }
+ }
- // Check RHS for X-1
- if (N1.getOpcode() == ISD::ADD && N1.getOperand(0) == N0 &&
- isAllOnes(N1.getOperand(1)))
- return DAG.getNode(X86ISD::BLSR, DL, VT, N0);
+ // Check for BEXTR.
+ if ((Subtarget->hasBMI() || Subtarget->hasTBM()) &&
+ (N0.getOpcode() == ISD::SRA || N0.getOpcode() == ISD::SRL)) {
+ ConstantSDNode *MaskNode = dyn_cast<ConstantSDNode>(N1);
+ ConstantSDNode *ShiftNode = dyn_cast<ConstantSDNode>(N0.getOperand(1));
+ if (MaskNode && ShiftNode) {
+ uint64_t Mask = MaskNode->getZExtValue();
+ uint64_t Shift = ShiftNode->getZExtValue();
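+        // Illustrative encoding (values assumed): (X >> 4) & 0xff yields
+        // Shift = 4 and MaskSize = 8, packed as 0x0804 below: the field
+        // length goes in bits 15:8, the start position in bits 7:0.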
+ if (isMask_64(Mask)) {
+ uint64_t MaskSize = CountPopulation_64(Mask);
+ if (Shift + MaskSize <= VT.getSizeInBits())
+ return DAG.getNode(X86ISD::BEXTR, DL, VT, N0.getOperand(0),
+ DAG.getConstant(Shift | (MaskSize << 8), VT));
+ }
+ }
+ } // BEXTR
return SDValue();
}
@@ -17732,7 +18504,7 @@ static bool isHorizontalBinOp(SDValue &LHS, SDValue &RHS, bool IsCommutative) {
RHS.getOpcode() != ISD::VECTOR_SHUFFLE)
return false;
- MVT VT = LHS.getValueType().getSimpleVT();
+ MVT VT = LHS.getSimpleValueType();
assert((VT.is128BitVector() || VT.is256BitVector()) &&
"Unsupported vector type for horizontal add/sub");
@@ -18205,7 +18977,7 @@ static SDValue PerformSINT_TO_FPCombine(SDNode *N, SelectionDAG &DAG,
if (!Ld->isVolatile() && !N->getValueType(0).isVector() &&
ISD::isNON_EXTLoad(Op0.getNode()) && Op0.hasOneUse() &&
!XTLI->getSubtarget()->is64Bit() &&
- !DAG.getTargetLoweringInfo().isTypeLegal(VT)) {
+ VT == MVT::i64) {
SDValue FILDChain = XTLI->BuildFILD(SDValue(N, 0), Ld->getValueType(0),
Ld->getChain(), Op0, DAG);
DAG.ReplaceAllUsesOfValueWith(Op0.getValue(1), FILDChain.getValue(1));
@@ -18531,6 +19303,22 @@ namespace {
const VariadicFunction1<bool, StringRef, StringRef, matchAsmImpl> matchAsm={};
}
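+
+// Returns true when the sorted constraint pieces clobber the flag registers.
+// For example, "~{cc},~{dirflag},~{flags},~{fpsr}" splits into four pieces
+// and is accepted; the three-piece form omits ~{dirflag}, which some front
+// ends do not emit.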
+static bool clobbersFlagRegisters(const SmallVector<StringRef, 4> &AsmPieces) {
+  if (AsmPieces.size() == 3 || AsmPieces.size() == 4) {
+    if (std::count(AsmPieces.begin(), AsmPieces.end(), "~{cc}") &&
+        std::count(AsmPieces.begin(), AsmPieces.end(), "~{flags}") &&
+        std::count(AsmPieces.begin(), AsmPieces.end(), "~{fpsr}")) {
+      if (AsmPieces.size() == 3)
+        return true;
+      else if (std::count(AsmPieces.begin(), AsmPieces.end(), "~{dirflag}"))
+        return true;
+    }
+  }
+  return false;
+}
+
bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const {
InlineAsm *IA = cast<InlineAsm>(CI->getCalledValue());
@@ -18572,12 +19360,8 @@ bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const {
const std::string &ConstraintsStr = IA->getConstraintString();
SplitString(StringRef(ConstraintsStr).substr(5), AsmPieces, ",");
array_pod_sort(AsmPieces.begin(), AsmPieces.end());
- if (AsmPieces.size() == 4 &&
- AsmPieces[0] == "~{cc}" &&
- AsmPieces[1] == "~{dirflag}" &&
- AsmPieces[2] == "~{flags}" &&
- AsmPieces[3] == "~{fpsr}")
- return IntrinsicLowering::LowerToByteSwap(CI);
+ if (clobbersFlagRegisters(AsmPieces))
+ return IntrinsicLowering::LowerToByteSwap(CI);
}
break;
case 3:
@@ -18590,11 +19374,7 @@ bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const {
const std::string &ConstraintsStr = IA->getConstraintString();
SplitString(StringRef(ConstraintsStr).substr(5), AsmPieces, ",");
array_pod_sort(AsmPieces.begin(), AsmPieces.end());
- if (AsmPieces.size() == 4 &&
- AsmPieces[0] == "~{cc}" &&
- AsmPieces[1] == "~{dirflag}" &&
- AsmPieces[2] == "~{flags}" &&
- AsmPieces[3] == "~{fpsr}")
+ if (clobbersFlagRegisters(AsmPieces))
return IntrinsicLowering::LowerToByteSwap(CI);
}
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index 2703274..bc3dd60 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -53,7 +53,7 @@ namespace llvm {
/// to X86::XORPS or X86::XORPD.
FXOR,
- /// FAND - Bitwise logical ANDNOT of floating point values. This
+ /// FANDN - Bitwise logical ANDNOT of floating point values. This
/// corresponds to X86::ANDNPS or X86::ANDNPD.
FANDN,
@@ -254,6 +254,12 @@ namespace llvm {
// VSEXT - Vector integer signed-extend.
VSEXT,
+ // VTRUNC - Vector integer truncate.
+ VTRUNC,
+
+      // VTRUNCM - Vector integer truncate with mask.
+ VTRUNCM,
+
// VFPEXT - Vector FP extend.
VFPEXT,
@@ -274,6 +280,13 @@ namespace llvm {
// PCMP* - Vector integer comparisons.
PCMPEQ, PCMPGT,
+      // PCMP*M - Vector integer comparisons; the result is in a mask vector.
+ PCMPEQM, PCMPGTM,
+
+ /// CMPM, CMPMU - Vector comparison generating mask bits for fp and
+ /// integer signed and unsigned data types.
+ CMPM,
+ CMPMU,
// ADD, SUB, SMUL, etc. - Arithmetic operations with FLAGS results.
ADD, SUB, ADC, SBB, SMUL,
@@ -282,18 +295,23 @@ namespace llvm {
BLSI, // BLSI - Extract lowest set isolated bit
BLSMSK, // BLSMSK - Get mask up to lowest set bit
BLSR, // BLSR - Reset lowest set bit
+ BZHI, // BZHI - Zero high bits
+ BEXTR, // BEXTR - Bit field extract
UMUL, // LOW, HI, FLAGS = umul LHS, RHS
// MUL_IMM - X86 specific multiply by immediate.
MUL_IMM,
- // PTEST - Vector bitwise comparisons
+ // PTEST - Vector bitwise comparisons.
PTEST,
- // TESTP - Vector packed fp sign bitwise comparisons
+ // TESTP - Vector packed fp sign bitwise comparisons.
TESTP,
+ // TESTM - Vector "test" in AVX-512, the result is in a mask vector.
+ TESTM,
+
// OR/AND test for masks
KORTEST,
KTEST,
@@ -318,11 +336,13 @@ namespace llvm {
UNPCKH,
VPERMILP,
VPERMV,
+ VPERMV3,
VPERMI,
VPERM2X128,
VBROADCAST,
// masked broadcast
VBROADCASTM,
+ VINSERT,
// PMULUDQ - Vector multiply packed unsigned doubleword integers
PMULUDQ,
@@ -755,6 +775,8 @@ namespace llvm {
SDValue BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain, SDValue StackSlot,
SelectionDAG &DAG) const;
+    virtual bool isNoopAddrSpaceCast(unsigned SrcAS,
+                                     unsigned DestAS) const LLVM_OVERRIDE;
+
/// \brief Reset the operation actions based on target options.
virtual void resetOperationActions();
@@ -831,8 +853,6 @@ namespace llvm {
bool isSigned,
bool isReplace) const;
- SDValue LowerAsSplatVectorLoad(SDValue SrcOp, EVT VT, SDLoc dl,
- SelectionDAG &DAG) const;
SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
@@ -846,18 +866,12 @@ namespace llvm {
SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerShiftParts(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerBITCAST(SDValue op, SelectionDAG &DAG) const;
SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerUINT_TO_FP_i32(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerUINT_TO_FP_vec(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerZERO_EXTEND(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerZERO_EXTEND_AVX512(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerSIGN_EXTEND(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerSIGN_EXTEND_AVX512(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerANY_EXTEND(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFABS(SDValue Op, SelectionDAG &DAG) const;
@@ -881,19 +895,7 @@ namespace llvm {
SDValue lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerShift(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const;
-
- // Utility functions to help LowerVECTOR_SHUFFLE & LowerBUILD_VECTOR
- SDValue LowerVectorBroadcast(SDValue Op, SelectionDAG &DAG) const;
- SDValue NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG) const;
- SDValue buildFromShuffleMostly(SDValue Op, SelectionDAG &DAG) const;
-
- SDValue LowerVectorAllZeroTest(SDValue Op, SelectionDAG &DAG) const;
-
- SDValue LowerVectorIntExtend(SDValue Op, SelectionDAG &DAG) const;
virtual SDValue
LowerFormalArguments(SDValue Chain,
@@ -925,6 +927,8 @@ namespace llvm {
const SmallVectorImpl<ISD::OutputArg> &Outs,
LLVMContext &Context) const;
+ virtual const uint16_t *getScratchRegisters(CallingConv::ID CC) const;
+
/// Utility function to emit atomic-load-arith operations (and, or, xor,
/// nand, max, min, umax, umin). It takes the corresponding instruction to
/// expand, the associated machine basic block, and the associated X86
diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td
index 8abae14..cb19fbd 100644
--- a/lib/Target/X86/X86InstrAVX512.td
+++ b/lib/Target/X86/X86InstrAVX512.td
@@ -80,6 +80,21 @@ let Predicates = [HasAVX512] in {
def : Pat<(v16i16 (bitconvert (v32i8 VR256X:$src))), (v16i16 VR256X:$src)>;
}
+//
+// AVX-512: the VPXOR instruction writes zero to its upper part, so it is
+// safe to use it for building zeros.
+//
+
+let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
+ isPseudo = 1, Predicates = [HasAVX512] in {
+def AVX512_512_SET0 : I<0, Pseudo, (outs VR512:$dst), (ins), "",
+ [(set VR512:$dst, (v16f32 immAllZerosV))]>;
+}
+
+def : Pat<(v8i64 immAllZerosV), (AVX512_512_SET0)>;
+def : Pat<(v16i32 immAllZerosV), (AVX512_512_SET0)>;
+def : Pat<(v8f64 immAllZerosV), (AVX512_512_SET0)>;
+def : Pat<(v16f32 immAllZerosV), (AVX512_512_SET0)>;
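+// An all-zeros register has the same bit pattern for every 512-bit type, so
+// the integer and f64 patterns above can reuse the single v16f32 pseudo.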
+
//===----------------------------------------------------------------------===//
// AVX-512 - VECTOR INSERT
//
@@ -376,6 +391,11 @@ def : Pat<(v16f32 (X86VBroadcast (loadf32 addr:$src))),
def : Pat<(v8f64 (X86VBroadcast (loadf64 addr:$src))),
(VBROADCASTSDZrm addr:$src)>;
+def : Pat<(int_x86_avx512_vbroadcast_ss_512 addr:$src),
+ (VBROADCASTSSZrm addr:$src)>;
+def : Pat<(int_x86_avx512_vbroadcast_sd_512 addr:$src),
+ (VBROADCASTSDZrm addr:$src)>;
+
multiclass avx512_int_broadcast_reg<bits<8> opc, string OpcodeStr,
RegisterClass SrcRC, RegisterClass KRC> {
def Zrr : AVX5128I<opc, MRMSrcReg, (outs VR512:$dst), (ins SrcRC:$src),
@@ -402,6 +422,13 @@ def : Pat<(v16i32 (X86VBroadcast (i32 GR32:$src))),
(VPBROADCASTDrZrr GR32:$src)>;
def : Pat<(v8i64 (X86VBroadcast (i64 GR64:$src))),
(VPBROADCASTQrZrr GR64:$src)>;
+def : Pat<(v8i64 (X86VBroadcastm VK8WM:$mask, (i64 GR64:$src))),
+ (VPBROADCASTQrZkrr VK8WM:$mask, GR64:$src)>;
+
+def : Pat<(v16i32 (int_x86_avx512_pbroadcastd_i32_512 (i32 GR32:$src))),
+ (VPBROADCASTDrZrr GR32:$src)>;
+def : Pat<(v8i64 (int_x86_avx512_pbroadcastq_i64_512 (i64 GR64:$src))),
+ (VPBROADCASTQrZrr GR64:$src)>;
multiclass avx512_int_broadcast_rm<bits<8> opc, string OpcodeStr,
X86MemOperand x86memop, PatFrag ld_frag,
@@ -414,10 +441,11 @@ multiclass avx512_int_broadcast_rm<bits<8> opc, string OpcodeStr,
def krr : AVX5128I<opc, MRMSrcReg, (outs DstRC:$dst), (ins KRC:$mask,
VR128X:$src),
!strconcat(OpcodeStr,
- "\t{$src, ${dst}{${mask}}{z}|${dst}{${mask}}{z}, $src}"),
+ "\t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}"),
[(set DstRC:$dst,
(OpVT (X86VBroadcastm KRC:$mask, (SrcVT VR128X:$src))))]>,
EVEX, EVEX_KZ;
+ let mayLoad = 1 in {
def rm : AVX5128I<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set DstRC:$dst,
@@ -425,9 +453,10 @@ multiclass avx512_int_broadcast_rm<bits<8> opc, string OpcodeStr,
def krm : AVX5128I<opc, MRMSrcMem, (outs DstRC:$dst), (ins KRC:$mask,
x86memop:$src),
!strconcat(OpcodeStr,
- "\t{$src, ${dst}{${mask}}{z}|${dst}{${mask}}{z}, $src}"),
+ "\t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}"),
[(set DstRC:$dst, (OpVT (X86VBroadcastm KRC:$mask,
(ld_frag addr:$src))))]>, EVEX, EVEX_KZ;
+ }
}
defm VPBROADCASTDZ : avx512_int_broadcast_rm<0x58, "vpbroadcastd", i32mem,
@@ -437,10 +466,20 @@ defm VPBROADCASTQZ : avx512_int_broadcast_rm<0x59, "vpbroadcastq", i64mem,
loadi64, VR512, v8i64, v2i64, VK8WM>, EVEX_V512, VEX_W,
EVEX_CD8<64, CD8VT1>;
+def : Pat<(v16i32 (int_x86_avx512_pbroadcastd_512 (v4i32 VR128X:$src))),
+ (VPBROADCASTDZrr VR128X:$src)>;
+def : Pat<(v8i64 (int_x86_avx512_pbroadcastq_512 (v2i64 VR128X:$src))),
+ (VPBROADCASTQZrr VR128X:$src)>;
+
def : Pat<(v16f32 (X86VBroadcast (v4f32 VR128X:$src))),
(VBROADCASTSSZrr VR128X:$src)>;
def : Pat<(v8f64 (X86VBroadcast (v2f64 VR128X:$src))),
(VBROADCASTSDZrr VR128X:$src)>;
+
+def : Pat<(v16f32 (int_x86_avx512_vbroadcast_ss_ps_512 (v4f32 VR128X:$src))),
+ (VBROADCASTSSZrr VR128X:$src)>;
+def : Pat<(v8f64 (int_x86_avx512_vbroadcast_sd_pd_512 (v2f64 VR128X:$src))),
+ (VBROADCASTSDZrr VR128X:$src)>;
// Provide fallback in case the load node that is used in the patterns above
// is used by additional users, which prevents the pattern selection.
@@ -473,6 +512,306 @@ defm VPBROADCASTMW2D : avx512_mask_broadcast<0x3A, "vpbroadcastmw2d", VR512,
defm VPBROADCASTMB2Q : avx512_mask_broadcast<0x2A, "vpbroadcastmb2q", VR512,
VK8, v8i64, v8i1>, EVEX_V512, VEX_W;
+//===----------------------------------------------------------------------===//
+// AVX-512 - VPERM
+//
+// -- immediate form --
+multiclass avx512_perm_imm<bits<8> opc, string OpcodeStr, RegisterClass RC,
+ SDNode OpNode, PatFrag mem_frag,
+ X86MemOperand x86memop, ValueType OpVT> {
+ def ri : AVX512AIi8<opc, MRMSrcReg, (outs RC:$dst),
+ (ins RC:$src1, i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set RC:$dst,
+ (OpVT (OpNode RC:$src1, (i8 imm:$src2))))]>,
+ EVEX;
+ def mi : AVX512AIi8<opc, MRMSrcMem, (outs RC:$dst),
+ (ins x86memop:$src1, i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set RC:$dst,
+ (OpVT (OpNode (mem_frag addr:$src1),
+ (i8 imm:$src2))))]>, EVEX;
+}
+
+defm VPERMQZ : avx512_perm_imm<0x00, "vpermq", VR512, X86VPermi, memopv8i64,
+ i512mem, v8i64>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
+let ExeDomain = SSEPackedDouble in
+defm VPERMPDZ : avx512_perm_imm<0x01, "vpermpd", VR512, X86VPermi, memopv8f64,
+ f512mem, v8f64>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
+
+// -- VPERM - register form --
+multiclass avx512_perm<bits<8> opc, string OpcodeStr, RegisterClass RC,
+ PatFrag mem_frag, X86MemOperand x86memop, ValueType OpVT> {
+
+ def rr : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
+ (ins RC:$src1, RC:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set RC:$dst,
+ (OpVT (X86VPermv RC:$src1, RC:$src2)))]>, EVEX_4V;
+
+ def rm : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
+ (ins RC:$src1, x86memop:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set RC:$dst,
+ (OpVT (X86VPermv RC:$src1, (mem_frag addr:$src2))))]>,
+ EVEX_4V;
+}
+
+defm VPERMDZ : avx512_perm<0x36, "vpermd", VR512, memopv16i32, i512mem,
+ v16i32>, EVEX_V512, EVEX_CD8<32, CD8VF>;
+defm VPERMQZ : avx512_perm<0x36, "vpermq", VR512, memopv8i64, i512mem,
+ v8i64>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
+let ExeDomain = SSEPackedSingle in
+defm VPERMPSZ : avx512_perm<0x16, "vpermps", VR512, memopv16f32, f512mem,
+ v16f32>, EVEX_V512, EVEX_CD8<32, CD8VF>;
+let ExeDomain = SSEPackedDouble in
+defm VPERMPDZ : avx512_perm<0x16, "vpermpd", VR512, memopv8f64, f512mem,
+ v8f64>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
+
+// -- VPERMI2 - 3 source operands form --
+multiclass avx512_perm_3src<bits<8> opc, string OpcodeStr, RegisterClass RC,
+ PatFrag mem_frag, X86MemOperand x86memop,
+ ValueType OpVT> {
+let Constraints = "$src1 = $dst" in {
+ def rr : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
+ (ins RC:$src1, RC:$src2, RC:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ [(set RC:$dst,
+ (OpVT (X86VPermv3 RC:$src1, RC:$src2, RC:$src3)))]>,
+ EVEX_4V;
+
+ def rm : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
+ (ins RC:$src1, RC:$src2, x86memop:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ [(set RC:$dst,
+ (OpVT (X86VPermv3 RC:$src1, RC:$src2,
+ (mem_frag addr:$src3))))]>, EVEX_4V;
+ }
+}
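+// The "$src1 = $dst" constraint ties the index operand to the destination:
+// the vpermi2* instructions overwrite their index register with the result.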
+defm VPERMI2D : avx512_perm_3src<0x76, "vpermi2d", VR512, memopv16i32, i512mem,
+ v16i32>, EVEX_V512, EVEX_CD8<32, CD8VF>;
+defm VPERMI2Q : avx512_perm_3src<0x76, "vpermi2q", VR512, memopv8i64, i512mem,
+ v8i64>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
+defm VPERMI2PS : avx512_perm_3src<0x77, "vpermi2ps", VR512, memopv16f32, i512mem,
+ v16f32>, EVEX_V512, EVEX_CD8<32, CD8VF>;
+defm VPERMI2PD : avx512_perm_3src<0x77, "vpermi2pd", VR512, memopv8f64, i512mem,
+ v8f64>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
+
+//===----------------------------------------------------------------------===//
+// AVX-512 - BLEND using mask
+//
+multiclass avx512_blendmask<bits<8> opc, string OpcodeStr, Intrinsic Int,
+ RegisterClass KRC, RegisterClass RC,
+ X86MemOperand x86memop, PatFrag mem_frag,
+ SDNode OpNode, ValueType vt> {
+ def rr : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
+ (ins KRC:$mask, RC:$src1, RC:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
+ [(set RC:$dst, (OpNode KRC:$mask, (vt RC:$src2),
+ (vt RC:$src1)))]>, EVEX_4V, EVEX_K;
+ def rr_Int : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
+ (ins KRC:$mask, RC:$src1, RC:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
+ [(set RC:$dst, (Int KRC:$mask, (vt RC:$src2),
+ (vt RC:$src1)))]>, EVEX_4V, EVEX_K;
+
+ let mayLoad = 1 in {
+ def rm : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
+ (ins KRC:$mask, RC:$src1, x86memop:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, $mask, $dst|$dst, $mask, $src1, $src2}"),
+ []>,
+ EVEX_4V, EVEX_K;
+
+ def rm_Int : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
+ (ins KRC:$mask, RC:$src1, x86memop:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, $mask, $dst|$dst, $mask, $src1, $src2}"),
+ [(set RC:$dst, (Int KRC:$mask, (vt RC:$src1),
+ (mem_frag addr:$src2)))]>,
+ EVEX_4V, EVEX_K;
+ }
+}
+
+let ExeDomain = SSEPackedSingle in
+defm VBLENDMPSZ : avx512_blendmask<0x65, "vblendmps",
+ int_x86_avx512_mskblend_ps_512,
+ VK16WM, VR512, f512mem,
+ memopv16f32, vselect, v16f32>,
+ EVEX_CD8<32, CD8VF>, EVEX_V512;
+let ExeDomain = SSEPackedDouble in
+defm VBLENDMPDZ : avx512_blendmask<0x65, "vblendmpd",
+ int_x86_avx512_mskblend_pd_512,
+ VK8WM, VR512, f512mem,
+ memopv8f64, vselect, v8f64>,
+ VEX_W, EVEX_CD8<64, CD8VF>, EVEX_V512;
+
+defm VPBLENDMDZ : avx512_blendmask<0x64, "vpblendmd",
+ int_x86_avx512_mskblend_d_512,
+ VK16WM, VR512, f512mem,
+ memopv16i32, vselect, v16i32>,
+ EVEX_CD8<32, CD8VF>, EVEX_V512;
+
+defm VPBLENDMQZ : avx512_blendmask<0x64, "vpblendmq",
+ int_x86_avx512_mskblend_q_512,
+ VK8WM, VR512, f512mem,
+ memopv8i64, vselect, v8i64>,
+ VEX_W, EVEX_CD8<64, CD8VF>, EVEX_V512;
+
+let Predicates = [HasAVX512] in {
+def : Pat<(v8f32 (vselect (v8i1 VK8WM:$mask), (v8f32 VR256X:$src1),
+ (v8f32 VR256X:$src2))),
+ (EXTRACT_SUBREG
+ (v16f32 (VBLENDMPSZrr (COPY_TO_REGCLASS VK8WM:$mask, VK16WM),
+ (v16f32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm)),
+ (v16f32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)))), sub_ymm)>;
+
+def : Pat<(v8i32 (vselect (v8i1 VK8WM:$mask), (v8i32 VR256X:$src1),
+ (v8i32 VR256X:$src2))),
+ (EXTRACT_SUBREG
+ (v16i32 (VPBLENDMDZrr (COPY_TO_REGCLASS VK8WM:$mask, VK16WM),
+ (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm)),
+ (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)))), sub_ymm)>;
+}
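+
+// No 256-bit blendm is defined, so the two patterns above widen an 8-lane
+// vselect: the ymm operands are placed in the low half of a zmm with
+// SUBREG_TO_REG, the v8i1 mask is re-classed to VK16WM (the upper mask bits
+// only steer lanes whose results are discarded), and EXTRACT_SUBREG takes
+// the ymm result back out. Note the operand swap: blendm computes
+//   dst[i] = mask[i] ? src2[i] : src1[i]
+// so vselect's "true" value must become the second source.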
+
+multiclass avx512_icmp_packed<bits<8> opc, string OpcodeStr, RegisterClass KRC,
+ RegisterClass RC, X86MemOperand x86memop, PatFrag memop_frag,
+ SDNode OpNode, ValueType vt> {
+ def rr : AVX512BI<opc, MRMSrcReg,
+ (outs KRC:$dst), (ins RC:$src1, RC:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set KRC:$dst, (OpNode (vt RC:$src1), (vt RC:$src2)))],
+ IIC_SSE_ALU_F32P_RR>, EVEX_4V;
+ def rm : AVX512BI<opc, MRMSrcMem,
+ (outs KRC:$dst), (ins RC:$src1, x86memop:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set KRC:$dst, (OpNode (vt RC:$src1), (memop_frag addr:$src2)))],
+ IIC_SSE_ALU_F32P_RM>, EVEX_4V;
+}
+
+defm VPCMPEQDZ : avx512_icmp_packed<0x76, "vpcmpeqd", VK16, VR512, i512mem,
+ memopv16i32, X86pcmpeqm, v16i32>, EVEX_V512;
+defm VPCMPEQQZ : avx512_icmp_packed<0x29, "vpcmpeqq", VK8, VR512, i512mem,
+ memopv8i64, X86pcmpeqm, v8i64>, T8, EVEX_V512, VEX_W;
+
+defm VPCMPGTDZ : avx512_icmp_packed<0x66, "vpcmpgtd", VK16, VR512, i512mem,
+ memopv16i32, X86pcmpgtm, v16i32>, EVEX_V512;
+defm VPCMPGTQZ : avx512_icmp_packed<0x37, "vpcmpgtq", VK8, VR512, i512mem,
+ memopv8i64, X86pcmpgtm, v8i64>, T8, EVEX_V512, VEX_W;
+
+def : Pat<(v8i1 (X86pcmpgtm (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
+ (COPY_TO_REGCLASS (VPCMPGTDZrr
+ (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)),
+ (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm))), VK8)>;
+
+def : Pat<(v8i1 (X86pcmpeqm (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
+ (COPY_TO_REGCLASS (VPCMPEQDZrr
+ (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)),
+ (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm))), VK8)>;
+
+multiclass avx512_icmp_cc<bits<8> opc, RegisterClass KRC,
+ RegisterClass RC, X86MemOperand x86memop, PatFrag memop_frag,
+ SDNode OpNode, ValueType vt, Operand CC, string asm,
+ string asm_alt> {
+ def rri : AVX512AIi8<opc, MRMSrcReg,
+ (outs KRC:$dst), (ins RC:$src1, RC:$src2, CC:$cc), asm,
+ [(set KRC:$dst, (OpNode (vt RC:$src1), (vt RC:$src2), imm:$cc))],
+ IIC_SSE_ALU_F32P_RR>, EVEX_4V;
+ def rmi : AVX512AIi8<opc, MRMSrcMem,
+ (outs KRC:$dst), (ins RC:$src1, x86memop:$src2, CC:$cc), asm,
+ [(set KRC:$dst, (OpNode (vt RC:$src1), (memop_frag addr:$src2),
+ imm:$cc))], IIC_SSE_ALU_F32P_RM>, EVEX_4V;
+ // Accept explicit immediate argument form instead of comparison code.
+ let neverHasSideEffects = 1 in {
+ def rri_alt : AVX512AIi8<opc, MRMSrcReg,
+ (outs RC:$dst), (ins RC:$src1, RC:$src2, i8imm:$cc),
+ asm_alt, [], IIC_SSE_ALU_F32P_RR>, EVEX_4V;
+ def rmi_alt : AVX512AIi8<opc, MRMSrcMem,
+ (outs RC:$dst), (ins RC:$src1, x86memop:$src2, i8imm:$cc),
+ asm_alt, [], IIC_SSE_ALU_F32P_RM>, EVEX_4V;
+ }
+}
+
+defm VPCMPDZ : avx512_icmp_cc<0x1F, VK16, VR512, i512mem, memopv16i32,
+ X86cmpm, v16i32, AVXCC,
+ "vpcmp${cc}d\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ "vpcmpd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}">,
+ EVEX_V512, EVEX_CD8<32, CD8VF>;
+defm VPCMPUDZ : avx512_icmp_cc<0x1E, VK16, VR512, i512mem, memopv16i32,
+ X86cmpmu, v16i32, AVXCC,
+ "vpcmp${cc}ud\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ "vpcmpud\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}">,
+ EVEX_V512, EVEX_CD8<32, CD8VF>;
+
+defm VPCMPQZ : avx512_icmp_cc<0x1F, VK8, VR512, i512mem, memopv8i64,
+ X86cmpm, v8i64, AVXCC,
+ "vpcmp${cc}q\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ "vpcmpq\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}">,
+ VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;
+defm VPCMPUQZ : avx512_icmp_cc<0x1E, VK8, VR512, i512mem, memopv8i64,
+ X86cmpmu, v8i64, AVXCC,
+ "vpcmp${cc}uq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ "vpcmpuq\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}">,
+ VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;
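+
+// Each compare comes in two asm flavors; a sketch, assuming Intel syntax
+// and LE (imm8 = 2) as the predicate:
+//   vpcmpled k0, zmm0, zmm1       ; rri:     cc folded into the mnemonic
+//   vpcmpd   k0, zmm0, zmm1, 2    ; rri_alt: explicit imm8, kept so any
+//                                 ;          immediate can round-trip
+//                                 ;          through asm and disasm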
+
+// avx512_cmp_packed - AVX-512 packed FP compare instructions
+multiclass avx512_cmp_packed<RegisterClass KRC, RegisterClass RC,
+ X86MemOperand x86memop, Operand CC,
+ SDNode OpNode, ValueType vt, string asm,
+ string asm_alt, Domain d> {
+ def rri : AVX512PIi8<0xC2, MRMSrcReg,
+ (outs KRC:$dst), (ins RC:$src1, RC:$src2, CC:$cc), asm,
+ [(set KRC:$dst, (OpNode (vt RC:$src1), (vt RC:$src2), imm:$cc))], d>;
+ def rmi : AVX512PIi8<0xC2, MRMSrcMem,
+ (outs KRC:$dst), (ins RC:$src1, x86memop:$src2, CC:$cc), asm,
+ [(set KRC:$dst,
+ (OpNode (vt RC:$src1), (memop addr:$src2), imm:$cc))], d>;
+
+ // Accept explicit immediate argument form instead of comparison code.
+ let neverHasSideEffects = 1 in {
+ def rri_alt : AVX512PIi8<0xC2, MRMSrcReg,
+ (outs RC:$dst), (ins RC:$src1, RC:$src2, i8imm:$cc),
+ asm_alt, [], d>;
+ def rmi_alt : AVX512PIi8<0xC2, MRMSrcMem,
+ (outs RC:$dst), (ins RC:$src1, x86memop:$src2, i8imm:$cc),
+ asm_alt, [], d>;
+ }
+}
+
+defm VCMPPSZ : avx512_cmp_packed<VK16, VR512, f512mem, AVXCC, X86cmpm, v16f32,
+ "vcmp${cc}ps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ "vcmpps\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
+ SSEPackedSingle>, EVEX_4V, EVEX_V512, EVEX_CD8<32, CD8VF>;
+defm VCMPPDZ : avx512_cmp_packed<VK8, VR512, f512mem, AVXCC, X86cmpm, v8f64,
+ "vcmp${cc}pd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ "vcmppd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
+ SSEPackedDouble>, OpSize, EVEX_4V, VEX_W, EVEX_V512,
+ EVEX_CD8<64, CD8VF>;
+
+def : Pat<(v8i1 (X86cmpm (v8f32 VR256X:$src1), (v8f32 VR256X:$src2), imm:$cc)),
+ (COPY_TO_REGCLASS (VCMPPSZrri
+ (v16f32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)),
+ (v16f32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm)),
+ imm:$cc), VK8)>;
+def : Pat<(v8i1 (X86cmpm (v8i32 VR256X:$src1), (v8i32 VR256X:$src2), imm:$cc)),
+ (COPY_TO_REGCLASS (VPCMPDZrri
+ (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)),
+ (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm)),
+ imm:$cc), VK8)>;
+def : Pat<(v8i1 (X86cmpmu (v8i32 VR256X:$src1), (v8i32 VR256X:$src2), imm:$cc)),
+ (COPY_TO_REGCLASS (VPCMPUDZrri
+ (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)),
+ (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm)),
+ imm:$cc), VK8)>;
+
// Mask register copy, including
// - copy between mask registers
// - load/store mask registers
@@ -713,3 +1052,2475 @@ def : Pat<(v16i1 (insert_subvector undef, (v8i1 VK8:$src), (iPTR 0))),
def : Pat<(v8i1 (extract_subvector (v16i1 VK16:$src), (iPTR 8))),
(v8i1 (COPY_TO_REGCLASS (KSHIFTRWri VK16:$src, (i8 8)), VK8))>;
+
+//===----------------------------------------------------------------------===//
+// AVX-512 - Aligned and unaligned load and store
+//
+
+multiclass avx512_mov_packed<bits<8> opc, RegisterClass RC, RegisterClass KRC,
+ X86MemOperand x86memop, PatFrag ld_frag,
+ string asm, Domain d> {
+let neverHasSideEffects = 1 in
+ def rr : AVX512PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
+ !strconcat(asm, "\t{$src, $dst|$dst, $src}"), [], d>,
+ EVEX;
+let canFoldAsLoad = 1 in
+ def rm : AVX512PI<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
+ !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
+ [(set RC:$dst, (ld_frag addr:$src))], d>, EVEX;
+let Constraints = "$src1 = $dst" in {
+ def rrk : AVX512PI<opc, MRMSrcReg, (outs RC:$dst),
+ (ins RC:$src1, KRC:$mask, RC:$src2),
+ !strconcat(asm,
+ "\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"), [], d>,
+ EVEX, EVEX_K;
+ def rmk : AVX512PI<opc, MRMSrcMem, (outs RC:$dst),
+ (ins RC:$src1, KRC:$mask, x86memop:$src2),
+ !strconcat(asm,
+ "\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
+ [], d>, EVEX, EVEX_K;
+}
+}
+
+defm VMOVAPSZ : avx512_mov_packed<0x28, VR512, VK16WM, f512mem, alignedloadv16f32,
+ "vmovaps", SSEPackedSingle>,
+ EVEX_V512, EVEX_CD8<32, CD8VF>;
+defm VMOVAPDZ : avx512_mov_packed<0x28, VR512, VK8WM, f512mem, alignedloadv8f64,
+ "vmovapd", SSEPackedDouble>,
+ OpSize, EVEX_V512, VEX_W,
+ EVEX_CD8<64, CD8VF>;
+defm VMOVUPSZ : avx512_mov_packed<0x10, VR512, VK16WM, f512mem, loadv16f32,
+ "vmovups", SSEPackedSingle>,
+ EVEX_V512, EVEX_CD8<32, CD8VF>;
+defm VMOVUPDZ : avx512_mov_packed<0x10, VR512, VK8WM, f512mem, loadv8f64,
+ "vmovupd", SSEPackedDouble>,
+ OpSize, EVEX_V512, VEX_W,
+ EVEX_CD8<64, CD8VF>;
+def VMOVAPSZmr : AVX512PI<0x29, MRMDestMem, (outs), (ins f512mem:$dst, VR512:$src),
+ "vmovaps\t{$src, $dst|$dst, $src}",
+ [(alignedstore512 (v16f32 VR512:$src), addr:$dst)],
+ SSEPackedSingle>, EVEX, EVEX_V512, EVEX_CD8<32, CD8VF>;
+def VMOVAPDZmr : AVX512PI<0x29, MRMDestMem, (outs), (ins f512mem:$dst, VR512:$src),
+ "vmovapd\t{$src, $dst|$dst, $src}",
+ [(alignedstore512 (v8f64 VR512:$src), addr:$dst)],
+ SSEPackedDouble>, EVEX, EVEX_V512,
+ OpSize, VEX_W, EVEX_CD8<64, CD8VF>;
+def VMOVUPSZmr : AVX512PI<0x11, MRMDestMem, (outs), (ins f512mem:$dst, VR512:$src),
+ "vmovups\t{$src, $dst|$dst, $src}",
+ [(store (v16f32 VR512:$src), addr:$dst)],
+ SSEPackedSingle>, EVEX, EVEX_V512, EVEX_CD8<32, CD8VF>;
+def VMOVUPDZmr : AVX512PI<0x11, MRMDestMem, (outs), (ins f512mem:$dst, VR512:$src),
+ "vmovupd\t{$src, $dst|$dst, $src}",
+ [(store (v8f64 VR512:$src), addr:$dst)],
+ SSEPackedDouble>, EVEX, EVEX_V512,
+ OpSize, VEX_W, EVEX_CD8<64, CD8VF>;
+
+let neverHasSideEffects = 1 in {
+ def VMOVDQA32rr : AVX512BI<0x6F, MRMSrcReg, (outs VR512:$dst),
+ (ins VR512:$src),
+ "vmovdqa32\t{$src, $dst|$dst, $src}", []>,
+ EVEX, EVEX_V512;
+ def VMOVDQA64rr : AVX512BI<0x6F, MRMSrcReg, (outs VR512:$dst),
+ (ins VR512:$src),
+ "vmovdqa64\t{$src, $dst|$dst, $src}", []>,
+ EVEX, EVEX_V512, VEX_W;
+let mayStore = 1 in {
+ def VMOVDQA32mr : AVX512BI<0x7F, MRMDestMem, (outs),
+ (ins i512mem:$dst, VR512:$src),
+ "vmovdqa32\t{$src, $dst|$dst, $src}", []>,
+ EVEX, EVEX_V512, EVEX_CD8<32, CD8VF>;
+ def VMOVDQA64mr : AVX512BI<0x7F, MRMDestMem, (outs),
+ (ins i512mem:$dst, VR512:$src),
+ "vmovdqa64\t{$src, $dst|$dst, $src}", []>,
+ EVEX, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
+}
+let mayLoad = 1 in {
+def VMOVDQA32rm : AVX512BI<0x6F, MRMSrcMem, (outs VR512:$dst),
+ (ins i512mem:$src),
+ "vmovdqa32\t{$src, $dst|$dst, $src}", []>,
+ EVEX, EVEX_V512, EVEX_CD8<32, CD8VF>;
+def VMOVDQA64rm : AVX512BI<0x6F, MRMSrcMem, (outs VR512:$dst),
+ (ins i512mem:$src),
+ "vmovdqa64\t{$src, $dst|$dst, $src}", []>,
+ EVEX, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
+}
+}
+
+// 512-bit aligned load/store
+def : Pat<(alignedloadv8i64 addr:$src), (VMOVDQA64rm addr:$src)>;
+def : Pat<(alignedloadv16i32 addr:$src), (VMOVDQA32rm addr:$src)>;
+
+def : Pat<(alignedstore512 (v8i64 VR512:$src), addr:$dst),
+ (VMOVDQA64mr addr:$dst, VR512:$src)>;
+def : Pat<(alignedstore512 (v16i32 VR512:$src), addr:$dst),
+ (VMOVDQA32mr addr:$dst, VR512:$src)>;
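+
+// The aligned fragments only match when the access carries full 64-byte
+// alignment (as their names suggest); anything less falls through to the
+// vmovu* patterns below. A sketch of the resulting selection:
+//   store <16 x i32> %v, <16 x i32>* %p, align 64  ->  vmovdqa32
+//   store <16 x i32> %v, <16 x i32>* %p, align 4   ->  vmovdqu32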
+
+multiclass avx512_mov_int<bits<8> load_opc, bits<8> store_opc, string asm,
+ RegisterClass RC, RegisterClass KRC,
+ PatFrag ld_frag, X86MemOperand x86memop> {
+let neverHasSideEffects = 1 in
+ def rr : AVX512XSI<load_opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
+ !strconcat(asm, "\t{$src, $dst|$dst, $src}"), []>, EVEX;
+let canFoldAsLoad = 1 in
+ def rm : AVX512XSI<load_opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
+ !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
+ [(set RC:$dst, (ld_frag addr:$src))]>, EVEX;
+let mayStore = 1 in
+ def mr : AVX512XSI<store_opc, MRMDestMem, (outs),
+ (ins x86memop:$dst, VR512:$src),
+ !strconcat(asm, "\t{$src, $dst|$dst, $src}"), []>, EVEX;
+let Constraints = "$src1 = $dst" in {
+ def rrk : AVX512XSI<load_opc, MRMSrcReg, (outs RC:$dst),
+ (ins RC:$src1, KRC:$mask, RC:$src2),
+ !strconcat(asm,
+ "\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"), []>,
+ EVEX, EVEX_K;
+ def rmk : AVX512XSI<load_opc, MRMSrcMem, (outs RC:$dst),
+ (ins RC:$src1, KRC:$mask, x86memop:$src2),
+ !strconcat(asm,
+ "\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
+ []>, EVEX, EVEX_K;
+}
+}
+
+defm VMOVDQU32 : avx512_mov_int<0x6F, 0x7F, "vmovdqu32", VR512, VK16WM,
+ memopv16i32, i512mem>,
+ EVEX_V512, EVEX_CD8<32, CD8VF>;
+defm VMOVDQU64 : avx512_mov_int<0x6F, 0x7F, "vmovdqu64", VR512, VK8WM,
+ memopv8i64, i512mem>,
+ EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
+
+// 512-bit unaligned load/store
+def : Pat<(loadv8i64 addr:$src), (VMOVDQU64rm addr:$src)>;
+def : Pat<(loadv16i32 addr:$src), (VMOVDQU32rm addr:$src)>;
+
+def : Pat<(store (v8i64 VR512:$src), addr:$dst),
+ (VMOVDQU64mr addr:$dst, VR512:$src)>;
+def : Pat<(store (v16i32 VR512:$src), addr:$dst),
+ (VMOVDQU32mr addr:$dst, VR512:$src)>;
+
+let AddedComplexity = 20 in {
+def : Pat<(v16f32 (vselect VK16WM:$mask, (v16f32 VR512:$src1),
+ (v16f32 VR512:$src2))),
+ (VMOVUPSZrrk VR512:$src2, VK16WM:$mask, VR512:$src1)>;
+def : Pat<(v8f64 (vselect VK8WM:$mask, (v8f64 VR512:$src1),
+ (v8f64 VR512:$src2))),
+ (VMOVUPDZrrk VR512:$src2, VK8WM:$mask, VR512:$src1)>;
+def : Pat<(v16i32 (vselect VK16WM:$mask, (v16i32 VR512:$src1),
+ (v16i32 VR512:$src2))),
+ (VMOVDQU32rrk VR512:$src2, VK16WM:$mask, VR512:$src1)>;
+def : Pat<(v8i64 (vselect VK8WM:$mask, (v8i64 VR512:$src1),
+ (v8i64 VR512:$src2))),
+ (VMOVDQU64rrk VR512:$src2, VK8WM:$mask, VR512:$src1)>;
+}
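+
+// With AddedComplexity = 20 these patterns beat the blendm selections above:
+// a full-register vselect becomes a merge-masked register move. The tied
+// operand supplies the false lanes, so a sketch of the mapping is
+//   select m, a, b  ->  (dst = b; vmovups a -> dst {m})
+// i.e. dst[i] = m[i] ? a[i] : b[i].
+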
+// Move Int Doubleword to Packed Doubleword Int
+//
+def VMOVDI2PDIZrr : AVX512SI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR32:$src),
+ "vmovd{z}\t{$src, $dst|$dst, $src}",
+ [(set VR128X:$dst,
+ (v4i32 (scalar_to_vector GR32:$src)))], IIC_SSE_MOVDQ>,
+ EVEX, VEX_LIG;
+def VMOVDI2PDIZrm : AVX512SI<0x6E, MRMSrcMem, (outs VR128X:$dst), (ins i32mem:$src),
+ "vmovd{z}\t{$src, $dst|$dst, $src}",
+ [(set VR128X:$dst,
+ (v4i32 (scalar_to_vector (loadi32 addr:$src))))],
+ IIC_SSE_MOVDQ>, EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>;
+def VMOV64toPQIZrr : AVX512SI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR64:$src),
+ "vmovq{z}\t{$src, $dst|$dst, $src}",
+ [(set VR128X:$dst,
+ (v2i64 (scalar_to_vector GR64:$src)))],
+ IIC_SSE_MOVDQ>, EVEX, VEX_W, VEX_LIG;
+let isCodeGenOnly = 1 in {
+def VMOV64toSDZrr : AVX512SI<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src),
+ "vmovq{z}\t{$src, $dst|$dst, $src}",
+ [(set FR64:$dst, (bitconvert GR64:$src))],
+ IIC_SSE_MOVDQ>, EVEX, VEX_W, Sched<[WriteMove]>;
+def VMOVSDto64Zrr : AVX512SI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src),
+ "vmovq{z}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (bitconvert FR64:$src))],
+ IIC_SSE_MOVDQ>, EVEX, VEX_W, Sched<[WriteMove]>;
+}
+def VMOVSDto64Zmr : AVX512SI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src),
+ "vmovq{z}\t{$src, $dst|$dst, $src}",
+ [(store (i64 (bitconvert FR64:$src)), addr:$dst)],
+ IIC_SSE_MOVDQ>, EVEX, VEX_W, Sched<[WriteStore]>,
+ EVEX_CD8<64, CD8VT1>;
+
+// Move Int Doubleword to Scalar Single FP
+//
+let isCodeGenOnly = 1 in {
+def VMOVDI2SSZrr : AVX512SI<0x6E, MRMSrcReg, (outs FR32X:$dst), (ins GR32:$src),
+ "vmovd{z}\t{$src, $dst|$dst, $src}",
+ [(set FR32X:$dst, (bitconvert GR32:$src))],
+ IIC_SSE_MOVDQ>, EVEX, VEX_LIG;
+
+def VMOVDI2SSZrm : AVX512SI<0x6E, MRMSrcMem, (outs FR32X:$dst), (ins i32mem:$src),
+ "vmovd{z}\t{$src, $dst|$dst, $src}",
+ [(set FR32X:$dst, (bitconvert (loadi32 addr:$src)))],
+ IIC_SSE_MOVDQ>, EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>;
+}
+
+// Move low dword of Packed Doubleword Int to Doubleword Int
+//
+def VMOVPDI2DIZrr : AVX512SI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128X:$src),
+ "vmovd{z}\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (vector_extract (v4i32 VR128X:$src),
+ (iPTR 0)))], IIC_SSE_MOVD_ToGP>,
+ EVEX, VEX_LIG;
+def VMOVPDI2DIZmr : AVX512SI<0x7E, MRMDestMem, (outs),
+ (ins i32mem:$dst, VR128X:$src),
+ "vmovd{z}\t{$src, $dst|$dst, $src}",
+ [(store (i32 (vector_extract (v4i32 VR128X:$src),
+ (iPTR 0))), addr:$dst)], IIC_SSE_MOVDQ>,
+ EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>;
+
+// Move Packed Quadword Int first element to Quadword Int
+//
+def VMOVPQIto64Zrr : I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128X:$src),
+ "vmovq{z}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (extractelt (v2i64 VR128X:$src),
+ (iPTR 0)))],
+ IIC_SSE_MOVD_ToGP>, TB, OpSize, EVEX, VEX_LIG, VEX_W,
+ Requires<[HasAVX512, In64BitMode]>;
+
+def VMOVPQIto64Zmr : I<0xD6, MRMDestMem, (outs),
+ (ins i64mem:$dst, VR128X:$src),
+ "vmovq{z}\t{$src, $dst|$dst, $src}",
+ [(store (extractelt (v2i64 VR128X:$src), (iPTR 0)),
+ addr:$dst)], IIC_SSE_MOVDQ>,
+ EVEX, OpSize, VEX_LIG, VEX_W, TB, EVEX_CD8<64, CD8VT1>,
+ Sched<[WriteStore]>, Requires<[HasAVX512, In64BitMode]>;
+
+// Move Scalar Single FP to Doubleword Int
+//
+let isCodeGenOnly = 1 in {
+def VMOVSS2DIZrr : AVX512SI<0x7E, MRMDestReg, (outs GR32:$dst),
+ (ins FR32X:$src),
+ "vmovd{z}\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (bitconvert FR32X:$src))],
+ IIC_SSE_MOVD_ToGP>, EVEX, VEX_LIG;
+def VMOVSS2DIZmr : AVX512SI<0x7E, MRMDestMem, (outs),
+ (ins i32mem:$dst, FR32X:$src),
+ "vmovd{z}\t{$src, $dst|$dst, $src}",
+ [(store (i32 (bitconvert FR32X:$src)), addr:$dst)],
+ IIC_SSE_MOVDQ>, EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>;
+}
+
+// Move Quadword Int to Packed Quadword Int
+//
+def VMOVQI2PQIZrm : AVX512SI<0x6E, MRMSrcMem, (outs VR128X:$dst),
+ (ins i64mem:$src),
+ "vmovq{z}\t{$src, $dst|$dst, $src}",
+ [(set VR128X:$dst,
+ (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>,
+ EVEX, VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
+
+//===----------------------------------------------------------------------===//
+// AVX-512 MOVSS, MOVSD
+//===----------------------------------------------------------------------===//
+
+multiclass avx512_move_scalar <string asm, RegisterClass RC,
+ SDNode OpNode, ValueType vt,
+ X86MemOperand x86memop, PatFrag mem_pat> {
+ def rr : SI<0x10, MRMSrcReg, (outs VR128X:$dst), (ins VR128X:$src1, RC:$src2),
+ !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR128X:$dst, (vt (OpNode VR128X:$src1,
+ (scalar_to_vector RC:$src2))))],
+ IIC_SSE_MOV_S_RR>, EVEX_4V, VEX_LIG;
+ def rm : SI<0x10, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
+ !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
+ [(set RC:$dst, (mem_pat addr:$src))], IIC_SSE_MOV_S_RM>,
+ EVEX, VEX_LIG;
+ def mr: SI<0x11, MRMDestMem, (outs), (ins x86memop:$dst, RC:$src),
+ !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
+ [(store RC:$src, addr:$dst)], IIC_SSE_MOV_S_MR>,
+ EVEX, VEX_LIG;
+}
+
+let ExeDomain = SSEPackedSingle in
+defm VMOVSSZ : avx512_move_scalar<"movss{z}", FR32X, X86Movss, v4f32, f32mem,
+ loadf32>, XS, EVEX_CD8<32, CD8VT1>;
+
+let ExeDomain = SSEPackedDouble in
+defm VMOVSDZ : avx512_move_scalar<"movsd{z}", FR64X, X86Movsd, v2f64, f64mem,
+ loadf64>, XD, VEX_W, EVEX_CD8<64, CD8VT1>;
+
+
+// For the disassembler
+let isCodeGenOnly = 1 in {
+ def VMOVSSZrr_REV : SI<0x11, MRMDestReg, (outs VR128X:$dst),
+ (ins VR128X:$src1, FR32X:$src2),
+ "movss{z}\t{$src2, $src1, $dst|$dst, $src1, $src2}", [],
+ IIC_SSE_MOV_S_RR>,
+ XS, EVEX_4V, VEX_LIG;
+ def VMOVSDZrr_REV : SI<0x11, MRMDestReg, (outs VR128X:$dst),
+ (ins VR128X:$src1, FR64X:$src2),
+ "movsd{z}\t{$src2, $src1, $dst|$dst, $src1, $src2}", [],
+ IIC_SSE_MOV_S_RR>,
+ XD, EVEX_4V, VEX_LIG, VEX_W;
+}
+
+let Predicates = [HasAVX512] in {
+ let AddedComplexity = 15 in {
+    // Move scalar to XMM zero-extended: zero a VR128X, then do a
+    // MOVS{S,D} into the lower bits.
+ def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector FR32X:$src)))),
+ (VMOVSSZrr (v4f32 (V_SET0)), FR32X:$src)>;
+ def : Pat<(v4f32 (X86vzmovl (v4f32 VR128X:$src))),
+ (VMOVSSZrr (v4f32 (V_SET0)), (COPY_TO_REGCLASS VR128X:$src, FR32X))>;
+ def : Pat<(v4i32 (X86vzmovl (v4i32 VR128X:$src))),
+ (VMOVSSZrr (v4i32 (V_SET0)), (COPY_TO_REGCLASS VR128X:$src, FR32X))>;
+ def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector FR64X:$src)))),
+ (VMOVSDZrr (v2f64 (V_SET0)), FR64X:$src)>;
+
+ // Move low f32 and clear high bits.
+ def : Pat<(v8f32 (X86vzmovl (v8f32 VR256X:$src))),
+ (SUBREG_TO_REG (i32 0),
+ (VMOVSSZrr (v4f32 (V_SET0)),
+ (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm)), sub_xmm)>;
+ def : Pat<(v8i32 (X86vzmovl (v8i32 VR256X:$src))),
+ (SUBREG_TO_REG (i32 0),
+ (VMOVSSZrr (v4i32 (V_SET0)),
+ (EXTRACT_SUBREG (v8i32 VR256X:$src), sub_xmm)), sub_xmm)>;
+ }
+
+ let AddedComplexity = 20 in {
+ // MOVSSrm zeros the high parts of the register; represent this
+ // with SUBREG_TO_REG. The AVX versions also write: DST[255:128] <- 0
+ def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector (loadf32 addr:$src))))),
+ (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)>;
+ def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
+ (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)>;
+ def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))),
+ (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)>;
+
+ // MOVSDrm zeros the high parts of the register; represent this
+ // with SUBREG_TO_REG. The AVX versions also write: DST[255:128] <- 0
+ def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector (loadf64 addr:$src))))),
+ (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
+ def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
+ (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
+ def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))),
+ (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
+ def : Pat<(v2f64 (X86vzmovl (bc_v2f64 (loadv4f32 addr:$src)))),
+ (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
+ def : Pat<(v2f64 (X86vzload addr:$src)),
+ (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
+
+ // Represent the same patterns above but in the form they appear for
+ // 256-bit types
+ def : Pat<(v8i32 (X86vzmovl (insert_subvector undef,
+ (v4i32 (scalar_to_vector (loadi32 addr:$src))), (iPTR 0)))),
+ (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrm addr:$src), sub_xmm)>;
+ def : Pat<(v8f32 (X86vzmovl (insert_subvector undef,
+ (v4f32 (scalar_to_vector (loadf32 addr:$src))), (iPTR 0)))),
+ (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
+ def : Pat<(v4f64 (X86vzmovl (insert_subvector undef,
+ (v2f64 (scalar_to_vector (loadf64 addr:$src))), (iPTR 0)))),
+ (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
+ }
+ def : Pat<(v8f32 (X86vzmovl (insert_subvector undef,
+ (v4f32 (scalar_to_vector FR32X:$src)), (iPTR 0)))),
+ (SUBREG_TO_REG (i32 0), (v4f32 (VMOVSSZrr (v4f32 (V_SET0)),
+ FR32X:$src)), sub_xmm)>;
+ def : Pat<(v4f64 (X86vzmovl (insert_subvector undef,
+ (v2f64 (scalar_to_vector FR64X:$src)), (iPTR 0)))),
+ (SUBREG_TO_REG (i64 0), (v2f64 (VMOVSDZrr (v2f64 (V_SET0)),
+ FR64X:$src)), sub_xmm)>;
+ def : Pat<(v4i64 (X86vzmovl (insert_subvector undef,
+ (v2i64 (scalar_to_vector (loadi64 addr:$src))), (iPTR 0)))),
+ (SUBREG_TO_REG (i64 0), (VMOVQI2PQIZrm addr:$src), sub_xmm)>;
+
+ // Move low f64 and clear high bits.
+ def : Pat<(v4f64 (X86vzmovl (v4f64 VR256X:$src))),
+ (SUBREG_TO_REG (i32 0),
+ (VMOVSDZrr (v2f64 (V_SET0)),
+ (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm)), sub_xmm)>;
+
+ def : Pat<(v4i64 (X86vzmovl (v4i64 VR256X:$src))),
+ (SUBREG_TO_REG (i32 0), (VMOVSDZrr (v2i64 (V_SET0)),
+ (EXTRACT_SUBREG (v4i64 VR256X:$src), sub_xmm)), sub_xmm)>;
+
+ // Extract and store.
+ def : Pat<(store (f32 (vector_extract (v4f32 VR128X:$src), (iPTR 0))),
+ addr:$dst),
+ (VMOVSSZmr addr:$dst, (COPY_TO_REGCLASS (v4f32 VR128X:$src), FR32X))>;
+ def : Pat<(store (f64 (vector_extract (v2f64 VR128X:$src), (iPTR 0))),
+ addr:$dst),
+ (VMOVSDZmr addr:$dst, (COPY_TO_REGCLASS (v2f64 VR128X:$src), FR64X))>;
+
+ // Shuffle with VMOVSS
+ def : Pat<(v4i32 (X86Movss VR128X:$src1, VR128X:$src2)),
+ (VMOVSSZrr (v4i32 VR128X:$src1),
+ (COPY_TO_REGCLASS (v4i32 VR128X:$src2), FR32X))>;
+ def : Pat<(v4f32 (X86Movss VR128X:$src1, VR128X:$src2)),
+ (VMOVSSZrr (v4f32 VR128X:$src1),
+ (COPY_TO_REGCLASS (v4f32 VR128X:$src2), FR32X))>;
+
+ // 256-bit variants
+ def : Pat<(v8i32 (X86Movss VR256X:$src1, VR256X:$src2)),
+ (SUBREG_TO_REG (i32 0),
+ (VMOVSSZrr (EXTRACT_SUBREG (v8i32 VR256X:$src1), sub_xmm),
+ (EXTRACT_SUBREG (v8i32 VR256X:$src2), sub_xmm)),
+ sub_xmm)>;
+ def : Pat<(v8f32 (X86Movss VR256X:$src1, VR256X:$src2)),
+ (SUBREG_TO_REG (i32 0),
+ (VMOVSSZrr (EXTRACT_SUBREG (v8f32 VR256X:$src1), sub_xmm),
+ (EXTRACT_SUBREG (v8f32 VR256X:$src2), sub_xmm)),
+ sub_xmm)>;
+
+ // Shuffle with VMOVSD
+ def : Pat<(v2i64 (X86Movsd VR128X:$src1, VR128X:$src2)),
+ (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
+ def : Pat<(v2f64 (X86Movsd VR128X:$src1, VR128X:$src2)),
+ (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
+ def : Pat<(v4f32 (X86Movsd VR128X:$src1, VR128X:$src2)),
+ (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
+ def : Pat<(v4i32 (X86Movsd VR128X:$src1, VR128X:$src2)),
+ (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
+
+ // 256-bit variants
+ def : Pat<(v4i64 (X86Movsd VR256X:$src1, VR256X:$src2)),
+ (SUBREG_TO_REG (i32 0),
+ (VMOVSDZrr (EXTRACT_SUBREG (v4i64 VR256X:$src1), sub_xmm),
+ (EXTRACT_SUBREG (v4i64 VR256X:$src2), sub_xmm)),
+ sub_xmm)>;
+ def : Pat<(v4f64 (X86Movsd VR256X:$src1, VR256X:$src2)),
+ (SUBREG_TO_REG (i32 0),
+ (VMOVSDZrr (EXTRACT_SUBREG (v4f64 VR256X:$src1), sub_xmm),
+ (EXTRACT_SUBREG (v4f64 VR256X:$src2), sub_xmm)),
+ sub_xmm)>;
+
+ def : Pat<(v2f64 (X86Movlpd VR128X:$src1, VR128X:$src2)),
+ (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
+ def : Pat<(v2i64 (X86Movlpd VR128X:$src1, VR128X:$src2)),
+ (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
+ def : Pat<(v4f32 (X86Movlps VR128X:$src1, VR128X:$src2)),
+ (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
+ def : Pat<(v4i32 (X86Movlps VR128X:$src1, VR128X:$src2)),
+ (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
+}
+
+let AddedComplexity = 15 in
+def VMOVZPQILo2PQIZrr : AVX512XSI<0x7E, MRMSrcReg, (outs VR128X:$dst),
+ (ins VR128X:$src),
+ "vmovq{z}\t{$src, $dst|$dst, $src}",
+ [(set VR128X:$dst, (v2i64 (X86vzmovl
+ (v2i64 VR128X:$src))))],
+ IIC_SSE_MOVQ_RR>, EVEX, VEX_W;
+
+let AddedComplexity = 20 in
+def VMOVZPQILo2PQIZrm : AVX512XSI<0x7E, MRMSrcMem, (outs VR128X:$dst),
+ (ins i128mem:$src),
+ "vmovq{z}\t{$src, $dst|$dst, $src}",
+ [(set VR128X:$dst, (v2i64 (X86vzmovl
+ (loadv2i64 addr:$src))))],
+ IIC_SSE_MOVDQ>, EVEX, VEX_W,
+ EVEX_CD8<8, CD8VT8>;
+
+let Predicates = [HasAVX512] in {
+  // AVX 128-bit movd/movq instructions write zeros in the high 128-bit part.
+ let AddedComplexity = 20 in {
+ def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector (loadi32 addr:$src))))),
+ (VMOVDI2PDIZrm addr:$src)>;
+ def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector GR64:$src)))),
+ (VMOV64toPQIZrr GR64:$src)>;
+ def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))),
+ (VMOVDI2PDIZrr GR32:$src)>;
+
+ def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv4f32 addr:$src)))),
+ (VMOVDI2PDIZrm addr:$src)>;
+ def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))),
+ (VMOVDI2PDIZrm addr:$src)>;
+ def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))),
+ (VMOVZPQILo2PQIZrm addr:$src)>;
+ def : Pat<(v2f64 (X86vzmovl (v2f64 VR128X:$src))),
+ (VMOVZPQILo2PQIZrr VR128X:$src)>;
+ }
+
+ // Use regular 128-bit instructions to match 256-bit scalar_to_vec+zext.
+ def : Pat<(v8i32 (X86vzmovl (insert_subvector undef,
+ (v4i32 (scalar_to_vector GR32:$src)),(iPTR 0)))),
+ (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrr GR32:$src), sub_xmm)>;
+ def : Pat<(v4i64 (X86vzmovl (insert_subvector undef,
+ (v2i64 (scalar_to_vector GR64:$src)),(iPTR 0)))),
+ (SUBREG_TO_REG (i64 0), (VMOV64toPQIZrr GR64:$src), sub_xmm)>;
+}
+
+def : Pat<(v16i32 (X86Vinsert (v16i32 immAllZerosV), GR32:$src2, (iPTR 0))),
+ (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrr GR32:$src2), sub_xmm)>;
+
+def : Pat<(v8i64 (X86Vinsert (bc_v8i64 (v16i32 immAllZerosV)), GR64:$src2, (iPTR 0))),
+ (SUBREG_TO_REG (i32 0), (VMOV64toPQIZrr GR64:$src2), sub_xmm)>;
+
+def : Pat<(v16i32 (X86Vinsert undef, GR32:$src2, (iPTR 0))),
+ (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrr GR32:$src2), sub_xmm)>;
+
+def : Pat<(v8i64 (X86Vinsert undef, GR64:$src2, (iPTR 0))),
+ (SUBREG_TO_REG (i32 0), (VMOV64toPQIZrr GR64:$src2), sub_xmm)>;
+
+//===----------------------------------------------------------------------===//
+// AVX-512 - Integer arithmetic
+//
+multiclass avx512_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ ValueType OpVT, RegisterClass RC, PatFrag memop_frag,
+ X86MemOperand x86memop, PatFrag scalar_mfrag,
+ X86MemOperand x86scalar_mop, string BrdcstStr,
+ OpndItins itins, bit IsCommutable = 0> {
+ let isCommutable = IsCommutable in
+ def rr : AVX512BI<opc, MRMSrcReg, (outs RC:$dst),
+ (ins RC:$src1, RC:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set RC:$dst, (OpVT (OpNode (OpVT RC:$src1), (OpVT RC:$src2))))],
+ itins.rr>, EVEX_4V;
+ def rm : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
+ (ins RC:$src1, x86memop:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set RC:$dst, (OpVT (OpNode (OpVT RC:$src1), (memop_frag addr:$src2))))],
+ itins.rm>, EVEX_4V;
+ def rmb : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
+ (ins RC:$src1, x86scalar_mop:$src2),
+ !strconcat(OpcodeStr, "\t{${src2}", BrdcstStr,
+ ", $src1, $dst|$dst, $src1, ${src2}", BrdcstStr, "}"),
+ [(set RC:$dst, (OpNode RC:$src1,
+ (OpVT (X86VBroadcast (scalar_mfrag addr:$src2)))))],
+ itins.rm>, EVEX_4V, EVEX_B;
+}
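+
+// The rmb (EVEX_B) variants fold an embedded broadcast: one scalar element
+// is loaded and splat to all lanes before the operation. A sketch in AT&T
+// syntax, using the usual "{1to16}" dword modifier:
+//   vpaddd (%rax){1to16}, %zmm1, %zmm0   ; zmm0[i] = zmm1[i] + dword(rax)
+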
+multiclass avx512_binop_rm2<bits<8> opc, string OpcodeStr,
+ ValueType DstVT, ValueType SrcVT, RegisterClass RC,
+ PatFrag memop_frag, X86MemOperand x86memop,
+ OpndItins itins,
+ bit IsCommutable = 0> {
+ let isCommutable = IsCommutable in
+ def rr : AVX512BI<opc, MRMSrcReg, (outs RC:$dst),
+ (ins RC:$src1, RC:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>, EVEX_4V, VEX_W;
+ def rm : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
+ (ins RC:$src1, x86memop:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>, EVEX_4V, VEX_W;
+}
+
+defm VPADDDZ : avx512_binop_rm<0xFE, "vpaddd", add, v16i32, VR512, memopv16i32,
+ i512mem, loadi32, i32mem, "{1to16}", SSE_INTALU_ITINS_P, 1>,
+ EVEX_V512, EVEX_CD8<32, CD8VF>;
+
+defm VPSUBDZ : avx512_binop_rm<0xFA, "vpsubd", sub, v16i32, VR512, memopv16i32,
+ i512mem, loadi32, i32mem, "{1to16}", SSE_INTALU_ITINS_P, 0>,
+ EVEX_V512, EVEX_CD8<32, CD8VF>;
+
+defm VPMULLDZ : avx512_binop_rm<0x40, "vpmulld", mul, v16i32, VR512, memopv16i32,
+ i512mem, loadi32, i32mem, "{1to16}", SSE_INTALU_ITINS_P, 1>,
+ T8, EVEX_V512, EVEX_CD8<32, CD8VF>;
+
+defm VPADDQZ : avx512_binop_rm<0xD4, "vpaddq", add, v8i64, VR512, memopv8i64,
+ i512mem, loadi64, i64mem, "{1to8}", SSE_INTALU_ITINS_P, 1>,
+ EVEX_CD8<64, CD8VF>, EVEX_V512, VEX_W;
+
+defm VPSUBQZ : avx512_binop_rm<0xFB, "vpsubq", sub, v8i64, VR512, memopv8i64,
+ i512mem, loadi64, i64mem, "{1to8}", SSE_INTALU_ITINS_P, 0>,
+ EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
+
+defm VPMULDQZ : avx512_binop_rm2<0x28, "vpmuldq", v8i64, v16i32,
+ VR512, memopv8i64, i512mem, SSE_INTALU_ITINS_P, 1>, T8,
+ EVEX_V512, EVEX_CD8<64, CD8VF>;
+
+defm VPMULUDQZ : avx512_binop_rm2<0xF4, "vpmuludq", v8i64, v16i32,
+ VR512, memopv8i64, i512mem, SSE_INTMUL_ITINS_P, 1>, EVEX_V512,
+ EVEX_CD8<64, CD8VF>;
+
+def : Pat<(v8i64 (X86pmuludq (v16i32 VR512:$src1), (v16i32 VR512:$src2))),
+ (VPMULUDQZrr VR512:$src1, VR512:$src2)>;
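+
+// vpmuludq reads only the even dword lanes of each source; a lane sketch:
+//   for (i = 0; i < 8; ++i)
+//     dst.q[i] = (u64)src1.d[2*i] * (u64)src2.d[2*i];
+// which is why the v8i64 result is matched from two v16i32 inputs above.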
+
+defm VPMAXUDZ : avx512_binop_rm<0x3F, "vpmaxud", X86umax, v16i32, VR512, memopv16i32,
+ i512mem, loadi32, i32mem, "{1to16}", SSE_INTALU_ITINS_P, 1>,
+ T8, EVEX_V512, EVEX_CD8<32, CD8VF>;
+defm VPMAXUQZ : avx512_binop_rm<0x3F, "vpmaxuq", X86umax, v8i64, VR512, memopv8i64,
+ i512mem, loadi64, i64mem, "{1to8}", SSE_INTALU_ITINS_P, 0>,
+ T8, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
+
+defm VPMAXSDZ : avx512_binop_rm<0x3D, "vpmaxsd", X86smax, v16i32, VR512, memopv16i32,
+ i512mem, loadi32, i32mem, "{1to16}", SSE_INTALU_ITINS_P, 1>,
+ EVEX_V512, EVEX_CD8<32, CD8VF>;
+defm VPMAXSQZ : avx512_binop_rm<0x3D, "vpmaxsq", X86smax, v8i64, VR512, memopv8i64,
+ i512mem, loadi64, i64mem, "{1to8}", SSE_INTALU_ITINS_P, 0>,
+ T8, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
+
+defm VPMINUDZ : avx512_binop_rm<0x3B, "vpminud", X86umin, v16i32, VR512, memopv16i32,
+ i512mem, loadi32, i32mem, "{1to16}", SSE_INTALU_ITINS_P, 1>,
+ T8, EVEX_V512, EVEX_CD8<32, CD8VF>;
+defm VPMINUQZ : avx512_binop_rm<0x3B, "vpminuq", X86umin, v8i64, VR512, memopv8i64,
+ i512mem, loadi64, i64mem, "{1to8}", SSE_INTALU_ITINS_P, 0>,
+ T8, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
+
+defm VPMINSDZ : avx512_binop_rm<0x39, "vpminsd", X86smin, v16i32, VR512, memopv16i32,
+ i512mem, loadi32, i32mem, "{1to16}", SSE_INTALU_ITINS_P, 1>,
+ T8, EVEX_V512, EVEX_CD8<32, CD8VF>;
+defm VPMINSQZ : avx512_binop_rm<0x39, "vpminsq", X86smin, v8i64, VR512, memopv8i64,
+ i512mem, loadi64, i64mem, "{1to8}", SSE_INTALU_ITINS_P, 0>,
+ T8, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
+
+//===----------------------------------------------------------------------===//
+// AVX-512 - Unpack Instructions
+//===----------------------------------------------------------------------===//
+
+multiclass avx512_unpack_fp<bits<8> opc, SDNode OpNode, ValueType vt,
+ PatFrag mem_frag, RegisterClass RC,
+ X86MemOperand x86memop, string asm,
+ Domain d> {
+ def rr : AVX512PI<opc, MRMSrcReg,
+ (outs RC:$dst), (ins RC:$src1, RC:$src2),
+ asm, [(set RC:$dst,
+ (vt (OpNode RC:$src1, RC:$src2)))],
+ d>, EVEX_4V;
+ def rm : AVX512PI<opc, MRMSrcMem,
+ (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
+ asm, [(set RC:$dst,
+ (vt (OpNode RC:$src1,
+ (bitconvert (mem_frag addr:$src2)))))],
+ d>, EVEX_4V;
+}
+
+defm VUNPCKHPSZ: avx512_unpack_fp<0x15, X86Unpckh, v16f32, memopv8f64,
+ VR512, f512mem, "vunpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ SSEPackedSingle>, EVEX_V512, EVEX_CD8<32, CD8VF>;
+defm VUNPCKHPDZ: avx512_unpack_fp<0x15, X86Unpckh, v8f64, memopv8f64,
+ VR512, f512mem, "vunpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ SSEPackedDouble>, OpSize, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
+defm VUNPCKLPSZ: avx512_unpack_fp<0x14, X86Unpckl, v16f32, memopv8f64,
+ VR512, f512mem, "vunpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ SSEPackedSingle>, EVEX_V512, EVEX_CD8<32, CD8VF>;
+defm VUNPCKLPDZ: avx512_unpack_fp<0x14, X86Unpckl, v8f64, memopv8f64,
+ VR512, f512mem, "vunpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ SSEPackedDouble>, OpSize, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
+
+multiclass avx512_unpack_int<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ ValueType OpVT, RegisterClass RC, PatFrag memop_frag,
+ X86MemOperand x86memop> {
+ def rr : AVX512BI<opc, MRMSrcReg, (outs RC:$dst),
+ (ins RC:$src1, RC:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set RC:$dst, (OpVT (OpNode (OpVT RC:$src1), (OpVT RC:$src2))))],
+ IIC_SSE_UNPCK>, EVEX_4V;
+ def rm : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
+ (ins RC:$src1, x86memop:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set RC:$dst, (OpVT (OpNode (OpVT RC:$src1),
+ (bitconvert (memop_frag addr:$src2)))))],
+ IIC_SSE_UNPCK>, EVEX_4V;
+}
+defm VPUNPCKLDQZ : avx512_unpack_int<0x62, "vpunpckldq", X86Unpckl, v16i32,
+ VR512, memopv16i32, i512mem>, EVEX_V512,
+ EVEX_CD8<32, CD8VF>;
+defm VPUNPCKLQDQZ : avx512_unpack_int<0x6C, "vpunpcklqdq", X86Unpckl, v8i64,
+ VR512, memopv8i64, i512mem>, EVEX_V512,
+ VEX_W, EVEX_CD8<64, CD8VF>;
+defm VPUNPCKHDQZ : avx512_unpack_int<0x6A, "vpunpckhdq", X86Unpckh, v16i32,
+ VR512, memopv16i32, i512mem>, EVEX_V512,
+ EVEX_CD8<32, CD8VF>;
+defm VPUNPCKHQDQZ : avx512_unpack_int<0x6D, "vpunpckhqdq", X86Unpckh, v8i64,
+ VR512, memopv8i64, i512mem>, EVEX_V512,
+ VEX_W, EVEX_CD8<64, CD8VF>;
+//===----------------------------------------------------------------------===//
+// AVX-512 - PSHUFD
+//
+
+multiclass avx512_pshuf_imm<bits<8> opc, string OpcodeStr, RegisterClass RC,
+ SDNode OpNode, PatFrag mem_frag,
+ X86MemOperand x86memop, ValueType OpVT> {
+ def ri : AVX512Ii8<opc, MRMSrcReg, (outs RC:$dst),
+ (ins RC:$src1, i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set RC:$dst,
+ (OpVT (OpNode RC:$src1, (i8 imm:$src2))))]>,
+ EVEX;
+ def mi : AVX512Ii8<opc, MRMSrcMem, (outs RC:$dst),
+ (ins x86memop:$src1, i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set RC:$dst,
+ (OpVT (OpNode (mem_frag addr:$src1),
+ (i8 imm:$src2))))]>, EVEX;
+}
+
+defm VPSHUFDZ : avx512_pshuf_imm<0x70, "vpshufd", VR512, X86PShufd, memopv16i32,
+ i512mem, v16i32>, OpSize, EVEX_V512, EVEX_CD8<32, CD8VF>;
+
+let ExeDomain = SSEPackedSingle in
+defm VPERMILPSZ : avx512_pshuf_imm<0x04, "vpermilps", VR512, X86VPermilp,
+ memopv16f32, i512mem, v16f32>, OpSize, TA, EVEX_V512,
+ EVEX_CD8<32, CD8VF>;
+let ExeDomain = SSEPackedDouble in
+defm VPERMILPDZ : avx512_pshuf_imm<0x05, "vpermilpd", VR512, X86VPermilp,
+ memopv8f64, i512mem, v8f64>, OpSize, TA, EVEX_V512,
+ VEX_W, EVEX_CD8<32, CD8VF>;
+
+def : Pat<(v16i32 (X86VPermilp VR512:$src1, (i8 imm:$imm))),
+ (VPERMILPSZri VR512:$src1, imm:$imm)>;
+def : Pat<(v8i64 (X86VPermilp VR512:$src1, (i8 imm:$imm))),
+ (VPERMILPDZri VR512:$src1, imm:$imm)>;
+
+//===----------------------------------------------------------------------===//
+// AVX-512 Logical Instructions
+//===----------------------------------------------------------------------===//
+
+defm VPANDDZ : avx512_binop_rm<0xDB, "vpandd", and, v16i32, VR512, memopv16i32,
+ i512mem, loadi32, i32mem, "{1to16}", SSE_BIT_ITINS_P, 1>,
+ EVEX_V512, EVEX_CD8<32, CD8VF>;
+defm VPANDQZ : avx512_binop_rm<0xDB, "vpandq", and, v8i64, VR512, memopv8i64,
+ i512mem, loadi64, i64mem, "{1to8}", SSE_BIT_ITINS_P, 1>,
+ EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
+defm VPORDZ : avx512_binop_rm<0xEB, "vpord", or, v16i32, VR512, memopv16i32,
+ i512mem, loadi32, i32mem, "{1to16}", SSE_BIT_ITINS_P, 1>,
+ EVEX_V512, EVEX_CD8<32, CD8VF>;
+defm VPORQZ : avx512_binop_rm<0xEB, "vporq", or, v8i64, VR512, memopv8i64,
+ i512mem, loadi64, i64mem, "{1to8}", SSE_BIT_ITINS_P, 1>,
+ EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
+defm VPXORDZ : avx512_binop_rm<0xEF, "vpxord", xor, v16i32, VR512, memopv16i32,
+ i512mem, loadi32, i32mem, "{1to16}", SSE_BIT_ITINS_P, 1>,
+ EVEX_V512, EVEX_CD8<32, CD8VF>;
+defm VPXORQZ : avx512_binop_rm<0xEF, "vpxorq", xor, v8i64, VR512, memopv8i64,
+ i512mem, loadi64, i64mem, "{1to8}", SSE_BIT_ITINS_P, 1>,
+ EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
+defm VPANDNDZ : avx512_binop_rm<0xDF, "vpandnd", X86andnp, v16i32, VR512,
+ memopv16i32, i512mem, loadi32, i32mem, "{1to16}",
+ SSE_BIT_ITINS_P, 0>, EVEX_V512, EVEX_CD8<32, CD8VF>;
+defm VPANDNQZ : avx512_binop_rm<0xDF, "vpandnq", X86andnp, v8i64, VR512, memopv8i64,
+ i512mem, loadi64, i64mem, "{1to8}", SSE_BIT_ITINS_P, 0>,
+ EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
+
+//===----------------------------------------------------------------------===//
+// AVX-512 FP arithmetic
+//===----------------------------------------------------------------------===//
+
+multiclass avx512_binop_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ SizeItins itins> {
+ defm SSZ : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "ss{z}"), OpNode, FR32X,
+ f32mem, itins.s, 0>, XS, EVEX_4V, VEX_LIG,
+ EVEX_CD8<32, CD8VT1>;
+ defm SDZ : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "sd{z}"), OpNode, FR64X,
+ f64mem, itins.d, 0>, XD, VEX_W, EVEX_4V, VEX_LIG,
+ EVEX_CD8<64, CD8VT1>;
+}
+
+let isCommutable = 1 in {
+defm VADD : avx512_binop_s<0x58, "add", fadd, SSE_ALU_ITINS_S>;
+defm VMUL : avx512_binop_s<0x59, "mul", fmul, SSE_ALU_ITINS_S>;
+defm VMIN : avx512_binop_s<0x5D, "min", X86fmin, SSE_ALU_ITINS_S>;
+defm VMAX : avx512_binop_s<0x5F, "max", X86fmax, SSE_ALU_ITINS_S>;
+}
+let isCommutable = 0 in {
+defm VSUB : avx512_binop_s<0x5C, "sub", fsub, SSE_ALU_ITINS_S>;
+defm VDIV : avx512_binop_s<0x5E, "div", fdiv, SSE_ALU_ITINS_S>;
+}
+
+multiclass avx512_fp_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ RegisterClass RC, ValueType vt,
+ X86MemOperand x86memop, PatFrag mem_frag,
+ X86MemOperand x86scalar_mop, PatFrag scalar_mfrag,
+ string BrdcstStr,
+ Domain d, OpndItins itins, bit commutable> {
+ let isCommutable = commutable in
+ def rr : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], itins.rr, d>,
+ EVEX_4V, TB;
+ let mayLoad = 1 in {
+ def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set RC:$dst, (OpNode RC:$src1, (mem_frag addr:$src2)))],
+ itins.rm, d>, EVEX_4V, TB;
+ def rmb : PI<opc, MRMSrcMem, (outs RC:$dst),
+ (ins RC:$src1, x86scalar_mop:$src2),
+ !strconcat(OpcodeStr, "\t{${src2}", BrdcstStr,
+ ", $src1, $dst|$dst, $src1, ${src2}", BrdcstStr, "}"),
+ [(set RC:$dst, (OpNode RC:$src1,
+ (vt (X86VBroadcast (scalar_mfrag addr:$src2)))))],
+ itins.rm, d>, EVEX_4V, EVEX_B, TB;
+ }
+}
+
+defm VADDPSZ : avx512_fp_packed<0x58, "addps", fadd, VR512, v16f32, f512mem,
+ memopv16f32, f32mem, loadf32, "{1to16}", SSEPackedSingle,
+ SSE_ALU_ITINS_P.s, 1>, EVEX_V512, EVEX_CD8<32, CD8VF>;
+
+defm VADDPDZ : avx512_fp_packed<0x58, "addpd", fadd, VR512, v8f64, f512mem,
+ memopv8f64, f64mem, loadf64, "{1to8}", SSEPackedDouble,
+ SSE_ALU_ITINS_P.d, 1>,
+ EVEX_V512, OpSize, VEX_W, EVEX_CD8<64, CD8VF>;
+
+defm VMULPSZ : avx512_fp_packed<0x59, "mulps", fmul, VR512, v16f32, f512mem,
+ memopv16f32, f32mem, loadf32, "{1to16}", SSEPackedSingle,
+ SSE_ALU_ITINS_P.s, 1>, EVEX_V512, EVEX_CD8<32, CD8VF>;
+defm VMULPDZ : avx512_fp_packed<0x59, "mulpd", fmul, VR512, v8f64, f512mem,
+ memopv8f64, f64mem, loadf64, "{1to8}", SSEPackedDouble,
+ SSE_ALU_ITINS_P.d, 1>,
+ EVEX_V512, OpSize, VEX_W, EVEX_CD8<64, CD8VF>;
+
+defm VMINPSZ : avx512_fp_packed<0x5D, "minps", X86fmin, VR512, v16f32, f512mem,
+ memopv16f32, f32mem, loadf32, "{1to16}", SSEPackedSingle,
+ SSE_ALU_ITINS_P.s, 1>,
+ EVEX_V512, EVEX_CD8<32, CD8VF>;
+defm VMAXPSZ : avx512_fp_packed<0x5F, "maxps", X86fmax, VR512, v16f32, f512mem,
+ memopv16f32, f32mem, loadf32, "{1to16}", SSEPackedSingle,
+ SSE_ALU_ITINS_P.s, 1>,
+ EVEX_V512, EVEX_CD8<32, CD8VF>;
+
+defm VMINPDZ : avx512_fp_packed<0x5D, "minpd", X86fmin, VR512, v8f64, f512mem,
+ memopv8f64, f64mem, loadf64, "{1to8}", SSEPackedDouble,
+ SSE_ALU_ITINS_P.d, 1>,
+ EVEX_V512, OpSize, VEX_W, EVEX_CD8<64, CD8VF>;
+defm VMAXPDZ : avx512_fp_packed<0x5F, "maxpd", X86fmax, VR512, v8f64, f512mem,
+ memopv8f64, f64mem, loadf64, "{1to8}", SSEPackedDouble,
+ SSE_ALU_ITINS_P.d, 1>,
+ EVEX_V512, OpSize, VEX_W, EVEX_CD8<64, CD8VF>;
+
+defm VSUBPSZ : avx512_fp_packed<0x5C, "subps", fsub, VR512, v16f32, f512mem,
+ memopv16f32, f32mem, loadf32, "{1to16}", SSEPackedSingle,
+ SSE_ALU_ITINS_P.s, 0>, EVEX_V512, EVEX_CD8<32, CD8VF>;
+defm VDIVPSZ : avx512_fp_packed<0x5E, "divps", fdiv, VR512, v16f32, f512mem,
+ memopv16f32, f32mem, loadf32, "{1to16}", SSEPackedSingle,
+ SSE_ALU_ITINS_P.s, 0>, EVEX_V512, EVEX_CD8<32, CD8VF>;
+
+defm VSUBPDZ : avx512_fp_packed<0x5C, "subpd", fsub, VR512, v8f64, f512mem,
+ memopv8f64, f64mem, loadf64, "{1to8}", SSEPackedDouble,
+ SSE_ALU_ITINS_P.d, 0>,
+ EVEX_V512, OpSize, VEX_W, EVEX_CD8<64, CD8VF>;
+defm VDIVPDZ : avx512_fp_packed<0x5E, "divpd", fdiv, VR512, v8f64, f512mem,
+ memopv8f64, f64mem, loadf64, "{1to8}", SSEPackedDouble,
+ SSE_ALU_ITINS_P.d, 0>,
+ EVEX_V512, OpSize, VEX_W, EVEX_CD8<64, CD8VF>;
+
+//===----------------------------------------------------------------------===//
+// AVX-512 VPTESTM instructions
+//===----------------------------------------------------------------------===//
+
+multiclass avx512_vptest<bits<8> opc, string OpcodeStr, RegisterClass KRC,
+ RegisterClass RC, X86MemOperand x86memop, PatFrag memop_frag,
+ SDNode OpNode, ValueType vt> {
+ def rr : AVX5128I<opc, MRMSrcReg,
+ (outs KRC:$dst), (ins RC:$src1, RC:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set KRC:$dst, (OpNode (vt RC:$src1), (vt RC:$src2)))]>, EVEX_4V;
+ def rm : AVX5128I<opc, MRMSrcMem,
+ (outs KRC:$dst), (ins RC:$src1, x86memop:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set KRC:$dst, (OpNode (vt RC:$src1),
+ (bitconvert (memop_frag addr:$src2))))]>, EVEX_4V;
+}
+
+defm VPTESTMDZ : avx512_vptest<0x27, "vptestmd", VK16, VR512, f512mem,
+ memopv16i32, X86testm, v16i32>, EVEX_V512,
+ EVEX_CD8<32, CD8VF>;
+defm VPTESTMQZ : avx512_vptest<0x27, "vptestmq", VK8, VR512, f512mem,
+ memopv8i64, X86testm, v8i64>, EVEX_V512, VEX_W,
+ EVEX_CD8<64, CD8VF>;
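+
+// vptestm is the vector analogue of TEST: mask bit i is set when the AND of
+// the corresponding lanes is non-zero. A lane sketch:
+//   k[i] = ((src1[i] & src2[i]) != 0);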
+
+//===----------------------------------------------------------------------===//
+// AVX-512 Shift instructions
+//===----------------------------------------------------------------------===//
+multiclass avx512_shift_rmi<bits<8> opc, Format ImmFormR, Format ImmFormM,
+ string OpcodeStr, SDNode OpNode, RegisterClass RC,
+ ValueType vt, X86MemOperand x86memop, PatFrag mem_frag,
+ RegisterClass KRC> {
+ def ri : AVX512BIi8<opc, ImmFormR, (outs RC:$dst),
+ (ins RC:$src1, i8imm:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set RC:$dst, (vt (OpNode RC:$src1, (i8 imm:$src2))))],
+ SSE_INTSHIFT_ITINS_P.rr>, EVEX_4V;
+ def rik : AVX512BIi8<opc, ImmFormR, (outs RC:$dst),
+ (ins KRC:$mask, RC:$src1, i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"),
+ [], SSE_INTSHIFT_ITINS_P.rr>, EVEX_4V, EVEX_K;
+ def mi: AVX512BIi8<opc, ImmFormM, (outs RC:$dst),
+ (ins x86memop:$src1, i8imm:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set RC:$dst, (OpNode (mem_frag addr:$src1),
+ (i8 imm:$src2)))], SSE_INTSHIFT_ITINS_P.rm>, EVEX_4V;
+ def mik: AVX512BIi8<opc, ImmFormM, (outs RC:$dst),
+ (ins KRC:$mask, x86memop:$src1, i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"),
+ [], SSE_INTSHIFT_ITINS_P.rm>, EVEX_4V, EVEX_K;
+}
+
+multiclass avx512_shift_rrm<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ RegisterClass RC, ValueType vt, ValueType SrcVT,
+ PatFrag bc_frag, RegisterClass KRC> {
+ // src2 is always 128-bit
+ def rr : AVX512BI<opc, MRMSrcReg, (outs RC:$dst),
+ (ins RC:$src1, VR128X:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set RC:$dst, (vt (OpNode RC:$src1, (SrcVT VR128X:$src2))))],
+ SSE_INTSHIFT_ITINS_P.rr>, EVEX_4V;
+ def rrk : AVX512BI<opc, MRMSrcReg, (outs RC:$dst),
+ (ins KRC:$mask, RC:$src1, VR128X:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"),
+ [], SSE_INTSHIFT_ITINS_P.rr>, EVEX_4V, EVEX_K;
+ def rm : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
+ (ins RC:$src1, i128mem:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set RC:$dst, (vt (OpNode RC:$src1,
+ (bc_frag (memopv2i64 addr:$src2)))))],
+ SSE_INTSHIFT_ITINS_P.rm>, EVEX_4V;
+ def rmk : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
+ (ins KRC:$mask, RC:$src1, i128mem:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"),
+ [], SSE_INTSHIFT_ITINS_P.rm>, EVEX_4V, EVEX_K;
+}
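+
+// Two uniform-count shift flavors: the *_rmi forms take an imm8, the *_rrm
+// forms take the count from the low 64 bits of an xmm; either way every
+// lane shifts by the same amount (per-lane counts are the vpsllv* family
+// below). E.g., assuming AT&T syntax:
+//   vpsrld $5, %zmm0, %zmm1        ; ri: immediate count
+//   vpsrld %xmm2, %zmm0, %zmm1     ; rr: count = xmm2[63:0]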
+
+defm VPSRLDZ : avx512_shift_rmi<0x72, MRM2r, MRM2m, "vpsrld", X86vsrli,
+ VR512, v16i32, i512mem, memopv16i32, VK16WM>,
+ EVEX_V512, EVEX_CD8<32, CD8VF>;
+defm VPSRLDZ : avx512_shift_rrm<0xD2, "vpsrld", X86vsrl,
+ VR512, v16i32, v4i32, bc_v4i32, VK16WM>, EVEX_V512,
+ EVEX_CD8<32, CD8VQ>;
+
+defm VPSRLQZ : avx512_shift_rmi<0x73, MRM2r, MRM2m, "vpsrlq", X86vsrli,
+ VR512, v8i64, i512mem, memopv8i64, VK8WM>, EVEX_V512,
+ EVEX_CD8<64, CD8VF>, VEX_W;
+defm VPSRLQZ : avx512_shift_rrm<0xD3, "vpsrlq", X86vsrl,
+ VR512, v8i64, v2i64, bc_v2i64, VK8WM>, EVEX_V512,
+ EVEX_CD8<64, CD8VQ>, VEX_W;
+
+defm VPSLLDZ : avx512_shift_rmi<0x72, MRM6r, MRM6m, "vpslld", X86vshli,
+ VR512, v16i32, i512mem, memopv16i32, VK16WM>, EVEX_V512,
+ EVEX_CD8<32, CD8VF>;
+defm VPSLLDZ : avx512_shift_rrm<0xF2, "vpslld", X86vshl,
+ VR512, v16i32, v4i32, bc_v4i32, VK16WM>, EVEX_V512,
+ EVEX_CD8<32, CD8VQ>;
+
+defm VPSLLQZ : avx512_shift_rmi<0x73, MRM6r, MRM6m, "vpsllq", X86vshli,
+ VR512, v8i64, i512mem, memopv8i64, VK8WM>, EVEX_V512,
+ EVEX_CD8<64, CD8VF>, VEX_W;
+defm VPSLLQZ : avx512_shift_rrm<0xF3, "vpsllq", X86vshl,
+ VR512, v8i64, v2i64, bc_v2i64, VK8WM>, EVEX_V512,
+ EVEX_CD8<64, CD8VQ>, VEX_W;
+
+defm VPSRADZ : avx512_shift_rmi<0x72, MRM4r, MRM4m, "vpsrad", X86vsrai,
+ VR512, v16i32, i512mem, memopv16i32, VK16WM>,
+ EVEX_V512, EVEX_CD8<32, CD8VF>;
+defm VPSRADZ : avx512_shift_rrm<0xE2, "vpsrad", X86vsra,
+ VR512, v16i32, v4i32, bc_v4i32, VK16WM>, EVEX_V512,
+ EVEX_CD8<32, CD8VQ>;
+
+defm VPSRAQZ : avx512_shift_rmi<0x72, MRM4r, MRM4m, "vpsraq", X86vsrai,
+ VR512, v8i64, i512mem, memopv8i64, VK8WM>, EVEX_V512,
+ EVEX_CD8<64, CD8VF>, VEX_W;
+defm VPSRAQZ : avx512_shift_rrm<0xE2, "vpsraq", X86vsra,
+ VR512, v8i64, v2i64, bc_v2i64, VK8WM>, EVEX_V512,
+ EVEX_CD8<64, CD8VQ>, VEX_W;
+
+//===----------------------------------------------------------------------===//
+// Variable Bit Shifts
+//===----------------------------------------------------------------------===//
+multiclass avx512_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ RegisterClass RC, ValueType vt,
+ X86MemOperand x86memop, PatFrag mem_frag> {
+ def rr : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
+ (ins RC:$src1, RC:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set RC:$dst,
+ (vt (OpNode RC:$src1, (vt RC:$src2))))]>,
+ EVEX_4V;
+ def rm : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
+ (ins RC:$src1, x86memop:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set RC:$dst,
+ (vt (OpNode RC:$src1, (mem_frag addr:$src2))))]>,
+ EVEX_4V;
+}
+
+defm VPSLLVDZ : avx512_var_shift<0x47, "vpsllvd", shl, VR512, v16i32,
+ i512mem, memopv16i32>, EVEX_V512,
+ EVEX_CD8<32, CD8VF>;
+defm VPSLLVQZ : avx512_var_shift<0x47, "vpsllvq", shl, VR512, v8i64,
+ i512mem, memopv8i64>, EVEX_V512, VEX_W,
+ EVEX_CD8<64, CD8VF>;
+defm VPSRLVDZ : avx512_var_shift<0x45, "vpsrlvd", srl, VR512, v16i32,
+ i512mem, memopv16i32>, EVEX_V512,
+ EVEX_CD8<32, CD8VF>;
+defm VPSRLVQZ : avx512_var_shift<0x45, "vpsrlvq", srl, VR512, v8i64,
+ i512mem, memopv8i64>, EVEX_V512, VEX_W,
+ EVEX_CD8<64, CD8VF>;
+defm VPSRAVDZ : avx512_var_shift<0x46, "vpsravd", sra, VR512, v16i32,
+ i512mem, memopv16i32>, EVEX_V512,
+ EVEX_CD8<32, CD8VF>;
+defm VPSRAVQZ : avx512_var_shift<0x46, "vpsravq", sra, VR512, v8i64,
+ i512mem, memopv8i64>, EVEX_V512, VEX_W,
+ EVEX_CD8<64, CD8VF>;
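+
+// The variable forms shift each lane by the count in the matching lane of
+// $src2, so generic vector shl/srl/sra select straight to them. A vpsllvd
+// lane sketch (counts of 32 or more produce zero rather than wrapping):
+//   dst[i] = src2[i] < 32 ? src1[i] << src2[i] : 0;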
+
+//===----------------------------------------------------------------------===//
+// AVX-512 - MOVDDUP
+//===----------------------------------------------------------------------===//
+
+multiclass avx512_movddup<string OpcodeStr, RegisterClass RC, ValueType VT,
+ X86MemOperand x86memop, PatFrag memop_frag> {
+def rr : AVX512PDI<0x12, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set RC:$dst, (VT (X86Movddup RC:$src)))]>, EVEX;
+def rm : AVX512PDI<0x12, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set RC:$dst,
+ (VT (X86Movddup (memop_frag addr:$src))))]>, EVEX;
+}
+
+defm VMOVDDUPZ : avx512_movddup<"vmovddup", VR512, v8f64, f512mem, memopv8f64>,
+ VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;
+def : Pat<(X86Movddup (v8f64 (scalar_to_vector (loadf64 addr:$src)))),
+ (VMOVDDUPZrm addr:$src)>;
+
+//===----------------------------------------------------------------------===//
+// Replicate Single FP - MOVSHDUP and MOVSLDUP
+//===----------------------------------------------------------------------===//
+multiclass avx512_replicate_sfp<bits<8> op, SDNode OpNode, string OpcodeStr,
+ ValueType vt, RegisterClass RC, PatFrag mem_frag,
+ X86MemOperand x86memop> {
+ def rr : AVX512XSI<op, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set RC:$dst, (vt (OpNode RC:$src)))]>, EVEX;
+ let mayLoad = 1 in
+ def rm : AVX512XSI<op, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set RC:$dst, (OpNode (mem_frag addr:$src)))]>, EVEX;
+}
+
+defm VMOVSHDUPZ : avx512_replicate_sfp<0x16, X86Movshdup, "vmovshdup",
+ v16f32, VR512, memopv16f32, f512mem>, EVEX_V512,
+ EVEX_CD8<32, CD8VF>;
+defm VMOVSLDUPZ : avx512_replicate_sfp<0x12, X86Movsldup, "vmovsldup",
+ v16f32, VR512, memopv16f32, f512mem>, EVEX_V512,
+ EVEX_CD8<32, CD8VF>;
+
+def : Pat<(v16i32 (X86Movshdup VR512:$src)), (VMOVSHDUPZrr VR512:$src)>;
+def : Pat<(v16i32 (X86Movshdup (memopv16i32 addr:$src))),
+ (VMOVSHDUPZrm addr:$src)>;
+def : Pat<(v16i32 (X86Movsldup VR512:$src)), (VMOVSLDUPZrr VR512:$src)>;
+def : Pat<(v16i32 (X86Movsldup (memopv16i32 addr:$src))),
+ (VMOVSLDUPZrm addr:$src)>;
+
+//===----------------------------------------------------------------------===//
+// Move Low to High and High to Low packed FP Instructions
+//===----------------------------------------------------------------------===//
+def VMOVLHPSZrr : AVX512PSI<0x16, MRMSrcReg, (outs VR128X:$dst),
+ (ins VR128X:$src1, VR128X:$src2),
+ "vmovlhps{z}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR128X:$dst, (v4f32 (X86Movlhps VR128X:$src1, VR128X:$src2)))],
+ IIC_SSE_MOV_LH>, EVEX_4V;
+def VMOVHLPSZrr : AVX512PSI<0x12, MRMSrcReg, (outs VR128X:$dst),
+ (ins VR128X:$src1, VR128X:$src2),
+ "vmovhlps{z}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR128X:$dst, (v4f32 (X86Movhlps VR128X:$src1, VR128X:$src2)))],
+ IIC_SSE_MOV_LH>, EVEX_4V;
+
+let Predicates = [HasAVX512] in {
+ // MOVLHPS patterns
+ def : Pat<(v4i32 (X86Movlhps VR128X:$src1, VR128X:$src2)),
+ (VMOVLHPSZrr VR128X:$src1, VR128X:$src2)>;
+ def : Pat<(v2i64 (X86Movlhps VR128X:$src1, VR128X:$src2)),
+ (VMOVLHPSZrr (v2i64 VR128X:$src1), VR128X:$src2)>;
+
+ // MOVHLPS patterns
+ def : Pat<(v4i32 (X86Movhlps VR128X:$src1, VR128X:$src2)),
+ (VMOVHLPSZrr VR128X:$src1, VR128X:$src2)>;
+}
+
+//===----------------------------------------------------------------------===//
+// FMA - Fused Multiply-Add Operations
+//
+let Constraints = "$src1 = $dst" in {
+multiclass avx512_fma3p_rm<bits<8> opc, string OpcodeStr,
+ RegisterClass RC, X86MemOperand x86memop,
+ PatFrag mem_frag, X86MemOperand x86scalar_mop, PatFrag scalar_mfrag,
+ string BrdcstStr, SDNode OpNode, ValueType OpVT> {
+ def r: AVX512FMA3<opc, MRMSrcReg, (outs RC:$dst),
+ (ins RC:$src1, RC:$src2, RC:$src3),
+ !strconcat(OpcodeStr,"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ [(set RC:$dst, (OpVT(OpNode RC:$src1, RC:$src2, RC:$src3)))]>;
+
+ let mayLoad = 1 in
+ def m: AVX512FMA3<opc, MRMSrcMem, (outs RC:$dst),
+ (ins RC:$src1, RC:$src2, x86memop:$src3),
+ !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2,
+ (mem_frag addr:$src3))))]>;
+ def mb: AVX512FMA3<opc, MRMSrcMem, (outs RC:$dst),
+ (ins RC:$src1, RC:$src2, x86scalar_mop:$src3),
+ !strconcat(OpcodeStr, "\t{${src3}", BrdcstStr,
+ ", $src2, $dst|$dst, $src2, ${src3}", BrdcstStr, "}"),
+ [(set RC:$dst, (OpNode RC:$src1, RC:$src2,
+ (OpVT (X86VBroadcast (scalar_mfrag addr:$src3)))))]>, EVEX_B;
+}
+} // Constraints = "$src1 = $dst"
+
+let ExeDomain = SSEPackedSingle in {
+ defm VFMADD213PSZ : avx512_fma3p_rm<0xA8, "vfmadd213ps", VR512, f512mem,
+ memopv16f32, f32mem, loadf32, "{1to16}",
+ X86Fmadd, v16f32>, EVEX_V512,
+ EVEX_CD8<32, CD8VF>;
+ defm VFMSUB213PSZ : avx512_fma3p_rm<0xAA, "vfmsub213ps", VR512, f512mem,
+ memopv16f32, f32mem, loadf32, "{1to16}",
+ X86Fmsub, v16f32>, EVEX_V512,
+ EVEX_CD8<32, CD8VF>;
+ defm VFMADDSUB213PSZ : avx512_fma3p_rm<0xA6, "vfmaddsub213ps", VR512, f512mem,
+ memopv16f32, f32mem, loadf32, "{1to16}",
+ X86Fmaddsub, v16f32>,
+ EVEX_V512, EVEX_CD8<32, CD8VF>;
+ defm VFMSUBADD213PSZ : avx512_fma3p_rm<0xA7, "vfmsubadd213ps", VR512, f512mem,
+ memopv16f32, f32mem, loadf32, "{1to16}",
+ X86Fmsubadd, v16f32>,
+ EVEX_V512, EVEX_CD8<32, CD8VF>;
+ defm VFNMADD213PSZ : avx512_fma3p_rm<0xAC, "vfnmadd213ps", VR512, f512mem,
+ memopv16f32, f32mem, loadf32, "{1to16}",
+ X86Fnmadd, v16f32>, EVEX_V512,
+ EVEX_CD8<32, CD8VF>;
+ defm VFNMSUB213PSZ : avx512_fma3p_rm<0xAE, "vfnmsub213ps", VR512, f512mem,
+ memopv16f32, f32mem, loadf32, "{1to16}",
+ X86Fnmsub, v16f32>, EVEX_V512,
+ EVEX_CD8<32, CD8VF>;
+}
+let ExeDomain = SSEPackedDouble in {
+ defm VFMADD213PDZ : avx512_fma3p_rm<0xA8, "vfmadd213pd", VR512, f512mem,
+ memopv8f64, f64mem, loadf64, "{1to8}",
+ X86Fmadd, v8f64>, EVEX_V512,
+ VEX_W, EVEX_CD8<64, CD8VF>;
+ defm VFMSUB213PDZ : avx512_fma3p_rm<0xAA, "vfmsub213pd", VR512, f512mem,
+ memopv8f64, f64mem, loadf64, "{1to8}",
+ X86Fmsub, v8f64>, EVEX_V512, VEX_W,
+ EVEX_CD8<64, CD8VF>;
+ defm VFMADDSUB213PDZ : avx512_fma3p_rm<0xA6, "vfmaddsub213pd", VR512, f512mem,
+ memopv8f64, f64mem, loadf64, "{1to8}",
+ X86Fmaddsub, v8f64>, EVEX_V512, VEX_W,
+ EVEX_CD8<64, CD8VF>;
+ defm VFMSUBADD213PDZ : avx512_fma3p_rm<0xA7, "vfmsubadd213pd", VR512, f512mem,
+ memopv8f64, f64mem, loadf64, "{1to8}",
+ X86Fmsubadd, v8f64>, EVEX_V512, VEX_W,
+ EVEX_CD8<64, CD8VF>;
+ defm VFNMADD213PDZ : avx512_fma3p_rm<0xAC, "vfnmadd213pd", VR512, f512mem,
+ memopv8f64, f64mem, loadf64, "{1to8}",
+ X86Fnmadd, v8f64>, EVEX_V512, VEX_W,
+ EVEX_CD8<64, CD8VF>;
+ defm VFNMSUB213PDZ : avx512_fma3p_rm<0xAE, "vfnmsub213pd", VR512, f512mem,
+ memopv8f64, f64mem, loadf64, "{1to8}",
+ X86Fnmsub, v8f64>, EVEX_V512, VEX_W,
+ EVEX_CD8<64, CD8VF>;
+}
+
+let Constraints = "$src1 = $dst" in {
+multiclass avx512_fma3p_m132<bits<8> opc, string OpcodeStr,
+ RegisterClass RC, X86MemOperand x86memop,
+ PatFrag mem_frag, X86MemOperand x86scalar_mop, PatFrag scalar_mfrag,
+ string BrdcstStr, SDNode OpNode, ValueType OpVT> {
+ let mayLoad = 1 in
+ def m: AVX512FMA3<opc, MRMSrcMem, (outs RC:$dst),
+ (ins RC:$src1, RC:$src3, x86memop:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src3, $dst|$dst, $src3, $src2}"),
+ [(set RC:$dst, (OpVT (OpNode RC:$src1, (mem_frag addr:$src2), RC:$src3)))]>;
+ def mb: AVX512FMA3<opc, MRMSrcMem, (outs RC:$dst),
+ (ins RC:$src1, RC:$src3, x86scalar_mop:$src2),
+ !strconcat(OpcodeStr, "\t{${src2}", BrdcstStr,
+ ", $src3, $dst|$dst, $src3, ${src2}", BrdcstStr, "}"),
+ [(set RC:$dst, (OpNode RC:$src1,
+ (OpVT (X86VBroadcast (scalar_mfrag addr:$src2))), RC:$src3))]>, EVEX_B;
+}
+} // Constraints = "$src1 = $dst"
+
+
+let ExeDomain = SSEPackedSingle in {
+ defm VFMADD132PSZ : avx512_fma3p_m132<0x98, "vfmadd132ps", VR512, f512mem,
+ memopv16f32, f32mem, loadf32, "{1to16}",
+ X86Fmadd, v16f32>, EVEX_V512,
+ EVEX_CD8<32, CD8VF>;
+ defm VFMSUB132PSZ : avx512_fma3p_m132<0x9A, "vfmsub132ps", VR512, f512mem,
+ memopv16f32, f32mem, loadf32, "{1to16}",
+ X86Fmsub, v16f32>, EVEX_V512,
+ EVEX_CD8<32, CD8VF>;
+ defm VFMADDSUB132PSZ : avx512_fma3p_m132<0x96, "vfmaddsub132ps", VR512, f512mem,
+ memopv16f32, f32mem, loadf32, "{1to16}",
+ X86Fmaddsub, v16f32>,
+ EVEX_V512, EVEX_CD8<32, CD8VF>;
+ defm VFMSUBADD132PSZ : avx512_fma3p_m132<0x97, "vfmsubadd132ps", VR512, f512mem,
+ memopv16f32, f32mem, loadf32, "{1to16}",
+ X86Fmsubadd, v16f32>,
+ EVEX_V512, EVEX_CD8<32, CD8VF>;
+ defm VFNMADD132PSZ : avx512_fma3p_m132<0x9C, "vfnmadd132ps", VR512, f512mem,
+ memopv16f32, f32mem, loadf32, "{1to16}",
+ X86Fnmadd, v16f32>, EVEX_V512,
+ EVEX_CD8<32, CD8VF>;
+ defm VFNMSUB132PSZ : avx512_fma3p_m132<0x9E, "vfnmsub132ps", VR512, f512mem,
+ memopv16f32, f32mem, loadf32, "{1to16}",
+ X86Fnmsub, v16f32>, EVEX_V512,
+ EVEX_CD8<32, CD8VF>;
+}
+let ExeDomain = SSEPackedDouble in {
+ defm VFMADD132PDZ : avx512_fma3p_m132<0x98, "vfmadd132pd", VR512, f512mem,
+ memopv8f64, f64mem, loadf64, "{1to8}",
+ X86Fmadd, v8f64>, EVEX_V512,
+ VEX_W, EVEX_CD8<64, CD8VF>;
+ defm VFMSUB132PDZ : avx512_fma3p_m132<0x9A, "vfmsub132pd", VR512, f512mem,
+ memopv8f64, f64mem, loadf64, "{1to8}",
+ X86Fmsub, v8f64>, EVEX_V512, VEX_W,
+ EVEX_CD8<64, CD8VF>;
+ defm VFMADDSUB132PDZ : avx512_fma3p_m132<0x96, "vfmaddsub132pd", VR512, f512mem,
+ memopv8f64, f64mem, loadf64, "{1to8}",
+ X86Fmaddsub, v8f64>, EVEX_V512, VEX_W,
+ EVEX_CD8<64, CD8VF>;
+ defm VFMSUBADD132PDZ : avx512_fma3p_m132<0x97, "vfmsubadd132pd", VR512, f512mem,
+ memopv8f64, f64mem, loadf64, "{1to8}",
+ X86Fmsubadd, v8f64>, EVEX_V512, VEX_W,
+ EVEX_CD8<64, CD8VF>;
+ defm VFNMADD132PDZ : avx512_fma3p_m132<0x9C, "vfnmadd132pd", VR512, f512mem,
+ memopv8f64, f64mem, loadf64, "{1to8}",
+ X86Fnmadd, v8f64>, EVEX_V512, VEX_W,
+ EVEX_CD8<64, CD8VF>;
+ defm VFNMSUB132PDZ : avx512_fma3p_m132<0x9E, "vfnmsub132pd", VR512, f512mem,
+ memopv8f64, f64mem, loadf64, "{1to8}",
+ X86Fnmsub, v8f64>, EVEX_V512, VEX_W,
+ EVEX_CD8<64, CD8VF>;
+}
+
+// Scalar FMA
+let Constraints = "$src1 = $dst" in {
+multiclass avx512_fma3s_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ RegisterClass RC, ValueType OpVT,
+ X86MemOperand x86memop, Operand memop,
+ PatFrag mem_frag> {
+ let isCommutable = 1 in
+ def r : AVX512FMA3<opc, MRMSrcReg, (outs RC:$dst),
+ (ins RC:$src1, RC:$src2, RC:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ [(set RC:$dst,
+ (OpVT (OpNode RC:$src2, RC:$src1, RC:$src3)))]>;
+ let mayLoad = 1 in
+ def m : AVX512FMA3<opc, MRMSrcMem, (outs RC:$dst),
+ (ins RC:$src1, RC:$src2, f128mem:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ [(set RC:$dst,
+ (OpVT (OpNode RC:$src2, RC:$src1,
+ (mem_frag addr:$src3))))]>;
+}
+
+} // Constraints = "$src1 = $dst"
+
+defm VFMADDSSZ : avx512_fma3s_rm<0xA9, "vfmadd213ss{z}", X86Fmadd, FR32X,
+ f32, f32mem, ssmem, loadf32>, EVEX_CD8<32, CD8VT1>;
+defm VFMADDSDZ : avx512_fma3s_rm<0xA9, "vfmadd213sd{z}", X86Fmadd, FR64X,
+ f64, f64mem, sdmem, loadf64>, VEX_W, EVEX_CD8<64, CD8VT1>;
+defm VFMSUBSSZ : avx512_fma3s_rm<0xAB, "vfmsub213ss{z}", X86Fmsub, FR32X,
+ f32, f32mem, ssmem, loadf32>, EVEX_CD8<32, CD8VT1>;
+defm VFMSUBSDZ : avx512_fma3s_rm<0xAB, "vfmsub213sd{z}", X86Fmsub, FR64X,
+ f64, f64mem, sdmem, loadf64>, VEX_W, EVEX_CD8<64, CD8VT1>;
+defm VFNMADDSSZ : avx512_fma3s_rm<0xAD, "vfnmadd213ss{z}", X86Fnmadd, FR32X,
+ f32, f32mem, ssmem, loadf32>, EVEX_CD8<32, CD8VT1>;
+defm VFNMADDSDZ : avx512_fma3s_rm<0xAD, "vfnmadd213sd{z}", X86Fnmadd, FR64X,
+ f64, f64mem, sdmem, loadf64>, VEX_W, EVEX_CD8<64, CD8VT1>;
+defm VFNMSUBSSZ : avx512_fma3s_rm<0xAF, "vfnmsub213ss{z}", X86Fnmsub, FR32X,
+ f32, f32mem, ssmem, loadf32>, EVEX_CD8<32, CD8VT1>;
+defm VFNMSUBSDZ : avx512_fma3s_rm<0xAF, "vfnmsub213sd{z}", X86Fnmsub, FR64X,
+ f64, f64mem, sdmem, loadf64>, VEX_W, EVEX_CD8<64, CD8VT1>;
+
+//===----------------------------------------------------------------------===//
+// AVX-512 Scalar convert from sign integer to float/double
+//===----------------------------------------------------------------------===//
+
+multiclass avx512_vcvtsi<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
+ X86MemOperand x86memop, string asm> {
+let neverHasSideEffects = 1 in {
+ def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins DstRC:$src1, SrcRC:$src),
+ !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>,
+ EVEX_4V;
+ let mayLoad = 1 in
+ def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst),
+ (ins DstRC:$src1, x86memop:$src),
+ !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>,
+ EVEX_4V;
+} // neverHasSideEffects = 1
+}
+let Predicates = [HasAVX512] in {
+defm VCVTSI2SSZ : avx512_vcvtsi<0x2A, GR32, FR32X, i32mem, "cvtsi2ss{l}{z}">,
+ XS, VEX_LIG, EVEX_CD8<32, CD8VT1>;
+defm VCVTSI642SSZ : avx512_vcvtsi<0x2A, GR64, FR32X, i64mem, "cvtsi2ss{q}{z}">,
+ XS, VEX_W, VEX_LIG, EVEX_CD8<64, CD8VT1>;
+defm VCVTSI2SDZ : avx512_vcvtsi<0x2A, GR32, FR64X, i32mem, "cvtsi2sd{l}{z}">,
+ XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
+defm VCVTSI642SDZ : avx512_vcvtsi<0x2A, GR64, FR64X, i64mem, "cvtsi2sd{q}{z}">,
+ XD, VEX_W, VEX_LIG, EVEX_CD8<64, CD8VT1>;
+
+def : Pat<(f32 (sint_to_fp (loadi32 addr:$src))),
+ (VCVTSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
+def : Pat<(f32 (sint_to_fp (loadi64 addr:$src))),
+ (VCVTSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
+def : Pat<(f64 (sint_to_fp (loadi32 addr:$src))),
+ (VCVTSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
+def : Pat<(f64 (sint_to_fp (loadi64 addr:$src))),
+ (VCVTSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
+
+def : Pat<(f32 (sint_to_fp GR32:$src)),
+ (VCVTSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
+def : Pat<(f32 (sint_to_fp GR64:$src)),
+ (VCVTSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
+def : Pat<(f64 (sint_to_fp GR32:$src)),
+ (VCVTSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
+def : Pat<(f64 (sint_to_fp GR64:$src)),
+ (VCVTSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
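+
+// The scalar converts only merge their result into the low element; the tied
+// first source merely supplies the untouched upper bits, so the patterns
+// above pass an IMPLICIT_DEF placeholder for it.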
+
+defm VCVTUSI2SSZ : avx512_vcvtsi<0x7B, GR32, FR32X, i32mem, "cvtusi2ss{l}{z}">,
+ XS, VEX_LIG, EVEX_CD8<32, CD8VT1>;
+defm VCVTUSI642SSZ : avx512_vcvtsi<0x7B, GR64, FR32X, i64mem, "cvtusi2ss{q}{z}">,
+ XS, VEX_W, VEX_LIG, EVEX_CD8<64, CD8VT1>;
+defm VCVTUSI2SDZ : avx512_vcvtsi<0x7B, GR32, FR64X, i32mem, "cvtusi2sd{l}{z}">,
+ XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
+defm VCVTUSI642SDZ : avx512_vcvtsi<0x7B, GR64, FR64X, i64mem, "cvtusi2sd{q}{z}">,
+ XD, VEX_W, VEX_LIG, EVEX_CD8<64, CD8VT1>;
+
+def : Pat<(f32 (uint_to_fp (loadi32 addr:$src))),
+ (VCVTUSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
+def : Pat<(f32 (uint_to_fp (loadi64 addr:$src))),
+ (VCVTUSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
+def : Pat<(f64 (uint_to_fp (loadi32 addr:$src))),
+ (VCVTUSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
+def : Pat<(f64 (uint_to_fp (loadi64 addr:$src))),
+ (VCVTUSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
+
+def : Pat<(f32 (uint_to_fp GR32:$src)),
+ (VCVTUSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
+def : Pat<(f32 (uint_to_fp GR64:$src)),
+ (VCVTUSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
+def : Pat<(f64 (uint_to_fp GR32:$src)),
+ (VCVTUSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
+def : Pat<(f64 (uint_to_fp GR64:$src)),
+ (VCVTUSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
+}
+
+//===----------------------------------------------------------------------===//
+// AVX-512 Scalar convert from float/double to integer
+//===----------------------------------------------------------------------===//
+multiclass avx512_cvt_s_int<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
+ Intrinsic Int, Operand memop, ComplexPattern mem_cpat,
+ string asm> {
+let neverHasSideEffects = 1 in {
+ def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src),
+ !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
+ [(set DstRC:$dst, (Int SrcRC:$src))]>, EVEX, VEX_LIG;
+ let mayLoad = 1 in
+ def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins memop:$src),
+ !strconcat(asm,"\t{$src, $dst|$dst, $src}"), []>, EVEX, VEX_LIG;
+} // neverHasSideEffects = 1
+}
+let Predicates = [HasAVX512] in {
+// Convert float/double to signed/unsigned int 32/64
+defm VCVTSS2SIZ: avx512_cvt_s_int<0x2D, VR128X, GR32, int_x86_sse_cvtss2si,
+ ssmem, sse_load_f32, "cvtss2si{z}">,
+ XS, EVEX_CD8<32, CD8VT1>;
+defm VCVTSS2SI64Z: avx512_cvt_s_int<0x2D, VR128X, GR64, int_x86_sse_cvtss2si64,
+ ssmem, sse_load_f32, "cvtss2si{z}">,
+ XS, VEX_W, EVEX_CD8<32, CD8VT1>;
+defm VCVTSS2USIZ: avx512_cvt_s_int<0x79, VR128X, GR32, int_x86_avx512_cvtss2usi,
+ ssmem, sse_load_f32, "cvtss2usi{z}">,
+ XS, EVEX_CD8<32, CD8VT1>;
+defm VCVTSS2USI64Z: avx512_cvt_s_int<0x79, VR128X, GR64,
+ int_x86_avx512_cvtss2usi64, ssmem,
+ sse_load_f32, "cvtss2usi{z}">, XS, VEX_W,
+ EVEX_CD8<32, CD8VT1>;
+defm VCVTSD2SIZ: avx512_cvt_s_int<0x2D, VR128X, GR32, int_x86_sse2_cvtsd2si,
+ sdmem, sse_load_f64, "cvtsd2si{z}">,
+ XD, EVEX_CD8<64, CD8VT1>;
+defm VCVTSD2SI64Z: avx512_cvt_s_int<0x2D, VR128X, GR64, int_x86_sse2_cvtsd2si64,
+ sdmem, sse_load_f64, "cvtsd2si{z}">,
+ XD, VEX_W, EVEX_CD8<64, CD8VT1>;
+defm VCVTSD2USIZ: avx512_cvt_s_int<0x79, VR128X, GR32, int_x86_avx512_cvtsd2usi,
+ sdmem, sse_load_f64, "cvtsd2usi{z}">,
+ XD, EVEX_CD8<64, CD8VT1>;
+defm VCVTSD2USI64Z: avx512_cvt_s_int<0x79, VR128X, GR64,
+ int_x86_avx512_cvtsd2usi64, sdmem,
+ sse_load_f64, "cvtsd2usi{z}">, XD, VEX_W,
+ EVEX_CD8<64, CD8VT1>;
+
+defm Int_VCVTSI2SSZ : sse12_cvt_sint_3addr<0x2A, GR32, VR128X,
+ int_x86_sse_cvtsi2ss, i32mem, loadi32, "cvtsi2ss{l}{z}",
+ SSE_CVT_Scalar, 0>, XS, EVEX_4V;
+defm Int_VCVTSI2SS64Z : sse12_cvt_sint_3addr<0x2A, GR64, VR128X,
+ int_x86_sse_cvtsi642ss, i64mem, loadi64, "cvtsi2ss{q}{z}",
+ SSE_CVT_Scalar, 0>, XS, EVEX_4V, VEX_W;
+defm Int_VCVTSI2SDZ : sse12_cvt_sint_3addr<0x2A, GR32, VR128X,
+ int_x86_sse2_cvtsi2sd, i32mem, loadi32, "cvtsi2sd{l}{z}",
+ SSE_CVT_Scalar, 0>, XD, EVEX_4V;
+defm Int_VCVTSI2SD64Z : sse12_cvt_sint_3addr<0x2A, GR64, VR128X,
+ int_x86_sse2_cvtsi642sd, i64mem, loadi64, "cvtsi2sd{q}{z}",
+ SSE_CVT_Scalar, 0>, XD, EVEX_4V, VEX_W;
+
+defm Int_VCVTUSI2SSZ : sse12_cvt_sint_3addr<0x2A, GR32, VR128X,
+ int_x86_avx512_cvtusi2ss, i32mem, loadi32, "cvtusi2ss{l}{z}",
+ SSE_CVT_Scalar, 0>, XS, EVEX_4V;
+defm Int_VCVTUSI2SS64Z : sse12_cvt_sint_3addr<0x2A, GR64, VR128X,
+ int_x86_avx512_cvtusi642ss, i64mem, loadi64, "cvtusi2ss{q}{z}",
+ SSE_CVT_Scalar, 0>, XS, EVEX_4V, VEX_W;
+defm Int_VCVTUSI2SDZ : sse12_cvt_sint_3addr<0x2A, GR32, VR128X,
+ int_x86_avx512_cvtusi2sd, i32mem, loadi32, "cvtusi2sd{l}{z}",
+ SSE_CVT_Scalar, 0>, XD, EVEX_4V;
+defm Int_VCVTUSI2SD64Z : sse12_cvt_sint_3addr<0x2A, GR64, VR128X,
+ int_x86_avx512_cvtusi642sd, i64mem, loadi64, "cvtusi2sd{q}{z}",
+ SSE_CVT_Scalar, 0>, XD, EVEX_4V, VEX_W;
+
+// Convert float/double to signed/unsigned int 32/64 with truncation
+defm Int_VCVTTSS2SIZ : avx512_cvt_s_int<0x2C, VR128X, GR32, int_x86_sse_cvttss2si,
+ ssmem, sse_load_f32, "cvttss2si{z}">,
+ XS, EVEX_CD8<32, CD8VT1>;
+defm Int_VCVTTSS2SI64Z : avx512_cvt_s_int<0x2C, VR128X, GR64,
+ int_x86_sse_cvttss2si64, ssmem, sse_load_f32,
+ "cvttss2si{z}">, XS, VEX_W,
+ EVEX_CD8<32, CD8VT1>;
+defm Int_VCVTTSD2SIZ : avx512_cvt_s_int<0x2C, VR128X, GR32, int_x86_sse2_cvttsd2si,
+ sdmem, sse_load_f64, "cvttsd2si{z}">, XD,
+ EVEX_CD8<64, CD8VT1>;
+defm Int_VCVTTSD2SI64Z : avx512_cvt_s_int<0x2C, VR128X, GR64,
+ int_x86_sse2_cvttsd2si64, sdmem, sse_load_f64,
+ "cvttsd2si{z}">, XD, VEX_W,
+ EVEX_CD8<64, CD8VT1>;
+defm Int_VCVTTSS2USIZ : avx512_cvt_s_int<0x78, VR128X, GR32,
+ int_x86_avx512_cvttss2usi, ssmem, sse_load_f32,
+                                   "cvttss2usi{z}">, XS, EVEX_CD8<32, CD8VT1>;
+defm Int_VCVTTSS2USI64Z : avx512_cvt_s_int<0x78, VR128X, GR64,
+ int_x86_avx512_cvttss2usi64, ssmem,
+ sse_load_f32, "cvttss2usi{z}">, XS, VEX_W,
+ EVEX_CD8<32, CD8VT1>;
+defm Int_VCVTTSD2USIZ : avx512_cvt_s_int<0x78, VR128X, GR32,
+ int_x86_avx512_cvttsd2usi,
+ sdmem, sse_load_f64, "cvttsd2usi{z}">, XD,
+ EVEX_CD8<64, CD8VT1>;
+defm Int_VCVTTSD2USI64Z : avx512_cvt_s_int<0x78, VR128X, GR64,
+ int_x86_avx512_cvttsd2usi64, sdmem,
+ sse_load_f64, "cvttsd2usi{z}">, XD, VEX_W,
+ EVEX_CD8<64, CD8VT1>;
+}
+
+multiclass avx512_cvt_s<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
+ SDNode OpNode, X86MemOperand x86memop, PatFrag ld_frag,
+ string asm> {
+ def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src),
+ !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
+ [(set DstRC:$dst, (OpNode SrcRC:$src))]>, EVEX;
+ def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src),
+ !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
+ [(set DstRC:$dst, (OpNode (ld_frag addr:$src)))]>, EVEX;
+}
+
+defm VCVTTSS2SIZ : avx512_cvt_s<0x2C, FR32X, GR32, fp_to_sint, f32mem,
+ loadf32, "cvttss2si{z}">, XS,
+ EVEX_CD8<32, CD8VT1>;
+defm VCVTTSS2USIZ : avx512_cvt_s<0x78, FR32X, GR32, fp_to_uint, f32mem,
+ loadf32, "cvttss2usi{z}">, XS,
+ EVEX_CD8<32, CD8VT1>;
+defm VCVTTSS2SI64Z : avx512_cvt_s<0x2C, FR32X, GR64, fp_to_sint, f32mem,
+ loadf32, "cvttss2si{z}">, XS, VEX_W,
+ EVEX_CD8<32, CD8VT1>;
+defm VCVTTSS2USI64Z : avx512_cvt_s<0x78, FR32X, GR64, fp_to_uint, f32mem,
+ loadf32, "cvttss2usi{z}">, XS, VEX_W,
+ EVEX_CD8<32, CD8VT1>;
+defm VCVTTSD2SIZ : avx512_cvt_s<0x2C, FR64X, GR32, fp_to_sint, f64mem,
+ loadf64, "cvttsd2si{z}">, XD,
+ EVEX_CD8<64, CD8VT1>;
+defm VCVTTSD2USIZ : avx512_cvt_s<0x78, FR64X, GR32, fp_to_uint, f64mem,
+ loadf64, "cvttsd2usi{z}">, XD,
+ EVEX_CD8<64, CD8VT1>;
+defm VCVTTSD2SI64Z : avx512_cvt_s<0x2C, FR64X, GR64, fp_to_sint, f64mem,
+ loadf64, "cvttsd2si{z}">, XD, VEX_W,
+ EVEX_CD8<64, CD8VT1>;
+defm VCVTTSD2USI64Z : avx512_cvt_s<0x78, FR64X, GR64, fp_to_uint, f64mem,
+ loadf64, "cvttsd2usi{z}">, XD, VEX_W,
+ EVEX_CD8<64, CD8VT1>;
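+
+// The "tt" forms truncate toward zero (C cast semantics), which is why they
+// can be selected directly from fp_to_sint/fp_to_uint; the non-truncating
+// forms above honor the current MXCSR rounding mode and are reachable only
+// through their intrinsics.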
+//===----------------------------------------------------------------------===//
+// AVX-512 Convert from float to double and back
+//===----------------------------------------------------------------------===//
+let neverHasSideEffects = 1 in {
+def VCVTSS2SDZrr : AVX512XSI<0x5A, MRMSrcReg, (outs FR64X:$dst),
+ (ins FR32X:$src1, FR32X:$src2),
+ "vcvtss2sd{z}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ []>, EVEX_4V, VEX_LIG, Sched<[WriteCvtF2F]>;
+let mayLoad = 1 in
+def VCVTSS2SDZrm : AVX512XSI<0x5A, MRMSrcMem, (outs FR64X:$dst),
+ (ins FR32X:$src1, f32mem:$src2),
+ "vcvtss2sd{z}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ []>, EVEX_4V, VEX_LIG, Sched<[WriteCvtF2FLd, ReadAfterLd]>,
+ EVEX_CD8<32, CD8VT1>;
+
+// Convert scalar double to scalar single
+def VCVTSD2SSZrr : AVX512XDI<0x5A, MRMSrcReg, (outs FR32X:$dst),
+ (ins FR64X:$src1, FR64X:$src2),
+ "vcvtsd2ss{z}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ []>, EVEX_4V, VEX_LIG, VEX_W, Sched<[WriteCvtF2F]>;
+let mayLoad = 1 in
+def VCVTSD2SSZrm : AVX512XDI<0x5A, MRMSrcMem, (outs FR32X:$dst),
+ (ins FR64X:$src1, f64mem:$src2),
+ "vcvtsd2ss{z}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ []>, EVEX_4V, VEX_LIG, VEX_W,
+ Sched<[WriteCvtF2FLd, ReadAfterLd]>, EVEX_CD8<64, CD8VT1>;
+}
+
+def : Pat<(f64 (fextend FR32X:$src)), (VCVTSS2SDZrr FR32X:$src, FR32X:$src)>,
+ Requires<[HasAVX512]>;
+def : Pat<(fextend (loadf32 addr:$src)),
+ (VCVTSS2SDZrm (f32 (IMPLICIT_DEF)), addr:$src)>, Requires<[HasAVX512]>;
+
+def : Pat<(extloadf32 addr:$src),
+ (VCVTSS2SDZrm (f32 (IMPLICIT_DEF)), addr:$src)>,
+ Requires<[HasAVX512, OptForSize]>;
+
+def : Pat<(extloadf32 addr:$src),
+ (VCVTSS2SDZrr (f32 (IMPLICIT_DEF)), (VMOVSSZrm addr:$src))>,
+ Requires<[HasAVX512, OptForSpeed]>;
+
+def : Pat<(f32 (fround FR64X:$src)), (VCVTSD2SSZrr FR64X:$src, FR64X:$src)>,
+ Requires<[HasAVX512]>;
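+
+// Note the split above: under OptForSize the f32 extending load folds into
+// VCVTSS2SDZrm (one instruction, smaller code), while under OptForSpeed the
+// value is loaded separately with VMOVSSZrm and converted register-to-register.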
+
+multiclass avx512_vcvt_fp<bits<8> opc, string asm, RegisterClass SrcRC,
+ RegisterClass DstRC, SDNode OpNode, PatFrag mem_frag,
+ X86MemOperand x86memop, ValueType OpVT, ValueType InVT,
+ Domain d> {
+let neverHasSideEffects = 1 in {
+ def rr : AVX512PI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src),
+ !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
+ [(set DstRC:$dst,
+ (OpVT (OpNode (InVT SrcRC:$src))))], d>, EVEX;
+ let mayLoad = 1 in
+ def rm : AVX512PI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src),
+ !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
+ [(set DstRC:$dst,
+ (OpVT (OpNode (InVT (bitconvert (mem_frag addr:$src))))))], d>, EVEX;
+} // neverHasSideEffects = 1
+}
+
+defm VCVTPD2PSZ : avx512_vcvt_fp<0x5A, "vcvtpd2ps", VR512, VR256X, fround,
+ memopv8f64, f512mem, v8f32, v8f64,
+ SSEPackedSingle>, EVEX_V512, VEX_W, OpSize,
+ EVEX_CD8<64, CD8VF>;
+
+defm VCVTPS2PDZ : avx512_vcvt_fp<0x5A, "vcvtps2pd", VR256X, VR512, fextend,
+ memopv4f64, f256mem, v8f64, v8f32,
+ SSEPackedDouble>, EVEX_V512, EVEX_CD8<32, CD8VH>;
+def : Pat<(v8f64 (extloadv8f32 addr:$src)),
+ (VCVTPS2PDZrm addr:$src)>;
+
+//===----------------------------------------------------------------------===//
+// AVX-512 Vector convert from sign integer to float/double
+//===----------------------------------------------------------------------===//
+
+defm VCVTDQ2PSZ : avx512_vcvt_fp<0x5B, "vcvtdq2ps", VR512, VR512, sint_to_fp,
+ memopv8i64, i512mem, v16f32, v16i32,
+ SSEPackedSingle>, EVEX_V512, EVEX_CD8<32, CD8VF>;
+
+defm VCVTDQ2PDZ : avx512_vcvt_fp<0xE6, "vcvtdq2pd", VR256X, VR512, sint_to_fp,
+ memopv4i64, i256mem, v8f64, v8i32,
+ SSEPackedDouble>, EVEX_V512, XS,
+ EVEX_CD8<32, CD8VH>;
+
+defm VCVTTPS2DQZ : avx512_vcvt_fp<0x5B, "vcvttps2dq", VR512, VR512, fp_to_sint,
+ memopv16f32, f512mem, v16i32, v16f32,
+ SSEPackedSingle>, EVEX_V512, XS,
+ EVEX_CD8<32, CD8VF>;
+
+defm VCVTTPD2DQZ : avx512_vcvt_fp<0xE6, "vcvttpd2dq", VR512, VR256X, fp_to_sint,
+ memopv8f64, f512mem, v8i32, v8f64,
+ SSEPackedDouble>, EVEX_V512, OpSize, VEX_W,
+ EVEX_CD8<64, CD8VF>;
+
+defm VCVTTPS2UDQZ : avx512_vcvt_fp<0x78, "vcvttps2udq", VR512, VR512, fp_to_uint,
+ memopv16f32, f512mem, v16i32, v16f32,
+ SSEPackedSingle>, EVEX_V512,
+ EVEX_CD8<32, CD8VF>;
+
+defm VCVTTPD2UDQZ : avx512_vcvt_fp<0x78, "vcvttpd2udq", VR512, VR256X, fp_to_uint,
+ memopv8f64, f512mem, v8i32, v8f64,
+ SSEPackedDouble>, EVEX_V512, VEX_W,
+ EVEX_CD8<64, CD8VF>;
+
+defm VCVTUDQ2PDZ : avx512_vcvt_fp<0x7A, "vcvtudq2pd", VR256X, VR512, uint_to_fp,
+                                 memopv4i64, i256mem, v8f64, v8i32,
+ SSEPackedDouble>, EVEX_V512, XS,
+ EVEX_CD8<32, CD8VH>;
+
+defm VCVTUDQ2PSZ : avx512_vcvt_fp<0x7A, "vcvtudq2ps", VR512, VR512, uint_to_fp,
+                                memopv16i32, i512mem, v16f32, v16i32,
+ SSEPackedSingle>, EVEX_V512, XD,
+ EVEX_CD8<32, CD8VF>;
+
+def : Pat<(v8i32 (fp_to_uint (v8f32 VR256X:$src1))),
+ (EXTRACT_SUBREG (v16i32 (VCVTTPS2UDQZrr
+ (v16f32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)))), sub_ymm)>;
+
+
+def : Pat<(int_x86_avx512_cvtdq2_ps_512 VR512:$src),
+ (VCVTDQ2PSZrr VR512:$src)>;
+def : Pat<(int_x86_avx512_cvtdq2_ps_512 (bitconvert (memopv8i64 addr:$src))),
+ (VCVTDQ2PSZrm addr:$src)>;
+
+def VCVTPS2DQZrr : AVX512BI<0x5B, MRMSrcReg, (outs VR512:$dst), (ins VR512:$src),
+ "vcvtps2dq\t{$src, $dst|$dst, $src}",
+ [(set VR512:$dst,
+ (int_x86_avx512_cvt_ps2dq_512 VR512:$src))],
+ IIC_SSE_CVT_PS_RR>, EVEX, EVEX_V512;
+def VCVTPS2DQZrm : AVX512BI<0x5B, MRMSrcMem, (outs VR512:$dst), (ins f512mem:$src),
+ "vcvtps2dq\t{$src, $dst|$dst, $src}",
+ [(set VR512:$dst,
+ (int_x86_avx512_cvt_ps2dq_512 (memopv16f32 addr:$src)))],
+ IIC_SSE_CVT_PS_RM>, EVEX, EVEX_V512, EVEX_CD8<32, CD8VF>;
+
+
+let Predicates = [HasAVX512] in {
+ def : Pat<(v8f32 (fround (loadv8f64 addr:$src))),
+ (VCVTPD2PSZrm addr:$src)>;
+ def : Pat<(v8f64 (extloadv8f32 addr:$src)),
+ (VCVTPS2PDZrm addr:$src)>;
+}
+
+//===----------------------------------------------------------------------===//
+// Half precision conversion instructions
+//===----------------------------------------------------------------------===//
+multiclass avx512_f16c_ph2ps<RegisterClass destRC, RegisterClass srcRC,
+ X86MemOperand x86memop, Intrinsic Int> {
+ def rr : AVX5128I<0x13, MRMSrcReg, (outs destRC:$dst), (ins srcRC:$src),
+ "vcvtph2ps\t{$src, $dst|$dst, $src}",
+ [(set destRC:$dst, (Int srcRC:$src))]>, EVEX;
+ let neverHasSideEffects = 1, mayLoad = 1 in
+ def rm : AVX5128I<0x13, MRMSrcMem, (outs destRC:$dst), (ins x86memop:$src),
+ "vcvtph2ps\t{$src, $dst|$dst, $src}", []>, EVEX;
+}
+
+multiclass avx512_f16c_ps2ph<RegisterClass destRC, RegisterClass srcRC,
+ X86MemOperand x86memop, Intrinsic Int> {
+ def rr : AVX512AIi8<0x1D, MRMDestReg, (outs destRC:$dst),
+ (ins srcRC:$src1, i32i8imm:$src2),
+ "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set destRC:$dst, (Int srcRC:$src1, imm:$src2))]>, EVEX;
+ let neverHasSideEffects = 1, mayStore = 1 in
+ def mr : AVX512AIi8<0x1D, MRMDestMem, (outs),
+ (ins x86memop:$dst, srcRC:$src1, i32i8imm:$src2),
+ "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, EVEX;
+}
+
+defm VCVTPH2PSZ : avx512_f16c_ph2ps<VR512, VR256X, f256mem,
+ int_x86_avx512_vcvtph2ps_512>, EVEX_V512,
+ EVEX_CD8<32, CD8VH>;
+defm VCVTPS2PHZ : avx512_f16c_ps2ph<VR256X, VR512, f256mem,
+ int_x86_avx512_vcvtps2ph_512>, EVEX_V512,
+ EVEX_CD8<32, CD8VH>;
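+
+// As with F16C, the immediate on vcvtps2ph is rounding control for the
+// narrowing conversion (imm[1:0] selects a static mode, imm[2] defers to
+// MXCSR); vcvtph2ps takes no immediate because widening half to single is
+// exact.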
+
+let Defs = [EFLAGS], Predicates = [HasAVX512] in {
+ defm VUCOMISSZ : sse12_ord_cmp<0x2E, FR32X, X86cmp, f32, f32mem, loadf32,
+ "ucomiss{z}">, TB, EVEX, VEX_LIG,
+ EVEX_CD8<32, CD8VT1>;
+ defm VUCOMISDZ : sse12_ord_cmp<0x2E, FR64X, X86cmp, f64, f64mem, loadf64,
+ "ucomisd{z}">, TB, OpSize, EVEX,
+ VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
+ let Pattern = []<dag> in {
+ defm VCOMISSZ : sse12_ord_cmp<0x2F, VR128X, undef, v4f32, f128mem, load,
+ "comiss{z}">, TB, EVEX, VEX_LIG,
+ EVEX_CD8<32, CD8VT1>;
+ defm VCOMISDZ : sse12_ord_cmp<0x2F, VR128X, undef, v2f64, f128mem, load,
+ "comisd{z}">, TB, OpSize, EVEX,
+ VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
+ }
+ defm Int_VUCOMISSZ : sse12_ord_cmp<0x2E, VR128X, X86ucomi, v4f32, f128mem,
+ load, "ucomiss">, TB, EVEX, VEX_LIG,
+ EVEX_CD8<32, CD8VT1>;
+ defm Int_VUCOMISDZ : sse12_ord_cmp<0x2E, VR128X, X86ucomi, v2f64, f128mem,
+ load, "ucomisd">, TB, OpSize, EVEX,
+ VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
+
+ defm Int_VCOMISSZ : sse12_ord_cmp<0x2F, VR128X, X86comi, v4f32, f128mem,
+ load, "comiss">, TB, EVEX, VEX_LIG,
+ EVEX_CD8<32, CD8VT1>;
+ defm Int_VCOMISDZ : sse12_ord_cmp<0x2F, VR128X, X86comi, v2f64, f128mem,
+ load, "comisd">, TB, OpSize, EVEX,
+ VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
+}
+
+/// avx512_fp_unop_p - AVX-512 unops in packed form.
+multiclass avx512_fp_unop_p<bits<8> opc, string OpcodeStr, SDNode OpNode> {
+ def PSZr : AVX5128I<opc, MRMSrcReg, (outs VR512:$dst), (ins VR512:$src),
+ !strconcat(OpcodeStr,
+ "ps\t{$src, $dst|$dst, $src}"),
+ [(set VR512:$dst, (v16f32 (OpNode VR512:$src)))]>,
+ EVEX, EVEX_V512;
+  def PSZm : AVX5128I<opc, MRMSrcMem, (outs VR512:$dst), (ins f512mem:$src),
+ !strconcat(OpcodeStr,
+ "ps\t{$src, $dst|$dst, $src}"),
+ [(set VR512:$dst, (OpNode (memopv16f32 addr:$src)))]>,
+ EVEX, EVEX_V512, EVEX_CD8<32, CD8VF>;
+ def PDZr : AVX5128I<opc, MRMSrcReg, (outs VR512:$dst), (ins VR512:$src),
+ !strconcat(OpcodeStr,
+ "pd\t{$src, $dst|$dst, $src}"),
+ [(set VR512:$dst, (v8f64 (OpNode VR512:$src)))]>,
+ EVEX, EVEX_V512, VEX_W;
+ def PDZm : AVX5128I<opc, MRMSrcMem, (outs VR512:$dst), (ins f512mem:$src),
+ !strconcat(OpcodeStr,
+ "pd\t{$src, $dst|$dst, $src}"),
+               [(set VR512:$dst, (OpNode (memopv8f64 addr:$src)))]>,
+ EVEX, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
+}
+
+/// avx512_fp_unop_p_int - AVX-512 intrinsics unops in packed forms.
+multiclass avx512_fp_unop_p_int<bits<8> opc, string OpcodeStr,
+ Intrinsic V16F32Int, Intrinsic V8F64Int> {
+ def PSZr_Int : AVX5128I<opc, MRMSrcReg, (outs VR512:$dst), (ins VR512:$src),
+ !strconcat(OpcodeStr,
+ "ps\t{$src, $dst|$dst, $src}"),
+ [(set VR512:$dst, (V16F32Int VR512:$src))]>,
+ EVEX, EVEX_V512;
+ def PSZm_Int : AVX5128I<opc, MRMSrcMem, (outs VR512:$dst), (ins f512mem:$src),
+ !strconcat(OpcodeStr,
+ "ps\t{$src, $dst|$dst, $src}"),
+ [(set VR512:$dst,
+ (V16F32Int (memopv16f32 addr:$src)))]>, EVEX,
+ EVEX_V512, EVEX_CD8<32, CD8VF>;
+ def PDZr_Int : AVX5128I<opc, MRMSrcReg, (outs VR512:$dst), (ins VR512:$src),
+ !strconcat(OpcodeStr,
+ "pd\t{$src, $dst|$dst, $src}"),
+ [(set VR512:$dst, (V8F64Int VR512:$src))]>,
+ EVEX, EVEX_V512, VEX_W;
+ def PDZm_Int : AVX5128I<opc, MRMSrcMem, (outs VR512:$dst), (ins f512mem:$src),
+ !strconcat(OpcodeStr,
+ "pd\t{$src, $dst|$dst, $src}"),
+ [(set VR512:$dst,
+ (V8F64Int (memopv8f64 addr:$src)))]>,
+ EVEX, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
+}
+
+/// avx512_fp_unop_s - AVX-512 unops in scalar form.
+multiclass avx512_fp_unop_s<bits<8> opc, string OpcodeStr> {
+ let hasSideEffects = 0 in {
+ def SSZr : AVX5128I<opc, MRMSrcReg, (outs FR32X:$dst),
+ (ins FR32X:$src1, FR32X:$src2),
+ !strconcat(OpcodeStr,
+ "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>, EVEX_4V;
+ let mayLoad = 1 in {
+ def SSZm : AVX5128I<opc, MRMSrcMem, (outs FR32X:$dst),
+ (ins FR32X:$src1, f32mem:$src2),
+ !strconcat(OpcodeStr,
+ "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>, EVEX_4V, EVEX_CD8<32, CD8VT1>;
+ def SSZm_Int : AVX5128I<opc, MRMSrcMem, (outs VR128X:$dst),
+ (ins VR128X:$src1, ssmem:$src2),
+ !strconcat(OpcodeStr,
+ "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>, EVEX_4V, EVEX_CD8<32, CD8VT1>;
+ }
+ def SDZr : AVX5128I<opc, MRMSrcReg, (outs FR64X:$dst),
+ (ins FR64X:$src1, FR64X:$src2),
+ !strconcat(OpcodeStr,
+ "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>,
+ EVEX_4V, VEX_W;
+ let mayLoad = 1 in {
+ def SDZm : AVX5128I<opc, MRMSrcMem, (outs FR64X:$dst),
+ (ins FR64X:$src1, f64mem:$src2),
+ !strconcat(OpcodeStr,
+ "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>,
+ EVEX_4V, VEX_W, EVEX_CD8<64, CD8VT1>;
+ def SDZm_Int : AVX5128I<opc, MRMSrcMem, (outs VR128X:$dst),
+ (ins VR128X:$src1, sdmem:$src2),
+ !strconcat(OpcodeStr,
+ "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>, EVEX_4V, VEX_W, EVEX_CD8<64, CD8VT1>;
+ }
+}
+}
+
+defm VRCP14 : avx512_fp_unop_s<0x4D, "vrcp14">,
+ avx512_fp_unop_p<0x4C, "vrcp14", X86frcp>,
+ avx512_fp_unop_p_int<0x4C, "vrcp14",
+ int_x86_avx512_rcp14_ps_512, int_x86_avx512_rcp14_pd_512>;
+
+defm VRSQRT14 : avx512_fp_unop_s<0x4F, "vrsqrt14">,
+ avx512_fp_unop_p<0x4E, "vrsqrt14", X86frsqrt>,
+ avx512_fp_unop_p_int<0x4E, "vrsqrt14",
+ int_x86_avx512_rsqrt14_ps_512, int_x86_avx512_rsqrt14_pd_512>;
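+
+// The numeric suffix states the guaranteed accuracy of the approximation:
+// the AVX-512F VRCP14/VRSQRT14 forms have relative error below 2^-14, and
+// the ERI VRCP28/VRSQRT28 forms defined below tighten that to 2^-28.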
+
+def : Pat<(int_x86_avx512_rsqrt14_ss VR128X:$src),
+ (COPY_TO_REGCLASS (VRSQRT14SSZr (f32 (IMPLICIT_DEF)),
+ (COPY_TO_REGCLASS VR128X:$src, FR32)),
+ VR128X)>;
+def : Pat<(int_x86_avx512_rsqrt14_ss sse_load_f32:$src),
+ (VRSQRT14SSZm_Int (v4f32 (IMPLICIT_DEF)), sse_load_f32:$src)>;
+
+def : Pat<(int_x86_avx512_rcp14_ss VR128X:$src),
+ (COPY_TO_REGCLASS (VRCP14SSZr (f32 (IMPLICIT_DEF)),
+ (COPY_TO_REGCLASS VR128X:$src, FR32)),
+ VR128X)>;
+def : Pat<(int_x86_avx512_rcp14_ss sse_load_f32:$src),
+ (VRCP14SSZm_Int (v4f32 (IMPLICIT_DEF)), sse_load_f32:$src)>;
+
+let AddedComplexity = 20, Predicates = [HasERI] in {
+defm VRCP28 : avx512_fp_unop_s<0xCB, "vrcp28">,
+ avx512_fp_unop_p<0xCA, "vrcp28", X86frcp>,
+ avx512_fp_unop_p_int<0xCA, "vrcp28",
+ int_x86_avx512_rcp28_ps_512, int_x86_avx512_rcp28_pd_512>;
+
+defm VRSQRT28 : avx512_fp_unop_s<0xCD, "vrsqrt28">,
+ avx512_fp_unop_p<0xCC, "vrsqrt28", X86frsqrt>,
+ avx512_fp_unop_p_int<0xCC, "vrsqrt28",
+ int_x86_avx512_rsqrt28_ps_512, int_x86_avx512_rsqrt28_pd_512>;
+}
+
+let Predicates = [HasERI] in {
+ def : Pat<(int_x86_avx512_rsqrt28_ss VR128X:$src),
+ (COPY_TO_REGCLASS (VRSQRT28SSZr (f32 (IMPLICIT_DEF)),
+ (COPY_TO_REGCLASS VR128X:$src, FR32)),
+ VR128X)>;
+ def : Pat<(int_x86_avx512_rsqrt28_ss sse_load_f32:$src),
+ (VRSQRT28SSZm_Int (v4f32 (IMPLICIT_DEF)), sse_load_f32:$src)>;
+
+ def : Pat<(int_x86_avx512_rcp28_ss VR128X:$src),
+ (COPY_TO_REGCLASS (VRCP28SSZr (f32 (IMPLICIT_DEF)),
+ (COPY_TO_REGCLASS VR128X:$src, FR32)),
+ VR128X)>;
+ def : Pat<(int_x86_avx512_rcp28_ss sse_load_f32:$src),
+ (VRCP28SSZm_Int (v4f32 (IMPLICIT_DEF)), sse_load_f32:$src)>;
+}
+multiclass avx512_sqrt_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ Intrinsic V16F32Int, Intrinsic V8F64Int,
+ OpndItins itins_s, OpndItins itins_d> {
+ def PSZrr :AVX512PSI<opc, MRMSrcReg, (outs VR512:$dst), (ins VR512:$src),
+                !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
+ [(set VR512:$dst, (v16f32 (OpNode VR512:$src)))], itins_s.rr>,
+ EVEX, EVEX_V512;
+
+ let mayLoad = 1 in
+ def PSZrm : AVX512PSI<opc, MRMSrcMem, (outs VR512:$dst), (ins f512mem:$src),
+             !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
+ [(set VR512:$dst,
+ (OpNode (v16f32 (bitconvert (memopv16f32 addr:$src)))))],
+ itins_s.rm>, EVEX, EVEX_V512, EVEX_CD8<32, CD8VF>;
+
+ def PDZrr : AVX512PDI<opc, MRMSrcReg, (outs VR512:$dst), (ins VR512:$src),
+                !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
+ [(set VR512:$dst, (v8f64 (OpNode VR512:$src)))], itins_d.rr>,
+ EVEX, EVEX_V512;
+
+ let mayLoad = 1 in
+ def PDZrm : AVX512PDI<opc, MRMSrcMem, (outs VR512:$dst), (ins f512mem:$src),
+             !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
+ [(set VR512:$dst, (OpNode
+ (v8f64 (bitconvert (memopv16f32 addr:$src)))))],
+ itins_d.rm>, EVEX, EVEX_V512, EVEX_CD8<64, CD8VF>;
+
+ def PSZr_Int : AVX512PSI<opc, MRMSrcReg, (outs VR512:$dst), (ins VR512:$src),
+ !strconcat(OpcodeStr,
+ "ps\t{$src, $dst|$dst, $src}"),
+ [(set VR512:$dst, (V16F32Int VR512:$src))]>,
+ EVEX, EVEX_V512;
+ def PSZm_Int : AVX512PSI<opc, MRMSrcMem, (outs VR512:$dst), (ins f512mem:$src),
+ !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
+ [(set VR512:$dst,
+ (V16F32Int (memopv16f32 addr:$src)))]>, EVEX,
+ EVEX_V512, EVEX_CD8<32, CD8VF>;
+ def PDZr_Int : AVX512PDI<opc, MRMSrcReg, (outs VR512:$dst), (ins VR512:$src),
+ !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
+ [(set VR512:$dst, (V8F64Int VR512:$src))]>,
+ EVEX, EVEX_V512, VEX_W;
+ def PDZm_Int : AVX512PDI<opc, MRMSrcMem, (outs VR512:$dst), (ins f512mem:$src),
+ !strconcat(OpcodeStr,
+ "pd\t{$src, $dst|$dst, $src}"),
+ [(set VR512:$dst, (V8F64Int (memopv8f64 addr:$src)))]>,
+ EVEX, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
+}
+
+multiclass avx512_sqrt_scalar<bits<8> opc, string OpcodeStr,
+ Intrinsic F32Int, Intrinsic F64Int,
+ OpndItins itins_s, OpndItins itins_d> {
+ def SSZr : SI<opc, MRMSrcReg, (outs FR32X:$dst),
+ (ins FR32X:$src1, FR32X:$src2),
+ !strconcat(OpcodeStr,
+ "ss{z}\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [], itins_s.rr>, XS, EVEX_4V;
+ def SSZr_Int : SIi8<opc, MRMSrcReg, (outs VR128X:$dst),
+ (ins VR128X:$src1, VR128X:$src2),
+ !strconcat(OpcodeStr,
+ "ss{z}\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR128X:$dst,
+ (F32Int VR128X:$src1, VR128X:$src2))],
+ itins_s.rr>, XS, EVEX_4V;
+ let mayLoad = 1 in {
+ def SSZm : SI<opc, MRMSrcMem, (outs FR32X:$dst),
+ (ins FR32X:$src1, f32mem:$src2),
+ !strconcat(OpcodeStr,
+ "ss{z}\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [], itins_s.rm>, XS, EVEX_4V, EVEX_CD8<32, CD8VT1>;
+ def SSZm_Int : SIi8<opc, MRMSrcMem, (outs VR128X:$dst),
+ (ins VR128X:$src1, ssmem:$src2),
+ !strconcat(OpcodeStr,
+ "ss{z}\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR128X:$dst,
+ (F32Int VR128X:$src1, sse_load_f32:$src2))],
+ itins_s.rm>, XS, EVEX_4V, EVEX_CD8<32, CD8VT1>;
+ }
+ def SDZr : SI<opc, MRMSrcReg, (outs FR64X:$dst),
+ (ins FR64X:$src1, FR64X:$src2),
+ !strconcat(OpcodeStr,
+ "sd{z}\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>,
+ XD, EVEX_4V, VEX_W;
+ def SDZr_Int : SIi8<opc, MRMSrcReg, (outs VR128X:$dst),
+ (ins VR128X:$src1, VR128X:$src2),
+ !strconcat(OpcodeStr,
+ "sd{z}\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR128X:$dst,
+ (F64Int VR128X:$src1, VR128X:$src2))],
+ itins_s.rr>, XD, EVEX_4V, VEX_W;
+ let mayLoad = 1 in {
+ def SDZm : SI<opc, MRMSrcMem, (outs FR64X:$dst),
+ (ins FR64X:$src1, f64mem:$src2),
+ !strconcat(OpcodeStr,
+ "sd{z}\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>,
+ XD, EVEX_4V, VEX_W, EVEX_CD8<64, CD8VT1>;
+ def SDZm_Int : SIi8<opc, MRMSrcMem, (outs VR128X:$dst),
+ (ins VR128X:$src1, sdmem:$src2),
+ !strconcat(OpcodeStr,
+ "sd{z}\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR128X:$dst,
+ (F64Int VR128X:$src1, sse_load_f64:$src2))]>,
+ XD, EVEX_4V, VEX_W, EVEX_CD8<64, CD8VT1>;
+ }
+}
+
+
+defm VSQRT : avx512_sqrt_scalar<0x51, "sqrt",
+ int_x86_avx512_sqrt_ss, int_x86_avx512_sqrt_sd,
+ SSE_SQRTSS, SSE_SQRTSD>,
+ avx512_sqrt_packed<0x51, "vsqrt", fsqrt,
+ int_x86_avx512_sqrt_ps_512, int_x86_avx512_sqrt_pd_512,
+ SSE_SQRTPS, SSE_SQRTPD>;
+
+let Predicates = [HasAVX512] in {
+ def : Pat<(f32 (fsqrt FR32X:$src)),
+ (VSQRTSSZr (f32 (IMPLICIT_DEF)), FR32X:$src)>;
+ def : Pat<(f32 (fsqrt (load addr:$src))),
+ (VSQRTSSZm (f32 (IMPLICIT_DEF)), addr:$src)>,
+ Requires<[OptForSize]>;
+ def : Pat<(f64 (fsqrt FR64X:$src)),
+ (VSQRTSDZr (f64 (IMPLICIT_DEF)), FR64X:$src)>;
+ def : Pat<(f64 (fsqrt (load addr:$src))),
+ (VSQRTSDZm (f64 (IMPLICIT_DEF)), addr:$src)>,
+ Requires<[OptForSize]>;
+
+ def : Pat<(f32 (X86frsqrt FR32X:$src)),
+ (VRSQRT14SSZr (f32 (IMPLICIT_DEF)), FR32X:$src)>;
+ def : Pat<(f32 (X86frsqrt (load addr:$src))),
+ (VRSQRT14SSZm (f32 (IMPLICIT_DEF)), addr:$src)>,
+ Requires<[OptForSize]>;
+
+ def : Pat<(f32 (X86frcp FR32X:$src)),
+ (VRCP14SSZr (f32 (IMPLICIT_DEF)), FR32X:$src)>;
+ def : Pat<(f32 (X86frcp (load addr:$src))),
+ (VRCP14SSZm (f32 (IMPLICIT_DEF)), addr:$src)>,
+ Requires<[OptForSize]>;
+
+ def : Pat<(int_x86_sse_sqrt_ss VR128X:$src),
+ (COPY_TO_REGCLASS (VSQRTSSZr (f32 (IMPLICIT_DEF)),
+ (COPY_TO_REGCLASS VR128X:$src, FR32)),
+ VR128X)>;
+ def : Pat<(int_x86_sse_sqrt_ss sse_load_f32:$src),
+ (VSQRTSSZm_Int (v4f32 (IMPLICIT_DEF)), sse_load_f32:$src)>;
+
+ def : Pat<(int_x86_sse2_sqrt_sd VR128X:$src),
+ (COPY_TO_REGCLASS (VSQRTSDZr (f64 (IMPLICIT_DEF)),
+ (COPY_TO_REGCLASS VR128X:$src, FR64)),
+ VR128X)>;
+ def : Pat<(int_x86_sse2_sqrt_sd sse_load_f64:$src),
+ (VSQRTSDZm_Int (v2f64 (IMPLICIT_DEF)), sse_load_f64:$src)>;
+}
+
+
+multiclass avx512_fp_unop_rm<bits<8> opcps, bits<8> opcpd, string OpcodeStr,
+ X86MemOperand x86memop, RegisterClass RC,
+ PatFrag mem_frag32, PatFrag mem_frag64,
+ Intrinsic V4F32Int, Intrinsic V2F64Int,
+ CD8VForm VForm> {
+let ExeDomain = SSEPackedSingle in {
+  // Vector intrinsic operation, reg
+ def PSr : AVX512AIi8<opcps, MRMSrcReg,
+ (outs RC:$dst), (ins RC:$src1, i32i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set RC:$dst, (V4F32Int RC:$src1, imm:$src2))]>;
+
+ // Vector intrinsic operation, mem
+ def PSm : AVX512AIi8<opcps, MRMSrcMem,
+ (outs RC:$dst), (ins x86memop:$src1, i32i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set RC:$dst,
+                     (V4F32Int (mem_frag32 addr:$src1), imm:$src2))]>,
+ EVEX_CD8<32, VForm>;
+} // ExeDomain = SSEPackedSingle
+
+let ExeDomain = SSEPackedDouble in {
+ // Vector intrinsic operation, reg
+ def PDr : AVX512AIi8<opcpd, MRMSrcReg,
+ (outs RC:$dst), (ins RC:$src1, i32i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set RC:$dst, (V2F64Int RC:$src1, imm:$src2))]>;
+
+ // Vector intrinsic operation, mem
+ def PDm : AVX512AIi8<opcpd, MRMSrcMem,
+ (outs RC:$dst), (ins x86memop:$src1, i32i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set RC:$dst,
+                     (V2F64Int (mem_frag64 addr:$src1), imm:$src2))]>,
+ EVEX_CD8<64, VForm>;
+} // ExeDomain = SSEPackedDouble
+}
+
+multiclass avx512_fp_binop_rm<bits<8> opcss, bits<8> opcsd,
+ string OpcodeStr,
+ Intrinsic F32Int,
+ Intrinsic F64Int> {
+let ExeDomain = GenericDomain in {
+ // Operation, reg.
+ let hasSideEffects = 0 in
+ def SSr : AVX512AIi8<opcss, MRMSrcReg,
+ (outs FR32X:$dst), (ins FR32X:$src1, FR32X:$src2, i32i8imm:$src3),
+ !strconcat(OpcodeStr,
+ "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ []>;
+
+ // Intrinsic operation, reg.
+ def SSr_Int : AVX512AIi8<opcss, MRMSrcReg,
+ (outs VR128X:$dst), (ins VR128X:$src1, VR128X:$src2, i32i8imm:$src3),
+ !strconcat(OpcodeStr,
+ "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ [(set VR128X:$dst, (F32Int VR128X:$src1, VR128X:$src2, imm:$src3))]>;
+
+ // Intrinsic operation, mem.
+ def SSm : AVX512AIi8<opcss, MRMSrcMem, (outs VR128X:$dst),
+ (ins VR128X:$src1, ssmem:$src2, i32i8imm:$src3),
+ !strconcat(OpcodeStr,
+ "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ [(set VR128X:$dst, (F32Int VR128X:$src1,
+ sse_load_f32:$src2, imm:$src3))]>,
+ EVEX_CD8<32, CD8VT1>;
+
+ // Operation, reg.
+ let hasSideEffects = 0 in
+ def SDr : AVX512AIi8<opcsd, MRMSrcReg,
+ (outs FR64X:$dst), (ins FR64X:$src1, FR64X:$src2, i32i8imm:$src3),
+ !strconcat(OpcodeStr,
+ "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ []>, VEX_W;
+
+ // Intrinsic operation, reg.
+ def SDr_Int : AVX512AIi8<opcsd, MRMSrcReg,
+ (outs VR128X:$dst), (ins VR128X:$src1, VR128X:$src2, i32i8imm:$src3),
+ !strconcat(OpcodeStr,
+ "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ [(set VR128X:$dst, (F64Int VR128X:$src1, VR128X:$src2, imm:$src3))]>,
+ VEX_W;
+
+ // Intrinsic operation, mem.
+ def SDm : AVX512AIi8<opcsd, MRMSrcMem,
+ (outs VR128X:$dst), (ins VR128X:$src1, sdmem:$src2, i32i8imm:$src3),
+ !strconcat(OpcodeStr,
+ "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ [(set VR128X:$dst,
+ (F64Int VR128X:$src1, sse_load_f64:$src2, imm:$src3))]>,
+ VEX_W, EVEX_CD8<64, CD8VT1>;
+} // ExeDomain = GenericDomain
+}
+
+let Predicates = [HasAVX512] in {
+ defm VRNDSCALE : avx512_fp_binop_rm<0x0A, 0x0B, "vrndscale",
+ int_x86_avx512_rndscale_ss,
+ int_x86_avx512_rndscale_sd>, EVEX_4V;
+
+  defm VRNDSCALEZ : avx512_fp_unop_rm<0x08, 0x09, "vrndscale", f512mem, VR512,
+ memopv16f32, memopv8f64,
+ int_x86_avx512_rndscale_ps_512,
+ int_x86_avx512_rndscale_pd_512, CD8VF>,
+ EVEX, EVEX_V512;
+}
+
+def : Pat<(ffloor FR32X:$src),
+ (VRNDSCALESSr (f32 (IMPLICIT_DEF)), FR32X:$src, (i32 0x1))>;
+def : Pat<(f64 (ffloor FR64X:$src)),
+ (VRNDSCALESDr (f64 (IMPLICIT_DEF)), FR64X:$src, (i32 0x1))>;
+def : Pat<(f32 (fnearbyint FR32X:$src)),
+ (VRNDSCALESSr (f32 (IMPLICIT_DEF)), FR32X:$src, (i32 0xC))>;
+def : Pat<(f64 (fnearbyint FR64X:$src)),
+ (VRNDSCALESDr (f64 (IMPLICIT_DEF)), FR64X:$src, (i32 0xC))>;
+def : Pat<(f32 (fceil FR32X:$src)),
+ (VRNDSCALESSr (f32 (IMPLICIT_DEF)), FR32X:$src, (i32 0x2))>;
+def : Pat<(f64 (fceil FR64X:$src)),
+ (VRNDSCALESDr (f64 (IMPLICIT_DEF)), FR64X:$src, (i32 0x2))>;
+def : Pat<(f32 (frint FR32X:$src)),
+ (VRNDSCALESSr (f32 (IMPLICIT_DEF)), FR32X:$src, (i32 0x4))>;
+def : Pat<(f64 (frint FR64X:$src)),
+ (VRNDSCALESDr (f64 (IMPLICIT_DEF)), FR64X:$src, (i32 0x4))>;
+def : Pat<(f32 (ftrunc FR32X:$src)),
+ (VRNDSCALESSr (f32 (IMPLICIT_DEF)), FR32X:$src, (i32 0x3))>;
+def : Pat<(f64 (ftrunc FR64X:$src)),
+ (VRNDSCALESDr (f64 (IMPLICIT_DEF)), FR64X:$src, (i32 0x3))>;
+
+def : Pat<(v16f32 (ffloor VR512:$src)),
+ (VRNDSCALEZPSr VR512:$src, (i32 0x1))>;
+def : Pat<(v16f32 (fnearbyint VR512:$src)),
+ (VRNDSCALEZPSr VR512:$src, (i32 0xC))>;
+def : Pat<(v16f32 (fceil VR512:$src)),
+ (VRNDSCALEZPSr VR512:$src, (i32 0x2))>;
+def : Pat<(v16f32 (frint VR512:$src)),
+ (VRNDSCALEZPSr VR512:$src, (i32 0x4))>;
+def : Pat<(v16f32 (ftrunc VR512:$src)),
+ (VRNDSCALEZPSr VR512:$src, (i32 0x3))>;
+
+def : Pat<(v8f64 (ffloor VR512:$src)),
+ (VRNDSCALEZPDr VR512:$src, (i32 0x1))>;
+def : Pat<(v8f64 (fnearbyint VR512:$src)),
+ (VRNDSCALEZPDr VR512:$src, (i32 0xC))>;
+def : Pat<(v8f64 (fceil VR512:$src)),
+ (VRNDSCALEZPDr VR512:$src, (i32 0x2))>;
+def : Pat<(v8f64 (frint VR512:$src)),
+ (VRNDSCALEZPDr VR512:$src, (i32 0x4))>;
+def : Pat<(v8f64 (ftrunc VR512:$src)),
+ (VRNDSCALEZPDr VR512:$src, (i32 0x3))>;
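+
+// Decoding the VRNDSCALE immediates used above: imm[1:0] is the rounding
+// mode (00 nearest, 01 down, 10 up, 11 truncate), imm[2] defers to the
+// MXCSR rounding mode, and imm[3] suppresses precision exceptions. Hence
+// 0x1 = ffloor, 0x2 = fceil, 0x3 = ftrunc, 0x4 = frint (current mode), and
+// 0xC = fnearbyint (current mode, inexact exception suppressed).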
+
+//===----------------------------------------------------------------------===//
+// Integer truncate and extend operations
+//===----------------------------------------------------------------------===//
+
+multiclass avx512_trunc_sat<bits<8> opc, string OpcodeStr,
+ RegisterClass dstRC, RegisterClass srcRC,
+ RegisterClass KRC, X86MemOperand x86memop> {
+ def rr : AVX512XS8I<opc, MRMDestReg, (outs dstRC:$dst),
+ (ins srcRC:$src),
+ !strconcat(OpcodeStr,"\t{$src, $dst|$dst, $src}"),
+ []>, EVEX;
+
+ def krr : AVX512XS8I<opc, MRMDestReg, (outs dstRC:$dst),
+ (ins KRC:$mask, srcRC:$src),
+ !strconcat(OpcodeStr,
+ "\t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}"),
+ []>, EVEX, EVEX_KZ;
+
+ def mr : AVX512XS8I<opc, MRMDestMem, (outs), (ins x86memop:$dst, srcRC:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ []>, EVEX;
+}
+defm VPMOVQB : avx512_trunc_sat<0x32, "vpmovqb", VR128X, VR512, VK8WM,
+ i128mem>, EVEX_V512, EVEX_CD8<8, CD8VO>;
+defm VPMOVSQB : avx512_trunc_sat<0x22, "vpmovsqb", VR128X, VR512, VK8WM,
+ i128mem>, EVEX_V512, EVEX_CD8<8, CD8VO>;
+defm VPMOVUSQB : avx512_trunc_sat<0x12, "vpmovusqb", VR128X, VR512, VK8WM,
+ i128mem>, EVEX_V512, EVEX_CD8<8, CD8VO>;
+defm VPMOVQW : avx512_trunc_sat<0x34, "vpmovqw", VR128X, VR512, VK8WM,
+ i128mem>, EVEX_V512, EVEX_CD8<16, CD8VQ>;
+defm VPMOVSQW : avx512_trunc_sat<0x24, "vpmovsqw", VR128X, VR512, VK8WM,
+ i128mem>, EVEX_V512, EVEX_CD8<16, CD8VQ>;
+defm VPMOVUSQW : avx512_trunc_sat<0x14, "vpmovusqw", VR128X, VR512, VK8WM,
+ i128mem>, EVEX_V512, EVEX_CD8<16, CD8VQ>;
+defm VPMOVQD : avx512_trunc_sat<0x35, "vpmovqd", VR256X, VR512, VK8WM,
+ i256mem>, EVEX_V512, EVEX_CD8<32, CD8VH>;
+defm VPMOVSQD : avx512_trunc_sat<0x25, "vpmovsqd", VR256X, VR512, VK8WM,
+ i256mem>, EVEX_V512, EVEX_CD8<32, CD8VH>;
+defm VPMOVUSQD : avx512_trunc_sat<0x15, "vpmovusqd", VR256X, VR512, VK8WM,
+ i256mem>, EVEX_V512, EVEX_CD8<32, CD8VH>;
+defm VPMOVDW : avx512_trunc_sat<0x33, "vpmovdw", VR256X, VR512, VK16WM,
+ i256mem>, EVEX_V512, EVEX_CD8<16, CD8VH>;
+defm VPMOVSDW : avx512_trunc_sat<0x23, "vpmovsdw", VR256X, VR512, VK16WM,
+ i256mem>, EVEX_V512, EVEX_CD8<16, CD8VH>;
+defm VPMOVUSDW : avx512_trunc_sat<0x13, "vpmovusdw", VR256X, VR512, VK16WM,
+ i256mem>, EVEX_V512, EVEX_CD8<16, CD8VH>;
+defm VPMOVDB : avx512_trunc_sat<0x31, "vpmovdb", VR128X, VR512, VK16WM,
+ i128mem>, EVEX_V512, EVEX_CD8<8, CD8VQ>;
+defm VPMOVSDB : avx512_trunc_sat<0x21, "vpmovsdb", VR128X, VR512, VK16WM,
+ i128mem>, EVEX_V512, EVEX_CD8<8, CD8VQ>;
+defm VPMOVUSDB : avx512_trunc_sat<0x11, "vpmovusdb", VR128X, VR512, VK16WM,
+ i128mem>, EVEX_V512, EVEX_CD8<8, CD8VQ>;
+
+def : Pat<(v16i8 (X86vtrunc (v8i64 VR512:$src))), (VPMOVQBrr VR512:$src)>;
+def : Pat<(v8i16 (X86vtrunc (v8i64 VR512:$src))), (VPMOVQWrr VR512:$src)>;
+def : Pat<(v16i16 (X86vtrunc (v16i32 VR512:$src))), (VPMOVDWrr VR512:$src)>;
+def : Pat<(v16i8 (X86vtrunc (v16i32 VR512:$src))), (VPMOVDBrr VR512:$src)>;
+def : Pat<(v8i32 (X86vtrunc (v8i64 VR512:$src))), (VPMOVQDrr VR512:$src)>;
+
+def : Pat<(v16i8 (X86vtruncm VK16WM:$mask, (v16i32 VR512:$src))),
+ (VPMOVDBkrr VK16WM:$mask, VR512:$src)>;
+def : Pat<(v16i16 (X86vtruncm VK16WM:$mask, (v16i32 VR512:$src))),
+ (VPMOVDWkrr VK16WM:$mask, VR512:$src)>;
+def : Pat<(v8i16 (X86vtruncm VK8WM:$mask, (v8i64 VR512:$src))),
+ (VPMOVQWkrr VK8WM:$mask, VR512:$src)>;
+def : Pat<(v8i32 (X86vtruncm VK8WM:$mask, (v8i64 VR512:$src))),
+ (VPMOVQDkrr VK8WM:$mask, VR512:$src)>;
+
+
+multiclass avx512_extend<bits<8> opc, string OpcodeStr, RegisterClass DstRC,
+ RegisterClass SrcRC, SDNode OpNode, PatFrag mem_frag,
+ X86MemOperand x86memop, ValueType OpVT, ValueType InVT> {
+
+ def rr : AVX5128I<opc, MRMSrcReg, (outs DstRC:$dst),
+ (ins SrcRC:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set DstRC:$dst, (OpVT (OpNode (InVT SrcRC:$src))))]>, EVEX;
+ def rm : AVX5128I<opc, MRMSrcMem, (outs DstRC:$dst),
+ (ins x86memop:$src),
+ !strconcat(OpcodeStr,"\t{$src, $dst|$dst, $src}"),
+ [(set DstRC:$dst,
+ (OpVT (OpNode (InVT (bitconvert (mem_frag addr:$src))))))]>,
+ EVEX;
+}
+
+defm VPMOVZXBDZ: avx512_extend<0x31, "vpmovzxbd", VR512, VR128X, X86vzext,
+ memopv2i64, i128mem, v16i32, v16i8>, EVEX_V512,
+ EVEX_CD8<8, CD8VQ>;
+defm VPMOVZXBQZ: avx512_extend<0x32, "vpmovzxbq", VR512, VR128X, X86vzext,
+ memopv2i64, i128mem, v8i64, v16i8>, EVEX_V512,
+ EVEX_CD8<8, CD8VO>;
+defm VPMOVZXWDZ: avx512_extend<0x33, "vpmovzxwd", VR512, VR256X, X86vzext,
+ memopv4i64, i256mem, v16i32, v16i16>, EVEX_V512,
+ EVEX_CD8<16, CD8VH>;
+defm VPMOVZXWQZ: avx512_extend<0x34, "vpmovzxwq", VR512, VR128X, X86vzext,
+ memopv2i64, i128mem, v8i64, v8i16>, EVEX_V512,
+ EVEX_CD8<16, CD8VQ>;
+defm VPMOVZXDQZ: avx512_extend<0x35, "vpmovzxdq", VR512, VR256X, X86vzext,
+ memopv4i64, i256mem, v8i64, v8i32>, EVEX_V512,
+ EVEX_CD8<32, CD8VH>;
+
+defm VPMOVSXBDZ: avx512_extend<0x21, "vpmovsxbd", VR512, VR128X, X86vsext,
+ memopv2i64, i128mem, v16i32, v16i8>, EVEX_V512,
+ EVEX_CD8<8, CD8VQ>;
+defm VPMOVSXBQZ: avx512_extend<0x22, "vpmovsxbq", VR512, VR128X, X86vsext,
+ memopv2i64, i128mem, v8i64, v16i8>, EVEX_V512,
+ EVEX_CD8<8, CD8VO>;
+defm VPMOVSXWDZ: avx512_extend<0x23, "vpmovsxwd", VR512, VR256X, X86vsext,
+ memopv4i64, i256mem, v16i32, v16i16>, EVEX_V512,
+ EVEX_CD8<16, CD8VH>;
+defm VPMOVSXWQZ: avx512_extend<0x24, "vpmovsxwq", VR512, VR128X, X86vsext,
+ memopv2i64, i128mem, v8i64, v8i16>, EVEX_V512,
+ EVEX_CD8<16, CD8VQ>;
+defm VPMOVSXDQZ: avx512_extend<0x25, "vpmovsxdq", VR512, VR256X, X86vsext,
+ memopv4i64, i256mem, v8i64, v8i32>, EVEX_V512,
+ EVEX_CD8<32, CD8VH>;
+
+//===----------------------------------------------------------------------===//
+// GATHER - SCATTER Operations
+//===----------------------------------------------------------------------===//
+
+multiclass avx512_gather<bits<8> opc, string OpcodeStr, RegisterClass KRC,
+ RegisterClass RC, X86MemOperand memop> {
+let mayLoad = 1,
+ Constraints = "@earlyclobber $dst, $src1 = $dst, $mask = $mask_wb" in
+ def rm : AVX5128I<opc, MRMSrcMem, (outs RC:$dst, KRC:$mask_wb),
+ (ins RC:$src1, KRC:$mask, memop:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
+ []>, EVEX, EVEX_K;
+}
+defm VGATHERDPDZ : avx512_gather<0x92, "vgatherdpd", VK8WM, VR512, vy64xmem>,
+ EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
+defm VGATHERDPSZ : avx512_gather<0x92, "vgatherdps", VK16WM, VR512, vz32mem>,
+ EVEX_V512, EVEX_CD8<32, CD8VT1>;
+
+defm VGATHERQPDZ : avx512_gather<0x93, "vgatherqpd", VK8WM, VR512, vz64mem>,
+ EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
+defm VGATHERQPSZ : avx512_gather<0x93, "vgatherqps", VK8WM, VR256X, vz64mem>,
+ EVEX_V512, EVEX_CD8<32, CD8VT1>;
+
+defm VPGATHERDQZ : avx512_gather<0x90, "vpgatherdq", VK8WM, VR512, vy64xmem>,
+ EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
+defm VPGATHERDDZ : avx512_gather<0x90, "vpgatherdd", VK16WM, VR512, vz32mem>,
+ EVEX_V512, EVEX_CD8<32, CD8VT1>;
+
+defm VPGATHERQQZ : avx512_gather<0x91, "vpgatherqq", VK8WM, VR512, vz64mem>,
+ EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
+defm VPGATHERQDZ : avx512_gather<0x91, "vpgatherqd", VK8WM, VR256X, vz64mem>,
+ EVEX_V512, EVEX_CD8<32, CD8VT1>;
+
+multiclass avx512_scatter<bits<8> opc, string OpcodeStr, RegisterClass KRC,
+ RegisterClass RC, X86MemOperand memop> {
+let mayStore = 1, Constraints = "$mask = $mask_wb" in
+ def mr : AVX5128I<opc, MRMDestMem, (outs KRC:$mask_wb),
+ (ins memop:$dst, KRC:$mask, RC:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
+ []>, EVEX, EVEX_K;
+}
+
+defm VSCATTERDPDZ : avx512_scatter<0xA2, "vscatterdpd", VK8WM, VR512, vy64xmem>,
+ EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
+defm VSCATTERDPSZ : avx512_scatter<0xA2, "vscatterdps", VK16WM, VR512, vz32mem>,
+ EVEX_V512, EVEX_CD8<32, CD8VT1>;
+
+defm VSCATTERQPDZ : avx512_scatter<0xA3, "vscatterqpd", VK8WM, VR512, vz64mem>,
+ EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
+defm VSCATTERQPSZ : avx512_scatter<0xA3, "vscatterqps", VK8WM, VR256X, vz64mem>,
+ EVEX_V512, EVEX_CD8<32, CD8VT1>;
+
+defm VPSCATTERDQZ : avx512_scatter<0xA0, "vpscatterdq", VK8WM, VR512, vy64xmem>,
+ EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
+defm VPSCATTERDDZ : avx512_scatter<0xA0, "vpscatterdd", VK16WM, VR512, vz32mem>,
+ EVEX_V512, EVEX_CD8<32, CD8VT1>;
+
+defm VPSCATTERQQZ : avx512_scatter<0xA1, "vpscatterqq", VK8WM, VR512, vz64mem>,
+ EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
+defm VPSCATTERQDZ : avx512_scatter<0xA1, "vpscatterqd", VK8WM, VR256X, vz64mem>,
+ EVEX_V512, EVEX_CD8<32, CD8VT1>;
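+
+// Gathers and scatters write the mask register back ($mask_wb): an element's
+// mask bit is cleared once that element completes, so the instruction can be
+// restarted after a fault without repeating finished lanes. The gather
+// destination is marked @earlyclobber because it must not overlap the index
+// vector.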
+
+//===----------------------------------------------------------------------===//
+// VSHUFPS - VSHUFPD Operations
+//===----------------------------------------------------------------------===//
+
+multiclass avx512_shufp<RegisterClass RC, X86MemOperand x86memop,
+ ValueType vt, string OpcodeStr, PatFrag mem_frag,
+ Domain d> {
+ def rmi : AVX512PIi8<0xC6, MRMSrcMem, (outs RC:$dst),
+ (ins RC:$src1, x86memop:$src2, i8imm:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ [(set RC:$dst, (vt (X86Shufp RC:$src1, (mem_frag addr:$src2),
+ (i8 imm:$src3))))], d, IIC_SSE_SHUFP>,
+ EVEX_4V, Sched<[WriteShuffleLd, ReadAfterLd]>;
+ def rri : AVX512PIi8<0xC6, MRMSrcReg, (outs RC:$dst),
+ (ins RC:$src1, RC:$src2, i8imm:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ [(set RC:$dst, (vt (X86Shufp RC:$src1, RC:$src2,
+ (i8 imm:$src3))))], d, IIC_SSE_SHUFP>,
+ EVEX_4V, Sched<[WriteShuffle]>;
+}
+
+defm VSHUFPSZ : avx512_shufp<VR512, f512mem, v16f32, "vshufps", memopv16f32,
+ SSEPackedSingle>, EVEX_V512, EVEX_CD8<32, CD8VF>;
+defm VSHUFPDZ : avx512_shufp<VR512, f512mem, v8f64, "vshufpd", memopv8f64,
+ SSEPackedDouble>, OpSize, VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;
+
+def : Pat<(v16i32 (X86Shufp VR512:$src1, VR512:$src2, (i8 imm:$imm))),
+ (VSHUFPSZrri VR512:$src1, VR512:$src2, imm:$imm)>;
+def : Pat<(v16i32 (X86Shufp VR512:$src1,
+ (memopv16i32 addr:$src2), (i8 imm:$imm))),
+ (VSHUFPSZrmi VR512:$src1, addr:$src2, imm:$imm)>;
+
+def : Pat<(v8i64 (X86Shufp VR512:$src1, VR512:$src2, (i8 imm:$imm))),
+ (VSHUFPDZrri VR512:$src1, VR512:$src2, imm:$imm)>;
+def : Pat<(v8i64 (X86Shufp VR512:$src1,
+ (memopv8i64 addr:$src2), (i8 imm:$imm))),
+ (VSHUFPDZrmi VR512:$src1, addr:$src2, imm:$imm)>;
+
+multiclass avx512_alignr<string OpcodeStr, RegisterClass RC,
+ X86MemOperand x86memop> {
+ def rri : AVX512AIi8<0x03, MRMSrcReg, (outs RC:$dst),
+ (ins RC:$src1, RC:$src2, i8imm:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ []>, EVEX_4V;
+ let mayLoad = 1 in
+ def rmi : AVX512AIi8<0x03, MRMSrcMem, (outs RC:$dst),
+ (ins RC:$src1, x86memop:$src2, i8imm:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ []>, EVEX_4V;
+}
+defm VALIGND : avx512_alignr<"valignd", VR512, i512mem>,
+ EVEX_V512, EVEX_CD8<32, CD8VF>;
+defm VALIGNQ : avx512_alignr<"valignq", VR512, i512mem>,
+ VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;
+
+def : Pat<(v16f32 (X86PAlignr VR512:$src1, VR512:$src2, (i8 imm:$imm))),
+ (VALIGNDrri VR512:$src2, VR512:$src1, imm:$imm)>;
+def : Pat<(v8f64 (X86PAlignr VR512:$src1, VR512:$src2, (i8 imm:$imm))),
+ (VALIGNQrri VR512:$src2, VR512:$src1, imm:$imm)>;
+def : Pat<(v16i32 (X86PAlignr VR512:$src1, VR512:$src2, (i8 imm:$imm))),
+ (VALIGNDrri VR512:$src2, VR512:$src1, imm:$imm)>;
+def : Pat<(v8i64 (X86PAlignr VR512:$src1, VR512:$src2, (i8 imm:$imm))),
+ (VALIGNQrri VR512:$src2, VR512:$src1, imm:$imm)>;
+
+multiclass avx512_vpabs<bits<8> opc, string OpcodeStr, RegisterClass RC,
+ X86MemOperand x86memop> {
+ def rr : AVX5128I<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
+ EVEX;
+  def rm : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
+ (ins x86memop:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
+ EVEX;
+}
+
+defm VPABSD : avx512_vpabs<0x1E, "vpabsd", VR512, i512mem>, EVEX_V512,
+ EVEX_CD8<32, CD8VF>;
+defm VPABSQ : avx512_vpabs<0x1F, "vpabsq", VR512, i512mem>, EVEX_V512, VEX_W,
+ EVEX_CD8<64, CD8VF>;
+
+multiclass avx512_conflict<bits<8> opc, string OpcodeStr,
+ RegisterClass RC, RegisterClass KRC, PatFrag memop_frag,
+ X86MemOperand x86memop, PatFrag scalar_mfrag,
+ X86MemOperand x86scalar_mop, string BrdcstStr,
+ Intrinsic Int, Intrinsic maskInt, Intrinsic maskzInt> {
+ def rr : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
+ (ins RC:$src),
+                    !strconcat(OpcodeStr, "\t{$src, ${dst}|${dst}, $src}"),
+ [(set RC:$dst, (Int RC:$src))]>, EVEX;
+ def rm : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
+ (ins x86memop:$src),
+ !strconcat(OpcodeStr, "\t{$src, ${dst}|${dst}, $src}"),
+ [(set RC:$dst, (Int (memop_frag addr:$src)))]>, EVEX;
+ def rmb : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
+ (ins x86scalar_mop:$src),
+ !strconcat(OpcodeStr, "\t{${src}", BrdcstStr,
+ ", ${dst}|${dst}, ${src}", BrdcstStr, "}"),
+ []>, EVEX, EVEX_B;
+ def rrkz : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
+ (ins KRC:$mask, RC:$src),
+ !strconcat(OpcodeStr,
+ "\t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}"),
+ [(set RC:$dst, (maskzInt KRC:$mask, RC:$src))]>, EVEX, EVEX_KZ;
+ def rmkz : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
+ (ins KRC:$mask, x86memop:$src),
+ !strconcat(OpcodeStr,
+ "\t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}"),
+ [(set RC:$dst, (maskzInt KRC:$mask, (memop_frag addr:$src)))]>,
+ EVEX, EVEX_KZ;
+ def rmbkz : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
+ (ins KRC:$mask, x86scalar_mop:$src),
+ !strconcat(OpcodeStr, "\t{${src}", BrdcstStr,
+ ", ${dst} {${mask}} {z}|${dst} {${mask}} {z}, ${src}",
+ BrdcstStr, "}"),
+ []>, EVEX, EVEX_KZ, EVEX_B;
+
+ let Constraints = "$src1 = $dst" in {
+ def rrk : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
+ (ins RC:$src1, KRC:$mask, RC:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
+ [(set RC:$dst, (maskInt RC:$src1, KRC:$mask, RC:$src2))]>, EVEX, EVEX_K;
+ def rmk : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
+ (ins RC:$src1, KRC:$mask, x86memop:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
+ [(set RC:$dst, (maskInt RC:$src1, KRC:$mask, (memop_frag addr:$src2)))]>, EVEX, EVEX_K;
+ def rmbk : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
+ (ins RC:$src1, KRC:$mask, x86scalar_mop:$src2),
+ !strconcat(OpcodeStr, "\t{${src2}", BrdcstStr,
+ ", ${dst} {${mask}}|${dst} {${mask}}, ${src2}", BrdcstStr, "}"),
+ []>, EVEX, EVEX_K, EVEX_B;
+ }
+}
+
+let Predicates = [HasCDI] in {
+defm VPCONFLICTD : avx512_conflict<0xC4, "vpconflictd", VR512, VK16WM,
+ memopv16i32, i512mem, loadi32, i32mem, "{1to16}",
+ int_x86_avx512_conflict_d_512,
+ int_x86_avx512_conflict_d_mask_512,
+ int_x86_avx512_conflict_d_maskz_512>,
+ EVEX_V512, EVEX_CD8<32, CD8VF>;
+
+defm VPCONFLICTQ : avx512_conflict<0xC4, "vpconflictq", VR512, VK8WM,
+ memopv8i64, i512mem, loadi64, i64mem, "{1to8}",
+ int_x86_avx512_conflict_q_512,
+ int_x86_avx512_conflict_q_mask_512,
+ int_x86_avx512_conflict_q_maskz_512>,
+ EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
+}
diff --git a/lib/Target/X86/X86InstrArithmetic.td b/lib/Target/X86/X86InstrArithmetic.td
index 9ce02ba..7fc9c44 100644
--- a/lib/Target/X86/X86InstrArithmetic.td
+++ b/lib/Target/X86/X86InstrArithmetic.td
@@ -497,6 +497,21 @@ def DEC64_32r : I<0xFF, MRM1r, (outs GR32:$dst), (ins GR32:$src1),
Requires<[In64BitMode]>;
} // isConvertibleToThreeAddress = 1, CodeSize = 2
+let isCodeGenOnly = 1, CodeSize = 2 in {
+def INC32_16r : I<0xFF, MRM0r, (outs GR16:$dst), (ins GR16:$src1),
+ "inc{w}\t$dst", [], IIC_UNARY_REG>,
+ OpSize, Requires<[In32BitMode]>;
+def INC32_32r : I<0xFF, MRM0r, (outs GR32:$dst), (ins GR32:$src1),
+ "inc{l}\t$dst", [], IIC_UNARY_REG>,
+ Requires<[In32BitMode]>;
+def DEC32_16r : I<0xFF, MRM1r, (outs GR16:$dst), (ins GR16:$src1),
+ "dec{w}\t$dst", [], IIC_UNARY_REG>,
+ OpSize, Requires<[In32BitMode]>;
+def DEC32_32r : I<0xFF, MRM1r, (outs GR32:$dst), (ins GR32:$src1),
+ "dec{l}\t$dst", [], IIC_UNARY_REG>,
+ Requires<[In32BitMode]>;
+} // isCodeGenOnly = 1, CodeSize = 2
+
} // Constraints = "$src1 = $dst", SchedRW
let CodeSize = 2, SchedRW = [WriteALULd, WriteRMW] in {
@@ -578,7 +593,6 @@ let CodeSize = 2, SchedRW = [WriteALULd, WriteRMW] in {
} // CodeSize = 2, SchedRW
} // Defs = [EFLAGS]
-
/// X86TypeInfo - This is a bunch of information that describes relevant X86
/// information about value types. For example, it can tell you which
/// register class and preferred load to use.
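
Editor's note: as a rough illustration of what such a record carries, a hypothetical C++ mirror is sketched below. The field names follow the typeinfo.* references the binary-op patterns in this file consult (RegClass, MemOperand, LoadNode, ImmEncoding); the real definition is a TableGen class, not C++.

// Hypothetical C++ analogue of the X86TypeInfo TableGen record; the struct
// and field comments are illustrative only.
struct X86TypeInfoSketch {
  const char *RegClass;    // e.g. "GR32"
  const char *MemOperand;  // e.g. "i32mem"
  const char *LoadNode;    // preferred load fragment, e.g. "loadi32"
  const char *ImmEncoding; // immediate encoding kind, e.g. "Imm32"
};
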
@@ -726,20 +740,25 @@ class BinOpRR_RFF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
: BinOpRR<opcode, mnemonic, typeinfo, (outs typeinfo.RegClass:$dst),
[(set typeinfo.RegClass:$dst, EFLAGS,
(opnode typeinfo.RegClass:$src1, typeinfo.RegClass:$src2,
- EFLAGS))], IIC_BIN_NONMEM>;
+ EFLAGS))], IIC_BIN_CARRY_NONMEM>;
// BinOpRR_Rev - Instructions like "add reg, reg, reg" (reversed encoding).
-class BinOpRR_Rev<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo>
+class BinOpRR_Rev<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ InstrItinClass itin = IIC_BIN_NONMEM>
: ITy<opcode, MRMSrcReg, typeinfo,
(outs typeinfo.RegClass:$dst),
(ins typeinfo.RegClass:$src1, typeinfo.RegClass:$src2),
- mnemonic, "{$src2, $dst|$dst, $src2}", [], IIC_BIN_NONMEM>,
+ mnemonic, "{$src2, $dst|$dst, $src2}", [], itin>,
Sched<[WriteALU]> {
// The disassembler should know about this, but not the asmparser.
let isCodeGenOnly = 1;
let hasSideEffects = 0;
}
+// BinOpRR_RFF_Rev - Instructions like "adc reg, reg, reg" (reversed encoding).
+class BinOpRR_RFF_Rev<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo>
+ : BinOpRR_Rev<opcode, mnemonic, typeinfo, IIC_BIN_CARRY_NONMEM>;
+
// BinOpRR_F_Rev - Instructions like "cmp reg, reg" (reversed encoding).
class BinOpRR_F_Rev<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo>
: ITy<opcode, MRMSrcReg, typeinfo, (outs),
@@ -753,10 +772,11 @@ class BinOpRR_F_Rev<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo>
// BinOpRM - Instructions like "add reg, reg, [mem]".
class BinOpRM<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
- dag outlist, list<dag> pattern>
+ dag outlist, list<dag> pattern,
+ InstrItinClass itin = IIC_BIN_MEM>
: ITy<opcode, MRMSrcMem, typeinfo, outlist,
(ins typeinfo.RegClass:$src1, typeinfo.MemOperand:$src2),
- mnemonic, "{$src2, $src1|$src1, $src2}", pattern, IIC_BIN_NONMEM>,
+ mnemonic, "{$src2, $src1|$src1, $src2}", pattern, itin>,
Sched<[WriteALULd, ReadAfterLd]>;
// BinOpRM_R - Instructions like "add reg, reg, [mem]".
@@ -786,14 +806,15 @@ class BinOpRM_RFF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
: BinOpRM<opcode, mnemonic, typeinfo, (outs typeinfo.RegClass:$dst),
[(set typeinfo.RegClass:$dst, EFLAGS,
(opnode typeinfo.RegClass:$src1, (typeinfo.LoadNode addr:$src2),
- EFLAGS))]>;
+ EFLAGS))], IIC_BIN_CARRY_MEM>;
// BinOpRI - Instructions like "add reg, reg, imm".
class BinOpRI<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
- Format f, dag outlist, list<dag> pattern>
+ Format f, dag outlist, list<dag> pattern,
+ InstrItinClass itin = IIC_BIN_NONMEM>
: ITy<opcode, f, typeinfo, outlist,
(ins typeinfo.RegClass:$src1, typeinfo.ImmOperand:$src2),
- mnemonic, "{$src2, $src1|$src1, $src2}", pattern, IIC_BIN_NONMEM>,
+ mnemonic, "{$src2, $src1|$src1, $src2}", pattern, itin>,
Sched<[WriteALU]> {
let ImmT = typeinfo.ImmEncoding;
}
@@ -824,14 +845,15 @@ class BinOpRI_RFF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
: BinOpRI<opcode, mnemonic, typeinfo, f, (outs typeinfo.RegClass:$dst),
[(set typeinfo.RegClass:$dst, EFLAGS,
(opnode typeinfo.RegClass:$src1, typeinfo.ImmOperator:$src2,
- EFLAGS))]>;
+ EFLAGS))], IIC_BIN_CARRY_NONMEM>;
// BinOpRI8 - Instructions like "add reg, reg, imm8".
class BinOpRI8<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
- Format f, dag outlist, list<dag> pattern>
+ Format f, dag outlist, list<dag> pattern,
+ InstrItinClass itin = IIC_BIN_NONMEM>
: ITy<opcode, f, typeinfo, outlist,
(ins typeinfo.RegClass:$src1, typeinfo.Imm8Operand:$src2),
- mnemonic, "{$src2, $src1|$src1, $src2}", pattern, IIC_BIN_NONMEM>,
+ mnemonic, "{$src2, $src1|$src1, $src2}", pattern, itin>,
Sched<[WriteALU]> {
let ImmT = Imm8; // Always 8-bit immediate.
}
@@ -863,14 +885,14 @@ class BinOpRI8_RFF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
: BinOpRI8<opcode, mnemonic, typeinfo, f, (outs typeinfo.RegClass:$dst),
[(set typeinfo.RegClass:$dst, EFLAGS,
(opnode typeinfo.RegClass:$src1, typeinfo.Imm8Operator:$src2,
- EFLAGS))]>;
+ EFLAGS))], IIC_BIN_CARRY_NONMEM>;
// BinOpMR - Instructions like "add [mem], reg".
class BinOpMR<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
- list<dag> pattern>
+ list<dag> pattern, InstrItinClass itin = IIC_BIN_MEM>
: ITy<opcode, MRMDestMem, typeinfo,
(outs), (ins typeinfo.MemOperand:$dst, typeinfo.RegClass:$src),
- mnemonic, "{$src, $dst|$dst, $src}", pattern, IIC_BIN_MEM>,
+ mnemonic, "{$src, $dst|$dst, $src}", pattern, itin>,
Sched<[WriteALULd, WriteRMW]>;
// BinOpMR_RMW - Instructions like "add [mem], reg".
@@ -886,7 +908,7 @@ class BinOpMR_RMW_FF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
: BinOpMR<opcode, mnemonic, typeinfo,
[(store (opnode (load addr:$dst), typeinfo.RegClass:$src, EFLAGS),
addr:$dst),
- (implicit EFLAGS)]>;
+ (implicit EFLAGS)], IIC_BIN_CARRY_MEM>;
// BinOpMR_F - Instructions like "cmp [mem], reg".
class BinOpMR_F<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
@@ -896,10 +918,11 @@ class BinOpMR_F<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
// BinOpMI - Instructions like "add [mem], imm".
class BinOpMI<string mnemonic, X86TypeInfo typeinfo,
- Format f, list<dag> pattern, bits<8> opcode = 0x80>
+ Format f, list<dag> pattern, bits<8> opcode = 0x80,
+ InstrItinClass itin = IIC_BIN_MEM>
: ITy<opcode, f, typeinfo,
(outs), (ins typeinfo.MemOperand:$dst, typeinfo.ImmOperand:$src),
- mnemonic, "{$src, $dst|$dst, $src}", pattern, IIC_BIN_MEM>,
+ mnemonic, "{$src, $dst|$dst, $src}", pattern, itin>,
Sched<[WriteALULd, WriteRMW]> {
let ImmT = typeinfo.ImmEncoding;
}
@@ -917,7 +940,7 @@ class BinOpMI_RMW_FF<string mnemonic, X86TypeInfo typeinfo,
: BinOpMI<mnemonic, typeinfo, f,
[(store (opnode (typeinfo.VT (load addr:$dst)),
typeinfo.ImmOperator:$src, EFLAGS), addr:$dst),
- (implicit EFLAGS)]>;
+ (implicit EFLAGS)], 0x80, IIC_BIN_CARRY_MEM>;
// BinOpMI_F - Instructions like "cmp [mem], imm".
class BinOpMI_F<string mnemonic, X86TypeInfo typeinfo,
@@ -929,10 +952,11 @@ class BinOpMI_F<string mnemonic, X86TypeInfo typeinfo,
// BinOpMI8 - Instructions like "add [mem], imm8".
class BinOpMI8<string mnemonic, X86TypeInfo typeinfo,
- Format f, list<dag> pattern>
+ Format f, list<dag> pattern,
+ InstrItinClass itin = IIC_BIN_MEM>
: ITy<0x82, f, typeinfo,
(outs), (ins typeinfo.MemOperand:$dst, typeinfo.Imm8Operand:$src),
- mnemonic, "{$src, $dst|$dst, $src}", pattern, IIC_BIN_MEM>,
+ mnemonic, "{$src, $dst|$dst, $src}", pattern, itin>,
Sched<[WriteALULd, WriteRMW]> {
let ImmT = Imm8; // Always 8-bit immediate.
}
@@ -951,7 +975,7 @@ class BinOpMI8_RMW_FF<string mnemonic, X86TypeInfo typeinfo,
: BinOpMI8<mnemonic, typeinfo, f,
[(store (opnode (load addr:$dst),
typeinfo.Imm8Operator:$src, EFLAGS), addr:$dst),
- (implicit EFLAGS)]>;
+ (implicit EFLAGS)], IIC_BIN_CARRY_MEM>;
// BinOpMI8_F - Instructions like "cmp [mem], imm8".
class BinOpMI8_F<string mnemonic, X86TypeInfo typeinfo,
@@ -962,10 +986,11 @@ class BinOpMI8_F<string mnemonic, X86TypeInfo typeinfo,
// BinOpAI - Instructions like "add %eax, %eax, imm", that imp-def EFLAGS.
class BinOpAI<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
- Register areg, string operands>
+ Register areg, string operands,
+ InstrItinClass itin = IIC_BIN_NONMEM>
: ITy<opcode, RawFrm, typeinfo,
(outs), (ins typeinfo.ImmOperand:$src),
- mnemonic, operands, []>, Sched<[WriteALU]> {
+ mnemonic, operands, [], itin>, Sched<[WriteALU]> {
let ImmT = typeinfo.ImmEncoding;
let Uses = [areg];
let Defs = [areg, EFLAGS];
@@ -976,7 +1001,8 @@ class BinOpAI<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
// and use EFLAGS.
class BinOpAI_FF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
Register areg, string operands>
- : BinOpAI<opcode, mnemonic, typeinfo, areg, operands> {
+ : BinOpAI<opcode, mnemonic, typeinfo, areg, operands,
+ IIC_BIN_CARRY_NONMEM> {
let Uses = [areg, EFLAGS];
}
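
Editor's note: the *_FF carry classes exist because adc/sbb both read and write EFLAGS. A minimal C++ sketch of the multi-word addition they enable (no intrinsics, purely illustrative):

#include <cstdint>

// Plain C++ equivalent of "add lo, lo; adc hi, hi": the carry out of the low
// word (CF after ADD) feeds the high-word addition (ADC uses and defs EFLAGS).
void add128(uint64_t ALo, uint64_t AHi, uint64_t BLo, uint64_t BHi,
            uint64_t &Lo, uint64_t &Hi) {
  Lo = ALo + BLo;
  uint64_t Carry = Lo < ALo; // 1 iff the low-word addition wrapped
  Hi = AHi + BHi + Carry;
}
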
@@ -1070,10 +1096,10 @@ multiclass ArithBinOp_RFF<bits<8> BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4,
def NAME#64rr : BinOpRR_RFF<BaseOpc, mnemonic, Xi64, opnode>;
} // isCommutable
- def NAME#8rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi8>;
- def NAME#16rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi16>;
- def NAME#32rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi32>;
- def NAME#64rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi64>;
+ def NAME#8rr_REV : BinOpRR_RFF_Rev<BaseOpc2, mnemonic, Xi8>;
+ def NAME#16rr_REV : BinOpRR_RFF_Rev<BaseOpc2, mnemonic, Xi16>;
+ def NAME#32rr_REV : BinOpRR_RFF_Rev<BaseOpc2, mnemonic, Xi32>;
+ def NAME#64rr_REV : BinOpRR_RFF_Rev<BaseOpc2, mnemonic, Xi64>;
def NAME#8rm : BinOpRM_RFF<BaseOpc2, mnemonic, Xi8 , opnode>;
def NAME#16rm : BinOpRM_RFF<BaseOpc2, mnemonic, Xi16, opnode>;
diff --git a/lib/Target/X86/X86InstrCompiler.td b/lib/Target/X86/X86InstrCompiler.td
index 8969946..7d10b67 100644
--- a/lib/Target/X86/X86InstrCompiler.td
+++ b/lib/Target/X86/X86InstrCompiler.td
@@ -884,6 +884,24 @@ let Uses = [EFLAGS], usesCustomInserter = 1 in {
[(set VR256:$dst,
(v4i64 (X86cmov VR256:$t, VR256:$f, imm:$cond,
EFLAGS)))]>;
+ def CMOV_V8I64 : I<0, Pseudo,
+ (outs VR512:$dst), (ins VR512:$t, VR512:$f, i8imm:$cond),
+ "#CMOV_V8I64 PSEUDO!",
+ [(set VR512:$dst,
+ (v8i64 (X86cmov VR512:$t, VR512:$f, imm:$cond,
+ EFLAGS)))]>;
+ def CMOV_V8F64 : I<0, Pseudo,
+ (outs VR512:$dst), (ins VR512:$t, VR512:$f, i8imm:$cond),
+ "#CMOV_V8F64 PSEUDO!",
+ [(set VR512:$dst,
+ (v8f64 (X86cmov VR512:$t, VR512:$f, imm:$cond,
+ EFLAGS)))]>;
+ def CMOV_V16F32 : I<0, Pseudo,
+ (outs VR512:$dst), (ins VR512:$t, VR512:$f, i8imm:$cond),
+ "#CMOV_V16F32 PSEUDO!",
+ [(set VR512:$dst,
+ (v16f32 (X86cmov VR512:$t, VR512:$f, imm:$cond,
+ EFLAGS)))]>;
}
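
Editor's note: functionally, each CMOV_V* pseudo is a whole-vector select on a scalar condition; since there is no 512-bit conditional-move instruction, the custom inserter expands it into a branch diamond. A branchy scalar model, with illustrative types and names:

#include <array>

using V8I64 = std::array<long long, 8>;

// What CMOV_V8I64 computes once the condition has been evaluated: pick one of
// two 512-bit values. The inserter realizes this with a conditional branch.
V8I64 cmovV8I64(bool Cond, const V8I64 &T, const V8I64 &F) {
  return Cond ? T : F;
}
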
@@ -917,8 +935,6 @@ def : Pat<(store (i32 (X86Wrapper texternalsym:$src)), addr:$dst),
def : Pat<(store (i32 (X86Wrapper tblockaddress:$src)), addr:$dst),
(MOV32mi addr:$dst, tblockaddress:$src)>;
-
-
// ConstantPool GlobalAddress, ExternalSymbol, and JumpTable when not in small
// code model mode, should use 'movabs'. FIXME: This is really a hack, the
// 'movabs' predicate should handle this sort of thing.
@@ -966,14 +982,12 @@ def : Pat<(store (i64 (X86Wrapper tblockaddress:$src)), addr:$dst),
(MOV64mi32 addr:$dst, tblockaddress:$src)>,
Requires<[NearData, IsStatic]>;
-
-
// Calls
// tls has some funny stuff here...
// This corresponds to movabs $foo@tpoff, %rax
def : Pat<(i64 (X86Wrapper tglobaltlsaddr :$dst)),
- (MOV64ri tglobaltlsaddr :$dst)>;
+ (MOV64ri32 tglobaltlsaddr :$dst)>;
// This corresponds to add $foo@tpoff, %rax
def : Pat<(add GR64:$src1, (X86Wrapper tglobaltlsaddr :$dst)),
(ADD64ri32 GR64:$src1, tglobaltlsaddr :$dst)>;
diff --git a/lib/Target/X86/X86InstrControl.td b/lib/Target/X86/X86InstrControl.td
index 0e69651..e4ccc06 100644
--- a/lib/Target/X86/X86InstrControl.td
+++ b/lib/Target/X86/X86InstrControl.td
@@ -49,10 +49,12 @@ let isTerminator = 1, isReturn = 1, isBarrier = 1,
let isBarrier = 1, isBranch = 1, isTerminator = 1, SchedRW = [WriteJump] in {
def JMP_4 : Ii32PCRel<0xE9, RawFrm, (outs), (ins brtarget:$dst),
"jmp\t$dst", [(br bb:$dst)], IIC_JMP_REL>;
+ let hasSideEffects = 0 in
def JMP_1 : Ii8PCRel<0xEB, RawFrm, (outs), (ins brtarget8:$dst),
"jmp\t$dst", [], IIC_JMP_REL>;
  // FIXME: Intel syntax for JMP64pcrel32 such that it is not ambiguous
// with JMP_1.
+ let hasSideEffects = 0 in
def JMP64pcrel32 : I<0xE9, RawFrm, (outs), (ins brtarget:$dst),
"jmpq\t$dst", [], IIC_JMP_REL>;
}
@@ -60,6 +62,7 @@ let isBarrier = 1, isBranch = 1, isTerminator = 1, SchedRW = [WriteJump] in {
// Conditional Branches.
let isBranch = 1, isTerminator = 1, Uses = [EFLAGS], SchedRW = [WriteJump] in {
multiclass ICBr<bits<8> opc1, bits<8> opc4, string asm, PatFrag Cond> {
+ let hasSideEffects = 0 in
def _1 : Ii8PCRel <opc1, RawFrm, (outs), (ins brtarget8:$dst), asm, [],
IIC_Jcc>;
def _4 : Ii32PCRel<opc4, RawFrm, (outs), (ins brtarget:$dst), asm,
@@ -85,7 +88,7 @@ defm JLE : ICBr<0x7E, 0x8E, "jle\t$dst", X86_COND_LE>;
defm JG : ICBr<0x7F, 0x8F, "jg\t$dst" , X86_COND_G>;
// jcx/jecx/jrcx instructions.
-let isBranch = 1, isTerminator = 1, SchedRW = [WriteJump] in {
+let isBranch = 1, isTerminator = 1, hasSideEffects = 0, SchedRW = [WriteJump] in {
// These are the 32-bit versions of this instruction for the asmparser. In
// 32-bit mode, the address size prefix is jcxz and the unprefixed version is
// jecxz.
diff --git a/lib/Target/X86/X86InstrExtension.td b/lib/Target/X86/X86InstrExtension.td
index 28954c6..4090550 100644
--- a/lib/Target/X86/X86InstrExtension.td
+++ b/lib/Target/X86/X86InstrExtension.td
@@ -14,26 +14,26 @@
let neverHasSideEffects = 1 in {
let Defs = [AX], Uses = [AL] in
def CBW : I<0x98, RawFrm, (outs), (ins),
- "{cbtw|cbw}", []>, OpSize; // AX = signext(AL)
+ "{cbtw|cbw}", [], IIC_CBW>, OpSize; // AX = signext(AL)
let Defs = [EAX], Uses = [AX] in
def CWDE : I<0x98, RawFrm, (outs), (ins),
- "{cwtl|cwde}", []>; // EAX = signext(AX)
+ "{cwtl|cwde}", [], IIC_CBW>; // EAX = signext(AX)
let Defs = [AX,DX], Uses = [AX] in
def CWD : I<0x99, RawFrm, (outs), (ins),
- "{cwtd|cwd}", []>, OpSize; // DX:AX = signext(AX)
+ "{cwtd|cwd}", [], IIC_CBW>, OpSize; // DX:AX = signext(AX)
let Defs = [EAX,EDX], Uses = [EAX] in
def CDQ : I<0x99, RawFrm, (outs), (ins),
- "{cltd|cdq}", []>; // EDX:EAX = signext(EAX)
+ "{cltd|cdq}", [], IIC_CBW>; // EDX:EAX = signext(EAX)
let Defs = [RAX], Uses = [EAX] in
def CDQE : RI<0x98, RawFrm, (outs), (ins),
- "{cltq|cdqe}", []>; // RAX = signext(EAX)
+ "{cltq|cdqe}", [], IIC_CBW>; // RAX = signext(EAX)
let Defs = [RAX,RDX], Uses = [RAX] in
def CQO : RI<0x99, RawFrm, (outs), (ins),
- "{cqto|cqo}", []>; // RDX:RAX = signext(RAX)
+ "{cqto|cqo}", [], IIC_CBW>; // RDX:RAX = signext(RAX)
}
diff --git a/lib/Target/X86/X86InstrFMA.td b/lib/Target/X86/X86InstrFMA.td
index 7759a8a..69cd5a5 100644
--- a/lib/Target/X86/X86InstrFMA.td
+++ b/lib/Target/X86/X86InstrFMA.td
@@ -74,43 +74,43 @@ let neverHasSideEffects = 1 in {
// Fused Multiply-Add
let ExeDomain = SSEPackedSingle in {
- defm VFMADDPS : fma3p_forms<0x98, 0xA8, 0xB8, "vfmadd", "ps", memopv4f32,
- memopv8f32, X86Fmadd, v4f32, v8f32>;
- defm VFMSUBPS : fma3p_forms<0x9A, 0xAA, 0xBA, "vfmsub", "ps", memopv4f32,
- memopv8f32, X86Fmsub, v4f32, v8f32>;
+ defm VFMADDPS : fma3p_forms<0x98, 0xA8, 0xB8, "vfmadd", "ps", loadv4f32,
+ loadv8f32, X86Fmadd, v4f32, v8f32>;
+ defm VFMSUBPS : fma3p_forms<0x9A, 0xAA, 0xBA, "vfmsub", "ps", loadv4f32,
+ loadv8f32, X86Fmsub, v4f32, v8f32>;
defm VFMADDSUBPS : fma3p_forms<0x96, 0xA6, 0xB6, "vfmaddsub", "ps",
- memopv4f32, memopv8f32, X86Fmaddsub,
+ loadv4f32, loadv8f32, X86Fmaddsub,
v4f32, v8f32>;
defm VFMSUBADDPS : fma3p_forms<0x97, 0xA7, 0xB7, "vfmsubadd", "ps",
- memopv4f32, memopv8f32, X86Fmsubadd,
+ loadv4f32, loadv8f32, X86Fmsubadd,
v4f32, v8f32>;
}
let ExeDomain = SSEPackedDouble in {
- defm VFMADDPD : fma3p_forms<0x98, 0xA8, 0xB8, "vfmadd", "pd", memopv2f64,
- memopv4f64, X86Fmadd, v2f64, v4f64>, VEX_W;
- defm VFMSUBPD : fma3p_forms<0x9A, 0xAA, 0xBA, "vfmsub", "pd", memopv2f64,
- memopv4f64, X86Fmsub, v2f64, v4f64>, VEX_W;
+ defm VFMADDPD : fma3p_forms<0x98, 0xA8, 0xB8, "vfmadd", "pd", loadv2f64,
+ loadv4f64, X86Fmadd, v2f64, v4f64>, VEX_W;
+ defm VFMSUBPD : fma3p_forms<0x9A, 0xAA, 0xBA, "vfmsub", "pd", loadv2f64,
+ loadv4f64, X86Fmsub, v2f64, v4f64>, VEX_W;
defm VFMADDSUBPD : fma3p_forms<0x96, 0xA6, 0xB6, "vfmaddsub", "pd",
- memopv2f64, memopv4f64, X86Fmaddsub,
+ loadv2f64, loadv4f64, X86Fmaddsub,
v2f64, v4f64>, VEX_W;
defm VFMSUBADDPD : fma3p_forms<0x97, 0xA7, 0xB7, "vfmsubadd", "pd",
- memopv2f64, memopv4f64, X86Fmsubadd,
+ loadv2f64, loadv4f64, X86Fmsubadd,
v2f64, v4f64>, VEX_W;
}
// Fused Negative Multiply-Add
let ExeDomain = SSEPackedSingle in {
- defm VFNMADDPS : fma3p_forms<0x9C, 0xAC, 0xBC, "vfnmadd", "ps", memopv4f32,
- memopv8f32, X86Fnmadd, v4f32, v8f32>;
- defm VFNMSUBPS : fma3p_forms<0x9E, 0xAE, 0xBE, "vfnmsub", "ps", memopv4f32,
- memopv8f32, X86Fnmsub, v4f32, v8f32>;
+ defm VFNMADDPS : fma3p_forms<0x9C, 0xAC, 0xBC, "vfnmadd", "ps", loadv4f32,
+ loadv8f32, X86Fnmadd, v4f32, v8f32>;
+ defm VFNMSUBPS : fma3p_forms<0x9E, 0xAE, 0xBE, "vfnmsub", "ps", loadv4f32,
+ loadv8f32, X86Fnmsub, v4f32, v8f32>;
}
let ExeDomain = SSEPackedDouble in {
- defm VFNMADDPD : fma3p_forms<0x9C, 0xAC, 0xBC, "vfnmadd", "pd", memopv2f64,
- memopv4f64, X86Fnmadd, v2f64, v4f64>, VEX_W;
+ defm VFNMADDPD : fma3p_forms<0x9C, 0xAC, 0xBC, "vfnmadd", "pd", loadv2f64,
+ loadv4f64, X86Fnmadd, v2f64, v4f64>, VEX_W;
defm VFNMSUBPD : fma3p_forms<0x9E, 0xAE, 0xBE, "vfnmsub", "pd",
- memopv2f64, memopv4f64, X86Fnmsub, v2f64,
+ loadv2f64, loadv4f64, X86Fnmsub, v2f64,
v4f64>, VEX_W;
}
@@ -206,25 +206,26 @@ multiclass fma4s<bits<8> opc, string OpcodeStr, RegisterClass RC,
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set RC:$dst,
- (OpVT (OpNode RC:$src1, RC:$src2, RC:$src3)))]>, VEX_W, MemOp4;
+ (OpVT (OpNode RC:$src1, RC:$src2, RC:$src3)))]>, VEX_W, VEX_LIG, MemOp4;
def rm : FMA4<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, RC:$src2, x86memop:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set RC:$dst, (OpNode RC:$src1, RC:$src2,
- (mem_frag addr:$src3)))]>, VEX_W, MemOp4;
+ (mem_frag addr:$src3)))]>, VEX_W, VEX_LIG, MemOp4;
def mr : FMA4<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, x86memop:$src2, RC:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set RC:$dst,
- (OpNode RC:$src1, (mem_frag addr:$src2), RC:$src3))]>;
+ (OpNode RC:$src1, (mem_frag addr:$src2), RC:$src3))]>, VEX_LIG;
// For disassembler
let isCodeGenOnly = 1, hasSideEffects = 0 in
def rr_REV : FMA4<opc, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, RC:$src2, RC:$src3),
!strconcat(OpcodeStr,
- "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), []>;
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), []>,
+ VEX_LIG;
}
multiclass fma4s_int<bits<8> opc, string OpcodeStr, Operand memop,
@@ -235,19 +236,19 @@ multiclass fma4s_int<bits<8> opc, string OpcodeStr, Operand memop,
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set VR128:$dst,
- (Int VR128:$src1, VR128:$src2, VR128:$src3))]>, VEX_W, MemOp4;
+ (Int VR128:$src1, VR128:$src2, VR128:$src3))]>, VEX_W, VEX_LIG, MemOp4;
def rm_Int : FMA4<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, memop:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set VR128:$dst, (Int VR128:$src1, VR128:$src2,
- mem_cpat:$src3))]>, VEX_W, MemOp4;
+ mem_cpat:$src3))]>, VEX_W, VEX_LIG, MemOp4;
def mr_Int : FMA4<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, memop:$src2, VR128:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set VR128:$dst,
- (Int VR128:$src1, mem_cpat:$src2, VR128:$src3))]>;
+ (Int VR128:$src1, mem_cpat:$src2, VR128:$src3))]>, VEX_LIG;
}
multiclass fma4p<bits<8> opc, string OpcodeStr, SDNode OpNode,
@@ -338,31 +339,31 @@ defm VFNMSUBSD4 : fma4s<0x7F, "vfnmsubsd", FR64, f64mem, f64,
let ExeDomain = SSEPackedSingle in {
defm VFMADDPS4 : fma4p<0x68, "vfmaddps", X86Fmadd, v4f32, v8f32,
- memopv4f32, memopv8f32>;
+ loadv4f32, loadv8f32>;
defm VFMSUBPS4 : fma4p<0x6C, "vfmsubps", X86Fmsub, v4f32, v8f32,
- memopv4f32, memopv8f32>;
+ loadv4f32, loadv8f32>;
defm VFNMADDPS4 : fma4p<0x78, "vfnmaddps", X86Fnmadd, v4f32, v8f32,
- memopv4f32, memopv8f32>;
+ loadv4f32, loadv8f32>;
defm VFNMSUBPS4 : fma4p<0x7C, "vfnmsubps", X86Fnmsub, v4f32, v8f32,
- memopv4f32, memopv8f32>;
+ loadv4f32, loadv8f32>;
defm VFMADDSUBPS4 : fma4p<0x5C, "vfmaddsubps", X86Fmaddsub, v4f32, v8f32,
- memopv4f32, memopv8f32>;
+ loadv4f32, loadv8f32>;
defm VFMSUBADDPS4 : fma4p<0x5E, "vfmsubaddps", X86Fmsubadd, v4f32, v8f32,
- memopv4f32, memopv8f32>;
+ loadv4f32, loadv8f32>;
}
let ExeDomain = SSEPackedDouble in {
defm VFMADDPD4 : fma4p<0x69, "vfmaddpd", X86Fmadd, v2f64, v4f64,
- memopv2f64, memopv4f64>;
+ loadv2f64, loadv4f64>;
defm VFMSUBPD4 : fma4p<0x6D, "vfmsubpd", X86Fmsub, v2f64, v4f64,
- memopv2f64, memopv4f64>;
+ loadv2f64, loadv4f64>;
defm VFNMADDPD4 : fma4p<0x79, "vfnmaddpd", X86Fnmadd, v2f64, v4f64,
- memopv2f64, memopv4f64>;
+ loadv2f64, loadv4f64>;
defm VFNMSUBPD4 : fma4p<0x7D, "vfnmsubpd", X86Fnmsub, v2f64, v4f64,
- memopv2f64, memopv4f64>;
+ loadv2f64, loadv4f64>;
defm VFMADDSUBPD4 : fma4p<0x5D, "vfmaddsubpd", X86Fmaddsub, v2f64, v4f64,
- memopv2f64, memopv4f64>;
+ loadv2f64, loadv4f64>;
defm VFMSUBADDPD4 : fma4p<0x5F, "vfmsubaddpd", X86Fmsubadd, v2f64, v4f64,
- memopv2f64, memopv4f64>;
+ loadv2f64, loadv4f64>;
}
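
Editor's note: on the memop -> load switch above: the memop fragments require at least 16-byte alignment unless the subtarget has fast unaligned vector memory, whereas VEX-encoded FMA loads tolerate unaligned addresses, which is presumably why the plain load fragments are the better fit here. Per lane, the operations reduce to fused multiply-add variants; std::fma is the portable scalar equivalent:

#include <cmath>

// One lane of the packed FMA forms (single rounding, as the fused ops do):
double fmaddLane (double A, double B, double C) { return std::fma(A, B, C);  }
double fmsubLane (double A, double B, double C) { return std::fma(A, B, -C); }
double fnmaddLane(double A, double B, double C) { return std::fma(-A, B, C); }
double fnmsubLane(double A, double B, double C) { return -std::fma(A, B, C); }
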
diff --git a/lib/Target/X86/X86InstrFormats.td b/lib/Target/X86/X86InstrFormats.td
index 64018b3..0fd9011 100644
--- a/lib/Target/X86/X86InstrFormats.td
+++ b/lib/Target/X86/X86InstrFormats.td
@@ -139,6 +139,7 @@ class T8XS { bits<5> Prefix = 18; }
class TAXD { bits<5> Prefix = 19; }
class XOP8 { bits<5> Prefix = 20; }
class XOP9 { bits<5> Prefix = 21; }
+class XOPA { bits<5> Prefix = 22; }
class VEX { bit hasVEXPrefix = 1; }
class VEX_W { bit hasVEX_WPrefix = 1; }
class VEX_4V : VEX { bit hasVEX_4VPrefix = 1; }
@@ -339,10 +340,11 @@ def __xd : XD;
class SI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern, InstrItinClass itin = NoItinerary>
: I<o, F, outs, ins, asm, pattern, itin> {
- let Predicates = !if(hasVEXPrefix /* VEX */, [HasAVX],
+ let Predicates = !if(hasEVEXPrefix /* EVEX */, [HasAVX512],
+ !if(hasVEXPrefix /* VEX */, [UseAVX],
!if(!eq(Prefix, __xs.Prefix), [UseSSE1],
!if(!eq(Prefix, __xd.Prefix), [UseSSE2],
- !if(hasOpSizePrefix, [UseSSE2], [UseSSE1]))));
+ !if(hasOpSizePrefix, [UseSSE2], [UseSSE1])))));
// AVX instructions have a 'v' prefix in the mnemonic
let AsmString = !if(hasVEXPrefix, !strconcat("v", asm), asm);
@@ -352,8 +354,9 @@ class SI<bits<8> o, Format F, dag outs, dag ins, string asm,
class SIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern, InstrItinClass itin = NoItinerary>
: Ii8<o, F, outs, ins, asm, pattern, itin> {
- let Predicates = !if(hasVEXPrefix /* VEX */, [HasAVX],
- !if(!eq(Prefix, __xs.Prefix), [UseSSE1], [UseSSE2]));
+ let Predicates = !if(hasEVEXPrefix /* EVEX */, [HasAVX512],
+ !if(hasVEXPrefix /* VEX */, [UseAVX],
+ !if(!eq(Prefix, __xs.Prefix), [UseSSE1], [UseSSE2])));
// AVX instructions have a 'v' prefix in the mnemonic
let AsmString = !if(hasVEXPrefix, !strconcat("v", asm), asm);
@@ -363,8 +366,9 @@ class SIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
class PI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern,
InstrItinClass itin, Domain d>
: I<o, F, outs, ins, asm, pattern, itin, d> {
- let Predicates = !if(hasVEXPrefix /* VEX */, [HasAVX],
- !if(hasOpSizePrefix /* OpSize */, [UseSSE2], [UseSSE1]));
+ let Predicates = !if(hasEVEXPrefix /* EVEX */, [HasAVX512],
+ !if(hasVEXPrefix /* VEX */, [HasAVX],
+ !if(hasOpSizePrefix /* OpSize */, [UseSSE2], [UseSSE1])));
// AVX instructions have a 'v' prefix in the mnemonic
let AsmString = !if(hasVEXPrefix, !strconcat("v", asm), asm);
@@ -381,11 +385,12 @@ class MMXPI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> patter
class PIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern, InstrItinClass itin, Domain d>
: Ii8<o, F, outs, ins, asm, pattern, itin, d> {
- let Predicates = !if(hasVEX_4VPrefix /* VEX */, [HasAVX],
- !if(hasOpSizePrefix /* OpSize */, [UseSSE2], [UseSSE1]));
+ let Predicates = !if(hasEVEXPrefix /* EVEX */, [HasAVX512],
+ !if(hasVEXPrefix /* VEX */, [HasAVX],
+ !if(hasOpSizePrefix /* OpSize */, [UseSSE2], [UseSSE1])));
// AVX instructions have a 'v' prefix in the mnemonic
- let AsmString = !if(hasVEX_4VPrefix, !strconcat("v", asm), asm);
+ let AsmString = !if(hasVEXPrefix, !strconcat("v", asm), asm);
}
// SSE1 Instruction Templates:
@@ -460,7 +465,7 @@ class PDIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
class VSDI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern, InstrItinClass itin = NoItinerary>
: I<o, F, outs, ins, !strconcat("v", asm), pattern, itin>, XD,
- Requires<[HasAVX]>;
+ Requires<[UseAVX]>;
class VS2SI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern, InstrItinClass itin = NoItinerary>
: I<o, F, outs, ins, !strconcat("v", asm), pattern, itin>, XS,
@@ -472,7 +477,7 @@ class VPDI<bits<8> o, Format F, dag outs, dag ins, string asm,
class VS2I<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern, InstrItinClass itin = NoItinerary>
: I<o, F, outs, ins, !strconcat("v", asm), pattern, itin>, TB,
- OpSize, Requires<[HasAVX]>;
+ OpSize, Requires<[UseAVX]>;
class S2I<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern, InstrItinClass itin = NoItinerary>
: I<o, F, outs, ins, asm, pattern, itin>, TB,
@@ -641,7 +646,7 @@ class AVX512AIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
Requires<[HasAVX512]>;
class AVX512Ii8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern, InstrItinClass itin = NoItinerary>
- : Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>,
+ : Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TB,
Requires<[HasAVX512]>;
class AVX512PDI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern, InstrItinClass itin = NoItinerary>
@@ -653,10 +658,10 @@ class AVX512PSI<bits<8> o, Format F, dag outs, dag ins, string asm,
Requires<[HasAVX512]>;
class AVX512PIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern, Domain d, InstrItinClass itin = NoItinerary>
- : Ii8<o, F, outs, ins, asm, pattern, itin, d>, Requires<[HasAVX512]>;
+ : Ii8<o, F, outs, ins, asm, pattern, itin, d>, TB, Requires<[HasAVX512]>;
class AVX512PI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern, Domain d, InstrItinClass itin = NoItinerary>
- : I<o, F, outs, ins, asm, pattern, itin, d>, Requires<[HasAVX512]>;
+ : I<o, F, outs, ins, asm, pattern, itin, d>, TB, Requires<[HasAVX512]>;
class AVX512FMA3<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag>pattern, InstrItinClass itin = NoItinerary>
: I<o, F, outs, ins, asm, pattern, itin>, T8,
@@ -667,7 +672,7 @@ class AVX512FMA3<bits<8> o, Format F, dag outs, dag ins, string asm,
// AES8I
// These use the same encoding as the SSE4.2 T8 and TA encodings.
class AES8I<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag>pattern, InstrItinClass itin = NoItinerary>
+ list<dag>pattern, InstrItinClass itin = IIC_AES>
: I<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, T8,
Requires<[HasAES]>;
@@ -767,6 +772,7 @@ class VRS2I<bits<8> o, Format F, dag outs, dag ins, string asm,
//
// MMXI - MMX instructions with TB prefix.
+// MMXI32 - MMX instructions with TB prefix valid only in 32 bit mode.
// MMXI64 - MMX instructions with TB prefix valid only in 64 bit mode.
// MMX2I - MMX / SSE2 instructions with TB and OpSize prefixes.
// MMXIi8 - MMX instructions with ImmT == Imm8 and TB prefix.
@@ -776,6 +782,9 @@ class VRS2I<bits<8> o, Format F, dag outs, dag ins, string asm,
class MMXI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern, InstrItinClass itin = NoItinerary>
: I<o, F, outs, ins, asm, pattern, itin>, TB, Requires<[HasMMX]>;
+class MMXI32<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
+ : I<o, F, outs, ins, asm, pattern, itin>, TB, Requires<[HasMMX,In32BitMode]>;
class MMXI64<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern, InstrItinClass itin = NoItinerary>
: I<o, F, outs, ins, asm, pattern, itin>, TB, Requires<[HasMMX,In64BitMode]>;
diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td
index 0b51521..1fed424 100644
--- a/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -105,6 +105,13 @@ def X86vsext : SDNode<"X86ISD::VSEXT",
SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>,
SDTCisInt<0>, SDTCisInt<1>]>>;
+def X86vtrunc : SDNode<"X86ISD::VTRUNC",
+ SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>,
+ SDTCisInt<0>, SDTCisInt<1>]>>;
+def X86vtruncm : SDNode<"X86ISD::VTRUNCM",
+ SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>,
+ SDTCisInt<0>, SDTCisInt<1>,
+ SDTCisVec<2>, SDTCisInt<2>]>>;
def X86vfpext : SDNode<"X86ISD::VFPEXT",
SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>,
SDTCisFP<0>, SDTCisFP<1>]>>;
@@ -118,6 +125,15 @@ def X86cmpp : SDNode<"X86ISD::CMPP", SDTX86VFCMP>;
def X86pcmpeq : SDNode<"X86ISD::PCMPEQ", SDTIntBinOp, [SDNPCommutative]>;
def X86pcmpgt : SDNode<"X86ISD::PCMPGT", SDTIntBinOp>;
+def X86IntCmpMask : SDTypeProfile<1, 2,
+ [SDTCisVec<0>, SDTCisSameAs<1, 2>, SDTCisInt<1>]>;
+def X86pcmpeqm : SDNode<"X86ISD::PCMPEQM", X86IntCmpMask, [SDNPCommutative]>;
+def X86pcmpgtm : SDNode<"X86ISD::PCMPGTM", X86IntCmpMask>;
+
+def X86CmpMaskCC : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<1, 2>, SDTCisVT<3, i8>]>;
+def X86cmpm : SDNode<"X86ISD::CMPM", X86CmpMaskCC>;
+def X86cmpmu : SDNode<"X86ISD::CMPMU", X86CmpMaskCC>;
+
def X86vshl : SDNode<"X86ISD::VSHL",
SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
SDTCisVec<2>]>>;
@@ -140,6 +156,9 @@ def X86ptest : SDNode<"X86ISD::PTEST", SDTX86CmpPTest>;
def X86testp : SDNode<"X86ISD::TESTP", SDTX86CmpPTest>;
def X86kortest : SDNode<"X86ISD::KORTEST", SDTX86CmpPTest>;
def X86ktest : SDNode<"X86ISD::KTEST", SDTX86CmpPTest>;
+def X86testm : SDNode<"X86ISD::TESTM", SDTypeProfile<1, 2, [SDTCisVec<0>,
+ SDTCisVec<1>,
+ SDTCisSameAs<2, 1>]>>;
def X86pmuludq : SDNode<"X86ISD::PMULUDQ",
SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>,
@@ -151,6 +170,8 @@ def X86pmuludq : SDNode<"X86ISD::PMULUDQ",
def SDTShuff1Op : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0,1>]>;
def SDTShuff2Op : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
SDTCisSameAs<0,2>]>;
+def SDTShuff3Op : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
+ SDTCisSameAs<0,2>, SDTCisSameAs<0,3>]>;
def SDTShuff2OpI : SDTypeProfile<1, 2, [SDTCisVec<0>,
SDTCisSameAs<0,1>, SDTCisInt<2>]>;
@@ -194,11 +215,14 @@ def X86Unpckh : SDNode<"X86ISD::UNPCKH", SDTShuff2Op>;
def X86VPermilp : SDNode<"X86ISD::VPERMILP", SDTShuff2OpI>;
def X86VPermv : SDNode<"X86ISD::VPERMV", SDTShuff2Op>;
def X86VPermi : SDNode<"X86ISD::VPERMI", SDTShuff2OpI>;
+def X86VPermv3 : SDNode<"X86ISD::VPERMV3", SDTShuff3Op>;
def X86VPerm2x128 : SDNode<"X86ISD::VPERM2X128", SDTShuff3OpI>;
def X86VBroadcast : SDNode<"X86ISD::VBROADCAST", SDTVBroadcast>;
def X86VBroadcastm : SDNode<"X86ISD::VBROADCASTM", SDTVBroadcastm>;
+def X86Vinsert : SDNode<"X86ISD::VINSERT", SDTypeProfile<1, 3,
+ [SDTCisSameAs<0, 1>, SDTCisPtrTy<3>]>, []>;
def X86Blendi : SDNode<"X86ISD::BLENDI", SDTBlend>;
def X86Fmadd : SDNode<"X86ISD::FMADD", SDTFma>;
@@ -236,13 +260,13 @@ def sse_load_f64 : ComplexPattern<v2f64, 5, "SelectScalarSSELoad", [],
def ssmem : Operand<v4f32> {
let PrintMethod = "printf32mem";
let MIOperandInfo = (ops ptr_rc, i8imm, ptr_rc_nosp, i32imm, i8imm);
- let ParserMatchClass = X86MemAsmOperand;
+ let ParserMatchClass = X86Mem32AsmOperand;
let OperandType = "OPERAND_MEMORY";
}
def sdmem : Operand<v2f64> {
let PrintMethod = "printf64mem";
let MIOperandInfo = (ops ptr_rc, i8imm, ptr_rc_nosp, i32imm, i8imm);
- let ParserMatchClass = X86MemAsmOperand;
+ let ParserMatchClass = X86Mem64AsmOperand;
let OperandType = "OPERAND_MEMORY";
}
@@ -262,9 +286,16 @@ def loadv8f32 : PatFrag<(ops node:$ptr), (v8f32 (load node:$ptr))>;
def loadv4f64 : PatFrag<(ops node:$ptr), (v4f64 (load node:$ptr))>;
def loadv4i64 : PatFrag<(ops node:$ptr), (v4i64 (load node:$ptr))>;
-// 128-/256-bit extload pattern fragments
+// 512-bit load pattern fragments
+def loadv16f32 : PatFrag<(ops node:$ptr), (v16f32 (load node:$ptr))>;
+def loadv8f64 : PatFrag<(ops node:$ptr), (v8f64 (load node:$ptr))>;
+def loadv16i32 : PatFrag<(ops node:$ptr), (v16i32 (load node:$ptr))>;
+def loadv8i64 : PatFrag<(ops node:$ptr), (v8i64 (load node:$ptr))>;
+
+// 128-/256-/512-bit extload pattern fragments
def extloadv2f32 : PatFrag<(ops node:$ptr), (v2f64 (extloadvf32 node:$ptr))>;
def extloadv4f32 : PatFrag<(ops node:$ptr), (v4f64 (extloadvf32 node:$ptr))>;
+def extloadv8f32 : PatFrag<(ops node:$ptr), (v8f64 (extloadvf32 node:$ptr))>;
// Like 'store', but always requires 128-bit vector alignment.
def alignedstore : PatFrag<(ops node:$val, node:$ptr),
@@ -278,6 +309,12 @@ def alignedstore256 : PatFrag<(ops node:$val, node:$ptr),
return cast<StoreSDNode>(N)->getAlignment() >= 32;
}]>;
+// Like 'store', but always requires 512-bit vector alignment.
+def alignedstore512 : PatFrag<(ops node:$val, node:$ptr),
+ (store node:$val, node:$ptr), [{
+ return cast<StoreSDNode>(N)->getAlignment() >= 64;
+}]>;
+
// Like 'load', but always requires 128-bit vector alignment.
def alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
return cast<LoadSDNode>(N)->getAlignment() >= 16;
@@ -293,6 +330,11 @@ def alignedload256 : PatFrag<(ops node:$ptr), (load node:$ptr), [{
return cast<LoadSDNode>(N)->getAlignment() >= 32;
}]>;
+// Like 'load', but always requires 512-bit vector alignment.
+def alignedload512 : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+ return cast<LoadSDNode>(N)->getAlignment() >= 64;
+}]>;
+
def alignedloadfsf32 : PatFrag<(ops node:$ptr),
(f32 (alignedload node:$ptr))>;
def alignedloadfsf64 : PatFrag<(ops node:$ptr),
@@ -316,6 +358,16 @@ def alignedloadv4f64 : PatFrag<(ops node:$ptr),
def alignedloadv4i64 : PatFrag<(ops node:$ptr),
(v4i64 (alignedload256 node:$ptr))>;
+// 512-bit aligned load pattern fragments
+def alignedloadv16f32 : PatFrag<(ops node:$ptr),
+ (v16f32 (alignedload512 node:$ptr))>;
+def alignedloadv16i32 : PatFrag<(ops node:$ptr),
+ (v16i32 (alignedload512 node:$ptr))>;
+def alignedloadv8f64 : PatFrag<(ops node:$ptr),
+ (v8f64 (alignedload512 node:$ptr))>;
+def alignedloadv8i64 : PatFrag<(ops node:$ptr),
+ (v8i64 (alignedload512 node:$ptr))>;
+
// Like 'load', but uses special alignment checks suitable for use in
// memory operands in most SSE instructions, which are required to
// be naturally aligned on some targets but not on others. If the subtarget
@@ -327,6 +379,16 @@ def memop : PatFrag<(ops node:$ptr), (load node:$ptr), [{
|| cast<LoadSDNode>(N)->getAlignment() >= 16;
}]>;
+def memop4 : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+ return Subtarget->hasVectorUAMem()
+ || cast<LoadSDNode>(N)->getAlignment() >= 4;
+}]>;
+
+def memop8 : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+ return Subtarget->hasVectorUAMem()
+ || cast<LoadSDNode>(N)->getAlignment() >= 8;
+}]>;
+
def memopfsf32 : PatFrag<(ops node:$ptr), (f32 (memop node:$ptr))>;
def memopfsf64 : PatFrag<(ops node:$ptr), (f64 (memop node:$ptr))>;
@@ -342,6 +404,12 @@ def memopv8f32 : PatFrag<(ops node:$ptr), (v8f32 (memop node:$ptr))>;
def memopv4f64 : PatFrag<(ops node:$ptr), (v4f64 (memop node:$ptr))>;
def memopv4i64 : PatFrag<(ops node:$ptr), (v4i64 (memop node:$ptr))>;
+// 512-bit memop pattern fragments
+def memopv16f32 : PatFrag<(ops node:$ptr), (v16f32 (memop4 node:$ptr))>;
+def memopv8f64 : PatFrag<(ops node:$ptr), (v8f64 (memop8 node:$ptr))>;
+def memopv16i32 : PatFrag<(ops node:$ptr), (v16i32 (memop4 node:$ptr))>;
+def memopv8i64 : PatFrag<(ops node:$ptr), (v8i64 (memop8 node:$ptr))>;
+
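
Editor's note: the check these fragments encode, spelled out as a C++ predicate. The SelectionDAG and subtarget calls are the ones the PatFrag bodies above already use; the free function itself is just a sketch.

#include "X86Subtarget.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"

// Sketch of the predicate shared by memop/memop4/memop8: accept the load if
// the subtarget handles unaligned vector memory, or if the natural minimum
// alignment (16, 4, or 8 bytes respectively) is met.
static bool isAcceptableMemop(const llvm::LoadSDNode *N,
                              const llvm::X86Subtarget *Subtarget,
                              unsigned MinAlign) {
  return Subtarget->hasVectorUAMem() || N->getAlignment() >= MinAlign;
}
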
// SSSE3 uses MMX registers for some instructions. They aren't aligned on a
// 16-byte boundary.
// FIXME: 8 byte alignment for mmx reads is not required
@@ -391,6 +459,10 @@ def bc_v16i16 : PatFrag<(ops node:$in), (v16i16 (bitconvert node:$in))>;
def bc_v8i32 : PatFrag<(ops node:$in), (v8i32 (bitconvert node:$in))>;
def bc_v4i64 : PatFrag<(ops node:$in), (v4i64 (bitconvert node:$in))>;
+// 512-bit bitconvert pattern fragments
+def bc_v16i32 : PatFrag<(ops node:$in), (v16i32 (bitconvert node:$in))>;
+def bc_v8i64 : PatFrag<(ops node:$in), (v8i64 (bitconvert node:$in))>;
+
def vzmovl_v2i64 : PatFrag<(ops node:$src),
(bitconvert (v2i64 (X86vzmovl
(v2i64 (scalar_to_vector (loadi64 node:$src))))))>;
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index 0443a93..2461773 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -24,6 +24,7 @@
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/StackMaps.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/MC/MCAsmInfo.h"
@@ -35,7 +36,7 @@
#include "llvm/Target/TargetOptions.h"
#include <limits>
-#define GET_INSTRINFO_CTOR
+#define GET_INSTRINFO_CTOR_DTOR
#include "X86GenInstrInfo.inc"
using namespace llvm;
@@ -81,6 +82,7 @@ enum {
TB_ALIGN_NONE = 0 << TB_ALIGN_SHIFT,
TB_ALIGN_16 = 16 << TB_ALIGN_SHIFT,
TB_ALIGN_32 = 32 << TB_ALIGN_SHIFT,
+ TB_ALIGN_64 = 64 << TB_ALIGN_SHIFT,
TB_ALIGN_MASK = 0xff << TB_ALIGN_SHIFT
};
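
Editor's note: the TB_ALIGN_* values pack the required byte alignment directly into the high bits of the folding-table flags. A small encode/decode sketch; TB_ALIGN_SHIFT's value is an assumption here (it is defined earlier in this file, and the hunk only shows 0xff << TB_ALIGN_SHIFT).

#include <cstdint>

constexpr unsigned TB_ALIGN_SHIFT = 8; // assumed; see note above
constexpr uint16_t encodeAlign(uint16_t Bytes) {
  return static_cast<uint16_t>(Bytes << TB_ALIGN_SHIFT);
}
constexpr unsigned decodeAlign(uint16_t Flags) {
  return (Flags >> TB_ALIGN_SHIFT) & 0xff;
}
static_assert(decodeAlign(encodeAlign(64)) == 64, "TB_ALIGN_64 round-trips");
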
@@ -90,6 +92,9 @@ struct X86OpTblEntry {
uint16_t Flags;
};
+// Pin the vtable to this file.
+void X86InstrInfo::anchor() {}
+
X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
: X86GenInstrInfo((tm.getSubtarget<X86Subtarget>().is64Bit()
? X86::ADJCALLSTACKDOWN64
@@ -298,8 +303,6 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::DIV64r, X86::DIV64m, TB_FOLDED_LOAD },
{ X86::DIV8r, X86::DIV8m, TB_FOLDED_LOAD },
{ X86::EXTRACTPSrr, X86::EXTRACTPSmr, TB_FOLDED_STORE },
- { X86::FsMOVAPDrr, X86::MOVSDmr, TB_FOLDED_STORE | TB_NO_REVERSE },
- { X86::FsMOVAPSrr, X86::MOVSSmr, TB_FOLDED_STORE | TB_NO_REVERSE },
{ X86::IDIV16r, X86::IDIV16m, TB_FOLDED_LOAD },
{ X86::IDIV32r, X86::IDIV32m, TB_FOLDED_LOAD },
{ X86::IDIV64r, X86::IDIV64m, TB_FOLDED_LOAD },
@@ -356,8 +359,6 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::TEST8ri, X86::TEST8mi, TB_FOLDED_LOAD },
// AVX 128-bit versions of foldable instructions
{ X86::VEXTRACTPSrr,X86::VEXTRACTPSmr, TB_FOLDED_STORE },
- { X86::FsVMOVAPDrr, X86::VMOVSDmr, TB_FOLDED_STORE | TB_NO_REVERSE },
- { X86::FsVMOVAPSrr, X86::VMOVSSmr, TB_FOLDED_STORE | TB_NO_REVERSE },
{ X86::VEXTRACTF128rr, X86::VEXTRACTF128mr, TB_FOLDED_STORE | TB_ALIGN_16 },
{ X86::VMOVAPDrr, X86::VMOVAPDmr, TB_FOLDED_STORE | TB_ALIGN_16 },
{ X86::VMOVAPSrr, X86::VMOVAPSmr, TB_FOLDED_STORE | TB_ALIGN_16 },
@@ -374,7 +375,9 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::VMOVAPSYrr, X86::VMOVAPSYmr, TB_FOLDED_STORE | TB_ALIGN_32 },
{ X86::VMOVDQAYrr, X86::VMOVDQAYmr, TB_FOLDED_STORE | TB_ALIGN_32 },
{ X86::VMOVUPDYrr, X86::VMOVUPDYmr, TB_FOLDED_STORE },
- { X86::VMOVUPSYrr, X86::VMOVUPSYmr, TB_FOLDED_STORE }
+ { X86::VMOVUPSYrr, X86::VMOVUPSYmr, TB_FOLDED_STORE },
+ // AVX-512 foldable instructions
+ { X86::VMOVPDI2DIZrr,X86::VMOVPDI2DIZmr, TB_FOLDED_STORE }
};
for (unsigned i = 0, e = array_lengthof(OpTbl0); i != e; ++i) {
@@ -400,8 +403,6 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::CVTTSD2SIrr, X86::CVTTSD2SIrm, 0 },
{ X86::CVTTSS2SI64rr, X86::CVTTSS2SI64rm, 0 },
{ X86::CVTTSS2SIrr, X86::CVTTSS2SIrm, 0 },
- { X86::FsMOVAPDrr, X86::MOVSDrm, TB_NO_REVERSE },
- { X86::FsMOVAPSrr, X86::MOVSSrm, TB_NO_REVERSE },
{ X86::IMUL16rri, X86::IMUL16rmi, 0 },
{ X86::IMUL16rri8, X86::IMUL16rmi8, 0 },
{ X86::IMUL32rri, X86::IMUL32rmi, 0 },
@@ -444,7 +445,6 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::MOVSX64rr8, X86::MOVSX64rm8, 0 },
{ X86::MOVUPDrr, X86::MOVUPDrm, TB_ALIGN_16 },
{ X86::MOVUPSrr, X86::MOVUPSrm, 0 },
- { X86::MOVZDI2PDIrr, X86::MOVZDI2PDIrm, 0 },
{ X86::MOVZQI2PQIrr, X86::MOVZQI2PQIrm, 0 },
{ X86::MOVZPQILo2PQIrr, X86::MOVZPQILo2PQIrm, TB_ALIGN_16 },
{ X86::MOVZX16rr8, X86::MOVZX16rm8, 0 },
@@ -493,8 +493,6 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::VCVTSD2SIrr, X86::VCVTSD2SIrm, 0 },
{ X86::VCVTSS2SI64rr, X86::VCVTSS2SI64rm, 0 },
{ X86::VCVTSS2SIrr, X86::VCVTSS2SIrm, 0 },
- { X86::FsVMOVAPDrr, X86::VMOVSDrm, TB_NO_REVERSE },
- { X86::FsVMOVAPSrr, X86::VMOVSSrm, TB_NO_REVERSE },
{ X86::VMOV64toPQIrr, X86::VMOVQI2PQIrm, 0 },
{ X86::VMOV64toSDrr, X86::VMOV64toSDrm, 0 },
{ X86::VMOVAPDrr, X86::VMOVAPDrm, TB_ALIGN_16 },
@@ -507,7 +505,6 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::VMOVSHDUPrr, X86::VMOVSHDUPrm, TB_ALIGN_16 },
{ X86::VMOVUPDrr, X86::VMOVUPDrm, 0 },
{ X86::VMOVUPSrr, X86::VMOVUPSrm, 0 },
- { X86::VMOVZDI2PDIrr, X86::VMOVZDI2PDIrm, 0 },
{ X86::VMOVZQI2PQIrr, X86::VMOVZQI2PQIrm, 0 },
{ X86::VMOVZPQILo2PQIrr,X86::VMOVZPQILo2PQIrm, TB_ALIGN_16 },
{ X86::VPABSBrr128, X86::VPABSBrm128, 0 },
@@ -552,11 +549,27 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::VBROADCASTSSYrr, X86::VBROADCASTSSYrm, TB_NO_REVERSE },
{ X86::VBROADCASTSDYrr, X86::VBROADCASTSDYrm, TB_NO_REVERSE },
- // BMI/BMI2/LZCNT/POPCNT foldable instructions
+ // BMI/BMI2/LZCNT/POPCNT/TBM foldable instructions
{ X86::BEXTR32rr, X86::BEXTR32rm, 0 },
{ X86::BEXTR64rr, X86::BEXTR64rm, 0 },
+ { X86::BEXTRI32ri, X86::BEXTRI32mi, 0 },
+ { X86::BEXTRI64ri, X86::BEXTRI64mi, 0 },
+ { X86::BLCFILL32rr, X86::BLCFILL32rm, 0 },
+ { X86::BLCFILL64rr, X86::BLCFILL64rm, 0 },
+ { X86::BLCI32rr, X86::BLCI32rm, 0 },
+ { X86::BLCI64rr, X86::BLCI64rm, 0 },
+ { X86::BLCIC32rr, X86::BLCIC32rm, 0 },
+ { X86::BLCIC64rr, X86::BLCIC64rm, 0 },
+ { X86::BLCMSK32rr, X86::BLCMSK32rm, 0 },
+ { X86::BLCMSK64rr, X86::BLCMSK64rm, 0 },
+ { X86::BLCS32rr, X86::BLCS32rm, 0 },
+ { X86::BLCS64rr, X86::BLCS64rm, 0 },
+ { X86::BLSFILL32rr, X86::BLSFILL32rm, 0 },
+ { X86::BLSFILL64rr, X86::BLSFILL64rm, 0 },
{ X86::BLSI32rr, X86::BLSI32rm, 0 },
{ X86::BLSI64rr, X86::BLSI64rm, 0 },
+ { X86::BLSIC32rr, X86::BLSIC32rm, 0 },
+ { X86::BLSIC64rr, X86::BLSIC64rm, 0 },
{ X86::BLSMSK32rr, X86::BLSMSK32rm, 0 },
{ X86::BLSMSK64rr, X86::BLSMSK64rm, 0 },
{ X86::BLSR32rr, X86::BLSR32rm, 0 },
@@ -577,9 +590,27 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::SHRX64rr, X86::SHRX64rm, 0 },
{ X86::SHLX32rr, X86::SHLX32rm, 0 },
{ X86::SHLX64rr, X86::SHLX64rm, 0 },
+ { X86::T1MSKC32rr, X86::T1MSKC32rm, 0 },
+ { X86::T1MSKC64rr, X86::T1MSKC64rm, 0 },
{ X86::TZCNT16rr, X86::TZCNT16rm, 0 },
{ X86::TZCNT32rr, X86::TZCNT32rm, 0 },
{ X86::TZCNT64rr, X86::TZCNT64rm, 0 },
+ { X86::TZMSK32rr, X86::TZMSK32rm, 0 },
+ { X86::TZMSK64rr, X86::TZMSK64rm, 0 },
+
+ // AVX-512 foldable instructions
+ { X86::VMOV64toPQIZrr, X86::VMOVQI2PQIZrm, 0 },
+ { X86::VMOVDI2SSZrr, X86::VMOVDI2SSZrm, 0 },
+ { X86::VMOVDQA32rr, X86::VMOVDQA32rm, TB_ALIGN_64 },
+ { X86::VMOVDQA64rr, X86::VMOVDQA64rm, TB_ALIGN_64 },
+ { X86::VMOVDQU32rr, X86::VMOVDQU32rm, 0 },
+ { X86::VMOVDQU64rr, X86::VMOVDQU64rm, 0 },
+
+ // AES foldable instructions
+ { X86::AESIMCrr, X86::AESIMCrm, TB_ALIGN_16 },
+ { X86::AESKEYGENASSIST128rr, X86::AESKEYGENASSIST128rm, TB_ALIGN_16 },
+ { X86::VAESIMCrr, X86::VAESIMCrm, TB_ALIGN_16 },
+ { X86::VAESKEYGENASSIST128rr, X86::VAESKEYGENASSIST128rm, TB_ALIGN_16 },
};
for (unsigned i = 0, e = array_lengthof(OpTbl1); i != e; ++i) {
@@ -1177,6 +1208,52 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::PDEP64rr, X86::PDEP64rm, 0 },
{ X86::PEXT32rr, X86::PEXT32rm, 0 },
{ X86::PEXT64rr, X86::PEXT64rm, 0 },
+
+ // AVX-512 foldable instructions
+ { X86::VPADDDZrr, X86::VPADDDZrm, 0 },
+ { X86::VPADDQZrr, X86::VPADDQZrm, 0 },
+ { X86::VADDPSZrr, X86::VADDPSZrm, 0 },
+ { X86::VADDPDZrr, X86::VADDPDZrm, 0 },
+ { X86::VSUBPSZrr, X86::VSUBPSZrm, 0 },
+ { X86::VSUBPDZrr, X86::VSUBPDZrm, 0 },
+ { X86::VMULPSZrr, X86::VMULPSZrm, 0 },
+ { X86::VMULPDZrr, X86::VMULPDZrm, 0 },
+ { X86::VDIVPSZrr, X86::VDIVPSZrm, 0 },
+ { X86::VDIVPDZrr, X86::VDIVPDZrm, 0 },
+ { X86::VMINPSZrr, X86::VMINPSZrm, 0 },
+ { X86::VMINPDZrr, X86::VMINPDZrm, 0 },
+ { X86::VMAXPSZrr, X86::VMAXPSZrm, 0 },
+ { X86::VMAXPDZrr, X86::VMAXPDZrm, 0 },
+ { X86::VPERMPDZri, X86::VPERMPDZmi, 0 },
+ { X86::VPERMPSZrr, X86::VPERMPSZrm, 0 },
+ { X86::VPSLLVDZrr, X86::VPSLLVDZrm, 0 },
+ { X86::VPSLLVQZrr, X86::VPSLLVQZrm, 0 },
+ { X86::VPSRAVDZrr, X86::VPSRAVDZrm, 0 },
+ { X86::VPSRLVDZrr, X86::VPSRLVDZrm, 0 },
+ { X86::VPSRLVQZrr, X86::VPSRLVQZrm, 0 },
+ { X86::VSHUFPDZrri, X86::VSHUFPDZrmi, 0 },
+ { X86::VSHUFPSZrri, X86::VSHUFPSZrmi, 0 },
+ { X86::VALIGNQrri, X86::VALIGNQrmi, 0 },
+ { X86::VALIGNDrri, X86::VALIGNDrmi, 0 },
+
+ // AES foldable instructions
+ { X86::AESDECLASTrr, X86::AESDECLASTrm, TB_ALIGN_16 },
+ { X86::AESDECrr, X86::AESDECrm, TB_ALIGN_16 },
+ { X86::AESENCLASTrr, X86::AESENCLASTrm, TB_ALIGN_16 },
+ { X86::AESENCrr, X86::AESENCrm, TB_ALIGN_16 },
+ { X86::VAESDECLASTrr, X86::VAESDECLASTrm, TB_ALIGN_16 },
+ { X86::VAESDECrr, X86::VAESDECrm, TB_ALIGN_16 },
+ { X86::VAESENCLASTrr, X86::VAESENCLASTrm, TB_ALIGN_16 },
+ { X86::VAESENCrr, X86::VAESENCrm, TB_ALIGN_16 },
+
+ // SHA foldable instructions
+ { X86::SHA1MSG1rr, X86::SHA1MSG1rm, TB_ALIGN_16 },
+ { X86::SHA1MSG2rr, X86::SHA1MSG2rm, TB_ALIGN_16 },
+ { X86::SHA1NEXTErr, X86::SHA1NEXTErm, TB_ALIGN_16 },
+ { X86::SHA1RNDS4rri, X86::SHA1RNDS4rmi, TB_ALIGN_16 },
+ { X86::SHA256MSG1rr, X86::SHA256MSG1rm, TB_ALIGN_16 },
+ { X86::SHA256MSG2rr, X86::SHA256MSG2rm, TB_ALIGN_16 },
+ { X86::SHA256RNDS2rr, X86::SHA256RNDS2rm, TB_ALIGN_16 },
};
for (unsigned i = 0, e = array_lengthof(OpTbl2); i != e; ++i) {
@@ -1338,6 +1415,11 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::VFMSUBADDPD4rr, X86::VFMSUBADDPD4rm, TB_ALIGN_16 },
{ X86::VFMSUBADDPS4rrY, X86::VFMSUBADDPS4rmY, TB_ALIGN_32 },
{ X86::VFMSUBADDPD4rrY, X86::VFMSUBADDPD4rmY, TB_ALIGN_32 },
+ // AVX-512 VPERMI instructions with 3 source operands.
+ { X86::VPERMI2Drr, X86::VPERMI2Drm, 0 },
+ { X86::VPERMI2Qrr, X86::VPERMI2Qrm, 0 },
+ { X86::VPERMI2PSrr, X86::VPERMI2PSrm, 0 },
+ { X86::VPERMI2PDrr, X86::VPERMI2PDrm, 0 },
};
for (unsigned i = 0, e = array_lengthof(OpTbl3); i != e; ++i) {
@@ -1454,6 +1536,8 @@ static bool isFrameLoadOpcode(int Opcode) {
case X86::VMOVDQAYrm:
case X86::MMX_MOVD64rm:
case X86::MMX_MOVQ64rm:
+ case X86::VMOVDQA32rm:
+ case X86::VMOVDQA64rm:
return true;
}
}
@@ -2890,23 +2974,29 @@ static bool isHReg(unsigned Reg) {
// Try and copy between VR128/VR64 and GR64 registers.
static unsigned CopyToFromAsymmetricReg(unsigned DestReg, unsigned SrcReg,
- bool HasAVX) {
+ const X86Subtarget& Subtarget) {
+
// SrcReg(VR128) -> DestReg(GR64)
// SrcReg(VR64) -> DestReg(GR64)
// SrcReg(GR64) -> DestReg(VR128)
// SrcReg(GR64) -> DestReg(VR64)
+ bool HasAVX = Subtarget.hasAVX();
+ bool HasAVX512 = Subtarget.hasAVX512();
if (X86::GR64RegClass.contains(DestReg)) {
- if (X86::VR128RegClass.contains(SrcReg))
+ if (X86::VR128XRegClass.contains(SrcReg))
// Copy from a VR128 register to a GR64 register.
- return HasAVX ? X86::VMOVPQIto64rr : X86::MOVPQIto64rr;
+ return HasAVX512 ? X86::VMOVPQIto64Zrr: (HasAVX ? X86::VMOVPQIto64rr :
+ X86::MOVPQIto64rr);
if (X86::VR64RegClass.contains(SrcReg))
// Copy from a VR64 register to a GR64 register.
return X86::MOVSDto64rr;
} else if (X86::GR64RegClass.contains(SrcReg)) {
// Copy from a GR64 register to a VR128 register.
- if (X86::VR128RegClass.contains(DestReg))
- return HasAVX ? X86::VMOV64toPQIrr : X86::MOV64toPQIrr;
+ if (X86::VR128XRegClass.contains(DestReg))
+ return HasAVX512 ? X86::VMOV64toPQIZrr: (HasAVX ? X86::VMOV64toPQIrr :
+ X86::MOV64toPQIrr);
// Copy from a GR64 register to a VR64 register.
if (X86::VR64RegClass.contains(DestReg))
return X86::MOV64toSDrr;
@@ -2915,14 +3005,30 @@ static unsigned CopyToFromAsymmetricReg(unsigned DestReg, unsigned SrcReg,
// SrcReg(FR32) -> DestReg(GR32)
// SrcReg(GR32) -> DestReg(FR32)
- if (X86::GR32RegClass.contains(DestReg) && X86::FR32RegClass.contains(SrcReg))
+ if (X86::GR32RegClass.contains(DestReg) && X86::FR32XRegClass.contains(SrcReg))
// Copy from a FR32 register to a GR32 register.
- return HasAVX ? X86::VMOVSS2DIrr : X86::MOVSS2DIrr;
+ return HasAVX512 ? X86::VMOVSS2DIZrr : (HasAVX ? X86::VMOVSS2DIrr : X86::MOVSS2DIrr);
- if (X86::FR32RegClass.contains(DestReg) && X86::GR32RegClass.contains(SrcReg))
+ if (X86::FR32XRegClass.contains(DestReg) && X86::GR32RegClass.contains(SrcReg))
// Copy from a GR32 register to a FR32 register.
- return HasAVX ? X86::VMOVDI2SSrr : X86::MOVDI2SSrr;
+ return HasAVX512 ? X86::VMOVDI2SSZrr : (HasAVX ? X86::VMOVDI2SSrr : X86::MOVDI2SSrr);
+ return 0;
+}
+static
+unsigned copyPhysRegOpcode_AVX512(unsigned& DestReg, unsigned& SrcReg) {
+ if (X86::VR128XRegClass.contains(DestReg, SrcReg) ||
+ X86::VR256XRegClass.contains(DestReg, SrcReg) ||
+ X86::VR512RegClass.contains(DestReg, SrcReg)) {
+ DestReg = get512BitSuperRegister(DestReg);
+ SrcReg = get512BitSuperRegister(SrcReg);
+ return X86::VMOVAPSZrr;
+ }
+ if ((X86::VK8RegClass.contains(DestReg) ||
+ X86::VK16RegClass.contains(DestReg)) &&
+ (X86::VK8RegClass.contains(SrcReg) ||
+ X86::VK16RegClass.contains(SrcReg)))
+ return X86::KMOVWkk;
return 0;
}
@@ -2932,7 +3038,8 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
bool KillSrc) const {
// First deal with the normal symmetric copies.
bool HasAVX = TM.getSubtarget<X86Subtarget>().hasAVX();
- unsigned Opc;
+ bool HasAVX512 = TM.getSubtarget<X86Subtarget>().hasAVX512();
+ unsigned Opc = 0;
if (X86::GR64RegClass.contains(DestReg, SrcReg))
Opc = X86::MOV64rr;
else if (X86::GR32RegClass.contains(DestReg, SrcReg))
@@ -2950,14 +3057,17 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
"8-bit H register can not be copied outside GR8_NOREX");
} else
Opc = X86::MOV8rr;
- } else if (X86::VR128RegClass.contains(DestReg, SrcReg))
+  } else if (X86::VR64RegClass.contains(DestReg, SrcReg))
+ Opc = X86::MMX_MOVQ64rr;
+ else if (HasAVX512)
+ Opc = copyPhysRegOpcode_AVX512(DestReg, SrcReg);
+ else if (X86::VR128RegClass.contains(DestReg, SrcReg))
Opc = HasAVX ? X86::VMOVAPSrr : X86::MOVAPSrr;
else if (X86::VR256RegClass.contains(DestReg, SrcReg))
Opc = X86::VMOVAPSYrr;
- else if (X86::VR64RegClass.contains(DestReg, SrcReg))
- Opc = X86::MMX_MOVQ64rr;
- else
- Opc = CopyToFromAsymmetricReg(DestReg, SrcReg, HasAVX);
+ if (!Opc)
+ Opc = CopyToFromAsymmetricReg(DestReg, SrcReg, TM.getSubtarget<X86Subtarget>());
if (Opc) {
BuildMI(MBB, MI, DL, get(Opc), DestReg)
@@ -3005,6 +3115,18 @@ static unsigned getLoadStoreRegOpcode(unsigned Reg,
bool isStackAligned,
const TargetMachine &TM,
bool load) {
+ if (TM.getSubtarget<X86Subtarget>().hasAVX512()) {
+ if (X86::VK8RegClass.hasSubClassEq(RC) ||
+ X86::VK16RegClass.hasSubClassEq(RC))
+ return load ? X86::KMOVWkm : X86::KMOVWmk;
+ if (RC->getSize() == 4 && X86::FR32XRegClass.hasSubClassEq(RC))
+ return load ? X86::VMOVSSZrm : X86::VMOVSSZmr;
+ if (RC->getSize() == 8 && X86::FR64XRegClass.hasSubClassEq(RC))
+ return load ? X86::VMOVSDZrm : X86::VMOVSDZmr;
+ if (X86::VR512RegClass.hasSubClassEq(RC))
+ return load ? X86::VMOVUPSZrm : X86::VMOVUPSZmr;
+ }
+
bool HasAVX = TM.getSubtarget<X86Subtarget>().hasAVX();
switch (RC->getSize()) {
default:
@@ -3046,7 +3168,8 @@ static unsigned getLoadStoreRegOpcode(unsigned Reg,
assert(X86::RFP80RegClass.hasSubClassEq(RC) && "Unknown 10-byte regclass");
return load ? X86::LD_Fp80m : X86::ST_FpP80m;
case 16: {
- assert(X86::VR128RegClass.hasSubClassEq(RC) && "Unknown 16-byte regclass");
+ assert((X86::VR128RegClass.hasSubClassEq(RC) ||
+ X86::VR128XRegClass.hasSubClassEq(RC))&& "Unknown 16-byte regclass");
// If stack is realigned we can use aligned stores.
if (isStackAligned)
return load ?
@@ -3058,12 +3181,19 @@ static unsigned getLoadStoreRegOpcode(unsigned Reg,
(HasAVX ? X86::VMOVUPSmr : X86::MOVUPSmr);
}
case 32:
- assert(X86::VR256RegClass.hasSubClassEq(RC) && "Unknown 32-byte regclass");
+ assert((X86::VR256RegClass.hasSubClassEq(RC) ||
+ X86::VR256XRegClass.hasSubClassEq(RC)) && "Unknown 32-byte regclass");
// If stack is realigned we can use aligned stores.
if (isStackAligned)
return load ? X86::VMOVAPSYrm : X86::VMOVAPSYmr;
else
return load ? X86::VMOVUPSYrm : X86::VMOVUPSYmr;
+ case 64:
+ assert(X86::VR512RegClass.hasSubClassEq(RC) && "Unknown 64-byte regclass");
+ if (isStackAligned)
+ return load ? X86::VMOVAPSZrm : X86::VMOVAPSZmr;
+ else
+ return load ? X86::VMOVUPSZrm : X86::VMOVUPSZmr;
}
}
@@ -3090,7 +3220,7 @@ void X86InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
const MachineFunction &MF = *MBB.getParent();
assert(MF.getFrameInfo()->getObjectSize(FrameIdx) >= RC->getSize() &&
"Stack slot too small for store");
- unsigned Alignment = RC->getSize() == 32 ? 32 : 16;
+ unsigned Alignment = std::max<uint32_t>(RC->getSize(), 16);
bool isAligned = (TM.getFrameLowering()->getStackAlignment() >= Alignment) ||
RI.canRealignStack(MF);
unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, TM);
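
Editor's note: the repeated std::max<uint32_t>(RC->getSize(), 16) change generalizes the old "32-byte registers get 32, everything else 16" rule so that 64-byte ZMM spills get 64-byte slots. A few spot checks (C++14, for the constexpr std::max):

#include <algorithm>
#include <cstdint>

constexpr uint32_t spillAlign(uint32_t RegBytes) {
  return std::max<uint32_t>(RegBytes, 16);
}
static_assert(spillAlign(8)  == 16, "GR64 spill slots stay 16-byte aligned");
static_assert(spillAlign(32) == 32, "YMM behaves as before");
static_assert(spillAlign(64) == 64, "ZMM now gets 64-byte alignment");
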
@@ -3106,7 +3236,7 @@ void X86InstrInfo::storeRegToAddr(MachineFunction &MF, unsigned SrcReg,
MachineInstr::mmo_iterator MMOBegin,
MachineInstr::mmo_iterator MMOEnd,
SmallVectorImpl<MachineInstr*> &NewMIs) const {
- unsigned Alignment = RC->getSize() == 32 ? 32 : 16;
+ unsigned Alignment = std::max<uint32_t>(RC->getSize(), 16);
bool isAligned = MMOBegin != MMOEnd &&
(*MMOBegin)->getAlignment() >= Alignment;
unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, TM);
@@ -3126,7 +3256,7 @@ void X86InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
const MachineFunction &MF = *MBB.getParent();
- unsigned Alignment = RC->getSize() == 32 ? 32 : 16;
+ unsigned Alignment = std::max<uint32_t>(RC->getSize(), 16);
bool isAligned = (TM.getFrameLowering()->getStackAlignment() >= Alignment) ||
RI.canRealignStack(MF);
unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, TM);
@@ -3140,7 +3270,7 @@ void X86InstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
MachineInstr::mmo_iterator MMOBegin,
MachineInstr::mmo_iterator MMOEnd,
SmallVectorImpl<MachineInstr*> &NewMIs) const {
- unsigned Alignment = RC->getSize() == 32 ? 32 : 16;
+ unsigned Alignment = std::max<uint32_t>(RC->getSize(), 16);
bool isAligned = MMOBegin != MMOEnd &&
(*MMOBegin)->getAlignment() >= Alignment;
unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, TM);
@@ -3722,6 +3852,8 @@ bool X86InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
case X86::AVX_SET0:
assert(HasAVX && "AVX not supported");
return Expand2AddrUndef(MIB, get(X86::VXORPSYrr));
+ case X86::AVX512_512_SET0:
+ return Expand2AddrUndef(MIB, get(X86::VPXORDZrr));
case X86::V_SETALLONES:
return Expand2AddrUndef(MIB, get(HasAVX ? X86::VPCMPEQDrr : X86::PCMPEQDrr));
case X86::AVX2_SETALLONES:
@@ -3729,6 +3861,9 @@ bool X86InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
case X86::TEST8ri_NOREX:
MI->setDesc(get(X86::TEST8ri));
return true;
+ case X86::KSET0W: return Expand2AddrUndef(MIB, get(X86::KXORWrr));
+ case X86::KSET1B:
+ case X86::KSET1W: return Expand2AddrUndef(MIB, get(X86::KXNORWrr));
}
return false;
}
@@ -3942,18 +4077,6 @@ static bool hasPartialRegUpdate(unsigned Opcode) {
case X86::RSQRTSSr_Int:
case X86::SQRTSSr:
case X86::SQRTSSr_Int:
- // AVX encoded versions
- case X86::VCVTSD2SSrr:
- case X86::Int_VCVTSD2SSrr:
- case X86::VCVTSS2SDrr:
- case X86::Int_VCVTSS2SDrr:
- case X86::VRCPSSr:
- case X86::VROUNDSDr:
- case X86::VROUNDSDr_Int:
- case X86::VROUNDSSr:
- case X86::VROUNDSSr_Int:
- case X86::VRSQRTSSr:
- case X86::VSQRTSSr:
return true;
}
@@ -3985,10 +4108,77 @@ getPartialRegUpdateClearance(const MachineInstr *MI, unsigned OpNum,
return 16;
}
+// Return true for any instruction that copies the high bits of the first source
+// operand into the unused high bits of the destination operand.
+static bool hasUndefRegUpdate(unsigned Opcode) {
+ switch (Opcode) {
+ case X86::VCVTSI2SSrr:
+ case X86::Int_VCVTSI2SSrr:
+ case X86::VCVTSI2SS64rr:
+ case X86::Int_VCVTSI2SS64rr:
+ case X86::VCVTSI2SDrr:
+ case X86::Int_VCVTSI2SDrr:
+ case X86::VCVTSI2SD64rr:
+ case X86::Int_VCVTSI2SD64rr:
+ case X86::VCVTSD2SSrr:
+ case X86::Int_VCVTSD2SSrr:
+ case X86::VCVTSS2SDrr:
+ case X86::Int_VCVTSS2SDrr:
+ case X86::VRCPSSr:
+ case X86::VROUNDSDr:
+ case X86::VROUNDSDr_Int:
+ case X86::VROUNDSSr:
+ case X86::VROUNDSSr_Int:
+ case X86::VRSQRTSSr:
+ case X86::VSQRTSSr:
+
+ // AVX-512
+ case X86::VCVTSD2SSZrr:
+ case X86::VCVTSS2SDZrr:
+ return true;
+ }
+
+ return false;
+}
+
+/// Inform the ExeDepsFix pass how many idle instructions we would like before
+/// certain undef register reads.
+///
+/// This catches the VCVTSI2SD family of instructions:
+///
+/// vcvtsi2sdq %rax, %xmm0<undef>, %xmm14
+///
+/// We should be careful *not* to catch VXOR idioms which are presumably
+/// handled specially in the pipeline:
+///
+/// vxorps %xmm1<undef>, %xmm1<undef>, %xmm1
+///
+/// Like getPartialRegUpdateClearance, this makes a strong assumption that the
+/// high bits that are passed through are not live.
+unsigned X86InstrInfo::
+getUndefRegClearance(const MachineInstr *MI, unsigned &OpNum,
+ const TargetRegisterInfo *TRI) const {
+ if (!hasUndefRegUpdate(MI->getOpcode()))
+ return 0;
+
+ // Set the OpNum parameter to the first source operand.
+ OpNum = 1;
+
+ const MachineOperand &MO = MI->getOperand(OpNum);
+ if (MO.isUndef() && TargetRegisterInfo::isPhysicalRegister(MO.getReg())) {
+ // Use the same magic number as getPartialRegUpdateClearance.
+ return 16;
+ }
+ return 0;
+}
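// Illustrative effect (a sketch; the exact idiom is chosen by
// breakPartialRegDependency below): when a nonzero clearance is returned and
// no instruction has written the register recently, ExeDepsFix inserts a
// dependency-breaking instruction before the convert, e.g.
//   vxorps %xmm0, %xmm0, %xmm0     ; break the false dependence
//   vcvtsi2sdq %rax, %xmm0, %xmm14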
+
void X86InstrInfo::
breakPartialRegDependency(MachineBasicBlock::iterator MI, unsigned OpNum,
const TargetRegisterInfo *TRI) const {
unsigned Reg = MI->getOperand(OpNum).getReg();
+ // If MI kills this register, the false dependence is already broken.
+ if (MI->killsRegister(Reg, TRI))
+ return;
if (X86::VR128RegClass.contains(Reg)) {
// These instructions are all floating point domain, so xorps is the best
// choice.
@@ -4008,10 +4198,75 @@ breakPartialRegDependency(MachineBasicBlock::iterator MI, unsigned OpNum,
MI->addRegisterKilled(Reg, TRI, true);
}
-MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
- MachineInstr *MI,
- const SmallVectorImpl<unsigned> &Ops,
- int FrameIndex) const {
+static MachineInstr* foldPatchpoint(MachineFunction &MF,
+ MachineInstr *MI,
+ const SmallVectorImpl<unsigned> &Ops,
+ int FrameIndex,
+ const TargetInstrInfo &TII) {
+ unsigned StartIdx = 0;
+ switch (MI->getOpcode()) {
+ case TargetOpcode::STACKMAP:
+ StartIdx = 2; // Skip ID, nShadowBytes.
+ break;
+ case TargetOpcode::PATCHPOINT: {
+ // For PatchPoint, the call args are not foldable.
+ PatchPointOpers opers(MI);
+ StartIdx = opers.getVarIdx();
+ break;
+ }
+ default:
+ llvm_unreachable("unexpected stackmap opcode");
+ }
+
+  // Bail out (return 0) if any operand requested for folding is not foldable
+  // (not part of the stackmap's live values).
+ for (SmallVectorImpl<unsigned>::const_iterator I = Ops.begin(), E = Ops.end();
+ I != E; ++I) {
+ if (*I < StartIdx)
+ return 0;
+ }
+
+ MachineInstr *NewMI =
+ MF.CreateMachineInstr(TII.get(MI->getOpcode()), MI->getDebugLoc(), true);
+ MachineInstrBuilder MIB(MF, NewMI);
+
+  // No need to fold the return value, the metadata, or the function arguments.
+ for (unsigned i = 0; i < StartIdx; ++i)
+ MIB.addOperand(MI->getOperand(i));
+
+ for (unsigned i = StartIdx; i < MI->getNumOperands(); ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (std::find(Ops.begin(), Ops.end(), i) != Ops.end()) {
+ assert(MO.getReg() && "patchpoint can only fold a vreg operand");
+ // Compute the spill slot size and offset.
+ const TargetRegisterClass *RC = MF.getRegInfo().getRegClass(MO.getReg());
+ unsigned SpillSize;
+ unsigned SpillOffset;
+ bool Valid = TII.getStackSlotRange(RC, MO.getSubReg(), SpillSize,
+ SpillOffset, &MF.getTarget());
+ if (!Valid)
+ report_fatal_error("cannot spill patchpoint subregister operand");
+
+ MIB.addOperand(MachineOperand::CreateImm(StackMaps::IndirectMemRefOp));
+ MIB.addOperand(MachineOperand::CreateImm(SpillSize));
+ MIB.addOperand(MachineOperand::CreateFI(FrameIndex));
+ addOffset(MIB, SpillOffset);
+ }
+ else
+ MIB.addOperand(MO);
+ }
+ return NewMI;
+}
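// A sketch of the resulting form (operand values hypothetical): folding a
// live-value register operand of
//   STACKMAP <id>, <nShadowBytes>, ..., %vreg5, ...
// into frame index FI rewrites it to the four-operand indirect form that
// StackMaps consumes:
//   ..., IndirectMemRefOp, <spill size>, <fi FI>, <offset>, ...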
+
+MachineInstr*
+X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
+ const SmallVectorImpl<unsigned> &Ops,
+ int FrameIndex) const {
+ // Special case stack map and patch point intrinsics.
+ if (MI->getOpcode() == TargetOpcode::STACKMAP
+ || MI->getOpcode() == TargetOpcode::PATCHPOINT) {
+ return foldPatchpoint(MF, MI, Ops, FrameIndex, *this);
+ }
// Check switch flag
if (NoFusing) return NULL;
@@ -4025,6 +4280,10 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
const MachineFrameInfo *MFI = MF.getFrameInfo();
unsigned Size = MFI->getObjectSize(FrameIndex);
unsigned Alignment = MFI->getObjectAlignment(FrameIndex);
+ // If the function stack isn't realigned we don't want to fold instructions
+ // that need increased alignment.
+ if (!RI.needsStackRealignment(MF))
+ Alignment = std::min(Alignment, TM.getFrameLowering()->getStackAlignment());
if (Ops.size() == 2 && Ops[0] == 0 && Ops[1] == 1) {
unsigned NewOpc = 0;
unsigned RCSize = 0;
@@ -4054,6 +4313,12 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
MachineInstr *MI,
const SmallVectorImpl<unsigned> &Ops,
MachineInstr *LoadMI) const {
+ // If loading from a FrameIndex, fold directly from the FrameIndex.
+ unsigned NumOps = LoadMI->getDesc().getNumOperands();
+ int FrameIndex;
+ if (isLoadFromStackSlot(LoadMI, FrameIndex))
+ return foldMemoryOperandImpl(MF, MI, Ops, FrameIndex);
+
// Check switch flag
if (NoFusing) return NULL;
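// (Routing loads from stack slots through the FrameIndex overload above lets
// the regular stack-slot folding tables apply, rather than copying the load's
// address operands as the generic path further below does.)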
@@ -4179,7 +4444,6 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
return NULL;
// Folding a normal load. Just copy the load's address operands.
- unsigned NumOps = LoadMI->getDesc().getNumOperands();
for (unsigned i = NumOps - X86::AddrNumOperands; i != NumOps; ++i)
MOs.push_back(LoadMI->getOperand(i));
break;
@@ -5001,6 +5265,37 @@ bool X86InstrInfo::isHighLatencyDef(int opc) const {
case X86::VSQRTSSm:
case X86::VSQRTSSm_Int:
case X86::VSQRTSSr:
+ case X86::VSQRTPDZrm:
+ case X86::VSQRTPDZrr:
+ case X86::VSQRTPSZrm:
+ case X86::VSQRTPSZrr:
+ case X86::VSQRTSDZm:
+ case X86::VSQRTSDZm_Int:
+ case X86::VSQRTSDZr:
+ case X86::VSQRTSSZm_Int:
+ case X86::VSQRTSSZr:
+ case X86::VSQRTSSZm:
+ case X86::VDIVSDZrm:
+ case X86::VDIVSDZrr:
+ case X86::VDIVSSZrm:
+ case X86::VDIVSSZrr:
+
+ case X86::VGATHERQPSZrm:
+ case X86::VGATHERQPDZrm:
+ case X86::VGATHERDPDZrm:
+ case X86::VGATHERDPSZrm:
+ case X86::VPGATHERQDZrm:
+ case X86::VPGATHERQQZrm:
+ case X86::VPGATHERDDZrm:
+ case X86::VPGATHERDQZrm:
+ case X86::VSCATTERQPDZmr:
+ case X86::VSCATTERQPSZmr:
+ case X86::VSCATTERDPDZmr:
+ case X86::VSCATTERDPSZmr:
+ case X86::VPSCATTERQDZmr:
+ case X86::VPSCATTERQQZmr:
+ case X86::VPSCATTERDDZmr:
+ case X86::VPSCATTERDQZmr:
return true;
}
}
diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h
index a0d1ba7..600e392 100644
--- a/lib/Target/X86/X86InstrInfo.h
+++ b/lib/Target/X86/X86InstrInfo.h
@@ -152,6 +152,8 @@ class X86InstrInfo : public X86GenInstrInfo {
MemOp2RegOpTableType &M2RTable,
unsigned RegOp, unsigned MemOp, unsigned Flags);
+ virtual void anchor();
+
public:
explicit X86InstrInfo(X86TargetMachine &tm);
@@ -369,6 +371,8 @@ public:
unsigned getPartialRegUpdateClearance(const MachineInstr *MI, unsigned OpNum,
const TargetRegisterInfo *TRI) const;
+ unsigned getUndefRegClearance(const MachineInstr *MI, unsigned &OpNum,
+ const TargetRegisterInfo *TRI) const;
void breakPartialRegDependency(MachineBasicBlock::iterator MI, unsigned OpNum,
const TargetRegisterInfo *TRI) const;
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index 0960a2a..6e5d543 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -248,11 +248,12 @@ def X86xor_flag : SDNode<"X86ISD::XOR", SDTBinaryArithWithFlags,
[SDNPCommutative]>;
def X86and_flag : SDNode<"X86ISD::AND", SDTBinaryArithWithFlags,
[SDNPCommutative]>;
-def X86andn_flag : SDNode<"X86ISD::ANDN", SDTBinaryArithWithFlags>;
def X86blsi : SDNode<"X86ISD::BLSI", SDTIntUnaryOp>;
def X86blsmsk : SDNode<"X86ISD::BLSMSK", SDTIntUnaryOp>;
def X86blsr : SDNode<"X86ISD::BLSR", SDTIntUnaryOp>;
+def X86bzhi : SDNode<"X86ISD::BZHI", SDTIntShiftOp>;
+def X86bextr : SDNode<"X86ISD::BEXTR", SDTIntBinOp>;
def X86mul_imm : SDNode<"X86ISD::MUL_IMM", SDTIntBinOp>;
@@ -278,53 +279,52 @@ def ptr_rc_nosp : PointerLikeRegClass<1>;
// *mem - Operand definitions for the funky X86 addressing mode operands.
//
-def X86MemAsmOperand : AsmOperandClass {
- let Name = "Mem"; let PredicateMethod = "isMem";
+def X86MemAsmOperand : AsmOperandClass {
+ let Name = "Mem";
}
-def X86Mem8AsmOperand : AsmOperandClass {
- let Name = "Mem8"; let PredicateMethod = "isMem8";
+def X86Mem8AsmOperand : AsmOperandClass {
+ let Name = "Mem8"; let RenderMethod = "addMemOperands";
}
-def X86Mem16AsmOperand : AsmOperandClass {
- let Name = "Mem16"; let PredicateMethod = "isMem16";
+def X86Mem16AsmOperand : AsmOperandClass {
+ let Name = "Mem16"; let RenderMethod = "addMemOperands";
}
-def X86Mem32AsmOperand : AsmOperandClass {
- let Name = "Mem32"; let PredicateMethod = "isMem32";
+def X86Mem32AsmOperand : AsmOperandClass {
+ let Name = "Mem32"; let RenderMethod = "addMemOperands";
}
-def X86Mem64AsmOperand : AsmOperandClass {
- let Name = "Mem64"; let PredicateMethod = "isMem64";
+def X86Mem64AsmOperand : AsmOperandClass {
+ let Name = "Mem64"; let RenderMethod = "addMemOperands";
}
-def X86Mem80AsmOperand : AsmOperandClass {
- let Name = "Mem80"; let PredicateMethod = "isMem80";
+def X86Mem80AsmOperand : AsmOperandClass {
+ let Name = "Mem80"; let RenderMethod = "addMemOperands";
}
-def X86Mem128AsmOperand : AsmOperandClass {
- let Name = "Mem128"; let PredicateMethod = "isMem128";
+def X86Mem128AsmOperand : AsmOperandClass {
+ let Name = "Mem128"; let RenderMethod = "addMemOperands";
}
-def X86Mem256AsmOperand : AsmOperandClass {
- let Name = "Mem256"; let PredicateMethod = "isMem256";
+def X86Mem256AsmOperand : AsmOperandClass {
+ let Name = "Mem256"; let RenderMethod = "addMemOperands";
+}
+def X86Mem512AsmOperand : AsmOperandClass {
+ let Name = "Mem512"; let RenderMethod = "addMemOperands";
}
// Gather mem operands
def X86MemVX32Operand : AsmOperandClass {
- let Name = "MemVX32"; let PredicateMethod = "isMemVX32";
+ let Name = "MemVX32"; let RenderMethod = "addMemOperands";
}
def X86MemVY32Operand : AsmOperandClass {
- let Name = "MemVY32"; let PredicateMethod = "isMemVY32";
+ let Name = "MemVY32"; let RenderMethod = "addMemOperands";
+}
+def X86MemVZ32Operand : AsmOperandClass {
+ let Name = "MemVZ32"; let RenderMethod = "addMemOperands";
}
def X86MemVX64Operand : AsmOperandClass {
- let Name = "MemVX64"; let PredicateMethod = "isMemVX64";
+ let Name = "MemVX64"; let RenderMethod = "addMemOperands";
}
def X86MemVY64Operand : AsmOperandClass {
- let Name = "MemVY64"; let PredicateMethod = "isMemVY64";
+ let Name = "MemVY64"; let RenderMethod = "addMemOperands";
}
-
def X86MemVZ64Operand : AsmOperandClass {
- let Name = "MemVZ64"; let PredicateMethod = "isMemVZ64";
-}
-def X86MemVZ32Operand : AsmOperandClass {
- let Name = "MemVZ32"; let PredicateMethod = "isMemVZ32";
-}
-def X86Mem512AsmOperand : AsmOperandClass {
- let Name = "Mem512"; let PredicateMethod = "isMem512";
+ let Name = "MemVZ64"; let RenderMethod = "addMemOperands";
}
def X86AbsMemAsmOperand : AsmOperandClass {
@@ -343,29 +343,29 @@ def opaque48mem : X86MemOperand<"printopaquemem">;
def opaque80mem : X86MemOperand<"printopaquemem">;
def opaque512mem : X86MemOperand<"printopaquemem">;
-def i8mem : X86MemOperand<"printi8mem"> {
+def i8mem : X86MemOperand<"printi8mem"> {
let ParserMatchClass = X86Mem8AsmOperand; }
-def i16mem : X86MemOperand<"printi16mem"> {
+def i16mem : X86MemOperand<"printi16mem"> {
let ParserMatchClass = X86Mem16AsmOperand; }
-def i32mem : X86MemOperand<"printi32mem"> {
+def i32mem : X86MemOperand<"printi32mem"> {
let ParserMatchClass = X86Mem32AsmOperand; }
-def i64mem : X86MemOperand<"printi64mem"> {
+def i64mem : X86MemOperand<"printi64mem"> {
let ParserMatchClass = X86Mem64AsmOperand; }
-def i128mem : X86MemOperand<"printi128mem"> {
+def i128mem : X86MemOperand<"printi128mem"> {
let ParserMatchClass = X86Mem128AsmOperand; }
-def i256mem : X86MemOperand<"printi256mem"> {
+def i256mem : X86MemOperand<"printi256mem"> {
let ParserMatchClass = X86Mem256AsmOperand; }
-def i512mem : X86MemOperand<"printi512mem"> {
+def i512mem : X86MemOperand<"printi512mem"> {
let ParserMatchClass = X86Mem512AsmOperand; }
-def f32mem : X86MemOperand<"printf32mem"> {
+def f32mem : X86MemOperand<"printf32mem"> {
let ParserMatchClass = X86Mem32AsmOperand; }
-def f64mem : X86MemOperand<"printf64mem"> {
+def f64mem : X86MemOperand<"printf64mem"> {
let ParserMatchClass = X86Mem64AsmOperand; }
-def f80mem : X86MemOperand<"printf80mem"> {
+def f80mem : X86MemOperand<"printf80mem"> {
let ParserMatchClass = X86Mem80AsmOperand; }
-def f128mem : X86MemOperand<"printf128mem"> {
+def f128mem : X86MemOperand<"printf128mem"> {
let ParserMatchClass = X86Mem128AsmOperand; }
-def f256mem : X86MemOperand<"printf256mem">{
+def f256mem : X86MemOperand<"printf256mem">{
let ParserMatchClass = X86Mem256AsmOperand; }
def f512mem : X86MemOperand<"printf512mem">{
let ParserMatchClass = X86Mem512AsmOperand; }
@@ -439,17 +439,49 @@ let OperandType = "OPERAND_PCREL",
def i32imm_pcrel : Operand<i32>;
def i16imm_pcrel : Operand<i16>;
-def offset8 : Operand<i64>;
-def offset16 : Operand<i64>;
-def offset32 : Operand<i64>;
-def offset64 : Operand<i64>;
-
// Branch targets have OtherVT type and print as pc-relative values.
def brtarget : Operand<OtherVT>;
def brtarget8 : Operand<OtherVT>;
}
+def X86MemOffs8AsmOperand : AsmOperandClass {
+ let Name = "MemOffs8";
+ let RenderMethod = "addMemOffsOperands";
+ let SuperClasses = [X86Mem8AsmOperand];
+}
+def X86MemOffs16AsmOperand : AsmOperandClass {
+ let Name = "MemOffs16";
+ let RenderMethod = "addMemOffsOperands";
+ let SuperClasses = [X86Mem16AsmOperand];
+}
+def X86MemOffs32AsmOperand : AsmOperandClass {
+ let Name = "MemOffs32";
+ let RenderMethod = "addMemOffsOperands";
+ let SuperClasses = [X86Mem32AsmOperand];
+}
+def X86MemOffs64AsmOperand : AsmOperandClass {
+ let Name = "MemOffs64";
+ let RenderMethod = "addMemOffsOperands";
+ let SuperClasses = [X86Mem64AsmOperand];
+}
+
+let OperandType = "OPERAND_MEMORY" in {
+def offset8 : Operand<i64> {
+ let ParserMatchClass = X86MemOffs8AsmOperand;
+ let PrintMethod = "printMemOffs8"; }
+def offset16 : Operand<i64> {
+ let ParserMatchClass = X86MemOffs16AsmOperand;
+ let PrintMethod = "printMemOffs16"; }
+def offset32 : Operand<i64> {
+ let ParserMatchClass = X86MemOffs32AsmOperand;
+ let PrintMethod = "printMemOffs32"; }
+def offset64 : Operand<i64> {
+ let ParserMatchClass = X86MemOffs64AsmOperand;
+ let PrintMethod = "printMemOffs64"; }
+}
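// With per-size AsmOperandClasses (MemOffs8..MemOffs64), the assembler can
// now match each absolute-address mov against an offset operand of the
// correct width instead of a single generic 64-bit offset operand.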
+
+
def SSECC : Operand<i8> {
let PrintMethod = "printSSECC";
let OperandType = "OPERAND_IMMEDIATE";
@@ -470,6 +502,14 @@ class ImmZExtAsmOperandClass : AsmOperandClass {
let RenderMethod = "addImmOperands";
}
+def X86GR32orGR64AsmOperand : AsmOperandClass {
+ let Name = "GR32orGR64";
+}
+
+def GR32orGR64 : RegisterOperand<GR32> {
+ let ParserMatchClass = X86GR32orGR64AsmOperand;
+}
+
// Sign-extended immediate classes. We don't need to define the full lattice
// here because there is no instruction with an ambiguity between ImmSExti64i32
// and ImmSExti32i8.
@@ -617,13 +657,13 @@ def HasSSE4A : Predicate<"Subtarget->hasSSE4A()">;
def HasAVX : Predicate<"Subtarget->hasAVX()">;
def HasAVX2 : Predicate<"Subtarget->hasAVX2()">;
def HasAVX1Only : Predicate<"Subtarget->hasAVX() && !Subtarget->hasAVX2()">;
-def HasAVX512 : Predicate<"Subtarget->hasAVX512()">;
+def HasAVX512 : Predicate<"Subtarget->hasAVX512()">;
def UseAVX : Predicate<"Subtarget->hasAVX() && !Subtarget->hasAVX512()">;
def UseAVX2 : Predicate<"Subtarget->hasAVX2() && !Subtarget->hasAVX512()">;
def NoAVX512 : Predicate<"!Subtarget->hasAVX512()">;
def HasCDI : Predicate<"Subtarget->hasCDI()">;
def HasPFI : Predicate<"Subtarget->hasPFI()">;
-def HasEMI : Predicate<"Subtarget->hasERI()">;
+def HasERI : Predicate<"Subtarget->hasERI()">;
def HasPOPCNT : Predicate<"Subtarget->hasPOPCNT()">;
def HasAES : Predicate<"Subtarget->hasAES()">;
@@ -632,6 +672,7 @@ def HasFMA : Predicate<"Subtarget->hasFMA()">;
def UseFMAOnAVX : Predicate<"Subtarget->hasFMA() && !Subtarget->hasAVX512()">;
def HasFMA4 : Predicate<"Subtarget->hasFMA4()">;
def HasXOP : Predicate<"Subtarget->hasXOP()">;
+def HasTBM : Predicate<"Subtarget->hasTBM()">;
def HasMOVBE : Predicate<"Subtarget->hasMOVBE()">;
def HasRDRAND : Predicate<"Subtarget->hasRDRAND()">;
def HasF16C : Predicate<"Subtarget->hasF16C()">;
@@ -643,9 +684,10 @@ def HasRTM : Predicate<"Subtarget->hasRTM()">;
def HasHLE : Predicate<"Subtarget->hasHLE()">;
def HasTSX : Predicate<"Subtarget->hasRTM() || Subtarget->hasHLE()">;
def HasADX : Predicate<"Subtarget->hasADX()">;
+def HasSHA : Predicate<"Subtarget->hasSHA()">;
def HasPRFCHW : Predicate<"Subtarget->hasPRFCHW()">;
def HasRDSEED : Predicate<"Subtarget->hasRDSEED()">;
-def HasPrefetchW : Predicate<"Subtarget->has3DNow() || Subtarget->hasPRFCHW()">;
+def HasPrefetchW : Predicate<"Subtarget->hasPRFCHW()">;
def FPStackf32 : Predicate<"!Subtarget->hasSSE1()">;
def FPStackf64 : Predicate<"!Subtarget->hasSSE2()">;
def HasCmpxchg16b: Predicate<"Subtarget->hasCmpxchg16b()">;
@@ -944,53 +986,56 @@ let Defs = [EFLAGS] in {
def BSF16rr : I<0xBC, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
"bsf{w}\t{$src, $dst|$dst, $src}",
[(set GR16:$dst, EFLAGS, (X86bsf GR16:$src))],
- IIC_BSF>, TB, OpSize, Sched<[WriteShift]>;
+ IIC_BIT_SCAN_REG>, TB, OpSize, Sched<[WriteShift]>;
def BSF16rm : I<0xBC, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
"bsf{w}\t{$src, $dst|$dst, $src}",
[(set GR16:$dst, EFLAGS, (X86bsf (loadi16 addr:$src)))],
- IIC_BSF>, TB, OpSize, Sched<[WriteShiftLd]>;
+ IIC_BIT_SCAN_MEM>, TB, OpSize, Sched<[WriteShiftLd]>;
def BSF32rr : I<0xBC, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
"bsf{l}\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, EFLAGS, (X86bsf GR32:$src))], IIC_BSF>, TB,
+ [(set GR32:$dst, EFLAGS, (X86bsf GR32:$src))],
+ IIC_BIT_SCAN_REG>, TB,
Sched<[WriteShift]>;
def BSF32rm : I<0xBC, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
"bsf{l}\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, EFLAGS, (X86bsf (loadi32 addr:$src)))],
- IIC_BSF>, TB, Sched<[WriteShiftLd]>;
+ IIC_BIT_SCAN_MEM>, TB, Sched<[WriteShiftLd]>;
def BSF64rr : RI<0xBC, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
"bsf{q}\t{$src, $dst|$dst, $src}",
[(set GR64:$dst, EFLAGS, (X86bsf GR64:$src))],
- IIC_BSF>, TB, Sched<[WriteShift]>;
+ IIC_BIT_SCAN_REG>, TB, Sched<[WriteShift]>;
def BSF64rm : RI<0xBC, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
"bsf{q}\t{$src, $dst|$dst, $src}",
[(set GR64:$dst, EFLAGS, (X86bsf (loadi64 addr:$src)))],
- IIC_BSF>, TB, Sched<[WriteShiftLd]>;
+ IIC_BIT_SCAN_MEM>, TB, Sched<[WriteShiftLd]>;
def BSR16rr : I<0xBD, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
"bsr{w}\t{$src, $dst|$dst, $src}",
- [(set GR16:$dst, EFLAGS, (X86bsr GR16:$src))], IIC_BSR>,
+ [(set GR16:$dst, EFLAGS, (X86bsr GR16:$src))],
+ IIC_BIT_SCAN_REG>,
TB, OpSize, Sched<[WriteShift]>;
def BSR16rm : I<0xBD, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
"bsr{w}\t{$src, $dst|$dst, $src}",
[(set GR16:$dst, EFLAGS, (X86bsr (loadi16 addr:$src)))],
- IIC_BSR>, TB,
+ IIC_BIT_SCAN_MEM>, TB,
OpSize, Sched<[WriteShiftLd]>;
def BSR32rr : I<0xBD, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
"bsr{l}\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, EFLAGS, (X86bsr GR32:$src))], IIC_BSR>, TB,
+ [(set GR32:$dst, EFLAGS, (X86bsr GR32:$src))],
+ IIC_BIT_SCAN_REG>, TB,
Sched<[WriteShift]>;
def BSR32rm : I<0xBD, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
"bsr{l}\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, EFLAGS, (X86bsr (loadi32 addr:$src)))],
- IIC_BSR>, TB, Sched<[WriteShiftLd]>;
+ IIC_BIT_SCAN_MEM>, TB, Sched<[WriteShiftLd]>;
def BSR64rr : RI<0xBD, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
"bsr{q}\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst, EFLAGS, (X86bsr GR64:$src))], IIC_BSR>, TB,
+ [(set GR64:$dst, EFLAGS, (X86bsr GR64:$src))], IIC_BIT_SCAN_REG>, TB,
Sched<[WriteShift]>;
def BSR64rm : RI<0xBD, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
"bsr{q}\t{$src, $dst|$dst, $src}",
[(set GR64:$dst, EFLAGS, (X86bsr (loadi64 addr:$src)))],
- IIC_BSR>, TB, Sched<[WriteShiftLd]>;
+ IIC_BIT_SCAN_MEM>, TB, Sched<[WriteShiftLd]>;
} // Defs = [EFLAGS]
let SchedRW = [WriteMicrocoded] in {
@@ -1072,9 +1117,12 @@ def MOV64mi32 : RIi32<0xC7, MRM0m, (outs), (ins i64mem:$dst, i64i32imm:$src),
[(store i64immSExt32:$src, addr:$dst)], IIC_MOV_MEM>;
} // SchedRW
+let hasSideEffects = 0 in {
+
/// moffs8, moffs16 and moffs32 versions of moves. The immediate is an
/// absolute 32-bit address, not a PC-relative offset. These are only valid in
/// x86-32 mode.
let SchedRW = [WriteALU] in {
+let mayLoad = 1 in {
def MOV8o8a : Ii32 <0xA0, RawFrm, (outs), (ins offset8:$src),
"mov{b}\t{$src, %al|al, $src}", [], IIC_MOV_MEM>,
Requires<[In32BitMode]>;
@@ -1084,6 +1132,8 @@ def MOV16o16a : Ii32 <0xA1, RawFrm, (outs), (ins offset16:$src),
def MOV32o32a : Ii32 <0xA1, RawFrm, (outs), (ins offset32:$src),
"mov{l}\t{$src, %eax|eax, $src}", [], IIC_MOV_MEM>,
Requires<[In32BitMode]>;
+}
+let mayStore = 1 in {
def MOV8ao8 : Ii32 <0xA2, RawFrm, (outs offset8:$dst), (ins),
"mov{b}\t{%al, $dst|$dst, al}", [], IIC_MOV_MEM>,
Requires<[In32BitMode]>;
@@ -1094,34 +1144,40 @@ def MOV32ao32 : Ii32 <0xA3, RawFrm, (outs offset32:$dst), (ins),
"mov{l}\t{%eax, $dst|$dst, eax}", [], IIC_MOV_MEM>,
Requires<[In32BitMode]>;
}
+}
// These forms all have full 64-bit absolute addresses in their instructions
// and use the movabs mnemonic to indicate this specific form.
-def MOV64o8a : RIi64_NOREX<0xA0, RawFrm, (outs), (ins offset64:$src),
+let mayLoad = 1 in {
+def MOV64o8a : RIi64_NOREX<0xA0, RawFrm, (outs), (ins offset8:$src),
"movabs{b}\t{$src, %al|al, $src}", []>,
Requires<[In64BitMode]>;
-def MOV64o16a : RIi64_NOREX<0xA1, RawFrm, (outs), (ins offset64:$src),
+def MOV64o16a : RIi64_NOREX<0xA1, RawFrm, (outs), (ins offset16:$src),
"movabs{w}\t{$src, %ax|ax, $src}", []>, OpSize,
Requires<[In64BitMode]>;
-def MOV64o32a : RIi64_NOREX<0xA1, RawFrm, (outs), (ins offset64:$src),
+def MOV64o32a : RIi64_NOREX<0xA1, RawFrm, (outs), (ins offset32:$src),
"movabs{l}\t{$src, %eax|eax, $src}", []>,
Requires<[In64BitMode]>;
def MOV64o64a : RIi64<0xA1, RawFrm, (outs), (ins offset64:$src),
"movabs{q}\t{$src, %rax|rax, $src}", []>,
Requires<[In64BitMode]>;
+}
-def MOV64ao8 : RIi64_NOREX<0xA2, RawFrm, (outs offset64:$dst), (ins),
+let mayStore = 1 in {
+def MOV64ao8 : RIi64_NOREX<0xA2, RawFrm, (outs offset8:$dst), (ins),
"movabs{b}\t{%al, $dst|$dst, al}", []>,
Requires<[In64BitMode]>;
-def MOV64ao16 : RIi64_NOREX<0xA3, RawFrm, (outs offset64:$dst), (ins),
+def MOV64ao16 : RIi64_NOREX<0xA3, RawFrm, (outs offset16:$dst), (ins),
"movabs{w}\t{%ax, $dst|$dst, ax}", []>, OpSize,
Requires<[In64BitMode]>;
-def MOV64ao32 : RIi64_NOREX<0xA3, RawFrm, (outs offset64:$dst), (ins),
+def MOV64ao32 : RIi64_NOREX<0xA3, RawFrm, (outs offset32:$dst), (ins),
"movabs{l}\t{%eax, $dst|$dst, eax}", []>,
Requires<[In64BitMode]>;
def MOV64ao64 : RIi64<0xA3, RawFrm, (outs offset64:$dst), (ins),
"movabs{q}\t{%rax, $dst|$dst, rax}", []>,
Requires<[In64BitMode]>;
+}
+} // hasSideEffects = 0
let isCodeGenOnly = 1, hasSideEffects = 0, SchedRW = [WriteMove] in {
def MOV8rr_REV : I<0x8A, MRMSrcReg, (outs GR8:$dst), (ins GR8:$src),
@@ -1173,7 +1229,7 @@ def MOV8rr_NOREX : I<0x88, MRMDestReg,
(outs GR8_NOREX:$dst), (ins GR8_NOREX:$src),
"mov{b}\t{$src, $dst|$dst, $src} # NOREX", [], IIC_MOV>,
Sched<[WriteMove]>;
-let mayStore = 1 in
+let mayStore = 1, neverHasSideEffects = 1 in
def MOV8mr_NOREX : I<0x88, MRMDestMem,
(outs), (ins i8mem_NOREX:$dst, GR8_NOREX:$src),
"mov{b}\t{$src, $dst|$dst, $src} # NOREX", [],
@@ -1814,6 +1870,30 @@ let Predicates = [HasBMI2], Defs = [EFLAGS] in {
int_x86_bmi_bzhi_64, loadi64>, VEX_W;
}
+def : Pat<(X86bzhi GR32:$src1, GR8:$src2),
+ (BZHI32rr GR32:$src1,
+ (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+def : Pat<(X86bzhi (loadi32 addr:$src1), GR8:$src2),
+ (BZHI32rm addr:$src1,
+ (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+def : Pat<(X86bzhi GR64:$src1, GR8:$src2),
+ (BZHI64rr GR64:$src1,
+ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+def : Pat<(X86bzhi (loadi64 addr:$src1), GR8:$src2),
+ (BZHI64rm addr:$src1,
+ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
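// BZHI semantics for reference (n is the low 8 bits of the index operand):
//   bzhi(x, n) == x & ((1 << n) - 1)   for n < operand width, else x.
// The INSERT_SUBREG above merely widens the GR8 index into the full-width
// register operand that BZHI{32,64}rr expect.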
+
+let Predicates = [HasBMI] in {
+ def : Pat<(X86bextr GR32:$src1, GR32:$src2),
+ (BEXTR32rr GR32:$src1, GR32:$src2)>;
+ def : Pat<(X86bextr (loadi32 addr:$src1), GR32:$src2),
+ (BEXTR32rm addr:$src1, GR32:$src2)>;
+ def : Pat<(X86bextr GR64:$src1, GR64:$src2),
+ (BEXTR64rr GR64:$src1, GR64:$src2)>;
+ def : Pat<(X86bextr (loadi64 addr:$src1), GR64:$src2),
+ (BEXTR64rm addr:$src1, GR64:$src2)>;
+} // HasBMI
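// BEXTR control word for reference: bits [7:0] select the start bit and bits
// [15:8] the field length, i.e. bextr(x, ctl) == (x >> start) & ((1 << len) - 1);
// field bits that run past the operand width read as zero.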
+
multiclass bmi_pdep_pext<string mnemonic, RegisterClass RC,
X86MemOperand x86memop, Intrinsic Int,
PatFrag ld_frag> {
@@ -1838,6 +1918,134 @@ let Predicates = [HasBMI2] in {
}
//===----------------------------------------------------------------------===//
+// TBM Instructions
+//
+let Predicates = [HasTBM], Defs = [EFLAGS] in {
+
+multiclass tbm_ternary_imm_intr<bits<8> opc, RegisterClass RC, string OpcodeStr,
+ X86MemOperand x86memop, PatFrag ld_frag,
+ Intrinsic Int, Operand immtype,
+ SDPatternOperator immoperator> {
+ def ri : Ii32<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, immtype:$cntl),
+ !strconcat(OpcodeStr,
+ "\t{$cntl, $src1, $dst|$dst, $src1, $cntl}"),
+ [(set RC:$dst, (Int RC:$src1, immoperator:$cntl))]>,
+ XOP, XOPA, VEX;
+ def mi : Ii32<opc, MRMSrcMem, (outs RC:$dst),
+ (ins x86memop:$src1, immtype:$cntl),
+ !strconcat(OpcodeStr,
+ "\t{$cntl, $src1, $dst|$dst, $src1, $cntl}"),
+ [(set RC:$dst, (Int (ld_frag addr:$src1), immoperator:$cntl))]>,
+ XOP, XOPA, VEX;
+}
+
+defm BEXTRI32 : tbm_ternary_imm_intr<0x10, GR32, "bextr", i32mem, loadi32,
+ int_x86_tbm_bextri_u32, i32imm, imm>;
+defm BEXTRI64 : tbm_ternary_imm_intr<0x10, GR64, "bextr", i64mem, loadi64,
+ int_x86_tbm_bextri_u64, i64i32imm,
+ i64immSExt32>, VEX_W;
+
+multiclass tbm_binary_rm<bits<8> opc, Format FormReg, Format FormMem,
+ RegisterClass RC, string OpcodeStr,
+ X86MemOperand x86memop, PatFrag ld_frag> {
+let hasSideEffects = 0 in {
+ def rr : I<opc, FormReg, (outs RC:$dst), (ins RC:$src),
+ !strconcat(OpcodeStr,"\t{$src, $dst|$dst, $src}"),
+ []>, XOP, XOP9, VEX_4V;
+ let mayLoad = 1 in
+ def rm : I<opc, FormMem, (outs RC:$dst), (ins x86memop:$src),
+ !strconcat(OpcodeStr,"\t{$src, $dst|$dst, $src}"),
+ []>, XOP, XOP9, VEX_4V;
+}
+}
+
+multiclass tbm_binary_intr<bits<8> opc, string OpcodeStr,
+ Format FormReg, Format FormMem> {
+ defm NAME#32 : tbm_binary_rm<opc, FormReg, FormMem, GR32, OpcodeStr, i32mem,
+ loadi32>;
+ defm NAME#64 : tbm_binary_rm<opc, FormReg, FormMem, GR64, OpcodeStr, i64mem,
+ loadi64>, VEX_W;
+}
+
+defm BLCFILL : tbm_binary_intr<0x01, "blcfill", MRM1r, MRM1m>;
+defm BLCI : tbm_binary_intr<0x02, "blci", MRM6r, MRM6m>;
+defm BLCIC : tbm_binary_intr<0x01, "blcic", MRM5r, MRM5m>;
+defm BLCMSK : tbm_binary_intr<0x02, "blcmsk", MRM1r, MRM1m>;
+defm BLCS : tbm_binary_intr<0x01, "blcs", MRM3r, MRM3m>;
+defm BLSFILL : tbm_binary_intr<0x01, "blsfill", MRM2r, MRM2m>;
+defm BLSIC : tbm_binary_intr<0x01, "blsic", MRM6r, MRM6m>;
+defm T1MSKC : tbm_binary_intr<0x01, "t1mskc", MRM7r, MRM7m>;
+defm TZMSK : tbm_binary_intr<0x01, "tzmsk", MRM4r, MRM4m>;
+} // HasTBM, EFLAGS
+
+//===----------------------------------------------------------------------===//
+// Pattern fragments to auto generate TBM instructions.
+//===----------------------------------------------------------------------===//
+
+let Predicates = [HasTBM] in {
+ def : Pat<(X86bextr GR32:$src1, (i32 imm:$src2)),
+ (BEXTRI32ri GR32:$src1, imm:$src2)>;
+ def : Pat<(X86bextr (loadi32 addr:$src1), (i32 imm:$src2)),
+ (BEXTRI32mi addr:$src1, imm:$src2)>;
+ def : Pat<(X86bextr GR64:$src1, i64immSExt32:$src2),
+ (BEXTRI64ri GR64:$src1, i64immSExt32:$src2)>;
+ def : Pat<(X86bextr (loadi64 addr:$src1), i64immSExt32:$src2),
+ (BEXTRI64mi addr:$src1, i64immSExt32:$src2)>;
+
+ // FIXME: patterns for the load versions are not implemented
+ def : Pat<(and GR32:$src, (add GR32:$src, 1)),
+ (BLCFILL32rr GR32:$src)>;
+ def : Pat<(and GR64:$src, (add GR64:$src, 1)),
+ (BLCFILL64rr GR64:$src)>;
+
+ def : Pat<(or GR32:$src, (not (add GR32:$src, 1))),
+ (BLCI32rr GR32:$src)>;
+ def : Pat<(or GR64:$src, (not (add GR64:$src, 1))),
+ (BLCI64rr GR64:$src)>;
+
+ // Extra patterns because opt can optimize the above patterns to this.
+ def : Pat<(or GR32:$src, (sub -2, GR32:$src)),
+ (BLCI32rr GR32:$src)>;
+ def : Pat<(or GR64:$src, (sub -2, GR64:$src)),
+ (BLCI64rr GR64:$src)>;
+
+ def : Pat<(and (not GR32:$src), (add GR32:$src, 1)),
+ (BLCIC32rr GR32:$src)>;
+ def : Pat<(and (not GR64:$src), (add GR64:$src, 1)),
+ (BLCIC64rr GR64:$src)>;
+
+ def : Pat<(xor GR32:$src, (add GR32:$src, 1)),
+ (BLCMSK32rr GR32:$src)>;
+ def : Pat<(xor GR64:$src, (add GR64:$src, 1)),
+ (BLCMSK64rr GR64:$src)>;
+
+ def : Pat<(or GR32:$src, (add GR32:$src, 1)),
+ (BLCS32rr GR32:$src)>;
+ def : Pat<(or GR64:$src, (add GR64:$src, 1)),
+ (BLCS64rr GR64:$src)>;
+
+ def : Pat<(or GR32:$src, (add GR32:$src, -1)),
+ (BLSFILL32rr GR32:$src)>;
+ def : Pat<(or GR64:$src, (add GR64:$src, -1)),
+ (BLSFILL64rr GR64:$src)>;
+
+ def : Pat<(or (not GR32:$src), (add GR32:$src, -1)),
+ (BLSIC32rr GR32:$src)>;
+ def : Pat<(or (not GR64:$src), (add GR64:$src, -1)),
+ (BLSIC64rr GR64:$src)>;
+
+ def : Pat<(or (not GR32:$src), (add GR32:$src, 1)),
+ (T1MSKC32rr GR32:$src)>;
+ def : Pat<(or (not GR64:$src), (add GR64:$src, 1)),
+ (T1MSKC64rr GR64:$src)>;
+
+ def : Pat<(and (not GR32:$src), (add GR32:$src, -1)),
+ (TZMSK32rr GR32:$src)>;
+ def : Pat<(and (not GR64:$src), (add GR64:$src, -1)),
+ (TZMSK64rr GR64:$src)>;
+} // HasTBM
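// Worked example of the identities above, for x = 0b01100111:
//   x+1 = 0b01101000, so blcfill x = x & (x+1) = 0b01100000 (clears the
//   trailing ones), blcmsk x = x ^ (x+1) = 0b00001111 (mask through the
//   lowest clear bit), and blcs x = x | (x+1) = 0b01101111 (sets it).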
+
+//===----------------------------------------------------------------------===//
// Subsystems.
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/X86/X86InstrMMX.td b/lib/Target/X86/X86InstrMMX.td
index cb12956..ba58143 100644
--- a/lib/Target/X86/X86InstrMMX.td
+++ b/lib/Target/X86/X86InstrMMX.td
@@ -204,7 +204,7 @@ multiclass sse12_cvt_pint_3addr<bits<8> opc, RegisterClass SrcRC,
//===----------------------------------------------------------------------===//
def MMX_EMMS : MMXI<0x77, RawFrm, (outs), (ins), "emms",
- [(int_x86_mmx_emms)]>;
+ [(int_x86_mmx_emms)], IIC_MMX_EMMS>;
//===----------------------------------------------------------------------===//
// MMX Scalar Instructions
@@ -236,10 +236,10 @@ def MMX_MOVD64grr : MMXI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR64:$src),
(MMX_X86movd2w (x86mmx VR64:$src)))],
IIC_MMX_MOV_REG_MM>, Sched<[WriteMove]>;
-let neverHasSideEffects = 1 in
def MMX_MOVD64to64rr : MMXRI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR64:$src),
"movd\t{$src, $dst|$dst, $src}",
- [], IIC_MMX_MOV_MM_RM>, Sched<[WriteMove]>;
+ [(set VR64:$dst, (bitconvert GR64:$src))],
+ IIC_MMX_MOV_MM_RM>, Sched<[WriteMove]>;
// These are 64 bit moves, but since the OS X assembler doesn't
// recognize a register-register movq, we write them as
@@ -250,10 +250,6 @@ def MMX_MOVD64from64rr : MMXRI<0x7E, MRMDestReg,
"movd\t{$src, $dst|$dst, $src}",
[(set GR64:$dst,
(bitconvert VR64:$src))], IIC_MMX_MOV_REG_MM>;
-def MMX_MOVD64rrv164 : MMXRI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR64:$src),
- "movd\t{$src, $dst|$dst, $src}",
- [(set VR64:$dst,
- (bitconvert GR64:$src))], IIC_MMX_MOV_MM_RM>;
let neverHasSideEffects = 1 in
def MMX_MOVQ64rr : MMXI<0x6F, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src),
"movq\t{$src, $dst|$dst, $src}", [],
@@ -289,7 +285,7 @@ def MMX_MOVQ2DQrr : MMXS2SIi8<0xD6, MRMSrcReg, (outs VR128:$dst),
(i64 (bitconvert (x86mmx VR64:$src))))))],
IIC_MMX_MOVQ_RR>;
-let neverHasSideEffects = 1 in
+let isCodeGenOnly = 1, hasSideEffects = 1 in {
def MMX_MOVQ2FR64rr: MMXS2SIi8<0xD6, MRMSrcReg, (outs FR64:$dst),
(ins VR64:$src), "movq2dq\t{$src, $dst|$dst, $src}",
[], IIC_MMX_MOVQ_RR>;
@@ -297,6 +293,7 @@ def MMX_MOVQ2FR64rr: MMXS2SIi8<0xD6, MRMSrcReg, (outs FR64:$dst),
def MMX_MOVFR642Qrr: MMXSDIi8<0xD6, MRMSrcReg, (outs VR64:$dst),
(ins FR64:$src), "movdq2q\t{$src, $dst|$dst, $src}",
[], IIC_MMX_MOVQ_RR>;
+}
} // SchedRW
def MMX_MOVNTQmr : MMXI<0xE7, MRMDestMem, (outs), (ins i64mem:$dst, VR64:$src),
@@ -304,21 +301,15 @@ def MMX_MOVNTQmr : MMXI<0xE7, MRMDestMem, (outs), (ins i64mem:$dst, VR64:$src),
[(int_x86_mmx_movnt_dq addr:$dst, VR64:$src)],
IIC_MMX_MOVQ_RM>, Sched<[WriteStore]>;
-let AddedComplexity = 15 in
-// movd to MMX register zero-extends
-def MMX_MOVZDI2PDIrr : MMXI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR32:$src),
- "movd\t{$src, $dst|$dst, $src}",
- [(set VR64:$dst,
- (x86mmx (X86vzmovl (x86mmx (scalar_to_vector GR32:$src)))))],
- IIC_MMX_MOV_MM_RM>, Sched<[WriteMove]>;
-let AddedComplexity = 20 in
-def MMX_MOVZDI2PDIrm : MMXI<0x6E, MRMSrcMem, (outs VR64:$dst),
- (ins i32mem:$src),
- "movd\t{$src, $dst|$dst, $src}",
- [(set VR64:$dst,
- (x86mmx (X86vzmovl (x86mmx
- (scalar_to_vector (loadi32 addr:$src))))))],
- IIC_MMX_MOV_MM_RM>, Sched<[WriteLoad]>;
+let Predicates = [HasMMX] in {
+ let AddedComplexity = 15 in
+ // movd to MMX register zero-extends
+ def : Pat<(x86mmx (X86vzmovl (x86mmx (scalar_to_vector GR32:$src)))),
+ (MMX_MOVD64rr GR32:$src)>;
+ let AddedComplexity = 20 in
+ def : Pat<(x86mmx (X86vzmovl (x86mmx (scalar_to_vector (loadi32 addr:$src))))),
+ (MMX_MOVD64rm addr:$src)>;
+}
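// These patterns reuse the existing MMX_MOVD64rr/MMX_MOVD64rm encodings:
// movd into an MMX register already zero-extends its 32-bit source, so the
// dedicated MMX_MOVZDI2PDI* definitions removed above were redundant.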
// Arithmetic Instructions
defm MMX_PABSB : SS3I_unop_rm_int_mm<0x1C, "pabsb", int_x86_ssse3_pabs_b,
@@ -555,18 +546,18 @@ let Constraints = "$src1 = $dst" in {
// Extract / Insert
def MMX_PEXTRWirri: MMXIi8<0xC5, MRMSrcReg,
- (outs GR32:$dst), (ins VR64:$src1, i32i8imm:$src2),
- "pextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set GR32:$dst, (int_x86_mmx_pextr_w VR64:$src1,
- (iPTR imm:$src2)))],
- IIC_MMX_PEXTR>, Sched<[WriteShuffle]>;
+ (outs GR32orGR64:$dst), (ins VR64:$src1, i32i8imm:$src2),
+ "pextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set GR32orGR64:$dst, (int_x86_mmx_pextr_w VR64:$src1,
+ (iPTR imm:$src2)))],
+ IIC_MMX_PEXTR>, Sched<[WriteShuffle]>;
let Constraints = "$src1 = $dst" in {
def MMX_PINSRWirri : MMXIi8<0xC4, MRMSrcReg,
(outs VR64:$dst),
- (ins VR64:$src1, GR32:$src2, i32i8imm:$src3),
+ (ins VR64:$src1, GR32orGR64:$src2, i32i8imm:$src3),
"pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}",
[(set VR64:$dst, (int_x86_mmx_pinsr_w VR64:$src1,
- GR32:$src2, (iPTR imm:$src3)))],
+ GR32orGR64:$src2, (iPTR imm:$src3)))],
IIC_MMX_PINSRW>, Sched<[WriteShuffle]>;
def MMX_PINSRWirmi : MMXIi8<0xC4, MRMSrcMem,
@@ -580,9 +571,10 @@ let Constraints = "$src1 = $dst" in {
}
// Mask creation
-def MMX_PMOVMSKBrr : MMXI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR64:$src),
+def MMX_PMOVMSKBrr : MMXI<0xD7, MRMSrcReg, (outs GR32orGR64:$dst),
+ (ins VR64:$src),
"pmovmskb\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst,
+ [(set GR32orGR64:$dst,
(int_x86_mmx_pmovmskb VR64:$src))]>;
@@ -599,10 +591,10 @@ def : Pat<(x86mmx (MMX_X86movdq2q (loadv2i64 addr:$src))),
// Misc.
let SchedRW = [WriteShuffle] in {
let Uses = [EDI] in
-def MMX_MASKMOVQ : MMXI<0xF7, MRMSrcReg, (outs), (ins VR64:$src, VR64:$mask),
- "maskmovq\t{$mask, $src|$src, $mask}",
- [(int_x86_mmx_maskmovq VR64:$src, VR64:$mask, EDI)],
- IIC_MMX_MASKMOV>;
+def MMX_MASKMOVQ : MMXI32<0xF7, MRMSrcReg, (outs), (ins VR64:$src, VR64:$mask),
+ "maskmovq\t{$mask, $src|$src, $mask}",
+ [(int_x86_mmx_maskmovq VR64:$src, VR64:$mask, EDI)],
+ IIC_MMX_MASKMOV>;
let Uses = [RDI] in
def MMX_MASKMOVQ64: MMXI64<0xF7, MRMSrcReg, (outs), (ins VR64:$src, VR64:$mask),
"maskmovq\t{$mask, $src|$src, $mask}",
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index a86006a..a5debc0 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -151,6 +151,34 @@ def SSE_MOVU_ITINS : OpndItins<
IIC_SSE_MOVU_P_RR, IIC_SSE_MOVU_P_RM
>;
+def SSE_DPPD_ITINS : OpndItins<
+ IIC_SSE_DPPD_RR, IIC_SSE_DPPD_RM
+>;
+
+def SSE_DPPS_ITINS : OpndItins<
+  IIC_SSE_DPPS_RR, IIC_SSE_DPPS_RM
+>;
+
+def DEFAULT_ITINS : OpndItins<
+ IIC_ALU_NONMEM, IIC_ALU_MEM
+>;
+
+def SSE_EXTRACT_ITINS : OpndItins<
+ IIC_SSE_EXTRACTPS_RR, IIC_SSE_EXTRACTPS_RM
+>;
+
+def SSE_INSERT_ITINS : OpndItins<
+ IIC_SSE_INSERTPS_RR, IIC_SSE_INSERTPS_RM
+>;
+
+def SSE_MPSADBW_ITINS : OpndItins<
+ IIC_SSE_MPSADBW_RR, IIC_SSE_MPSADBW_RM
+>;
+
+def SSE_PMULLD_ITINS : OpndItins<
+ IIC_SSE_PMULLD_RR, IIC_SSE_PMULLD_RM
+>;
+
//===----------------------------------------------------------------------===//
// SSE 1 & 2 Instructions Classes
//===----------------------------------------------------------------------===//
@@ -455,10 +483,10 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
// SSE 1 & 2 - Move FP Scalar Instructions
//
// Move Instructions. Register-to-register movss/movsd is not used for FR32/64
-// register copies because it's a partial register update; FsMOVAPSrr/FsMOVAPDrr
-// is used instead. Register-to-register movss/movsd is not modeled as an
-// INSERT_SUBREG because INSERT_SUBREG requires that the insert be implementable
-// in terms of a copy, and just mentioned, we don't use movss/movsd for copies.
+// register copies because it's a partial register update; Register-to-register
+// movss/movsd is not modeled as an INSERT_SUBREG because INSERT_SUBREG requires
+// that the insert be implementable in terms of a copy, and just mentioned, we
+// don't use movss/movsd for copies.
//===----------------------------------------------------------------------===//
multiclass sse12_move_rr<RegisterClass RC, SDNode OpNode, ValueType vt,
@@ -526,7 +554,7 @@ let canFoldAsLoad = 1, isReMaterializable = 1 in {
}
// Patterns
-let Predicates = [HasAVX] in {
+let Predicates = [UseAVX] in {
let AddedComplexity = 15 in {
// Move scalar to XMM zero-extended, zeroing a VR128 then do a
// MOVS{S,D} to the lower bits.
@@ -1074,23 +1102,6 @@ let Predicates = [UseSSE1] in {
(MOVUPSmr addr:$dst, VR128:$src)>;
}
-// Alias instruction to do FR32 or FR64 reg-to-reg copy using movaps. Upper
-// bits are disregarded. FIXME: Set encoding to pseudo!
-let neverHasSideEffects = 1, SchedRW = [WriteMove] in {
-def FsVMOVAPSrr : VPSI<0x28, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src),
- "movaps\t{$src, $dst|$dst, $src}", [],
- IIC_SSE_MOVA_P_RR>, VEX;
-def FsVMOVAPDrr : VPDI<0x28, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src),
- "movapd\t{$src, $dst|$dst, $src}", [],
- IIC_SSE_MOVA_P_RR>, VEX;
-def FsMOVAPSrr : PSI<0x28, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src),
- "movaps\t{$src, $dst|$dst, $src}", [],
- IIC_SSE_MOVA_P_RR>;
-def FsMOVAPDrr : PDI<0x28, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src),
- "movapd\t{$src, $dst|$dst, $src}", [],
- IIC_SSE_MOVA_P_RR>;
-}
-
// Alias instruction to load FR32 or FR64 from f128mem using movaps. Upper
// bits are disregarded. FIXME: Set encoding to pseudo!
let canFoldAsLoad = 1, isReMaterializable = 1, SchedRW = [WriteLoad] in {
@@ -1103,15 +1114,15 @@ let isCodeGenOnly = 1 in {
"movapd\t{$src, $dst|$dst, $src}",
[(set FR64:$dst, (alignedloadfsf64 addr:$src))],
IIC_SSE_MOVA_P_RM>, VEX;
+ def FsMOVAPSrm : PSI<0x28, MRMSrcMem, (outs FR32:$dst), (ins f128mem:$src),
+ "movaps\t{$src, $dst|$dst, $src}",
+ [(set FR32:$dst, (alignedloadfsf32 addr:$src))],
+ IIC_SSE_MOVA_P_RM>;
+ def FsMOVAPDrm : PDI<0x28, MRMSrcMem, (outs FR64:$dst), (ins f128mem:$src),
+ "movapd\t{$src, $dst|$dst, $src}",
+ [(set FR64:$dst, (alignedloadfsf64 addr:$src))],
+ IIC_SSE_MOVA_P_RM>;
}
-def FsMOVAPSrm : PSI<0x28, MRMSrcMem, (outs FR32:$dst), (ins f128mem:$src),
- "movaps\t{$src, $dst|$dst, $src}",
- [(set FR32:$dst, (alignedloadfsf32 addr:$src))],
- IIC_SSE_MOVA_P_RM>;
-def FsMOVAPDrm : PDI<0x28, MRMSrcMem, (outs FR64:$dst), (ins f128mem:$src),
- "movapd\t{$src, $dst|$dst, $src}",
- [(set FR64:$dst, (alignedloadfsf64 addr:$src))],
- IIC_SSE_MOVA_P_RM>;
}
//===----------------------------------------------------------------------===//
@@ -1327,7 +1338,7 @@ let Predicates = [UseSSE2] in {
// SSE 1 & 2 - Move Low to High and High to Low packed FP Instructions
//===----------------------------------------------------------------------===//
-let AddedComplexity = 20 in {
+let AddedComplexity = 20, Predicates = [UseAVX] in {
def VMOVLHPSrr : VPSI<0x16, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2),
"movlhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
@@ -1358,7 +1369,7 @@ let Constraints = "$src1 = $dst", AddedComplexity = 20 in {
IIC_SSE_MOV_LH>, Sched<[WriteShuffle]>;
}
-let Predicates = [HasAVX] in {
+let Predicates = [UseAVX] in {
// MOVLHPS patterns
def : Pat<(v4i32 (X86Movlhps VR128:$src1, VR128:$src2)),
(VMOVLHPSrr VR128:$src1, VR128:$src2)>;
@@ -1440,7 +1451,7 @@ let neverHasSideEffects = 1 in {
multiclass sse12_vcvt_avx<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
X86MemOperand x86memop, string asm> {
-let neverHasSideEffects = 1 in {
+let neverHasSideEffects = 1, Predicates = [UseAVX] in {
def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins DstRC:$src1, SrcRC:$src),
!strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>,
Sched<[WriteCvtI2F]>;
@@ -1452,6 +1463,7 @@ let neverHasSideEffects = 1 in {
} // neverHasSideEffects = 1
}
+let Predicates = [UseAVX] in {
defm VCVTTSS2SI : sse12_cvt_s<0x2C, FR32, GR32, fp_to_sint, f32mem, loadf32,
"cvttss2si\t{$src, $dst|$dst, $src}",
SSE_CVT_SS2SI_32>,
@@ -1485,7 +1497,7 @@ def : InstAlias<"vcvttsd2si{q}\t{$src, $dst|$dst, $src}",
(VCVTTSD2SI64rr GR64:$dst, FR64:$src), 0>;
def : InstAlias<"vcvttsd2si{q}\t{$src, $dst|$dst, $src}",
(VCVTTSD2SI64rm GR64:$dst, f64mem:$src), 0>;
-
+}
// The assembler can recognize rr 64-bit instructions by seeing a rxx
// register, but the same isn't true when only using memory operands,
// provide other assembly "l" and "q" forms to address this explicitly
@@ -1499,12 +1511,12 @@ defm VCVTSI2SD : sse12_vcvt_avx<0x2A, GR32, FR64, i32mem, "cvtsi2sd{l}">,
defm VCVTSI2SD64 : sse12_vcvt_avx<0x2A, GR64, FR64, i64mem, "cvtsi2sd{q}">,
XD, VEX_4V, VEX_W, VEX_LIG;
-def : InstAlias<"vcvtsi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
+let Predicates = [UseAVX] in {
+ def : InstAlias<"vcvtsi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
(VCVTSI2SSrm FR64:$dst, FR64:$src1, i32mem:$src)>;
-def : InstAlias<"vcvtsi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
+ def : InstAlias<"vcvtsi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
(VCVTSI2SDrm FR64:$dst, FR64:$src1, i32mem:$src)>;
-let Predicates = [HasAVX] in {
def : Pat<(f32 (sint_to_fp (loadi32 addr:$src))),
(VCVTSI2SSrm (f32 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f32 (sint_to_fp (loadi64 addr:$src))),
@@ -1606,19 +1618,21 @@ multiclass sse12_cvt_sint_3addr<bits<8> opc, RegisterClass SrcRC,
itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
+let Predicates = [UseAVX] in {
defm VCVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32,
int_x86_sse2_cvtsd2si, sdmem, sse_load_f64, "cvtsd2si",
SSE_CVT_SD2SI>, XD, VEX, VEX_LIG;
defm VCVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64,
int_x86_sse2_cvtsd2si64, sdmem, sse_load_f64, "cvtsd2si",
SSE_CVT_SD2SI>, XD, VEX, VEX_W, VEX_LIG;
-
+}
defm CVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse2_cvtsd2si,
sdmem, sse_load_f64, "cvtsd2si", SSE_CVT_SD2SI>, XD;
defm CVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, int_x86_sse2_cvtsd2si64,
sdmem, sse_load_f64, "cvtsd2si", SSE_CVT_SD2SI>, XD, REX_W;
+let Predicates = [UseAVX] in {
defm Int_VCVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
int_x86_sse_cvtsi2ss, i32mem, loadi32, "cvtsi2ss{l}",
SSE_CVT_Scalar, 0>, XS, VEX_4V;
@@ -1633,7 +1647,7 @@ defm Int_VCVTSI2SD64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
int_x86_sse2_cvtsi642sd, i64mem, loadi64, "cvtsi2sd{q}",
SSE_CVT_Scalar, 0>, XD,
VEX_4V, VEX_W;
-
+}
let Constraints = "$src1 = $dst" in {
defm Int_CVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
int_x86_sse_cvtsi2ss, i32mem, loadi32,
@@ -1652,6 +1666,7 @@ let Constraints = "$src1 = $dst" in {
/// SSE 1 Only
// Aliases for intrinsics
+let Predicates = [UseAVX] in {
defm Int_VCVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse_cvttss2si,
ssmem, sse_load_f32, "cvttss2si",
SSE_CVT_SS2SI_32>, XS, VEX;
@@ -1666,6 +1681,7 @@ defm Int_VCVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
int_x86_sse2_cvttsd2si64, sdmem, sse_load_f64,
"cvttsd2si", SSE_CVT_SD2SI>,
XD, VEX, VEX_W;
+}
defm Int_CVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse_cvttss2si,
ssmem, sse_load_f32, "cvttss2si",
SSE_CVT_SS2SI_32>, XS;
@@ -1679,13 +1695,14 @@ defm Int_CVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
int_x86_sse2_cvttsd2si64, sdmem, sse_load_f64,
"cvttsd2si", SSE_CVT_SD2SI>, XD, REX_W;
+let Predicates = [UseAVX] in {
defm VCVTSS2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse_cvtss2si,
ssmem, sse_load_f32, "cvtss2si",
SSE_CVT_SS2SI_32>, XS, VEX, VEX_LIG;
defm VCVTSS2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, int_x86_sse_cvtss2si64,
ssmem, sse_load_f32, "cvtss2si",
SSE_CVT_SS2SI_64>, XS, VEX, VEX_W, VEX_LIG;
-
+}
defm CVTSS2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse_cvtss2si,
ssmem, sse_load_f32, "cvtss2si",
SSE_CVT_SS2SI_32>, XS;
@@ -1707,6 +1724,7 @@ defm CVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, i128mem,
SSEPackedSingle, SSE_CVT_PS>,
TB, Requires<[UseSSE2]>;
+let Predicates = [UseAVX] in {
def : InstAlias<"vcvtss2si{l}\t{$src, $dst|$dst, $src}",
(VCVTSS2SIrr GR32:$dst, VR128:$src), 0>;
def : InstAlias<"vcvtss2si{l}\t{$src, $dst|$dst, $src}",
@@ -1723,6 +1741,7 @@ def : InstAlias<"vcvtsd2si{q}\t{$src, $dst|$dst, $src}",
(VCVTSD2SI64rr GR64:$dst, VR128:$src), 0>;
def : InstAlias<"vcvtsd2si{q}\t{$src, $dst|$dst, $src}",
(VCVTSD2SI64rm GR64:$dst, sdmem:$src), 0>;
+}
def : InstAlias<"cvtss2si{l}\t{$src, $dst|$dst, $src}",
(CVTSS2SIrr GR32:$dst, VR128:$src), 0>;
@@ -1744,7 +1763,7 @@ def : InstAlias<"cvtsd2si{q}\t{$src, $dst|$dst, $src}",
/// SSE 2 Only
// Convert scalar double to scalar single
-let neverHasSideEffects = 1 in {
+let neverHasSideEffects = 1, Predicates = [UseAVX] in {
def VCVTSD2SSrr : VSDI<0x5A, MRMSrcReg, (outs FR32:$dst),
(ins FR64:$src1, FR64:$src2),
"cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", [],
@@ -1760,7 +1779,7 @@ def VCVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst),
}
def : Pat<(f32 (fround FR64:$src)), (VCVTSD2SSrr FR64:$src, FR64:$src)>,
- Requires<[HasAVX]>;
+ Requires<[UseAVX]>;
def CVTSD2SSrr : SDI<0x5A, MRMSrcReg, (outs FR32:$dst), (ins FR64:$src),
"cvtsd2ss\t{$src, $dst|$dst, $src}",
@@ -1778,27 +1797,27 @@ def Int_VCVTSD2SSrr: I<0x5A, MRMSrcReg,
"vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst,
(int_x86_sse2_cvtsd2ss VR128:$src1, VR128:$src2))],
- IIC_SSE_CVT_Scalar_RR>, XD, VEX_4V, Requires<[HasAVX]>,
+ IIC_SSE_CVT_Scalar_RR>, XD, VEX_4V, Requires<[UseAVX]>,
Sched<[WriteCvtF2F]>;
def Int_VCVTSD2SSrm: I<0x5A, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, sdmem:$src2),
"vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst, (int_x86_sse2_cvtsd2ss
VR128:$src1, sse_load_f64:$src2))],
- IIC_SSE_CVT_Scalar_RM>, XD, VEX_4V, Requires<[HasAVX]>,
+ IIC_SSE_CVT_Scalar_RM>, XD, VEX_4V, Requires<[UseAVX]>,
Sched<[WriteCvtF2FLd, ReadAfterLd]>;
let Constraints = "$src1 = $dst" in {
def Int_CVTSD2SSrr: I<0x5A, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
- "cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ "cvtsd2ss\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
(int_x86_sse2_cvtsd2ss VR128:$src1, VR128:$src2))],
IIC_SSE_CVT_Scalar_RR>, XD, Requires<[UseSSE2]>,
Sched<[WriteCvtF2F]>;
def Int_CVTSD2SSrm: I<0x5A, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, sdmem:$src2),
- "cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ "cvtsd2ss\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst, (int_x86_sse2_cvtsd2ss
VR128:$src1, sse_load_f64:$src2))],
IIC_SSE_CVT_Scalar_RM>, XD, Requires<[UseSSE2]>,
@@ -1807,7 +1826,7 @@ def Int_CVTSD2SSrm: I<0x5A, MRMSrcReg,
// Convert scalar single to scalar double
// SSE2 instructions with XS prefix
-let neverHasSideEffects = 1 in {
+let neverHasSideEffects = 1, Predicates = [UseAVX] in {
def VCVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst),
(ins FR32:$src1, FR32:$src2),
"vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
@@ -1824,16 +1843,16 @@ def VCVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst),
}
def : Pat<(f64 (fextend FR32:$src)),
- (VCVTSS2SDrr FR32:$src, FR32:$src)>, Requires<[HasAVX]>;
+ (VCVTSS2SDrr FR32:$src, FR32:$src)>, Requires<[UseAVX]>;
def : Pat<(fextend (loadf32 addr:$src)),
- (VCVTSS2SDrm (f32 (IMPLICIT_DEF)), addr:$src)>, Requires<[HasAVX]>;
+ (VCVTSS2SDrm (f32 (IMPLICIT_DEF)), addr:$src)>, Requires<[UseAVX]>;
def : Pat<(extloadf32 addr:$src),
(VCVTSS2SDrm (f32 (IMPLICIT_DEF)), addr:$src)>,
- Requires<[HasAVX, OptForSize]>;
+ Requires<[UseAVX, OptForSize]>;
def : Pat<(extloadf32 addr:$src),
(VCVTSS2SDrr (f32 (IMPLICIT_DEF)), (VMOVSSrm addr:$src))>,
- Requires<[HasAVX, OptForSpeed]>;
+ Requires<[UseAVX, OptForSpeed]>;
def CVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst), (ins FR32:$src),
"cvtss2sd\t{$src, $dst|$dst, $src}",
@@ -1861,14 +1880,14 @@ def Int_VCVTSS2SDrr: I<0x5A, MRMSrcReg,
"vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst,
(int_x86_sse2_cvtss2sd VR128:$src1, VR128:$src2))],
- IIC_SSE_CVT_Scalar_RR>, XS, VEX_4V, Requires<[HasAVX]>,
+ IIC_SSE_CVT_Scalar_RR>, XS, VEX_4V, Requires<[UseAVX]>,
Sched<[WriteCvtF2F]>;
def Int_VCVTSS2SDrm: I<0x5A, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, ssmem:$src2),
"vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst,
(int_x86_sse2_cvtss2sd VR128:$src1, sse_load_f32:$src2))],
- IIC_SSE_CVT_Scalar_RM>, XS, VEX_4V, Requires<[HasAVX]>,
+ IIC_SSE_CVT_Scalar_RM>, XS, VEX_4V, Requires<[UseAVX]>,
Sched<[WriteCvtF2FLd, ReadAfterLd]>;
let Constraints = "$src1 = $dst" in { // SSE2 instructions with XS prefix
def Int_CVTSS2SDrr: I<0x5A, MRMSrcReg,
@@ -1895,7 +1914,7 @@ def VCVTPS2DQrr : VPDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
def VCVTPS2DQrm : VPDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"cvtps2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
- (int_x86_sse2_cvtps2dq (memopv4f32 addr:$src)))],
+ (int_x86_sse2_cvtps2dq (loadv4f32 addr:$src)))],
IIC_SSE_CVT_PS_RM>, VEX, Sched<[WriteCvtF2ILd]>;
def VCVTPS2DQYrr : VPDI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
"cvtps2dq\t{$src, $dst|$dst, $src}",
@@ -1905,7 +1924,7 @@ def VCVTPS2DQYrr : VPDI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
def VCVTPS2DQYrm : VPDI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
"cvtps2dq\t{$src, $dst|$dst, $src}",
[(set VR256:$dst,
- (int_x86_avx_cvt_ps2dq_256 (memopv8f32 addr:$src)))],
+ (int_x86_avx_cvt_ps2dq_256 (loadv8f32 addr:$src)))],
IIC_SSE_CVT_PS_RM>, VEX, VEX_L, Sched<[WriteCvtF2ILd]>;
def CVTPS2DQrr : PDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtps2dq\t{$src, $dst|$dst, $src}",
@@ -1934,7 +1953,7 @@ def : InstAlias<"vcvtpd2dqx\t{$src, $dst|$dst, $src}",
def VCVTPD2DQXrm : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"vcvtpd2dqx\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
- (int_x86_sse2_cvtpd2dq (memopv2f64 addr:$src)))]>, VEX,
+ (int_x86_sse2_cvtpd2dq (loadv2f64 addr:$src)))]>, VEX,
Sched<[WriteCvtF2ILd]>;
// YMM only
@@ -1946,7 +1965,7 @@ def VCVTPD2DQYrr : SDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
def VCVTPD2DQYrm : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
"vcvtpd2dq{y}\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
- (int_x86_avx_cvt_pd2dq_256 (memopv4f64 addr:$src)))]>,
+ (int_x86_avx_cvt_pd2dq_256 (loadv4f64 addr:$src)))]>,
VEX, VEX_L, Sched<[WriteCvtF2ILd]>;
def : InstAlias<"vcvtpd2dq\t{$src, $dst|$dst, $src}",
(VCVTPD2DQYrr VR128:$dst, VR256:$src)>;
@@ -1972,7 +1991,7 @@ def VCVTTPS2DQrr : VS2SI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
def VCVTTPS2DQrm : VS2SI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"cvttps2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvttps2dq
- (memopv4f32 addr:$src)))],
+ (loadv4f32 addr:$src)))],
IIC_SSE_CVT_PS_RM>, VEX, Sched<[WriteCvtF2ILd]>;
def VCVTTPS2DQYrr : VS2SI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
"cvttps2dq\t{$src, $dst|$dst, $src}",
@@ -1982,7 +2001,7 @@ def VCVTTPS2DQYrr : VS2SI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
def VCVTTPS2DQYrm : VS2SI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
"cvttps2dq\t{$src, $dst|$dst, $src}",
[(set VR256:$dst, (int_x86_avx_cvtt_ps2dq_256
- (memopv8f32 addr:$src)))],
+ (loadv8f32 addr:$src)))],
IIC_SSE_CVT_PS_RM>, VEX, VEX_L,
Sched<[WriteCvtF2ILd]>;
@@ -1999,27 +2018,27 @@ def CVTTPS2DQrm : S2SI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
let Predicates = [HasAVX] in {
def : Pat<(v4f32 (sint_to_fp (v4i32 VR128:$src))),
(VCVTDQ2PSrr VR128:$src)>;
- def : Pat<(v4f32 (sint_to_fp (bc_v4i32 (memopv2i64 addr:$src)))),
+ def : Pat<(v4f32 (sint_to_fp (bc_v4i32 (loadv2i64 addr:$src)))),
(VCVTDQ2PSrm addr:$src)>;
def : Pat<(int_x86_sse2_cvtdq2ps VR128:$src),
(VCVTDQ2PSrr VR128:$src)>;
- def : Pat<(int_x86_sse2_cvtdq2ps (bc_v4i32 (memopv2i64 addr:$src))),
+ def : Pat<(int_x86_sse2_cvtdq2ps (bc_v4i32 (loadv2i64 addr:$src))),
(VCVTDQ2PSrm addr:$src)>;
def : Pat<(v4i32 (fp_to_sint (v4f32 VR128:$src))),
(VCVTTPS2DQrr VR128:$src)>;
- def : Pat<(v4i32 (fp_to_sint (memopv4f32 addr:$src))),
+ def : Pat<(v4i32 (fp_to_sint (loadv4f32 addr:$src))),
(VCVTTPS2DQrm addr:$src)>;
def : Pat<(v8f32 (sint_to_fp (v8i32 VR256:$src))),
(VCVTDQ2PSYrr VR256:$src)>;
- def : Pat<(v8f32 (sint_to_fp (bc_v8i32 (memopv4i64 addr:$src)))),
+ def : Pat<(v8f32 (sint_to_fp (bc_v8i32 (loadv4i64 addr:$src)))),
(VCVTDQ2PSYrm addr:$src)>;
def : Pat<(v8i32 (fp_to_sint (v8f32 VR256:$src))),
(VCVTTPS2DQYrr VR256:$src)>;
- def : Pat<(v8i32 (fp_to_sint (memopv8f32 addr:$src))),
+ def : Pat<(v8i32 (fp_to_sint (loadv8f32 addr:$src))),
(VCVTTPS2DQYrm addr:$src)>;
}
@@ -2056,7 +2075,7 @@ def : InstAlias<"vcvttpd2dqx\t{$src, $dst|$dst, $src}",
def VCVTTPD2DQXrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"cvttpd2dqx\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvttpd2dq
- (memopv2f64 addr:$src)))],
+ (loadv2f64 addr:$src)))],
IIC_SSE_CVT_PD_RM>, VEX, Sched<[WriteCvtF2ILd]>;
// YMM only
@@ -2068,7 +2087,7 @@ def VCVTTPD2DQYrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
def VCVTTPD2DQYrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
"cvttpd2dq{y}\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
- (int_x86_avx_cvtt_pd2dq_256 (memopv4f64 addr:$src)))],
+ (int_x86_avx_cvtt_pd2dq_256 (loadv4f64 addr:$src)))],
IIC_SSE_CVT_PD_RM>, VEX, VEX_L, Sched<[WriteCvtF2ILd]>;
def : InstAlias<"vcvttpd2dq\t{$src, $dst|$dst, $src}",
(VCVTTPD2DQYrr VR128:$dst, VR256:$src)>;
@@ -2076,7 +2095,7 @@ def : InstAlias<"vcvttpd2dq\t{$src, $dst|$dst, $src}",
let Predicates = [HasAVX] in {
def : Pat<(v4i32 (fp_to_sint (v4f64 VR256:$src))),
(VCVTTPD2DQYrr VR256:$src)>;
- def : Pat<(v4i32 (fp_to_sint (memopv4f64 addr:$src))),
+ def : Pat<(v4i32 (fp_to_sint (loadv4f64 addr:$src))),
(VCVTTPD2DQYrm addr:$src)>;
} // Predicates = [HasAVX]
@@ -2110,7 +2129,7 @@ def VCVTPS2PDYrr : I<0x5A, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
def VCVTPS2PDYrm : I<0x5A, MRMSrcMem, (outs VR256:$dst), (ins f128mem:$src),
"vcvtps2pd\t{$src, $dst|$dst, $src}",
[(set VR256:$dst,
- (int_x86_avx_cvt_ps2_pd_256 (memopv4f32 addr:$src)))],
+ (int_x86_avx_cvt_ps2_pd_256 (loadv4f32 addr:$src)))],
IIC_SSE_CVT_PD_RM>, TB, VEX, VEX_L, Sched<[WriteCvtF2FLd]>;
}
@@ -2140,7 +2159,7 @@ def VCVTDQ2PDYrm : S2SI<0xE6, MRMSrcMem, (outs VR256:$dst), (ins i128mem:$src),
"vcvtdq2pd\t{$src, $dst|$dst, $src}",
[(set VR256:$dst,
(int_x86_avx_cvtdq2_pd_256
- (bitconvert (memopv2i64 addr:$src))))]>, VEX, VEX_L,
+ (bitconvert (loadv2i64 addr:$src))))]>, VEX, VEX_L,
Sched<[WriteCvtI2FLd]>;
def VCVTDQ2PDYrr : S2SI<0xE6, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
"vcvtdq2pd\t{$src, $dst|$dst, $src}",
@@ -2162,7 +2181,7 @@ def CVTDQ2PDrr : S2SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
let Predicates = [HasAVX] in {
def : Pat<(v4f64 (sint_to_fp (v4i32 VR128:$src))),
(VCVTDQ2PDYrr VR128:$src)>;
- def : Pat<(v4f64 (sint_to_fp (bc_v4i32 (memopv2i64 addr:$src)))),
+ def : Pat<(v4f64 (sint_to_fp (bc_v4i32 (loadv2i64 addr:$src)))),
(VCVTDQ2PDYrm addr:$src)>;
} // Predicates = [HasAVX]
@@ -2181,7 +2200,7 @@ def : InstAlias<"vcvtpd2psx\t{$src, $dst|$dst, $src}",
def VCVTPD2PSXrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"cvtpd2psx\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
- (int_x86_sse2_cvtpd2ps (memopv2f64 addr:$src)))],
+ (int_x86_sse2_cvtpd2ps (loadv2f64 addr:$src)))],
IIC_SSE_CVT_PD_RM>, VEX, Sched<[WriteCvtF2FLd]>;
// YMM only
@@ -2193,7 +2212,7 @@ def VCVTPD2PSYrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
def VCVTPD2PSYrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
"cvtpd2ps{y}\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
- (int_x86_avx_cvt_pd2_ps_256 (memopv4f64 addr:$src)))],
+ (int_x86_avx_cvt_pd2_ps_256 (loadv4f64 addr:$src)))],
IIC_SSE_CVT_PD_RM>, VEX, VEX_L, Sched<[WriteCvtF2FLd]>;
def : InstAlias<"vcvtpd2ps\t{$src, $dst|$dst, $src}",
(VCVTPD2PSYrr VR128:$dst, VR256:$src)>;
@@ -2215,13 +2234,13 @@ def CVTPD2PSrm : PDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
let Predicates = [HasAVX] in {
def : Pat<(int_x86_avx_cvtdq2_ps_256 VR256:$src),
(VCVTDQ2PSYrr VR256:$src)>;
- def : Pat<(int_x86_avx_cvtdq2_ps_256 (bitconvert (memopv4i64 addr:$src))),
+ def : Pat<(int_x86_avx_cvtdq2_ps_256 (bitconvert (loadv4i64 addr:$src))),
(VCVTDQ2PSYrm addr:$src)>;
// Match fround and fextend for 128/256-bit conversions
def : Pat<(v4f32 (X86vfpround (v2f64 VR128:$src))),
(VCVTPD2PSrr VR128:$src)>;
- def : Pat<(v4f32 (X86vfpround (memopv2f64 addr:$src))),
+ def : Pat<(v4f32 (X86vfpround (loadv2f64 addr:$src))),
(VCVTPD2PSXrm addr:$src)>;
def : Pat<(v4f32 (fround (v4f64 VR256:$src))),
(VCVTPD2PSYrr VR256:$src)>;
@@ -2299,7 +2318,7 @@ let Constraints = "$src1 = $dst" in {
defm CMPSD : sse12_cmp_scalar<FR64, f64mem, SSECC, X86cmpsd, f64, loadf64,
"cmp${cc}sd\t{$src2, $dst|$dst, $src2}",
"cmpsd\t{$cc, $src2, $dst|$dst, $src2, $cc}",
- SSE_ALU_F32S>, // same latency as 32 bit compare
+ SSE_ALU_F64S>,
XD;
}
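
Switching SSE_ALU_F32S to SSE_ALU_F64S gives the double-precision compares their own itinerary bundle instead of borrowing the single-precision one (the deleted comments had documented that shortcut). Each bundle pairs a register-register and a register-memory itinerary class; the shape used throughout this file is:

    class OpndItins<InstrItinClass arg_rr, InstrItinClass arg_rm> {
      InstrItinClass rr = arg_rr;   // reg-reg form
      InstrItinClass rm = arg_rm;   // reg-mem (folded load) form
    }
    def SSE_ALU_F64S : OpndItins<IIC_SSE_ALU_F64S_RR, IIC_SSE_ALU_F64S_RM>;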
@@ -2334,7 +2353,7 @@ let Constraints = "$src1 = $dst" in {
SSE_ALU_F32S>, XS;
defm Int_CMPSD : sse12_cmp_scalar_int<f64mem, SSECC, int_x86_sse2_cmp_sd,
"cmp${cc}sd\t{$src, $dst|$dst, $src}",
- SSE_ALU_F32S>, // same latency as f32
+ SSE_ALU_F64S>,
XD;
}
@@ -2403,26 +2422,27 @@ let Defs = [EFLAGS] in {
// sse12_cmp_packed - sse 1 & 2 compare packed instructions
multiclass sse12_cmp_packed<RegisterClass RC, X86MemOperand x86memop,
Operand CC, Intrinsic Int, string asm,
- string asm_alt, Domain d> {
+ string asm_alt, Domain d,
+ OpndItins itins = SSE_ALU_F32P> {
def rri : PIi8<0xC2, MRMSrcReg,
(outs RC:$dst), (ins RC:$src1, RC:$src2, CC:$cc), asm,
[(set RC:$dst, (Int RC:$src1, RC:$src2, imm:$cc))],
- IIC_SSE_CMPP_RR, d>,
+ itins.rr, d>,
Sched<[WriteFAdd]>;
def rmi : PIi8<0xC2, MRMSrcMem,
(outs RC:$dst), (ins RC:$src1, x86memop:$src2, CC:$cc), asm,
[(set RC:$dst, (Int RC:$src1, (memop addr:$src2), imm:$cc))],
- IIC_SSE_CMPP_RM, d>,
+ itins.rm, d>,
Sched<[WriteFAddLd, ReadAfterLd]>;
// Accept explicit immediate argument form instead of comparison code.
let neverHasSideEffects = 1 in {
def rri_alt : PIi8<0xC2, MRMSrcReg,
(outs RC:$dst), (ins RC:$src1, RC:$src2, i8imm:$cc),
- asm_alt, [], IIC_SSE_CMPP_RR, d>, Sched<[WriteFAdd]>;
+ asm_alt, [], itins.rr, d>, Sched<[WriteFAdd]>;
def rmi_alt : PIi8<0xC2, MRMSrcMem,
(outs RC:$dst), (ins RC:$src1, x86memop:$src2, i8imm:$cc),
- asm_alt, [], IIC_SSE_CMPP_RM, d>,
+ asm_alt, [], itins.rm, d>,
Sched<[WriteFAddLd, ReadAfterLd]>;
}
}
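
sse12_cmp_packed now takes its itinerary bundle as a defaulted template argument, so instantiations that omit it keep parsing unchanged while CMPPS/CMPPD below override it explicitly. A minimal, hypothetical illustration of the mechanism (names not from this file):

    multiclass demo_cmp<string asm, OpndItins itins = SSE_ALU_F32P> {
      def rri : PIi8<0xC2, MRMSrcReg, (outs VR128:$dst),
                     (ins VR128:$src1, VR128:$src2, i8imm:$cc), asm, [],
                     itins.rr, SSEPackedSingle>;
    }
    defm DEMOPS : demo_cmp<"cmpps">;                // defaulted bundle
    defm DEMOPD : demo_cmp<"cmppd", SSE_ALU_F64P>;  // explicit override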
@@ -2447,11 +2467,11 @@ let Constraints = "$src1 = $dst" in {
defm CMPPS : sse12_cmp_packed<VR128, f128mem, SSECC, int_x86_sse_cmp_ps,
"cmp${cc}ps\t{$src2, $dst|$dst, $src2}",
"cmpps\t{$cc, $src2, $dst|$dst, $src2, $cc}",
- SSEPackedSingle>, TB;
+ SSEPackedSingle, SSE_ALU_F32P>, TB;
defm CMPPD : sse12_cmp_packed<VR128, f128mem, SSECC, int_x86_sse2_cmp_pd,
"cmp${cc}pd\t{$src2, $dst|$dst, $src2}",
"cmppd\t{$cc, $src2, $dst|$dst, $src2, $cc}",
- SSEPackedDouble>, TB, OpSize;
+ SSEPackedDouble, SSE_ALU_F64P>, TB, OpSize;
}
let Predicates = [HasAVX] in {
@@ -2511,16 +2531,16 @@ multiclass sse12_shuffle<RegisterClass RC, X86MemOperand x86memop,
defm VSHUFPS : sse12_shuffle<VR128, f128mem, v4f32,
"shufps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
- memopv4f32, SSEPackedSingle>, TB, VEX_4V;
+ loadv4f32, SSEPackedSingle>, TB, VEX_4V;
defm VSHUFPSY : sse12_shuffle<VR256, f256mem, v8f32,
"shufps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
- memopv8f32, SSEPackedSingle>, TB, VEX_4V, VEX_L;
+ loadv8f32, SSEPackedSingle>, TB, VEX_4V, VEX_L;
defm VSHUFPD : sse12_shuffle<VR128, f128mem, v2f64,
- "shufpd\t{$src3, $src2, $src1, $dst|$dst, $src2, $src2, $src3}",
- memopv2f64, SSEPackedDouble>, TB, OpSize, VEX_4V;
+ "shufpd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
+ loadv2f64, SSEPackedDouble>, TB, OpSize, VEX_4V;
defm VSHUFPDY : sse12_shuffle<VR256, f256mem, v4f64,
- "shufpd\t{$src3, $src2, $src1, $dst|$dst, $src2, $src2, $src3}",
- memopv4f64, SSEPackedDouble>, TB, OpSize, VEX_4V, VEX_L;
+ "shufpd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
+ loadv4f64, SSEPackedDouble>, TB, OpSize, VEX_4V, VEX_L;
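
The VSHUFPD/VSHUFPDY edit is an assembly-printing fix rather than churn: the Intel-syntax half of the string (after the vertical bar) listed $src2 twice and never $src1. Asm strings carry both dialects in a single brace pair, operand order reversed, so the two halves must be kept in sync by hand:

    //           AT&T order            |       Intel order
    "shufpd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"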
let Constraints = "$src1 = $dst" in {
defm SHUFPS : sse12_shuffle<VR128, f128mem, v4f32,
@@ -2535,13 +2555,13 @@ let Constraints = "$src1 = $dst" in {
let Predicates = [HasAVX] in {
def : Pat<(v4i32 (X86Shufp VR128:$src1,
- (bc_v4i32 (memopv2i64 addr:$src2)), (i8 imm:$imm))),
+ (bc_v4i32 (loadv2i64 addr:$src2)), (i8 imm:$imm))),
(VSHUFPSrmi VR128:$src1, addr:$src2, imm:$imm)>;
def : Pat<(v4i32 (X86Shufp VR128:$src1, VR128:$src2, (i8 imm:$imm))),
(VSHUFPSrri VR128:$src1, VR128:$src2, imm:$imm)>;
def : Pat<(v2i64 (X86Shufp VR128:$src1,
- (memopv2i64 addr:$src2), (i8 imm:$imm))),
+ (loadv2i64 addr:$src2), (i8 imm:$imm))),
(VSHUFPDrmi VR128:$src1, addr:$src2, imm:$imm)>;
def : Pat<(v2i64 (X86Shufp VR128:$src1, VR128:$src2, (i8 imm:$imm))),
(VSHUFPDrri VR128:$src1, VR128:$src2, imm:$imm)>;
@@ -2550,13 +2570,13 @@ let Predicates = [HasAVX] in {
def : Pat<(v8i32 (X86Shufp VR256:$src1, VR256:$src2, (i8 imm:$imm))),
(VSHUFPSYrri VR256:$src1, VR256:$src2, imm:$imm)>;
def : Pat<(v8i32 (X86Shufp VR256:$src1,
- (bc_v8i32 (memopv4i64 addr:$src2)), (i8 imm:$imm))),
+ (bc_v8i32 (loadv4i64 addr:$src2)), (i8 imm:$imm))),
(VSHUFPSYrmi VR256:$src1, addr:$src2, imm:$imm)>;
def : Pat<(v4i64 (X86Shufp VR256:$src1, VR256:$src2, (i8 imm:$imm))),
(VSHUFPDYrri VR256:$src1, VR256:$src2, imm:$imm)>;
def : Pat<(v4i64 (X86Shufp VR256:$src1,
- (memopv4i64 addr:$src2), (i8 imm:$imm))),
+ (loadv4i64 addr:$src2), (i8 imm:$imm))),
(VSHUFPDYrmi VR256:$src1, addr:$src2, imm:$imm)>;
}
@@ -2600,29 +2620,29 @@ multiclass sse12_unpack_interleave<bits<8> opc, SDNode OpNode, ValueType vt,
Sched<[WriteShuffleLd, ReadAfterLd]>;
}
-defm VUNPCKHPS: sse12_unpack_interleave<0x15, X86Unpckh, v4f32, memopv4f32,
+defm VUNPCKHPS: sse12_unpack_interleave<0x15, X86Unpckh, v4f32, loadv4f32,
VR128, f128mem, "unpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
SSEPackedSingle>, TB, VEX_4V;
-defm VUNPCKHPD: sse12_unpack_interleave<0x15, X86Unpckh, v2f64, memopv2f64,
+defm VUNPCKHPD: sse12_unpack_interleave<0x15, X86Unpckh, v2f64, loadv2f64,
VR128, f128mem, "unpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
SSEPackedDouble>, TB, OpSize, VEX_4V;
-defm VUNPCKLPS: sse12_unpack_interleave<0x14, X86Unpckl, v4f32, memopv4f32,
+defm VUNPCKLPS: sse12_unpack_interleave<0x14, X86Unpckl, v4f32, loadv4f32,
VR128, f128mem, "unpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
SSEPackedSingle>, TB, VEX_4V;
-defm VUNPCKLPD: sse12_unpack_interleave<0x14, X86Unpckl, v2f64, memopv2f64,
+defm VUNPCKLPD: sse12_unpack_interleave<0x14, X86Unpckl, v2f64, loadv2f64,
VR128, f128mem, "unpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
SSEPackedDouble>, TB, OpSize, VEX_4V;
-defm VUNPCKHPSY: sse12_unpack_interleave<0x15, X86Unpckh, v8f32, memopv8f32,
+defm VUNPCKHPSY: sse12_unpack_interleave<0x15, X86Unpckh, v8f32, loadv8f32,
VR256, f256mem, "unpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
SSEPackedSingle>, TB, VEX_4V, VEX_L;
-defm VUNPCKHPDY: sse12_unpack_interleave<0x15, X86Unpckh, v4f64, memopv4f64,
+defm VUNPCKHPDY: sse12_unpack_interleave<0x15, X86Unpckh, v4f64, loadv4f64,
VR256, f256mem, "unpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
SSEPackedDouble>, TB, OpSize, VEX_4V, VEX_L;
-defm VUNPCKLPSY: sse12_unpack_interleave<0x14, X86Unpckl, v8f32, memopv8f32,
+defm VUNPCKLPSY: sse12_unpack_interleave<0x14, X86Unpckl, v8f32, loadv8f32,
VR256, f256mem, "unpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
SSEPackedSingle>, TB, VEX_4V, VEX_L;
-defm VUNPCKLPDY: sse12_unpack_interleave<0x14, X86Unpckl, v4f64, memopv4f64,
+defm VUNPCKLPDY: sse12_unpack_interleave<0x14, X86Unpckl, v4f64, loadv4f64,
VR256, f256mem, "unpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
SSEPackedDouble>, TB, OpSize, VEX_4V, VEX_L;
@@ -2642,20 +2662,20 @@ let Constraints = "$src1 = $dst" in {
} // Constraints = "$src1 = $dst"
let Predicates = [HasAVX1Only] in {
- def : Pat<(v8i32 (X86Unpckl VR256:$src1, (bc_v8i32 (memopv4i64 addr:$src2)))),
+ def : Pat<(v8i32 (X86Unpckl VR256:$src1, (bc_v8i32 (loadv4i64 addr:$src2)))),
(VUNPCKLPSYrm VR256:$src1, addr:$src2)>;
def : Pat<(v8i32 (X86Unpckl VR256:$src1, VR256:$src2)),
(VUNPCKLPSYrr VR256:$src1, VR256:$src2)>;
- def : Pat<(v8i32 (X86Unpckh VR256:$src1, (bc_v8i32 (memopv4i64 addr:$src2)))),
+ def : Pat<(v8i32 (X86Unpckh VR256:$src1, (bc_v8i32 (loadv4i64 addr:$src2)))),
(VUNPCKHPSYrm VR256:$src1, addr:$src2)>;
def : Pat<(v8i32 (X86Unpckh VR256:$src1, VR256:$src2)),
(VUNPCKHPSYrr VR256:$src1, VR256:$src2)>;
- def : Pat<(v4i64 (X86Unpckl VR256:$src1, (memopv4i64 addr:$src2))),
+ def : Pat<(v4i64 (X86Unpckl VR256:$src1, (loadv4i64 addr:$src2))),
(VUNPCKLPDYrm VR256:$src1, addr:$src2)>;
def : Pat<(v4i64 (X86Unpckl VR256:$src1, VR256:$src2)),
(VUNPCKLPDYrr VR256:$src1, VR256:$src2)>;
- def : Pat<(v4i64 (X86Unpckh VR256:$src1, (memopv4i64 addr:$src2))),
+ def : Pat<(v4i64 (X86Unpckh VR256:$src1, (loadv4i64 addr:$src2))),
(VUNPCKHPDYrm VR256:$src1, addr:$src2)>;
def : Pat<(v4i64 (X86Unpckh VR256:$src1, VR256:$src2)),
(VUNPCKHPDYrr VR256:$src1, VR256:$src2)>;
@@ -2686,13 +2706,10 @@ let Predicates = [UseSSE2] in {
/// sse12_extr_sign_mask - sse 1 & 2 unpack and interleave
multiclass sse12_extr_sign_mask<RegisterClass RC, Intrinsic Int, string asm,
Domain d> {
- def rr32 : PI<0x50, MRMSrcReg, (outs GR32:$dst), (ins RC:$src),
- !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
- [(set GR32:$dst, (Int RC:$src))], IIC_SSE_MOVMSK, d>,
- Sched<[WriteVecLogic]>;
- def rr64 : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins RC:$src),
- !strconcat(asm, "\t{$src, $dst|$dst, $src}"), [],
- IIC_SSE_MOVMSK, d>, REX_W, Sched<[WriteVecLogic]>;
+ def rr : PI<0x50, MRMSrcReg, (outs GR32orGR64:$dst), (ins RC:$src),
+ !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
+ [(set GR32orGR64:$dst, (Int RC:$src))], IIC_SSE_MOVMSK, d>,
+ Sched<[WriteVecLogic]>;
}
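
Folding the rr32/rr64 pair into one def relies on GR32orGR64, a register operand that is GR32 for code generation but whose assembler match class also accepts 64-bit register names (the instruction zeroes the upper bits anyway, so a GR64 destination is an alternate spelling, not a different operation). Its definition is roughly:

    def X86GR32orGR64AsmOperand : AsmOperandClass {
      let Name = "GR32orGR64";
    }
    def GR32orGR64 : RegisterOperand<GR32> {
      let ParserMatchClass = X86GR32orGR64AsmOperand;
    }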
let Predicates = [HasAVX] in {
@@ -2709,29 +2726,15 @@ let Predicates = [HasAVX] in {
OpSize, VEX, VEX_L;
def : Pat<(i32 (X86fgetsign FR32:$src)),
- (VMOVMSKPSrr32 (COPY_TO_REGCLASS FR32:$src, VR128))>;
+ (VMOVMSKPSrr (COPY_TO_REGCLASS FR32:$src, VR128))>;
def : Pat<(i64 (X86fgetsign FR32:$src)),
- (VMOVMSKPSrr64 (COPY_TO_REGCLASS FR32:$src, VR128))>;
+ (SUBREG_TO_REG (i64 0),
+ (VMOVMSKPSrr (COPY_TO_REGCLASS FR32:$src, VR128)), sub_32bit)>;
def : Pat<(i32 (X86fgetsign FR64:$src)),
- (VMOVMSKPDrr32 (COPY_TO_REGCLASS FR64:$src, VR128))>;
+ (VMOVMSKPDrr (COPY_TO_REGCLASS FR64:$src, VR128))>;
def : Pat<(i64 (X86fgetsign FR64:$src)),
- (VMOVMSKPDrr64 (COPY_TO_REGCLASS FR64:$src, VR128))>;
-
- // Assembler Only
- def VMOVMSKPSr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src),
- "movmskps\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVMSK,
- SSEPackedSingle>, TB, VEX, Sched<[WriteVecLogic]>;
- def VMOVMSKPDr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src),
- "movmskpd\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVMSK,
- SSEPackedDouble>, TB,
- OpSize, VEX, Sched<[WriteVecLogic]>;
- def VMOVMSKPSYr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR256:$src),
- "movmskps\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVMSK,
- SSEPackedSingle>, TB, VEX, VEX_L, Sched<[WriteVecLogic]>;
- def VMOVMSKPDYr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR256:$src),
- "movmskpd\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVMSK,
- SSEPackedDouble>, TB,
- OpSize, VEX, VEX_L, Sched<[WriteVecLogic]>;
+ (SUBREG_TO_REG (i64 0),
+ (VMOVMSKPDrr (COPY_TO_REGCLASS FR64:$src, VR128)), sub_32bit)>;
}
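
With the dedicated 64-bit variants gone, the i64 patterns wrap the 32-bit instruction in SUBREG_TO_REG instead. On x86-64 any 32-bit register write already zeroes bits 63:32, so SUBREG_TO_REG merely reinterprets the result as i64 and emits no code; X86InstrCompiler.td uses the same idiom for plain zero-extension, roughly:

    // The MOV32rr (or any 32-bit def) implicitly clears the upper half,
    // so widening to i64 is free.
    def : Pat<(i64 (zext GR32:$src)),
              (SUBREG_TO_REG (i64 0), (MOV32rr GR32:$src), sub_32bit)>;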
defm MOVMSKPS : sse12_extr_sign_mask<VR128, int_x86_sse_movmsk_ps, "movmskps",
@@ -2740,16 +2743,18 @@ defm MOVMSKPD : sse12_extr_sign_mask<VR128, int_x86_sse2_movmsk_pd, "movmskpd",
SSEPackedDouble>, TB, OpSize;
def : Pat<(i32 (X86fgetsign FR32:$src)),
- (MOVMSKPSrr32 (COPY_TO_REGCLASS FR32:$src, VR128))>,
+ (MOVMSKPSrr (COPY_TO_REGCLASS FR32:$src, VR128))>,
Requires<[UseSSE1]>;
def : Pat<(i64 (X86fgetsign FR32:$src)),
- (MOVMSKPSrr64 (COPY_TO_REGCLASS FR32:$src, VR128))>,
+ (SUBREG_TO_REG (i64 0),
+ (MOVMSKPSrr (COPY_TO_REGCLASS FR32:$src, VR128)), sub_32bit)>,
Requires<[UseSSE1]>;
def : Pat<(i32 (X86fgetsign FR64:$src)),
- (MOVMSKPDrr32 (COPY_TO_REGCLASS FR64:$src, VR128))>,
+ (MOVMSKPDrr (COPY_TO_REGCLASS FR64:$src, VR128))>,
Requires<[UseSSE2]>;
def : Pat<(i64 (X86fgetsign FR64:$src)),
- (MOVMSKPDrr64 (COPY_TO_REGCLASS FR64:$src, VR128))>,
+ (SUBREG_TO_REG (i64 0),
+ (MOVMSKPDrr (COPY_TO_REGCLASS FR64:$src, VR128)), sub_32bit)>,
Requires<[UseSSE2]>;
//===---------------------------------------------------------------------===//
@@ -2788,7 +2793,7 @@ multiclass PDI_binop_all<bits<8> opc, string OpcodeStr, SDNode Opcode,
OpndItins itins, bit IsCommutable = 0> {
let Predicates = [HasAVX] in
defm V#NAME : PDI_binop_rm<opc, !strconcat("v", OpcodeStr), Opcode, OpVT128,
- VR128, memopv2i64, i128mem, itins, IsCommutable, 0>, VEX_4V;
+ VR128, loadv2i64, i128mem, itins, IsCommutable, 0>, VEX_4V;
let Constraints = "$src1 = $dst" in
defm NAME : PDI_binop_rm<opc, OpcodeStr, Opcode, OpVT128, VR128,
@@ -2796,7 +2801,7 @@ let Constraints = "$src1 = $dst" in
let Predicates = [HasAVX2] in
defm V#NAME#Y : PDI_binop_rm<opc, !strconcat("v", OpcodeStr), Opcode,
- OpVT256, VR256, memopv4i64, i256mem, itins,
+ OpVT256, VR256, loadv4i64, i256mem, itins,
IsCommutable, 0>, VEX_4V, VEX_L;
}
@@ -2836,16 +2841,18 @@ multiclass sse12_fp_alias_pack_logical<bits<8> opc, string OpcodeStr,
}
// Alias bitwise logical operations using SSE logical ops on packed FP values.
-defm FsAND : sse12_fp_alias_pack_logical<0x54, "and", X86fand,
- SSE_BIT_ITINS_P>;
-defm FsOR : sse12_fp_alias_pack_logical<0x56, "or", X86for,
- SSE_BIT_ITINS_P>;
-defm FsXOR : sse12_fp_alias_pack_logical<0x57, "xor", X86fxor,
- SSE_BIT_ITINS_P>;
-
-let isCommutable = 0 in
- defm FsANDN : sse12_fp_alias_pack_logical<0x55, "andn", X86fandn,
+let isCodeGenOnly = 1 in {
+ defm FsAND : sse12_fp_alias_pack_logical<0x54, "and", X86fand,
+ SSE_BIT_ITINS_P>;
+ defm FsOR : sse12_fp_alias_pack_logical<0x56, "or", X86for,
SSE_BIT_ITINS_P>;
+ defm FsXOR : sse12_fp_alias_pack_logical<0x57, "xor", X86fxor,
+ SSE_BIT_ITINS_P>;
+
+ let isCommutable = 0 in
+ defm FsANDN : sse12_fp_alias_pack_logical<0x55, "andn", X86fandn,
+ SSE_BIT_ITINS_P>;
+}
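
Wrapping the Fs* logical ops in isCodeGenOnly marks them as instruction-selection-only duplicates: they reuse the ANDPS/ORPS/XORPS/ANDNPS encodings on the scalar FR32/FR64 register classes, and without the flag the assembler and disassembler table generators would see two instructions claiming the same encoding. Hypothetical usage, with the flag's meaning paraphrased from the common Instruction class in include/llvm/Target/Target.td:

    // bit isCodeGenOnly = 0;  // codegen-only pseudo-variant; keep it
    //                         // out of asm matcher/disassembler tables
    let isCodeGenOnly = 1 in
    defm FsDEMO : sse12_fp_alias_pack_logical<0x54, "and", X86fand,
                                              SSE_BIT_ITINS_P>;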
/// sse12_fp_packed_logical - SSE 1 & 2 packed FP logical ops
///
@@ -2855,14 +2862,14 @@ multiclass sse12_fp_packed_logical<bits<8> opc, string OpcodeStr,
!strconcat(OpcodeStr, "ps"), f256mem,
[(set VR256:$dst, (v4i64 (OpNode VR256:$src1, VR256:$src2)))],
[(set VR256:$dst, (OpNode (bc_v4i64 (v8f32 VR256:$src1)),
- (memopv4i64 addr:$src2)))], 0>, TB, VEX_4V, VEX_L;
+ (loadv4i64 addr:$src2)))], 0>, TB, VEX_4V, VEX_L;
defm V#NAME#PDY : sse12_fp_packed_logical_rm<opc, VR256, SSEPackedDouble,
!strconcat(OpcodeStr, "pd"), f256mem,
[(set VR256:$dst, (OpNode (bc_v4i64 (v4f64 VR256:$src1)),
(bc_v4i64 (v4f64 VR256:$src2))))],
[(set VR256:$dst, (OpNode (bc_v4i64 (v4f64 VR256:$src1)),
- (memopv4i64 addr:$src2)))], 0>,
+ (loadv4i64 addr:$src2)))], 0>,
TB, OpSize, VEX_4V, VEX_L;
// In AVX no need to add a pattern for 128-bit logical rr ps, because they
@@ -2872,14 +2879,14 @@ multiclass sse12_fp_packed_logical<bits<8> opc, string OpcodeStr,
defm V#NAME#PS : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedSingle,
!strconcat(OpcodeStr, "ps"), f128mem, [],
[(set VR128:$dst, (OpNode (bc_v2i64 (v4f32 VR128:$src1)),
- (memopv2i64 addr:$src2)))], 0>, TB, VEX_4V;
+ (loadv2i64 addr:$src2)))], 0>, TB, VEX_4V;
defm V#NAME#PD : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedDouble,
!strconcat(OpcodeStr, "pd"), f128mem,
[(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)),
(bc_v2i64 (v2f64 VR128:$src2))))],
[(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)),
- (memopv2i64 addr:$src2)))], 0>,
+ (loadv2i64 addr:$src2)))], 0>,
TB, OpSize, VEX_4V;
let Constraints = "$src1 = $dst" in {
@@ -2921,120 +2928,93 @@ let isCommutable = 0 in
/// FIXME: once all 256-bit intrinsics are matched, cleanup and refactor those
/// classes below
-multiclass basic_sse12_fp_binop_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
- SizeItins itins,
- bit Is2Addr = 1> {
- defm SS : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "ss"),
- OpNode, FR32, f32mem,
- itins.s, Is2Addr>, XS;
- defm SD : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "sd"),
- OpNode, FR64, f64mem,
- itins.d, Is2Addr>, XD;
-}
-
multiclass basic_sse12_fp_binop_p<bits<8> opc, string OpcodeStr,
SDNode OpNode, SizeItins itins> {
-let Predicates = [HasAVX] in {
defm V#NAME#PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode,
- VR128, v4f32, f128mem, memopv4f32,
+ VR128, v4f32, f128mem, loadv4f32,
SSEPackedSingle, itins.s, 0>, TB, VEX_4V;
defm V#NAME#PD : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode,
- VR128, v2f64, f128mem, memopv2f64,
+ VR128, v2f64, f128mem, loadv2f64,
SSEPackedDouble, itins.d, 0>, TB, OpSize, VEX_4V;
defm V#NAME#PSY : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"),
- OpNode, VR256, v8f32, f256mem, memopv8f32,
+ OpNode, VR256, v8f32, f256mem, loadv8f32,
SSEPackedSingle, itins.s, 0>, TB, VEX_4V, VEX_L;
defm V#NAME#PDY : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"),
- OpNode, VR256, v4f64, f256mem, memopv4f64,
+ OpNode, VR256, v4f64, f256mem, loadv4f64,
SSEPackedDouble, itins.d, 0>, TB, OpSize, VEX_4V, VEX_L;
-}
-
-let Constraints = "$src1 = $dst" in {
- defm PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode, VR128,
- v4f32, f128mem, memopv4f32, SSEPackedSingle,
- itins.s, 1>, TB;
- defm PD : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode, VR128,
- v2f64, f128mem, memopv2f64, SSEPackedDouble,
- itins.d, 1>, TB, OpSize;
-}
-}
-multiclass basic_sse12_fp_binop_s_int<bits<8> opc, string OpcodeStr,
- SizeItins itins,
- bit Is2Addr = 1> {
- defm SS : sse12_fp_scalar_int<opc, OpcodeStr, VR128,
- !strconcat(OpcodeStr, "ss"), "", "_ss", ssmem, sse_load_f32,
- itins.s, Is2Addr>, XS;
- defm SD : sse12_fp_scalar_int<opc, OpcodeStr, VR128,
- !strconcat(OpcodeStr, "sd"), "2", "_sd", sdmem, sse_load_f64,
- itins.d, Is2Addr>, XD;
+ let Constraints = "$src1 = $dst" in {
+ defm PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode, VR128,
+ v4f32, f128mem, memopv4f32, SSEPackedSingle,
+ itins.s>, TB;
+ defm PD : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode, VR128,
+ v2f64, f128mem, memopv2f64, SSEPackedDouble,
+ itins.d>, TB, OpSize;
+ }
}
-// Binary Arithmetic instructions
-defm ADD : basic_sse12_fp_binop_p<0x58, "add", fadd, SSE_ALU_ITINS_P>;
-defm MUL : basic_sse12_fp_binop_p<0x59, "mul", fmul, SSE_MUL_ITINS_P>;
-let isCommutable = 0 in {
- defm SUB : basic_sse12_fp_binop_p<0x5C, "sub", fsub, SSE_ALU_ITINS_P>;
- defm DIV : basic_sse12_fp_binop_p<0x5E, "div", fdiv, SSE_DIV_ITINS_P>;
- defm MAX : basic_sse12_fp_binop_p<0x5F, "max", X86fmax, SSE_ALU_ITINS_P>;
- defm MIN : basic_sse12_fp_binop_p<0x5D, "min", X86fmin, SSE_ALU_ITINS_P>;
-}
+multiclass basic_sse12_fp_binop_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ SizeItins itins> {
+ defm V#NAME#SS : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "ss"),
+ OpNode, FR32, f32mem, itins.s, 0>, XS, VEX_4V, VEX_LIG;
+ defm V#NAME#SD : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "sd"),
+ OpNode, FR64, f64mem, itins.d, 0>, XD, VEX_4V, VEX_LIG;
-let isCodeGenOnly = 1 in {
- defm MAXC: basic_sse12_fp_binop_p<0x5F, "max", X86fmaxc, SSE_ALU_ITINS_P>;
- defm MINC: basic_sse12_fp_binop_p<0x5D, "min", X86fminc, SSE_ALU_ITINS_P>;
+ let Constraints = "$src1 = $dst" in {
+ defm SS : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "ss"),
+ OpNode, FR32, f32mem, itins.s>, XS;
+ defm SD : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "sd"),
+ OpNode, FR64, f64mem, itins.d>, XD;
+ }
}
-defm VADD : basic_sse12_fp_binop_s<0x58, "add", fadd, SSE_ALU_ITINS_S, 0>,
- basic_sse12_fp_binop_s_int<0x58, "add", SSE_ALU_ITINS_S, 0>,
- VEX_4V, VEX_LIG;
-defm VMUL : basic_sse12_fp_binop_s<0x59, "mul", fmul, SSE_MUL_ITINS_S, 0>,
- basic_sse12_fp_binop_s_int<0x59, "mul", SSE_MUL_ITINS_S, 0>,
- VEX_4V, VEX_LIG;
+multiclass basic_sse12_fp_binop_s_int<bits<8> opc, string OpcodeStr,
+ SizeItins itins> {
+ defm V#NAME#SS : sse12_fp_scalar_int<opc, OpcodeStr, VR128,
+ !strconcat(OpcodeStr, "ss"), "", "_ss", ssmem, sse_load_f32,
+ itins.s, 0>, XS, VEX_4V, VEX_LIG;
+ defm V#NAME#SD : sse12_fp_scalar_int<opc, OpcodeStr, VR128,
+ !strconcat(OpcodeStr, "sd"), "2", "_sd", sdmem, sse_load_f64,
+ itins.d, 0>, XD, VEX_4V, VEX_LIG;
-let isCommutable = 0 in {
- defm VSUB : basic_sse12_fp_binop_s<0x5C, "sub", fsub, SSE_ALU_ITINS_S, 0>,
- basic_sse12_fp_binop_s_int<0x5C, "sub", SSE_ALU_ITINS_S, 0>,
- VEX_4V, VEX_LIG;
- defm VDIV : basic_sse12_fp_binop_s<0x5E, "div", fdiv, SSE_DIV_ITINS_S, 0>,
- basic_sse12_fp_binop_s_int<0x5E, "div", SSE_DIV_ITINS_S, 0>,
- VEX_4V, VEX_LIG;
- defm VMAX : basic_sse12_fp_binop_s<0x5F, "max", X86fmax, SSE_ALU_ITINS_S, 0>,
- basic_sse12_fp_binop_s_int<0x5F, "max", SSE_ALU_ITINS_S, 0>,
- VEX_4V, VEX_LIG;
- defm VMIN : basic_sse12_fp_binop_s<0x5D, "min", X86fmin, SSE_ALU_ITINS_S, 0>,
- basic_sse12_fp_binop_s_int<0x5D, "min", SSE_ALU_ITINS_S, 0>,
- VEX_4V, VEX_LIG;
+ let Constraints = "$src1 = $dst" in {
+ defm SS : sse12_fp_scalar_int<opc, OpcodeStr, VR128,
+ !strconcat(OpcodeStr, "ss"), "", "_ss", ssmem, sse_load_f32,
+ itins.s>, XS;
+ defm SD : sse12_fp_scalar_int<opc, OpcodeStr, VR128,
+ !strconcat(OpcodeStr, "sd"), "2", "_sd", sdmem, sse_load_f64,
+ itins.d>, XD;
+ }
}
-let Constraints = "$src1 = $dst" in {
- defm ADD : basic_sse12_fp_binop_s<0x58, "add", fadd, SSE_ALU_ITINS_S>,
- basic_sse12_fp_binop_s_int<0x58, "add", SSE_ALU_ITINS_S>;
- defm MUL : basic_sse12_fp_binop_s<0x59, "mul", fmul, SSE_MUL_ITINS_S>,
- basic_sse12_fp_binop_s_int<0x59, "mul", SSE_MUL_ITINS_S>;
-
- let isCommutable = 0 in {
- defm SUB : basic_sse12_fp_binop_s<0x5C, "sub", fsub, SSE_ALU_ITINS_S>,
- basic_sse12_fp_binop_s_int<0x5C, "sub", SSE_ALU_ITINS_S>;
- defm DIV : basic_sse12_fp_binop_s<0x5E, "div", fdiv, SSE_DIV_ITINS_S>,
- basic_sse12_fp_binop_s_int<0x5E, "div", SSE_DIV_ITINS_S>;
- defm MAX : basic_sse12_fp_binop_s<0x5F, "max", X86fmax, SSE_ALU_ITINS_S>,
- basic_sse12_fp_binop_s_int<0x5F, "max", SSE_ALU_ITINS_S>;
- defm MIN : basic_sse12_fp_binop_s<0x5D, "min", X86fmin, SSE_ALU_ITINS_S>,
- basic_sse12_fp_binop_s_int<0x5D, "min", SSE_ALU_ITINS_S>;
- }
+// Binary Arithmetic instructions
+defm ADD : basic_sse12_fp_binop_p<0x58, "add", fadd, SSE_ALU_ITINS_P>,
+ basic_sse12_fp_binop_s<0x58, "add", fadd, SSE_ALU_ITINS_S>,
+ basic_sse12_fp_binop_s_int<0x58, "add", SSE_ALU_ITINS_S>;
+defm MUL : basic_sse12_fp_binop_p<0x59, "mul", fmul, SSE_MUL_ITINS_P>,
+ basic_sse12_fp_binop_s<0x59, "mul", fmul, SSE_MUL_ITINS_S>,
+ basic_sse12_fp_binop_s_int<0x59, "mul", SSE_MUL_ITINS_S>;
+let isCommutable = 0 in {
+ defm SUB : basic_sse12_fp_binop_p<0x5C, "sub", fsub, SSE_ALU_ITINS_P>,
+ basic_sse12_fp_binop_s<0x5C, "sub", fsub, SSE_ALU_ITINS_S>,
+ basic_sse12_fp_binop_s_int<0x5C, "sub", SSE_ALU_ITINS_S>;
+ defm DIV : basic_sse12_fp_binop_p<0x5E, "div", fdiv, SSE_DIV_ITINS_P>,
+ basic_sse12_fp_binop_s<0x5E, "div", fdiv, SSE_DIV_ITINS_S>,
+ basic_sse12_fp_binop_s_int<0x5E, "div", SSE_DIV_ITINS_S>;
+ defm MAX : basic_sse12_fp_binop_p<0x5F, "max", X86fmax, SSE_ALU_ITINS_P>,
+ basic_sse12_fp_binop_s<0x5F, "max", X86fmax, SSE_ALU_ITINS_S>,
+ basic_sse12_fp_binop_s_int<0x5F, "max", SSE_ALU_ITINS_S>;
+ defm MIN : basic_sse12_fp_binop_p<0x5D, "min", X86fmin, SSE_ALU_ITINS_P>,
+ basic_sse12_fp_binop_s<0x5D, "min", X86fmin, SSE_ALU_ITINS_S>,
+ basic_sse12_fp_binop_s_int<0x5D, "min", SSE_ALU_ITINS_S>;
}
let isCodeGenOnly = 1 in {
- defm VMAXC: basic_sse12_fp_binop_s<0x5F, "max", X86fmaxc, SSE_ALU_ITINS_S, 0>,
- VEX_4V, VEX_LIG;
- defm VMINC: basic_sse12_fp_binop_s<0x5D, "min", X86fminc, SSE_ALU_ITINS_S, 0>,
- VEX_4V, VEX_LIG;
- let Constraints = "$src1 = $dst" in {
- defm MAXC: basic_sse12_fp_binop_s<0x5F, "max", X86fmaxc, SSE_ALU_ITINS_S>;
- defm MINC: basic_sse12_fp_binop_s<0x5D, "min", X86fminc, SSE_ALU_ITINS_S>;
- }
+ defm MAXC: basic_sse12_fp_binop_p<0x5F, "max", X86fmaxc, SSE_ALU_ITINS_P>,
+ basic_sse12_fp_binop_s<0x5F, "max", X86fmaxc, SSE_ALU_ITINS_S>;
+ defm MINC: basic_sse12_fp_binop_p<0x5D, "min", X86fminc, SSE_ALU_ITINS_P>,
+ basic_sse12_fp_binop_s<0x5D, "min", X86fminc, SSE_ALU_ITINS_S>;
}
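
The net effect of this refactor is that a single defm now emits both encodings of each binop. Inside a multiclass, NAME expands to the instantiating defm's name and '#' is TableGen's paste operator, so under defm ADD the V#NAME#PS line yields VADDPS while the plain PS line yields ADDPS, the latter picking up its two-address constraint from the surrounding let. Schematically (literal strings substituted for the !strconcat calls):

    multiclass demo_binop<bits<8> opc, SDNode OpNode> {
      defm V#NAME#PS : sse12_fp_packed<opc, "vaddps", OpNode, VR128, v4f32,
                         f128mem, loadv4f32, SSEPackedSingle,
                         SSE_ALU_F32P, 0>, TB, VEX_4V;
      let Constraints = "$src1 = $dst" in
      defm PS : sse12_fp_packed<opc, "addps", OpNode, VR128, v4f32,
                  f128mem, memopv4f32, SSEPackedSingle, SSE_ALU_F32P>, TB;
    }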
/// Unop Arithmetic
@@ -3180,7 +3160,7 @@ let Predicates = [HasAVX] in {
def V#NAME#PSm : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
!strconcat("v", OpcodeStr,
"ps\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (OpNode (memopv4f32 addr:$src)))],
+ [(set VR128:$dst, (OpNode (loadv4f32 addr:$src)))],
itins.rm>, VEX, Sched<[itins.Sched.Folded]>;
def V#NAME#PSYr : PSI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
!strconcat("v", OpcodeStr,
@@ -3190,7 +3170,7 @@ let Predicates = [HasAVX] in {
def V#NAME#PSYm : PSI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
!strconcat("v", OpcodeStr,
"ps\t{$src, $dst|$dst, $src}"),
- [(set VR256:$dst, (OpNode (memopv8f32 addr:$src)))],
+ [(set VR256:$dst, (OpNode (loadv8f32 addr:$src)))],
itins.rm>, VEX, VEX_L, Sched<[itins.Sched.Folded]>;
}
@@ -3217,7 +3197,7 @@ let Predicates = [HasAVX] in {
def V#NAME#PSm_Int : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
!strconcat("v", OpcodeStr,
"ps\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (V4F32Int (memopv4f32 addr:$src)))],
+ [(set VR128:$dst, (V4F32Int (loadv4f32 addr:$src)))],
itins.rm>, VEX, Sched<[itins.Sched.Folded]>;
def V#NAME#PSYr_Int : PSI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
!strconcat("v", OpcodeStr,
@@ -3228,7 +3208,7 @@ let Predicates = [HasAVX] in {
(ins f256mem:$src),
!strconcat("v", OpcodeStr,
"ps\t{$src, $dst|$dst, $src}"),
- [(set VR256:$dst, (V8F32Int (memopv8f32 addr:$src)))],
+ [(set VR256:$dst, (V8F32Int (loadv8f32 addr:$src)))],
itins.rm>, VEX, VEX_L, Sched<[itins.Sched.Folded]>;
}
@@ -3298,7 +3278,7 @@ let Predicates = [HasAVX] in {
def V#NAME#PDm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
!strconcat("v", OpcodeStr,
"pd\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (OpNode (memopv2f64 addr:$src)))],
+ [(set VR128:$dst, (OpNode (loadv2f64 addr:$src)))],
itins.rm>, VEX, Sched<[itins.Sched.Folded]>;
def V#NAME#PDYr : PDI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
!strconcat("v", OpcodeStr,
@@ -3308,7 +3288,7 @@ let Predicates = [HasAVX] in {
def V#NAME#PDYm : PDI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
!strconcat("v", OpcodeStr,
"pd\t{$src, $dst|$dst, $src}"),
- [(set VR256:$dst, (OpNode (memopv4f64 addr:$src)))],
+ [(set VR256:$dst, (OpNode (loadv4f64 addr:$src)))],
itins.rm>, VEX, VEX_L, Sched<[itins.Sched.Folded]>;
}
@@ -3341,30 +3321,31 @@ defm RCP : sse1_fp_unop_rw<0x53, "rcp", X86frcp, SSE_RCPS>,
sse1_fp_unop_p_int<0x53, "rcp", int_x86_sse_rcp_ps,
int_x86_avx_rcp_ps_256, SSE_RCPP>;
-def : Pat<(f32 (fsqrt FR32:$src)),
- (VSQRTSSr (f32 (IMPLICIT_DEF)), FR32:$src)>, Requires<[HasAVX]>;
-def : Pat<(f32 (fsqrt (load addr:$src))),
- (VSQRTSSm (f32 (IMPLICIT_DEF)), addr:$src)>,
- Requires<[HasAVX, OptForSize]>;
-def : Pat<(f64 (fsqrt FR64:$src)),
- (VSQRTSDr (f64 (IMPLICIT_DEF)), FR64:$src)>, Requires<[HasAVX]>;
-def : Pat<(f64 (fsqrt (load addr:$src))),
- (VSQRTSDm (f64 (IMPLICIT_DEF)), addr:$src)>,
- Requires<[HasAVX, OptForSize]>;
-
-def : Pat<(f32 (X86frsqrt FR32:$src)),
- (VRSQRTSSr (f32 (IMPLICIT_DEF)), FR32:$src)>, Requires<[HasAVX]>;
-def : Pat<(f32 (X86frsqrt (load addr:$src))),
- (VRSQRTSSm (f32 (IMPLICIT_DEF)), addr:$src)>,
- Requires<[HasAVX, OptForSize]>;
-
-def : Pat<(f32 (X86frcp FR32:$src)),
- (VRCPSSr (f32 (IMPLICIT_DEF)), FR32:$src)>, Requires<[HasAVX]>;
-def : Pat<(f32 (X86frcp (load addr:$src))),
- (VRCPSSm (f32 (IMPLICIT_DEF)), addr:$src)>,
- Requires<[HasAVX, OptForSize]>;
-
-let Predicates = [HasAVX] in {
+let Predicates = [UseAVX] in {
+ def : Pat<(f32 (fsqrt FR32:$src)),
+ (VSQRTSSr (f32 (IMPLICIT_DEF)), FR32:$src)>, Requires<[HasAVX]>;
+ def : Pat<(f32 (fsqrt (load addr:$src))),
+ (VSQRTSSm (f32 (IMPLICIT_DEF)), addr:$src)>,
+ Requires<[HasAVX, OptForSize]>;
+ def : Pat<(f64 (fsqrt FR64:$src)),
+ (VSQRTSDr (f64 (IMPLICIT_DEF)), FR64:$src)>, Requires<[HasAVX]>;
+ def : Pat<(f64 (fsqrt (load addr:$src))),
+ (VSQRTSDm (f64 (IMPLICIT_DEF)), addr:$src)>,
+ Requires<[HasAVX, OptForSize]>;
+
+ def : Pat<(f32 (X86frsqrt FR32:$src)),
+ (VRSQRTSSr (f32 (IMPLICIT_DEF)), FR32:$src)>, Requires<[HasAVX]>;
+ def : Pat<(f32 (X86frsqrt (load addr:$src))),
+ (VRSQRTSSm (f32 (IMPLICIT_DEF)), addr:$src)>,
+ Requires<[HasAVX, OptForSize]>;
+
+ def : Pat<(f32 (X86frcp FR32:$src)),
+ (VRCPSSr (f32 (IMPLICIT_DEF)), FR32:$src)>, Requires<[HasAVX]>;
+ def : Pat<(f32 (X86frcp (load addr:$src))),
+ (VRCPSSm (f32 (IMPLICIT_DEF)), addr:$src)>,
+ Requires<[HasAVX, OptForSize]>;
+}
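
The HasAVX-to-UseAVX moves here matter once AVX-512 lands: UseAVX-guarded patterns stand down when the subtarget also has AVX-512, letting the EVEX-encoded forms win, exactly as the UseSSE* predicates stand down in the presence of AVX. From X86InstrInfo.td of this release, roughly:

    def HasAVX  : Predicate<"Subtarget->hasAVX()">;
    def UseAVX  : Predicate<"Subtarget->hasAVX() && !Subtarget->hasAVX512()">;
    def UseSSE2 : Predicate<"Subtarget->hasSSE2() && !Subtarget->hasAVX()">;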
+let Predicates = [UseAVX] in {
def : Pat<(int_x86_sse_sqrt_ss VR128:$src),
(COPY_TO_REGCLASS (VSQRTSSr (f32 (IMPLICIT_DEF)),
(COPY_TO_REGCLASS VR128:$src, FR32)),
@@ -3378,7 +3359,9 @@ let Predicates = [HasAVX] in {
VR128)>;
def : Pat<(int_x86_sse2_sqrt_sd sse_load_f64:$src),
(VSQRTSDm_Int (v2f64 (IMPLICIT_DEF)), sse_load_f64:$src)>;
+}
+let Predicates = [HasAVX] in {
def : Pat<(int_x86_sse_rsqrt_ss VR128:$src),
(COPY_TO_REGCLASS (VRSQRTSSr (f32 (IMPLICIT_DEF)),
(COPY_TO_REGCLASS VR128:$src, FR32)),
@@ -3662,7 +3645,7 @@ def MOVDQUrm : I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
XS, Requires<[UseSSE2]>;
}
-let mayStore = 1, SchedRW = [WriteStore] in {
+let mayStore = 1, neverHasSideEffects = 1, SchedRW = [WriteStore] in {
def MOVDQAmr : PDI<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
"movdqa\t{$src, $dst|$dst, $src}",
[/*(alignedstore (v2i64 VR128:$src), addr:$dst)*/],
@@ -3725,7 +3708,7 @@ multiclass PDI_binop_all_int<bits<8> opc, string OpcodeStr, Intrinsic IntId128,
bit IsCommutable = 0> {
let Predicates = [HasAVX] in
defm V#NAME : PDI_binop_rm_int<opc, !strconcat("v", OpcodeStr), IntId128,
- VR128, memopv2i64, i128mem, itins,
+ VR128, loadv2i64, i128mem, itins,
IsCommutable, 0>, VEX_4V;
let Constraints = "$src1 = $dst" in
@@ -3734,7 +3717,7 @@ let Constraints = "$src1 = $dst" in
let Predicates = [HasAVX2] in
defm V#NAME#Y : PDI_binop_rm_int<opc, !strconcat("v", OpcodeStr), IntId256,
- VR256, memopv4i64, i256mem, itins,
+ VR256, loadv4i64, i256mem, itins,
IsCommutable, 0>, VEX_4V, VEX_L;
}
@@ -3761,11 +3744,11 @@ multiclass PDI_binop_rmi<bits<8> opc, bits<8> opc2, Format ImmForm,
(bc_frag (memopv2i64 addr:$src2)))))], itins.rm>,
Sched<[WriteVecShiftLd, ReadAfterLd]>;
def ri : PDIi8<opc2, ImmForm, (outs RC:$dst),
- (ins RC:$src1, i32i8imm:$src2),
+ (ins RC:$src1, i8imm:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (DstVT (OpNode2 RC:$src1, (i32 imm:$src2))))], itins.ri>,
+ [(set RC:$dst, (DstVT (OpNode2 RC:$src1, (i8 imm:$src2))))], itins.ri>,
Sched<[WriteVecShift]>;
}
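
Swapping i32i8imm for i8imm narrows the shift amount's DAG type from i32 to i8, matching the (i8 imm:$src2) now used in the pattern; the encoding is unchanged, since both occupy a single immediate byte. The two operand definitions, roughly:

    // Target-independent (include/llvm/Target/Target.td):
    def i8imm : Operand<i8>;

    // X86-specific: i32-typed immediate that must still fit in 8 bits
    // when encoded (X86InstrInfo.td):
    def i32i8imm : Operand<i32> {
      let ParserMatchClass = ImmSExti32i8AsmOperand;
      let OperandType = "OPERAND_IMMEDIATE";
    }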
@@ -3849,15 +3832,15 @@ defm PAVGB : PDI_binop_all_int<0xE0, "pavgb", int_x86_sse2_pavg_b,
defm PAVGW : PDI_binop_all_int<0xE3, "pavgw", int_x86_sse2_pavg_w,
int_x86_avx2_pavg_w, SSE_INTALU_ITINS_P, 1>;
defm PSADBW : PDI_binop_all_int<0xF6, "psadbw", int_x86_sse2_psad_bw,
- int_x86_avx2_psad_bw, SSE_INTALU_ITINS_P, 1>;
+ int_x86_avx2_psad_bw, SSE_PMADD, 1>;
let Predicates = [HasAVX] in
defm VPMULUDQ : PDI_binop_rm2<0xF4, "vpmuludq", X86pmuludq, v2i64, v4i32, VR128,
- memopv2i64, i128mem, SSE_INTMUL_ITINS_P, 1, 0>,
+ loadv2i64, i128mem, SSE_INTMUL_ITINS_P, 1, 0>,
VEX_4V;
let Predicates = [HasAVX2] in
defm VPMULUDQY : PDI_binop_rm2<0xF4, "vpmuludq", X86pmuludq, v4i64, v8i32,
- VR256, memopv4i64, i256mem,
+ VR256, loadv4i64, i256mem,
SSE_INTMUL_ITINS_P, 1, 0>, VEX_4V, VEX_L;
let Constraints = "$src1 = $dst" in
defm PMULUDQ : PDI_binop_rm2<0xF4, "pmuludq", X86pmuludq, v2i64, v4i32, VR128,
@@ -3993,12 +3976,14 @@ let ExeDomain = SSEPackedInt, SchedRW = [WriteVecShift] in {
(outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
"pslldq\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
- (int_x86_sse2_psll_dq_bs VR128:$src1, imm:$src2))]>;
+ (int_x86_sse2_psll_dq_bs VR128:$src1, imm:$src2))],
+ IIC_SSE_INTSHDQ_P_RI>;
def PSRLDQri : PDIi8<0x73, MRM3r,
(outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
"psrldq\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
- (int_x86_sse2_psrl_dq_bs VR128:$src1, imm:$src2))]>;
+ (int_x86_sse2_psrl_dq_bs VR128:$src1, imm:$src2))],
+ IIC_SSE_INTSHDQ_P_RI>;
// PSRADQri doesn't exist in SSE[1-3].
}
} // Constraints = "$src1 = $dst"
@@ -4082,14 +4067,14 @@ let Predicates = [HasAVX] in {
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR128:$dst,
(vt128 (OpNode VR128:$src1, (i8 imm:$src2))))],
- IIC_SSE_PSHUF>, VEX, Sched<[WriteShuffle]>;
+ IIC_SSE_PSHUF_RI>, VEX, Sched<[WriteShuffle]>;
def V#NAME#mi : Ii8<0x70, MRMSrcMem, (outs VR128:$dst),
(ins i128mem:$src1, i8imm:$src2),
!strconcat("v", OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR128:$dst,
- (vt128 (OpNode (bitconvert (memopv2i64 addr:$src1)),
- (i8 imm:$src2))))], IIC_SSE_PSHUF>, VEX,
+ (vt128 (OpNode (bitconvert (loadv2i64 addr:$src1)),
+ (i8 imm:$src2))))], IIC_SSE_PSHUF_MI>, VEX,
Sched<[WriteShuffleLd]>;
}
@@ -4100,14 +4085,14 @@ let Predicates = [HasAVX2] in {
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR256:$dst,
(vt256 (OpNode VR256:$src1, (i8 imm:$src2))))],
- IIC_SSE_PSHUF>, VEX, VEX_L, Sched<[WriteShuffle]>;
+ IIC_SSE_PSHUF_RI>, VEX, VEX_L, Sched<[WriteShuffle]>;
def V#NAME#Ymi : Ii8<0x70, MRMSrcMem, (outs VR256:$dst),
(ins i256mem:$src1, i8imm:$src2),
!strconcat("v", OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR256:$dst,
- (vt256 (OpNode (bitconvert (memopv4i64 addr:$src1)),
- (i8 imm:$src2))))], IIC_SSE_PSHUF>, VEX, VEX_L,
+ (vt256 (OpNode (bitconvert (loadv4i64 addr:$src1)),
+ (i8 imm:$src2))))], IIC_SSE_PSHUF_MI>, VEX, VEX_L,
Sched<[WriteShuffleLd]>;
}
@@ -4118,14 +4103,14 @@ let Predicates = [UseSSE2] in {
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR128:$dst,
(vt128 (OpNode VR128:$src1, (i8 imm:$src2))))],
- IIC_SSE_PSHUF>, Sched<[WriteShuffle]>;
+ IIC_SSE_PSHUF_RI>, Sched<[WriteShuffle]>;
def mi : Ii8<0x70, MRMSrcMem,
(outs VR128:$dst), (ins i128mem:$src1, i8imm:$src2),
!strconcat(OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR128:$dst,
(vt128 (OpNode (bitconvert (memopv2i64 addr:$src1)),
- (i8 imm:$src2))))], IIC_SSE_PSHUF>,
+ (i8 imm:$src2))))], IIC_SSE_PSHUF_MI>,
Sched<[WriteShuffleLd]>;
}
}
@@ -4136,7 +4121,7 @@ defm PSHUFHW : sse2_pshuffle<"pshufhw", v8i16, v16i16, X86PShufhw>, XS;
defm PSHUFLW : sse2_pshuffle<"pshuflw", v8i16, v16i16, X86PShuflw>, XD;
let Predicates = [HasAVX] in {
- def : Pat<(v4f32 (X86PShufd (memopv4f32 addr:$src1), (i8 imm:$imm))),
+ def : Pat<(v4f32 (X86PShufd (loadv4f32 addr:$src1), (i8 imm:$imm))),
(VPSHUFDmi addr:$src1, imm:$imm)>;
def : Pat<(v4f32 (X86PShufd VR128:$src1, (i8 imm:$imm))),
(VPSHUFDri VR128:$src1, imm:$imm)>;
@@ -4259,13 +4244,13 @@ let ExeDomain = SSEPackedInt in {
multiclass sse2_pinsrw<bit Is2Addr = 1> {
def rri : Ii8<0xC4, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1,
- GR32:$src2, i32i8imm:$src3),
+ GR32orGR64:$src2, i32i8imm:$src3),
!if(Is2Addr,
"pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}",
"vpinsrw\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set VR128:$dst,
- (X86pinsrw VR128:$src1, GR32:$src2, imm:$src3))], IIC_SSE_PINSRW>,
- Sched<[WriteShuffle]>;
+ (X86pinsrw VR128:$src1, GR32orGR64:$src2, imm:$src3))],
+ IIC_SSE_PINSRW>, Sched<[WriteShuffle]>;
def rmi : Ii8<0xC4, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1,
i16mem:$src2, i32i8imm:$src3),
@@ -4281,29 +4266,24 @@ multiclass sse2_pinsrw<bit Is2Addr = 1> {
// Extract
let Predicates = [HasAVX] in
def VPEXTRWri : Ii8<0xC5, MRMSrcReg,
- (outs GR32:$dst), (ins VR128:$src1, i32i8imm:$src2),
+ (outs GR32orGR64:$dst), (ins VR128:$src1, i32i8imm:$src2),
"vpextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set GR32:$dst, (X86pextrw (v8i16 VR128:$src1),
- imm:$src2))]>, TB, OpSize, VEX,
+ [(set GR32orGR64:$dst, (X86pextrw (v8i16 VR128:$src1),
+ imm:$src2))]>, TB, OpSize, VEX,
Sched<[WriteShuffle]>;
def PEXTRWri : PDIi8<0xC5, MRMSrcReg,
- (outs GR32:$dst), (ins VR128:$src1, i32i8imm:$src2),
+ (outs GR32orGR64:$dst), (ins VR128:$src1, i32i8imm:$src2),
"pextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set GR32:$dst, (X86pextrw (v8i16 VR128:$src1),
- imm:$src2))], IIC_SSE_PEXTRW>,
+ [(set GR32orGR64:$dst, (X86pextrw (v8i16 VR128:$src1),
+ imm:$src2))], IIC_SSE_PEXTRW>,
Sched<[WriteShuffleLd, ReadAfterLd]>;
// Insert
-let Predicates = [HasAVX] in {
- defm VPINSRW : sse2_pinsrw<0>, TB, OpSize, VEX_4V;
- def VPINSRWrr64i : Ii8<0xC4, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, GR64:$src2, i32i8imm:$src3),
- "vpinsrw\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
- []>, TB, OpSize, VEX_4V, Sched<[WriteShuffle]>;
-}
+let Predicates = [HasAVX] in
+defm VPINSRW : sse2_pinsrw<0>, TB, OpSize, VEX_4V;
-let Constraints = "$src1 = $dst" in
- defm PINSRW : sse2_pinsrw, TB, OpSize, Requires<[UseSSE2]>;
+let Predicates = [UseSSE2], Constraints = "$src1 = $dst" in
+defm PINSRW : sse2_pinsrw, TB, OpSize;
} // ExeDomain = SSEPackedInt
@@ -4313,24 +4293,23 @@ let Constraints = "$src1 = $dst" in
let ExeDomain = SSEPackedInt, SchedRW = [WriteVecLogic] in {
-def VPMOVMSKBrr : VPDI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src),
+def VPMOVMSKBrr : VPDI<0xD7, MRMSrcReg, (outs GR32orGR64:$dst),
+ (ins VR128:$src),
"pmovmskb\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (int_x86_sse2_pmovmskb_128 VR128:$src))],
+ [(set GR32orGR64:$dst, (int_x86_sse2_pmovmskb_128 VR128:$src))],
IIC_SSE_MOVMSK>, VEX;
-def VPMOVMSKBr64r : VPDI<0xD7, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src),
- "pmovmskb\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVMSK>, VEX;
let Predicates = [HasAVX2] in {
-def VPMOVMSKBYrr : VPDI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR256:$src),
+def VPMOVMSKBYrr : VPDI<0xD7, MRMSrcReg, (outs GR32orGR64:$dst),
+ (ins VR256:$src),
"pmovmskb\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (int_x86_avx2_pmovmskb VR256:$src))]>, VEX, VEX_L;
-def VPMOVMSKBYr64r : VPDI<0xD7, MRMSrcReg, (outs GR64:$dst), (ins VR256:$src),
- "pmovmskb\t{$src, $dst|$dst, $src}", []>, VEX, VEX_L;
+ [(set GR32orGR64:$dst, (int_x86_avx2_pmovmskb VR256:$src))]>,
+ VEX, VEX_L;
}
-def PMOVMSKBrr : PDI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src),
+def PMOVMSKBrr : PDI<0xD7, MRMSrcReg, (outs GR32orGR64:$dst), (ins VR128:$src),
"pmovmskb\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (int_x86_sse2_pmovmskb_128 VR128:$src))],
+ [(set GR32orGR64:$dst, (int_x86_sse2_pmovmskb_128 VR128:$src))],
IIC_SSE_MOVMSK>;
} // ExeDomain = SSEPackedInt
@@ -4341,25 +4320,25 @@ def PMOVMSKBrr : PDI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src),
let ExeDomain = SSEPackedInt, SchedRW = [WriteStore] in {
-let Uses = [EDI] in
+let Uses = [EDI], Predicates = [HasAVX,In32BitMode] in
def VMASKMOVDQU : VPDI<0xF7, MRMSrcReg, (outs),
(ins VR128:$src, VR128:$mask),
"maskmovdqu\t{$mask, $src|$src, $mask}",
[(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, EDI)],
IIC_SSE_MASKMOV>, VEX;
-let Uses = [RDI] in
+let Uses = [RDI], Predicates = [HasAVX,In64BitMode] in
def VMASKMOVDQU64 : VPDI<0xF7, MRMSrcReg, (outs),
(ins VR128:$src, VR128:$mask),
"maskmovdqu\t{$mask, $src|$src, $mask}",
[(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, RDI)],
IIC_SSE_MASKMOV>, VEX;
-let Uses = [EDI] in
+let Uses = [EDI], Predicates = [UseSSE2,In32BitMode] in
def MASKMOVDQU : PDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask),
"maskmovdqu\t{$mask, $src|$src, $mask}",
[(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, EDI)],
IIC_SSE_MASKMOV>;
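
The added predicates pair each MASKMOVDQU flavor with the mode whose implicit address register it uses (EDI in 32-bit mode, RDI in 64-bit mode), keeping the wrong flavor out of the assembler match tables. Both are ordinary predicates from X86InstrInfo.td, roughly:

    def In32BitMode : Predicate<"!Subtarget->is64Bit()">,
                      AssemblerPredicate<"!Mode64Bit", "32-bit mode">;
    def In64BitMode : Predicate<"Subtarget->is64Bit()">,
                      AssemblerPredicate<"Mode64Bit", "64-bit mode">;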
-let Uses = [RDI] in
+let Uses = [RDI], Predicates = [UseSSE2,In64BitMode] in
def MASKMOVDQU64 : PDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask),
"maskmovdqu\t{$mask, $src|$src, $mask}",
[(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, RDI)],
@@ -4386,12 +4365,13 @@ def VMOVDI2PDIrm : VS2I<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
IIC_SSE_MOVDQ>,
VEX, Sched<[WriteLoad]>;
def VMOV64toPQIrr : VRS2I<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
- "mov{d|q}\t{$src, $dst|$dst, $src}",
+ "movq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(v2i64 (scalar_to_vector GR64:$src)))],
IIC_SSE_MOVDQ>, VEX, Sched<[WriteMove]>;
+let isCodeGenOnly = 1 in
def VMOV64toSDrr : VRS2I<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src),
- "mov{d|q}\t{$src, $dst|$dst, $src}",
+ "movq\t{$src, $dst|$dst, $src}",
[(set FR64:$dst, (bitconvert GR64:$src))],
IIC_SSE_MOVDQ>, VEX, Sched<[WriteMove]>;
@@ -4410,6 +4390,7 @@ def MOV64toPQIrr : RS2I<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
[(set VR128:$dst,
(v2i64 (scalar_to_vector GR64:$src)))],
IIC_SSE_MOVDQ>, Sched<[WriteMove]>;
+let isCodeGenOnly = 1 in
def MOV64toSDrr : RS2I<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src),
"mov{d|q}\t{$src, $dst|$dst, $src}",
[(set FR64:$dst, (bitconvert GR64:$src))],
@@ -4418,25 +4399,27 @@ def MOV64toSDrr : RS2I<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src),
//===---------------------------------------------------------------------===//
// Move Int Doubleword to Single Scalar
//
-def VMOVDI2SSrr : VS2I<0x6E, MRMSrcReg, (outs FR32:$dst), (ins GR32:$src),
- "movd\t{$src, $dst|$dst, $src}",
- [(set FR32:$dst, (bitconvert GR32:$src))],
- IIC_SSE_MOVDQ>, VEX, Sched<[WriteMove]>;
-
-def VMOVDI2SSrm : VS2I<0x6E, MRMSrcMem, (outs FR32:$dst), (ins i32mem:$src),
- "movd\t{$src, $dst|$dst, $src}",
- [(set FR32:$dst, (bitconvert (loadi32 addr:$src)))],
- IIC_SSE_MOVDQ>,
- VEX, Sched<[WriteLoad]>;
-def MOVDI2SSrr : S2I<0x6E, MRMSrcReg, (outs FR32:$dst), (ins GR32:$src),
- "movd\t{$src, $dst|$dst, $src}",
- [(set FR32:$dst, (bitconvert GR32:$src))],
- IIC_SSE_MOVDQ>, Sched<[WriteMove]>;
-
-def MOVDI2SSrm : S2I<0x6E, MRMSrcMem, (outs FR32:$dst), (ins i32mem:$src),
- "movd\t{$src, $dst|$dst, $src}",
- [(set FR32:$dst, (bitconvert (loadi32 addr:$src)))],
- IIC_SSE_MOVDQ>, Sched<[WriteLoad]>;
+let isCodeGenOnly = 1 in {
+ def VMOVDI2SSrr : VS2I<0x6E, MRMSrcReg, (outs FR32:$dst), (ins GR32:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set FR32:$dst, (bitconvert GR32:$src))],
+ IIC_SSE_MOVDQ>, VEX, Sched<[WriteMove]>;
+
+ def VMOVDI2SSrm : VS2I<0x6E, MRMSrcMem, (outs FR32:$dst), (ins i32mem:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set FR32:$dst, (bitconvert (loadi32 addr:$src)))],
+ IIC_SSE_MOVDQ>,
+ VEX, Sched<[WriteLoad]>;
+ def MOVDI2SSrr : S2I<0x6E, MRMSrcReg, (outs FR32:$dst), (ins GR32:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set FR32:$dst, (bitconvert GR32:$src))],
+ IIC_SSE_MOVDQ>, Sched<[WriteMove]>;
+
+ def MOVDI2SSrm : S2I<0x6E, MRMSrcMem, (outs FR32:$dst), (ins i32mem:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set FR32:$dst, (bitconvert (loadi32 addr:$src)))],
+ IIC_SSE_MOVDQ>, Sched<[WriteLoad]>;
+}
//===---------------------------------------------------------------------===//
// Move Packed Doubleword Int to Packed Double Int
@@ -4463,12 +4446,24 @@ def MOVPDI2DImr : S2I<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR128:$src),
(iPTR 0))), addr:$dst)],
IIC_SSE_MOVDQ>, Sched<[WriteLoad]>;
+def : Pat<(v8i32 (X86Vinsert (v8i32 immAllZerosV), GR32:$src2, (iPTR 0))),
+ (SUBREG_TO_REG (i32 0), (VMOVDI2PDIrr GR32:$src2), sub_xmm)>;
+
+def : Pat<(v4i64 (X86Vinsert (bc_v4i64 (v8i32 immAllZerosV)), GR64:$src2, (iPTR 0))),
+ (SUBREG_TO_REG (i32 0), (VMOV64toPQIrr GR64:$src2), sub_xmm)>;
+
+def : Pat<(v8i32 (X86Vinsert undef, GR32:$src2, (iPTR 0))),
+ (SUBREG_TO_REG (i32 0), (VMOVDI2PDIrr GR32:$src2), sub_xmm)>;
+
+def : Pat<(v4i64 (X86Vinsert undef, GR64:$src2, (iPTR 0))),
+ (SUBREG_TO_REG (i32 0), (VMOV64toPQIrr GR64:$src2), sub_xmm)>;
+
//===---------------------------------------------------------------------===//
// Move Packed Doubleword Int first element to Doubleword Int
//
let SchedRW = [WriteMove] in {
def VMOVPQIto64rr : VRS2I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
- "mov{d|q}\t{$src, $dst|$dst, $src}",
+ "movq\t{$src, $dst|$dst, $src}",
[(set GR64:$dst, (vector_extract (v2i64 VR128:$src),
(iPTR 0)))],
IIC_SSE_MOVD_ToGP>,
@@ -4484,121 +4479,112 @@ def MOVPQIto64rr : RS2I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
//===---------------------------------------------------------------------===//
// Bitcast FR64 <-> GR64
//
-let Predicates = [HasAVX] in
-def VMOV64toSDrm : S2SI<0x7E, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src),
- "vmovq\t{$src, $dst|$dst, $src}",
- [(set FR64:$dst, (bitconvert (loadi64 addr:$src)))]>,
- VEX, Sched<[WriteLoad]>;
-def VMOVSDto64rr : VRS2I<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src),
+let isCodeGenOnly = 1 in {
+ let Predicates = [UseAVX] in
+ def VMOV64toSDrm : VS2SI<0x7E, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src),
+ "movq\t{$src, $dst|$dst, $src}",
+ [(set FR64:$dst, (bitconvert (loadi64 addr:$src)))]>,
+ VEX, Sched<[WriteLoad]>;
+ def VMOVSDto64rr : VRS2I<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src),
+ "movq\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (bitconvert FR64:$src))],
+ IIC_SSE_MOVDQ>, VEX, Sched<[WriteMove]>;
+ def VMOVSDto64mr : VRS2I<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src),
+ "movq\t{$src, $dst|$dst, $src}",
+ [(store (i64 (bitconvert FR64:$src)), addr:$dst)],
+ IIC_SSE_MOVDQ>, VEX, Sched<[WriteStore]>;
+
+ def MOV64toSDrm : S2SI<0x7E, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src),
+ "movq\t{$src, $dst|$dst, $src}",
+ [(set FR64:$dst, (bitconvert (loadi64 addr:$src)))],
+ IIC_SSE_MOVDQ>, Sched<[WriteLoad]>;
+ def MOVSDto64rr : RS2I<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src),
"mov{d|q}\t{$src, $dst|$dst, $src}",
[(set GR64:$dst, (bitconvert FR64:$src))],
- IIC_SSE_MOVDQ>, VEX, Sched<[WriteMove]>;
-def VMOVSDto64mr : VRS2I<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src),
+ IIC_SSE_MOVD_ToGP>, Sched<[WriteMove]>;
+ def MOVSDto64mr : RS2I<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src),
"movq\t{$src, $dst|$dst, $src}",
[(store (i64 (bitconvert FR64:$src)), addr:$dst)],
- IIC_SSE_MOVDQ>, VEX, Sched<[WriteStore]>;
-
-def MOV64toSDrm : S2SI<0x7E, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src),
- "movq\t{$src, $dst|$dst, $src}",
- [(set FR64:$dst, (bitconvert (loadi64 addr:$src)))],
- IIC_SSE_MOVDQ>, Sched<[WriteLoad]>;
-def MOVSDto64rr : RS2I<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src),
- "mov{d|q}\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst, (bitconvert FR64:$src))],
- IIC_SSE_MOVD_ToGP>, Sched<[WriteMove]>;
-def MOVSDto64mr : RS2I<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src),
- "movq\t{$src, $dst|$dst, $src}",
- [(store (i64 (bitconvert FR64:$src)), addr:$dst)],
- IIC_SSE_MOVDQ>, Sched<[WriteStore]>;
+ IIC_SSE_MOVDQ>, Sched<[WriteStore]>;
+}
//===---------------------------------------------------------------------===//
// Move Scalar Single to Double Int
//
-def VMOVSS2DIrr : VS2I<0x7E, MRMDestReg, (outs GR32:$dst), (ins FR32:$src),
- "movd\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (bitconvert FR32:$src))],
- IIC_SSE_MOVD_ToGP>, VEX, Sched<[WriteMove]>;
-def VMOVSS2DImr : VS2I<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, FR32:$src),
- "movd\t{$src, $dst|$dst, $src}",
- [(store (i32 (bitconvert FR32:$src)), addr:$dst)],
- IIC_SSE_MOVDQ>, VEX, Sched<[WriteStore]>;
-def MOVSS2DIrr : S2I<0x7E, MRMDestReg, (outs GR32:$dst), (ins FR32:$src),
- "movd\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (bitconvert FR32:$src))],
- IIC_SSE_MOVD_ToGP>, Sched<[WriteMove]>;
-def MOVSS2DImr : S2I<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, FR32:$src),
- "movd\t{$src, $dst|$dst, $src}",
- [(store (i32 (bitconvert FR32:$src)), addr:$dst)],
- IIC_SSE_MOVDQ>, Sched<[WriteStore]>;
+let isCodeGenOnly = 1 in {
+ def VMOVSS2DIrr : VS2I<0x7E, MRMDestReg, (outs GR32:$dst), (ins FR32:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (bitconvert FR32:$src))],
+ IIC_SSE_MOVD_ToGP>, VEX, Sched<[WriteMove]>;
+ def VMOVSS2DImr : VS2I<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, FR32:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(store (i32 (bitconvert FR32:$src)), addr:$dst)],
+ IIC_SSE_MOVDQ>, VEX, Sched<[WriteStore]>;
+ def MOVSS2DIrr : S2I<0x7E, MRMDestReg, (outs GR32:$dst), (ins FR32:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (bitconvert FR32:$src))],
+ IIC_SSE_MOVD_ToGP>, Sched<[WriteMove]>;
+ def MOVSS2DImr : S2I<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, FR32:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(store (i32 (bitconvert FR32:$src)), addr:$dst)],
+ IIC_SSE_MOVDQ>, Sched<[WriteStore]>;
+}
//===---------------------------------------------------------------------===//
// Patterns and instructions to describe movd/movq to XMM register zero-extends
//
-let SchedRW = [WriteMove] in {
+let isCodeGenOnly = 1, SchedRW = [WriteMove] in {
let AddedComplexity = 15 in {
-def VMOVZDI2PDIrr : VS2I<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src),
- "movd\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (v4i32 (X86vzmovl
- (v4i32 (scalar_to_vector GR32:$src)))))],
- IIC_SSE_MOVDQ>, VEX;
def VMOVZQI2PQIrr : VS2I<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
- "mov{d|q}\t{$src, $dst|$dst, $src}", // X86-64 only
+ "movq\t{$src, $dst|$dst, $src}", // X86-64 only
[(set VR128:$dst, (v2i64 (X86vzmovl
(v2i64 (scalar_to_vector GR64:$src)))))],
IIC_SSE_MOVDQ>,
VEX, VEX_W;
-}
-let AddedComplexity = 15 in {
-def MOVZDI2PDIrr : S2I<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src),
- "movd\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (v4i32 (X86vzmovl
- (v4i32 (scalar_to_vector GR32:$src)))))],
- IIC_SSE_MOVDQ>;
def MOVZQI2PQIrr : RS2I<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
"mov{d|q}\t{$src, $dst|$dst, $src}", // X86-64 only
[(set VR128:$dst, (v2i64 (X86vzmovl
(v2i64 (scalar_to_vector GR64:$src)))))],
IIC_SSE_MOVDQ>;
}
-} // SchedRW
+} // isCodeGenOnly, SchedRW
-let AddedComplexity = 20, SchedRW = [WriteLoad] in {
-def VMOVZDI2PDIrm : VS2I<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
- "movd\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst,
- (v4i32 (X86vzmovl (v4i32 (scalar_to_vector
- (loadi32 addr:$src))))))],
- IIC_SSE_MOVDQ>, VEX;
-def MOVZDI2PDIrm : S2I<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
- "movd\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst,
- (v4i32 (X86vzmovl (v4i32 (scalar_to_vector
- (loadi32 addr:$src))))))],
- IIC_SSE_MOVDQ>;
-} // AddedComplexity, SchedRW
+let Predicates = [UseAVX] in {
+ let AddedComplexity = 15 in
+ def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))),
+ (VMOVDI2PDIrr GR32:$src)>;
-let Predicates = [HasAVX] in {
// AVX 128-bit movd/movq instruction write zeros in the high 128-bit part.
let AddedComplexity = 20 in {
+ def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector (loadi32 addr:$src))))),
+ (VMOVDI2PDIrm addr:$src)>;
def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv4f32 addr:$src)))),
- (VMOVZDI2PDIrm addr:$src)>;
+ (VMOVDI2PDIrm addr:$src)>;
def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))),
- (VMOVZDI2PDIrm addr:$src)>;
+ (VMOVDI2PDIrm addr:$src)>;
}
// Use regular 128-bit instructions to match 256-bit scalar_to_vec+zext.
def : Pat<(v8i32 (X86vzmovl (insert_subvector undef,
(v4i32 (scalar_to_vector GR32:$src)),(iPTR 0)))),
- (SUBREG_TO_REG (i32 0), (VMOVZDI2PDIrr GR32:$src), sub_xmm)>;
+ (SUBREG_TO_REG (i32 0), (VMOVDI2PDIrr GR32:$src), sub_xmm)>;
def : Pat<(v4i64 (X86vzmovl (insert_subvector undef,
(v2i64 (scalar_to_vector GR64:$src)),(iPTR 0)))),
(SUBREG_TO_REG (i64 0), (VMOVZQI2PQIrr GR64:$src), sub_xmm)>;
}
-let Predicates = [UseSSE2], AddedComplexity = 20 in {
- def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv4f32 addr:$src)))),
- (MOVZDI2PDIrm addr:$src)>;
- def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))),
- (MOVZDI2PDIrm addr:$src)>;
+let Predicates = [UseSSE2] in {
+ let AddedComplexity = 15 in
+ def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))),
+ (MOVDI2PDIrr GR32:$src)>;
+
+ let AddedComplexity = 20 in {
+ def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector (loadi32 addr:$src))))),
+ (MOVDI2PDIrm addr:$src)>;
+ def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv4f32 addr:$src)))),
+ (MOVDI2PDIrm addr:$src)>;
+ def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))),
+ (MOVDI2PDIrm addr:$src)>;
+ }
}
// These are the correct encodings of the instructions so that we know how to
@@ -4607,15 +4593,12 @@ let Predicates = [UseSSE2], AddedComplexity = 20 in {
def : InstAlias<"movq\t{$src, $dst|$dst, $src}",
(MOV64toPQIrr VR128:$dst, GR64:$src), 0>;
def : InstAlias<"movq\t{$src, $dst|$dst, $src}",
- (MOV64toSDrr FR64:$dst, GR64:$src), 0>;
-def : InstAlias<"movq\t{$src, $dst|$dst, $src}",
(MOVPQIto64rr GR64:$dst, VR128:$src), 0>;
-def : InstAlias<"movq\t{$src, $dst|$dst, $src}",
- (MOVSDto64rr GR64:$dst, FR64:$src), 0>;
-def : InstAlias<"movq\t{$src, $dst|$dst, $src}",
- (VMOVZQI2PQIrr VR128:$dst, GR64:$src), 0>;
-def : InstAlias<"movq\t{$src, $dst|$dst, $src}",
- (MOVZQI2PQIrr VR128:$dst, GR64:$src), 0>;
+// Allow "vmovd" but print "vmovq" since we don't need compatibility for AVX.
+def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}",
+ (VMOV64toPQIrr VR128:$dst, GR64:$src), 0>;
+def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}",
+ (VMOVPQIto64rr GR64:$dst, VR128:$src), 0>;
//===---------------------------------------------------------------------===//
// SSE2 - Move Quadword
@@ -4630,7 +4613,7 @@ def VMOVQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
"vmovq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(v2i64 (scalar_to_vector (loadi64 addr:$src))))]>, XS,
- VEX, Requires<[HasAVX]>;
+ VEX, Requires<[UseAVX]>;
def MOVQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
"movq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
@@ -4667,16 +4650,15 @@ def MOVLQ128mr : S2I<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
[(int_x86_sse2_storel_dq addr:$dst, VR128:$src)],
IIC_SSE_MOVDQ>, Sched<[WriteStore]>;
-let AddedComplexity = 20 in
+let isCodeGenOnly = 1, AddedComplexity = 20 in {
def VMOVZQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
"vmovq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(v2i64 (X86vzmovl (v2i64 (scalar_to_vector
(loadi64 addr:$src))))))],
IIC_SSE_MOVDQ>,
- XS, VEX, Requires<[HasAVX]>, Sched<[WriteLoad]>;
+ XS, VEX, Requires<[UseAVX]>, Sched<[WriteLoad]>;
-let AddedComplexity = 20 in
def MOVZQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
"movq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
@@ -4684,10 +4666,9 @@ def MOVZQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
(loadi64 addr:$src))))))],
IIC_SSE_MOVDQ>,
XS, Requires<[UseSSE2]>, Sched<[WriteLoad]>;
+}
-let Predicates = [HasAVX], AddedComplexity = 20 in {
- def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))),
- (VMOVZQI2PQIrm addr:$src)>;
+let Predicates = [UseAVX], AddedComplexity = 20 in {
def : Pat<(v2i64 (X86vzmovl (bc_v2i64 (loadv4f32 addr:$src)))),
(VMOVZQI2PQIrm addr:$src)>;
def : Pat<(v2i64 (X86vzload addr:$src)),
@@ -4695,8 +4676,6 @@ let Predicates = [HasAVX], AddedComplexity = 20 in {
}
let Predicates = [UseSSE2], AddedComplexity = 20 in {
- def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))),
- (MOVZQI2PQIrm addr:$src)>;
def : Pat<(v2i64 (X86vzmovl (bc_v2i64 (loadv4f32 addr:$src)))),
(MOVZQI2PQIrm addr:$src)>;
def : Pat<(v2i64 (X86vzload addr:$src)), (MOVZQI2PQIrm addr:$src)>;
@@ -4719,7 +4698,7 @@ def VMOVZPQILo2PQIrr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"vmovq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (v2i64 (X86vzmovl (v2i64 VR128:$src))))],
IIC_SSE_MOVQ_RR>,
- XS, VEX, Requires<[HasAVX]>;
+ XS, VEX, Requires<[UseAVX]>;
let AddedComplexity = 15 in
def MOVZPQILo2PQIrr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"movq\t{$src, $dst|$dst, $src}",
@@ -4728,14 +4707,14 @@ def MOVZPQILo2PQIrr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
XS, Requires<[UseSSE2]>;
} // SchedRW
-let SchedRW = [WriteVecLogicLd] in {
+let isCodeGenOnly = 1, SchedRW = [WriteVecLogicLd] in {
let AddedComplexity = 20 in
def VMOVZPQILo2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
"vmovq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (v2i64 (X86vzmovl
(loadv2i64 addr:$src))))],
IIC_SSE_MOVDQ>,
- XS, VEX, Requires<[HasAVX]>;
+ XS, VEX, Requires<[UseAVX]>;
let AddedComplexity = 20 in {
def MOVZPQILo2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
"movq\t{$src, $dst|$dst, $src}",
@@ -4744,49 +4723,19 @@ def MOVZPQILo2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
IIC_SSE_MOVDQ>,
XS, Requires<[UseSSE2]>;
}
-} // SchedRW
+} // isCodeGenOnly, SchedRW
let AddedComplexity = 20 in {
- let Predicates = [HasAVX] in {
- def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))),
- (VMOVZPQILo2PQIrm addr:$src)>;
+ let Predicates = [UseAVX] in {
def : Pat<(v2f64 (X86vzmovl (v2f64 VR128:$src))),
(VMOVZPQILo2PQIrr VR128:$src)>;
}
let Predicates = [UseSSE2] in {
- def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))),
- (MOVZPQILo2PQIrm addr:$src)>;
def : Pat<(v2f64 (X86vzmovl (v2f64 VR128:$src))),
(MOVZPQILo2PQIrr VR128:$src)>;
}
}
-// Instructions to match in the assembler
-let SchedRW = [WriteMove] in {
-def VMOVQs64rr : VS2I<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
- "movq\t{$src, $dst|$dst, $src}", [],
- IIC_SSE_MOVDQ>, VEX, VEX_W;
-def VMOVQd64rr : VS2I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
- "movq\t{$src, $dst|$dst, $src}", [],
- IIC_SSE_MOVDQ>, VEX, VEX_W;
-// Recognize "movd" with GR64 destination, but encode as a "movq"
-def VMOVQd64rr_alt : VS2I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
- "movd\t{$src, $dst|$dst, $src}", [],
- IIC_SSE_MOVDQ>, VEX, VEX_W;
-} // SchedRW
-
-// Instructions for the disassembler
-// xr = XMM register
-// xm = mem64
-
-let SchedRW = [WriteMove] in {
-let Predicates = [HasAVX] in
-def VMOVQxrxr: I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "vmovq\t{$src, $dst|$dst, $src}", []>, VEX, XS;
-def MOVQxrxr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "movq\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVQ_RR>, XS;
-} // SchedRW
-
//===---------------------------------------------------------------------===//
// SSE3 - Replicate Single FP - MOVSHDUP and MOVSLDUP
//===---------------------------------------------------------------------===//
@@ -4805,13 +4754,13 @@ def rm : S3SI<op, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
let Predicates = [HasAVX] in {
defm VMOVSHDUP : sse3_replicate_sfp<0x16, X86Movshdup, "vmovshdup",
- v4f32, VR128, memopv4f32, f128mem>, VEX;
+ v4f32, VR128, loadv4f32, f128mem>, VEX;
defm VMOVSLDUP : sse3_replicate_sfp<0x12, X86Movsldup, "vmovsldup",
- v4f32, VR128, memopv4f32, f128mem>, VEX;
+ v4f32, VR128, loadv4f32, f128mem>, VEX;
defm VMOVSHDUPY : sse3_replicate_sfp<0x16, X86Movshdup, "vmovshdup",
- v8f32, VR256, memopv8f32, f256mem>, VEX, VEX_L;
+ v8f32, VR256, loadv8f32, f256mem>, VEX, VEX_L;
defm VMOVSLDUPY : sse3_replicate_sfp<0x12, X86Movsldup, "vmovsldup",
- v8f32, VR256, memopv8f32, f256mem>, VEX, VEX_L;
+ v8f32, VR256, loadv8f32, f256mem>, VEX, VEX_L;
}
defm MOVSHDUP : sse3_replicate_sfp<0x16, X86Movshdup, "movshdup", v4f32, VR128,
memopv4f32, f128mem>;
@@ -4821,19 +4770,19 @@ defm MOVSLDUP : sse3_replicate_sfp<0x12, X86Movsldup, "movsldup", v4f32, VR128,
let Predicates = [HasAVX] in {
def : Pat<(v4i32 (X86Movshdup VR128:$src)),
(VMOVSHDUPrr VR128:$src)>;
- def : Pat<(v4i32 (X86Movshdup (bc_v4i32 (memopv2i64 addr:$src)))),
+ def : Pat<(v4i32 (X86Movshdup (bc_v4i32 (loadv2i64 addr:$src)))),
(VMOVSHDUPrm addr:$src)>;
def : Pat<(v4i32 (X86Movsldup VR128:$src)),
(VMOVSLDUPrr VR128:$src)>;
- def : Pat<(v4i32 (X86Movsldup (bc_v4i32 (memopv2i64 addr:$src)))),
+ def : Pat<(v4i32 (X86Movsldup (bc_v4i32 (loadv2i64 addr:$src)))),
(VMOVSLDUPrm addr:$src)>;
def : Pat<(v8i32 (X86Movshdup VR256:$src)),
(VMOVSHDUPYrr VR256:$src)>;
- def : Pat<(v8i32 (X86Movshdup (bc_v8i32 (memopv4i64 addr:$src)))),
+ def : Pat<(v8i32 (X86Movshdup (bc_v8i32 (loadv4i64 addr:$src)))),
(VMOVSHDUPYrm addr:$src)>;
def : Pat<(v8i32 (X86Movsldup VR256:$src)),
(VMOVSLDUPYrr VR256:$src)>;
- def : Pat<(v8i32 (X86Movsldup (bc_v8i32 (memopv4i64 addr:$src)))),
+ def : Pat<(v8i32 (X86Movsldup (bc_v8i32 (loadv4i64 addr:$src)))),
(VMOVSLDUPYrm addr:$src)>;
}
@@ -4887,20 +4836,20 @@ let Predicates = [HasAVX] in {
defm MOVDDUP : sse3_replicate_dfp<"movddup">;
let Predicates = [HasAVX] in {
- def : Pat<(X86Movddup (memopv2f64 addr:$src)),
+ def : Pat<(X86Movddup (loadv2f64 addr:$src)),
(VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
- def : Pat<(X86Movddup (bc_v2f64 (memopv4f32 addr:$src))),
+ def : Pat<(X86Movddup (bc_v2f64 (loadv4f32 addr:$src))),
(VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
- def : Pat<(X86Movddup (bc_v2f64 (memopv2i64 addr:$src))),
+ def : Pat<(X86Movddup (bc_v2f64 (loadv2i64 addr:$src))),
(VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
def : Pat<(X86Movddup (bc_v2f64
(v2i64 (scalar_to_vector (loadi64 addr:$src))))),
(VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
// 256-bit version
- def : Pat<(X86Movddup (memopv4f64 addr:$src)),
+ def : Pat<(X86Movddup (loadv4f64 addr:$src)),
(VMOVDDUPYrm addr:$src)>;
- def : Pat<(X86Movddup (memopv4i64 addr:$src)),
+ def : Pat<(X86Movddup (loadv4i64 addr:$src)),
(VMOVDDUPYrm addr:$src)>;
def : Pat<(X86Movddup (v4i64 (scalar_to_vector (loadi64 addr:$src)))),
(VMOVDDUPYrm addr:$src)>;
@@ -5102,12 +5051,12 @@ multiclass SS3I_unop_rm_int_y<bits<8> opc, string OpcodeStr,
// Helper fragments to match sext vXi1 to vXiY.
def v16i1sextv16i8 : PatLeaf<(v16i8 (X86pcmpgt (bc_v16i8 (v4i32 immAllZerosV)),
VR128:$src))>;
-def v8i1sextv8i16 : PatLeaf<(v8i16 (X86vsrai VR128:$src, (i32 15)))>;
-def v4i1sextv4i32 : PatLeaf<(v4i32 (X86vsrai VR128:$src, (i32 31)))>;
+def v8i1sextv8i16 : PatLeaf<(v8i16 (X86vsrai VR128:$src, (i8 15)))>;
+def v4i1sextv4i32 : PatLeaf<(v4i32 (X86vsrai VR128:$src, (i8 31)))>;
def v32i1sextv32i8 : PatLeaf<(v32i8 (X86pcmpgt (bc_v32i8 (v8i32 immAllZerosV)),
VR256:$src))>;
-def v16i1sextv16i16: PatLeaf<(v16i16 (X86vsrai VR256:$src, (i32 15)))>;
-def v8i1sextv8i32 : PatLeaf<(v8i32 (X86vsrai VR256:$src, (i32 31)))>;
+def v16i1sextv16i16: PatLeaf<(v16i16 (X86vsrai VR256:$src, (i8 15)))>;
+def v8i1sextv8i32 : PatLeaf<(v8i32 (X86vsrai VR256:$src, (i8 31)))>;
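// For illustration only (not part of this patch): these fragments match the
// sign-mask idiom compilers emit for vector abs, which the abs patterns
// elsewhere in this file can then select as PABS*. A minimal C sketch of
// that idiom, assuming only SSE2 intrinsics:
//   #include <emmintrin.h>
//   __m128i abs_epi16(__m128i x) {
//     __m128i m = _mm_srai_epi16(x, 15);            // per-lane 0 or -1 mask
//     return _mm_sub_epi16(_mm_xor_si128(x, m), m); // (x ^ m) - m == |x|
//   }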
let Predicates = [HasAVX] in {
defm VPABSB : SS3I_unop_rm_int<0x1C, "vpabsb",
@@ -5263,34 +5212,34 @@ multiclass SS3I_binop_rm_int_y<bits<8> opc, string OpcodeStr,
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR256:$dst,
(IntId256 VR256:$src1,
- (bitconvert (memopv4i64 addr:$src2))))]>, OpSize;
+ (bitconvert (loadv4i64 addr:$src2))))]>, OpSize;
}
let ImmT = NoImm, Predicates = [HasAVX] in {
let isCommutable = 0 in {
defm VPHADDW : SS3I_binop_rm<0x01, "vphaddw", X86hadd, v8i16, VR128,
- memopv2i64, i128mem,
+ loadv2i64, i128mem,
SSE_PHADDSUBW, 0>, VEX_4V;
defm VPHADDD : SS3I_binop_rm<0x02, "vphaddd", X86hadd, v4i32, VR128,
- memopv2i64, i128mem,
+ loadv2i64, i128mem,
SSE_PHADDSUBD, 0>, VEX_4V;
defm VPHSUBW : SS3I_binop_rm<0x05, "vphsubw", X86hsub, v8i16, VR128,
- memopv2i64, i128mem,
+ loadv2i64, i128mem,
SSE_PHADDSUBW, 0>, VEX_4V;
defm VPHSUBD : SS3I_binop_rm<0x06, "vphsubd", X86hsub, v4i32, VR128,
- memopv2i64, i128mem,
+ loadv2i64, i128mem,
SSE_PHADDSUBD, 0>, VEX_4V;
defm VPSIGNB : SS3I_binop_rm<0x08, "vpsignb", X86psign, v16i8, VR128,
- memopv2i64, i128mem,
+ loadv2i64, i128mem,
SSE_PSIGN, 0>, VEX_4V;
defm VPSIGNW : SS3I_binop_rm<0x09, "vpsignw", X86psign, v8i16, VR128,
- memopv2i64, i128mem,
+ loadv2i64, i128mem,
SSE_PSIGN, 0>, VEX_4V;
defm VPSIGND : SS3I_binop_rm<0x0A, "vpsignd", X86psign, v4i32, VR128,
- memopv2i64, i128mem,
+ loadv2i64, i128mem,
SSE_PSIGN, 0>, VEX_4V;
defm VPSHUFB : SS3I_binop_rm<0x00, "vpshufb", X86pshufb, v16i8, VR128,
- memopv2i64, i128mem,
+ loadv2i64, i128mem,
SSE_PSHUFB, 0>, VEX_4V;
defm VPHADDSW : SS3I_binop_rm_int<0x03, "vphaddsw",
int_x86_ssse3_phadd_sw_128,
@@ -5310,28 +5259,28 @@ defm VPMULHRSW : SS3I_binop_rm_int<0x0B, "vpmulhrsw",
let ImmT = NoImm, Predicates = [HasAVX2] in {
let isCommutable = 0 in {
defm VPHADDWY : SS3I_binop_rm<0x01, "vphaddw", X86hadd, v16i16, VR256,
- memopv4i64, i256mem,
+ loadv4i64, i256mem,
SSE_PHADDSUBW, 0>, VEX_4V, VEX_L;
defm VPHADDDY : SS3I_binop_rm<0x02, "vphaddd", X86hadd, v8i32, VR256,
- memopv4i64, i256mem,
+ loadv4i64, i256mem,
SSE_PHADDSUBW, 0>, VEX_4V, VEX_L;
defm VPHSUBWY : SS3I_binop_rm<0x05, "vphsubw", X86hsub, v16i16, VR256,
- memopv4i64, i256mem,
+ loadv4i64, i256mem,
SSE_PHADDSUBW, 0>, VEX_4V, VEX_L;
defm VPHSUBDY : SS3I_binop_rm<0x06, "vphsubd", X86hsub, v8i32, VR256,
- memopv4i64, i256mem,
+ loadv4i64, i256mem,
SSE_PHADDSUBW, 0>, VEX_4V, VEX_L;
defm VPSIGNBY : SS3I_binop_rm<0x08, "vpsignb", X86psign, v32i8, VR256,
- memopv4i64, i256mem,
+ loadv4i64, i256mem,
SSE_PHADDSUBW, 0>, VEX_4V, VEX_L;
defm VPSIGNWY : SS3I_binop_rm<0x09, "vpsignw", X86psign, v16i16, VR256,
- memopv4i64, i256mem,
+ loadv4i64, i256mem,
SSE_PHADDSUBW, 0>, VEX_4V, VEX_L;
defm VPSIGNDY : SS3I_binop_rm<0x0A, "vpsignd", X86psign, v8i32, VR256,
- memopv4i64, i256mem,
+ loadv4i64, i256mem,
SSE_PHADDSUBW, 0>, VEX_4V, VEX_L;
defm VPSHUFBY : SS3I_binop_rm<0x00, "vpshufb", X86pshufb, v32i8, VR256,
- memopv4i64, i256mem,
+ loadv4i64, i256mem,
SSE_PHADDSUBW, 0>, VEX_4V, VEX_L;
defm VPHADDSW : SS3I_binop_rm_int_y<0x03, "vphaddsw",
int_x86_avx2_phadd_sw>, VEX_4V, VEX_L;
@@ -5389,7 +5338,7 @@ multiclass ssse3_palignr<string asm, bit Is2Addr = 1> {
!strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
!strconcat(asm,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
- [], IIC_SSE_PALIGNR>, OpSize, Sched<[WriteShuffle]>;
+ [], IIC_SSE_PALIGNRR>, OpSize, Sched<[WriteShuffle]>;
let mayLoad = 1 in
def R128rm : SS3AI<0x0F, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, i128mem:$src2, i8imm:$src3),
@@ -5397,7 +5346,7 @@ multiclass ssse3_palignr<string asm, bit Is2Addr = 1> {
!strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
!strconcat(asm,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
- [], IIC_SSE_PALIGNR>, OpSize, Sched<[WriteShuffleLd, ReadAfterLd]>;
+ [], IIC_SSE_PALIGNRM>, OpSize, Sched<[WriteShuffleLd, ReadAfterLd]>;
}
}
@@ -5489,16 +5438,17 @@ def : InstAlias<"monitor\t{%rax, %rcx, %rdx|rdx, rcx, rax}", (MONITORrrr)>,
// SSE4.1 - Packed Move with Sign/Zero Extend
//===----------------------------------------------------------------------===//
-multiclass SS41I_binop_rm_int8<bits<8> opc, string OpcodeStr, Intrinsic IntId> {
+multiclass SS41I_binop_rm_int8<bits<8> opc, string OpcodeStr, Intrinsic IntId,
+ OpndItins itins = DEFAULT_ITINS> {
def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (IntId VR128:$src))]>, OpSize;
+ [(set VR128:$dst, (IntId VR128:$src))], itins.rr>, OpSize;
def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst,
- (IntId (bitconvert (v2i64 (scalar_to_vector (loadi64 addr:$src))))))]>,
- OpSize;
+ (IntId (bitconvert (v2i64 (scalar_to_vector (loadi64 addr:$src))))))],
+ itins.rm>, OpSize;
}
multiclass SS41I_binop_rm_int16_y<bits<8> opc, string OpcodeStr,
@@ -5509,22 +5459,23 @@ multiclass SS41I_binop_rm_int16_y<bits<8> opc, string OpcodeStr,
def Yrm : SS48I<opc, MRMSrcMem, (outs VR256:$dst), (ins i128mem:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set VR256:$dst, (IntId (load addr:$src)))]>, OpSize;
+ [(set VR256:$dst, (IntId (load addr:$src)))]>,
+ OpSize;
}
let Predicates = [HasAVX] in {
-defm VPMOVSXBW : SS41I_binop_rm_int8<0x20, "vpmovsxbw", int_x86_sse41_pmovsxbw>,
- VEX;
-defm VPMOVSXWD : SS41I_binop_rm_int8<0x23, "vpmovsxwd", int_x86_sse41_pmovsxwd>,
- VEX;
-defm VPMOVSXDQ : SS41I_binop_rm_int8<0x25, "vpmovsxdq", int_x86_sse41_pmovsxdq>,
- VEX;
-defm VPMOVZXBW : SS41I_binop_rm_int8<0x30, "vpmovzxbw", int_x86_sse41_pmovzxbw>,
- VEX;
-defm VPMOVZXWD : SS41I_binop_rm_int8<0x33, "vpmovzxwd", int_x86_sse41_pmovzxwd>,
- VEX;
-defm VPMOVZXDQ : SS41I_binop_rm_int8<0x35, "vpmovzxdq", int_x86_sse41_pmovzxdq>,
- VEX;
+defm VPMOVSXBW : SS41I_binop_rm_int8<0x20, "vpmovsxbw",
+ int_x86_sse41_pmovsxbw>, VEX;
+defm VPMOVSXWD : SS41I_binop_rm_int8<0x23, "vpmovsxwd",
+ int_x86_sse41_pmovsxwd>, VEX;
+defm VPMOVSXDQ : SS41I_binop_rm_int8<0x25, "vpmovsxdq",
+ int_x86_sse41_pmovsxdq>, VEX;
+defm VPMOVZXBW : SS41I_binop_rm_int8<0x30, "vpmovzxbw",
+ int_x86_sse41_pmovzxbw>, VEX;
+defm VPMOVZXWD : SS41I_binop_rm_int8<0x33, "vpmovzxwd",
+ int_x86_sse41_pmovzxwd>, VEX;
+defm VPMOVZXDQ : SS41I_binop_rm_int8<0x35, "vpmovzxdq",
+ int_x86_sse41_pmovzxdq>, VEX;
}
let Predicates = [HasAVX2] in {
@@ -5542,12 +5493,12 @@ defm VPMOVZXDQ : SS41I_binop_rm_int16_y<0x35, "vpmovzxdq",
int_x86_avx2_pmovzxdq>, VEX, VEX_L;
}
-defm PMOVSXBW : SS41I_binop_rm_int8<0x20, "pmovsxbw", int_x86_sse41_pmovsxbw>;
-defm PMOVSXWD : SS41I_binop_rm_int8<0x23, "pmovsxwd", int_x86_sse41_pmovsxwd>;
-defm PMOVSXDQ : SS41I_binop_rm_int8<0x25, "pmovsxdq", int_x86_sse41_pmovsxdq>;
-defm PMOVZXBW : SS41I_binop_rm_int8<0x30, "pmovzxbw", int_x86_sse41_pmovzxbw>;
-defm PMOVZXWD : SS41I_binop_rm_int8<0x33, "pmovzxwd", int_x86_sse41_pmovzxwd>;
-defm PMOVZXDQ : SS41I_binop_rm_int8<0x35, "pmovzxdq", int_x86_sse41_pmovzxdq>;
+defm PMOVSXBW : SS41I_binop_rm_int8<0x20, "pmovsxbw", int_x86_sse41_pmovsxbw, SSE_INTALU_ITINS_P>;
+defm PMOVSXWD : SS41I_binop_rm_int8<0x23, "pmovsxwd", int_x86_sse41_pmovsxwd, SSE_INTALU_ITINS_P>;
+defm PMOVSXDQ : SS41I_binop_rm_int8<0x25, "pmovsxdq", int_x86_sse41_pmovsxdq, SSE_INTALU_ITINS_P>;
+defm PMOVZXBW : SS41I_binop_rm_int8<0x30, "pmovzxbw", int_x86_sse41_pmovzxbw, SSE_INTALU_ITINS_P>;
+defm PMOVZXWD : SS41I_binop_rm_int8<0x33, "pmovzxwd", int_x86_sse41_pmovzxwd, SSE_INTALU_ITINS_P>;
+defm PMOVZXDQ : SS41I_binop_rm_int8<0x35, "pmovzxdq", int_x86_sse41_pmovzxdq, SSE_INTALU_ITINS_P>;
let Predicates = [HasAVX] in {
// Common patterns involving scalar load.
@@ -5645,32 +5596,39 @@ let Predicates = [HasAVX2] in {
(VPMOVZXDQYrr VR128:$src)>;
def : Pat<(v8i32 (X86vzmovly (v8i16 VR128:$src))),
(VPMOVZXWDYrr VR128:$src)>;
+ def : Pat<(v16i16 (X86vzmovly (v16i8 VR128:$src))),
+ (VPMOVZXBWYrr VR128:$src)>;
}
def : Pat<(v4i64 (X86vsmovl (v4i32 VR128:$src))), (VPMOVSXDQYrr VR128:$src)>;
def : Pat<(v8i32 (X86vsmovl (v8i16 VR128:$src))), (VPMOVSXWDYrr VR128:$src)>;
+ def : Pat<(v16i16 (X86vsmovl (v16i8 VR128:$src))), (VPMOVSXBWYrr VR128:$src)>;
}
let Predicates = [HasAVX] in {
def : Pat<(v2i64 (X86vsmovl (v4i32 VR128:$src))), (VPMOVSXDQrr VR128:$src)>;
def : Pat<(v4i32 (X86vsmovl (v8i16 VR128:$src))), (VPMOVSXWDrr VR128:$src)>;
+ def : Pat<(v8i16 (X86vsmovl (v16i8 VR128:$src))), (VPMOVSXBWrr VR128:$src)>;
}
let Predicates = [UseSSE41] in {
def : Pat<(v2i64 (X86vsmovl (v4i32 VR128:$src))), (PMOVSXDQrr VR128:$src)>;
def : Pat<(v4i32 (X86vsmovl (v8i16 VR128:$src))), (PMOVSXWDrr VR128:$src)>;
+ def : Pat<(v8i16 (X86vsmovl (v16i8 VR128:$src))), (PMOVSXBWrr VR128:$src)>;
}
-multiclass SS41I_binop_rm_int4<bits<8> opc, string OpcodeStr, Intrinsic IntId> {
+multiclass SS41I_binop_rm_int4<bits<8> opc, string OpcodeStr, Intrinsic IntId,
+ OpndItins itins = DEFAULT_ITINS> {
def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (IntId VR128:$src))]>, OpSize;
+ [(set VR128:$dst, (IntId VR128:$src))], itins.rr>, OpSize;
def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst,
- (IntId (bitconvert (v4i32 (scalar_to_vector (loadi32 addr:$src))))))]>,
+ (IntId (bitconvert (v4i32 (scalar_to_vector (loadi32 addr:$src))))))],
+ itins.rm>,
OpSize;
}
@@ -5709,10 +5667,14 @@ defm VPMOVZXWQ : SS41I_binop_rm_int8_y<0x34, "vpmovzxwq",
int_x86_avx2_pmovzxwq>, VEX, VEX_L;
}
-defm PMOVSXBD : SS41I_binop_rm_int4<0x21, "pmovsxbd", int_x86_sse41_pmovsxbd>;
-defm PMOVSXWQ : SS41I_binop_rm_int4<0x24, "pmovsxwq", int_x86_sse41_pmovsxwq>;
-defm PMOVZXBD : SS41I_binop_rm_int4<0x31, "pmovzxbd", int_x86_sse41_pmovzxbd>;
-defm PMOVZXWQ : SS41I_binop_rm_int4<0x34, "pmovzxwq", int_x86_sse41_pmovzxwq>;
+defm PMOVSXBD : SS41I_binop_rm_int4<0x21, "pmovsxbd", int_x86_sse41_pmovsxbd,
+ SSE_INTALU_ITINS_P>;
+defm PMOVSXWQ : SS41I_binop_rm_int4<0x24, "pmovsxwq", int_x86_sse41_pmovsxwq,
+ SSE_INTALU_ITINS_P>;
+defm PMOVZXBD : SS41I_binop_rm_int4<0x31, "pmovzxbd", int_x86_sse41_pmovzxbd,
+ SSE_INTALU_ITINS_P>;
+defm PMOVZXWQ : SS41I_binop_rm_int4<0x34, "pmovzxwq", int_x86_sse41_pmovzxwq,
+ SSE_INTALU_ITINS_P>;
let Predicates = [HasAVX] in {
// Common patterns involving scalar load
@@ -5740,7 +5702,8 @@ let Predicates = [UseSSE41] in {
(PMOVZXWQrm addr:$src)>;
}
-multiclass SS41I_binop_rm_int2<bits<8> opc, string OpcodeStr, Intrinsic IntId> {
+multiclass SS41I_binop_rm_int2<bits<8> opc, string OpcodeStr, Intrinsic IntId,
+ OpndItins itins = DEFAULT_ITINS> {
def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst, (IntId VR128:$src))]>, OpSize;
@@ -5779,8 +5742,10 @@ defm VPMOVSXBQ : SS41I_binop_rm_int4_y<0x22, "vpmovsxbq",
defm VPMOVZXBQ : SS41I_binop_rm_int4_y<0x32, "vpmovzxbq",
int_x86_avx2_pmovzxbq>, VEX, VEX_L;
}
-defm PMOVSXBQ : SS41I_binop_rm_int2<0x22, "pmovsxbq", int_x86_sse41_pmovsxbq>;
-defm PMOVZXBQ : SS41I_binop_rm_int2<0x32, "pmovzxbq", int_x86_sse41_pmovzxbq>;
+defm PMOVSXBQ : SS41I_binop_rm_int2<0x22, "pmovsxbq", int_x86_sse41_pmovsxbq,
+ SSE_INTALU_ITINS_P>;
+defm PMOVZXBQ : SS41I_binop_rm_int2<0x32, "pmovzxbq", int_x86_sse41_pmovzxbq,
+ SSE_INTALU_ITINS_P>;
let Predicates = [HasAVX2] in {
def : Pat<(v16i16 (X86vsext (v16i8 VR128:$src))), (VPMOVSXBWYrr VR128:$src)>;
@@ -6032,35 +5997,39 @@ let Predicates = [UseSSE41] in {
/// SS41I_extract8 - SSE 4.1 extract 8 bits to a 32-bit reg or 8-bit mem
multiclass SS41I_extract8<bits<8> opc, string OpcodeStr> {
- def rr : SS4AIi8<opc, MRMDestReg, (outs GR32:$dst),
+ def rr : SS4AIi8<opc, MRMDestReg, (outs GR32orGR64:$dst),
(ins VR128:$src1, i32i8imm:$src2),
!strconcat(OpcodeStr,
- "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set GR32:$dst, (X86pextrb (v16i8 VR128:$src1), imm:$src2))]>,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set GR32orGR64:$dst, (X86pextrb (v16i8 VR128:$src1),
+ imm:$src2))]>,
OpSize;
let neverHasSideEffects = 1, mayStore = 1 in
def mr : SS4AIi8<opc, MRMDestMem, (outs),
(ins i8mem:$dst, VR128:$src1, i32i8imm:$src2),
!strconcat(OpcodeStr,
- "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[]>, OpSize;
// FIXME:
// There's an AssertZext in the way of writing the store pattern
// (store (i8 (trunc (X86pextrb (v16i8 VR128:$src1), imm:$src2))), addr:$dst)
}
-let Predicates = [HasAVX] in {
+let Predicates = [HasAVX] in
defm VPEXTRB : SS41I_extract8<0x14, "vpextrb">, VEX;
- def VPEXTRBrr64 : SS4AIi8<0x14, MRMDestReg, (outs GR64:$dst),
- (ins VR128:$src1, i32i8imm:$src2),
- "vpextrb\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, OpSize, VEX;
-}
defm PEXTRB : SS41I_extract8<0x14, "pextrb">;
/// SS41I_extract16 - SSE 4.1 extract 16 bits to memory destination
multiclass SS41I_extract16<bits<8> opc, string OpcodeStr> {
+ let isCodeGenOnly = 1, hasSideEffects = 0 in
+ def rr_REV : SS4AIi8<opc, MRMDestReg, (outs GR32orGR64:$dst),
+ (ins VR128:$src1, i32i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>, OpSize;
+
let neverHasSideEffects = 1, mayStore = 1 in
def mr : SS4AIi8<opc, MRMDestMem, (outs),
(ins i16mem:$dst, VR128:$src1, i32i8imm:$src2),
@@ -6122,31 +6091,28 @@ defm PEXTRQ : SS41I_extract64<0x16, "pextrq">;
/// SS41I_extractf32 - SSE 4.1 extract a 32-bit fp value to an int reg or
/// memory destination
-multiclass SS41I_extractf32<bits<8> opc, string OpcodeStr> {
- def rr : SS4AIi8<opc, MRMDestReg, (outs GR32:$dst),
+multiclass SS41I_extractf32<bits<8> opc, string OpcodeStr,
+ OpndItins itins = DEFAULT_ITINS> {
+ def rr : SS4AIi8<opc, MRMDestReg, (outs GR32orGR64:$dst),
(ins VR128:$src1, i32i8imm:$src2),
!strconcat(OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set GR32:$dst,
- (extractelt (bc_v4i32 (v4f32 VR128:$src1)), imm:$src2))]>,
+ [(set GR32orGR64:$dst,
+ (extractelt (bc_v4i32 (v4f32 VR128:$src1)), imm:$src2))],
+ itins.rr>,
OpSize;
def mr : SS4AIi8<opc, MRMDestMem, (outs),
(ins f32mem:$dst, VR128:$src1, i32i8imm:$src2),
!strconcat(OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(store (extractelt (bc_v4i32 (v4f32 VR128:$src1)), imm:$src2),
- addr:$dst)]>, OpSize;
+ addr:$dst)], itins.rm>, OpSize;
}
let ExeDomain = SSEPackedSingle in {
- let Predicates = [UseAVX] in {
+ let Predicates = [UseAVX] in
defm VEXTRACTPS : SS41I_extractf32<0x17, "vextractps">, VEX;
- def VEXTRACTPSrr64 : SS4AIi8<0x17, MRMDestReg, (outs GR64:$dst),
- (ins VR128:$src1, i32i8imm:$src2),
- "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- []>, OpSize, VEX;
- }
- defm EXTRACTPS : SS41I_extractf32<0x17, "extractps">;
+ defm EXTRACTPS : SS41I_extractf32<0x17, "extractps", SSE_EXTRACT_ITINS>;
}
// Also match an EXTRACTPS store when the store is done as f32 instead of i32.
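// For illustration only (not part of this patch): this covers code that pulls
// a lane out as a float rather than as its i32 bit pattern. A C sketch,
// assuming the standard SSE4.1 _MM_EXTRACT_FLOAT helper:
//   #include <smmintrin.h>
//   void store_lane2(float *p, __m128 v) {
//     float f;
//     _MM_EXTRACT_FLOAT(f, v, 2);   // extractps of lane 2 as an f32
//     *p = f;                       // the f32 store these patterns match
//   }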
@@ -6167,13 +6133,13 @@ def : Pat<(store (f32 (bitconvert (extractelt (bc_v4i32 (v4f32 VR128:$src1)),
multiclass SS41I_insert8<bits<8> opc, string asm, bit Is2Addr = 1> {
def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, GR32:$src2, i32i8imm:$src3),
+ (ins VR128:$src1, GR32orGR64:$src2, i32i8imm:$src3),
!if(Is2Addr,
!strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
!strconcat(asm,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
[(set VR128:$dst,
- (X86pinsrb VR128:$src1, GR32:$src2, imm:$src3))]>, OpSize;
+ (X86pinsrb VR128:$src1, GR32orGR64:$src2, imm:$src3))]>, OpSize;
def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, i8mem:$src2, i32i8imm:$src3),
!if(Is2Addr,
@@ -6246,7 +6212,8 @@ let Constraints = "$src1 = $dst" in
// are optimized inserts that won't zero arbitrary elements in the destination
// vector. The next one matches the intrinsic and could zero arbitrary elements
// in the target vector.
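// For illustration only (not part of this patch): the imm8 of insertps packs
// a source lane, a destination lane, and a zero mask. A C sketch, assuming
// SSE4.1 intrinsics:
//   #include <smmintrin.h>
//   __m128 demo(__m128 a, __m128 b) {
//     // imm = (src lane 1 << 6) | (dst lane 2 << 4) | zmask 0x1 (zero lane 0)
//     return _mm_insert_ps(a, b, 0x61);
//   }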
-multiclass SS41I_insertf32<bits<8> opc, string asm, bit Is2Addr = 1> {
+multiclass SS41I_insertf32<bits<8> opc, string asm, bit Is2Addr = 1,
+ OpndItins itins = DEFAULT_ITINS> {
def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, u32u8imm:$src3),
!if(Is2Addr,
@@ -6254,7 +6221,7 @@ multiclass SS41I_insertf32<bits<8> opc, string asm, bit Is2Addr = 1> {
!strconcat(asm,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
[(set VR128:$dst,
- (X86insrtps VR128:$src1, VR128:$src2, imm:$src3))]>,
+ (X86insrtps VR128:$src1, VR128:$src2, imm:$src3))], itins.rr>,
OpSize;
def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, f32mem:$src2, u32u8imm:$src3),
@@ -6265,14 +6232,14 @@ multiclass SS41I_insertf32<bits<8> opc, string asm, bit Is2Addr = 1> {
[(set VR128:$dst,
(X86insrtps VR128:$src1,
(v4f32 (scalar_to_vector (loadf32 addr:$src2))),
- imm:$src3))]>, OpSize;
+ imm:$src3))], itins.rm>, OpSize;
}
let ExeDomain = SSEPackedSingle in {
- let Predicates = [HasAVX] in
+ let Predicates = [UseAVX] in
defm VINSERTPS : SS41I_insertf32<0x21, "vinsertps", 0>, VEX_4V;
let Constraints = "$src1 = $dst" in
- defm INSERTPS : SS41I_insertf32<0x21, "insertps">;
+ defm INSERTPS : SS41I_insertf32<0x21, "insertps", 1, SSE_INSERT_ITINS>;
}
//===----------------------------------------------------------------------===//
@@ -6290,7 +6257,8 @@ let ExeDomain = SSEPackedSingle in {
(outs RC:$dst), (ins RC:$src1, i32i8imm:$src2),
!strconcat(OpcodeStr,
"ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set RC:$dst, (V4F32Int RC:$src1, imm:$src2))]>,
+ [(set RC:$dst, (V4F32Int RC:$src1, imm:$src2))],
+ IIC_SSE_ROUNDPS_REG>,
OpSize;
// Vector intrinsic operation, mem
@@ -6299,7 +6267,8 @@ let ExeDomain = SSEPackedSingle in {
!strconcat(OpcodeStr,
"ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set RC:$dst,
- (V4F32Int (mem_frag32 addr:$src1),imm:$src2))]>,
+ (V4F32Int (mem_frag32 addr:$src1),imm:$src2))],
+ IIC_SSE_ROUNDPS_MEM>,
OpSize;
} // ExeDomain = SSEPackedSingle
@@ -6309,7 +6278,8 @@ let ExeDomain = SSEPackedDouble in {
(outs RC:$dst), (ins RC:$src1, i32i8imm:$src2),
!strconcat(OpcodeStr,
"pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set RC:$dst, (V2F64Int RC:$src1, imm:$src2))]>,
+ [(set RC:$dst, (V2F64Int RC:$src1, imm:$src2))],
+ IIC_SSE_ROUNDPS_REG>,
OpSize;
// Vector intrinsic operation, mem
@@ -6318,7 +6288,8 @@ let ExeDomain = SSEPackedDouble in {
!strconcat(OpcodeStr,
"pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set RC:$dst,
- (V2F64Int (mem_frag64 addr:$src1),imm:$src2))]>,
+ (V2F64Int (mem_frag64 addr:$src1),imm:$src2))],
+ IIC_SSE_ROUNDPS_REG>,
OpSize;
} // ExeDomain = SSEPackedDouble
}
@@ -6402,11 +6373,11 @@ let ExeDomain = GenericDomain in {
let Predicates = [HasAVX] in {
// Intrinsic form
defm VROUND : sse41_fp_unop_rm<0x08, 0x09, "vround", f128mem, VR128,
- memopv4f32, memopv2f64,
+ loadv4f32, loadv2f64,
int_x86_sse41_round_ps,
int_x86_sse41_round_pd>, VEX;
defm VROUNDY : sse41_fp_unop_rm<0x08, 0x09, "vround", f256mem, VR256,
- memopv8f32, memopv4f64,
+ loadv8f32, loadv4f64,
int_x86_avx_round_ps_256,
int_x86_avx_round_pd_256>, VEX, VEX_L;
defm VROUND : sse41_fp_binop_rm<0x0A, 0x0B, "vround",
@@ -6544,7 +6515,7 @@ def VPTESTrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2),
OpSize, VEX;
def VPTESTrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2),
"vptest\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS,(X86ptest VR128:$src1, (memopv2i64 addr:$src2)))]>,
+ [(set EFLAGS,(X86ptest VR128:$src1, (loadv2i64 addr:$src2)))]>,
OpSize, VEX;
def VPTESTYrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR256:$src1, VR256:$src2),
@@ -6553,7 +6524,7 @@ def VPTESTYrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR256:$src1, VR256:$src2),
OpSize, VEX, VEX_L;
def VPTESTYrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR256:$src1, i256mem:$src2),
"vptest\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS,(X86ptest VR256:$src1, (memopv4i64 addr:$src2)))]>,
+ [(set EFLAGS,(X86ptest VR256:$src1, (loadv4i64 addr:$src2)))]>,
OpSize, VEX, VEX_L;
}
@@ -6582,13 +6553,13 @@ multiclass avx_bittest<bits<8> opc, string OpcodeStr, RegisterClass RC,
let Defs = [EFLAGS], Predicates = [HasAVX] in {
let ExeDomain = SSEPackedSingle in {
-defm VTESTPS : avx_bittest<0x0E, "vtestps", VR128, f128mem, memopv4f32, v4f32>;
-defm VTESTPSY : avx_bittest<0x0E, "vtestps", VR256, f256mem, memopv8f32, v8f32>,
+defm VTESTPS : avx_bittest<0x0E, "vtestps", VR128, f128mem, loadv4f32, v4f32>;
+defm VTESTPSY : avx_bittest<0x0E, "vtestps", VR256, f256mem, loadv8f32, v8f32>,
VEX_L;
}
let ExeDomain = SSEPackedDouble in {
-defm VTESTPD : avx_bittest<0x0F, "vtestpd", VR128, f128mem, memopv2f64, v2f64>;
-defm VTESTPDY : avx_bittest<0x0F, "vtestpd", VR256, f256mem, memopv4f64, v4f64>,
+defm VTESTPD : avx_bittest<0x0F, "vtestpd", VR128, f128mem, loadv2f64, v2f64>;
+defm VTESTPDY : avx_bittest<0x0F, "vtestpd", VR256, f256mem, loadv4f64, v4f64>,
VEX_L;
}
}
@@ -6600,30 +6571,33 @@ defm VTESTPDY : avx_bittest<0x0F, "vtestpd", VR256, f256mem, memopv4f64, v4f64>,
let Defs = [EFLAGS], Predicates = [HasPOPCNT] in {
def POPCNT16rr : I<0xB8, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
"popcnt{w}\t{$src, $dst|$dst, $src}",
- [(set GR16:$dst, (ctpop GR16:$src)), (implicit EFLAGS)]>,
+ [(set GR16:$dst, (ctpop GR16:$src)), (implicit EFLAGS)],
+ IIC_SSE_POPCNT_RR>,
OpSize, XS;
def POPCNT16rm : I<0xB8, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
"popcnt{w}\t{$src, $dst|$dst, $src}",
[(set GR16:$dst, (ctpop (loadi16 addr:$src))),
- (implicit EFLAGS)]>, OpSize, XS;
+ (implicit EFLAGS)], IIC_SSE_POPCNT_RM>, OpSize, XS;
def POPCNT32rr : I<0xB8, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
"popcnt{l}\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (ctpop GR32:$src)), (implicit EFLAGS)]>,
+ [(set GR32:$dst, (ctpop GR32:$src)), (implicit EFLAGS)],
+ IIC_SSE_POPCNT_RR>,
XS;
def POPCNT32rm : I<0xB8, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
"popcnt{l}\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, (ctpop (loadi32 addr:$src))),
- (implicit EFLAGS)]>, XS;
+ (implicit EFLAGS)], IIC_SSE_POPCNT_RM>, XS;
def POPCNT64rr : RI<0xB8, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
"popcnt{q}\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst, (ctpop GR64:$src)), (implicit EFLAGS)]>,
+ [(set GR64:$dst, (ctpop GR64:$src)), (implicit EFLAGS)],
+ IIC_SSE_POPCNT_RR>,
XS;
def POPCNT64rm : RI<0xB8, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
"popcnt{q}\t{$src, $dst|$dst, $src}",
[(set GR64:$dst, (ctpop (loadi64 addr:$src))),
- (implicit EFLAGS)]>, XS;
+ (implicit EFLAGS)], IIC_SSE_POPCNT_RM>, XS;
}
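// For illustration only (not part of this patch): these defs let the generic
// ctpop node select to POPCNT, e.g. this C (compiled with popcnt enabled)
// becomes a single popcntl, assuming the standard intrinsic:
//   #include <nmmintrin.h>
//   unsigned bits(unsigned x) { return _mm_popcnt_u32(x); }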
@@ -6651,14 +6625,16 @@ defm PHMINPOSUW : SS41I_unop_rm_int_v16 <0x41, "phminposuw",
/// SS41I_binop_rm_int - Simple SSE 4.1 binary operator
multiclass SS41I_binop_rm_int<bits<8> opc, string OpcodeStr,
- Intrinsic IntId128, bit Is2Addr = 1> {
+ Intrinsic IntId128, bit Is2Addr = 1,
+ OpndItins itins = DEFAULT_ITINS> {
let isCommutable = 1 in
def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))]>, OpSize;
+ [(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))],
+ itins.rr>, OpSize;
def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, i128mem:$src2),
!if(Is2Addr,
@@ -6666,7 +6642,8 @@ multiclass SS41I_binop_rm_int<bits<8> opc, string OpcodeStr,
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set VR128:$dst,
(IntId128 VR128:$src1,
- (bitconvert (memopv2i64 addr:$src2))))]>, OpSize;
+ (bitconvert (memopv2i64 addr:$src2))))],
+ itins.rm>, OpSize;
}
/// SS41I_binop_rm_int_y - Simple SSE 4.1 binary operator
@@ -6682,14 +6659,15 @@ multiclass SS41I_binop_rm_int_y<bits<8> opc, string OpcodeStr,
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR256:$dst,
(IntId256 VR256:$src1,
- (bitconvert (memopv4i64 addr:$src2))))]>, OpSize;
+ (bitconvert (loadv4i64 addr:$src2))))]>, OpSize;
}
/// SS48I_binop_rm - Simple SSE 4.1 binary operator.
multiclass SS48I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
ValueType OpVT, RegisterClass RC, PatFrag memop_frag,
- X86MemOperand x86memop, bit Is2Addr = 1> {
+ X86MemOperand x86memop, bit Is2Addr = 1,
+ OpndItins itins = DEFAULT_ITINS> {
let isCommutable = 1 in
def rr : SS48I<opc, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, RC:$src2),
@@ -6712,21 +6690,21 @@ let Predicates = [HasAVX] in {
defm VPACKUSDW : SS41I_binop_rm_int<0x2B, "vpackusdw", int_x86_sse41_packusdw,
0>, VEX_4V;
defm VPMINSB : SS48I_binop_rm<0x38, "vpminsb", X86smin, v16i8, VR128,
- memopv2i64, i128mem, 0>, VEX_4V;
+ loadv2i64, i128mem, 0>, VEX_4V;
defm VPMINSD : SS48I_binop_rm<0x39, "vpminsd", X86smin, v4i32, VR128,
- memopv2i64, i128mem, 0>, VEX_4V;
+ loadv2i64, i128mem, 0>, VEX_4V;
defm VPMINUD : SS48I_binop_rm<0x3B, "vpminud", X86umin, v4i32, VR128,
- memopv2i64, i128mem, 0>, VEX_4V;
+ loadv2i64, i128mem, 0>, VEX_4V;
defm VPMINUW : SS48I_binop_rm<0x3A, "vpminuw", X86umin, v8i16, VR128,
- memopv2i64, i128mem, 0>, VEX_4V;
+ loadv2i64, i128mem, 0>, VEX_4V;
defm VPMAXSB : SS48I_binop_rm<0x3C, "vpmaxsb", X86smax, v16i8, VR128,
- memopv2i64, i128mem, 0>, VEX_4V;
+ loadv2i64, i128mem, 0>, VEX_4V;
defm VPMAXSD : SS48I_binop_rm<0x3D, "vpmaxsd", X86smax, v4i32, VR128,
- memopv2i64, i128mem, 0>, VEX_4V;
+ loadv2i64, i128mem, 0>, VEX_4V;
defm VPMAXUD : SS48I_binop_rm<0x3F, "vpmaxud", X86umax, v4i32, VR128,
- memopv2i64, i128mem, 0>, VEX_4V;
+ loadv2i64, i128mem, 0>, VEX_4V;
defm VPMAXUW : SS48I_binop_rm<0x3E, "vpmaxuw", X86umax, v8i16, VR128,
- memopv2i64, i128mem, 0>, VEX_4V;
+ loadv2i64, i128mem, 0>, VEX_4V;
defm VPMULDQ : SS41I_binop_rm_int<0x28, "vpmuldq", int_x86_sse41_pmuldq,
0>, VEX_4V;
}
@@ -6736,21 +6714,21 @@ let Predicates = [HasAVX2] in {
defm VPACKUSDW : SS41I_binop_rm_int_y<0x2B, "vpackusdw",
int_x86_avx2_packusdw>, VEX_4V, VEX_L;
defm VPMINSBY : SS48I_binop_rm<0x38, "vpminsb", X86smin, v32i8, VR256,
- memopv4i64, i256mem, 0>, VEX_4V, VEX_L;
+ loadv4i64, i256mem, 0>, VEX_4V, VEX_L;
defm VPMINSDY : SS48I_binop_rm<0x39, "vpminsd", X86smin, v8i32, VR256,
- memopv4i64, i256mem, 0>, VEX_4V, VEX_L;
+ loadv4i64, i256mem, 0>, VEX_4V, VEX_L;
defm VPMINUDY : SS48I_binop_rm<0x3B, "vpminud", X86umin, v8i32, VR256,
- memopv4i64, i256mem, 0>, VEX_4V, VEX_L;
+ loadv4i64, i256mem, 0>, VEX_4V, VEX_L;
defm VPMINUWY : SS48I_binop_rm<0x3A, "vpminuw", X86umin, v16i16, VR256,
- memopv4i64, i256mem, 0>, VEX_4V, VEX_L;
+ loadv4i64, i256mem, 0>, VEX_4V, VEX_L;
defm VPMAXSBY : SS48I_binop_rm<0x3C, "vpmaxsb", X86smax, v32i8, VR256,
- memopv4i64, i256mem, 0>, VEX_4V, VEX_L;
+ loadv4i64, i256mem, 0>, VEX_4V, VEX_L;
defm VPMAXSDY : SS48I_binop_rm<0x3D, "vpmaxsd", X86smax, v8i32, VR256,
- memopv4i64, i256mem, 0>, VEX_4V, VEX_L;
+ loadv4i64, i256mem, 0>, VEX_4V, VEX_L;
defm VPMAXUDY : SS48I_binop_rm<0x3F, "vpmaxud", X86umax, v8i32, VR256,
- memopv4i64, i256mem, 0>, VEX_4V, VEX_L;
+ loadv4i64, i256mem, 0>, VEX_4V, VEX_L;
defm VPMAXUWY : SS48I_binop_rm<0x3E, "vpmaxuw", X86umax, v16i16, VR256,
- memopv4i64, i256mem, 0>, VEX_4V, VEX_L;
+ loadv4i64, i256mem, 0>, VEX_4V, VEX_L;
defm VPMULDQ : SS41I_binop_rm_int_y<0x28, "vpmuldq",
int_x86_avx2_pmul_dq>, VEX_4V, VEX_L;
}
@@ -6759,22 +6737,23 @@ let Constraints = "$src1 = $dst" in {
let isCommutable = 0 in
defm PACKUSDW : SS41I_binop_rm_int<0x2B, "packusdw", int_x86_sse41_packusdw>;
defm PMINSB : SS48I_binop_rm<0x38, "pminsb", X86smin, v16i8, VR128,
- memopv2i64, i128mem>;
+ memopv2i64, i128mem, 1, SSE_INTALU_ITINS_P>;
defm PMINSD : SS48I_binop_rm<0x39, "pminsd", X86smin, v4i32, VR128,
- memopv2i64, i128mem>;
+ memopv2i64, i128mem, 1, SSE_INTALU_ITINS_P>;
defm PMINUD : SS48I_binop_rm<0x3B, "pminud", X86umin, v4i32, VR128,
- memopv2i64, i128mem>;
+ memopv2i64, i128mem, 1, SSE_INTALU_ITINS_P>;
defm PMINUW : SS48I_binop_rm<0x3A, "pminuw", X86umin, v8i16, VR128,
- memopv2i64, i128mem>;
+ memopv2i64, i128mem, 1, SSE_INTALU_ITINS_P>;
defm PMAXSB : SS48I_binop_rm<0x3C, "pmaxsb", X86smax, v16i8, VR128,
- memopv2i64, i128mem>;
+ memopv2i64, i128mem, 1, SSE_INTALU_ITINS_P>;
defm PMAXSD : SS48I_binop_rm<0x3D, "pmaxsd", X86smax, v4i32, VR128,
- memopv2i64, i128mem>;
+ memopv2i64, i128mem, 1, SSE_INTALU_ITINS_P>;
defm PMAXUD : SS48I_binop_rm<0x3F, "pmaxud", X86umax, v4i32, VR128,
- memopv2i64, i128mem>;
+ memopv2i64, i128mem, 1, SSE_INTALU_ITINS_P>;
defm PMAXUW : SS48I_binop_rm<0x3E, "pmaxuw", X86umax, v8i16, VR128,
- memopv2i64, i128mem>;
- defm PMULDQ : SS41I_binop_rm_int<0x28, "pmuldq", int_x86_sse41_pmuldq>;
+ memopv2i64, i128mem, 1, SSE_INTALU_ITINS_P>;
+ defm PMULDQ : SS41I_binop_rm_int<0x28, "pmuldq", int_x86_sse41_pmuldq,
+ 1, SSE_INTMUL_ITINS_P>;
}
let Predicates = [HasAVX] in {
@@ -6792,15 +6771,16 @@ let Predicates = [HasAVX2] in {
let Constraints = "$src1 = $dst" in {
defm PMULLD : SS48I_binop_rm<0x40, "pmulld", mul, v4i32, VR128,
- memopv2i64, i128mem>;
+ memopv2i64, i128mem, 1, SSE_PMULLD_ITINS>;
defm PCMPEQQ : SS48I_binop_rm<0x29, "pcmpeqq", X86pcmpeq, v2i64, VR128,
- memopv2i64, i128mem>;
+ memopv2i64, i128mem, 1, SSE_INTALUQ_ITINS_P>;
}
/// SS41I_binop_rmi_int - SSE 4.1 binary operator with 8-bit immediate
multiclass SS41I_binop_rmi_int<bits<8> opc, string OpcodeStr,
Intrinsic IntId, RegisterClass RC, PatFrag memop_frag,
- X86MemOperand x86memop, bit Is2Addr = 1> {
+ X86MemOperand x86memop, bit Is2Addr = 1,
+ OpndItins itins = DEFAULT_ITINS> {
let isCommutable = 1 in
def rri : SS4AIi8<opc, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, RC:$src2, u32u8imm:$src3),
@@ -6809,7 +6789,7 @@ multiclass SS41I_binop_rmi_int<bits<8> opc, string OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
- [(set RC:$dst, (IntId RC:$src1, RC:$src2, imm:$src3))]>,
+ [(set RC:$dst, (IntId RC:$src1, RC:$src2, imm:$src3))], itins.rr>,
OpSize;
def rmi : SS4AIi8<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, x86memop:$src2, u32u8imm:$src3),
@@ -6820,7 +6800,7 @@ multiclass SS41I_binop_rmi_int<bits<8> opc, string OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
[(set RC:$dst,
(IntId RC:$src1,
- (bitconvert (memop_frag addr:$src2)), imm:$src3))]>,
+ (bitconvert (memop_frag addr:$src2)), imm:$src3))], itins.rm>,
OpSize;
}
@@ -6828,40 +6808,40 @@ let Predicates = [HasAVX] in {
let isCommutable = 0 in {
let ExeDomain = SSEPackedSingle in {
defm VBLENDPS : SS41I_binop_rmi_int<0x0C, "vblendps", int_x86_sse41_blendps,
- VR128, memopv4f32, f128mem, 0>, VEX_4V;
+ VR128, loadv4f32, f128mem, 0>, VEX_4V;
defm VBLENDPSY : SS41I_binop_rmi_int<0x0C, "vblendps",
- int_x86_avx_blend_ps_256, VR256, memopv8f32,
+ int_x86_avx_blend_ps_256, VR256, loadv8f32,
f256mem, 0>, VEX_4V, VEX_L;
}
let ExeDomain = SSEPackedDouble in {
defm VBLENDPD : SS41I_binop_rmi_int<0x0D, "vblendpd", int_x86_sse41_blendpd,
- VR128, memopv2f64, f128mem, 0>, VEX_4V;
+ VR128, loadv2f64, f128mem, 0>, VEX_4V;
defm VBLENDPDY : SS41I_binop_rmi_int<0x0D, "vblendpd",
- int_x86_avx_blend_pd_256,VR256, memopv4f64,
+ int_x86_avx_blend_pd_256,VR256, loadv4f64,
f256mem, 0>, VEX_4V, VEX_L;
}
defm VPBLENDW : SS41I_binop_rmi_int<0x0E, "vpblendw", int_x86_sse41_pblendw,
- VR128, memopv2i64, i128mem, 0>, VEX_4V;
+ VR128, loadv2i64, i128mem, 0>, VEX_4V;
defm VMPSADBW : SS41I_binop_rmi_int<0x42, "vmpsadbw", int_x86_sse41_mpsadbw,
- VR128, memopv2i64, i128mem, 0>, VEX_4V;
+ VR128, loadv2i64, i128mem, 0>, VEX_4V;
}
let ExeDomain = SSEPackedSingle in
defm VDPPS : SS41I_binop_rmi_int<0x40, "vdpps", int_x86_sse41_dpps,
- VR128, memopv4f32, f128mem, 0>, VEX_4V;
+ VR128, loadv4f32, f128mem, 0>, VEX_4V;
let ExeDomain = SSEPackedDouble in
defm VDPPD : SS41I_binop_rmi_int<0x41, "vdppd", int_x86_sse41_dppd,
- VR128, memopv2f64, f128mem, 0>, VEX_4V;
+ VR128, loadv2f64, f128mem, 0>, VEX_4V;
let ExeDomain = SSEPackedSingle in
defm VDPPSY : SS41I_binop_rmi_int<0x40, "vdpps", int_x86_avx_dp_ps_256,
- VR256, memopv8f32, i256mem, 0>, VEX_4V, VEX_L;
+ VR256, loadv8f32, i256mem, 0>, VEX_4V, VEX_L;
}
let Predicates = [HasAVX2] in {
let isCommutable = 0 in {
defm VPBLENDWY : SS41I_binop_rmi_int<0x0E, "vpblendw", int_x86_avx2_pblendw,
- VR256, memopv4i64, i256mem, 0>, VEX_4V, VEX_L;
+ VR256, loadv4i64, i256mem, 0>, VEX_4V, VEX_L;
defm VMPSADBWY : SS41I_binop_rmi_int<0x42, "vmpsadbw", int_x86_avx2_mpsadbw,
- VR256, memopv4i64, i256mem, 0>, VEX_4V, VEX_L;
+ VR256, loadv4i64, i256mem, 0>, VEX_4V, VEX_L;
}
}
@@ -6869,21 +6849,27 @@ let Constraints = "$src1 = $dst" in {
let isCommutable = 0 in {
let ExeDomain = SSEPackedSingle in
defm BLENDPS : SS41I_binop_rmi_int<0x0C, "blendps", int_x86_sse41_blendps,
- VR128, memopv4f32, f128mem>;
+ VR128, memopv4f32, f128mem,
+ 1, SSE_INTALU_ITINS_P>;
let ExeDomain = SSEPackedDouble in
defm BLENDPD : SS41I_binop_rmi_int<0x0D, "blendpd", int_x86_sse41_blendpd,
- VR128, memopv2f64, f128mem>;
+ VR128, memopv2f64, f128mem,
+ 1, SSE_INTALU_ITINS_P>;
defm PBLENDW : SS41I_binop_rmi_int<0x0E, "pblendw", int_x86_sse41_pblendw,
- VR128, memopv2i64, i128mem>;
+ VR128, memopv2i64, i128mem,
+ 1, SSE_INTALU_ITINS_P>;
defm MPSADBW : SS41I_binop_rmi_int<0x42, "mpsadbw", int_x86_sse41_mpsadbw,
- VR128, memopv2i64, i128mem>;
+ VR128, memopv2i64, i128mem,
+ 1, SSE_INTMUL_ITINS_P>;
}
let ExeDomain = SSEPackedSingle in
defm DPPS : SS41I_binop_rmi_int<0x40, "dpps", int_x86_sse41_dpps,
- VR128, memopv4f32, f128mem>;
+ VR128, memopv4f32, f128mem, 1,
+ SSE_DPPS_ITINS>;
let ExeDomain = SSEPackedDouble in
defm DPPD : SS41I_binop_rmi_int<0x41, "dppd", int_x86_sse41_dppd,
- VR128, memopv2f64, f128mem>;
+ VR128, memopv2f64, f128mem, 1,
+ SSE_DPPD_ITINS>;
}
/// SS41I_quaternary_int_avx - AVX SSE 4.1 with 4 operands
@@ -6910,23 +6896,23 @@ multiclass SS41I_quaternary_int_avx<bits<8> opc, string OpcodeStr,
let Predicates = [HasAVX] in {
let ExeDomain = SSEPackedDouble in {
defm VBLENDVPD : SS41I_quaternary_int_avx<0x4B, "vblendvpd", VR128, f128mem,
- memopv2f64, int_x86_sse41_blendvpd>;
+ loadv2f64, int_x86_sse41_blendvpd>;
defm VBLENDVPDY : SS41I_quaternary_int_avx<0x4B, "vblendvpd", VR256, f256mem,
- memopv4f64, int_x86_avx_blendv_pd_256>, VEX_L;
+ loadv4f64, int_x86_avx_blendv_pd_256>, VEX_L;
} // ExeDomain = SSEPackedDouble
let ExeDomain = SSEPackedSingle in {
defm VBLENDVPS : SS41I_quaternary_int_avx<0x4A, "vblendvps", VR128, f128mem,
- memopv4f32, int_x86_sse41_blendvps>;
+ loadv4f32, int_x86_sse41_blendvps>;
defm VBLENDVPSY : SS41I_quaternary_int_avx<0x4A, "vblendvps", VR256, f256mem,
- memopv8f32, int_x86_avx_blendv_ps_256>, VEX_L;
+ loadv8f32, int_x86_avx_blendv_ps_256>, VEX_L;
} // ExeDomain = SSEPackedSingle
defm VPBLENDVB : SS41I_quaternary_int_avx<0x4C, "vpblendvb", VR128, i128mem,
- memopv2i64, int_x86_sse41_pblendvb>;
+ loadv2i64, int_x86_sse41_pblendvb>;
}
let Predicates = [HasAVX2] in {
defm VPBLENDVBY : SS41I_quaternary_int_avx<0x4C, "vpblendvb", VR256, i256mem,
- memopv4i64, int_x86_avx2_pblendvb>, VEX_L;
+ loadv4i64, int_x86_avx2_pblendvb>, VEX_L;
}
let Predicates = [HasAVX] in {
@@ -6979,7 +6965,7 @@ let Predicates = [HasAVX] in {
let Predicates = [HasAVX2] in {
def : Pat<(v32i8 (vselect (v32i8 VR256:$mask), (v32i8 VR256:$src1),
(v32i8 VR256:$src2))),
- (VPBLENDVBYrr VR256:$src1, VR256:$src2, VR256:$mask)>;
+ (VPBLENDVBYrr VR256:$src2, VR256:$src1, VR256:$mask)>;
def : Pat<(v16i16 (X86Blendi (v16i16 VR256:$src1), (v16i16 VR256:$src2),
(imm:$mask))),
(VPBLENDWYrri VR256:$src1, VR256:$src2, imm:$mask)>;
@@ -6988,13 +6974,14 @@ let Predicates = [HasAVX2] in {
/// SS41I_ternary_int - SSE 4.1 ternary operator
let Uses = [XMM0], Constraints = "$src1 = $dst" in {
multiclass SS41I_ternary_int<bits<8> opc, string OpcodeStr, PatFrag mem_frag,
- X86MemOperand x86memop, Intrinsic IntId> {
+ X86MemOperand x86memop, Intrinsic IntId,
+ OpndItins itins = DEFAULT_ITINS> {
def rr0 : SS48I<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2),
!strconcat(OpcodeStr,
"\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst, (IntId VR128:$src1, VR128:$src2, XMM0))]>,
- OpSize;
+ [(set VR128:$dst, (IntId VR128:$src1, VR128:$src2, XMM0))],
+ itins.rr>, OpSize;
def rm0 : SS48I<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, x86memop:$src2),
@@ -7002,7 +6989,8 @@ let Uses = [XMM0], Constraints = "$src1 = $dst" in {
"\t{$src2, $dst|$dst, $src2}"),
[(set VR128:$dst,
(IntId VR128:$src1,
- (bitconvert (mem_frag addr:$src2)), XMM0))]>, OpSize;
+ (bitconvert (mem_frag addr:$src2)), XMM0))],
+ itins.rm>, OpSize;
}
}
@@ -7099,11 +7087,11 @@ multiclass SS42I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
let Predicates = [HasAVX] in
defm VPCMPGTQ : SS42I_binop_rm<0x37, "vpcmpgtq", X86pcmpgt, v2i64, VR128,
- memopv2i64, i128mem, 0>, VEX_4V;
+ loadv2i64, i128mem, 0>, VEX_4V;
let Predicates = [HasAVX2] in
defm VPCMPGTQY : SS42I_binop_rm<0x37, "vpcmpgtq", X86pcmpgt, v4i64, VR256,
- memopv4i64, i256mem, 0>, VEX_4V, VEX_L;
+ loadv4i64, i256mem, 0>, VEX_4V, VEX_L;
let Constraints = "$src1 = $dst" in
defm PCMPGTQ : SS42I_binop_rm<0x37, "pcmpgtq", X86pcmpgt, v2i64, VR128,
@@ -7263,70 +7251,100 @@ let Defs = [ECX, EFLAGS], Uses = [EAX, EDX], neverHasSideEffects = 1 in {
// CRC32 intrinsic instructions.
// This set of instructions comes only in rr/rm forms; the only difference
// is the size of r and m.
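// For illustration only (not part of this patch): the refactored classes keep
// the one-to-one mapping from the crc32 intrinsics, e.g. in C with SSE4.2:
//   #include <nmmintrin.h>
//   unsigned crc_step(unsigned crc, unsigned char b, unsigned w) {
//     crc = _mm_crc32_u8(crc, b);    // selects CRC32r32r8 / CRC32r32m8
//     crc = _mm_crc32_u32(crc, w);   // selects CRC32r32r32 / CRC32r32m32
//     return crc;
//   }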
+class SS42I_crc32r<bits<8> opc, string asm, RegisterClass RCOut,
+ RegisterClass RCIn, SDPatternOperator Int> :
+ SS42FI<opc, MRMSrcReg, (outs RCOut:$dst), (ins RCOut:$src1, RCIn:$src2),
+ !strconcat(asm, "\t{$src2, $src1|$src1, $src2}"),
+ [(set RCOut:$dst, (Int RCOut:$src1, RCIn:$src2))], IIC_CRC32_REG>;
+
+class SS42I_crc32m<bits<8> opc, string asm, RegisterClass RCOut,
+ X86MemOperand x86memop, SDPatternOperator Int> :
+ SS42FI<opc, MRMSrcMem, (outs RCOut:$dst), (ins RCOut:$src1, x86memop:$src2),
+ !strconcat(asm, "\t{$src2, $src1|$src1, $src2}"),
+ [(set RCOut:$dst, (Int RCOut:$src1, (load addr:$src2)))],
+ IIC_CRC32_MEM>;
+
let Constraints = "$src1 = $dst" in {
- def CRC32r32m8 : SS42FI<0xF0, MRMSrcMem, (outs GR32:$dst),
- (ins GR32:$src1, i8mem:$src2),
- "crc32{b}\t{$src2, $src1|$src1, $src2}",
- [(set GR32:$dst,
- (int_x86_sse42_crc32_32_8 GR32:$src1,
- (load addr:$src2)))]>;
- def CRC32r32r8 : SS42FI<0xF0, MRMSrcReg, (outs GR32:$dst),
- (ins GR32:$src1, GR8:$src2),
- "crc32{b}\t{$src2, $src1|$src1, $src2}",
- [(set GR32:$dst,
- (int_x86_sse42_crc32_32_8 GR32:$src1, GR8:$src2))]>;
- def CRC32r32m16 : SS42FI<0xF1, MRMSrcMem, (outs GR32:$dst),
- (ins GR32:$src1, i16mem:$src2),
- "crc32{w}\t{$src2, $src1|$src1, $src2}",
- [(set GR32:$dst,
- (int_x86_sse42_crc32_32_16 GR32:$src1,
- (load addr:$src2)))]>,
- OpSize;
- def CRC32r32r16 : SS42FI<0xF1, MRMSrcReg, (outs GR32:$dst),
- (ins GR32:$src1, GR16:$src2),
- "crc32{w}\t{$src2, $src1|$src1, $src2}",
- [(set GR32:$dst,
- (int_x86_sse42_crc32_32_16 GR32:$src1, GR16:$src2))]>,
- OpSize;
- def CRC32r32m32 : SS42FI<0xF1, MRMSrcMem, (outs GR32:$dst),
- (ins GR32:$src1, i32mem:$src2),
- "crc32{l}\t{$src2, $src1|$src1, $src2}",
- [(set GR32:$dst,
- (int_x86_sse42_crc32_32_32 GR32:$src1,
- (load addr:$src2)))]>;
- def CRC32r32r32 : SS42FI<0xF1, MRMSrcReg, (outs GR32:$dst),
- (ins GR32:$src1, GR32:$src2),
- "crc32{l}\t{$src2, $src1|$src1, $src2}",
- [(set GR32:$dst,
- (int_x86_sse42_crc32_32_32 GR32:$src1, GR32:$src2))]>;
- def CRC32r64m8 : SS42FI<0xF0, MRMSrcMem, (outs GR64:$dst),
- (ins GR64:$src1, i8mem:$src2),
- "crc32{b}\t{$src2, $src1|$src1, $src2}",
- [(set GR64:$dst,
- (int_x86_sse42_crc32_64_8 GR64:$src1,
- (load addr:$src2)))]>,
- REX_W;
- def CRC32r64r8 : SS42FI<0xF0, MRMSrcReg, (outs GR64:$dst),
- (ins GR64:$src1, GR8:$src2),
- "crc32{b}\t{$src2, $src1|$src1, $src2}",
- [(set GR64:$dst,
- (int_x86_sse42_crc32_64_8 GR64:$src1, GR8:$src2))]>,
- REX_W;
- def CRC32r64m64 : SS42FI<0xF1, MRMSrcMem, (outs GR64:$dst),
- (ins GR64:$src1, i64mem:$src2),
- "crc32{q}\t{$src2, $src1|$src1, $src2}",
- [(set GR64:$dst,
- (int_x86_sse42_crc32_64_64 GR64:$src1,
- (load addr:$src2)))]>,
- REX_W;
- def CRC32r64r64 : SS42FI<0xF1, MRMSrcReg, (outs GR64:$dst),
- (ins GR64:$src1, GR64:$src2),
- "crc32{q}\t{$src2, $src1|$src1, $src2}",
- [(set GR64:$dst,
- (int_x86_sse42_crc32_64_64 GR64:$src1, GR64:$src2))]>,
- REX_W;
+ def CRC32r32m8 : SS42I_crc32m<0xF0, "crc32{b}", GR32, i8mem,
+ int_x86_sse42_crc32_32_8>;
+ def CRC32r32r8 : SS42I_crc32r<0xF0, "crc32{b}", GR32, GR8,
+ int_x86_sse42_crc32_32_8>;
+ def CRC32r32m16 : SS42I_crc32m<0xF1, "crc32{w}", GR32, i16mem,
+ int_x86_sse42_crc32_32_16>, OpSize;
+ def CRC32r32r16 : SS42I_crc32r<0xF1, "crc32{w}", GR32, GR16,
+ int_x86_sse42_crc32_32_16>, OpSize;
+ def CRC32r32m32 : SS42I_crc32m<0xF1, "crc32{l}", GR32, i32mem,
+ int_x86_sse42_crc32_32_32>;
+ def CRC32r32r32 : SS42I_crc32r<0xF1, "crc32{l}", GR32, GR32,
+ int_x86_sse42_crc32_32_32>;
+ def CRC32r64m64 : SS42I_crc32m<0xF1, "crc32{q}", GR64, i64mem,
+ int_x86_sse42_crc32_64_64>, REX_W;
+ def CRC32r64r64 : SS42I_crc32r<0xF1, "crc32{q}", GR64, GR64,
+ int_x86_sse42_crc32_64_64>, REX_W;
+ let hasSideEffects = 0 in {
+ let mayLoad = 1 in
+ def CRC32r64m8 : SS42I_crc32m<0xF0, "crc32{b}", GR64, i8mem,
+ null_frag>, REX_W;
+ def CRC32r64r8 : SS42I_crc32r<0xF0, "crc32{b}", GR64, GR8,
+ null_frag>, REX_W;
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// SHA-NI Instructions
+//===----------------------------------------------------------------------===//
+
+multiclass SHAI_binop<bits<8> Opc, string OpcodeStr, Intrinsic IntId,
+ bit UsesXMM0 = 0> {
+ def rr : I<Opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ [!if(UsesXMM0,
+ (set VR128:$dst, (IntId VR128:$src1, VR128:$src2, XMM0)),
+ (set VR128:$dst, (IntId VR128:$src1, VR128:$src2)))]>, T8;
+
+ def rm : I<Opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, i128mem:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ [!if(UsesXMM0,
+ (set VR128:$dst, (IntId VR128:$src1,
+ (bc_v4i32 (memopv2i64 addr:$src2)), XMM0)),
+ (set VR128:$dst, (IntId VR128:$src1,
+ (bc_v4i32 (memopv2i64 addr:$src2)))))]>, T8;
+}
+
+let Constraints = "$src1 = $dst", Predicates = [HasSHA] in {
+ def SHA1RNDS4rri : Ii8<0xCC, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, i8imm:$src3),
+ "sha1rnds4\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ [(set VR128:$dst,
+ (int_x86_sha1rnds4 VR128:$src1, VR128:$src2,
+ (i8 imm:$src3)))]>, TA;
+ def SHA1RNDS4rmi : Ii8<0xCC, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, i128mem:$src2, i8imm:$src3),
+ "sha1rnds4\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ [(set VR128:$dst,
+ (int_x86_sha1rnds4 VR128:$src1,
+ (bc_v4i32 (memopv2i64 addr:$src2)),
+ (i8 imm:$src3)))]>, TA;
+
+ defm SHA1NEXTE : SHAI_binop<0xC8, "sha1nexte", int_x86_sha1nexte>;
+ defm SHA1MSG1 : SHAI_binop<0xC9, "sha1msg1", int_x86_sha1msg1>;
+ defm SHA1MSG2 : SHAI_binop<0xCA, "sha1msg2", int_x86_sha1msg2>;
+
+ let Uses=[XMM0] in
+ defm SHA256RNDS2 : SHAI_binop<0xCB, "sha256rnds2", int_x86_sha256rnds2, 1>;
+
+ defm SHA256MSG1 : SHAI_binop<0xCC, "sha256msg1", int_x86_sha256msg1>;
+ defm SHA256MSG2 : SHAI_binop<0xCD, "sha256msg2", int_x86_sha256msg2>;
}
+// Aliases with explicit %xmm0
+def : InstAlias<"sha256rnds2\t{%xmm0, $src2, $dst|$dst, $src2, xmm0}",
+ (SHA256RNDS2rr VR128:$dst, VR128:$src2)>;
+def : InstAlias<"sha256rnds2\t{%xmm0, $src2, $dst|$dst, $src2, xmm0}",
+ (SHA256RNDS2rm VR128:$dst, i128mem:$src2)>;
+
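// For illustration only (not part of this patch): sha256rnds2 reads its round
// key implicitly from XMM0, which is why SHAI_binop threads XMM0 through the
// pattern and the aliases above spell it out. In C the key is an ordinary
// argument that the register allocator pins, assuming the standard SHA
// intrinsics (-msha):
//   #include <immintrin.h>
//   __m128i round2(__m128i s0, __m128i s1, __m128i wk) {
//     return _mm_sha256rnds2_epu32(s0, s1, wk);  // wk is forced into xmm0
//   }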
//===----------------------------------------------------------------------===//
// AES-NI Instructions
//===----------------------------------------------------------------------===//
@@ -7383,7 +7401,7 @@ let Predicates = [HasAVX, HasAES] in {
def VAESIMCrm : AES8I<0xDB, MRMSrcMem, (outs VR128:$dst),
(ins i128mem:$src1),
"vaesimc\t{$src1, $dst|$dst, $src1}",
- [(set VR128:$dst, (int_x86_aesni_aesimc (memopv2i64 addr:$src1)))]>,
+ [(set VR128:$dst, (int_x86_aesni_aesimc (loadv2i64 addr:$src1)))]>,
OpSize, VEX;
}
def AESIMCrr : AES8I<0xDB, MRMSrcReg, (outs VR128:$dst),
@@ -7410,7 +7428,7 @@ let Predicates = [HasAVX, HasAES] in {
(ins i128mem:$src1, i8imm:$src2),
"vaeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst,
- (int_x86_aesni_aeskeygenassist (memopv2i64 addr:$src1), imm:$src2))]>,
+ (int_x86_aesni_aeskeygenassist (loadv2i64 addr:$src1), imm:$src2))]>,
OpSize, VEX;
}
def AESKEYGENASSIST128rr : AESAI<0xDF, MRMSrcReg, (outs VR128:$dst),
@@ -7441,7 +7459,7 @@ def VPCLMULQDQrm : AVXPCLMULIi8<0x44, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, i128mem:$src2, i8imm:$src3),
"vpclmulqdq\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
[(set VR128:$dst, (int_x86_pclmulqdq VR128:$src1,
- (memopv2i64 addr:$src2), imm:$src3))]>;
+ (loadv2i64 addr:$src2), imm:$src3))]>;
// Carry-less Multiplication instructions
let Constraints = "$src1 = $dst" in {
@@ -7449,13 +7467,15 @@ def PCLMULQDQrr : PCLMULIi8<0x44, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, i8imm:$src3),
"pclmulqdq\t{$src3, $src2, $dst|$dst, $src2, $src3}",
[(set VR128:$dst,
- (int_x86_pclmulqdq VR128:$src1, VR128:$src2, imm:$src3))]>;
+ (int_x86_pclmulqdq VR128:$src1, VR128:$src2, imm:$src3))],
+ IIC_SSE_PCLMULQDQ_RR>;
def PCLMULQDQrm : PCLMULIi8<0x44, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, i128mem:$src2, i8imm:$src3),
"pclmulqdq\t{$src3, $src2, $dst|$dst, $src2, $src3}",
[(set VR128:$dst, (int_x86_pclmulqdq VR128:$src1,
- (memopv2i64 addr:$src2), imm:$src3))]>;
+ (memopv2i64 addr:$src2), imm:$src3))],
+ IIC_SSE_PCLMULQDQ_RM>;
} // Constraints = "$src1 = $dst"
@@ -7595,11 +7615,11 @@ def : Pat<(vinsert128_insert:$ins (v4f64 VR256:$src1), (v2f64 VR128:$src2),
(VINSERTF128rr VR256:$src1, VR128:$src2,
(INSERT_get_vinsert128_imm VR256:$ins))>;
-def : Pat<(vinsert128_insert:$ins (v8f32 VR256:$src1), (memopv4f32 addr:$src2),
+def : Pat<(vinsert128_insert:$ins (v8f32 VR256:$src1), (loadv4f32 addr:$src2),
(iPTR imm)),
(VINSERTF128rm VR256:$src1, addr:$src2,
(INSERT_get_vinsert128_imm VR256:$ins))>;
-def : Pat<(vinsert128_insert:$ins (v4f64 VR256:$src1), (memopv2f64 addr:$src2),
+def : Pat<(vinsert128_insert:$ins (v4f64 VR256:$src1), (loadv2f64 addr:$src2),
(iPTR imm)),
(VINSERTF128rm VR256:$src1, addr:$src2,
(INSERT_get_vinsert128_imm VR256:$ins))>;
@@ -7623,22 +7643,22 @@ def : Pat<(vinsert128_insert:$ins (v16i16 VR256:$src1), (v8i16 VR128:$src2),
(VINSERTF128rr VR256:$src1, VR128:$src2,
(INSERT_get_vinsert128_imm VR256:$ins))>;
-def : Pat<(vinsert128_insert:$ins (v4i64 VR256:$src1), (memopv2i64 addr:$src2),
+def : Pat<(vinsert128_insert:$ins (v4i64 VR256:$src1), (loadv2i64 addr:$src2),
(iPTR imm)),
(VINSERTF128rm VR256:$src1, addr:$src2,
(INSERT_get_vinsert128_imm VR256:$ins))>;
def : Pat<(vinsert128_insert:$ins (v8i32 VR256:$src1),
- (bc_v4i32 (memopv2i64 addr:$src2)),
+ (bc_v4i32 (loadv2i64 addr:$src2)),
(iPTR imm)),
(VINSERTF128rm VR256:$src1, addr:$src2,
(INSERT_get_vinsert128_imm VR256:$ins))>;
def : Pat<(vinsert128_insert:$ins (v32i8 VR256:$src1),
- (bc_v16i8 (memopv2i64 addr:$src2)),
+ (bc_v16i8 (loadv2i64 addr:$src2)),
(iPTR imm)),
(VINSERTF128rm VR256:$src1, addr:$src2,
(INSERT_get_vinsert128_imm VR256:$ins))>;
def : Pat<(vinsert128_insert:$ins (v16i16 VR256:$src1),
- (bc_v8i16 (memopv2i64 addr:$src2)),
+ (bc_v8i16 (loadv2i64 addr:$src2)),
(iPTR imm)),
(VINSERTF128rm VR256:$src1, addr:$src2,
(INSERT_get_vinsert128_imm VR256:$ins))>;
@@ -7670,12 +7690,12 @@ def : Pat<(vextract128_extract:$ext VR256:$src1, (iPTR imm)),
(v4f64 VR256:$src1),
(EXTRACT_get_vextract128_imm VR128:$ext)))>;
-def : Pat<(alignedstore (v4f32 (vextract128_extract:$ext (v8f32 VR256:$src1),
- (iPTR imm))), addr:$dst),
+def : Pat<(store (v4f32 (vextract128_extract:$ext (v8f32 VR256:$src1),
+ (iPTR imm))), addr:$dst),
(VEXTRACTF128mr addr:$dst, VR256:$src1,
(EXTRACT_get_vextract128_imm VR128:$ext))>;
-def : Pat<(alignedstore (v2f64 (vextract128_extract:$ext (v4f64 VR256:$src1),
- (iPTR imm))), addr:$dst),
+def : Pat<(store (v2f64 (vextract128_extract:$ext (v4f64 VR256:$src1),
+ (iPTR imm))), addr:$dst),
(VEXTRACTF128mr addr:$dst, VR256:$src1,
(EXTRACT_get_vextract128_imm VR128:$ext))>;
}
@@ -7785,15 +7805,15 @@ multiclass avx_permil<bits<8> opc_rm, bits<8> opc_rmi, string OpcodeStr,
let ExeDomain = SSEPackedSingle in {
defm VPERMILPS : avx_permil<0x0C, 0x04, "vpermilps", VR128, f128mem, i128mem,
- memopv2i64, int_x86_avx_vpermilvar_ps, v4f32>;
+ loadv2i64, int_x86_avx_vpermilvar_ps, v4f32>;
defm VPERMILPSY : avx_permil<0x0C, 0x04, "vpermilps", VR256, f256mem, i256mem,
- memopv4i64, int_x86_avx_vpermilvar_ps_256, v8f32>, VEX_L;
+ loadv4i64, int_x86_avx_vpermilvar_ps_256, v8f32>, VEX_L;
}
let ExeDomain = SSEPackedDouble in {
defm VPERMILPD : avx_permil<0x0D, 0x05, "vpermilpd", VR128, f128mem, i128mem,
- memopv2i64, int_x86_avx_vpermilvar_pd, v2f64>;
+ loadv2i64, int_x86_avx_vpermilvar_pd, v2f64>;
defm VPERMILPDY : avx_permil<0x0D, 0x05, "vpermilpd", VR256, f256mem, i256mem,
- memopv4i64, int_x86_avx_vpermilvar_pd_256, v4f64>, VEX_L;
+ loadv4i64, int_x86_avx_vpermilvar_pd_256, v4f64>, VEX_L;
}
let Predicates = [HasAVX] in {
@@ -7801,15 +7821,15 @@ def : Pat<(v8i32 (X86VPermilp VR256:$src1, (i8 imm:$imm))),
(VPERMILPSYri VR256:$src1, imm:$imm)>;
def : Pat<(v4i64 (X86VPermilp VR256:$src1, (i8 imm:$imm))),
(VPERMILPDYri VR256:$src1, imm:$imm)>;
-def : Pat<(v8i32 (X86VPermilp (bc_v8i32 (memopv4i64 addr:$src1)),
+def : Pat<(v8i32 (X86VPermilp (bc_v8i32 (loadv4i64 addr:$src1)),
(i8 imm:$imm))),
(VPERMILPSYmi addr:$src1, imm:$imm)>;
-def : Pat<(v4i64 (X86VPermilp (memopv4i64 addr:$src1), (i8 imm:$imm))),
+def : Pat<(v4i64 (X86VPermilp (loadv4i64 addr:$src1), (i8 imm:$imm))),
(VPERMILPDYmi addr:$src1, imm:$imm)>;
def : Pat<(v2i64 (X86VPermilp VR128:$src1, (i8 imm:$imm))),
(VPERMILPDri VR128:$src1, imm:$imm)>;
-def : Pat<(v2i64 (X86VPermilp (memopv2i64 addr:$src1), (i8 imm:$imm))),
+def : Pat<(v2i64 (X86VPermilp (loadv2i64 addr:$src1), (i8 imm:$imm))),
(VPERMILPDmi addr:$src1, imm:$imm)>;
}
@@ -7825,7 +7845,7 @@ def VPERM2F128rr : AVXAIi8<0x06, MRMSrcReg, (outs VR256:$dst),
def VPERM2F128rm : AVXAIi8<0x06, MRMSrcMem, (outs VR256:$dst),
(ins VR256:$src1, f256mem:$src2, i8imm:$src3),
"vperm2f128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
- [(set VR256:$dst, (X86VPerm2x128 VR256:$src1, (memopv8f32 addr:$src2),
+ [(set VR256:$dst, (X86VPerm2x128 VR256:$src1, (loadv8f32 addr:$src2),
(i8 imm:$src3)))]>, VEX_4V, VEX_L;
}
@@ -7833,7 +7853,7 @@ let Predicates = [HasAVX] in {
def : Pat<(v4f64 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
(VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>;
def : Pat<(v4f64 (X86VPerm2x128 VR256:$src1,
- (memopv4f64 addr:$src2), (i8 imm:$imm))),
+ (loadv4f64 addr:$src2), (i8 imm:$imm))),
(VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>;
}
@@ -7848,16 +7868,16 @@ def : Pat<(v16i16 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
(VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>;
def : Pat<(v8i32 (X86VPerm2x128 VR256:$src1,
- (bc_v8i32 (memopv4i64 addr:$src2)), (i8 imm:$imm))),
+ (bc_v8i32 (loadv4i64 addr:$src2)), (i8 imm:$imm))),
(VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>;
def : Pat<(v4i64 (X86VPerm2x128 VR256:$src1,
- (memopv4i64 addr:$src2), (i8 imm:$imm))),
+ (loadv4i64 addr:$src2), (i8 imm:$imm))),
(VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>;
def : Pat<(v32i8 (X86VPerm2x128 VR256:$src1,
- (bc_v32i8 (memopv4i64 addr:$src2)), (i8 imm:$imm))),
+ (bc_v32i8 (loadv4i64 addr:$src2)), (i8 imm:$imm))),
(VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>;
def : Pat<(v16i16 (X86VPerm2x128 VR256:$src1,
- (bc_v16i16 (memopv4i64 addr:$src2)), (i8 imm:$imm))),
+ (bc_v16i16 (loadv4i64 addr:$src2)), (i8 imm:$imm))),
(VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>;
}
@@ -7901,7 +7921,7 @@ multiclass f16c_ps2ph<RegisterClass RC, X86MemOperand x86memop, Intrinsic Int> {
TA, OpSize, VEX;
}
-let Predicates = [HasAVX, HasF16C] in {
+let Predicates = [HasF16C] in {
defm VCVTPH2PS : f16c_ph2ps<VR128, f64mem, int_x86_vcvtph2ps_128>;
defm VCVTPH2PSY : f16c_ph2ps<VR256, f128mem, int_x86_vcvtph2ps_256>, VEX_L;
defm VCVTPS2PH : f16c_ps2ph<VR128, f64mem, int_x86_vcvtps2ph_128>;
@@ -7935,9 +7955,9 @@ multiclass AVX2_binop_rmi_int<bits<8> opc, string OpcodeStr,
let isCommutable = 0 in {
defm VPBLENDD : AVX2_binop_rmi_int<0x02, "vpblendd", int_x86_avx2_pblendd_128,
- VR128, memopv2i64, i128mem>;
+ VR128, loadv2i64, i128mem>;
defm VPBLENDDY : AVX2_binop_rmi_int<0x02, "vpblendd", int_x86_avx2_pblendd_256,
- VR256, memopv4i64, i256mem>, VEX_L;
+ VR256, loadv4i64, i256mem>, VEX_L;
}
def : Pat<(v4i32 (X86Blendi (v4i32 VR128:$src1), (v4i32 VR128:$src2),
@@ -8115,9 +8135,9 @@ multiclass avx2_perm<bits<8> opc, string OpcodeStr, PatFrag mem_frag,
VEX_4V, VEX_L;
}
-defm VPERMD : avx2_perm<0x36, "vpermd", memopv4i64, v8i32>;
+defm VPERMD : avx2_perm<0x36, "vpermd", loadv4i64, v8i32>;
let ExeDomain = SSEPackedSingle in
-defm VPERMPS : avx2_perm<0x16, "vpermps", memopv8f32, v8f32>;
+defm VPERMPS : avx2_perm<0x16, "vpermps", loadv8f32, v8f32>;
multiclass avx2_perm_imm<bits<8> opc, string OpcodeStr, PatFrag mem_frag,
ValueType OpVT> {
@@ -8137,9 +8157,9 @@ multiclass avx2_perm_imm<bits<8> opc, string OpcodeStr, PatFrag mem_frag,
(i8 imm:$src2))))]>, VEX, VEX_L;
}
-defm VPERMQ : avx2_perm_imm<0x00, "vpermq", memopv4i64, v4i64>, VEX_W;
+defm VPERMQ : avx2_perm_imm<0x00, "vpermq", loadv4i64, v4i64>, VEX_W;
let ExeDomain = SSEPackedDouble in
-defm VPERMPD : avx2_perm_imm<0x01, "vpermpd", memopv4f64, v4f64>, VEX_W;
+defm VPERMPD : avx2_perm_imm<0x01, "vpermpd", loadv4f64, v4f64>, VEX_W;
//===----------------------------------------------------------------------===//
// VPERM2I128 - Permute Floating-Point Values in 128-bit chunks
@@ -8152,7 +8172,7 @@ def VPERM2I128rr : AVX2AIi8<0x46, MRMSrcReg, (outs VR256:$dst),
def VPERM2I128rm : AVX2AIi8<0x46, MRMSrcMem, (outs VR256:$dst),
(ins VR256:$src1, f256mem:$src2, i8imm:$src3),
"vperm2i128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
- [(set VR256:$dst, (X86VPerm2x128 VR256:$src1, (memopv4i64 addr:$src2),
+ [(set VR256:$dst, (X86VPerm2x128 VR256:$src1, (loadv4i64 addr:$src2),
(i8 imm:$src3)))]>, VEX_4V, VEX_L;
let Predicates = [HasAVX2] in {
@@ -8163,13 +8183,13 @@ def : Pat<(v32i8 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
def : Pat<(v16i16 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
(VPERM2I128rr VR256:$src1, VR256:$src2, imm:$imm)>;
-def : Pat<(v32i8 (X86VPerm2x128 VR256:$src1, (bc_v32i8 (memopv4i64 addr:$src2)),
+def : Pat<(v32i8 (X86VPerm2x128 VR256:$src1, (bc_v32i8 (loadv4i64 addr:$src2)),
(i8 imm:$imm))),
(VPERM2I128rm VR256:$src1, addr:$src2, imm:$imm)>;
def : Pat<(v16i16 (X86VPerm2x128 VR256:$src1,
- (bc_v16i16 (memopv4i64 addr:$src2)), (i8 imm:$imm))),
+ (bc_v16i16 (loadv4i64 addr:$src2)), (i8 imm:$imm))),
(VPERM2I128rm VR256:$src1, addr:$src2, imm:$imm)>;
-def : Pat<(v8i32 (X86VPerm2x128 VR256:$src1, (bc_v8i32 (memopv4i64 addr:$src2)),
+def : Pat<(v8i32 (X86VPerm2x128 VR256:$src1, (bc_v8i32 (loadv4i64 addr:$src2)),
(i8 imm:$imm))),
(VPERM2I128rm VR256:$src1, addr:$src2, imm:$imm)>;
}
@@ -8208,22 +8228,22 @@ def : Pat<(vinsert128_insert:$ins (v16i16 VR256:$src1), (v8i16 VR128:$src2),
(VINSERTI128rr VR256:$src1, VR128:$src2,
(INSERT_get_vinsert128_imm VR256:$ins))>;
-def : Pat<(vinsert128_insert:$ins (v4i64 VR256:$src1), (memopv2i64 addr:$src2),
+def : Pat<(vinsert128_insert:$ins (v4i64 VR256:$src1), (loadv2i64 addr:$src2),
(iPTR imm)),
(VINSERTI128rm VR256:$src1, addr:$src2,
(INSERT_get_vinsert128_imm VR256:$ins))>;
def : Pat<(vinsert128_insert:$ins (v8i32 VR256:$src1),
- (bc_v4i32 (memopv2i64 addr:$src2)),
+ (bc_v4i32 (loadv2i64 addr:$src2)),
(iPTR imm)),
(VINSERTI128rm VR256:$src1, addr:$src2,
(INSERT_get_vinsert128_imm VR256:$ins))>;
def : Pat<(vinsert128_insert:$ins (v32i8 VR256:$src1),
- (bc_v16i8 (memopv2i64 addr:$src2)),
+ (bc_v16i8 (loadv2i64 addr:$src2)),
(iPTR imm)),
(VINSERTI128rm VR256:$src1, addr:$src2,
(INSERT_get_vinsert128_imm VR256:$ins))>;
def : Pat<(vinsert128_insert:$ins (v16i16 VR256:$src1),
- (bc_v8i16 (memopv2i64 addr:$src2)),
+ (bc_v8i16 (loadv2i64 addr:$src2)),
(iPTR imm)),
(VINSERTI128rm VR256:$src1, addr:$src2,
(INSERT_get_vinsert128_imm VR256:$ins))>;
@@ -8262,20 +8282,20 @@ def : Pat<(vextract128_extract:$ext VR256:$src1, (iPTR imm)),
(v32i8 VR256:$src1),
(EXTRACT_get_vextract128_imm VR128:$ext)))>;
-def : Pat<(alignedstore (v2i64 (vextract128_extract:$ext (v4i64 VR256:$src1),
- (iPTR imm))), addr:$dst),
+def : Pat<(store (v2i64 (vextract128_extract:$ext (v4i64 VR256:$src1),
+ (iPTR imm))), addr:$dst),
(VEXTRACTI128mr addr:$dst, VR256:$src1,
(EXTRACT_get_vextract128_imm VR128:$ext))>;
-def : Pat<(alignedstore (v4i32 (vextract128_extract:$ext (v8i32 VR256:$src1),
- (iPTR imm))), addr:$dst),
+def : Pat<(store (v4i32 (vextract128_extract:$ext (v8i32 VR256:$src1),
+ (iPTR imm))), addr:$dst),
(VEXTRACTI128mr addr:$dst, VR256:$src1,
(EXTRACT_get_vextract128_imm VR128:$ext))>;
-def : Pat<(alignedstore (v8i16 (vextract128_extract:$ext (v16i16 VR256:$src1),
- (iPTR imm))), addr:$dst),
+def : Pat<(store (v8i16 (vextract128_extract:$ext (v16i16 VR256:$src1),
+ (iPTR imm))), addr:$dst),
(VEXTRACTI128mr addr:$dst, VR256:$src1,
(EXTRACT_get_vextract128_imm VR128:$ext))>;
-def : Pat<(alignedstore (v16i8 (vextract128_extract:$ext (v32i8 VR256:$src1),
- (iPTR imm))), addr:$dst),
+def : Pat<(store (v16i8 (vextract128_extract:$ext (v32i8 VR256:$src1),
+ (iPTR imm))), addr:$dst),
(VEXTRACTI128mr addr:$dst, VR256:$src1,
(EXTRACT_get_vextract128_imm VR128:$ext))>;
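+// The VEXTRACTI128 memory destination has no alignment requirement, so the
+// patterns above accept any store, not just alignedstore.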
}
@@ -8333,7 +8353,7 @@ multiclass avx2_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode,
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR128:$dst,
(vt128 (OpNode VR128:$src1,
- (vt128 (bitconvert (memopv2i64 addr:$src2))))))]>,
+ (vt128 (bitconvert (loadv2i64 addr:$src2))))))]>,
VEX_4V;
def Yrr : AVX28I<opc, MRMSrcReg, (outs VR256:$dst),
(ins VR256:$src1, VR256:$src2),
@@ -8346,7 +8366,7 @@ multiclass avx2_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode,
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR256:$dst,
(vt256 (OpNode VR256:$src1,
- (vt256 (bitconvert (memopv4i64 addr:$src2))))))]>,
+ (vt256 (bitconvert (loadv4i64 addr:$src2))))))]>,
VEX_4V, VEX_L;
}
diff --git a/lib/Target/X86/X86InstrXOP.td b/lib/Target/X86/X86InstrXOP.td
index 2aa08fa..2b6ee5c 100644
--- a/lib/Target/X86/X86InstrXOP.td
+++ b/lib/Target/X86/X86InstrXOP.td
@@ -20,23 +20,21 @@ multiclass xop2op<bits<8> opc, string OpcodeStr, Intrinsic Int, PatFrag memop> {
[(set VR128:$dst, (Int (bitconvert (memop addr:$src))))]>, VEX;
}
-let isAsmParserOnly = 1 in {
- defm VPHSUBWD : xop2op<0xE2, "vphsubwd", int_x86_xop_vphsubwd, memopv2i64>;
- defm VPHSUBDQ : xop2op<0xE3, "vphsubdq", int_x86_xop_vphsubdq, memopv2i64>;
- defm VPHSUBBW : xop2op<0xE1, "vphsubbw", int_x86_xop_vphsubbw, memopv2i64>;
- defm VPHADDWQ : xop2op<0xC7, "vphaddwq", int_x86_xop_vphaddwq, memopv2i64>;
- defm VPHADDWD : xop2op<0xC6, "vphaddwd", int_x86_xop_vphaddwd, memopv2i64>;
- defm VPHADDUWQ : xop2op<0xD7, "vphadduwq", int_x86_xop_vphadduwq, memopv2i64>;
- defm VPHADDUWD : xop2op<0xD6, "vphadduwd", int_x86_xop_vphadduwd, memopv2i64>;
- defm VPHADDUDQ : xop2op<0xDB, "vphaddudq", int_x86_xop_vphaddudq, memopv2i64>;
- defm VPHADDUBW : xop2op<0xD1, "vphaddubw", int_x86_xop_vphaddubw, memopv2i64>;
- defm VPHADDUBQ : xop2op<0xD3, "vphaddubq", int_x86_xop_vphaddubq, memopv2i64>;
- defm VPHADDUBD : xop2op<0xD2, "vphaddubd", int_x86_xop_vphaddubd, memopv2i64>;
- defm VPHADDDQ : xop2op<0xCB, "vphadddq", int_x86_xop_vphadddq, memopv2i64>;
- defm VPHADDBW : xop2op<0xC1, "vphaddbw", int_x86_xop_vphaddbw, memopv2i64>;
- defm VPHADDBQ : xop2op<0xC3, "vphaddbq", int_x86_xop_vphaddbq, memopv2i64>;
- defm VPHADDBD : xop2op<0xC2, "vphaddbd", int_x86_xop_vphaddbd, memopv2i64>;
-}
+defm VPHSUBWD : xop2op<0xE2, "vphsubwd", int_x86_xop_vphsubwd, memopv2i64>;
+defm VPHSUBDQ : xop2op<0xE3, "vphsubdq", int_x86_xop_vphsubdq, memopv2i64>;
+defm VPHSUBBW : xop2op<0xE1, "vphsubbw", int_x86_xop_vphsubbw, memopv2i64>;
+defm VPHADDWQ : xop2op<0xC7, "vphaddwq", int_x86_xop_vphaddwq, memopv2i64>;
+defm VPHADDWD : xop2op<0xC6, "vphaddwd", int_x86_xop_vphaddwd, memopv2i64>;
+defm VPHADDUWQ : xop2op<0xD7, "vphadduwq", int_x86_xop_vphadduwq, memopv2i64>;
+defm VPHADDUWD : xop2op<0xD6, "vphadduwd", int_x86_xop_vphadduwd, memopv2i64>;
+defm VPHADDUDQ : xop2op<0xDB, "vphaddudq", int_x86_xop_vphaddudq, memopv2i64>;
+defm VPHADDUBW : xop2op<0xD1, "vphaddubw", int_x86_xop_vphaddubw, memopv2i64>;
+defm VPHADDUBQ : xop2op<0xD3, "vphaddubq", int_x86_xop_vphaddubq, memopv2i64>;
+defm VPHADDUBD : xop2op<0xD2, "vphaddubd", int_x86_xop_vphaddubd, memopv2i64>;
+defm VPHADDDQ : xop2op<0xCB, "vphadddq", int_x86_xop_vphadddq, memopv2i64>;
+defm VPHADDBW : xop2op<0xC1, "vphaddbw", int_x86_xop_vphaddbw, memopv2i64>;
+defm VPHADDBQ : xop2op<0xC3, "vphaddbq", int_x86_xop_vphaddbq, memopv2i64>;
+defm VPHADDBD : xop2op<0xC2, "vphaddbd", int_x86_xop_vphaddbd, memopv2i64>;
// Scalar load 2 addr operand instructions
multiclass xop2opsld<bits<8> opc, string OpcodeStr, Intrinsic Int,
@@ -49,12 +47,10 @@ multiclass xop2opsld<bits<8> opc, string OpcodeStr, Intrinsic Int,
[(set VR128:$dst, (Int (bitconvert mem_cpat:$src)))]>, VEX;
}
-let isAsmParserOnly = 1 in {
- defm VFRCZSS : xop2opsld<0x82, "vfrczss", int_x86_xop_vfrcz_ss,
- ssmem, sse_load_f32>;
- defm VFRCZSD : xop2opsld<0x83, "vfrczsd", int_x86_xop_vfrcz_sd,
- sdmem, sse_load_f64>;
-}
+defm VFRCZSS : xop2opsld<0x82, "vfrczss", int_x86_xop_vfrcz_ss,
+ ssmem, sse_load_f32>;
+defm VFRCZSD : xop2opsld<0x83, "vfrczsd", int_x86_xop_vfrcz_sd,
+ sdmem, sse_load_f64>;
multiclass xop2op128<bits<8> opc, string OpcodeStr, Intrinsic Int,
PatFrag memop> {
@@ -66,10 +62,8 @@ multiclass xop2op128<bits<8> opc, string OpcodeStr, Intrinsic Int,
[(set VR128:$dst, (Int (bitconvert (memop addr:$src))))]>, VEX;
}
-let isAsmParserOnly = 1 in {
- defm VFRCZPS : xop2op128<0x80, "vfrczps", int_x86_xop_vfrcz_ps, memopv4f32>;
- defm VFRCZPD : xop2op128<0x81, "vfrczpd", int_x86_xop_vfrcz_pd, memopv2f64>;
-}
+defm VFRCZPS : xop2op128<0x80, "vfrczps", int_x86_xop_vfrcz_ps, memopv4f32>;
+defm VFRCZPD : xop2op128<0x81, "vfrczpd", int_x86_xop_vfrcz_pd, memopv2f64>;
multiclass xop2op256<bits<8> opc, string OpcodeStr, Intrinsic Int,
PatFrag memop> {
@@ -81,12 +75,8 @@ multiclass xop2op256<bits<8> opc, string OpcodeStr, Intrinsic Int,
[(set VR256:$dst, (Int (bitconvert (memop addr:$src))))]>, VEX, VEX_L;
}
-let isAsmParserOnly = 1 in {
- defm VFRCZPS : xop2op256<0x80, "vfrczps", int_x86_xop_vfrcz_ps_256,
- memopv8f32>;
- defm VFRCZPD : xop2op256<0x81, "vfrczpd", int_x86_xop_vfrcz_pd_256,
- memopv4f64>;
-}
+defm VFRCZPS : xop2op256<0x80, "vfrczps", int_x86_xop_vfrcz_ps_256, memopv8f32>;
+defm VFRCZPD : xop2op256<0x81, "vfrczpd", int_x86_xop_vfrcz_pd_256, memopv4f64>;
multiclass xop3op<bits<8> opc, string OpcodeStr, Intrinsic Int> {
def rr : IXOP<opc, MRMSrcReg, (outs VR128:$dst),
@@ -107,20 +97,18 @@ multiclass xop3op<bits<8> opc, string OpcodeStr, Intrinsic Int> {
VEX_4VOp3;
}
-let isAsmParserOnly = 1 in {
- defm VPSHLW : xop3op<0x95, "vpshlw", int_x86_xop_vpshlw>;
- defm VPSHLQ : xop3op<0x97, "vpshlq", int_x86_xop_vpshlq>;
- defm VPSHLD : xop3op<0x96, "vpshld", int_x86_xop_vpshld>;
- defm VPSHLB : xop3op<0x94, "vpshlb", int_x86_xop_vpshlb>;
- defm VPSHAW : xop3op<0x99, "vpshaw", int_x86_xop_vpshaw>;
- defm VPSHAQ : xop3op<0x9B, "vpshaq", int_x86_xop_vpshaq>;
- defm VPSHAD : xop3op<0x9A, "vpshad", int_x86_xop_vpshad>;
- defm VPSHAB : xop3op<0x98, "vpshab", int_x86_xop_vpshab>;
- defm VPROTW : xop3op<0x91, "vprotw", int_x86_xop_vprotw>;
- defm VPROTQ : xop3op<0x93, "vprotq", int_x86_xop_vprotq>;
- defm VPROTD : xop3op<0x92, "vprotd", int_x86_xop_vprotd>;
- defm VPROTB : xop3op<0x90, "vprotb", int_x86_xop_vprotb>;
-}
+defm VPSHLW : xop3op<0x95, "vpshlw", int_x86_xop_vpshlw>;
+defm VPSHLQ : xop3op<0x97, "vpshlq", int_x86_xop_vpshlq>;
+defm VPSHLD : xop3op<0x96, "vpshld", int_x86_xop_vpshld>;
+defm VPSHLB : xop3op<0x94, "vpshlb", int_x86_xop_vpshlb>;
+defm VPSHAW : xop3op<0x99, "vpshaw", int_x86_xop_vpshaw>;
+defm VPSHAQ : xop3op<0x9B, "vpshaq", int_x86_xop_vpshaq>;
+defm VPSHAD : xop3op<0x9A, "vpshad", int_x86_xop_vpshad>;
+defm VPSHAB : xop3op<0x98, "vpshab", int_x86_xop_vpshab>;
+defm VPROTW : xop3op<0x91, "vprotw", int_x86_xop_vprotw>;
+defm VPROTQ : xop3op<0x93, "vprotq", int_x86_xop_vprotq>;
+defm VPROTD : xop3op<0x92, "vprotd", int_x86_xop_vprotd>;
+defm VPROTB : xop3op<0x90, "vprotb", int_x86_xop_vprotb>;
multiclass xop3opimm<bits<8> opc, string OpcodeStr, Intrinsic Int> {
def ri : IXOPi8<opc, MRMSrcReg, (outs VR128:$dst),
@@ -134,12 +122,10 @@ multiclass xop3opimm<bits<8> opc, string OpcodeStr, Intrinsic Int> {
(Int (bitconvert (memopv2i64 addr:$src1)), imm:$src2))]>, VEX;
}
-let isAsmParserOnly = 1 in {
- defm VPROTW : xop3opimm<0xC1, "vprotw", int_x86_xop_vprotwi>;
- defm VPROTQ : xop3opimm<0xC3, "vprotq", int_x86_xop_vprotqi>;
- defm VPROTD : xop3opimm<0xC2, "vprotd", int_x86_xop_vprotdi>;
- defm VPROTB : xop3opimm<0xC0, "vprotb", int_x86_xop_vprotbi>;
-}
+defm VPROTW : xop3opimm<0xC1, "vprotw", int_x86_xop_vprotwi>;
+defm VPROTQ : xop3opimm<0xC3, "vprotq", int_x86_xop_vprotqi>;
+defm VPROTD : xop3opimm<0xC2, "vprotd", int_x86_xop_vprotdi>;
+defm VPROTB : xop3opimm<0xC0, "vprotb", int_x86_xop_vprotbi>;
// Instruction where second source can be memory, but third must be register
multiclass xop4opm2<bits<8> opc, string OpcodeStr, Intrinsic Int> {
@@ -158,20 +144,18 @@ multiclass xop4opm2<bits<8> opc, string OpcodeStr, Intrinsic Int> {
VR128:$src3))]>, VEX_4V, VEX_I8IMM;
}
-let isAsmParserOnly = 1 in {
- defm VPMADCSWD : xop4opm2<0xB6, "vpmadcswd", int_x86_xop_vpmadcswd>;
- defm VPMADCSSWD : xop4opm2<0xA6, "vpmadcsswd", int_x86_xop_vpmadcsswd>;
- defm VPMACSWW : xop4opm2<0x95, "vpmacsww", int_x86_xop_vpmacsww>;
- defm VPMACSWD : xop4opm2<0x96, "vpmacswd", int_x86_xop_vpmacswd>;
- defm VPMACSSWW : xop4opm2<0x85, "vpmacssww", int_x86_xop_vpmacssww>;
- defm VPMACSSWD : xop4opm2<0x86, "vpmacsswd", int_x86_xop_vpmacsswd>;
- defm VPMACSSDQL : xop4opm2<0x87, "vpmacssdql", int_x86_xop_vpmacssdql>;
- defm VPMACSSDQH : xop4opm2<0x8F, "vpmacssdqh", int_x86_xop_vpmacssdqh>;
- defm VPMACSSDD : xop4opm2<0x8E, "vpmacssdd", int_x86_xop_vpmacssdd>;
- defm VPMACSDQL : xop4opm2<0x97, "vpmacsdql", int_x86_xop_vpmacsdql>;
- defm VPMACSDQH : xop4opm2<0x9F, "vpmacsdqh", int_x86_xop_vpmacsdqh>;
- defm VPMACSDD : xop4opm2<0x9E, "vpmacsdd", int_x86_xop_vpmacsdd>;
-}
+defm VPMADCSWD : xop4opm2<0xB6, "vpmadcswd", int_x86_xop_vpmadcswd>;
+defm VPMADCSSWD : xop4opm2<0xA6, "vpmadcsswd", int_x86_xop_vpmadcsswd>;
+defm VPMACSWW : xop4opm2<0x95, "vpmacsww", int_x86_xop_vpmacsww>;
+defm VPMACSWD : xop4opm2<0x96, "vpmacswd", int_x86_xop_vpmacswd>;
+defm VPMACSSWW : xop4opm2<0x85, "vpmacssww", int_x86_xop_vpmacssww>;
+defm VPMACSSWD : xop4opm2<0x86, "vpmacsswd", int_x86_xop_vpmacsswd>;
+defm VPMACSSDQL : xop4opm2<0x87, "vpmacssdql", int_x86_xop_vpmacssdql>;
+defm VPMACSSDQH : xop4opm2<0x8F, "vpmacssdqh", int_x86_xop_vpmacssdqh>;
+defm VPMACSSDD : xop4opm2<0x8E, "vpmacssdd", int_x86_xop_vpmacssdd>;
+defm VPMACSDQL : xop4opm2<0x97, "vpmacsdql", int_x86_xop_vpmacsdql>;
+defm VPMACSDQH : xop4opm2<0x9F, "vpmacsdqh", int_x86_xop_vpmacsdqh>;
+defm VPMACSDD : xop4opm2<0x9E, "vpmacsdd", int_x86_xop_vpmacsdd>;
// Instruction where second source can be memory, third must be imm8
multiclass xop4opimm<bits<8> opc, string OpcodeStr, Intrinsic Int> {
@@ -190,16 +174,14 @@ multiclass xop4opimm<bits<8> opc, string OpcodeStr, Intrinsic Int> {
imm:$src3))]>, VEX_4V;
}
-let isAsmParserOnly = 1 in {
- defm VPCOMB : xop4opimm<0xCC, "vpcomb", int_x86_xop_vpcomb>;
- defm VPCOMW : xop4opimm<0xCD, "vpcomw", int_x86_xop_vpcomw>;
- defm VPCOMD : xop4opimm<0xCE, "vpcomd", int_x86_xop_vpcomd>;
- defm VPCOMQ : xop4opimm<0xCF, "vpcomq", int_x86_xop_vpcomq>;
- defm VPCOMUB : xop4opimm<0xEC, "vpcomub", int_x86_xop_vpcomub>;
- defm VPCOMUW : xop4opimm<0xED, "vpcomuw", int_x86_xop_vpcomuw>;
- defm VPCOMUD : xop4opimm<0xEE, "vpcomud", int_x86_xop_vpcomud>;
- defm VPCOMUQ : xop4opimm<0xEF, "vpcomuq", int_x86_xop_vpcomuq>;
-}
+defm VPCOMB : xop4opimm<0xCC, "vpcomb", int_x86_xop_vpcomb>;
+defm VPCOMW : xop4opimm<0xCD, "vpcomw", int_x86_xop_vpcomw>;
+defm VPCOMD : xop4opimm<0xCE, "vpcomd", int_x86_xop_vpcomd>;
+defm VPCOMQ : xop4opimm<0xCF, "vpcomq", int_x86_xop_vpcomq>;
+defm VPCOMUB : xop4opimm<0xEC, "vpcomub", int_x86_xop_vpcomub>;
+defm VPCOMUW : xop4opimm<0xED, "vpcomuw", int_x86_xop_vpcomuw>;
+defm VPCOMUD : xop4opimm<0xEE, "vpcomud", int_x86_xop_vpcomud>;
+defm VPCOMUQ : xop4opimm<0xEF, "vpcomuq", int_x86_xop_vpcomuq>;
// Instruction where either second or third source can be memory
multiclass xop4op<bits<8> opc, string OpcodeStr, Intrinsic Int> {
@@ -227,10 +209,8 @@ multiclass xop4op<bits<8> opc, string OpcodeStr, Intrinsic Int> {
VEX_4V, VEX_I8IMM;
}
-let isAsmParserOnly = 1 in {
- defm VPPERM : xop4op<0xA3, "vpperm", int_x86_xop_vpperm>;
- defm VPCMOV : xop4op<0xA2, "vpcmov", int_x86_xop_vpcmov>;
-}
+defm VPPERM : xop4op<0xA3, "vpperm", int_x86_xop_vpperm>;
+defm VPCMOV : xop4op<0xA2, "vpcmov", int_x86_xop_vpcmov>;
multiclass xop4op256<bits<8> opc, string OpcodeStr, Intrinsic Int> {
def rrY : IXOPi8<opc, MRMSrcReg, (outs VR256:$dst),
@@ -257,9 +237,7 @@ multiclass xop4op256<bits<8> opc, string OpcodeStr, Intrinsic Int> {
VEX_4V, VEX_I8IMM, VEX_L;
}
-let isAsmParserOnly = 1 in {
- defm VPCMOV : xop4op256<0xA2, "vpcmov", int_x86_xop_vpcmov_256>;
-}
+defm VPCMOV : xop4op256<0xA2, "vpcmov", int_x86_xop_vpcmov_256>;
multiclass xop5op<bits<8> opc, string OpcodeStr, Intrinsic Int128,
Intrinsic Int256, PatFrag ld_128, PatFrag ld_256> {
diff --git a/lib/Target/X86/X86JITInfo.cpp b/lib/Target/X86/X86JITInfo.cpp
index fc86e1e..e99f2d9 100644
--- a/lib/Target/X86/X86JITInfo.cpp
+++ b/lib/Target/X86/X86JITInfo.cpp
@@ -127,7 +127,7 @@ extern "C" {
"movaps %xmm6, 96(%rsp)\n"
"movaps %xmm7, 112(%rsp)\n"
// JIT callee
-#ifdef _WIN64
+#if defined(_WIN64) || defined(__CYGWIN__)
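+  // Cygwin's x86-64 ABI follows the Win64 calling convention (arguments in
+  // RCX/RDX and a 32-byte shadow area), so it takes the same path.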
"subq $32, %rsp\n"
"movq %rbp, %rcx\n" // Pass prev frame and return address
"movq 8(%rbp), %rdx\n"
diff --git a/lib/Target/X86/X86MCInstLower.cpp b/lib/Target/X86/X86MCInstLower.cpp
index c7c00b5..6649c82 100644
--- a/lib/Target/X86/X86MCInstLower.cpp
+++ b/lib/Target/X86/X86MCInstLower.cpp
@@ -17,6 +17,7 @@
#include "X86COFFMachineModuleInfo.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/CodeGen/MachineModuleInfoImpls.h"
+#include "llvm/CodeGen/StackMaps.h"
#include "llvm/IR/Type.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
@@ -34,14 +35,12 @@ namespace {
/// X86MCInstLower - This class is used to lower an MachineInstr into an MCInst.
class X86MCInstLower {
MCContext &Ctx;
- Mangler *Mang;
const MachineFunction &MF;
const TargetMachine &TM;
const MCAsmInfo &MAI;
X86AsmPrinter &AsmPrinter;
public:
- X86MCInstLower(Mangler *mang, const MachineFunction &MF,
- X86AsmPrinter &asmprinter);
+ X86MCInstLower(const MachineFunction &MF, X86AsmPrinter &asmprinter);
void Lower(const MachineInstr *MI, MCInst &OutMI) const;
@@ -50,13 +49,16 @@ public:
private:
MachineModuleInfoMachO &getMachOMMI() const;
+ Mangler *getMang() const {
+ return AsmPrinter.Mang;
+ }
};
} // end anonymous namespace
-X86MCInstLower::X86MCInstLower(Mangler *mang, const MachineFunction &mf,
+X86MCInstLower::X86MCInstLower(const MachineFunction &mf,
X86AsmPrinter &asmprinter)
-: Ctx(mf.getContext()), Mang(mang), MF(mf), TM(mf.getTarget()),
+: Ctx(mf.getContext()), MF(mf), TM(mf.getTarget()),
MAI(*TM.getMCAsmInfo()), AsmPrinter(asmprinter) {}
MachineModuleInfoMachO &X86MCInstLower::getMachOMMI() const {
@@ -81,7 +83,7 @@ GetSymbolFromOperand(const MachineOperand &MO) const {
MO.getTargetFlags() == X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE)
isImplicitlyPrivate = true;
- Mang->getNameWithPrefix(Name, GV, isImplicitlyPrivate);
+ getMang()->getNameWithPrefix(Name, GV, isImplicitlyPrivate);
} else if (MO.isSymbol()) {
Name += MAI.getGlobalPrefix();
Name += MO.getSymbolName();
@@ -110,7 +112,7 @@ GetSymbolFromOperand(const MachineOperand &MO) const {
assert(MO.isGlobal() && "Extern symbol not handled yet");
StubSym =
MachineModuleInfoImpl::
- StubValueTy(Mang->getSymbol(MO.getGlobal()),
+ StubValueTy(AsmPrinter.getSymbol(MO.getGlobal()),
!MO.getGlobal()->hasInternalLinkage());
}
return Sym;
@@ -124,7 +126,7 @@ GetSymbolFromOperand(const MachineOperand &MO) const {
assert(MO.isGlobal() && "Extern symbol not handled yet");
StubSym =
MachineModuleInfoImpl::
- StubValueTy(Mang->getSymbol(MO.getGlobal()),
+ StubValueTy(AsmPrinter.getSymbol(MO.getGlobal()),
!MO.getGlobal()->hasInternalLinkage());
}
return Sym;
@@ -140,7 +142,7 @@ GetSymbolFromOperand(const MachineOperand &MO) const {
if (MO.isGlobal()) {
StubSym =
MachineModuleInfoImpl::
- StubValueTy(Mang->getSymbol(MO.getGlobal()),
+ StubValueTy(AsmPrinter.getSymbol(MO.getGlobal()),
!MO.getGlobal()->hasInternalLinkage());
} else {
Name.erase(Name.end()-5, Name.end());
@@ -591,18 +593,6 @@ ReSimplify:
case X86::MOVSX64rr32:
SimplifyMOVSX(OutMI);
break;
-
- case X86::MORESTACK_RET:
- OutMI.setOpcode(X86::RET);
- break;
-
- case X86::MORESTACK_RET_RESTORE_R10:
- OutMI.setOpcode(X86::MOV64rr);
- OutMI.addOperand(MCOperand::CreateReg(X86::R10));
- OutMI.addOperand(MCOperand::CreateReg(X86::RAX));
-
- AsmPrinter.OutStreamer.EmitInstruction(MCInstBuilder(X86::RET));
- break;
}
}
@@ -685,8 +675,140 @@ static void LowerTlsAddr(MCStreamer &OutStreamer,
.addExpr(tlsRef));
}
+static std::pair<StackMaps::Location, MachineInstr::const_mop_iterator>
+parseMemoryOperand(StackMaps::Location::LocationType LocTy, unsigned Size,
+ MachineInstr::const_mop_iterator MOI,
+ MachineInstr::const_mop_iterator MOE) {
+
+ typedef StackMaps::Location Location;
+
+ assert(std::distance(MOI, MOE) >= 5 && "Too few operands to encode mem op.");
+
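+  // x86 memory references are encoded as five operands: base register, scale,
+  // index register, displacement, and segment register. Stack maps only
+  // describe the simple [base + disp] form, which the asserts below enforce.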
+ const MachineOperand &Base = *MOI;
+ const MachineOperand &Scale = *(++MOI);
+ const MachineOperand &Index = *(++MOI);
+ const MachineOperand &Disp = *(++MOI);
+ const MachineOperand &ZeroReg = *(++MOI);
+
+ // Sanity check for supported operand format.
+ assert(Base.isReg() &&
+ Scale.isImm() && Scale.getImm() == 1 &&
+ Index.isReg() && Index.getReg() == 0 &&
+ Disp.isImm() && ZeroReg.isReg() && (ZeroReg.getReg() == 0) &&
+ "Unsupported x86 memory operand sequence.");
+ (void)Scale;
+ (void)Index;
+ (void)ZeroReg;
+
+ return std::make_pair(
+ Location(LocTy, Size, Base.getReg(), Disp.getImm()), ++MOI);
+}
+
+std::pair<StackMaps::Location, MachineInstr::const_mop_iterator>
+X86AsmPrinter::stackmapOperandParser(MachineInstr::const_mop_iterator MOI,
+ MachineInstr::const_mop_iterator MOE,
+ const TargetMachine &TM) {
+
+ typedef StackMaps::Location Location;
+
+ const MachineOperand &MOP = *MOI;
+ assert(!MOP.isRegMask() && (!MOP.isReg() || !MOP.isImplicit()) &&
+ "Register mask and implicit operands should not be processed.");
+
+ if (MOP.isImm()) {
+    // An immediate here introduces one of the special operand encodings
+    // (DirectMemRefOp, IndirectMemRefOp, or ConstantOp) handled below.
+
+ switch (MOP.getImm()) {
+ default: llvm_unreachable("Unrecognized operand type.");
+ case StackMaps::DirectMemRefOp: {
+ unsigned Size = TM.getDataLayout()->getPointerSizeInBits();
+ assert((Size % 8) == 0 && "Need pointer size in bytes.");
+ Size /= 8;
+ return parseMemoryOperand(StackMaps::Location::Direct, Size,
+ llvm::next(MOI), MOE);
+ }
+ case StackMaps::IndirectMemRefOp: {
+ ++MOI;
+ int64_t Size = MOI->getImm();
+ assert(Size > 0 && "Need a valid size for indirect memory locations.");
+ return parseMemoryOperand(StackMaps::Location::Indirect, Size,
+ llvm::next(MOI), MOE);
+ }
+ case StackMaps::ConstantOp: {
+ ++MOI;
+ assert(MOI->isImm() && "Expected constant operand.");
+ int64_t Imm = MOI->getImm();
+ return std::make_pair(
+ Location(Location::Constant, sizeof(int64_t), 0, Imm), ++MOI);
+ }
+ }
+ }
+
+ // Otherwise this is a reg operand. The physical register number will
+ // ultimately be encoded as a DWARF regno. The stack map also records the size
+ // of a spill slot that can hold the register content. (The runtime can
+ // track the actual size of the data type if it needs to.)
+ assert(MOP.isReg() && "Expected register operand here.");
+ assert(TargetRegisterInfo::isPhysicalRegister(MOP.getReg()) &&
+ "Virtreg operands should have been rewritten before now.");
+ const TargetRegisterClass *RC =
+ TM.getRegisterInfo()->getMinimalPhysRegClass(MOP.getReg());
+ assert(!MOP.getSubReg() && "Physical subreg still around.");
+ return std::make_pair(
+ Location(Location::Register, RC->getSize(), MOP.getReg(), 0), ++MOI);
+}
+
+// Lower a stackmap of the form:
+// <id>, <shadowBytes>, ...
+static void LowerSTACKMAP(MCStreamer &OutStreamer,
+ StackMaps &SM,
+ const MachineInstr &MI)
+{
+ unsigned NumNOPBytes = MI.getOperand(1).getImm();
+ SM.recordStackMap(MI);
+ // Emit padding.
+ // FIXME: These nops ensure that the stackmap's shadow is covered by
+ // instructions from the same basic block, but the nops should not be
+ // necessary if instructions from the same block follow the stackmap.
+ for (unsigned i = 0; i < NumNOPBytes; ++i)
+ OutStreamer.EmitInstruction(MCInstBuilder(X86::NOOP));
+}
+
+// Lower a patchpoint of the form:
+// [<def>], <id>, <numBytes>, <target>, <numArgs>, <cc>, ...
+static void LowerPATCHPOINT(MCStreamer &OutStreamer,
+ StackMaps &SM,
+ const MachineInstr &MI) {
+ SM.recordPatchPoint(MI);
+
+ PatchPointOpers opers(&MI);
+ unsigned ScratchIdx = opers.getNextScratchIdx();
+ unsigned EncodedBytes = 0;
+ int64_t CallTarget = opers.getMetaOper(PatchPointOpers::TargetPos).getImm();
+ if (CallTarget) {
+ // Emit MOV to materialize the target address and the CALL to target.
+ // This is encoded with 12-13 bytes, depending on which register is used.
+ // We conservatively assume that it is 12 bytes and emit in worst case one
+ // extra NOP byte.
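+    // (MOV64ri of a full 64-bit immediate is 10 bytes: REX.W, opcode, imm64;
+    // CALL64r is 2 bytes, or 3 when the scratch register needs a REX prefix.)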
+ EncodedBytes = 12;
+ OutStreamer.EmitInstruction(MCInstBuilder(X86::MOV64ri)
+ .addReg(MI.getOperand(ScratchIdx).getReg())
+ .addImm(CallTarget));
+ OutStreamer.EmitInstruction(MCInstBuilder(X86::CALL64r)
+ .addReg(MI.getOperand(ScratchIdx).getReg()));
+ }
+ // Emit padding.
+ unsigned NumBytes = opers.getMetaOper(PatchPointOpers::NBytesPos).getImm();
+ assert(NumBytes >= EncodedBytes &&
+ "Patchpoint can't request size less than the length of a call.");
+
+ for (unsigned i = EncodedBytes; i < NumBytes; ++i)
+ OutStreamer.EmitInstruction(MCInstBuilder(X86::NOOP));
+}
+
void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
- X86MCInstLower MCInstLowering(Mang, *MF, *this);
+ X86MCInstLower MCInstLowering(*MF, *this);
switch (MI->getOpcode()) {
case TargetOpcode::DBG_VALUE:
llvm_unreachable("Should be handled target independently");
@@ -774,6 +896,24 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
.addExpr(DotExpr));
return;
}
+
+ case TargetOpcode::STACKMAP:
+ return LowerSTACKMAP(OutStreamer, SM, *MI);
+
+ case TargetOpcode::PATCHPOINT:
+ return LowerPATCHPOINT(OutStreamer, SM, *MI);
+
+ case X86::MORESTACK_RET:
+ OutStreamer.EmitInstruction(MCInstBuilder(X86::RET));
+ return;
+
+ case X86::MORESTACK_RET_RESTORE_R10:
+ // Return, then restore R10.
+ OutStreamer.EmitInstruction(MCInstBuilder(X86::RET));
+ OutStreamer.EmitInstruction(MCInstBuilder(X86::MOV64rr)
+ .addReg(X86::R10)
+ .addReg(X86::RAX));
+ return;
}
MCInst TmpInst;
diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp
index 0923310..dbda556 100644
--- a/lib/Target/X86/X86RegisterInfo.cpp
+++ b/lib/Target/X86/X86RegisterInfo.cpp
@@ -101,8 +101,8 @@ int X86RegisterInfo::getCompactUnwindRegNum(unsigned RegNum, bool isEH) const {
bool
X86RegisterInfo::trackLivenessAfterRegAlloc(const MachineFunction &MF) const {
- // Only enable when post-RA scheduling is enabled and this is needed.
- return TM.getSubtargetImpl()->postRAScheduler();
+ // ExeDepsFixer and PostRAScheduler require liveness.
+ return true;
}
int
@@ -239,6 +239,11 @@ X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
case CallingConv::HiPE:
return CSR_NoRegs_SaveList;
+ case CallingConv::WebKit_JS:
+ return CSR_64_SaveList;
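+  // anyregcc saves most registers so that values which are live across a
+  // patchpoint survive the call.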
+ case CallingConv::AnyReg:
+ return CSR_MostRegs_64_SaveList;
+
case CallingConv::Intel_OCL_BI: {
bool HasAVX = TM.getSubtarget<X86Subtarget>().hasAVX();
bool HasAVX512 = TM.getSubtarget<X86Subtarget>().hasAVX512();
@@ -296,6 +301,8 @@ X86RegisterInfo::getCallPreservedMask(CallingConv::ID CC) const {
}
if (CC == CallingConv::GHC || CC == CallingConv::HiPE)
return CSR_NoRegs_RegMask;
+ if (CC == CallingConv::WebKit_JS || CC == CallingConv::AnyReg)
+ return CSR_MostRegs_64_RegMask;
if (!Is64Bit)
return CSR_32_RegMask;
if (CC == CallingConv::Cold)
@@ -512,14 +519,6 @@ unsigned X86RegisterInfo::getFrameRegister(const MachineFunction &MF) const {
return TFI->hasFP(MF) ? FramePtr : StackPtr;
}
-unsigned X86RegisterInfo::getEHExceptionRegister() const {
- llvm_unreachable("What is the exception register");
-}
-
-unsigned X86RegisterInfo::getEHHandlerRegister() const {
- llvm_unreachable("What is the exception handler register");
-}
-
namespace llvm {
unsigned getX86SubSuperRegister(unsigned Reg, MVT::SimpleValueType VT,
bool High) {
diff --git a/lib/Target/X86/X86RegisterInfo.h b/lib/Target/X86/X86RegisterInfo.h
index fb17682..22251b2 100644
--- a/lib/Target/X86/X86RegisterInfo.h
+++ b/lib/Target/X86/X86RegisterInfo.h
@@ -126,10 +126,6 @@ public:
unsigned getBaseRegister() const { return BasePtr; }
// FIXME: Move to FrameInfo
unsigned getSlotSize() const { return SlotSize; }
-
- // Exception handling queries.
- unsigned getEHExceptionRegister() const;
- unsigned getEHHandlerRegister() const;
};
// getX86SubSuperRegister - X86 utility function. It returns the sub or super
diff --git a/lib/Target/X86/X86SchedHaswell.td b/lib/Target/X86/X86SchedHaswell.td
index 62ba2bc..9748261 100644
--- a/lib/Target/X86/X86SchedHaswell.td
+++ b/lib/Target/X86/X86SchedHaswell.td
@@ -19,6 +19,10 @@ def HaswellModel : SchedMachineModel {
let MicroOpBufferSize = 192; // Based on the reorder buffer.
let LoadLatency = 4;
let MispredictPenalty = 16;
+
+ // FIXME: SSE4 and AVX are unimplemented. This flag is set to allow
+ // the scheduler to assign a default model to unrecognized opcodes.
+ let CompleteModel = 0;
}
let SchedModel = HaswellModel in {
diff --git a/lib/Target/X86/X86SchedSandyBridge.td b/lib/Target/X86/X86SchedSandyBridge.td
index 52ead94..3011c6d 100644
--- a/lib/Target/X86/X86SchedSandyBridge.td
+++ b/lib/Target/X86/X86SchedSandyBridge.td
@@ -20,6 +20,10 @@ def SandyBridgeModel : SchedMachineModel {
let MicroOpBufferSize = 168; // Based on the reorder buffer.
let LoadLatency = 4;
let MispredictPenalty = 16;
+
+ // FIXME: SSE4 and AVX are unimplemented. This flag is set to allow
+ // the scheduler to assign a default model to unrecognized opcodes.
+ let CompleteModel = 0;
}
let SchedModel = SandyBridgeModel in {
diff --git a/lib/Target/X86/X86Schedule.td b/lib/Target/X86/X86Schedule.td
index ceb2e05..0556437 100644
--- a/lib/Target/X86/X86Schedule.td
+++ b/lib/Target/X86/X86Schedule.td
@@ -141,9 +141,12 @@ def IIC_IDIV64 : InstrItinClass;
// neg/not/inc/dec
def IIC_UNARY_REG : InstrItinClass;
def IIC_UNARY_MEM : InstrItinClass;
-// add/sub/and/or/xor/adc/sbc/cmp/test
+// add/sub/and/or/xor/cmp/test
def IIC_BIN_MEM : InstrItinClass;
def IIC_BIN_NONMEM : InstrItinClass;
+// adc/sbb
+def IIC_BIN_CARRY_MEM : InstrItinClass;
+def IIC_BIN_CARRY_NONMEM : InstrItinClass;
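+// (Separate classes because carry-consuming ops can be slower, e.g. 2 cycles
+// on Silvermont versus 1 for plain ALU ops.)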
// shift/rotate
def IIC_SR : InstrItinClass;
// shift double
@@ -250,11 +253,11 @@ def IIC_SSE_INTSH_P_RR : InstrItinClass;
def IIC_SSE_INTSH_P_RM : InstrItinClass;
def IIC_SSE_INTSH_P_RI : InstrItinClass;
-def IIC_SSE_CMPP_RR : InstrItinClass;
-def IIC_SSE_CMPP_RM : InstrItinClass;
+def IIC_SSE_INTSHDQ_P_RI : InstrItinClass;
def IIC_SSE_SHUFP : InstrItinClass;
-def IIC_SSE_PSHUF : InstrItinClass;
+def IIC_SSE_PSHUF_RI : InstrItinClass;
+def IIC_SSE_PSHUF_MI : InstrItinClass;
def IIC_SSE_UNPCK : InstrItinClass;
@@ -316,7 +319,8 @@ def IIC_SSE_PSIGN_RM : InstrItinClass;
def IIC_SSE_PMADD : InstrItinClass;
def IIC_SSE_PMULHRSW : InstrItinClass;
-def IIC_SSE_PALIGNR : InstrItinClass;
+def IIC_SSE_PALIGNRR : InstrItinClass;
+def IIC_SSE_PALIGNRM : InstrItinClass;
def IIC_SSE_MWAIT : InstrItinClass;
def IIC_SSE_MONITOR : InstrItinClass;
@@ -492,8 +496,8 @@ def IIC_PUSH_REG : InstrItinClass;
def IIC_PUSH_F : InstrItinClass;
def IIC_PUSH_A : InstrItinClass;
def IIC_BSWAP : InstrItinClass;
-def IIC_BSF : InstrItinClass;
-def IIC_BSR : InstrItinClass;
+def IIC_BIT_SCAN_MEM : InstrItinClass;
+def IIC_BIT_SCAN_REG : InstrItinClass;
def IIC_MOVS : InstrItinClass;
def IIC_STOS : InstrItinClass;
def IIC_SCAS : InstrItinClass;
@@ -540,6 +544,33 @@ def IIC_BOUND : InstrItinClass;
def IIC_ARPL_REG : InstrItinClass;
def IIC_ARPL_MEM : InstrItinClass;
def IIC_MOVBE : InstrItinClass;
+def IIC_AES : InstrItinClass;
+def IIC_BLEND_MEM : InstrItinClass;
+def IIC_BLEND_NOMEM : InstrItinClass;
+def IIC_CBW : InstrItinClass;
+def IIC_CRC32_REG : InstrItinClass;
+def IIC_CRC32_MEM : InstrItinClass;
+def IIC_SSE_DPPD_RR : InstrItinClass;
+def IIC_SSE_DPPD_RM : InstrItinClass;
+def IIC_SSE_DPPS_RR : InstrItinClass;
+def IIC_SSE_DPPS_RM : InstrItinClass;
+def IIC_MMX_EMMS : InstrItinClass;
+def IIC_SSE_EXTRACTPS_RR : InstrItinClass;
+def IIC_SSE_EXTRACTPS_RM : InstrItinClass;
+def IIC_SSE_INSERTPS_RR : InstrItinClass;
+def IIC_SSE_INSERTPS_RM : InstrItinClass;
+def IIC_SSE_MPSADBW_RR : InstrItinClass;
+def IIC_SSE_MPSADBW_RM : InstrItinClass;
+def IIC_SSE_PMULLD_RR : InstrItinClass;
+def IIC_SSE_PMULLD_RM : InstrItinClass;
+def IIC_SSE_ROUNDPS_REG : InstrItinClass;
+def IIC_SSE_ROUNDPS_MEM : InstrItinClass;
+def IIC_SSE_ROUNDPD_REG : InstrItinClass;
+def IIC_SSE_ROUNDPD_MEM : InstrItinClass;
+def IIC_SSE_POPCNT_RR : InstrItinClass;
+def IIC_SSE_POPCNT_RM : InstrItinClass;
+def IIC_SSE_PCLMULQDQ_RR : InstrItinClass;
+def IIC_SSE_PCLMULQDQ_RM : InstrItinClass;
def IIC_NOP : InstrItinClass;
@@ -561,7 +592,7 @@ def IIC_NOP : InstrItinClass;
// latencies. Since these latencies are not used for pipeline hazards,
// they do not need to be exact.
//
-// The GenericModel contains no instruciton itineraries.
+// The GenericModel contains no instruction itineraries.
def GenericModel : SchedMachineModel {
let IssueWidth = 4;
let MicroOpBufferSize = 32;
@@ -572,3 +603,4 @@ def GenericModel : SchedMachineModel {
include "X86ScheduleAtom.td"
include "X86SchedSandyBridge.td"
include "X86SchedHaswell.td"
+include "X86ScheduleSLM.td"
diff --git a/lib/Target/X86/X86ScheduleAtom.td b/lib/Target/X86/X86ScheduleAtom.td
index 14a1471..ba72f29 100644
--- a/lib/Target/X86/X86ScheduleAtom.td
+++ b/lib/Target/X86/X86ScheduleAtom.td
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
//
-// This file defines the itinerary class data for the Intel Atom (Bonnell)
-// processors.
+// This file defines the itinerary class data for the Intel Atom
+// in-order (Saltwell-32nm/Bonnell-45nm) processors.
//
//===----------------------------------------------------------------------===//
@@ -79,9 +79,12 @@ def AtomItineraries : ProcessorItineraries<
// neg/not/inc/dec
InstrItinData<IIC_UNARY_REG, [InstrStage<1, [Port0, Port1]>] >,
InstrItinData<IIC_UNARY_MEM, [InstrStage<1, [Port0]>] >,
- // add/sub/and/or/xor/adc/sbc/cmp/test
+ // add/sub/and/or/xor/cmp/test
InstrItinData<IIC_BIN_NONMEM, [InstrStage<1, [Port0, Port1]>] >,
InstrItinData<IIC_BIN_MEM, [InstrStage<1, [Port0]>] >,
+  // adc/sbb
+ InstrItinData<IIC_BIN_CARRY_NONMEM, [InstrStage<1, [Port0, Port1]>] >,
+ InstrItinData<IIC_BIN_CARRY_MEM, [InstrStage<1, [Port0]>] >,
// shift/rotate
InstrItinData<IIC_SR, [InstrStage<1, [Port0]>] >,
// shift double
@@ -203,11 +206,11 @@ def AtomItineraries : ProcessorItineraries<
InstrItinData<IIC_SSE_INTSH_P_RM, [InstrStage<3, [Port0, Port1]>] >,
InstrItinData<IIC_SSE_INTSH_P_RI, [InstrStage<1, [Port0, Port1]>] >,
- InstrItinData<IIC_SSE_CMPP_RR, [InstrStage<6, [Port0, Port1]>] >,
- InstrItinData<IIC_SSE_CMPP_RM, [InstrStage<7, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_INTSHDQ_P_RI, [InstrStage<1, [Port0, Port1]>] >,
InstrItinData<IIC_SSE_SHUFP, [InstrStage<1, [Port0]>] >,
- InstrItinData<IIC_SSE_PSHUF, [InstrStage<1, [Port0]>] >,
+ InstrItinData<IIC_SSE_PSHUF_RI, [InstrStage<1, [Port0]>] >,
+ InstrItinData<IIC_SSE_PSHUF_MI, [InstrStage<1, [Port0]>] >,
InstrItinData<IIC_SSE_UNPCK, [InstrStage<1, [Port0]>] >,
@@ -278,7 +281,8 @@ def AtomItineraries : ProcessorItineraries<
InstrItinData<IIC_SSE_PMADD, [InstrStage<5, [Port0]>] >,
InstrItinData<IIC_SSE_PMULHRSW, [InstrStage<5, [Port0]>] >,
- InstrItinData<IIC_SSE_PALIGNR, [InstrStage<1, [Port0]>] >,
+ InstrItinData<IIC_SSE_PALIGNRR, [InstrStage<1, [Port0]>] >,
+ InstrItinData<IIC_SSE_PALIGNRM, [InstrStage<1, [Port0]>] >,
InstrItinData<IIC_SSE_MWAIT, [InstrStage<46, [Port0, Port1]>] >,
InstrItinData<IIC_SSE_MONITOR, [InstrStage<45, [Port0, Port1]>] >,
@@ -470,8 +474,8 @@ def AtomItineraries : ProcessorItineraries<
InstrItinData<IIC_PUSH_A, [InstrStage<8, [Port0, Port1]>] >,
InstrItinData<IIC_BSWAP, [InstrStage<1, [Port0]>] >,
- InstrItinData<IIC_BSF, [InstrStage<16, [Port0, Port1]>] >,
- InstrItinData<IIC_BSR, [InstrStage<16, [Port0, Port1]>] >,
+ InstrItinData<IIC_BIT_SCAN_MEM, [InstrStage<16, [Port0, Port1]>] >,
+ InstrItinData<IIC_BIT_SCAN_REG, [InstrStage<16, [Port0, Port1]>] >,
InstrItinData<IIC_MOVS, [InstrStage<3, [Port0, Port1]>] >,
InstrItinData<IIC_STOS, [InstrStage<1, [Port0, Port1]>] >,
InstrItinData<IIC_SCAS, [InstrStage<2, [Port0, Port1]>] >,
@@ -518,6 +522,8 @@ def AtomItineraries : ProcessorItineraries<
InstrItinData<IIC_ARPL_REG, [InstrStage<24, [Port0, Port1]>] >,
InstrItinData<IIC_ARPL_MEM, [InstrStage<23, [Port0, Port1]>] >,
InstrItinData<IIC_MOVBE, [InstrStage<1, [Port0]>] >,
+ InstrItinData<IIC_CBW, [InstrStage<4, [Port0, Port1]>] >,
+ InstrItinData<IIC_MMX_EMMS, [InstrStage<5, [Port0, Port1]>] >,
InstrItinData<IIC_NOP, [InstrStage<1, [Port0, Port1]>] >
]>;
diff --git a/lib/Target/X86/X86ScheduleSLM.td b/lib/Target/X86/X86ScheduleSLM.td
new file mode 100644
index 0000000..6c2a304
--- /dev/null
+++ b/lib/Target/X86/X86ScheduleSLM.td
@@ -0,0 +1,668 @@
+//===- X86ScheduleSLM.td - X86 Atom Scheduling Definitions -*- tablegen -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the itinerary class data for the Intel Atom
+// (Silvermont) processor.
+//
+//===----------------------------------------------------------------------===//
+
+def IEC_RSV0 : FuncUnit;
+def IEC_RSV1 : FuncUnit;
+def FPC_RSV0 : FuncUnit;
+def FPC_RSV1 : FuncUnit;
+def MEC_RSV : FuncUnit;
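+// Silvermont schedules uops through per-cluster reservation stations (RSV):
+// two integer (IEC), two floating-point/SIMD (FPC), and one memory (MEC).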
+
+def SLMItineraries : ProcessorItineraries<
+ [ IEC_RSV0, IEC_RSV1, FPC_RSV0, FPC_RSV1, MEC_RSV ],
+ [], [
+ // [InstrStage<N, [FPC_RSV0, FPC_RSV1]>]
+ // [InstrStage<N, [FPC_RSV0, FPC_RSV1], 0>, InstrStage<N, [MEC_RSV]>]
+ // [InstrStage<N, [IEC_RSV0, IEC_RSV1]>]
+  // [InstrStage<N, [IEC_RSV0, IEC_RSV1], 0>, InstrStage<N, [MEC_RSV]>]
+ //
+ // Default is 1 cycle, IEC_RSV0 or IEC_RSV1
+ //InstrItinData<IIC_DEFAULT, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_ALU_MEM, [InstrStage<1, [IEC_RSV0, IEC_RSV1], 0>,
+ InstrStage<1, [MEC_RSV]>] >,
+ InstrItinData<IIC_ALU_NONMEM, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_LEA, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_LEA_16, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ // mul
+ InstrItinData<IIC_MUL8, [InstrStage<4, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_MUL16_MEM, [InstrStage<4, [IEC_RSV0, IEC_RSV1], 0>,
+ InstrStage<4, [MEC_RSV]>] >,
+ InstrItinData<IIC_MUL16_REG, [InstrStage<4, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_MUL32_MEM, [InstrStage<3, [IEC_RSV0, IEC_RSV1], 0>,
+ InstrStage<3, [MEC_RSV]>] >,
+ InstrItinData<IIC_MUL32_REG, [InstrStage<3, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_MUL64, [InstrStage<4, [IEC_RSV0, IEC_RSV1]>] >,
+ // imul by al, ax, eax, rax
+ InstrItinData<IIC_IMUL8, [InstrStage<6, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_IMUL16_MEM, [InstrStage<6, [IEC_RSV0, IEC_RSV1], 0>,
+ InstrStage<6, [MEC_RSV]>] >,
+ InstrItinData<IIC_IMUL16_REG, [InstrStage<6, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_IMUL32_MEM, [InstrStage<6, [IEC_RSV0, IEC_RSV1], 0>,
+ InstrStage<6, [MEC_RSV]>] >,
+ InstrItinData<IIC_IMUL32_REG, [InstrStage<6, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_IMUL64, [InstrStage<6, [IEC_RSV0, IEC_RSV1]>] >,
+ // imul reg by reg|mem
+ InstrItinData<IIC_IMUL16_RM, [InstrStage<4, [IEC_RSV0, IEC_RSV1], 0>,
+ InstrStage<4, [MEC_RSV]>] >,
+ InstrItinData<IIC_IMUL16_RR, [InstrStage<4, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_IMUL32_RM, [InstrStage<3, [IEC_RSV0, IEC_RSV1], 0>,
+ InstrStage<3, [MEC_RSV]>] >,
+ InstrItinData<IIC_IMUL32_RR, [InstrStage<3, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_IMUL64_RM, [InstrStage<4, [IEC_RSV0, IEC_RSV1], 0>,
+ InstrStage<4, [MEC_RSV]>] >,
+ InstrItinData<IIC_IMUL64_RR, [InstrStage<4, [IEC_RSV0, IEC_RSV1]>] >,
+ // imul reg = reg/mem * imm
+ InstrItinData<IIC_IMUL16_RRI, [InstrStage<4, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_IMUL32_RRI, [InstrStage<3, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_IMUL64_RRI, [InstrStage<4, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_IMUL16_RMI, [InstrStage<4, [IEC_RSV0, IEC_RSV1], 0>,
+ InstrStage<4, [MEC_RSV]>] >,
+ InstrItinData<IIC_IMUL32_RMI, [InstrStage<3, [IEC_RSV0, IEC_RSV1], 0>,
+ InstrStage<3, [MEC_RSV]>] >,
+ InstrItinData<IIC_IMUL64_RMI, [InstrStage<4, [IEC_RSV0, IEC_RSV1], 0>,
+ InstrStage<4, [MEC_RSV]>] >,
+ // idiv - min latency
+ InstrItinData<IIC_IDIV8, [InstrStage<34, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_IDIV16, [InstrStage<35, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_IDIV32, [InstrStage<35, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_IDIV64, [InstrStage<49, [IEC_RSV0, IEC_RSV1]>] >,
+ // div - min latency
+ InstrItinData<IIC_DIV8_REG, [InstrStage<25, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_DIV8_MEM, [InstrStage<25, [IEC_RSV0, IEC_RSV1], 0>,
+ InstrStage<25, [MEC_RSV]>] >,
+ InstrItinData<IIC_DIV16, [InstrStage<26, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_DIV32, [InstrStage<26, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_DIV64, [InstrStage<38, [IEC_RSV0, IEC_RSV1]>] >,
+ // neg/not/inc/dec
+ InstrItinData<IIC_UNARY_REG, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_UNARY_MEM, [InstrStage<1, [IEC_RSV0, IEC_RSV1], 0>,
+ InstrStage<1, [MEC_RSV]>] >,
+  // add/sub/and/or/xor/cmp/test
+ InstrItinData<IIC_BIN_NONMEM, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_BIN_MEM, [InstrStage<1, [IEC_RSV0, IEC_RSV1], 0>,
+ InstrStage<1, [MEC_RSV]>] >,
+ // adc/sbb
+ InstrItinData<IIC_BIN_CARRY_NONMEM, [InstrStage<2, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_BIN_CARRY_MEM, [InstrStage<2, [IEC_RSV0, IEC_RSV1], 0>,
+ InstrStage<2, [MEC_RSV]>] >,
+ // shift/rotate
+ InstrItinData<IIC_SR, [InstrStage<1, [IEC_RSV0], 0>,
+ InstrStage<1, [MEC_RSV]>] >,
+ // shift double
+ InstrItinData<IIC_SHD16_REG_IM, [InstrStage<2, [IEC_RSV0]>] >,
+ InstrItinData<IIC_SHD16_REG_CL, [InstrStage<4, [IEC_RSV0]>] >,
+ InstrItinData<IIC_SHD16_MEM_IM, [InstrStage<2, [IEC_RSV0], 0>,
+ InstrStage<2, [MEC_RSV]>] >,
+ InstrItinData<IIC_SHD16_MEM_CL, [InstrStage<4, [IEC_RSV0], 0>,
+ InstrStage<4, [MEC_RSV]>] >,
+ InstrItinData<IIC_SHD32_REG_IM, [InstrStage<2, [IEC_RSV0]>] >,
+ InstrItinData<IIC_SHD32_REG_CL, [InstrStage<4, [IEC_RSV0]>] >,
+ InstrItinData<IIC_SHD32_MEM_IM, [InstrStage<2, [IEC_RSV0], 0>,
+ InstrStage<2, [MEC_RSV]>] >,
+ InstrItinData<IIC_SHD32_MEM_CL, [InstrStage<4, [IEC_RSV0], 0>,
+ InstrStage<4, [MEC_RSV]>] >,
+ InstrItinData<IIC_SHD64_REG_IM, [InstrStage<2, [IEC_RSV0]>] >,
+ InstrItinData<IIC_SHD64_REG_CL, [InstrStage<4, [IEC_RSV0]>] >,
+ InstrItinData<IIC_SHD64_MEM_IM, [InstrStage<2, [IEC_RSV0], 0>,
+ InstrStage<2, [MEC_RSV]>] >,
+ InstrItinData<IIC_SHD64_MEM_CL, [InstrStage<4, [IEC_RSV0], 0>,
+ InstrStage<4, [MEC_RSV]>] >,
+ // cmov
+ InstrItinData<IIC_CMOV16_RM, [InstrStage<2, [IEC_RSV0, IEC_RSV1], 0>,
+ InstrStage<2, [MEC_RSV]>] >,
+ InstrItinData<IIC_CMOV16_RR, [InstrStage<2, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_CMOV32_RM, [InstrStage<2, [IEC_RSV0, IEC_RSV1], 0>,
+ InstrStage<2, [MEC_RSV]>] >,
+ InstrItinData<IIC_CMOV32_RR, [InstrStage<2, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_CMOV64_RM, [InstrStage<2, [IEC_RSV0, IEC_RSV1], 0>,
+ InstrStage<2, [MEC_RSV]>] >,
+ InstrItinData<IIC_CMOV64_RR, [InstrStage<2, [IEC_RSV0, IEC_RSV1]>] >,
+ // set
+ InstrItinData<IIC_SET_M, [InstrStage<1, [IEC_RSV0, IEC_RSV1], 0>,
+ InstrStage<1, [MEC_RSV]>] >,
+ InstrItinData<IIC_SET_R, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ // jcc
+ InstrItinData<IIC_Jcc, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ // jcxz/jecxz/jrcxz
+ InstrItinData<IIC_JCXZ, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ // jmp rel
+ InstrItinData<IIC_JMP_REL, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ // jmp indirect
+ InstrItinData<IIC_JMP_REG, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_JMP_MEM, [InstrStage<1, [IEC_RSV0, IEC_RSV1], 0>,
+ InstrStage<1, [MEC_RSV]>] >,
+ // jmp far
+ InstrItinData<IIC_JMP_FAR_MEM, [InstrStage<1, [IEC_RSV0, IEC_RSV1], 0>,
+ InstrStage<1, [MEC_RSV]>] >,
+ InstrItinData<IIC_JMP_FAR_PTR, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ // loop/loope/loopne
+ InstrItinData<IIC_LOOP, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_LOOPE, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_LOOPNE, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ // call - all but reg/imm
+ InstrItinData<IIC_CALL_RI, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_CALL_MEM, [InstrStage<1, [IEC_RSV0, IEC_RSV1], 0>,
+ InstrStage<1, [MEC_RSV]>] >,
+ InstrItinData<IIC_CALL_FAR_MEM, [InstrStage<1, [IEC_RSV0, IEC_RSV1], 0>,
+ InstrStage<1, [MEC_RSV]>] >,
+ InstrItinData<IIC_CALL_FAR_PTR, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+  // ret
+ InstrItinData<IIC_RET, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_RET_IMM, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+  // sign extension movs
+ InstrItinData<IIC_MOVSX, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_MOVSX_R16_R8, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_MOVSX_R16_M8, [InstrStage<1, [IEC_RSV0, IEC_RSV1], 0>,
+ InstrStage<1, [MEC_RSV]>] >,
+ InstrItinData<IIC_MOVSX_R16_R16, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_MOVSX_R32_R32, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+  // zero extension movs
+ InstrItinData<IIC_MOVZX, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_MOVZX_R16_R8, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_MOVZX_R16_M8, [InstrStage<1, [IEC_RSV0, IEC_RSV1], 0>,
+ InstrStage<1, [MEC_RSV]>] >,
+
+ InstrItinData<IIC_REP_MOVS, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_REP_STOS, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+
+ // SSE binary operations
+ // arithmetic fp scalar
+ InstrItinData<IIC_SSE_ALU_F32S_RR, [InstrStage<3, [FPC_RSV0, FPC_RSV1]>] >,
+ InstrItinData<IIC_SSE_ALU_F32S_RM, [InstrStage<3, [FPC_RSV0, FPC_RSV1], 0>,
+ InstrStage<3, [MEC_RSV]>] >,
+ InstrItinData<IIC_SSE_ALU_F64S_RR, [InstrStage<3, [FPC_RSV0, FPC_RSV1]>] >,
+ InstrItinData<IIC_SSE_ALU_F64S_RM, [InstrStage<3, [FPC_RSV0, FPC_RSV1], 0>,
+ InstrStage<3, [MEC_RSV]>] >,
+ InstrItinData<IIC_SSE_MUL_F32S_RR, [InstrStage<1, [FPC_RSV0, FPC_RSV1]>] >,
+ InstrItinData<IIC_SSE_MUL_F32S_RM, [InstrStage<1, [FPC_RSV0, FPC_RSV1], 0>,
+ InstrStage<1, [MEC_RSV]>] >,
+ InstrItinData<IIC_SSE_MUL_F64S_RR, [InstrStage<2, [FPC_RSV0, FPC_RSV1]>] >,
+ InstrItinData<IIC_SSE_MUL_F64S_RM, [InstrStage<2, [FPC_RSV0, FPC_RSV1], 0>,
+ InstrStage<2, [MEC_RSV]>] >,
+ InstrItinData<IIC_SSE_DIV_F32S_RR, [InstrStage<13, [FPC_RSV0, FPC_RSV1]>] >,
+ InstrItinData<IIC_SSE_DIV_F32S_RM, [InstrStage<13, [FPC_RSV0, FPC_RSV1], 0>,
+ InstrStage<13, [MEC_RSV]>] >,
+ InstrItinData<IIC_SSE_DIV_F64S_RR, [InstrStage<13, [FPC_RSV0, FPC_RSV1]>] >,
+ InstrItinData<IIC_SSE_DIV_F64S_RM, [InstrStage<13, [FPC_RSV0, FPC_RSV1], 0>,
+ InstrStage<13, [MEC_RSV]>] >,
+
+ InstrItinData<IIC_SSE_COMIS_RR, [InstrStage<1, [FPC_RSV0, FPC_RSV1]>] >,
+ InstrItinData<IIC_SSE_COMIS_RM, [InstrStage<1, [FPC_RSV0, FPC_RSV1]>] >,
+
+ InstrItinData<IIC_SSE_HADDSUB_RR, [InstrStage<6, [FPC_RSV0, FPC_RSV1]>] >,
+ InstrItinData<IIC_SSE_HADDSUB_RM, [InstrStage<6, [FPC_RSV0, FPC_RSV1], 0>,
+ InstrStage<6, [MEC_RSV]>] >,
+
+ // arithmetic fp parallel
+ InstrItinData<IIC_SSE_ALU_F32P_RR, [InstrStage<3, [FPC_RSV0, FPC_RSV1]>] >,
+ InstrItinData<IIC_SSE_ALU_F32P_RM, [InstrStage<3, [FPC_RSV0, FPC_RSV1], 0>,
+ InstrStage<3, [MEC_RSV]>] >,
+ InstrItinData<IIC_SSE_ALU_F64P_RR, [InstrStage<4, [FPC_RSV0, FPC_RSV1]>] >,
+ InstrItinData<IIC_SSE_ALU_F64P_RM, [InstrStage<4, [FPC_RSV0, FPC_RSV1], 0>,
+ InstrStage<4, [MEC_RSV]>] >,
+ InstrItinData<IIC_SSE_MUL_F32P_RR, [InstrStage<2, [FPC_RSV0, FPC_RSV1]>] >,
+ InstrItinData<IIC_SSE_MUL_F32P_RM, [InstrStage<2, [FPC_RSV0, FPC_RSV1], 0>,
+ InstrStage<2, [MEC_RSV]>] >,
+ InstrItinData<IIC_SSE_MUL_F64P_RR, [InstrStage<4, [FPC_RSV0, FPC_RSV1]>] >,
+ InstrItinData<IIC_SSE_MUL_F64P_RM, [InstrStage<4, [FPC_RSV0, FPC_RSV1], 0>,
+ InstrStage<4, [MEC_RSV]>] >,
+ InstrItinData<IIC_SSE_DIV_F32P_RR, [InstrStage<27, [FPC_RSV0, FPC_RSV1]>] >,
+ InstrItinData<IIC_SSE_DIV_F32P_RM, [InstrStage<27, [FPC_RSV0, FPC_RSV1], 0>,
+ InstrStage<27, [MEC_RSV]>] >,
+ InstrItinData<IIC_SSE_DIV_F64P_RR, [InstrStage<27, [FPC_RSV0, FPC_RSV1]>] >,
+ InstrItinData<IIC_SSE_DIV_F64P_RM, [InstrStage<27, [FPC_RSV0, FPC_RSV1], 0>,
+ InstrStage<27, [MEC_RSV]>] >,
+
+ // bitwise parallel
+ InstrItinData<IIC_SSE_BIT_P_RR, [InstrStage<1, [FPC_RSV0, FPC_RSV1]>] >,
+ InstrItinData<IIC_SSE_BIT_P_RM, [InstrStage<1, [FPC_RSV0, FPC_RSV1], 0>,
+ InstrStage<1, [MEC_RSV]>] >,
+
+ // arithmetic int parallel
+ InstrItinData<IIC_SSE_INTALU_P_RR, [InstrStage<1, [FPC_RSV0, FPC_RSV1]>] >,
+ InstrItinData<IIC_SSE_INTALU_P_RM, [InstrStage<1, [FPC_RSV0, FPC_RSV1], 0>,
+ InstrStage<1, [MEC_RSV]>] >,
+ InstrItinData<IIC_SSE_INTALUQ_P_RR, [InstrStage<4, [FPC_RSV0, FPC_RSV1]>] >,
+ InstrItinData<IIC_SSE_INTALUQ_P_RM, [InstrStage<4, [FPC_RSV0, FPC_RSV1], 0>,
+ InstrStage<4, [MEC_RSV]>] >,
+
+ // multiply int parallel
+ InstrItinData<IIC_SSE_INTMUL_P_RR, [InstrStage<5, [FPC_RSV0]>] >,
+ InstrItinData<IIC_SSE_INTMUL_P_RM, [InstrStage<5, [FPC_RSV0], 0>,
+ InstrStage<5, [MEC_RSV]>] >,
+
+ // shift parallel
+ InstrItinData<IIC_SSE_INTSH_P_RR, [InstrStage<2, [FPC_RSV0]>] >,
+ InstrItinData<IIC_SSE_INTSH_P_RM, [InstrStage<2, [FPC_RSV0], 0>,
+ InstrStage<2, [MEC_RSV]>] >,
+ InstrItinData<IIC_SSE_INTSH_P_RI, [InstrStage<1, [FPC_RSV0]>] >,
+
+ InstrItinData<IIC_SSE_INTSHDQ_P_RI, [InstrStage<1, [FPC_RSV0]>] >,
+
+ InstrItinData<IIC_SSE_SHUFP, [InstrStage<1, [FPC_RSV0]>] >,
+ InstrItinData<IIC_SSE_PSHUF_RI, [InstrStage<1, [FPC_RSV0]>] >,
+ InstrItinData<IIC_SSE_PSHUF_MI, [InstrStage<1, [FPC_RSV0], 0>,
+ InstrStage<1, [MEC_RSV]>] >,
+
+ InstrItinData<IIC_SSE_UNPCK, [InstrStage<1, [FPC_RSV0]>] >,
+
+ InstrItinData<IIC_SSE_SQRTPS_RR, [InstrStage<26, [FPC_RSV0]>] >,
+ InstrItinData<IIC_SSE_SQRTPS_RM, [InstrStage<26, [FPC_RSV0], 0>,
+ InstrStage<26, [MEC_RSV]>] >,
+ InstrItinData<IIC_SSE_SQRTSS_RR, [InstrStage<13, [FPC_RSV0]>] >,
+ InstrItinData<IIC_SSE_SQRTSS_RM, [InstrStage<13, [FPC_RSV0], 0>,
+ InstrStage<13, [MEC_RSV]>] >,
+ InstrItinData<IIC_SSE_SQRTPD_RR, [InstrStage<26, [FPC_RSV0]>] >,
+ InstrItinData<IIC_SSE_SQRTPD_RM, [InstrStage<26, [FPC_RSV0], 0>,
+ InstrStage<26, [MEC_RSV]>] >,
+ InstrItinData<IIC_SSE_SQRTSD_RR, [InstrStage<13, [FPC_RSV0]>] >,
+ InstrItinData<IIC_SSE_SQRTSD_RM, [InstrStage<13, [FPC_RSV0], 0>,
+ InstrStage<13, [MEC_RSV]>] >,
+
+ InstrItinData<IIC_SSE_RCPP_RR, [InstrStage<9, [FPC_RSV0]>] >,
+ InstrItinData<IIC_SSE_RCPP_RM, [InstrStage<9, [FPC_RSV0], 0>,
+ InstrStage<9, [MEC_RSV]>] >,
+ InstrItinData<IIC_SSE_RCPS_RR, [InstrStage<4, [FPC_RSV0]>] >,
+ InstrItinData<IIC_SSE_RCPS_RM, [InstrStage<4, [FPC_RSV0], 0>,
+ InstrStage<4, [MEC_RSV]>] >,
+
+ InstrItinData<IIC_SSE_MOVMSK, [InstrStage<1, [FPC_RSV0, FPC_RSV1]>] >,
+ InstrItinData<IIC_SSE_MASKMOV, [InstrStage<5, [FPC_RSV0, FPC_RSV1]>] >,
+
+ InstrItinData<IIC_SSE_PEXTRW, [InstrStage<4, [FPC_RSV0, FPC_RSV1]>] >,
+ InstrItinData<IIC_SSE_PINSRW, [InstrStage<1, [FPC_RSV0, FPC_RSV1]>] >,
+
+ InstrItinData<IIC_SSE_PABS_RR, [InstrStage<1, [FPC_RSV0, FPC_RSV1]>] >,
+ InstrItinData<IIC_SSE_PABS_RM, [InstrStage<1, [FPC_RSV0, FPC_RSV1], 0>,
+ InstrStage<1, [MEC_RSV]>] >,
+
+ InstrItinData<IIC_SSE_MOV_S_RR, [InstrStage<1, [FPC_RSV0, FPC_RSV1]>] >,
+ InstrItinData<IIC_SSE_MOV_S_RM, [InstrStage<1, [FPC_RSV0, FPC_RSV1], 0>,
+ InstrStage<1, [MEC_RSV]>] >,
+ InstrItinData<IIC_SSE_MOV_S_MR, [InstrStage<1, [FPC_RSV0, FPC_RSV1], 0>,
+ InstrStage<1, [MEC_RSV]>] >,
+
+ InstrItinData<IIC_SSE_MOVA_P_RR, [InstrStage<1, [FPC_RSV0, FPC_RSV1]>] >,
+ InstrItinData<IIC_SSE_MOVA_P_RM, [InstrStage<1, [FPC_RSV0, FPC_RSV1], 0>,
+ InstrStage<1, [MEC_RSV]>] >,
+ InstrItinData<IIC_SSE_MOVA_P_MR, [InstrStage<1, [FPC_RSV0, FPC_RSV1], 0>,
+ InstrStage<1, [MEC_RSV]>] >,
+
+ InstrItinData<IIC_SSE_MOVU_P_RR, [InstrStage<1, [FPC_RSV0, FPC_RSV1]>] >,
+ InstrItinData<IIC_SSE_MOVU_P_RM, [InstrStage<1, [FPC_RSV0, FPC_RSV1], 0>,
+ InstrStage<1, [MEC_RSV]>] >,
+ InstrItinData<IIC_SSE_MOVU_P_MR, [InstrStage<1, [FPC_RSV0, FPC_RSV1], 0>,
+ InstrStage<1, [MEC_RSV]>] >,
+
+ InstrItinData<IIC_SSE_MOV_LH, [InstrStage<1, [FPC_RSV0, FPC_RSV1]>] >,
+
+ InstrItinData<IIC_SSE_LDDQU, [InstrStage<1, [FPC_RSV0, FPC_RSV1]>] >,
+
+ InstrItinData<IIC_SSE_MOVDQ, [InstrStage<1, [FPC_RSV0, FPC_RSV1]>] >,
+ InstrItinData<IIC_SSE_MOVD_ToGP, [InstrStage<1, [IEC_RSV0, IEC_RSV1], 0>,
+ InstrStage<1, [FPC_RSV0, FPC_RSV1]>] >,
+ InstrItinData<IIC_SSE_MOVQ_RR, [InstrStage<1, [FPC_RSV0, FPC_RSV1]>] >,
+
+ InstrItinData<IIC_SSE_MOVNT, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+
+ InstrItinData<IIC_SSE_PREFETCH, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_SSE_PAUSE, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_SSE_LFENCE, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_SSE_MFENCE, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_SSE_SFENCE, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_SSE_LDMXCSR, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_SSE_STMXCSR, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+
+ InstrItinData<IIC_SSE_PHADDSUBD_RR, [InstrStage<6, [FPC_RSV0, FPC_RSV1]>] >,
+ InstrItinData<IIC_SSE_PHADDSUBD_RM, [InstrStage<6, [FPC_RSV0, FPC_RSV1], 0>,
+ InstrStage<6, [MEC_RSV]>] >,
+ InstrItinData<IIC_SSE_PHADDSUBSW_RR, [InstrStage<9, [FPC_RSV0, FPC_RSV1]>] >,
+ InstrItinData<IIC_SSE_PHADDSUBSW_RM, [InstrStage<9, [FPC_RSV0, FPC_RSV1], 0>,
+ InstrStage<9, [MEC_RSV]>] >,
+ InstrItinData<IIC_SSE_PHADDSUBW_RR, [InstrStage<9, [FPC_RSV0, FPC_RSV1], 0>,
+ InstrStage<9, [MEC_RSV]>] >,
+ InstrItinData<IIC_SSE_PHADDSUBW_RM, [InstrStage<9, [FPC_RSV0, FPC_RSV1], 0>,
+ InstrStage<9, [MEC_RSV]>] >,
+ InstrItinData<IIC_SSE_PSHUFB_RR, [InstrStage<5, [FPC_RSV0, FPC_RSV1]>] >,
+ InstrItinData<IIC_SSE_PSHUFB_RM, [InstrStage<5, [FPC_RSV0, FPC_RSV1], 0>,
+ InstrStage<5, [MEC_RSV]>] >,
+ InstrItinData<IIC_SSE_PSIGN_RR, [InstrStage<1, [FPC_RSV0, FPC_RSV1]>] >,
+ InstrItinData<IIC_SSE_PSIGN_RM, [InstrStage<1, [FPC_RSV0, FPC_RSV1], 0>,
+ InstrStage<1, [MEC_RSV]>] >,
+
+ InstrItinData<IIC_SSE_PMADD, [InstrStage<5, [FPC_RSV0, FPC_RSV1]>] >,
+ InstrItinData<IIC_SSE_PMULHRSW, [InstrStage<5, [FPC_RSV0, FPC_RSV1]>] >,
+ InstrItinData<IIC_SSE_PALIGNRR, [InstrStage<1, [FPC_RSV0, FPC_RSV1]>] >,
+ InstrItinData<IIC_SSE_PALIGNRM, [InstrStage<1, [FPC_RSV0, FPC_RSV1], 0>,
+ InstrStage<1, [MEC_RSV]>] >,
+ InstrItinData<IIC_SSE_MWAIT, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_SSE_MONITOR, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+
+ // conversions
+ // to/from PD ...
+ InstrItinData<IIC_SSE_CVT_PD_RR, [InstrStage<5, [FPC_RSV0, FPC_RSV1]>] >,
+ InstrItinData<IIC_SSE_CVT_PD_RM, [InstrStage<5, [FPC_RSV0, FPC_RSV1], 0>,
+ InstrStage<5, [MEC_RSV]>] >,
+ // to/from PS except to/from PD and PS2PI
+ InstrItinData<IIC_SSE_CVT_PS_RR, [InstrStage<4, [FPC_RSV0, FPC_RSV1]>] >,
+ InstrItinData<IIC_SSE_CVT_PS_RM, [InstrStage<4, [FPC_RSV0, FPC_RSV1], 0>,
+ InstrStage<4, [MEC_RSV]>] >,
+ InstrItinData<IIC_SSE_CVT_Scalar_RR, [InstrStage<1, [FPC_RSV0, FPC_RSV1]>] >,
+ InstrItinData<IIC_SSE_CVT_Scalar_RM, [InstrStage<1, [FPC_RSV0, FPC_RSV1], 0>,
+ InstrStage<1, [MEC_RSV]>] >,
+ InstrItinData<IIC_SSE_CVT_SS2SI32_RR, [InstrStage<4, [FPC_RSV0, FPC_RSV1]>] >,
+ InstrItinData<IIC_SSE_CVT_SS2SI32_RM, [InstrStage<4, [FPC_RSV0, FPC_RSV1], 0>,
+ InstrStage<4, [MEC_RSV]>] >,
+ InstrItinData<IIC_SSE_CVT_SS2SI64_RR, [InstrStage<4, [FPC_RSV0, FPC_RSV1]>] >,
+ InstrItinData<IIC_SSE_CVT_SS2SI64_RM, [InstrStage<4, [FPC_RSV0, FPC_RSV1], 0>,
+ InstrStage<4, [MEC_RSV]>] >,
+ InstrItinData<IIC_SSE_CVT_SD2SI_RR, [InstrStage<4, [FPC_RSV0, FPC_RSV1]>] >,
+ InstrItinData<IIC_SSE_CVT_SD2SI_RM, [InstrStage<4, [FPC_RSV0, FPC_RSV1], 0>,
+ InstrStage<4, [MEC_RSV]>] >,
+
+ // MMX MOVs
+ InstrItinData<IIC_MMX_MOV_MM_RM, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_MMX_MOV_REG_MM, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_MMX_MOVQ_RM, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_MMX_MOVQ_RR, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ // other MMX
+ InstrItinData<IIC_MMX_ALU_RM, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_MMX_ALU_RR, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_MMX_ALUQ_RM, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_MMX_ALUQ_RR, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_MMX_PHADDSUBW_RM, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_MMX_PHADDSUBW_RR, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_MMX_PHADDSUBD_RM, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_MMX_PHADDSUBD_RR, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_MMX_PMUL, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_MMX_MISC_FUNC_MEM, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_MMX_MISC_FUNC_REG, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_MMX_PSADBW, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_MMX_SHIFT_RI, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_MMX_SHIFT_RM, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_MMX_SHIFT_RR, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_MMX_UNPCK_H_RM, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_MMX_UNPCK_H_RR, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_MMX_UNPCK_L, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_MMX_PCK_RM, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_MMX_PCK_RR, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_MMX_PSHUF, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_MMX_PEXTR, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_MMX_PINSRW, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_MMX_MASKMOV, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ // conversions
+ // from/to PD
+ InstrItinData<IIC_MMX_CVT_PD_RR, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_MMX_CVT_PD_RM, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ // from/to PI
+ InstrItinData<IIC_MMX_CVT_PS_RR, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_MMX_CVT_PS_RM, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+
+ InstrItinData<IIC_CMPX_LOCK, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_CMPX_LOCK_8, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_CMPX_LOCK_8B, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_CMPX_LOCK_16B, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+
+ InstrItinData<IIC_XADD_LOCK_MEM, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+
+ InstrItinData<IIC_FILD, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_FLD, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_FLD80, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+
+ InstrItinData<IIC_FST, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_FST80, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_FIST, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+
+ InstrItinData<IIC_FLDZ, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_FUCOM, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_FUCOMI, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_FCOMI, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_FNSTSW, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_FNSTCW, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_FLDCW, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_FNINIT, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_FFREE, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_FNCLEX, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_WAIT, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_FXAM, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_FNOP, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_FLDL, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_F2XM1, [InstrStage<88, [FPC_RSV0, FPC_RSV1]>] >,
+ InstrItinData<IIC_FYL2X, [InstrStage<296, [FPC_RSV0, FPC_RSV1]>] >,
+ InstrItinData<IIC_FPTAN, [InstrStage<281, [FPC_RSV0, FPC_RSV1]>] >,
+ InstrItinData<IIC_FPATAN, [InstrStage<296, [FPC_RSV0, FPC_RSV1]>] >,
+ InstrItinData<IIC_FXTRACT, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_FPREM1, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_FPSTP, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_FPREM, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_FYL2XP1, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_FSINCOS, [InstrStage<281, [FPC_RSV0, FPC_RSV1]>] >,
+ InstrItinData<IIC_FRNDINT, [InstrStage<25, [FPC_RSV0, FPC_RSV1]>] >,
+ InstrItinData<IIC_FSCALE, [InstrStage<74, [FPC_RSV0, FPC_RSV1]>] >,
+ InstrItinData<IIC_FCOMPP, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_FXSAVE, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_FXRSTOR, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_FXCH, [InstrStage<1, [FPC_RSV0, FPC_RSV1]>] >,
+
+ // System instructions
+ InstrItinData<IIC_CPUID, [InstrStage<60, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_INT, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_INT3, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_INVD, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_INVLPG, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_IRET, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_HLT, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_LXS, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_LTR, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_RDTSC, [InstrStage<30, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_RSM, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_SIDT, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_SGDT, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_SLDT, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_STR, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_SWAPGS, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_SYSCALL, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_SYS_ENTER_EXIT, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+
+ InstrItinData<IIC_IN_RR, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_IN_RI, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_OUT_RR, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_OUT_IR, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_INS, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+
+ InstrItinData<IIC_MOV_REG_DR, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_MOV_DR_REG, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ // worst case for mov REG_CRx
+ InstrItinData<IIC_MOV_REG_CR, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_MOV_CR_REG, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+
+ InstrItinData<IIC_MOV_REG_SR, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_MOV_MEM_SR, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_MOV_SR_REG, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_MOV_SR_MEM, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ // LAR
+ InstrItinData<IIC_LAR_RM, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_LAR_RR, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ // LSL
+ InstrItinData<IIC_LSL_RM, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_LSL_RR, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+
+ InstrItinData<IIC_LGDT, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_LIDT, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_LLDT_REG, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_LLDT_MEM, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ // push control register, segment registers
+ InstrItinData<IIC_PUSH_CS, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_PUSH_SR, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ // pop control register, segment registers
+ InstrItinData<IIC_POP_SR, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_POP_SR_SS, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ // VERR, VERW
+ InstrItinData<IIC_VERR, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_VERW_REG, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_VERW_MEM, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ // WRMSR, RDMSR
+ InstrItinData<IIC_WRMSR, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_RDMSR, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_RDPMC, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ // SMSW, LMSW
+ InstrItinData<IIC_SMSW, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_LMSW_REG, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_LMSW_MEM, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+
+ InstrItinData<IIC_ENTER, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_LEAVE, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+
+ InstrItinData<IIC_POP_MEM, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_POP_REG16, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_POP_REG, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_POP_F, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_POP_FD, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_POP_A, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+
+ InstrItinData<IIC_PUSH_IMM, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_PUSH_MEM, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_PUSH_REG, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_PUSH_F, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_PUSH_A, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+
+ InstrItinData<IIC_BSWAP, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_BIT_SCAN_MEM, [InstrStage<10, [IEC_RSV0, IEC_RSV1], 0>,
+ InstrStage<10, [MEC_RSV]>] >,
+ InstrItinData<IIC_BIT_SCAN_REG, [InstrStage<10, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_MOVS, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_STOS, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_SCAS, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_CMPS, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_MOV, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_MOV_MEM, [InstrStage<1, [IEC_RSV0, IEC_RSV1], 0>,
+ InstrStage<1, [MEC_RSV]>] >,
+ InstrItinData<IIC_AHF, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_BT_MI, [InstrStage<1, [IEC_RSV0, IEC_RSV1], 0>,
+ InstrStage<1, [MEC_RSV]>] >,
+ InstrItinData<IIC_BT_MR, [InstrStage<1, [IEC_RSV0, IEC_RSV1], 0>,
+ InstrStage<1, [MEC_RSV]>] >,
+ InstrItinData<IIC_BT_RI, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_BT_RR, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_BTX_MI, [InstrStage<1, [IEC_RSV0, IEC_RSV1], 0>,
+ InstrStage<1, [MEC_RSV]>] >,
+ InstrItinData<IIC_BTX_MR, [InstrStage<1, [IEC_RSV0, IEC_RSV1], 0>,
+ InstrStage<1, [MEC_RSV]>] >,
+ InstrItinData<IIC_BTX_RI, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_BTX_RR, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_XCHG_REG, [InstrStage<5, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_XCHG_MEM, [InstrStage<5, [IEC_RSV0, IEC_RSV1], 0>,
+ InstrStage<5, [MEC_RSV]>] >,
+ InstrItinData<IIC_XADD_REG, [InstrStage<5, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_XADD_MEM, [InstrStage<5, [IEC_RSV0, IEC_RSV1], 0>,
+ InstrStage<5, [MEC_RSV]>] >,
+ InstrItinData<IIC_CMPXCHG_MEM, [InstrStage<6, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_CMPXCHG_REG, [InstrStage<6, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_CMPXCHG_MEM8, [InstrStage<6, [IEC_RSV0, IEC_RSV1], 0>,
+ InstrStage<6, [MEC_RSV]>] >,
+ InstrItinData<IIC_CMPXCHG_REG8, [InstrStage<6, [IEC_RSV0, IEC_RSV1], 0>,
+ InstrStage<6, [MEC_RSV]>] >,
+ InstrItinData<IIC_CMPXCHG_8B, [InstrStage<6, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_CMPXCHG_16B, [InstrStage<6, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_LODS, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_OUTS, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_CLC, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_CLD, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_CLI, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_CMC, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_CLTS, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_STC, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_STI, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_STD, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_XLAT, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_AAA, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_AAD, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_AAM, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_AAS, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_DAA, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_DAS, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_BOUND, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_ARPL_REG, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_ARPL_MEM, [InstrStage<1, [IEC_RSV0, IEC_RSV1], 0>,
+ InstrStage<1, [MEC_RSV]>] >,
+ InstrItinData<IIC_MOVBE, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_AES, [InstrStage<8, [FPC_RSV0]>] >,
+ InstrItinData<IIC_BLEND_NOMEM, [InstrStage<4, [FPC_RSV0, FPC_RSV1]>] >,
+ InstrItinData<IIC_BLEND_MEM, [InstrStage<4, [FPC_RSV0, FPC_RSV1], 0>,
+ InstrStage<4, [MEC_RSV]>] >,
+ InstrItinData<IIC_CBW, [InstrStage<4, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_CRC32_REG, [InstrStage<3, [IEC_RSV0, IEC_RSV1]>] >,
+ InstrItinData<IIC_CRC32_MEM, [InstrStage<3, [IEC_RSV0, IEC_RSV1], 0>,
+ InstrStage<3, [MEC_RSV]>] >,
+ InstrItinData<IIC_SSE_DPPD_RR, [InstrStage<12, [FPC_RSV0, FPC_RSV1]>] >,
+ InstrItinData<IIC_SSE_DPPD_RM, [InstrStage<12, [FPC_RSV0, FPC_RSV1], 0>,
+ InstrStage<12, [MEC_RSV]>] >,
+ InstrItinData<IIC_SSE_DPPS_RR, [InstrStage<15, [FPC_RSV0, FPC_RSV1]>] >,
+ InstrItinData<IIC_SSE_DPPS_RM, [InstrStage<15, [FPC_RSV0, FPC_RSV1], 0>,
+ InstrStage<15, [MEC_RSV]>] >,
+ InstrItinData<IIC_MMX_EMMS, [InstrStage<10, [FPC_RSV0, FPC_RSV1]>] >,
+ InstrItinData<IIC_SSE_EXTRACTPS_RR, [InstrStage<4, [FPC_RSV0, FPC_RSV1]>] >,
+ InstrItinData<IIC_SSE_EXTRACTPS_RM, [InstrStage<4, [FPC_RSV0, FPC_RSV1], 0>,
+ InstrStage<4, [MEC_RSV]>] >,
+ InstrItinData<IIC_SSE_INSERTPS_RR, [InstrStage<1, [FPC_RSV0, FPC_RSV1]>] >,
+ InstrItinData<IIC_SSE_INSERTPS_RM, [InstrStage<1, [FPC_RSV0, FPC_RSV1], 0>,
+ InstrStage<1, [MEC_RSV]>] >,
+ InstrItinData<IIC_SSE_MPSADBW_RR, [InstrStage<1, [FPC_RSV0, FPC_RSV1]>] >,
+ InstrItinData<IIC_SSE_MPSADBW_RM, [InstrStage<1, [FPC_RSV0, FPC_RSV1], 0>,
+ InstrStage<1, [MEC_RSV]>] >,
+ InstrItinData<IIC_SSE_PMULLD_RR, [InstrStage<11, [FPC_RSV0, FPC_RSV1]>] >,
+ InstrItinData<IIC_SSE_PMULLD_RM, [InstrStage<11, [FPC_RSV0, FPC_RSV1], 0>,
+ InstrStage<11, [MEC_RSV]>] >,
+ InstrItinData<IIC_SSE_ROUNDPS_REG, [InstrStage<5, [FPC_RSV0, FPC_RSV1]>] >,
+ InstrItinData<IIC_SSE_ROUNDPS_MEM, [InstrStage<5, [FPC_RSV0, FPC_RSV1], 0>,
+ InstrStage<5, [MEC_RSV]>] >,
+ InstrItinData<IIC_SSE_ROUNDPD_REG, [InstrStage<4, [FPC_RSV0, FPC_RSV1]>] >,
+ InstrItinData<IIC_SSE_ROUNDPD_MEM, [InstrStage<4, [FPC_RSV0, FPC_RSV1], 0>,
+ InstrStage<4, [MEC_RSV]>] >,
+ InstrItinData<IIC_SSE_POPCNT_RR, [InstrStage<4, [IEC_RSV1]>] >,
+ InstrItinData<IIC_SSE_POPCNT_RM, [InstrStage<4, [IEC_RSV1], 0>,
+ InstrStage<4, [MEC_RSV]>] >,
+ InstrItinData<IIC_SSE_PCLMULQDQ_RR, [InstrStage<10, [IEC_RSV1]>] >,
+ InstrItinData<IIC_SSE_PCLMULQDQ_RM, [InstrStage<10, [IEC_RSV1], 0>,
+ InstrStage<10, [MEC_RSV]>] >,
+
+ InstrItinData<IIC_NOP, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >
+ ]>;
+
+// Silvermont machine model.
+def SLMModel : SchedMachineModel {
+ let IssueWidth = 2; // Allows 2 instructions per scheduling group.
+  let MinLatency = 1;  // InstrStage cycles override MinLatency.
+                       // OperandCycles may be used for expected latency.
+  let LoadLatency = 3; // Expected cycles, may be overridden by OperandCycles.
+  let HighLatency = 30; // Expected, may be overridden by OperandCycles.
+
+ let Itineraries = SLMItineraries;
+}
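Each InstrStage<N, [Units]> entry above reserves the named reservation stations for N cycles, and an itinerary class's overall latency is the sum of its stage cycles. A minimal C++ sketch of querying that latency, assuming an InstrItineraryData populated for the "slm" CPU (the helper name is illustrative, not part of this patch):

#include "llvm/MC/MCInstrItineraries.h"

// Illustrative helper, not from this patch: report the latency the SLM
// itineraries model for a given itinerary class index.
unsigned modeledLatency(const llvm::InstrItineraryData &Itins,
                        unsigned ItinClass) {
  if (Itins.isEmpty())
    return 1; // No itinerary data: assume unit latency.
  // getStageLatency sums the cycle counts of every InstrStage in the class.
  return Itins.getStageLatency(ItinClass);
}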
diff --git a/lib/Target/X86/X86SelectionDAGInfo.cpp b/lib/Target/X86/X86SelectionDAGInfo.cpp
index d1db79f..b9c620f 100644
--- a/lib/Target/X86/X86SelectionDAGInfo.cpp
+++ b/lib/Target/X86/X86SelectionDAGInfo.cpp
@@ -46,8 +46,6 @@ X86SelectionDAGInfo::EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl,
!ConstantSize ||
ConstantSize->getZExtValue() >
Subtarget->getMaxInlineSizeThreshold()) {
- SDValue InFlag(0, 0);
-
// Check to see if there is a specialized entry-point for memory zeroing.
ConstantSDNode *V = dyn_cast<ConstantSDNode>(Src);
diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp
index fae90f2..01353b2 100644
--- a/lib/Target/X86/X86Subtarget.cpp
+++ b/lib/Target/X86/X86Subtarget.cpp
@@ -276,20 +276,29 @@ void X86Subtarget::AutoDetectSubtargetFeatures() {
(Family == 6 && Model == 0x2F) || // Westmere: Westmere-EX
(Family == 6 && Model == 0x2A) || // SandyBridge
(Family == 6 && Model == 0x2D) || // SandyBridge: SandyBridge-E*
- (Family == 6 && Model == 0x3A))) {// IvyBridge
+ (Family == 6 && Model == 0x3A) || // IvyBridge
+ (Family == 6 && Model == 0x3E) || // IvyBridge EP
+ (Family == 6 && Model == 0x3C) || // Haswell
+ (Family == 6 && Model == 0x3F) || // ...
+ (Family == 6 && Model == 0x45) || // ...
+ (Family == 6 && Model == 0x46))) { // ...
IsUAMemFast = true;
ToggleFeature(X86::FeatureFastUAMem);
}
- // Set processor type. Currently only Atom is detected.
+ // Set processor type. Currently only Atom or Silvermont (SLM) is detected.
if (Family == 6 &&
- (Model == 28 || Model == 38 || Model == 39
- || Model == 53 || Model == 54)) {
+ (Model == 28 || Model == 38 || Model == 39 ||
+ Model == 53 || Model == 54)) {
X86ProcFamily = IntelAtom;
UseLeaForSP = true;
ToggleFeature(X86::FeatureLeaForSP);
}
+ else if (Family == 6 &&
+ (Model == 55 || Model == 74 || Model == 77)) {
+ X86ProcFamily = IntelSLM;
+ }
unsigned MaxExtLevel;
X86_MC::GetCpuIDAndInfo(0x80000000, &MaxExtLevel, &EBX, &ECX, &EDX);
@@ -351,14 +360,38 @@ void X86Subtarget::AutoDetectSubtargetFeatures() {
HasRTM = true;
ToggleFeature(X86::FeatureRTM);
}
- if (IsIntel && ((EBX >> 19) & 0x1)) {
- HasADX = true;
- ToggleFeature(X86::FeatureADX);
+ if (IsIntel && ((EBX >> 16) & 0x1)) {
+ X86SSELevel = AVX512F;
+ ToggleFeature(X86::FeatureAVX512);
}
if (IsIntel && ((EBX >> 18) & 0x1)) {
HasRDSEED = true;
ToggleFeature(X86::FeatureRDSEED);
}
+ if (IsIntel && ((EBX >> 19) & 0x1)) {
+ HasADX = true;
+ ToggleFeature(X86::FeatureADX);
+ }
+ if (IsIntel && ((EBX >> 26) & 0x1)) {
+ HasPFI = true;
+ ToggleFeature(X86::FeaturePFI);
+ }
+ if (IsIntel && ((EBX >> 27) & 0x1)) {
+ HasERI = true;
+ ToggleFeature(X86::FeatureERI);
+ }
+ if (IsIntel && ((EBX >> 28) & 0x1)) {
+ HasCDI = true;
+ ToggleFeature(X86::FeatureCDI);
+ }
+ if (IsIntel && ((EBX >> 29) & 0x1)) {
+ HasSHA = true;
+ ToggleFeature(X86::FeatureSHA);
+ }
+ }
+ if (IsAMD && ((ECX >> 21) & 0x1)) {
+ HasTBM = true;
+ ToggleFeature(X86::FeatureTBM);
}
}
}
@@ -416,8 +449,8 @@ void X86Subtarget::resetSubtargetFeatures(StringRef CPU, StringRef FS) {
// Make sure 64-bit features are available in 64-bit mode.
if (In64BitMode) {
- HasX86_64 = true; ToggleFeature(X86::Feature64Bit);
- HasCMov = true; ToggleFeature(X86::FeatureCMOV);
+ if (!HasX86_64) { HasX86_64 = true; ToggleFeature(X86::Feature64Bit); }
+ if (!HasCMov) { HasCMov = true; ToggleFeature(X86::FeatureCMOV); }
if (X86SSELevel < SSE2) {
X86SSELevel = SSE2;
@@ -429,9 +462,9 @@ void X86Subtarget::resetSubtargetFeatures(StringRef CPU, StringRef FS) {
// CPUName may have been set by the CPU detection code. Make sure the
// new MCSchedModel is used.
- InitMCProcessorInfo(CPUName, FS);
+ InitCPUSchedModel(CPUName);
- if (X86ProcFamily == IntelAtom)
+ if (X86ProcFamily == IntelAtom || X86ProcFamily == IntelSLM)
PostRAScheduler = true;
InstrItins = getInstrItineraryForCPU(CPUName);
@@ -468,6 +501,7 @@ void X86Subtarget::initializeEnvironment() {
HasFMA = false;
HasFMA4 = false;
HasXOP = false;
+ HasTBM = false;
HasMOVBE = false;
HasRDRAND = false;
HasF16C = false;
@@ -479,8 +513,9 @@ void X86Subtarget::initializeEnvironment() {
HasHLE = false;
HasERI = false;
HasCDI = false;
- HasPFI=false;
+ HasPFI = false;
HasADX = false;
+ HasSHA = false;
HasPRFCHW = false;
HasRDSEED = false;
IsBTMemSlow = false;
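Every feature test added above has the same shape: mask a single bit out of a CPUID result register and toggle the matching subtarget feature. A hedged distillation (the helper is hypothetical; the bit assignments are the ones this patch checks):

// Hypothetical helper mirroring the checks above. On Intel, leaf-7 EBX
// bit 16 is AVX-512F, 18 RDSEED, 19 ADX, 26 PFI, 27 ERI, 28 CDI, 29 SHA;
// on AMD, leaf-0x80000001 ECX bit 21 is TBM.
static inline bool cpuidBitSet(unsigned Reg, unsigned Bit) {
  return (Reg >> Bit) & 0x1;
}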
diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h
index 8793238..dd8c081 100644
--- a/lib/Target/X86/X86Subtarget.h
+++ b/lib/Target/X86/X86Subtarget.h
@@ -42,7 +42,7 @@ enum Style {
class X86Subtarget : public X86GenSubtargetInfo {
protected:
enum X86SSEEnum {
- NoMMXSSE, MMX, SSE1, SSE2, SSE3, SSSE3, SSE41, SSE42, AVX, AVX2, AVX512
+ NoMMXSSE, MMX, SSE1, SSE2, SSE3, SSSE3, SSE41, SSE42, AVX, AVX2, AVX512F
};
enum X863DNowEnum {
@@ -50,7 +50,7 @@ protected:
};
enum X86ProcFamilyEnum {
- Others, IntelAtom
+ Others, IntelAtom, IntelSLM
};
/// X86ProcFamily - X86 processor family: Intel Atom, and others
@@ -97,6 +97,9 @@ protected:
/// HasXOP - Target has XOP instructions
bool HasXOP;
+ /// HasTBM - Target has TBM instructions.
+ bool HasTBM;
+
/// HasMOVBE - True if the processor has the MOVBE instruction.
bool HasMOVBE;
@@ -127,6 +130,9 @@ protected:
/// HasADX - Processor has ADX instructions.
bool HasADX;
+ /// HasSHA - Processor has SHA instructions.
+ bool HasSHA;
+
/// HasPRFCHW - Processor has PRFCHW instructions.
bool HasPRFCHW;
@@ -258,7 +264,7 @@ public:
bool hasSSE42() const { return X86SSELevel >= SSE42; }
bool hasAVX() const { return X86SSELevel >= AVX; }
bool hasAVX2() const { return X86SSELevel >= AVX2; }
- bool hasAVX512() const { return X86SSELevel >= AVX512; }
+ bool hasAVX512() const { return X86SSELevel >= AVX512F; }
bool hasFp256() const { return hasAVX(); }
bool hasInt256() const { return hasAVX2(); }
bool hasSSE4A() const { return HasSSE4A; }
@@ -271,6 +277,7 @@ public:
// FIXME: Favor FMA when both are enabled. Is this the right thing to do?
bool hasFMA4() const { return HasFMA4 && !HasFMA; }
bool hasXOP() const { return HasXOP; }
+ bool hasTBM() const { return HasTBM; }
bool hasMOVBE() const { return HasMOVBE; }
bool hasRDRAND() const { return HasRDRAND; }
bool hasF16C() const { return HasF16C; }
@@ -281,6 +288,7 @@ public:
bool hasRTM() const { return HasRTM; }
bool hasHLE() const { return HasHLE; }
bool hasADX() const { return HasADX; }
+ bool hasSHA() const { return HasSHA; }
bool hasPRFCHW() const { return HasPRFCHW; }
bool hasRDSEED() const { return HasRDSEED; }
bool isBTMemSlow() const { return IsBTMemSlow; }
@@ -311,10 +319,8 @@ public:
return (TargetTriple.getEnvironment() == Triple::ELF ||
TargetTriple.isOSBinFormatELF());
}
- bool isTargetLinux() const { return TargetTriple.getOS() == Triple::Linux; }
- bool isTargetNaCl() const {
- return TargetTriple.getOS() == Triple::NaCl;
- }
+ bool isTargetLinux() const { return TargetTriple.isOSLinux(); }
+ bool isTargetNaCl() const { return TargetTriple.isOSNaCl(); }
bool isTargetNaCl32() const { return isTargetNaCl() && !is64Bit(); }
bool isTargetNaCl64() const { return isTargetNaCl() && is64Bit(); }
bool isTargetWindows() const { return TargetTriple.getOS() == Triple::Win32; }
@@ -327,15 +333,14 @@ public:
}
bool isTargetEnvMacho() const { return TargetTriple.isEnvironmentMachO(); }
+ bool isOSWindows() const { return TargetTriple.isOSWindows(); }
+
bool isTargetWin64() const {
- // FIXME: x86_64-cygwin has not been released yet.
return In64BitMode && TargetTriple.isOSWindows();
}
bool isTargetWin32() const {
- // FIXME: Cygwin is included for isTargetWin64 -- should it be included
- // here too?
- return !In64BitMode && (isTargetMingw() || isTargetWindows());
+ return !In64BitMode && (isTargetCygMing() || isTargetWindows());
}
bool isPICStyleSet() const { return PICStyle != PICStyles::None; }
@@ -380,11 +385,14 @@ public:
/// memset with zero passed as the second argument. Otherwise it
/// returns null.
const char *getBZeroEntry() const;
-
+
/// This function returns true if the target has sincos() routine in its
/// compiler runtime or math libraries.
bool hasSinCos() const;
+ /// Enable the MachineScheduler pass for all X86 subtargets.
+ bool enableMachineScheduler() const LLVM_OVERRIDE { return true; }
+
/// enablePostRAScheduler - run for Atom optimization.
bool enablePostRAScheduler(CodeGenOpt::Level OptLevel,
TargetSubtargetInfo::AntiDepBreakMode& Mode,
diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp
index 49ebd1a..ddf580f 100644
--- a/lib/Target/X86/X86TargetMachine.cpp
+++ b/lib/Target/X86/X86TargetMachine.cpp
@@ -92,7 +92,7 @@ X86TargetMachine::X86TargetMachine(const Target &T, StringRef TT,
} else if (Subtarget.is64Bit()) {
// PIC in 64 bit mode is always rip-rel.
Subtarget.setPICStyle(PICStyles::RIPRel);
- } else if (Subtarget.isTargetCygMing()) {
+ } else if (Subtarget.isTargetCOFF()) {
Subtarget.setPICStyle(PICStyles::None);
} else if (Subtarget.isTargetDarwin()) {
if (getRelocationModel() == Reloc::PIC_)
@@ -114,14 +114,14 @@ X86TargetMachine::X86TargetMachine(const Target &T, StringRef TT,
// Command line options for x86
//===----------------------------------------------------------------------===//
static cl::opt<bool>
-UseVZeroUpper("x86-use-vzeroupper",
+UseVZeroUpper("x86-use-vzeroupper", cl::Hidden,
cl::desc("Minimize AVX to SSE transition penalty"),
cl::init(true));
// Temporary option to control early if-conversion for x86 while adding machine
// models.
static cl::opt<bool>
-X86EarlyIfConv("x86-early-ifcvt",
+X86EarlyIfConv("x86-early-ifcvt", cl::Hidden,
cl::desc("Enable early if-conversion on X86"));
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/X86/X86TargetObjectFile.cpp b/lib/Target/X86/X86TargetObjectFile.cpp
index a19c5a6..086cd4d 100644
--- a/lib/Target/X86/X86TargetObjectFile.cpp
+++ b/lib/Target/X86/X86TargetObjectFile.cpp
@@ -25,7 +25,7 @@ getTTypeGlobalReference(const GlobalValue *GV, Mangler *Mang,
// On Darwin/X86-64, we can reference dwarf symbols with foo@GOTPCREL+4, which
// is an indirect pc-relative reference.
if (Encoding & (DW_EH_PE_indirect | DW_EH_PE_pcrel)) {
- const MCSymbol *Sym = Mang->getSymbol(GV);
+ const MCSymbol *Sym = getSymbol(*Mang, GV);
const MCExpr *Res =
MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_GOTPCREL, getContext());
const MCExpr *Four = MCConstantExpr::Create(4, getContext());
@@ -39,7 +39,7 @@ getTTypeGlobalReference(const GlobalValue *GV, Mangler *Mang,
MCSymbol *X86_64MachoTargetObjectFile::
getCFIPersonalitySymbol(const GlobalValue *GV, Mangler *Mang,
MachineModuleInfo *MMI) const {
- return Mang->getSymbol(GV);
+ return getSymbol(*Mang, GV);
}
void
diff --git a/lib/Target/X86/X86TargetTransformInfo.cpp b/lib/Target/X86/X86TargetTransformInfo.cpp
index 3bbddad..f88a666 100644
--- a/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -101,6 +101,9 @@ public:
unsigned AddressSpace) const;
virtual unsigned getAddressComputationCost(Type *PtrTy, bool IsComplex) const;
+
+ virtual unsigned getReductionCost(unsigned Opcode, Type *Ty,
+ bool IsPairwiseForm) const;
/// @}
};
@@ -127,8 +130,8 @@ X86TTI::PopcntSupportKind X86TTI::getPopcntSupport(unsigned TyWidth) const {
assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
// TODO: Currently the __builtin_popcount() implementation using SSE3
// instructions is inefficient. Once the problem is fixed, we should
- // call ST->hasSSE3() instead of ST->hasSSE4().
- return ST->hasSSE41() ? PSK_FastHardware : PSK_Software;
+ // call ST->hasSSE3() instead of ST->hasPOPCNT().
+ return ST->hasPOPCNT() ? PSK_FastHardware : PSK_Software;
}
unsigned X86TTI::getNumberOfRegisters(bool Vector) const {
@@ -174,7 +177,7 @@ unsigned X86TTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
int ISD = TLI->InstructionOpcodeToISD(Opcode);
assert(ISD && "Invalid opcode");
- static const CostTblEntry<MVT> AVX2CostTable[] = {
+ static const CostTblEntry<MVT::SimpleValueType> AVX2CostTable[] = {
// Shifts on v4i64/v8i32 on AVX2 is legal even though we declare to
// customize them to detect the cases where shift amount is a scalar one.
{ ISD::SHL, MVT::v4i32, 1 },
@@ -211,13 +214,13 @@ unsigned X86TTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
// Look for AVX2 lowering tricks.
if (ST->hasAVX2()) {
- int Idx = CostTableLookup<MVT>(AVX2CostTable, array_lengthof(AVX2CostTable),
- ISD, LT.second);
+ int Idx = CostTableLookup(AVX2CostTable, ISD, LT.second);
if (Idx != -1)
return LT.first * AVX2CostTable[Idx].Cost;
}
- static const CostTblEntry<MVT> SSE2UniformConstCostTable[] = {
+ static const CostTblEntry<MVT::SimpleValueType>
+ SSE2UniformConstCostTable[] = {
// We don't correctly identify costs of casts because they are marked as
// custom.
// Constant splats are cheaper for the following instructions.
@@ -238,15 +241,13 @@ unsigned X86TTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
if (Op2Info == TargetTransformInfo::OK_UniformConstantValue &&
ST->hasSSE2()) {
- int Idx = CostTableLookup<MVT>(SSE2UniformConstCostTable,
- array_lengthof(SSE2UniformConstCostTable),
- ISD, LT.second);
+ int Idx = CostTableLookup(SSE2UniformConstCostTable, ISD, LT.second);
if (Idx != -1)
return LT.first * SSE2UniformConstCostTable[Idx].Cost;
}
- static const CostTblEntry<MVT> SSE2CostTable[] = {
+ static const CostTblEntry<MVT::SimpleValueType> SSE2CostTable[] = {
// We don't correctly identify costs of casts because they are marked as
// custom.
// For some cases, where the shift amount is a scalar we would be able
@@ -287,13 +288,12 @@ unsigned X86TTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
};
if (ST->hasSSE2()) {
- int Idx = CostTableLookup<MVT>(SSE2CostTable, array_lengthof(SSE2CostTable),
- ISD, LT.second);
+ int Idx = CostTableLookup(SSE2CostTable, ISD, LT.second);
if (Idx != -1)
return LT.first * SSE2CostTable[Idx].Cost;
}
- static const CostTblEntry<MVT> AVX1CostTable[] = {
+ static const CostTblEntry<MVT::SimpleValueType> AVX1CostTable[] = {
// We don't have to scalarize unsupported ops. We can issue two half-sized
// operations and we only need to extract the upper YMM half.
// Two ops + 1 extract + 1 insert = 4.
@@ -312,21 +312,19 @@ unsigned X86TTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
// Look for AVX1 lowering tricks.
if (ST->hasAVX() && !ST->hasAVX2()) {
- int Idx = CostTableLookup<MVT>(AVX1CostTable, array_lengthof(AVX1CostTable),
- ISD, LT.second);
+ int Idx = CostTableLookup(AVX1CostTable, ISD, LT.second);
if (Idx != -1)
return LT.first * AVX1CostTable[Idx].Cost;
}
// Custom lowering of vectors.
- static const CostTblEntry<MVT> CustomLowered[] = {
+ static const CostTblEntry<MVT::SimpleValueType> CustomLowered[] = {
// A v2i64/v4i64 and multiply is custom lowered as a series of long
// multiplies(3), shifts(4) and adds(2).
{ ISD::MUL, MVT::v2i64, 9 },
{ ISD::MUL, MVT::v4i64, 9 },
};
- int Idx = CostTableLookup<MVT>(CustomLowered, array_lengthof(CustomLowered),
- ISD, LT.second);
+ int Idx = CostTableLookup(CustomLowered, ISD, LT.second);
if (Idx != -1)
return LT.first * CustomLowered[Idx].Cost;
@@ -363,7 +361,8 @@ unsigned X86TTI::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const {
std::pair<unsigned, MVT> LTSrc = TLI->getTypeLegalizationCost(Src);
std::pair<unsigned, MVT> LTDest = TLI->getTypeLegalizationCost(Dst);
- static const TypeConversionCostTblEntry<MVT> SSE2ConvTbl[] = {
+ static const TypeConversionCostTblEntry<MVT::SimpleValueType>
+ SSE2ConvTbl[] = {
// These are somewhat magic numbers justified by looking at the output of
// Intel's IACA, running some kernels and making sure when we take
// legalization into account the throughput will be overestimated.
@@ -387,9 +386,8 @@ unsigned X86TTI::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const {
};
if (ST->hasSSE2() && !ST->hasAVX()) {
- int Idx = ConvertCostTableLookup<MVT>(SSE2ConvTbl,
- array_lengthof(SSE2ConvTbl),
- ISD, LTDest.second, LTSrc.second);
+ int Idx =
+ ConvertCostTableLookup(SSE2ConvTbl, ISD, LTDest.second, LTSrc.second);
if (Idx != -1)
return LTSrc.first * SSE2ConvTbl[Idx].Cost;
}
@@ -401,13 +399,17 @@ unsigned X86TTI::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const {
if (!SrcTy.isSimple() || !DstTy.isSimple())
return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src);
- static const TypeConversionCostTblEntry<MVT> AVXConversionTbl[] = {
+ static const TypeConversionCostTblEntry<MVT::SimpleValueType>
+ AVXConversionTbl[] = {
+ { ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i8, 1 },
+ { ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i8, 1 },
{ ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i16, 1 },
{ ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i16, 1 },
{ ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i32, 1 },
{ ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i32, 1 },
{ ISD::TRUNCATE, MVT::v4i32, MVT::v4i64, 1 },
{ ISD::TRUNCATE, MVT::v8i16, MVT::v8i32, 1 },
+ { ISD::TRUNCATE, MVT::v16i8, MVT::v16i16, 2 },
{ ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i1, 8 },
{ ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i8, 8 },
@@ -446,9 +448,8 @@ unsigned X86TTI::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const {
};
if (ST->hasAVX()) {
- int Idx = ConvertCostTableLookup<MVT>(AVXConversionTbl,
- array_lengthof(AVXConversionTbl),
- ISD, DstTy.getSimpleVT(), SrcTy.getSimpleVT());
+ int Idx = ConvertCostTableLookup(AVXConversionTbl, ISD, DstTy.getSimpleVT(),
+ SrcTy.getSimpleVT());
if (Idx != -1)
return AVXConversionTbl[Idx].Cost;
}
@@ -466,7 +467,7 @@ unsigned X86TTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
int ISD = TLI->InstructionOpcodeToISD(Opcode);
assert(ISD && "Invalid opcode");
- static const CostTblEntry<MVT> SSE42CostTbl[] = {
+ static const CostTblEntry<MVT::SimpleValueType> SSE42CostTbl[] = {
{ ISD::SETCC, MVT::v2f64, 1 },
{ ISD::SETCC, MVT::v4f32, 1 },
{ ISD::SETCC, MVT::v2i64, 1 },
@@ -475,7 +476,7 @@ unsigned X86TTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
{ ISD::SETCC, MVT::v16i8, 1 },
};
- static const CostTblEntry<MVT> AVX1CostTbl[] = {
+ static const CostTblEntry<MVT::SimpleValueType> AVX1CostTbl[] = {
{ ISD::SETCC, MVT::v4f64, 1 },
{ ISD::SETCC, MVT::v8f32, 1 },
// AVX1 does not support 8-wide integer compare.
@@ -485,7 +486,7 @@ unsigned X86TTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
{ ISD::SETCC, MVT::v32i8, 4 },
};
- static const CostTblEntry<MVT> AVX2CostTbl[] = {
+ static const CostTblEntry<MVT::SimpleValueType> AVX2CostTbl[] = {
{ ISD::SETCC, MVT::v4i64, 1 },
{ ISD::SETCC, MVT::v8i32, 1 },
{ ISD::SETCC, MVT::v16i16, 1 },
@@ -493,22 +494,19 @@ unsigned X86TTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
};
if (ST->hasAVX2()) {
- int Idx = CostTableLookup<MVT>(AVX2CostTbl, array_lengthof(AVX2CostTbl),
- ISD, MTy);
+ int Idx = CostTableLookup(AVX2CostTbl, ISD, MTy);
if (Idx != -1)
return LT.first * AVX2CostTbl[Idx].Cost;
}
if (ST->hasAVX()) {
- int Idx = CostTableLookup<MVT>(AVX1CostTbl, array_lengthof(AVX1CostTbl),
- ISD, MTy);
+ int Idx = CostTableLookup(AVX1CostTbl, ISD, MTy);
if (Idx != -1)
return LT.first * AVX1CostTbl[Idx].Cost;
}
if (ST->hasSSE42()) {
- int Idx = CostTableLookup<MVT>(SSE42CostTbl, array_lengthof(SSE42CostTbl),
- ISD, MTy);
+ int Idx = CostTableLookup(SSE42CostTbl, ISD, MTy);
if (Idx != -1)
return LT.first * SSE42CostTbl[Idx].Cost;
}
@@ -613,3 +611,84 @@ unsigned X86TTI::getAddressComputationCost(Type *Ty, bool IsComplex) const {
return TargetTransformInfo::getAddressComputationCost(Ty, IsComplex);
}
+
+unsigned X86TTI::getReductionCost(unsigned Opcode, Type *ValTy,
+ bool IsPairwise) const {
+
+ std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(ValTy);
+
+ MVT MTy = LT.second;
+
+ int ISD = TLI->InstructionOpcodeToISD(Opcode);
+ assert(ISD && "Invalid opcode");
+
+  // We use the Intel Architecture Code Analyzer (IACA) to measure the
+  // throughput and use it as the cost.
+
+ static const CostTblEntry<MVT::SimpleValueType> SSE42CostTblPairWise[] = {
+ { ISD::FADD, MVT::v2f64, 2 },
+ { ISD::FADD, MVT::v4f32, 4 },
+ { ISD::ADD, MVT::v2i64, 2 }, // The data reported by the IACA tool is "1.6".
+ { ISD::ADD, MVT::v4i32, 3 }, // The data reported by the IACA tool is "3.5".
+ { ISD::ADD, MVT::v8i16, 5 },
+ };
+
+ static const CostTblEntry<MVT::SimpleValueType> AVX1CostTblPairWise[] = {
+ { ISD::FADD, MVT::v4f32, 4 },
+ { ISD::FADD, MVT::v4f64, 5 },
+ { ISD::FADD, MVT::v8f32, 7 },
+ { ISD::ADD, MVT::v2i64, 1 }, // The data reported by the IACA tool is "1.5".
+ { ISD::ADD, MVT::v4i32, 3 }, // The data reported by the IACA tool is "3.5".
+ { ISD::ADD, MVT::v4i64, 5 }, // The data reported by the IACA tool is "4.8".
+ { ISD::ADD, MVT::v8i16, 5 },
+ { ISD::ADD, MVT::v8i32, 5 },
+ };
+
+ static const CostTblEntry<MVT::SimpleValueType> SSE42CostTblNoPairWise[] = {
+ { ISD::FADD, MVT::v2f64, 2 },
+ { ISD::FADD, MVT::v4f32, 4 },
+ { ISD::ADD, MVT::v2i64, 2 }, // The data reported by the IACA tool is "1.6".
+ { ISD::ADD, MVT::v4i32, 3 }, // The data reported by the IACA tool is "3.3".
+ { ISD::ADD, MVT::v8i16, 4 }, // The data reported by the IACA tool is "4.3".
+ };
+
+ static const CostTblEntry<MVT::SimpleValueType> AVX1CostTblNoPairWise[] = {
+ { ISD::FADD, MVT::v4f32, 3 },
+ { ISD::FADD, MVT::v4f64, 3 },
+ { ISD::FADD, MVT::v8f32, 4 },
+ { ISD::ADD, MVT::v2i64, 1 }, // The data reported by the IACA tool is "1.5".
+ { ISD::ADD, MVT::v4i32, 3 }, // The data reported by the IACA tool is "2.8".
+ { ISD::ADD, MVT::v4i64, 3 },
+ { ISD::ADD, MVT::v8i16, 4 },
+ { ISD::ADD, MVT::v8i32, 5 },
+ };
+
+ if (IsPairwise) {
+ if (ST->hasAVX()) {
+ int Idx = CostTableLookup(AVX1CostTblPairWise, ISD, MTy);
+ if (Idx != -1)
+ return LT.first * AVX1CostTblPairWise[Idx].Cost;
+ }
+
+ if (ST->hasSSE42()) {
+ int Idx = CostTableLookup(SSE42CostTblPairWise, ISD, MTy);
+ if (Idx != -1)
+ return LT.first * SSE42CostTblPairWise[Idx].Cost;
+ }
+ } else {
+ if (ST->hasAVX()) {
+ int Idx = CostTableLookup(AVX1CostTblNoPairWise, ISD, MTy);
+ if (Idx != -1)
+ return LT.first * AVX1CostTblNoPairWise[Idx].Cost;
+ }
+
+ if (ST->hasSSE42()) {
+ int Idx = CostTableLookup(SSE42CostTblNoPairWise, ISD, MTy);
+ if (Idx != -1)
+ return LT.first * SSE42CostTblNoPairWise[Idx].Cost;
+ }
+ }
+
+ return TargetTransformInfo::getReductionCost(Opcode, ValTy, IsPairwise);
+}
+
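The mechanical change running through this file is that CostTableLookup and ConvertCostTableLookup now deduce the table length from the array reference, so the explicit array_lengthof argument disappears. A minimal sketch of the post-patch lookup pattern (the table contents here are placeholders):

#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/Target/CostTable.h"
using namespace llvm;

static const CostTblEntry<MVT::SimpleValueType> ExampleTbl[] = {
  { ISD::ADD, MVT::v4i32, 1 }, // placeholder cost
};

// Returns the table cost for (Opc, Ty) if present, else Default.
unsigned lookupOrDefault(int Opc, MVT Ty, unsigned Default) {
  int Idx = CostTableLookup(ExampleTbl, Opc, Ty); // length deduced
  return Idx != -1 ? ExampleTbl[Idx].Cost : Default;
}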
diff --git a/lib/Target/X86/X86VZeroUpper.cpp b/lib/Target/X86/X86VZeroUpper.cpp
index 477f75a..66ae9c2 100644
--- a/lib/Target/X86/X86VZeroUpper.cpp
+++ b/lib/Target/X86/X86VZeroUpper.cpp
@@ -122,11 +122,11 @@ static bool checkFnHasLiveInYmm(MachineRegisterInfo &MRI) {
}
static bool clobbersAllYmmRegs(const MachineOperand &MO) {
- for (unsigned reg = X86::YMM0; reg < X86::YMM31; ++reg) {
+ for (unsigned reg = X86::YMM0; reg <= X86::YMM31; ++reg) {
if (!MO.clobbersPhysReg(reg))
return false;
}
- for (unsigned reg = X86::ZMM0; reg < X86::ZMM31; ++reg) {
+ for (unsigned reg = X86::ZMM0; reg <= X86::ZMM31; ++reg) {
if (!MO.clobbersPhysReg(reg))
return false;
}
@@ -148,6 +148,25 @@ static bool hasYmmReg(MachineInstr *MI) {
return false;
}
+/// clobbersAnyYmmReg() - Check if any YMM register will be clobbered by this
+/// instruction.
+static bool clobbersAnyYmmReg(MachineInstr *MI) {
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isRegMask())
+ continue;
+ for (unsigned reg = X86::YMM0; reg <= X86::YMM31; ++reg) {
+ if (MO.clobbersPhysReg(reg))
+ return true;
+ }
+ for (unsigned reg = X86::ZMM0; reg <= X86::ZMM31; ++reg) {
+ if (MO.clobbersPhysReg(reg))
+ return true;
+ }
+ }
+ return false;
+}
+
/// runOnMachineFunction - Loop over all of the basic blocks, inserting
/// vzero upper instructions before function calls.
bool VZeroUpperInserter::runOnMachineFunction(MachineFunction &MF) {
@@ -231,8 +250,9 @@ bool VZeroUpperInserter::processBasicBlock(MachineFunction &MF,
bool BBHasCall = false;
for (MachineBasicBlock::iterator I = BB.begin(); I != BB.end(); ++I) {
- MachineInstr *MI = I;
DebugLoc dl = I->getDebugLoc();
+ MachineInstr *MI = I;
+
bool isControlFlow = MI->isCall() || MI->isReturn();
// Shortcut: don't need to check regular instructions in dirty state.
@@ -251,6 +271,14 @@ bool VZeroUpperInserter::processBasicBlock(MachineFunction &MF,
if (!isControlFlow)
continue;
+    // If the call won't clobber any YMM register, skip it as well. This
+    // usually happens on helper function calls (such as '_chkstk', '_ftol2')
+    // where the standard calling convention is not used (RegMask is not used
+    // to mark registers clobbered and register usage (def/imp-def/use) is
+    // well-defined and explicitly specified).
+ if (MI->isCall() && !clobbersAnyYmmReg(MI))
+ continue;
+
BBHasCall = true;
// The VZEROUPPER instruction resets the upper 128 bits of all Intel AVX
diff --git a/lib/Target/XCore/CMakeLists.txt b/lib/Target/XCore/CMakeLists.txt
index 85d2a1d..3fa3b34 100644
--- a/lib/Target/XCore/CMakeLists.txt
+++ b/lib/Target/XCore/CMakeLists.txt
@@ -22,6 +22,7 @@ add_llvm_target(XCoreCodeGen
XCoreSubtarget.cpp
XCoreTargetMachine.cpp
XCoreTargetObjectFile.cpp
+ XCoreTargetTransformInfo.cpp
XCoreSelectionDAGInfo.cpp
)
diff --git a/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.cpp b/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.cpp
index 6f44551..3d1c474 100644
--- a/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.cpp
+++ b/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.cpp
@@ -23,10 +23,14 @@ XCoreMCAsmInfo::XCoreMCAsmInfo(StringRef TT) {
PrivateGlobalPrefix = ".L";
AscizDirective = ".asciiz";
- WeakDefDirective = "\t.weak\t";
- WeakRefDirective = "\t.weak\t";
+
+ HiddenVisibilityAttr = MCSA_Invalid;
+ HiddenDeclarationVisibilityAttr = MCSA_Invalid;
+ ProtectedVisibilityAttr = MCSA_Invalid;
// Debug
HasLEB128 = true;
+ ExceptionsType = ExceptionHandling::DwarfCFI;
+ DwarfRegNumForCFI = true;
}
diff --git a/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.h b/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.h
index b5a9660..e53c96b 100644
--- a/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.h
+++ b/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.h
@@ -14,13 +14,13 @@
#ifndef XCORETARGETASMINFO_H
#define XCORETARGETASMINFO_H
-#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCAsmInfoELF.h"
namespace llvm {
class StringRef;
class Target;
- class XCoreMCAsmInfo : public MCAsmInfo {
+ class XCoreMCAsmInfo : public MCAsmInfoELF {
virtual void anchor();
public:
explicit XCoreMCAsmInfo(StringRef TT);
diff --git a/lib/Target/XCore/XCore.h b/lib/Target/XCore/XCore.h
index 2f375fc..73c310b 100644
--- a/lib/Target/XCore/XCore.h
+++ b/lib/Target/XCore/XCore.h
@@ -31,6 +31,8 @@ namespace llvm {
CodeGenOpt::Level OptLevel);
ModulePass *createXCoreLowerThreadLocalPass();
+ ImmutablePass *createXCoreTargetTransformInfoPass(const XCoreTargetMachine *TM);
+
} // end namespace llvm;
#endif
diff --git a/lib/Target/XCore/XCoreAsmPrinter.cpp b/lib/Target/XCore/XCoreAsmPrinter.cpp
index 35ba299..c03dfe6 100644
--- a/lib/Target/XCore/XCoreAsmPrinter.cpp
+++ b/lib/Target/XCore/XCoreAsmPrinter.cpp
@@ -36,6 +36,7 @@
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCExpr.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
@@ -88,14 +89,12 @@ void XCoreAsmPrinter::emitArrayBound(MCSymbol *Sym, const GlobalVariable *GV) {
MCSymbol *SymGlob = OutContext.GetOrCreateSymbol(
Twine(Sym->getName() + StringRef(".globound")));
OutStreamer.EmitSymbolAttribute(SymGlob, MCSA_Global);
-
- OutStreamer.EmitRawText("\t.set\t" + Twine(Sym->getName()) +
- ".globound," + Twine(ATy->getNumElements()));
-
+ OutStreamer.EmitAssignment(SymGlob,
+ MCConstantExpr::Create(ATy->getNumElements(),
+ OutContext));
if (GV->hasWeakLinkage() || GV->hasLinkOnceLinkage()) {
// TODO Use COMDAT groups for LinkOnceLinkage
- OutStreamer.EmitRawText(MAI->getWeakDefDirective() +Twine(Sym->getName())+
- ".globound");
+ OutStreamer.EmitSymbolAttribute(SymGlob, MCSA_Weak);
}
}
}
@@ -110,7 +109,7 @@ void XCoreAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
OutStreamer.SwitchSection(getObjFileLowering().SectionForGlobal(GV, Mang,TM));
- MCSymbol *GVSym = Mang->getSymbol(GV);
+ MCSymbol *GVSym = getSymbol(GV);
const Constant *C = GV->getInitializer();
unsigned Align = (unsigned)TD->getPreferredTypeAlignmentShift(C->getType());
@@ -217,7 +216,7 @@ void XCoreAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
O << *MO.getMBB()->getSymbol();
break;
case MachineOperand::MO_GlobalAddress:
- O << *Mang->getSymbol(MO.getGlobal());
+ O << *getSymbol(MO.getGlobal());
break;
case MachineOperand::MO_ExternalSymbol:
O << MO.getSymbolName();
diff --git a/lib/Target/XCore/XCoreFrameLowering.cpp b/lib/Target/XCore/XCoreFrameLowering.cpp
index b57cf9d..c34b35c 100644
--- a/lib/Target/XCore/XCoreFrameLowering.cpp
+++ b/lib/Target/XCore/XCoreFrameLowering.cpp
@@ -30,10 +30,6 @@
using namespace llvm;
// helper functions. FIXME: Eliminate.
-static inline bool isImmUs(unsigned val) {
- return val <= 11;
-}
-
static inline bool isImmU6(unsigned val) {
return val < (1 << 6);
}
@@ -92,11 +88,16 @@ void XCoreFrameLowering::emitPrologue(MachineFunction &MF) const {
MachineBasicBlock::iterator MBBI = MBB.begin();
MachineFrameInfo *MFI = MF.getFrameInfo();
MachineModuleInfo *MMI = &MF.getMMI();
+ const MCRegisterInfo *MRI = MMI->getContext().getRegisterInfo();
const XCoreInstrInfo &TII =
*static_cast<const XCoreInstrInfo*>(MF.getTarget().getInstrInfo());
XCoreFunctionInfo *XFI = MF.getInfo<XCoreFunctionInfo>();
DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
+ if (MFI->getMaxAlignment() > getStackAlignment())
+ report_fatal_error("emitPrologue unsupported alignment: "
+ + Twine(MFI->getMaxAlignment()));
+
bool FP = hasFP(MF);
const AttributeSet &PAL = MF.getFunction()->getAttributes();
@@ -119,21 +120,28 @@ void XCoreFrameLowering::emitPrologue(MachineFunction &MF) const {
bool saveLR = XFI->getUsesLR();
// Do we need to allocate space on the stack?
if (FrameSize) {
+ bool LRSavedOnEntry = false;
int Opcode;
if (saveLR && (MFI->getObjectOffset(XFI->getLRSpillSlot()) == 0)) {
Opcode = (isU6) ? XCore::ENTSP_u6 : XCore::ENTSP_lu6;
MBB.addLiveIn(XCore::LR);
saveLR = false;
+ LRSavedOnEntry = true;
} else {
Opcode = (isU6) ? XCore::EXTSP_u6 : XCore::EXTSP_lu6;
}
BuildMI(MBB, MBBI, dl, TII.get(Opcode)).addImm(FrameSize);
if (emitFrameMoves) {
-
// Show update of SP.
MCSymbol *FrameLabel = MMI->getContext().CreateTempSymbol();
BuildMI(MBB, MBBI, dl, TII.get(XCore::PROLOG_LABEL)).addSym(FrameLabel);
+ MMI->addFrameInst(MCCFIInstruction::createDefCfaOffset(FrameLabel,
+ -FrameSize*4));
+ if (LRSavedOnEntry) {
+ unsigned Reg = MRI->getDwarfRegNum(XCore::LR, true);
+ MMI->addFrameInst(MCCFIInstruction::createOffset(FrameLabel, Reg, 0));
+ }
}
}
if (saveLR) {
@@ -144,6 +152,9 @@ void XCoreFrameLowering::emitPrologue(MachineFunction &MF) const {
if (emitFrameMoves) {
MCSymbol *SaveLRLabel = MMI->getContext().CreateTempSymbol();
BuildMI(MBB, MBBI, dl, TII.get(XCore::PROLOG_LABEL)).addSym(SaveLRLabel);
+ unsigned Reg = MRI->getDwarfRegNum(XCore::LR, true);
+ MMI->addFrameInst(MCCFIInstruction::createOffset(SaveLRLabel, Reg,
+ LRSpillOffset));
}
}
@@ -156,15 +167,34 @@ void XCoreFrameLowering::emitPrologue(MachineFunction &MF) const {
if (emitFrameMoves) {
MCSymbol *SaveR10Label = MMI->getContext().CreateTempSymbol();
BuildMI(MBB, MBBI, dl, TII.get(XCore::PROLOG_LABEL)).addSym(SaveR10Label);
+ unsigned Reg = MRI->getDwarfRegNum(XCore::R10, true);
+ MMI->addFrameInst(MCCFIInstruction::createOffset(SaveR10Label, Reg,
+ FPSpillOffset));
}
// Set the FP from the SP.
unsigned FramePtr = XCore::R10;
- BuildMI(MBB, MBBI, dl, TII.get(XCore::LDAWSP_ru6), FramePtr)
- .addImm(0);
+ BuildMI(MBB, MBBI, dl, TII.get(XCore::LDAWSP_ru6), FramePtr).addImm(0);
if (emitFrameMoves) {
// Show FP is now valid.
MCSymbol *FrameLabel = MMI->getContext().CreateTempSymbol();
BuildMI(MBB, MBBI, dl, TII.get(XCore::PROLOG_LABEL)).addSym(FrameLabel);
+ unsigned Reg = MRI->getDwarfRegNum(FramePtr, true);
+ MMI->addFrameInst(MCCFIInstruction::createDefCfaRegister(FrameLabel,
+ Reg));
+ }
+ }
+
+ if (emitFrameMoves) {
+ // Frame moves for callee saved.
+ std::vector<std::pair<MCSymbol*, CalleeSavedInfo> >&SpillLabels =
+ XFI->getSpillLabels();
+ for (unsigned I = 0, E = SpillLabels.size(); I != E; ++I) {
+ MCSymbol *SpillLabel = SpillLabels[I].first;
+ CalleeSavedInfo &CSI = SpillLabels[I].second;
+ int Offset = MFI->getObjectOffset(CSI.getFrameIdx());
+ unsigned Reg = MRI->getDwarfRegNum(CSI.getReg(), true);
+ MMI->addFrameInst(MCCFIInstruction::createOffset(SpillLabel, Reg,
+ Offset));
}
}
}
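All of the frame moves added above follow one recipe: plant a PROLOG_LABEL, translate the physical register to its DWARF number, and attach a CFI instruction to that label. A condensed sketch of the recipe under the API used in this patch (the helper name is invented for illustration):

#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/MC/MCDwarf.h"
#include "llvm/MC/MCRegisterInfo.h"
using namespace llvm;

// Invented helper: record "Reg was saved at Offset from the CFA" at Label.
// The 'true' argument selects the EH flavor of DWARF register numbering.
static void recordCalleeSave(MachineModuleInfo &MMI, const MCRegisterInfo &MRI,
                             MCSymbol *Label, unsigned Reg, int Offset) {
  unsigned DwarfReg = MRI.getDwarfRegNum(Reg, true);
  MMI.addFrameInst(MCCFIInstruction::createOffset(Label, DwarfReg, Offset));
}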
diff --git a/lib/Target/XCore/XCoreISelLowering.cpp b/lib/Target/XCore/XCoreISelLowering.cpp
index 6fc7eef5..89ad27d 100644
--- a/lib/Target/XCore/XCoreISelLowering.cpp
+++ b/lib/Target/XCore/XCoreISelLowering.cpp
@@ -59,6 +59,7 @@ getTargetNodeName(unsigned Opcode) const
case XCoreISD::CRC8 : return "XCoreISD::CRC8";
case XCoreISD::BR_JT : return "XCoreISD::BR_JT";
case XCoreISD::BR_JT32 : return "XCoreISD::BR_JT32";
+ case XCoreISD::MEMBARRIER : return "XCoreISD::MEMBARRIER";
default : return NULL;
}
}
@@ -79,7 +80,7 @@ XCoreTargetLowering::XCoreTargetLowering(XCoreTargetMachine &XTM)
setStackPointerRegisterToSaveRestore(XCore::SP);
- setSchedulingPreference(Sched::RegPressure);
+ setSchedulingPreference(Sched::Source);
// Use i32 for setcc operations results (slt, sgt, ...).
setBooleanContents(ZeroOrOneBooleanContent);
@@ -148,6 +149,13 @@ XCoreTargetLowering::XCoreTargetLowering(XCoreTargetMachine &XTM)
setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
+ // Exception handling
+ setExceptionPointerRegister(XCore::R0);
+ setExceptionSelectorRegister(XCore::R1);
+
+ // Atomic operations
+ setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
+
// TRAMPOLINE is custom lowered.
setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);
@@ -166,6 +174,24 @@ XCoreTargetLowering::XCoreTargetLowering(XCoreTargetMachine &XTM)
setMinFunctionAlignment(1);
}
+bool XCoreTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
+ if (Val.getOpcode() != ISD::LOAD)
+ return false;
+
+ EVT VT1 = Val.getValueType();
+ if (!VT1.isSimple() || !VT1.isInteger() ||
+ !VT2.isSimple() || !VT2.isInteger())
+ return false;
+
+ switch (VT1.getSimpleVT().SimpleTy) {
+ default: break;
+ case MVT::i8:
+ return true;
+ }
+
+ return false;
+}
+
SDValue XCoreTargetLowering::
LowerOperation(SDValue Op, SelectionDAG &DAG) const {
switch (Op.getOpcode())
@@ -188,6 +214,7 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG);
case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG);
case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
+ case ISD::ATOMIC_FENCE: return LowerATOMIC_FENCE(Op, DAG);
default:
llvm_unreachable("unimplemented operand");
}
@@ -259,11 +286,6 @@ LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const
return GA;
}
-static inline SDValue BuildGetId(SelectionDAG &DAG, SDLoc dl) {
- return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::i32,
- DAG.getConstant(Intrinsic::xcore_getid, MVT::i32));
-}
-
SDValue XCoreTargetLowering::
LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const
{
@@ -834,6 +856,12 @@ LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const {
return SDValue();
}
+SDValue XCoreTargetLowering::
+LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ return DAG.getNode(XCoreISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0));
+}
+
//===----------------------------------------------------------------------===//
// Calling Convention Implementation
//===----------------------------------------------------------------------===//
@@ -1200,7 +1228,7 @@ XCoreTargetLowering::LowerCCCArguments(SDValue Chain,
ArgDI != ArgDE; ++ArgDI) {
if (ArgDI->Flags.isByVal() && ArgDI->Flags.getByValSize()) {
unsigned Size = ArgDI->Flags.getByValSize();
- unsigned Align = ArgDI->Flags.getByValAlign();
+ unsigned Align = std::max(StackSlotSize, ArgDI->Flags.getByValAlign());
// Create a new object on the stack and copy the pointee into it.
int FI = MFI->CreateStackObject(Size, Align, false, false);
SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
diff --git a/lib/Target/XCore/XCoreISelLowering.h b/lib/Target/XCore/XCoreISelLowering.h
index 7761b7c..bc08497 100644
--- a/lib/Target/XCore/XCoreISelLowering.h
+++ b/lib/Target/XCore/XCoreISelLowering.h
@@ -70,7 +70,10 @@ namespace llvm {
BR_JT,
// Jumptable branch using long branches for each entry.
- BR_JT32
+ BR_JT32,
+
+ // Memory barrier.
+ MEMBARRIER
};
}
@@ -83,6 +86,10 @@ namespace llvm {
explicit XCoreTargetLowering(XCoreTargetMachine &TM);
+ using TargetLowering::isZExtFree;
+ virtual bool isZExtFree(SDValue Val, EVT VT2) const;
+
+
virtual unsigned getJumpTableEncoding() const;
virtual MVT getScalarShiftAmountTy(EVT LHSTy) const { return MVT::i32; }
@@ -154,6 +161,7 @@ namespace llvm {
SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG) const;
// Inline asm support
std::pair<unsigned, const TargetRegisterClass*>
diff --git a/lib/Target/XCore/XCoreInstrInfo.cpp b/lib/Target/XCore/XCoreInstrInfo.cpp
index d6b8c2d..33c7f31 100644
--- a/lib/Target/XCore/XCoreInstrInfo.cpp
+++ b/lib/Target/XCore/XCoreInstrInfo.cpp
@@ -22,7 +22,7 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetRegistry.h"
-#define GET_INSTRINFO_CTOR
+#define GET_INSTRINFO_CTOR_DTOR
#include "XCoreGenInstrInfo.inc"
namespace llvm {
@@ -39,6 +39,10 @@ namespace XCore {
using namespace llvm;
+
+// Pin the vtable to this file.
+void XCoreInstrInfo::anchor() {}
+
XCoreInstrInfo::XCoreInstrInfo()
: XCoreGenInstrInfo(XCore::ADJCALLSTACKDOWN, XCore::ADJCALLSTACKUP),
RI() {
diff --git a/lib/Target/XCore/XCoreInstrInfo.h b/lib/Target/XCore/XCoreInstrInfo.h
index 51d66a1..4429b07 100644
--- a/lib/Target/XCore/XCoreInstrInfo.h
+++ b/lib/Target/XCore/XCoreInstrInfo.h
@@ -24,6 +24,7 @@ namespace llvm {
class XCoreInstrInfo : public XCoreGenInstrInfo {
const XCoreRegisterInfo RI;
+ virtual void anchor();
public:
XCoreInstrInfo();
diff --git a/lib/Target/XCore/XCoreInstrInfo.td b/lib/Target/XCore/XCoreInstrInfo.td
index 81fa84d..934a707 100644
--- a/lib/Target/XCore/XCoreInstrInfo.td
+++ b/lib/Target/XCore/XCoreInstrInfo.td
@@ -70,6 +70,11 @@ def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_XCoreCallSeqStart,
def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_XCoreCallSeqEnd,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
+def SDT_XCoreMEMBARRIER : SDTypeProfile<0, 0, []>;
+
+def XCoreMemBarrier : SDNode<"XCoreISD::MEMBARRIER", SDT_XCoreMEMBARRIER,
+ [SDNPHasChain]>;
+
//===----------------------------------------------------------------------===//
// Instruction Pattern Stuff
//===----------------------------------------------------------------------===//
@@ -343,6 +348,10 @@ let usesCustomInserter = 1 in {
(select GRRegs:$cond, GRRegs:$T, GRRegs:$F))]>;
}
+let hasSideEffects = 1 in
+def Int_MemBarrier : PseudoInstXCore<(outs), (ins), "#MEMBARRIER",
+ [(XCoreMemBarrier)]>;
+
//===----------------------------------------------------------------------===//
// Instructions
//===----------------------------------------------------------------------===//
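With the SDNode and the side-effecting #MEMBARRIER pseudo in place, any IR-level fence now flows through XCoreISD::MEMBARRIER. A small sketch of producing such a fence with the C++ API (ordinary IRBuilder usage, shown for context rather than taken from this patch):

#include "llvm/IR/IRBuilder.h"
using namespace llvm;

// Emits 'fence seq_cst'. SelectionDAG lowers it to ISD::ATOMIC_FENCE,
// which this patch custom-lowers to XCoreISD::MEMBARRIER and finally
// matches to the #MEMBARRIER pseudo defined above.
void emitFence(IRBuilder<> &B) {
  B.CreateFence(SequentiallyConsistent);
}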
diff --git a/lib/Target/XCore/XCoreLowerThreadLocal.cpp b/lib/Target/XCore/XCoreLowerThreadLocal.cpp
index 2e328b4..afce753 100644
--- a/lib/Target/XCore/XCoreLowerThreadLocal.cpp
+++ b/lib/Target/XCore/XCoreLowerThreadLocal.cpp
@@ -22,6 +22,9 @@
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/NoFolder.h"
+#include "llvm/Support/ValueHandle.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#define DEBUG_TYPE "xcore-lower-thread-local"
@@ -71,13 +74,104 @@ createLoweredInitializer(ArrayType *NewType, Constant *OriginalInitializer) {
return ConstantArray::get(NewType, Elements);
}
-static bool hasNonInstructionUse(GlobalVariable *GV) {
- for (Value::use_iterator UI = GV->use_begin(), E = GV->use_end(); UI != E;
- ++UI)
- if (!isa<Instruction>(*UI))
- return true;
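+// Create an instruction that computes the same value as the constant
+// expression CE, inserted immediately before Instr. Building with NoFolder
+// prevents the result from being folded straight back into a ConstantExpr.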
+static Instruction *
+createReplacementInstr(ConstantExpr *CE, Instruction *Instr) {
+ IRBuilder<true,NoFolder> Builder(Instr);
+ unsigned OpCode = CE->getOpcode();
+ switch (OpCode) {
+ case Instruction::GetElementPtr: {
+ SmallVector<Value *,4> CEOpVec(CE->op_begin(), CE->op_end());
+ ArrayRef<Value *> CEOps(CEOpVec);
+ return dyn_cast<Instruction>(Builder.CreateInBoundsGEP(CEOps[0],
+ CEOps.slice(1)));
+ }
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::Mul:
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ case Instruction::FDiv:
+ case Instruction::URem:
+ case Instruction::SRem:
+ case Instruction::FRem:
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ return dyn_cast<Instruction>(
+ Builder.CreateBinOp((Instruction::BinaryOps)OpCode,
+ CE->getOperand(0), CE->getOperand(1),
+ CE->getName()));
+ case Instruction::Trunc:
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ case Instruction::UIToFP:
+ case Instruction::SIToFP:
+ case Instruction::FPTrunc:
+ case Instruction::FPExt:
+ case Instruction::PtrToInt:
+ case Instruction::IntToPtr:
+ case Instruction::BitCast:
+ return dyn_cast<Instruction>(
+ Builder.CreateCast((Instruction::CastOps)OpCode,
+ CE->getOperand(0), CE->getType(),
+ CE->getName()));
+ default:
+    llvm_unreachable("Unhandled constant expression!");
+ }
+}
+
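+// Replace every use of the constant expression CE with an equivalent
+// instruction, recursing into users that are themselves constant expressions.
+// A PHI operand is materialized in the incoming block, splitting the edge
+// first if that block has multiple successors. WeakVH handles make it safe to
+// skip users destroyed while a sibling was being processed.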
+static bool replaceConstantExprOp(ConstantExpr *CE, Pass *P) {
+ do {
+ SmallVector<WeakVH,8> WUsers;
+ for (Value::use_iterator I = CE->use_begin(), E = CE->use_end();
+ I != E; ++I)
+ WUsers.push_back(WeakVH(*I));
+ std::sort(WUsers.begin(), WUsers.end());
+ WUsers.erase(std::unique(WUsers.begin(), WUsers.end()), WUsers.end());
+ while (!WUsers.empty())
+ if (WeakVH WU = WUsers.pop_back_val()) {
+ if (PHINode *PN = dyn_cast<PHINode>(WU)) {
+ for (int I = 0, E = PN->getNumIncomingValues(); I < E; ++I)
+ if (PN->getIncomingValue(I) == CE) {
+ BasicBlock *PredBB = PN->getIncomingBlock(I);
+ if (PredBB->getTerminator()->getNumSuccessors() > 1)
+ PredBB = SplitEdge(PredBB, PN->getParent(), P);
+ Instruction *InsertPos = PredBB->getTerminator();
+ Instruction *NewInst = createReplacementInstr(CE, InsertPos);
+ PN->setOperand(I, NewInst);
+ }
+ } else if (Instruction *Instr = dyn_cast<Instruction>(WU)) {
+ Instruction *NewInst = createReplacementInstr(CE, Instr);
+ Instr->replaceUsesOfWith(CE, NewInst);
+ } else {
+ ConstantExpr *CExpr = dyn_cast<ConstantExpr>(WU);
+ if (!CExpr || !replaceConstantExprOp(CExpr, P))
+ return false;
+ }
+ }
+  } while (CE->hasNUsesOrMore(1)); // We need to check because a recursive
+  // sibling may have used 'CE' when createReplacementInstr was called.
+ CE->destroyConstant();
+ return true;
+}
- return false;
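+// Rewrite each non-instruction use of GV (in practice a ConstantExpr) into
+// equivalent instructions; returns false if any use cannot be rewritten.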
+static bool rewriteNonInstructionUses(GlobalVariable *GV, Pass *P) {
+ SmallVector<WeakVH,8> WUsers;
+ for (Value::use_iterator I = GV->use_begin(), E = GV->use_end(); I != E; ++I)
+ if (!isa<Instruction>(*I))
+ WUsers.push_back(WeakVH(*I));
+ while (!WUsers.empty())
+ if (WeakVH WU = WUsers.pop_back_val()) {
+ ConstantExpr *CE = dyn_cast<ConstantExpr>(WU);
+ if (!CE || !replaceConstantExprOp(CE, P))
+ return false;
+ }
+ return true;
}
static bool isZeroLengthArray(Type *Ty) {
@@ -92,14 +186,16 @@ bool XCoreLowerThreadLocal::lowerGlobal(GlobalVariable *GV) {
return false;
// Skip globals that we can't lower, leaving them for the backend to error on.
- if (hasNonInstructionUse(GV) ||
+ if (!rewriteNonInstructionUses(GV, this) ||
!GV->getType()->isSized() || isZeroLengthArray(GV->getType()))
return false;
// Create replacement global.
ArrayType *NewType = createLoweredType(GV->getType()->getElementType());
- Constant *NewInitializer = createLoweredInitializer(NewType,
- GV->getInitializer());
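+  // A declaration has no initializer to lower, so leave it null in that case.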
+ Constant *NewInitializer = 0;
+ if (GV->hasInitializer())
+ NewInitializer = createLoweredInitializer(NewType,
+ GV->getInitializer());
GlobalVariable *NewGV =
new GlobalVariable(*M, NewType, GV->isConstant(), GV->getLinkage(),
NewInitializer, "", 0, GlobalVariable::NotThreadLocal,
diff --git a/lib/Target/XCore/XCoreMCInstLower.cpp b/lib/Target/XCore/XCoreMCInstLower.cpp
index f96eda9..def2673 100644
--- a/lib/Target/XCore/XCoreMCInstLower.cpp
+++ b/lib/Target/XCore/XCoreMCInstLower.cpp
@@ -43,7 +43,7 @@ MCOperand XCoreMCInstLower::LowerSymbolOperand(const MachineOperand &MO,
Symbol = MO.getMBB()->getSymbol();
break;
case MachineOperand::MO_GlobalAddress:
- Symbol = Mang->getSymbol(MO.getGlobal());
+ Symbol = Printer.getSymbol(MO.getGlobal());
Offset += MO.getOffset();
break;
case MachineOperand::MO_BlockAddress:
diff --git a/lib/Target/XCore/XCoreTargetMachine.cpp b/lib/Target/XCore/XCoreTargetMachine.cpp
index 3ef1520..9ae0b86 100644
--- a/lib/Target/XCore/XCoreTargetMachine.cpp
+++ b/lib/Target/XCore/XCoreTargetMachine.cpp
@@ -70,3 +70,11 @@ bool XCorePassConfig::addInstSelector() {
extern "C" void LLVMInitializeXCoreTarget() {
RegisterTargetMachine<XCoreTargetMachine> X(TheXCoreTarget);
}
+
+void XCoreTargetMachine::addAnalysisPasses(PassManagerBase &PM) {
+  // First add the target-independent BasicTTI pass, then our XCore pass. This
+  // ordering lets the XCore pass delegate to the target-independent layer
+  // when appropriate.
+ PM.add(createBasicTargetTransformInfoPass(this));
+ PM.add(createXCoreTargetTransformInfoPass(this));
+}
diff --git a/lib/Target/XCore/XCoreTargetMachine.h b/lib/Target/XCore/XCoreTargetMachine.h
index eb9a1aa..a19a677 100644
--- a/lib/Target/XCore/XCoreTargetMachine.h
+++ b/lib/Target/XCore/XCoreTargetMachine.h
@@ -57,6 +57,8 @@ public:
// Pass Pipeline Configuration
virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);
+
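+  // Register XCore-specific analysis passes (the TTI passes added in
+  // XCoreTargetMachine.cpp) with the pass manager.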
+ virtual void addAnalysisPasses(PassManagerBase &PM);
};
} // end namespace llvm
diff --git a/lib/Target/XCore/XCoreTargetTransformInfo.cpp b/lib/Target/XCore/XCoreTargetTransformInfo.cpp
new file mode 100644
index 0000000..cc165f7
--- /dev/null
+++ b/lib/Target/XCore/XCoreTargetTransformInfo.cpp
@@ -0,0 +1,83 @@
+//===-- XCoreTargetTransformInfo.cpp - XCore specific TTI pass ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file implements a TargetTransformInfo analysis pass specific to the
+/// XCore target machine. It uses the target's detailed information to provide
+/// more precise answers to certain TTI queries, while letting the target
+/// independent and default TTI implementations handle the rest.
+///
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "xcoretti"
+#include "XCore.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/CostTable.h"
+using namespace llvm;
+
+// Declare the pass initialization routine locally as target-specific passes
+// don't have a target-wide initialization entry point, and so we rely on the
+// pass constructor initialization.
+namespace llvm {
+void initializeXCoreTTIPass(PassRegistry &);
+}
+
+namespace {
+
+class XCoreTTI : public ImmutablePass, public TargetTransformInfo {
+public:
+ XCoreTTI() : ImmutablePass(ID) {
+ llvm_unreachable("This pass cannot be directly constructed");
+ }
+
+ XCoreTTI(const XCoreTargetMachine *TM)
+ : ImmutablePass(ID) {
+ initializeXCoreTTIPass(*PassRegistry::getPassRegistry());
+ }
+
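+  // Push this TTI implementation onto the analysis-group stack when the pass
+  // is initialized, and pop it again when the pass is finalized.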
+ virtual void initializePass() {
+ pushTTIStack(this);
+ }
+
+ virtual void finalizePass() {
+ popTTIStack();
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ TargetTransformInfo::getAnalysisUsage(AU);
+ }
+
+ static char ID;
+
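+  // Because XCoreTTI multiply inherits, the pointer must be adjusted when
+  // this pass is queried through the TargetTransformInfo analysis group.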
+ virtual void *getAdjustedAnalysisPointer(const void *ID) {
+ if (ID == &TargetTransformInfo::ID)
+ return (TargetTransformInfo*)this;
+ return this;
+ }
+
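+  // XCore has no vector registers; for scalars, report the twelve
+  // general-purpose registers (r0-r11) available to the allocator.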
+ unsigned getNumberOfRegisters(bool Vector) const {
+ if (Vector) {
+ return 0;
+ }
+ return 12;
+ }
+};
+
+} // end anonymous namespace
+
+INITIALIZE_AG_PASS(XCoreTTI, TargetTransformInfo, "xcoretti",
+ "XCore Target Transform Info", true, true, false)
+char XCoreTTI::ID = 0;
+
+ImmutablePass *
+llvm::createXCoreTargetTransformInfoPass(const XCoreTargetMachine *TM) {
+ return new XCoreTTI(TM);
+}