author    Pirama Arumuga Nainar <pirama@google.com>    2015-04-10 21:22:52 +0000
committer Gerrit Code Review <noreply-gerritcodereview@google.com>    2015-04-10 21:23:04 +0000
commit    31195f0bdca6ee2a5e72d07edf13e1d81206d949 (patch)
tree      1b2c9792582e12f5af0b1512e3094425f0dc0df9 /lib/Target
parent    c75239e6119d0f9a74c57099d91cbc9bde56bf33 (diff)
parent    4c5e43da7792f75567b693105cc53e3f1992ad98 (diff)
Merge "Update aosp/master llvm for rebase to r233350"
Diffstat (limited to 'lib/Target')
-rw-r--r--  lib/Target/AArch64/AArch64.td | 7
-rw-r--r--  lib/Target/AArch64/AArch64A53Fix835769.cpp | 1
-rw-r--r--  lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp | 26
-rw-r--r--  lib/Target/AArch64/AArch64AddressTypePromotion.cpp | 1
-rw-r--r--  lib/Target/AArch64/AArch64AsmPrinter.cpp | 77
-rw-r--r--  lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp | 6
-rw-r--r--  lib/Target/AArch64/AArch64CollectLOH.cpp | 14
-rw-r--r--  lib/Target/AArch64/AArch64FastISel.cpp | 4
-rw-r--r--  lib/Target/AArch64/AArch64ISelDAGToDAG.cpp | 47
-rw-r--r--  lib/Target/AArch64/AArch64ISelLowering.cpp | 302
-rw-r--r--  lib/Target/AArch64/AArch64ISelLowering.h | 35
-rw-r--r--  lib/Target/AArch64/AArch64InstrInfo.cpp | 10
-rw-r--r--  lib/Target/AArch64/AArch64InstrInfo.h | 7
-rw-r--r--  lib/Target/AArch64/AArch64InstrInfo.td | 297
-rw-r--r--  lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp | 127
-rw-r--r--  lib/Target/AArch64/AArch64MCInstLower.cpp | 11
-rw-r--r--  lib/Target/AArch64/AArch64PBQPRegAlloc.cpp | 2
-rw-r--r--  lib/Target/AArch64/AArch64PromoteConstant.cpp | 7
-rw-r--r--  lib/Target/AArch64/AArch64RegisterInfo.cpp | 47
-rw-r--r--  lib/Target/AArch64/AArch64RegisterInfo.h | 19
-rw-r--r--  lib/Target/AArch64/AArch64Subtarget.cpp | 2
-rw-r--r--  lib/Target/AArch64/AArch64Subtarget.h | 3
-rw-r--r--  lib/Target/AArch64/AArch64TargetMachine.cpp | 31
-rw-r--r--  lib/Target/AArch64/AArch64TargetMachine.h | 7
-rw-r--r--  lib/Target/AArch64/AArch64TargetObjectFile.cpp | 21
-rw-r--r--  lib/Target/AArch64/AArch64TargetObjectFile.h | 7
-rw-r--r--  lib/Target/AArch64/AArch64TargetTransformInfo.cpp | 12
-rw-r--r--  lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp | 125
-rw-r--r--  lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp | 21
-rw-r--r--  lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp | 23
-rw-r--r--  lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp | 7
-rw-r--r--  lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp | 1
-rw-r--r--  lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp | 129
-rw-r--r--  lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h | 20
-rw-r--r--  lib/Target/AArch64/Utils/AArch64BaseInfo.cpp | 96
-rw-r--r--  lib/Target/AArch64/Utils/AArch64BaseInfo.h | 51
-rw-r--r--  lib/Target/ARM/A15SDOptimizer.cpp | 12
-rw-r--r--  lib/Target/ARM/ARM.td | 30
-rw-r--r--  lib/Target/ARM/ARMAsmPrinter.cpp | 72
-rw-r--r--  lib/Target/ARM/ARMAsmPrinter.h | 9
-rw-r--r--  lib/Target/ARM/ARMBaseInstrInfo.cpp | 34
-rw-r--r--  lib/Target/ARM/ARMBaseRegisterInfo.cpp | 83
-rw-r--r--  lib/Target/ARM/ARMBaseRegisterInfo.h | 27
-rw-r--r--  lib/Target/ARM/ARMFastISel.cpp | 4
-rw-r--r--  lib/Target/ARM/ARMFrameLowering.cpp | 5
-rw-r--r--  lib/Target/ARM/ARMISelDAGToDAG.cpp | 9
-rw-r--r--  lib/Target/ARM/ARMISelLowering.cpp | 457
-rw-r--r--  lib/Target/ARM/ARMISelLowering.h | 33
-rw-r--r--  lib/Target/ARM/ARMInstrFormats.td | 7
-rw-r--r--  lib/Target/ARM/ARMInstrInfo.cpp | 7
-rw-r--r--  lib/Target/ARM/ARMInstrInfo.td | 15
-rw-r--r--  lib/Target/ARM/ARMInstrNEON.td | 215
-rw-r--r--  lib/Target/ARM/ARMInstrVFP.td | 107
-rw-r--r--  lib/Target/ARM/ARMLoadStoreOptimizer.cpp | 3
-rw-r--r--  lib/Target/ARM/ARMMachineFunctionInfo.h | 6
-rw-r--r--  lib/Target/ARM/ARMRegisterInfo.cpp | 4
-rw-r--r--  lib/Target/ARM/ARMRegisterInfo.h | 2
-rw-r--r--  lib/Target/ARM/ARMSubtarget.cpp | 2
-rw-r--r--  lib/Target/ARM/ARMSubtarget.h | 8
-rw-r--r--  lib/Target/ARM/ARMTargetMachine.cpp | 66
-rw-r--r--  lib/Target/ARM/ARMTargetMachine.h | 4
-rw-r--r--  lib/Target/ARM/Android.mk | 4
-rw-r--r--  lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 9
-rw-r--r--  lib/Target/ARM/CMakeLists.txt | 3
-rw-r--r--  lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp | 4
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMArchName.def | 3
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp | 29
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp | 1
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp | 2
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp | 1
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMMCExpr.h | 4
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp | 183
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h | 21
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp | 72
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp | 10
-rw-r--r--  lib/Target/ARM/MLxExpansionPass.cpp | 3
-rw-r--r--  lib/Target/ARM/README-Thumb.txt | 2
-rw-r--r--  lib/Target/ARM/Thumb1FrameLowering.cpp | 22
-rw-r--r--  lib/Target/ARM/Thumb1FrameLowering.h | 2
-rw-r--r--  lib/Target/ARM/Thumb1InstrInfo.cpp | 3
-rw-r--r--  lib/Target/ARM/Thumb1InstrInfo.h | 6
-rw-r--r--  lib/Target/ARM/Thumb2ITBlockPass.cpp | 2
-rw-r--r--  lib/Target/ARM/Thumb2InstrInfo.cpp | 3
-rw-r--r--  lib/Target/ARM/Thumb2InstrInfo.h | 6
-rw-r--r--  lib/Target/ARM/Thumb2RegisterInfo.cpp | 53
-rw-r--r--  lib/Target/ARM/Thumb2RegisterInfo.h | 38
-rw-r--r--  lib/Target/ARM/Thumb2SizeReduction.cpp | 4
-rw-r--r--  lib/Target/ARM/ThumbRegisterInfo.cpp (renamed from lib/Target/ARM/Thumb1RegisterInfo.cpp) | 128
-rw-r--r--  lib/Target/ARM/ThumbRegisterInfo.h (renamed from lib/Target/ARM/Thumb1RegisterInfo.h) | 14
-rw-r--r--  lib/Target/BPF/BPFISelDAGToDAG.cpp | 6
-rw-r--r--  lib/Target/BPF/BPFISelLowering.h | 1
-rw-r--r--  lib/Target/BPF/BPFRegisterInfo.h | 3
-rw-r--r--  lib/Target/BPF/BPFTargetMachine.cpp | 4
-rw-r--r--  lib/Target/BPF/BPFTargetMachine.h | 7
-rw-r--r--  lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp | 1
-rw-r--r--  lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp | 10
-rw-r--r--  lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.h | 1
-rw-r--r--  lib/Target/BPF/TargetInfo/BPFTargetInfo.cpp | 2
-rw-r--r--  lib/Target/CppBackend/CPPBackend.cpp | 3
-rw-r--r--  lib/Target/CppBackend/CPPTargetMachine.h | 15
-rw-r--r--  lib/Target/Hexagon/CMakeLists.txt | 1
-rw-r--r--  lib/Target/Hexagon/Hexagon.h | 1
-rw-r--r--  lib/Target/Hexagon/Hexagon.td | 16
-rw-r--r--  lib/Target/Hexagon/HexagonCopyToCombine.cpp | 17
-rw-r--r--  lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp | 161
-rw-r--r--  lib/Target/Hexagon/HexagonFrameLowering.cpp | 2
-rw-r--r--  lib/Target/Hexagon/HexagonHardwareLoops.cpp | 2
-rw-r--r--  lib/Target/Hexagon/HexagonISelDAGToDAG.cpp | 1555
-rw-r--r--  lib/Target/Hexagon/HexagonISelLowering.cpp | 896
-rw-r--r--  lib/Target/Hexagon/HexagonISelLowering.h | 81
-rw-r--r--  lib/Target/Hexagon/HexagonInstrFormats.td | 63
-rw-r--r--  lib/Target/Hexagon/HexagonInstrFormatsV4.td | 94
-rw-r--r--  lib/Target/Hexagon/HexagonInstrInfo.cpp | 179
-rw-r--r--  lib/Target/Hexagon/HexagonInstrInfo.h | 22
-rw-r--r--  lib/Target/Hexagon/HexagonInstrInfo.td | 473
-rw-r--r--  lib/Target/Hexagon/HexagonInstrInfoV4.td | 465
-rw-r--r--  lib/Target/Hexagon/HexagonInstrInfoV5.td | 8
-rw-r--r--  lib/Target/Hexagon/HexagonInstrInfoVector.td | 418
-rw-r--r--  lib/Target/Hexagon/HexagonIntrinsics.td | 49
-rw-r--r--  lib/Target/Hexagon/HexagonIntrinsicsV4.td | 10
-rw-r--r--  lib/Target/Hexagon/HexagonNewValueJump.cpp | 6
-rw-r--r--  lib/Target/Hexagon/HexagonOperands.td | 388
-rw-r--r--  lib/Target/Hexagon/HexagonPeephole.cpp | 7
-rw-r--r--  lib/Target/Hexagon/HexagonRegisterInfo.cpp | 15
-rw-r--r--  lib/Target/Hexagon/HexagonRegisterInfo.h | 12
-rw-r--r--  lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp | 85
-rw-r--r--  lib/Target/Hexagon/HexagonSplitTFRCondSets.cpp | 172
-rw-r--r--  lib/Target/Hexagon/HexagonSubtarget.cpp | 10
-rw-r--r--  lib/Target/Hexagon/HexagonSubtarget.h | 5
-rw-r--r--  lib/Target/Hexagon/HexagonTargetMachine.cpp | 23
-rw-r--r--  lib/Target/Hexagon/HexagonTargetMachine.h | 4
-rw-r--r--  lib/Target/Hexagon/HexagonVLIWPacketizer.cpp | 9
-rw-r--r--  lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp | 1
-rw-r--r--  lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp | 10
-rw-r--r--  lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.h | 4
-rw-r--r--  lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp | 22
-rw-r--r--  lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h | 1
-rw-r--r--  lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.h | 2
-rw-r--r--  lib/Target/MSP430/MSP430ISelDAGToDAG.cpp | 8
-rw-r--r--  lib/Target/MSP430/MSP430ISelLowering.h | 6
-rw-r--r--  lib/Target/MSP430/MSP430RegisterInfo.h | 3
-rw-r--r--  lib/Target/MSP430/MSP430Subtarget.cpp | 3
-rw-r--r--  lib/Target/MSP430/MSP430TargetMachine.cpp | 5
-rw-r--r--  lib/Target/MSP430/MSP430TargetMachine.h | 4
-rw-r--r--  lib/Target/Mips/AsmParser/MipsAsmParser.cpp | 99
-rw-r--r--  lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h | 7
-rw-r--r--  lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp | 16
-rw-r--r--  lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp | 12
-rw-r--r--  lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h | 4
-rw-r--r--  lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h | 2
-rw-r--r--  lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp | 7
-rw-r--r--  lib/Target/Mips/MCTargetDesc/MipsMCNaCl.h | 1
-rw-r--r--  lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp | 148
-rw-r--r--  lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h | 2
-rw-r--r--  lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp | 8
-rw-r--r--  lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp | 36
-rw-r--r--  lib/Target/Mips/MicroMipsInstrInfo.td | 8
-rw-r--r--  lib/Target/Mips/Mips.h | 5
-rw-r--r--  lib/Target/Mips/Mips.td | 12
-rw-r--r--  lib/Target/Mips/Mips16HardFloat.cpp | 185
-rw-r--r--  lib/Target/Mips/Mips16HardFloat.h | 43
-rw-r--r--  lib/Target/Mips/Mips16InstrInfo.cpp | 4
-rw-r--r--  lib/Target/Mips/Mips16InstrInfo.h | 2
-rw-r--r--  lib/Target/Mips/Mips16RegisterInfo.cpp | 9
-rw-r--r--  lib/Target/Mips/Mips16RegisterInfo.h | 2
-rw-r--r--  lib/Target/Mips/Mips64InstrInfo.td | 4
-rw-r--r--  lib/Target/Mips/MipsAsmPrinter.cpp | 27
-rw-r--r--  lib/Target/Mips/MipsCallingConv.td | 2
-rw-r--r--  lib/Target/Mips/MipsDelaySlotFiller.cpp | 36
-rw-r--r--  lib/Target/Mips/MipsFastISel.cpp | 92
-rw-r--r--  lib/Target/Mips/MipsISelDAGToDAG.cpp | 17
-rw-r--r--  lib/Target/Mips/MipsISelDAGToDAG.h | 2
-rw-r--r--  lib/Target/Mips/MipsISelLowering.cpp | 46
-rw-r--r--  lib/Target/Mips/MipsISelLowering.h | 11
-rw-r--r--  lib/Target/Mips/MipsInstrInfo.h | 2
-rw-r--r--  lib/Target/Mips/MipsInstrInfo.td | 24
-rw-r--r--  lib/Target/Mips/MipsMCInstLower.cpp | 1
-rw-r--r--  lib/Target/Mips/MipsMachineFunction.cpp | 17
-rw-r--r--  lib/Target/Mips/MipsModuleISelDAGToDAG.cpp | 38
-rw-r--r--  lib/Target/Mips/MipsModuleISelDAGToDAG.h | 58
-rw-r--r--  lib/Target/Mips/MipsOs16.cpp | 129
-rw-r--r--  lib/Target/Mips/MipsOs16.h | 47
-rw-r--r--  lib/Target/Mips/MipsRegisterInfo.cpp | 15
-rw-r--r--  lib/Target/Mips/MipsRegisterInfo.h | 14
-rw-r--r--  lib/Target/Mips/MipsSEISelDAGToDAG.cpp | 84
-rw-r--r--  lib/Target/Mips/MipsSEISelDAGToDAG.h | 10
-rw-r--r--  lib/Target/Mips/MipsSEInstrInfo.cpp | 2
-rw-r--r--  lib/Target/Mips/MipsSERegisterInfo.cpp | 14
-rw-r--r--  lib/Target/Mips/MipsSERegisterInfo.h | 2
-rw-r--r--  lib/Target/Mips/MipsSchedule.td | 4
-rw-r--r--  lib/Target/Mips/MipsTargetMachine.cpp | 22
-rw-r--r--  lib/Target/Mips/MipsTargetMachine.h | 4
-rw-r--r--  lib/Target/Mips/MipsTargetObjectFile.cpp | 14
-rw-r--r--  lib/Target/Mips/MipsTargetObjectFile.h | 4
-rw-r--r--  lib/Target/Mips/MipsTargetStreamer.h | 7
-rw-r--r--  lib/Target/NVPTX/CMakeLists.txt | 1
-rw-r--r--  lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp | 2
-rw-r--r--  lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp | 50
-rw-r--r--  lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.h | 2
-rw-r--r--  lib/Target/NVPTX/NVPTXAllocaHoisting.cpp | 40
-rw-r--r--  lib/Target/NVPTX/NVPTXAllocaHoisting.h | 28
-rw-r--r--  lib/Target/NVPTX/NVPTXAsmPrinter.cpp | 34
-rw-r--r--  lib/Target/NVPTX/NVPTXAsmPrinter.h | 4
-rw-r--r--  lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp | 7
-rw-r--r--  lib/Target/NVPTX/NVPTXGenericToNVVM.cpp | 1
-rw-r--r--  lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp | 11
-rw-r--r--  lib/Target/NVPTX/NVPTXISelDAGToDAG.h | 2
-rw-r--r--  lib/Target/NVPTX/NVPTXISelLowering.cpp | 15
-rw-r--r--  lib/Target/NVPTX/NVPTXISelLowering.h | 6
-rw-r--r--  lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp | 35
-rw-r--r--  lib/Target/NVPTX/NVPTXLowerAggrCopies.h | 29
-rw-r--r--  lib/Target/NVPTX/NVPTXLowerStructArgs.cpp | 4
-rw-r--r--  lib/Target/NVPTX/NVPTXMCExpr.h | 4
-rw-r--r--  lib/Target/NVPTX/NVPTXRegisterInfo.cpp | 2
-rw-r--r--  lib/Target/NVPTX/NVPTXRegisterInfo.h | 3
-rw-r--r--  lib/Target/NVPTX/NVPTXSection.h | 5
-rw-r--r--  lib/Target/NVPTX/NVPTXTargetMachine.cpp | 12
-rw-r--r--  lib/Target/NVPTX/NVPTXTargetMachine.h | 7
-rw-r--r--  lib/Target/NVPTX/NVPTXTargetObjectFile.h | 3
-rw-r--r--  lib/Target/NVPTX/NVPTXUtilities.cpp | 9
-rw-r--r--  lib/Target/NVPTX/NVPTXutil.cpp | 90
-rw-r--r--  lib/Target/NVPTX/NVPTXutil.h | 25
-rw-r--r--  lib/Target/NVPTX/NVVMReflect.cpp | 38
-rw-r--r--  lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp | 8
-rw-r--r--  lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp | 6
-rw-r--r--  lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp | 14
-rw-r--r--  lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h | 2
-rw-r--r--  lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp | 4
-rw-r--r--  lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp | 16
-rw-r--r--  lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h | 5
-rw-r--r--  lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp | 122
-rw-r--r--  lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h | 3
-rw-r--r--  lib/Target/PowerPC/PPC.td | 19
-rw-r--r--  lib/Target/PowerPC/PPCAsmPrinter.cpp | 23
-rw-r--r--  lib/Target/PowerPC/PPCCTRLoops.cpp | 5
-rw-r--r--  lib/Target/PowerPC/PPCFastISel.cpp | 16
-rw-r--r--  lib/Target/PowerPC/PPCISelDAGToDAG.cpp | 48
-rw-r--r--  lib/Target/PowerPC/PPCISelLowering.cpp | 297
-rw-r--r--  lib/Target/PowerPC/PPCISelLowering.h | 26
-rw-r--r--  lib/Target/PowerPC/PPCInstr64Bit.td | 28
-rw-r--r--  lib/Target/PowerPC/PPCInstrAltivec.td | 94
-rw-r--r--  lib/Target/PowerPC/PPCInstrFormats.td | 87
-rw-r--r--  lib/Target/PowerPC/PPCInstrHTM.td | 172
-rw-r--r--  lib/Target/PowerPC/PPCInstrInfo.cpp | 58
-rw-r--r--  lib/Target/PowerPC/PPCInstrInfo.h | 2
-rw-r--r--  lib/Target/PowerPC/PPCInstrInfo.td | 82
-rw-r--r--  lib/Target/PowerPC/PPCInstrQPX.td | 2
-rw-r--r--  lib/Target/PowerPC/PPCInstrVSX.td | 2
-rw-r--r--  lib/Target/PowerPC/PPCLoopDataPrefetch.cpp | 6
-rw-r--r--  lib/Target/PowerPC/PPCLoopPreIncPrep.cpp | 21
-rw-r--r--  lib/Target/PowerPC/PPCMCInstLower.cpp | 3
-rw-r--r--  lib/Target/PowerPC/PPCRegisterInfo.cpp | 170
-rw-r--r--  lib/Target/PowerPC/PPCRegisterInfo.h | 52
-rw-r--r--  lib/Target/PowerPC/PPCRegisterInfo.td | 2
-rw-r--r--  lib/Target/PowerPC/PPCSchedule.td | 398
-rw-r--r--  lib/Target/PowerPC/PPCSubtarget.cpp | 3
-rw-r--r--  lib/Target/PowerPC/PPCSubtarget.h | 6
-rw-r--r--  lib/Target/PowerPC/PPCTargetMachine.cpp | 17
-rw-r--r--  lib/Target/PowerPC/PPCTargetMachine.h | 6
-rw-r--r--  lib/Target/PowerPC/PPCTargetTransformInfo.cpp | 4
-rw-r--r--  lib/Target/PowerPC/PPCTargetTransformInfo.h | 1
-rw-r--r--  lib/Target/PowerPC/README.txt | 7
-rw-r--r--  lib/Target/PowerPC/README_ALTIVEC.txt | 104
-rw-r--r--  lib/Target/R600/AMDGPU.td | 5
-rw-r--r--  lib/Target/R600/AMDGPUAsmPrinter.cpp | 10
-rw-r--r--  lib/Target/R600/AMDGPUISelDAGToDAG.cpp | 127
-rw-r--r--  lib/Target/R600/AMDGPUISelLowering.cpp | 3
-rw-r--r--  lib/Target/R600/AMDGPUInstrInfo.cpp | 30
-rw-r--r--  lib/Target/R600/AMDGPUInstrInfo.h | 13
-rw-r--r--  lib/Target/R600/AMDGPUInstructions.td | 22
-rw-r--r--  lib/Target/R600/AMDGPUIntrinsics.td | 1
-rw-r--r--  lib/Target/R600/AMDGPUPromoteAlloca.cpp | 13
-rw-r--r--  lib/Target/R600/AMDGPURegisterInfo.cpp | 5
-rw-r--r--  lib/Target/R600/AMDGPURegisterInfo.h | 3
-rw-r--r--  lib/Target/R600/AMDGPUSubtarget.cpp | 2
-rw-r--r--  lib/Target/R600/AMDGPUSubtarget.h | 9
-rw-r--r--  lib/Target/R600/AMDGPUTargetMachine.cpp | 30
-rw-r--r--  lib/Target/R600/AMDGPUTargetMachine.h | 10
-rw-r--r--  lib/Target/R600/AMDGPUTargetTransformInfo.cpp | 5
-rw-r--r--  lib/Target/R600/AMDILCFGStructurizer.cpp | 26
-rw-r--r--  lib/Target/R600/AsmParser/AMDGPUAsmParser.cpp | 7
-rw-r--r--  lib/Target/R600/EvergreenInstructions.td | 5
-rw-r--r--  lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp | 16
-rw-r--r--  lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h | 2
-rw-r--r--  lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.cpp | 58
-rw-r--r--  lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.h | 4
-rw-r--r--  lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp | 4
-rw-r--r--  lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp | 1
-rw-r--r--  lib/Target/R600/Processors.td | 8
-rw-r--r--  lib/Target/R600/R600ClauseMergePass.cpp | 2
-rw-r--r--  lib/Target/R600/R600ISelLowering.cpp | 10
-rw-r--r--  lib/Target/R600/R600InstrInfo.cpp | 14
-rw-r--r--  lib/Target/R600/R600OptimizeVectorRegisters.cpp | 4
-rw-r--r--  lib/Target/R600/R600RegisterInfo.cpp | 10
-rw-r--r--  lib/Target/R600/R600RegisterInfo.h | 2
-rw-r--r--  lib/Target/R600/SIFixSGPRLiveRanges.cpp | 1
-rw-r--r--  lib/Target/R600/SIFoldOperands.cpp | 3
-rw-r--r--  lib/Target/R600/SIISelLowering.cpp | 53
-rw-r--r--  lib/Target/R600/SIInsertWaits.cpp | 6
-rw-r--r--  lib/Target/R600/SIInstrFormats.td | 11
-rw-r--r--  lib/Target/R600/SIInstrInfo.cpp | 112
-rw-r--r--  lib/Target/R600/SIInstrInfo.h | 6
-rw-r--r--  lib/Target/R600/SIInstrInfo.td | 569
-rw-r--r--  lib/Target/R600/SIInstructions.td | 486
-rw-r--r--  lib/Target/R600/SILoadStoreOptimizer.cpp | 5
-rw-r--r--  lib/Target/R600/SIRegisterInfo.cpp | 84
-rw-r--r--  lib/Target/R600/SIRegisterInfo.h | 9
-rw-r--r--  lib/Target/R600/SIRegisterInfo.td | 7
-rw-r--r--  lib/Target/R600/SIShrinkInstructions.cpp | 8
-rw-r--r--  lib/Target/README.txt | 41
-rw-r--r--  lib/Target/Sparc/MCTargetDesc/SparcMCCodeEmitter.cpp | 1
-rw-r--r--  lib/Target/Sparc/MCTargetDesc/SparcMCExpr.h | 4
-rw-r--r--  lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp | 96
-rw-r--r--  lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h | 1
-rw-r--r--  lib/Target/Sparc/SparcISelDAGToDAG.cpp | 9
-rw-r--r--  lib/Target/Sparc/SparcISelLowering.cpp | 14
-rw-r--r--  lib/Target/Sparc/SparcInstrInfo.cpp | 5
-rw-r--r--  lib/Target/Sparc/SparcInstrInfo.h | 2
-rw-r--r--  lib/Target/Sparc/SparcRegisterInfo.cpp | 16
-rw-r--r--  lib/Target/Sparc/SparcRegisterInfo.h | 15
-rw-r--r--  lib/Target/Sparc/SparcTargetMachine.cpp | 11
-rw-r--r--  lib/Target/Sparc/SparcTargetMachine.h | 6
-rw-r--r--  lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp | 1
-rw-r--r--  lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp | 12
-rw-r--r--  lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h | 1
-rw-r--r--  lib/Target/SystemZ/SystemZISelDAGToDAG.cpp | 39
-rw-r--r--  lib/Target/SystemZ/SystemZISelLowering.cpp | 5
-rw-r--r--  lib/Target/SystemZ/SystemZISelLowering.h | 20
-rw-r--r--  lib/Target/SystemZ/SystemZInstrInfo.cpp | 15
-rw-r--r--  lib/Target/SystemZ/SystemZInstrInfo.h | 8
-rw-r--r--  lib/Target/SystemZ/SystemZRegisterInfo.cpp | 3
-rw-r--r--  lib/Target/SystemZ/SystemZRegisterInfo.h | 6
-rw-r--r--  lib/Target/SystemZ/SystemZTargetMachine.cpp | 10
-rw-r--r--  lib/Target/SystemZ/SystemZTargetMachine.h | 6
-rw-r--r--  lib/Target/Target.cpp | 4
-rw-r--r--  lib/Target/TargetLoweringObjectFile.cpp | 6
-rw-r--r--  lib/Target/TargetMachine.cpp | 17
-rw-r--r--  lib/Target/TargetMachineC.cpp | 3
-rw-r--r--  lib/Target/TargetSubtargetInfo.cpp | 20
-rw-r--r--  lib/Target/X86/Android.mk | 2
-rw-r--r--  lib/Target/X86/AsmParser/X86AsmParser.cpp | 42
-rw-r--r--  lib/Target/X86/AsmParser/X86Operand.h | 8
-rw-r--r--  lib/Target/X86/Disassembler/X86Disassembler.cpp | 42
-rw-r--r--  lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp | 9
-rw-r--r--  lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h | 12
-rw-r--r--  lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp | 8
-rw-r--r--  lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp | 417
-rw-r--r--  lib/Target/X86/MCTargetDesc/X86ELFRelocationInfo.cpp | 2
-rw-r--r--  lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp | 2
-rw-r--r--  lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp | 238
-rw-r--r--  lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h | 19
-rw-r--r--  lib/Target/X86/MCTargetDesc/X86MachORelocationInfo.cpp | 4
-rw-r--r--  lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp | 8
-rw-r--r--  lib/Target/X86/README-SSE.txt | 78
-rw-r--r--  lib/Target/X86/X86AsmPrinter.cpp | 3
-rw-r--r--  lib/Target/X86/X86FastISel.cpp | 152
-rw-r--r--  lib/Target/X86/X86FloatingPoint.cpp | 6
-rw-r--r--  lib/Target/X86/X86FrameLowering.cpp | 2
-rw-r--r--  lib/Target/X86/X86ISelDAGToDAG.cpp | 21
-rw-r--r--  lib/Target/X86/X86ISelLowering.cpp | 1258
-rw-r--r--  lib/Target/X86/X86ISelLowering.h | 147
-rw-r--r--  lib/Target/X86/X86InstrAVX512.td | 773
-rw-r--r--  lib/Target/X86/X86InstrFragmentsSIMD.td | 108
-rw-r--r--  lib/Target/X86/X86InstrInfo.cpp | 55
-rw-r--r--  lib/Target/X86/X86InstrInfo.h | 21
-rw-r--r--  lib/Target/X86/X86InstrInfo.td | 18
-rw-r--r--  lib/Target/X86/X86InstrSSE.td | 163
-rw-r--r--  lib/Target/X86/X86IntrinsicsInfo.h | 2
-rw-r--r--  lib/Target/X86/X86MCInstLower.cpp | 4
-rw-r--r--  lib/Target/X86/X86RegisterInfo.cpp | 81
-rw-r--r--  lib/Target/X86/X86RegisterInfo.h | 17
-rw-r--r--  lib/Target/X86/X86SchedHaswell.td | 4
-rw-r--r--  lib/Target/X86/X86SelectionDAGInfo.cpp | 8
-rw-r--r--  lib/Target/X86/X86TargetMachine.cpp | 10
-rw-r--r--  lib/Target/X86/X86TargetMachine.h | 4
-rw-r--r--  lib/Target/X86/X86TargetObjectFile.cpp | 42
-rw-r--r--  lib/Target/X86/X86TargetObjectFile.h | 23
-rw-r--r--  lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp | 17
-rw-r--r--  lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.h | 2
-rw-r--r--  lib/Target/XCore/XCoreISelDAGToDAG.cpp | 8
-rw-r--r--  lib/Target/XCore/XCoreISelLowering.h | 6
-rw-r--r--  lib/Target/XCore/XCoreRegisterInfo.cpp | 4
-rw-r--r--  lib/Target/XCore/XCoreRegisterInfo.h | 3
-rw-r--r--  lib/Target/XCore/XCoreTargetMachine.cpp | 5
-rw-r--r--  lib/Target/XCore/XCoreTargetMachine.h | 7
385 files changed, 10436 insertions, 8245 deletions
diff --git a/lib/Target/AArch64/AArch64.td b/lib/Target/AArch64/AArch64.td
index dff48f9..bb3db4b 100644
--- a/lib/Target/AArch64/AArch64.td
+++ b/lib/Target/AArch64/AArch64.td
@@ -32,6 +32,9 @@ def FeatureCrypto : SubtargetFeature<"crypto", "HasCrypto", "true",
def FeatureCRC : SubtargetFeature<"crc", "HasCRC", "true",
"Enable ARMv8 CRC-32 checksum instructions">;
+def FeatureV8_1a : SubtargetFeature<"v8.1a", "HasV8_1a", "true",
+ "Enable ARMv8.1a extensions", [FeatureCRC]>;
+
/// Cyclone has register move instructions which are "free".
def FeatureZCRegMove : SubtargetFeature<"zcm", "HasZeroCycleRegMove", "true",
"Has zero-cycle register moves">;
@@ -89,6 +92,10 @@ def : ProcessorModel<"generic", NoSchedModel, [FeatureFPARMv8,
FeatureNEON,
FeatureCRC]>;
+def : ProcessorModel<"generic-armv8.1-a", NoSchedModel, [FeatureV8_1a,
+ FeatureNEON,
+ FeatureCrypto]>;
+
def : ProcessorModel<"cortex-a53", CortexA53Model, [ProcA53]>;
def : ProcessorModel<"cortex-a57", CortexA57Model, [ProcA57]>;
// FIXME: Cortex-A72 is currently modelled as a Cortex-A57.
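
For illustration, a SubtargetFeature of this shape conventionally pairs a boolean
member in the subtarget class (named by the second template argument, here
"HasV8_1a") with a hand-written accessor; the generated ParseSubtargetFeatures()
sets the flag when -mattr=+v8.1a or a processor model enables it. A minimal
sketch, with a hypothetical accessor name that is not taken from this patch:

    // AArch64Subtarget.h (sketch, not part of the diff)
    class AArch64Subtarget : public AArch64GenSubtargetInfo {
      bool HasV8_1a;  // written by the TableGen'erated feature parser
    public:
      bool hasV8_1a() const { return HasV8_1a; }  // hypothetical accessor
    };

    // Lowering code could then gate ARMv8.1a-only patterns:
    //   if (Subtarget->hasV8_1a()) { ... }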
diff --git a/lib/Target/AArch64/AArch64A53Fix835769.cpp b/lib/Target/AArch64/AArch64A53Fix835769.cpp
index dd401c6..3bc5a54 100644
--- a/lib/Target/AArch64/AArch64A53Fix835769.cpp
+++ b/lib/Target/AArch64/AArch64A53Fix835769.cpp
@@ -24,6 +24,7 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
using namespace llvm;
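
The lone change here is the raw_ostream.h include: this pass prints through
DEBUG(dbgs() << ...), and dbgs() returns a raw_ostream&, so the file needs the
full class definition rather than relying on a transitive include (several files
in this commit receive the same fix). A minimal sketch of the dependency,
assuming a DEBUG_TYPE is defined as usual:

    #define DEBUG_TYPE "aarch64-fix-835769"        // assumed name; DEBUG() needs it
    #include "llvm/Support/Debug.h"
    #include "llvm/Support/raw_ostream.h"          // defines raw_ostream, operator<<

    static void traceCount(unsigned NumNOPs) {     // hypothetical helper
      DEBUG(llvm::dbgs() << "Inserted " << NumNOPs << " NOPs\n");
    }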
diff --git a/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp b/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp
index 2cf3c22..bffd9e6 100644
--- a/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp
+++ b/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp
@@ -142,7 +142,7 @@ private:
int scavengeRegister(Chain *G, Color C, MachineBasicBlock &MBB);
void scanInstruction(MachineInstr *MI, unsigned Idx,
std::map<unsigned, Chain*> &Active,
- std::set<std::unique_ptr<Chain>> &AllChains);
+ std::vector<std::unique_ptr<Chain>> &AllChains);
void maybeKillChain(MachineOperand &MO, unsigned Idx,
std::map<unsigned, Chain*> &RegChains);
Color getColor(unsigned Register);
@@ -287,12 +287,12 @@ public:
raw_string_ostream OS(S);
OS << "{";
- StartInst->print(OS, NULL, true);
+ StartInst->print(OS, /* SkipOpers= */true);
OS << " -> ";
- LastInst->print(OS, NULL, true);
+ LastInst->print(OS, /* SkipOpers= */true);
if (KillInst) {
OS << " (kill @ ";
- KillInst->print(OS, NULL, true);
+ KillInst->print(OS, /* SkipOpers= */true);
OS << ")";
}
OS << "}";
@@ -307,6 +307,11 @@ public:
//===----------------------------------------------------------------------===//
bool AArch64A57FPLoadBalancing::runOnMachineFunction(MachineFunction &F) {
+ // Don't do anything if this isn't an A53 or A57.
+ if (!(F.getSubtarget<AArch64Subtarget>().isCortexA53() ||
+ F.getSubtarget<AArch64Subtarget>().isCortexA57()))
+ return false;
+
bool Changed = false;
DEBUG(dbgs() << "***** AArch64A57FPLoadBalancing *****\n");
@@ -331,7 +336,7 @@ bool AArch64A57FPLoadBalancing::runOnBasicBlock(MachineBasicBlock &MBB) {
// been killed yet. This is keyed by register - all chains can only have one
// "link" register between each inst in the chain.
std::map<unsigned, Chain*> ActiveChains;
- std::set<std::unique_ptr<Chain>> AllChains;
+ std::vector<std::unique_ptr<Chain>> AllChains;
unsigned Idx = 0;
for (auto &MI : MBB)
scanInstruction(&MI, Idx++, ActiveChains, AllChains);
@@ -598,10 +603,9 @@ bool AArch64A57FPLoadBalancing::colorChain(Chain *G, Color C,
return Changed;
}
-void AArch64A57FPLoadBalancing::
-scanInstruction(MachineInstr *MI, unsigned Idx,
- std::map<unsigned, Chain*> &ActiveChains,
- std::set<std::unique_ptr<Chain>> &AllChains) {
+void AArch64A57FPLoadBalancing::scanInstruction(
+ MachineInstr *MI, unsigned Idx, std::map<unsigned, Chain *> &ActiveChains,
+ std::vector<std::unique_ptr<Chain>> &AllChains) {
// Inspect "MI", updating ActiveChains and AllChains.
if (isMul(MI)) {
@@ -620,7 +624,7 @@ scanInstruction(MachineInstr *MI, unsigned Idx,
auto G = llvm::make_unique<Chain>(MI, Idx, getColor(DestReg));
ActiveChains[DestReg] = G.get();
- AllChains.insert(std::move(G));
+ AllChains.push_back(std::move(G));
} else if (isMla(MI)) {
@@ -664,7 +668,7 @@ scanInstruction(MachineInstr *MI, unsigned Idx,
<< TRI->getName(DestReg) << "\n");
auto G = llvm::make_unique<Chain>(MI, Idx, getColor(DestReg));
ActiveChains[DestReg] = G.get();
- AllChains.insert(std::move(G));
+ AllChains.push_back(std::move(G));
} else {
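
The container change from std::set to std::vector works because AllChains only
owns the Chain objects; nothing ever looks a chain up in it or relies on
ordering, so a vector keeps insertion order and avoids per-element tree nodes.
A condensed sketch of the ownership pattern used above:

    std::vector<std::unique_ptr<Chain>> AllChains;   // owns every chain
    std::map<unsigned, Chain *> ActiveChains;        // non-owning, keyed by reg

    auto G = llvm::make_unique<Chain>(MI, Idx, getColor(DestReg));
    ActiveChains[DestReg] = G.get();    // borrow a raw pointer first
    AllChains.push_back(std::move(G));  // then transfer ownership, O(1) amortized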
diff --git a/lib/Target/AArch64/AArch64AddressTypePromotion.cpp b/lib/Target/AArch64/AArch64AddressTypePromotion.cpp
index 287989f..716e1a3 100644
--- a/lib/Target/AArch64/AArch64AddressTypePromotion.cpp
+++ b/lib/Target/AArch64/AArch64AddressTypePromotion.cpp
@@ -41,6 +41,7 @@
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
diff --git a/lib/Target/AArch64/AArch64AsmPrinter.cpp b/lib/Target/AArch64/AArch64AsmPrinter.cpp
index d64d851..1b4483a 100644
--- a/lib/Target/AArch64/AArch64AsmPrinter.cpp
+++ b/lib/Target/AArch64/AArch64AsmPrinter.cpp
@@ -12,12 +12,14 @@
//
//===----------------------------------------------------------------------===//
+#include "MCTargetDesc/AArch64AddressingModes.h"
#include "AArch64.h"
#include "AArch64MCInstLower.h"
#include "AArch64MachineFunctionInfo.h"
#include "AArch64RegisterInfo.h"
#include "AArch64Subtarget.h"
#include "InstPrinter/AArch64InstPrinter.h"
+#include "MCTargetDesc/AArch64MCExpr.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Twine.h"
@@ -34,8 +36,10 @@
#include "llvm/MC/MCInstBuilder.h"
#include "llvm/MC/MCLinkerOptimizationHint.h"
#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
#define DEBUG_TYPE "asm-printer"
@@ -49,7 +53,7 @@ class AArch64AsmPrinter : public AsmPrinter {
public:
AArch64AsmPrinter(TargetMachine &TM, std::unique_ptr<MCStreamer> Streamer)
: AsmPrinter(TM, std::move(Streamer)), MCInstLowering(OutContext, *this),
- SM(*this), AArch64FI(nullptr), LOHLabelCounter(0) {}
+ SM(*this), AArch64FI(nullptr) {}
const char *getPassName() const override {
return "AArch64 Assembly Printer";
@@ -110,7 +114,6 @@ private:
typedef std::map<const MachineInstr *, MCSymbol *> MInstToMCSymbol;
MInstToMCSymbol LOHInstToLabel;
- unsigned LOHLabelCounter;
};
} // end of anonymous namespace
@@ -219,6 +222,17 @@ void AArch64AsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNum,
O << '#' << Imm;
break;
}
+ case MachineOperand::MO_GlobalAddress: {
+ const GlobalValue *GV = MO.getGlobal();
+ MCSymbol *Sym = getSymbol(GV);
+
+ // FIXME: Can we get anything other than a plain symbol here?
+ assert(!MO.getTargetFlags() && "Unknown operand target flag!");
+
+ O << *Sym;
+ printOffset(MO.getOffset(), O);
+ break;
+ }
}
}
@@ -450,7 +464,7 @@ void AArch64AsmPrinter::EmitInstruction(const MachineInstr *MI) {
if (AArch64FI->getLOHRelated().count(MI)) {
// Generate a label for LOH related instruction
- MCSymbol *LOHLabel = GetTempSymbol("loh", LOHLabelCounter++);
+ MCSymbol *LOHLabel = createTempSymbol("loh");
// Associate the instruction with the label
LOHInstToLabel[MI] = LOHLabel;
OutStreamer.EmitLabel(LOHLabel);
@@ -489,24 +503,57 @@ void AArch64AsmPrinter::EmitInstruction(const MachineInstr *MI) {
EmitToStreamer(OutStreamer, TmpInst);
return;
}
- case AArch64::TLSDESC_BLR: {
- MCOperand Callee, Sym;
- MCInstLowering.lowerOperand(MI->getOperand(0), Callee);
- MCInstLowering.lowerOperand(MI->getOperand(1), Sym);
-
- // First emit a relocation-annotation. This expands to no code, but requests
+ case AArch64::TLSDESC_CALLSEQ: {
+ /// lower this to:
+ /// adrp x0, :tlsdesc:var
+ /// ldr x1, [x0, #:tlsdesc_lo12:var]
+ /// add x0, x0, #:tlsdesc_lo12:var
+ /// .tlsdesccall var
+ /// blr x1
+ /// (TPIDR_EL0 offset now in x0)
+ const MachineOperand &MO_Sym = MI->getOperand(0);
+ MachineOperand MO_TLSDESC_LO12(MO_Sym), MO_TLSDESC(MO_Sym);
+ MCOperand Sym, SymTLSDescLo12, SymTLSDesc;
+ MO_TLSDESC_LO12.setTargetFlags(AArch64II::MO_TLS | AArch64II::MO_PAGEOFF |
+ AArch64II::MO_NC);
+ MO_TLSDESC.setTargetFlags(AArch64II::MO_TLS | AArch64II::MO_PAGE);
+ MCInstLowering.lowerOperand(MO_Sym, Sym);
+ MCInstLowering.lowerOperand(MO_TLSDESC_LO12, SymTLSDescLo12);
+ MCInstLowering.lowerOperand(MO_TLSDESC, SymTLSDesc);
+
+ MCInst Adrp;
+ Adrp.setOpcode(AArch64::ADRP);
+ Adrp.addOperand(MCOperand::CreateReg(AArch64::X0));
+ Adrp.addOperand(SymTLSDesc);
+ EmitToStreamer(OutStreamer, Adrp);
+
+ MCInst Ldr;
+ Ldr.setOpcode(AArch64::LDRXui);
+ Ldr.addOperand(MCOperand::CreateReg(AArch64::X1));
+ Ldr.addOperand(MCOperand::CreateReg(AArch64::X0));
+ Ldr.addOperand(SymTLSDescLo12);
+ Ldr.addOperand(MCOperand::CreateImm(0));
+ EmitToStreamer(OutStreamer, Ldr);
+
+ MCInst Add;
+ Add.setOpcode(AArch64::ADDXri);
+ Add.addOperand(MCOperand::CreateReg(AArch64::X0));
+ Add.addOperand(MCOperand::CreateReg(AArch64::X0));
+ Add.addOperand(SymTLSDescLo12);
+ Add.addOperand(MCOperand::CreateImm(AArch64_AM::getShiftValue(0)));
+ EmitToStreamer(OutStreamer, Add);
+
+ // Emit a relocation-annotation. This expands to no code, but requests
// the following instruction gets an R_AARCH64_TLSDESC_CALL.
MCInst TLSDescCall;
TLSDescCall.setOpcode(AArch64::TLSDESCCALL);
TLSDescCall.addOperand(Sym);
EmitToStreamer(OutStreamer, TLSDescCall);
- // Other than that it's just a normal indirect call to the function loaded
- // from the descriptor.
- MCInst BLR;
- BLR.setOpcode(AArch64::BLR);
- BLR.addOperand(Callee);
- EmitToStreamer(OutStreamer, BLR);
+ MCInst Blr;
+ Blr.setOpcode(AArch64::BLR);
+ Blr.addOperand(MCOperand::CreateReg(AArch64::X1));
+ EmitToStreamer(OutStreamer, Blr);
return;
}
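
For comparison, the same instructions could be built with MCInstBuilder (the
header is already included in this file); this is only an equivalent sketch of
the first two instructions, not what the patch does:

    EmitToStreamer(OutStreamer, MCInstBuilder(AArch64::ADRP)
                                    .addReg(AArch64::X0)
                                    .addOperand(SymTLSDesc));
    EmitToStreamer(OutStreamer, MCInstBuilder(AArch64::LDRXui)
                                    .addReg(AArch64::X1)
                                    .addReg(AArch64::X0)
                                    .addOperand(SymTLSDescLo12)
                                    .addImm(0));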
diff --git a/lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp b/lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp
index 3b74481..06ff9af 100644
--- a/lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp
+++ b/lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp
@@ -62,10 +62,10 @@ struct LDTLSCleanup : public MachineFunctionPass {
for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;
++I) {
switch (I->getOpcode()) {
- case AArch64::TLSDESC_BLR:
+ case AArch64::TLSDESC_CALLSEQ:
// Make sure it's a local dynamic access.
- if (!I->getOperand(1).isSymbol() ||
- strcmp(I->getOperand(1).getSymbolName(), "_TLS_MODULE_BASE_"))
+ if (!I->getOperand(0).isSymbol() ||
+ strcmp(I->getOperand(0).getSymbolName(), "_TLS_MODULE_BASE_"))
break;
if (TLSBaseAddrReg)
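
The operand index moves from 1 to 0 because the new pseudo carries only the TLS
symbol: TLSDESC_BLR took (callee, symbol), whereas TLSDESC_CALLSEQ takes just
(symbol). A sketch of the updated check:

    // TLSDESC_CALLSEQ <ga:@var> -- the symbol is now the sole operand.
    const MachineOperand &Sym = I->getOperand(0);
    if (Sym.isSymbol() &&
        llvm::StringRef(Sym.getSymbolName()) == "_TLS_MODULE_BASE_") {
      // local-dynamic TLS access; its base address can be reused
    }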
diff --git a/lib/Target/AArch64/AArch64CollectLOH.cpp b/lib/Target/AArch64/AArch64CollectLOH.cpp
index 938dcb3..568f258 100644
--- a/lib/Target/AArch64/AArch64CollectLOH.cpp
+++ b/lib/Target/AArch64/AArch64CollectLOH.cpp
@@ -279,7 +279,7 @@ static const SetOfMachineInstr *getUses(const InstrToInstrs *sets, unsigned reg,
/// definition. It also considers definitions of ADRP instructions as uses and
/// ignores other uses. The ADRPMode is used to collect the information for LOH
/// that involve ADRP operations only.
-static void initReachingDef(MachineFunction &MF,
+static void initReachingDef(const MachineFunction &MF,
InstrToInstrs *ColorOpToReachedUses,
BlockToInstrPerColor &Gen, BlockToRegSet &Kill,
BlockToSetOfInstrsPerColor &ReachableUses,
@@ -288,7 +288,7 @@ static void initReachingDef(MachineFunction &MF,
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
unsigned NbReg = RegToId.size();
- for (MachineBasicBlock &MBB : MF) {
+ for (const MachineBasicBlock &MBB : MF) {
auto &BBGen = Gen[&MBB];
BBGen = make_unique<const MachineInstr *[]>(NbReg);
std::fill(BBGen.get(), BBGen.get() + NbReg, nullptr);
@@ -382,7 +382,7 @@ static void initReachingDef(MachineFunction &MF,
/// op.reachedUses
///
/// Out[bb] = Gen[bb] U (In[bb] - Kill[bb])
-static void reachingDefAlgorithm(MachineFunction &MF,
+static void reachingDefAlgorithm(const MachineFunction &MF,
InstrToInstrs *ColorOpToReachedUses,
BlockToSetOfInstrsPerColor &In,
BlockToSetOfInstrsPerColor &Out,
@@ -392,7 +392,7 @@ static void reachingDefAlgorithm(MachineFunction &MF,
bool HasChanged;
do {
HasChanged = false;
- for (MachineBasicBlock &MBB : MF) {
+ for (const MachineBasicBlock &MBB : MF) {
unsigned CurReg;
for (CurReg = 0; CurReg < NbReg; ++CurReg) {
SetOfMachineInstr &BBInSet = getSet(In, MBB, CurReg, NbReg);
@@ -401,7 +401,7 @@ static void reachingDefAlgorithm(MachineFunction &MF,
SetOfMachineInstr &BBOutSet = getSet(Out, MBB, CurReg, NbReg);
unsigned Size = BBOutSet.size();
// In[bb][color] = U Out[bb.predecessors][color]
- for (MachineBasicBlock *PredMBB : MBB.predecessors()) {
+ for (const MachineBasicBlock *PredMBB : MBB.predecessors()) {
SetOfMachineInstr &PredOutSet = getSet(Out, *PredMBB, CurReg, NbReg);
BBInSet.insert(PredOutSet.begin(), PredOutSet.end());
}
@@ -433,7 +433,7 @@ static void reachingDefAlgorithm(MachineFunction &MF,
/// @p DummyOp.
/// \pre ColorOpToReachedUses is an array of at least number of registers of
/// InstrToInstrs.
-static void reachingDef(MachineFunction &MF,
+static void reachingDef(const MachineFunction &MF,
InstrToInstrs *ColorOpToReachedUses,
const MapRegToId &RegToId, bool ADRPMode = false,
const MachineInstr *DummyOp = nullptr) {
@@ -983,7 +983,7 @@ static void computeOthers(const InstrToInstrs &UseToDefs,
/// Look for every register defined by potential LOHs candidates.
/// Map these registers with dense id in @p RegToId and vice-versa in
/// @p IdToReg. @p IdToReg is populated only in DEBUG mode.
-static void collectInvolvedReg(MachineFunction &MF, MapRegToId &RegToId,
+static void collectInvolvedReg(const MachineFunction &MF, MapRegToId &RegToId,
MapIdToReg &IdToReg,
const TargetRegisterInfo *TRI) {
unsigned CurRegId = 0;
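
Every hunk in this file is a const-correctness fix: the reaching-definition
analysis only reads the MachineFunction, so the parameters and the range-for
variables can all become const. A small sketch of the pattern, using a
hypothetical helper:

    // Hypothetical read-only traversal; const propagates from the parameter
    // through the range-for references.
    static unsigned countPredEdges(const llvm::MachineFunction &MF) {
      unsigned N = 0;
      for (const llvm::MachineBasicBlock &MBB : MF)
        N += MBB.pred_size();
      return N;
    }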
diff --git a/lib/Target/AArch64/AArch64FastISel.cpp b/lib/Target/AArch64/AArch64FastISel.cpp
index 61017c1..99cb641 100644
--- a/lib/Target/AArch64/AArch64FastISel.cpp
+++ b/lib/Target/AArch64/AArch64FastISel.cpp
@@ -3158,7 +3158,7 @@ bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) {
// Add a register mask with the call-preserved registers.
// Proper defs for return values will be added by setPhysRegsDeadExcept().
- MIB.addRegMask(TRI.getCallPreservedMask(CC));
+ MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC));
CLI.Call = MIB;
@@ -4563,7 +4563,7 @@ bool AArch64FastISel::selectShift(const Instruction *I) {
unsigned ResultReg = 0;
uint64_t ShiftVal = C->getZExtValue();
MVT SrcVT = RetVT;
- bool IsZExt = (I->getOpcode() == Instruction::AShr) ? false : true;
+ bool IsZExt = I->getOpcode() != Instruction::AShr;
const Value *Op0 = I->getOperand(0);
if (const auto *ZExt = dyn_cast<ZExtInst>(Op0)) {
if (!isIntExtFree(ZExt)) {
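
Besides the ternary cleanup (a comparison already yields a bool, so the
(cond) ? true : false wrapper was redundant), the first hunk tracks an LLVM API
change: getCallPreservedMask now takes the MachineFunction, so a target can
compute a preserved-register mask from per-function properties rather than from
the calling convention alone. The updated call-site shape:

    // The mask may now vary with the current function, not just with CC.
    const uint32_t *Mask = TRI.getCallPreservedMask(*FuncInfo.MF, CC);
    MIB.addRegMask(Mask);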
diff --git a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index ac11c4d..0a47dcb 100644
--- a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -65,7 +65,7 @@ public:
/// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
/// inline asm expressions.
bool SelectInlineAsmMemoryOperand(const SDValue &Op,
- char ConstraintCode,
+ unsigned ConstraintID,
std::vector<SDValue> &OutOps) override;
SDNode *SelectMLAV64LaneV128(SDNode *N);
@@ -211,13 +211,20 @@ static bool isOpcWithIntImmediate(const SDNode *N, unsigned Opc,
}
bool AArch64DAGToDAGISel::SelectInlineAsmMemoryOperand(
- const SDValue &Op, char ConstraintCode, std::vector<SDValue> &OutOps) {
- assert(ConstraintCode == 'm' && "unexpected asm memory constraint");
- // Require the address to be in a register. That is safe for all AArch64
- // variants and it is hard to do anything much smarter without knowing
- // how the operand is used.
- OutOps.push_back(Op);
- return false;
+ const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) {
+ switch(ConstraintID) {
+ default:
+ llvm_unreachable("Unexpected asm memory constraint");
+ case InlineAsm::Constraint_i:
+ case InlineAsm::Constraint_m:
+ case InlineAsm::Constraint_Q:
+ // Require the address to be in a register. That is safe for all AArch64
+ // variants and it is hard to do anything much smarter without knowing
+ // how the operand is used.
+ OutOps.push_back(Op);
+ return false;
+ }
+ return true;
}
/// SelectArithImmed - Select an immediate value that can be represented as
@@ -299,7 +306,7 @@ static AArch64_AM::ShiftExtendType getShiftTypeForNode(SDValue N) {
}
}
-/// \brief Determine wether it is worth to fold V into an extended register.
+/// \brief Determine whether it is worth to fold V into an extended register.
bool AArch64DAGToDAGISel::isWorthFolding(SDValue V) const {
// it hurts if the value is used at least twice, unless we are optimizing
// for code size.
@@ -1055,7 +1062,7 @@ SDNode *AArch64DAGToDAGISel::SelectLoad(SDNode *N, unsigned NumVecs,
SDValue Ops[] = {N->getOperand(2), // Mem operand;
Chain};
- EVT ResTys[] = {MVT::Untyped, MVT::Other};
+ const EVT ResTys[] = {MVT::Untyped, MVT::Other};
SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
SDValue SuperReg = SDValue(Ld, 0);
@@ -1077,8 +1084,8 @@ SDNode *AArch64DAGToDAGISel::SelectPostLoad(SDNode *N, unsigned NumVecs,
N->getOperand(2), // Incremental
Chain};
- EVT ResTys[] = {MVT::i64, // Type of the write back register
- MVT::Untyped, MVT::Other};
+ const EVT ResTys[] = {MVT::i64, // Type of the write back register
+ MVT::Untyped, MVT::Other};
SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
@@ -1119,8 +1126,8 @@ SDNode *AArch64DAGToDAGISel::SelectPostStore(SDNode *N, unsigned NumVecs,
unsigned Opc) {
SDLoc dl(N);
EVT VT = N->getOperand(2)->getValueType(0);
- EVT ResTys[] = {MVT::i64, // Type of the write back register
- MVT::Other}; // Type for the Chain
+ const EVT ResTys[] = {MVT::i64, // Type of the write back register
+ MVT::Other}; // Type for the Chain
// Form a REG_SEQUENCE to force register allocation.
bool Is128Bit = VT.getSizeInBits() == 128;
@@ -1136,6 +1143,7 @@ SDNode *AArch64DAGToDAGISel::SelectPostStore(SDNode *N, unsigned NumVecs,
return St;
}
+namespace {
/// WidenVector - Given a value in the V64 register class, produce the
/// equivalent value in the V128 register class.
class WidenVector {
@@ -1156,6 +1164,7 @@ public:
return DAG.getTargetInsertSubreg(AArch64::dsub, DL, WideTy, Undef, V64Reg);
}
};
+} // namespace
/// NarrowVector - Given a value in the V128 register class, produce the
/// equivalent value in the V64 register class.
@@ -1184,7 +1193,7 @@ SDNode *AArch64DAGToDAGISel::SelectLoadLane(SDNode *N, unsigned NumVecs,
SDValue RegSeq = createQTuple(Regs);
- EVT ResTys[] = {MVT::Untyped, MVT::Other};
+ const EVT ResTys[] = {MVT::Untyped, MVT::Other};
unsigned LaneNo =
cast<ConstantSDNode>(N->getOperand(NumVecs + 2))->getZExtValue();
@@ -1224,8 +1233,8 @@ SDNode *AArch64DAGToDAGISel::SelectPostLoadLane(SDNode *N, unsigned NumVecs,
SDValue RegSeq = createQTuple(Regs);
- EVT ResTys[] = {MVT::i64, // Type of the write back register
- MVT::Untyped, MVT::Other};
+ const EVT ResTys[] = {MVT::i64, // Type of the write back register
+ MVT::Untyped, MVT::Other};
unsigned LaneNo =
cast<ConstantSDNode>(N->getOperand(NumVecs + 1))->getZExtValue();
@@ -1309,8 +1318,8 @@ SDNode *AArch64DAGToDAGISel::SelectPostStoreLane(SDNode *N, unsigned NumVecs,
SDValue RegSeq = createQTuple(Regs);
- EVT ResTys[] = {MVT::i64, // Type of the write back register
- MVT::Other};
+ const EVT ResTys[] = {MVT::i64, // Type of the write back register
+ MVT::Other};
unsigned LaneNo =
cast<ConstantSDNode>(N->getOperand(NumVecs + 1))->getZExtValue();
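
The inline-asm hook change is mechanical but worth noting: the single constraint
character ('m') becomes an InlineAsm::Constraint_* ID, and each target switches
over the IDs it supports. As written above, the trailing "return true;" is
unreachable, since every case either returns or hits llvm_unreachable. A minimal
sketch of the pattern for a target that only handles plain memory operands:

    bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
                                      std::vector<SDValue> &OutOps) override {
      switch (ConstraintID) {
      case InlineAsm::Constraint_m:
        OutOps.push_back(Op);  // pass the address through in a register
        return false;          // false == successfully selected
      default:
        return true;           // true == constraint not handled
      }
    }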
diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp
index a1b324e..0c0e856 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -64,8 +64,16 @@ EnableAArch64ExtrGeneration("aarch64-extr-generation", cl::Hidden,
static cl::opt<bool>
EnableAArch64SlrGeneration("aarch64-shift-insert-generation", cl::Hidden,
- cl::desc("Allow AArch64 SLI/SRI formation"),
- cl::init(false));
+ cl::desc("Allow AArch64 SLI/SRI formation"),
+ cl::init(false));
+
+// FIXME: The necessary dtprel relocations don't seem to be supported
+// well in the GNU bfd and gold linkers at the moment. Therefore, by
+// default, for now, fall back to GeneralDynamic code generation.
+cl::opt<bool> EnableAArch64ELFLocalDynamicTLSGeneration(
+ "aarch64-elf-ldtls-generation", cl::Hidden,
+ cl::desc("Allow AArch64 Local Dynamic TLS code generation"),
+ cl::init(false));
AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
const AArch64Subtarget &STI)
@@ -362,9 +370,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FLOG10, MVT::v8f16, Expand);
// AArch64 has implementations of a lot of rounding-like FP operations.
- static MVT RoundingTypes[] = { MVT::f32, MVT::f64};
- for (unsigned I = 0; I < array_lengthof(RoundingTypes); ++I) {
- MVT Ty = RoundingTypes[I];
+ for (MVT Ty : {MVT::f32, MVT::f64}) {
setOperationAction(ISD::FFLOOR, Ty, Legal);
setOperationAction(ISD::FNEARBYINT, Ty, Legal);
setOperationAction(ISD::FCEIL, Ty, Legal);
@@ -561,9 +567,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
}
// AArch64 has implementations of a lot of rounding-like FP operations.
- static MVT RoundingVecTypes[] = {MVT::v2f32, MVT::v4f32, MVT::v2f64 };
- for (unsigned I = 0; I < array_lengthof(RoundingVecTypes); ++I) {
- MVT Ty = RoundingVecTypes[I];
+ for (MVT Ty : {MVT::v2f32, MVT::v4f32, MVT::v2f64}) {
setOperationAction(ISD::FFLOOR, Ty, Legal);
setOperationAction(ISD::FNEARBYINT, Ty, Legal);
setOperationAction(ISD::FCEIL, Ty, Legal);
@@ -752,7 +756,7 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
case AArch64ISD::CSNEG: return "AArch64ISD::CSNEG";
case AArch64ISD::CSINC: return "AArch64ISD::CSINC";
case AArch64ISD::THREAD_POINTER: return "AArch64ISD::THREAD_POINTER";
- case AArch64ISD::TLSDESC_CALL: return "AArch64ISD::TLSDESC_CALL";
+ case AArch64ISD::TLSDESC_CALLSEQ: return "AArch64ISD::TLSDESC_CALLSEQ";
case AArch64ISD::ADC: return "AArch64ISD::ADC";
case AArch64ISD::SBC: return "AArch64ISD::SBC";
case AArch64ISD::ADDS: return "AArch64ISD::ADDS";
@@ -811,6 +815,12 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
case AArch64ISD::FCMGTz: return "AArch64ISD::FCMGTz";
case AArch64ISD::FCMLEz: return "AArch64ISD::FCMLEz";
case AArch64ISD::FCMLTz: return "AArch64ISD::FCMLTz";
+ case AArch64ISD::SADDV: return "AArch64ISD::SADDV";
+ case AArch64ISD::UADDV: return "AArch64ISD::UADDV";
+ case AArch64ISD::SMINV: return "AArch64ISD::SMINV";
+ case AArch64ISD::UMINV: return "AArch64ISD::UMINV";
+ case AArch64ISD::SMAXV: return "AArch64ISD::SMAXV";
+ case AArch64ISD::UMAXV: return "AArch64ISD::UMAXV";
case AArch64ISD::NOT: return "AArch64ISD::NOT";
case AArch64ISD::BIT: return "AArch64ISD::BIT";
case AArch64ISD::CBZ: return "AArch64ISD::CBZ";
@@ -1247,7 +1257,7 @@ getAArch64XALUOOp(AArch64CC::CondCode &CC, SDValue Op, SelectionDAG &DAG) {
case ISD::SMULO:
case ISD::UMULO: {
CC = AArch64CC::NE;
- bool IsSigned = (Op.getOpcode() == ISD::SMULO) ? true : false;
+ bool IsSigned = Op.getOpcode() == ISD::SMULO;
if (Op.getValueType() == MVT::i32) {
unsigned ExtendOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
// For a 32 bit multiply with overflow check we want the instruction
@@ -2784,13 +2794,13 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
if (IsThisReturn) {
// For 'this' returns, use the X0-preserving mask if applicable
- Mask = TRI->getThisReturnPreservedMask(CallConv);
+ Mask = TRI->getThisReturnPreservedMask(MF, CallConv);
if (!Mask) {
IsThisReturn = false;
- Mask = TRI->getCallPreservedMask(CallConv);
+ Mask = TRI->getCallPreservedMask(MF, CallConv);
}
} else
- Mask = TRI->getCallPreservedMask(CallConv);
+ Mask = TRI->getCallPreservedMask(MF, CallConv);
assert(Mask && "Missing call preserved mask for calling convention");
Ops.push_back(DAG.getRegisterMask(Mask));
@@ -3027,58 +3037,34 @@ AArch64TargetLowering::LowerDarwinGlobalTLSAddress(SDValue Op,
/// When accessing thread-local variables under either the general-dynamic or
/// local-dynamic system, we make a "TLS-descriptor" call. The variable will
/// have a descriptor, accessible via a PC-relative ADRP, and whose first entry
-/// is a function pointer to carry out the resolution. This function takes the
-/// address of the descriptor in X0 and returns the TPIDR_EL0 offset in X0. All
-/// other registers (except LR, NZCV) are preserved.
-///
-/// Thus, the ideal call sequence on AArch64 is:
-///
-/// adrp x0, :tlsdesc:thread_var
-/// ldr x8, [x0, :tlsdesc_lo12:thread_var]
-/// add x0, x0, :tlsdesc_lo12:thread_var
-/// .tlsdesccall thread_var
-/// blr x8
-/// (TPIDR_EL0 offset now in x0).
+/// is a function pointer to carry out the resolution.
///
-/// The ".tlsdesccall" directive instructs the assembler to insert a particular
-/// relocation to help the linker relax this sequence if it turns out to be too
-/// conservative.
+/// The sequence is:
+/// adrp x0, :tlsdesc:var
+/// ldr x1, [x0, #:tlsdesc_lo12:var]
+/// add x0, x0, #:tlsdesc_lo12:var
+/// .tlsdesccall var
+/// blr x1
+/// (TPIDR_EL0 offset now in x0)
///
-/// FIXME: we currently produce an extra, duplicated, ADRP instruction, but this
-/// is harmless.
-SDValue AArch64TargetLowering::LowerELFTLSDescCall(SDValue SymAddr,
- SDValue DescAddr, SDLoc DL,
- SelectionDAG &DAG) const {
+/// The above sequence must be produced unscheduled, to enable the linker to
+/// optimize/relax this sequence.
+/// Therefore, a pseudo-instruction (TLSDESC_CALLSEQ) is used to represent the
+/// above sequence, and expanded really late in the compilation flow, to ensure
+/// the sequence is produced as per above.
+SDValue AArch64TargetLowering::LowerELFTLSDescCallSeq(SDValue SymAddr, SDLoc DL,
+ SelectionDAG &DAG) const {
EVT PtrVT = getPointerTy();
- // The function we need to call is simply the first entry in the GOT for this
- // descriptor, load it in preparation.
- SDValue Func = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, SymAddr);
-
- // TLS calls preserve all registers except those that absolutely must be
- // trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be
- // silly).
- const uint32_t *Mask =
- Subtarget->getRegisterInfo()->getTLSCallPreservedMask();
-
- // The function takes only one argument: the address of the descriptor itself
- // in X0.
- SDValue Glue, Chain;
- Chain = DAG.getCopyToReg(DAG.getEntryNode(), DL, AArch64::X0, DescAddr, Glue);
- Glue = Chain.getValue(1);
+ SDValue Chain = DAG.getEntryNode();
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
- // We're now ready to populate the argument list, as with a normal call:
- SmallVector<SDValue, 6> Ops;
+ SmallVector<SDValue, 2> Ops;
Ops.push_back(Chain);
- Ops.push_back(Func);
Ops.push_back(SymAddr);
- Ops.push_back(DAG.getRegister(AArch64::X0, PtrVT));
- Ops.push_back(DAG.getRegisterMask(Mask));
- Ops.push_back(Glue);
- SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
- Chain = DAG.getNode(AArch64ISD::TLSDESC_CALL, DL, NodeTys, Ops);
- Glue = Chain.getValue(1);
+ Chain = DAG.getNode(AArch64ISD::TLSDESC_CALLSEQ, DL, NodeTys, Ops);
+ SDValue Glue = Chain.getValue(1);
return DAG.getCopyFromReg(Chain, DL, AArch64::X0, PtrVT, Glue);
}
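
The rewritten lowering no longer builds an explicit call: it emits a single
glued TLSDESC_CALLSEQ node and reads the resulting TPIDR_EL0 offset out of X0.
Condensed, the DAG construction above is:

    SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
    SDValue Ops[] = {DAG.getEntryNode(), SymAddr};
    SDValue Chain = DAG.getNode(AArch64ISD::TLSDESC_CALLSEQ, DL, VTs, Ops);
    SDValue Glue = Chain.getValue(1);
    return DAG.getCopyFromReg(Chain, DL, AArch64::X0, PtrVT, Glue);  // offset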
@@ -3089,9 +3075,18 @@ AArch64TargetLowering::LowerELFGlobalTLSAddress(SDValue Op,
assert(Subtarget->isTargetELF() && "This function expects an ELF target");
assert(getTargetMachine().getCodeModel() == CodeModel::Small &&
"ELF TLS only supported in small memory model");
+ // Different choices can be made for the maximum size of the TLS area for a
+ // module. For the small address model, the default TLS size is 16MiB and the
+ // maximum TLS size is 4GiB.
+ // FIXME: add -mtls-size command line option and make it control the 16MiB
+ // vs. 4GiB code sequence generation.
const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
TLSModel::Model Model = getTargetMachine().getTLSModel(GA->getGlobal());
+ if (!EnableAArch64ELFLocalDynamicTLSGeneration) {
+ if (Model == TLSModel::LocalDynamic)
+ Model = TLSModel::GeneralDynamic;
+ }
SDValue TPOff;
EVT PtrVT = getPointerTy();
@@ -3102,17 +3097,20 @@ AArch64TargetLowering::LowerELFGlobalTLSAddress(SDValue Op,
if (Model == TLSModel::LocalExec) {
SDValue HiVar = DAG.getTargetGlobalAddress(
- GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_G1);
+ GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_HI12);
SDValue LoVar = DAG.getTargetGlobalAddress(
GV, DL, PtrVT, 0,
- AArch64II::MO_TLS | AArch64II::MO_G0 | AArch64II::MO_NC);
+ AArch64II::MO_TLS | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
- TPOff = SDValue(DAG.getMachineNode(AArch64::MOVZXi, DL, PtrVT, HiVar,
- DAG.getTargetConstant(16, MVT::i32)),
- 0);
- TPOff = SDValue(DAG.getMachineNode(AArch64::MOVKXi, DL, PtrVT, TPOff, LoVar,
- DAG.getTargetConstant(0, MVT::i32)),
- 0);
+ SDValue TPWithOff_lo =
+ SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, ThreadBase,
+ HiVar, DAG.getTargetConstant(0, MVT::i32)),
+ 0);
+ SDValue TPWithOff =
+ SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, TPWithOff_lo,
+ LoVar, DAG.getTargetConstant(0, MVT::i32)),
+ 0);
+ return TPWithOff;
} else if (Model == TLSModel::InitialExec) {
TPOff = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_TLS);
TPOff = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, TPOff);
@@ -3127,19 +3125,6 @@ AArch64TargetLowering::LowerELFGlobalTLSAddress(SDValue Op,
DAG.getMachineFunction().getInfo<AArch64FunctionInfo>();
MFI->incNumLocalDynamicTLSAccesses();
- // Accesses used in this sequence go via the TLS descriptor which lives in
- // the GOT. Prepare an address we can use to handle this.
- SDValue HiDesc = DAG.getTargetExternalSymbol(
- "_TLS_MODULE_BASE_", PtrVT, AArch64II::MO_TLS | AArch64II::MO_PAGE);
- SDValue LoDesc = DAG.getTargetExternalSymbol(
- "_TLS_MODULE_BASE_", PtrVT,
- AArch64II::MO_TLS | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
-
- // First argument to the descriptor call is the address of the descriptor
- // itself.
- SDValue DescAddr = DAG.getNode(AArch64ISD::ADRP, DL, PtrVT, HiDesc);
- DescAddr = DAG.getNode(AArch64ISD::ADDlow, DL, PtrVT, DescAddr, LoDesc);
-
// The call needs a relocation too for linker relaxation. It doesn't make
// sense to call it MO_PAGE or MO_PAGEOFF though so we need another copy of
// the address.
@@ -3148,40 +3133,23 @@ AArch64TargetLowering::LowerELFGlobalTLSAddress(SDValue Op,
// Now we can calculate the offset from TPIDR_EL0 to this module's
// thread-local area.
- TPOff = LowerELFTLSDescCall(SymAddr, DescAddr, DL, DAG);
+ TPOff = LowerELFTLSDescCallSeq(SymAddr, DL, DAG);
// Now use :dtprel_whatever: operations to calculate this variable's offset
// in its thread-storage area.
SDValue HiVar = DAG.getTargetGlobalAddress(
- GV, DL, MVT::i64, 0, AArch64II::MO_TLS | AArch64II::MO_G1);
+ GV, DL, MVT::i64, 0, AArch64II::MO_TLS | AArch64II::MO_HI12);
SDValue LoVar = DAG.getTargetGlobalAddress(
GV, DL, MVT::i64, 0,
- AArch64II::MO_TLS | AArch64II::MO_G0 | AArch64II::MO_NC);
-
- SDValue DTPOff =
- SDValue(DAG.getMachineNode(AArch64::MOVZXi, DL, PtrVT, HiVar,
- DAG.getTargetConstant(16, MVT::i32)),
- 0);
- DTPOff =
- SDValue(DAG.getMachineNode(AArch64::MOVKXi, DL, PtrVT, DTPOff, LoVar,
- DAG.getTargetConstant(0, MVT::i32)),
- 0);
-
- TPOff = DAG.getNode(ISD::ADD, DL, PtrVT, TPOff, DTPOff);
- } else if (Model == TLSModel::GeneralDynamic) {
- // Accesses used in this sequence go via the TLS descriptor which lives in
- // the GOT. Prepare an address we can use to handle this.
- SDValue HiDesc = DAG.getTargetGlobalAddress(
- GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_PAGE);
- SDValue LoDesc = DAG.getTargetGlobalAddress(
- GV, DL, PtrVT, 0,
AArch64II::MO_TLS | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
- // First argument to the descriptor call is the address of the descriptor
- // itself.
- SDValue DescAddr = DAG.getNode(AArch64ISD::ADRP, DL, PtrVT, HiDesc);
- DescAddr = DAG.getNode(AArch64ISD::ADDlow, DL, PtrVT, DescAddr, LoDesc);
-
+ TPOff = SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, TPOff, HiVar,
+ DAG.getTargetConstant(0, MVT::i32)),
+ 0);
+ TPOff = SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, TPOff, LoVar,
+ DAG.getTargetConstant(0, MVT::i32)),
+ 0);
+ } else if (Model == TLSModel::GeneralDynamic) {
// The call needs a relocation too for linker relaxation. It doesn't make
// sense to call it MO_PAGE or MO_PAGEOFF though so we need another copy of
// the address.
@@ -3189,7 +3157,7 @@ AArch64TargetLowering::LowerELFGlobalTLSAddress(SDValue Op,
DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_TLS);
// Finally we can make a call to calculate the offset from tpidr_el0.
- TPOff = LowerELFTLSDescCall(SymAddr, DescAddr, DL, DAG);
+ TPOff = LowerELFTLSDescCallSeq(SymAddr, DL, DAG);
} else
llvm_unreachable("Unsupported ELF TLS access model");
@@ -3356,11 +3324,12 @@ SDValue AArch64TargetLowering::LowerFCOPYSIGN(SDValue Op,
EVT VecVT;
EVT EltVT;
- SDValue EltMask, VecVal1, VecVal2;
+ uint64_t EltMask;
+ SDValue VecVal1, VecVal2;
if (VT == MVT::f32 || VT == MVT::v2f32 || VT == MVT::v4f32) {
EltVT = MVT::i32;
VecVT = MVT::v4i32;
- EltMask = DAG.getConstant(0x80000000ULL, EltVT);
+ EltMask = 0x80000000ULL;
if (!VT.isVector()) {
VecVal1 = DAG.getTargetInsertSubreg(AArch64::ssub, DL, VecVT,
@@ -3378,7 +3347,7 @@ SDValue AArch64TargetLowering::LowerFCOPYSIGN(SDValue Op,
// We want to materialize a mask with the high bit set, but the AdvSIMD
// immediate moves cannot materialize that in a single instruction for
// 64-bit elements. Instead, materialize zero and then negate it.
- EltMask = DAG.getConstant(0, EltVT);
+ EltMask = 0;
if (!VT.isVector()) {
VecVal1 = DAG.getTargetInsertSubreg(AArch64::dsub, DL, VecVT,
@@ -3393,11 +3362,7 @@ SDValue AArch64TargetLowering::LowerFCOPYSIGN(SDValue Op,
llvm_unreachable("Invalid type for copysign!");
}
- std::vector<SDValue> BuildVectorOps;
- for (unsigned i = 0; i < VecVT.getVectorNumElements(); ++i)
- BuildVectorOps.push_back(EltMask);
-
- SDValue BuildVec = DAG.getNode(ISD::BUILD_VECTOR, DL, VecVT, BuildVectorOps);
+ SDValue BuildVec = DAG.getConstant(EltMask, VecVT);
// If we couldn't materialize the mask above, then the mask vector will be
// the zero vector, and we need to negate it here.
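
The mask-materialization hunks above rely on SelectionDAG::getConstant building
the splat itself when given a vector type, so the manual BUILD_VECTOR loop over
every lane can go. A one-line sketch:

    // One call; getConstant on a vector VT splats the scalar into all lanes.
    SDValue BuildVec = DAG.getConstant(0x80000000ULL, MVT::v4i32);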
@@ -5927,8 +5892,10 @@ FailedModImm:
if (VT.getVectorElementType().isFloatingPoint()) {
SmallVector<SDValue, 8> Ops;
- MVT NewType =
- (VT.getVectorElementType() == MVT::f32) ? MVT::i32 : MVT::i64;
+ EVT EltTy = VT.getVectorElementType();
+ assert ((EltTy == MVT::f16 || EltTy == MVT::f32 || EltTy == MVT::f64) &&
+ "Unsupported floating-point vector type");
+ MVT NewType = MVT::getIntegerVT(EltTy.getSizeInBits());
for (unsigned i = 0; i < NumElts; ++i)
Ops.push_back(DAG.getNode(ISD::BITCAST, dl, NewType, Op.getOperand(i)));
EVT VecVT = EVT::getVectorVT(*DAG.getContext(), NewType, NumElts);
@@ -6781,7 +6748,7 @@ bool AArch64TargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
unsigned LZ = countLeadingZeros((uint64_t)Val);
unsigned Shift = (63 - LZ) / 16;
// MOVZ is free so return true for one or fewer MOVK.
- return (Shift < 3) ? true : false;
+ return Shift < 3;
}
// Generate SUBS and CSEL for integer abs.
@@ -6898,6 +6865,15 @@ static SDValue performMulCombine(SDNode *N, SelectionDAG &DAG,
N->getOperand(0));
}
} else {
+ // (mul x, -(2^N - 1)) => (sub x, (shl x, N))
+ APInt VNP1 = -Value + 1;
+ if (VNP1.isPowerOf2()) {
+ SDValue ShiftedVal =
+ DAG.getNode(ISD::SHL, SDLoc(N), VT, N->getOperand(0),
+ DAG.getConstant(VNP1.logBase2(), MVT::i64));
+ return DAG.getNode(ISD::SUB, SDLoc(N), VT, N->getOperand(0),
+ ShiftedVal);
+ }
// (mul x, -(2^N + 1)) => - (add (shl x, N), x)
APInt VNM1 = -Value - 1;
if (VNM1.isPowerOf2()) {
@@ -6908,15 +6884,6 @@ static SDValue performMulCombine(SDNode *N, SelectionDAG &DAG,
DAG.getNode(ISD::ADD, SDLoc(N), VT, ShiftedVal, N->getOperand(0));
return DAG.getNode(ISD::SUB, SDLoc(N), VT, DAG.getConstant(0, VT), Add);
}
- // (mul x, -(2^N - 1)) => (sub x, (shl x, N))
- APInt VNP1 = -Value + 1;
- if (VNP1.isPowerOf2()) {
- SDValue ShiftedVal =
- DAG.getNode(ISD::SHL, SDLoc(N), VT, N->getOperand(0),
- DAG.getConstant(VNP1.logBase2(), MVT::i64));
- return DAG.getNode(ISD::SUB, SDLoc(N), VT, N->getOperand(0),
- ShiftedVal);
- }
}
}
return SDValue();
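
To see why the hoisted -(2^N - 1) case is right: for C = -7, -C + 1 = 8 = 2^3,
so (mul x, -7) becomes (sub x, (shl x, 3)), i.e. x - 8x = -7x, trading the
multiply for one shift and one subtract. A small self-contained check of both
identities (plain C++, not part of the patch):

    #include <cassert>

    int main() {
      for (long long x : {0, 1, 42, 12345}) {
        assert(x * -7 == x - (x << 3));    // -(2^N - 1) case, N = 3
        assert(x * -9 == -((x << 3) + x)); // -(2^N + 1) case, N = 3
      }
    }
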
@@ -7211,21 +7178,54 @@ static SDValue performBitcastCombine(SDNode *N,
static SDValue performConcatVectorsCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
SelectionDAG &DAG) {
+ SDLoc dl(N);
+ EVT VT = N->getValueType(0);
+ SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
+
+ // Optimize concat_vectors of truncated vectors, where the intermediate
+ // type is illegal, to avoid said illegality, e.g.,
+ // (v4i16 (concat_vectors (v2i16 (truncate (v2i64))),
+ // (v2i16 (truncate (v2i64)))))
+ // ->
+ // (v4i16 (truncate (vector_shuffle (v4i32 (bitcast (v2i64))),
+ // (v4i32 (bitcast (v2i64))),
+ // <0, 2, 4, 6>)))
+ // This isn't really target-specific, but ISD::TRUNCATE legality isn't keyed
+ // on both input and result type, so we might generate worse code.
+ // On AArch64 we know it's fine for v2i64->v4i16 and v4i32->v8i8.
+ if (N->getNumOperands() == 2 &&
+ N0->getOpcode() == ISD::TRUNCATE &&
+ N1->getOpcode() == ISD::TRUNCATE) {
+ SDValue N00 = N0->getOperand(0);
+ SDValue N10 = N1->getOperand(0);
+ EVT N00VT = N00.getValueType();
+
+ if (N00VT == N10.getValueType() &&
+ (N00VT == MVT::v2i64 || N00VT == MVT::v4i32) &&
+ N00VT.getScalarSizeInBits() == 4 * VT.getScalarSizeInBits()) {
+ MVT MidVT = (N00VT == MVT::v2i64 ? MVT::v4i32 : MVT::v8i16);
+ SmallVector<int, 8> Mask(MidVT.getVectorNumElements());
+ for (size_t i = 0; i < Mask.size(); ++i)
+ Mask[i] = i * 2;
+ return DAG.getNode(ISD::TRUNCATE, dl, VT,
+ DAG.getVectorShuffle(
+ MidVT, dl,
+ DAG.getNode(ISD::BITCAST, dl, MidVT, N00),
+ DAG.getNode(ISD::BITCAST, dl, MidVT, N10), Mask));
+ }
+ }
+
// Wait 'til after everything is legalized to try this. That way we have
// legal vector types and such.
if (DCI.isBeforeLegalizeOps())
return SDValue();
- SDLoc dl(N);
- EVT VT = N->getValueType(0);
-
// If we see a (concat_vectors (v1x64 A), (v1x64 A)) it's really a vector
// splat. The indexed instructions are going to be expecting a DUPLANE64, so
// canonicalise to that.
- if (N->getOperand(0) == N->getOperand(1) && VT.getVectorNumElements() == 2) {
+ if (N0 == N1 && VT.getVectorNumElements() == 2) {
assert(VT.getVectorElementType().getSizeInBits() == 64);
- return DAG.getNode(AArch64ISD::DUPLANE64, dl, VT,
- WidenVector(N->getOperand(0), DAG),
+ return DAG.getNode(AArch64ISD::DUPLANE64, dl, VT, WidenVector(N0, DAG),
DAG.getConstant(0, MVT::i64));
}
@@ -7238,10 +7238,9 @@ static SDValue performConcatVectorsCombine(SDNode *N,
// becomes
// (bitconvert (concat_vectors (v4i16 (bitconvert LHS)), RHS))
- SDValue Op1 = N->getOperand(1);
- if (Op1->getOpcode() != ISD::BITCAST)
+ if (N1->getOpcode() != ISD::BITCAST)
return SDValue();
- SDValue RHS = Op1->getOperand(0);
+ SDValue RHS = N1->getOperand(0);
MVT RHSTy = RHS.getValueType().getSimpleVT();
// If the RHS is not a vector, this is not the pattern we're looking for.
if (!RHSTy.isVector())
@@ -7251,10 +7250,10 @@ static SDValue performConcatVectorsCombine(SDNode *N,
MVT ConcatTy = MVT::getVectorVT(RHSTy.getVectorElementType(),
RHSTy.getVectorNumElements() * 2);
- return DAG.getNode(
- ISD::BITCAST, dl, VT,
- DAG.getNode(ISD::CONCAT_VECTORS, dl, ConcatTy,
- DAG.getNode(ISD::BITCAST, dl, RHSTy, N->getOperand(0)), RHS));
+ return DAG.getNode(ISD::BITCAST, dl, VT,
+ DAG.getNode(ISD::CONCAT_VECTORS, dl, ConcatTy,
+ DAG.getNode(ISD::BITCAST, dl, RHSTy, N0),
+ RHS));
}
static SDValue tryCombineFixedPointConvert(SDNode *N,
@@ -7651,6 +7650,15 @@ static SDValue tryCombineCRC32(unsigned Mask, SDNode *N, SelectionDAG &DAG) {
N->getOperand(0), N->getOperand(1), AndN.getOperand(0));
}
+static SDValue combineAcrossLanesIntrinsic(unsigned Opc, SDNode *N,
+ SelectionDAG &DAG) {
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
+ DAG.getNode(Opc, SDLoc(N),
+ N->getOperand(1).getSimpleValueType(),
+ N->getOperand(1)),
+ DAG.getConstant(0, MVT::i64));
+}
+
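
The helper above rewrites, for example, (i32 (int_aarch64_neon_saddv (v4i32
V))) into (i32 (extract_vector_elt (v4i32 (AArch64ISD::SADDV V)), 0)). Since
the new node yields a vector with only lane 0 defined, the TableGen patterns
added in AArch64InstrInfo.td can fold the surrounding vector_extract (or a
consuming SMOV/UMOV) directly, instead of special-casing every scalar
intrinsic.
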
static SDValue performIntrinsicCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
const AArch64Subtarget *Subtarget) {
@@ -7663,6 +7671,18 @@ static SDValue performIntrinsicCombine(SDNode *N,
case Intrinsic::aarch64_neon_vcvtfxu2fp:
return tryCombineFixedPointConvert(N, DCI, DAG);
break;
+ case Intrinsic::aarch64_neon_saddv:
+ return combineAcrossLanesIntrinsic(AArch64ISD::SADDV, N, DAG);
+ case Intrinsic::aarch64_neon_uaddv:
+ return combineAcrossLanesIntrinsic(AArch64ISD::UADDV, N, DAG);
+ case Intrinsic::aarch64_neon_sminv:
+ return combineAcrossLanesIntrinsic(AArch64ISD::SMINV, N, DAG);
+ case Intrinsic::aarch64_neon_uminv:
+ return combineAcrossLanesIntrinsic(AArch64ISD::UMINV, N, DAG);
+ case Intrinsic::aarch64_neon_smaxv:
+ return combineAcrossLanesIntrinsic(AArch64ISD::SMAXV, N, DAG);
+ case Intrinsic::aarch64_neon_umaxv:
+ return combineAcrossLanesIntrinsic(AArch64ISD::UMAXV, N, DAG);
case Intrinsic::aarch64_neon_fmax:
return DAG.getNode(AArch64ISD::FMAX, SDLoc(N), N->getValueType(0),
N->getOperand(1), N->getOperand(2));
@@ -8792,9 +8812,11 @@ bool AArch64TargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
}
// For the real atomic operations, we have ldxr/stxr up to 128 bits,
-bool AArch64TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
+TargetLoweringBase::AtomicRMWExpansionKind
+AArch64TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
unsigned Size = AI->getType()->getPrimitiveSizeInBits();
- return Size <= 128;
+ return Size <= 128 ? AtomicRMWExpansionKind::LLSC
+ : AtomicRMWExpansionKind::None;
}
bool AArch64TargetLowering::hasLoadLinkedStoreConditional() const {
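
Returning AtomicRMWExpansionKind::LLSC asks the generic AtomicExpand pass to
emit an IR-level load-linked/store-conditional loop, which the backend then
matches to an ldxr/stxr retry loop. A source-level illustration (standard
C++; nothing here is introduced by the patch):

    #include <atomic>

    long bump(std::atomic<long> &Counter) {
      // A 64-bit RMW is within the 128-bit limit above, so the backend now
      // requests LL/SC expansion rather than a __atomic_* library call.
      return Counter.fetch_add(1, std::memory_order_seq_cst);
    }
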
diff --git a/lib/Target/AArch64/AArch64ISelLowering.h b/lib/Target/AArch64/AArch64ISelLowering.h
index e973364..5ff11e8 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/lib/Target/AArch64/AArch64ISelLowering.h
@@ -30,9 +30,9 @@ enum {
WrapperLarge, // 4-instruction MOVZ/MOVK sequence for 64-bit addresses.
CALL, // Function call.
- // Almost the same as a normal call node, except that a TLSDesc relocation is
- // needed so the linker can relax it correctly if possible.
- TLSDESC_CALL,
+ // Produces the full sequence of instructions for getting the thread pointer
+ // offset of a variable into X0, using the TLSDesc model.
+ TLSDESC_CALLSEQ,
ADRP, // Page address of a TargetGlobalAddress operand.
ADDlow, // Add the low 12 bits of a TargetGlobalAddress operand.
LOADgot, // Load from automatically generated descriptor (e.g. Global
@@ -141,6 +141,18 @@ enum {
FCMLEz,
FCMLTz,
+ // Vector across-lanes addition
+ // Only the lower result lane is defined.
+ SADDV,
+ UADDV,
+
+ // Vector across-lanes min/max
+ // Only the lower result lane is defined.
+ SMINV,
+ UMINV,
+ SMAXV,
+ UMAXV,
+
// Vector bitwise negation
NOT,
@@ -335,7 +347,8 @@ public:
bool shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
- bool shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
+ TargetLoweringBase::AtomicRMWExpansionKind
+ shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
bool useLoadStackGuardNode() const override;
TargetLoweringBase::LegalizeTypeAction
@@ -399,8 +412,8 @@ private:
SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerDarwinGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerELFGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerELFTLSDescCall(SDValue SymAddr, SDValue DescAddr, SDLoc DL,
- SelectionDAG &DAG) const;
+ SDValue LowerELFTLSDescCallSeq(SDValue SymAddr, SDLoc DL,
+ SelectionDAG &DAG) const;
SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
@@ -460,6 +473,16 @@ private:
std::vector<SDValue> &Ops,
SelectionDAG &DAG) const override;
+ unsigned getInlineAsmMemConstraint(
+ const std::string &ConstraintCode) const override {
+ if (ConstraintCode == "Q")
+ return InlineAsm::Constraint_Q;
+ // FIXME: clang has code for 'Ump', 'Utf', 'Usa', and 'Ush' but these are
+ // followed by llvm_unreachable so we'll leave them unimplemented in
+ // the backend for now.
+ return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
+ }
+
bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override;
bool mayBeEmittedAsTailCall(CallInst *CI) const override;
bool getIndexedAddressParts(SDNode *Op, SDValue &Base, SDValue &Offset,
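
The 'Q' constraint accepts a memory operand addressed through a single base
register with no offset, which is what the exclusive-access instructions need.
A usage sketch with GNU inline asm (assumed to build with GCC or Clang
targeting AArch64; not taken from this patch):

    unsigned long load_exclusive(unsigned long *Ptr) {
      unsigned long Val;
      // "Q" forces [base]-only addressing, as ldxr requires; the hook above
      // maps it to InlineAsm::Constraint_Q.
      asm volatile("ldxr %0, %1" : "=r"(Val) : "Q"(*Ptr));
      return Val;
    }
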
diff --git a/lib/Target/AArch64/AArch64InstrInfo.cpp b/lib/Target/AArch64/AArch64InstrInfo.cpp
index 64cec55..8e0af2d 100644
--- a/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -31,7 +31,7 @@ using namespace llvm;
AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI)
: AArch64GenInstrInfo(AArch64::ADJCALLSTACKDOWN, AArch64::ADJCALLSTACKUP),
- RI(this, &STI), Subtarget(STI) {}
+ RI(STI.getTargetTriple()), Subtarget(STI) {}
/// GetInstSize - Return the number of bytes of code the specified
/// instruction may be. This returns the maximum number of bytes.
@@ -2068,10 +2068,10 @@ void llvm::emitFrameOffset(MachineBasicBlock &MBB,
.setMIFlag(Flag);
}
-MachineInstr *
-AArch64InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
- const SmallVectorImpl<unsigned> &Ops,
- int FrameIndex) const {
+MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
+ MachineInstr *MI,
+ ArrayRef<unsigned> Ops,
+ int FrameIndex) const {
// This is a bit of a hack. Consider this instruction:
//
// %vreg0<def> = COPY %SP; GPR64all:%vreg0
diff --git a/lib/Target/AArch64/AArch64InstrInfo.h b/lib/Target/AArch64/AArch64InstrInfo.h
index d8f1274..fa4b8b7 100644
--- a/lib/Target/AArch64/AArch64InstrInfo.h
+++ b/lib/Target/AArch64/AArch64InstrInfo.h
@@ -129,10 +129,9 @@ public:
const TargetRegisterInfo *TRI) const override;
using TargetInstrInfo::foldMemoryOperandImpl;
- MachineInstr *
- foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
- const SmallVectorImpl<unsigned> &Ops,
- int FrameIndex) const override;
+ MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
+ ArrayRef<unsigned> Ops,
+ int FrameIndex) const override;
bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
MachineBasicBlock *&FBB,
diff --git a/lib/Target/AArch64/AArch64InstrInfo.td b/lib/Target/AArch64/AArch64InstrInfo.td
index 6e4c0b0..ec6fa5c 100644
--- a/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/lib/Target/AArch64/AArch64InstrInfo.td
@@ -22,6 +22,8 @@ def HasCrypto : Predicate<"Subtarget->hasCrypto()">,
AssemblerPredicate<"FeatureCrypto", "crypto">;
def HasCRC : Predicate<"Subtarget->hasCRC()">,
AssemblerPredicate<"FeatureCRC", "crc">;
+def HasV8_1a : Predicate<"Subtarget->hasV8_1a()">,
+ AssemblerPredicate<"FeatureV8_1a", "v8.1a">;
def IsLE : Predicate<"Subtarget->isLittleEndian()">;
def IsBE : Predicate<"!Subtarget->isLittleEndian()">;
def IsCyclone : Predicate<"Subtarget->isCyclone()">;
@@ -96,6 +98,19 @@ def SDT_AArch64ITOF : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisSameAs<0,1>]>;
def SDT_AArch64TLSDescCall : SDTypeProfile<0, -2, [SDTCisPtrTy<0>,
SDTCisPtrTy<1>]>;
+
+// Generates the general dynamic sequences, i.e.
+// adrp x0, :tlsdesc:var
+// ldr x1, [x0, #:tlsdesc_lo12:var]
+// add x0, x0, #:tlsdesc_lo12:var
+// .tlsdesccall var
+// blr x1
+
+// The profile has no results (the TPIDR_EL0 offset is returned directly in
+// X0) and a single operand: the variable being accessed.
+def SDT_AArch64TLSDescCallSeq : SDTypeProfile<0,1,
+ [SDTCisPtrTy<0>]>;
+
def SDT_AArch64WrapperLarge : SDTypeProfile<1, 4,
[SDTCisVT<0, i64>, SDTCisVT<1, i32>,
SDTCisSameAs<1, 2>, SDTCisSameAs<1, 3>,
@@ -229,10 +244,11 @@ def AArch64Prefetch : SDNode<"AArch64ISD::PREFETCH", SDT_AArch64PREFETCH,
def AArch64sitof: SDNode<"AArch64ISD::SITOF", SDT_AArch64ITOF>;
def AArch64uitof: SDNode<"AArch64ISD::UITOF", SDT_AArch64ITOF>;
-def AArch64tlsdesc_call : SDNode<"AArch64ISD::TLSDESC_CALL",
- SDT_AArch64TLSDescCall,
- [SDNPInGlue, SDNPOutGlue, SDNPHasChain,
- SDNPVariadic]>;
+def AArch64tlsdesc_callseq : SDNode<"AArch64ISD::TLSDESC_CALLSEQ",
+ SDT_AArch64TLSDescCallSeq,
+ [SDNPInGlue, SDNPOutGlue, SDNPHasChain,
+ SDNPVariadic]>;
+
def AArch64WrapperLarge : SDNode<"AArch64ISD::WrapperLarge",
SDT_AArch64WrapperLarge>;
@@ -244,6 +260,13 @@ def SDT_AArch64mull : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>,
def AArch64smull : SDNode<"AArch64ISD::SMULL", SDT_AArch64mull>;
def AArch64umull : SDNode<"AArch64ISD::UMULL", SDT_AArch64mull>;
+def AArch64saddv : SDNode<"AArch64ISD::SADDV", SDT_AArch64UnaryVec>;
+def AArch64uaddv : SDNode<"AArch64ISD::UADDV", SDT_AArch64UnaryVec>;
+def AArch64sminv : SDNode<"AArch64ISD::SMINV", SDT_AArch64UnaryVec>;
+def AArch64uminv : SDNode<"AArch64ISD::UMINV", SDT_AArch64UnaryVec>;
+def AArch64smaxv : SDNode<"AArch64ISD::SMAXV", SDT_AArch64UnaryVec>;
+def AArch64umaxv : SDNode<"AArch64ISD::UMAXV", SDT_AArch64UnaryVec>;
+
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
@@ -1049,15 +1072,16 @@ def TLSDESCCALL : Pseudo<(outs), (ins i64imm:$sym), []> {
let AsmString = ".tlsdesccall $sym";
}
-// Pseudo-instruction representing a BLR with attached TLSDESC relocation. It
-// gets expanded to two MCInsts during lowering.
-let isCall = 1, Defs = [LR] in
-def TLSDESC_BLR
- : Pseudo<(outs), (ins GPR64:$dest, i64imm:$sym),
- [(AArch64tlsdesc_call GPR64:$dest, tglobaltlsaddr:$sym)]>;
+// FIXME: maybe the scratch register used shouldn't be fixed to X1?
+// FIXME: can "hasSideEffects" be dropped?
+let isCall = 1, Defs = [LR, X0, X1], hasSideEffects = 1,
+ isCodeGenOnly = 1 in
+def TLSDESC_CALLSEQ
+ : Pseudo<(outs), (ins i64imm:$sym),
+ [(AArch64tlsdesc_callseq tglobaltlsaddr:$sym)]>;
+def : Pat<(AArch64tlsdesc_callseq texternalsym:$sym),
+ (TLSDESC_CALLSEQ texternalsym:$sym)>;
-def : Pat<(AArch64tlsdesc_call GPR64:$dest, texternalsym:$sym),
- (TLSDESC_BLR GPR64:$dest, texternalsym:$sym)>;
//===----------------------------------------------------------------------===//
// Conditional branch (immediate) instruction.
//===----------------------------------------------------------------------===//
@@ -2326,8 +2350,15 @@ defm UCVTF : IntegerToFP<1, "ucvtf", uint_to_fp>;
defm FMOV : UnscaledConversion<"fmov">;
-def : Pat<(f32 (fpimm0)), (FMOVWSr WZR)>, Requires<[NoZCZ]>;
-def : Pat<(f64 (fpimm0)), (FMOVXDr XZR)>, Requires<[NoZCZ]>;
+// Add pseudo ops for FMOV 0 so we can mark them as isReMaterializable
+let isReMaterializable = 1, isCodeGenOnly = 1 in {
+def FMOVS0 : Pseudo<(outs FPR32:$Rd), (ins), [(set f32:$Rd, (fpimm0))]>,
+ PseudoInstExpansion<(FMOVWSr FPR32:$Rd, WZR)>,
+ Requires<[NoZCZ]>;
+def FMOVD0 : Pseudo<(outs FPR64:$Rd), (ins), [(set f64:$Rd, (fpimm0))]>,
+ PseudoInstExpansion<(FMOVXDr FPR64:$Rd, XZR)>,
+ Requires<[NoZCZ]>;
+}
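
Marking these pseudos isReMaterializable lets the register allocator re-emit
"fmov s0, wzr" / "fmov d0, xzr" at a point of use instead of spilling and
reloading a zero; PseudoInstExpansion rewrites them back into the real
FMOVWSr/FMOVXDr during MCInst lowering, so no new encodings are involved.
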
//===----------------------------------------------------------------------===//
// Floating point conversion instruction.
@@ -3416,10 +3447,10 @@ defm FMAXNMP : SIMDPairwiseScalarSD<1, 0, 0b01100, "fmaxnmp">;
defm FMAXP : SIMDPairwiseScalarSD<1, 0, 0b01111, "fmaxp">;
defm FMINNMP : SIMDPairwiseScalarSD<1, 1, 0b01100, "fminnmp">;
defm FMINP : SIMDPairwiseScalarSD<1, 1, 0b01111, "fminp">;
-def : Pat<(i64 (int_aarch64_neon_saddv (v2i64 V128:$Rn))),
- (ADDPv2i64p V128:$Rn)>;
-def : Pat<(i64 (int_aarch64_neon_uaddv (v2i64 V128:$Rn))),
- (ADDPv2i64p V128:$Rn)>;
+def : Pat<(v2i64 (AArch64saddv V128:$Rn)),
+ (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), (ADDPv2i64p V128:$Rn), dsub)>;
+def : Pat<(v2i64 (AArch64uaddv V128:$Rn)),
+ (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), (ADDPv2i64p V128:$Rn), dsub)>;
def : Pat<(f32 (int_aarch64_neon_faddv (v2f32 V64:$Rn))),
(FADDPv2i32p V64:$Rn)>;
def : Pat<(f32 (int_aarch64_neon_faddv (v4f32 V128:$Rn))),
@@ -3709,10 +3740,6 @@ multiclass Neon_INS_elt_pattern<ValueType VT128, ValueType VT64,
defm : Neon_INS_elt_pattern<v8f16, v4f16, f16, INSvi16lane>;
defm : Neon_INS_elt_pattern<v4f32, v2f32, f32, INSvi32lane>;
defm : Neon_INS_elt_pattern<v2f64, v1f64, f64, INSvi64lane>;
-defm : Neon_INS_elt_pattern<v16i8, v8i8, i32, INSvi8lane>;
-defm : Neon_INS_elt_pattern<v8i16, v4i16, i32, INSvi16lane>;
-defm : Neon_INS_elt_pattern<v4i32, v2i32, i32, INSvi32lane>;
-defm : Neon_INS_elt_pattern<v2i64, v1i64, i64, INSvi32lane>;
// Floating point vector extractions are codegen'd as either a sequence of
@@ -3776,121 +3803,143 @@ defm FMAXV : SIMDAcrossLanesS<0b01111, 0, "fmaxv", int_aarch64_neon_fmaxv>;
defm FMINNMV : SIMDAcrossLanesS<0b01100, 1, "fminnmv", int_aarch64_neon_fminnmv>;
defm FMINV : SIMDAcrossLanesS<0b01111, 1, "fminv", int_aarch64_neon_fminv>;
-multiclass SIMDAcrossLanesSignedIntrinsic<string baseOpc, Intrinsic intOp> {
-// If there is a sign extension after this intrinsic, consume it as smov already
-// performed it
- def : Pat<(i32 (sext_inreg (i32 (intOp (v8i8 V64:$Rn))), i8)),
- (i32 (SMOVvi8to32
- (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
- (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub),
- (i64 0)))>;
- def : Pat<(i32 (intOp (v8i8 V64:$Rn))),
- (i32 (SMOVvi8to32
+// Patterns for across-vector intrinsics that have a node equivalent which
+// returns a vector (with only the low lane defined) instead of a scalar.
+// In effect, opNode is the same as (scalar_to_vector (IntNode)).
+multiclass SIMDAcrossLanesIntrinsic<string baseOpc,
+ SDPatternOperator opNode> {
+// If a lane instruction caught the vector_extract around opNode, we can
+// directly match the latter to the instruction.
+def : Pat<(v8i8 (opNode V64:$Rn)),
+ (INSERT_SUBREG (v8i8 (IMPLICIT_DEF)),
+ (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub)>;
+def : Pat<(v16i8 (opNode V128:$Rn)),
(INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
- (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub),
- (i64 0)))>;
-// If there is a sign extension after this intrinsic, consume it as smov already
-// performed it
-def : Pat<(i32 (sext_inreg (i32 (intOp (v16i8 V128:$Rn))), i8)),
- (i32 (SMOVvi8to32
- (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
- (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub),
- (i64 0)))>;
-def : Pat<(i32 (intOp (v16i8 V128:$Rn))),
- (i32 (SMOVvi8to32
- (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
- (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub),
- (i64 0)))>;
+ (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub)>;
+def : Pat<(v4i16 (opNode V64:$Rn)),
+ (INSERT_SUBREG (v4i16 (IMPLICIT_DEF)),
+ (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub)>;
+def : Pat<(v8i16 (opNode V128:$Rn)),
+ (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)),
+ (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub)>;
+def : Pat<(v4i32 (opNode V128:$Rn)),
+ (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
+ (!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn), ssub)>;
+
+
+// If none did, fall back to the explicit patterns, consuming the vector_extract.
+def : Pat<(i32 (vector_extract (insert_subvector undef, (v8i8 (opNode V64:$Rn)),
+ (i32 0)), (i64 0))),
+ (EXTRACT_SUBREG (INSERT_SUBREG (v8i8 (IMPLICIT_DEF)),
+ (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn),
+ bsub), ssub)>;
+def : Pat<(i32 (vector_extract (v16i8 (opNode V128:$Rn)), (i64 0))),
+ (EXTRACT_SUBREG (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+ (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn),
+ bsub), ssub)>;
+def : Pat<(i32 (vector_extract (insert_subvector undef,
+ (v4i16 (opNode V64:$Rn)), (i32 0)), (i64 0))),
+ (EXTRACT_SUBREG (INSERT_SUBREG (v4i16 (IMPLICIT_DEF)),
+ (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn),
+ hsub), ssub)>;
+def : Pat<(i32 (vector_extract (v8i16 (opNode V128:$Rn)), (i64 0))),
+ (EXTRACT_SUBREG (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)),
+ (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn),
+ hsub), ssub)>;
+def : Pat<(i32 (vector_extract (v4i32 (opNode V128:$Rn)), (i64 0))),
+ (EXTRACT_SUBREG (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
+ (!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn),
+ ssub), ssub)>;
+
+}
+
+multiclass SIMDAcrossLanesSignedIntrinsic<string baseOpc,
+ SDPatternOperator opNode>
+ : SIMDAcrossLanesIntrinsic<baseOpc, opNode> {
// If there is a sign extension after this intrinsic, consume it, as smov has
// already performed it.
-def : Pat<(i32 (sext_inreg (i32 (intOp (v4i16 V64:$Rn))), i16)),
+def : Pat<(i32 (sext_inreg (i32 (vector_extract (insert_subvector undef,
+ (opNode (v8i8 V64:$Rn)), (i32 0)), (i64 0))), i8)),
+ (i32 (SMOVvi8to32
+ (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+ (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub),
+ (i64 0)))>;
+def : Pat<(i32 (sext_inreg (i32 (vector_extract
+ (opNode (v16i8 V128:$Rn)), (i64 0))), i8)),
+ (i32 (SMOVvi8to32
+ (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+ (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub),
+ (i64 0)))>;
+def : Pat<(i32 (sext_inreg (i32 (vector_extract (insert_subvector undef,
+ (opNode (v4i16 V64:$Rn)), (i32 0)), (i64 0))), i16)),
(i32 (SMOVvi16to32
(INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
(!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub),
(i64 0)))>;
-def : Pat<(i32 (intOp (v4i16 V64:$Rn))),
+def : Pat<(i32 (sext_inreg (i32 (vector_extract
+ (opNode (v8i16 V128:$Rn)), (i64 0))), i16)),
(i32 (SMOVvi16to32
- (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
- (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub),
- (i64 0)))>;
-// If there is a sign extension after this intrinsic, consume it as smov already
-// performed it
-def : Pat<(i32 (sext_inreg (i32 (intOp (v8i16 V128:$Rn))), i16)),
- (i32 (SMOVvi16to32
- (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
- (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub),
- (i64 0)))>;
-def : Pat<(i32 (intOp (v8i16 V128:$Rn))),
- (i32 (SMOVvi16to32
- (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
- (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub),
- (i64 0)))>;
-
-def : Pat<(i32 (intOp (v4i32 V128:$Rn))),
- (i32 (EXTRACT_SUBREG
- (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
- (!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn), ssub),
- ssub))>;
+ (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+ (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub),
+ (i64 0)))>;
}
-multiclass SIMDAcrossLanesUnsignedIntrinsic<string baseOpc, Intrinsic intOp> {
-// If there is a masking operation keeping only what has been actually
-// generated, consume it.
- def : Pat<(i32 (and (i32 (intOp (v8i8 V64:$Rn))), maski8_or_more)),
- (i32 (EXTRACT_SUBREG
- (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
- (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub),
- ssub))>;
- def : Pat<(i32 (intOp (v8i8 V64:$Rn))),
- (i32 (EXTRACT_SUBREG
- (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
- (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub),
- ssub))>;
+multiclass SIMDAcrossLanesUnsignedIntrinsic<string baseOpc,
+ SDPatternOperator opNode>
+ : SIMDAcrossLanesIntrinsic<baseOpc, opNode> {
// If there is a masking operation keeping only what has been actually
// generated, consume it.
-def : Pat<(i32 (and (i32 (intOp (v16i8 V128:$Rn))), maski8_or_more)),
- (i32 (EXTRACT_SUBREG
- (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
- (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub),
- ssub))>;
-def : Pat<(i32 (intOp (v16i8 V128:$Rn))),
+def : Pat<(i32 (and (i32 (vector_extract (insert_subvector undef,
+ (opNode (v8i8 V64:$Rn)), (i32 0)), (i64 0))), maski8_or_more)),
+ (i32 (EXTRACT_SUBREG
+ (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+ (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub),
+ ssub))>;
+def : Pat<(i32 (and (i32 (vector_extract (opNode (v16i8 V128:$Rn)), (i64 0))),
+ maski8_or_more)),
(i32 (EXTRACT_SUBREG
(INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
(!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub),
ssub))>;
-
-// If there is a masking operation keeping only what has been actually
-// generated, consume it.
-def : Pat<(i32 (and (i32 (intOp (v4i16 V64:$Rn))), maski16_or_more)),
- (i32 (EXTRACT_SUBREG
- (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
- (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub),
- ssub))>;
-def : Pat<(i32 (intOp (v4i16 V64:$Rn))),
+def : Pat<(i32 (and (i32 (vector_extract (insert_subvector undef,
+ (opNode (v4i16 V64:$Rn)), (i32 0)), (i64 0))), maski16_or_more)),
(i32 (EXTRACT_SUBREG
(INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
(!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub),
ssub))>;
-// If there is a masking operation keeping only what has been actually
-// generated, consume it.
-def : Pat<(i32 (and (i32 (intOp (v8i16 V128:$Rn))), maski16_or_more)),
- (i32 (EXTRACT_SUBREG
- (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
- (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub),
- ssub))>;
-def : Pat<(i32 (intOp (v8i16 V128:$Rn))),
+def : Pat<(i32 (and (i32 (vector_extract (opNode (v8i16 V128:$Rn)), (i64 0))),
+ maski16_or_more)),
(i32 (EXTRACT_SUBREG
(INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
(!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub),
ssub))>;
+}
-def : Pat<(i32 (intOp (v4i32 V128:$Rn))),
- (i32 (EXTRACT_SUBREG
- (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
- (!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn), ssub),
- ssub))>;
+defm : SIMDAcrossLanesSignedIntrinsic<"ADDV", AArch64saddv>;
+// vaddv_[su]32 is special; -> ADDP Vd.2S,Vn.2S,Vm.2S; return Vd.s[0];Vn==Vm
+def : Pat<(v2i32 (AArch64saddv (v2i32 V64:$Rn))),
+ (ADDPv2i32 V64:$Rn, V64:$Rn)>;
-}
+defm : SIMDAcrossLanesUnsignedIntrinsic<"ADDV", AArch64uaddv>;
+// vaddv_[su]32 is special; -> ADDP Vd.2S,Vn.2S,Vm.2S; return Vd.s[0];Vn==Vm
+def : Pat<(v2i32 (AArch64uaddv (v2i32 V64:$Rn))),
+ (ADDPv2i32 V64:$Rn, V64:$Rn)>;
+
+defm : SIMDAcrossLanesSignedIntrinsic<"SMAXV", AArch64smaxv>;
+def : Pat<(v2i32 (AArch64smaxv (v2i32 V64:$Rn))),
+ (SMAXPv2i32 V64:$Rn, V64:$Rn)>;
+
+defm : SIMDAcrossLanesSignedIntrinsic<"SMINV", AArch64sminv>;
+def : Pat<(v2i32 (AArch64sminv (v2i32 V64:$Rn))),
+ (SMINPv2i32 V64:$Rn, V64:$Rn)>;
+
+defm : SIMDAcrossLanesUnsignedIntrinsic<"UMAXV", AArch64umaxv>;
+def : Pat<(v2i32 (AArch64umaxv (v2i32 V64:$Rn))),
+ (UMAXPv2i32 V64:$Rn, V64:$Rn)>;
+
+defm : SIMDAcrossLanesUnsignedIntrinsic<"UMINV", AArch64uminv>;
+def : Pat<(v2i32 (AArch64uminv (v2i32 V64:$Rn))),
+ (UMINPv2i32 V64:$Rn, V64:$Rn)>;
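
The v2i32 cases are handled separately because the across-lanes ADDV/SMAXV/
SMINV/UMAXV/UMINV instructions have no two-lane form; a two-element reduction
is just the corresponding pairwise instruction (ADDP, SMAXP, ...) with both
source operands set to the same register, with the result read from lane 0.
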
multiclass SIMDAcrossLanesSignedLongIntrinsic<string baseOpc, Intrinsic intOp> {
def : Pat<(i32 (intOp (v8i8 V64:$Rn))),
@@ -3953,32 +4002,6 @@ def : Pat<(i64 (intOp (v4i32 V128:$Rn))),
dsub))>;
}
-defm : SIMDAcrossLanesSignedIntrinsic<"ADDV", int_aarch64_neon_saddv>;
-// vaddv_[su]32 is special; -> ADDP Vd.2S,Vn.2S,Vm.2S; return Vd.s[0];Vn==Vm
-def : Pat<(i32 (int_aarch64_neon_saddv (v2i32 V64:$Rn))),
- (EXTRACT_SUBREG (ADDPv2i32 V64:$Rn, V64:$Rn), ssub)>;
-
-defm : SIMDAcrossLanesUnsignedIntrinsic<"ADDV", int_aarch64_neon_uaddv>;
-// vaddv_[su]32 is special; -> ADDP Vd.2S,Vn.2S,Vm.2S; return Vd.s[0];Vn==Vm
-def : Pat<(i32 (int_aarch64_neon_uaddv (v2i32 V64:$Rn))),
- (EXTRACT_SUBREG (ADDPv2i32 V64:$Rn, V64:$Rn), ssub)>;
-
-defm : SIMDAcrossLanesSignedIntrinsic<"SMAXV", int_aarch64_neon_smaxv>;
-def : Pat<(i32 (int_aarch64_neon_smaxv (v2i32 V64:$Rn))),
- (EXTRACT_SUBREG (SMAXPv2i32 V64:$Rn, V64:$Rn), ssub)>;
-
-defm : SIMDAcrossLanesSignedIntrinsic<"SMINV", int_aarch64_neon_sminv>;
-def : Pat<(i32 (int_aarch64_neon_sminv (v2i32 V64:$Rn))),
- (EXTRACT_SUBREG (SMINPv2i32 V64:$Rn, V64:$Rn), ssub)>;
-
-defm : SIMDAcrossLanesUnsignedIntrinsic<"UMAXV", int_aarch64_neon_umaxv>;
-def : Pat<(i32 (int_aarch64_neon_umaxv (v2i32 V64:$Rn))),
- (EXTRACT_SUBREG (UMAXPv2i32 V64:$Rn, V64:$Rn), ssub)>;
-
-defm : SIMDAcrossLanesUnsignedIntrinsic<"UMINV", int_aarch64_neon_uminv>;
-def : Pat<(i32 (int_aarch64_neon_uminv (v2i32 V64:$Rn))),
- (EXTRACT_SUBREG (UMINPv2i32 V64:$Rn, V64:$Rn), ssub)>;
-
defm : SIMDAcrossLanesSignedLongIntrinsic<"SADDLV", int_aarch64_neon_saddlv>;
defm : SIMDAcrossLanesUnsignedLongIntrinsic<"UADDLV", int_aarch64_neon_uaddlv>;
diff --git a/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
index 8463ce6..b1499e2 100644
--- a/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
+++ b/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
@@ -63,16 +63,24 @@ struct AArch64LoadStoreOpt : public MachineFunctionPass {
// If a matching instruction is found, MergeForward is set to true if the
// merge is to remove the first instruction and replace the second with
// a pair-wise insn, and false if the reverse is true.
+ // \p SExtIdx[out] gives the index of the result of the load pair that
+  // must be extended. The value of SExtIdx assumes that the paired load
+  // produces its results in this order: (I, returned iterator), i.e.,
+ // -1 means no value has to be extended, 0 means I, and 1 means the
+ // returned iterator.
MachineBasicBlock::iterator findMatchingInsn(MachineBasicBlock::iterator I,
- bool &MergeForward,
+ bool &MergeForward, int &SExtIdx,
unsigned Limit);
// Merge the two instructions indicated into a single pair-wise instruction.
// If MergeForward is true, erase the first instruction and fold its
// operation into the second. If false, the reverse. Return the instruction
// following the first instruction (which may change during processing).
+  // \p SExtIdx is the index of the result that must be extended for a
+  // paired load.
+ // -1 means none, 0 means I, and 1 means Paired.
MachineBasicBlock::iterator
mergePairedInsns(MachineBasicBlock::iterator I,
- MachineBasicBlock::iterator Paired, bool MergeForward);
+ MachineBasicBlock::iterator Paired, bool MergeForward,
+ int SExtIdx);
// Scan the instruction list to find a base register update that can
// be combined with the current instruction (a load or store) using
@@ -181,6 +189,43 @@ int AArch64LoadStoreOpt::getMemSize(MachineInstr *MemMI) {
}
}
+static unsigned getMatchingNonSExtOpcode(unsigned Opc,
+ bool *IsValidLdStrOpc = nullptr) {
+ if (IsValidLdStrOpc)
+ *IsValidLdStrOpc = true;
+ switch (Opc) {
+ default:
+ if (IsValidLdStrOpc)
+ *IsValidLdStrOpc = false;
+ return UINT_MAX;
+ case AArch64::STRDui:
+ case AArch64::STURDi:
+ case AArch64::STRQui:
+ case AArch64::STURQi:
+ case AArch64::STRWui:
+ case AArch64::STURWi:
+ case AArch64::STRXui:
+ case AArch64::STURXi:
+ case AArch64::LDRDui:
+ case AArch64::LDURDi:
+ case AArch64::LDRQui:
+ case AArch64::LDURQi:
+ case AArch64::LDRWui:
+ case AArch64::LDURWi:
+ case AArch64::LDRXui:
+ case AArch64::LDURXi:
+ case AArch64::STRSui:
+ case AArch64::STURSi:
+ case AArch64::LDRSui:
+ case AArch64::LDURSi:
+ return Opc;
+ case AArch64::LDRSWui:
+ return AArch64::LDRWui;
+ case AArch64::LDURSWi:
+ return AArch64::LDURWi;
+ }
+}
+
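
Note that LDRSWui/LDURSWi map to the plain W-register loads rather than to
themselves: this lets a sign-extending 32-bit load pair with an ordinary
32-bit load, and the dropped extension is re-created after the LDP via the
SExtIdx bookkeeping in mergePairedInsns below.
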
static unsigned getMatchingPairOpcode(unsigned Opc) {
switch (Opc) {
default:
@@ -282,7 +327,7 @@ static unsigned getPostIndexedOpcode(unsigned Opc) {
MachineBasicBlock::iterator
AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
MachineBasicBlock::iterator Paired,
- bool MergeForward) {
+ bool MergeForward, int SExtIdx) {
MachineBasicBlock::iterator NextI = I;
++NextI;
// If NextI is the second of the two instructions to be merged, we need
@@ -292,11 +337,13 @@ AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
if (NextI == Paired)
++NextI;
- bool IsUnscaled = isUnscaledLdst(I->getOpcode());
+ unsigned Opc =
+ SExtIdx == -1 ? I->getOpcode() : getMatchingNonSExtOpcode(I->getOpcode());
+ bool IsUnscaled = isUnscaledLdst(Opc);
int OffsetStride =
IsUnscaled && EnableAArch64UnscaledMemOp ? getMemSize(I) : 1;
- unsigned NewOpc = getMatchingPairOpcode(I->getOpcode());
+ unsigned NewOpc = getMatchingPairOpcode(Opc);
// Insert our new paired instruction after whichever of the paired
// instructions MergeForward indicates.
MachineBasicBlock::iterator InsertionPoint = MergeForward ? Paired : I;
@@ -311,6 +358,11 @@ AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
Paired->getOperand(2).getImm() + OffsetStride) {
RtMI = Paired;
Rt2MI = I;
+ // Here we swapped the assumption made for SExtIdx.
+ // I.e., we turn ldp I, Paired into ldp Paired, I.
+ // Update the index accordingly.
+ if (SExtIdx != -1)
+ SExtIdx = (SExtIdx + 1) % 2;
} else {
RtMI = I;
Rt2MI = Paired;
@@ -337,8 +389,47 @@ AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
DEBUG(dbgs() << " ");
DEBUG(Paired->print(dbgs()));
DEBUG(dbgs() << " with instruction:\n ");
- DEBUG(((MachineInstr *)MIB)->print(dbgs()));
- DEBUG(dbgs() << "\n");
+
+ if (SExtIdx != -1) {
+ // Generate the sign extension for the proper result of the ldp.
+ // I.e., with X1, that would be:
+ // %W1<def> = KILL %W1, %X1<imp-def>
+ // %X1<def> = SBFMXri %X1<kill>, 0, 31
+ MachineOperand &DstMO = MIB->getOperand(SExtIdx);
+ // Right now, DstMO has the extended register, since it comes from an
+ // extended opcode.
+ unsigned DstRegX = DstMO.getReg();
+ // Get the W variant of that register.
+ unsigned DstRegW = TRI->getSubReg(DstRegX, AArch64::sub_32);
+ // Update the result of LDP to use the W instead of the X variant.
+ DstMO.setReg(DstRegW);
+ DEBUG(((MachineInstr *)MIB)->print(dbgs()));
+ DEBUG(dbgs() << "\n");
+ // Make the machine verifier happy by providing a definition for
+ // the X register.
+ // Insert this definition right after the generated LDP, i.e., before
+ // InsertionPoint.
+ MachineInstrBuilder MIBKill =
+ BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(),
+ TII->get(TargetOpcode::KILL), DstRegW)
+ .addReg(DstRegW)
+ .addReg(DstRegX, RegState::Define);
+ MIBKill->getOperand(2).setImplicit();
+ // Create the sign extension.
+ MachineInstrBuilder MIBSXTW =
+ BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(),
+ TII->get(AArch64::SBFMXri), DstRegX)
+ .addReg(DstRegX)
+ .addImm(0)
+ .addImm(31);
+ (void)MIBSXTW;
+ DEBUG(dbgs() << " Extend operand:\n ");
+ DEBUG(((MachineInstr *)MIBSXTW)->print(dbgs()));
+ DEBUG(dbgs() << "\n");
+ } else {
+ DEBUG(((MachineInstr *)MIB)->print(dbgs()));
+ DEBUG(dbgs() << "\n");
+ }
// Erase the old instructions.
I->eraseFromParent();
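
For illustration (register choices hypothetical): merging "ldrsw x0, [x2]"
with a neighbouring "ldr w1, [x2, #4]" yields "ldp w0, w1, [x2]", after which
the KILL/SBFMXri pair shown above re-creates the sign-extended 64-bit value
in x0.
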
@@ -396,7 +487,8 @@ static int alignTo(int Num, int PowOf2) {
/// be combined with the current instruction into a load/store pair.
MachineBasicBlock::iterator
AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
- bool &MergeForward, unsigned Limit) {
+ bool &MergeForward, int &SExtIdx,
+ unsigned Limit) {
MachineBasicBlock::iterator E = I->getParent()->end();
MachineBasicBlock::iterator MBBI = I;
MachineInstr *FirstMI = I;
@@ -436,7 +528,19 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
// Now that we know this is a real instruction, count it.
++Count;
- if (Opc == MI->getOpcode() && MI->getOperand(2).isImm()) {
+ bool CanMergeOpc = Opc == MI->getOpcode();
+ SExtIdx = -1;
+ if (!CanMergeOpc) {
+ bool IsValidLdStrOpc;
+ unsigned NonSExtOpc = getMatchingNonSExtOpcode(Opc, &IsValidLdStrOpc);
+ if (!IsValidLdStrOpc)
+ continue;
+      // Opc is the opcode of the first instruction in the pair.
+ SExtIdx = NonSExtOpc == (unsigned)Opc ? 1 : 0;
+ CanMergeOpc = NonSExtOpc == getMatchingNonSExtOpcode(MI->getOpcode());
+ }
+
+ if (CanMergeOpc && MI->getOperand(2).isImm()) {
// If we've found another instruction with the same opcode, check to see
// if the base and offset are compatible with our starting instruction.
// These instructions all have scaled immediate operands, so we just
@@ -823,13 +927,14 @@ bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB) {
}
// Look ahead up to ScanLimit instructions for a pairable instruction.
bool MergeForward = false;
+ int SExtIdx = -1;
MachineBasicBlock::iterator Paired =
- findMatchingInsn(MBBI, MergeForward, ScanLimit);
+ findMatchingInsn(MBBI, MergeForward, SExtIdx, ScanLimit);
if (Paired != E) {
// Merge the loads into a pair. Keeping the iterator straight is a
// pain, so we let the merge routine tell us what the next instruction
// is after it's done mucking about.
- MBBI = mergePairedInsns(MBBI, Paired, MergeForward);
+ MBBI = mergePairedInsns(MBBI, Paired, MergeForward, SExtIdx);
Modified = true;
++NumPairCreated;
diff --git a/lib/Target/AArch64/AArch64MCInstLower.cpp b/lib/Target/AArch64/AArch64MCInstLower.cpp
index e57b0f4..b829341 100644
--- a/lib/Target/AArch64/AArch64MCInstLower.cpp
+++ b/lib/Target/AArch64/AArch64MCInstLower.cpp
@@ -22,9 +22,12 @@
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/Support/CodeGen.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Target/TargetMachine.h"
using namespace llvm;
+extern cl::opt<bool> EnableAArch64ELFLocalDynamicTLSGeneration;
+
AArch64MCInstLower::AArch64MCInstLower(MCContext &ctx, AsmPrinter &printer)
: Ctx(ctx), Printer(printer), TargetTriple(printer.getTargetTriple()) {}
@@ -84,10 +87,16 @@ MCOperand AArch64MCInstLower::lowerSymbolOperandELF(const MachineOperand &MO,
if (MO.isGlobal()) {
const GlobalValue *GV = MO.getGlobal();
Model = Printer.TM.getTLSModel(GV);
+ if (!EnableAArch64ELFLocalDynamicTLSGeneration &&
+ Model == TLSModel::LocalDynamic)
+ Model = TLSModel::GeneralDynamic;
+
} else {
assert(MO.isSymbol() &&
StringRef(MO.getSymbolName()) == "_TLS_MODULE_BASE_" &&
"unexpected external TLS symbol");
+ // The general dynamic access sequence is used to get the
+ // address of _TLS_MODULE_BASE_.
Model = TLSModel::GeneralDynamic;
}
switch (Model) {
@@ -123,6 +132,8 @@ MCOperand AArch64MCInstLower::lowerSymbolOperandELF(const MachineOperand &MO,
RefFlags |= AArch64MCExpr::VK_G1;
else if ((MO.getTargetFlags() & AArch64II::MO_FRAGMENT) == AArch64II::MO_G0)
RefFlags |= AArch64MCExpr::VK_G0;
+ else if ((MO.getTargetFlags() & AArch64II::MO_FRAGMENT) == AArch64II::MO_HI12)
+ RefFlags |= AArch64MCExpr::VK_HI12;
if (MO.getTargetFlags() & AArch64II::MO_NC)
RefFlags |= AArch64MCExpr::VK_NC;
diff --git a/lib/Target/AArch64/AArch64PBQPRegAlloc.cpp b/lib/Target/AArch64/AArch64PBQPRegAlloc.cpp
index 4690177..5394875 100644
--- a/lib/Target/AArch64/AArch64PBQPRegAlloc.cpp
+++ b/lib/Target/AArch64/AArch64PBQPRegAlloc.cpp
@@ -319,7 +319,7 @@ void A57ChainingConstraint::addInterChainConstraint(PBQPRAGraph &G, unsigned Rd,
static bool regJustKilledBefore(const LiveIntervals &LIs, unsigned reg,
const MachineInstr &MI) {
- LiveInterval LI = LIs.getInterval(reg);
+ const LiveInterval &LI = LIs.getInterval(reg);
SlotIndex SI = LIs.getInstructionIndex(&MI);
return LI.expiredAt(SI);
}
diff --git a/lib/Target/AArch64/AArch64PromoteConstant.cpp b/lib/Target/AArch64/AArch64PromoteConstant.cpp
index c037c86..e1b93bf 100644
--- a/lib/Target/AArch64/AArch64PromoteConstant.cpp
+++ b/lib/Target/AArch64/AArch64PromoteConstant.cpp
@@ -38,6 +38,7 @@
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -189,9 +190,11 @@ private:
IPI->second.push_back(&Use);
// Transfer the dominated uses of IPI to NewPt
  // Inserting into the DenseMap may invalidate existing iterators.
- // Keep a copy of the key to find the iterator to erase.
+  // Keep a copy of the key to find the iterator to erase, and move the value
+  // out first so that IPI->second is never touched after the insertion.
Instruction *OldInstr = IPI->first;
- InsertPts[NewPt] = std::move(IPI->second);
+ Uses OldUses = std::move(IPI->second);
+ InsertPts[NewPt] = std::move(OldUses);
// Erase IPI.
InsertPts.erase(OldInstr);
}
diff --git a/lib/Target/AArch64/AArch64RegisterInfo.cpp b/lib/Target/AArch64/AArch64RegisterInfo.cpp
index 206cdbb..33c11fe 100644
--- a/lib/Target/AArch64/AArch64RegisterInfo.cpp
+++ b/lib/Target/AArch64/AArch64RegisterInfo.cpp
@@ -18,6 +18,7 @@
#include "AArch64Subtarget.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/Triple.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -37,9 +38,8 @@ static cl::opt<bool>
ReserveX18("aarch64-reserve-x18", cl::Hidden,
cl::desc("Reserve X18, making it unavailable as GPR"));
-AArch64RegisterInfo::AArch64RegisterInfo(const AArch64InstrInfo *tii,
- const AArch64Subtarget *sti)
- : AArch64GenRegisterInfo(AArch64::LR), TII(tii), STI(sti) {}
+AArch64RegisterInfo::AArch64RegisterInfo(const Triple &TT)
+ : AArch64GenRegisterInfo(AArch64::LR), TT(TT) {}
const MCPhysReg *
AArch64RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
@@ -55,7 +55,8 @@ AArch64RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
}
const uint32_t *
-AArch64RegisterInfo::getCallPreservedMask(CallingConv::ID CC) const {
+AArch64RegisterInfo::getCallPreservedMask(const MachineFunction &MF,
+ CallingConv::ID CC) const {
if (CC == CallingConv::GHC)
    // This is academic because all GHC calls are (supposed to be) tail calls
return CSR_AArch64_NoRegs_RegMask;
@@ -66,15 +67,16 @@ AArch64RegisterInfo::getCallPreservedMask(CallingConv::ID CC) const {
}
const uint32_t *AArch64RegisterInfo::getTLSCallPreservedMask() const {
- if (STI->isTargetDarwin())
+ if (TT.isOSDarwin())
return CSR_AArch64_TLS_Darwin_RegMask;
- assert(STI->isTargetELF() && "only expect Darwin or ELF TLS");
+ assert(TT.isOSBinFormatELF() && "only expect Darwin or ELF TLS");
return CSR_AArch64_TLS_ELF_RegMask;
}
const uint32_t *
-AArch64RegisterInfo::getThisReturnPreservedMask(CallingConv::ID CC) const {
+AArch64RegisterInfo::getThisReturnPreservedMask(const MachineFunction &MF,
+ CallingConv::ID CC) const {
// This should return a register mask that is the same as that returned by
// getCallPreservedMask but that additionally preserves the register used for
// the first i64 argument (which must also be the register used to return a
@@ -97,12 +99,12 @@ AArch64RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
Reserved.set(AArch64::WSP);
Reserved.set(AArch64::WZR);
- if (TFI->hasFP(MF) || STI->isTargetDarwin()) {
+ if (TFI->hasFP(MF) || TT.isOSDarwin()) {
Reserved.set(AArch64::FP);
Reserved.set(AArch64::W29);
}
- if (STI->isTargetDarwin() || ReserveX18) {
+ if (TT.isOSDarwin() || ReserveX18) {
Reserved.set(AArch64::X18); // Platform register
Reserved.set(AArch64::W18);
}
@@ -129,10 +131,10 @@ bool AArch64RegisterInfo::isReservedReg(const MachineFunction &MF,
return true;
case AArch64::X18:
case AArch64::W18:
- return STI->isTargetDarwin() || ReserveX18;
+ return TT.isOSDarwin() || ReserveX18;
case AArch64::FP:
case AArch64::W29:
- return TFI->hasFP(MF) || STI->isTargetDarwin();
+ return TFI->hasFP(MF) || TT.isOSDarwin();
case AArch64::W19:
case AArch64::X19:
return hasBasePointer(MF);
@@ -269,7 +271,7 @@ bool AArch64RegisterInfo::needsFrameBaseReg(MachineInstr *MI,
// The FP is only available if there is no dynamic realignment. We
// don't know for sure yet whether we'll need that, so we guess based
// on whether there are any local variables that would trigger it.
- if (TFI->hasFP(MF) && isFrameOffsetLegal(MI, FPOffset))
+ if (TFI->hasFP(MF) && isFrameOffsetLegal(MI, AArch64::FP, FPOffset))
return false;
// If we can reference via the stack pointer or base pointer, try that.
@@ -277,7 +279,7 @@ bool AArch64RegisterInfo::needsFrameBaseReg(MachineInstr *MI,
// to only disallow SP relative references in the live range of
// the VLA(s). In practice, it's unclear how much difference that
// would make, but it may be worth doing.
- if (isFrameOffsetLegal(MI, Offset))
+ if (isFrameOffsetLegal(MI, AArch64::SP, Offset))
return false;
// The offset likely isn't legal; we want to allocate a virtual base register.
@@ -285,6 +287,7 @@ bool AArch64RegisterInfo::needsFrameBaseReg(MachineInstr *MI,
}
bool AArch64RegisterInfo::isFrameOffsetLegal(const MachineInstr *MI,
+ unsigned BaseReg,
int64_t Offset) const {
assert(Offset <= INT_MAX && "Offset too big to fit in int.");
assert(MI && "Unable to get the legal offset for nil instruction.");
@@ -302,10 +305,11 @@ void AArch64RegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB,
DebugLoc DL; // Defaults to "unknown"
if (Ins != MBB->end())
DL = Ins->getDebugLoc();
-
+ const MachineFunction &MF = *MBB->getParent();
+ const AArch64InstrInfo *TII =
+ MF.getSubtarget<AArch64Subtarget>().getInstrInfo();
const MCInstrDesc &MCID = TII->get(AArch64::ADDXri);
MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
- const MachineFunction &MF = *MBB->getParent();
MRI.constrainRegClass(BaseReg, TII->getRegClass(MCID, 0, this, MF));
unsigned Shifter = AArch64_AM::getShifterImm(AArch64_AM::LSL, 0);
@@ -324,6 +328,9 @@ void AArch64RegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
++i;
assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
}
+ const MachineFunction *MF = MI.getParent()->getParent();
+ const AArch64InstrInfo *TII =
+ MF->getSubtarget<AArch64Subtarget>().getInstrInfo();
bool Done = rewriteAArch64FrameIndex(MI, i, BaseReg, Off, TII);
assert(Done && "Unable to resolve frame index!");
(void)Done;
@@ -337,6 +344,8 @@ void AArch64RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
MachineInstr &MI = *II;
MachineBasicBlock &MBB = *MI.getParent();
MachineFunction &MF = *MBB.getParent();
+ const AArch64InstrInfo *TII =
+ MF.getSubtarget<AArch64Subtarget>().getInstrInfo();
const AArch64FrameLowering *TFI = static_cast<const AArch64FrameLowering *>(
MF.getSubtarget().getFrameLowering());
@@ -389,10 +398,10 @@ unsigned AArch64RegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
case AArch64::GPR64RegClassID:
case AArch64::GPR32commonRegClassID:
case AArch64::GPR64commonRegClassID:
- return 32 - 1 // XZR/SP
- - (TFI->hasFP(MF) || STI->isTargetDarwin()) // FP
- - (STI->isTargetDarwin() || ReserveX18) // X18 reserved as platform register
- - hasBasePointer(MF); // X19
+ return 32 - 1 // XZR/SP
+ - (TFI->hasFP(MF) || TT.isOSDarwin()) // FP
+ - (TT.isOSDarwin() || ReserveX18) // X18 reserved as platform register
+ - hasBasePointer(MF); // X19
case AArch64::FPR8RegClassID:
case AArch64::FPR16RegClassID:
case AArch64::FPR32RegClassID:
diff --git a/lib/Target/AArch64/AArch64RegisterInfo.h b/lib/Target/AArch64/AArch64RegisterInfo.h
index 51a5034..c01bfa5 100644
--- a/lib/Target/AArch64/AArch64RegisterInfo.h
+++ b/lib/Target/AArch64/AArch64RegisterInfo.h
@@ -19,26 +19,24 @@
namespace llvm {
-class AArch64InstrInfo;
-class AArch64Subtarget;
class MachineFunction;
class RegScavenger;
class TargetRegisterClass;
+class Triple;
struct AArch64RegisterInfo : public AArch64GenRegisterInfo {
private:
- const AArch64InstrInfo *TII;
- const AArch64Subtarget *STI;
+ const Triple &TT;
public:
- AArch64RegisterInfo(const AArch64InstrInfo *tii, const AArch64Subtarget *sti);
+ AArch64RegisterInfo(const Triple &TT);
bool isReservedReg(const MachineFunction &MF, unsigned Reg) const;
/// Code Generation virtual methods...
- const MCPhysReg *
- getCalleeSavedRegs(const MachineFunction *MF = nullptr) const override;
- const uint32_t *getCallPreservedMask(CallingConv::ID) const override;
+ const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override;
+ const uint32_t *getCallPreservedMask(const MachineFunction &MF,
+ CallingConv::ID) const override;
unsigned getCSRFirstUseCost() const override {
// The cost will be compared against BlockFrequency where entry has the
@@ -59,7 +57,8 @@ public:
///
/// Should return NULL in the case that the calling convention does not have
/// this property
- const uint32_t *getThisReturnPreservedMask(CallingConv::ID) const;
+ const uint32_t *getThisReturnPreservedMask(const MachineFunction &MF,
+ CallingConv::ID) const;
BitVector getReservedRegs(const MachineFunction &MF) const override;
const TargetRegisterClass *
@@ -73,7 +72,7 @@ public:
bool requiresFrameIndexScavenging(const MachineFunction &MF) const override;
bool needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const override;
- bool isFrameOffsetLegal(const MachineInstr *MI,
+ bool isFrameOffsetLegal(const MachineInstr *MI, unsigned BaseReg,
int64_t Offset) const override;
void materializeFrameBaseRegister(MachineBasicBlock *MBB, unsigned BaseReg,
int FrameIdx,
diff --git a/lib/Target/AArch64/AArch64Subtarget.cpp b/lib/Target/AArch64/AArch64Subtarget.cpp
index c613025..221d70d 100644
--- a/lib/Target/AArch64/AArch64Subtarget.cpp
+++ b/lib/Target/AArch64/AArch64Subtarget.cpp
@@ -48,7 +48,7 @@ AArch64Subtarget::AArch64Subtarget(const std::string &TT,
const TargetMachine &TM, bool LittleEndian)
: AArch64GenSubtargetInfo(TT, CPU, FS), ARMProcFamily(Others),
HasFPARMv8(false), HasNEON(false), HasCrypto(false), HasCRC(false),
- HasZeroCycleRegMove(false), HasZeroCycleZeroing(false),
+ HasV8_1a(false), HasZeroCycleRegMove(false), HasZeroCycleZeroing(false),
IsLittle(LittleEndian), CPUString(CPU), TargetTriple(TT), FrameLowering(),
InstrInfo(initializeSubtargetDependencies(FS)),
TSInfo(TM.getDataLayout()), TLInfo(TM, *this) {}
diff --git a/lib/Target/AArch64/AArch64Subtarget.h b/lib/Target/AArch64/AArch64Subtarget.h
index d418cc5..bcab97d 100644
--- a/lib/Target/AArch64/AArch64Subtarget.h
+++ b/lib/Target/AArch64/AArch64Subtarget.h
@@ -41,6 +41,7 @@ protected:
bool HasNEON;
bool HasCrypto;
bool HasCRC;
+ bool HasV8_1a;
// HasZeroCycleRegMove - Has zero-cycle register mov instructions.
bool HasZeroCycleRegMove;
@@ -86,6 +87,7 @@ public:
const AArch64RegisterInfo *getRegisterInfo() const override {
return &getInstrInfo()->getRegisterInfo();
}
+ const Triple &getTargetTriple() const { return TargetTriple; }
bool enableMachineScheduler() const override { return true; }
bool enablePostMachineScheduler() const override {
return isCortexA53() || isCortexA57();
@@ -99,6 +101,7 @@ public:
bool hasNEON() const { return HasNEON; }
bool hasCrypto() const { return HasCrypto; }
bool hasCRC() const { return HasCRC; }
+ bool hasV8_1a() const { return HasV8_1a; }
bool isLittleEndian() const { return IsLittle; }
diff --git a/lib/Target/AArch64/AArch64TargetMachine.cpp b/lib/Target/AArch64/AArch64TargetMachine.cpp
index d73d0b3..f902f64 100644
--- a/lib/Target/AArch64/AArch64TargetMachine.cpp
+++ b/lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -104,6 +104,16 @@ static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
return make_unique<AArch64_ELFTargetObjectFile>();
}
+// Helper function to build a DataLayout string
+static std::string computeDataLayout(StringRef TT, bool LittleEndian) {
+ Triple Triple(TT);
+ if (Triple.isOSBinFormatMachO())
+ return "e-m:o-i64:64-i128:128-n32:64-S128";
+ if (LittleEndian)
+ return "e-m:e-i64:64-i128:128-n32:64-S128";
+ return "E-m:e-i64:64-i128:128-n32:64-S128";
+}
+
/// TargetMachine ctor - Create an AArch64 architecture model.
///
AArch64TargetMachine::AArch64TargetMachine(const Target &T, StringRef TT,
@@ -112,16 +122,12 @@ AArch64TargetMachine::AArch64TargetMachine(const Target &T, StringRef TT,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL,
bool LittleEndian)
- : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
- // This nested ternary is horrible, but DL needs to be properly
- // initialized
- // before TLInfo is constructed.
- DL(Triple(TT).isOSBinFormatMachO()
- ? "e-m:o-i64:64-i128:128-n32:64-S128"
- : (LittleEndian ? "e-m:e-i64:64-i128:128-n32:64-S128"
- : "E-m:e-i64:64-i128:128-n32:64-S128")),
+      // The DataLayout string is now computed by computeDataLayout above; it
+      // still needs to be ready before TLInfo is constructed.
+ : LLVMTargetMachine(T, computeDataLayout(TT, LittleEndian), TT, CPU, FS,
+ Options, RM, CM, OL),
TLOF(createTLOF(Triple(getTargetTriple()))),
- Subtarget(TT, CPU, FS, *this, LittleEndian), isLittle(LittleEndian) {
+ isLittle(LittleEndian) {
initAsmInfo();
}
@@ -239,7 +245,7 @@ bool AArch64PassConfig::addPreISel() {
// FIXME: On AArch64, this depends on the type.
  // Basically, the addressable offsets are up to 4095 * Ty.getSizeInBytes(),
  // and the offset has to be a multiple of the related size in bytes.
- if (TM->getOptLevel() != CodeGenOpt::None)
+ if (TM->getOptLevel() == CodeGenOpt::Aggressive)
addPass(createGlobalMergePass(TM, 4095));
if (TM->getOptLevel() != CodeGenOpt::None)
addPass(createAArch64AddressTypePromotionPass());
@@ -287,10 +293,7 @@ void AArch64PassConfig::addPostRegAlloc() {
// Change dead register definitions to refer to the zero register.
if (TM->getOptLevel() != CodeGenOpt::None && EnableDeadRegisterElimination)
addPass(createAArch64DeadRegisterDefinitions());
- if (TM->getOptLevel() != CodeGenOpt::None &&
- (TM->getSubtarget<AArch64Subtarget>().isCortexA53() ||
- TM->getSubtarget<AArch64Subtarget>().isCortexA57()) &&
- usingDefaultRegAlloc())
+ if (TM->getOptLevel() != CodeGenOpt::None && usingDefaultRegAlloc())
// Improve performance for some FP/SIMD code for A57.
addPass(createAArch64A57FPLoadBalancing());
}
diff --git a/lib/Target/AArch64/AArch64TargetMachine.h b/lib/Target/AArch64/AArch64TargetMachine.h
index 7143adf..ec34fad 100644
--- a/lib/Target/AArch64/AArch64TargetMachine.h
+++ b/lib/Target/AArch64/AArch64TargetMachine.h
@@ -23,9 +23,7 @@ namespace llvm {
class AArch64TargetMachine : public LLVMTargetMachine {
protected:
- const DataLayout DL;
std::unique_ptr<TargetLoweringObjectFile> TLOF;
- AArch64Subtarget Subtarget;
mutable StringMap<std::unique_ptr<AArch64Subtarget>> SubtargetMap;
public:
@@ -35,11 +33,6 @@ public:
CodeGenOpt::Level OL, bool IsLittleEndian);
~AArch64TargetMachine() override;
-
- const DataLayout *getDataLayout() const override { return &DL; }
- const AArch64Subtarget *getSubtargetImpl() const override {
- return &Subtarget;
- }
const AArch64Subtarget *getSubtargetImpl(const Function &F) const override;
// Pass Pipeline Configuration
diff --git a/lib/Target/AArch64/AArch64TargetObjectFile.cpp b/lib/Target/AArch64/AArch64TargetObjectFile.cpp
index 4069038..8ff58e9 100644
--- a/lib/Target/AArch64/AArch64TargetObjectFile.cpp
+++ b/lib/Target/AArch64/AArch64TargetObjectFile.cpp
@@ -13,6 +13,7 @@
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCValue.h"
#include "llvm/Support/Dwarf.h"
using namespace llvm;
using namespace dwarf;
@@ -23,6 +24,11 @@ void AArch64_ELFTargetObjectFile::Initialize(MCContext &Ctx,
InitializeELF(TM.Options.UseInitArray);
}
+AArch64_MachoTargetObjectFile::AArch64_MachoTargetObjectFile()
+ : TargetLoweringObjectFileMachO() {
+ SupportGOTPCRelWithOffset = false;
+}
+
const MCExpr *AArch64_MachoTargetObjectFile::getTTypeGlobalReference(
const GlobalValue *GV, unsigned Encoding, Mangler &Mang,
const TargetMachine &TM, MachineModuleInfo *MMI,
@@ -50,3 +56,18 @@ MCSymbol *AArch64_MachoTargetObjectFile::getCFIPersonalitySymbol(
MachineModuleInfo *MMI) const {
return TM.getSymbol(GV, Mang);
}
+
+const MCExpr *AArch64_MachoTargetObjectFile::getIndirectSymViaGOTPCRel(
+ const MCSymbol *Sym, const MCValue &MV, int64_t Offset,
+ MachineModuleInfo *MMI, MCStreamer &Streamer) const {
+ assert((Offset+MV.getConstant() == 0) &&
+         "AArch64 does not support GOT PC rel with extra offset");
+ // On ARM64 Darwin, we can reference symbols with foo@GOT-., which
+ // is an indirect pc-relative reference.
+ const MCExpr *Res =
+ MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_GOT, getContext());
+ MCSymbol *PCSym = getContext().CreateTempSymbol();
+ Streamer.EmitLabel(PCSym);
+ const MCExpr *PC = MCSymbolRefExpr::Create(PCSym, getContext());
+ return MCBinaryExpr::CreateSub(Res, PC, getContext());
+}
diff --git a/lib/Target/AArch64/AArch64TargetObjectFile.h b/lib/Target/AArch64/AArch64TargetObjectFile.h
index 2e595f9..d41f445 100644
--- a/lib/Target/AArch64/AArch64TargetObjectFile.h
+++ b/lib/Target/AArch64/AArch64TargetObjectFile.h
@@ -24,6 +24,8 @@ class AArch64_ELFTargetObjectFile : public TargetLoweringObjectFileELF {
/// AArch64_MachoTargetObjectFile - This TLOF implementation is used for Darwin.
class AArch64_MachoTargetObjectFile : public TargetLoweringObjectFileMachO {
public:
+ AArch64_MachoTargetObjectFile();
+
const MCExpr *getTTypeGlobalReference(const GlobalValue *GV,
unsigned Encoding, Mangler &Mang,
const TargetMachine &TM,
@@ -33,6 +35,11 @@ public:
MCSymbol *getCFIPersonalitySymbol(const GlobalValue *GV, Mangler &Mang,
const TargetMachine &TM,
MachineModuleInfo *MMI) const override;
+
+ const MCExpr *getIndirectSymViaGOTPCRel(const MCSymbol *Sym,
+ const MCValue &MV, int64_t Offset,
+ MachineModuleInfo *MMI,
+ MCStreamer &Streamer) const override;
};
} // end namespace llvm
diff --git a/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 0646d85..0533355 100644
--- a/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -10,6 +10,7 @@
#include "AArch64TargetTransformInfo.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/LoopInfo.h"
#include "llvm/CodeGen/BasicTTIImpl.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/CostTable.h"
@@ -352,7 +353,7 @@ unsigned AArch64TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
// Vector selects wider than the register width are not lowered well.
if (ValTy->isVectorTy() && ISD == ISD::SELECT) {
// We would need this many instructions to hide the scalarization happening.
- unsigned AmortizationCost = 20;
+ const unsigned AmortizationCost = 20;
static const TypeConversionCostTblEntry<MVT::SimpleValueType>
VectorSelectTbl[] = {
{ ISD::SELECT, MVT::v16i1, MVT::v16i16, 16 * AmortizationCost },
@@ -426,6 +427,15 @@ unsigned AArch64TTIImpl::getMaxInterleaveFactor() {
void AArch64TTIImpl::getUnrollingPreferences(Loop *L,
TTI::UnrollingPreferences &UP) {
+ // Enable partial unrolling and runtime unrolling.
+ BaseT::getUnrollingPreferences(L, UP);
+
+  // Inner loops are more likely to be hot, and the runtime check can be
+  // hoisted out by the LICM pass, so its overhead is lower; use a larger
+  // threshold to unroll more of them.
+ if (L->getLoopDepth() > 1)
+ UP.PartialThreshold *= 2;
+
// Disable partial & runtime unrolling on -Os.
UP.PartialOptSizeThreshold = 0;
}
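A rough model of the heuristic above, written as standalone arithmetic rather than the real TTI plumbing (illustrative only): the base partial-unrolling budget is doubled for inner loops, and optimizing for size zeroes it.

unsigned effectivePartialThreshold(unsigned BaseThreshold, unsigned LoopDepth,
                                   bool OptForSize) {
  if (OptForSize)
    return 0;               // mirrors UP.PartialOptSizeThreshold = 0
  unsigned Threshold = BaseThreshold;
  if (LoopDepth > 1)
    Threshold *= 2;         // hot inner loops get a larger budget
  return Threshold;
}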
diff --git a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
index 1960c99..1219ffc 100644
--- a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
+++ b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
@@ -113,11 +113,10 @@ public:
#define GET_OPERAND_DIAGNOSTIC_TYPES
#include "AArch64GenAsmMatcher.inc"
};
- AArch64AsmParser(MCSubtargetInfo &_STI, MCAsmParser &_Parser,
- const MCInstrInfo &MII,
- const MCTargetOptions &Options)
- : MCTargetAsmParser(), STI(_STI) {
- MCAsmParserExtension::Initialize(_Parser);
+ AArch64AsmParser(MCSubtargetInfo &STI, MCAsmParser &Parser,
+ const MCInstrInfo &MII, const MCTargetOptions &Options)
+ : MCTargetAsmParser(), STI(STI) {
+ MCAsmParserExtension::Initialize(Parser);
MCStreamer &S = getParser().getStreamer();
if (S.getTargetStreamer() == nullptr)
new AArch64TargetStreamer(S);
@@ -205,6 +204,8 @@ private:
struct BarrierOp {
unsigned Val; // Not the enum since not all values have names.
+ const char *Data;
+ unsigned Length;
};
struct SysRegOp {
@@ -221,6 +222,8 @@ private:
struct PrefetchOp {
unsigned Val;
+ const char *Data;
+ unsigned Length;
};
struct ShiftExtendOp {
@@ -254,8 +257,7 @@ private:
MCContext &Ctx;
public:
- AArch64Operand(KindTy K, MCContext &_Ctx)
- : MCParsedAsmOperand(), Kind(K), Ctx(_Ctx) {}
+ AArch64Operand(KindTy K, MCContext &Ctx) : Kind(K), Ctx(Ctx) {}
AArch64Operand(const AArch64Operand &o) : MCParsedAsmOperand(), Ctx(o.Ctx) {
Kind = o.Kind;
@@ -349,6 +351,11 @@ public:
return Barrier.Val;
}
+ StringRef getBarrierName() const {
+ assert(Kind == k_Barrier && "Invalid access!");
+ return StringRef(Barrier.Data, Barrier.Length);
+ }
+
unsigned getReg() const override {
assert(Kind == k_Register && "Invalid access!");
return Reg.RegNum;
@@ -384,6 +391,11 @@ public:
return Prefetch.Val;
}
+ StringRef getPrefetchName() const {
+ assert(Kind == k_Prefetch && "Invalid access!");
+ return StringRef(Prefetch.Data, Prefetch.Length);
+ }
+
AArch64_AM::ShiftExtendType getShiftExtendType() const {
assert(Kind == k_ShiftExtend && "Invalid access!");
return ShiftExtend.Type;
@@ -752,58 +764,47 @@ public:
}
bool isMovZSymbolG3() const {
- static AArch64MCExpr::VariantKind Variants[] = { AArch64MCExpr::VK_ABS_G3 };
- return isMovWSymbol(Variants);
+ return isMovWSymbol(AArch64MCExpr::VK_ABS_G3);
}
bool isMovZSymbolG2() const {
- static AArch64MCExpr::VariantKind Variants[] = {
- AArch64MCExpr::VK_ABS_G2, AArch64MCExpr::VK_ABS_G2_S,
- AArch64MCExpr::VK_TPREL_G2, AArch64MCExpr::VK_DTPREL_G2};
- return isMovWSymbol(Variants);
+ return isMovWSymbol({AArch64MCExpr::VK_ABS_G2, AArch64MCExpr::VK_ABS_G2_S,
+ AArch64MCExpr::VK_TPREL_G2,
+ AArch64MCExpr::VK_DTPREL_G2});
}
bool isMovZSymbolG1() const {
- static AArch64MCExpr::VariantKind Variants[] = {
- AArch64MCExpr::VK_ABS_G1, AArch64MCExpr::VK_ABS_G1_S,
+ return isMovWSymbol({
+ AArch64MCExpr::VK_ABS_G1, AArch64MCExpr::VK_ABS_G1_S,
AArch64MCExpr::VK_GOTTPREL_G1, AArch64MCExpr::VK_TPREL_G1,
AArch64MCExpr::VK_DTPREL_G1,
- };
- return isMovWSymbol(Variants);
+ });
}
bool isMovZSymbolG0() const {
- static AArch64MCExpr::VariantKind Variants[] = {
- AArch64MCExpr::VK_ABS_G0, AArch64MCExpr::VK_ABS_G0_S,
- AArch64MCExpr::VK_TPREL_G0, AArch64MCExpr::VK_DTPREL_G0};
- return isMovWSymbol(Variants);
+ return isMovWSymbol({AArch64MCExpr::VK_ABS_G0, AArch64MCExpr::VK_ABS_G0_S,
+ AArch64MCExpr::VK_TPREL_G0,
+ AArch64MCExpr::VK_DTPREL_G0});
}
bool isMovKSymbolG3() const {
- static AArch64MCExpr::VariantKind Variants[] = { AArch64MCExpr::VK_ABS_G3 };
- return isMovWSymbol(Variants);
+ return isMovWSymbol(AArch64MCExpr::VK_ABS_G3);
}
bool isMovKSymbolG2() const {
- static AArch64MCExpr::VariantKind Variants[] = {
- AArch64MCExpr::VK_ABS_G2_NC};
- return isMovWSymbol(Variants);
+ return isMovWSymbol(AArch64MCExpr::VK_ABS_G2_NC);
}
bool isMovKSymbolG1() const {
- static AArch64MCExpr::VariantKind Variants[] = {
- AArch64MCExpr::VK_ABS_G1_NC, AArch64MCExpr::VK_TPREL_G1_NC,
- AArch64MCExpr::VK_DTPREL_G1_NC
- };
- return isMovWSymbol(Variants);
+ return isMovWSymbol({AArch64MCExpr::VK_ABS_G1_NC,
+ AArch64MCExpr::VK_TPREL_G1_NC,
+ AArch64MCExpr::VK_DTPREL_G1_NC});
}
bool isMovKSymbolG0() const {
- static AArch64MCExpr::VariantKind Variants[] = {
- AArch64MCExpr::VK_ABS_G0_NC, AArch64MCExpr::VK_GOTTPREL_G0_NC,
- AArch64MCExpr::VK_TPREL_G0_NC, AArch64MCExpr::VK_DTPREL_G0_NC
- };
- return isMovWSymbol(Variants);
+ return isMovWSymbol(
+ {AArch64MCExpr::VK_ABS_G0_NC, AArch64MCExpr::VK_GOTTPREL_G0_NC,
+ AArch64MCExpr::VK_TPREL_G0_NC, AArch64MCExpr::VK_DTPREL_G0_NC});
}
template<int RegWidth, int Shift>
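The brace-list rewrite above presumes isMovWSymbol overloads along these lines (a sketch of the assumed shape, not the verbatim parser code): the list form binds through ArrayRef's std::initializer_list constructor, and a single-kind convenience forwards to it.

bool isMovWSymbol(ArrayRef<AArch64MCExpr::VariantKind> AllowedModifiers) const;

bool isMovWSymbol(AArch64MCExpr::VariantKind AllowedModifier) const {
  // Forward the single-kind case to the ArrayRef form.
  return isMovWSymbol(makeArrayRef(AllowedModifier));
}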
@@ -1608,10 +1609,14 @@ public:
return Op;
}
- static std::unique_ptr<AArch64Operand> CreateBarrier(unsigned Val, SMLoc S,
+ static std::unique_ptr<AArch64Operand> CreateBarrier(unsigned Val,
+ StringRef Str,
+ SMLoc S,
MCContext &Ctx) {
auto Op = make_unique<AArch64Operand>(k_Barrier, Ctx);
Op->Barrier.Val = Val;
+ Op->Barrier.Data = Str.data();
+ Op->Barrier.Length = Str.size();
Op->StartLoc = S;
Op->EndLoc = S;
return Op;
@@ -1642,10 +1647,14 @@ public:
return Op;
}
- static std::unique_ptr<AArch64Operand> CreatePrefetch(unsigned Val, SMLoc S,
+ static std::unique_ptr<AArch64Operand> CreatePrefetch(unsigned Val,
+ StringRef Str,
+ SMLoc S,
MCContext &Ctx) {
auto Op = make_unique<AArch64Operand>(k_Prefetch, Ctx);
Op->Prefetch.Val = Val;
+  Op->Prefetch.Data = Str.data();
+  Op->Prefetch.Length = Str.size();
Op->StartLoc = S;
Op->EndLoc = S;
return Op;
@@ -1673,9 +1682,8 @@ void AArch64Operand::print(raw_ostream &OS) const {
<< AArch64_AM::getFPImmFloat(getFPImm()) << ") >";
break;
case k_Barrier: {
- bool Valid;
- StringRef Name = AArch64DB::DBarrierMapper().toString(getBarrier(), Valid);
- if (Valid)
+ StringRef Name = getBarrierName();
+ if (!Name.empty())
OS << "<barrier " << Name << ">";
else
OS << "<barrier invalid #" << getBarrier() << ">";
@@ -1718,9 +1726,8 @@ void AArch64Operand::print(raw_ostream &OS) const {
OS << "c" << getSysCR();
break;
case k_Prefetch: {
- bool Valid;
- StringRef Name = AArch64PRFM::PRFMMapper().toString(getPrefetch(), Valid);
- if (Valid)
+ StringRef Name = getPrefetchName();
+ if (!Name.empty())
OS << "<prfop " << Name << ">";
else
OS << "<prfop invalid #" << getPrefetch() << ">";
@@ -1963,7 +1970,11 @@ AArch64AsmParser::tryParsePrefetch(OperandVector &Operands) {
return MatchOperand_ParseFail;
}
- Operands.push_back(AArch64Operand::CreatePrefetch(prfop, S, getContext()));
+ bool Valid;
+ auto Mapper = AArch64PRFM::PRFMMapper();
+ StringRef Name = Mapper.toString(MCE->getValue(), Valid);
+ Operands.push_back(AArch64Operand::CreatePrefetch(prfop, Name,
+ S, getContext()));
return MatchOperand_Success;
}
@@ -1973,14 +1984,16 @@ AArch64AsmParser::tryParsePrefetch(OperandVector &Operands) {
}
bool Valid;
- unsigned prfop = AArch64PRFM::PRFMMapper().fromString(Tok.getString(), Valid);
+ auto Mapper = AArch64PRFM::PRFMMapper();
+ unsigned prfop = Mapper.fromString(Tok.getString(), Valid);
if (!Valid) {
TokError("pre-fetch hint expected");
return MatchOperand_ParseFail;
}
Parser.Lex(); // Eat identifier token.
- Operands.push_back(AArch64Operand::CreatePrefetch(prfop, S, getContext()));
+ Operands.push_back(AArch64Operand::CreatePrefetch(prfop, Tok.getString(),
+ S, getContext()));
return MatchOperand_Success;
}
@@ -2582,8 +2595,11 @@ AArch64AsmParser::tryParseBarrierOperand(OperandVector &Operands) {
Error(ExprLoc, "barrier operand out of range");
return MatchOperand_ParseFail;
}
- Operands.push_back(
- AArch64Operand::CreateBarrier(MCE->getValue(), ExprLoc, getContext()));
+ bool Valid;
+ auto Mapper = AArch64DB::DBarrierMapper();
+ StringRef Name = Mapper.toString(MCE->getValue(), Valid);
+  Operands.push_back(AArch64Operand::CreateBarrier(MCE->getValue(), Name,
+                                                   ExprLoc, getContext()));
return MatchOperand_Success;
}
@@ -2593,7 +2609,8 @@ AArch64AsmParser::tryParseBarrierOperand(OperandVector &Operands) {
}
bool Valid;
- unsigned Opt = AArch64DB::DBarrierMapper().fromString(Tok.getString(), Valid);
+ auto Mapper = AArch64DB::DBarrierMapper();
+ unsigned Opt = Mapper.fromString(Tok.getString(), Valid);
if (!Valid) {
TokError("invalid barrier option name");
return MatchOperand_ParseFail;
@@ -2605,8 +2622,8 @@ AArch64AsmParser::tryParseBarrierOperand(OperandVector &Operands) {
return MatchOperand_ParseFail;
}
- Operands.push_back(
- AArch64Operand::CreateBarrier(Opt, getLoc(), getContext()));
+  Operands.push_back(AArch64Operand::CreateBarrier(Opt, Tok.getString(),
+                                                   getLoc(), getContext()));
Parser.Lex(); // Consume the option
return MatchOperand_Success;
@@ -2631,8 +2648,8 @@ AArch64AsmParser::tryParseSysReg(OperandVector &Operands) {
assert(IsKnown == (MSRReg != -1U) &&
"register should be -1 if and only if it's unknown");
- uint32_t PStateField =
- AArch64PState::PStateMapper().fromString(Tok.getString(), IsKnown);
+ auto PStateMapper = AArch64PState::PStateMapper();
+ uint32_t PStateField = PStateMapper.fromString(Tok.getString(), IsKnown);
assert(IsKnown == (PStateField != -1U) &&
"register should be -1 if and only if it's unknown");
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
index 423da65..84b63a0 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
@@ -18,6 +18,7 @@
#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/MC/MCValue.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MachO.h"
using namespace llvm;
@@ -493,14 +494,28 @@ void ELFAArch64AsmBackend::processFixupValue(
IsResolved = false;
}
+// Returns whether this fixup is based on an address in the .eh_frame section,
+// and therefore should be byte swapped.
+// FIXME: Should be replaced with something more principled.
+static bool isByteSwappedFixup(const MCExpr *E) {
+ MCValue Val;
+ if (!E->EvaluateAsRelocatable(Val, nullptr, nullptr))
+ return false;
+
+ if (!Val.getSymA() || Val.getSymA()->getSymbol().isUndefined())
+ return false;
+
+ const MCSectionELF *SecELF =
+ dyn_cast<MCSectionELF>(&Val.getSymA()->getSymbol().getSection());
+  return SecELF && SecELF->getSectionName() == ".eh_frame";
+}
+
void ELFAArch64AsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
unsigned DataSize, uint64_t Value,
bool IsPCRel) const {
// store fixups in .eh_frame section in big endian order
if (!IsLittleEndian && Fixup.getKind() == FK_Data_4) {
- const MCSection *Sec = Fixup.getValue()->FindAssociatedSection();
- const MCSectionELF *SecELF = dyn_cast_or_null<const MCSectionELF>(Sec);
- if (SecELF && SecELF->getSectionName() == ".eh_frame")
+ if (isByteSwappedFixup(Fixup.getValue()))
Value = ByteSwap_32(unsigned(Value));
}
AArch64AsmBackend::applyFixup (Fixup, Data, DataSize, Value, IsPCRel);
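A condensed model of the endianness rule being refactored here (a standalone sketch, not the backend API): big-endian AArch64 keeps the instruction stream little-endian, but 4-byte fixups landing in .eh_frame are data and must be stored big-endian, hence the swap.

uint32_t resolveDataFixup(uint32_t Value, bool IsLittleEndian, bool InEhFrame) {
  if (!IsLittleEndian && InEhFrame)
    return ByteSwap_32(Value); // llvm::ByteSwap_32, Support/MathExtras.h
  return Value;
}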
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
index 8dc6c30..8f780d2 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
@@ -203,24 +203,27 @@ void AArch64TargetELFStreamer::emitInst(uint32_t Inst) {
}
namespace llvm {
-MCStreamer *
-createAArch64MCAsmStreamer(MCContext &Ctx, formatted_raw_ostream &OS,
- bool isVerboseAsm, bool useDwarfDirectory,
- MCInstPrinter *InstPrint, MCCodeEmitter *CE,
- MCAsmBackend *TAB, bool ShowInst) {
- MCStreamer *S = llvm::createAsmStreamer(
- Ctx, OS, isVerboseAsm, useDwarfDirectory, InstPrint, CE, TAB, ShowInst);
- new AArch64TargetAsmStreamer(*S, OS);
- return S;
+MCTargetStreamer *createAArch64AsmTargetStreamer(MCStreamer &S,
+ formatted_raw_ostream &OS,
+ MCInstPrinter *InstPrint,
+ bool isVerboseAsm) {
+ return new AArch64TargetAsmStreamer(S, OS);
}
MCELFStreamer *createAArch64ELFStreamer(MCContext &Context, MCAsmBackend &TAB,
raw_ostream &OS, MCCodeEmitter *Emitter,
bool RelaxAll) {
AArch64ELFStreamer *S = new AArch64ELFStreamer(Context, TAB, OS, Emitter);
- new AArch64TargetELFStreamer(*S);
if (RelaxAll)
S->getAssembler().setRelaxAll(true);
return S;
}
+
+MCTargetStreamer *
+createAArch64ObjectTargetStreamer(MCStreamer &S, const MCSubtargetInfo &STI) {
+ Triple TT(STI.getTargetTriple());
+ if (TT.getObjectFormat() == Triple::ELF)
+ return new AArch64TargetELFStreamer(S);
+ return nullptr;
+}
}
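A rough sketch of the new division of labor implied above (the wrapper function is hypothetical; the generic registry layer performs this wiring): the MCStreamer is built first, and the target is then asked for an optional MCTargetStreamer to attach to it.

MCStreamer *buildStreamer(MCContext &Ctx, MCAsmBackend &TAB, raw_ostream &OS,
                          MCCodeEmitter *Emitter, const MCSubtargetInfo &STI,
                          bool RelaxAll) {
  MCStreamer *S = createAArch64ELFStreamer(Ctx, TAB, OS, Emitter, RelaxAll);
  // The target streamer, when one exists, registers itself with S in its
  // constructor; MachO returns null and needs no extra hookup.
  createAArch64ObjectTargetStreamer(*S, STI);
  return S;
}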
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp
index 4756a19..9ea49f0 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp
@@ -38,9 +38,7 @@ class AArch64MCCodeEmitter : public MCCodeEmitter {
AArch64MCCodeEmitter(const AArch64MCCodeEmitter &); // DO NOT IMPLEMENT
void operator=(const AArch64MCCodeEmitter &); // DO NOT IMPLEMENT
public:
- AArch64MCCodeEmitter(const MCInstrInfo &mcii, const MCSubtargetInfo &sti,
- MCContext &ctx)
- : Ctx(ctx) {}
+ AArch64MCCodeEmitter(const MCInstrInfo &mcii, MCContext &ctx) : Ctx(ctx) {}
~AArch64MCCodeEmitter() {}
@@ -205,9 +203,8 @@ public:
MCCodeEmitter *llvm::createAArch64MCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI,
MCContext &Ctx) {
- return new AArch64MCCodeEmitter(MCII, STI, Ctx);
+ return new AArch64MCCodeEmitter(MCII, Ctx);
}
/// getMachineOpValue - Return binary encoding of operand. If the machine
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp
index e396df8..9e31508 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp
@@ -16,6 +16,7 @@
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCELF.h"
+#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCValue.h"
#include "llvm/Object/ELF.h"
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp
index 0f7a6b8..38b399d 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp
@@ -123,94 +123,61 @@ static MCInstPrinter *createAArch64MCInstPrinter(const Target &T,
return nullptr;
}
-static MCStreamer *createMCStreamer(const Target &T, StringRef TT,
- MCContext &Ctx, MCAsmBackend &TAB,
- raw_ostream &OS, MCCodeEmitter *Emitter,
- const MCSubtargetInfo &STI, bool RelaxAll) {
- Triple TheTriple(TT);
-
- if (TheTriple.isOSDarwin())
- return createMachOStreamer(Ctx, TAB, OS, Emitter, RelaxAll,
- /*LabelSections*/ true);
-
+static MCStreamer *createELFStreamer(const Triple &T, MCContext &Ctx,
+ MCAsmBackend &TAB, raw_ostream &OS,
+ MCCodeEmitter *Emitter, bool RelaxAll) {
return createAArch64ELFStreamer(Ctx, TAB, OS, Emitter, RelaxAll);
}
+static MCStreamer *createMachOStreamer(MCContext &Ctx, MCAsmBackend &TAB,
+ raw_ostream &OS, MCCodeEmitter *Emitter,
+ bool RelaxAll,
+ bool DWARFMustBeAtTheEnd) {
+ return createMachOStreamer(Ctx, TAB, OS, Emitter, RelaxAll,
+ DWARFMustBeAtTheEnd,
+ /*LabelSections*/ true);
+}
+
// Force static initialization.
extern "C" void LLVMInitializeAArch64TargetMC() {
- // Register the MC asm info.
- RegisterMCAsmInfoFn X(TheAArch64leTarget, createAArch64MCAsmInfo);
- RegisterMCAsmInfoFn Y(TheAArch64beTarget, createAArch64MCAsmInfo);
- RegisterMCAsmInfoFn Z(TheARM64Target, createAArch64MCAsmInfo);
-
- // Register the MC codegen info.
- TargetRegistry::RegisterMCCodeGenInfo(TheAArch64leTarget,
- createAArch64MCCodeGenInfo);
- TargetRegistry::RegisterMCCodeGenInfo(TheAArch64beTarget,
- createAArch64MCCodeGenInfo);
- TargetRegistry::RegisterMCCodeGenInfo(TheARM64Target,
- createAArch64MCCodeGenInfo);
-
- // Register the MC instruction info.
- TargetRegistry::RegisterMCInstrInfo(TheAArch64leTarget,
- createAArch64MCInstrInfo);
- TargetRegistry::RegisterMCInstrInfo(TheAArch64beTarget,
- createAArch64MCInstrInfo);
- TargetRegistry::RegisterMCInstrInfo(TheARM64Target,
- createAArch64MCInstrInfo);
-
- // Register the MC register info.
- TargetRegistry::RegisterMCRegInfo(TheAArch64leTarget,
- createAArch64MCRegisterInfo);
- TargetRegistry::RegisterMCRegInfo(TheAArch64beTarget,
- createAArch64MCRegisterInfo);
- TargetRegistry::RegisterMCRegInfo(TheARM64Target,
- createAArch64MCRegisterInfo);
-
- // Register the MC subtarget info.
- TargetRegistry::RegisterMCSubtargetInfo(TheAArch64leTarget,
- createAArch64MCSubtargetInfo);
- TargetRegistry::RegisterMCSubtargetInfo(TheAArch64beTarget,
- createAArch64MCSubtargetInfo);
- TargetRegistry::RegisterMCSubtargetInfo(TheARM64Target,
- createAArch64MCSubtargetInfo);
+ for (Target *T :
+ {&TheAArch64leTarget, &TheAArch64beTarget, &TheARM64Target}) {
+ // Register the MC asm info.
+ RegisterMCAsmInfoFn X(*T, createAArch64MCAsmInfo);
+
+ // Register the MC codegen info.
+ TargetRegistry::RegisterMCCodeGenInfo(*T, createAArch64MCCodeGenInfo);
+
+ // Register the MC instruction info.
+ TargetRegistry::RegisterMCInstrInfo(*T, createAArch64MCInstrInfo);
+
+ // Register the MC register info.
+ TargetRegistry::RegisterMCRegInfo(*T, createAArch64MCRegisterInfo);
+
+ // Register the MC subtarget info.
+ TargetRegistry::RegisterMCSubtargetInfo(*T, createAArch64MCSubtargetInfo);
+
+ // Register the MC Code Emitter
+ TargetRegistry::RegisterMCCodeEmitter(*T, createAArch64MCCodeEmitter);
+
+ // Register the obj streamers.
+ TargetRegistry::RegisterELFStreamer(*T, createELFStreamer);
+ TargetRegistry::RegisterMachOStreamer(*T, createMachOStreamer);
+
+ // Register the obj target streamer.
+ TargetRegistry::RegisterObjectTargetStreamer(
+ *T, createAArch64ObjectTargetStreamer);
+
+ // Register the asm streamer.
+ TargetRegistry::RegisterAsmTargetStreamer(*T,
+ createAArch64AsmTargetStreamer);
+ // Register the MCInstPrinter.
+ TargetRegistry::RegisterMCInstPrinter(*T, createAArch64MCInstPrinter);
+ }
// Register the asm backend.
- TargetRegistry::RegisterMCAsmBackend(TheAArch64leTarget,
- createAArch64leAsmBackend);
+ for (Target *T : {&TheAArch64leTarget, &TheARM64Target})
+ TargetRegistry::RegisterMCAsmBackend(*T, createAArch64leAsmBackend);
TargetRegistry::RegisterMCAsmBackend(TheAArch64beTarget,
createAArch64beAsmBackend);
- TargetRegistry::RegisterMCAsmBackend(TheARM64Target,
- createAArch64leAsmBackend);
-
- // Register the MC Code Emitter
- TargetRegistry::RegisterMCCodeEmitter(TheAArch64leTarget,
- createAArch64MCCodeEmitter);
- TargetRegistry::RegisterMCCodeEmitter(TheAArch64beTarget,
- createAArch64MCCodeEmitter);
- TargetRegistry::RegisterMCCodeEmitter(TheARM64Target,
- createAArch64MCCodeEmitter);
-
- // Register the object streamer.
- TargetRegistry::RegisterMCObjectStreamer(TheAArch64leTarget,
- createMCStreamer);
- TargetRegistry::RegisterMCObjectStreamer(TheAArch64beTarget,
- createMCStreamer);
- TargetRegistry::RegisterMCObjectStreamer(TheARM64Target, createMCStreamer);
-
- // Register the asm streamer.
- TargetRegistry::RegisterAsmStreamer(TheAArch64leTarget,
- createAArch64MCAsmStreamer);
- TargetRegistry::RegisterAsmStreamer(TheAArch64beTarget,
- createAArch64MCAsmStreamer);
- TargetRegistry::RegisterAsmStreamer(TheARM64Target,
- createAArch64MCAsmStreamer);
-
- // Register the MCInstPrinter.
- TargetRegistry::RegisterMCInstPrinter(TheAArch64leTarget,
- createAArch64MCInstPrinter);
- TargetRegistry::RegisterMCInstPrinter(TheAArch64beTarget,
- createAArch64MCInstPrinter);
- TargetRegistry::RegisterMCInstPrinter(TheARM64Target,
- createAArch64MCInstPrinter);
}
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h
index 1553115..7ce303b 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h
@@ -28,8 +28,10 @@ class MCRegisterInfo;
class MCObjectWriter;
class MCStreamer;
class MCSubtargetInfo;
+class MCTargetStreamer;
class StringRef;
class Target;
+class Triple;
class raw_ostream;
extern Target TheAArch64leTarget;
@@ -37,9 +39,8 @@ extern Target TheAArch64beTarget;
extern Target TheARM64Target;
MCCodeEmitter *createAArch64MCCodeEmitter(const MCInstrInfo &MCII,
- const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI,
- MCContext &Ctx);
+ const MCRegisterInfo &MRI,
+ MCContext &Ctx);
MCAsmBackend *createAArch64leAsmBackend(const Target &T,
const MCRegisterInfo &MRI, StringRef TT,
StringRef CPU);
@@ -53,11 +54,14 @@ MCObjectWriter *createAArch64ELFObjectWriter(raw_ostream &OS, uint8_t OSABI,
MCObjectWriter *createAArch64MachObjectWriter(raw_ostream &OS, uint32_t CPUType,
uint32_t CPUSubtype);
-MCStreamer *
-createAArch64MCAsmStreamer(MCContext &Ctx, formatted_raw_ostream &OS,
- bool isVerboseAsm, bool useDwarfDirectory,
- MCInstPrinter *InstPrint, MCCodeEmitter *CE,
- MCAsmBackend *TAB, bool ShowInst);
+MCTargetStreamer *createAArch64AsmTargetStreamer(MCStreamer &S,
+ formatted_raw_ostream &OS,
+ MCInstPrinter *InstPrint,
+ bool isVerboseAsm);
+
+MCTargetStreamer *createAArch64ObjectTargetStreamer(MCStreamer &S,
+ const MCSubtargetInfo &STI);
+
} // End llvm namespace
// Defines symbolic names for AArch64 registers. This defines a mapping from
diff --git a/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp b/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp
index bc6c7a9..160c1c5 100644
--- a/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp
+++ b/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp
@@ -19,10 +19,10 @@
using namespace llvm;
StringRef AArch64NamedImmMapper::toString(uint32_t Value, bool &Valid) const {
- for (unsigned i = 0; i < NumPairs; ++i) {
- if (Pairs[i].Value == Value) {
+ for (unsigned i = 0; i < NumMappings; ++i) {
+ if (Mappings[i].Value == Value) {
Valid = true;
- return Pairs[i].Name;
+ return Mappings[i].Name;
}
}
@@ -32,10 +32,10 @@ StringRef AArch64NamedImmMapper::toString(uint32_t Value, bool &Valid) const {
uint32_t AArch64NamedImmMapper::fromString(StringRef Name, bool &Valid) const {
std::string LowerCaseName = Name.lower();
- for (unsigned i = 0; i < NumPairs; ++i) {
- if (Pairs[i].Name == LowerCaseName) {
+ for (unsigned i = 0; i < NumMappings; ++i) {
+ if (Mappings[i].Name == LowerCaseName) {
Valid = true;
- return Pairs[i].Value;
+ return Mappings[i].Value;
}
}
@@ -47,7 +47,7 @@ bool AArch64NamedImmMapper::validImm(uint32_t Value) const {
return Value < TooBigImm;
}
-const AArch64NamedImmMapper::Mapping AArch64AT::ATMapper::ATPairs[] = {
+const AArch64NamedImmMapper::Mapping AArch64AT::ATMapper::ATMappings[] = {
{"s1e1r", S1E1R},
{"s1e2r", S1E2R},
{"s1e3r", S1E3R},
@@ -63,9 +63,9 @@ const AArch64NamedImmMapper::Mapping AArch64AT::ATMapper::ATPairs[] = {
};
AArch64AT::ATMapper::ATMapper()
- : AArch64NamedImmMapper(ATPairs, 0) {}
+ : AArch64NamedImmMapper(ATMappings, 0) {}
-const AArch64NamedImmMapper::Mapping AArch64DB::DBarrierMapper::DBarrierPairs[] = {
+const AArch64NamedImmMapper::Mapping AArch64DB::DBarrierMapper::DBarrierMappings[] = {
{"oshld", OSHLD},
{"oshst", OSHST},
{"osh", OSH},
@@ -81,9 +81,9 @@ const AArch64NamedImmMapper::Mapping AArch64DB::DBarrierMapper::DBarrierPairs[]
};
AArch64DB::DBarrierMapper::DBarrierMapper()
- : AArch64NamedImmMapper(DBarrierPairs, 16u) {}
+ : AArch64NamedImmMapper(DBarrierMappings, 16u) {}
-const AArch64NamedImmMapper::Mapping AArch64DC::DCMapper::DCPairs[] = {
+const AArch64NamedImmMapper::Mapping AArch64DC::DCMapper::DCMappings[] = {
{"zva", ZVA},
{"ivac", IVAC},
{"isw", ISW},
@@ -95,25 +95,25 @@ const AArch64NamedImmMapper::Mapping AArch64DC::DCMapper::DCPairs[] = {
};
AArch64DC::DCMapper::DCMapper()
- : AArch64NamedImmMapper(DCPairs, 0) {}
+ : AArch64NamedImmMapper(DCMappings, 0) {}
-const AArch64NamedImmMapper::Mapping AArch64IC::ICMapper::ICPairs[] = {
+const AArch64NamedImmMapper::Mapping AArch64IC::ICMapper::ICMappings[] = {
{"ialluis", IALLUIS},
{"iallu", IALLU},
{"ivau", IVAU}
};
AArch64IC::ICMapper::ICMapper()
- : AArch64NamedImmMapper(ICPairs, 0) {}
+ : AArch64NamedImmMapper(ICMappings, 0) {}
-const AArch64NamedImmMapper::Mapping AArch64ISB::ISBMapper::ISBPairs[] = {
+const AArch64NamedImmMapper::Mapping AArch64ISB::ISBMapper::ISBMappings[] = {
{"sy", SY},
};
AArch64ISB::ISBMapper::ISBMapper()
- : AArch64NamedImmMapper(ISBPairs, 16) {}
+ : AArch64NamedImmMapper(ISBMappings, 16) {}
-const AArch64NamedImmMapper::Mapping AArch64PRFM::PRFMMapper::PRFMPairs[] = {
+const AArch64NamedImmMapper::Mapping AArch64PRFM::PRFMMapper::PRFMMappings[] = {
{"pldl1keep", PLDL1KEEP},
{"pldl1strm", PLDL1STRM},
{"pldl2keep", PLDL2KEEP},
@@ -135,18 +135,18 @@ const AArch64NamedImmMapper::Mapping AArch64PRFM::PRFMMapper::PRFMPairs[] = {
};
AArch64PRFM::PRFMMapper::PRFMMapper()
- : AArch64NamedImmMapper(PRFMPairs, 32) {}
+ : AArch64NamedImmMapper(PRFMMappings, 32) {}
-const AArch64NamedImmMapper::Mapping AArch64PState::PStateMapper::PStatePairs[] = {
+const AArch64NamedImmMapper::Mapping AArch64PState::PStateMapper::PStateMappings[] = {
{"spsel", SPSel},
{"daifset", DAIFSet},
{"daifclr", DAIFClr}
};
AArch64PState::PStateMapper::PStateMapper()
- : AArch64NamedImmMapper(PStatePairs, 0) {}
+ : AArch64NamedImmMapper(PStateMappings, 0) {}
-const AArch64NamedImmMapper::Mapping AArch64SysReg::MRSMapper::MRSPairs[] = {
+const AArch64NamedImmMapper::Mapping AArch64SysReg::MRSMapper::MRSMappings[] = {
{"mdccsr_el0", MDCCSR_EL0},
{"dbgdtrrx_el0", DBGDTRRX_EL0},
{"mdrar_el1", MDRAR_EL1},
@@ -247,11 +247,11 @@ const AArch64NamedImmMapper::Mapping AArch64SysReg::MRSMapper::MRSPairs[] = {
AArch64SysReg::MRSMapper::MRSMapper(uint64_t FeatureBits)
: SysRegMapper(FeatureBits) {
- InstPairs = &MRSPairs[0];
- NumInstPairs = llvm::array_lengthof(MRSPairs);
+ InstMappings = &MRSMappings[0];
+ NumInstMappings = llvm::array_lengthof(MRSMappings);
}
-const AArch64NamedImmMapper::Mapping AArch64SysReg::MSRMapper::MSRPairs[] = {
+const AArch64NamedImmMapper::Mapping AArch64SysReg::MSRMapper::MSRMappings[] = {
{"dbgdtrtx_el0", DBGDTRTX_EL0},
{"oslar_el1", OSLAR_EL1},
{"pmswinc_el0", PMSWINC_EL0},
@@ -271,12 +271,12 @@ const AArch64NamedImmMapper::Mapping AArch64SysReg::MSRMapper::MSRPairs[] = {
AArch64SysReg::MSRMapper::MSRMapper(uint64_t FeatureBits)
: SysRegMapper(FeatureBits) {
- InstPairs = &MSRPairs[0];
- NumInstPairs = llvm::array_lengthof(MSRPairs);
+ InstMappings = &MSRMappings[0];
+ NumInstMappings = llvm::array_lengthof(MSRMappings);
}
-const AArch64NamedImmMapper::Mapping AArch64SysReg::SysRegMapper::SysRegPairs[] = {
+const AArch64NamedImmMapper::Mapping AArch64SysReg::SysRegMapper::SysRegMappings[] = {
{"osdtrrx_el1", OSDTRRX_EL1},
{"osdtrtx_el1", OSDTRTX_EL1},
{"teecr32_el1", TEECR32_EL1},
@@ -756,7 +756,7 @@ const AArch64NamedImmMapper::Mapping AArch64SysReg::SysRegMapper::SysRegPairs[]
};
const AArch64NamedImmMapper::Mapping
-AArch64SysReg::SysRegMapper::CycloneSysRegPairs[] = {
+AArch64SysReg::SysRegMapper::CycloneSysRegMappings[] = {
{"cpm_ioacc_ctl_el3", CPM_IOACC_CTL_EL3}
};
@@ -765,29 +765,29 @@ AArch64SysReg::SysRegMapper::fromString(StringRef Name, bool &Valid) const {
std::string NameLower = Name.lower();
// First search the registers shared by all
- for (unsigned i = 0; i < array_lengthof(SysRegPairs); ++i) {
- if (SysRegPairs[i].Name == NameLower) {
+ for (unsigned i = 0; i < array_lengthof(SysRegMappings); ++i) {
+ if (SysRegMappings[i].Name == NameLower) {
Valid = true;
- return SysRegPairs[i].Value;
+ return SysRegMappings[i].Value;
}
}
// Next search for target specific registers
if (FeatureBits & AArch64::ProcCyclone) {
- for (unsigned i = 0; i < array_lengthof(CycloneSysRegPairs); ++i) {
- if (CycloneSysRegPairs[i].Name == NameLower) {
+ for (unsigned i = 0; i < array_lengthof(CycloneSysRegMappings); ++i) {
+ if (CycloneSysRegMappings[i].Name == NameLower) {
Valid = true;
- return CycloneSysRegPairs[i].Value;
+ return CycloneSysRegMappings[i].Value;
}
}
}
// Now try the instruction-specific registers (either read-only or
// write-only).
- for (unsigned i = 0; i < NumInstPairs; ++i) {
- if (InstPairs[i].Name == NameLower) {
+ for (unsigned i = 0; i < NumInstMappings; ++i) {
+ if (InstMappings[i].Name == NameLower) {
Valid = true;
- return InstPairs[i].Value;
+ return InstMappings[i].Value;
}
}
@@ -816,26 +816,26 @@ AArch64SysReg::SysRegMapper::fromString(StringRef Name, bool &Valid) const {
std::string
AArch64SysReg::SysRegMapper::toString(uint32_t Bits) const {
// First search the registers shared by all
- for (unsigned i = 0; i < array_lengthof(SysRegPairs); ++i) {
- if (SysRegPairs[i].Value == Bits) {
- return SysRegPairs[i].Name;
+ for (unsigned i = 0; i < array_lengthof(SysRegMappings); ++i) {
+ if (SysRegMappings[i].Value == Bits) {
+ return SysRegMappings[i].Name;
}
}
// Next search for target specific registers
if (FeatureBits & AArch64::ProcCyclone) {
- for (unsigned i = 0; i < array_lengthof(CycloneSysRegPairs); ++i) {
- if (CycloneSysRegPairs[i].Value == Bits) {
- return CycloneSysRegPairs[i].Name;
+ for (unsigned i = 0; i < array_lengthof(CycloneSysRegMappings); ++i) {
+ if (CycloneSysRegMappings[i].Value == Bits) {
+ return CycloneSysRegMappings[i].Name;
}
}
}
// Now try the instruction-specific registers (either read-only or
// write-only).
- for (unsigned i = 0; i < NumInstPairs; ++i) {
- if (InstPairs[i].Value == Bits) {
- return InstPairs[i].Name;
+ for (unsigned i = 0; i < NumInstMappings; ++i) {
+ if (InstMappings[i].Value == Bits) {
+ return InstMappings[i].Name;
}
}
@@ -850,7 +850,7 @@ AArch64SysReg::SysRegMapper::toString(uint32_t Bits) const {
+ "_c" + utostr(CRm) + "_" + utostr(Op2);
}
-const AArch64NamedImmMapper::Mapping AArch64TLBI::TLBIMapper::TLBIPairs[] = {
+const AArch64NamedImmMapper::Mapping AArch64TLBI::TLBIMapper::TLBIMappings[] = {
{"ipas2e1is", IPAS2E1IS},
{"ipas2le1is", IPAS2LE1IS},
{"vmalle1is", VMALLE1IS},
@@ -886,4 +886,4 @@ const AArch64NamedImmMapper::Mapping AArch64TLBI::TLBIMapper::TLBIPairs[] = {
};
AArch64TLBI::TLBIMapper::TLBIMapper()
- : AArch64NamedImmMapper(TLBIPairs, 0) {}
+ : AArch64NamedImmMapper(TLBIMappings, 0) {}
diff --git a/lib/Target/AArch64/Utils/AArch64BaseInfo.h b/lib/Target/AArch64/Utils/AArch64BaseInfo.h
index c60b09a..2ae6f52 100644
--- a/lib/Target/AArch64/Utils/AArch64BaseInfo.h
+++ b/lib/Target/AArch64/Utils/AArch64BaseInfo.h
@@ -283,8 +283,8 @@ struct AArch64NamedImmMapper {
};
template<int N>
- AArch64NamedImmMapper(const Mapping (&Pairs)[N], uint32_t TooBigImm)
- : Pairs(&Pairs[0]), NumPairs(N), TooBigImm(TooBigImm) {}
+ AArch64NamedImmMapper(const Mapping (&Mappings)[N], uint32_t TooBigImm)
+ : Mappings(&Mappings[0]), NumMappings(N), TooBigImm(TooBigImm) {}
StringRef toString(uint32_t Value, bool &Valid) const;
uint32_t fromString(StringRef Name, bool &Valid) const;
@@ -294,8 +294,8 @@ struct AArch64NamedImmMapper {
/// N being 0 indicates no immediate syntax-form is allowed.
bool validImm(uint32_t Value) const;
protected:
- const Mapping *Pairs;
- size_t NumPairs;
+ const Mapping *Mappings;
+ size_t NumMappings;
uint32_t TooBigImm;
};
@@ -317,7 +317,7 @@ namespace AArch64AT {
};
struct ATMapper : AArch64NamedImmMapper {
- const static Mapping ATPairs[];
+ const static Mapping ATMappings[];
ATMapper();
};
@@ -341,7 +341,7 @@ namespace AArch64DB {
};
struct DBarrierMapper : AArch64NamedImmMapper {
- const static Mapping DBarrierPairs[];
+ const static Mapping DBarrierMappings[];
DBarrierMapper();
};
@@ -361,7 +361,7 @@ namespace AArch64DC {
};
struct DCMapper : AArch64NamedImmMapper {
- const static Mapping DCPairs[];
+ const static Mapping DCMappings[];
DCMapper();
};
@@ -378,7 +378,7 @@ namespace AArch64IC {
struct ICMapper : AArch64NamedImmMapper {
- const static Mapping ICPairs[];
+ const static Mapping ICMappings[];
ICMapper();
};
@@ -394,7 +394,7 @@ namespace AArch64ISB {
SY = 0xf
};
struct ISBMapper : AArch64NamedImmMapper {
- const static Mapping ISBPairs[];
+ const static Mapping ISBMappings[];
ISBMapper();
};
@@ -424,7 +424,7 @@ namespace AArch64PRFM {
};
struct PRFMMapper : AArch64NamedImmMapper {
- const static Mapping PRFMPairs[];
+ const static Mapping PRFMMappings[];
PRFMMapper();
};
@@ -439,7 +439,7 @@ namespace AArch64PState {
};
struct PStateMapper : AArch64NamedImmMapper {
- const static Mapping PStatePairs[];
+ const static Mapping PStateMappings[];
PStateMapper();
};
@@ -1134,11 +1134,11 @@ namespace AArch64SysReg {
// burdening the common AArch64NamedImmMapper with abstractions only needed in
// this one case.
struct SysRegMapper {
- static const AArch64NamedImmMapper::Mapping SysRegPairs[];
- static const AArch64NamedImmMapper::Mapping CycloneSysRegPairs[];
+ static const AArch64NamedImmMapper::Mapping SysRegMappings[];
+ static const AArch64NamedImmMapper::Mapping CycloneSysRegMappings[];
- const AArch64NamedImmMapper::Mapping *InstPairs;
- size_t NumInstPairs;
+ const AArch64NamedImmMapper::Mapping *InstMappings;
+ size_t NumInstMappings;
uint64_t FeatureBits;
SysRegMapper(uint64_t FeatureBits) : FeatureBits(FeatureBits) { }
@@ -1147,12 +1147,12 @@ namespace AArch64SysReg {
};
struct MSRMapper : SysRegMapper {
- static const AArch64NamedImmMapper::Mapping MSRPairs[];
+ static const AArch64NamedImmMapper::Mapping MSRMappings[];
MSRMapper(uint64_t FeatureBits);
};
struct MRSMapper : SysRegMapper {
- static const AArch64NamedImmMapper::Mapping MRSPairs[];
+ static const AArch64NamedImmMapper::Mapping MRSMappings[];
MRSMapper(uint64_t FeatureBits);
};
@@ -1197,7 +1197,7 @@ namespace AArch64TLBI {
};
struct TLBIMapper : AArch64NamedImmMapper {
- const static Mapping TLBIPairs[];
+ const static Mapping TLBIMappings[];
TLBIMapper();
};
@@ -1229,7 +1229,7 @@ namespace AArch64II {
MO_NO_FLAG,
- MO_FRAGMENT = 0x7,
+ MO_FRAGMENT = 0xf,
/// MO_PAGE - A symbol operand with this flag represents the pc-relative
/// offset of the 4K page containing the symbol. This is used with the
@@ -1257,26 +1257,31 @@ namespace AArch64II {
/// 0-15 of a 64-bit address, used in a MOVZ or MOVK instruction
MO_G0 = 6,
+  /// MO_HI12 - This flag indicates that a symbol operand represents the bits
+  /// 12-23 of a 64-bit address, used in an arithmetic immediate-shifted-left-
+  /// by-12-bits instruction.
+ MO_HI12 = 7,
+
/// MO_GOT - This flag indicates that a symbol operand represents the
/// address of the GOT entry for the symbol, rather than the address of
/// the symbol itself.
- MO_GOT = 8,
+ MO_GOT = 0x10,
/// MO_NC - Indicates whether the linker is expected to check the symbol
/// reference for overflow. For example in an ADRP/ADD pair of relocations
/// the ADRP usually does check, but not the ADD.
- MO_NC = 0x10,
+ MO_NC = 0x20,
/// MO_TLS - Indicates that the operand being accessed is some kind of
/// thread-local symbol. On Darwin, only one type of thread-local access
/// exists (pre linker-relaxation), but on ELF the TLSModel used for the
/// referee will affect interpretation.
- MO_TLS = 0x20,
+ MO_TLS = 0x40,
/// MO_CONSTPOOL - This flag indicates that a symbol operand represents
/// the address of a constant pool entry for the symbol, rather than the
/// address of the symbol itself.
- MO_CONSTPOOL = 0x40
+ MO_CONSTPOOL = 0x80
};
} // end namespace AArch64II
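A worked illustration of the repacked flags (the helpers are hypothetical): the fragment field grew from three bits to four to make room for MO_HI12 = 7, so every standalone flag above it shifted up one bit.

unsigned fragmentOf(unsigned TargetFlags) {
  return TargetFlags & 0xf;         // MO_FRAGMENT: was 0x7, now 0xf
}
bool hasGOTFlag(unsigned TargetFlags) {
  return (TargetFlags & 0x10) != 0; // MO_GOT: was 8, now 0x10
}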
diff --git a/lib/Target/ARM/A15SDOptimizer.cpp b/lib/Target/ARM/A15SDOptimizer.cpp
index 387f1f6..7a1865c 100644
--- a/lib/Target/ARM/A15SDOptimizer.cpp
+++ b/lib/Target/ARM/A15SDOptimizer.cpp
@@ -27,12 +27,15 @@
#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMBaseRegisterInfo.h"
+#include "ARMSubtarget.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include <map>
@@ -678,8 +681,13 @@ bool A15SDOptimizer::runOnInstruction(MachineInstr *MI) {
}
bool A15SDOptimizer::runOnMachineFunction(MachineFunction &Fn) {
- TII = static_cast<const ARMBaseInstrInfo *>(Fn.getSubtarget().getInstrInfo());
- TRI = Fn.getSubtarget().getRegisterInfo();
+ const ARMSubtarget &STI = Fn.getSubtarget<ARMSubtarget>();
+ // Since the A15SDOptimizer pass can insert VDUP instructions, it can only be
+ // enabled when NEON is available.
+ if (!(STI.isCortexA15() && STI.hasNEON()))
+ return false;
+ TII = STI.getInstrInfo();
+ TRI = STI.getRegisterInfo();
MRI = &Fn.getRegInfo();
bool Modified = false;
diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td
index f080c60..ce0aed9 100644
--- a/lib/Target/ARM/ARM.td
+++ b/lib/Target/ARM/ARM.td
@@ -167,9 +167,12 @@ def HasV6Ops : SubtargetFeature<"v6", "HasV6Ops", "true",
def HasV6MOps : SubtargetFeature<"v6m", "HasV6MOps", "true",
"Support ARM v6M instructions",
[HasV6Ops]>;
+def HasV6KOps : SubtargetFeature<"v6k", "HasV6KOps", "true",
+ "Support ARM v6k instructions",
+ [HasV6Ops]>;
def HasV6T2Ops : SubtargetFeature<"v6t2", "HasV6T2Ops", "true",
"Support ARM v6t2 instructions",
- [HasV6MOps, FeatureThumb2]>;
+ [HasV6MOps, HasV6KOps, FeatureThumb2]>;
def HasV7Ops : SubtargetFeature<"v7", "HasV7Ops", "true",
"Support ARM v7 instructions",
[HasV6T2Ops, FeaturePerfMon]>;
@@ -177,6 +180,9 @@ def HasV8Ops : SubtargetFeature<"v8", "HasV8Ops", "true",
"Support ARM v8 instructions",
[HasV7Ops, FeatureVirtualization,
FeatureMP]>;
+def FeatureV8_1a : SubtargetFeature<"v8.1a", "HasV8_1a", "true",
+ "Support ARM v8.1a instructions",
+ [HasV8Ops, FeatureAClass, FeatureCRC]>;
//===----------------------------------------------------------------------===//
// ARM Processors supported.
@@ -320,12 +326,6 @@ def : ProcNoItin<"iwmmxt", [HasV5TEOps]>;
def : Processor<"arm1136j-s", ARMV6Itineraries, [HasV6Ops]>;
def : Processor<"arm1136jf-s", ARMV6Itineraries, [HasV6Ops, FeatureVFP2,
FeatureHasSlowFPVMLx]>;
-def : Processor<"arm1176jz-s", ARMV6Itineraries, [HasV6Ops]>;
-def : Processor<"arm1176jzf-s", ARMV6Itineraries, [HasV6Ops, FeatureVFP2,
- FeatureHasSlowFPVMLx]>;
-def : Processor<"mpcorenovfp", ARMV6Itineraries, [HasV6Ops]>;
-def : Processor<"mpcore", ARMV6Itineraries, [HasV6Ops, FeatureVFP2,
- FeatureHasSlowFPVMLx]>;
// V6M Processors.
def : Processor<"cortex-m0", ARMV6Itineraries, [HasV6MOps, FeatureNoARM,
@@ -337,6 +337,14 @@ def : Processor<"cortex-m1", ARMV6Itineraries, [HasV6MOps, FeatureNoARM,
def : Processor<"sc000", ARMV6Itineraries, [HasV6MOps, FeatureNoARM,
FeatureDB, FeatureMClass]>;
+// V6K Processors.
+def : Processor<"arm1176jz-s", ARMV6Itineraries, [HasV6KOps]>;
+def : Processor<"arm1176jzf-s", ARMV6Itineraries, [HasV6KOps, FeatureVFP2,
+ FeatureHasSlowFPVMLx]>;
+def : Processor<"mpcorenovfp", ARMV6Itineraries, [HasV6KOps]>;
+def : Processor<"mpcore", ARMV6Itineraries, [HasV6KOps, FeatureVFP2,
+ FeatureHasSlowFPVMLx]>;
+
// V6T2 Processors.
def : Processor<"arm1156t2-s", ARMV6Itineraries, [HasV6T2Ops,
FeatureDSPThumb2]>;
@@ -449,6 +457,14 @@ def : ProcessorModel<"cyclone", SwiftModel,
FeatureDB,FeatureDSPThumb2,
FeatureHasRAS, FeatureZCZeroing]>;
+// V8.1 Processors
+def : ProcNoItin<"generic-armv8.1-a", [HasV8Ops, FeatureV8_1a,
+ FeatureDB, FeatureFPARMv8,
+ FeatureNEON, FeatureDSPThumb2,
+ FeatureHWDiv, FeatureHWDivARM,
+ FeatureTrustZone, FeatureT2XtPk,
+ FeatureCrypto]>;
+
//===----------------------------------------------------------------------===//
// Register File Description
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp
index 2544a01..102def1 100644
--- a/lib/Target/ARM/ARMAsmPrinter.cpp
+++ b/lib/Target/ARM/ARMAsmPrinter.cpp
@@ -120,9 +120,6 @@ bool ARMAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
OutStreamer.EndCOFFSymbolDef();
}
- // Have common code print out the function header with linkage info etc.
- EmitFunctionHeader();
-
// Emit the rest of the function body.
EmitFunctionBody();
@@ -438,65 +435,6 @@ void ARMAsmPrinter::emitInlineAsmEnd(const MCSubtargetInfo &StartInfo,
void ARMAsmPrinter::EmitStartOfAsmFile(Module &M) {
Triple TT(TM.getTargetTriple());
- if (TT.isOSBinFormatMachO()) {
- Reloc::Model RelocM = TM.getRelocationModel();
- if (RelocM == Reloc::PIC_ || RelocM == Reloc::DynamicNoPIC) {
- // Declare all the text sections up front (before the DWARF sections
- // emitted by AsmPrinter::doInitialization) so the assembler will keep
- // them together at the beginning of the object file. This helps
- // avoid out-of-range branches that are due a fundamental limitation of
- // the way symbol offsets are encoded with the current Darwin ARM
- // relocations.
- const TargetLoweringObjectFileMachO &TLOFMacho =
- static_cast<const TargetLoweringObjectFileMachO &>(
- getObjFileLowering());
-
- // Collect the set of sections our functions will go into.
- SetVector<const MCSection *, SmallVector<const MCSection *, 8>,
- SmallPtrSet<const MCSection *, 8> > TextSections;
- // Default text section comes first.
- TextSections.insert(TLOFMacho.getTextSection());
- // Now any user defined text sections from function attributes.
- for (Module::iterator F = M.begin(), e = M.end(); F != e; ++F)
- if (!F->isDeclaration() && !F->hasAvailableExternallyLinkage())
- TextSections.insert(TLOFMacho.SectionForGlobal(F, *Mang, TM));
- // Now the coalescable sections.
- TextSections.insert(TLOFMacho.getTextCoalSection());
- TextSections.insert(TLOFMacho.getConstTextCoalSection());
-
- // Emit the sections in the .s file header to fix the order.
- for (unsigned i = 0, e = TextSections.size(); i != e; ++i)
- OutStreamer.SwitchSection(TextSections[i]);
-
- if (RelocM == Reloc::DynamicNoPIC) {
- const MCSection *sect =
- OutContext.getMachOSection("__TEXT", "__symbol_stub4",
- MachO::S_SYMBOL_STUBS,
- 12, SectionKind::getText());
- OutStreamer.SwitchSection(sect);
- } else {
- const MCSection *sect =
- OutContext.getMachOSection("__TEXT", "__picsymbolstub4",
- MachO::S_SYMBOL_STUBS,
- 16, SectionKind::getText());
- OutStreamer.SwitchSection(sect);
- }
- const MCSection *StaticInitSect =
- OutContext.getMachOSection("__TEXT", "__StaticInit",
- MachO::S_REGULAR |
- MachO::S_ATTR_PURE_INSTRUCTIONS,
- SectionKind::getText());
- OutStreamer.SwitchSection(StaticInitSect);
- }
-
- // Compiling with debug info should not affect the code
- // generation. Ensure the cstring section comes before the
- // optional __DWARF secion. Otherwise, PC-relative loads would
- // have to use different instruction sequences at "-g" in order to
- // reach global data in the same object file.
- OutStreamer.SwitchSection(getObjFileLowering().getCStringSection());
- }
-
// Use unified assembler syntax.
OutStreamer.EmitAssemblerFlag(MCAF_SyntaxUnified);
@@ -669,7 +607,7 @@ void ARMAsmPrinter::emitAttributes() {
std::string CPUString = STI.getCPUString();
- if (CPUString != "generic") {
+  // Emit CPU_name only when CPUString does not start with "generic".
+  if (CPUString.find("generic") != 0) {
// FIXME: remove krait check when GNU tools support krait cpu
if (STI.isKrait()) {
ATS.emitTextAttribute(ARMBuildAttrs::CPU_name, "cortex-a9");
@@ -723,7 +661,8 @@ void ARMAsmPrinter::emitAttributes() {
// Emit Tag_Advanced_SIMD_arch for ARMv8 architecture
if (STI.hasV8Ops())
ATS.emitAttribute(ARMBuildAttrs::Advanced_SIMD_arch,
- ARMBuildAttrs::AllowNeonARMv8);
+                      STI.hasV8_1a() ? ARMBuildAttrs::AllowNeonARMv8_1a
+                                     : ARMBuildAttrs::AllowNeonARMv8);
} else {
if (STI.hasFPARMv8())
// FPv5 and FP-ARMv8 have the same instructions, so are modeled as one
@@ -960,10 +899,7 @@ EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) {
MCSymbol *MCSym;
if (ACPV->isLSDA()) {
- SmallString<128> Str;
- raw_svector_ostream OS(Str);
- OS << DL->getPrivateGlobalPrefix() << "_LSDA_" << getFunctionNumber();
- MCSym = OutContext.GetOrCreateSymbol(OS.str());
+ MCSym = getCurExceptionSym();
} else if (ACPV->isBlockAddress()) {
const BlockAddress *BA =
cast<ARMConstantPoolConstant>(ACPV)->getBlockAddress();
diff --git a/lib/Target/ARM/ARMAsmPrinter.h b/lib/Target/ARM/ARMAsmPrinter.h
index 50cb954..e475ae4 100644
--- a/lib/Target/ARM/ARMAsmPrinter.h
+++ b/lib/Target/ARM/ARMAsmPrinter.h
@@ -103,13 +103,16 @@ private:
const MachineInstr *MI);
public:
- unsigned getISAEncoding(const Function *F) override {
+ unsigned getISAEncoding() override {
// ARM/Darwin adds ISA to the DWARF info for each function.
Triple TT(TM.getTargetTriple());
if (!TT.isOSBinFormatMachO())
return 0;
- const ARMSubtarget &STI = TM.getSubtarget<ARMSubtarget>(*F);
- return STI.isThumb() ? ARM::DW_ISA_ARM_thumb : ARM::DW_ISA_ARM_arm;
+ bool isThumb = TT.getArch() == Triple::thumb ||
+ TT.getArch() == Triple::thumbeb ||
+ TT.getSubArch() == Triple::ARMSubArch_v7m ||
+ TT.getSubArch() == Triple::ARMSubArch_v6m;
+ return isThumb ? ARM::DW_ISA_ARM_thumb : ARM::DW_ISA_ARM_arm;
}
private:
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 29ee22e..7ee3cb0 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -37,6 +37,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -4115,19 +4116,21 @@ enum ARMExeDomain {
//
std::pair<uint16_t, uint16_t>
ARMBaseInstrInfo::getExecutionDomain(const MachineInstr *MI) const {
- // VMOVD, VMOVRS and VMOVSR are VFP instructions, but can be changed to NEON
- // if they are not predicated.
- if (MI->getOpcode() == ARM::VMOVD && !isPredicated(MI))
- return std::make_pair(ExeVFP, (1<<ExeVFP) | (1<<ExeNEON));
-
- // CortexA9 is particularly picky about mixing the two and wants these
- // converted.
- if (Subtarget.isCortexA9() && !isPredicated(MI) &&
- (MI->getOpcode() == ARM::VMOVRS ||
- MI->getOpcode() == ARM::VMOVSR ||
- MI->getOpcode() == ARM::VMOVS))
- return std::make_pair(ExeVFP, (1<<ExeVFP) | (1<<ExeNEON));
-
+ // If we don't have access to NEON instructions then we won't be able
+ // to swizzle anything to the NEON domain. Check to make sure.
+ if (Subtarget.hasNEON()) {
+ // VMOVD, VMOVRS and VMOVSR are VFP instructions, but can be changed to NEON
+ // if they are not predicated.
+ if (MI->getOpcode() == ARM::VMOVD && !isPredicated(MI))
+ return std::make_pair(ExeVFP, (1 << ExeVFP) | (1 << ExeNEON));
+
+ // CortexA9 is particularly picky about mixing the two and wants these
+ // converted.
+ if (Subtarget.isCortexA9() && !isPredicated(MI) &&
+ (MI->getOpcode() == ARM::VMOVRS || MI->getOpcode() == ARM::VMOVSR ||
+ MI->getOpcode() == ARM::VMOVS))
+ return std::make_pair(ExeVFP, (1 << ExeVFP) | (1 << ExeNEON));
+ }
// No other instructions can be swizzled, so just determine their domain.
unsigned Domain = MI->getDesc().TSFlags & ARMII::DomainMask;
@@ -4220,6 +4223,9 @@ ARMBaseInstrInfo::setExecutionDomain(MachineInstr *MI, unsigned Domain) const {
// Zap the predicate operands.
assert(!isPredicated(MI) && "Cannot predicate a VORRd");
+ // Make sure we've got NEON instructions.
+ assert(Subtarget.hasNEON() && "VORRd requires NEON");
+
// Source instruction is %DDst = VMOVD %DSrc, 14, %noreg (; implicits)
DstReg = MI->getOperand(0).getReg();
SrcReg = MI->getOperand(1).getReg();
@@ -4507,7 +4513,7 @@ breakPartialRegDependency(MachineBasicBlock::iterator MI,
}
bool ARMBaseInstrInfo::hasNOP() const {
- return (Subtarget.getFeatureBits() & ARM::HasV6T2Ops) != 0;
+ return (Subtarget.getFeatureBits() & ARM::HasV6KOps) != 0;
}
bool ARMBaseInstrInfo::isSwiftFastImmShift(const MachineInstr *MI) const {
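A usage sketch of the corrected feature test (the helper is hypothetical): NOP as an architectural hint first appeared in ARMv6K, and since HasV6T2Ops now implies HasV6KOps in ARM.td, the v6K check still covers every v6T2/v7/v8 core while newly admitting the v6K ones.

bool supportsNOP(uint64_t FeatureBits) {
  return (FeatureBits & ARM::HasV6KOps) != 0;
}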
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index 7574727..a8c7657 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -45,26 +45,27 @@
using namespace llvm;
-ARMBaseRegisterInfo::ARMBaseRegisterInfo(const ARMSubtarget &sti)
- : ARMGenRegisterInfo(ARM::LR, 0, 0, ARM::PC), STI(sti), BasePtr(ARM::R6) {
+ARMBaseRegisterInfo::ARMBaseRegisterInfo()
+ : ARMGenRegisterInfo(ARM::LR, 0, 0, ARM::PC), BasePtr(ARM::R6) {}
+
+static unsigned getFramePointerReg(const ARMSubtarget &STI) {
if (STI.isTargetMachO()) {
if (STI.isTargetDarwin() || STI.isThumb1Only())
- FramePtr = ARM::R7;
+ return ARM::R7;
else
- FramePtr = ARM::R11;
+ return ARM::R11;
} else if (STI.isTargetWindows())
- FramePtr = ARM::R11;
+ return ARM::R11;
else // ARM EABI
- FramePtr = STI.isThumb() ? ARM::R7 : ARM::R11;
+ return STI.isThumb() ? ARM::R7 : ARM::R11;
}
const MCPhysReg*
ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
+ const ARMSubtarget &STI = MF->getSubtarget<ARMSubtarget>();
const MCPhysReg *RegList =
STI.isTargetDarwin() ? CSR_iOS_SaveList : CSR_AAPCS_SaveList;
- if (!MF) return RegList;
-
const Function *F = MF->getFunction();
if (F->getCallingConv() == CallingConv::GHC) {
// GHC set of callee saved regs is empty as all those regs are
@@ -89,8 +90,10 @@ ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
return RegList;
}
-const uint32_t*
-ARMBaseRegisterInfo::getCallPreservedMask(CallingConv::ID CC) const {
+const uint32_t *
+ARMBaseRegisterInfo::getCallPreservedMask(const MachineFunction &MF,
+ CallingConv::ID CC) const {
+ const ARMSubtarget &STI = MF.getSubtarget<ARMSubtarget>();
if (CC == CallingConv::GHC)
// This is academic because all GHC calls are (supposed to be) tail calls
return CSR_NoRegs_RegMask;
@@ -102,8 +105,10 @@ ARMBaseRegisterInfo::getNoPreservedMask() const {
return CSR_NoRegs_RegMask;
}
-const uint32_t*
-ARMBaseRegisterInfo::getThisReturnPreservedMask(CallingConv::ID CC) const {
+const uint32_t *
+ARMBaseRegisterInfo::getThisReturnPreservedMask(const MachineFunction &MF,
+ CallingConv::ID CC) const {
+ const ARMSubtarget &STI = MF.getSubtarget<ARMSubtarget>();
// This should return a register mask that is the same as that returned by
// getCallPreservedMask but that additionally preserves the register used for
// the first i32 argument (which must also be the register used to return a
@@ -121,7 +126,8 @@ ARMBaseRegisterInfo::getThisReturnPreservedMask(CallingConv::ID CC) const {
BitVector ARMBaseRegisterInfo::
getReservedRegs(const MachineFunction &MF) const {
- const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
+ const ARMSubtarget &STI = MF.getSubtarget<ARMSubtarget>();
+ const TargetFrameLowering *TFI = STI.getFrameLowering();
// FIXME: avoid re-calculating this every time.
BitVector Reserved(getNumRegs());
@@ -130,7 +136,7 @@ getReservedRegs(const MachineFunction &MF) const {
Reserved.set(ARM::FPSCR);
Reserved.set(ARM::APSR_NZCV);
if (TFI->hasFP(MF))
- Reserved.set(FramePtr);
+ Reserved.set(getFramePointerReg(STI));
if (hasBasePointer(MF))
Reserved.set(BasePtr);
// Some targets reserve R9.
@@ -150,9 +156,9 @@ getReservedRegs(const MachineFunction &MF) const {
return Reserved;
}
-const TargetRegisterClass*
-ARMBaseRegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC)
- const {
+const TargetRegisterClass *
+ARMBaseRegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC,
+ const MachineFunction &) const {
const TargetRegisterClass *Super = RC;
TargetRegisterClass::sc_iterator I = RC->getSuperClasses();
do {
@@ -187,7 +193,8 @@ ARMBaseRegisterInfo::getCrossCopyRegClass(const TargetRegisterClass *RC) const {
unsigned
ARMBaseRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
MachineFunction &MF) const {
- const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
+ const ARMSubtarget &STI = MF.getSubtarget<ARMSubtarget>();
+ const TargetFrameLowering *TFI = STI.getFrameLowering();
switch (RC->getID()) {
default:
@@ -283,29 +290,6 @@ ARMBaseRegisterInfo::updateRegAllocHint(unsigned Reg, unsigned NewReg,
}
}
-bool
-ARMBaseRegisterInfo::avoidWriteAfterWrite(const TargetRegisterClass *RC) const {
- // CortexA9 has a Write-after-write hazard for NEON registers.
- if (!STI.isLikeA9())
- return false;
-
- switch (RC->getID()) {
- case ARM::DPRRegClassID:
- case ARM::DPR_8RegClassID:
- case ARM::DPR_VFP2RegClassID:
- case ARM::QPRRegClassID:
- case ARM::QPR_8RegClassID:
- case ARM::QPR_VFP2RegClassID:
- case ARM::SPRRegClassID:
- case ARM::SPR_8RegClassID:
- // Avoid reusing S, D, and Q registers.
- // Don't increase register pressure for QQ and QQQQ.
- return true;
- default:
- return false;
- }
-}
-
bool ARMBaseRegisterInfo::hasBasePointer(const MachineFunction &MF) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
@@ -350,7 +334,7 @@ bool ARMBaseRegisterInfo::canRealignStack(const MachineFunction &MF) const {
return false;
// Stack realignment requires a frame pointer. If we already started
// register allocation with frame pointer elimination, it is too late now.
- if (!MRI->canReserveReg(FramePtr))
+ if (!MRI->canReserveReg(getFramePointerReg(MF.getSubtarget<ARMSubtarget>())))
return false;
// We may also need a base pointer if there are dynamic allocas or stack
// pointer adjustments around calls.
@@ -384,10 +368,11 @@ cannotEliminateFrame(const MachineFunction &MF) const {
unsigned
ARMBaseRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
- const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
+ const ARMSubtarget &STI = MF.getSubtarget<ARMSubtarget>();
+ const TargetFrameLowering *TFI = STI.getFrameLowering();
if (TFI->hasFP(MF))
- return FramePtr;
+ return getFramePointerReg(STI);
return ARM::SP;
}
@@ -539,7 +524,6 @@ needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const {
// The incoming offset is relating to the SP at the start of the function,
// but when we access the local it'll be relative to the SP after local
// allocation, so adjust our SP-relative offset by that allocation size.
- Offset = -Offset;
Offset += MFI->getLocalFrameSize();
// Assume that we'll have at least some spill slots allocated.
// FIXME: This is a total SWAG number. We should run some statistics
@@ -552,9 +536,8 @@ needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const {
// on whether there are any local variables that would trigger it.
unsigned StackAlign = TFI->getStackAlignment();
if (TFI->hasFP(MF) &&
- (MI->getDesc().TSFlags & ARMII::AddrModeMask) != ARMII::AddrModeT1_s &&
!((MFI->getLocalFrameMaxAlign() > StackAlign) && canRealignStack(MF))) {
- if (isFrameOffsetLegal(MI, FPOffset))
+ if (isFrameOffsetLegal(MI, getFrameRegister(MF), FPOffset))
return false;
}
// If we can reference via the stack pointer, try that.
@@ -562,7 +545,7 @@ needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const {
// to only disallow SP relative references in the live range of
// the VLA(s). In practice, it's unclear how much difference that
// would make, but it may be worth doing.
- if (!MFI->hasVarSizedObjects() && isFrameOffsetLegal(MI, Offset))
+ if (!MFI->hasVarSizedObjects() && isFrameOffsetLegal(MI, ARM::SP, Offset))
return false;
// The offset likely isn't legal, we want to allocate a virtual base register.
@@ -625,7 +608,7 @@ void ARMBaseRegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
(void)Done;
}
-bool ARMBaseRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI,
+bool ARMBaseRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI, unsigned BaseReg,
int64_t Offset) const {
const MCInstrDesc &Desc = MI->getDesc();
unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
@@ -669,7 +652,7 @@ bool ARMBaseRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI,
NumBits = 8;
break;
case ARMII::AddrModeT1_s:
- NumBits = 8;
+ NumBits = (BaseReg == ARM::SP ? 8 : 5);
Scale = 4;
isSigned = false;
break;
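A worked illustration of the two ranges above (the helper is hypothetical): AddrModeT1_s offsets are unsigned and scaled by 4, so an SP-based access with the 8-bit field reaches 0..1020 while a plain register base with the 5-bit field reaches only 0..124.

bool isLegalT1sOffset(int64_t Offset, bool BaseIsSP) {
  unsigned NumBits = BaseIsSP ? 8 : 5;
  if (Offset < 0 || (Offset & 3))          // unsigned, multiple of 4
    return false;
  return (uint64_t)(Offset >> 2) < (1ULL << NumBits);
}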
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.h b/lib/Target/ARM/ARMBaseRegisterInfo.h
index 17027c2..fdc1ef9 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.h
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.h
@@ -21,10 +21,6 @@
#include "ARMGenRegisterInfo.inc"
namespace llvm {
- class ARMSubtarget;
- class ARMBaseInstrInfo;
- class Type;
-
/// Register allocation hints.
namespace ARMRI {
enum {
@@ -82,27 +78,22 @@ static inline bool isCalleeSavedRegister(unsigned Reg,
class ARMBaseRegisterInfo : public ARMGenRegisterInfo {
protected:
- const ARMSubtarget &STI;
-
- /// FramePtr - ARM physical register used as frame ptr.
- unsigned FramePtr;
-
/// BasePtr - ARM physical register used as a base ptr in complex stack
/// frames. I.e., when we need a 3rd base, not just SP and FP, due to
/// variable size stack objects.
unsigned BasePtr;
// Can be only subclassed.
- explicit ARMBaseRegisterInfo(const ARMSubtarget &STI);
+ explicit ARMBaseRegisterInfo();
// Return the opcode that implements 'Op', or 0 if no opcode
unsigned getOpcode(int Op) const;
public:
/// Code Generation virtual methods...
- const MCPhysReg *
- getCalleeSavedRegs(const MachineFunction *MF = nullptr) const override;
- const uint32_t *getCallPreservedMask(CallingConv::ID) const override;
+ const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override;
+ const uint32_t *getCallPreservedMask(const MachineFunction &MF,
+ CallingConv::ID) const override;
const uint32_t *getNoPreservedMask() const;
/// getThisReturnPreservedMask - Returns a call preserved mask specific to the
@@ -113,7 +104,8 @@ public:
///
/// Should return NULL in the case that the calling convention does not have
/// this property
- const uint32_t *getThisReturnPreservedMask(CallingConv::ID) const;
+ const uint32_t *getThisReturnPreservedMask(const MachineFunction &MF,
+ CallingConv::ID) const;
BitVector getReservedRegs(const MachineFunction &MF) const override;
@@ -124,7 +116,8 @@ public:
getCrossCopyRegClass(const TargetRegisterClass *RC) const override;
const TargetRegisterClass *
- getLargestLegalSuperClass(const TargetRegisterClass *RC) const override;
+ getLargestLegalSuperClass(const TargetRegisterClass *RC,
+ const MachineFunction &MF) const override;
unsigned getRegPressureLimit(const TargetRegisterClass *RC,
MachineFunction &MF) const override;
@@ -138,8 +131,6 @@ public:
void updateRegAllocHint(unsigned Reg, unsigned NewReg,
MachineFunction &MF) const override;
- bool avoidWriteAfterWrite(const TargetRegisterClass *RC) const override;
-
bool hasBasePointer(const MachineFunction &MF) const;
bool canRealignStack(const MachineFunction &MF) const;
@@ -152,7 +143,7 @@ public:
int64_t Offset) const override;
void resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
int64_t Offset) const override;
- bool isFrameOffsetLegal(const MachineInstr *MI,
+ bool isFrameOffsetLegal(const MachineInstr *MI, unsigned BaseReg,
int64_t Offset) const override;
bool cannotEliminateFrame(const MachineFunction &MF) const;
diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp
index 375d394..9c8d228 100644
--- a/lib/Target/ARM/ARMFastISel.cpp
+++ b/lib/Target/ARM/ARMFastISel.cpp
@@ -2265,7 +2265,7 @@ bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) {
// Add a register mask with the call-preserved registers.
// Proper defs for return values will be added by setPhysRegsDeadExcept().
- MIB.addRegMask(TRI.getCallPreservedMask(CC));
+ MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC));
// Finish off the call including any return values.
SmallVector<unsigned, 4> UsedRegs;
@@ -2416,7 +2416,7 @@ bool ARMFastISel::SelectCall(const Instruction *I,
// Add a register mask with the call-preserved registers.
// Proper defs for return values will be added by setPhysRegsDeadExcept().
- MIB.addRegMask(TRI.getCallPreservedMask(CC));
+ MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC));
// Finish off the call including any return values.
SmallVector<unsigned, 4> UsedRegs;
diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp
index 5a5bd57..830953b 100644
--- a/lib/Target/ARM/ARMFrameLowering.cpp
+++ b/lib/Target/ARM/ARMFrameLowering.cpp
@@ -293,7 +293,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
"This emitPrologue does not support Thumb1!");
bool isARM = !AFI->isThumbFunction();
unsigned Align = STI.getFrameLowering()->getStackAlignment();
- unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(Align);
+ unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize();
unsigned NumBytes = MFI->getStackSize();
const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
@@ -742,8 +742,7 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
"This emitEpilogue does not support Thumb1!");
bool isARM = !AFI->isThumbFunction();
- unsigned Align = STI.getFrameLowering()->getStackAlignment();
- unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(Align);
+ unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize();
int NumBytes = (int)MFI->getStackSize();
unsigned FramePtr = RegInfo->getFrameRegister(MF);
diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp
index 6ebf640..44cd1ef 100644
--- a/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -257,7 +257,7 @@ private:
/// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
/// inline asm expressions.
- bool SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode,
+ bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
std::vector<SDValue> &OutOps) override;
// Form pairs of consecutive R, S, D, or Q registers.
@@ -3086,7 +3086,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
  // Store exclusive double returns an i32 value which is the return status
  // of the issued store.
- EVT ResTys[] = { MVT::i32, MVT::Other };
+ const EVT ResTys[] = {MVT::i32, MVT::Other};
bool isThumb = Subtarget->isThumb() && Subtarget->hasThumb2();
// Place arguments in the right order.
@@ -3472,9 +3472,10 @@ SDNode *ARMDAGToDAGISel::SelectInlineAsm(SDNode *N){
bool ARMDAGToDAGISel::
-SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode,
+SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
std::vector<SDValue> &OutOps) {
- assert(ConstraintCode == 'm' && "unexpected asm memory constraint");
+ assert(ConstraintID == InlineAsm::Constraint_m &&
+ "unexpected asm memory constraint");
// Require the address to be in a register. That is safe for all ARM
// variants and it is hard to do anything much smarter without knowing
// how the operand is used.
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index 56290aa..3b1b8dd 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -23,6 +23,7 @@
#include "MCTargetDesc/ARMAddressingModes.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringSwitch.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/IntrinsicLowering.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
@@ -40,6 +41,7 @@
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Type.h"
#include "llvm/MC/MCSectionMachO.h"
@@ -47,6 +49,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetOptions.h"
#include <utility>
using namespace llvm;
@@ -568,14 +571,12 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setTargetDAGCombine(ISD::LOAD);
// It is legal to extload from v4i8 to v4i16 or v4i32.
- MVT Tys[6] = {MVT::v8i8, MVT::v4i8, MVT::v2i8,
- MVT::v4i16, MVT::v2i16,
- MVT::v2i32};
- for (unsigned i = 0; i < 6; ++i) {
+ for (MVT Ty : {MVT::v8i8, MVT::v4i8, MVT::v2i8, MVT::v4i16, MVT::v2i16,
+ MVT::v2i32}) {
for (MVT VT : MVT::integer_vector_valuetypes()) {
- setLoadExtAction(ISD::EXTLOAD, VT, Tys[i], Legal);
- setLoadExtAction(ISD::ZEXTLOAD, VT, Tys[i], Legal);
- setLoadExtAction(ISD::SEXTLOAD, VT, Tys[i], Legal);
+ setLoadExtAction(ISD::EXTLOAD, VT, Ty, Legal);
+ setLoadExtAction(ISD::ZEXTLOAD, VT, Ty, Legal);
+ setLoadExtAction(ISD::SEXTLOAD, VT, Ty, Legal);
}
}
}
@@ -614,6 +615,12 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FRINT, MVT::f64, Expand);
setOperationAction(ISD::FNEARBYINT, MVT::f64, Expand);
setOperationAction(ISD::FFLOOR, MVT::f64, Expand);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
+ setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
+ setOperationAction(ISD::FP_TO_SINT, MVT::f64, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::f64, Custom);
setOperationAction(ISD::FP_ROUND, MVT::f32, Custom);
setOperationAction(ISD::FP_EXTEND, MVT::f64, Custom);
}
@@ -869,14 +876,6 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
// Various VFP goodness
if (!TM.Options.UseSoftFloat && !Subtarget->isThumb1Only()) {
- // int <-> fp are custom expanded into bit_convert + ARMISD ops.
- if (Subtarget->hasVFP2()) {
- setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
- setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
- setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
- setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
- }
-
// FP-ARMv8 adds f64 <-> f16 conversion. Before that it should be expanded.
if (!Subtarget->hasFPARMv8() || Subtarget->isFPOnlySP()) {
setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
@@ -1033,11 +1032,6 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
case ARMISD::RBIT: return "ARMISD::RBIT";
- case ARMISD::FTOSI: return "ARMISD::FTOSI";
- case ARMISD::FTOUI: return "ARMISD::FTOUI";
- case ARMISD::SITOF: return "ARMISD::SITOF";
- case ARMISD::UITOF: return "ARMISD::UITOF";
-
case ARMISD::SRL_FLAG: return "ARMISD::SRL_FLAG";
case ARMISD::SRA_FLAG: return "ARMISD::SRA_FLAG";
case ARMISD::RRX: return "ARMISD::RRX";
@@ -1164,6 +1158,20 @@ const TargetRegisterClass *ARMTargetLowering::getRegClassFor(MVT VT) const {
return TargetLowering::getRegClassFor(VT);
}
+// memcpy and other memory intrinsics typically try to use LDM/STM if the
+// source/dest is aligned and the copy size is large enough. We therefore
+// want to align such objects when they are passed to memory intrinsics.
+bool ARMTargetLowering::shouldAlignPointerArgs(CallInst *CI, unsigned &MinSize,
+ unsigned &PrefAlign) const {
+ if (!isa<MemIntrinsic>(CI))
+ return false;
+ MinSize = 8;
+ // On ARM11 onwards (excluding M class) 8-byte aligned LDM is typically 1
+ // cycle faster than 4-byte aligned LDM.
+ PrefAlign = (Subtarget->hasV6Ops() && !Subtarget->isMClass() ? 8 : 4);
+ return true;
+}
+
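The policy the new hook encodes boils down to two subtarget facts; a minimal restatement (a sketch, not the real interface):

// Memory intrinsics copying at least 8 bytes prefer 8-byte alignment on
// v6+ A/R-class cores, where 8-byte-aligned LDM is about a cycle faster;
// everywhere else 4-byte alignment is enough.
unsigned preferredMemIntrinsicAlign(bool HasV6Ops, bool IsMClass) {
  return (HasV6Ops && !IsMClass) ? 8 : 4;
}
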
// Create a fast isel object.
FastISel *
ARMTargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
@@ -1815,16 +1823,16 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
const ARMBaseRegisterInfo *ARI = Subtarget->getRegisterInfo();
if (isThisReturn) {
// For 'this' returns, use the R0-preserving mask if applicable
- Mask = ARI->getThisReturnPreservedMask(CallConv);
+ Mask = ARI->getThisReturnPreservedMask(MF, CallConv);
if (!Mask) {
// Set isThisReturn to false if the calling convention is not one that
// allows 'returned' to be modeled in this way, so LowerCallResult does
// not try to pass 'this' straight through
isThisReturn = false;
- Mask = ARI->getCallPreservedMask(CallConv);
+ Mask = ARI->getCallPreservedMask(MF, CallConv);
}
} else
- Mask = ARI->getCallPreservedMask(CallConv);
+ Mask = ARI->getCallPreservedMask(MF, CallConv);
assert(Mask && "Missing call preserved mask for calling convention");
Ops.push_back(DAG.getRegisterMask(Mask));
@@ -1857,60 +1865,61 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
/// on the stack. Remember the next parameter register to allocate,
/// and then confiscate the rest of the parameter registers to ensure
/// this.
-void
-ARMTargetLowering::HandleByVal(
- CCState *State, unsigned &size, unsigned Align) const {
- unsigned reg = State->AllocateReg(GPRArgRegs);
+void ARMTargetLowering::HandleByVal(CCState *State, unsigned &Size,
+ unsigned Align) const {
assert((State->getCallOrPrologue() == Prologue ||
State->getCallOrPrologue() == Call) &&
"unhandled ParmContext");
- if ((ARM::R0 <= reg) && (reg <= ARM::R3)) {
- if (Subtarget->isAAPCS_ABI() && Align > 4) {
- unsigned AlignInRegs = Align / 4;
- unsigned Waste = (ARM::R4 - reg) % AlignInRegs;
- for (unsigned i = 0; i < Waste; ++i)
- reg = State->AllocateReg(GPRArgRegs);
- }
- if (reg != 0) {
- unsigned excess = 4 * (ARM::R4 - reg);
-
- // Special case when NSAA != SP and parameter size greater than size of
- // all remained GPR regs. In that case we can't split parameter, we must
- // send it to stack. We also must set NCRN to R4, so waste all
- // remained registers.
- const unsigned NSAAOffset = State->getNextStackOffset();
- if (Subtarget->isAAPCS_ABI() && NSAAOffset != 0 && size > excess) {
- while (State->AllocateReg(GPRArgRegs))
- ;
- return;
- }
+  // Byval (as with any stack) slots are always at least 4-byte aligned.
+ Align = std::max(Align, 4U);
- // First register for byval parameter is the first register that wasn't
- // allocated before this method call, so it would be "reg".
- // If parameter is small enough to be saved in range [reg, r4), then
- // the end (first after last) register would be reg + param-size-in-regs,
- // else parameter would be splitted between registers and stack,
- // end register would be r4 in this case.
- unsigned ByValRegBegin = reg;
- unsigned ByValRegEnd = (size < excess) ? reg + size/4 : (unsigned)ARM::R4;
- State->addInRegsParamInfo(ByValRegBegin, ByValRegEnd);
- // Note, first register is allocated in the beginning of function already,
- // allocate remained amount of registers we need.
- for (unsigned i = reg+1; i != ByValRegEnd; ++i)
- State->AllocateReg(GPRArgRegs);
- // A byval parameter that is split between registers and memory needs its
- // size truncated here.
- // In the case where the entire structure fits in registers, we set the
- // size in memory to zero.
- if (size < excess)
- size = 0;
- else
- size -= excess;
- }
+ unsigned Reg = State->AllocateReg(GPRArgRegs);
+ if (!Reg)
+ return;
+
+ unsigned AlignInRegs = Align / 4;
+ unsigned Waste = (ARM::R4 - Reg) % AlignInRegs;
+ for (unsigned i = 0; i < Waste; ++i)
+ Reg = State->AllocateReg(GPRArgRegs);
+
+ if (!Reg)
+ return;
+
+ unsigned Excess = 4 * (ARM::R4 - Reg);
+
+  // Special case when NSAA != SP and the parameter size is greater than
+  // the size of all remaining GPR regs. In that case we can't split the
+  // parameter, we must send it to the stack. We also must set the NCRN to
+  // R4, so waste all remaining registers.
+ const unsigned NSAAOffset = State->getNextStackOffset();
+ if (NSAAOffset != 0 && Size > Excess) {
+ while (State->AllocateReg(GPRArgRegs))
+ ;
+ return;
}
+
+  // The first register for a byval parameter is the first register that
+  // wasn't allocated before this call, i.e. "Reg". If the parameter is
+  // small enough to be saved in the range [Reg, r4), then the end (one
+  // past the last) register is Reg + param-size-in-regs; otherwise the
+  // parameter is split between registers and the stack, and the end
+  // register is r4 in that case.
+ unsigned ByValRegBegin = Reg;
+ unsigned ByValRegEnd = std::min<unsigned>(Reg + Size / 4, ARM::R4);
+ State->addInRegsParamInfo(ByValRegBegin, ByValRegEnd);
+  // Note that the first register was already allocated at the start of
+  // this function; allocate the remaining registers we need.
+ for (unsigned i = Reg + 1; i != ByValRegEnd; ++i)
+ State->AllocateReg(GPRArgRegs);
+ // A byval parameter that is split between registers and memory needs its
+ // size truncated here.
+ // In the case where the entire structure fits in registers, we set the
+ // size in memory to zero.
+ Size = std::max<int>(Size - Excess, 0);
}
+
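A self-contained model of the register split computed above may help; it assumes r0-r3 are the four GPR argument registers, numbered 0..3 with 4 standing for r4, and it omits the NSAA special case:

#include <algorithm>
#include <cstdio>

struct ByValSplit { unsigned RegBegin, RegEnd, MemSize; };

ByValSplit splitByVal(unsigned FirstFreeReg, unsigned Size, unsigned Align) {
  Align = std::max(Align, 4u);                 // stack slots are >= 4-aligned
  unsigned AlignInRegs = Align / 4;
  // Waste registers until the first one is suitably aligned.
  unsigned Reg = FirstFreeReg + (4 - FirstFreeReg) % AlignInRegs;
  if (Reg >= 4)
    return {4, 4, Size};                       // nothing fits in registers
  unsigned Excess = 4 * (4 - Reg);             // bytes available in registers
  unsigned RegEnd = std::min(Reg + Size / 4, 4u);
  unsigned MemSize = Size > Excess ? Size - Excess : 0;
  return {Reg, RegEnd, MemSize};
}

int main() {
  // A 12-byte, 8-byte-aligned byval with r0 already taken lands in r2-r3,
  // leaving 4 bytes for the stack.
  ByValSplit S = splitByVal(1, 12, 8);
  std::printf("r%u..r%u, %u bytes in memory\n", S.RegBegin, S.RegEnd, S.MemSize);
}
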
/// MatchingStackOffset - Return true if the given stack call argument is
/// already available in the same position (relatively) of the caller's
/// incoming argument stack.
@@ -1991,7 +2000,7 @@ ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
if (isCalleeStructRet || isCallerStructRet)
return false;
- // FIXME: Completely disable sibcall for Thumb1 since Thumb1RegisterInfo::
+ // FIXME: Completely disable sibcall for Thumb1 since ThumbRegisterInfo::
// emitEpilogue is not ready for them. Thumb tail calls also use t2B, as
// the Thumb1 16-bit unconditional branch doesn't have sufficient relocation
// support in the assembler and linker to be used. This would need to be
@@ -2819,50 +2828,6 @@ ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA,
return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, ArgValue, ArgValue2);
}
-void
-ARMTargetLowering::computeRegArea(CCState &CCInfo, MachineFunction &MF,
- unsigned InRegsParamRecordIdx,
- unsigned ArgSize,
- unsigned &ArgRegsSize,
- unsigned &ArgRegsSaveSize)
- const {
- unsigned NumGPRs;
- if (InRegsParamRecordIdx < CCInfo.getInRegsParamsCount()) {
- unsigned RBegin, REnd;
- CCInfo.getInRegsParamInfo(InRegsParamRecordIdx, RBegin, REnd);
- NumGPRs = REnd - RBegin;
- } else {
- unsigned int firstUnalloced;
- firstUnalloced = CCInfo.getFirstUnallocated(GPRArgRegs);
- NumGPRs = (firstUnalloced <= 3) ? (4 - firstUnalloced) : 0;
- }
-
- unsigned Align = Subtarget->getFrameLowering()->getStackAlignment();
- ArgRegsSize = NumGPRs * 4;
-
- // If parameter is split between stack and GPRs...
- if (NumGPRs && Align > 4 &&
- (ArgRegsSize < ArgSize ||
- InRegsParamRecordIdx >= CCInfo.getInRegsParamsCount())) {
- // Add padding for part of param recovered from GPRs. For example,
- // if Align == 8, its last byte must be at address K*8 - 1.
- // We need to do it, since remained (stack) part of parameter has
- // stack alignment, and we need to "attach" "GPRs head" without gaps
- // to it:
- // Stack:
- // |---- 8 bytes block ----| |---- 8 bytes block ----| |---- 8 bytes...
- // [ [padding] [GPRs head] ] [ Tail passed via stack ....
- //
- ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
- unsigned Padding =
- OffsetToAlignment(ArgRegsSize + AFI->getArgRegsSaveSize(), Align);
- ArgRegsSaveSize = ArgRegsSize + Padding;
- } else
- // We don't need to extend regs save size for byval parameters if they
- // are passed via GPRs only.
- ArgRegsSaveSize = ArgRegsSize;
-}
-
// The remaining GPRs hold either the beginning of variable-argument
// data, or the beginning of an aggregate passed by value (usually
// byval). Either way, we allocate stack slots adjacent to the data
@@ -2876,13 +2841,8 @@ ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG,
SDLoc dl, SDValue &Chain,
const Value *OrigArg,
unsigned InRegsParamRecordIdx,
- unsigned OffsetFromOrigArg,
- unsigned ArgOffset,
- unsigned ArgSize,
- bool ForceMutable,
- unsigned ByValStoreOffset,
- unsigned TotalArgRegsSaveSize) const {
-
+ int ArgOffset,
+ unsigned ArgSize) const {
  // Currently, two use-cases are possible:
// Case #1. Non-var-args function, and we meet first byval parameter.
// Setup first unallocated register as first byval register;
@@ -2897,82 +2857,39 @@ ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG,
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
- unsigned firstRegToSaveIndex, lastRegToSaveIndex;
unsigned RBegin, REnd;
if (InRegsParamRecordIdx < CCInfo.getInRegsParamsCount()) {
CCInfo.getInRegsParamInfo(InRegsParamRecordIdx, RBegin, REnd);
- firstRegToSaveIndex = RBegin - ARM::R0;
- lastRegToSaveIndex = REnd - ARM::R0;
} else {
- firstRegToSaveIndex = CCInfo.getFirstUnallocated(GPRArgRegs);
- lastRegToSaveIndex = 4;
- }
-
- unsigned ArgRegsSize, ArgRegsSaveSize;
- computeRegArea(CCInfo, MF, InRegsParamRecordIdx, ArgSize,
- ArgRegsSize, ArgRegsSaveSize);
-
- // Store any by-val regs to their spots on the stack so that they may be
- // loaded by deferencing the result of formal parameter pointer or va_next.
- // Note: once stack area for byval/varargs registers
- // was initialized, it can't be initialized again.
- if (ArgRegsSaveSize) {
- unsigned Padding = ArgRegsSaveSize - ArgRegsSize;
-
- if (Padding) {
- assert(AFI->getStoredByValParamsPadding() == 0 &&
- "The only parameter may be padded.");
- AFI->setStoredByValParamsPadding(Padding);
- }
-
- int FrameIndex = MFI->CreateFixedObject(ArgRegsSaveSize,
- Padding +
- ByValStoreOffset -
- (int64_t)TotalArgRegsSaveSize,
- false);
- SDValue FIN = DAG.getFrameIndex(FrameIndex, getPointerTy());
- if (Padding) {
- MFI->CreateFixedObject(Padding,
- ArgOffset + ByValStoreOffset -
- (int64_t)ArgRegsSaveSize,
- false);
- }
-
- SmallVector<SDValue, 4> MemOps;
- for (unsigned i = 0; firstRegToSaveIndex < lastRegToSaveIndex;
- ++firstRegToSaveIndex, ++i) {
- const TargetRegisterClass *RC;
- if (AFI->isThumb1OnlyFunction())
- RC = &ARM::tGPRRegClass;
- else
- RC = &ARM::GPRRegClass;
+ unsigned RBeginIdx = CCInfo.getFirstUnallocated(GPRArgRegs);
+ RBegin = RBeginIdx == 4 ? (unsigned)ARM::R4 : GPRArgRegs[RBeginIdx];
+ REnd = ARM::R4;
+ }
- unsigned VReg = MF.addLiveIn(GPRArgRegs[firstRegToSaveIndex], RC);
- SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
- SDValue Store =
- DAG.getStore(Val.getValue(1), dl, Val, FIN,
- MachinePointerInfo(OrigArg, OffsetFromOrigArg + 4*i),
- false, false, 0);
- MemOps.push_back(Store);
- FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN,
- DAG.getConstant(4, getPointerTy()));
- }
+ if (REnd != RBegin)
+ ArgOffset = -4 * (ARM::R4 - RBegin);
- AFI->setArgRegsSaveSize(ArgRegsSaveSize + AFI->getArgRegsSaveSize());
+ int FrameIndex = MFI->CreateFixedObject(ArgSize, ArgOffset, false);
+ SDValue FIN = DAG.getFrameIndex(FrameIndex, getPointerTy());
- if (!MemOps.empty())
- Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
- return FrameIndex;
- } else {
- if (ArgSize == 0) {
- // We cannot allocate a zero-byte object for the first variadic argument,
- // so just make up a size.
- ArgSize = 4;
- }
- // This will point to the next argument passed via stack.
- return MFI->CreateFixedObject(
- ArgSize, ArgOffset, !ForceMutable);
+ SmallVector<SDValue, 4> MemOps;
+ const TargetRegisterClass *RC =
+ AFI->isThumb1OnlyFunction() ? &ARM::tGPRRegClass : &ARM::GPRRegClass;
+
+ for (unsigned Reg = RBegin, i = 0; Reg < REnd; ++Reg, ++i) {
+ unsigned VReg = MF.addLiveIn(Reg, RC);
+ SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
+ SDValue Store =
+ DAG.getStore(Val.getValue(1), dl, Val, FIN,
+ MachinePointerInfo(OrigArg, 4 * i), false, false, 0);
+ MemOps.push_back(Store);
+ FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN,
+ DAG.getConstant(4, getPointerTy()));
}
+
+ if (!MemOps.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
+ return FrameIndex;
}
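
The fixed-object offset above follows from one layout invariant: the spilled argument registers RBegin..r3 end exactly at the CFA. Expressed with registers as small integers (an illustrative assumption, not the real register enum):

// Where the byval/vararg spill area starts relative to the CFA, and how
// many bytes the prologue/epilogue must reserve for it.
int byvalFixedObjectOffset(unsigned RBegin) { return -4 * (int)(4 - RBegin); }
unsigned argRegsSaveSize(unsigned ArgRegBegin) { return 4 * (4 - ArgRegBegin); }
// e.g. RBegin == 2 (r2): the object starts at CFA - 8 and covers r2 and r3.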
// Setup stack frame, the va_list pointer will start from.
@@ -2990,11 +2907,9 @@ ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,
// the result of va_next.
  // If there are no regs to be stored, just point the address after the
  // last argument passed via the stack.
- int FrameIndex =
- StoreByValRegs(CCInfo, DAG, dl, Chain, nullptr,
- CCInfo.getInRegsParamsCount(), 0, ArgOffset, 0, ForceMutable,
- 0, TotalArgRegsSaveSize);
-
+ int FrameIndex = StoreByValRegs(CCInfo, DAG, dl, Chain, nullptr,
+ CCInfo.getInRegsParamsCount(),
+ CCInfo.getNextStackOffset(), 4);
AFI->setVarArgsFrameIndex(FrameIndex);
}
@@ -3020,7 +2935,6 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
isVarArg));
SmallVector<SDValue, 16> ArgValues;
- int lastInsIndex = -1;
SDValue ArgValue;
Function::const_arg_iterator CurOrigArg = MF.getFunction()->arg_begin();
unsigned CurArgIdx = 0;
@@ -3030,50 +2944,40 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
// We also increase this value in case of varargs function.
AFI->setArgRegsSaveSize(0);
- unsigned ByValStoreOffset = 0;
- unsigned TotalArgRegsSaveSize = 0;
- unsigned ArgRegsSaveSizeMaxAlign = 4;
-
// Calculate the amount of stack space that we need to allocate to store
// byval and variadic arguments that are passed in registers.
// We need to know this before we allocate the first byval or variadic
// argument, as they will be allocated a stack slot below the CFA (Canonical
// Frame Address, the stack pointer at entry to the function).
+ unsigned ArgRegBegin = ARM::R4;
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ if (CCInfo.getInRegsParamsProcessed() >= CCInfo.getInRegsParamsCount())
+ break;
+
CCValAssign &VA = ArgLocs[i];
- if (VA.isMemLoc()) {
- int index = VA.getValNo();
- if (index != lastInsIndex) {
- ISD::ArgFlagsTy Flags = Ins[index].Flags;
- if (Flags.isByVal()) {
- unsigned ExtraArgRegsSize;
- unsigned ExtraArgRegsSaveSize;
- computeRegArea(CCInfo, MF, CCInfo.getInRegsParamsProcessed(),
- Flags.getByValSize(),
- ExtraArgRegsSize, ExtraArgRegsSaveSize);
-
- TotalArgRegsSaveSize += ExtraArgRegsSaveSize;
- if (Flags.getByValAlign() > ArgRegsSaveSizeMaxAlign)
- ArgRegsSaveSizeMaxAlign = Flags.getByValAlign();
- CCInfo.nextInRegsParam();
- }
- lastInsIndex = index;
- }
- }
+ unsigned Index = VA.getValNo();
+ ISD::ArgFlagsTy Flags = Ins[Index].Flags;
+ if (!Flags.isByVal())
+ continue;
+
+ assert(VA.isMemLoc() && "unexpected byval pointer in reg");
+ unsigned RBegin, REnd;
+ CCInfo.getInRegsParamInfo(CCInfo.getInRegsParamsProcessed(), RBegin, REnd);
+ ArgRegBegin = std::min(ArgRegBegin, RBegin);
+
+ CCInfo.nextInRegsParam();
}
CCInfo.rewindByValRegsInfo();
- lastInsIndex = -1;
+
+ int lastInsIndex = -1;
if (isVarArg && MFI->hasVAStart()) {
- unsigned ExtraArgRegsSize;
- unsigned ExtraArgRegsSaveSize;
- computeRegArea(CCInfo, MF, CCInfo.getInRegsParamsCount(), 0,
- ExtraArgRegsSize, ExtraArgRegsSaveSize);
- TotalArgRegsSaveSize += ExtraArgRegsSaveSize;
+ unsigned RegIdx = CCInfo.getFirstUnallocated(GPRArgRegs);
+ if (RegIdx != array_lengthof(GPRArgRegs))
+ ArgRegBegin = std::min(ArgRegBegin, (unsigned)GPRArgRegs[RegIdx]);
}
- // If the arg regs save area contains N-byte aligned values, the
- // bottom of it must be at least N-byte aligned.
- TotalArgRegsSaveSize = RoundUpToAlignment(TotalArgRegsSaveSize, ArgRegsSaveSizeMaxAlign);
- TotalArgRegsSaveSize = std::min(TotalArgRegsSaveSize, 16U);
+
+ unsigned TotalArgRegsSaveSize = 4 * (ARM::R4 - ArgRegBegin);
+ AFI->setArgRegsSaveSize(TotalArgRegsSaveSize);
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
@@ -3178,18 +3082,9 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
"Byval arguments cannot be implicit");
unsigned CurByValIndex = CCInfo.getInRegsParamsProcessed();
- ByValStoreOffset = RoundUpToAlignment(ByValStoreOffset, Flags.getByValAlign());
- int FrameIndex = StoreByValRegs(
- CCInfo, DAG, dl, Chain, CurOrigArg,
- CurByValIndex,
- Ins[VA.getValNo()].PartOffset,
- VA.getLocMemOffset(),
- Flags.getByValSize(),
- true /*force mutable frames*/,
- ByValStoreOffset,
- TotalArgRegsSaveSize);
- ByValStoreOffset += Flags.getByValSize();
- ByValStoreOffset = std::min(ByValStoreOffset, 16U);
+ int FrameIndex = StoreByValRegs(CCInfo, DAG, dl, Chain, CurOrigArg,
+ CurByValIndex, VA.getLocMemOffset(),
+ Flags.getByValSize());
InVals.push_back(DAG.getFrameIndex(FrameIndex, getPointerTy()));
CCInfo.nextInRegsParam();
} else {
@@ -3894,7 +3789,6 @@ SDValue ARMTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
if (VT.isVector())
return LowerVectorFP_TO_INT(Op, DAG);
-
if (Subtarget->isFPOnlySP() && Op.getOperand(0).getValueType() == MVT::f64) {
RTLIB::Libcall LC;
if (Op.getOpcode() == ISD::FP_TO_SINT)
@@ -3907,20 +3801,7 @@ SDValue ARMTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
/*isSigned*/ false, SDLoc(Op)).first;
}
- SDLoc dl(Op);
- unsigned Opc;
-
- switch (Op.getOpcode()) {
- default: llvm_unreachable("Invalid opcode!");
- case ISD::FP_TO_SINT:
- Opc = ARMISD::FTOSI;
- break;
- case ISD::FP_TO_UINT:
- Opc = ARMISD::FTOUI;
- break;
- }
- Op = DAG.getNode(Opc, dl, MVT::f32, Op.getOperand(0));
- return DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
+ return Op;
}
static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
@@ -3960,7 +3841,6 @@ SDValue ARMTargetLowering::LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
if (VT.isVector())
return LowerVectorINT_TO_FP(Op, DAG);
-
if (Subtarget->isFPOnlySP() && Op.getValueType() == MVT::f64) {
RTLIB::Libcall LC;
if (Op.getOpcode() == ISD::SINT_TO_FP)
@@ -3973,21 +3853,7 @@ SDValue ARMTargetLowering::LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const {
/*isSigned*/ false, SDLoc(Op)).first;
}
- SDLoc dl(Op);
- unsigned Opc;
-
- switch (Op.getOpcode()) {
- default: llvm_unreachable("Invalid opcode!");
- case ISD::SINT_TO_FP:
- Opc = ARMISD::SITOF;
- break;
- case ISD::UINT_TO_FP:
- Opc = ARMISD::UITOF;
- break;
- }
-
- Op = DAG.getNode(ISD::BITCAST, dl, MVT::f32, Op.getOperand(0));
- return DAG.getNode(Opc, dl, VT, Op);
+ return Op;
}
SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
@@ -7239,16 +7105,20 @@ ARMTargetLowering::EmitStructByval(MachineInstr *MI,
// Load an immediate to varEnd.
unsigned varEnd = MRI.createVirtualRegister(TRC);
- if (IsThumb2) {
+ if (Subtarget->useMovt(*MF)) {
unsigned Vtmp = varEnd;
if ((LoopSize & 0xFFFF0000) != 0)
Vtmp = MRI.createVirtualRegister(TRC);
- AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2MOVi16), Vtmp)
- .addImm(LoopSize & 0xFFFF));
+ AddDefaultPred(BuildMI(BB, dl,
+ TII->get(IsThumb2 ? ARM::t2MOVi16 : ARM::MOVi16),
+ Vtmp).addImm(LoopSize & 0xFFFF));
if ((LoopSize & 0xFFFF0000) != 0)
- AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2MOVTi16), varEnd)
- .addReg(Vtmp).addImm(LoopSize >> 16));
+ AddDefaultPred(BuildMI(BB, dl,
+ TII->get(IsThumb2 ? ARM::t2MOVTi16 : ARM::MOVTi16),
+ varEnd)
+ .addReg(Vtmp)
+ .addImm(LoopSize >> 16));
} else {
MachineConstantPool *ConstantPool = MF->getConstantPool();
Type *Int32Ty = Type::getInt32Ty(MF->getFunction()->getContext());
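
The movt path above splits the 32-bit loop count into two halves; a sketch of that split (the helper and struct names are illustrative):

#include <cstdint>

struct MovPair { uint16_t Low; bool NeedsMovt; uint16_t High; };

// MOVi16/t2MOVi16 loads the low half and zeroes the top 16 bits;
// MOVTi16/t2MOVTi16 is only needed when the high half is nonzero.
MovPair materialize32(uint32_t LoopSize) {
  return {(uint16_t)(LoopSize & 0xFFFF),
          (LoopSize & 0xFFFF0000u) != 0,
          (uint16_t)(LoopSize >> 16)};
}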
@@ -10076,6 +9946,28 @@ bool ARMTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
return false;
}
+bool ARMTargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const {
+ EVT VT = ExtVal.getValueType();
+
+ if (!isTypeLegal(VT))
+ return false;
+
+ // Don't create a loadext if we can fold the extension into a wide/long
+ // instruction.
+  // If there's more than one user instruction, the extending load is
+  // desirable no matter what; note that two uses can come from one instruction.
+ if (ExtVal->use_empty() ||
+ !ExtVal->use_begin()->isOnlyUserOf(ExtVal.getNode()))
+ return true;
+
+ SDNode *U = *ExtVal->use_begin();
+ if ((U->getOpcode() == ISD::ADD || U->getOpcode() == ISD::SUB ||
+ U->getOpcode() == ISD::SHL || U->getOpcode() == ARMISD::VSHL))
+ return false;
+
+ return true;
+}
+
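A toy version of the single-foldable-user test above (the real SDNode use-list API differs; this only captures the decision):

#include <vector>

enum Opcode { OP_ADD, OP_SUB, OP_SHL, OP_VSHL, OP_OTHER };

bool loadExtDesirable(const std::vector<Opcode> &UserInsts) {
  if (UserInsts.size() != 1)
    return true; // zero or several users: an extending load always pays off
  // A lone ADD/SUB/SHL user can fold the extension into a wide/long NEON
  // instruction, so materializing it in the load would be wasted work.
  Opcode U = UserInsts.front();
  return !(U == OP_ADD || U == OP_SUB || U == OP_SHL || U == OP_VSHL);
}
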
bool ARMTargetLowering::allowTruncateForTailCall(Type *Ty1, Type *Ty2) const {
if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
return false;
@@ -10289,9 +10181,9 @@ bool ARMTargetLowering::isLegalAddressingMode(const AddrMode &AM,
bool ARMTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
// Thumb2 and ARM modes can use cmn for negative immediates.
if (!Subtarget->isThumb())
- return ARM_AM::getSOImmVal(llvm::abs64(Imm)) != -1;
+ return ARM_AM::getSOImmVal(std::abs(Imm)) != -1;
if (Subtarget->isThumb2())
- return ARM_AM::getT2SOImmVal(llvm::abs64(Imm)) != -1;
+ return ARM_AM::getT2SOImmVal(std::abs(Imm)) != -1;
// Thumb1 doesn't have cmn, and only 8-bit immediates.
return Imm >= 0 && Imm <= 255;
}
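
The std::abs is justified because CMP flips to CMN for negative immediates, so legality depends only on the magnitude fitting the SO-immediate encoding: an 8-bit value rotated right by an even amount. A rough stand-in for ARM_AM::getSOImmVal (ignoring the INT64_MIN corner case):

#include <cstdint>
#include <cstdlib>

// True if V equals some 8-bit value rotated right by an even amount.
bool isSOImm(uint32_t V) {
  for (unsigned R = 0; R < 32; R += 2) {
    uint32_t Undone = R ? ((V << R) | (V >> (32 - R))) : V; // undo: rotate left
    if ((Undone & ~0xFFu) == 0)
      return true;
  }
  return false;
}

bool legalARMICmpImm(int64_t Imm) {
  return isSOImm((uint32_t)std::abs(Imm)); // sign flips via CMP <-> CMN
}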
@@ -10302,7 +10194,7 @@ bool ARMTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
/// immediate into a register.
bool ARMTargetLowering::isLegalAddImmediate(int64_t Imm) const {
// Same encoding for add/sub, just flip the sign.
- int64_t AbsImm = llvm::abs64(Imm);
+ int64_t AbsImm = std::abs(Imm);
if (!Subtarget->isThumb())
return ARM_AM::getSOImmVal(AbsImm) != -1;
if (Subtarget->isThumb2())
@@ -11198,9 +11090,12 @@ bool ARMTargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
// For the real atomic operations, we have ldrex/strex up to 32 bits,
// and up to 64 bits on the non-M profiles
-bool ARMTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
+TargetLoweringBase::AtomicRMWExpansionKind
+ARMTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
unsigned Size = AI->getType()->getPrimitiveSizeInBits();
- return Size <= (Subtarget->isMClass() ? 32U : 64U);
+ return (Size <= (Subtarget->isMClass() ? 32U : 64U))
+ ? AtomicRMWExpansionKind::LLSC
+ : AtomicRMWExpansionKind::None;
}
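
Restating the new contract in isolation (the enum is spelled out locally; sizes are in bits):

enum class AtomicRMWExpansionKind { None, LLSC };

// Up to the exclusive-monitor width -- 32 bits on M-profile (ldrex/strex),
// 64 bits elsewhere (ldrexd/strexd) -- an RMW expands to an LL/SC loop;
// larger operations are left alone and become library calls elsewhere.
AtomicRMWExpansionKind expandRMW(unsigned SizeInBits, bool IsMClass) {
  return SizeInBits <= (IsMClass ? 32u : 64u)
             ? AtomicRMWExpansionKind::LLSC
             : AtomicRMWExpansionKind::None;
}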
// This has so far only been implemented for MachO.
diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h
index ec1407d..dd4c954 100644
--- a/lib/Target/ARM/ARMISelLowering.h
+++ b/lib/Target/ARM/ARMISelLowering.h
@@ -65,11 +65,6 @@ namespace llvm {
RBIT, // ARM bitreverse instruction
- FTOSI, // FP to sint within a FP register.
- FTOUI, // FP to uint within a FP register.
- SITOF, // sint to FP within a FP register.
- UITOF, // uint to FP within a FP register.
-
SRL_FLAG, // V,Flag = srl_flag X -> srl X, 1 + save carry out.
SRA_FLAG, // V,Flag = sra_flag X -> sra X, 1 + save carry out.
RRX, // V = RRX X, Flag -> srl X, 1 + shift in carry flag.
@@ -283,6 +278,8 @@ namespace llvm {
using TargetLowering::isZExtFree;
bool isZExtFree(SDValue Val, EVT VT2) const override;
+ bool isVectorLoadExtDesirable(SDValue ExtVal) const override;
+
bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const override;
@@ -346,6 +343,12 @@ namespace llvm {
std::vector<SDValue> &Ops,
SelectionDAG &DAG) const override;
+ unsigned getInlineAsmMemConstraint(
+ const std::string &ConstraintCode) const override {
+ // FIXME: Map different constraints differently.
+ return InlineAsm::Constraint_m;
+ }
+
const ARMSubtarget* getSubtarget() const {
return Subtarget;
}
@@ -360,6 +363,9 @@ namespace llvm {
return true;
}
+ bool shouldAlignPointerArgs(CallInst *CI, unsigned &MinSize,
+ unsigned &PrefAlign) const override;
+
/// createFastISel - This method returns a target specific FastISel object,
/// or null if the target does not support "fast" ISel.
FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
@@ -404,7 +410,8 @@ namespace llvm {
bool shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
- bool shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
+ TargetLoweringBase::AtomicRMWExpansionKind
+ shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
bool useLoadStackGuardNode() const override;
@@ -525,12 +532,8 @@ namespace llvm {
SDLoc dl, SDValue &Chain,
const Value *OrigArg,
unsigned InRegsParamRecordIdx,
- unsigned OffsetFromOrigArg,
- unsigned ArgOffset,
- unsigned ArgSize,
- bool ForceMutable,
- unsigned ByValStoreOffset,
- unsigned TotalArgRegsSaveSize) const;
+ int ArgOffset,
+ unsigned ArgSize) const;
void VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,
SDLoc dl, SDValue &Chain,
@@ -538,12 +541,6 @@ namespace llvm {
unsigned TotalArgRegsSaveSize,
bool ForceMutable = false) const;
- void computeRegArea(CCState &CCInfo, MachineFunction &MF,
- unsigned InRegsParamRecordIdx,
- unsigned ArgSize,
- unsigned &ArgRegsSize,
- unsigned &ArgRegsSaveSize) const;
-
SDValue
LowerCall(TargetLowering::CallLoweringInfo &CLI,
SmallVectorImpl<SDValue> &InVals) const override;
diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td
index 7d27cf3..e79608d 100644
--- a/lib/Target/ARM/ARMInstrFormats.td
+++ b/lib/Target/ARM/ARMInstrFormats.td
@@ -983,7 +983,12 @@ class ARMV5MOPat<dag pattern, dag result> : Pat<pattern, result> {
class ARMV6Pat<dag pattern, dag result> : Pat<pattern, result> {
list<Predicate> Predicates = [IsARM, HasV6];
}
-
+class VFPPat<dag pattern, dag result> : Pat<pattern, result> {
+ list<Predicate> Predicates = [HasVFP2];
+}
+class VFPNoNEONPat<dag pattern, dag result> : Pat<pattern, result> {
+ list<Predicate> Predicates = [HasVFP2, DontUseNEONForFP];
+}
//===----------------------------------------------------------------------===//
// Thumb Instruction Format Definitions.
//
diff --git a/lib/Target/ARM/ARMInstrInfo.cpp b/lib/Target/ARM/ARMInstrInfo.cpp
index bc617f0..7c004c9 100644
--- a/lib/Target/ARM/ARMInstrInfo.cpp
+++ b/lib/Target/ARM/ARMInstrInfo.cpp
@@ -30,8 +30,7 @@
using namespace llvm;
ARMInstrInfo::ARMInstrInfo(const ARMSubtarget &STI)
- : ARMBaseInstrInfo(STI), RI(STI) {
-}
+ : ARMBaseInstrInfo(STI), RI() {}
/// getNoopForMachoTarget - Return the noop instruction to use for a noop.
void ARMInstrInfo::getNoopForMachoTarget(MCInst &NopInst) const {
@@ -146,6 +145,10 @@ namespace {
return false;
const ARMSubtarget &STI =
static_cast<const ARMSubtarget &>(MF.getSubtarget());
+ // Don't do this for Thumb1.
+ if (STI.isThumb1Only())
+ return false;
+
const TargetMachine &TM = MF.getTarget();
if (TM.getRelocationModel() != Reloc::PIC_)
return false;
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index 126c552..c3984ca 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -199,6 +199,9 @@ def HasV6M : Predicate<"Subtarget->hasV6MOps()">,
def HasV6T2 : Predicate<"Subtarget->hasV6T2Ops()">,
AssemblerPredicate<"HasV6T2Ops", "armv6t2">;
def NoV6T2 : Predicate<"!Subtarget->hasV6T2Ops()">;
+def HasV6K : Predicate<"Subtarget->hasV6KOps()">,
+ AssemblerPredicate<"HasV6KOps", "armv6k">;
+def NoV6K : Predicate<"!Subtarget->hasV6KOps()">;
def HasV7 : Predicate<"Subtarget->hasV7Ops()">,
AssemblerPredicate<"HasV7Ops", "armv7">;
def HasV8 : Predicate<"Subtarget->hasV8Ops()">,
@@ -223,6 +226,8 @@ def HasCrypto : Predicate<"Subtarget->hasCrypto()">,
AssemblerPredicate<"FeatureCrypto", "crypto">;
def HasCRC : Predicate<"Subtarget->hasCRC()">,
AssemblerPredicate<"FeatureCRC", "crc">;
+def HasV8_1a : Predicate<"Subtarget->hasV8_1a()">,
+ AssemblerPredicate<"FeatureV8_1a", "v8.1a">;
def HasFP16 : Predicate<"Subtarget->hasFP16()">,
AssemblerPredicate<"FeatureFP16","half-float">;
def HasDivide : Predicate<"Subtarget->hasDivide()">,
@@ -1835,11 +1840,11 @@ def HINT : AI<(outs), (ins imm0_239:$imm), MiscFrm, NoItinerary,
let Inst{7-0} = imm;
}
-def : InstAlias<"nop$p", (HINT 0, pred:$p)>, Requires<[IsARM, HasV6T2]>;
-def : InstAlias<"yield$p", (HINT 1, pred:$p)>, Requires<[IsARM, HasV6T2]>;
-def : InstAlias<"wfe$p", (HINT 2, pred:$p)>, Requires<[IsARM, HasV6T2]>;
-def : InstAlias<"wfi$p", (HINT 3, pred:$p)>, Requires<[IsARM, HasV6T2]>;
-def : InstAlias<"sev$p", (HINT 4, pred:$p)>, Requires<[IsARM, HasV6T2]>;
+def : InstAlias<"nop$p", (HINT 0, pred:$p)>, Requires<[IsARM, HasV6K]>;
+def : InstAlias<"yield$p", (HINT 1, pred:$p)>, Requires<[IsARM, HasV6K]>;
+def : InstAlias<"wfe$p", (HINT 2, pred:$p)>, Requires<[IsARM, HasV6K]>;
+def : InstAlias<"wfi$p", (HINT 3, pred:$p)>, Requires<[IsARM, HasV6K]>;
+def : InstAlias<"sev$p", (HINT 4, pred:$p)>, Requires<[IsARM, HasV6K]>;
def : InstAlias<"sevl$p", (HINT 5, pred:$p)>, Requires<[IsARM, HasV8]>;
def SEL : AI<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), DPFrm, NoItinerary, "sel",
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td
index 2a7b4b5..a6a07a8 100644
--- a/lib/Target/ARM/ARMInstrNEON.td
+++ b/lib/Target/ARM/ARMInstrNEON.td
@@ -2790,7 +2790,7 @@ class N3VDMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
imm:$lane)))))))]>;
class N3VDMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
string OpcodeStr, string Dt,
- ValueType Ty, SDNode MulOp, SDNode ShOp>
+ ValueType Ty, SDPatternOperator MulOp, SDPatternOperator ShOp>
: N3VLane16<0, 1, op21_20, op11_8, 1, 0,
(outs DPR:$Vd),
(ins DPR:$src1, DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
@@ -2826,7 +2826,7 @@ class N3VQMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
class N3VQMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy,
- SDNode MulOp, SDNode ShOp>
+ SDPatternOperator MulOp, SDPatternOperator ShOp>
: N3VLane16<1, 1, op21_20, op11_8, 1, 0,
(outs QPR:$Vd),
(ins QPR:$src1, QPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
@@ -3674,7 +3674,7 @@ multiclass N3VMulOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
multiclass N3VMulOpSL_HS<bits<4> op11_8,
InstrItinClass itinD16, InstrItinClass itinD32,
InstrItinClass itinQ16, InstrItinClass itinQ32,
- string OpcodeStr, string Dt, SDNode ShOp> {
+ string OpcodeStr, string Dt, SDPatternOperator ShOp> {
def v4i16 : N3VDMulOpSL16<0b01, op11_8, itinD16,
OpcodeStr, !strconcat(Dt, "16"), v4i16, mul, ShOp>;
def v2i32 : N3VDMulOpSL<0b10, op11_8, itinD32,
@@ -3711,27 +3711,38 @@ multiclass N3VIntOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
}
// Neon 3-argument intrinsics,
-// element sizes of 8, 16 and 32 bits:
-multiclass N3VInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
- InstrItinClass itinD, InstrItinClass itinQ,
+// element sizes of 16 and 32 bits:
+multiclass N3VInt3_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
+ InstrItinClass itinD16, InstrItinClass itinD32,
+ InstrItinClass itinQ16, InstrItinClass itinQ32,
string OpcodeStr, string Dt, SDPatternOperator IntOp> {
// 64-bit vector types.
- def v8i8 : N3VDInt3<op24, op23, 0b00, op11_8, op4, itinD,
- OpcodeStr, !strconcat(Dt, "8"), v8i8, v8i8, IntOp>;
- def v4i16 : N3VDInt3<op24, op23, 0b01, op11_8, op4, itinD,
+ def v4i16 : N3VDInt3<op24, op23, 0b01, op11_8, op4, itinD16,
OpcodeStr, !strconcat(Dt, "16"), v4i16, v4i16, IntOp>;
- def v2i32 : N3VDInt3<op24, op23, 0b10, op11_8, op4, itinD,
+ def v2i32 : N3VDInt3<op24, op23, 0b10, op11_8, op4, itinD32,
OpcodeStr, !strconcat(Dt, "32"), v2i32, v2i32, IntOp>;
// 128-bit vector types.
- def v16i8 : N3VQInt3<op24, op23, 0b00, op11_8, op4, itinQ,
- OpcodeStr, !strconcat(Dt, "8"), v16i8, v16i8, IntOp>;
- def v8i16 : N3VQInt3<op24, op23, 0b01, op11_8, op4, itinQ,
+ def v8i16 : N3VQInt3<op24, op23, 0b01, op11_8, op4, itinQ16,
OpcodeStr, !strconcat(Dt, "16"), v8i16, v8i16, IntOp>;
- def v4i32 : N3VQInt3<op24, op23, 0b10, op11_8, op4, itinQ,
+ def v4i32 : N3VQInt3<op24, op23, 0b10, op11_8, op4, itinQ32,
OpcodeStr, !strconcat(Dt, "32"), v4i32, v4i32, IntOp>;
}
+// element sizes of 8, 16 and 32 bits:
+multiclass N3VInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
+ InstrItinClass itinD16, InstrItinClass itinD32,
+ InstrItinClass itinQ16, InstrItinClass itinQ32,
+ string OpcodeStr, string Dt, SDPatternOperator IntOp>
+  : N3VInt3_HS<op24, op23, op11_8, op4, itinD16, itinD32,
+               itinQ16, itinQ32, OpcodeStr, Dt, IntOp> {
+ // 64-bit vector types.
+ def v8i8 : N3VDInt3<op24, op23, 0b00, op11_8, op4, itinD16,
+ OpcodeStr, !strconcat(Dt, "8"), v8i8, v8i8, IntOp>;
+ // 128-bit vector types.
+ def v16i8 : N3VQInt3<op24, op23, 0b00, op11_8, op4, itinQ16,
+ OpcodeStr, !strconcat(Dt, "8"), v16i8, v16i8, IntOp>;
+}
// Neon Long Multiply-Op vector operations,
// element sizes of 8, 16 and 32 bits:
@@ -4305,6 +4316,147 @@ defm VMLALu : N3VLMulOp_QHS<1,1,0b1000,0, IIC_VMACi16D, IIC_VMACi32D,
defm VMLALsls : N3VLMulOpSL_HS<0, 0b0010, "vmlal", "s", NEONvmulls, add>;
defm VMLALslu : N3VLMulOpSL_HS<1, 0b0010, "vmlal", "u", NEONvmullu, add>;
+let Predicates = [HasNEON, HasV8_1a] in {
+  // v8.1a Neon Rounding Doubling Multiply-Op vector operations:
+  // VQRDMLAH : Vector Saturating Rounding Doubling Multiply Accumulate
+  // Returning High Half (not a widening "long" operation)
+ defm VQRDMLAH : N3VInt3_HS<1, 0, 0b1011, 1, IIC_VMACi16D, IIC_VMACi32D,
+ IIC_VMACi16Q, IIC_VMACi32Q, "vqrdmlah", "s",
+ null_frag>;
+ def : Pat<(v4i16 (int_arm_neon_vqadds
+ (v4i16 DPR:$src1),
+ (v4i16 (int_arm_neon_vqrdmulh (v4i16 DPR:$Vn),
+ (v4i16 DPR:$Vm))))),
+ (v4i16 (VQRDMLAHv4i16 DPR:$src1, DPR:$Vn, DPR:$Vm))>;
+ def : Pat<(v2i32 (int_arm_neon_vqadds
+ (v2i32 DPR:$src1),
+ (v2i32 (int_arm_neon_vqrdmulh (v2i32 DPR:$Vn),
+ (v2i32 DPR:$Vm))))),
+ (v2i32 (VQRDMLAHv2i32 DPR:$src1, DPR:$Vn, DPR:$Vm))>;
+ def : Pat<(v8i16 (int_arm_neon_vqadds
+ (v8i16 QPR:$src1),
+ (v8i16 (int_arm_neon_vqrdmulh (v8i16 QPR:$Vn),
+ (v8i16 QPR:$Vm))))),
+ (v8i16 (VQRDMLAHv8i16 QPR:$src1, QPR:$Vn, QPR:$Vm))>;
+ def : Pat<(v4i32 (int_arm_neon_vqadds
+ (v4i32 QPR:$src1),
+ (v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$Vn),
+ (v4i32 QPR:$Vm))))),
+ (v4i32 (VQRDMLAHv4i32 QPR:$src1, QPR:$Vn, QPR:$Vm))>;
+
+ defm VQRDMLAHsl : N3VMulOpSL_HS<0b1110, IIC_VMACi16D, IIC_VMACi32D,
+ IIC_VMACi16Q, IIC_VMACi32Q, "vqrdmlah", "s",
+ null_frag>;
+ def : Pat<(v4i16 (int_arm_neon_vqadds
+ (v4i16 DPR:$src1),
+ (v4i16 (int_arm_neon_vqrdmulh
+ (v4i16 DPR:$Vn),
+ (v4i16 (NEONvduplane (v4i16 DPR_8:$Vm),
+ imm:$lane)))))),
+ (v4i16 (VQRDMLAHslv4i16 DPR:$src1, DPR:$Vn, DPR_8:$Vm,
+ imm:$lane))>;
+ def : Pat<(v2i32 (int_arm_neon_vqadds
+ (v2i32 DPR:$src1),
+ (v2i32 (int_arm_neon_vqrdmulh
+ (v2i32 DPR:$Vn),
+ (v2i32 (NEONvduplane (v2i32 DPR_VFP2:$Vm),
+ imm:$lane)))))),
+ (v2i32 (VQRDMLAHslv2i32 DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm,
+ imm:$lane))>;
+ def : Pat<(v8i16 (int_arm_neon_vqadds
+ (v8i16 QPR:$src1),
+ (v8i16 (int_arm_neon_vqrdmulh
+ (v8i16 QPR:$src2),
+ (v8i16 (NEONvduplane (v8i16 QPR:$src3),
+ imm:$lane)))))),
+ (v8i16 (VQRDMLAHslv8i16 (v8i16 QPR:$src1),
+ (v8i16 QPR:$src2),
+ (v4i16 (EXTRACT_SUBREG
+ QPR:$src3,
+ (DSubReg_i16_reg imm:$lane))),
+ (SubReg_i16_lane imm:$lane)))>;
+ def : Pat<(v4i32 (int_arm_neon_vqadds
+ (v4i32 QPR:$src1),
+ (v4i32 (int_arm_neon_vqrdmulh
+ (v4i32 QPR:$src2),
+ (v4i32 (NEONvduplane (v4i32 QPR:$src3),
+ imm:$lane)))))),
+ (v4i32 (VQRDMLAHslv4i32 (v4i32 QPR:$src1),
+ (v4i32 QPR:$src2),
+ (v2i32 (EXTRACT_SUBREG
+ QPR:$src3,
+ (DSubReg_i32_reg imm:$lane))),
+ (SubReg_i32_lane imm:$lane)))>;
+
+  // VQRDMLSH : Vector Saturating Rounding Doubling Multiply Subtract
+  // Returning High Half (not a widening "long" operation)
+ defm VQRDMLSH : N3VInt3_HS<1, 0, 0b1100, 1, IIC_VMACi16D, IIC_VMACi32D,
+ IIC_VMACi16Q, IIC_VMACi32Q, "vqrdmlsh", "s",
+ null_frag>;
+ def : Pat<(v4i16 (int_arm_neon_vqsubs
+ (v4i16 DPR:$src1),
+ (v4i16 (int_arm_neon_vqrdmulh (v4i16 DPR:$Vn),
+ (v4i16 DPR:$Vm))))),
+ (v4i16 (VQRDMLSHv4i16 DPR:$src1, DPR:$Vn, DPR:$Vm))>;
+ def : Pat<(v2i32 (int_arm_neon_vqsubs
+ (v2i32 DPR:$src1),
+ (v2i32 (int_arm_neon_vqrdmulh (v2i32 DPR:$Vn),
+ (v2i32 DPR:$Vm))))),
+ (v2i32 (VQRDMLSHv2i32 DPR:$src1, DPR:$Vn, DPR:$Vm))>;
+ def : Pat<(v8i16 (int_arm_neon_vqsubs
+ (v8i16 QPR:$src1),
+ (v8i16 (int_arm_neon_vqrdmulh (v8i16 QPR:$Vn),
+ (v8i16 QPR:$Vm))))),
+ (v8i16 (VQRDMLSHv8i16 QPR:$src1, QPR:$Vn, QPR:$Vm))>;
+ def : Pat<(v4i32 (int_arm_neon_vqsubs
+ (v4i32 QPR:$src1),
+ (v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$Vn),
+ (v4i32 QPR:$Vm))))),
+ (v4i32 (VQRDMLSHv4i32 QPR:$src1, QPR:$Vn, QPR:$Vm))>;
+
+ defm VQRDMLSHsl : N3VMulOpSL_HS<0b1111, IIC_VMACi16D, IIC_VMACi32D,
+ IIC_VMACi16Q, IIC_VMACi32Q, "vqrdmlsh", "s",
+ null_frag>;
+ def : Pat<(v4i16 (int_arm_neon_vqsubs
+ (v4i16 DPR:$src1),
+ (v4i16 (int_arm_neon_vqrdmulh
+ (v4i16 DPR:$Vn),
+ (v4i16 (NEONvduplane (v4i16 DPR_8:$Vm),
+ imm:$lane)))))),
+ (v4i16 (VQRDMLSHslv4i16 DPR:$src1, DPR:$Vn, DPR_8:$Vm, imm:$lane))>;
+ def : Pat<(v2i32 (int_arm_neon_vqsubs
+ (v2i32 DPR:$src1),
+ (v2i32 (int_arm_neon_vqrdmulh
+ (v2i32 DPR:$Vn),
+ (v2i32 (NEONvduplane (v2i32 DPR_VFP2:$Vm),
+ imm:$lane)))))),
+ (v2i32 (VQRDMLSHslv2i32 DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm,
+ imm:$lane))>;
+ def : Pat<(v8i16 (int_arm_neon_vqsubs
+ (v8i16 QPR:$src1),
+ (v8i16 (int_arm_neon_vqrdmulh
+ (v8i16 QPR:$src2),
+ (v8i16 (NEONvduplane (v8i16 QPR:$src3),
+ imm:$lane)))))),
+ (v8i16 (VQRDMLSHslv8i16 (v8i16 QPR:$src1),
+ (v8i16 QPR:$src2),
+ (v4i16 (EXTRACT_SUBREG
+ QPR:$src3,
+ (DSubReg_i16_reg imm:$lane))),
+ (SubReg_i16_lane imm:$lane)))>;
+ def : Pat<(v4i32 (int_arm_neon_vqsubs
+ (v4i32 QPR:$src1),
+ (v4i32 (int_arm_neon_vqrdmulh
+ (v4i32 QPR:$src2),
+ (v4i32 (NEONvduplane (v4i32 QPR:$src3),
+ imm:$lane)))))),
+ (v4i32 (VQRDMLSHslv4i32 (v4i32 QPR:$src1),
+ (v4i32 QPR:$src2),
+ (v2i32 (EXTRACT_SUBREG
+ QPR:$src3,
+ (DSubReg_i32_reg imm:$lane))),
+ (SubReg_i32_lane imm:$lane)))>;
+}
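
For reference, a scalar model of the operation these patterns match, written for 16-bit lanes (this follows the vqadds/vqsubs-of-vqrdmulh composition in the patterns above, not in-tree code):

#include <algorithm>
#include <cstdint>

int16_t sat16(int64_t V) {
  return (int16_t)std::min<int64_t>(std::max<int64_t>(V, INT16_MIN), INT16_MAX);
}

// vqrdmulh: doubling multiply, round, keep the saturated high half.
int16_t vqrdmulh16(int16_t A, int16_t B) {
  int64_t P = 2 * (int64_t)A * (int64_t)B + (1 << 15);
  return sat16(P >> 16);
}

// vqrdmlah / vqrdmlsh: saturating accumulate of that product.
int16_t vqrdmlah16(int16_t Acc, int16_t A, int16_t B) {
  return sat16((int64_t)Acc + vqrdmulh16(A, B));
}
int16_t vqrdmlsh16(int16_t Acc, int16_t A, int16_t B) {
  return sat16((int64_t)Acc - vqrdmulh16(A, B));
}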
// VQDMLAL : Vector Saturating Doubling Multiply Accumulate Long (Q += D * D)
defm VQDMLAL : N3VLInt3_HS<0, 1, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
"vqdmlal", "s", null_frag>;
@@ -6158,6 +6310,21 @@ class N3VSMulOpPat<SDNode MulNode, SDNode OpNode, NeonI Inst>
(v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
SPR:$b, ssub_0)), DPR_VFP2)), ssub_0)>;
+class NVCVTIFPat<SDNode OpNode, NeonI Inst>
+ : NEONFPPat<(f32 (OpNode GPR:$a)),
+ (f32 (EXTRACT_SUBREG
+ (v2f32 (Inst
+ (INSERT_SUBREG
+ (v2f32 (IMPLICIT_DEF)),
+ (i32 (COPY_TO_REGCLASS GPR:$a, SPR)), ssub_0))),
+ ssub_0))>;
+class NVCVTFIPat<SDNode OpNode, NeonI Inst>
+ : NEONFPPat<(i32 (OpNode SPR:$a)),
+ (i32 (EXTRACT_SUBREG
+ (v2f32 (Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
+ SPR:$a, ssub_0))),
+ ssub_0))>;
+
def : N3VSPat<fadd, VADDfd>;
def : N3VSPat<fsub, VSUBfd>;
def : N3VSPat<fmul, VMULfd>;
@@ -6173,10 +6340,22 @@ def : N2VSPat<fabs, VABSfd>;
def : N2VSPat<fneg, VNEGfd>;
def : N3VSPat<NEONfmax, VMAXfd>;
def : N3VSPat<NEONfmin, VMINfd>;
-def : N2VSPat<arm_ftosi, VCVTf2sd>;
-def : N2VSPat<arm_ftoui, VCVTf2ud>;
-def : N2VSPat<arm_sitof, VCVTs2fd>;
-def : N2VSPat<arm_uitof, VCVTu2fd>;
+def : NVCVTFIPat<fp_to_sint, VCVTf2sd>;
+def : NVCVTFIPat<fp_to_uint, VCVTf2ud>;
+def : NVCVTIFPat<sint_to_fp, VCVTs2fd>;
+def : NVCVTIFPat<uint_to_fp, VCVTu2fd>;
+
+// NEON doesn't have any f64 conversions, so provide patterns to make
+// sure the VFP conversions match when extracting from a vector.
+def : VFPPat<(f64 (sint_to_fp (extractelt (v2i32 DPR:$src), imm:$lane))),
+ (VSITOD (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane)))>;
+def : VFPPat<(f64 (sint_to_fp (extractelt (v4i32 QPR:$src), imm:$lane))),
+ (VSITOD (EXTRACT_SUBREG QPR:$src, (SSubReg_f32_reg imm:$lane)))>;
+def : VFPPat<(f64 (uint_to_fp (extractelt (v2i32 DPR:$src), imm:$lane))),
+ (VUITOD (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane)))>;
+def : VFPPat<(f64 (uint_to_fp (extractelt (v4i32 QPR:$src), imm:$lane))),
+ (VUITOD (EXTRACT_SUBREG QPR:$src, (SSubReg_f32_reg imm:$lane)))>;
+
// Prefer VMOVDRR for i32 -> f32 bitcasts, it can write all DPR registers.
def : Pat<(f32 (bitconvert GPR:$a)),
diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td
index e0a9314..afff016 100644
--- a/lib/Target/ARM/ARMInstrVFP.td
+++ b/lib/Target/ARM/ARMInstrVFP.td
@@ -11,16 +11,10 @@
//
//===----------------------------------------------------------------------===//
-def SDT_FTOI : SDTypeProfile<1, 1, [SDTCisVT<0, f32>, SDTCisFP<1>]>;
-def SDT_ITOF : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVT<1, f32>]>;
def SDT_CMPFP0 : SDTypeProfile<0, 1, [SDTCisFP<0>]>;
def SDT_VMOVDRR : SDTypeProfile<1, 2, [SDTCisVT<0, f64>, SDTCisVT<1, i32>,
SDTCisSameAs<1, 2>]>;
-def arm_ftoui : SDNode<"ARMISD::FTOUI", SDT_FTOI>;
-def arm_ftosi : SDNode<"ARMISD::FTOSI", SDT_FTOI>;
-def arm_sitof : SDNode<"ARMISD::SITOF", SDT_ITOF>;
-def arm_uitof : SDNode<"ARMISD::UITOF", SDT_ITOF>;
def arm_fmstat : SDNode<"ARMISD::FMSTAT", SDTNone, [SDNPInGlue, SDNPOutGlue]>;
def arm_cmpfp : SDNode<"ARMISD::CMPFP", SDT_ARMCmp, [SDNPOutGlue]>;
def arm_cmpfp0 : SDNode<"ARMISD::CMPFPw0", SDT_CMPFP0, [SDNPOutGlue]>;
@@ -633,7 +627,7 @@ multiclass vcvt_inst<string opc, bits<2> rm,
def SS : ASuInp<0b11101, 0b11, 0b1100, 0b11, 0,
(outs SPR:$Sd), (ins SPR:$Sm),
NoItinerary, !strconcat("vcvt", opc, ".s32.f32\t$Sd, $Sm"),
- [(set SPR:$Sd, (arm_ftosi (node SPR:$Sm)))]>,
+ []>,
Requires<[HasFPARMv8]> {
let Inst{17-16} = rm;
}
@@ -641,7 +635,7 @@ multiclass vcvt_inst<string opc, bits<2> rm,
def US : ASuInp<0b11101, 0b11, 0b1100, 0b01, 0,
(outs SPR:$Sd), (ins SPR:$Sm),
NoItinerary, !strconcat("vcvt", opc, ".u32.f32\t$Sd, $Sm"),
- [(set SPR:$Sd, (arm_ftoui (node SPR:$Sm)))]>,
+ []>,
Requires<[HasFPARMv8]> {
let Inst{17-16} = rm;
}
@@ -649,7 +643,7 @@ multiclass vcvt_inst<string opc, bits<2> rm,
def SD : ASuInp<0b11101, 0b11, 0b1100, 0b11, 0,
(outs SPR:$Sd), (ins DPR:$Dm),
NoItinerary, !strconcat("vcvt", opc, ".s32.f64\t$Sd, $Dm"),
- [(set SPR:$Sd, (arm_ftosi (f64 (node (f64 DPR:$Dm)))))]>,
+ []>,
Requires<[HasFPARMv8, HasDPVFP]> {
bits<5> Dm;
@@ -664,7 +658,7 @@ multiclass vcvt_inst<string opc, bits<2> rm,
def UD : ASuInp<0b11101, 0b11, 0b1100, 0b01, 0,
(outs SPR:$Sd), (ins DPR:$Dm),
NoItinerary, !strconcat("vcvt", opc, ".u32.f64\t$Sd, $Dm"),
- [(set SPR:$Sd, (arm_ftoui (f64 (node (f64 DPR:$Dm)))))]>,
+ []>,
Requires<[HasFPARMv8, HasDPVFP]> {
bits<5> Dm;
@@ -676,6 +670,27 @@ multiclass vcvt_inst<string opc, bits<2> rm,
let Inst{8} = 1;
}
}
+
+ let Predicates = [HasFPARMv8] in {
+ def : Pat<(i32 (fp_to_sint (node SPR:$a))),
+ (COPY_TO_REGCLASS
+ (!cast<Instruction>(NAME#"SS") SPR:$a),
+ GPR)>;
+ def : Pat<(i32 (fp_to_uint (node SPR:$a))),
+ (COPY_TO_REGCLASS
+ (!cast<Instruction>(NAME#"US") SPR:$a),
+ GPR)>;
+ }
+ let Predicates = [HasFPARMv8, HasDPVFP] in {
+ def : Pat<(i32 (fp_to_sint (node (f64 DPR:$a)))),
+ (COPY_TO_REGCLASS
+ (!cast<Instruction>(NAME#"SD") DPR:$a),
+ GPR)>;
+ def : Pat<(i32 (fp_to_uint (node (f64 DPR:$a)))),
+ (COPY_TO_REGCLASS
+ (!cast<Instruction>(NAME#"UD") DPR:$a),
+ GPR)>;
+ }
}
defm VCVTA : vcvt_inst<"a", 0b00, frnd>;
@@ -980,14 +995,22 @@ class AVConv1InSs_Encode<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3,
def VSITOD : AVConv1IDs_Encode<0b11101, 0b11, 0b1000, 0b1011,
(outs DPR:$Dd), (ins SPR:$Sm),
IIC_fpCVTID, "vcvt", ".f64.s32\t$Dd, $Sm",
- [(set DPR:$Dd, (f64 (arm_sitof SPR:$Sm)))]> {
+ []> {
let Inst{7} = 1; // s32
}
+let Predicates = [HasVFP2, HasDPVFP] in {
+ def : VFPPat<(f64 (sint_to_fp GPR:$a)),
+ (VSITOD (COPY_TO_REGCLASS GPR:$a, SPR))>;
+
+ def : VFPPat<(f64 (sint_to_fp (i32 (load addrmode5:$a)))),
+ (VSITOD (VLDRS addrmode5:$a))>;
+}
+
def VSITOS : AVConv1InSs_Encode<0b11101, 0b11, 0b1000, 0b1010,
(outs SPR:$Sd),(ins SPR:$Sm),
IIC_fpCVTIS, "vcvt", ".f32.s32\t$Sd, $Sm",
- [(set SPR:$Sd, (arm_sitof SPR:$Sm))]> {
+ []> {
let Inst{7} = 1; // s32
// Some single precision VFP instructions may be executed on both NEON and
@@ -995,17 +1018,31 @@ def VSITOS : AVConv1InSs_Encode<0b11101, 0b11, 0b1000, 0b1010,
let D = VFPNeonA8Domain;
}
+def : VFPNoNEONPat<(f32 (sint_to_fp GPR:$a)),
+ (VSITOS (COPY_TO_REGCLASS GPR:$a, SPR))>;
+
+def : VFPNoNEONPat<(f32 (sint_to_fp (i32 (load addrmode5:$a)))),
+ (VSITOS (VLDRS addrmode5:$a))>;
+
def VUITOD : AVConv1IDs_Encode<0b11101, 0b11, 0b1000, 0b1011,
(outs DPR:$Dd), (ins SPR:$Sm),
IIC_fpCVTID, "vcvt", ".f64.u32\t$Dd, $Sm",
- [(set DPR:$Dd, (f64 (arm_uitof SPR:$Sm)))]> {
+ []> {
let Inst{7} = 0; // u32
}
+let Predicates = [HasVFP2, HasDPVFP] in {
+ def : VFPPat<(f64 (uint_to_fp GPR:$a)),
+ (VUITOD (COPY_TO_REGCLASS GPR:$a, SPR))>;
+
+ def : VFPPat<(f64 (uint_to_fp (i32 (load addrmode5:$a)))),
+ (VUITOD (VLDRS addrmode5:$a))>;
+}
+
def VUITOS : AVConv1InSs_Encode<0b11101, 0b11, 0b1000, 0b1010,
(outs SPR:$Sd), (ins SPR:$Sm),
IIC_fpCVTIS, "vcvt", ".f32.u32\t$Sd, $Sm",
- [(set SPR:$Sd, (arm_uitof SPR:$Sm))]> {
+ []> {
let Inst{7} = 0; // u32
// Some single precision VFP instructions may be executed on both NEON and
@@ -1013,6 +1050,12 @@ def VUITOS : AVConv1InSs_Encode<0b11101, 0b11, 0b1000, 0b1010,
let D = VFPNeonA8Domain;
}
+def : VFPNoNEONPat<(f32 (uint_to_fp GPR:$a)),
+ (VUITOS (COPY_TO_REGCLASS GPR:$a, SPR))>;
+
+def : VFPNoNEONPat<(f32 (uint_to_fp (i32 (load addrmode5:$a)))),
+ (VUITOS (VLDRS addrmode5:$a))>;
+
// FP -> Int:
class AVConv1IsD_Encode<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3,
@@ -1055,14 +1098,22 @@ class AVConv1InsS_Encode<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3,
def VTOSIZD : AVConv1IsD_Encode<0b11101, 0b11, 0b1101, 0b1011,
(outs SPR:$Sd), (ins DPR:$Dm),
IIC_fpCVTDI, "vcvt", ".s32.f64\t$Sd, $Dm",
- [(set SPR:$Sd, (arm_ftosi (f64 DPR:$Dm)))]> {
+ []> {
let Inst{7} = 1; // Z bit
}
+let Predicates = [HasVFP2, HasDPVFP] in {
+ def : VFPPat<(i32 (fp_to_sint (f64 DPR:$a))),
+ (COPY_TO_REGCLASS (VTOSIZD DPR:$a), GPR)>;
+
+ def : VFPPat<(store (i32 (fp_to_sint (f64 DPR:$a))), addrmode5:$ptr),
+ (VSTRS (VTOSIZD DPR:$a), addrmode5:$ptr)>;
+}
+
def VTOSIZS : AVConv1InsS_Encode<0b11101, 0b11, 0b1101, 0b1010,
(outs SPR:$Sd), (ins SPR:$Sm),
IIC_fpCVTSI, "vcvt", ".s32.f32\t$Sd, $Sm",
- [(set SPR:$Sd, (arm_ftosi SPR:$Sm))]> {
+ []> {
let Inst{7} = 1; // Z bit
// Some single precision VFP instructions may be executed on both NEON and
@@ -1070,17 +1121,31 @@ def VTOSIZS : AVConv1InsS_Encode<0b11101, 0b11, 0b1101, 0b1010,
let D = VFPNeonA8Domain;
}
+def : VFPNoNEONPat<(i32 (fp_to_sint SPR:$a)),
+ (COPY_TO_REGCLASS (VTOSIZS SPR:$a), GPR)>;
+
+def : VFPNoNEONPat<(store (i32 (fp_to_sint (f32 SPR:$a))), addrmode5:$ptr),
+ (VSTRS (VTOSIZS SPR:$a), addrmode5:$ptr)>;
+
def VTOUIZD : AVConv1IsD_Encode<0b11101, 0b11, 0b1100, 0b1011,
(outs SPR:$Sd), (ins DPR:$Dm),
IIC_fpCVTDI, "vcvt", ".u32.f64\t$Sd, $Dm",
- [(set SPR:$Sd, (arm_ftoui (f64 DPR:$Dm)))]> {
+ []> {
let Inst{7} = 1; // Z bit
}
+let Predicates = [HasVFP2, HasDPVFP] in {
+ def : VFPPat<(i32 (fp_to_uint (f64 DPR:$a))),
+ (COPY_TO_REGCLASS (VTOUIZD DPR:$a), GPR)>;
+
+ def : VFPPat<(store (i32 (fp_to_uint (f64 DPR:$a))), addrmode5:$ptr),
+ (VSTRS (VTOUIZD DPR:$a), addrmode5:$ptr)>;
+}
+
def VTOUIZS : AVConv1InsS_Encode<0b11101, 0b11, 0b1100, 0b1010,
(outs SPR:$Sd), (ins SPR:$Sm),
IIC_fpCVTSI, "vcvt", ".u32.f32\t$Sd, $Sm",
- [(set SPR:$Sd, (arm_ftoui SPR:$Sm))]> {
+ []> {
let Inst{7} = 1; // Z bit
// Some single precision VFP instructions may be executed on both NEON and
@@ -1088,6 +1153,12 @@ def VTOUIZS : AVConv1InsS_Encode<0b11101, 0b11, 0b1100, 0b1010,
let D = VFPNeonA8Domain;
}
+def : VFPNoNEONPat<(i32 (fp_to_uint SPR:$a)),
+ (COPY_TO_REGCLASS (VTOUIZS SPR:$a), GPR)>;
+
+def : VFPNoNEONPat<(store (i32 (fp_to_uint (f32 SPR:$a))), addrmode5:$ptr),
+ (VSTRS (VTOUIZS SPR:$a), addrmode5:$ptr)>;
+
// And the Z bit '0' variants, i.e. use the rounding mode specified by FPSCR.
let Uses = [FPSCR] in {
// FIXME: Verify encoding after integrated assembler is working.
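
Note: the hunks above move the int/float conversion selection patterns out of the instruction definitions into standalone VFPPat/VFPNoNEONPat records, so the f64 patterns can be predicated on HasDPVFP and the f32 ones on the no-NEON domain. A rough sketch of the C-level operations these patterns match (function names are illustrative, not from the tree); the 'Z' variants implement C's truncate-toward-zero semantics:

    // Illustrative C++ equivalents of the conversions the patterns select.
    #include <cstdint>

    double  s32_to_f64(int32_t a)             { return (double)a; }  // VSITOD (via GPR->SPR copy)
    double  s32_load_to_f64(const int32_t *p) { return (double)*p; } // VLDRS + VSITOD
    float   u32_to_f32(uint32_t a)            { return (float)a; }   // VUITOS
    int32_t f64_to_s32(double d)              { return (int32_t)d; } // VTOSIZD + SPR->GPR copy
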
diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
index a8d0981..eca8e28 100644
--- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -19,7 +19,7 @@
#include "ARMMachineFunctionInfo.h"
#include "ARMSubtarget.h"
#include "MCTargetDesc/ARMAddressingModes.h"
-#include "Thumb1RegisterInfo.h"
+#include "ThumbRegisterInfo.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
@@ -38,6 +38,7 @@
#include "llvm/IR/Function.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
diff --git a/lib/Target/ARM/ARMMachineFunctionInfo.h b/lib/Target/ARM/ARMMachineFunctionInfo.h
index ddfdb52..a68ab1b 100644
--- a/lib/Target/ARM/ARMMachineFunctionInfo.h
+++ b/lib/Target/ARM/ARMMachineFunctionInfo.h
@@ -149,11 +149,7 @@ public:
unsigned getStoredByValParamsPadding() const { return StByValParamsPadding; }
void setStoredByValParamsPadding(unsigned p) { StByValParamsPadding = p; }
- unsigned getArgRegsSaveSize(unsigned Align = 0) const {
- if (!Align)
- return ArgRegsSaveSize;
- return (ArgRegsSaveSize + Align - 1) & ~(Align - 1);
- }
+ unsigned getArgRegsSaveSize() const { return ArgRegsSaveSize; }
void setArgRegsSaveSize(unsigned s) { ArgRegsSaveSize = s; }
unsigned getReturnRegsCount() const { return ReturnRegsCount; }
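
Note: the deleted overload rounded the save size up to a power-of-two alignment with the usual mask idiom; callers now do any rounding themselves. A minimal sketch of that idiom, assuming Align is a power of two (the helper name is made up):

    #include <cassert>
    #include <cstdint>

    // Round Size up to the next multiple of Align; Align must be a power of two.
    uint32_t roundUpToAlignment(uint32_t Size, uint32_t Align) {
      assert(Align != 0 && (Align & (Align - 1)) == 0 && "power-of-two only");
      return (Size + Align - 1) & ~(Align - 1);
    }
    // roundUpToAlignment(10, 8) == 16; roundUpToAlignment(16, 8) == 16.
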
diff --git a/lib/Target/ARM/ARMRegisterInfo.cpp b/lib/Target/ARM/ARMRegisterInfo.cpp
index 80b4b48..e6e8cdf 100644
--- a/lib/Target/ARM/ARMRegisterInfo.cpp
+++ b/lib/Target/ARM/ARMRegisterInfo.cpp
@@ -16,6 +16,4 @@ using namespace llvm;
void ARMRegisterInfo::anchor() { }
-ARMRegisterInfo::ARMRegisterInfo(const ARMSubtarget &sti)
- : ARMBaseRegisterInfo(sti) {
-}
+ARMRegisterInfo::ARMRegisterInfo() : ARMBaseRegisterInfo() {}
diff --git a/lib/Target/ARM/ARMRegisterInfo.h b/lib/Target/ARM/ARMRegisterInfo.h
index b623173..e2e650e 100644
--- a/lib/Target/ARM/ARMRegisterInfo.h
+++ b/lib/Target/ARM/ARMRegisterInfo.h
@@ -23,7 +23,7 @@ class ARMSubtarget;
struct ARMRegisterInfo : public ARMBaseRegisterInfo {
virtual void anchor();
public:
- ARMRegisterInfo(const ARMSubtarget &STI);
+ ARMRegisterInfo();
};
} // end namespace llvm
diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp
index 89624dd..fbec9e6 100644
--- a/lib/Target/ARM/ARMSubtarget.cpp
+++ b/lib/Target/ARM/ARMSubtarget.cpp
@@ -129,6 +129,7 @@ void ARMSubtarget::initializeEnvironment() {
HasV5TEOps = false;
HasV6Ops = false;
HasV6MOps = false;
+ HasV6KOps = false;
HasV6T2Ops = false;
HasV7Ops = false;
HasV8Ops = false;
@@ -165,6 +166,7 @@ void ARMSubtarget::initializeEnvironment() {
HasTrustZone = false;
HasCrypto = false;
HasCRC = false;
+ HasV8_1a = false;
HasZeroCycleZeroing = false;
AllowsUnalignedMem = false;
Thumb2DSP = false;
diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h
index f4deddf..f36cd5c 100644
--- a/lib/Target/ARM/ARMSubtarget.h
+++ b/lib/Target/ARM/ARMSubtarget.h
@@ -56,13 +56,14 @@ protected:
ARMProcClassEnum ARMProcClass;
/// HasV4TOps, HasV5TOps, HasV5TEOps,
- /// HasV6Ops, HasV6MOps, HasV6T2Ops, HasV7Ops, HasV8Ops -
+ /// HasV6Ops, HasV6MOps, HasV6KOps, HasV6T2Ops, HasV7Ops, HasV8Ops -
/// Specify whether target support specific ARM ISA variants.
bool HasV4TOps;
bool HasV5TOps;
bool HasV5TEOps;
bool HasV6Ops;
bool HasV6MOps;
+ bool HasV6KOps;
bool HasV6T2Ops;
bool HasV7Ops;
bool HasV8Ops;
@@ -181,6 +182,9 @@ protected:
/// HasCRC - if true, processor supports CRC instructions
bool HasCRC;
+ /// HasV8_1a - if true, the processor has V8.1a: PAN and RDMA extensions
+ bool HasV8_1a;
+
/// If true, the instructions "vmov.i32 d0, #0" and "vmov.i32 q0, #0" are
/// particularly effective at zeroing a VFP register.
bool HasZeroCycleZeroing;
@@ -287,6 +291,7 @@ public:
bool hasV5TEOps() const { return HasV5TEOps; }
bool hasV6Ops() const { return HasV6Ops; }
bool hasV6MOps() const { return HasV6MOps; }
+ bool hasV6KOps() const { return HasV6KOps; }
bool hasV6T2Ops() const { return HasV6T2Ops; }
bool hasV7Ops() const { return HasV7Ops; }
bool hasV8Ops() const { return HasV8Ops; }
@@ -311,6 +316,7 @@ public:
bool hasNEON() const { return HasNEON; }
bool hasCrypto() const { return HasCrypto; }
bool hasCRC() const { return HasCRC; }
+ bool hasV8_1a() const { return HasV8_1a; }
bool hasVirtualization() const { return HasVirtualization; }
bool useNEONForSinglePrecisionFP() const {
return hasNEON() && UseNEONForSinglePrecisionFP;
diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp
index a97a058..1bee1b0 100644
--- a/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/lib/Target/ARM/ARMTargetMachine.cpp
@@ -37,6 +37,11 @@ EnableAtomicTidy("arm-atomic-cfg-tidy", cl::Hidden,
" to make use of cmpxchg flow-based information"),
cl::init(true));
+static cl::opt<bool>
+EnableARMLoadStoreOpt("arm-load-store-opt", cl::Hidden,
+ cl::desc("Enable ARM load/store optimization pass"),
+ cl::init(true));
+
extern "C" void LLVMInitializeARMTarget() {
// Register the target.
RegisterTargetMachine<ARMLETargetMachine> X(TheARMLETarget);
@@ -105,9 +110,11 @@ computeTargetABI(const Triple &TT, StringRef CPU,
return TargetABI;
}
-static std::string computeDataLayout(const Triple &TT,
- ARMBaseTargetMachine::ARMABI ABI,
+static std::string computeDataLayout(StringRef TT, StringRef CPU,
+ const TargetOptions &Options,
bool isLittle) {
+ const Triple Triple(TT);
+ auto ABI = computeTargetABI(Triple, CPU, Options);
std::string Ret = "";
if (isLittle)
@@ -117,7 +124,7 @@ static std::string computeDataLayout(const Triple &TT,
// Big endian.
Ret += "E";
- Ret += DataLayout::getManglingComponent(TT);
+ Ret += DataLayout::getManglingComponent(Triple);
// Pointers are 32 bits and aligned to 32 bits.
Ret += "-p:32:32";
@@ -147,7 +154,7 @@ static std::string computeDataLayout(const Triple &TT,
// The stack is 128 bit aligned on NaCl, 64 bit aligned on AAPCS and 32 bit
// aligned everywhere else.
- if (TT.isOSNaCl())
+ if (Triple.isOSNaCl())
Ret += "-S128";
else if (ABI == ARMBaseTargetMachine::ARM_ABI_AAPCS)
Ret += "-S64";
@@ -164,9 +171,9 @@ ARMBaseTargetMachine::ARMBaseTargetMachine(const Target &T, StringRef TT,
const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL, bool isLittle)
- : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
+ : LLVMTargetMachine(T, computeDataLayout(TT, CPU, Options, isLittle), TT,
+ CPU, FS, Options, RM, CM, OL),
TargetABI(computeTargetABI(Triple(TT), CPU, Options)),
- DL(computeDataLayout(Triple(TT), TargetABI, isLittle)),
TLOF(createTLOF(Triple(getTargetTriple()))),
Subtarget(TT, CPU, FS, *this, isLittle), isLittle(isLittle) {
@@ -325,7 +332,7 @@ void ARMPassConfig::addIRPasses() {
}
bool ARMPassConfig::addPreISel() {
- if (TM->getOptLevel() != CodeGenOpt::None)
+ if (TM->getOptLevel() == CodeGenOpt::Aggressive)
// FIXME: This is using the thumb1 only constant value for
// maximal global offset for merging globals. We may want
// to look into using the old value for non-thumb1 code of
@@ -339,32 +346,30 @@ bool ARMPassConfig::addPreISel() {
bool ARMPassConfig::addInstSelector() {
addPass(createARMISelDag(getARMTargetMachine(), getOptLevel()));
- const ARMSubtarget *Subtarget = &getARMSubtarget();
- if (Subtarget->isTargetELF() && !Subtarget->isThumb1Only() &&
+ if (Triple(TM->getTargetTriple()).isOSBinFormatELF() &&
TM->Options.EnableFastISel)
addPass(createARMGlobalBaseRegPass());
return false;
}
void ARMPassConfig::addPreRegAlloc() {
- if (getOptLevel() != CodeGenOpt::None)
- addPass(createARMLoadStoreOptimizationPass(true));
- if (getOptLevel() != CodeGenOpt::None && getARMSubtarget().isCortexA9())
+ if (getOptLevel() != CodeGenOpt::None) {
addPass(createMLxExpansionPass());
- // Since the A15SDOptimizer pass can insert VDUP instructions, it can only be
- // enabled when NEON is available.
- if (getOptLevel() != CodeGenOpt::None && getARMSubtarget().isCortexA15() &&
- getARMSubtarget().hasNEON() && !DisableA15SDOptimization) {
- addPass(createA15SDOptimizerPass());
+
+ if (EnableARMLoadStoreOpt)
+ addPass(createARMLoadStoreOptimizationPass(/* pre-register alloc */ true));
+
+ if (!DisableA15SDOptimization)
+ addPass(createA15SDOptimizerPass());
}
}
void ARMPassConfig::addPreSched2() {
if (getOptLevel() != CodeGenOpt::None) {
- addPass(createARMLoadStoreOptimizationPass());
+ if (EnableARMLoadStoreOpt)
+ addPass(createARMLoadStoreOptimizationPass());
- if (getARMSubtarget().hasNEON())
- addPass(createExecutionDependencyFixPass(&ARM::DPRRegClass));
+ addPass(createExecutionDependencyFixPass(&ARM::DPRRegClass));
}
// Expand some pseudo instructions into multiple instructions to allow
@@ -372,26 +377,21 @@ void ARMPassConfig::addPreSched2() {
addPass(createARMExpandPseudoPass());
if (getOptLevel() != CodeGenOpt::None) {
- if (!getARMSubtarget().isThumb1Only()) {
- // in v8, IfConversion depends on Thumb instruction widths
- if (getARMSubtarget().restrictIT() &&
- !getARMSubtarget().prefers32BitThumb())
- addPass(createThumb2SizeReductionPass());
+ // in v8, IfConversion depends on Thumb instruction widths
+ if (getARMSubtarget().restrictIT())
+ addPass(createThumb2SizeReductionPass());
+ if (!getARMSubtarget().isThumb1Only())
addPass(&IfConverterID);
- }
}
- if (getARMSubtarget().isThumb2())
- addPass(createThumb2ITBlockPass());
+ addPass(createThumb2ITBlockPass());
}
void ARMPassConfig::addPreEmitPass() {
- if (getARMSubtarget().isThumb2()) {
- if (!getARMSubtarget().prefers32BitThumb())
- addPass(createThumb2SizeReductionPass());
+ addPass(createThumb2SizeReductionPass());
- // Constant island pass work on unbundled instructions.
+ // Constant island pass works on unbundled instructions.
+ if (getARMSubtarget().isThumb2())
addPass(&UnpackMachineBundlesID);
- }
addPass(createARMOptimizeBarriersPass());
addPass(createARMConstantIslandPass());
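
Note: EnableARMLoadStoreOpt follows LLVM's usual pattern of gating a pass behind a hidden, default-on cl::opt so it can be toggled from the command line. A sketch of the pattern, reusing the names from the hunk above:

    #include "llvm/Support/CommandLine.h"

    // Hidden, default-on switch; -arm-load-store-opt=false disables the pass.
    static llvm::cl::opt<bool>
    EnableARMLoadStoreOpt("arm-load-store-opt", llvm::cl::Hidden,
                          llvm::cl::desc("Enable ARM load/store optimization pass"),
                          llvm::cl::init(true));

    // In the pass config:
    //   if (EnableARMLoadStoreOpt)
    //     addPass(createARMLoadStoreOptimizationPass(/* pre-register alloc */ true));
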
diff --git a/lib/Target/ARM/ARMTargetMachine.h b/lib/Target/ARM/ARMTargetMachine.h
index 7f6a1ee..20ca97b 100644
--- a/lib/Target/ARM/ARMTargetMachine.h
+++ b/lib/Target/ARM/ARMTargetMachine.h
@@ -30,7 +30,6 @@ public:
} TargetABI;
protected:
- const DataLayout DL;
std::unique_ptr<TargetLoweringObjectFile> TLOF;
ARMSubtarget Subtarget;
bool isLittle;
@@ -45,9 +44,8 @@ public:
bool isLittle);
~ARMBaseTargetMachine() override;
- const ARMSubtarget *getSubtargetImpl() const override { return &Subtarget; }
+ const ARMSubtarget *getSubtargetImpl() const { return &Subtarget; }
const ARMSubtarget *getSubtargetImpl(const Function &F) const override;
- const DataLayout *getDataLayout() const override { return &DL; }
bool isLittleEndian() const { return isLittle; }
/// \brief Get the TargetIRAnalysis for this target.
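
Note: the DataLayout member disappears because LLVMTargetMachine now receives the layout string in its constructor (see computeDataLayout in the .cpp hunk above). A sketch decoding one plausible little-endian AAPCS string; the exact string depends on triple, CPU, and options:

    #include "llvm/IR/DataLayout.h"

    //   e          little-endian         -p:32:32  32-bit pointers, 32-bit aligned
    //   -i64:64    i64 aligned to 64     -S64      stack aligned to 64 bits (AAPCS)
    llvm::DataLayout DL("e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64");
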
diff --git a/lib/Target/ARM/Android.mk b/lib/Target/ARM/Android.mk
index 55a5775..6694b53 100644
--- a/lib/Target/ARM/Android.mk
+++ b/lib/Target/ARM/Android.mk
@@ -4,6 +4,7 @@ arm_codegen_TBLGEN_TABLES := \
ARMGenRegisterInfo.inc \
ARMGenInstrInfo.inc \
ARMGenCodeEmitter.inc \
+ ARMGenCodeEmitter.inc \
ARMGenMCCodeEmitter.inc \
ARMGenMCPseudoLowering.inc \
ARMGenAsmWriter.inc \
@@ -41,10 +42,9 @@ arm_codegen_SRC_FILES := \
MLxExpansionPass.cpp \
Thumb1FrameLowering.cpp \
Thumb1InstrInfo.cpp \
- Thumb1RegisterInfo.cpp \
+ ThumbRegisterInfo.cpp \
Thumb2ITBlockPass.cpp \
Thumb2InstrInfo.cpp \
- Thumb2RegisterInfo.cpp \
Thumb2SizeReduction.cpp
# For the host
diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index 59461e8..2215efb 100644
--- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -276,6 +276,9 @@ class ARMAsmParser : public MCTargetAsmParser {
bool hasD16() const {
return STI.getFeatureBits() & ARM::FeatureD16;
}
+ bool hasV8_1a() const {
+ return STI.getFeatureBits() & ARM::FeatureV8_1a;
+ }
void SwitchMode() {
uint64_t FB = ComputeAvailableFeatures(STI.ToggleFeature(ARM::ModeThumb));
@@ -342,10 +345,10 @@ public:
};
- ARMAsmParser(MCSubtargetInfo & _STI, MCAsmParser & _Parser,
+ ARMAsmParser(MCSubtargetInfo &STI, MCAsmParser &Parser,
const MCInstrInfo &MII, const MCTargetOptions &Options)
- : MCTargetAsmParser(), STI(_STI), MII(MII), UC(_Parser) {
- MCAsmParserExtension::Initialize(_Parser);
+ : STI(STI), MII(MII), UC(Parser) {
+ MCAsmParserExtension::Initialize(Parser);
// Cache the MCRegisterInfo.
MRI = getContext().getRegisterInfo();
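
Note: hasV8_1a() follows the adjacent accessors: test a feature bit in the subtarget's feature mask. A toy sketch with made-up bit values standing in for the tablegen'd ARM::Feature* constants:

    #include <cstdint>

    // Hypothetical feature bits; in-tree these come from tablegen'd enums.
    enum : uint64_t { FeatureD16 = 1ull << 0, FeatureV8_1a = 1ull << 1 };

    struct STIStub {
      uint64_t FeatureBits;
      bool hasV8_1a() const { return FeatureBits & FeatureV8_1a; }
    };
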
diff --git a/lib/Target/ARM/CMakeLists.txt b/lib/Target/ARM/CMakeLists.txt
index 2530640..0b698197 100644
--- a/lib/Target/ARM/CMakeLists.txt
+++ b/lib/Target/ARM/CMakeLists.txt
@@ -40,10 +40,9 @@ add_llvm_target(ARMCodeGen
MLxExpansionPass.cpp
Thumb1FrameLowering.cpp
Thumb1InstrInfo.cpp
- Thumb1RegisterInfo.cpp
+ ThumbRegisterInfo.cpp
Thumb2ITBlockPass.cpp
Thumb2InstrInfo.cpp
- Thumb2RegisterInfo.cpp
Thumb2SizeReduction.cpp
)
diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
index 16eea33..e15323d 100644
--- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
+++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
@@ -637,12 +637,12 @@ void ARMInstPrinter::printAddrMode5Operand(const MCInst *MI, unsigned OpNum,
printRegName(O, MO1.getReg());
unsigned ImmOffs = ARM_AM::getAM5Offset(MO2.getImm());
- unsigned Op = ARM_AM::getAM5Op(MO2.getImm());
+ ARM_AM::AddrOpc Op = ARM_AM::getAM5Op(MO2.getImm());
if (AlwaysPrintImm0 || ImmOffs || Op == ARM_AM::sub) {
O << ", "
<< markup("<imm:")
<< "#"
- << ARM_AM::getAddrOpcStr(ARM_AM::getAM5Op(MO2.getImm()))
+ << ARM_AM::getAddrOpcStr(Op)
<< ImmOffs * 4
<< markup(">");
}
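
Note: this is a small cleanup: getAM5Op was evaluated twice and its result widened to unsigned; the rewrite computes it once into a correctly typed local. A toy model of the same shape (the AM5 bit positions here are a simplified assumption):

    #include <cstdio>

    enum AddrOpc { add, sub };
    AddrOpc getAM5Op(unsigned Imm) { return ((Imm >> 8) & 1) ? sub : add; }
    const char *getAddrOpcStr(AddrOpc Op) { return Op == sub ? "-" : ""; }

    void printOffset(unsigned Imm) {
      AddrOpc Op = getAM5Op(Imm); // evaluate once, keep the enum type
      std::printf("#%s%u", getAddrOpcStr(Op), (Imm & 0xff) * 4);
    }
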
diff --git a/lib/Target/ARM/MCTargetDesc/ARMArchName.def b/lib/Target/ARM/MCTargetDesc/ARMArchName.def
index 9f007a0..96a0c1a 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMArchName.def
+++ b/lib/Target/ARM/MCTargetDesc/ARMArchName.def
@@ -30,6 +30,7 @@ ARM_ARCH_NAME("armv5t", ARMV5T, "5T", v5T)
ARM_ARCH_NAME("armv5te", ARMV5TE, "5TE", v5TE)
ARM_ARCH_NAME("armv6", ARMV6, "6", v6)
ARM_ARCH_NAME("armv6j", ARMV6J, "6J", v6)
+ARM_ARCH_NAME("armv6k", ARMV6K, "6K", v6K)
ARM_ARCH_NAME("armv6t2", ARMV6T2, "6T2", v6T2)
ARM_ARCH_NAME("armv6z", ARMV6Z, "6Z", v6KZ)
ARM_ARCH_NAME("armv6zk", ARMV6ZK, "6ZK", v6KZ)
@@ -43,6 +44,8 @@ ARM_ARCH_NAME("armv7-m", ARMV7M, "7-M", v7)
ARM_ARCH_ALIAS("armv7m", ARMV7M)
ARM_ARCH_NAME("armv8-a", ARMV8A, "8-A", v8)
ARM_ARCH_ALIAS("armv8a", ARMV8A)
+ARM_ARCH_NAME("armv8.1-a", ARMV8_1A, "8.1-A", v8)
+ARM_ARCH_ALIAS("armv8.1a", ARMV8_1A)
ARM_ARCH_NAME("iwmmxt", IWMMXT, "iwmmxt", v5TE)
ARM_ARCH_NAME("iwmmxt2", IWMMXT2, "iwmmxt2", v5TE)
diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
index 2b65520..9648ffa 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
@@ -783,6 +783,7 @@ void ARMTargetELFStreamer::emitArchDefaultAttributes() {
setAttributeItem(THUMB_ISA_use, AllowThumb32, false);
break;
+ case ARM::ARMV6K:
case ARM::ARMV6Z:
case ARM::ARMV6ZK:
setAttributeItem(ARM_ISA_use, Allowed, false);
@@ -816,6 +817,7 @@ void ARMTargetELFStreamer::emitArchDefaultAttributes() {
break;
case ARM::ARMV8A:
+ case ARM::ARMV8_1A:
setAttributeItem(CPU_arch_profile, ApplicationProfile, false);
setAttributeItem(ARM_ISA_use, Allowed, false);
setAttributeItem(THUMB_ISA_use, AllowThumb32, false);
@@ -913,9 +915,8 @@ void ARMTargetELFStreamer::emitFPUDefaultAttributes() {
setAttributeItem(ARMBuildAttrs::FP_arch,
ARMBuildAttrs::AllowFPARMv8A,
/* OverwriteExisting= */ false);
- setAttributeItem(ARMBuildAttrs::Advanced_SIMD_arch,
- ARMBuildAttrs::AllowNeonARMv8,
- /* OverwriteExisting= */ false);
+ // 'Advanced_SIMD_arch' must not be emitted here; ARMAsmPrinter::emitAttributes()
+ // emits it instead, based on hasV8Ops() and hasV8_1a().
break;
case ARM::SOFTVFP:
@@ -1362,25 +1363,29 @@ void ARMELFStreamer::emitUnwindRaw(int64_t Offset,
namespace llvm {
-MCStreamer *createMCAsmStreamer(MCContext &Ctx, formatted_raw_ostream &OS,
- bool isVerboseAsm, bool useDwarfDirectory,
- MCInstPrinter *InstPrint, MCCodeEmitter *CE,
- MCAsmBackend *TAB, bool ShowInst) {
- MCStreamer *S = llvm::createAsmStreamer(
- Ctx, OS, isVerboseAsm, useDwarfDirectory, InstPrint, CE, TAB, ShowInst);
- new ARMTargetAsmStreamer(*S, OS, *InstPrint, isVerboseAsm);
- return S;
+MCTargetStreamer *createARMTargetAsmStreamer(MCStreamer &S,
+ formatted_raw_ostream &OS,
+ MCInstPrinter *InstPrint,
+ bool isVerboseAsm) {
+ return new ARMTargetAsmStreamer(S, OS, *InstPrint, isVerboseAsm);
}
MCTargetStreamer *createARMNullTargetStreamer(MCStreamer &S) {
return new ARMTargetStreamer(S);
}
+MCTargetStreamer *createARMObjectTargetStreamer(MCStreamer &S,
+ const MCSubtargetInfo &STI) {
+ Triple TT(STI.getTargetTriple());
+ if (TT.getObjectFormat() == Triple::ELF)
+ return new ARMTargetELFStreamer(S);
+ return new ARMTargetStreamer(S);
+}
+
MCELFStreamer *createARMELFStreamer(MCContext &Context, MCAsmBackend &TAB,
raw_ostream &OS, MCCodeEmitter *Emitter,
bool RelaxAll, bool IsThumb) {
ARMELFStreamer *S = new ARMELFStreamer(Context, TAB, OS, Emitter, IsThumb);
- new ARMTargetELFStreamer(*S);
// FIXME: This should eventually end up somewhere else where more
// intelligent flag decisions can be made. For now we are just maintaining
// the status quo for ARM and setting EF_ARM_EABI_VER5 as the default.
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp
index 66a1618..caa8736 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp
@@ -59,6 +59,7 @@ ARMELFMCAsmInfo::ARMELFMCAsmInfo(StringRef TT) {
// Exceptions handling
switch (TheTriple.getOS()) {
+ case Triple::Bitrig:
case Triple::NetBSD:
ExceptionsType = ExceptionHandling::DwarfCFI;
break;
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
index efbebd3..e48cabb 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
@@ -441,14 +441,12 @@ public:
MCCodeEmitter *llvm::createARMLEMCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI,
MCContext &Ctx) {
return new ARMMCCodeEmitter(MCII, Ctx, true);
}
MCCodeEmitter *llvm::createARMBEMCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI,
MCContext &Ctx) {
return new ARMMCCodeEmitter(MCII, Ctx, false);
}
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp
index 68d32b2..5b90de3 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp
@@ -10,6 +10,7 @@
#include "ARMMCExpr.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCStreamer.h"
using namespace llvm;
#define DEBUG_TYPE "armmcexpr"
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h b/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h
index 06bf6c9..2be98d2 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h
@@ -26,8 +26,8 @@ private:
const VariantKind Kind;
const MCExpr *Expr;
- explicit ARMMCExpr(VariantKind _Kind, const MCExpr *_Expr)
- : Kind(_Kind), Expr(_Expr) {}
+ explicit ARMMCExpr(VariantKind Kind, const MCExpr *Expr)
+ : Kind(Kind), Expr(Expr) {}
public:
/// @name Construction
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
index 8c19785..7ff7f9a 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
@@ -153,6 +153,17 @@ std::string ARM_MC::ParseARMTriple(StringRef TT, StringRef CPU) {
// Use CPU to figure out the exact features
ARMArchFeature = "+v8";
break;
+ case Triple::ARMSubArch_v8_1a:
+ if (NoCPU)
+ // v8.1a: FeatureDB, FeatureFPARMv8, FeatureNEON, FeatureDSPThumb2,
+ // FeatureMP, FeatureHWDiv, FeatureHWDivARM, FeatureTrustZone,
+ // FeatureT2XtPk, FeatureCrypto, FeatureCRC, FeatureV8_1a
+ ARMArchFeature = "+v8.1a,+db,+fp-armv8,+neon,+t2dsp,+mp,+hwdiv,+hwdiv-arm,"
+ "+trustzone,+t2xtpk,+crypto,+crc";
+ else
+ // Use CPU to figure out the exact features
+ ARMArchFeature = "+v8.1a";
+ break;
case Triple::ARMSubArch_v7m:
isThumb = true;
if (NoCPU)
@@ -195,6 +206,9 @@ std::string ARM_MC::ParseARMTriple(StringRef TT, StringRef CPU) {
case Triple::ARMSubArch_v6t2:
ARMArchFeature = "+v6t2";
break;
+ case Triple::ARMSubArch_v6k:
+ ARMArchFeature = "+v6k";
+ break;
case Triple::ARMSubArch_v6m:
isThumb = true;
if (NoCPU)
@@ -295,27 +309,18 @@ static MCCodeGenInfo *createARMMCCodeGenInfo(StringRef TT, Reloc::Model RM,
return X;
}
-// This is duplicated code. Refactor this.
-static MCStreamer *createMCStreamer(const Target &T, StringRef TT,
- MCContext &Ctx, MCAsmBackend &MAB,
- raw_ostream &OS, MCCodeEmitter *Emitter,
- const MCSubtargetInfo &STI, bool RelaxAll) {
- Triple TheTriple(TT);
+static MCStreamer *createELFStreamer(const Triple &T, MCContext &Ctx,
+ MCAsmBackend &MAB, raw_ostream &OS,
+ MCCodeEmitter *Emitter, bool RelaxAll) {
+ return createARMELFStreamer(Ctx, MAB, OS, Emitter, false,
+ T.getArch() == Triple::thumb);
+}
- switch (TheTriple.getObjectFormat()) {
- default: llvm_unreachable("unsupported object format");
- case Triple::MachO: {
- MCStreamer *S = createMachOStreamer(Ctx, MAB, OS, Emitter, false);
- new ARMTargetStreamer(*S);
- return S;
- }
- case Triple::COFF:
- assert(TheTriple.isOSWindows() && "non-Windows ARM COFF is not supported");
- return createARMWinCOFFStreamer(Ctx, MAB, *Emitter, OS);
- case Triple::ELF:
- return createARMELFStreamer(Ctx, MAB, OS, Emitter, false,
- TheTriple.getArch() == Triple::thumb);
- }
+static MCStreamer *createARMMachOStreamer(MCContext &Ctx, MCAsmBackend &MAB,
+ raw_ostream &OS,
+ MCCodeEmitter *Emitter, bool RelaxAll,
+ bool DWARFMustBeAtTheEnd) {
+ return createMachOStreamer(Ctx, MAB, OS, Emitter, false, DWARFMustBeAtTheEnd);
}
static MCInstPrinter *createARMMCInstPrinter(const Target &T,
@@ -379,61 +384,53 @@ static MCInstrAnalysis *createARMMCInstrAnalysis(const MCInstrInfo *Info) {
// Force static initialization.
extern "C" void LLVMInitializeARMTargetMC() {
- // Register the MC asm info.
- RegisterMCAsmInfoFn X(TheARMLETarget, createARMMCAsmInfo);
- RegisterMCAsmInfoFn Y(TheARMBETarget, createARMMCAsmInfo);
- RegisterMCAsmInfoFn A(TheThumbLETarget, createARMMCAsmInfo);
- RegisterMCAsmInfoFn B(TheThumbBETarget, createARMMCAsmInfo);
-
- // Register the MC codegen info.
- TargetRegistry::RegisterMCCodeGenInfo(TheARMLETarget, createARMMCCodeGenInfo);
- TargetRegistry::RegisterMCCodeGenInfo(TheARMBETarget, createARMMCCodeGenInfo);
- TargetRegistry::RegisterMCCodeGenInfo(TheThumbLETarget,
- createARMMCCodeGenInfo);
- TargetRegistry::RegisterMCCodeGenInfo(TheThumbBETarget,
- createARMMCCodeGenInfo);
-
- // Register the MC instruction info.
- TargetRegistry::RegisterMCInstrInfo(TheARMLETarget, createARMMCInstrInfo);
- TargetRegistry::RegisterMCInstrInfo(TheARMBETarget, createARMMCInstrInfo);
- TargetRegistry::RegisterMCInstrInfo(TheThumbLETarget, createARMMCInstrInfo);
- TargetRegistry::RegisterMCInstrInfo(TheThumbBETarget, createARMMCInstrInfo);
-
- // Register the MC register info.
- TargetRegistry::RegisterMCRegInfo(TheARMLETarget, createARMMCRegisterInfo);
- TargetRegistry::RegisterMCRegInfo(TheARMBETarget, createARMMCRegisterInfo);
- TargetRegistry::RegisterMCRegInfo(TheThumbLETarget, createARMMCRegisterInfo);
- TargetRegistry::RegisterMCRegInfo(TheThumbBETarget, createARMMCRegisterInfo);
-
- // Register the MC subtarget info.
- TargetRegistry::RegisterMCSubtargetInfo(TheARMLETarget,
- ARM_MC::createARMMCSubtargetInfo);
- TargetRegistry::RegisterMCSubtargetInfo(TheARMBETarget,
- ARM_MC::createARMMCSubtargetInfo);
- TargetRegistry::RegisterMCSubtargetInfo(TheThumbLETarget,
- ARM_MC::createARMMCSubtargetInfo);
- TargetRegistry::RegisterMCSubtargetInfo(TheThumbBETarget,
- ARM_MC::createARMMCSubtargetInfo);
-
- // Register the MC instruction analyzer.
- TargetRegistry::RegisterMCInstrAnalysis(TheARMLETarget,
- createARMMCInstrAnalysis);
- TargetRegistry::RegisterMCInstrAnalysis(TheARMBETarget,
- createARMMCInstrAnalysis);
- TargetRegistry::RegisterMCInstrAnalysis(TheThumbLETarget,
- createARMMCInstrAnalysis);
- TargetRegistry::RegisterMCInstrAnalysis(TheThumbBETarget,
- createARMMCInstrAnalysis);
+ for (Target *T : {&TheARMLETarget, &TheARMBETarget, &TheThumbLETarget,
+ &TheThumbBETarget}) {
+ // Register the MC asm info.
+ RegisterMCAsmInfoFn X(*T, createARMMCAsmInfo);
+
+ // Register the MC codegen info.
+ TargetRegistry::RegisterMCCodeGenInfo(*T, createARMMCCodeGenInfo);
+
+ // Register the MC instruction info.
+ TargetRegistry::RegisterMCInstrInfo(*T, createARMMCInstrInfo);
+
+ // Register the MC register info.
+ TargetRegistry::RegisterMCRegInfo(*T, createARMMCRegisterInfo);
+
+ // Register the MC subtarget info.
+ TargetRegistry::RegisterMCSubtargetInfo(*T,
+ ARM_MC::createARMMCSubtargetInfo);
+
+ // Register the MC instruction analyzer.
+ TargetRegistry::RegisterMCInstrAnalysis(*T, createARMMCInstrAnalysis);
+
+ TargetRegistry::RegisterELFStreamer(*T, createELFStreamer);
+ TargetRegistry::RegisterCOFFStreamer(*T, createARMWinCOFFStreamer);
+ TargetRegistry::RegisterMachOStreamer(*T, createARMMachOStreamer);
+
+ // Register the obj target streamer.
+ TargetRegistry::RegisterObjectTargetStreamer(*T,
+ createARMObjectTargetStreamer);
+
+ // Register the asm streamer.
+ TargetRegistry::RegisterAsmTargetStreamer(*T, createARMTargetAsmStreamer);
+
+ // Register the null TargetStreamer.
+ TargetRegistry::RegisterNullTargetStreamer(*T, createARMNullTargetStreamer);
+
+ // Register the MCInstPrinter.
+ TargetRegistry::RegisterMCInstPrinter(*T, createARMMCInstPrinter);
+
+ // Register the MC relocation info.
+ TargetRegistry::RegisterMCRelocationInfo(*T, createARMMCRelocationInfo);
+ }
// Register the MC Code Emitter
- TargetRegistry::RegisterMCCodeEmitter(TheARMLETarget,
- createARMLEMCCodeEmitter);
- TargetRegistry::RegisterMCCodeEmitter(TheARMBETarget,
- createARMBEMCCodeEmitter);
- TargetRegistry::RegisterMCCodeEmitter(TheThumbLETarget,
- createARMLEMCCodeEmitter);
- TargetRegistry::RegisterMCCodeEmitter(TheThumbBETarget,
- createARMBEMCCodeEmitter);
+ for (Target *T : {&TheARMLETarget, &TheThumbLETarget})
+ TargetRegistry::RegisterMCCodeEmitter(*T, createARMLEMCCodeEmitter);
+ for (Target *T : {&TheARMBETarget, &TheThumbBETarget})
+ TargetRegistry::RegisterMCCodeEmitter(*T, createARMBEMCCodeEmitter);
// Register the asm backend.
TargetRegistry::RegisterMCAsmBackend(TheARMLETarget, createARMLEAsmBackend);
@@ -442,44 +439,4 @@ extern "C" void LLVMInitializeARMTargetMC() {
createThumbLEAsmBackend);
TargetRegistry::RegisterMCAsmBackend(TheThumbBETarget,
createThumbBEAsmBackend);
-
- // Register the object streamer.
- TargetRegistry::RegisterMCObjectStreamer(TheARMLETarget, createMCStreamer);
- TargetRegistry::RegisterMCObjectStreamer(TheARMBETarget, createMCStreamer);
- TargetRegistry::RegisterMCObjectStreamer(TheThumbLETarget, createMCStreamer);
- TargetRegistry::RegisterMCObjectStreamer(TheThumbBETarget, createMCStreamer);
-
- // Register the asm streamer.
- TargetRegistry::RegisterAsmStreamer(TheARMLETarget, createMCAsmStreamer);
- TargetRegistry::RegisterAsmStreamer(TheARMBETarget, createMCAsmStreamer);
- TargetRegistry::RegisterAsmStreamer(TheThumbLETarget, createMCAsmStreamer);
- TargetRegistry::RegisterAsmStreamer(TheThumbBETarget, createMCAsmStreamer);
-
- // Register the null TargetStreamer.
- TargetRegistry::RegisterNullTargetStreamer(TheARMLETarget,
- createARMNullTargetStreamer);
- TargetRegistry::RegisterNullTargetStreamer(TheARMBETarget,
- createARMNullTargetStreamer);
- TargetRegistry::RegisterNullTargetStreamer(TheThumbLETarget,
- createARMNullTargetStreamer);
- TargetRegistry::RegisterNullTargetStreamer(TheThumbBETarget,
- createARMNullTargetStreamer);
-
- // Register the MCInstPrinter.
- TargetRegistry::RegisterMCInstPrinter(TheARMLETarget, createARMMCInstPrinter);
- TargetRegistry::RegisterMCInstPrinter(TheARMBETarget, createARMMCInstPrinter);
- TargetRegistry::RegisterMCInstPrinter(TheThumbLETarget,
- createARMMCInstPrinter);
- TargetRegistry::RegisterMCInstPrinter(TheThumbBETarget,
- createARMMCInstPrinter);
-
- // Register the MC relocation info.
- TargetRegistry::RegisterMCRelocationInfo(TheARMLETarget,
- createARMMCRelocationInfo);
- TargetRegistry::RegisterMCRelocationInfo(TheARMBETarget,
- createARMMCRelocationInfo);
- TargetRegistry::RegisterMCRelocationInfo(TheThumbLETarget,
- createARMMCRelocationInfo);
- TargetRegistry::RegisterMCRelocationInfo(TheThumbBETarget,
- createARMMCRelocationInfo);
}
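
Note: the registration boilerplate collapses into a range-for over an initializer_list of Target pointers, one full set of registrations per target. The idiom in isolation, with stub types:

    #include <cstdio>
    #include <initializer_list>

    struct Target { const char *Name; };
    Target ARMLE{"arm-le"}, ARMBE{"arm-be"}, ThumbLE{"thumb-le"}, ThumbBE{"thumb-be"};

    void registerAllTargets() {
      // Iterate pointers so every registration call sees the real Target object.
      for (Target *T : {&ARMLE, &ARMBE, &ThumbLE, &ThumbBE})
        std::printf("registering MC components for %s\n", T->Name);
    }
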
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h
index c17e959..7e9ba66 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h
@@ -32,6 +32,7 @@ class MCRelocationInfo;
class MCTargetStreamer;
class StringRef;
class Target;
+class Triple;
class raw_ostream;
extern Target TheARMLETarget, TheThumbLETarget;
@@ -47,21 +48,20 @@ namespace ARM_MC {
StringRef FS);
}
-MCStreamer *createMCAsmStreamer(MCContext &Ctx, formatted_raw_ostream &OS,
- bool isVerboseAsm, bool useDwarfDirectory,
- MCInstPrinter *InstPrint, MCCodeEmitter *CE,
- MCAsmBackend *TAB, bool ShowInst);
-
MCTargetStreamer *createARMNullTargetStreamer(MCStreamer &S);
+MCTargetStreamer *createARMTargetAsmStreamer(MCStreamer &S,
+ formatted_raw_ostream &OS,
+ MCInstPrinter *InstPrint,
+ bool isVerboseAsm);
+MCTargetStreamer *createARMObjectTargetStreamer(MCStreamer &S,
+ const MCSubtargetInfo &STI);
MCCodeEmitter *createARMLEMCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI,
MCContext &Ctx);
MCCodeEmitter *createARMBEMCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI,
MCContext &Ctx);
MCAsmBackend *createARMAsmBackend(const Target &T, const MCRegisterInfo &MRI,
@@ -80,10 +80,11 @@ MCAsmBackend *createThumbLEAsmBackend(const Target &T, const MCRegisterInfo &MRI
MCAsmBackend *createThumbBEAsmBackend(const Target &T, const MCRegisterInfo &MRI,
StringRef TT, StringRef CPU);
-/// createARMWinCOFFStreamer - Construct a PE/COFF machine code streamer which
-/// will generate a PE/COFF object file.
+// Construct a PE/COFF machine code streamer which will generate a PE/COFF
+// object file.
MCStreamer *createARMWinCOFFStreamer(MCContext &Context, MCAsmBackend &MAB,
- MCCodeEmitter &Emitter, raw_ostream &OS);
+ raw_ostream &OS, MCCodeEmitter *Emitter,
+ bool RelaxAll);
/// createARMELFObjectWriter - Construct an ELF object writer.
MCObjectWriter *createARMELFObjectWriter(raw_ostream &OS,
diff --git a/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp b/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp
index 593fe34..173cc93 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp
@@ -72,14 +72,10 @@ void UnwindOpcodeAssembler::EmitRegSave(uint32_t RegSave) {
// opcode when r4 is not in .save directive.
// Compute the consecutive registers from r4 to r11.
- uint32_t Range = 0;
- uint32_t Mask = (1u << 4);
- for (uint32_t Bit = (1u << 5); Bit < (1u << 12); Bit <<= 1) {
- if ((RegSave & Bit) == 0u)
- break;
- ++Range;
- Mask |= Bit;
- }
+ uint32_t Mask = RegSave & 0xff0u;
+ uint32_t Range = countTrailingOnes(Mask >> 5); // Exclude r4.
+ // Mask off non-consecutive registers. Keep r4.
+ Mask &= ~(0xffffffe0u << Range);
// Emit this opcode when the mask covers every register.
uint32_t UnmaskedReg = RegSave & 0xfff0u & (~Mask);
@@ -105,50 +101,24 @@ void UnwindOpcodeAssembler::EmitRegSave(uint32_t RegSave) {
/// Emit unwind opcodes for .vsave directives
void UnwindOpcodeAssembler::EmitVFPRegSave(uint32_t VFPRegSave) {
- size_t i = 32;
-
- while (i > 16) {
- uint32_t Bit = 1u << (i - 1);
- if ((VFPRegSave & Bit) == 0u) {
- --i;
- continue;
- }
-
- uint32_t Range = 0;
-
- --i;
- Bit >>= 1;
-
- while (i > 16 && (VFPRegSave & Bit)) {
- --i;
- ++Range;
- Bit >>= 1;
+ // We only have 4 bits to save the offset in the opcode so look at the lower
+ // and upper 16 bits separately.
+ for (uint32_t Regs : {VFPRegSave & 0xffff0000u, VFPRegSave & 0x0000ffffu}) {
+ while (Regs) {
+ // Now look for a run of set bits. Remember the MSB and LSB of the run.
+ auto RangeMSB = 32 - countLeadingZeros(Regs);
+ auto RangeLen = countLeadingOnes(Regs << (32 - RangeMSB));
+ auto RangeLSB = RangeMSB - RangeLen;
+
+ int Opcode = RangeLSB >= 16
+ ? ARM::EHABI::UNWIND_OPCODE_POP_VFP_REG_RANGE_FSTMFDD_D16
+ : ARM::EHABI::UNWIND_OPCODE_POP_VFP_REG_RANGE_FSTMFDD;
+
+ EmitInt16(Opcode | ((RangeLSB % 16) << 4) | (RangeLen - 1));
+
+ // Zero out bits we're done with.
+ Regs &= ~(-1u << RangeLSB);
}
-
- EmitInt16(ARM::EHABI::UNWIND_OPCODE_POP_VFP_REG_RANGE_FSTMFDD_D16 |
- ((i - 16) << 4) | Range);
- }
-
- while (i > 0) {
- uint32_t Bit = 1u << (i - 1);
- if ((VFPRegSave & Bit) == 0u) {
- --i;
- continue;
- }
-
- uint32_t Range = 0;
-
- --i;
- Bit >>= 1;
-
- while (i > 0 && (VFPRegSave & Bit)) {
- --i;
- ++Range;
- Bit >>= 1;
- }
-
- EmitInt16(ARM::EHABI::UNWIND_OPCODE_POP_VFP_REG_RANGE_FSTMFDD | (i << 4) |
- Range);
}
}
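
Note: both rewritten emitters swap hand-rolled bit scans for counting helpers: EmitRegSave derives the r4..r11 run with countTrailingOnes, and EmitVFPRegSave peels runs of set bits off the top with countLeadingZeros/countLeadingOnes. A standalone C++20 sketch of the run-peeling loop, using <bit> in place of LLVM's MathExtras:

    #include <bit>
    #include <cstdint>
    #include <cstdio>

    int main() {
      uint32_t Regs = 0x70Cu; // two runs of set bits: 10..8 and 3..2
      while (Regs) {
        unsigned MSB = 32 - std::countl_zero(Regs);         // one past the top run
        unsigned Len = std::countl_one(Regs << (32 - MSB)); // length of that run
        unsigned LSB = MSB - Len;
        std::printf("run: bits %u..%u\n", LSB + Len - 1, LSB);
        Regs &= ~(~0u << LSB); // drop the run just emitted (and everything above)
      }
    }
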
diff --git a/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp b/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp
index b344ced..dc707dc 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp
@@ -37,10 +37,10 @@ void ARMWinCOFFStreamer::EmitThumbFunc(MCSymbol *Symbol) {
}
}
-namespace llvm {
-MCStreamer *createARMWinCOFFStreamer(MCContext &Context, MCAsmBackend &MAB,
- MCCodeEmitter &Emitter, raw_ostream &OS) {
- return new ARMWinCOFFStreamer(Context, MAB, Emitter, OS);
-}
+MCStreamer *llvm::createARMWinCOFFStreamer(MCContext &Context,
+ MCAsmBackend &MAB, raw_ostream &OS,
+ MCCodeEmitter *Emitter,
+ bool RelaxAll) {
+ return new ARMWinCOFFStreamer(Context, MAB, *Emitter, OS);
}
diff --git a/lib/Target/ARM/MLxExpansionPass.cpp b/lib/Target/ARM/MLxExpansionPass.cpp
index 51e519d..ed2deea 100644
--- a/lib/Target/ARM/MLxExpansionPass.cpp
+++ b/lib/Target/ARM/MLxExpansionPass.cpp
@@ -382,6 +382,9 @@ bool MLxExpansion::runOnMachineFunction(MachineFunction &Fn) {
TRI = Fn.getSubtarget().getRegisterInfo();
MRI = &Fn.getRegInfo();
const ARMSubtarget *STI = &Fn.getSubtarget<ARMSubtarget>();
+ // Only run this for CortexA9.
+ if (!STI->isCortexA9())
+ return false;
isLikeA9 = STI->isLikeA9() || STI->isSwift();
isSwift = STI->isSwift();
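
Note: MLxExpansion (like Thumb2ITBlockPass and Thumb2SizeReduce later in this diff) now bails out of runOnMachineFunction when the subtarget doesn't apply, rather than being conditionally scheduled by the pass config. A minimal sketch of the guard with placeholder types:

    // Placeholder types; the shape matches runOnMachineFunction above.
    struct ARMSubtargetStub {
      bool CortexA9 = false;
      bool isCortexA9() const { return CortexA9; }
    };
    struct MachineFunctionStub {
      ARMSubtargetStub ST;
      const ARMSubtargetStub &getSubtarget() const { return ST; }
    };

    bool runOnMachineFunction(MachineFunctionStub &Fn) {
      if (!Fn.getSubtarget().isCortexA9())
        return false; // not applicable: report "no changes made"
      // ... expand MLx pseudo-instructions ...
      return true;
    }
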
diff --git a/lib/Target/ARM/README-Thumb.txt b/lib/Target/ARM/README-Thumb.txt
index f4d9be3..2d031d0 100644
--- a/lib/Target/ARM/README-Thumb.txt
+++ b/lib/Target/ARM/README-Thumb.txt
@@ -232,7 +232,7 @@ Make use of hi register variants of cmp: tCMPhir / tCMPZhir.
//===---------------------------------------------------------------------===//
Thumb1 immediate field sometimes keep pre-scaled values. See
-Thumb1RegisterInfo::eliminateFrameIndex. This is inconsistent from ARM and
+ThumbRegisterInfo::eliminateFrameIndex. This is inconsistent with ARM and
Thumb2.
//===---------------------------------------------------------------------===//
diff --git a/lib/Target/ARM/Thumb1FrameLowering.cpp b/lib/Target/ARM/Thumb1FrameLowering.cpp
index 7dcc64e..c496cd7 100644
--- a/lib/Target/ARM/Thumb1FrameLowering.cpp
+++ b/lib/Target/ARM/Thumb1FrameLowering.cpp
@@ -41,7 +41,7 @@ static void
emitSPUpdate(MachineBasicBlock &MBB,
MachineBasicBlock::iterator &MBBI,
const TargetInstrInfo &TII, DebugLoc dl,
- const Thumb1RegisterInfo &MRI,
+ const ThumbRegisterInfo &MRI,
int NumBytes, unsigned MIFlags = MachineInstr::NoFlags) {
emitThumbRegPlusImmediate(MBB, MBBI, dl, ARM::SP, ARM::SP, NumBytes, TII,
MRI, MIFlags);
@@ -53,8 +53,8 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const {
const Thumb1InstrInfo &TII =
*static_cast<const Thumb1InstrInfo *>(STI.getInstrInfo());
- const Thumb1RegisterInfo *RegInfo =
- static_cast<const Thumb1RegisterInfo *>(STI.getRegisterInfo());
+ const ThumbRegisterInfo *RegInfo =
+ static_cast<const ThumbRegisterInfo *>(STI.getRegisterInfo());
if (!hasReservedCallFrame(MF)) {
// If we have alloca, convert as follows:
// ADJCALLSTACKDOWN -> sub, sp, sp, amount
@@ -89,13 +89,12 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const {
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
MachineModuleInfo &MMI = MF.getMMI();
const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
- const Thumb1RegisterInfo *RegInfo =
- static_cast<const Thumb1RegisterInfo *>(STI.getRegisterInfo());
+ const ThumbRegisterInfo *RegInfo =
+ static_cast<const ThumbRegisterInfo *>(STI.getRegisterInfo());
const Thumb1InstrInfo &TII =
*static_cast<const Thumb1InstrInfo *>(STI.getInstrInfo());
- unsigned Align = STI.getFrameLowering()->getStackAlignment();
- unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(Align);
+ unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize();
unsigned NumBytes = MFI->getStackSize();
assert(NumBytes >= ArgRegsSaveSize &&
"ArgRegsSaveSize is included in NumBytes");
@@ -328,17 +327,16 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF,
DebugLoc dl = MBBI->getDebugLoc();
MachineFrameInfo *MFI = MF.getFrameInfo();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
- const Thumb1RegisterInfo *RegInfo =
- static_cast<const Thumb1RegisterInfo *>(STI.getRegisterInfo());
+ const ThumbRegisterInfo *RegInfo =
+ static_cast<const ThumbRegisterInfo *>(STI.getRegisterInfo());
const Thumb1InstrInfo &TII =
*static_cast<const Thumb1InstrInfo *>(STI.getInstrInfo());
- unsigned Align = STI.getFrameLowering()->getStackAlignment();
- unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(Align);
+ unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize();
int NumBytes = (int)MFI->getStackSize();
assert((unsigned)NumBytes >= ArgRegsSaveSize &&
"ArgRegsSaveSize is included in NumBytes");
- const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs();
+ const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
unsigned FramePtr = RegInfo->getFrameRegister(MF);
if (!AFI->hasStackFrame()) {
diff --git a/lib/Target/ARM/Thumb1FrameLowering.h b/lib/Target/ARM/Thumb1FrameLowering.h
index b785b28..cf93203 100644
--- a/lib/Target/ARM/Thumb1FrameLowering.h
+++ b/lib/Target/ARM/Thumb1FrameLowering.h
@@ -16,7 +16,7 @@
#include "ARMFrameLowering.h"
#include "Thumb1InstrInfo.h"
-#include "Thumb1RegisterInfo.h"
+#include "ThumbRegisterInfo.h"
#include "llvm/Target/TargetFrameLowering.h"
namespace llvm {
diff --git a/lib/Target/ARM/Thumb1InstrInfo.cpp b/lib/Target/ARM/Thumb1InstrInfo.cpp
index c24f740..29aaa15 100644
--- a/lib/Target/ARM/Thumb1InstrInfo.cpp
+++ b/lib/Target/ARM/Thumb1InstrInfo.cpp
@@ -22,8 +22,7 @@
using namespace llvm;
Thumb1InstrInfo::Thumb1InstrInfo(const ARMSubtarget &STI)
- : ARMBaseInstrInfo(STI), RI(STI) {
-}
+ : ARMBaseInstrInfo(STI), RI() {}
/// getNoopForMachoTarget - Return the noop instruction to use for a noop.
void Thumb1InstrInfo::getNoopForMachoTarget(MCInst &NopInst) const {
diff --git a/lib/Target/ARM/Thumb1InstrInfo.h b/lib/Target/ARM/Thumb1InstrInfo.h
index 9fba760..f3f493d 100644
--- a/lib/Target/ARM/Thumb1InstrInfo.h
+++ b/lib/Target/ARM/Thumb1InstrInfo.h
@@ -15,13 +15,13 @@
#define LLVM_LIB_TARGET_ARM_THUMB1INSTRINFO_H
#include "ARMBaseInstrInfo.h"
-#include "Thumb1RegisterInfo.h"
+#include "ThumbRegisterInfo.h"
namespace llvm {
class ARMSubtarget;
class Thumb1InstrInfo : public ARMBaseInstrInfo {
- Thumb1RegisterInfo RI;
+ ThumbRegisterInfo RI;
public:
explicit Thumb1InstrInfo(const ARMSubtarget &STI);
@@ -36,7 +36,7 @@ public:
/// such, whenever a client has an instance of instruction info, it should
/// always be able to get register info as well (through this method).
///
- const Thumb1RegisterInfo &getRegisterInfo() const override { return RI; }
+ const ThumbRegisterInfo &getRegisterInfo() const override { return RI; }
void copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I, DebugLoc DL,
diff --git a/lib/Target/ARM/Thumb2ITBlockPass.cpp b/lib/Target/ARM/Thumb2ITBlockPass.cpp
index b657f2d..7bb2265 100644
--- a/lib/Target/ARM/Thumb2ITBlockPass.cpp
+++ b/lib/Target/ARM/Thumb2ITBlockPass.cpp
@@ -255,6 +255,8 @@ bool Thumb2ITBlockPass::InsertITInstructions(MachineBasicBlock &MBB) {
bool Thumb2ITBlockPass::runOnMachineFunction(MachineFunction &Fn) {
const ARMSubtarget &STI =
static_cast<const ARMSubtarget &>(Fn.getSubtarget());
+ if (!STI.isThumb2())
+ return false;
AFI = Fn.getInfo<ARMFunctionInfo>();
TII = static_cast<const Thumb2InstrInfo *>(STI.getInstrInfo());
TRI = STI.getRegisterInfo();
diff --git a/lib/Target/ARM/Thumb2InstrInfo.cpp b/lib/Target/ARM/Thumb2InstrInfo.cpp
index 62c3752..26ca7e9 100644
--- a/lib/Target/ARM/Thumb2InstrInfo.cpp
+++ b/lib/Target/ARM/Thumb2InstrInfo.cpp
@@ -30,8 +30,7 @@ OldT2IfCvt("old-thumb2-ifcvt", cl::Hidden,
cl::init(false));
Thumb2InstrInfo::Thumb2InstrInfo(const ARMSubtarget &STI)
- : ARMBaseInstrInfo(STI), RI(STI) {
-}
+ : ARMBaseInstrInfo(STI), RI() {}
/// getNoopForMachoTarget - Return the noop instruction to use for a noop.
void Thumb2InstrInfo::getNoopForMachoTarget(MCInst &NopInst) const {
diff --git a/lib/Target/ARM/Thumb2InstrInfo.h b/lib/Target/ARM/Thumb2InstrInfo.h
index 46a1f6d..916ab06 100644
--- a/lib/Target/ARM/Thumb2InstrInfo.h
+++ b/lib/Target/ARM/Thumb2InstrInfo.h
@@ -15,14 +15,14 @@
#define LLVM_LIB_TARGET_ARM_THUMB2INSTRINFO_H
#include "ARMBaseInstrInfo.h"
-#include "Thumb2RegisterInfo.h"
+#include "ThumbRegisterInfo.h"
namespace llvm {
class ARMSubtarget;
class ScheduleHazardRecognizer;
class Thumb2InstrInfo : public ARMBaseInstrInfo {
- Thumb2RegisterInfo RI;
+ ThumbRegisterInfo RI;
public:
explicit Thumb2InstrInfo(const ARMSubtarget &STI);
@@ -60,7 +60,7 @@ public:
/// such, whenever a client has an instance of instruction info, it should
/// always be able to get register info as well (through this method).
///
- const Thumb2RegisterInfo &getRegisterInfo() const override { return RI; }
+ const ThumbRegisterInfo &getRegisterInfo() const override { return RI; }
private:
void expandLoadStackGuard(MachineBasicBlock::iterator MI,
diff --git a/lib/Target/ARM/Thumb2RegisterInfo.cpp b/lib/Target/ARM/Thumb2RegisterInfo.cpp
deleted file mode 100644
index 0d5d85a..0000000
--- a/lib/Target/ARM/Thumb2RegisterInfo.cpp
+++ /dev/null
@@ -1,53 +0,0 @@
-//===-- Thumb2RegisterInfo.cpp - Thumb-2 Register Information -------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the Thumb-2 implementation of the TargetRegisterInfo
-// class.
-//
-//===----------------------------------------------------------------------===//
-
-#include "Thumb2RegisterInfo.h"
-#include "ARM.h"
-#include "ARMSubtarget.h"
-#include "llvm/CodeGen/MachineConstantPool.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/DerivedTypes.h"
-#include "llvm/IR/Function.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
-using namespace llvm;
-
-Thumb2RegisterInfo::Thumb2RegisterInfo(const ARMSubtarget &sti)
- : ARMBaseRegisterInfo(sti) {
-}
-
-/// emitLoadConstPool - Emits a load from constpool to materialize the
-/// specified immediate.
-void
-Thumb2RegisterInfo::emitLoadConstPool(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator &MBBI,
- DebugLoc dl,
- unsigned DestReg, unsigned SubIdx,
- int Val,
- ARMCC::CondCodes Pred, unsigned PredReg,
- unsigned MIFlags) const {
- MachineFunction &MF = *MBB.getParent();
- const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
- MachineConstantPool *ConstantPool = MF.getConstantPool();
- const Constant *C = ConstantInt::get(
- Type::getInt32Ty(MBB.getParent()->getFunction()->getContext()), Val);
- unsigned Idx = ConstantPool->getConstantPoolIndex(C, 4);
-
- BuildMI(MBB, MBBI, dl, TII.get(ARM::t2LDRpci))
- .addReg(DestReg, getDefRegState(true), SubIdx)
- .addConstantPoolIndex(Idx).addImm((int64_t)ARMCC::AL).addReg(0)
- .setMIFlags(MIFlags);
-}
diff --git a/lib/Target/ARM/Thumb2RegisterInfo.h b/lib/Target/ARM/Thumb2RegisterInfo.h
deleted file mode 100644
index 1dd94cc..0000000
--- a/lib/Target/ARM/Thumb2RegisterInfo.h
+++ /dev/null
@@ -1,38 +0,0 @@
-//===- Thumb2RegisterInfo.h - Thumb-2 Register Information Impl -*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the Thumb-2 implementation of the TargetRegisterInfo
-// class.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TARGET_ARM_THUMB2REGISTERINFO_H
-#define LLVM_LIB_TARGET_ARM_THUMB2REGISTERINFO_H
-
-#include "ARMBaseRegisterInfo.h"
-
-namespace llvm {
-
-class ARMSubtarget;
-
-struct Thumb2RegisterInfo : public ARMBaseRegisterInfo {
-public:
- Thumb2RegisterInfo(const ARMSubtarget &STI);
-
- /// emitLoadConstPool - Emits a load from constpool to materialize the
- /// specified immediate.
- void
- emitLoadConstPool(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
- DebugLoc dl, unsigned DestReg, unsigned SubIdx, int Val,
- ARMCC::CondCodes Pred = ARMCC::AL, unsigned PredReg = 0,
- unsigned MIFlags = MachineInstr::NoFlags) const override;
-};
-}
-
-#endif
diff --git a/lib/Target/ARM/Thumb2SizeReduction.cpp b/lib/Target/ARM/Thumb2SizeReduction.cpp
index 2ee908b..e967e53 100644
--- a/lib/Target/ARM/Thumb2SizeReduction.cpp
+++ b/lib/Target/ARM/Thumb2SizeReduction.cpp
@@ -21,6 +21,7 @@
#include "llvm/IR/Function.h" // To access Function attributes
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
using namespace llvm;
@@ -1002,6 +1003,9 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) {
bool Thumb2SizeReduce::runOnMachineFunction(MachineFunction &MF) {
STI = &static_cast<const ARMSubtarget &>(MF.getSubtarget());
+ if (STI->isThumb1Only() || STI->prefers32BitThumb())
+ return false;
+
TII = static_cast<const Thumb2InstrInfo *>(STI->getInstrInfo());
// Optimizing / minimizing size?
diff --git a/lib/Target/ARM/Thumb1RegisterInfo.cpp b/lib/Target/ARM/ThumbRegisterInfo.cpp
index 5e2cbdc..b5f9d7e 100644
--- a/lib/Target/ARM/Thumb1RegisterInfo.cpp
+++ b/lib/Target/ARM/ThumbRegisterInfo.cpp
@@ -1,4 +1,4 @@
-//===-- Thumb1RegisterInfo.cpp - Thumb-1 Register Information -------------===//
+//===-- ThumbRegisterInfo.cpp - Thumb Register Information ---------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -12,7 +12,7 @@
//
//===----------------------------------------------------------------------===//
-#include "Thumb1RegisterInfo.h"
+#include "ThumbRegisterInfo.h"
#include "ARMBaseInstrInfo.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMSubtarget.h"
@@ -38,39 +38,35 @@ extern cl::opt<bool> ReuseFrameIndexVals;
using namespace llvm;
-Thumb1RegisterInfo::Thumb1RegisterInfo(const ARMSubtarget &sti)
- : ARMBaseRegisterInfo(sti) {
-}
+ThumbRegisterInfo::ThumbRegisterInfo() : ARMBaseRegisterInfo() {}
+
+const TargetRegisterClass *
+ThumbRegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC,
+ const MachineFunction &MF) const {
+ if (!MF.getSubtarget<ARMSubtarget>().isThumb1Only())
+ return ARMBaseRegisterInfo::getLargestLegalSuperClass(RC, MF);
-const TargetRegisterClass*
-Thumb1RegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC)
- const {
if (ARM::tGPRRegClass.hasSubClassEq(RC))
return &ARM::tGPRRegClass;
- return ARMBaseRegisterInfo::getLargestLegalSuperClass(RC);
+ return ARMBaseRegisterInfo::getLargestLegalSuperClass(RC, MF);
}
const TargetRegisterClass *
-Thumb1RegisterInfo::getPointerRegClass(const MachineFunction &MF, unsigned Kind)
- const {
+ThumbRegisterInfo::getPointerRegClass(const MachineFunction &MF,
+ unsigned Kind) const {
+ if (!MF.getSubtarget<ARMSubtarget>().isThumb1Only())
+ return ARMBaseRegisterInfo::getPointerRegClass(MF, Kind);
return &ARM::tGPRRegClass;
}
-/// emitLoadConstPool - Emits a load from constpool to materialize the
-/// specified immediate.
-void
-Thumb1RegisterInfo::emitLoadConstPool(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator &MBBI,
- DebugLoc dl,
- unsigned DestReg, unsigned SubIdx,
- int Val,
- ARMCC::CondCodes Pred, unsigned PredReg,
- unsigned MIFlags) const {
- assert((isARMLowRegister(DestReg) ||
- isVirtualRegister(DestReg)) &&
- "Thumb1 does not have ldr to high register");
-
+static void emitThumb1LoadConstPool(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI,
+ DebugLoc dl, unsigned DestReg,
+ unsigned SubIdx, int Val,
+ ARMCC::CondCodes Pred, unsigned PredReg,
+ unsigned MIFlags) {
MachineFunction &MF = *MBB.getParent();
+ const ARMSubtarget &STI = MF.getSubtarget<ARMSubtarget>();
const TargetInstrInfo &TII = *STI.getInstrInfo();
MachineConstantPool *ConstantPool = MF.getConstantPool();
const Constant *C = ConstantInt::get(
@@ -83,6 +79,42 @@ Thumb1RegisterInfo::emitLoadConstPool(MachineBasicBlock &MBB,
.setMIFlags(MIFlags);
}
+static void emitThumb2LoadConstPool(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI,
+ DebugLoc dl, unsigned DestReg,
+ unsigned SubIdx, int Val,
+ ARMCC::CondCodes Pred, unsigned PredReg,
+ unsigned MIFlags) {
+ MachineFunction &MF = *MBB.getParent();
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
+ MachineConstantPool *ConstantPool = MF.getConstantPool();
+ const Constant *C = ConstantInt::get(
+ Type::getInt32Ty(MBB.getParent()->getFunction()->getContext()), Val);
+ unsigned Idx = ConstantPool->getConstantPoolIndex(C, 4);
+
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::t2LDRpci))
+ .addReg(DestReg, getDefRegState(true), SubIdx)
+ .addConstantPoolIndex(Idx).addImm((int64_t)ARMCC::AL).addReg(0)
+ .setMIFlags(MIFlags);
+}
+
+/// emitLoadConstPool - Emits a load from constpool to materialize the
+/// specified immediate.
+void ThumbRegisterInfo::emitLoadConstPool(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, DebugLoc dl,
+ unsigned DestReg, unsigned SubIdx, int Val, ARMCC::CondCodes Pred,
+ unsigned PredReg, unsigned MIFlags) const {
+ MachineFunction &MF = *MBB.getParent();
+ const ARMSubtarget &STI = MF.getSubtarget<ARMSubtarget>();
+ if (STI.isThumb1Only()) {
+ assert((isARMLowRegister(DestReg) || isVirtualRegister(DestReg)) &&
+ "Thumb1 does not have ldr to high register");
+ return emitThumb1LoadConstPool(MBB, MBBI, dl, DestReg, SubIdx, Val, Pred,
+ PredReg, MIFlags);
+ }
+ return emitThumb2LoadConstPool(MBB, MBBI, dl, DestReg, SubIdx, Val, Pred,
+ PredReg, MIFlags);
+}
/// emitThumbRegPlusImmInReg - Emits a series of instructions to materialize
/// a destreg = basereg + immediate in Thumb code. Materialize the immediate
@@ -317,12 +349,14 @@ static unsigned convertToNonSPOpcode(unsigned Opcode) {
return Opcode;
}
-bool Thumb1RegisterInfo::
-rewriteFrameIndex(MachineBasicBlock::iterator II, unsigned FrameRegIdx,
- unsigned FrameReg, int &Offset,
- const ARMBaseInstrInfo &TII) const {
+bool ThumbRegisterInfo::rewriteFrameIndex(MachineBasicBlock::iterator II,
+ unsigned FrameRegIdx,
+ unsigned FrameReg, int &Offset,
+ const ARMBaseInstrInfo &TII) const {
MachineInstr &MI = *II;
MachineBasicBlock &MBB = *MI.getParent();
+ assert(MBB.getParent()->getSubtarget<ARMSubtarget>().isThumb1Only() &&
+ "This isn't needed for thumb2!");
DebugLoc dl = MI.getDebugLoc();
MachineInstrBuilder MIB(*MBB.getParent(), &MI);
unsigned Opcode = MI.getOpcode();
@@ -386,8 +420,13 @@ rewriteFrameIndex(MachineBasicBlock::iterator II, unsigned FrameRegIdx,
return Offset == 0;
}
-void Thumb1RegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
+void ThumbRegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
int64_t Offset) const {
+ const MachineFunction &MF = *MI.getParent()->getParent();
+ const ARMSubtarget &STI = MF.getSubtarget<ARMSubtarget>();
+ if (!STI.isThumb1Only())
+ return ARMBaseRegisterInfo::resolveFrameIndex(MI, BaseReg, Offset);
+
const ARMBaseInstrInfo &TII = *STI.getInstrInfo();
int Off = Offset; // ARM doesn't need the general 64-bit offsets
unsigned i = 0;
@@ -403,12 +442,15 @@ void Thumb1RegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
/// saveScavengerRegister - Spill the register so it can be used by the
/// register scavenger. Return true.
-bool
-Thumb1RegisterInfo::saveScavengerRegister(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I,
- MachineBasicBlock::iterator &UseMI,
- const TargetRegisterClass *RC,
- unsigned Reg) const {
+bool ThumbRegisterInfo::saveScavengerRegister(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ MachineBasicBlock::iterator &UseMI, const TargetRegisterClass *RC,
+ unsigned Reg) const {
+
+ const ARMSubtarget &STI = MBB.getParent()->getSubtarget<ARMSubtarget>();
+ if (!STI.isThumb1Only())
+ return ARMBaseRegisterInfo::saveScavengerRegister(MBB, I, UseMI, RC, Reg);
+
// Thumb1 can't use the emergency spill slot on the stack because
// ldr/str immediate offsets must be positive, and if we're referencing
// off the frame pointer (if, for example, there are alloca() calls in
@@ -452,14 +494,18 @@ Thumb1RegisterInfo::saveScavengerRegister(MachineBasicBlock &MBB,
return true;
}
-void
-Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, unsigned FIOperandNum,
- RegScavenger *RS) const {
- unsigned VReg = 0;
+void ThumbRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, unsigned FIOperandNum,
+ RegScavenger *RS) const {
MachineInstr &MI = *II;
MachineBasicBlock &MBB = *MI.getParent();
MachineFunction &MF = *MBB.getParent();
+ const ARMSubtarget &STI = MF.getSubtarget<ARMSubtarget>();
+ if (!STI.isThumb1Only())
+ return ARMBaseRegisterInfo::eliminateFrameIndex(II, SPAdj, FIOperandNum,
+ RS);
+
+ unsigned VReg = 0;
const ARMBaseInstrInfo &TII = *STI.getInstrInfo();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
DebugLoc dl = MI.getDebugLoc();
diff --git a/lib/Target/ARM/Thumb1RegisterInfo.h b/lib/Target/ARM/ThumbRegisterInfo.h
index 5feaf52..23aaff3 100644
--- a/lib/Target/ARM/Thumb1RegisterInfo.h
+++ b/lib/Target/ARM/ThumbRegisterInfo.h
@@ -1,4 +1,4 @@
-//===- Thumb1RegisterInfo.h - Thumb-1 Register Information Impl -*- C++ -*-===//
+//===- ThumbRegisterInfo.h - Thumb Register Information Impl -*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,8 +7,9 @@
//
//===----------------------------------------------------------------------===//
//
-// This file contains the Thumb-1 implementation of the TargetRegisterInfo
-// class.
+// This file contains the Thumb implementation of the TargetRegisterInfo
+// class. With the exception of emitLoadConstPool, Thumb2 tracks
+// ARMBaseRegisterInfo; Thumb1 overrides the functions below.
//
//===----------------------------------------------------------------------===//
@@ -22,12 +23,13 @@ namespace llvm {
class ARMSubtarget;
class ARMBaseInstrInfo;
-struct Thumb1RegisterInfo : public ARMBaseRegisterInfo {
+struct ThumbRegisterInfo : public ARMBaseRegisterInfo {
public:
- Thumb1RegisterInfo(const ARMSubtarget &STI);
+ ThumbRegisterInfo();
const TargetRegisterClass *
- getLargestLegalSuperClass(const TargetRegisterClass *RC) const override;
+ getLargestLegalSuperClass(const TargetRegisterClass *RC,
+ const MachineFunction &MF) const override;
const TargetRegisterClass *
getPointerRegClass(const MachineFunction &MF,
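
Note: with Thumb1RegisterInfo and Thumb2RegisterInfo merged, each override first checks isThumb1Only() and defers to ARMBaseRegisterInfo otherwise, since the mode now comes from the per-function subtarget instead of the class hierarchy. The dispatch shape, sketched with stub types:

    // One class; Thumb2 defers to the ARM base behavior per call.
    struct ARMBaseRegInfoStub {
      virtual ~ARMBaseRegInfoStub() = default;
      virtual const char *getPointerRegClass(bool Thumb1Only) const {
        return "GPR";
      }
    };
    struct ThumbRegInfoStub : ARMBaseRegInfoStub {
      const char *getPointerRegClass(bool Thumb1Only) const override {
        if (!Thumb1Only)
          return ARMBaseRegInfoStub::getPointerRegClass(Thumb1Only);
        return "tGPR"; // Thumb1 is restricted to the low registers
      }
    };
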
diff --git a/lib/Target/BPF/BPFISelDAGToDAG.cpp b/lib/Target/BPF/BPFISelDAGToDAG.cpp
index 07f62a9..b91b0e1 100644
--- a/lib/Target/BPF/BPFISelDAGToDAG.cpp
+++ b/lib/Target/BPF/BPFISelDAGToDAG.cpp
@@ -17,16 +17,16 @@
#include "BPFSubtarget.h"
#include "BPFTargetMachine.h"
#include "llvm/CodeGen/MachineConstantPool.h"
-#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
-#include "llvm/Target/TargetMachine.h"
+#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/Target/TargetMachine.h"
using namespace llvm;
#define DEBUG_TYPE "bpf-isel"
diff --git a/lib/Target/BPF/BPFISelLowering.h b/lib/Target/BPF/BPFISelLowering.h
index 04d7908..5a6f0f7 100644
--- a/lib/Target/BPF/BPFISelLowering.h
+++ b/lib/Target/BPF/BPFISelLowering.h
@@ -20,6 +20,7 @@
#include "llvm/Target/TargetLowering.h"
namespace llvm {
+class BPFSubtarget;
namespace BPFISD {
enum {
FIRST_NUMBER = ISD::BUILTIN_OP_END,
diff --git a/lib/Target/BPF/BPFRegisterInfo.h b/lib/Target/BPF/BPFRegisterInfo.h
index 364d6f6..7072dd0 100644
--- a/lib/Target/BPF/BPFRegisterInfo.h
+++ b/lib/Target/BPF/BPFRegisterInfo.h
@@ -25,8 +25,7 @@ struct BPFRegisterInfo : public BPFGenRegisterInfo {
BPFRegisterInfo();
- const MCPhysReg *
- getCalleeSavedRegs(const MachineFunction *MF = nullptr) const override;
+ const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override;
BitVector getReservedRegs(const MachineFunction &MF) const override;
diff --git a/lib/Target/BPF/BPFTargetMachine.cpp b/lib/Target/BPF/BPFTargetMachine.cpp
index 5245395..9487427 100644
--- a/lib/Target/BPF/BPFTargetMachine.cpp
+++ b/lib/Target/BPF/BPFTargetMachine.cpp
@@ -35,9 +35,9 @@ BPFTargetMachine::BPFTargetMachine(const Target &T, StringRef TT, StringRef CPU,
StringRef FS, const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL)
- : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
+ : LLVMTargetMachine(T, "e-m:e-p:64:64-i64:64-n32:64-S128", TT, CPU, FS,
+ Options, RM, CM, OL),
TLOF(make_unique<TargetLoweringObjectFileELF>()),
- DL("e-m:e-p:64:64-i64:64-n32:64-S128"),
Subtarget(TT, CPU, FS, *this) {
initAsmInfo();
}
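This constructor change tracks the r233350-era API in which a target's DataLayout string is passed to the LLVMTargetMachine base constructor instead of being stored in a per-target DL member. A hedged, stand-alone sketch of the new ownership (names here are illustrative):

    #include <iostream>
    #include <string>

    // The base class owns the data-layout string after this change.
    struct TargetMachineBase {
      explicit TargetMachineBase(std::string DL) : DataLayoutStr(std::move(DL)) {}
      const std::string &getDataLayoutStr() const { return DataLayoutStr; }
    private:
      std::string DataLayoutStr;
    };

    // The per-target machine no longer keeps its own copy; it forwards the
    // string at construction time, as the BPF constructor above now does.
    struct BPFTargetMachineSketch : TargetMachineBase {
      BPFTargetMachineSketch()
          : TargetMachineBase("e-m:e-p:64:64-i64:64-n32:64-S128") {}
    };

    int main() {
      BPFTargetMachineSketch TM;
      std::cout << TM.getDataLayoutStr() << "\n";
    }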
diff --git a/lib/Target/BPF/BPFTargetMachine.h b/lib/Target/BPF/BPFTargetMachine.h
index 821cffc..6aeafb9 100644
--- a/lib/Target/BPF/BPFTargetMachine.h
+++ b/lib/Target/BPF/BPFTargetMachine.h
@@ -20,7 +20,6 @@
namespace llvm {
class BPFTargetMachine : public LLVMTargetMachine {
std::unique_ptr<TargetLoweringObjectFile> TLOF;
- const DataLayout DL;
BPFSubtarget Subtarget;
public:
@@ -28,8 +27,10 @@ public:
const TargetOptions &Options, Reloc::Model RM,
CodeModel::Model CM, CodeGenOpt::Level OL);
- const DataLayout *getDataLayout() const override { return &DL; }
- const BPFSubtarget *getSubtargetImpl() const override { return &Subtarget; }
+ const BPFSubtarget *getSubtargetImpl() const { return &Subtarget; }
+ const BPFSubtarget *getSubtargetImpl(const Function &) const override {
+ return &Subtarget;
+ }
TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
diff --git a/lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp b/lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp
index b94693a..9c51d66 100644
--- a/lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp
+++ b/lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp
@@ -60,7 +60,6 @@ public:
MCCodeEmitter *llvm::createBPFMCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI,
MCContext &Ctx) {
return new BPFMCCodeEmitter(MRI);
}
diff --git a/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp b/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp
index f82f009..fd04001 100644
--- a/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp
+++ b/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp
@@ -61,13 +61,11 @@ static MCCodeGenInfo *createBPFMCCodeGenInfo(StringRef TT, Reloc::Model RM,
return X;
}
-static MCStreamer *createBPFMCStreamer(const Target &T, StringRef TT,
+static MCStreamer *createBPFMCStreamer(const Triple &T,
MCContext &Ctx, MCAsmBackend &MAB,
- raw_ostream &_OS,
- MCCodeEmitter *_Emitter,
- const MCSubtargetInfo &STI,
+ raw_ostream &OS, MCCodeEmitter *Emitter,
bool RelaxAll) {
- return createELFStreamer(Ctx, MAB, _OS, _Emitter, RelaxAll);
+ return createELFStreamer(Ctx, MAB, OS, Emitter, RelaxAll);
}
static MCInstPrinter *
@@ -104,7 +102,7 @@ extern "C" void LLVMInitializeBPFTargetMC() {
TargetRegistry::RegisterMCAsmBackend(TheBPFTarget, createBPFAsmBackend);
// Register the object streamer
- TargetRegistry::RegisterMCObjectStreamer(TheBPFTarget, createBPFMCStreamer);
+ TargetRegistry::RegisterELFStreamer(TheBPFTarget, createBPFMCStreamer);
// Register the MCInstPrinter.
TargetRegistry::RegisterMCInstPrinter(TheBPFTarget, createBPFMCInstPrinter);
diff --git a/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.h b/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.h
index 55901cc..1fd2bec 100644
--- a/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.h
+++ b/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.h
@@ -33,7 +33,6 @@ extern Target TheBPFTarget;
MCCodeEmitter *createBPFMCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI,
MCContext &Ctx);
MCAsmBackend *createBPFAsmBackend(const Target &T, const MCRegisterInfo &MRI,
diff --git a/lib/Target/BPF/TargetInfo/BPFTargetInfo.cpp b/lib/Target/BPF/TargetInfo/BPFTargetInfo.cpp
index 818a992..87716e6 100644
--- a/lib/Target/BPF/TargetInfo/BPFTargetInfo.cpp
+++ b/lib/Target/BPF/TargetInfo/BPFTargetInfo.cpp
@@ -14,5 +14,5 @@ using namespace llvm;
Target llvm::TheBPFTarget;
extern "C" void LLVMInitializeBPFTargetInfo() {
- RegisterTarget<Triple::bpf> X(TheBPFTarget, "bpf", "BPF");
+ RegisterTarget<Triple::bpf, /*HasJIT=*/true> X(TheBPFTarget, "bpf", "BPF");
}
diff --git a/lib/Target/CppBackend/CPPBackend.cpp b/lib/Target/CppBackend/CPPBackend.cpp
index c7fec52..d0e2010 100644
--- a/lib/Target/CppBackend/CPPBackend.cpp
+++ b/lib/Target/CppBackend/CPPBackend.cpp
@@ -1981,7 +1981,8 @@ void CppWriter::printModule(const std::string& fname,
printEscapedString(mName);
Out << "\", getGlobalContext());";
if (!TheModule->getTargetTriple().empty()) {
- nl(Out) << "mod->setDataLayout(\"" << TheModule->getDataLayout() << "\");";
+ nl(Out) << "mod->setDataLayout(\"" << TheModule->getDataLayoutStr()
+ << "\");";
}
if (!TheModule->getTargetTriple().empty()) {
nl(Out) << "mod->setTargetTriple(\"" << TheModule->getTargetTriple()
diff --git a/lib/Target/CppBackend/CPPTargetMachine.h b/lib/Target/CppBackend/CPPTargetMachine.h
index 4bae7f8..678a932 100644
--- a/lib/Target/CppBackend/CPPTargetMachine.h
+++ b/lib/Target/CppBackend/CPPTargetMachine.h
@@ -22,20 +22,13 @@ namespace llvm {
class formatted_raw_ostream;
-class CPPSubtarget : public TargetSubtargetInfo {
-};
-
struct CPPTargetMachine : public TargetMachine {
- CPPTargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS, const TargetOptions &Options,
- Reloc::Model RM, CodeModel::Model CM,
- CodeGenOpt::Level OL)
- : TargetMachine(T, TT, CPU, FS, Options), Subtarget() {}
-private:
- CPPSubtarget Subtarget;
+ CPPTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS,
+ const TargetOptions &Options, Reloc::Model RM,
+ CodeModel::Model CM, CodeGenOpt::Level OL)
+ : TargetMachine(T, "", TT, CPU, FS, Options) {}
public:
- const CPPSubtarget *getSubtargetImpl() const override { return &Subtarget; }
bool addPassesToEmitFile(PassManagerBase &PM, formatted_raw_ostream &Out,
CodeGenFileType FileType, bool DisableVerify,
AnalysisID StartAfter,
diff --git a/lib/Target/Hexagon/CMakeLists.txt b/lib/Target/Hexagon/CMakeLists.txt
index eaa8bef..c6ffb96 100644
--- a/lib/Target/Hexagon/CMakeLists.txt
+++ b/lib/Target/Hexagon/CMakeLists.txt
@@ -31,7 +31,6 @@ add_llvm_target(HexagonCodeGen
HexagonRemoveSZExtArgs.cpp
HexagonSelectionDAGInfo.cpp
HexagonSplitConst32AndConst64.cpp
- HexagonSplitTFRCondSets.cpp
HexagonSubtarget.cpp
HexagonTargetMachine.cpp
HexagonTargetObjectFile.cpp
diff --git a/lib/Target/Hexagon/Hexagon.h b/lib/Target/Hexagon/Hexagon.h
index e0a3b2f..dfe79f9 100644
--- a/lib/Target/Hexagon/Hexagon.h
+++ b/lib/Target/Hexagon/Hexagon.h
@@ -36,7 +36,6 @@ namespace llvm {
FunctionPass *createHexagonRemoveExtendArgs(const HexagonTargetMachine &TM);
FunctionPass *createHexagonCFGOptimizer();
- FunctionPass *createHexagonSplitTFRCondSets();
FunctionPass *createHexagonSplitConst32AndConst64();
FunctionPass *createHexagonExpandPredSpillCode();
FunctionPass *createHexagonHardwareLoops();
diff --git a/lib/Target/Hexagon/Hexagon.td b/lib/Target/Hexagon/Hexagon.td
index f892c9f..53a687c 100644
--- a/lib/Target/Hexagon/Hexagon.td
+++ b/lib/Target/Hexagon/Hexagon.td
@@ -28,10 +28,10 @@ def ArchV5: SubtargetFeature<"v5", "HexagonArchVersion", "V5", "Hexagon V5">;
//===----------------------------------------------------------------------===//
// Hexagon Instruction Predicate Definitions.
//===----------------------------------------------------------------------===//
-def HasV5T : Predicate<"Subtarget->hasV5TOps()">;
-def NoV5T : Predicate<"!Subtarget->hasV5TOps()">;
-def UseMEMOP : Predicate<"Subtarget->useMemOps()">;
-def IEEERndNearV5T : Predicate<"Subtarget->modeIEEERndNear()">;
+def HasV5T : Predicate<"HST->hasV5TOps()">;
+def NoV5T : Predicate<"!HST->hasV5TOps()">;
+def UseMEMOP : Predicate<"HST->useMemOps()">;
+def IEEERndNearV5T : Predicate<"HST->modeIEEERndNear()">;
//===----------------------------------------------------------------------===//
// Classes used for relation maps.
@@ -168,14 +168,6 @@ def getRegForm : InstrMapping {
let ValueCols = [["reg"]];
}
-def getRegShlForm : InstrMapping {
- let FilterClass = "ImmRegShl";
- let RowFields = ["CextOpcode", "PredSense", "PNewValue", "isNVStore"];
- let ColFields = ["InputType"];
- let KeyCol = ["imm"];
- let ValueCols = [["reg"]];
-}
-
//===----------------------------------------------------------------------===//
// Register File, Calling Conv, Instruction Descriptions
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/Hexagon/HexagonCopyToCombine.cpp b/lib/Target/Hexagon/HexagonCopyToCombine.cpp
index dd193f9..5a26045 100644
--- a/lib/Target/Hexagon/HexagonCopyToCombine.cpp
+++ b/lib/Target/Hexagon/HexagonCopyToCombine.cpp
@@ -127,12 +127,21 @@ static bool isCombinableInstType(MachineInstr *MI,
case Hexagon::A2_tfrsi: {
// A transfer-immediate can be combined if its argument is a signed 8bit
// value.
- assert(MI->getOperand(0).isReg() && MI->getOperand(1).isImm());
- unsigned DestReg = MI->getOperand(0).getReg();
+ const MachineOperand &Op0 = MI->getOperand(0);
+ const MachineOperand &Op1 = MI->getOperand(1);
+ assert(Op0.isReg());
+
+ unsigned DestReg = Op0.getReg();
+ // Ensure that TargetFlags are MO_NO_FLAG for a global. This is a
+ // workaround for an ABI bug that prevents GOT relocations on combine
+    // instructions.
+ if (!Op1.isImm() && Op1.getTargetFlags() != HexagonII::MO_NO_FLAG)
+ return false;
- // Only combine constant extended TFRI if we are in aggressive mode.
+ // Only combine constant extended A2_tfrsi if we are in aggressive mode.
+ bool NotExt = Op1.isImm() && isInt<8>(Op1.getImm());
return Hexagon::IntRegsRegClass.contains(DestReg) &&
- (ShouldCombineAggressively || isInt<8>(MI->getOperand(1).getImm()));
+ (ShouldCombineAggressively || NotExt);
}
case Hexagon::TFRI_V4: {
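The NotExt test above hinges on isInt<8>: it is true exactly when the immediate fits a signed 8-bit field, i.e. lies in [-128, 127]; anything wider needs a constant extender and is only combined in aggressive mode. A stand-alone equivalent of the check, assuming the usual two's-complement range:

    #include <cstdint>
    #include <iostream>

    // Same contract as llvm::isInt<N>: does x fit in an N-bit signed field?
    template <unsigned N> bool fitsSigned(int64_t x) {
      return x >= -(INT64_C(1) << (N - 1)) && x < (INT64_C(1) << (N - 1));
    }

    int main() {
      std::cout << fitsSigned<8>(127) << "\n";  // 1: combinable as-is
      std::cout << fitsSigned<8>(-128) << "\n"; // 1
      std::cout << fitsSigned<8>(128) << "\n";  // 0: needs a constant extender
    }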
diff --git a/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp b/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp
index 8176598..40059fb 100644
--- a/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp
+++ b/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp
@@ -79,7 +79,166 @@ bool HexagonExpandPredSpillCode::runOnMachineFunction(MachineFunction &Fn) {
++MII) {
MachineInstr *MI = MII;
int Opc = MI->getOpcode();
- if (Opc == Hexagon::STriw_pred) {
+ if (Opc == Hexagon::S2_storerb_pci_pseudo ||
+ Opc == Hexagon::S2_storerh_pci_pseudo ||
+ Opc == Hexagon::S2_storeri_pci_pseudo ||
+ Opc == Hexagon::S2_storerd_pci_pseudo ||
+ Opc == Hexagon::S2_storerf_pci_pseudo) {
+ unsigned Opcode;
+ if (Opc == Hexagon::S2_storerd_pci_pseudo)
+ Opcode = Hexagon::S2_storerd_pci;
+ else if (Opc == Hexagon::S2_storeri_pci_pseudo)
+ Opcode = Hexagon::S2_storeri_pci;
+ else if (Opc == Hexagon::S2_storerh_pci_pseudo)
+ Opcode = Hexagon::S2_storerh_pci;
+ else if (Opc == Hexagon::S2_storerf_pci_pseudo)
+ Opcode = Hexagon::S2_storerf_pci;
+ else if (Opc == Hexagon::S2_storerb_pci_pseudo)
+ Opcode = Hexagon::S2_storerb_pci;
+ else
+ llvm_unreachable("wrong Opc");
+ MachineOperand &Op0 = MI->getOperand(0);
+ MachineOperand &Op1 = MI->getOperand(1);
+ MachineOperand &Op2 = MI->getOperand(2);
+ MachineOperand &Op3 = MI->getOperand(3); // Modifier value.
+ MachineOperand &Op4 = MI->getOperand(4);
+      // Emit "C6 = Rn"; C6 is the control register for M0.
+ BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::A2_tfrrcr),
+ Hexagon::C6)->addOperand(Op3);
+      // Replace the pseudo circular store with the real instruction.
+ MachineInstr *NewMI = BuildMI(*MBB, MII, MI->getDebugLoc(),
+ TII->get(Opcode));
+ NewMI->addOperand(Op0);
+ NewMI->addOperand(Op1);
+ NewMI->addOperand(Op4);
+ NewMI->addOperand(MachineOperand::CreateReg(Hexagon::M0,
+ false, /*isDef*/
+ false, /*isImpl*/
+ true /*isKill*/));
+ NewMI->addOperand(Op2);
+ MII = MBB->erase(MI);
+ --MII;
+ } else if (Opc == Hexagon::L2_loadrd_pci_pseudo ||
+ Opc == Hexagon::L2_loadri_pci_pseudo ||
+ Opc == Hexagon::L2_loadrh_pci_pseudo ||
+ Opc == Hexagon::L2_loadruh_pci_pseudo||
+ Opc == Hexagon::L2_loadrb_pci_pseudo ||
+ Opc == Hexagon::L2_loadrub_pci_pseudo) {
+ unsigned Opcode;
+ if (Opc == Hexagon::L2_loadrd_pci_pseudo)
+ Opcode = Hexagon::L2_loadrd_pci;
+ else if (Opc == Hexagon::L2_loadri_pci_pseudo)
+ Opcode = Hexagon::L2_loadri_pci;
+ else if (Opc == Hexagon::L2_loadrh_pci_pseudo)
+ Opcode = Hexagon::L2_loadrh_pci;
+ else if (Opc == Hexagon::L2_loadruh_pci_pseudo)
+ Opcode = Hexagon::L2_loadruh_pci;
+ else if (Opc == Hexagon::L2_loadrb_pci_pseudo)
+ Opcode = Hexagon::L2_loadrb_pci;
+ else if (Opc == Hexagon::L2_loadrub_pci_pseudo)
+ Opcode = Hexagon::L2_loadrub_pci;
+ else
+ llvm_unreachable("wrong Opc");
+
+ MachineOperand &Op0 = MI->getOperand(0);
+ MachineOperand &Op1 = MI->getOperand(1);
+ MachineOperand &Op2 = MI->getOperand(2);
+ MachineOperand &Op4 = MI->getOperand(4); // Modifier value.
+ MachineOperand &Op5 = MI->getOperand(5);
+      // Emit "C6 = Rn"; C6 is the control register for M0.
+ BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::A2_tfrrcr),
+ Hexagon::C6)->addOperand(Op4);
+      // Replace the pseudo circular load with the real instruction.
+ MachineInstr *NewMI = BuildMI(*MBB, MII, MI->getDebugLoc(),
+ TII->get(Opcode));
+ NewMI->addOperand(Op1);
+ NewMI->addOperand(Op0);
+ NewMI->addOperand(Op2);
+ NewMI->addOperand(Op5);
+ NewMI->addOperand(MachineOperand::CreateReg(Hexagon::M0,
+ false, /*isDef*/
+ false, /*isImpl*/
+ true /*isKill*/));
+ MII = MBB->erase(MI);
+ --MII;
+ } else if (Opc == Hexagon::L2_loadrd_pbr_pseudo ||
+ Opc == Hexagon::L2_loadri_pbr_pseudo ||
+ Opc == Hexagon::L2_loadrh_pbr_pseudo ||
+ Opc == Hexagon::L2_loadruh_pbr_pseudo||
+ Opc == Hexagon::L2_loadrb_pbr_pseudo ||
+ Opc == Hexagon::L2_loadrub_pbr_pseudo) {
+ unsigned Opcode;
+ if (Opc == Hexagon::L2_loadrd_pbr_pseudo)
+ Opcode = Hexagon::L2_loadrd_pbr;
+ else if (Opc == Hexagon::L2_loadri_pbr_pseudo)
+ Opcode = Hexagon::L2_loadri_pbr;
+ else if (Opc == Hexagon::L2_loadrh_pbr_pseudo)
+ Opcode = Hexagon::L2_loadrh_pbr;
+ else if (Opc == Hexagon::L2_loadruh_pbr_pseudo)
+ Opcode = Hexagon::L2_loadruh_pbr;
+ else if (Opc == Hexagon::L2_loadrb_pbr_pseudo)
+ Opcode = Hexagon::L2_loadrb_pbr;
+ else if (Opc == Hexagon::L2_loadrub_pbr_pseudo)
+ Opcode = Hexagon::L2_loadrub_pbr;
+ else
+ llvm_unreachable("wrong Opc");
+ MachineOperand &Op0 = MI->getOperand(0);
+ MachineOperand &Op1 = MI->getOperand(1);
+ MachineOperand &Op2 = MI->getOperand(2);
+ MachineOperand &Op4 = MI->getOperand(4); // Modifier value.
+      // Emit "C6 = Rn"; C6 is the control register for M0.
+ BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::A2_tfrrcr),
+ Hexagon::C6)->addOperand(Op4);
+      // Replace the pseudo bit-reversed load with the real instruction.
+ MachineInstr *NewMI = BuildMI(*MBB, MII, MI->getDebugLoc(),
+ TII->get(Opcode));
+ NewMI->addOperand(Op1);
+ NewMI->addOperand(Op0);
+ NewMI->addOperand(Op2);
+ NewMI->addOperand(MachineOperand::CreateReg(Hexagon::M0,
+ false, /*isDef*/
+ false, /*isImpl*/
+ true /*isKill*/));
+ MII = MBB->erase(MI);
+ --MII;
+ } else if (Opc == Hexagon::S2_storerd_pbr_pseudo ||
+ Opc == Hexagon::S2_storeri_pbr_pseudo ||
+ Opc == Hexagon::S2_storerh_pbr_pseudo ||
+ Opc == Hexagon::S2_storerb_pbr_pseudo ||
+ Opc == Hexagon::S2_storerf_pbr_pseudo) {
+ unsigned Opcode;
+ if (Opc == Hexagon::S2_storerd_pbr_pseudo)
+ Opcode = Hexagon::S2_storerd_pbr;
+ else if (Opc == Hexagon::S2_storeri_pbr_pseudo)
+ Opcode = Hexagon::S2_storeri_pbr;
+ else if (Opc == Hexagon::S2_storerh_pbr_pseudo)
+ Opcode = Hexagon::S2_storerh_pbr;
+ else if (Opc == Hexagon::S2_storerf_pbr_pseudo)
+ Opcode = Hexagon::S2_storerf_pbr;
+ else if (Opc == Hexagon::S2_storerb_pbr_pseudo)
+ Opcode = Hexagon::S2_storerb_pbr;
+ else
+ llvm_unreachable("wrong Opc");
+ MachineOperand &Op0 = MI->getOperand(0);
+ MachineOperand &Op1 = MI->getOperand(1);
+ MachineOperand &Op2 = MI->getOperand(2);
+ MachineOperand &Op3 = MI->getOperand(3); // Modifier value.
+      // Emit "C6 = Rn"; C6 is the control register for M0.
+ BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::A2_tfrrcr),
+ Hexagon::C6)->addOperand(Op3);
+      // Replace the pseudo bit-reversed store with the real instruction.
+ MachineInstr *NewMI = BuildMI(*MBB, MII, MI->getDebugLoc(),
+ TII->get(Opcode));
+ NewMI->addOperand(Op0);
+ NewMI->addOperand(Op1);
+ NewMI->addOperand(MachineOperand::CreateReg(Hexagon::M0,
+ false, /*isDef*/
+ false, /*isImpl*/
+ true /*isKill*/));
+ NewMI->addOperand(Op2);
+ MII = MBB->erase(MI);
+ --MII;
+ } else if (Opc == Hexagon::STriw_pred) {
// STriw_pred [R30], ofst, SrcReg;
unsigned FP = MI->getOperand(0).getReg();
assert(FP == QST.getRegisterInfo()->getFrameRegister() &&
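Each branch above picks the real opcode for a pseudo through an if/else ladder. The same mapping can also be expressed as a lookup table; the sketch below only illustrates that alternative structure and uses placeholder enum values, not the real Hexagon opcode numbering:

    #include <cassert>
    #include <unordered_map>

    enum Opcode { // placeholder values, not the Hexagon opcode enum
      S2_storerd_pci_pseudo, S2_storeri_pci_pseudo,
      S2_storerd_pci, S2_storeri_pci
    };

    int realOpcodeFor(int PseudoOpc) {
      static const std::unordered_map<int, int> Map = {
          {S2_storerd_pci_pseudo, S2_storerd_pci},
          {S2_storeri_pci_pseudo, S2_storeri_pci},
      };
      auto It = Map.find(PseudoOpc);
      assert(It != Map.end() && "wrong Opc");
      return It->second;
    }

    int main() {
      return realOpcodeFor(S2_storeri_pci_pseudo) == S2_storeri_pci ? 0 : 1;
    }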
diff --git a/lib/Target/Hexagon/HexagonFrameLowering.cpp b/lib/Target/Hexagon/HexagonFrameLowering.cpp
index 2b1992f..65d689b 100644
--- a/lib/Target/Hexagon/HexagonFrameLowering.cpp
+++ b/lib/Target/Hexagon/HexagonFrameLowering.cpp
@@ -140,7 +140,7 @@ bool HexagonFrameLowering::hasTailCall(MachineBasicBlock &MBB) const {
MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
unsigned RetOpcode = MBBI->getOpcode();
- return RetOpcode == Hexagon::TCRETURNtg || RetOpcode == Hexagon::TCRETURNtext;
+ return RetOpcode == Hexagon::TCRETURNi || RetOpcode == Hexagon::TCRETURNr;
}
void HexagonFrameLowering::emitEpilogue(MachineFunction &MF,
diff --git a/lib/Target/Hexagon/HexagonHardwareLoops.cpp b/lib/Target/Hexagon/HexagonHardwareLoops.cpp
index 1577c33..c47ee9c 100644
--- a/lib/Target/Hexagon/HexagonHardwareLoops.cpp
+++ b/lib/Target/Hexagon/HexagonHardwareLoops.cpp
@@ -690,7 +690,7 @@ CountValue *HexagonHardwareLoops::computeCount(MachineLoop *Loop,
// If the induction variable bump is not a power of 2, quit.
  // Otherwise we'd need a general integer division.
- if (!isPowerOf2_64(abs64(IVBump)))
+ if (!isPowerOf2_64(std::abs(IVBump)))
return nullptr;
MachineBasicBlock *PH = Loop->getLoopPreheader();
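abs64 was LLVM's pre-C++11 helper for 64-bit absolute value; std::abs has the wider overloads in <cstdlib> since C++11, so it substitutes directly. A quick check of the power-of-two test applied to the bump, using the standard v & (v - 1) identity:

    #include <cstdint>
    #include <cstdlib>
    #include <iostream>

    // Mirror of llvm::isPowerOf2_64: nonzero with exactly one bit set.
    bool isPow2(uint64_t V) { return V && (V & (V - 1)) == 0; }

    int main() {
      int64_t IVBump = -8; // a negative induction-variable step
      std::cout << isPow2(static_cast<uint64_t>(std::abs(IVBump))) << "\n"; // 1
      IVBump = -6;
      std::cout << isPow2(static_cast<uint64_t>(std::abs(IVBump))) << "\n"; // 0
    }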
diff --git a/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp b/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
index fb056b5..aaccac8 100644
--- a/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
+++ b/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
@@ -45,37 +45,25 @@ namespace llvm {
///
namespace {
class HexagonDAGToDAGISel : public SelectionDAGISel {
- /// Subtarget - Keep a pointer to the Hexagon Subtarget around so that we can
- /// make the right decision when generating code for different targets.
- const HexagonSubtarget *Subtarget;
-
- // Keep a reference to HexagonTargetMachine.
- const HexagonTargetMachine& TM;
- DenseMap<const GlobalValue *, unsigned> GlobalAddressUseCountMap;
+ const HexagonTargetMachine& HTM;
+ const HexagonSubtarget *HST;
public:
- explicit HexagonDAGToDAGISel(HexagonTargetMachine &targetmachine,
+ explicit HexagonDAGToDAGISel(HexagonTargetMachine &tm,
CodeGenOpt::Level OptLevel)
- : SelectionDAGISel(targetmachine, OptLevel), TM(targetmachine) {
+ : SelectionDAGISel(tm, OptLevel), HTM(tm) {
initializeHexagonDAGToDAGISelPass(*PassRegistry::getPassRegistry());
}
- bool hasNumUsesBelowThresGA(SDNode *N) const;
- SDNode *Select(SDNode *N) override;
+ bool runOnMachineFunction(MachineFunction &MF) override {
+ // Reset the subtarget each time through.
+ HST = &MF.getSubtarget<HexagonSubtarget>();
+ SelectionDAGISel::runOnMachineFunction(MF);
+ return true;
+ }
- // Complex Pattern Selectors.
- inline bool foldGlobalAddress(SDValue &N, SDValue &R);
- inline bool foldGlobalAddressGP(SDValue &N, SDValue &R);
- bool foldGlobalAddressImpl(SDValue &N, SDValue &R, bool ShouldLookForGP);
- bool SelectADDRri(SDValue& N, SDValue &R1, SDValue &R2);
- bool SelectADDRriS11_0(SDValue& N, SDValue &R1, SDValue &R2);
- bool SelectADDRriS11_1(SDValue& N, SDValue &R1, SDValue &R2);
- bool SelectADDRriS11_2(SDValue& N, SDValue &R1, SDValue &R2);
- bool SelectMEMriS11_2(SDValue& Addr, SDValue &Base, SDValue &Offset);
- bool SelectADDRriS11_3(SDValue& N, SDValue &R1, SDValue &R2);
- bool SelectADDRrr(SDValue &Addr, SDValue &Base, SDValue &Offset);
- bool SelectADDRriU6_0(SDValue& N, SDValue &R1, SDValue &R2);
- bool SelectADDRriU6_1(SDValue& N, SDValue &R1, SDValue &R2);
- bool SelectADDRriU6_2(SDValue& N, SDValue &R1, SDValue &R2);
+ virtual void PreprocessISelDAG() override;
+
+ SDNode *Select(SDNode *N) override;
// Complex Pattern Selectors.
inline bool SelectAddrGA(SDValue &N, SDValue &R);
@@ -87,18 +75,12 @@ public:
return "Hexagon DAG->DAG Pattern Instruction Selection";
}
- bool runOnMachineFunction(MachineFunction &MF) override {
- Subtarget = &MF.getSubtarget<HexagonSubtarget>();
- return SelectionDAGISel::runOnMachineFunction(MF);
- }
-
+ SDNode *SelectFrameIndex(SDNode *N);
/// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
/// inline asm expressions.
bool SelectInlineAsmMemoryOperand(const SDValue &Op,
- char ConstraintCode,
+ unsigned ConstraintID,
std::vector<SDValue> &OutOps) override;
- bool SelectAddr(SDNode *Op, SDValue Addr, SDValue &Base, SDValue &Offset);
-
SDNode *SelectLoad(SDNode *N);
SDNode *SelectBaseOffsetLoad(LoadSDNode *LD, SDLoc dl);
SDNode *SelectIndexedLoad(LoadSDNode *LD, SDLoc dl);
@@ -110,99 +92,98 @@ public:
SDNode *SelectIndexedStore(StoreSDNode *ST, SDLoc dl);
SDNode *SelectStore(SDNode *N);
SDNode *SelectSHL(SDNode *N);
- SDNode *SelectSelect(SDNode *N);
- SDNode *SelectTruncate(SDNode *N);
SDNode *SelectMul(SDNode *N);
SDNode *SelectZeroExtend(SDNode *N);
- SDNode *SelectIntrinsicWOChain(SDNode *N);
SDNode *SelectIntrinsicWChain(SDNode *N);
+ SDNode *SelectIntrinsicWOChain(SDNode *N);
SDNode *SelectConstant(SDNode *N);
SDNode *SelectConstantFP(SDNode *N);
SDNode *SelectAdd(SDNode *N);
- bool isConstExtProfitable(SDNode *N) const;
-
-// XformMskToBitPosU5Imm - Returns the bit position which
-// the single bit 32 bit mask represents.
-// Used in Clr and Set bit immediate memops.
-SDValue XformMskToBitPosU5Imm(uint32_t Imm) {
- int32_t bitPos;
- bitPos = Log2_32(Imm);
- assert(bitPos >= 0 && bitPos < 32 &&
- "Constant out of range for 32 BitPos Memops");
- return CurDAG->getTargetConstant(bitPos, MVT::i32);
-}
-
-// XformMskToBitPosU4Imm - Returns the bit position which the single bit 16 bit
-// mask represents. Used in Clr and Set bit immediate memops.
-SDValue XformMskToBitPosU4Imm(uint16_t Imm) {
- return XformMskToBitPosU5Imm(Imm);
-}
+ SDNode *SelectBitOp(SDNode *N);
+
+ // XformMskToBitPosU5Imm - Returns the bit position which
+ // the single bit 32 bit mask represents.
+ // Used in Clr and Set bit immediate memops.
+ SDValue XformMskToBitPosU5Imm(uint32_t Imm) {
+ int32_t bitPos;
+ bitPos = Log2_32(Imm);
+ assert(bitPos >= 0 && bitPos < 32 &&
+ "Constant out of range for 32 BitPos Memops");
+ return CurDAG->getTargetConstant(bitPos, MVT::i32);
+ }
-// XformMskToBitPosU3Imm - Returns the bit position which the single bit 8 bit
-// mask represents. Used in Clr and Set bit immediate memops.
-SDValue XformMskToBitPosU3Imm(uint8_t Imm) {
- return XformMskToBitPosU5Imm(Imm);
-}
+ // XformMskToBitPosU4Imm - Returns the bit position which the single-bit
+ // 16 bit mask represents. Used in Clr and Set bit immediate memops.
+ SDValue XformMskToBitPosU4Imm(uint16_t Imm) {
+ return XformMskToBitPosU5Imm(Imm);
+ }
-// Return true if there is exactly one bit set in V, i.e., if V is one of the
-// following integers: 2^0, 2^1, ..., 2^31.
-bool ImmIsSingleBit(uint32_t v) const {
- return isPowerOf2_32(v);
-}
+ // XformMskToBitPosU3Imm - Returns the bit position which the single-bit
+ // 8 bit mask represents. Used in Clr and Set bit immediate memops.
+ SDValue XformMskToBitPosU3Imm(uint8_t Imm) {
+ return XformMskToBitPosU5Imm(Imm);
+ }
-// XformM5ToU5Imm - Return a target constant with the specified value, of type
-// i32 where the negative literal is transformed into a positive literal for
-// use in -= memops.
-inline SDValue XformM5ToU5Imm(signed Imm) {
- assert( (Imm >= -31 && Imm <= -1) && "Constant out of range for Memops");
- return CurDAG->getTargetConstant( - Imm, MVT::i32);
-}
+ // Return true if there is exactly one bit set in V, i.e., if V is one of the
+ // following integers: 2^0, 2^1, ..., 2^31.
+ bool ImmIsSingleBit(uint32_t v) const {
+ return isPowerOf2_32(v);
+ }
+ // XformM5ToU5Imm - Return a target constant with the specified value, of
+ // type i32 where the negative literal is transformed into a positive literal
+ // for use in -= memops.
+ inline SDValue XformM5ToU5Imm(signed Imm) {
+ assert( (Imm >= -31 && Imm <= -1) && "Constant out of range for Memops");
+ return CurDAG->getTargetConstant( - Imm, MVT::i32);
+ }
-// XformU7ToU7M1Imm - Return a target constant decremented by 1, in range
-// [1..128], used in cmpb.gtu instructions.
-inline SDValue XformU7ToU7M1Imm(signed Imm) {
- assert((Imm >= 1 && Imm <= 128) && "Constant out of range for cmpb op");
- return CurDAG->getTargetConstant(Imm - 1, MVT::i8);
-}
+ // XformU7ToU7M1Imm - Return a target constant decremented by 1, in range
+ // [1..128], used in cmpb.gtu instructions.
+ inline SDValue XformU7ToU7M1Imm(signed Imm) {
+ assert((Imm >= 1 && Imm <= 128) && "Constant out of range for cmpb op");
+ return CurDAG->getTargetConstant(Imm - 1, MVT::i8);
+ }
-// XformS8ToS8M1Imm - Return a target constant decremented by 1.
-inline SDValue XformSToSM1Imm(signed Imm) {
- return CurDAG->getTargetConstant(Imm - 1, MVT::i32);
-}
+ // XformS8ToS8M1Imm - Return a target constant decremented by 1.
+ inline SDValue XformSToSM1Imm(signed Imm) {
+ return CurDAG->getTargetConstant(Imm - 1, MVT::i32);
+ }
-// XformU8ToU8M1Imm - Return a target constant decremented by 1.
-inline SDValue XformUToUM1Imm(unsigned Imm) {
- assert((Imm >= 1) && "Cannot decrement unsigned int less than 1");
- return CurDAG->getTargetConstant(Imm - 1, MVT::i32);
-}
+ // XformU8ToU8M1Imm - Return a target constant decremented by 1.
+ inline SDValue XformUToUM1Imm(unsigned Imm) {
+ assert((Imm >= 1) && "Cannot decrement unsigned int less than 1");
+ return CurDAG->getTargetConstant(Imm - 1, MVT::i32);
+ }
-// XformSToSM2Imm - Return a target constant decremented by 2.
-inline SDValue XformSToSM2Imm(unsigned Imm) {
- return CurDAG->getTargetConstant(Imm - 2, MVT::i32);
-}
+ // XformSToSM2Imm - Return a target constant decremented by 2.
+ inline SDValue XformSToSM2Imm(unsigned Imm) {
+ return CurDAG->getTargetConstant(Imm - 2, MVT::i32);
+ }
-// XformSToSM3Imm - Return a target constant decremented by 3.
-inline SDValue XformSToSM3Imm(unsigned Imm) {
- return CurDAG->getTargetConstant(Imm - 3, MVT::i32);
-}
+ // XformSToSM3Imm - Return a target constant decremented by 3.
+ inline SDValue XformSToSM3Imm(unsigned Imm) {
+ return CurDAG->getTargetConstant(Imm - 3, MVT::i32);
+ }
-// Include the pieces autogenerated from the target description.
-#include "HexagonGenDAGISel.inc"
+ // Include the pieces autogenerated from the target description.
+ #include "HexagonGenDAGISel.inc"
private:
- bool isValueExtension(SDValue const &Val, unsigned FromBits, SDValue &Src);
-};
+ bool isValueExtension(const SDValue &Val, unsigned FromBits, SDValue &Src);
+}; // end HexagonDAGToDAGISel
} // end anonymous namespace
/// createHexagonISelDag - This pass converts a legalized DAG into a
/// Hexagon-specific DAG, ready for instruction scheduling.
///
-FunctionPass *llvm::createHexagonISelDag(HexagonTargetMachine &TM,
- CodeGenOpt::Level OptLevel) {
+namespace llvm {
+FunctionPass *createHexagonISelDag(HexagonTargetMachine &TM,
+ CodeGenOpt::Level OptLevel) {
return new HexagonDAGToDAGISel(TM, OptLevel);
}
+}
static void initializePassOnce(PassRegistry &Registry) {
const char *Name = "Hexagon DAG->DAG Pattern Instruction Selection";
@@ -216,76 +197,6 @@ void llvm::initializeHexagonDAGToDAGISelPass(PassRegistry &Registry) {
}
-static bool IsS11_0_Offset(SDNode * S) {
- ConstantSDNode *N = cast<ConstantSDNode>(S);
-
- // immS16 predicate - True if the immediate fits in a 16-bit sign extended
- // field.
- int64_t v = (int64_t)N->getSExtValue();
- return isInt<11>(v);
-}
-
-
-static bool IsS11_1_Offset(SDNode * S) {
- ConstantSDNode *N = cast<ConstantSDNode>(S);
-
- // immS16 predicate - True if the immediate fits in a 16-bit sign extended
- // field.
- int64_t v = (int64_t)N->getSExtValue();
- return isShiftedInt<11,1>(v);
-}
-
-
-static bool IsS11_2_Offset(SDNode * S) {
- ConstantSDNode *N = cast<ConstantSDNode>(S);
-
- // immS16 predicate - True if the immediate fits in a 16-bit sign extended
- // field.
- int64_t v = (int64_t)N->getSExtValue();
- return isShiftedInt<11,2>(v);
-}
-
-
-static bool IsS11_3_Offset(SDNode * S) {
- ConstantSDNode *N = cast<ConstantSDNode>(S);
-
- // immS16 predicate - True if the immediate fits in a 16-bit sign extended
- // field.
- int64_t v = (int64_t)N->getSExtValue();
- return isShiftedInt<11,3>(v);
-}
-
-
-static bool IsU6_0_Offset(SDNode * S) {
- ConstantSDNode *N = cast<ConstantSDNode>(S);
-
- // u6 predicate - True if the immediate fits in a 6-bit unsigned extended
- // field.
- int64_t v = (int64_t)N->getSExtValue();
- return isUInt<6>(v);
-}
-
-
-static bool IsU6_1_Offset(SDNode * S) {
- ConstantSDNode *N = cast<ConstantSDNode>(S);
-
- // u6 predicate - True if the immediate fits in a 6-bit unsigned extended
- // field.
- int64_t v = (int64_t)N->getSExtValue();
- return isShiftedUInt<6,1>(v);
-}
-
-
-static bool IsU6_2_Offset(SDNode * S) {
- ConstantSDNode *N = cast<ConstantSDNode>(S);
-
- // u6 predicate - True if the immediate fits in a 6-bit unsigned extended
- // field.
- int64_t v = (int64_t)N->getSExtValue();
- return isShiftedUInt<6,2>(v);
-}
-
-
// Intrinsics that return a predicate.
static unsigned doesIntrinsicReturnPredicate(unsigned ID)
{
@@ -332,216 +243,119 @@ static unsigned doesIntrinsicReturnPredicate(unsigned ID)
}
}
-static bool OffsetFitsS11(EVT MemType, int64_t Offset) {
- if (MemType == MVT::i64 && isShiftedInt<11,3>(Offset)) {
- return true;
- }
- if (MemType == MVT::i32 && isShiftedInt<11,2>(Offset)) {
- return true;
- }
- if (MemType == MVT::i16 && isShiftedInt<11,1>(Offset)) {
- return true;
- }
- if (MemType == MVT::i8 && isInt<11>(Offset)) {
- return true;
- }
- return false;
-}
-
-
-//
-// Try to lower loads of GlobalAdresses into base+offset loads. Custom
-// lowering for GlobalAddress nodes has already turned it into a
-// CONST32.
-//
-SDNode *HexagonDAGToDAGISel::SelectBaseOffsetLoad(LoadSDNode *LD, SDLoc dl) {
- SDValue Chain = LD->getChain();
- SDNode* Const32 = LD->getBasePtr().getNode();
- unsigned Opcode = 0;
-
- if (Const32->getOpcode() == HexagonISD::CONST32 &&
- ISD::isNormalLoad(LD)) {
- SDValue Base = Const32->getOperand(0);
- EVT LoadedVT = LD->getMemoryVT();
- int64_t Offset = cast<GlobalAddressSDNode>(Base)->getOffset();
- if (Offset != 0 && OffsetFitsS11(LoadedVT, Offset)) {
- MVT PointerTy = getTargetLowering()->getPointerTy();
- const GlobalValue* GV =
- cast<GlobalAddressSDNode>(Base)->getGlobal();
- SDValue TargAddr =
- CurDAG->getTargetGlobalAddress(GV, dl, PointerTy, 0);
- SDNode* NewBase = CurDAG->getMachineNode(Hexagon::CONST32_set,
- dl, PointerTy,
- TargAddr);
- // Figure out base + offset opcode
- if (LoadedVT == MVT::i64) Opcode = Hexagon::L2_loadrd_io;
- else if (LoadedVT == MVT::i32) Opcode = Hexagon::L2_loadri_io;
- else if (LoadedVT == MVT::i16) Opcode = Hexagon::L2_loadrh_io;
- else if (LoadedVT == MVT::i8) Opcode = Hexagon::L2_loadrb_io;
- else llvm_unreachable("unknown memory type");
-
- // Build indexed load.
- SDValue TargetConstOff = CurDAG->getTargetConstant(Offset, PointerTy);
- SDNode* Result = CurDAG->getMachineNode(Opcode, dl,
- LD->getValueType(0),
- MVT::Other,
- SDValue(NewBase,0),
- TargetConstOff,
- Chain);
- MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
- MemOp[0] = LD->getMemOperand();
- cast<MachineSDNode>(Result)->setMemRefs(MemOp, MemOp + 1);
- ReplaceUses(LD, Result);
- return Result;
- }
- }
-
- return SelectCode(LD);
-}
-
-
SDNode *HexagonDAGToDAGISel::SelectIndexedLoadSignExtend64(LoadSDNode *LD,
unsigned Opcode,
- SDLoc dl)
-{
+ SDLoc dl) {
SDValue Chain = LD->getChain();
EVT LoadedVT = LD->getMemoryVT();
SDValue Base = LD->getBasePtr();
SDValue Offset = LD->getOffset();
SDNode *OffsetNode = Offset.getNode();
int32_t Val = cast<ConstantSDNode>(OffsetNode)->getSExtValue();
- SDValue N1 = LD->getOperand(1);
- SDValue CPTmpN1_0;
- SDValue CPTmpN1_1;
-
- if (SelectADDRriS11_2(N1, CPTmpN1_0, CPTmpN1_1) &&
- N1.getNode()->getValueType(0) == MVT::i32) {
- const HexagonInstrInfo *TII = Subtarget->getInstrInfo();
- if (TII->isValidAutoIncImm(LoadedVT, Val)) {
- SDValue TargetConst = CurDAG->getTargetConstant(Val, MVT::i32);
- SDNode *Result_1 = CurDAG->getMachineNode(Opcode, dl, MVT::i32, MVT::i32,
- MVT::Other, Base, TargetConst,
- Chain);
- SDNode *Result_2 = CurDAG->getMachineNode(Hexagon::A2_sxtw, dl, MVT::i64,
- SDValue(Result_1, 0));
- MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
- MemOp[0] = LD->getMemOperand();
- cast<MachineSDNode>(Result_1)->setMemRefs(MemOp, MemOp + 1);
- const SDValue Froms[] = { SDValue(LD, 0),
- SDValue(LD, 1),
- SDValue(LD, 2)
- };
- const SDValue Tos[] = { SDValue(Result_2, 0),
- SDValue(Result_1, 1),
- SDValue(Result_1, 2)
- };
- ReplaceUses(Froms, Tos, 3);
- return Result_2;
- }
- SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32);
- SDValue TargetConstVal = CurDAG->getTargetConstant(Val, MVT::i32);
- SDNode *Result_1 = CurDAG->getMachineNode(Opcode, dl, MVT::i32,
- MVT::Other, Base, TargetConst0,
+
+ const HexagonInstrInfo &TII = *HST->getInstrInfo();
+ if (TII.isValidAutoIncImm(LoadedVT, Val)) {
+ SDValue TargetConst = CurDAG->getTargetConstant(Val, MVT::i32);
+ SDNode *Result_1 = CurDAG->getMachineNode(Opcode, dl, MVT::i32, MVT::i32,
+ MVT::Other, Base, TargetConst,
Chain);
- SDNode *Result_2 = CurDAG->getMachineNode(Hexagon::A2_sxtw, dl,
- MVT::i64, SDValue(Result_1, 0));
- SDNode* Result_3 = CurDAG->getMachineNode(Hexagon::A2_addi, dl,
- MVT::i32, Base, TargetConstVal,
- SDValue(Result_1, 1));
+ SDNode *Result_2 = CurDAG->getMachineNode(Hexagon::A2_sxtw, dl, MVT::i64,
+ SDValue(Result_1, 0));
MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
MemOp[0] = LD->getMemOperand();
cast<MachineSDNode>(Result_1)->setMemRefs(MemOp, MemOp + 1);
const SDValue Froms[] = { SDValue(LD, 0),
SDValue(LD, 1),
- SDValue(LD, 2)
- };
+ SDValue(LD, 2) };
const SDValue Tos[] = { SDValue(Result_2, 0),
- SDValue(Result_3, 0),
- SDValue(Result_1, 1)
- };
+ SDValue(Result_1, 1),
+ SDValue(Result_1, 2) };
ReplaceUses(Froms, Tos, 3);
return Result_2;
}
- return SelectCode(LD);
+
+ SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32);
+ SDValue TargetConstVal = CurDAG->getTargetConstant(Val, MVT::i32);
+ SDNode *Result_1 = CurDAG->getMachineNode(Opcode, dl, MVT::i32, MVT::Other,
+ Base, TargetConst0, Chain);
+ SDNode *Result_2 = CurDAG->getMachineNode(Hexagon::A2_sxtw, dl, MVT::i64,
+ SDValue(Result_1, 0));
+ SDNode* Result_3 = CurDAG->getMachineNode(Hexagon::A2_addi, dl, MVT::i32,
+ Base, TargetConstVal,
+ SDValue(Result_1, 1));
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = LD->getMemOperand();
+ cast<MachineSDNode>(Result_1)->setMemRefs(MemOp, MemOp + 1);
+ const SDValue Froms[] = { SDValue(LD, 0),
+ SDValue(LD, 1),
+ SDValue(LD, 2) };
+ const SDValue Tos[] = { SDValue(Result_2, 0),
+ SDValue(Result_3, 0),
+ SDValue(Result_1, 1) };
+ ReplaceUses(Froms, Tos, 3);
+ return Result_2;
}
SDNode *HexagonDAGToDAGISel::SelectIndexedLoadZeroExtend64(LoadSDNode *LD,
unsigned Opcode,
- SDLoc dl)
-{
+ SDLoc dl) {
SDValue Chain = LD->getChain();
EVT LoadedVT = LD->getMemoryVT();
SDValue Base = LD->getBasePtr();
SDValue Offset = LD->getOffset();
SDNode *OffsetNode = Offset.getNode();
int32_t Val = cast<ConstantSDNode>(OffsetNode)->getSExtValue();
- SDValue N1 = LD->getOperand(1);
- SDValue CPTmpN1_0;
- SDValue CPTmpN1_1;
-
- if (SelectADDRriS11_2(N1, CPTmpN1_0, CPTmpN1_1) &&
- N1.getNode()->getValueType(0) == MVT::i32) {
- const HexagonInstrInfo *TII = Subtarget->getInstrInfo();
- if (TII->isValidAutoIncImm(LoadedVT, Val)) {
- SDValue TargetConstVal = CurDAG->getTargetConstant(Val, MVT::i32);
- SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32);
- SDNode *Result_1 = CurDAG->getMachineNode(Opcode, dl, MVT::i32,
- MVT::i32, MVT::Other, Base,
- TargetConstVal, Chain);
- SDNode *Result_2 = CurDAG->getMachineNode(Hexagon::A2_tfrsi, dl, MVT::i32,
- TargetConst0);
- SDNode *Result_3 = CurDAG->getMachineNode(Hexagon::A2_combinew, dl,
- MVT::i64, MVT::Other,
- SDValue(Result_2,0),
- SDValue(Result_1,0));
- MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
- MemOp[0] = LD->getMemOperand();
- cast<MachineSDNode>(Result_1)->setMemRefs(MemOp, MemOp + 1);
- const SDValue Froms[] = { SDValue(LD, 0),
- SDValue(LD, 1),
- SDValue(LD, 2)
- };
- const SDValue Tos[] = { SDValue(Result_3, 0),
- SDValue(Result_1, 1),
- SDValue(Result_1, 2)
- };
- ReplaceUses(Froms, Tos, 3);
- return Result_3;
- }
- // Generate an indirect load.
- SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32);
+ const HexagonInstrInfo &TII = *HST->getInstrInfo();
+ if (TII.isValidAutoIncImm(LoadedVT, Val)) {
SDValue TargetConstVal = CurDAG->getTargetConstant(Val, MVT::i32);
+ SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32);
SDNode *Result_1 = CurDAG->getMachineNode(Opcode, dl, MVT::i32,
- MVT::Other,
- Base, TargetConst0, Chain);
- SDNode *Result_2 = CurDAG->getMachineNode(Hexagon::A2_tfrsi, dl, MVT::i32,
- TargetConst0);
- SDNode *Result_3 = CurDAG->getMachineNode(Hexagon::A2_combinew, dl,
+ MVT::i32, MVT::Other, Base,
+ TargetConstVal, Chain);
+ SDNode *Result_2 = CurDAG->getMachineNode(Hexagon::A4_combineir, dl,
MVT::i64, MVT::Other,
- SDValue(Result_2,0),
+ TargetConst0,
SDValue(Result_1,0));
- // Add offset to base.
- SDNode* Result_4 = CurDAG->getMachineNode(Hexagon::A2_addi, dl, MVT::i32,
- Base, TargetConstVal,
- SDValue(Result_1, 1));
MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
MemOp[0] = LD->getMemOperand();
cast<MachineSDNode>(Result_1)->setMemRefs(MemOp, MemOp + 1);
const SDValue Froms[] = { SDValue(LD, 0),
SDValue(LD, 1),
- SDValue(LD, 2)
- };
- const SDValue Tos[] = { SDValue(Result_3, 0), // Load value.
- SDValue(Result_4, 0), // New address.
- SDValue(Result_1, 1)
- };
+ SDValue(LD, 2) };
+ const SDValue Tos[] = { SDValue(Result_2, 0),
+ SDValue(Result_1, 1),
+ SDValue(Result_1, 2) };
ReplaceUses(Froms, Tos, 3);
- return Result_3;
+ return Result_2;
}
- return SelectCode(LD);
+ // Generate an indirect load.
+ SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32);
+ SDValue TargetConstVal = CurDAG->getTargetConstant(Val, MVT::i32);
+ SDNode *Result_1 = CurDAG->getMachineNode(Opcode, dl, MVT::i32,
+ MVT::Other, Base, TargetConst0,
+ Chain);
+ SDNode *Result_2 = CurDAG->getMachineNode(Hexagon::A4_combineir, dl,
+ MVT::i64, MVT::Other,
+ TargetConst0,
+ SDValue(Result_1,0));
+ // Add offset to base.
+ SDNode* Result_3 = CurDAG->getMachineNode(Hexagon::A2_addi, dl, MVT::i32,
+ Base, TargetConstVal,
+ SDValue(Result_1, 1));
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = LD->getMemOperand();
+ cast<MachineSDNode>(Result_1)->setMemRefs(MemOp, MemOp + 1);
+ const SDValue Froms[] = { SDValue(LD, 0),
+ SDValue(LD, 1),
+ SDValue(LD, 2) };
+ const SDValue Tos[] = { SDValue(Result_2, 0), // Load value.
+ SDValue(Result_3, 0), // New address.
+ SDValue(Result_1, 1) };
+ ReplaceUses(Froms, Tos, 3);
+ return Result_2;
}
@@ -555,45 +369,44 @@ SDNode *HexagonDAGToDAGISel::SelectIndexedLoad(LoadSDNode *LD, SDLoc dl) {
EVT LoadedVT = LD->getMemoryVT();
unsigned Opcode = 0;
- // Check for zero ext loads.
- bool zextval = (LD->getExtensionType() == ISD::ZEXTLOAD);
+ // Check for zero extended loads. Treat any-extend loads as zero extended
+ // loads.
+ ISD::LoadExtType ExtType = LD->getExtensionType();
+ bool IsZeroExt = (ExtType == ISD::ZEXTLOAD || ExtType == ISD::EXTLOAD);
// Figure out the opcode.
- const HexagonInstrInfo *TII = Subtarget->getInstrInfo();
+ const HexagonInstrInfo &TII = *HST->getInstrInfo();
if (LoadedVT == MVT::i64) {
- if (TII->isValidAutoIncImm(LoadedVT, Val))
+ if (TII.isValidAutoIncImm(LoadedVT, Val))
Opcode = Hexagon::L2_loadrd_pi;
else
Opcode = Hexagon::L2_loadrd_io;
} else if (LoadedVT == MVT::i32) {
- if (TII->isValidAutoIncImm(LoadedVT, Val))
+ if (TII.isValidAutoIncImm(LoadedVT, Val))
Opcode = Hexagon::L2_loadri_pi;
else
Opcode = Hexagon::L2_loadri_io;
} else if (LoadedVT == MVT::i16) {
- if (TII->isValidAutoIncImm(LoadedVT, Val))
- Opcode = zextval ? Hexagon::L2_loadruh_pi : Hexagon::L2_loadrh_pi;
+ if (TII.isValidAutoIncImm(LoadedVT, Val))
+ Opcode = IsZeroExt ? Hexagon::L2_loadruh_pi : Hexagon::L2_loadrh_pi;
else
- Opcode = zextval ? Hexagon::L2_loadruh_io : Hexagon::L2_loadrh_io;
+ Opcode = IsZeroExt ? Hexagon::L2_loadruh_io : Hexagon::L2_loadrh_io;
} else if (LoadedVT == MVT::i8) {
- if (TII->isValidAutoIncImm(LoadedVT, Val))
- Opcode = zextval ? Hexagon::L2_loadrub_pi : Hexagon::L2_loadrb_pi;
+ if (TII.isValidAutoIncImm(LoadedVT, Val))
+ Opcode = IsZeroExt ? Hexagon::L2_loadrub_pi : Hexagon::L2_loadrb_pi;
else
- Opcode = zextval ? Hexagon::L2_loadrub_io : Hexagon::L2_loadrb_io;
+ Opcode = IsZeroExt ? Hexagon::L2_loadrub_io : Hexagon::L2_loadrb_io;
} else
llvm_unreachable("unknown memory type");
- // For zero ext i64 loads, we need to add combine instructions.
- if (LD->getValueType(0) == MVT::i64 &&
- LD->getExtensionType() == ISD::ZEXTLOAD) {
+ // For zero extended i64 loads, we need to add combine instructions.
+ if (LD->getValueType(0) == MVT::i64 && IsZeroExt)
return SelectIndexedLoadZeroExtend64(LD, Opcode, dl);
- }
- if (LD->getValueType(0) == MVT::i64 &&
- LD->getExtensionType() == ISD::SEXTLOAD) {
- // Handle sign ext i64 loads.
+ // Handle sign extended i64 loads.
+ if (LD->getValueType(0) == MVT::i64 && ExtType == ISD::SEXTLOAD)
return SelectIndexedLoadSignExtend64(LD, Opcode, dl);
- }
- if (TII->isValidAutoIncImm(LoadedVT, Val)) {
+
+ if (TII.isValidAutoIncImm(LoadedVT, Val)) {
SDValue TargetConstVal = CurDAG->getTargetConstant(Val, MVT::i32);
SDNode* Result = CurDAG->getMachineNode(Opcode, dl,
LD->getValueType(0),
@@ -649,7 +462,7 @@ SDNode *HexagonDAGToDAGISel::SelectLoad(SDNode *N) {
if (AM != ISD::UNINDEXED) {
result = SelectIndexedLoad(LD, dl);
} else {
- result = SelectBaseOffsetLoad(LD, dl);
+ result = SelectCode(LD);
}
return result;
@@ -665,13 +478,12 @@ SDNode *HexagonDAGToDAGISel::SelectIndexedStore(StoreSDNode *ST, SDLoc dl) {
// Get the constant value.
int32_t Val = cast<ConstantSDNode>(OffsetNode)->getSExtValue();
EVT StoredVT = ST->getMemoryVT();
+ EVT ValueVT = Value.getValueType();
// Offset value must be within representable range
// and must have correct alignment properties.
- const HexagonInstrInfo *TII = Subtarget->getInstrInfo();
- if (TII->isValidAutoIncImm(StoredVT, Val)) {
- SDValue Ops[] = {Base, CurDAG->getTargetConstant(Val, MVT::i32), Value,
- Chain};
+ const HexagonInstrInfo &TII = *HST->getInstrInfo();
+ if (TII.isValidAutoIncImm(StoredVT, Val)) {
unsigned Opcode = 0;
// Figure out the post inc version of opcode.
@@ -681,6 +493,13 @@ SDNode *HexagonDAGToDAGISel::SelectIndexedStore(StoreSDNode *ST, SDLoc dl) {
else if (StoredVT == MVT::i8) Opcode = Hexagon::S2_storerb_pi;
else llvm_unreachable("unknown memory type");
+ if (ST->isTruncatingStore() && ValueVT.getSizeInBits() == 64) {
+ assert(StoredVT.getSizeInBits() < 64 && "Not a truncating store");
+ Value = CurDAG->getTargetExtractSubreg(Hexagon::subreg_loreg,
+ dl, MVT::i32, Value);
+ }
+ SDValue Ops[] = {Base, CurDAG->getTargetConstant(Val, MVT::i32), Value,
+ Chain};
// Build post increment store.
SDNode* Result = CurDAG->getMachineNode(Opcode, dl, MVT::i32,
MVT::Other, Ops);
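The truncating-store fix above works because an i64 to i32 truncating store writes only the low word; extracting subreg_loreg first lets the existing 32-bit post-increment store opcode be reused. The value-level semantics are just a narrowing, as this minimal model shows:

    #include <cstdint>
    #include <iostream>

    // A truncating i64 -> i32 store keeps only the low 32 bits, which is
    // what extracting the low subregister before the store achieves.
    uint32_t lowWord(uint64_t V) { return static_cast<uint32_t>(V); }

    int main() {
      std::cout << std::hex << lowWord(0x1122334455667788ULL) << "\n";
      // prints 55667788
    }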
@@ -694,7 +513,8 @@ SDNode *HexagonDAGToDAGISel::SelectIndexedStore(StoreSDNode *ST, SDLoc dl) {
}
// Note: Order of operands matches the def of instruction:
- // def STrid : STInst<(outs), (ins MEMri:$addr, DoubleRegs:$src1), ...
+ // def S2_storerd_io
+ // : STInst<(outs), (ins IntRegs:$base, imm:$offset, DoubleRegs:$src1), ...
// and it differs for POST_ST* for instance.
SDValue Ops[] = { Base, CurDAG->getTargetConstant(0, MVT::i32), Value,
Chain};
@@ -724,61 +544,6 @@ SDNode *HexagonDAGToDAGISel::SelectIndexedStore(StoreSDNode *ST, SDLoc dl) {
return Result_2;
}
-
-SDNode *HexagonDAGToDAGISel::SelectBaseOffsetStore(StoreSDNode *ST,
- SDLoc dl) {
- SDValue Chain = ST->getChain();
- SDNode* Const32 = ST->getBasePtr().getNode();
- SDValue Value = ST->getValue();
- unsigned Opcode = 0;
-
- // Try to lower stores of GlobalAdresses into indexed stores. Custom
- // lowering for GlobalAddress nodes has already turned it into a
- // CONST32. Avoid truncating stores for the moment. Post-inc stores
- // do the same. Don't think there's a reason for it, so will file a
- // bug to fix.
- if ((Const32->getOpcode() == HexagonISD::CONST32) &&
- !(Value.getValueType() == MVT::i64 && ST->isTruncatingStore())) {
- SDValue Base = Const32->getOperand(0);
- if (Base.getOpcode() == ISD::TargetGlobalAddress) {
- EVT StoredVT = ST->getMemoryVT();
- int64_t Offset = cast<GlobalAddressSDNode>(Base)->getOffset();
- if (Offset != 0 && OffsetFitsS11(StoredVT, Offset)) {
- MVT PointerTy = getTargetLowering()->getPointerTy();
- const GlobalValue* GV =
- cast<GlobalAddressSDNode>(Base)->getGlobal();
- SDValue TargAddr =
- CurDAG->getTargetGlobalAddress(GV, dl, PointerTy, 0);
- SDNode* NewBase = CurDAG->getMachineNode(Hexagon::CONST32_set,
- dl, PointerTy,
- TargAddr);
-
- // Figure out base + offset opcode
- if (StoredVT == MVT::i64) Opcode = Hexagon::S2_storerd_io;
- else if (StoredVT == MVT::i32) Opcode = Hexagon::S2_storeri_io;
- else if (StoredVT == MVT::i16) Opcode = Hexagon::S2_storerh_io;
- else if (StoredVT == MVT::i8) Opcode = Hexagon::S2_storerb_io;
- else llvm_unreachable("unknown memory type");
-
- SDValue Ops[] = {SDValue(NewBase,0),
- CurDAG->getTargetConstant(Offset,PointerTy),
- Value, Chain};
- // build indexed store
- SDNode* Result = CurDAG->getMachineNode(Opcode, dl,
- MVT::Other, Ops);
- MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
- MemOp[0] = ST->getMemOperand();
- cast<MachineSDNode>(Result)->setMemRefs(MemOp, MemOp + 1);
- ReplaceUses(ST, Result);
- return Result;
- }
- }
- }
-
- return SelectCode(ST);
-}
-
-
SDNode *HexagonDAGToDAGISel::SelectStore(SDNode *N) {
SDLoc dl(N);
StoreSDNode *ST = cast<StoreSDNode>(N);
@@ -789,7 +554,7 @@ SDNode *HexagonDAGToDAGISel::SelectStore(SDNode *N) {
return SelectIndexedStore(ST, dl);
}
- return SelectBaseOffsetStore(ST, dl);
+ return SelectCode(ST);
}
SDNode *HexagonDAGToDAGISel::SelectMul(SDNode *N) {
@@ -875,187 +640,6 @@ SDNode *HexagonDAGToDAGISel::SelectMul(SDNode *N) {
return SelectCode(N);
}
-
-SDNode *HexagonDAGToDAGISel::SelectSelect(SDNode *N) {
- SDLoc dl(N);
- SDValue N0 = N->getOperand(0);
- if (N0.getOpcode() == ISD::SETCC) {
- SDValue N00 = N0.getOperand(0);
- if (N00.getOpcode() == ISD::SIGN_EXTEND_INREG) {
- SDValue N000 = N00.getOperand(0);
- SDValue N001 = N00.getOperand(1);
- if (cast<VTSDNode>(N001)->getVT() == MVT::i16) {
- SDValue N01 = N0.getOperand(1);
- SDValue N02 = N0.getOperand(2);
-
- // Pattern: (select:i32 (setcc:i1 (sext_inreg:i32 IntRegs:i32:$src2,
- // i16:Other),IntRegs:i32:$src1, SETLT:Other),IntRegs:i32:$src1,
- // IntRegs:i32:$src2)
- // Emits: (MAXh_rr:i32 IntRegs:i32:$src1, IntRegs:i32:$src2)
- // Pattern complexity = 9 cost = 1 size = 0.
- if (cast<CondCodeSDNode>(N02)->get() == ISD::SETLT) {
- SDValue N1 = N->getOperand(1);
- if (N01 == N1) {
- SDValue N2 = N->getOperand(2);
- if (N000 == N2 &&
- N0.getNode()->getValueType(N0.getResNo()) == MVT::i1 &&
- N00.getNode()->getValueType(N00.getResNo()) == MVT::i32) {
- SDNode *SextNode = CurDAG->getMachineNode(Hexagon::A2_sxth, dl,
- MVT::i32, N000);
- SDNode *Result = CurDAG->getMachineNode(Hexagon::A2_max, dl,
- MVT::i32,
- SDValue(SextNode, 0),
- N1);
- ReplaceUses(N, Result);
- return Result;
- }
- }
- }
-
- // Pattern: (select:i32 (setcc:i1 (sext_inreg:i32 IntRegs:i32:$src2,
- // i16:Other), IntRegs:i32:$src1, SETGT:Other), IntRegs:i32:$src1,
- // IntRegs:i32:$src2)
- // Emits: (MINh_rr:i32 IntRegs:i32:$src1, IntRegs:i32:$src2)
- // Pattern complexity = 9 cost = 1 size = 0.
- if (cast<CondCodeSDNode>(N02)->get() == ISD::SETGT) {
- SDValue N1 = N->getOperand(1);
- if (N01 == N1) {
- SDValue N2 = N->getOperand(2);
- if (N000 == N2 &&
- N0.getNode()->getValueType(N0.getResNo()) == MVT::i1 &&
- N00.getNode()->getValueType(N00.getResNo()) == MVT::i32) {
- SDNode *SextNode = CurDAG->getMachineNode(Hexagon::A2_sxth, dl,
- MVT::i32, N000);
- SDNode *Result = CurDAG->getMachineNode(Hexagon::A2_min, dl,
- MVT::i32,
- SDValue(SextNode, 0),
- N1);
- ReplaceUses(N, Result);
- return Result;
- }
- }
- }
- }
- }
- }
-
- return SelectCode(N);
-}
-
-
-SDNode *HexagonDAGToDAGISel::SelectTruncate(SDNode *N) {
- SDLoc dl(N);
- SDValue Shift = N->getOperand(0);
-
- //
- // %conv.i = sext i32 %tmp1 to i64
- // %conv2.i = sext i32 %add to i64
- // %mul.i = mul nsw i64 %conv2.i, %conv.i
- // %shr5.i = lshr i64 %mul.i, 32
- // %conv3.i = trunc i64 %shr5.i to i32
- //
- // --- match with the following ---
- //
- // %conv3.i = mpy (%tmp1, %add)
- //
- // Trunc to i32.
- if (N->getValueType(0) == MVT::i32) {
- // Trunc from i64.
- if (Shift.getNode()->getValueType(0) == MVT::i64) {
- // Trunc child is logical shift right.
- if (Shift.getOpcode() != ISD::SRL) {
- return SelectCode(N);
- }
-
- SDValue ShiftOp0 = Shift.getOperand(0);
- SDValue ShiftOp1 = Shift.getOperand(1);
-
- // Shift by const 32
- if (ShiftOp1.getOpcode() != ISD::Constant) {
- return SelectCode(N);
- }
-
- int32_t ShiftConst =
- cast<ConstantSDNode>(ShiftOp1.getNode())->getSExtValue();
- if (ShiftConst != 32) {
- return SelectCode(N);
- }
-
- // Shifting a i64 signed multiply
- SDValue Mul = ShiftOp0;
- if (Mul.getOpcode() != ISD::MUL) {
- return SelectCode(N);
- }
-
- SDValue MulOp0 = Mul.getOperand(0);
- SDValue MulOp1 = Mul.getOperand(1);
-
- SDValue OP0;
- SDValue OP1;
-
- // Handle sign_extend and sextload
- if (MulOp0.getOpcode() == ISD::SIGN_EXTEND) {
- SDValue Sext0 = MulOp0.getOperand(0);
- if (Sext0.getNode()->getValueType(0) != MVT::i32) {
- return SelectCode(N);
- }
-
- OP0 = Sext0;
- } else if (MulOp0.getOpcode() == ISD::LOAD) {
- LoadSDNode *LD = cast<LoadSDNode>(MulOp0.getNode());
- if (LD->getMemoryVT() != MVT::i32 ||
- LD->getExtensionType() != ISD::SEXTLOAD ||
- LD->getAddressingMode() != ISD::UNINDEXED) {
- return SelectCode(N);
- }
-
- SDValue Chain = LD->getChain();
- SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32);
- OP0 = SDValue(CurDAG->getMachineNode(Hexagon::L2_loadri_io, dl, MVT::i32,
- MVT::Other,
- LD->getBasePtr(),
- TargetConst0, Chain), 0);
- } else {
- return SelectCode(N);
- }
-
- // Same goes for the second operand.
- if (MulOp1.getOpcode() == ISD::SIGN_EXTEND) {
- SDValue Sext1 = MulOp1.getOperand(0);
- if (Sext1.getNode()->getValueType(0) != MVT::i32)
- return SelectCode(N);
-
- OP1 = Sext1;
- } else if (MulOp1.getOpcode() == ISD::LOAD) {
- LoadSDNode *LD = cast<LoadSDNode>(MulOp1.getNode());
- if (LD->getMemoryVT() != MVT::i32 ||
- LD->getExtensionType() != ISD::SEXTLOAD ||
- LD->getAddressingMode() != ISD::UNINDEXED) {
- return SelectCode(N);
- }
-
- SDValue Chain = LD->getChain();
- SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32);
- OP1 = SDValue(CurDAG->getMachineNode(Hexagon::L2_loadri_io, dl, MVT::i32,
- MVT::Other,
- LD->getBasePtr(),
- TargetConst0, Chain), 0);
- } else {
- return SelectCode(N);
- }
-
- // Generate a mpy instruction.
- SDNode *Result = CurDAG->getMachineNode(Hexagon::M2_mpy_up, dl, MVT::i32,
- OP0, OP1);
- ReplaceUses(N, Result);
- return Result;
- }
- }
-
- return SelectCode(N);
-}
-
-
SDNode *HexagonDAGToDAGISel::SelectSHL(SDNode *N) {
SDLoc dl(N);
if (N->getValueType(0) == MVT::i32) {
@@ -1134,6 +718,36 @@ SDNode *HexagonDAGToDAGISel::SelectSHL(SDNode *N) {
//
SDNode *HexagonDAGToDAGISel::SelectZeroExtend(SDNode *N) {
SDLoc dl(N);
+
+ SDValue Op0 = N->getOperand(0);
+ EVT OpVT = Op0.getValueType();
+ unsigned OpBW = OpVT.getSizeInBits();
+
+ // Special handling for zero-extending a vector of booleans.
+ if (OpVT.isVector() && OpVT.getVectorElementType() == MVT::i1 && OpBW <= 64) {
+ SDNode *Mask = CurDAG->getMachineNode(Hexagon::C2_mask, dl, MVT::i64, Op0);
+ unsigned NE = OpVT.getVectorNumElements();
+ EVT ExVT = N->getValueType(0);
+ unsigned ES = ExVT.getVectorElementType().getSizeInBits();
+ uint64_t MV = 0, Bit = 1;
+ for (unsigned i = 0; i < NE; ++i) {
+ MV |= Bit;
+ Bit <<= ES;
+ }
+ SDValue Ones = CurDAG->getTargetConstant(MV, MVT::i64);
+ SDNode *OnesReg = CurDAG->getMachineNode(Hexagon::CONST64_Int_Real, dl,
+ MVT::i64, Ones);
+ if (ExVT.getSizeInBits() == 32) {
+ SDNode *And = CurDAG->getMachineNode(Hexagon::A2_andp, dl, MVT::i64,
+ SDValue(Mask,0), SDValue(OnesReg,0));
+ SDValue SubR = CurDAG->getTargetConstant(Hexagon::subreg_loreg, MVT::i32);
+ return CurDAG->getMachineNode(Hexagon::EXTRACT_SUBREG, dl, ExVT,
+ SDValue(And,0), SubR);
+ }
+ return CurDAG->getMachineNode(Hexagon::A2_andp, dl, ExVT,
+ SDValue(Mask,0), SDValue(OnesReg,0));
+ }
+
SDNode *IsIntrinsic = N->getOperand(0).getNode();
if ((IsIntrinsic->getOpcode() == ISD::INTRINSIC_WO_CHAIN)) {
unsigned ID =
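The loop in the hunk above builds a constant with one set bit per vector lane, stepped by the widened element size ES, so ANDing it with the C2_mask result keeps exactly one bit per lane. For example, zero-extending v4i1 to v4i8 (NE = 4, ES = 8) gives MV = 0x01010101. A stand-alone reproduction of the computation:

    #include <cstdint>
    #include <iostream>

    // Rebuilds the MV constant from SelectZeroExtend: one bit per lane,
    // with lanes spaced by the extended element size ES.
    uint64_t laneMask(unsigned NE, unsigned ES) {
      uint64_t MV = 0, Bit = 1;
      for (unsigned i = 0; i < NE; ++i) {
        MV |= Bit;
        Bit <<= ES;
      }
      return MV;
    }

    int main() {
      std::cout << std::hex;
      std::cout << laneMask(4, 8) << "\n";  // 1010101: v4i1 -> v4i8
      std::cout << laneMask(2, 16) << "\n"; // 10001:   v2i1 -> v2i16
    }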
@@ -1141,7 +755,7 @@ SDNode *HexagonDAGToDAGISel::SelectZeroExtend(SDNode *N) {
if (doesIntrinsicReturnPredicate(ID)) {
// Now we need to differentiate target data types.
if (N->getValueType(0) == MVT::i64) {
- // Convert the zero_extend to Rs = Pd followed by COMBINE_rr(0,Rs).
+ // Convert the zero_extend to Rs = Pd followed by A2_combinew(0,Rs).
SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32);
SDNode *Result_1 = CurDAG->getMachineNode(Hexagon::C2_tfrpr, dl,
MVT::i32,
@@ -1171,6 +785,203 @@ SDNode *HexagonDAGToDAGISel::SelectZeroExtend(SDNode *N) {
}
//
+// Check for the circular load/store and bit-reverse load/store intrinsics
+// in order to select the correct lowered operation.
+//
+SDNode *HexagonDAGToDAGISel::SelectIntrinsicWChain(SDNode *N) {
+ unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
+ if (IntNo == Intrinsic::hexagon_circ_ldd ||
+ IntNo == Intrinsic::hexagon_circ_ldw ||
+ IntNo == Intrinsic::hexagon_circ_lduh ||
+ IntNo == Intrinsic::hexagon_circ_ldh ||
+ IntNo == Intrinsic::hexagon_circ_ldub ||
+ IntNo == Intrinsic::hexagon_circ_ldb) {
+ SDLoc dl(N);
+ SDValue Chain = N->getOperand(0);
+ SDValue Base = N->getOperand(2);
+ SDValue Load = N->getOperand(3);
+ SDValue ModifierExpr = N->getOperand(4);
+ SDValue Offset = N->getOperand(5);
+
+    // We need to add the return type for the load. This intrinsic has
+ // two return types, one for the load and one for the post-increment.
+ // Only the *_ld instructions push the extra return type, and bump the
+ // result node operand number correspondingly.
+ std::vector<EVT> ResTys;
+ unsigned opc;
+ unsigned memsize, align;
+ MVT MvtSize = MVT::i32;
+
+ if (IntNo == Intrinsic::hexagon_circ_ldd) {
+ ResTys.push_back(MVT::i32);
+ ResTys.push_back(MVT::i64);
+ opc = Hexagon::L2_loadrd_pci_pseudo;
+ memsize = 8;
+ align = 8;
+ } else if (IntNo == Intrinsic::hexagon_circ_ldw) {
+ ResTys.push_back(MVT::i32);
+ ResTys.push_back(MVT::i32);
+ opc = Hexagon::L2_loadri_pci_pseudo;
+ memsize = 4;
+ align = 4;
+ } else if (IntNo == Intrinsic::hexagon_circ_ldh) {
+ ResTys.push_back(MVT::i32);
+ ResTys.push_back(MVT::i32);
+ opc = Hexagon::L2_loadrh_pci_pseudo;
+ memsize = 2;
+ align = 2;
+ MvtSize = MVT::i16;
+ } else if (IntNo == Intrinsic::hexagon_circ_lduh) {
+ ResTys.push_back(MVT::i32);
+ ResTys.push_back(MVT::i32);
+ opc = Hexagon::L2_loadruh_pci_pseudo;
+ memsize = 2;
+ align = 2;
+ MvtSize = MVT::i16;
+ } else if (IntNo == Intrinsic::hexagon_circ_ldb) {
+ ResTys.push_back(MVT::i32);
+ ResTys.push_back(MVT::i32);
+ opc = Hexagon::L2_loadrb_pci_pseudo;
+ memsize = 1;
+ align = 1;
+ MvtSize = MVT::i8;
+ } else if (IntNo == Intrinsic::hexagon_circ_ldub) {
+ ResTys.push_back(MVT::i32);
+ ResTys.push_back(MVT::i32);
+ opc = Hexagon::L2_loadrub_pci_pseudo;
+ memsize = 1;
+ align = 1;
+ MvtSize = MVT::i8;
+ } else
+ llvm_unreachable("no opc");
+
+ ResTys.push_back(MVT::Other);
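+    // At this point ResTys is { i32, i32 or i64, Other }: the
+    // post-incremented address, the loaded value, and the chain.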
+
+    // Copy over the arguments, which are mostly the same.
+ SmallVector<SDValue, 5> Ops;
+ Ops.push_back(Base);
+ Ops.push_back(Load);
+ Ops.push_back(ModifierExpr);
+ int32_t Val = cast<ConstantSDNode>(Offset.getNode())->getSExtValue();
+ Ops.push_back(CurDAG->getTargetConstant(Val, MVT::i32));
+ Ops.push_back(Chain);
+ SDNode* Result = CurDAG->getMachineNode(opc, dl, ResTys, Ops);
+
+ SDValue ST;
+ MachineMemOperand *Mem =
+ MF->getMachineMemOperand(MachinePointerInfo(),
+ MachineMemOperand::MOStore, memsize, align);
+ if (MvtSize != MVT::i32)
+ ST = CurDAG->getTruncStore(Chain, dl, SDValue(Result, 1), Load,
+ MvtSize, Mem);
+ else
+ ST = CurDAG->getStore(Chain, dl, SDValue(Result, 1), Load, Mem);
+
+ SDNode* Store = SelectStore(ST.getNode());
+
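+    // Reroute the intrinsic's users: its pointer result now comes from the
+    // machine node, and its chain comes from the synthesized store that
+    // writes the loaded value back through the Load pointer.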
+ const SDValue Froms[] = { SDValue(N, 0),
+ SDValue(N, 1) };
+ const SDValue Tos[] = { SDValue(Result, 0),
+ SDValue(Store, 0) };
+ ReplaceUses(Froms, Tos, 2);
+ return Result;
+ }
+
+ if (IntNo == Intrinsic::hexagon_brev_ldd ||
+ IntNo == Intrinsic::hexagon_brev_ldw ||
+ IntNo == Intrinsic::hexagon_brev_ldh ||
+ IntNo == Intrinsic::hexagon_brev_lduh ||
+ IntNo == Intrinsic::hexagon_brev_ldb ||
+ IntNo == Intrinsic::hexagon_brev_ldub) {
+ SDLoc dl(N);
+ SDValue Chain = N->getOperand(0);
+ SDValue Base = N->getOperand(2);
+ SDValue Load = N->getOperand(3);
+ SDValue ModifierExpr = N->getOperand(4);
+
+    // We need to add the return type for the load. This intrinsic has
+ // two return types, one for the load and one for the post-increment.
+ std::vector<EVT> ResTys;
+ unsigned opc;
+ unsigned memsize, align;
+ MVT MvtSize = MVT::i32;
+
+ if (IntNo == Intrinsic::hexagon_brev_ldd) {
+ ResTys.push_back(MVT::i32);
+ ResTys.push_back(MVT::i64);
+ opc = Hexagon::L2_loadrd_pbr_pseudo;
+ memsize = 8;
+ align = 8;
+ } else if (IntNo == Intrinsic::hexagon_brev_ldw) {
+ ResTys.push_back(MVT::i32);
+ ResTys.push_back(MVT::i32);
+ opc = Hexagon::L2_loadri_pbr_pseudo;
+ memsize = 4;
+ align = 4;
+ } else if (IntNo == Intrinsic::hexagon_brev_ldh) {
+ ResTys.push_back(MVT::i32);
+ ResTys.push_back(MVT::i32);
+ opc = Hexagon::L2_loadrh_pbr_pseudo;
+ memsize = 2;
+ align = 2;
+ MvtSize = MVT::i16;
+ } else if (IntNo == Intrinsic::hexagon_brev_lduh) {
+ ResTys.push_back(MVT::i32);
+ ResTys.push_back(MVT::i32);
+ opc = Hexagon::L2_loadruh_pbr_pseudo;
+ memsize = 2;
+ align = 2;
+ MvtSize = MVT::i16;
+ } else if (IntNo == Intrinsic::hexagon_brev_ldb) {
+ ResTys.push_back(MVT::i32);
+ ResTys.push_back(MVT::i32);
+ opc = Hexagon::L2_loadrb_pbr_pseudo;
+ memsize = 1;
+ align = 1;
+ MvtSize = MVT::i8;
+ } else if (IntNo == Intrinsic::hexagon_brev_ldub) {
+ ResTys.push_back(MVT::i32);
+ ResTys.push_back(MVT::i32);
+ opc = Hexagon::L2_loadrub_pbr_pseudo;
+ memsize = 1;
+ align = 1;
+ MvtSize = MVT::i8;
+ } else
+ llvm_unreachable("no opc");
+
+ ResTys.push_back(MVT::Other);
+
+    // Copy over the arguments, which are mostly the same.
+ SmallVector<SDValue, 4> Ops;
+ Ops.push_back(Base);
+ Ops.push_back(Load);
+ Ops.push_back(ModifierExpr);
+ Ops.push_back(Chain);
+ SDNode* Result = CurDAG->getMachineNode(opc, dl, ResTys, Ops);
+ SDValue ST;
+ MachineMemOperand *Mem =
+ MF->getMachineMemOperand(MachinePointerInfo(),
+ MachineMemOperand::MOStore, memsize, align);
+ if (MvtSize != MVT::i32)
+ ST = CurDAG->getTruncStore(Chain, dl, SDValue(Result, 1), Load,
+ MvtSize, Mem);
+ else
+ ST = CurDAG->getStore(Chain, dl, SDValue(Result, 1), Load, Mem);
+
+ SDNode* Store = SelectStore(ST.getNode());
+
+ const SDValue Froms[] = { SDValue(N, 0),
+ SDValue(N, 1) };
+ const SDValue Tos[] = { SDValue(Result, 0),
+ SDValue(Store, 0) };
+ ReplaceUses(Froms, Tos, 2);
+ return Result;
+ }
+
+ return SelectCode(N);
+}
+
+//
// Checking for intrinsics which have predicate registers as operand(s)
// and lowering to the actual intrinsic.
//
@@ -1217,37 +1028,20 @@ SDNode *HexagonDAGToDAGISel::SelectConstantFP(SDNode *N) {
return SelectCode(N);
}
-
//
// Map predicate true (encoded as -1 in LLVM) and false (0) to TFR pseudos.
//
SDNode *HexagonDAGToDAGISel::SelectConstant(SDNode *N) {
SDLoc dl(N);
if (N->getValueType(0) == MVT::i1) {
- SDNode* Result;
+    SDNode* Result = nullptr;
int32_t Val = cast<ConstantSDNode>(N)->getSExtValue();
if (Val == -1) {
- // Create the IntReg = 1 node.
- SDNode* IntRegTFR =
- CurDAG->getMachineNode(Hexagon::A2_tfrsi, dl, MVT::i32,
- CurDAG->getTargetConstant(0, MVT::i32));
-
- // Pd = IntReg
- SDNode* Pd = CurDAG->getMachineNode(Hexagon::C2_tfrrp, dl, MVT::i1,
- SDValue(IntRegTFR, 0));
-
- // not(Pd)
- SDNode* NotPd = CurDAG->getMachineNode(Hexagon::C2_not, dl, MVT::i1,
- SDValue(Pd, 0));
-
- // xor(not(Pd))
- Result = CurDAG->getMachineNode(Hexagon::C2_xor, dl, MVT::i1,
- SDValue(Pd, 0), SDValue(NotPd, 0));
-
- // We have just built:
- // Rs = Pd
- // Pd = xor(not(Pd), Pd)
-
+ Result = CurDAG->getMachineNode(Hexagon::TFR_PdTrue, dl, MVT::i1);
+ } else if (Val == 0) {
+ Result = CurDAG->getMachineNode(Hexagon::TFR_PdFalse, dl, MVT::i1);
+ }
+ if (Result) {
ReplaceUses(N, Result);
return Result;
}
@@ -1283,347 +1077,282 @@ SDNode *HexagonDAGToDAGISel::SelectAdd(SDNode *N) {
return Result;
}
-
-SDNode *HexagonDAGToDAGISel::Select(SDNode *N) {
- if (N->isMachineOpcode()) {
- N->setNodeId(-1);
- return nullptr; // Already selected.
- }
-
-
- switch (N->getOpcode()) {
- case ISD::Constant:
- return SelectConstant(N);
-
- case ISD::ConstantFP:
- return SelectConstantFP(N);
-
- case ISD::ADD:
- return SelectAdd(N);
-
- case ISD::SHL:
- return SelectSHL(N);
-
- case ISD::LOAD:
- return SelectLoad(N);
-
- case ISD::STORE:
- return SelectStore(N);
-
- case ISD::SELECT:
- return SelectSelect(N);
-
- case ISD::TRUNCATE:
- return SelectTruncate(N);
-
- case ISD::MUL:
- return SelectMul(N);
-
- case ISD::ZERO_EXTEND:
- return SelectZeroExtend(N);
-
- case ISD::INTRINSIC_WO_CHAIN:
- return SelectIntrinsicWOChain(N);
- }
-
- return SelectCode(N);
-}
-
-
//
-// Hexagon_TODO: Five functions for ADDRri?! Surely there must be a better way
-// to define these instructions.
+// Map the following, where possible.
+// AND/FABS -> clrbit
+// OR -> setbit
+// XOR/FNEG -> togglebit.
//
-bool HexagonDAGToDAGISel::SelectADDRri(SDValue& Addr, SDValue &Base,
- SDValue &Offset) {
- if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
- Addr.getOpcode() == ISD::TargetGlobalAddress)
- return false; // Direct calls.
-
- if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
- Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
- Offset = CurDAG->getTargetConstant(0, MVT::i32);
- return true;
- }
- Base = Addr;
- Offset = CurDAG->getTargetConstant(0, MVT::i32);
- return true;
-}
+SDNode *HexagonDAGToDAGISel::SelectBitOp(SDNode *N) {
+ SDLoc dl(N);
+ EVT ValueVT = N->getValueType(0);
+  // We handle only 32- and 64-bit bitwise operations.
+ if (!(ValueVT == MVT::i32 || ValueVT == MVT::i64 ||
+ ValueVT == MVT::f32 || ValueVT == MVT::f64))
+ return SelectCode(N);
-bool HexagonDAGToDAGISel::SelectADDRriS11_0(SDValue& Addr, SDValue &Base,
- SDValue &Offset) {
- if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
- Addr.getOpcode() == ISD::TargetGlobalAddress)
- return false; // Direct calls.
+  // We handle fabs and fneg only on V5.
+ unsigned Opc = N->getOpcode();
+ if ((Opc == ISD::FABS || Opc == ISD::FNEG) && !HST->hasV5TOps())
+ return SelectCode(N);
- if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
- Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
- Offset = CurDAG->getTargetConstant(0, MVT::i32);
- return (IsS11_0_Offset(Offset.getNode()));
+ int64_t Val = 0;
+ if (Opc != ISD::FABS && Opc != ISD::FNEG) {
+ if (N->getOperand(1).getOpcode() == ISD::Constant)
+ Val = cast<ConstantSDNode>((N)->getOperand(1))->getSExtValue();
+ else
+ return SelectCode(N);
}
- Base = Addr;
- Offset = CurDAG->getTargetConstant(0, MVT::i32);
- return (IsS11_0_Offset(Offset.getNode()));
-}
-
-bool HexagonDAGToDAGISel::SelectADDRriS11_1(SDValue& Addr, SDValue &Base,
- SDValue &Offset) {
- if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
- Addr.getOpcode() == ISD::TargetGlobalAddress)
- return false; // Direct calls.
-
- if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
- Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
- Offset = CurDAG->getTargetConstant(0, MVT::i32);
- return (IsS11_1_Offset(Offset.getNode()));
+ if (Opc == ISD::AND) {
+ if (((ValueVT == MVT::i32) &&
+ (!((Val & 0x80000000) || (Val & 0x7fffffff)))) ||
+ ((ValueVT == MVT::i64) &&
+ (!((Val & 0x8000000000000000) || (Val & 0x7fffffff)))))
+ // If it's simple AND, do the normal op.
+ return SelectCode(N);
+ else
+ Val = ~Val;
}
- Base = Addr;
- Offset = CurDAG->getTargetConstant(0, MVT::i32);
- return (IsS11_1_Offset(Offset.getNode()));
-}
-
-
-bool HexagonDAGToDAGISel::SelectADDRriS11_2(SDValue& Addr, SDValue &Base,
- SDValue &Offset) {
- if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
- Addr.getOpcode() == ISD::TargetGlobalAddress)
- return false; // Direct calls.
- if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
- Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
- Offset = CurDAG->getTargetConstant(0, MVT::i32);
- return (IsS11_2_Offset(Offset.getNode()));
+  // If the OR or AND is fed by a shl, srl, or sra, don't do this change,
+  // because Hexagon provides accumulating |= and &= forms of shl, srl,
+  // and sra. Check the feeding operand for one of those shifts.
+ if (Opc == ISD::OR || Opc == ISD::AND) {
+ switch (N->getOperand(0)->getOpcode()) {
+ default: break;
+ case ISD::SRA:
+ case ISD::SRL:
+ case ISD::SHL:
+ return SelectCode(N);
+ }
}
- Base = Addr;
- Offset = CurDAG->getTargetConstant(0, MVT::i32);
- return (IsS11_2_Offset(Offset.getNode()));
-}
+  // Make sure the constant is a power of 2.
+ unsigned bitpos = 0;
+ if (Opc != ISD::FABS && Opc != ISD::FNEG) {
+ if (((ValueVT == MVT::i32) && !isPowerOf2_32(Val)) ||
+ ((ValueVT == MVT::i64) && !isPowerOf2_64(Val)))
+ return SelectCode(N);
-bool HexagonDAGToDAGISel::SelectADDRriU6_0(SDValue& Addr, SDValue &Base,
- SDValue &Offset) {
- if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
- Addr.getOpcode() == ISD::TargetGlobalAddress)
- return false; // Direct calls.
-
- if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
- Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
- Offset = CurDAG->getTargetConstant(0, MVT::i32);
- return (IsU6_0_Offset(Offset.getNode()));
+ // Get the bit position.
+ bitpos = countTrailingZeros(uint64_t(Val));
+ } else {
+    // For fabs and fneg, it's always bit 31 (the sign bit).
+ bitpos = 31;
}
- Base = Addr;
- Offset = CurDAG->getTargetConstant(0, MVT::i32);
- return (IsU6_0_Offset(Offset.getNode()));
-}
+ unsigned BitOpc = 0;
+ // Set the right opcode for bitwise operations.
+ switch(Opc) {
+ default: llvm_unreachable("Only bit-wise/abs/neg operations are allowed.");
+ case ISD::AND:
+ case ISD::FABS:
+ BitOpc = Hexagon::S2_clrbit_i;
+ break;
+ case ISD::OR:
+ BitOpc = Hexagon::S2_setbit_i;
+ break;
+ case ISD::XOR:
+ case ISD::FNEG:
+ BitOpc = Hexagon::S2_togglebit_i;
+ break;
+ }
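+  // For example, fabs on an f32 value becomes S2_clrbit_i(Rs, #31) and
+  // fneg becomes S2_togglebit_i(Rs, #31), operating on the sign bit.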
-bool HexagonDAGToDAGISel::SelectADDRriU6_1(SDValue& Addr, SDValue &Base,
- SDValue &Offset) {
- if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
- Addr.getOpcode() == ISD::TargetGlobalAddress)
- return false; // Direct calls.
+ SDNode *Result;
+ // Get the right SDVal for the opcode.
+ SDValue SDVal = CurDAG->getTargetConstant(bitpos, MVT::i32);
- if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
- Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
- Offset = CurDAG->getTargetConstant(0, MVT::i32);
- return (IsU6_1_Offset(Offset.getNode()));
+ if (ValueVT == MVT::i32 || ValueVT == MVT::f32) {
+ Result = CurDAG->getMachineNode(BitOpc, dl, ValueVT,
+ N->getOperand(0), SDVal);
+ } else {
+    // 64-bit gymnastics to use REG_SEQUENCE: split into subregisters,
+    // operate on one half, and reassemble. But it's worth it.
+ EVT SubValueVT;
+ if (ValueVT == MVT::i64)
+ SubValueVT = MVT::i32;
+ else
+ SubValueVT = MVT::f32;
+
+ SDNode *Reg = N->getOperand(0).getNode();
+ SDValue RegClass = CurDAG->getTargetConstant(Hexagon::DoubleRegsRegClassID,
+ MVT::i64);
+
+ SDValue SubregHiIdx = CurDAG->getTargetConstant(Hexagon::subreg_hireg,
+ MVT::i32);
+ SDValue SubregLoIdx = CurDAG->getTargetConstant(Hexagon::subreg_loreg,
+ MVT::i32);
+
+ SDValue SubregHI = CurDAG->getTargetExtractSubreg(Hexagon::subreg_hireg, dl,
+ MVT::i32, SDValue(Reg, 0));
+
+ SDValue SubregLO = CurDAG->getTargetExtractSubreg(Hexagon::subreg_loreg, dl,
+ MVT::i32, SDValue(Reg, 0));
+
+ // Clear/set/toggle hi or lo registers depending on the bit position.
+ if (SubValueVT != MVT::f32 && bitpos < 32) {
+ SDNode *Result0 = CurDAG->getMachineNode(BitOpc, dl, SubValueVT,
+ SubregLO, SDVal);
+ const SDValue Ops[] = { RegClass, SubregHI, SubregHiIdx,
+ SDValue(Result0, 0), SubregLoIdx };
+ Result = CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE,
+ dl, ValueVT, Ops);
+ } else {
+ if (Opc != ISD::FABS && Opc != ISD::FNEG)
+ SDVal = CurDAG->getTargetConstant(bitpos-32, MVT::i32);
+ SDNode *Result0 = CurDAG->getMachineNode(BitOpc, dl, SubValueVT,
+ SubregHI, SDVal);
+ const SDValue Ops[] = { RegClass, SDValue(Result0, 0), SubregHiIdx,
+ SubregLO, SubregLoIdx };
+ Result = CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE,
+ dl, ValueVT, Ops);
+ }
}
- Base = Addr;
- Offset = CurDAG->getTargetConstant(0, MVT::i32);
- return (IsU6_1_Offset(Offset.getNode()));
+
+ ReplaceUses(N, Result);
+ return Result;
}
-bool HexagonDAGToDAGISel::SelectADDRriU6_2(SDValue& Addr, SDValue &Base,
- SDValue &Offset) {
- if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
- Addr.getOpcode() == ISD::TargetGlobalAddress)
- return false; // Direct calls.
+SDNode *HexagonDAGToDAGISel::SelectFrameIndex(SDNode *N) {
+ int FX = cast<FrameIndexSDNode>(N)->getIndex();
+ SDValue FI = CurDAG->getTargetFrameIndex(FX, MVT::i32);
+ SDValue Zero = CurDAG->getTargetConstant(0, MVT::i32);
+ SDLoc DL(N);
- if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
- Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
- Offset = CurDAG->getTargetConstant(0, MVT::i32);
- return (IsU6_2_Offset(Offset.getNode()));
- }
- Base = Addr;
- Offset = CurDAG->getTargetConstant(0, MVT::i32);
- return (IsU6_2_Offset(Offset.getNode()));
-}
+ SDNode *R = CurDAG->getMachineNode(Hexagon::TFR_FI, DL, MVT::i32, FI, Zero);
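+  // TFR_FI materializes the frame address as FI + 0; carry any debug
+  // values on the original node over to it.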
+ if (N->getHasDebugValue())
+ CurDAG->TransferDbgValues(SDValue(N, 0), SDValue(R, 0));
+ return R;
+}
-bool HexagonDAGToDAGISel::SelectMEMriS11_2(SDValue& Addr, SDValue &Base,
- SDValue &Offset) {
- if (Addr.getOpcode() != ISD::ADD) {
- return(SelectADDRriS11_2(Addr, Base, Offset));
+SDNode *HexagonDAGToDAGISel::Select(SDNode *N) {
+ if (N->isMachineOpcode()) {
+ N->setNodeId(-1);
+ return nullptr; // Already selected.
}
- return SelectADDRriS11_2(Addr, Base, Offset);
-}
+ switch (N->getOpcode()) {
+ case ISD::Constant:
+ return SelectConstant(N);
+ case ISD::ConstantFP:
+ return SelectConstantFP(N);
-bool HexagonDAGToDAGISel::SelectADDRriS11_3(SDValue& Addr, SDValue &Base,
- SDValue &Offset) {
- if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
- Addr.getOpcode() == ISD::TargetGlobalAddress)
- return false; // Direct calls.
+ case ISD::FrameIndex:
+ return SelectFrameIndex(N);
- if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
- Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
- Offset = CurDAG->getTargetConstant(0, MVT::i32);
- return (IsS11_3_Offset(Offset.getNode()));
- }
- Base = Addr;
- Offset = CurDAG->getTargetConstant(0, MVT::i32);
- return (IsS11_3_Offset(Offset.getNode()));
-}
+ case ISD::ADD:
+ return SelectAdd(N);
-bool HexagonDAGToDAGISel::SelectADDRrr(SDValue &Addr, SDValue &R1,
- SDValue &R2) {
- if (Addr.getOpcode() == ISD::FrameIndex) return false;
- if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
- Addr.getOpcode() == ISD::TargetGlobalAddress)
- return false; // Direct calls.
-
- if (Addr.getOpcode() == ISD::ADD) {
- if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
- if (isInt<13>(CN->getSExtValue()))
- return false; // Let the reg+imm pattern catch this!
- R1 = Addr.getOperand(0);
- R2 = Addr.getOperand(1);
- return true;
- }
+ case ISD::SHL:
+ return SelectSHL(N);
- R1 = Addr;
+ case ISD::LOAD:
+ return SelectLoad(N);
- return true;
-}
+ case ISD::STORE:
+ return SelectStore(N);
+ case ISD::MUL:
+ return SelectMul(N);
-// Handle generic address case. It is accessed from inlined asm =m constraints,
-// which could have any kind of pointer.
-bool HexagonDAGToDAGISel::SelectAddr(SDNode *Op, SDValue Addr,
- SDValue &Base, SDValue &Offset) {
- if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
- Addr.getOpcode() == ISD::TargetGlobalAddress)
- return false; // Direct calls.
+ case ISD::AND:
+ case ISD::OR:
+ case ISD::XOR:
+ case ISD::FABS:
+ case ISD::FNEG:
+ return SelectBitOp(N);
- if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
- Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
- Offset = CurDAG->getTargetConstant(0, MVT::i32);
- return true;
- }
+ case ISD::ZERO_EXTEND:
+ return SelectZeroExtend(N);
- if (Addr.getOpcode() == ISD::ADD) {
- Base = Addr.getOperand(0);
- Offset = Addr.getOperand(1);
- return true;
+ case ISD::INTRINSIC_W_CHAIN:
+ return SelectIntrinsicWChain(N);
+
+ case ISD::INTRINSIC_WO_CHAIN:
+ return SelectIntrinsicWOChain(N);
}
- Base = Addr;
- Offset = CurDAG->getTargetConstant(0, MVT::i32);
- return true;
+ return SelectCode(N);
}
bool HexagonDAGToDAGISel::
-SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode,
+SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
std::vector<SDValue> &OutOps) {
- SDValue Op0, Op1;
-
- switch (ConstraintCode) {
- case 'o': // Offsetable.
- case 'v': // Not offsetable.
- default: return true;
- case 'm': // Memory.
- if (!SelectAddr(Op.getNode(), Op, Op0, Op1))
- return true;
+ SDValue Inp = Op, Res;
+
+ switch (ConstraintID) {
+ default:
+ return true;
+ case InlineAsm::Constraint_i:
+ case InlineAsm::Constraint_o: // Offsetable.
+ case InlineAsm::Constraint_v: // Not offsetable.
+ case InlineAsm::Constraint_m: // Memory.
+ if (SelectAddrFI(Inp, Res))
+ OutOps.push_back(Res);
+ else
+ OutOps.push_back(Inp);
break;
}
- OutOps.push_back(Op0);
- OutOps.push_back(Op1);
+ OutOps.push_back(CurDAG->getTargetConstant(0, MVT::i32));
return false;
}
-bool HexagonDAGToDAGISel::isConstExtProfitable(SDNode *N) const {
- unsigned UseCount = 0;
- for (SDNode::use_iterator I = N->use_begin(), E = N->use_end(); I != E; ++I) {
- UseCount++;
- }
-
- return (UseCount <= 1);
-
-}
-
-//===--------------------------------------------------------------------===//
-// Return 'true' if use count of the global address is below threshold.
-//===--------------------------------------------------------------------===//
-bool HexagonDAGToDAGISel::hasNumUsesBelowThresGA(SDNode *N) const {
- assert(N->getOpcode() == ISD::TargetGlobalAddress &&
- "Expecting a target global address");
-
- // Always try to fold the address.
- if (TM.getOptLevel() == CodeGenOpt::Aggressive)
- return true;
-
- GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(N);
- DenseMap<const GlobalValue *, unsigned>::const_iterator GI =
- GlobalAddressUseCountMap.find(GA->getGlobal());
-
- if (GI == GlobalAddressUseCountMap.end())
- return false;
-
- return GI->second <= MaxNumOfUsesForConstExtenders;
-}
-
-//===--------------------------------------------------------------------===//
-// Return true if the non-GP-relative global address can be folded.
-//===--------------------------------------------------------------------===//
-inline bool HexagonDAGToDAGISel::foldGlobalAddress(SDValue &N, SDValue &R) {
- return foldGlobalAddressImpl(N, R, false);
-}
-
-//===--------------------------------------------------------------------===//
-// Return true if the GP-relative global address can be folded.
-//===--------------------------------------------------------------------===//
-inline bool HexagonDAGToDAGISel::foldGlobalAddressGP(SDValue &N, SDValue &R) {
- return foldGlobalAddressImpl(N, R, true);
-}
+void HexagonDAGToDAGISel::PreprocessISelDAG() {
+ SelectionDAG &DAG = *CurDAG;
+ std::vector<SDNode*> Nodes;
+ for (auto I = DAG.allnodes_begin(), E = DAG.allnodes_end(); I != E; ++I)
+ Nodes.push_back(I);
+
+ // Simplify: (or (select c x 0) z) -> (select c (or x z) z)
+ // (or (select c 0 y) z) -> (select c z (or y z))
+ // This may not be the right thing for all targets, so do it here.
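+  // This is sound: when the select produces 0, (or 0, z) == z, so the
+  // zero arm of the rewritten select can forward z unchanged.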
+ for (auto I: Nodes) {
+ if (I->getOpcode() != ISD::OR)
+ continue;
+
+ auto IsZero = [] (const SDValue &V) -> bool {
+ if (ConstantSDNode *SC = dyn_cast<ConstantSDNode>(V.getNode()))
+ return SC->isNullValue();
+ return false;
+ };
+ auto IsSelect0 = [IsZero] (const SDValue &Op) -> bool {
+ if (Op.getOpcode() != ISD::SELECT)
+ return false;
+ return IsZero(Op.getOperand(1)) || IsZero(Op.getOperand(2));
+ };
-//===--------------------------------------------------------------------===//
-// Fold offset of the global address if number of uses are below threshold.
-//===--------------------------------------------------------------------===//
-bool HexagonDAGToDAGISel::foldGlobalAddressImpl(SDValue &N, SDValue &R,
- bool ShouldLookForGP) {
- if (N.getOpcode() == ISD::ADD) {
- SDValue N0 = N.getOperand(0);
- SDValue N1 = N.getOperand(1);
- if ((ShouldLookForGP && (N0.getOpcode() == HexagonISD::CONST32_GP)) ||
- (!ShouldLookForGP && (N0.getOpcode() == HexagonISD::CONST32))) {
- ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N1);
- GlobalAddressSDNode *GA =
- dyn_cast<GlobalAddressSDNode>(N0.getOperand(0));
-
- if (Const && GA &&
- (GA->getOpcode() == ISD::TargetGlobalAddress)) {
- if ((N0.getOpcode() == HexagonISD::CONST32) &&
- !hasNumUsesBelowThresGA(GA))
- return false;
- R = CurDAG->getTargetGlobalAddress(GA->getGlobal(),
- SDLoc(Const),
- N.getValueType(),
- GA->getOffset() +
- (uint64_t)Const->getSExtValue());
- return true;
+ SDValue N0 = I->getOperand(0), N1 = I->getOperand(1);
+ EVT VT = I->getValueType(0);
+ bool SelN0 = IsSelect0(N0);
+ SDValue SOp = SelN0 ? N0 : N1;
+ SDValue VOp = SelN0 ? N1 : N0;
+
+ if (SOp.getOpcode() == ISD::SELECT && SOp.getNode()->hasOneUse()) {
+ SDValue SC = SOp.getOperand(0);
+ SDValue SX = SOp.getOperand(1);
+ SDValue SY = SOp.getOperand(2);
+ SDLoc DLS = SOp;
+ if (IsZero(SY)) {
+ SDValue NewOr = DAG.getNode(ISD::OR, DLS, VT, SX, VOp);
+ SDValue NewSel = DAG.getNode(ISD::SELECT, DLS, VT, SC, NewOr, VOp);
+ DAG.ReplaceAllUsesWith(I, NewSel.getNode());
+ } else if (IsZero(SX)) {
+ SDValue NewOr = DAG.getNode(ISD::OR, DLS, VT, SY, VOp);
+ SDValue NewSel = DAG.getNode(ISD::SELECT, DLS, VT, SC, VOp, NewOr);
+ DAG.ReplaceAllUsesWith(I, NewSel.getNode());
}
}
}
- return false;
}
+
bool HexagonDAGToDAGISel::SelectAddrFI(SDValue& N, SDValue &R) {
if (N.getOpcode() != ISD::FrameIndex)
return false;
@@ -1681,8 +1410,8 @@ bool HexagonDAGToDAGISel::SelectGlobalAddress(SDValue &N, SDValue &R,
return false;
}
-bool HexagonDAGToDAGISel::isValueExtension(SDValue const &Val,
- unsigned FromBits, SDValue &Src) {
+bool HexagonDAGToDAGISel::isValueExtension(const SDValue &Val,
+ unsigned FromBits, SDValue &Src) {
unsigned Opc = Val.getOpcode();
switch (Opc) {
case ISD::SIGN_EXTEND:
diff --git a/lib/Target/Hexagon/HexagonISelLowering.cpp b/lib/Target/Hexagon/HexagonISelLowering.cpp
index 0072994..a2209ab 100644
--- a/lib/Target/Hexagon/HexagonISelLowering.cpp
+++ b/lib/Target/Hexagon/HexagonISelLowering.cpp
@@ -164,6 +164,12 @@ CC_Hexagon (unsigned ValNo, MVT ValVT,
LocInfo = CCValAssign::ZExt;
else
LocInfo = CCValAssign::AExt;
+ } else if (LocVT == MVT::v4i8 || LocVT == MVT::v2i16) {
+ LocVT = MVT::i32;
+ LocInfo = CCValAssign::BCvt;
+ } else if (LocVT == MVT::v8i8 || LocVT == MVT::v4i16 || LocVT == MVT::v2i32) {
+ LocVT = MVT::i64;
+ LocInfo = CCValAssign::BCvt;
}
if (LocVT == MVT::i32 || LocVT == MVT::f32) {
@@ -239,6 +245,12 @@ static bool RetCC_Hexagon(unsigned ValNo, MVT ValVT,
LocInfo = CCValAssign::ZExt;
else
LocInfo = CCValAssign::AExt;
+ } else if (LocVT == MVT::v4i8 || LocVT == MVT::v2i16) {
+ LocVT = MVT::i32;
+ LocInfo = CCValAssign::BCvt;
+ } else if (LocVT == MVT::v8i8 || LocVT == MVT::v4i16 || LocVT == MVT::v2i32) {
+ LocVT = MVT::i64;
+ LocInfo = CCValAssign::BCvt;
}
if (LocVT == MVT::i32 || LocVT == MVT::f32) {
@@ -764,7 +776,7 @@ LowerBR_JT(SDValue Op, SelectionDAG &DAG) const
BlockAddress::get(const_cast<BasicBlock *>(MBB->getBasicBlock()));
}
- SDValue JumpTableBase = DAG.getNode(HexagonISD::WrapperJT, dl,
+ SDValue JumpTableBase = DAG.getNode(HexagonISD::JT, dl,
getPointerTy(), TargetJT);
SDValue ShiftIndex = DAG.getNode(ISD::SHL, dl, MVT::i32, Index,
DAG.getConstant(2, MVT::i32));
@@ -944,6 +956,192 @@ HexagonTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
false, 0);
}
+// Create a SPLAT instruction for the value VAL.
+static SDValue createSplat(SelectionDAG &DAG, SDLoc dl, EVT VT, SDValue Val) {
+ if (VT.getSimpleVT() == MVT::v4i8)
+ return DAG.getNode(HexagonISD::VSPLATB, dl, VT, Val);
+
+ if (VT.getSimpleVT() == MVT::v4i16)
+ return DAG.getNode(HexagonISD::VSPLATH, dl, VT, Val);
+
+ return SDValue();
+}
+
+static bool isSExtFree(SDValue N) {
+ // A sign-extend of a truncate of a sign-extend is free.
+ if (N.getOpcode() == ISD::TRUNCATE &&
+ N.getOperand(0).getOpcode() == ISD::AssertSext)
+ return true;
+ // We have sign-extended loads.
+ if (N.getOpcode() == ISD::LOAD)
+ return true;
+ return false;
+}
+
+SDValue HexagonTargetLowering::LowerCTPOP(SDValue Op, SelectionDAG &DAG) const {
+ SDLoc dl(Op);
+ SDValue InpVal = Op.getOperand(0);
+ if (isa<ConstantSDNode>(InpVal)) {
+ uint64_t V = cast<ConstantSDNode>(InpVal)->getZExtValue();
+ return DAG.getTargetConstant(countPopulation(V), MVT::i64);
+ }
+ SDValue PopOut = DAG.getNode(HexagonISD::POPCOUNT, dl, MVT::i32, InpVal);
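+  // The count is computed as an i32 and zero-extended to the i64 result.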
+ return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, PopOut);
+}
+
+SDValue HexagonTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
+ SDLoc dl(Op);
+
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ SDValue Cmp = Op.getOperand(2);
+ ISD::CondCode CC = cast<CondCodeSDNode>(Cmp)->get();
+
+ EVT VT = Op.getValueType();
+ EVT LHSVT = LHS.getValueType();
+ EVT RHSVT = RHS.getValueType();
+
+ if (LHSVT == MVT::v2i16) {
+ assert(ISD::isSignedIntSetCC(CC) || ISD::isUnsignedIntSetCC(CC));
+ unsigned ExtOpc = ISD::isSignedIntSetCC(CC) ? ISD::SIGN_EXTEND
+ : ISD::ZERO_EXTEND;
+ SDValue LX = DAG.getNode(ExtOpc, dl, MVT::v2i32, LHS);
+ SDValue RX = DAG.getNode(ExtOpc, dl, MVT::v2i32, RHS);
+ SDValue SC = DAG.getNode(ISD::SETCC, dl, MVT::v2i1, LX, RX, Cmp);
+ return SC;
+ }
+
+ // Treat all other vector types as legal.
+ if (VT.isVector())
+ return Op;
+
+ // Equals and not equals should use sign-extend, not zero-extend, since
+ // we can represent small negative values in the compare instructions.
+ // The LLVM default is to use zero-extend arbitrarily in these cases.
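+  // For example, an i8 compare against -1: zero-extension would turn the
+  // constant into 255, while sign-extension keeps it as -1, which small
+  // signed compare immediates can encode.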
+ if ((CC == ISD::SETEQ || CC == ISD::SETNE) &&
+ (RHSVT == MVT::i8 || RHSVT == MVT::i16) &&
+ (LHSVT == MVT::i8 || LHSVT == MVT::i16)) {
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS);
+ if (C && C->getAPIntValue().isNegative()) {
+ LHS = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i32, LHS);
+ RHS = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i32, RHS);
+ return DAG.getNode(ISD::SETCC, dl, Op.getValueType(),
+ LHS, RHS, Op.getOperand(2));
+ }
+ if (isSExtFree(LHS) || isSExtFree(RHS)) {
+ LHS = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i32, LHS);
+ RHS = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i32, RHS);
+ return DAG.getNode(ISD::SETCC, dl, Op.getValueType(),
+ LHS, RHS, Op.getOperand(2));
+ }
+ }
+ return SDValue();
+}
+
+SDValue HexagonTargetLowering::LowerVSELECT(SDValue Op, SelectionDAG &DAG)
+ const {
+ SDValue PredOp = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1), Op2 = Op.getOperand(2);
+ EVT OpVT = Op1.getValueType();
+ SDLoc DL(Op);
+
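+  // Legalize v2i16 vector selects by widening both arms to v2i32,
+  // selecting there, and truncating the result back to v2i16.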
+ if (OpVT == MVT::v2i16) {
+ SDValue X1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v2i32, Op1);
+ SDValue X2 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v2i32, Op2);
+ SDValue SL = DAG.getNode(ISD::VSELECT, DL, MVT::v2i32, PredOp, X1, X2);
+ SDValue TR = DAG.getNode(ISD::TRUNCATE, DL, MVT::v2i16, SL);
+ return TR;
+ }
+
+ return SDValue();
+}
+
+// Handle only specific vector loads.
+SDValue HexagonTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
+ EVT VT = Op.getValueType();
+ SDLoc DL(Op);
+ LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
+ SDValue Chain = LoadNode->getChain();
+ SDValue Ptr = Op.getOperand(1);
+ SDValue LoweredLoad;
+ SDValue Result;
+ SDValue Base = LoadNode->getBasePtr();
+ ISD::LoadExtType Ext = LoadNode->getExtensionType();
+ unsigned Alignment = LoadNode->getAlignment();
+ SDValue LoadChain;
+
+  if (Ext == ISD::NON_EXTLOAD)
+ Ext = ISD::ZEXTLOAD;
+
+ if (VT == MVT::v4i16) {
+ if (Alignment == 2) {
+ SDValue Loads[4];
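+      // Assemble the under-aligned v4i16 load from four naturally aligned
+      // i16 loads: OR each pair into an i32 (low half | high half << 16),
+      // then combine the two i32s into the final i64.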
+ // Base load.
+ Loads[0] = DAG.getExtLoad(Ext, DL, MVT::i32, Chain, Base,
+ LoadNode->getPointerInfo(), MVT::i16,
+ LoadNode->isVolatile(),
+ LoadNode->isNonTemporal(),
+ LoadNode->isInvariant(),
+ Alignment);
+ // Base+2 load.
+ SDValue Increment = DAG.getConstant(2, MVT::i32);
+ Ptr = DAG.getNode(ISD::ADD, DL, Base.getValueType(), Base, Increment);
+ Loads[1] = DAG.getExtLoad(Ext, DL, MVT::i32, Chain, Ptr,
+ LoadNode->getPointerInfo(), MVT::i16,
+ LoadNode->isVolatile(),
+ LoadNode->isNonTemporal(),
+ LoadNode->isInvariant(),
+ Alignment);
+ // SHL 16, then OR base and base+2.
+ SDValue ShiftAmount = DAG.getConstant(16, MVT::i32);
+ SDValue Tmp1 = DAG.getNode(ISD::SHL, DL, MVT::i32, Loads[1], ShiftAmount);
+ SDValue Tmp2 = DAG.getNode(ISD::OR, DL, MVT::i32, Tmp1, Loads[0]);
+ // Base + 4.
+ Increment = DAG.getConstant(4, MVT::i32);
+ Ptr = DAG.getNode(ISD::ADD, DL, Base.getValueType(), Base, Increment);
+ Loads[2] = DAG.getExtLoad(Ext, DL, MVT::i32, Chain, Ptr,
+ LoadNode->getPointerInfo(), MVT::i16,
+ LoadNode->isVolatile(),
+ LoadNode->isNonTemporal(),
+ LoadNode->isInvariant(),
+ Alignment);
+ // Base + 6.
+ Increment = DAG.getConstant(6, MVT::i32);
+ Ptr = DAG.getNode(ISD::ADD, DL, Base.getValueType(), Base, Increment);
+ Loads[3] = DAG.getExtLoad(Ext, DL, MVT::i32, Chain, Ptr,
+ LoadNode->getPointerInfo(), MVT::i16,
+ LoadNode->isVolatile(),
+ LoadNode->isNonTemporal(),
+ LoadNode->isInvariant(),
+ Alignment);
+ // SHL 16, then OR base+4 and base+6.
+ Tmp1 = DAG.getNode(ISD::SHL, DL, MVT::i32, Loads[3], ShiftAmount);
+ SDValue Tmp4 = DAG.getNode(ISD::OR, DL, MVT::i32, Tmp1, Loads[2]);
+ // Combine to i64. This could be optimised out later if we can
+ // affect reg allocation of this code.
+ Result = DAG.getNode(HexagonISD::COMBINE, DL, MVT::i64, Tmp4, Tmp2);
+ LoadChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
+ Loads[0].getValue(1), Loads[1].getValue(1),
+ Loads[2].getValue(1), Loads[3].getValue(1));
+ } else {
+ // Perform default type expansion.
+ Result = DAG.getLoad(MVT::i64, DL, Chain, Ptr, LoadNode->getPointerInfo(),
+ LoadNode->isVolatile(), LoadNode->isNonTemporal(),
+ LoadNode->isInvariant(), LoadNode->getAlignment());
+ LoadChain = Result.getValue(1);
+ }
+ } else
+ llvm_unreachable("Custom lowering unsupported load");
+
+ Result = DAG.getNode(ISD::BITCAST, DL, VT, Result);
+ // Since we pretend to lower a load, we need the original chain
+ // info attached to the result.
+ SDValue Ops[] = { Result, LoadChain };
+
+ return DAG.getMergeValues(Ops, DL);
+}
+
+
SDValue
HexagonTargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const {
EVT ValTy = Op.getValueType();
@@ -1028,6 +1226,19 @@ SDValue HexagonTargetLowering::LowerGLOBALADDRESS(SDValue Op,
return DAG.getNode(HexagonISD::CONST32, dl, getPointerTy(), Result);
}
+// Specifies that for loads and stores VT can be promoted to PromotedLdStVT.
+void HexagonTargetLowering::promoteLdStType(EVT VT, EVT PromotedLdStVT) {
+ if (VT != PromotedLdStVT) {
+ setOperationAction(ISD::LOAD, VT.getSimpleVT(), Promote);
+ AddPromotedToType(ISD::LOAD, VT.getSimpleVT(),
+ PromotedLdStVT.getSimpleVT());
+
+ setOperationAction(ISD::STORE, VT.getSimpleVT(), Promote);
+ AddPromotedToType(ISD::STORE, VT.getSimpleVT(),
+ PromotedLdStVT.getSimpleVT());
+ }
+}
+
SDValue
HexagonTargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const {
const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
@@ -1045,14 +1256,105 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
: TargetLowering(TM), Subtarget(&STI) {
// Set up the register classes.
+ addRegisterClass(MVT::v2i1, &Hexagon::PredRegsRegClass); // bbbbaaaa
+ addRegisterClass(MVT::v4i1, &Hexagon::PredRegsRegClass); // ddccbbaa
+ addRegisterClass(MVT::v8i1, &Hexagon::PredRegsRegClass); // hgfedcba
addRegisterClass(MVT::i32, &Hexagon::IntRegsRegClass);
- addRegisterClass(MVT::i64, &Hexagon::DoubleRegsRegClass);
+ addRegisterClass(MVT::v4i8, &Hexagon::IntRegsRegClass);
+ addRegisterClass(MVT::v2i16, &Hexagon::IntRegsRegClass);
+ promoteLdStType(MVT::v4i8, MVT::i32);
+ promoteLdStType(MVT::v2i16, MVT::i32);
if (Subtarget->hasV5TOps()) {
addRegisterClass(MVT::f32, &Hexagon::IntRegsRegClass);
addRegisterClass(MVT::f64, &Hexagon::DoubleRegsRegClass);
}
+ addRegisterClass(MVT::i64, &Hexagon::DoubleRegsRegClass);
+ addRegisterClass(MVT::v8i8, &Hexagon::DoubleRegsRegClass);
+ addRegisterClass(MVT::v4i16, &Hexagon::DoubleRegsRegClass);
+ addRegisterClass(MVT::v2i32, &Hexagon::DoubleRegsRegClass);
+ promoteLdStType(MVT::v8i8, MVT::i64);
+
+  // Custom lower v4i16 loads only. Let v4i16 stores be
+  // promoted for now.
+ setOperationAction(ISD::LOAD, MVT::v4i16, Custom);
+ AddPromotedToType(ISD::LOAD, MVT::v4i16, MVT::i64);
+ setOperationAction(ISD::STORE, MVT::v4i16, Promote);
+ AddPromotedToType(ISD::STORE, MVT::v4i16, MVT::i64);
+ promoteLdStType(MVT::v2i32, MVT::i64);
+
+ for (unsigned i = (unsigned) MVT::FIRST_VECTOR_VALUETYPE;
+ i <= (unsigned) MVT::LAST_VECTOR_VALUETYPE; ++i) {
+ MVT::SimpleValueType VT = (MVT::SimpleValueType) i;
+
+ // Hexagon does not have support for the following operations,
+ // so they need to be expanded.
+ setOperationAction(ISD::SELECT, VT, Expand);
+ setOperationAction(ISD::SDIV, VT, Expand);
+ setOperationAction(ISD::SREM, VT, Expand);
+ setOperationAction(ISD::UDIV, VT, Expand);
+ setOperationAction(ISD::UREM, VT, Expand);
+ setOperationAction(ISD::ROTL, VT, Expand);
+ setOperationAction(ISD::ROTR, VT, Expand);
+ setOperationAction(ISD::FDIV, VT, Expand);
+ setOperationAction(ISD::FNEG, VT, Expand);
+ setOperationAction(ISD::UMUL_LOHI, VT, Expand);
+ setOperationAction(ISD::SMUL_LOHI, VT, Expand);
+ setOperationAction(ISD::UDIVREM, VT, Expand);
+ setOperationAction(ISD::SDIVREM, VT, Expand);
+ setOperationAction(ISD::FPOW, VT, Expand);
+ setOperationAction(ISD::CTPOP, VT, Expand);
+ setOperationAction(ISD::CTLZ, VT, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand);
+ setOperationAction(ISD::CTTZ, VT, Expand);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand);
+
+    // Expand all any-extend loads.
+ for (unsigned j = (unsigned) MVT::FIRST_VECTOR_VALUETYPE;
+ j <= (unsigned) MVT::LAST_VECTOR_VALUETYPE; ++j)
+ setLoadExtAction(ISD::EXTLOAD, (MVT::SimpleValueType) j, VT, Expand);
+
+ // Expand all trunc stores.
+ for (unsigned TargetVT = (unsigned) MVT::FIRST_VECTOR_VALUETYPE;
+ TargetVT <= (unsigned) MVT::LAST_VECTOR_VALUETYPE; ++TargetVT)
+ setTruncStoreAction(VT, (MVT::SimpleValueType) TargetVT, Expand);
+
+ setOperationAction(ISD::VECTOR_SHUFFLE, VT, Expand);
+ setOperationAction(ISD::ConstantPool, VT, Expand);
+ setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Expand);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand);
+ setOperationAction(ISD::BUILD_VECTOR, VT, Expand);
+ setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Expand);
+ setOperationAction(ISD::INSERT_SUBVECTOR, VT, Expand);
+ setOperationAction(ISD::CONCAT_VECTORS, VT, Expand);
+ setOperationAction(ISD::SRA, VT, Custom);
+ setOperationAction(ISD::SHL, VT, Custom);
+ setOperationAction(ISD::SRL, VT, Custom);
+
+ if (!isTypeLegal(VT))
+ continue;
+
+ setOperationAction(ISD::ADD, VT, Legal);
+ setOperationAction(ISD::SUB, VT, Legal);
+ setOperationAction(ISD::MUL, VT, Legal);
+
+ setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
+ setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
+ setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
+ setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
+ }
+
+ setOperationAction(ISD::SETCC, MVT::v2i16, Custom);
+ setOperationAction(ISD::VSELECT, MVT::v2i16, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i8, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i16, Custom);
+
+ setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
+
addRegisterClass(MVT::i1, &Hexagon::PredRegsRegClass);
computeRegisterProperties(Subtarget->getRegisterInfo());
@@ -1308,9 +1610,14 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
// Turn FP extload into load/fextend.
for (MVT VT : MVT::fp_valuetypes())
setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand);
- // Hexagon has a i1 sign extending load.
- for (MVT VT : MVT::integer_valuetypes())
- setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Expand);
+
+ // No extending loads from i32.
+ for (MVT VT : MVT::integer_valuetypes()) {
+ setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i32, Expand);
+ setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i32, Expand);
+ setLoadExtAction(ISD::EXTLOAD, VT, MVT::i32, Expand);
+ }
+
// Turn FP truncstore into trunc + store.
setTruncStoreAction(MVT::f64, MVT::f32, Expand);
@@ -1358,6 +1665,10 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SELECT, MVT::f64, Expand);
}
+ // Hexagon needs to optimize cases with negative constants.
+ setOperationAction(ISD::SETCC, MVT::i16, Custom);
+ setOperationAction(ISD::SETCC, MVT::i8, Custom);
+
if (EmitJumpTables) {
setOperationAction(ISD::BR_JT, MVT::Other, Custom);
} else {
@@ -1415,9 +1726,17 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::CTLZ, MVT::i64, Expand);
setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand);
setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Expand);
+
setOperationAction(ISD::ROTL, MVT::i32, Expand);
setOperationAction(ISD::ROTR, MVT::i32, Expand);
setOperationAction(ISD::BSWAP, MVT::i32, Expand);
+ setOperationAction(ISD::ROTL, MVT::i64, Expand);
+ setOperationAction(ISD::ROTR, MVT::i64, Expand);
+ setOperationAction(ISD::SHL_PARTS, MVT::i64, Expand);
+ setOperationAction(ISD::SRA_PARTS, MVT::i64, Expand);
+ setOperationAction(ISD::SRL_PARTS, MVT::i64, Expand);
+ setOperationAction(ISD::BR_CC, MVT::i64, Expand);
+
setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
setOperationAction(ISD::FPOW, MVT::f64, Expand);
@@ -1429,7 +1748,7 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
-
+
setOperationAction(ISD::MULHS, MVT::i64, Expand);
setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
@@ -1463,27 +1782,63 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
const char*
HexagonTargetLowering::getTargetNodeName(unsigned Opcode) const {
switch (Opcode) {
- default: return nullptr;
- case HexagonISD::CONST32: return "HexagonISD::CONST32";
- case HexagonISD::CONST32_GP: return "HexagonISD::CONST32_GP";
- case HexagonISD::CONST32_Int_Real: return "HexagonISD::CONST32_Int_Real";
- case HexagonISD::ADJDYNALLOC: return "HexagonISD::ADJDYNALLOC";
- case HexagonISD::CMPICC: return "HexagonISD::CMPICC";
- case HexagonISD::CMPFCC: return "HexagonISD::CMPFCC";
- case HexagonISD::BRICC: return "HexagonISD::BRICC";
- case HexagonISD::BRFCC: return "HexagonISD::BRFCC";
- case HexagonISD::SELECT_ICC: return "HexagonISD::SELECT_ICC";
- case HexagonISD::SELECT_FCC: return "HexagonISD::SELECT_FCC";
- case HexagonISD::Hi: return "HexagonISD::Hi";
- case HexagonISD::Lo: return "HexagonISD::Lo";
- case HexagonISD::FTOI: return "HexagonISD::FTOI";
- case HexagonISD::ITOF: return "HexagonISD::ITOF";
- case HexagonISD::CALLv3: return "HexagonISD::CALLv3";
- case HexagonISD::CALLv3nr: return "HexagonISD::CALLv3nr";
- case HexagonISD::CALLR: return "HexagonISD::CALLR";
- case HexagonISD::RET_FLAG: return "HexagonISD::RET_FLAG";
- case HexagonISD::BR_JT: return "HexagonISD::BR_JT";
- case HexagonISD::TC_RETURN: return "HexagonISD::TC_RETURN";
+ default: return nullptr;
+ case HexagonISD::CONST32: return "HexagonISD::CONST32";
+ case HexagonISD::CONST32_GP: return "HexagonISD::CONST32_GP";
+ case HexagonISD::CONST32_Int_Real: return "HexagonISD::CONST32_Int_Real";
+ case HexagonISD::ADJDYNALLOC: return "HexagonISD::ADJDYNALLOC";
+ case HexagonISD::CMPICC: return "HexagonISD::CMPICC";
+ case HexagonISD::CMPFCC: return "HexagonISD::CMPFCC";
+ case HexagonISD::BRICC: return "HexagonISD::BRICC";
+ case HexagonISD::BRFCC: return "HexagonISD::BRFCC";
+ case HexagonISD::SELECT_ICC: return "HexagonISD::SELECT_ICC";
+ case HexagonISD::SELECT_FCC: return "HexagonISD::SELECT_FCC";
+ case HexagonISD::Hi: return "HexagonISD::Hi";
+ case HexagonISD::Lo: return "HexagonISD::Lo";
+ case HexagonISD::JT: return "HexagonISD::JT";
+ case HexagonISD::CP: return "HexagonISD::CP";
+ case HexagonISD::POPCOUNT: return "HexagonISD::POPCOUNT";
+ case HexagonISD::COMBINE: return "HexagonISD::COMBINE";
+ case HexagonISD::PACKHL: return "HexagonISD::PACKHL";
+  case HexagonISD::VSPLATB: return "HexagonISD::VSPLATB";
+ case HexagonISD::VSPLATH: return "HexagonISD::VSPLATH";
+ case HexagonISD::SHUFFEB: return "HexagonISD::SHUFFEB";
+ case HexagonISD::SHUFFEH: return "HexagonISD::SHUFFEH";
+ case HexagonISD::SHUFFOB: return "HexagonISD::SHUFFOB";
+ case HexagonISD::SHUFFOH: return "HexagonISD::SHUFFOH";
+ case HexagonISD::VSXTBH: return "HexagonISD::VSXTBH";
+ case HexagonISD::VSXTBW: return "HexagonISD::VSXTBW";
+ case HexagonISD::VSRAW: return "HexagonISD::VSRAW";
+ case HexagonISD::VSRAH: return "HexagonISD::VSRAH";
+ case HexagonISD::VSRLW: return "HexagonISD::VSRLW";
+ case HexagonISD::VSRLH: return "HexagonISD::VSRLH";
+ case HexagonISD::VSHLW: return "HexagonISD::VSHLW";
+ case HexagonISD::VSHLH: return "HexagonISD::VSHLH";
+ case HexagonISD::VCMPBEQ: return "HexagonISD::VCMPBEQ";
+ case HexagonISD::VCMPBGT: return "HexagonISD::VCMPBGT";
+ case HexagonISD::VCMPBGTU: return "HexagonISD::VCMPBGTU";
+ case HexagonISD::VCMPHEQ: return "HexagonISD::VCMPHEQ";
+ case HexagonISD::VCMPHGT: return "HexagonISD::VCMPHGT";
+ case HexagonISD::VCMPHGTU: return "HexagonISD::VCMPHGTU";
+ case HexagonISD::VCMPWEQ: return "HexagonISD::VCMPWEQ";
+ case HexagonISD::VCMPWGT: return "HexagonISD::VCMPWGT";
+ case HexagonISD::VCMPWGTU: return "HexagonISD::VCMPWGTU";
+ case HexagonISD::INSERT_ri: return "HexagonISD::INSERT_ri";
+ case HexagonISD::INSERT_rd: return "HexagonISD::INSERT_rd";
+ case HexagonISD::INSERT_riv: return "HexagonISD::INSERT_riv";
+ case HexagonISD::INSERT_rdv: return "HexagonISD::INSERT_rdv";
+ case HexagonISD::EXTRACTU_ri: return "HexagonISD::EXTRACTU_ri";
+ case HexagonISD::EXTRACTU_rd: return "HexagonISD::EXTRACTU_rd";
+ case HexagonISD::EXTRACTU_riv: return "HexagonISD::EXTRACTU_riv";
+ case HexagonISD::EXTRACTU_rdv: return "HexagonISD::EXTRACTU_rdv";
+ case HexagonISD::FTOI: return "HexagonISD::FTOI";
+ case HexagonISD::ITOF: return "HexagonISD::ITOF";
+ case HexagonISD::CALLv3: return "HexagonISD::CALLv3";
+ case HexagonISD::CALLv3nr: return "HexagonISD::CALLv3nr";
+ case HexagonISD::CALLR: return "HexagonISD::CALLR";
+ case HexagonISD::RET_FLAG: return "HexagonISD::RET_FLAG";
+ case HexagonISD::BR_JT: return "HexagonISD::BR_JT";
+ case HexagonISD::TC_RETURN: return "HexagonISD::TC_RETURN";
case HexagonISD::EH_RETURN: return "HexagonISD::EH_RETURN";
}
}
@@ -1505,6 +1860,474 @@ bool HexagonTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
return ((VT1.getSimpleVT() == MVT::i64) && (VT2.getSimpleVT() == MVT::i32));
}
+// shouldExpandBuildVectorWithShuffles
+// Should we expand the build vector with shuffles?
+bool
+HexagonTargetLowering::shouldExpandBuildVectorWithShuffles(EVT VT,
+ unsigned DefinedValues) const {
+
+  // Hexagon vector shuffles operate on element sizes of bytes or halfwords.
+ EVT EltVT = VT.getVectorElementType();
+ int EltBits = EltVT.getSizeInBits();
+ if ((EltBits != 8) && (EltBits != 16))
+ return false;
+
+ return TargetLowering::shouldExpandBuildVectorWithShuffles(VT, DefinedValues);
+}
+
+// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3). V1 and
+// V2 are the two vectors to select data from, V3 is the permutation.
+static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
+ const ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op);
+ SDValue V1 = Op.getOperand(0);
+ SDValue V2 = Op.getOperand(1);
+ SDLoc dl(Op);
+ EVT VT = Op.getValueType();
+
+ if (V2.getOpcode() == ISD::UNDEF)
+ V2 = V1;
+
+ if (SVN->isSplat()) {
+ int Lane = SVN->getSplatIndex();
+ if (Lane == -1) Lane = 0;
+
+ // Test if V1 is a SCALAR_TO_VECTOR.
+ if (Lane == 0 && V1.getOpcode() == ISD::SCALAR_TO_VECTOR)
+ return createSplat(DAG, dl, VT, V1.getOperand(0));
+
+ // Test if V1 is a BUILD_VECTOR which is equivalent to a SCALAR_TO_VECTOR
+ // (and probably will turn into a SCALAR_TO_VECTOR once legalization
+ // reaches it).
+ if (Lane == 0 && V1.getOpcode() == ISD::BUILD_VECTOR &&
+ !isa<ConstantSDNode>(V1.getOperand(0))) {
+ bool IsScalarToVector = true;
+ for (unsigned i = 1, e = V1.getNumOperands(); i != e; ++i)
+ if (V1.getOperand(i).getOpcode() != ISD::UNDEF) {
+ IsScalarToVector = false;
+ break;
+ }
+ if (IsScalarToVector)
+ return createSplat(DAG, dl, VT, V1.getOperand(0));
+ }
+ return createSplat(DAG, dl, VT, DAG.getConstant(Lane, MVT::i32));
+ }
+
+  // FIXME: We need to support more general vector shuffles; see the
+  // comment below, carried over from the ARM backend, about handling
+  // the general case. For now, let expand handle these.
+ return SDValue();
+
+ // If the shuffle is not directly supported and it has 4 elements, use
+ // the PerfectShuffle-generated table to synthesize it from other shuffles.
+}
+
+// Report true if the BUILD_VECTOR consists of the same base element
+// repeated several times.
+static bool isCommonSplatElement(BuildVectorSDNode *BVN) {
+ unsigned NElts = BVN->getNumOperands();
+ SDValue V0 = BVN->getOperand(0);
+
+ for (unsigned i = 1, e = NElts; i != e; ++i) {
+ if (BVN->getOperand(i) != V0)
+ return false;
+ }
+ return true;
+}
+
+// LowerVECTOR_SHIFT - Lower a vector shift. Try to convert
+// <VT> = SHL/SRA/SRL <VT> by <VT> to Hexagon specific
+// <VT> = SHL/SRA/SRL <VT> by <IT/i32>.
+static SDValue LowerVECTOR_SHIFT(SDValue Op, SelectionDAG &DAG) {
+  BuildVectorSDNode *BVN = nullptr;
+ SDValue V1 = Op.getOperand(0);
+ SDValue V2 = Op.getOperand(1);
+ SDValue V3;
+ SDLoc dl(Op);
+ EVT VT = Op.getValueType();
+
+ if ((BVN = dyn_cast<BuildVectorSDNode>(V1.getNode())) &&
+ isCommonSplatElement(BVN))
+ V3 = V2;
+ else if ((BVN = dyn_cast<BuildVectorSDNode>(V2.getNode())) &&
+ isCommonSplatElement(BVN))
+ V3 = V1;
+ else
+ return SDValue();
+
+ SDValue CommonSplat = BVN->getOperand(0);
+ SDValue Result;
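+  // For instance, (srl v4i16:Vx, splat(#n)) becomes VSRLH(Vx, n), feeding
+  // the splatted scalar straight in as the shift amount.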
+
+ if (VT.getSimpleVT() == MVT::v4i16) {
+ switch (Op.getOpcode()) {
+ case ISD::SRA:
+ Result = DAG.getNode(HexagonISD::VSRAH, dl, VT, V3, CommonSplat);
+ break;
+ case ISD::SHL:
+ Result = DAG.getNode(HexagonISD::VSHLH, dl, VT, V3, CommonSplat);
+ break;
+ case ISD::SRL:
+ Result = DAG.getNode(HexagonISD::VSRLH, dl, VT, V3, CommonSplat);
+ break;
+ default:
+ return SDValue();
+ }
+ } else if (VT.getSimpleVT() == MVT::v2i32) {
+ switch (Op.getOpcode()) {
+ case ISD::SRA:
+ Result = DAG.getNode(HexagonISD::VSRAW, dl, VT, V3, CommonSplat);
+ break;
+ case ISD::SHL:
+ Result = DAG.getNode(HexagonISD::VSHLW, dl, VT, V3, CommonSplat);
+ break;
+ case ISD::SRL:
+ Result = DAG.getNode(HexagonISD::VSRLW, dl, VT, V3, CommonSplat);
+ break;
+ default:
+ return SDValue();
+ }
+ } else {
+ return SDValue();
+ }
+
+ return DAG.getNode(ISD::BITCAST, dl, VT, Result);
+}
+
+SDValue
+HexagonTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
+ BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode());
+ SDLoc dl(Op);
+ EVT VT = Op.getValueType();
+
+ unsigned Size = VT.getSizeInBits();
+
+ // A vector larger than 64 bits cannot be represented in Hexagon.
+ // Expand will split the vector.
+ if (Size > 64)
+ return SDValue();
+
+ APInt APSplatBits, APSplatUndef;
+ unsigned SplatBitSize;
+ bool HasAnyUndefs;
+ unsigned NElts = BVN->getNumOperands();
+
+ // Try to generate a SPLAT instruction.
+ if ((VT.getSimpleVT() == MVT::v4i8 || VT.getSimpleVT() == MVT::v4i16) &&
+ (BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
+ HasAnyUndefs, 0, true) && SplatBitSize <= 16)) {
+ unsigned SplatBits = APSplatBits.getZExtValue();
+ int32_t SextVal = ((int32_t) (SplatBits << (32 - SplatBitSize)) >>
+ (32 - SplatBitSize));
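+    // SextVal is the splat constant sign-extended from SplatBitSize to
+    // 32 bits; e.g. an 8-bit 0xff becomes -1.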
+ return createSplat(DAG, dl, VT, DAG.getConstant(SextVal, MVT::i32));
+ }
+
+ // Try to generate COMBINE to build v2i32 vectors.
+ if (VT.getSimpleVT() == MVT::v2i32) {
+ SDValue V0 = BVN->getOperand(0);
+ SDValue V1 = BVN->getOperand(1);
+
+ if (V0.getOpcode() == ISD::UNDEF)
+ V0 = DAG.getConstant(0, MVT::i32);
+ if (V1.getOpcode() == ISD::UNDEF)
+ V1 = DAG.getConstant(0, MVT::i32);
+
+ ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(V0);
+ ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(V1);
+ // If the element isn't a constant, it is in a register:
+ // generate a COMBINE Register Register instruction.
+ if (!C0 || !C1)
+ return DAG.getNode(HexagonISD::COMBINE, dl, VT, V1, V0);
+
+    // If one of the operands is an 8-bit integer constant, generate
+    // a COMBINE immediate-immediate instruction.
+ if (isInt<8>(C0->getSExtValue()) ||
+ isInt<8>(C1->getSExtValue()))
+ return DAG.getNode(HexagonISD::COMBINE, dl, VT, V1, V0);
+ }
+
+ // Try to generate a S2_packhl to build v2i16 vectors.
+ if (VT.getSimpleVT() == MVT::v2i16) {
+ for (unsigned i = 0, e = NElts; i != e; ++i) {
+ if (BVN->getOperand(i).getOpcode() == ISD::UNDEF)
+ continue;
+ ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(BVN->getOperand(i));
+ // If the element isn't a constant, it is in a register:
+ // generate a S2_packhl instruction.
+ if (!Cst) {
+ SDValue pack = DAG.getNode(HexagonISD::PACKHL, dl, MVT::v4i16,
+ BVN->getOperand(1), BVN->getOperand(0));
+
+ return DAG.getTargetExtractSubreg(Hexagon::subreg_loreg, dl, MVT::v2i16,
+ pack);
+ }
+ }
+ }
+
+ // In the general case, generate a CONST32 or a CONST64 for constant vectors,
+ // and insert_vector_elt for all the other cases.
+ uint64_t Res = 0;
+ unsigned EltSize = Size / NElts;
+ SDValue ConstVal;
+ uint64_t Mask = ~uint64_t(0ULL) >> (64 - EltSize);
+ bool HasNonConstantElements = false;
+
+ for (unsigned i = 0, e = NElts; i != e; ++i) {
+ // LLVM's BUILD_VECTOR operands are in Little Endian mode, whereas Hexagon's
+ // combine, const64, etc. are Big Endian.
+ unsigned OpIdx = NElts - i - 1;
+ SDValue Operand = BVN->getOperand(OpIdx);
+ if (Operand.getOpcode() == ISD::UNDEF)
+ continue;
+
+ int64_t Val = 0;
+ if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Operand))
+ Val = Cst->getSExtValue();
+ else
+ HasNonConstantElements = true;
+
+ Val &= Mask;
+ Res = (Res << EltSize) | Val;
+ }
+
+ if (Size == 64)
+ ConstVal = DAG.getConstant(Res, MVT::i64);
+ else
+ ConstVal = DAG.getConstant(Res, MVT::i32);
+
+ // When there are non constant operands, add them with INSERT_VECTOR_ELT to
+ // ConstVal, the constant part of the vector.
+ if (HasNonConstantElements) {
+ EVT EltVT = VT.getVectorElementType();
+ SDValue Width = DAG.getConstant(EltVT.getSizeInBits(), MVT::i64);
+ SDValue Shifted = DAG.getNode(ISD::SHL, dl, MVT::i64, Width,
+ DAG.getConstant(32, MVT::i64));
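+    // As built here, the i64 passed to INSERT_riv/INSERT_rdv packs the
+    // field width in its high 32 bits and the bit offset in its low 32.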
+
+ for (unsigned i = 0, e = NElts; i != e; ++i) {
+ // LLVM's BUILD_VECTOR operands are in Little Endian mode, whereas Hexagon
+ // is Big Endian.
+ unsigned OpIdx = NElts - i - 1;
+ SDValue Operand = BVN->getOperand(OpIdx);
+ if (dyn_cast<ConstantSDNode>(Operand))
+ // This operand is already in ConstVal.
+ continue;
+
+ if (VT.getSizeInBits() == 64 &&
+ Operand.getValueType().getSizeInBits() == 32) {
+ SDValue C = DAG.getConstant(0, MVT::i32);
+ Operand = DAG.getNode(HexagonISD::COMBINE, dl, VT, C, Operand);
+ }
+
+ SDValue Idx = DAG.getConstant(OpIdx, MVT::i64);
+ SDValue Offset = DAG.getNode(ISD::MUL, dl, MVT::i64, Idx, Width);
+ SDValue Combined = DAG.getNode(ISD::OR, dl, MVT::i64, Shifted, Offset);
+ const SDValue Ops[] = {ConstVal, Operand, Combined};
+
+ if (VT.getSizeInBits() == 32)
+ ConstVal = DAG.getNode(HexagonISD::INSERT_riv, dl, MVT::i32, Ops);
+ else
+ ConstVal = DAG.getNode(HexagonISD::INSERT_rdv, dl, MVT::i64, Ops);
+ }
+ }
+
+ return DAG.getNode(ISD::BITCAST, dl, VT, ConstVal);
+}
+
+SDValue
+HexagonTargetLowering::LowerCONCAT_VECTORS(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc dl(Op);
+ EVT VT = Op.getValueType();
+ unsigned NElts = Op.getNumOperands();
+ SDValue Vec = Op.getOperand(0);
+ EVT VecVT = Vec.getValueType();
+ SDValue Width = DAG.getConstant(VecVT.getSizeInBits(), MVT::i64);
+ SDValue Shifted = DAG.getNode(ISD::SHL, dl, MVT::i64, Width,
+ DAG.getConstant(32, MVT::i64));
+ SDValue ConstVal = DAG.getConstant(0, MVT::i64);
+
+ ConstantSDNode *W = dyn_cast<ConstantSDNode>(Width);
+ ConstantSDNode *S = dyn_cast<ConstantSDNode>(Shifted);
+
+ if ((VecVT.getSimpleVT() == MVT::v2i16) && (NElts == 2) && W && S) {
+ if ((W->getZExtValue() == 32) && ((S->getZExtValue() >> 32) == 32)) {
+ // We are trying to concat two v2i16 to a single v4i16.
+ SDValue Vec0 = Op.getOperand(1);
+ SDValue Combined = DAG.getNode(HexagonISD::COMBINE, dl, VT, Vec0, Vec);
+ return DAG.getNode(ISD::BITCAST, dl, VT, Combined);
+ }
+ }
+
+ if ((VecVT.getSimpleVT() == MVT::v4i8) && (NElts == 2) && W && S) {
+ if ((W->getZExtValue() == 32) && ((S->getZExtValue() >> 32) == 32)) {
+ // We are trying to concat two v4i8 to a single v8i8.
+ SDValue Vec0 = Op.getOperand(1);
+ SDValue Combined = DAG.getNode(HexagonISD::COMBINE, dl, VT, Vec0, Vec);
+ return DAG.getNode(ISD::BITCAST, dl, VT, Combined);
+ }
+ }
+
+ for (unsigned i = 0, e = NElts; i != e; ++i) {
+ unsigned OpIdx = NElts - i - 1;
+ SDValue Operand = Op.getOperand(OpIdx);
+
+ if (VT.getSizeInBits() == 64 &&
+ Operand.getValueType().getSizeInBits() == 32) {
+ SDValue C = DAG.getConstant(0, MVT::i32);
+ Operand = DAG.getNode(HexagonISD::COMBINE, dl, VT, C, Operand);
+ }
+
+ SDValue Idx = DAG.getConstant(OpIdx, MVT::i64);
+ SDValue Offset = DAG.getNode(ISD::MUL, dl, MVT::i64, Idx, Width);
+ SDValue Combined = DAG.getNode(ISD::OR, dl, MVT::i64, Shifted, Offset);
+ const SDValue Ops[] = {ConstVal, Operand, Combined};
+
+ if (VT.getSizeInBits() == 32)
+ ConstVal = DAG.getNode(HexagonISD::INSERT_riv, dl, MVT::i32, Ops);
+ else
+ ConstVal = DAG.getNode(HexagonISD::INSERT_rdv, dl, MVT::i64, Ops);
+ }
+
+ return DAG.getNode(ISD::BITCAST, dl, VT, ConstVal);
+}
+
+SDValue
+HexagonTargetLowering::LowerEXTRACT_VECTOR(SDValue Op,
+ SelectionDAG &DAG) const {
+ EVT VT = Op.getValueType();
+ int VTN = VT.isVector() ? VT.getVectorNumElements() : 1;
+ SDLoc dl(Op);
+ SDValue Idx = Op.getOperand(1);
+ SDValue Vec = Op.getOperand(0);
+ EVT VecVT = Vec.getValueType();
+ EVT EltVT = VecVT.getVectorElementType();
+ int EltSize = EltVT.getSizeInBits();
+ SDValue Width = DAG.getConstant(Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT ?
+ EltSize : VTN * EltSize, MVT::i64);
+
+ // Constant element number.
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Idx)) {
+ SDValue Offset = DAG.getConstant(C->getZExtValue() * EltSize, MVT::i32);
+ const SDValue Ops[] = {Vec, Width, Offset};
+
+ ConstantSDNode *W = dyn_cast<ConstantSDNode>(Width);
+ assert(W && "Non constant width in LowerEXTRACT_VECTOR");
+
+ SDValue N;
+ // For certain extracts, it is a simple _hi/_lo subreg.
+ if (VecVT.getSimpleVT() == MVT::v2i32) {
+      // v2i32 -> i32: extract the lane as a subregister.
+ if (C->getZExtValue() == 0)
+ N = DAG.getTargetExtractSubreg(Hexagon::subreg_loreg, dl,
+ MVT::i32, Vec);
+ else if (C->getZExtValue() == 1)
+ N = DAG.getTargetExtractSubreg(Hexagon::subreg_hireg, dl,
+ MVT::i32, Vec);
+ else
+ llvm_unreachable("Bad offset");
+ } else if ((VecVT.getSimpleVT() == MVT::v4i16) &&
+ (W->getZExtValue() == 32)) {
+ // v4i16 -> v2i16/i32: extract an aligned 32-bit half as a subregister.
+ if (C->getZExtValue() == 0)
+ N = DAG.getTargetExtractSubreg(Hexagon::subreg_loreg, dl,
+ MVT::i32, Vec);
+ else if (C->getZExtValue() == 2)
+ N = DAG.getTargetExtractSubreg(Hexagon::subreg_hireg, dl,
+ MVT::i32, Vec);
+ else
+ llvm_unreachable("Bad offset");
+ } else if ((VecVT.getSimpleVT() == MVT::v8i8) &&
+ (W->getZExtValue() == 32)) {
+ // v8i8 -> v4i8/i32: extract an aligned 32-bit half as a subregister.
+ if (C->getZExtValue() == 0)
+ N = DAG.getTargetExtractSubreg(Hexagon::subreg_loreg, dl,
+ MVT::i32, Vec);
+ else if (C->getZExtValue() == 4)
+ N = DAG.getTargetExtractSubreg(Hexagon::subreg_hireg, dl,
+ MVT::i32, Vec);
+ else
+ llvm_unreachable("Bad offset");
+ } else if (VecVT.getSizeInBits() == 32) {
+ N = DAG.getNode(HexagonISD::EXTRACTU_ri, dl, MVT::i32, Ops);
+ } else {
+ N = DAG.getNode(HexagonISD::EXTRACTU_rd, dl, MVT::i64, Ops);
+ if (VT.getSizeInBits() == 32)
+ N = DAG.getTargetExtractSubreg(Hexagon::subreg_loreg, dl, MVT::i32, N);
+ }
+
+ return DAG.getNode(ISD::BITCAST, dl, VT, N);
+ }
+
+ // Variable element number.
+ SDValue Offset = DAG.getNode(ISD::MUL, dl, MVT::i32, Idx,
+ DAG.getConstant(EltSize, MVT::i32));
+ SDValue Shifted = DAG.getNode(ISD::SHL, dl, MVT::i64, Width,
+ DAG.getConstant(32, MVT::i64));
+ SDValue Combined = DAG.getNode(ISD::OR, dl, MVT::i64, Shifted, Offset);
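+ // Combined now packs the field width into bits 63:32 and the runtime bit
+ // offset (Idx * EltSize) into bits 31:0; the constant-index path above
+ // passes width and offset as separate operands instead.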
+
+ const SDValue Ops[] = {Vec, Combined};
+
+ SDValue N;
+ if (VecVT.getSizeInBits() == 32) {
+ N = DAG.getNode(HexagonISD::EXTRACTU_riv, dl, MVT::i32, Ops);
+ } else {
+ N = DAG.getNode(HexagonISD::EXTRACTU_rdv, dl, MVT::i64, Ops);
+ if (VT.getSizeInBits() == 32)
+ N = DAG.getTargetExtractSubreg(Hexagon::subreg_loreg, dl, MVT::i32, N);
+ }
+ return DAG.getNode(ISD::BITCAST, dl, VT, N);
+}
+
+SDValue
+HexagonTargetLowering::LowerINSERT_VECTOR(SDValue Op,
+ SelectionDAG &DAG) const {
+ EVT VT = Op.getValueType();
+ int VTN = VT.isVector() ? VT.getVectorNumElements() : 1;
+ SDLoc dl(Op);
+ SDValue Vec = Op.getOperand(0);
+ SDValue Val = Op.getOperand(1);
+ SDValue Idx = Op.getOperand(2);
+ EVT VecVT = Vec.getValueType();
+ EVT EltVT = VecVT.getVectorElementType();
+ int EltSize = EltVT.getSizeInBits();
+ SDValue Width = DAG.getConstant(Op.getOpcode() == ISD::INSERT_VECTOR_ELT ?
+ EltSize : VTN * EltSize, MVT::i64);
+
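+ // Constant element number.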
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Idx)) {
+ SDValue Offset = DAG.getConstant(C->getSExtValue() * EltSize, MVT::i32);
+ const SDValue Ops[] = {Vec, Val, Width, Offset};
+
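+ // E.g. inserting an i16 into lane 2 of a v4i16 gives Width = 16 and
+ // Offset = 32; the 64-bit result takes the INSERT_rd form below.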
+ SDValue N;
+ if (VT.getSizeInBits() == 32)
+ N = DAG.getNode(HexagonISD::INSERT_ri, dl, MVT::i32, Ops);
+ else
+ N = DAG.getNode(HexagonISD::INSERT_rd, dl, MVT::i64, Ops);
+
+ return DAG.getNode(ISD::BITCAST, dl, VT, N);
+ }
+
+ // Variable element number.
+ SDValue Offset = DAG.getNode(ISD::MUL, dl, MVT::i32, Idx,
+ DAG.getConstant(EltSize, MVT::i32));
+ SDValue Shifted = DAG.getNode(ISD::SHL, dl, MVT::i64, Width,
+ DAG.getConstant(32, MVT::i64));
+ SDValue Combined = DAG.getNode(ISD::OR, dl, MVT::i64, Shifted, Offset);
+
+ if (VT.getSizeInBits() == 64 &&
+ Val.getValueType().getSizeInBits() == 32) {
+ SDValue C = DAG.getConstant(0, MVT::i32);
+ Val = DAG.getNode(HexagonISD::COMBINE, dl, VT, C, Val);
+ }
+
+ const SDValue Ops[] = {Vec, Val, Combined};
+
+ SDValue N;
+ if (VT.getSizeInBits() == 32)
+ N = DAG.getNode(HexagonISD::INSERT_riv, dl, MVT::i32, Ops);
+ else
+ N = DAG.getNode(HexagonISD::INSERT_rdv, dl, MVT::i64, Ops);
+
+ return DAG.getNode(ISD::BITCAST, dl, VT, N);
+}
+
bool
HexagonTargetLowering::allowTruncateForTailCall(Type *Ty1, Type *Ty2) const {
// Assuming the caller does not have either a signext or zeroext modifier, and
@@ -1549,7 +2372,19 @@ SDValue
HexagonTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
switch (Op.getOpcode()) {
default: llvm_unreachable("Should not custom lower this!");
- case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
+ case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG);
+ case ISD::INSERT_SUBVECTOR: return LowerINSERT_VECTOR(Op, DAG);
+ case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR(Op, DAG);
+ case ISD::EXTRACT_SUBVECTOR: return LowerEXTRACT_VECTOR(Op, DAG);
+ case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR(Op, DAG);
+ case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);
+ case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
+ case ISD::SRA:
+ case ISD::SHL:
+ case ISD::SRL:
+ return LowerVECTOR_SHIFT(Op, DAG);
+ case ISD::ConstantPool:
+ return LowerConstantPool(Op, DAG);
case ISD::EH_RETURN: return LowerEH_RETURN(Op, DAG);
// Frame & Return address. Currently unimplemented.
case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
@@ -1561,9 +2396,14 @@ HexagonTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
case ISD::VASTART: return LowerVASTART(Op, DAG);
case ISD::BR_JT: return LowerBR_JT(Op, DAG);
+ // Custom lower some vector loads.
+ case ISD::LOAD: return LowerLOAD(Op, DAG);
case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
case ISD::SELECT: return Op;
+ case ISD::SETCC: return LowerSETCC(Op, DAG);
+ case ISD::VSELECT: return LowerVSELECT(Op, DAG);
+ case ISD::CTPOP: return LowerCTPOP(Op, DAG);
case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
case ISD::INLINEASM: return LowerINLINEASM(Op, DAG);
diff --git a/lib/Target/Hexagon/HexagonISelLowering.h b/lib/Target/Hexagon/HexagonISelLowering.h
index 151c28f..34b1ebb 100644
--- a/lib/Target/Hexagon/HexagonISelLowering.h
+++ b/lib/Target/Hexagon/HexagonISelLowering.h
@@ -37,6 +37,10 @@ bool isPositiveHalfWord(SDNode *N);
ADJDYNALLOC,
ARGEXTEND,
+ PIC_ADD,
+ AT_GOT,
+ AT_PCREL,
+
CMPICC, // Compare two GPR operands, set icc.
CMPFCC, // Compare two FP operands, set fcc.
BRICC, // Branch to dest on icc condition
@@ -54,23 +58,44 @@ bool isPositiveHalfWord(SDNode *N);
CALLR,
RET_FLAG, // Return with a flag operand.
- BR_JT, // Jump table.
- BARRIER, // Memory barrier
+ BR_JT, // Branch through jump table.
+ BARRIER, // Memory barrier.
+ JT, // Jump table.
+ CP, // Constant pool.
POPCOUNT,
COMBINE,
- WrapperJT,
- WrapperCP,
- WrapperCombineII,
- WrapperCombineRR,
- WrapperCombineRI_V4,
- WrapperCombineIR_V4,
- WrapperPackhl,
- WrapperSplatB,
- WrapperSplatH,
- WrapperShuffEB,
- WrapperShuffEH,
- WrapperShuffOB,
- WrapperShuffOH,
+ PACKHL,
+ VSPLATB,
+ VSPLATH,
+ SHUFFEB,
+ SHUFFEH,
+ SHUFFOB,
+ SHUFFOH,
+ VSXTBH,
+ VSXTBW,
+ VSRAW,
+ VSRAH,
+ VSRLW,
+ VSRLH,
+ VSHLW,
+ VSHLH,
+ VCMPBEQ,
+ VCMPBGT,
+ VCMPBGTU,
+ VCMPHEQ,
+ VCMPHGT,
+ VCMPHGTU,
+ VCMPWEQ,
+ VCMPWGT,
+ VCMPWGTU,
+ INSERT_ri,
+ INSERT_rd,
+ INSERT_riv,
+ INSERT_rdv,
+ EXTRACTU_ri,
+ EXTRACTU_rd,
+ EXTRACTU_riv,
+ EXTRACTU_rdv,
TC_RETURN,
EH_RETURN,
DCFETCH
@@ -85,6 +110,8 @@ bool isPositiveHalfWord(SDNode *N);
bool CanReturnSmallStruct(const Function* CalleeFn,
unsigned& RetSize) const;
+ void promoteLdStType(EVT VT, EVT PromotedLdStVT);
+
public:
const HexagonSubtarget *Subtarget;
explicit HexagonTargetLowering(const TargetMachine &TM,
@@ -110,10 +137,17 @@ bool isPositiveHalfWord(SDNode *N);
bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const override;
- SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
+ // Should we expand the build vector with shuffles?
+ bool shouldExpandBuildVectorWithShuffles(EVT VT,
+ unsigned DefinedValues) const override;
+ SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
const char *getTargetNodeName(unsigned Opcode) const override;
- SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerEXTRACT_VECTOR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerINSERT_VECTOR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerEH_LABEL(SDValue Op, SelectionDAG &DAG) const;
@@ -137,9 +171,13 @@ bool isPositiveHalfWord(SDNode *N);
const SmallVectorImpl<SDValue> &OutVals,
SDValue Callee) const;
+ SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerVSELECT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG& DAG) const;
SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerReturn(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
@@ -170,6 +208,15 @@ bool isPositiveHalfWord(SDNode *N);
const std::string &Constraint,
MVT VT) const override;
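+ // A sketch of the intent: map the 'o' ("offsettable memory") and the
+ // target-specific 'v' inline-asm memory constraints to their InlineAsm
+ // codes; everything else falls back to the generic handling.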
+ unsigned getInlineAsmMemConstraint(
+ const std::string &ConstraintCode) const override {
+ if (ConstraintCode == "o")
+ return InlineAsm::Constraint_o;
+ else if (ConstraintCode == "v")
+ return InlineAsm::Constraint_v;
+ return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
+ }
+
// Intrinsics
SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
/// isLegalAddressingMode - Return true if the addressing mode represented
diff --git a/lib/Target/Hexagon/HexagonInstrFormats.td b/lib/Target/Hexagon/HexagonInstrFormats.td
index 3d04678..36a7e9f 100644
--- a/lib/Target/Hexagon/HexagonInstrFormats.td
+++ b/lib/Target/Hexagon/HexagonInstrFormats.td
@@ -76,7 +76,7 @@ class OpcodeHexagon {
class InstHexagon<dag outs, dag ins, string asmstr, list<dag> pattern,
string cstr, InstrItinClass itin, IType type>
- : Instruction, OpcodeHexagon {
+ : Instruction {
let Namespace = "Hexagon";
dag OutOperandList = outs;
@@ -84,18 +84,18 @@ class InstHexagon<dag outs, dag ins, string asmstr, list<dag> pattern,
let AsmString = asmstr;
let Pattern = pattern;
let Constraints = cstr;
- let Itinerary = itin;
- let Size = 4;
-
- // SoftFail is a field the disassembler can use to provide a way for
- // instructions to not match without killing the whole decode process. It is
- // mainly used for ARM, but Tablegen expects this field to exist or it fails
- // to build the decode table.
- field bits<32> SoftFail = 0;
-
- // *** Must match MCTargetDesc/HexagonBaseInfo.h ***
-
- // Instruction type according to the ISA.
+ let Itinerary = itin;
+ let Size = 4;
+
+ // SoftFail is a field the disassembler can use to provide a way for
+ // instructions to not match without killing the whole decode process. It is
+ // mainly used for ARM, but Tablegen expects this field to exist or it fails
+ // to build the decode table.
+ field bits<32> SoftFail = 0;
+
+ // *** Must match MCTargetDesc/HexagonBaseInfo.h ***
+
+ // Instruction type according to the ISA.
IType Type = type;
let TSFlags{4-0} = Type.Value;
@@ -197,7 +197,7 @@ class InstHexagon<dag outs, dag ins, string asmstr, list<dag> pattern,
let mayLoad = 1 in
class LDInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
string cstr = "", InstrItinClass itin = LD_tc_ld_SLOT01>
- : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeLD>;
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeLD>, OpcodeHexagon;
let mayLoad = 1 in
class LDInst2<dag outs, dag ins, string asmstr, list<dag> pattern = [],
@@ -217,7 +217,7 @@ class LDInstPost<dag outs, dag ins, string asmstr, list<dag> pattern = [],
let mayLoad = 1 in
class LD0Inst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
string cstr = "", InstrItinClass itin=LD_tc_ld_SLOT0>
- : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeLD>;
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeLD>, OpcodeHexagon;
// ST Instruction Class in V2/V3 can take SLOT0 only.
// ST Instruction Class in V4 can take SLOT0 & SLOT1.
@@ -225,7 +225,7 @@ class LD0Inst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
let mayStore = 1 in
class STInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
string cstr = "", InstrItinClass itin = ST_tc_st_SLOT01>
- : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeST>;
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeST>, OpcodeHexagon;
class STInst2<dag outs, dag ins, string asmstr, list<dag> pattern = [],
string cstr = "">
@@ -234,7 +234,7 @@ class STInst2<dag outs, dag ins, string asmstr, list<dag> pattern = [],
let mayStore = 1 in
class ST0Inst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
string cstr = "", InstrItinClass itin = ST_tc_ld_SLOT0>
- : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeST>;
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeST>, OpcodeHexagon;
// ST Instruction Class in V2/V3 can take SLOT0 only.
// ST Instruction Class in V4 can take SLOT0 & SLOT1.
@@ -247,13 +247,14 @@ class STInstPost<dag outs, dag ins, string asmstr, list<dag> pattern = [],
// In V2/V3 we used ST for this but in v4 ST can take SLOT0 or SLOT1.
class SYSInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
string cstr = "", InstrItinClass itin = ST_tc_3stall_SLOT0>
- : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeSYSTEM>;
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeSYSTEM>,
+ OpcodeHexagon;
// ALU32 Instruction Class in V2/V3/V4.
// Definition of the instruction class NOT CHANGED.
class ALU32Inst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
string cstr = "", InstrItinClass itin = ALU32_2op_tc_1_SLOT0123>
- : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeALU32>;
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeALU32>, OpcodeHexagon;
// ALU64 Instruction Class in V2/V3.
// XTYPE Instruction Class in V4.
@@ -261,7 +262,8 @@ class ALU32Inst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
// Name of the Instruction Class changed from ALU64 to XTYPE from V2/V3 to V4.
class ALU64Inst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
string cstr = "", InstrItinClass itin = ALU64_tc_2_SLOT23>
- : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeXTYPE>;
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeXTYPE>,
+ OpcodeHexagon;
class ALU64_acc<dag outs, dag ins, string asmstr, list<dag> pattern = [],
string cstr = "", InstrItinClass itin = ALU64_tc_2_SLOT23>
@@ -274,7 +276,8 @@ class ALU64_acc<dag outs, dag ins, string asmstr, list<dag> pattern = [],
// Name of the Instruction Class changed from M to XTYPE from V2/V3 to V4.
class MInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
string cstr = "", InstrItinClass itin = M_tc_3x_SLOT23>
- : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeXTYPE>;
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeXTYPE>,
+ OpcodeHexagon;
// M Instruction Class in V2/V3.
// XTYPE Instruction Class in V4.
@@ -290,7 +293,8 @@ class MInst_acc<dag outs, dag ins, string asmstr, list<dag> pattern = [],
// Name of the Instruction Class changed from S to XTYPE from V2/V3 to V4.
class SInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
string cstr = "", InstrItinClass itin = S_2op_tc_1_SLOT23>
- : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeXTYPE>;
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeXTYPE>,
+ OpcodeHexagon;
// S Instruction Class in V2/V3.
// XTYPE Instruction Class in V4.
@@ -304,34 +308,37 @@ class SInst_acc<dag outs, dag ins, string asmstr, list<dag> pattern = [],
// Definition of the instruction class NOT CHANGED.
class JInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
string cstr = "", InstrItinClass itin = J_tc_2early_SLOT23>
- : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeJ>;
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeJ>, OpcodeHexagon;
// JR Instruction Class in V2/V3/V4.
// Definition of the instruction class NOT CHANGED.
class JRInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
string cstr = "", InstrItinClass itin = J_tc_2early_SLOT2>
- : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeJR>;
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeJR>, OpcodeHexagon;
// CR Instruction Class in V2/V3/V4.
// Definition of the instruction class NOT CHANGED.
class CRInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
string cstr = "", InstrItinClass itin = CR_tc_2early_SLOT3>
- : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCR>;
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCR>, OpcodeHexagon;
let isCodeGenOnly = 1, isPseudo = 1 in
class Endloop<dag outs, dag ins, string asmstr, list<dag> pattern = [],
string cstr = "", InstrItinClass itin = J_tc_2early_SLOT0123>
- : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeENDLOOP>;
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeENDLOOP>,
+ OpcodeHexagon;
let isCodeGenOnly = 1, isPseudo = 1 in
class Pseudo<dag outs, dag ins, string asmstr, list<dag> pattern = [],
string cstr = "">
- : InstHexagon<outs, ins, asmstr, pattern, cstr, PSEUDO, TypePSEUDO>;
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, PSEUDO, TypePSEUDO>,
+ OpcodeHexagon;
let isCodeGenOnly = 1, isPseudo = 1 in
class PseudoM<dag outs, dag ins, string asmstr, list<dag> pattern = [],
string cstr="">
- : InstHexagon<outs, ins, asmstr, pattern, cstr, PSEUDOM, TypePSEUDO>;
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, PSEUDOM, TypePSEUDO>,
+ OpcodeHexagon;
//===----------------------------------------------------------------------===//
// Instruction Classes Definitions -
diff --git a/lib/Target/Hexagon/HexagonInstrFormatsV4.td b/lib/Target/Hexagon/HexagonInstrFormatsV4.td
index 5fec80b..7f7b2c9 100644
--- a/lib/Target/Hexagon/HexagonInstrFormatsV4.td
+++ b/lib/Target/Hexagon/HexagonInstrFormatsV4.td
@@ -17,10 +17,88 @@
// *** Must match BaseInfo.h ***
//----------------------------------------------------------------------------//
-def TypeMEMOP : IType<9>;
-def TypeNV : IType<10>;
+def TypeMEMOP : IType<9>;
+def TypeNV : IType<10>;
+def TypeDUPLEX : IType<11>;
def TypeCOMPOUND : IType<12>;
-def TypePREFIX : IType<30>;
+def TypeAG_VX : IType<28>;
+def TypeAG_VM : IType<29>;
+def TypePREFIX : IType<30>;
+
+// Duplex Instruction Class Declaration
+//===----------------------------------------------------------------------===//
+
+class OpcodeDuplex {
+ field bits<32> Inst = ?; // Default to an invalid insn.
+ bits<4> IClass = 0; // ICLASS
+ bits<13> ISubHi = 0; // Low sub-insn
+ bits<13> ISubLo = 0; // High sub-insn
+
+ let Inst{31-29} = IClass{3-1};
+ let Inst{13} = IClass{0};
+ let Inst{15-14} = 0;
+ let Inst{28-16} = ISubHi;
+ let Inst{12-0} = ISubLo;
+}
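+
+// Note: bits 15-14 of an instruction word are the packet parse bits, and
+// forcing them to zero above is what marks the word as a duplex. The 4-bit
+// duplex ICLASS is split across the word: its top three bits land in
+// Inst{31-29} and its low bit in Inst{13}.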
+
+class InstDuplex<bits<4> iClass, list<dag> pattern = [],
+ string cstr = "">
+ : Instruction, OpcodeDuplex {
+ let Namespace = "Hexagon";
+ IType Type = TypeDUPLEX; // Uses slots 0 and 1.
+ let isCodeGenOnly = 1;
+ let hasSideEffects = 0;
+ dag OutOperandList = (outs);
+ dag InOperandList = (ins);
+ let IClass = iClass;
+ let Constraints = cstr;
+ let Itinerary = DUPLEX;
+ let Size = 4;
+
+ // SoftFail is a field the disassembler can use to provide a way for
+ // instructions to not match without killing the whole decode process. It is
+ // mainly used for ARM, but Tablegen expects this field to exist or it fails
+ // to build the decode table.
+ field bits<32> SoftFail = 0;
+
+ // *** Must match MCTargetDesc/HexagonBaseInfo.h ***
+
+ let TSFlags{4-0} = Type.Value;
+
+ // Predicated instructions.
+ bits<1> isPredicated = 0;
+ let TSFlags{6} = isPredicated;
+ bits<1> isPredicatedFalse = 0;
+ let TSFlags{7} = isPredicatedFalse;
+ bits<1> isPredicatedNew = 0;
+ let TSFlags{8} = isPredicatedNew;
+
+ // New-value insn helper fields.
+ bits<1> isNewValue = 0;
+ let TSFlags{9} = isNewValue; // New-value consumer insn.
+ bits<1> hasNewValue = 0;
+ let TSFlags{10} = hasNewValue; // New-value producer insn.
+ bits<3> opNewValue = 0;
+ let TSFlags{13-11} = opNewValue; // New-value produced operand.
+ bits<1> isNVStorable = 0;
+ let TSFlags{14} = isNVStorable; // Store that can become new-value store.
+ bits<1> isNVStore = 0;
+ let TSFlags{15} = isNVStore; // New-value store insn.
+
+ // Immediate extender helper fields.
+ bits<1> isExtendable = 0;
+ let TSFlags{16} = isExtendable; // Insn may be extended.
+ bits<1> isExtended = 0;
+ let TSFlags{17} = isExtended; // Insn must be extended.
+ bits<3> opExtendable = 0;
+ let TSFlags{20-18} = opExtendable; // Which operand may be extended.
+ bits<1> isExtentSigned = 0;
+ let TSFlags{21} = isExtentSigned; // Signed or unsigned range.
+ bits<5> opExtentBits = 0;
+ let TSFlags{26-22} = opExtentBits; // Number of bits of range before extending.
+ bits<2> opExtentAlign = 0;
+ let TSFlags{28-27} = opExtentAlign; // Alignment exponent before extending.
+}
//----------------------------------------------------------------------------//
// Instruction Classes Definitions
@@ -31,7 +109,7 @@ def TypePREFIX : IType<30>;
//
class NVInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
string cstr = "", InstrItinClass itin = NCJ_tc_3or4stall_SLOT0>
- : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeNV>;
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeNV>, OpcodeHexagon;
class NVInst_V4<dag outs, dag ins, string asmstr, list<dag> pattern = [],
string cstr = "", InstrItinClass itin = NCJ_tc_3or4stall_SLOT0>
@@ -56,7 +134,8 @@ class NCJInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
let mayLoad = 1, mayStore = 1 in
class MEMInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
string cstr = "", InstrItinClass itin = V4LDST_tc_st_SLOT0>
- : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeMEMOP>;
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeMEMOP>,
+ OpcodeHexagon;
class MEMInst_V4<dag outs, dag ins, string asmstr, list<dag> pattern = [],
string cstr = "", InstrItinClass itin = V4LDST_tc_st_SLOT0>
@@ -65,8 +144,9 @@ class MEMInst_V4<dag outs, dag ins, string asmstr, list<dag> pattern = [],
let isCodeGenOnly = 1 in
class EXTENDERInst<dag outs, dag ins, string asmstr, list<dag> pattern = []>
: InstHexagon<outs, ins, asmstr, pattern, "", EXTENDER_tc_1_SLOT0123,
- TypePREFIX>;
+ TypePREFIX>, OpcodeHexagon;
class CJInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
string cstr = "">
- : InstHexagon<outs, ins, asmstr, pattern, cstr, COMPOUND, TypeCOMPOUND>;
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, COMPOUND, TypeCOMPOUND>,
+ OpcodeHexagon;
diff --git a/lib/Target/Hexagon/HexagonInstrInfo.cpp b/lib/Target/Hexagon/HexagonInstrInfo.cpp
index 9bae12c..fbf1ca9 100644
--- a/lib/Target/Hexagon/HexagonInstrInfo.cpp
+++ b/lib/Target/Hexagon/HexagonInstrInfo.cpp
@@ -62,10 +62,8 @@ const int Hexagon_MEMB_AUTOINC_MIN = -8;
void HexagonInstrInfo::anchor() {}
HexagonInstrInfo::HexagonInstrInfo(HexagonSubtarget &ST)
- : HexagonGenInstrInfo(Hexagon::ADJCALLSTACKDOWN, Hexagon::ADJCALLSTACKUP),
- RI(ST), Subtarget(ST) {
-}
-
+ : HexagonGenInstrInfo(Hexagon::ADJCALLSTACKDOWN, Hexagon::ADJCALLSTACKUP),
+ RI(), Subtarget(ST) {}
/// isLoadFromStackSlot - If the specified machine instruction is a direct
/// load from a stack slot, return the virtual or physical register number of
@@ -159,15 +157,19 @@ HexagonInstrInfo::InsertBranch(MachineBasicBlock &MBB,MachineBasicBlock *TBB,
}
BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB);
} else {
- BuildMI(&MBB, DL,
- get(BccOpc)).addReg(Cond[regPos].getReg()).addMBB(TBB);
+ // If Cond[0] is a basic block, insert ENDLOOP0.
+ if (Cond[0].isMBB())
+ BuildMI(&MBB, DL, get(Hexagon::ENDLOOP0)).addMBB(Cond[0].getMBB());
+ else
+ BuildMI(&MBB, DL,
+ get(BccOpc)).addReg(Cond[regPos].getReg()).addMBB(TBB);
}
return 1;
}
+ // We don't handle ENDLOOP0 with a conditional branch in AnalyzeBranch.
BuildMI(&MBB, DL, get(BccOpc)).addReg(Cond[regPos].getReg()).addMBB(TBB);
BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB);
-
return 2;
}
@@ -211,9 +213,11 @@ bool HexagonInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
return false;
--I;
}
-
+
+ bool JumpToBlock = I->getOpcode() == Hexagon::J2_jump &&
+ I->getOperand(0).isMBB();
// Delete the JMP if it's equivalent to a fall-through.
- if (AllowModify && I->getOpcode() == Hexagon::J2_jump &&
+ if (AllowModify && JumpToBlock &&
MBB.isLayoutSuccessor(I->getOperand(0).getMBB())) {
DEBUG(dbgs()<< "\nErasing the jump to successor block\n";);
I->eraseFromParent();
@@ -243,6 +247,14 @@ bool HexagonInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
} while(I);
int LastOpcode = LastInst->getOpcode();
+ int SecLastOpcode = SecondLastInst ? SecondLastInst->getOpcode() : 0;
+ // If the branch target is not a basic block, it could be a tail call.
+ // (It is one if the target is a function.)
+ if (LastOpcode == Hexagon::J2_jump && !LastInst->getOperand(0).isMBB())
+ return true;
+ if (SecLastOpcode == Hexagon::J2_jump &&
+ !SecondLastInst->getOperand(0).isMBB())
+ return true;
bool LastOpcodeHasJMP_c = PredOpcodeHasJMP_c(LastOpcode);
bool LastOpcodeHasNot = PredOpcodeHasNot(LastOpcode);
@@ -270,8 +282,6 @@ bool HexagonInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
return true;
}
- int SecLastOpcode = SecondLastInst->getOpcode();
-
bool SecLastOpcodeHasJMP_c = PredOpcodeHasJMP_c(SecLastOpcode);
bool SecLastOpcodeHasNot = PredOpcodeHasNot(SecLastOpcode);
if (SecLastOpcodeHasJMP_c && (LastOpcode == Hexagon::J2_jump)) {
@@ -308,30 +318,35 @@ bool HexagonInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
unsigned HexagonInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
- int BOpc = Hexagon::J2_jump;
- int BccOpc = Hexagon::J2_jumpt;
- int BccOpcNot = Hexagon::J2_jumpf;
-
MachineBasicBlock::iterator I = MBB.end();
if (I == MBB.begin()) return 0;
--I;
- if (I->getOpcode() != BOpc && I->getOpcode() != BccOpc &&
- I->getOpcode() != BccOpcNot)
- return 0;
-
- // Remove the branch.
- I->eraseFromParent();
+ unsigned Opc1 = I->getOpcode();
+ switch (Opc1) {
+ case Hexagon::J2_jump:
+ case Hexagon::J2_jumpt:
+ case Hexagon::J2_jumpf:
+ case Hexagon::ENDLOOP0:
+ I->eraseFromParent();
+ break;
+ default:
+ return 0;
+ }
I = MBB.end();
if (I == MBB.begin()) return 1;
--I;
- if (I->getOpcode() != BccOpc && I->getOpcode() != BccOpcNot)
- return 1;
-
- // Remove the branch.
- I->eraseFromParent();
- return 2;
+ unsigned Opc2 = I->getOpcode();
+ switch (Opc2) {
+ case Hexagon::J2_jumpt:
+ case Hexagon::J2_jumpf:
+ case Hexagon::ENDLOOP0:
+ I->eraseFromParent();
+ return 2;
+ default:
+ return 1;
+ }
}
@@ -549,12 +564,95 @@ void HexagonInstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
SmallVectorImpl<MachineInstr*> &NewMIs) const {
llvm_unreachable("Unimplemented");
}
+bool
+HexagonInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
+ const HexagonRegisterInfo &TRI = getRegisterInfo();
+ MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
+ MachineBasicBlock &MBB = *MI->getParent();
+ DebugLoc DL = MI->getDebugLoc();
+ unsigned Opc = MI->getOpcode();
+ switch (Opc) {
+ case Hexagon::TFR_PdTrue: {
+ unsigned Reg = MI->getOperand(0).getReg();
+ BuildMI(MBB, MI, DL, get(Hexagon::C2_orn), Reg)
+ .addReg(Reg, RegState::Undef)
+ .addReg(Reg, RegState::Undef);
+ MBB.erase(MI);
+ return true;
+ }
+ case Hexagon::TFR_PdFalse: {
+ unsigned Reg = MI->getOperand(0).getReg();
+ BuildMI(MBB, MI, DL, get(Hexagon::C2_andn), Reg)
+ .addReg(Reg, RegState::Undef)
+ .addReg(Reg, RegState::Undef);
+ MBB.erase(MI);
+ return true;
+ }
+ case Hexagon::VMULW: {
+ // Expand a 64-bit vector multiply into 2 32-bit scalar multiplies.
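+ // E.g. with Dst = D0 (R1:0), Src1 = D1 (R3:2), Src2 = D2 (R5:4), the
+ // expansion is "r1 = mpyi(r3, r5)" followed by "r0 = mpyi(r2, r4)".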
+ unsigned DstReg = MI->getOperand(0).getReg();
+ unsigned Src1Reg = MI->getOperand(1).getReg();
+ unsigned Src2Reg = MI->getOperand(2).getReg();
+ unsigned Src1SubHi = TRI.getSubReg(Src1Reg, Hexagon::subreg_hireg);
+ unsigned Src1SubLo = TRI.getSubReg(Src1Reg, Hexagon::subreg_loreg);
+ unsigned Src2SubHi = TRI.getSubReg(Src2Reg, Hexagon::subreg_hireg);
+ unsigned Src2SubLo = TRI.getSubReg(Src2Reg, Hexagon::subreg_loreg);
+ BuildMI(MBB, MI, MI->getDebugLoc(), get(Hexagon::M2_mpyi),
+ TRI.getSubReg(DstReg, Hexagon::subreg_hireg)).addReg(Src1SubHi)
+ .addReg(Src2SubHi);
+ BuildMI(MBB, MI, MI->getDebugLoc(), get(Hexagon::M2_mpyi),
+ TRI.getSubReg(DstReg, Hexagon::subreg_loreg)).addReg(Src1SubLo)
+ .addReg(Src2SubLo);
+ MBB.erase(MI);
+ MRI.clearKillFlags(Src1SubHi);
+ MRI.clearKillFlags(Src1SubLo);
+ MRI.clearKillFlags(Src2SubHi);
+ MRI.clearKillFlags(Src2SubLo);
+ return true;
+ }
+ case Hexagon::VMULW_ACC: {
+ // Expand 64-bit vector multiply with addition into 2 scalar multiplies.
+ unsigned DstReg = MI->getOperand(0).getReg();
+ unsigned Src1Reg = MI->getOperand(1).getReg();
+ unsigned Src2Reg = MI->getOperand(2).getReg();
+ unsigned Src3Reg = MI->getOperand(3).getReg();
+ unsigned Src1SubHi = TRI.getSubReg(Src1Reg, Hexagon::subreg_hireg);
+ unsigned Src1SubLo = TRI.getSubReg(Src1Reg, Hexagon::subreg_loreg);
+ unsigned Src2SubHi = TRI.getSubReg(Src2Reg, Hexagon::subreg_hireg);
+ unsigned Src2SubLo = TRI.getSubReg(Src2Reg, Hexagon::subreg_loreg);
+ unsigned Src3SubHi = TRI.getSubReg(Src3Reg, Hexagon::subreg_hireg);
+ unsigned Src3SubLo = TRI.getSubReg(Src3Reg, Hexagon::subreg_loreg);
+ BuildMI(MBB, MI, MI->getDebugLoc(), get(Hexagon::M2_maci),
+ TRI.getSubReg(DstReg, Hexagon::subreg_hireg)).addReg(Src1SubHi)
+ .addReg(Src2SubHi).addReg(Src3SubHi);
+ BuildMI(MBB, MI, MI->getDebugLoc(), get(Hexagon::M2_maci),
+ TRI.getSubReg(DstReg, Hexagon::subreg_loreg)).addReg(Src1SubLo)
+ .addReg(Src2SubLo).addReg(Src3SubLo);
+ MBB.erase(MI);
+ MRI.clearKillFlags(Src1SubHi);
+ MRI.clearKillFlags(Src1SubLo);
+ MRI.clearKillFlags(Src2SubHi);
+ MRI.clearKillFlags(Src2SubLo);
+ MRI.clearKillFlags(Src3SubHi);
+ MRI.clearKillFlags(Src3SubLo);
+ return true;
+ }
+ case Hexagon::TCRETURNi:
+ MI->setDesc(get(Hexagon::J2_jump));
+ return true;
+ case Hexagon::TCRETURNr:
+ MI->setDesc(get(Hexagon::J2_jumpr));
+ return true;
+ }
+
+ return false;
+}
MachineInstr *HexagonInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
- MachineInstr* MI,
- const SmallVectorImpl<unsigned> &Ops,
- int FI) const {
+ MachineInstr *MI,
+ ArrayRef<unsigned> Ops,
+ int FI) const {
// Hexagon_TODO: Implement.
return nullptr;
}
@@ -641,7 +739,7 @@ bool HexagonInstrInfo::isPredicable(MachineInstr *MI) const {
switch(Opc) {
case Hexagon::A2_tfrsi:
- return isInt<12>(MI->getOperand(1).getImm());
+ return (isOperandExtended(MI, 1) && isConstExtended(MI)) ||
+        isInt<12>(MI->getOperand(1).getImm());
case Hexagon::S2_storerd_io:
return isShiftedUInt<6,3>(MI->getOperand(1).getImm());
@@ -1036,6 +1134,8 @@ SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
//
bool HexagonInstrInfo::
ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
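+ // An ENDLOOP0 branch carries the loop-top MBB in Cond[0]; it has no
+ // invertible form, so returning true reports that it cannot be reversed.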
+ if (!Cond.empty() && Cond[0].isMBB())
+ return true;
if (!Cond.empty() && Cond[0].isImm() && Cond[0].getImm() == 0) {
Cond.erase(Cond.begin());
} else {
@@ -1521,7 +1621,6 @@ int HexagonInstrInfo::GetDotNewOp(const MachineInstr* MI) const {
switch (MI->getOpcode()) {
default: llvm_unreachable("Unknown .new type");
- // store new value byte
case Hexagon::S4_storerb_ur:
return Hexagon::S4_storerbnew_ur;
@@ -1531,6 +1630,20 @@ int HexagonInstrInfo::GetDotNewOp(const MachineInstr* MI) const {
case Hexagon::S4_storeri_ur:
return Hexagon::S4_storerinew_ur;
+ case Hexagon::S2_storerb_pci:
+ return Hexagon::S2_storerb_pci;
+
+ case Hexagon::S2_storeri_pci:
+ return Hexagon::S2_storeri_pci;
+
+ case Hexagon::S2_storerh_pci:
+ return Hexagon::S2_storerh_pci;
+
+ case Hexagon::S2_storerd_pci:
+ return Hexagon::S2_storerd_pci;
+
+ case Hexagon::S2_storerf_pci:
+ return Hexagon::S2_storerf_pci;
}
return 0;
}
@@ -1647,7 +1760,7 @@ bool HexagonInstrInfo::isConstExtended(MachineInstr *MI) const {
// We currently only handle isGlobal() because it is the only kind of
// object we are going to end up with here for now.
// In the future we probably should add isSymbol(), etc.
- if (MO.isGlobal() || MO.isSymbol())
+ if (MO.isGlobal() || MO.isSymbol() || MO.isBlockAddress())
return true;
// If the extendable operand is not 'Immediate' type, the instruction should
diff --git a/lib/Target/Hexagon/HexagonInstrInfo.h b/lib/Target/Hexagon/HexagonInstrInfo.h
index 6acfbec..2644248 100644
--- a/lib/Target/Hexagon/HexagonInstrInfo.h
+++ b/lib/Target/Hexagon/HexagonInstrInfo.h
@@ -26,7 +26,7 @@
namespace llvm {
struct EVT;
-
+class HexagonSubtarget;
class HexagonInstrInfo : public HexagonGenInstrInfo {
virtual void anchor();
const HexagonRegisterInfo RI;
@@ -102,15 +102,21 @@ public:
const TargetRegisterClass *RC,
SmallVectorImpl<MachineInstr*> &NewMIs) const;
- MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
- MachineInstr* MI,
- const SmallVectorImpl<unsigned> &Ops,
+ /// expandPostRAPseudo - This function is called for all pseudo instructions
+ /// that remain after register allocation. Many pseudo instructions are
+ /// created to help register allocation. This is the place to convert them
+ /// into real instructions. The target can edit MI in place, or it can insert
+ /// new instructions and erase MI. The function should return true if
+ /// anything was changed.
+ bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const override;
+
+ MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
+ ArrayRef<unsigned> Ops,
int FrameIndex) const override;
- MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
- MachineInstr* MI,
- const SmallVectorImpl<unsigned> &Ops,
- MachineInstr* LoadMI) const override {
+ MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
+ ArrayRef<unsigned> Ops,
+ MachineInstr *LoadMI) const override {
return nullptr;
}
diff --git a/lib/Target/Hexagon/HexagonInstrInfo.td b/lib/Target/Hexagon/HexagonInstrInfo.td
index 60635cf..19cf993 100644
--- a/lib/Target/Hexagon/HexagonInstrInfo.td
+++ b/lib/Target/Hexagon/HexagonInstrInfo.td
@@ -104,10 +104,16 @@ def : T_CMP_pat <C2_cmpgtui, setugt, u9ImmPred>;
//===----------------------------------------------------------------------===//
// ALU32/ALU +
//===----------------------------------------------------------------------===//
+// Add.
+
+def SDT_Int32Leaf : SDTypeProfile<1, 0, [SDTCisVT<0, i32>]>;
+def SDT_Int32Unary : SDTypeProfile<1, 1, [SDTCisVT<0, i32>, SDTCisVT<1, i32>]>;
+
def SDTHexagonI64I32I32 : SDTypeProfile<1, 2,
[SDTCisVT<0, i64>, SDTCisVT<1, i32>, SDTCisSameAs<1, 2>]>;
def HexagonCOMBINE : SDNode<"HexagonISD::COMBINE", SDTHexagonI64I32I32>;
+def HexagonPACKHL : SDNode<"HexagonISD::PACKHL", SDTHexagonI64I32I32>;
let hasSideEffects = 0, hasNewValue = 1, InputType = "reg" in
class T_ALU32_3op<string mnemonic, bits<3> MajOp, bits<3> MinOp, bit OpsRev,
@@ -243,6 +249,9 @@ let OutOperandList = (outs DoubleRegs:$Rd), hasNewValue = 0 in {
def C2_ccombinewnewf : T_ALU32_3op_pred<"combine", 0b101, 0b000, 0, 1, 1>;
}
+def: BinOp32_pat<HexagonCOMBINE, A2_combinew, i64>;
+def: BinOp32_pat<HexagonPACKHL, S2_packhl, i64>;
+
let hasSideEffects = 0, hasNewValue = 1, isCompare = 1, InputType = "reg" in
class T_ALU32_3op_cmp<string mnemonic, bits<2> MinOp, bit IsNeg, bit IsComm>
: ALU32_rr<(outs PredRegs:$Pd), (ins IntRegs:$Rs, IntRegs:$Rt),
@@ -321,7 +330,7 @@ let isReMaterializable = 1, isMoveImm = 1, isAsCheapAsAMove = 1,
def A2_combineii: ALU32Inst <(outs DoubleRegs:$Rdd), (ins s8Ext:$s8, s8Imm:$S8),
"$Rdd = combine(#$s8, #$S8)",
[(set (i64 DoubleRegs:$Rdd),
- (i64 (HexagonCOMBINE(i32 s8ExtPred:$s8), (i32 s8ImmPred:$S8))))]> {
+ (i64 (HexagonCOMBINE(i32 s32ImmPred:$s8), (i32 s8ImmPred:$S8))))]> {
bits<5> Rdd;
bits<8> s8;
bits<8> S8;
@@ -406,7 +415,7 @@ multiclass Addri_base<string mnemonic, SDNode OpNode> {
defm addi : Addri_base<"add", add>, ImmRegRel, PredNewRel;
-def: Pat<(i32 (add I32:$Rs, s16ExtPred:$s16)),
+def: Pat<(i32 (add I32:$Rs, s32ImmPred:$s16)),
(i32 (A2_addi I32:$Rs, imm:$s16))>;
//===----------------------------------------------------------------------===//
@@ -420,7 +429,7 @@ class T_ALU32ri_logical <string mnemonic, SDNode OpNode, bits<2> MinOp>
: ALU32_ri <(outs IntRegs:$Rd),
(ins IntRegs:$Rs, s10Ext:$s10),
"$Rd = "#mnemonic#"($Rs, #$s10)" ,
- [(set (i32 IntRegs:$Rd), (OpNode (i32 IntRegs:$Rs), s10ExtPred:$s10))]> {
+ [(set (i32 IntRegs:$Rd), (OpNode (i32 IntRegs:$Rs), s32ImmPred:$s10))]> {
bits<5> Rd;
bits<5> Rs;
bits<10> s10;
@@ -465,7 +474,7 @@ def A2_nop: ALU32Inst <(outs), (ins), "nop" > {
let Inst{27-24} = 0b1111;
}
-def: Pat<(sub s10ExtPred:$s10, IntRegs:$Rs),
+def: Pat<(sub s32ImmPred:$s10, IntRegs:$Rs),
(A2_subri imm:$s10, IntRegs:$Rs)>;
// Rd = not(Rs) gets mapped to Rd=sub(#-1, Rs).
@@ -613,7 +622,7 @@ let InputType = "imm", isExtendable = 1, isExtentSigned = 1,
isAsCheapAsAMove = 1 , opExtendable = 1, opExtentBits = 16, isMoveImm = 1,
isPredicated = 0, isPredicable = 1, isReMaterializable = 1 in
def A2_tfrsi : ALU32Inst<(outs IntRegs:$Rd), (ins s16Ext:$s16), "$Rd = #$s16",
- [(set (i32 IntRegs:$Rd), s16ExtPred:$s16)], "", ALU32_2op_tc_1_SLOT0123>,
+ [(set (i32 IntRegs:$Rd), s32ImmPred:$s16)], "", ALU32_2op_tc_1_SLOT0123>,
ImmRegRel, PredRel {
bits<5> Rd;
bits<16> s16;
@@ -637,9 +646,13 @@ def A2_tfrpi : ALU64_rr<(outs DoubleRegs:$dst), (ins s8Imm64:$src1),
// TODO: see if this instruction can be deleted.
let isExtendable = 1, opExtendable = 1, opExtentBits = 6,
- isAsmParserOnly = 1 in
-def TFRI64_V4 : ALU64_rr<(outs DoubleRegs:$dst), (ins u6Ext:$src1),
+ isAsmParserOnly = 1 in {
+def TFRI64_V4 : ALU64_rr<(outs DoubleRegs:$dst), (ins u64Imm:$src1),
"$dst = #$src1">;
+def TFRI64_V2_ext : ALU64_rr<(outs DoubleRegs:$dst),
+ (ins s8Ext:$src1, s8Imm:$src2),
+ "$dst = combine(##$src1, #$src2)">;
+}
//===----------------------------------------------------------------------===//
// ALU32/ALU -
@@ -677,11 +690,11 @@ let opExtendable = 3 in
def C2_muxir : T_MUX1<0b0, (ins PredRegs:$Pu, IntRegs:$Rs, s8Ext:$s8),
"$Rd = mux($Pu, $Rs, #$s8)">;
-def : Pat<(i32 (select I1:$Pu, s8ExtPred:$s8, I32:$Rs)),
- (C2_muxri I1:$Pu, s8ExtPred:$s8, I32:$Rs)>;
+def : Pat<(i32 (select I1:$Pu, s32ImmPred:$s8, I32:$Rs)),
+ (C2_muxri I1:$Pu, s32ImmPred:$s8, I32:$Rs)>;
-def : Pat<(i32 (select I1:$Pu, I32:$Rs, s8ExtPred:$s8)),
- (C2_muxir I1:$Pu, I32:$Rs, s8ExtPred:$s8)>;
+def : Pat<(i32 (select I1:$Pu, I32:$Rs, s32ImmPred:$s8)),
+ (C2_muxir I1:$Pu, I32:$Rs, s32ImmPred:$s8)>;
// C2_muxii: Scalar mux immediates.
let isExtentSigned = 1, hasNewValue = 1, isExtendable = 1,
@@ -690,7 +703,7 @@ def C2_muxii: ALU32Inst <(outs IntRegs:$Rd),
(ins PredRegs:$Pu, s8Ext:$s8, s8Imm:$S8),
"$Rd = mux($Pu, #$s8, #$S8)" ,
[(set (i32 IntRegs:$Rd),
- (i32 (select I1:$Pu, s8ExtPred:$s8, s8ImmPred:$S8)))] > {
+ (i32 (select I1:$Pu, s32ImmPred:$s8, s8ImmPred:$S8)))] > {
bits<5> Rd;
bits<2> Pu;
bits<8> s8;
@@ -706,6 +719,12 @@ def C2_muxii: ALU32Inst <(outs IntRegs:$Rd),
let Inst{4-0} = Rd;
}
+let isCodeGenOnly = 1, isPseudo = 1 in
+def MUX64_rr : ALU64_rr<(outs DoubleRegs:$Rd),
+ (ins PredRegs:$Pu, DoubleRegs:$Rs, DoubleRegs:$Rt),
+ ".error \"should not emit\" ", []>;
+
+
//===----------------------------------------------------------------------===//
// template class for non-predicated alu32_2op instructions
// - aslh, asrh, sxtb, sxth, zxth
@@ -987,6 +1006,17 @@ def: T_vcmp_pat<A2_vcmpwgtu, setugt, v2i32>;
//===----------------------------------------------------------------------===//
// ALU32/PRED +
//===----------------------------------------------------------------------===//
+// No encoding bits are needed. If cmp.ge is found, the assembler parser
+// transforms it to cmp.gt, subtracting 1 from the immediate.
+let isPseudo = 1 in {
+def C2_cmpgei: ALU32Inst <
+ (outs PredRegs:$Pd), (ins IntRegs:$Rs, s8Ext:$s8),
+ "$Pd = cmp.ge($Rs, #$s8)">;
+def C2_cmpgeui: ALU32Inst <
+ (outs PredRegs:$Pd), (ins IntRegs:$Rs, u8Ext:$s8),
+ "$Pd = cmp.geu($Rs, #$s8)">;
+}
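+// E.g. "p0 = cmp.ge(r0, #10)" is accepted and emitted as
+// "p0 = cmp.gt(r0, #9)".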
+
//===----------------------------------------------------------------------===//
// ALU32/PRED -
@@ -1742,27 +1772,29 @@ def L2_loadalignb_io: T_loadalign_io <"memb_fifo", 0b0100, s11_0Ext>;
multiclass Loadx_pat<PatFrag Load, ValueType VT, PatLeaf ImmPred,
InstHexagon MI> {
def: Pat<(VT (Load AddrFI:$fi)), (VT (MI AddrFI:$fi, 0))>;
+ def: Pat<(VT (Load (add (i32 AddrFI:$fi), ImmPred:$Off))),
+ (VT (MI AddrFI:$fi, imm:$Off))>;
def: Pat<(VT (Load (add (i32 IntRegs:$Rs), ImmPred:$Off))),
(VT (MI IntRegs:$Rs, imm:$Off))>;
def: Pat<(VT (Load (i32 IntRegs:$Rs))), (VT (MI IntRegs:$Rs, 0))>;
}
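+
+// Each load is thus matched in four address forms: a bare frame index, a
+// frame index plus constant offset (new here), a register base plus offset,
+// and a plain register base.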
let AddedComplexity = 20 in {
- defm: Loadx_pat<load, i32, s11_2ExtPred, L2_loadri_io>;
- defm: Loadx_pat<load, i64, s11_3ExtPred, L2_loadrd_io>;
- defm: Loadx_pat<atomic_load_8 , i32, s11_0ExtPred, L2_loadrub_io>;
- defm: Loadx_pat<atomic_load_16, i32, s11_1ExtPred, L2_loadruh_io>;
- defm: Loadx_pat<atomic_load_32, i32, s11_2ExtPred, L2_loadri_io>;
- defm: Loadx_pat<atomic_load_64, i64, s11_3ExtPred, L2_loadrd_io>;
-
- defm: Loadx_pat<extloadi1, i32, s11_0ExtPred, L2_loadrub_io>;
- defm: Loadx_pat<extloadi8, i32, s11_0ExtPred, L2_loadrub_io>;
- defm: Loadx_pat<extloadi16, i32, s11_1ExtPred, L2_loadruh_io>;
- defm: Loadx_pat<sextloadi8, i32, s11_0ExtPred, L2_loadrb_io>;
- defm: Loadx_pat<sextloadi16, i32, s11_1ExtPred, L2_loadrh_io>;
- defm: Loadx_pat<zextloadi1, i32, s11_0ExtPred, L2_loadrub_io>;
- defm: Loadx_pat<zextloadi8, i32, s11_0ExtPred, L2_loadrub_io>;
- defm: Loadx_pat<zextloadi16, i32, s11_1ExtPred, L2_loadruh_io>;
+ defm: Loadx_pat<load, i32, s30_2ImmPred, L2_loadri_io>;
+ defm: Loadx_pat<load, i64, s29_3ImmPred, L2_loadrd_io>;
+ defm: Loadx_pat<atomic_load_8 , i32, s32_0ImmPred, L2_loadrub_io>;
+ defm: Loadx_pat<atomic_load_16, i32, s31_1ImmPred, L2_loadruh_io>;
+ defm: Loadx_pat<atomic_load_32, i32, s30_2ImmPred, L2_loadri_io>;
+ defm: Loadx_pat<atomic_load_64, i64, s29_3ImmPred, L2_loadrd_io>;
+
+ defm: Loadx_pat<extloadi1, i32, s32_0ImmPred, L2_loadrub_io>;
+ defm: Loadx_pat<extloadi8, i32, s32_0ImmPred, L2_loadrub_io>;
+ defm: Loadx_pat<extloadi16, i32, s31_1ImmPred, L2_loadruh_io>;
+ defm: Loadx_pat<sextloadi8, i32, s32_0ImmPred, L2_loadrb_io>;
+ defm: Loadx_pat<sextloadi16, i32, s31_1ImmPred, L2_loadrh_io>;
+ defm: Loadx_pat<zextloadi1, i32, s32_0ImmPred, L2_loadrub_io>;
+ defm: Loadx_pat<zextloadi8, i32, s32_0ImmPred, L2_loadrub_io>;
+ defm: Loadx_pat<zextloadi16, i32, s31_1ImmPred, L2_loadruh_io>;
// No sextloadi1.
}
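+
+// In the sNN_MImmPred names, NN is the number of significant signed bits
+// and M the required low-bit alignment, so s30_2ImmPred accepts any signed
+// 32-bit offset that is a multiple of 4.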
@@ -2707,7 +2739,7 @@ class T_MType_mpy_ri <bit isNeg, Operand ImmOp, list<dag> pattern>
let isExtendable = 1, opExtentBits = 8, opExtendable = 2 in
def M2_mpysip : T_MType_mpy_ri <0, u8Ext,
- [(set (i32 IntRegs:$Rd), (mul IntRegs:$Rs, u8ExtPred:$u8))]>;
+ [(set (i32 IntRegs:$Rd), (mul IntRegs:$Rs, u32ImmPred:$u8))]>;
def M2_mpysin : T_MType_mpy_ri <1, u8Imm,
[(set (i32 IntRegs:$Rd), (ineg (mul IntRegs:$Rs,
@@ -2729,7 +2761,7 @@ let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 9,
def M2_mpysmi : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, s9Ext:$src2),
"$dst = mpyi($src1, #$src2)",
[(set (i32 IntRegs:$dst), (mul (i32 IntRegs:$src1),
- s9ExtPred:$src2))]>, ImmRegRel;
+ s32ImmPred:$src2))]>, ImmRegRel;
let hasNewValue = 1, isExtendable = 1, opExtentBits = 8, opExtendable = 3,
InputType = "imm" in
@@ -2780,7 +2812,7 @@ class T_MType_acc_rr <string mnemonic, bits<3> MajOp, bits<3> MinOp,
let CextOpcode = "MPYI_acc", Itinerary = M_tc_3x_SLOT23 in {
def M2_macsip : T_MType_acc_ri <"+= mpyi", 0b010, u8Ext,
[(set (i32 IntRegs:$dst),
- (add (mul IntRegs:$src2, u8ExtPred:$src3),
+ (add (mul IntRegs:$src2, u32ImmPred:$src3),
IntRegs:$src1))]>, ImmRegRel;
def M2_maci : T_MType_acc_rr <"+= mpyi", 0b000, 0b000, 0,
@@ -2793,7 +2825,7 @@ let CextOpcode = "ADD_acc" in {
let isExtentSigned = 1 in
def M2_accii : T_MType_acc_ri <"+= add", 0b100, s8Ext,
[(set (i32 IntRegs:$dst),
- (add (add (i32 IntRegs:$src2), s8_16ExtPred:$src3),
+ (add (add (i32 IntRegs:$src2), s16_16ImmPred:$src3),
(i32 IntRegs:$src1)))]>, ImmRegRel;
def M2_acci : T_MType_acc_rr <"+= add", 0b000, 0b001, 0,
@@ -2825,9 +2857,9 @@ class T_MType_acc_pat2 <InstHexagon MI, SDNode firstOp, SDNode secOp>
(MI IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>;
def : T_MType_acc_pat2 <M2_xor_xacc, xor, xor>;
-def : T_MType_acc_pat1 <M2_macsin, mul, sub, u8ExtPred>;
+def : T_MType_acc_pat1 <M2_macsin, mul, sub, u32ImmPred>;
-def : T_MType_acc_pat1 <M2_naccii, add, sub, s8_16ExtPred>;
+def : T_MType_acc_pat1 <M2_naccii, add, sub, s16_16ImmPred>;
def : T_MType_acc_pat2 <M2_nacci, add, sub>;
//===----------------------------------------------------------------------===//
@@ -3514,7 +3546,8 @@ let addrMode = BaseImmOffset, InputType = "imm" in {
}
// Patterns for generating stores, where the address takes different forms:
-// - frameindex,,
+// - frameindex,
+// - frameindex + offset,
// - base + offset,
// - simple (base address without offset).
// These would usually be used together (via Storex_pat defined below), but
@@ -3522,6 +3555,10 @@ let addrMode = BaseImmOffset, InputType = "imm" in {
// AddedComplexity) to the individual patterns.
class Storex_fi_pat<PatFrag Store, PatFrag Value, InstHexagon MI>
: Pat<(Store Value:$Rs, AddrFI:$fi), (MI AddrFI:$fi, 0, Value:$Rs)>;
+class Storex_fi_add_pat<PatFrag Store, PatFrag Value, PatFrag ImmPred,
+ InstHexagon MI>
+ : Pat<(Store Value:$Rs, (add (i32 AddrFI:$fi), ImmPred:$Off)),
+ (MI AddrFI:$fi, imm:$Off, Value:$Rs)>;
class Storex_add_pat<PatFrag Store, PatFrag Value, PatFrag ImmPred,
InstHexagon MI>
: Pat<(Store Value:$Rt, (add (i32 IntRegs:$Rs), ImmPred:$Off)),
@@ -3537,6 +3574,10 @@ class Storexm_fi_pat<PatFrag Store, PatFrag Value, PatFrag ValueMod,
InstHexagon MI>
: Pat<(Store Value:$Rs, AddrFI:$fi),
(MI AddrFI:$fi, 0, (ValueMod Value:$Rs))>;
+class Storexm_fi_add_pat<PatFrag Store, PatFrag Value, PatFrag ImmPred,
+ PatFrag ValueMod, InstHexagon MI>
+ : Pat<(Store Value:$Rs, (add (i32 AddrFI:$fi), ImmPred:$Off)),
+ (MI AddrFI:$fi, imm:$Off, (ValueMod Value:$Rs))>;
class Storexm_add_pat<PatFrag Store, PatFrag Value, PatFrag ImmPred,
PatFrag ValueMod, InstHexagon MI>
: Pat<(Store Value:$Rt, (add (i32 IntRegs:$Rs), ImmPred:$Off)),
@@ -3548,14 +3589,16 @@ class Storexm_simple_pat<PatFrag Store, PatFrag Value, PatFrag ValueMod,
multiclass Storex_pat<PatFrag Store, PatFrag Value, PatLeaf ImmPred,
InstHexagon MI> {
- def: Storex_fi_pat <Store, Value, MI>;
- def: Storex_add_pat <Store, Value, ImmPred, MI>;
+ def: Storex_fi_pat <Store, Value, MI>;
+ def: Storex_fi_add_pat <Store, Value, ImmPred, MI>;
+ def: Storex_add_pat <Store, Value, ImmPred, MI>;
}
multiclass Storexm_pat<PatFrag Store, PatFrag Value, PatLeaf ImmPred,
PatFrag ValueMod, InstHexagon MI> {
- def: Storexm_fi_pat <Store, Value, ValueMod, MI>;
- def: Storexm_add_pat <Store, Value, ImmPred, ValueMod, MI>;
+ def: Storexm_fi_pat <Store, Value, ValueMod, MI>;
+ def: Storexm_fi_add_pat <Store, Value, ImmPred, ValueMod, MI>;
+ def: Storexm_add_pat <Store, Value, ImmPred, ValueMod, MI>;
}
// Regular stores in the DAG have two operands: value and address.
@@ -3567,15 +3610,15 @@ class SwapSt<PatFrag F>
: PatFrag<(ops node:$val, node:$ptr), F.Fragment>;
let AddedComplexity = 20 in {
- defm: Storex_pat<truncstorei8, I32, s11_0ExtPred, S2_storerb_io>;
- defm: Storex_pat<truncstorei16, I32, s11_1ExtPred, S2_storerh_io>;
- defm: Storex_pat<store, I32, s11_2ExtPred, S2_storeri_io>;
- defm: Storex_pat<store, I64, s11_3ExtPred, S2_storerd_io>;
+ defm: Storex_pat<truncstorei8, I32, s32_0ImmPred, S2_storerb_io>;
+ defm: Storex_pat<truncstorei16, I32, s31_1ImmPred, S2_storerh_io>;
+ defm: Storex_pat<store, I32, s30_2ImmPred, S2_storeri_io>;
+ defm: Storex_pat<store, I64, s29_3ImmPred, S2_storerd_io>;
- defm: Storex_pat<SwapSt<atomic_store_8>, I32, s11_0ExtPred, S2_storerb_io>;
- defm: Storex_pat<SwapSt<atomic_store_16>, I32, s11_1ExtPred, S2_storerh_io>;
- defm: Storex_pat<SwapSt<atomic_store_32>, I32, s11_2ExtPred, S2_storeri_io>;
- defm: Storex_pat<SwapSt<atomic_store_64>, I64, s11_3ExtPred, S2_storerd_io>;
+ defm: Storex_pat<SwapSt<atomic_store_8>, I32, s32_0ImmPred, S2_storerb_io>;
+ defm: Storex_pat<SwapSt<atomic_store_16>, I32, s31_1ImmPred, S2_storerh_io>;
+ defm: Storex_pat<SwapSt<atomic_store_32>, I32, s30_2ImmPred, S2_storeri_io>;
+ defm: Storex_pat<SwapSt<atomic_store_64>, I64, s29_3ImmPred, S2_storerd_io>;
}
// Simple patterns should be tried with the least priority.
@@ -3590,9 +3633,9 @@ def: Storex_simple_pat<SwapSt<atomic_store_32>, I32, S2_storeri_io>;
def: Storex_simple_pat<SwapSt<atomic_store_64>, I64, S2_storerd_io>;
let AddedComplexity = 20 in {
- defm: Storexm_pat<truncstorei8, I64, s11_0ExtPred, LoReg, S2_storerb_io>;
- defm: Storexm_pat<truncstorei16, I64, s11_1ExtPred, LoReg, S2_storerh_io>;
- defm: Storexm_pat<truncstorei32, I64, s11_2ExtPred, LoReg, S2_storeri_io>;
+ defm: Storexm_pat<truncstorei8, I64, s32_0ImmPred, LoReg, S2_storerb_io>;
+ defm: Storexm_pat<truncstorei16, I64, s31_1ImmPred, LoReg, S2_storerh_io>;
+ defm: Storexm_pat<truncstorei32, I64, s30_2ImmPred, LoReg, S2_storeri_io>;
}
def: Storexm_simple_pat<truncstorei8, I64, LoReg, S2_storerb_io>;
@@ -4321,6 +4364,14 @@ def: Pat<(i1 (seteq (and (i32 IntRegs:$Rs), (i32 IntRegs:$Rt)), IntRegs:$Rt)),
// XTYPE/PERM +
//===----------------------------------------------------------------------===//
+def: Pat<(or (or (shl (or (shl (i32 (extloadi8 (add (i32 IntRegs:$b), 3))),
+ (i32 8)),
+ (i32 (zextloadi8 (add (i32 IntRegs:$b), 2)))),
+ (i32 16)),
+ (shl (i32 (zextloadi8 (add (i32 IntRegs:$b), 1))), (i32 8))),
+ (zextloadi8 (i32 IntRegs:$b))),
+ (A2_swiz (L2_loadri_io IntRegs:$b, 0))>;
+
//===----------------------------------------------------------------------===//
// XTYPE/PERM -
//===----------------------------------------------------------------------===//
@@ -4364,7 +4415,7 @@ def C2_pxfer_map: SInst<(outs PredRegs:$dst), (ins PredRegs:$src),
// Patterns for loads of i1:
def: Pat<(i1 (load AddrFI:$fi)),
(C2_tfrrp (L2_loadrub_io AddrFI:$fi, 0))>;
-def: Pat<(i1 (load (add (i32 IntRegs:$Rs), s11_0ExtPred:$Off))),
+def: Pat<(i1 (load (add (i32 IntRegs:$Rs), s32ImmPred:$Off))),
(C2_tfrrp (L2_loadrub_io IntRegs:$Rs, imm:$Off))>;
def: Pat<(i1 (load (i32 IntRegs:$Rs))),
(C2_tfrrp (L2_loadrub_io IntRegs:$Rs, 0))>;
@@ -4375,7 +4426,7 @@ def I1toI32: OutPatFrag<(ops node:$Rs),
def I32toI1: OutPatFrag<(ops node:$Rs),
(i1 (C2_tfrrp (i32 $Rs)))>;
-defm: Storexm_pat<store, I1, s11_0ExtPred, I1toI32, S2_storerb_io>;
+defm: Storexm_pat<store, I1, s32ImmPred, I1toI32, S2_storerb_io>;
def: Storexm_simple_pat<store, I1, I1toI32, S2_storerb_io>;
//===----------------------------------------------------------------------===//
@@ -4474,6 +4525,12 @@ def Y2_barrier : SYSInst<(outs), (ins),
//===----------------------------------------------------------------------===//
// SYSTEM/SUPER -
//===----------------------------------------------------------------------===//
+
+// Generate frameindex addresses.
+let isMoveImm = 1, isAsCheapAsAMove = 1, isReMaterializable = 1,
+ isPseudo = 1, isCodeGenOnly = 1, hasSideEffects = 0 in
+def TFR_FI: ALU32_ri<(outs IntRegs:$Rd), (ins IntRegs:$fi, s32Imm:$Off), "">;
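+
+// A sketch of the intent: TFR_FI survives as a pseudo until frame-index
+// elimination, and the flags above let the register allocator cheaply
+// rematerialize the address computation instead of spilling it.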
+
//===----------------------------------------------------------------------===//
// CRUSER - Type.
//===----------------------------------------------------------------------===//
@@ -4519,6 +4576,11 @@ class LOOP_rBase<string mnemonic, Operand brOp, bit mustExtend = 0>
multiclass LOOP_ri<string mnemonic> {
def i : LOOP_iBase<mnemonic, brtarget>;
def r : LOOP_rBase<mnemonic, brtarget>;
+
+ let isCodeGenOnly = 1, isExtended = 1, opExtendable = 0 in {
+ def iext: LOOP_iBase<mnemonic, brtargetExt, 1>;
+ def rext: LOOP_rBase<mnemonic, brtargetExt, 1>;
+ }
}
@@ -4676,36 +4738,6 @@ def Y4_trace: CRInst <(outs), (ins IntRegs:$Rs),
let Inst{20-16} = Rs;
}
-let AddedComplexity = 100, isPredicated = 1, isCodeGenOnly = 1 in
-def TFR_condset_ri : ALU32_rr<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, s12Imm:$src3),
- "Error; should not emit",
- [(set (i32 IntRegs:$dst),
- (i32 (select (i1 PredRegs:$src1), (i32 IntRegs:$src2),
- s12ImmPred:$src3)))]>;
-
-let AddedComplexity = 100, isPredicated = 1, isCodeGenOnly = 1 in
-def TFR_condset_ir : ALU32_rr<(outs IntRegs:$dst),
- (ins PredRegs:$src1, s12Imm:$src2, IntRegs:$src3),
- "Error; should not emit",
- [(set (i32 IntRegs:$dst),
- (i32 (select (i1 PredRegs:$src1), s12ImmPred:$src2,
- (i32 IntRegs:$src3))))]>;
-
-let AddedComplexity = 100, isPredicated = 1, isCodeGenOnly = 1 in
-def TFR_condset_ii : ALU32_rr<(outs IntRegs:$dst),
- (ins PredRegs:$src1, s12Imm:$src2, s12Imm:$src3),
- "Error; should not emit",
- [(set (i32 IntRegs:$dst),
- (i32 (select (i1 PredRegs:$src1), s12ImmPred:$src2,
- s12ImmPred:$src3)))]>;
-
-// Generate frameindex addresses.
-let isReMaterializable = 1, isCodeGenOnly = 1 in
-def TFR_FI : ALU32_ri<(outs IntRegs:$dst), (ins FrameIndex:$src1),
- "$dst = add($src1)",
- [(set (i32 IntRegs:$dst), ADDRri:$src1)]>;
-
// Support for generating global address.
// Taken from X86InstrInfo.td.
def SDTHexagonCONST32 : SDTypeProfile<1, 1, [SDTCisVT<0, i32>,
@@ -4750,30 +4782,29 @@ def HI_PIC : ALU32_ri<(outs IntRegs:$dst), (ins bblabel:$label),
"$dst.h = #HI($label@GOTREL)",
[]>;
-let isReMaterializable = 1, isMoveImm = 1, hasSideEffects = 0,
- isAsmParserOnly = 1 in
-def LOi : ALU32_ri<(outs IntRegs:$dst), (ins i32imm:$imm_value),
- "$dst.l = #LO($imm_value)",
- []>;
-
+let isReMaterializable = 1, isMoveImm = 1,
+ isCodeGenOnly = 1, hasSideEffects = 0 in
+def HI_GOT : ALU32_ri<(outs IntRegs:$dst), (ins globaladdress:$global),
+ "$dst.h = #HI($global@GOT)",
+ []>;
-let isReMaterializable = 1, isMoveImm = 1, hasSideEffects = 0,
- isAsmParserOnly = 1 in
-def HIi : ALU32_ri<(outs IntRegs:$dst), (ins i32imm:$imm_value),
- "$dst.h = #HI($imm_value)",
- []>;
+let isReMaterializable = 1, isMoveImm = 1,
+ isCodeGenOnly = 1, hasSideEffects = 0 in
+def LO_GOT : ALU32_ri<(outs IntRegs:$dst), (ins globaladdress:$global),
+ "$dst.l = #LO($global@GOT)",
+ []>;
-let isReMaterializable = 1, isMoveImm = 1, hasSideEffects = 0,
- isAsmParserOnly = 1 in
-def LO_jt : ALU32_ri<(outs IntRegs:$dst), (ins jumptablebase:$jt),
- "$dst.l = #LO($jt)",
- []>;
+let isReMaterializable = 1, isMoveImm = 1,
+ isCodeGenOnly = 1, hasSideEffects = 0 in
+def HI_GOTREL : ALU32_ri<(outs IntRegs:$dst), (ins globaladdress:$global),
+ "$dst.h = #HI($global@GOTREL)",
+ []>;
-let isReMaterializable = 1, isMoveImm = 1, hasSideEffects = 0,
- isAsmParserOnly = 1 in
-def HI_jt : ALU32_ri<(outs IntRegs:$dst), (ins jumptablebase:$jt),
- "$dst.h = #HI($jt)",
- []>;
+let isReMaterializable = 1, isMoveImm = 1,
+ isCodeGenOnly = 1, hasSideEffects = 0 in
+def LO_GOTREL : ALU32_ri<(outs IntRegs:$dst), (ins globaladdress:$global),
+ "$dst.l = #LO($global@GOTREL)",
+ []>;
// This pattern is incorrect. When we add small data, we should change
// this pattern to use memw(#foo).
@@ -4785,31 +4816,19 @@ def CONST32 : CONSTLDInst<(outs IntRegs:$dst), (ins globaladdress:$global),
(load (HexagonCONST32 tglobaltlsaddr:$global)))]>;
let isReMaterializable = 1, isMoveImm = 1 in
-def CONST32_set : LDInst2<(outs IntRegs:$dst), (ins globaladdress:$global),
- "$dst = CONST32(#$global)",
- [(set (i32 IntRegs:$dst),
- (HexagonCONST32 tglobaladdr:$global))]>;
-
-let isReMaterializable = 1, isMoveImm = 1, isAsmParserOnly = 1 in
def CONST32_set_jt : CONSTLDInst<(outs IntRegs:$dst), (ins jumptablebase:$jt),
"$dst = CONST32(#$jt)",
[(set (i32 IntRegs:$dst),
(HexagonCONST32 tjumptable:$jt))]>;
let isReMaterializable = 1, isMoveImm = 1, isAsmParserOnly = 1 in
-def CONST32GP_set : LDInst2<(outs IntRegs:$dst), (ins globaladdress:$global),
- "$dst = CONST32(#$global)",
- [(set (i32 IntRegs:$dst),
- (HexagonCONST32_GP tglobaladdr:$global))]>;
-
-let isReMaterializable = 1, isMoveImm = 1, isAsmParserOnly = 1 in
def CONST32_Int_Real : CONSTLDInst<(outs IntRegs:$dst), (ins i32imm:$global),
"$dst = CONST32(#$global)",
[(set (i32 IntRegs:$dst), imm:$global) ]>;
-// Map BlockAddress lowering to CONST32_Int_Real
-def : Pat<(HexagonCONST32_GP tblockaddress:$addr),
- (CONST32_Int_Real tblockaddress:$addr)>;
+// Map TLS addresses to a CONST32 instruction.
+def: Pat<(HexagonCONST32 tglobaltlsaddr:$addr), (A2_tfrsi s16Ext:$addr)>;
+def: Pat<(HexagonCONST32 bbl:$label), (A2_tfrsi s16Ext:$label)>;
let isReMaterializable = 1, isMoveImm = 1, isAsmParserOnly = 1 in
def CONST32_Label : LDInst2<(outs IntRegs:$dst), (ins bblabel:$label),
@@ -4869,21 +4888,17 @@ let isPseudo = 1, isCall = 1, isReturn = 1, isBarrier = 1, isPredicable = 0,
def TCRETURNr : T_JMPr;
// Direct tail-calls.
-let isCall = 1, isReturn = 1, isBarrier = 1, isPredicable = 0,
-isTerminator = 1, isCodeGenOnly = 1 in {
- def TCRETURNtg : JInst<(outs), (ins calltarget:$dst), "jump $dst",
- [], "", J_tc_2early_SLOT23>;
- def TCRETURNtext : JInst<(outs), (ins calltarget:$dst), "jump $dst",
- [], "", J_tc_2early_SLOT23>;
-}
+let isPseudo = 1, isCall = 1, isReturn = 1, isBarrier = 1, isPredicable = 0,
+ isTerminator = 1, isCodeGenOnly = 1 in
+def TCRETURNi : JInst<(outs), (ins calltarget:$dst), "", []>;
//Tail calls.
def: Pat<(HexagonTCRet tglobaladdr:$dst),
- (TCRETURNtg tglobaladdr:$dst)>;
+ (TCRETURNi tglobaladdr:$dst)>;
def: Pat<(HexagonTCRet texternalsym:$dst),
- (TCRETURNtext texternalsym:$dst)>;
+ (TCRETURNi texternalsym:$dst)>;
def: Pat<(HexagonTCRet (i32 IntRegs:$dst)),
- (TCRETURNr (i32 IntRegs:$dst))>;
+ (TCRETURNr IntRegs:$dst)>;
// Map from r0 = and(r1, 65535) to r0 = zxth(r1)
def: Pat<(and (i32 IntRegs:$src1), 65535),
@@ -4900,19 +4915,19 @@ def: Pat<(add (i1 PredRegs:$src1), -1),
(C2_not PredRegs:$src1)>;
// Map from p0 = pnot(p0); r0 = mux(p0, #i, #j) => r0 = mux(p0, #j, #i).
-def: Pat<(select (not (i1 PredRegs:$src1)), s8ImmPred:$src2, s8ExtPred:$src3),
- (C2_muxii PredRegs:$src1, s8ExtPred:$src3, s8ImmPred:$src2)>;
+def: Pat<(select (not (i1 PredRegs:$src1)), s8ImmPred:$src2, s32ImmPred:$src3),
+ (C2_muxii PredRegs:$src1, s32ImmPred:$src3, s8ImmPred:$src2)>;
// Map from p0 = pnot(p0); r0 = select(p0, #i, r1)
// => r0 = C2_muxir(p0, r1, #i)
-def: Pat<(select (not (i1 PredRegs:$src1)), s8ExtPred:$src2,
+def: Pat<(select (not (i1 PredRegs:$src1)), s32ImmPred:$src2,
(i32 IntRegs:$src3)),
- (C2_muxir PredRegs:$src1, IntRegs:$src3, s8ExtPred:$src2)>;
+ (C2_muxir PredRegs:$src1, IntRegs:$src3, s32ImmPred:$src2)>;
// Map from p0 = pnot(p0); r0 = mux(p0, r1, #i)
// => r0 = C2_muxri (p0, #i, r1)
-def: Pat<(select (not (i1 PredRegs:$src1)), IntRegs:$src2, s8ExtPred:$src3),
- (C2_muxri PredRegs:$src1, s8ExtPred:$src3, IntRegs:$src2)>;
+def: Pat<(select (not (i1 PredRegs:$src1)), IntRegs:$src2, s32ImmPred:$src3),
+ (C2_muxri PredRegs:$src1, s32ImmPred:$src3, IntRegs:$src2)>;
// Map from p0 = pnot(p0); if (p0) jump => if (!p0) jump.
def: Pat<(brcond (not (i1 PredRegs:$src1)), bb:$offset),
@@ -4952,26 +4967,6 @@ def: Pat<(brcond (i1 (setlt (i32 IntRegs:$src1), s8ImmPred:$src2)), bb:$offset),
(J2_jumpf (C2_cmpgti IntRegs:$src1, (DEC_CONST_SIGNED s8ImmPred:$src2)),
bb:$offset)>;
-// cmp.lt(r0, r1) -> cmp.gt(r1, r0)
-def : Pat <(brcond (i1 (setlt (i32 IntRegs:$src1), (i32 IntRegs:$src2))),
- bb:$offset),
- (J2_jumpt (C2_cmpgt (i32 IntRegs:$src2), (i32 IntRegs:$src1)), bb:$offset)>;
-
-def : Pat <(brcond (i1 (setuge (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))),
- bb:$offset),
- (J2_jumpf (C2_cmpgtup (i64 DoubleRegs:$src2), (i64 DoubleRegs:$src1)),
- bb:$offset)>;
-
-def : Pat <(brcond (i1 (setule (i32 IntRegs:$src1), (i32 IntRegs:$src2))),
- bb:$offset),
- (J2_jumpf (C2_cmpgtu (i32 IntRegs:$src1), (i32 IntRegs:$src2)),
- bb:$offset)>;
-
-def : Pat <(brcond (i1 (setule (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))),
- bb:$offset),
- (J2_jumpf (C2_cmpgtup (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2)),
- bb:$offset)>;
-
// Map from a 64-bit select to an emulated 64-bit mux.
// Hexagon does not support 64-bit MUXes; so emulate with combines.
def: Pat<(select (i1 PredRegs:$src1), (i64 DoubleRegs:$src2),
@@ -4987,10 +4982,6 @@ def: Pat<(select (i1 PredRegs:$src1), (i1 PredRegs:$src2), (i1 PredRegs:$src3)),
(C2_or (C2_and PredRegs:$src1, PredRegs:$src2),
(C2_and (C2_not PredRegs:$src1), PredRegs:$src3))>;
-// Map Pd = load(addr) -> Rs = load(addr); Pd = Rs.
-def : Pat<(i1 (load ADDRriS11_2:$addr)),
- (i1 (C2_tfrrp (i32 (L2_loadrb_io AddrFI:$addr, 0))))>;
-
// Map for truncating from 64 immediates to 32 bit immediates.
def: Pat<(i32 (trunc (i64 DoubleRegs:$src))),
(LoReg DoubleRegs:$src)>;
@@ -4999,42 +4990,10 @@ def: Pat<(i32 (trunc (i64 DoubleRegs:$src))),
def: Pat<(i1 (trunc (i64 DoubleRegs:$src))),
(C2_tfrrp (LoReg DoubleRegs:$src))>;
-// Map memb(Rs) = Rdd -> memb(Rs) = Rt.
-def : Pat<(truncstorei8 (i64 DoubleRegs:$src), ADDRriS11_0:$addr),
- (S2_storerb_io AddrFI:$addr, 0, (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src),
- subreg_loreg)))>;
-
-// Map memh(Rs) = Rdd -> memh(Rs) = Rt.
-def : Pat<(truncstorei16 (i64 DoubleRegs:$src), ADDRriS11_0:$addr),
- (S2_storerh_io AddrFI:$addr, 0, (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src),
- subreg_loreg)))>;
-// Map memw(Rs) = Rdd -> memw(Rs) = Rt
-def : Pat<(truncstorei32 (i64 DoubleRegs:$src), ADDRriS11_0:$addr),
- (S2_storeri_io AddrFI:$addr, 0, (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src),
- subreg_loreg)))>;
-
-// Map memw(Rs) = Rdd -> memw(Rs) = Rt.
-def : Pat<(truncstorei32 (i64 DoubleRegs:$src), ADDRriS11_0:$addr),
- (S2_storeri_io AddrFI:$addr, 0, (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src),
- subreg_loreg)))>;
-
-// Map from i1 = constant<-1>; memw(addr) = i1 -> r0 = 1; memw(addr) = r0.
-def : Pat<(store (i1 -1), ADDRriS11_2:$addr),
- (S2_storerb_io AddrFI:$addr, 0, (A2_tfrsi 1))>;
-
-
-// Map from i1 = constant<-1>; store i1 -> r0 = 1; store r0.
-def : Pat<(store (i1 -1), ADDRriS11_2:$addr),
- (S2_storerb_io AddrFI:$addr, 0, (A2_tfrsi 1))>;
-
-// Map from memb(Rs) = Pd -> Rt = mux(Pd, #0, #1); store Rt.
-def : Pat<(store (i1 PredRegs:$src1), ADDRriS11_2:$addr),
- (S2_storerb_io AddrFI:$addr, 0, (i32 (C2_muxii (i1 PredRegs:$src1), 1, 0)) )>;
-
// rs <= rt -> !(rs > rt).
let AddedComplexity = 30 in
-def: Pat<(i1 (setle (i32 IntRegs:$src1), s10ExtPred:$src2)),
- (C2_not (C2_cmpgti IntRegs:$src1, s10ExtPred:$src2))>;
+def: Pat<(i1 (setle (i32 IntRegs:$src1), s32ImmPred:$src2)),
+ (C2_not (C2_cmpgti IntRegs:$src1, s32ImmPred:$src2))>;
// rs <= rt -> !(rs > rt).
def : Pat<(i1 (setle (i32 IntRegs:$src1), (i32 IntRegs:$src2))),
@@ -5048,13 +5007,8 @@ def: Pat<(i1 (setle (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))),
// Hexagon_TODO: We should improve on this.
// rs != rt -> !(rs == rt).
let AddedComplexity = 30 in
-def: Pat<(i1 (setne (i32 IntRegs:$src1), s10ExtPred:$src2)),
- (C2_not (C2_cmpeqi IntRegs:$src1, s10ExtPred:$src2))>;
-
-// Map cmpne(Rs) -> !cmpeqe(Rs).
-// rs != rt -> !(rs == rt).
-def : Pat <(i1 (setne (i32 IntRegs:$src1), (i32 IntRegs:$src2))),
- (i1 (C2_not (i1 (C2_cmpeq (i32 IntRegs:$src1), (i32 IntRegs:$src2)))))>;
+def: Pat<(i1 (setne (i32 IntRegs:$src1), s32ImmPred:$src2)),
+ (C2_not (C2_cmpeqi IntRegs:$src1, s32ImmPred:$src2))>;
// Convert setne back to xor for hexagon since we compute w/ pred registers.
def: Pat<(i1 (setne (i1 PredRegs:$src1), (i1 PredRegs:$src2))),
@@ -5072,8 +5026,8 @@ def : Pat <(i1 (setge (i32 IntRegs:$src1), (i32 IntRegs:$src2))),
// cmpge(Rs, Imm) -> cmpgt(Rs, Imm-1)
let AddedComplexity = 30 in
-def: Pat<(i1 (setge (i32 IntRegs:$src1), s8ExtPred:$src2)),
- (C2_cmpgti IntRegs:$src1, (DEC_CONST_SIGNED s8ExtPred:$src2))>;
+def: Pat<(i1 (setge (i32 IntRegs:$src1), s32ImmPred:$src2)),
+ (C2_cmpgti IntRegs:$src1, (DEC_CONST_SIGNED s32ImmPred:$src2))>;
// Map cmpge(Rss, Rtt) -> !cmpgt(Rtt, Rss).
// rss >= rtt -> !(rtt > rss).
@@ -5084,20 +5038,21 @@ def: Pat<(i1 (setge (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))),
// !cmpge(Rs, Imm) -> !cmpgt(Rs, Imm-1).
// rs < rt -> !(rs >= rt).
let AddedComplexity = 30 in
-def: Pat<(i1 (setlt (i32 IntRegs:$src1), s8ExtPred:$src2)),
- (C2_not (C2_cmpgti IntRegs:$src1, (DEC_CONST_SIGNED s8ExtPred:$src2)))>;
+def: Pat<(i1 (setlt (i32 IntRegs:$src1), s32ImmPred:$src2)),
+ (C2_not (C2_cmpgti IntRegs:$src1,
+ (DEC_CONST_SIGNED s32ImmPred:$src2)))>;
// Generate cmpgeu(Rs, #0) -> cmpeq(Rs, Rs)
def: Pat<(i1 (setuge (i32 IntRegs:$src1), 0)),
(C2_cmpeq IntRegs:$src1, IntRegs:$src1)>;
// Generate cmpgeu(Rs, #u8) -> cmpgtu(Rs, #u8 -1)
-def: Pat<(i1 (setuge (i32 IntRegs:$src1), u8ExtPred:$src2)),
- (C2_cmpgtui IntRegs:$src1, (DEC_CONST_UNSIGNED u8ExtPred:$src2))>;
+def: Pat<(i1 (setuge (i32 IntRegs:$src1), u32ImmPred:$src2)),
+ (C2_cmpgtui IntRegs:$src1, (DEC_CONST_UNSIGNED u32ImmPred:$src2))>;
// Generate cmpgtu(Rs, #u9)
-def: Pat<(i1 (setugt (i32 IntRegs:$src1), u9ExtPred:$src2)),
- (C2_cmpgtui IntRegs:$src1, u9ExtPred:$src2)>;
+def: Pat<(i1 (setugt (i32 IntRegs:$src1), u32ImmPred:$src2)),
+ (C2_cmpgtui IntRegs:$src1, u32ImmPred:$src2)>;
// Map from Rs >= Rt -> !(Rt > Rs).
// rs >= rt -> !(rt > rs).
@@ -5118,11 +5073,6 @@ def: Pat<(i32 (sext (i1 PredRegs:$src1))),
def: Pat<(i64 (sext (i1 PredRegs:$src1))),
(A2_combinew (A2_tfrsi -1), (C2_muxii PredRegs:$src1, -1, 0))>;
-// Convert sign-extended load back to load and sign extend.
-// i32 -> i64
-def: Pat <(i64 (sextloadi32 ADDRriS11_2:$src1)),
- (i64 (A2_sxtw (L2_loadri_io AddrFI:$src1, 0)))>;
-
// Zero extends.
// i1 -> i32
def: Pat<(i32 (zext (i1 PredRegs:$src1))),
@@ -5136,12 +5086,6 @@ def: Pat<(i32 (anyext (i1 PredRegs:$src1))),
def: Pat<(i64 (anyext (i1 PredRegs:$src1))),
(A2_sxtw (C2_muxii PredRegs:$src1, 1, 0))>;
-def: Pat<(i64 (or (i64 (shl (i64 DoubleRegs:$srcHigh),
- (i32 32))),
- (i64 (zextloadi32 ADDRriS11_2:$srcLow)))),
- (i64 (A2_combinew (EXTRACT_SUBREG (i64 DoubleRegs:$srcHigh), subreg_loreg),
- (L2_loadri_io AddrFI:$srcLow, 0)))>;
-
// Multiply 64-bit unsigned and use upper result.
def : Pat <(mulhu (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2)),
(A2_addp
@@ -5186,10 +5130,13 @@ let AddedComplexity = 100 in
def: Pat<(i32 (sext_inreg (Hexagon_ARGEXTEND (i32 IntRegs:$src1)), i16)),
(i32 IntRegs:$src1)>;
-def HexagonWrapperJT: SDNode<"HexagonISD::WrapperJT", SDTIntUnaryOp>;
+def HexagonJT: SDNode<"HexagonISD::JT", SDTIntUnaryOp>;
+def HexagonCP: SDNode<"HexagonISD::CP", SDTIntUnaryOp>;
-def : Pat<(HexagonWrapperJT tjumptable:$dst),
- (i32 (CONST32_set_jt tjumptable:$dst))>;
+def: Pat<(HexagonJT tjumptable:$dst),
+ (CONST32_set_jt tjumptable:$dst)>;
+def: Pat<(HexagonCP tconstpool :$dst),
+ (CONST32_set_jt tconstpool:$dst)>;
// XTYPE/SHIFT
//
@@ -5626,6 +5573,43 @@ let hasNewValue = 1 in {
def S2_insertp_rp : T_S3op_insert<"insert", DoubleRegs>;
def S2_insertp : T_S2op_insert <0b0011, DoubleRegs, u6Imm>;
+
+def SDTHexagonINSERT_ri : SDTypeProfile<1, 4, [SDTCisVT<0, i32>,
+ SDTCisVT<1, i32>,
+ SDTCisVT<2, i32>,
+ SDTCisVT<3, i32>,
+ SDTCisVT<4, i32>]>;
+def SDTHexagonINSERT_rd : SDTypeProfile<1, 4, [SDTCisVT<0, i64>,
+ SDTCisVT<1, i64>,
+ SDTCisVT<2, i64>,
+ SDTCisVT<3, i32>,
+ SDTCisVT<4, i32>]>;
+def SDTHexagonINSERT_riv : SDTypeProfile<1, 3, [SDTCisVT<0, i32>,
+ SDTCisVT<1, i32>,
+ SDTCisVT<2, i32>,
+ SDTCisVT<3, i64>]>;
+def SDTHexagonINSERT_rdv : SDTypeProfile<1, 3, [SDTCisVT<0, i64>,
+ SDTCisVT<1, i64>,
+ SDTCisVT<2, i64>,
+ SDTCisVT<3, i64>]>;
+def HexagonINSERT_ri : SDNode<"HexagonISD::INSERT_ri", SDTHexagonINSERT_ri>;
+def HexagonINSERT_rd : SDNode<"HexagonISD::INSERT_rd", SDTHexagonINSERT_rd>;
+def HexagonINSERT_riv: SDNode<"HexagonISD::INSERT_riv", SDTHexagonINSERT_riv>;
+def HexagonINSERT_rdv: SDNode<"HexagonISD::INSERT_rdv", SDTHexagonINSERT_rdv>;
+
+def: Pat<(HexagonINSERT_ri I32:$Rs, I32:$Rt, u5ImmPred:$u1, u5ImmPred:$u2),
+ (S2_insert I32:$Rs, I32:$Rt, u5ImmPred:$u1, u5ImmPred:$u2)>;
+
+def: Pat<(HexagonINSERT_rd I64:$Rs, I64:$Rt, u6ImmPred:$u1, u6ImmPred:$u2),
+ (S2_insertp I64:$Rs, I64:$Rt, u6ImmPred:$u1, u6ImmPred:$u2)>;
+
+def: Pat<(HexagonINSERT_riv I32:$Rs, I32:$Rt, I64:$Ru),
+ (S2_insert_rp I32:$Rs, I32:$Rt, I64:$Ru)>;
+
+def: Pat<(HexagonINSERT_rdv I64:$Rs, I64:$Rt, I64:$Ru),
+ (S2_insertp_rp I64:$Rs, I64:$Rt, I64:$Ru)>;
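+
+// In the _ri/_rd nodes the two immediates are the field width and the bit
+// offset, matching the "Rx = insert(Rs, #width, #offset)" instruction form;
+// the _riv/_rdv variants take the width/offset pair in a register instead.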
+
+
//===----------------------------------------------------------------------===//
// Template class for 'extract bitfield' instructions
//===----------------------------------------------------------------------===//
@@ -5692,6 +5676,37 @@ let hasNewValue = 1 in {
def S2_extractu : T_S2op_extract <"extractu", 0b1101, IntRegs, u5Imm>;
}
+def SDTHexagonEXTRACTU_ri : SDTypeProfile<1, 3, [SDTCisVT<0, i32>,
+ SDTCisVT<1, i32>,
+ SDTCisVT<2, i32>,
+ SDTCisVT<3, i32>]>;
+def SDTHexagonEXTRACTU_rd : SDTypeProfile<1, 3, [SDTCisVT<0, i64>,
+ SDTCisVT<1, i64>,
+ SDTCisVT<2, i32>,
+ SDTCisVT<3, i32>]>;
+def SDTHexagonEXTRACTU_riv : SDTypeProfile<1, 2, [SDTCisVT<0, i32>,
+ SDTCisVT<1, i32>,
+ SDTCisVT<2, i64>]>;
+def SDTHexagonEXTRACTU_rdv : SDTypeProfile<1, 2, [SDTCisVT<0, i64>,
+ SDTCisVT<1, i64>,
+ SDTCisVT<2, i64>]>;
+def HexagonEXTRACTU_ri : SDNode<"HexagonISD::EXTRACTU_ri", SDTHexagonEXTRACTU_ri>;
+def HexagonEXTRACTU_rd : SDNode<"HexagonISD::EXTRACTU_rd", SDTHexagonEXTRACTU_rd>;
+def HexagonEXTRACTU_riv: SDNode<"HexagonISD::EXTRACTU_riv", SDTHexagonEXTRACTU_riv>;
+def HexagonEXTRACTU_rdv: SDNode<"HexagonISD::EXTRACTU_rdv", SDTHexagonEXTRACTU_rdv>;
+
+def: Pat<(HexagonEXTRACTU_ri I32:$src1, u5ImmPred:$src2, u5ImmPred:$src3),
+ (S2_extractu I32:$src1, u5ImmPred:$src2, u5ImmPred:$src3)>;
+
+def: Pat<(HexagonEXTRACTU_rd I64:$src1, u6ImmPred:$src2, u6ImmPred:$src3),
+ (S2_extractup I64:$src1, u6ImmPred:$src2, u6ImmPred:$src3)>;
+
+def: Pat<(HexagonEXTRACTU_riv I32:$src1, I64:$src2),
+ (S2_extractu_rp I32:$src1, I64:$src2)>;
+
+def: Pat<(HexagonEXTRACTU_rdv I64:$src1, I64:$src2),
+ (S2_extractup_rp I64:$src1, I64:$src2)>;
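+
+// Same operand convention as the insert nodes above: the immediates are the
+// field width and bit offset, and the _riv/_rdv forms take them in a register.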
+
// Change the sign of the immediate for Rd=-mpyi(Rs,#u8)
def: Pat<(mul (i32 IntRegs:$src1), (ineg n8ImmPred:$src2)),
(M2_mpysin IntRegs:$src1, u8ImmPred:$src2)>;
@@ -5728,6 +5743,22 @@ def S2_tableidxw : tableidxRaw<"tableidxw", 0b10>;
def S2_tableidxd : tableidxRaw<"tableidxd", 0b11>;
//===----------------------------------------------------------------------===//
+// Template class for 'table index' instructions which are assembler-mapped
+// to their :raw format.
+//===----------------------------------------------------------------------===//
+let isPseudo = 1 in
+class tableidx_goodsyntax <string mnemonic>
+ : SInst <(outs IntRegs:$Rx),
+ (ins IntRegs:$_dst_, IntRegs:$Rs, u4Imm:$u4, u5Imm:$u5),
+ "$Rx = "#mnemonic#"($Rs, #$u4, #$u5)",
+ [], "$Rx = $_dst_" >;
+
+def S2_tableidxb_goodsyntax : tableidx_goodsyntax<"tableidxb">;
+def S2_tableidxh_goodsyntax : tableidx_goodsyntax<"tableidxh">;
+def S2_tableidxw_goodsyntax : tableidx_goodsyntax<"tableidxw">;
+def S2_tableidxd_goodsyntax : tableidx_goodsyntax<"tableidxd">;
+
+//===----------------------------------------------------------------------===//
// V3 Instructions +
//===----------------------------------------------------------------------===//
@@ -5761,4 +5792,4 @@ include "HexagonInstrInfoV5.td"
// ALU32/64/Vector +
//===----------------------------------------------------------------------===//
-include "HexagonInstrInfoVector.td" \ No newline at end of file
+include "HexagonInstrInfoVector.td"
diff --git a/lib/Target/Hexagon/HexagonInstrInfoV4.td b/lib/Target/Hexagon/HexagonInstrInfoV4.td
index 0e4dde3..918b482 100644
--- a/lib/Target/Hexagon/HexagonInstrInfoV4.td
+++ b/lib/Target/Hexagon/HexagonInstrInfoV4.td
@@ -11,6 +11,25 @@
//
//===----------------------------------------------------------------------===//
+def DuplexIClass0: InstDuplex < 0 >;
+def DuplexIClass1: InstDuplex < 1 >;
+def DuplexIClass2: InstDuplex < 2 >;
+let isExtendable = 1 in {
+ def DuplexIClass3: InstDuplex < 3 >;
+ def DuplexIClass4: InstDuplex < 4 >;
+ def DuplexIClass5: InstDuplex < 5 >;
+ def DuplexIClass6: InstDuplex < 6 >;
+ def DuplexIClass7: InstDuplex < 7 >;
+}
+def DuplexIClass8: InstDuplex < 8 >;
+def DuplexIClass9: InstDuplex < 9 >;
+def DuplexIClassA: InstDuplex < 0xA >;
+def DuplexIClassB: InstDuplex < 0xB >;
+def DuplexIClassC: InstDuplex < 0xC >;
+def DuplexIClassD: InstDuplex < 0xD >;
+def DuplexIClassE: InstDuplex < 0xE >;
+def DuplexIClassF: InstDuplex < 0xF >;
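+
+// A duplex encodes two sub-instructions in a single 32-bit word; the iclass
+// value (here 0-0xF) selects which pair of sub-instruction slot classes is
+// encoded. Classes 3-7 are marked extendable, presumably because those
+// pairings can carry a constant extender.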
+
def addrga: PatLeaf<(i32 AddrGA:$Addr)>;
def addrgp: PatLeaf<(i32 AddrGP:$Addr)>;
@@ -137,6 +156,9 @@ def: T_cmp32_rr_pat<A4_rcmpeq, CmpInReg<seteq>, i32>;
def: T_cmp32_rr_pat<A4_rcmpneq, CmpInReg<setne>, i32>;
def: T_cmp32_rr_pat<C4_cmpneq, setne, i1>;
+def: T_cmp32_rr_pat<C4_cmplteu, setule, i1>;
+
+def: T_cmp32_rr_pat<C4_cmplteu, RevCmp<setuge>, i1>;
class T_CMP_rrbh<string mnemonic, bits<3> MinOp, bit IsComm>
: SInst<(outs PredRegs:$Pd), (ins IntRegs:$Rs, IntRegs:$Rt),
@@ -247,10 +269,10 @@ class T_RCMP_EQ_ri<string mnemonic, bit IsNeg>
def A4_rcmpeqi : T_RCMP_EQ_ri<"cmp.eq", 0>;
def A4_rcmpneqi : T_RCMP_EQ_ri<"!cmp.eq", 1>;
-def: Pat<(i32 (zext (i1 (seteq (i32 IntRegs:$Rs), s8ExtPred:$s8)))),
- (A4_rcmpeqi IntRegs:$Rs, s8ExtPred:$s8)>;
-def: Pat<(i32 (zext (i1 (setne (i32 IntRegs:$Rs), s8ExtPred:$s8)))),
- (A4_rcmpneqi IntRegs:$Rs, s8ExtPred:$s8)>;
+def: Pat<(i32 (zext (i1 (seteq (i32 IntRegs:$Rs), s32ImmPred:$s8)))),
+ (A4_rcmpeqi IntRegs:$Rs, s32ImmPred:$s8)>;
+def: Pat<(i32 (zext (i1 (setne (i32 IntRegs:$Rs), s32ImmPred:$s8)))),
+ (A4_rcmpneqi IntRegs:$Rs, s32ImmPred:$s8)>;
// Preserve the S2_tstbit_r generation
def: Pat<(i32 (zext (i1 (setne (i32 (and (i32 (shl 1, (i32 IntRegs:$src2))),
@@ -292,16 +314,15 @@ let opExtendable = 1 in
def A4_combineir : T_Combine1<0b01, (ins s8Ext:$s8, IntRegs:$Rs),
"$Rdd = combine(#$s8, $Rs)">;
-def HexagonWrapperCombineRI_V4 :
- SDNode<"HexagonISD::WrapperCombineRI_V4", SDTHexagonI64I32I32>;
-def HexagonWrapperCombineIR_V4 :
- SDNode<"HexagonISD::WrapperCombineIR_V4", SDTHexagonI64I32I32>;
+// The complexity of the combines involving immediates should be greater
+// than the complexity of the combine with two registers.
+let AddedComplexity = 50 in {
+def: Pat<(HexagonCOMBINE IntRegs:$r, s32ImmPred:$i),
+ (A4_combineri IntRegs:$r, s32ImmPred:$i)>;
-def : Pat <(HexagonWrapperCombineRI_V4 IntRegs:$r, s8ExtPred:$i),
- (A4_combineri IntRegs:$r, s8ExtPred:$i)>;
-
-def : Pat <(HexagonWrapperCombineIR_V4 s8ExtPred:$i, IntRegs:$r),
- (A4_combineir s8ExtPred:$i, IntRegs:$r)>;
+def: Pat<(HexagonCOMBINE s32ImmPred:$i, IntRegs:$r),
+ (A4_combineir s32ImmPred:$i, IntRegs:$r)>;
+}
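+// For example, (HexagonCOMBINE (i32 0), I32:$Rs) should then select
+// A4_combineir(#0, $Rs) directly instead of first materializing the zero
+// in a register and using the register-register combine.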
// A4_combineii: Set two small immediates.
let hasSideEffects = 0, isExtendable = 1, opExtentBits = 6, opExtendable = 2 in
@@ -322,7 +343,7 @@ def A4_combineii: ALU32Inst<(outs DoubleRegs:$Rdd), (ins s8Imm:$s8, u6Ext:$U6),
// The complexity of the combine with two immediates should be greater than
// the complexity of a combine involving a register.
let AddedComplexity = 75 in
-def: Pat<(HexagonCOMBINE s8ImmPred:$s8, u6ExtPred:$u6),
+def: Pat<(HexagonCOMBINE s8ImmPred:$s8, u32ImmPred:$u6),
(A4_combineii imm:$s8, imm:$u6)>;
//===----------------------------------------------------------------------===//
@@ -346,20 +367,22 @@ multiclass Loadxm_pat<PatFrag Load, ValueType VT, PatFrag ValueMod,
PatLeaf ImmPred, InstHexagon MI> {
def: Pat<(VT (Load AddrFI:$fi)),
(VT (ValueMod (MI AddrFI:$fi, 0)))>;
+ def: Pat<(VT (Load (add AddrFI:$fi, ImmPred:$Off))),
+ (VT (ValueMod (MI AddrFI:$fi, imm:$Off)))>;
def: Pat<(VT (Load (add IntRegs:$Rs, ImmPred:$Off))),
(VT (ValueMod (MI IntRegs:$Rs, imm:$Off)))>;
def: Pat<(VT (Load (i32 IntRegs:$Rs))),
(VT (ValueMod (MI IntRegs:$Rs, 0)))>;
}
-defm: Loadxm_pat<extloadi1, i64, Zext64, s11_0ExtPred, L2_loadrub_io>;
-defm: Loadxm_pat<extloadi8, i64, Zext64, s11_0ExtPred, L2_loadrub_io>;
-defm: Loadxm_pat<extloadi16, i64, Zext64, s11_1ExtPred, L2_loadruh_io>;
-defm: Loadxm_pat<zextloadi1, i64, Zext64, s11_0ExtPred, L2_loadrub_io>;
-defm: Loadxm_pat<zextloadi8, i64, Zext64, s11_0ExtPred, L2_loadrub_io>;
-defm: Loadxm_pat<zextloadi16, i64, Zext64, s11_1ExtPred, L2_loadruh_io>;
-defm: Loadxm_pat<sextloadi8, i64, Sext64, s11_0ExtPred, L2_loadrb_io>;
-defm: Loadxm_pat<sextloadi16, i64, Sext64, s11_1ExtPred, L2_loadrh_io>;
+defm: Loadxm_pat<extloadi1, i64, Zext64, s32_0ImmPred, L2_loadrub_io>;
+defm: Loadxm_pat<extloadi8, i64, Zext64, s32_0ImmPred, L2_loadrub_io>;
+defm: Loadxm_pat<extloadi16, i64, Zext64, s31_1ImmPred, L2_loadruh_io>;
+defm: Loadxm_pat<zextloadi1, i64, Zext64, s32_0ImmPred, L2_loadrub_io>;
+defm: Loadxm_pat<zextloadi8, i64, Zext64, s32_0ImmPred, L2_loadrub_io>;
+defm: Loadxm_pat<zextloadi16, i64, Zext64, s31_1ImmPred, L2_loadruh_io>;
+defm: Loadxm_pat<sextloadi8, i64, Sext64, s32_0ImmPred, L2_loadrb_io>;
+defm: Loadxm_pat<sextloadi16, i64, Sext64, s31_1ImmPred, L2_loadrh_io>;
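+
+// Zext64/Sext64 widen the 32-bit load result to a 64-bit register pair
+// (zero- or sign-filling the high word), so each defm above produces i64
+// extending-load patterns for the frame-index and reg(+offset) address forms.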
// Map Rdd = anyext(Rs) -> Rdd = combine(#0, Rs).
def: Pat<(i64 (anyext (i32 IntRegs:$src1))), (Zext64 IntRegs:$src1)>;
@@ -635,19 +658,6 @@ def: Pat<(i64 (zext (i1 PredRegs:$src1))),
def: Pat<(i64 (zext (i32 IntRegs:$src1))),
(Zext64 IntRegs:$src1)>;
-// zext i32->i64
-def: Pat <(i64 (zextloadi32 ADDRriS11_2:$src1)),
- (i64 (A4_combineir 0, (L2_loadri_io AddrFI:$src1, 0)))>;
-
-let AddedComplexity = 100 in
-def: Pat <(i64 (zextloadi32 (i32 (add IntRegs:$src1, s11_2ExtPred:$offset)))),
- (i64 (A4_combineir 0, (L2_loadri_io IntRegs:$src1,
- s11_2ExtPred:$offset)))>;
-
-// anyext i32->i64
-def: Pat <(i64 (extloadi32 ADDRriS11_2:$src1)),
- (i64 (A4_combineir 0, (L2_loadri_io AddrFI:$src1, 0)))>;
-
//===----------------------------------------------------------------------===//
// LD -
//===----------------------------------------------------------------------===//
@@ -768,8 +778,8 @@ multiclass T_StoreAbsReg_Pats <InstHexagon MI, RegisterClass RC, ValueType VT,
PatFrag stOp> {
def : Pat<(stOp (VT RC:$src4),
(add (shl (i32 IntRegs:$src1), u2ImmPred:$src2),
- u0AlwaysExtPred:$src3)),
- (MI IntRegs:$src1, u2ImmPred:$src2, u0AlwaysExtPred:$src3, RC:$src4)>;
+ u32ImmPred:$src3)),
+ (MI IntRegs:$src1, u2ImmPred:$src2, u32ImmPred:$src3, RC:$src4)>;
def : Pat<(stOp (VT RC:$src4),
(add (shl IntRegs:$src1, u2ImmPred:$src2),
@@ -1157,17 +1167,17 @@ let AddedComplexity = 40 in {
// is not extendable. This could cause problems while removing the frame
// indices, since the offset with respect to R29/R30 may not fit in the
// u6 field.
- def: Storexm_add_pat<truncstorei8, s8ExtPred, u6_0ImmPred, ToImmByte,
+ def: Storexm_add_pat<truncstorei8, s32ImmPred, u6_0ImmPred, ToImmByte,
S4_storeirb_io>;
- def: Storexm_add_pat<truncstorei16, s8ExtPred, u6_1ImmPred, ToImmHalf,
+ def: Storexm_add_pat<truncstorei16, s32ImmPred, u6_1ImmPred, ToImmHalf,
S4_storeirh_io>;
- def: Storexm_add_pat<store, s8ExtPred, u6_2ImmPred, ToImmWord,
+ def: Storexm_add_pat<store, s32ImmPred, u6_2ImmPred, ToImmWord,
S4_storeiri_io>;
}
-def: Storexm_simple_pat<truncstorei8, s8ExtPred, ToImmByte, S4_storeirb_io>;
-def: Storexm_simple_pat<truncstorei16, s8ExtPred, ToImmHalf, S4_storeirh_io>;
-def: Storexm_simple_pat<store, s8ExtPred, ToImmWord, S4_storeiri_io>;
+def: Storexm_simple_pat<truncstorei8, s32ImmPred, ToImmByte, S4_storeirb_io>;
+def: Storexm_simple_pat<truncstorei16, s32ImmPred, ToImmHalf, S4_storeirh_io>;
+def: Storexm_simple_pat<store, s32ImmPred, ToImmWord, S4_storeiri_io>;
// memb(Rx++#s4:0:circ(Mu))=Rt
// memb(Rx++I:circ(Mu))=Rt
@@ -1798,6 +1808,49 @@ def: LogLogNot_pat<or, and, C4_or_andn>;
def: LogLogNot_pat<or, or, C4_or_orn>;
//===----------------------------------------------------------------------===//
+// PIC: Support for PIC compilations. The patterns and SD nodes defined
+// below are needed for PIC code generation.
+//===----------------------------------------------------------------------===//
+
+def SDT_HexagonPICAdd
+ : SDTypeProfile<1, 1, [SDTCisVT<0, i32>, SDTCisVT<1, i32>]>;
+def SDT_HexagonGOTAdd
+ : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisVT<1, i32>]>;
+
+def SDT_HexagonGOTAddInternal : SDTypeProfile<1, 1, [SDTCisVT<0, i32>]>;
+def SDT_HexagonGOTAddInternalJT : SDTypeProfile<1, 1, [SDTCisVT<0, i32>]>;
+def SDT_HexagonGOTAddInternalBA : SDTypeProfile<1, 1, [SDTCisVT<0, i32>]>;
+
+def Hexagonpic_add : SDNode<"HexagonISD::PIC_ADD", SDT_HexagonPICAdd>;
+def Hexagonat_got : SDNode<"HexagonISD::AT_GOT", SDT_HexagonGOTAdd>;
+def Hexagongat_pcrel : SDNode<"HexagonISD::AT_PCREL",
+ SDT_HexagonGOTAddInternal>;
+def Hexagongat_pcrel_jt : SDNode<"HexagonISD::AT_PCREL",
+ SDT_HexagonGOTAddInternalJT>;
+def Hexagongat_pcrel_ba : SDNode<"HexagonISD::AT_PCREL",
+ SDT_HexagonGOTAddInternalBA>;
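+
+// Note that the three *_pcrel nodes all map to HexagonISD::AT_PCREL; the
+// separate definitions just let distinct patterns match globals, jump
+// tables, and block addresses.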
+
+// PIC: Map from a block address computation to a PC-relative add
+def: Pat<(Hexagongat_pcrel_ba tblockaddress:$src1),
+ (C4_addipc u32ImmPred:$src1)>;
+
+// PIC: Map from the computation to generate a GOT pointer to a PC-relative add
+def: Pat<(Hexagonpic_add texternalsym:$src1),
+ (C4_addipc u32ImmPred:$src1)>;
+
+// PIC: Map from a jump table address computation to a PC-relative add
+def: Pat<(Hexagongat_pcrel_jt tjumptable:$src1),
+ (C4_addipc u32ImmPred:$src1)>;
+
+// PIC: Map from a GOT-relative symbol reference to a load
+def: Pat<(Hexagonat_got (i32 IntRegs:$src1), tglobaladdr:$src2),
+ (L2_loadri_io IntRegs:$src1, s30_2ImmPred:$src2)>;
+
+// PIC: Map from a static symbol reference to a PC-relative add
+def: Pat<(Hexagongat_pcrel tglobaladdr:$src1),
+ (C4_addipc u32ImmPred:$src1)>;
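+
+// Taken together, a PIC global access becomes, roughly:
+//   r0 = add(pc, ##_GLOBAL_OFFSET_TABLE_@PCREL)   // Hexagonpic_add
+//   r1 = memw(r0 + ##foo@GOT)                     // Hexagonat_got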
+
+//===----------------------------------------------------------------------===//
// CR -
//===----------------------------------------------------------------------===//
@@ -1836,7 +1889,7 @@ def S4_addaddi : ALU64Inst <(outs IntRegs:$Rd),
(ins IntRegs:$Rs, IntRegs:$Ru, s6Ext:$s6),
"$Rd = add($Rs, add($Ru, #$s6))" ,
[(set (i32 IntRegs:$Rd), (add (i32 IntRegs:$Rs),
- (add (i32 IntRegs:$Ru), s6_16ExtPred:$s6)))],
+ (add (i32 IntRegs:$Ru), s16_16ImmPred:$s6)))],
"", ALU64_tc_2_SLOT23> {
bits<5> Rd;
bits<5> Rs;
@@ -1877,19 +1930,19 @@ def S4_subaddi: ALU64Inst <(outs IntRegs:$Rd),
}
// Rd=add(Rs,sub(#s6,Ru))
-def: Pat<(add (i32 IntRegs:$src1), (sub s6_10ExtPred:$src2,
+def: Pat<(add (i32 IntRegs:$src1), (sub s32ImmPred:$src2,
(i32 IntRegs:$src3))),
- (S4_subaddi IntRegs:$src1, s6_10ExtPred:$src2, IntRegs:$src3)>;
+ (S4_subaddi IntRegs:$src1, s32ImmPred:$src2, IntRegs:$src3)>;
// Rd=sub(add(Rs,#s6),Ru)
-def: Pat<(sub (add (i32 IntRegs:$src1), s6_10ExtPred:$src2),
+def: Pat<(sub (add (i32 IntRegs:$src1), s32ImmPred:$src2),
(i32 IntRegs:$src3)),
- (S4_subaddi IntRegs:$src1, s6_10ExtPred:$src2, IntRegs:$src3)>;
+ (S4_subaddi IntRegs:$src1, s32ImmPred:$src2, IntRegs:$src3)>;
// Rd=add(sub(Rs,Ru),#s6)
def: Pat<(add (sub (i32 IntRegs:$src1), (i32 IntRegs:$src3)),
- (s6_10ExtPred:$src2)),
- (S4_subaddi IntRegs:$src1, s6_10ExtPred:$src2, IntRegs:$src3)>;
+ (s32ImmPred:$src2)),
+ (S4_subaddi IntRegs:$src1, s32ImmPred:$src2, IntRegs:$src3)>;
// Add or subtract doublewords with carry.
@@ -2042,7 +2095,7 @@ def S4_or_andix:
(ins IntRegs:$Ru, IntRegs:$_src_, s10Ext:$s10),
"$Rx = or($Ru, and($_src_, #$s10))" ,
[(set (i32 IntRegs:$Rx),
- (or (i32 IntRegs:$Ru), (and (i32 IntRegs:$_src_), s10ExtPred:$s10)))] ,
+ (or (i32 IntRegs:$Ru), (and (i32 IntRegs:$_src_), s32ImmPred:$s10)))] ,
"$_src_ = $Rx", ALU64_tc_2_SLOT23> {
bits<5> Rx;
bits<5> Ru;
@@ -2187,7 +2240,7 @@ class T_CompOR <string mnemonic, bits<2> MajOp, SDNode OpNode>
(ins IntRegs:$src1, IntRegs:$Rs, s10Ext:$s10),
"$Rx |= "#mnemonic#"($Rs, #$s10)",
[(set (i32 IntRegs:$Rx), (or (i32 IntRegs:$src1),
- (OpNode (i32 IntRegs:$Rs), s10ExtPred:$s10)))],
+ (OpNode (i32 IntRegs:$Rs), s32ImmPred:$s10)))],
"$src1 = $Rx", ALU64_tc_2_SLOT23>, ImmRegRel {
bits<5> Rx;
bits<5> Rs;
@@ -2349,7 +2402,7 @@ def M4_mpyri_addi : MInst<(outs IntRegs:$Rd),
"$Rd = add(#$u6, mpyi($Rs, #$U6))" ,
[(set (i32 IntRegs:$Rd),
(add (mul (i32 IntRegs:$Rs), u6ImmPred:$U6),
- u6ExtPred:$u6))] ,"",ALU64_tc_3x_SLOT23> {
+ u32ImmPred:$u6))] ,"",ALU64_tc_3x_SLOT23> {
bits<5> Rd;
bits<6> u6;
bits<5> Rs;
@@ -2374,7 +2427,7 @@ def M4_mpyrr_addi : MInst <(outs IntRegs:$Rd),
(ins u6Ext:$u6, IntRegs:$Rs, IntRegs:$Rt),
"$Rd = add(#$u6, mpyi($Rs, $Rt))" ,
[(set (i32 IntRegs:$Rd),
- (add (mul (i32 IntRegs:$Rs), (i32 IntRegs:$Rt)), u6ExtPred:$u6))],
+ (add (mul (i32 IntRegs:$Rs), (i32 IntRegs:$Rt)), u32ImmPred:$u6))],
"", ALU64_tc_3x_SLOT23>, ImmRegRel {
bits<5> Rd;
bits<6> u6;
@@ -2424,7 +2477,7 @@ def M4_mpyri_addr_u2 : T_AddMpy<0b0, u6_2ImmPred,
let isExtendable = 1, opExtentBits = 6, opExtendable = 3,
CextOpcode = "ADD_MPY", InputType = "imm" in
-def M4_mpyri_addr : T_AddMpy<0b1, u6ExtPred,
+def M4_mpyri_addr : T_AddMpy<0b1, u32ImmPred,
(ins IntRegs:$src1, IntRegs:$src3, u6Ext:$src2)>, ImmRegRel;
// Rx=add(Ru,mpyi(Rx,Rs))
@@ -2447,17 +2500,6 @@ def M4_mpyrr_addr: MInst_acc <(outs IntRegs:$Rx),
let Inst{20-16} = Rs;
}
-// Rd=add(##,mpyi(Rs,#U6))
-def : Pat <(add (mul (i32 IntRegs:$src2), u6ImmPred:$src3),
- (HexagonCONST32 tglobaladdr:$src1)),
- (i32 (M4_mpyri_addi tglobaladdr:$src1, IntRegs:$src2,
- u6ImmPred:$src3))>;
-
-// Rd=add(##,mpyi(Rs,Rt))
-def : Pat <(add (mul (i32 IntRegs:$src2), (i32 IntRegs:$src3)),
- (HexagonCONST32 tglobaladdr:$src1)),
- (i32 (M4_mpyrr_addi tglobaladdr:$src1, IntRegs:$src2,
- IntRegs:$src3))>;
// Vector reduce multiply word by signed half (32x16)
//Rdd=vrmpyweh(Rss,Rtt)[:<<1]
@@ -2569,7 +2611,7 @@ class T_S4_ShiftOperate<string MnOp, string MnSh, SDNode Op, SDNode Sh,
: MInst_acc<(outs IntRegs:$Rd), (ins u8Ext:$u8, IntRegs:$Rx, u5Imm:$U5),
"$Rd = "#MnOp#"(#$u8, "#MnSh#"($Rx, #$U5))",
[(set (i32 IntRegs:$Rd),
- (Op (Sh I32:$Rx, u5ImmPred:$U5), u8ExtPred:$u8))],
+ (Op (Sh I32:$Rx, u5ImmPred:$U5), u32ImmPred:$u8))],
"$Rd = $Rx", Itin> {
bits<5> Rd;
@@ -2904,7 +2946,7 @@ let isExtendable = 1, opExtendable = 1, isExtentSigned = 0 in {
// mem[bh](Rs+#u6) += #U5
//===----------------------------------------------------------------------===//
-multiclass MemOpi_u5Pats <PatFrag ldOp, PatFrag stOp, PatLeaf ExtPred,
+multiclass MemOpi_u5Pats <PatFrag ldOp, PatFrag stOp, PatLeaf ImmPred,
InstHexagon MI, SDNode OpNode> {
let AddedComplexity = 180 in
def: Pat<(stOp (OpNode (ldOp IntRegs:$addr), u5ImmPred:$addend),
@@ -2912,24 +2954,24 @@ multiclass MemOpi_u5Pats <PatFrag ldOp, PatFrag stOp, PatLeaf ExtPred,
(MI IntRegs:$addr, 0, u5ImmPred:$addend)>;
let AddedComplexity = 190 in
- def: Pat<(stOp (OpNode (ldOp (add IntRegs:$base, ExtPred:$offset)),
+ def: Pat<(stOp (OpNode (ldOp (add IntRegs:$base, ImmPred:$offset)),
u5ImmPred:$addend),
- (add IntRegs:$base, ExtPred:$offset)),
- (MI IntRegs:$base, ExtPred:$offset, u5ImmPred:$addend)>;
+ (add IntRegs:$base, ImmPred:$offset)),
+ (MI IntRegs:$base, ImmPred:$offset, u5ImmPred:$addend)>;
}
-multiclass MemOpi_u5ALUOp<PatFrag ldOp, PatFrag stOp, PatLeaf ExtPred,
+multiclass MemOpi_u5ALUOp<PatFrag ldOp, PatFrag stOp, PatLeaf ImmPred,
InstHexagon addMI, InstHexagon subMI> {
- defm: MemOpi_u5Pats<ldOp, stOp, ExtPred, addMI, add>;
- defm: MemOpi_u5Pats<ldOp, stOp, ExtPred, subMI, sub>;
+ defm: MemOpi_u5Pats<ldOp, stOp, ImmPred, addMI, add>;
+ defm: MemOpi_u5Pats<ldOp, stOp, ImmPred, subMI, sub>;
}
multiclass MemOpi_u5ExtType<PatFrag ldOpByte, PatFrag ldOpHalf > {
// Half Word
- defm: MemOpi_u5ALUOp <ldOpHalf, truncstorei16, u6_1ExtPred,
+ defm: MemOpi_u5ALUOp <ldOpHalf, truncstorei16, u31_1ImmPred,
L4_iadd_memoph_io, L4_isub_memoph_io>;
// Byte
- defm: MemOpi_u5ALUOp <ldOpByte, truncstorei8, u6ExtPred,
+ defm: MemOpi_u5ALUOp <ldOpByte, truncstorei8, u32ImmPred,
L4_iadd_memopb_io, L4_isub_memopb_io>;
}
@@ -2939,7 +2981,7 @@ let Predicates = [UseMEMOP] in {
defm: MemOpi_u5ExtType<extloadi8, extloadi16>; // any extend
// Word
- defm: MemOpi_u5ALUOp <load, store, u6_2ExtPred, L4_iadd_memopw_io,
+ defm: MemOpi_u5ALUOp <load, store, u30_2ImmPred, L4_iadd_memopw_io,
L4_isub_memopw_io>;
}
@@ -2950,7 +2992,7 @@ let Predicates = [UseMEMOP] in {
// mem[bh](Rs+#u6) += #m5
//===----------------------------------------------------------------------===//
-multiclass MemOpi_m5Pats <PatFrag ldOp, PatFrag stOp, PatLeaf extPred,
+multiclass MemOpi_m5Pats <PatFrag ldOp, PatFrag stOp, PatLeaf ImmPred,
PatLeaf immPred, SDNodeXForm xformFunc,
InstHexagon MI> {
let AddedComplexity = 190 in
@@ -2958,18 +3000,18 @@ multiclass MemOpi_m5Pats <PatFrag ldOp, PatFrag stOp, PatLeaf extPred,
(MI IntRegs:$addr, 0, (xformFunc immPred:$subend))>;
let AddedComplexity = 195 in
- def: Pat<(stOp (add (ldOp (add IntRegs:$base, extPred:$offset)),
+ def: Pat<(stOp (add (ldOp (add IntRegs:$base, ImmPred:$offset)),
immPred:$subend),
- (add IntRegs:$base, extPred:$offset)),
- (MI IntRegs:$base, extPred:$offset, (xformFunc immPred:$subend))>;
+ (add IntRegs:$base, ImmPred:$offset)),
+ (MI IntRegs:$base, ImmPred:$offset, (xformFunc immPred:$subend))>;
}
multiclass MemOpi_m5ExtType<PatFrag ldOpByte, PatFrag ldOpHalf > {
// Half Word
- defm: MemOpi_m5Pats <ldOpHalf, truncstorei16, u6_1ExtPred, m5HImmPred,
+ defm: MemOpi_m5Pats <ldOpHalf, truncstorei16, u31_1ImmPred, m5HImmPred,
MEMOPIMM_HALF, L4_isub_memoph_io>;
// Byte
- defm: MemOpi_m5Pats <ldOpByte, truncstorei8, u6ExtPred, m5BImmPred,
+ defm: MemOpi_m5Pats <ldOpByte, truncstorei8, u32ImmPred, m5BImmPred,
MEMOPIMM_BYTE, L4_isub_memopb_io>;
}
@@ -2979,7 +3021,7 @@ let Predicates = [UseMEMOP] in {
defm: MemOpi_m5ExtType<extloadi8, extloadi16>; // any extend
// Word
- defm: MemOpi_m5Pats <load, store, u6_2ExtPred, m5ImmPred,
+ defm: MemOpi_m5Pats <load, store, u30_2ImmPred, m5ImmPred,
MEMOPIMM, L4_isub_memopw_io>;
}
@@ -3008,16 +3050,16 @@ multiclass MemOpi_bitPats <PatFrag ldOp, PatFrag stOp, PatLeaf immPred,
multiclass MemOpi_bitExtType<PatFrag ldOpByte, PatFrag ldOpHalf> {
// Byte - clrbit
- defm: MemOpi_bitPats<ldOpByte, truncstorei8, Clr3ImmPred, u6ExtPred,
+ defm: MemOpi_bitPats<ldOpByte, truncstorei8, Clr3ImmPred, u32ImmPred,
CLRMEMIMM_BYTE, L4_iand_memopb_io, and>;
// Byte - setbit
- defm: MemOpi_bitPats<ldOpByte, truncstorei8, Set3ImmPred, u6ExtPred,
+ defm: MemOpi_bitPats<ldOpByte, truncstorei8, Set3ImmPred, u32ImmPred,
SETMEMIMM_BYTE, L4_ior_memopb_io, or>;
// Half Word - clrbit
- defm: MemOpi_bitPats<ldOpHalf, truncstorei16, Clr4ImmPred, u6_1ExtPred,
+ defm: MemOpi_bitPats<ldOpHalf, truncstorei16, Clr4ImmPred, u31_1ImmPred,
CLRMEMIMM_SHORT, L4_iand_memoph_io, and>;
// Half Word - setbit
- defm: MemOpi_bitPats<ldOpHalf, truncstorei16, Set4ImmPred, u6_1ExtPred,
+ defm: MemOpi_bitPats<ldOpHalf, truncstorei16, Set4ImmPred, u31_1ImmPred,
SETMEMIMM_SHORT, L4_ior_memoph_io, or>;
}
@@ -3030,9 +3072,9 @@ let Predicates = [UseMEMOP] in {
// memw(Rs+#0) = [clrbit|setbit](#U5)
// memw(Rs+#u6:2) = [clrbit|setbit](#U5)
- defm: MemOpi_bitPats<load, store, Clr5ImmPred, u6_2ExtPred, CLRMEMIMM,
+ defm: MemOpi_bitPats<load, store, Clr5ImmPred, u30_2ImmPred, CLRMEMIMM,
L4_iand_memopw_io, and>;
- defm: MemOpi_bitPats<load, store, Set5ImmPred, u6_2ExtPred, SETMEMIMM,
+ defm: MemOpi_bitPats<load, store, Set5ImmPred, u30_2ImmPred, SETMEMIMM,
L4_ior_memopw_io, or>;
}
@@ -3070,11 +3112,11 @@ multiclass MemOPr_ALUOp<PatFrag ldOp, PatFrag stOp, PatLeaf extPred,
multiclass MemOPr_ExtType<PatFrag ldOpByte, PatFrag ldOpHalf > {
// Half Word
- defm: MemOPr_ALUOp <ldOpHalf, truncstorei16, u6_1ExtPred,
+ defm: MemOPr_ALUOp <ldOpHalf, truncstorei16, u31_1ImmPred,
L4_add_memoph_io, L4_sub_memoph_io,
L4_and_memoph_io, L4_or_memoph_io>;
// Byte
- defm: MemOPr_ALUOp <ldOpByte, truncstorei8, u6ExtPred,
+ defm: MemOPr_ALUOp <ldOpByte, truncstorei8, u32ImmPred,
L4_add_memopb_io, L4_sub_memopb_io,
L4_and_memopb_io, L4_or_memopb_io>;
}
@@ -3086,7 +3128,7 @@ let Predicates = [UseMEMOP] in {
defm: MemOPr_ExtType<sextloadi8, sextloadi16>; // sign extend
defm: MemOPr_ExtType<extloadi8, extloadi16>; // any extend
// Word
- defm: MemOPr_ALUOp <load, store, u6_2ExtPred, L4_add_memopw_io,
+ defm: MemOPr_ALUOp <load, store, u30_2ImmPred, L4_add_memopw_io,
L4_sub_memopw_io, L4_and_memopw_io, L4_or_memopw_io>;
}
@@ -3110,23 +3152,23 @@ def C4_cmpneqi : T_CMP <"cmp.eq", 0b00, 1, s10Ext>;
def C4_cmpltei : T_CMP <"cmp.gt", 0b01, 1, s10Ext>;
def C4_cmplteui : T_CMP <"cmp.gtu", 0b10, 1, u9Ext>;
-def : T_CMP_pat <C4_cmpneqi, setne, s10ExtPred>;
-def : T_CMP_pat <C4_cmpltei, setle, s10ExtPred>;
+def : T_CMP_pat <C4_cmpneqi, setne, s32ImmPred>;
+def : T_CMP_pat <C4_cmpltei, setle, s32ImmPred>;
def : T_CMP_pat <C4_cmplteui, setule, u9ImmPred>;
// rs <= rt -> !(rs > rt).
/*
-def: Pat<(i1 (setle (i32 IntRegs:$src1), s10ExtPred:$src2)),
- (C2_not (C2_cmpgti IntRegs:$src1, s10ExtPred:$src2))>;
-// (C4_cmpltei IntRegs:$src1, s10ExtPred:$src2)>;
+def: Pat<(i1 (setle (i32 IntRegs:$src1), s32ImmPred:$src2)),
+ (C2_not (C2_cmpgti IntRegs:$src1, s32ImmPred:$src2))>;
+// (C4_cmpltei IntRegs:$src1, s32ImmPred:$src2)>;
*/
// Map cmplt(Rs, Imm) -> !cmpgt(Rs, Imm-1).
-def: Pat<(i1 (setlt (i32 IntRegs:$src1), s8ExtPred:$src2)),
- (C4_cmpltei IntRegs:$src1, (DEC_CONST_SIGNED s8ExtPred:$src2))>;
+def: Pat<(i1 (setlt (i32 IntRegs:$src1), s32ImmPred:$src2)),
+ (C4_cmpltei IntRegs:$src1, (DEC_CONST_SIGNED s32ImmPred:$src2))>;
// rs != rt -> !(rs == rt).
-def: Pat<(i1 (setne (i32 IntRegs:$src1), s10ExtPred:$src2)),
- (C4_cmpneqi IntRegs:$src1, s10ExtPred:$src2)>;
+def: Pat<(i1 (setne (i32 IntRegs:$src1), s32ImmPred:$src2)),
+ (C4_cmpneqi IntRegs:$src1, s32ImmPred:$src2)>;
// SDNode for converting immediate C to C-1.
def DEC_CONST_BYTE : SDNodeXForm<imm, [{
@@ -3136,168 +3178,6 @@ def DEC_CONST_BYTE : SDNodeXForm<imm, [{
}]>;
// For the sequence
-// zext( seteq ( and(Rs, 255), u8))
-// Generate
-// Pd=cmpb.eq(Rs, #u8)
-// if (Pd.new) Rd=#1
-// if (!Pd.new) Rd=#0
-def : Pat <(i32 (zext (i1 (seteq (i32 (and (i32 IntRegs:$Rs), 255)),
- u8ExtPred:$u8)))),
- (i32 (TFR_condset_ii (i1 (A4_cmpbeqi (i32 IntRegs:$Rs),
- (u8ExtPred:$u8))),
- 1, 0))>;
-
-// For the sequence
-// zext( setne ( and(Rs, 255), u8))
-// Generate
-// Pd=cmpb.eq(Rs, #u8)
-// if (Pd.new) Rd=#0
-// if (!Pd.new) Rd=#1
-def : Pat <(i32 (zext (i1 (setne (i32 (and (i32 IntRegs:$Rs), 255)),
- u8ExtPred:$u8)))),
- (i32 (TFR_condset_ii (i1 (A4_cmpbeqi (i32 IntRegs:$Rs),
- (u8ExtPred:$u8))),
- 0, 1))>;
-
-// For the sequence
-// zext( seteq (Rs, and(Rt, 255)))
-// Generate
-// Pd=cmpb.eq(Rs, Rt)
-// if (Pd.new) Rd=#1
-// if (!Pd.new) Rd=#0
-def : Pat <(i32 (zext (i1 (seteq (i32 IntRegs:$Rt),
- (i32 (and (i32 IntRegs:$Rs), 255)))))),
- (i32 (TFR_condset_ii (i1 (A4_cmpbeq (i32 IntRegs:$Rs),
- (i32 IntRegs:$Rt))),
- 1, 0))>;
-
-// For the sequence
-// zext( setne (Rs, and(Rt, 255)))
-// Generate
-// Pd=cmpb.eq(Rs, Rt)
-// if (Pd.new) Rd=#0
-// if (!Pd.new) Rd=#1
-def : Pat <(i32 (zext (i1 (setne (i32 IntRegs:$Rt),
- (i32 (and (i32 IntRegs:$Rs), 255)))))),
- (i32 (TFR_condset_ii (i1 (A4_cmpbeq (i32 IntRegs:$Rs),
- (i32 IntRegs:$Rt))),
- 0, 1))>;
-
-// For the sequence
-// zext( setugt ( and(Rs, 255), u8))
-// Generate
-// Pd=cmpb.gtu(Rs, #u8)
-// if (Pd.new) Rd=#1
-// if (!Pd.new) Rd=#0
-def : Pat <(i32 (zext (i1 (setugt (i32 (and (i32 IntRegs:$Rs), 255)),
- u8ExtPred:$u8)))),
- (i32 (TFR_condset_ii (i1 (A4_cmpbgtui (i32 IntRegs:$Rs),
- (u8ExtPred:$u8))),
- 1, 0))>;
-
-// For the sequence
-// zext( setugt ( and(Rs, 254), u8))
-// Generate
-// Pd=cmpb.gtu(Rs, #u8)
-// if (Pd.new) Rd=#1
-// if (!Pd.new) Rd=#0
-def : Pat <(i32 (zext (i1 (setugt (i32 (and (i32 IntRegs:$Rs), 254)),
- u8ExtPred:$u8)))),
- (i32 (TFR_condset_ii (i1 (A4_cmpbgtui (i32 IntRegs:$Rs),
- (u8ExtPred:$u8))),
- 1, 0))>;
-
-// For the sequence
-// zext( setult ( Rs, Rt))
-// Generate
-// Pd=cmp.ltu(Rs, Rt)
-// if (Pd.new) Rd=#1
-// if (!Pd.new) Rd=#0
-// cmp.ltu(Rs, Rt) -> cmp.gtu(Rt, Rs)
-def : Pat <(i32 (zext (i1 (setult (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))))),
- (i32 (TFR_condset_ii (i1 (C2_cmpgtu (i32 IntRegs:$Rt),
- (i32 IntRegs:$Rs))),
- 1, 0))>;
-
-// For the sequence
-// zext( setlt ( Rs, Rt))
-// Generate
-// Pd=cmp.lt(Rs, Rt)
-// if (Pd.new) Rd=#1
-// if (!Pd.new) Rd=#0
-// cmp.lt(Rs, Rt) -> cmp.gt(Rt, Rs)
-def : Pat <(i32 (zext (i1 (setlt (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))))),
- (i32 (TFR_condset_ii (i1 (C2_cmpgt (i32 IntRegs:$Rt),
- (i32 IntRegs:$Rs))),
- 1, 0))>;
-
-// For the sequence
-// zext( setugt ( Rs, Rt))
-// Generate
-// Pd=cmp.gtu(Rs, Rt)
-// if (Pd.new) Rd=#1
-// if (!Pd.new) Rd=#0
-def : Pat <(i32 (zext (i1 (setugt (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))))),
- (i32 (TFR_condset_ii (i1 (C2_cmpgtu (i32 IntRegs:$Rs),
- (i32 IntRegs:$Rt))),
- 1, 0))>;
-
-// This pattern interefers with coremark performance, not implementing at this
-// time.
-// For the sequence
-// zext( setgt ( Rs, Rt))
-// Generate
-// Pd=cmp.gt(Rs, Rt)
-// if (Pd.new) Rd=#1
-// if (!Pd.new) Rd=#0
-
-// For the sequence
-// zext( setuge ( Rs, Rt))
-// Generate
-// Pd=cmp.ltu(Rs, Rt)
-// if (Pd.new) Rd=#0
-// if (!Pd.new) Rd=#1
-// cmp.ltu(Rs, Rt) -> cmp.gtu(Rt, Rs)
-def : Pat <(i32 (zext (i1 (setuge (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))))),
- (i32 (TFR_condset_ii (i1 (C2_cmpgtu (i32 IntRegs:$Rt),
- (i32 IntRegs:$Rs))),
- 0, 1))>;
-
-// For the sequence
-// zext( setge ( Rs, Rt))
-// Generate
-// Pd=cmp.lt(Rs, Rt)
-// if (Pd.new) Rd=#0
-// if (!Pd.new) Rd=#1
-// cmp.lt(Rs, Rt) -> cmp.gt(Rt, Rs)
-def : Pat <(i32 (zext (i1 (setge (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))))),
- (i32 (TFR_condset_ii (i1 (C2_cmpgt (i32 IntRegs:$Rt),
- (i32 IntRegs:$Rs))),
- 0, 1))>;
-
-// For the sequence
-// zext( setule ( Rs, Rt))
-// Generate
-// Pd=cmp.gtu(Rs, Rt)
-// if (Pd.new) Rd=#0
-// if (!Pd.new) Rd=#1
-def : Pat <(i32 (zext (i1 (setule (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))))),
- (i32 (TFR_condset_ii (i1 (C2_cmpgtu (i32 IntRegs:$Rs),
- (i32 IntRegs:$Rt))),
- 0, 1))>;
-
-// For the sequence
-// zext( setle ( Rs, Rt))
-// Generate
-// Pd=cmp.gt(Rs, Rt)
-// if (Pd.new) Rd=#0
-// if (!Pd.new) Rd=#1
-def : Pat <(i32 (zext (i1 (setle (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))))),
- (i32 (TFR_condset_ii (i1 (C2_cmpgt (i32 IntRegs:$Rs),
- (i32 IntRegs:$Rt))),
- 0, 1))>;
-
-// For the sequence
// zext( setult ( and(Rs, 255), u8))
// Use the isdigit transformation below
@@ -3381,26 +3261,17 @@ defm L4_return: LD_MISC_L4_RETURN <"dealloc_return">, PredNewRel;
// Restore registers and dealloc return function call.
let isCall = 1, isBarrier = 1, isReturn = 1, isTerminator = 1,
Defs = [R29, R30, R31, PC], isPredicable = 0, isAsmParserOnly = 1 in {
- def RESTORE_DEALLOC_RET_JMP_V4 : JInst<(outs),
- (ins calltarget:$dst),
- "jump $dst",
- []>;
+ def RESTORE_DEALLOC_RET_JMP_V4 : T_JMP<"">;
}
// Restore registers and dealloc frame before a tail call.
let isCall = 1, Defs = [R29, R30, R31, PC], isAsmParserOnly = 1 in {
- def RESTORE_DEALLOC_BEFORE_TAILCALL_V4 : JInst<(outs),
- (ins calltarget:$dst),
- "call $dst",
- []>;
+ def RESTORE_DEALLOC_BEFORE_TAILCALL_V4 : T_Call<"">, PredRel;
}
// Save registers function call.
let isCall = 1, Uses = [R29, R31], isAsmParserOnly = 1 in {
- def SAVE_REGISTERS_CALL_V4 : JInst<(outs),
- (ins calltarget:$dst),
- "call $dst // Save_calle_saved_registers",
- []>;
+ def SAVE_REGISTERS_CALL_V4 : T_Call<"">, PredRel;
}
//===----------------------------------------------------------------------===//
@@ -3472,7 +3343,7 @@ class T_StoreAbs_Pred <string mnemonic, RegisterClass RC, bits<2> MajOp,
//===----------------------------------------------------------------------===//
class T_StoreAbs <string mnemonic, RegisterClass RC, Operand ImmOp,
bits<2> MajOp, bit isHalf>
- : T_StoreAbsGP <mnemonic, RC, ImmOp, MajOp, u0AlwaysExt, 1, isHalf>,
+ : T_StoreAbsGP <mnemonic, RC, ImmOp, MajOp, u32Imm, 1, isHalf>,
AddrModeRel {
string ImmOpStr = !cast<string>(ImmOp);
let opExtentBits = !if (!eq(ImmOpStr, "u16_3Imm"), 19,
@@ -3513,7 +3384,7 @@ multiclass ST_Abs<string mnemonic, string CextOp, RegisterClass RC,
let hasSideEffects = 0, isPredicable = 1, mayStore = 1, isNVStore = 1,
isNewValue = 1, opNewValue = 1 in
class T_StoreAbsGP_NV <string mnemonic, Operand ImmOp, bits<2>MajOp, bit isAbs>
- : NVInst_V4<(outs), (ins u0AlwaysExt:$addr, IntRegs:$src),
+ : NVInst_V4<(outs), (ins u32Imm:$addr, IntRegs:$src),
mnemonic # !if(isAbs, "(##", "(#")#"$addr) = $src.new",
[], "", V2LDST_tc_st_SLOT0> {
bits<19> addr;
@@ -3743,7 +3614,7 @@ class T_LoadAbsGP <string mnemonic, RegisterClass RC, Operand ImmOp,
class T_LoadAbs <string mnemonic, RegisterClass RC, Operand ImmOp,
bits<3> MajOp>
- : T_LoadAbsGP <mnemonic, RC, ImmOp, MajOp, u0AlwaysExt, 1>, AddrModeRel {
+ : T_LoadAbsGP <mnemonic, RC, ImmOp, MajOp, u32Imm, 1>, AddrModeRel {
string ImmOpStr = !cast<string>(ImmOp);
let opExtentBits = !if (!eq(ImmOpStr, "u16_3Imm"), 19,
@@ -3903,17 +3774,17 @@ def: Pat<(i64 (ctlz I64:$src1)), (Zext64 (S2_cl0p I64:$src1))>;
def: Pat<(i64 (cttz I64:$src1)), (Zext64 (S2_ct0p I64:$src1))>;
let AddedComplexity = 30 in {
- def: Storea_pat<truncstorei8, I32, u0AlwaysExtPred, S2_storerbabs>;
- def: Storea_pat<truncstorei16, I32, u0AlwaysExtPred, S2_storerhabs>;
- def: Storea_pat<store, I32, u0AlwaysExtPred, S2_storeriabs>;
+ def: Storea_pat<truncstorei8, I32, u32ImmPred, S2_storerbabs>;
+ def: Storea_pat<truncstorei16, I32, u32ImmPred, S2_storerhabs>;
+ def: Storea_pat<store, I32, u32ImmPred, S2_storeriabs>;
}
let AddedComplexity = 30 in {
- def: Loada_pat<load, i32, u0AlwaysExtPred, L4_loadri_abs>;
- def: Loada_pat<sextloadi8, i32, u0AlwaysExtPred, L4_loadrb_abs>;
- def: Loada_pat<zextloadi8, i32, u0AlwaysExtPred, L4_loadrub_abs>;
- def: Loada_pat<sextloadi16, i32, u0AlwaysExtPred, L4_loadrh_abs>;
- def: Loada_pat<zextloadi16, i32, u0AlwaysExtPred, L4_loadruh_abs>;
+ def: Loada_pat<load, i32, u32ImmPred, L4_loadri_abs>;
+ def: Loada_pat<sextloadi8, i32, u32ImmPred, L4_loadrb_abs>;
+ def: Loada_pat<zextloadi8, i32, u32ImmPred, L4_loadrub_abs>;
+ def: Loada_pat<sextloadi16, i32, u32ImmPred, L4_loadrh_abs>;
+ def: Loada_pat<zextloadi16, i32, u32ImmPred, L4_loadruh_abs>;
}
// Indexed store word - global address.
@@ -4012,6 +3883,18 @@ def: Storea_pat<SwapSt<atomic_store_16>, I32, addrgp, S2_storerhabs>;
def: Storea_pat<SwapSt<atomic_store_32>, I32, addrgp, S2_storeriabs>;
def: Storea_pat<SwapSt<atomic_store_64>, I64, addrgp, S2_storerdabs>;
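+
+// Insert4 packs four 16-bit values into a single 64-bit register:
+//   dst = { d[15:0], c[15:0], b[15:0], a[15:0] }
+// The .error asm string means it must never reach emission; presumably it
+// is expanded into real insert instructions after selection.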
+let Constraints = "@earlyclobber $dst" in
+def Insert4 : PseudoM<(outs DoubleRegs:$dst), (ins IntRegs:$a, IntRegs:$b,
+ IntRegs:$c, IntRegs:$d),
+ ".error \"Should never try to emit Insert4\"",
+ [(set (i64 DoubleRegs:$dst),
+ (or (or (or (shl (i64 (zext (i32 (and (i32 IntRegs:$b), (i32 65535))))),
+ (i32 16)),
+ (i64 (zext (i32 (and (i32 IntRegs:$a), (i32 65535)))))),
+ (shl (i64 (anyext (i32 (and (i32 IntRegs:$c), (i32 65535))))),
+ (i32 32))),
+ (shl (i64 (anyext (i32 IntRegs:$d))), (i32 48))))]>;
+
//===----------------------------------------------------------------------===//
// :raw form of boundscheck:hi:lo insns
//===----------------------------------------------------------------------===//
@@ -4116,7 +3999,7 @@ class CJInst_tstbit_R0<string px, bit np, string tnt>
: InstHexagon<(outs), (ins IntRegs:$Rs, brtarget:$r9_2),
""#px#" = tstbit($Rs, #0); if ("
#!if(np, "!","")#""#px#".new) jump:"#tnt#" $r9_2",
- [], "", COMPOUND, TypeCOMPOUND> {
+ [], "", COMPOUND, TypeCOMPOUND>, OpcodeHexagon {
bits<4> Rs;
bits<11> r9_2;
@@ -4162,7 +4045,7 @@ class CJInst_RR<string px, string op, bit np, string tnt>
: InstHexagon<(outs), (ins IntRegs:$Rs, IntRegs:$Rt, brtarget:$r9_2),
""#px#" = cmp."#op#"($Rs, $Rt); if ("
#!if(np, "!","")#""#px#".new) jump:"#tnt#" $r9_2",
- [], "", COMPOUND, TypeCOMPOUND> {
+ [], "", COMPOUND, TypeCOMPOUND>, OpcodeHexagon {
bits<4> Rs;
bits<4> Rt;
bits<11> r9_2;
@@ -4216,7 +4099,7 @@ class CJInst_RU5<string px, string op, bit np, string tnt>
: InstHexagon<(outs), (ins IntRegs:$Rs, u5Imm:$U5, brtarget:$r9_2),
""#px#" = cmp."#op#"($Rs, #$U5); if ("
#!if(np, "!","")#""#px#".new) jump:"#tnt#" $r9_2",
- [], "", COMPOUND, TypeCOMPOUND> {
+ [], "", COMPOUND, TypeCOMPOUND>, OpcodeHexagon {
bits<4> Rs;
bits<5> U5;
bits<11> r9_2;
@@ -4271,7 +4154,7 @@ class CJInst_Rn1<string px, string op, bit np, string tnt>
: InstHexagon<(outs), (ins IntRegs:$Rs, brtarget:$r9_2),
""#px#" = cmp."#op#"($Rs,#-1); if ("
#!if(np, "!","")#""#px#".new) jump:"#tnt#" $r9_2",
- [], "", COMPOUND, TypeCOMPOUND> {
+ [], "", COMPOUND, TypeCOMPOUND>, OpcodeHexagon {
bits<4> Rs;
bits<11> r9_2;
diff --git a/lib/Target/Hexagon/HexagonInstrInfoV5.td b/lib/Target/Hexagon/HexagonInstrInfoV5.td
index 19b0935..337f4ea 100644
--- a/lib/Target/Hexagon/HexagonInstrInfoV5.td
+++ b/lib/Target/Hexagon/HexagonInstrInfoV5.td
@@ -139,11 +139,11 @@ def S5_popcountp : ALU64_rr<(outs IntRegs:$Rd), (ins DoubleRegs:$Rss),
let Inst{20-16} = Rss;
}
-defm: Loadx_pat<load, f32, s11_2ExtPred, L2_loadri_io>;
-defm: Loadx_pat<load, f64, s11_3ExtPred, L2_loadrd_io>;
+defm: Loadx_pat<load, f32, s30_2ImmPred, L2_loadri_io>;
+defm: Loadx_pat<load, f64, s29_3ImmPred, L2_loadrd_io>;
-defm: Storex_pat<store, F32, s11_2ExtPred, S2_storeri_io>;
-defm: Storex_pat<store, F64, s11_3ExtPred, S2_storerd_io>;
+defm: Storex_pat<store, F32, s30_2ImmPred, S2_storeri_io>;
+defm: Storex_pat<store, F64, s29_3ImmPred, S2_storerd_io>;
def: Storex_simple_pat<store, F32, S2_storeri_io>;
def: Storex_simple_pat<store, F64, S2_storerd_io>;
diff --git a/lib/Target/Hexagon/HexagonInstrInfoVector.td b/lib/Target/Hexagon/HexagonInstrInfoVector.td
index 6e67b6e..f4fb946 100644
--- a/lib/Target/Hexagon/HexagonInstrInfoVector.td
+++ b/lib/Target/Hexagon/HexagonInstrInfoVector.td
@@ -20,6 +20,34 @@ def V8I8: PatLeaf<(v8i8 DoubleRegs:$R)>;
def V4I16: PatLeaf<(v4i16 DoubleRegs:$R)>;
def V2I32: PatLeaf<(v2i32 DoubleRegs:$R)>;
+
+multiclass bitconvert_32<ValueType a, ValueType b> {
+ def : Pat <(b (bitconvert (a IntRegs:$src))),
+ (b IntRegs:$src)>;
+ def : Pat <(a (bitconvert (b IntRegs:$src))),
+ (a IntRegs:$src)>;
+}
+
+multiclass bitconvert_64<ValueType a, ValueType b> {
+ def : Pat <(b (bitconvert (a DoubleRegs:$src))),
+ (b DoubleRegs:$src)>;
+ def : Pat <(a (bitconvert (b DoubleRegs:$src))),
+ (a DoubleRegs:$src)>;
+}
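+
+// These bitconverts are pure register reinterpretations: the value stays in
+// the same 32- or 64-bit register, so no instruction is emitted.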
+
+// Bit convert vector types.
+defm : bitconvert_32<v4i8, i32>;
+defm : bitconvert_32<v2i16, i32>;
+defm : bitconvert_32<v2i16, v4i8>;
+
+defm : bitconvert_64<v8i8, i64>;
+defm : bitconvert_64<v4i16, i64>;
+defm : bitconvert_64<v2i32, i64>;
+defm : bitconvert_64<v8i8, v4i16>;
+defm : bitconvert_64<v8i8, v2i32>;
+defm : bitconvert_64<v4i16, v2i32>;
+
+
// Vector shift support. Vector shifting in Hexagon is rather different
// from LLVM's internal representation.
// LLVM assumes all shifts (in the vector case) will have the form
@@ -44,6 +72,12 @@ class vshift_v2i32<SDNode Op, string Str, bits<3>MajOp, bits<3>MinOp>
let Inst{12-8} = src2;
}
+def : Pat<(v2i16 (add (v2i16 IntRegs:$src1), (v2i16 IntRegs:$src2))),
+ (A2_svaddh IntRegs:$src1, IntRegs:$src2)>;
+
+def : Pat<(v2i16 (sub (v2i16 IntRegs:$src1), (v2i16 IntRegs:$src2))),
+ (A2_svsubh IntRegs:$src1, IntRegs:$src2)>;
+
def S2_asr_i_vw : vshift_v2i32<sra, "vasrw", 0b010, 0b000>;
def S2_lsr_i_vw : vshift_v2i32<srl, "vlsrw", 0b010, 0b001>;
def S2_asl_i_vw : vshift_v2i32<shl, "vaslw", 0b010, 0b010>;
@@ -52,6 +86,87 @@ def S2_asr_i_vh : vshift_v4i16<sra, "vasrh", 0b100, 0b000>;
def S2_lsr_i_vh : vshift_v4i16<srl, "vlsrh", 0b100, 0b001>;
def S2_asl_i_vh : vshift_v4i16<shl, "vaslh", 0b100, 0b010>;
+
+def HexagonVSPLATB: SDNode<"HexagonISD::VSPLATB", SDTUnaryOp>;
+def HexagonVSPLATH: SDNode<"HexagonISD::VSPLATH", SDTUnaryOp>;
+
+// Replicate the low 8 bits of the 32-bit input register into each of the
+// four bytes of the 32-bit destination register.
+def: Pat<(v4i8 (HexagonVSPLATB I32:$Rs)), (S2_vsplatrb I32:$Rs)>;
+
+// Replicate the low 16 bits of the 32-bit input register into each of the
+// four halfwords of the 64-bit destination register.
+def: Pat<(v4i16 (HexagonVSPLATH I32:$Rs)), (S2_vsplatrh I32:$Rs)>;
+
+
+class VArith_pat <InstHexagon MI, SDNode Op, PatFrag Type>
+ : Pat <(Op Type:$Rss, Type:$Rtt),
+ (MI Type:$Rss, Type:$Rtt)>;
+
+def: VArith_pat <A2_vaddub, add, V8I8>;
+def: VArith_pat <A2_vaddh, add, V4I16>;
+def: VArith_pat <A2_vaddw, add, V2I32>;
+def: VArith_pat <A2_vsubub, sub, V8I8>;
+def: VArith_pat <A2_vsubh, sub, V4I16>;
+def: VArith_pat <A2_vsubw, sub, V2I32>;
+
+def: VArith_pat <A2_and, and, V2I16>;
+def: VArith_pat <A2_xor, xor, V2I16>;
+def: VArith_pat <A2_or, or, V2I16>;
+
+def: VArith_pat <A2_andp, and, V8I8>;
+def: VArith_pat <A2_andp, and, V4I16>;
+def: VArith_pat <A2_andp, and, V2I32>;
+def: VArith_pat <A2_orp, or, V8I8>;
+def: VArith_pat <A2_orp, or, V4I16>;
+def: VArith_pat <A2_orp, or, V2I32>;
+def: VArith_pat <A2_xorp, xor, V8I8>;
+def: VArith_pat <A2_xorp, xor, V4I16>;
+def: VArith_pat <A2_xorp, xor, V2I32>;
+
+def: Pat<(v2i32 (sra V2I32:$b, (i64 (HexagonCOMBINE (i32 u5ImmPred:$c),
+ (i32 u5ImmPred:$c))))),
+ (S2_asr_i_vw V2I32:$b, imm:$c)>;
+def: Pat<(v2i32 (srl V2I32:$b, (i64 (HexagonCOMBINE (i32 u5ImmPred:$c),
+ (i32 u5ImmPred:$c))))),
+ (S2_lsr_i_vw V2I32:$b, imm:$c)>;
+def: Pat<(v2i32 (shl V2I32:$b, (i64 (HexagonCOMBINE (i32 u5ImmPred:$c),
+ (i32 u5ImmPred:$c))))),
+ (S2_asl_i_vw V2I32:$b, imm:$c)>;
+
+def: Pat<(v4i16 (sra V4I16:$b, (v4i16 (HexagonVSPLATH (i32 (u4ImmPred:$c)))))),
+ (S2_asr_i_vh V4I16:$b, imm:$c)>;
+def: Pat<(v4i16 (srl V4I16:$b, (v4i16 (HexagonVSPLATH (i32 (u4ImmPred:$c)))))),
+ (S2_lsr_i_vh V4I16:$b, imm:$c)>;
+def: Pat<(v4i16 (shl V4I16:$b, (v4i16 (HexagonVSPLATH (i32 (u4ImmPred:$c)))))),
+ (S2_asl_i_vh V4I16:$b, imm:$c)>;
+
+
+def SDTHexagon_v2i32_v2i32_i32 : SDTypeProfile<1, 2,
+ [SDTCisSameAs<0, 1>, SDTCisVT<0, v2i32>, SDTCisInt<2>]>;
+def SDTHexagon_v4i16_v4i16_i32 : SDTypeProfile<1, 2,
+ [SDTCisSameAs<0, 1>, SDTCisVT<0, v4i16>, SDTCisInt<2>]>;
+
+def HexagonVSRAW: SDNode<"HexagonISD::VSRAW", SDTHexagon_v2i32_v2i32_i32>;
+def HexagonVSRAH: SDNode<"HexagonISD::VSRAH", SDTHexagon_v4i16_v4i16_i32>;
+def HexagonVSRLW: SDNode<"HexagonISD::VSRLW", SDTHexagon_v2i32_v2i32_i32>;
+def HexagonVSRLH: SDNode<"HexagonISD::VSRLH", SDTHexagon_v4i16_v4i16_i32>;
+def HexagonVSHLW: SDNode<"HexagonISD::VSHLW", SDTHexagon_v2i32_v2i32_i32>;
+def HexagonVSHLH: SDNode<"HexagonISD::VSHLH", SDTHexagon_v4i16_v4i16_i32>;
+
+def: Pat<(v2i32 (HexagonVSRAW V2I32:$Rs, u5ImmPred:$u5)),
+ (S2_asr_i_vw V2I32:$Rs, imm:$u5)>;
+def: Pat<(v4i16 (HexagonVSRAH V4I16:$Rs, u4ImmPred:$u4)),
+ (S2_asr_i_vh V4I16:$Rs, imm:$u4)>;
+def: Pat<(v2i32 (HexagonVSRLW V2I32:$Rs, u5ImmPred:$u5)),
+ (S2_lsr_i_vw V2I32:$Rs, imm:$u5)>;
+def: Pat<(v4i16 (HexagonVSRLH V4I16:$Rs, u4ImmPred:$u4)),
+ (S2_lsr_i_vh V4I16:$Rs, imm:$u4)>;
+def: Pat<(v2i32 (HexagonVSHLW V2I32:$Rs, u5ImmPred:$u5)),
+ (S2_asl_i_vw V2I32:$Rs, imm:$u5)>;
+def: Pat<(v4i16 (HexagonVSHLH V4I16:$Rs, u4ImmPred:$u4)),
+ (S2_asl_i_vh V4I16:$Rs, imm:$u4)>;
+
// Vector shift words by register
def S2_asr_r_vw : T_S3op_shiftVect < "vasrw", 0b00, 0b00>;
def S2_lsr_r_vw : T_S3op_shiftVect < "vlsrw", 0b00, 0b01>;
@@ -63,3 +178,306 @@ def S2_asr_r_vh : T_S3op_shiftVect < "vasrh", 0b01, 0b00>;
def S2_lsr_r_vh : T_S3op_shiftVect < "vlsrh", 0b01, 0b01>;
def S2_asl_r_vh : T_S3op_shiftVect < "vaslh", 0b01, 0b10>;
def S2_lsl_r_vh : T_S3op_shiftVect < "vlslh", 0b01, 0b11>;
+
+class vshift_rr_pat<InstHexagon MI, SDNode Op, PatFrag Value>
+ : Pat <(Op Value:$Rs, I32:$Rt),
+ (MI Value:$Rs, I32:$Rt)>;
+
+def: vshift_rr_pat <S2_asr_r_vw, HexagonVSRAW, V2I32>;
+def: vshift_rr_pat <S2_asr_r_vh, HexagonVSRAH, V4I16>;
+def: vshift_rr_pat <S2_lsr_r_vw, HexagonVSRLW, V2I32>;
+def: vshift_rr_pat <S2_lsr_r_vh, HexagonVSRLH, V4I16>;
+def: vshift_rr_pat <S2_asl_r_vw, HexagonVSHLW, V2I32>;
+def: vshift_rr_pat <S2_asl_r_vh, HexagonVSHLH, V4I16>;
+
+
+def SDTHexagonVecCompare_v8i8 : SDTypeProfile<1, 2,
+ [SDTCisSameAs<1, 2>, SDTCisVT<0, i1>, SDTCisVT<1, v8i8>]>;
+def SDTHexagonVecCompare_v4i16 : SDTypeProfile<1, 2,
+ [SDTCisSameAs<1, 2>, SDTCisVT<0, i1>, SDTCisVT<1, v4i16>]>;
+def SDTHexagonVecCompare_v2i32 : SDTypeProfile<1, 2,
+ [SDTCisSameAs<1, 2>, SDTCisVT<0, i1>, SDTCisVT<1, v2i32>]>;
+
+def HexagonVCMPBEQ: SDNode<"HexagonISD::VCMPBEQ", SDTHexagonVecCompare_v8i8>;
+def HexagonVCMPBGT: SDNode<"HexagonISD::VCMPBGT", SDTHexagonVecCompare_v8i8>;
+def HexagonVCMPBGTU: SDNode<"HexagonISD::VCMPBGTU", SDTHexagonVecCompare_v8i8>;
+def HexagonVCMPHEQ: SDNode<"HexagonISD::VCMPHEQ", SDTHexagonVecCompare_v4i16>;
+def HexagonVCMPHGT: SDNode<"HexagonISD::VCMPHGT", SDTHexagonVecCompare_v4i16>;
+def HexagonVCMPHGTU: SDNode<"HexagonISD::VCMPHGTU", SDTHexagonVecCompare_v4i16>;
+def HexagonVCMPWEQ: SDNode<"HexagonISD::VCMPWEQ", SDTHexagonVecCompare_v2i32>;
+def HexagonVCMPWGT: SDNode<"HexagonISD::VCMPWGT", SDTHexagonVecCompare_v2i32>;
+def HexagonVCMPWGTU: SDNode<"HexagonISD::VCMPWGTU", SDTHexagonVecCompare_v2i32>;
+
+
+class vcmp_i1_pat<InstHexagon MI, SDNode Op, PatFrag Value>
+ : Pat <(i1 (Op Value:$Rs, Value:$Rt)),
+ (MI Value:$Rs, Value:$Rt)>;
+
+def: vcmp_i1_pat<A2_vcmpbeq, HexagonVCMPBEQ, V8I8>;
+def: vcmp_i1_pat<A4_vcmpbgt, HexagonVCMPBGT, V8I8>;
+def: vcmp_i1_pat<A2_vcmpbgtu, HexagonVCMPBGTU, V8I8>;
+
+def: vcmp_i1_pat<A2_vcmpheq, HexagonVCMPHEQ, V4I16>;
+def: vcmp_i1_pat<A2_vcmphgt, HexagonVCMPHGT, V4I16>;
+def: vcmp_i1_pat<A2_vcmphgtu, HexagonVCMPHGTU, V4I16>;
+
+def: vcmp_i1_pat<A2_vcmpweq, HexagonVCMPWEQ, V2I32>;
+def: vcmp_i1_pat<A2_vcmpwgt, HexagonVCMPWGT, V2I32>;
+def: vcmp_i1_pat<A2_vcmpwgtu, HexagonVCMPWGTU, V2I32>;
+
+
+class vcmp_vi1_pat<InstHexagon MI, PatFrag Op, PatFrag InVal, ValueType OutTy>
+ : Pat <(OutTy (Op InVal:$Rs, InVal:$Rt)),
+ (MI InVal:$Rs, InVal:$Rt)>;
+
+def: vcmp_vi1_pat<A2_vcmpweq, seteq, V2I32, v2i1>;
+def: vcmp_vi1_pat<A2_vcmpwgt, setgt, V2I32, v2i1>;
+def: vcmp_vi1_pat<A2_vcmpwgtu, setugt, V2I32, v2i1>;
+
+def: vcmp_vi1_pat<A2_vcmpheq, seteq, V4I16, v4i1>;
+def: vcmp_vi1_pat<A2_vcmphgt, setgt, V4I16, v4i1>;
+def: vcmp_vi1_pat<A2_vcmphgtu, setugt, V4I16, v4i1>;
+
+
+// Hexagon doesn't have a vector multiply with C semantics.
+// Instead, generate a pseudo instruction that gets expanded into two
+// scalar MPYI instructions.
+// This is expanded by ExpandPostRAPseudos.
+let isPseudo = 1 in
+def VMULW : PseudoM<(outs DoubleRegs:$Rd),
+ (ins DoubleRegs:$Rs, DoubleRegs:$Rt),
+ ".error \"Should never try to emit VMULW\"",
+ [(set V2I32:$Rd, (mul V2I32:$Rs, V2I32:$Rt))]>;
+
+let isPseudo = 1 in
+def VMULW_ACC : PseudoM<(outs DoubleRegs:$Rd),
+ (ins DoubleRegs:$Rx, DoubleRegs:$Rs, DoubleRegs:$Rt),
+ ".error \"Should never try to emit VMULW_ACC\"",
+ [(set V2I32:$Rd, (add V2I32:$Rx, (mul V2I32:$Rs, V2I32:$Rt)))],
+ "$Rd = $Rx">;
+
+// Add two v4i8: Hexagon has no insn for this, so we use the 64-bit
+// v8i8 add and keep only the low part of the result.
+def: Pat<(v4i8 (add (v4i8 IntRegs:$Rs), (v4i8 IntRegs:$Rt))),
+ (LoReg (A2_vaddub (Zext64 $Rs), (Zext64 $Rt)))>;
+
+// Subtract two v4i8: Hexagon has no insn for this, so we use the 64-bit
+// v8i8 sub and keep only the low part of the result.
+def: Pat<(v4i8 (sub (v4i8 IntRegs:$Rs), (v4i8 IntRegs:$Rt))),
+ (LoReg (A2_vsubub (Zext64 $Rs), (Zext64 $Rt)))>;
+
+//
+// No 32 bit vector mux.
+//
+def: Pat<(v4i8 (select I1:$Pu, V4I8:$Rs, V4I8:$Rt)),
+ (LoReg (C2_vmux I1:$Pu, (Zext64 $Rs), (Zext64 $Rt)))>;
+def: Pat<(v2i16 (select I1:$Pu, V2I16:$Rs, V2I16:$Rt)),
+ (LoReg (C2_vmux I1:$Pu, (Zext64 $Rs), (Zext64 $Rt)))>;
+
+//
+// 64-bit vector mux.
+//
+def: Pat<(v8i8 (vselect V8I1:$Pu, V8I8:$Rs, V8I8:$Rt)),
+ (C2_vmux V8I1:$Pu, V8I8:$Rs, V8I8:$Rt)>;
+def: Pat<(v4i16 (vselect V4I1:$Pu, V4I16:$Rs, V4I16:$Rt)),
+ (C2_vmux V4I1:$Pu, V4I16:$Rs, V4I16:$Rt)>;
+def: Pat<(v2i32 (vselect V2I1:$Pu, V2I32:$Rs, V2I32:$Rt)),
+ (C2_vmux V2I1:$Pu, V2I32:$Rs, V2I32:$Rt)>;
+
+//
+// No 32-bit vector compare.
+//
+def: Pat<(i1 (seteq V4I8:$Rs, V4I8:$Rt)),
+ (A2_vcmpbeq (Zext64 $Rs), (Zext64 $Rt))>;
+def: Pat<(i1 (setgt V4I8:$Rs, V4I8:$Rt)),
+ (A4_vcmpbgt (Zext64 $Rs), (Zext64 $Rt))>;
+def: Pat<(i1 (setugt V4I8:$Rs, V4I8:$Rt)),
+ (A2_vcmpbgtu (Zext64 $Rs), (Zext64 $Rt))>;
+
+def: Pat<(i1 (seteq V2I16:$Rs, V2I16:$Rt)),
+ (A2_vcmpheq (Zext64 $Rs), (Zext64 $Rt))>;
+def: Pat<(i1 (setgt V2I16:$Rs, V2I16:$Rt)),
+ (A2_vcmphgt (Zext64 $Rs), (Zext64 $Rt))>;
+def: Pat<(i1 (setugt V2I16:$Rs, V2I16:$Rt)),
+ (A2_vcmphgtu (Zext64 $Rs), (Zext64 $Rt))>;
+
+
+class InvertCmp_pat<InstHexagon InvMI, PatFrag CmpOp, PatFrag Value,
+ ValueType CmpTy>
+ : Pat<(CmpTy (CmpOp Value:$Rs, Value:$Rt)),
+ (InvMI Value:$Rt, Value:$Rs)>;
+
+// Map from a compare operation to the corresponding instruction with the
+// order of operands reversed, e.g. x > y --> cmp.lt(y,x).
+def: InvertCmp_pat<A4_vcmpbgt, setlt, V8I8, i1>;
+def: InvertCmp_pat<A4_vcmpbgt, setlt, V8I8, v8i1>;
+def: InvertCmp_pat<A2_vcmphgt, setlt, V4I16, i1>;
+def: InvertCmp_pat<A2_vcmphgt, setlt, V4I16, v4i1>;
+def: InvertCmp_pat<A2_vcmpwgt, setlt, V2I32, i1>;
+def: InvertCmp_pat<A2_vcmpwgt, setlt, V2I32, v2i1>;
+
+def: InvertCmp_pat<A2_vcmpbgtu, setult, V8I8, i1>;
+def: InvertCmp_pat<A2_vcmpbgtu, setult, V8I8, v8i1>;
+def: InvertCmp_pat<A2_vcmphgtu, setult, V4I16, i1>;
+def: InvertCmp_pat<A2_vcmphgtu, setult, V4I16, v4i1>;
+def: InvertCmp_pat<A2_vcmpwgtu, setult, V2I32, i1>;
+def: InvertCmp_pat<A2_vcmpwgtu, setult, V2I32, v2i1>;
+
+// Map from vcmpne(Rss) -> !vcmpew(Rss).
+// rs != rt -> !(rs == rt).
+def: Pat<(v2i1 (setne V2I32:$Rs, V2I32:$Rt)),
+         (C2_not (v2i1 (A2_vcmpweq V2I32:$Rs, V2I32:$Rt)))>;
+
+
+// Truncate: from vector B copy all 'E'ven 'B'yte elements:
+// A[0] = B[0]; A[1] = B[2]; A[2] = B[4]; A[3] = B[6];
+def: Pat<(v4i8 (trunc V4I16:$Rs)),
+ (S2_vtrunehb V4I16:$Rs)>;
+
+// Truncate: from vector B copy all 'O'dd 'B'yte elements:
+// A[0] = B[1]; A[1] = B[3]; A[2] = B[5]; A[3] = B[7];
+// S2_vtrunohb
+
+// Truncate: from vectors B and C copy all 'E'ven 'H'alf-word elements:
+// A[0] = B[0]; A[1] = B[2]; A[2] = C[0]; A[3] = C[2];
+// S2_vtruneh
+
+def: Pat<(v2i16 (trunc V2I32:$Rs)),
+ (LoReg (S2_packhl (HiReg $Rs), (LoReg $Rs)))>;
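
Independent of the S2_packhl encoding trick, what the v2i32 -> v2i16 pattern computes is plain per-lane truncation; a small sketch with hypothetical names:

    #include <cstdint>

    static uint32_t trunc_v2i32_to_v2i16(uint64_t Rss) {
      uint16_t Lane0 = (uint16_t)Rss;          // low 16 bits of the LoReg word
      uint16_t Lane1 = (uint16_t)(Rss >> 32);  // low 16 bits of the HiReg word
      return ((uint32_t)Lane1 << 16) | Lane0;
    }
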
+
+
+def HexagonVSXTBH : SDNode<"HexagonISD::VSXTBH", SDTUnaryOp>;
+def HexagonVSXTBW : SDNode<"HexagonISD::VSXTBW", SDTUnaryOp>;
+
+def: Pat<(i64 (HexagonVSXTBH I32:$Rs)), (S2_vsxtbh I32:$Rs)>;
+def: Pat<(i64 (HexagonVSXTBW I32:$Rs)), (S2_vsxthw I32:$Rs)>;
+
+def: Pat<(v4i16 (zext V4I8:$Rs)), (S2_vzxtbh V4I8:$Rs)>;
+def: Pat<(v2i32 (zext V2I16:$Rs)), (S2_vzxthw V2I16:$Rs)>;
+def: Pat<(v4i16 (anyext V4I8:$Rs)), (S2_vzxtbh V4I8:$Rs)>;
+def: Pat<(v2i32 (anyext V2I16:$Rs)), (S2_vzxthw V2I16:$Rs)>;
+def: Pat<(v4i16 (sext V4I8:$Rs)), (S2_vsxtbh V4I8:$Rs)>;
+def: Pat<(v2i32 (sext V2I16:$Rs)), (S2_vsxthw V2I16:$Rs)>;
+
+// Sign extends a v2i8 into a v2i32.
+def: Pat<(v2i32 (sext_inreg V2I32:$Rs, v2i8)),
+ (A2_combinew (A2_sxtb (HiReg $Rs)), (A2_sxtb (LoReg $Rs)))>;
+
+// Sign extends a v2i16 into a v2i32.
+def: Pat<(v2i32 (sext_inreg V2I32:$Rs, v2i16)),
+ (A2_combinew (A2_sxth (HiReg $Rs)), (A2_sxth (LoReg $Rs)))>;
+
+
+// Multiplies two v2i16 values and returns a v2i32. We use the saturating
+// multiply here, as Hexagon does not provide a non-saturating vector
+// multiply, and saturation cannot affect a result held in double the
+// precision of the operands.
+
+// Multiplies two v2i16 vectors: as Hexagon does not have a multiply with
+// C semantics for this type, this pattern uses the halfword multiply vmpyh,
+// which takes two v2i16 vectors and returns a v2i32. The result is then
+// truncated back into a v2i16, simulating the C wrap-around semantics for
+// unsigned integers.
+def vmpyh: OutPatFrag<(ops node:$Rs, node:$Rt),
+ (M2_vmpy2s_s0 (i32 $Rs), (i32 $Rt))>;
+
+def: Pat<(v2i16 (mul V2I16:$Rs, V2I16:$Rt)),
+ (LoReg (S2_vtrunewh (v2i32 (A2_combineii 0, 0)),
+ (v2i32 (vmpyh V2I16:$Rs, V2I16:$Rt))))>;
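
A sketch of the wrap-around v2i16 multiply this pattern implements: vmpyh forms the full 32-bit products (the saturation in M2_vmpy2s_s0 can never fire, since a 16x16 product always fits in 32 bits), and the truncation keeps the low halfword of each lane (hypothetical helper, not backend code):

    #include <cstdint>

    static uint32_t mul_v2i16(uint32_t Rs, uint32_t Rt) {
      int32_t P0 = (int16_t)Rs * (int16_t)Rt;                  // vmpyh lane 0
      int32_t P1 = (int16_t)(Rs >> 16) * (int16_t)(Rt >> 16);  // vmpyh lane 1
      return ((uint32_t)(uint16_t)P1 << 16) | (uint16_t)P0;    // keep low halves
    }
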
+
+// Multiplies two v4i16 vectors.
+def: Pat<(v4i16 (mul V4I16:$Rs, V4I16:$Rt)),
+ (S2_vtrunewh (vmpyh (HiReg $Rs), (HiReg $Rt)),
+ (vmpyh (LoReg $Rs), (LoReg $Rt)))>;
+
+def VMPYB_no_V5: OutPatFrag<(ops node:$Rs, node:$Rt),
+ (S2_vtrunewh (vmpyh (HiReg (S2_vsxtbh $Rs)), (HiReg (S2_vsxtbh $Rt))),
+ (vmpyh (LoReg (S2_vsxtbh $Rs)), (LoReg (S2_vsxtbh $Rt))))>;
+
+// Multiplies two v4i8 vectors.
+def: Pat<(v4i8 (mul V4I8:$Rs, V4I8:$Rt)),
+ (S2_vtrunehb (M5_vmpybsu V4I8:$Rs, V4I8:$Rt))>,
+ Requires<[HasV5T]>;
+
+def: Pat<(v4i8 (mul V4I8:$Rs, V4I8:$Rt)),
+ (S2_vtrunehb (VMPYB_no_V5 V4I8:$Rs, V4I8:$Rt))>;
+
+// Multiplies two v8i8 vectors.
+def: Pat<(v8i8 (mul V8I8:$Rs, V8I8:$Rt)),
+ (A2_combinew (S2_vtrunehb (M5_vmpybsu (HiReg $Rs), (HiReg $Rt))),
+ (S2_vtrunehb (M5_vmpybsu (LoReg $Rs), (LoReg $Rt))))>,
+ Requires<[HasV5T]>;
+
+def: Pat<(v8i8 (mul V8I8:$Rs, V8I8:$Rt)),
+ (A2_combinew (S2_vtrunehb (VMPYB_no_V5 (HiReg $Rs), (HiReg $Rt))),
+ (S2_vtrunehb (VMPYB_no_V5 (LoReg $Rs), (LoReg $Rt))))>;
+
+
+class shuffler<SDNode Op, string Str>
+ : SInst<(outs DoubleRegs:$a), (ins DoubleRegs:$b, DoubleRegs:$c),
+ "$a = " # Str # "($b, $c)",
+ [(set (i64 DoubleRegs:$a),
+ (i64 (Op (i64 DoubleRegs:$b), (i64 DoubleRegs:$c))))],
+ "", S_3op_tc_1_SLOT23>;
+
+def SDTHexagonBinOp64 : SDTypeProfile<1, 2,
+ [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisVT<0, i64>]>;
+
+def HexagonSHUFFEB: SDNode<"HexagonISD::SHUFFEB", SDTHexagonBinOp64>;
+def HexagonSHUFFEH: SDNode<"HexagonISD::SHUFFEH", SDTHexagonBinOp64>;
+def HexagonSHUFFOB: SDNode<"HexagonISD::SHUFFOB", SDTHexagonBinOp64>;
+def HexagonSHUFFOH: SDNode<"HexagonISD::SHUFFOH", SDTHexagonBinOp64>;
+
+class ShufflePat<InstHexagon MI, SDNode Op>
+ : Pat<(i64 (Op DoubleRegs:$src1, DoubleRegs:$src2)),
+ (i64 (MI DoubleRegs:$src1, DoubleRegs:$src2))>;
+
+// Shuffles even bytes for i=0..3: A[2*i].b = C[2*i].b; A[2*i+1].b = B[2*i].b
+def: ShufflePat<S2_shuffeb, HexagonSHUFFEB>;
+
+// Shuffles odd bytes for i=0..3: A[2*i].b = C[2*i+1].b; A[2*i+1].b = B[2*i+1].b
+def: ShufflePat<S2_shuffob, HexagonSHUFFOB>;
+
+// Shuffles even half for i=0,1: A[2*i].h = C[2*i].h; A[2*i+1].h = B[2*i].h
+def: ShufflePat<S2_shuffeh, HexagonSHUFFEH>;
+
+// Shuffles odd half for i=0,1: A[2*i].h = C[2*i+1].h; A[2*i+1].h = B[2*i+1].h
+def: ShufflePat<S2_shuffoh, HexagonSHUFFOH>;
+
+
+// Truncated store from v4i16 to v4i8.
+def truncstorev4i8: PatFrag<(ops node:$val, node:$ptr),
+ (truncstore node:$val, node:$ptr),
+ [{ return cast<StoreSDNode>(N)->getMemoryVT() == MVT::v4i8; }]>;
+
+// Truncated store from v2i32 to v2i16.
+def truncstorev2i16: PatFrag<(ops node:$val, node:$ptr),
+ (truncstore node:$val, node:$ptr),
+ [{ return cast<StoreSDNode>(N)->getMemoryVT() == MVT::v2i16; }]>;
+
+def: Pat<(truncstorev2i16 V2I32:$Rs, I32:$Rt),
+ (S2_storeri_io I32:$Rt, 0, (LoReg (S2_packhl (HiReg $Rs),
+ (LoReg $Rs))))>;
+
+def: Pat<(truncstorev4i8 V4I16:$Rs, I32:$Rt),
+ (S2_storeri_io I32:$Rt, 0, (S2_vtrunehb V4I16:$Rs))>;
+
+
+// Zero and sign extended load from v2i8 into v2i16.
+def zextloadv2i8: PatFrag<(ops node:$ptr), (zextload node:$ptr),
+ [{ return cast<LoadSDNode>(N)->getMemoryVT() == MVT::v2i8; }]>;
+
+def sextloadv2i8: PatFrag<(ops node:$ptr), (sextload node:$ptr),
+ [{ return cast<LoadSDNode>(N)->getMemoryVT() == MVT::v2i8; }]>;
+
+def: Pat<(v2i16 (zextloadv2i8 I32:$Rs)),
+ (LoReg (v4i16 (S2_vzxtbh (L2_loadruh_io I32:$Rs, 0))))>;
+
+def: Pat<(v2i16 (sextloadv2i8 I32:$Rs)),
+ (LoReg (v4i16 (S2_vsxtbh (L2_loadrh_io I32:$Rs, 0))))>;
+
+def: Pat<(v2i32 (zextloadv2i8 I32:$Rs)),
+ (S2_vzxthw (LoReg (v4i16 (S2_vzxtbh (L2_loadruh_io I32:$Rs, 0)))))>;
+
+def: Pat<(v2i32 (sextloadv2i8 I32:$Rs)),
+ (S2_vsxthw (LoReg (v4i16 (S2_vsxtbh (L2_loadrh_io I32:$Rs, 0)))))>;
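
The extending-load patterns above amount to one narrow load followed by per-byte widening; a minimal sketch of the v2i8 -> v2i16 zero-extending case, assuming little-endian byte order as on Hexagon (illustrative, not the selected machine code):

    #include <cstdint>
    #include <cstring>

    static uint32_t zextload_v2i8(const void *P) {
      uint16_t W;
      std::memcpy(&W, P, sizeof(W));          // L2_loadruh_io: one 16-bit load
      uint16_t B0 = W & 0xFF, B1 = W >> 8;    // S2_vzxtbh, low two lanes
      return ((uint32_t)B1 << 16) | B0;       // v2i16 result
    }

The sign-extending and v2i32 variants differ only in the widening step.
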
diff --git a/lib/Target/Hexagon/HexagonIntrinsics.td b/lib/Target/Hexagon/HexagonIntrinsics.td
index c0551e8..4275230 100644
--- a/lib/Target/Hexagon/HexagonIntrinsics.td
+++ b/lib/Target/Hexagon/HexagonIntrinsics.td
@@ -690,16 +690,15 @@ def: T_RR_pat<A2_combine_hl, int_hexagon_A2_combine_hl>;
def: T_RR_pat<A2_combine_lh, int_hexagon_A2_combine_lh>;
def: T_RR_pat<A2_combine_ll, int_hexagon_A2_combine_ll>;
-def: T_II_pat<A2_combineii, int_hexagon_A2_combineii, s8ExtPred, s8ImmPred>;
+def: T_II_pat<A2_combineii, int_hexagon_A2_combineii, s32ImmPred, s8ImmPred>;
-def: Pat<(i32 (int_hexagon_C2_mux (I32:$Rp), (I32:$Rs),
- (I32:$Rt))),
+def: Pat<(i32 (int_hexagon_C2_mux (I32:$Rp), (I32:$Rs), (I32:$Rt))),
(i32 (C2_mux (C2_tfrrp IntRegs:$Rp), IntRegs:$Rs, IntRegs:$Rt))>;
// Mux
-def : T_QRI_pat<C2_muxir, int_hexagon_C2_muxir, s8ExtPred>;
-def : T_QIR_pat<C2_muxri, int_hexagon_C2_muxri, s8ExtPred>;
-def : T_QII_pat<C2_muxii, int_hexagon_C2_muxii, s8ExtPred, s8ImmPred>;
+def : T_QRI_pat<C2_muxir, int_hexagon_C2_muxir, s32ImmPred>;
+def : T_QIR_pat<C2_muxri, int_hexagon_C2_muxri, s32ImmPred>;
+def : T_QII_pat<C2_muxii, int_hexagon_C2_muxii, s32ImmPred, s8ImmPred>;
// Shift halfword
def : T_R_pat<A2_aslh, int_hexagon_A2_aslh>;
@@ -720,17 +719,17 @@ def : T_RR_pat<C2_cmpeq, int_hexagon_C2_cmpeq>;
def : T_RR_pat<C2_cmpgt, int_hexagon_C2_cmpgt>;
def : T_RR_pat<C2_cmpgtu, int_hexagon_C2_cmpgtu>;
-def : T_RI_pat<C2_cmpeqi, int_hexagon_C2_cmpeqi, s10ExtPred>;
-def : T_RI_pat<C2_cmpgti, int_hexagon_C2_cmpgti, s10ExtPred>;
-def : T_RI_pat<C2_cmpgtui, int_hexagon_C2_cmpgtui, u9ExtPred>;
+def : T_RI_pat<C2_cmpeqi, int_hexagon_C2_cmpeqi, s32ImmPred>;
+def : T_RI_pat<C2_cmpgti, int_hexagon_C2_cmpgti, s32ImmPred>;
+def : T_RI_pat<C2_cmpgtui, int_hexagon_C2_cmpgtui, u32ImmPred>;
-def : Pat <(i32 (int_hexagon_C2_cmpgei (I32:$src1), s8ExtPred:$src2)),
+def : Pat <(i32 (int_hexagon_C2_cmpgei (I32:$src1), s32ImmPred:$src2)),
(i32 (C2_cmpgti (I32:$src1),
- (DEC_CONST_SIGNED s8ExtPred:$src2)))>;
+ (DEC_CONST_SIGNED s32ImmPred:$src2)))>;
-def : Pat <(i32 (int_hexagon_C2_cmpgeui (I32:$src1), u8ExtPred:$src2)),
+def : Pat <(i32 (int_hexagon_C2_cmpgeui (I32:$src1), u32ImmPred:$src2)),
(i32 (C2_cmpgtui (I32:$src1),
- (DEC_CONST_UNSIGNED u8ExtPred:$src2)))>;
+ (DEC_CONST_UNSIGNED u32ImmPred:$src2)))>;
// The instruction, Pd=cmp.geu(Rs, #u8) -> Pd=cmp.eq(Rs,Rs) when #u8 == 0.
def : Pat <(i32 (int_hexagon_C2_cmpgeui (I32:$src1), 0)),
@@ -1258,6 +1257,30 @@ def: Pat<(i32 (int_hexagon_S2_storew_locked (I32:$Rs), (I32:$Rt))),
def: Pat<(i32 (int_hexagon_S4_stored_locked (I32:$Rs), (I64:$Rt))),
(i32 (C2_tfrpr (S4_stored_locked (I32:$Rs), (I64:$Rt))))>;
+/********************************************************************
+* ST
+*********************************************************************/
+
+class T_stb_pat <InstHexagon MI, Intrinsic IntID, PatLeaf Val>
+ : Pat<(IntID I32:$Rs, Val:$Rt, I32:$Ru),
+ (MI I32:$Rs, Val:$Rt, I32:$Ru)>;
+
+def : T_stb_pat <S2_storerh_pbr_pseudo, int_hexagon_brev_sth, I32>;
+def : T_stb_pat <S2_storerb_pbr_pseudo, int_hexagon_brev_stb, I32>;
+def : T_stb_pat <S2_storeri_pbr_pseudo, int_hexagon_brev_stw, I32>;
+def : T_stb_pat <S2_storerf_pbr_pseudo, int_hexagon_brev_sthhi, I32>;
+def : T_stb_pat <S2_storerd_pbr_pseudo, int_hexagon_brev_std, I64>;
+
+class T_stc_pat <InstHexagon MI, Intrinsic IntID, PatLeaf Imm, PatLeaf Val>
+ : Pat<(IntID I32:$Rs, Val:$Rt, I32:$Ru, Imm:$s),
+ (MI I32:$Rs, Val:$Rt, I32:$Ru, Imm:$s)>;
+
+def: T_stc_pat<S2_storerb_pci_pseudo, int_hexagon_circ_stb, s4_0ImmPred, I32>;
+def: T_stc_pat<S2_storerh_pci_pseudo, int_hexagon_circ_sth, s4_1ImmPred, I32>;
+def: T_stc_pat<S2_storeri_pci_pseudo, int_hexagon_circ_stw, s4_2ImmPred, I32>;
+def: T_stc_pat<S2_storerd_pci_pseudo, int_hexagon_circ_std, s4_3ImmPred, I64>;
+def: T_stc_pat<S2_storerf_pci_pseudo, int_hexagon_circ_sthhi, s4_1ImmPred, I32>;
+
include "HexagonIntrinsicsV3.td"
include "HexagonIntrinsicsV4.td"
include "HexagonIntrinsicsV5.td"
diff --git a/lib/Target/Hexagon/HexagonIntrinsicsV4.td b/lib/Target/Hexagon/HexagonIntrinsicsV4.td
index 8d068eb..c80a188 100644
--- a/lib/Target/Hexagon/HexagonIntrinsicsV4.td
+++ b/lib/Target/Hexagon/HexagonIntrinsicsV4.td
@@ -234,17 +234,17 @@ def: T_RR_pat<A4_orn, int_hexagon_A4_orn>;
*********************************************************************/
// Combine Words Into Doublewords.
-def: T_RI_pat<A4_combineri, int_hexagon_A4_combineri, s8ExtPred>;
-def: T_IR_pat<A4_combineir, int_hexagon_A4_combineir, s8ExtPred>;
+def: T_RI_pat<A4_combineri, int_hexagon_A4_combineri, s32ImmPred>;
+def: T_IR_pat<A4_combineir, int_hexagon_A4_combineir, s32ImmPred>;
/********************************************************************
* ALU32/PRED *
*********************************************************************/
// Compare
-def : T_RI_pat<C4_cmpneqi, int_hexagon_C4_cmpneqi, s10ExtPred>;
-def : T_RI_pat<C4_cmpltei, int_hexagon_C4_cmpltei, s10ExtPred>;
-def : T_RI_pat<C4_cmplteui, int_hexagon_C4_cmplteui, u9ExtPred>;
+def : T_RI_pat<C4_cmpneqi, int_hexagon_C4_cmpneqi, s32ImmPred>;
+def : T_RI_pat<C4_cmpltei, int_hexagon_C4_cmpltei, s32ImmPred>;
+def : T_RI_pat<C4_cmplteui, int_hexagon_C4_cmplteui, u32ImmPred>;
def: T_RR_pat<A4_rcmpeq, int_hexagon_A4_rcmpeq>;
def: T_RR_pat<A4_rcmpneq, int_hexagon_A4_rcmpneq>;
diff --git a/lib/Target/Hexagon/HexagonNewValueJump.cpp b/lib/Target/Hexagon/HexagonNewValueJump.cpp
index 806d448..81af4db 100644
--- a/lib/Target/Hexagon/HexagonNewValueJump.cpp
+++ b/lib/Target/Hexagon/HexagonNewValueJump.cpp
@@ -40,6 +40,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
@@ -199,10 +200,7 @@ static bool commonChecksToProhibitNewValueJump(bool afterRA,
// of registers by individual passes in the backend. At this time,
// we don't know the scope of usage and definitions of these
// instructions.
- if (MII->getOpcode() == Hexagon::TFR_condset_ii ||
- MII->getOpcode() == Hexagon::TFR_condset_ri ||
- MII->getOpcode() == Hexagon::TFR_condset_ir ||
- MII->getOpcode() == Hexagon::LDriw_pred ||
+ if (MII->getOpcode() == Hexagon::LDriw_pred ||
MII->getOpcode() == Hexagon::STriw_pred)
return false;
}
diff --git a/lib/Target/Hexagon/HexagonOperands.td b/lib/Target/Hexagon/HexagonOperands.td
index 318ca72..450f594 100644
--- a/lib/Target/Hexagon/HexagonOperands.td
+++ b/lib/Target/Hexagon/HexagonOperands.td
@@ -66,162 +66,131 @@ def nOneImm : Operand<i32>;
// Immediate predicates
//
def s32ImmPred : PatLeaf<(i32 imm), [{
- // s32ImmPred predicate - True if the immediate fits in a 32-bit sign extended
- // field.
int64_t v = (int64_t)N->getSExtValue();
return isInt<32>(v);
}]>;
-def s32_24ImmPred : PatLeaf<(i32 imm), [{
- // s32_24ImmPred predicate - True if the immediate fits in a 32-bit sign
- // extended field that is a multiple of 0x1000000.
+def s32_0ImmPred : PatLeaf<(i32 imm), [{
+ int64_t v = (int64_t)N->getSExtValue();
+ return isInt<32>(v);
+}]>;
+
+def s31_1ImmPred : PatLeaf<(i32 imm), [{
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedInt<31,1>(v);
+}]>;
+
+def s30_2ImmPred : PatLeaf<(i32 imm), [{
+ int64_t v = (int64_t)N->getSExtValue();
+  return isShiftedInt<30,2>(v);
+}]>;
+
+def s29_3ImmPred : PatLeaf<(i32 imm), [{
+ int64_t v = (int64_t)N->getSExtValue();
+  return isShiftedInt<29,3>(v);
+}]>;
+
+def s22_10ImmPred : PatLeaf<(i32 imm), [{
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedInt<22,10>(v);
+}]>;
+
+def s8_24ImmPred : PatLeaf<(i32 imm), [{
int64_t v = (int64_t)N->getSExtValue();
- return isShiftedInt<32,24>(v);
+ return isShiftedInt<8,24>(v);
}]>;
-def s32_16s8ImmPred : PatLeaf<(i32 imm), [{
- // s32_16s8ImmPred predicate - True if the immediate fits in a 32-bit sign
- // extended field that is a multiple of 0x10000.
+def s16_16ImmPred : PatLeaf<(i32 imm), [{
int64_t v = (int64_t)N->getSExtValue();
- return isShiftedInt<24,16>(v);
+ return isShiftedInt<16,16>(v);
}]>;
def s26_6ImmPred : PatLeaf<(i32 imm), [{
- // s26_6ImmPred predicate - True if the immediate fits in a 32-bit
- // sign extended field.
int64_t v = (int64_t)N->getSExtValue();
return isShiftedInt<26,6>(v);
}]>;
-
def s16ImmPred : PatLeaf<(i32 imm), [{
- // s16ImmPred predicate - True if the immediate fits in a 16-bit sign extended
- // field.
int64_t v = (int64_t)N->getSExtValue();
return isInt<16>(v);
}]>;
-
def s13ImmPred : PatLeaf<(i32 imm), [{
- // s13ImmPred predicate - True if the immediate fits in a 13-bit sign extended
- // field.
int64_t v = (int64_t)N->getSExtValue();
return isInt<13>(v);
}]>;
-
def s12ImmPred : PatLeaf<(i32 imm), [{
- // s12ImmPred predicate - True if the immediate fits in a 12-bit
- // sign extended field.
int64_t v = (int64_t)N->getSExtValue();
return isInt<12>(v);
}]>;
def s11_0ImmPred : PatLeaf<(i32 imm), [{
- // s11_0ImmPred predicate - True if the immediate fits in a 11-bit
- // sign extended field.
int64_t v = (int64_t)N->getSExtValue();
return isInt<11>(v);
}]>;
-
def s11_1ImmPred : PatLeaf<(i32 imm), [{
- // s11_1ImmPred predicate - True if the immediate fits in a 12-bit
- // sign extended field and is a multiple of 2.
int64_t v = (int64_t)N->getSExtValue();
return isShiftedInt<11,1>(v);
}]>;
-
def s11_2ImmPred : PatLeaf<(i32 imm), [{
- // s11_2ImmPred predicate - True if the immediate fits in a 13-bit
- // sign extended field and is a multiple of 4.
int64_t v = (int64_t)N->getSExtValue();
return isShiftedInt<11,2>(v);
}]>;
-
def s11_3ImmPred : PatLeaf<(i32 imm), [{
- // s11_3ImmPred predicate - True if the immediate fits in a 14-bit
- // sign extended field and is a multiple of 8.
int64_t v = (int64_t)N->getSExtValue();
return isShiftedInt<11,3>(v);
}]>;
-
def s10ImmPred : PatLeaf<(i32 imm), [{
- // s10ImmPred predicate - True if the immediate fits in a 10-bit sign extended
- // field.
int64_t v = (int64_t)N->getSExtValue();
return isInt<10>(v);
}]>;
-
def s9ImmPred : PatLeaf<(i32 imm), [{
- // s9ImmPred predicate - True if the immediate fits in a 9-bit sign extended
- // field.
int64_t v = (int64_t)N->getSExtValue();
return isInt<9>(v);
}]>;
def m9ImmPred : PatLeaf<(i32 imm), [{
- // m9ImmPred predicate - True if the immediate fits in a 9-bit magnitude
- // field. The range of m9 is -255 to 255.
int64_t v = (int64_t)N->getSExtValue();
return isInt<9>(v) && (v != -256);
}]>;
def s8ImmPred : PatLeaf<(i32 imm), [{
- // s8ImmPred predicate - True if the immediate fits in a 8-bit sign extended
- // field.
int64_t v = (int64_t)N->getSExtValue();
return isInt<8>(v);
}]>;
-
def s8Imm64Pred : PatLeaf<(i64 imm), [{
- // s8ImmPred predicate - True if the immediate fits in a 8-bit sign extended
- // field.
int64_t v = (int64_t)N->getSExtValue();
return isInt<8>(v);
}]>;
-
def s6ImmPred : PatLeaf<(i32 imm), [{
- // s6ImmPred predicate - True if the immediate fits in a 6-bit sign extended
- // field.
int64_t v = (int64_t)N->getSExtValue();
return isInt<6>(v);
}]>;
-
def s4_0ImmPred : PatLeaf<(i32 imm), [{
- // s4_0ImmPred predicate - True if the immediate fits in a 4-bit sign extended
- // field.
int64_t v = (int64_t)N->getSExtValue();
return isInt<4>(v);
}]>;
-
def s4_1ImmPred : PatLeaf<(i32 imm), [{
- // s4_1ImmPred predicate - True if the immediate fits in a 4-bit sign extended
- // field of 2.
int64_t v = (int64_t)N->getSExtValue();
return isShiftedInt<4,1>(v);
}]>;
-
def s4_2ImmPred : PatLeaf<(i32 imm), [{
- // s4_2ImmPred predicate - True if the immediate fits in a 4-bit sign extended
- // field that is a multiple of 4.
int64_t v = (int64_t)N->getSExtValue();
return isShiftedInt<4,2>(v);
}]>;
-
def s4_3ImmPred : PatLeaf<(i32 imm), [{
- // s4_3ImmPred predicate - True if the immediate fits in a 4-bit sign extended
- // field that is a multiple of 8.
int64_t v = (int64_t)N->getSExtValue();
return isShiftedInt<4,3>(v);
}]>;
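
The shifted sN_MImmPred predicates all reduce to llvm::isShiftedInt<N, M>: the value must be a multiple of 2^M whose quotient fits in N signed bits. A self-contained sketch of that check:

    #include <cstdint>

    template <unsigned N, unsigned S>
    static bool isShiftedIntSketch(int64_t V) {
      if (V & ((INT64_C(1) << S) - 1))   // low S bits must be zero
        return false;
      int64_t Q = V >> S;                // remainder must fit N signed bits
      return Q >= -(INT64_C(1) << (N - 1)) && Q < (INT64_C(1) << (N - 1));
    }
    // e.g. isShiftedIntSketch<4,2>(12) -> true,
    //      isShiftedIntSketch<4,2>(10) -> false (alignment),
    //      isShiftedIntSketch<4,2>(36) -> false (range).
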
@@ -233,56 +202,61 @@ def u64ImmPred : PatLeaf<(i64 imm), [{
}]>;
def u32ImmPred : PatLeaf<(i32 imm), [{
- // u32ImmPred predicate - True if the immediate fits in a 32-bit field.
int64_t v = (int64_t)N->getSExtValue();
return isUInt<32>(v);
}]>;
+def u32_0ImmPred : PatLeaf<(i32 imm), [{
+ int64_t v = (int64_t)N->getSExtValue();
+ return isUInt<32>(v);
+}]>;
+
+def u31_1ImmPred : PatLeaf<(i32 imm), [{
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedUInt<31,1>(v);
+}]>;
+
+def u30_2ImmPred : PatLeaf<(i32 imm), [{
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedUInt<30,2>(v);
+}]>;
+
+def u29_3ImmPred : PatLeaf<(i32 imm), [{
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedUInt<29,3>(v);
+}]>;
+
def u26_6ImmPred : PatLeaf<(i32 imm), [{
- // u26_6ImmPred - True if the immediate fits in a 32-bit field and
- // is a multiple of 64.
int64_t v = (int64_t)N->getSExtValue();
return isShiftedUInt<26,6>(v);
}]>;
def u16ImmPred : PatLeaf<(i32 imm), [{
- // u16ImmPred predicate - True if the immediate fits in a 16-bit unsigned
- // field.
int64_t v = (int64_t)N->getSExtValue();
return isUInt<16>(v);
}]>;
def u16_s8ImmPred : PatLeaf<(i32 imm), [{
- // u16_s8ImmPred predicate - True if the immediate fits in a 16-bit sign
- // extended s8 field.
int64_t v = (int64_t)N->getSExtValue();
return isShiftedUInt<16,8>(v);
}]>;
def u16_0ImmPred : PatLeaf<(i32 imm), [{
- // True if the immediate fits in a 16-bit unsigned field.
int64_t v = (int64_t)N->getSExtValue();
return isUInt<16>(v);
}]>;
def u11_3ImmPred : PatLeaf<(i32 imm), [{
- // True if the immediate fits in a 14-bit unsigned field, and the lowest
- // three bits are 0.
int64_t v = (int64_t)N->getSExtValue();
return isShiftedUInt<11,3>(v);
}]>;
def u9ImmPred : PatLeaf<(i32 imm), [{
- // u9ImmPred predicate - True if the immediate fits in a 9-bit unsigned
- // field.
int64_t v = (int64_t)N->getSExtValue();
return isUInt<9>(v);
}]>;
-
def u8ImmPred : PatLeaf<(i32 imm), [{
- // u8ImmPred predicate - True if the immediate fits in a 8-bit unsigned
- // field.
int64_t v = (int64_t)N->getSExtValue();
return isUInt<8>(v);
}]>;
@@ -294,81 +268,56 @@ def u7StrictPosImmPred : ImmLeaf<i32, [{
}]>;
def u7ImmPred : PatLeaf<(i32 imm), [{
- // u7ImmPred predicate - True if the immediate fits in a 7-bit unsigned
- // field.
int64_t v = (int64_t)N->getSExtValue();
return isUInt<7>(v);
}]>;
-
def u6ImmPred : PatLeaf<(i32 imm), [{
- // u6ImmPred predicate - True if the immediate fits in a 6-bit unsigned
- // field.
int64_t v = (int64_t)N->getSExtValue();
return isUInt<6>(v);
}]>;
def u6_0ImmPred : PatLeaf<(i32 imm), [{
- // u6_0ImmPred predicate - True if the immediate fits in a 6-bit unsigned
- // field. Same as u6ImmPred.
int64_t v = (int64_t)N->getSExtValue();
return isUInt<6>(v);
}]>;
def u6_1ImmPred : PatLeaf<(i32 imm), [{
- // u6_1ImmPred predicate - True if the immediate fits in a 7-bit unsigned
- // field that is 1 bit alinged - multiple of 2.
int64_t v = (int64_t)N->getSExtValue();
return isShiftedUInt<6,1>(v);
}]>;
def u6_2ImmPred : PatLeaf<(i32 imm), [{
- // u6_2ImmPred predicate - True if the immediate fits in a 8-bit unsigned
- // field that is 2 bits alinged - multiple of 4.
int64_t v = (int64_t)N->getSExtValue();
return isShiftedUInt<6,2>(v);
}]>;
def u6_3ImmPred : PatLeaf<(i32 imm), [{
- // u6_3ImmPred predicate - True if the immediate fits in a 9-bit unsigned
- // field that is 3 bits alinged - multiple of 8.
int64_t v = (int64_t)N->getSExtValue();
return isShiftedUInt<6,3>(v);
}]>;
def u5ImmPred : PatLeaf<(i32 imm), [{
- // u5ImmPred predicate - True if the immediate fits in a 5-bit unsigned
- // field.
int64_t v = (int64_t)N->getSExtValue();
return isUInt<5>(v);
}]>;
def u4ImmPred : PatLeaf<(i32 imm), [{
- // u4ImmPred predicate - True if the immediate fits in a 4-bit unsigned
- // field.
int64_t v = (int64_t)N->getSExtValue();
return isUInt<4>(v);
}]>;
def u3ImmPred : PatLeaf<(i32 imm), [{
- // u3ImmPred predicate - True if the immediate fits in a 3-bit unsigned
- // field.
int64_t v = (int64_t)N->getSExtValue();
return isUInt<3>(v);
}]>;
-
def u2ImmPred : PatLeaf<(i32 imm), [{
- // u2ImmPred predicate - True if the immediate fits in a 2-bit unsigned
- // field.
int64_t v = (int64_t)N->getSExtValue();
return isUInt<2>(v);
}]>;
-
def u1ImmPred : PatLeaf<(i1 imm), [{
- // u1ImmPred predicate - True if the immediate fits in a 1-bit unsigned
- // field.
int64_t v = (int64_t)N->getSExtValue();
return isUInt<1>(v);
}]>;
@@ -511,212 +460,6 @@ let PrintMethod = "printExtOperand" in {
def u6_3Ext : Operand<i32>;
}
-let PrintMethod = "printImmOperand" in
-def u0AlwaysExt : Operand<i32>;
-
-// Predicates for constant extendable operands
-def s16ExtPred : PatLeaf<(i32 imm), [{
- int64_t v = (int64_t)N->getSExtValue();
- if (isInt<16>(v))
- return true;
-
- // Return true if extending this immediate is profitable and the value
- // can fit in a 32-bit signed field.
- return isConstExtProfitable(Node) && isInt<32>(v);
-}]>;
-
-def s10ExtPred : PatLeaf<(i32 imm), [{
- int64_t v = (int64_t)N->getSExtValue();
- if (isInt<10>(v))
- return true;
-
- // Return true if extending this immediate is profitable and the value
- // can fit in a 32-bit signed field.
- return isConstExtProfitable(Node) && isInt<32>(v);
-}]>;
-
-def s9ExtPred : PatLeaf<(i32 imm), [{
- int64_t v = (int64_t)N->getSExtValue();
- if (isInt<9>(v))
- return true;
-
- // Return true if extending this immediate is profitable and the value
- // can fit in a 32-bit unsigned field.
- return isConstExtProfitable(Node) && isInt<32>(v);
-}]>;
-
-def s8ExtPred : PatLeaf<(i32 imm), [{
- int64_t v = (int64_t)N->getSExtValue();
- if (isInt<8>(v))
- return true;
-
- // Return true if extending this immediate is profitable and the value
- // can fit in a 32-bit signed field.
- return isConstExtProfitable(Node) && isInt<32>(v);
-}]>;
-
-def s8_16ExtPred : PatLeaf<(i32 imm), [{
- int64_t v = (int64_t)N->getSExtValue();
- if (isInt<8>(v))
- return true;
-
- // Return true if extending this immediate is profitable and the value
- // can't fit in a 16-bit signed field. This is required to avoid
- // unnecessary constant extenders.
- return isConstExtProfitable(Node) && !isInt<16>(v);
-}]>;
-
-def s6ExtPred : PatLeaf<(i32 imm), [{
- int64_t v = (int64_t)N->getSExtValue();
- if (isInt<6>(v))
- return true;
-
- // Return true if extending this immediate is profitable and the value
- // can fit in a 32-bit unsigned field.
- return isConstExtProfitable(Node) && isInt<32>(v);
-}]>;
-
-def s6_16ExtPred : PatLeaf<(i32 imm), [{
- int64_t v = (int64_t)N->getSExtValue();
- if (isInt<6>(v))
- return true;
-
- // Return true if extending this immediate is profitable and the value
- // can't fit in a 16-bit signed field. This is required to avoid
- // unnecessary constant extenders.
- return isConstExtProfitable(Node) && !isInt<16>(v);
-}]>;
-
-def s6_10ExtPred : PatLeaf<(i32 imm), [{
- int64_t v = (int64_t)N->getSExtValue();
- if (isInt<6>(v))
- return true;
-
- // Return true if extending this immediate is profitable and the value
- // can't fit in a 10-bit signed field. This is required to avoid
- // unnecessary constant extenders.
- return isConstExtProfitable(Node) && !isInt<10>(v);
-}]>;
-
-def s11_0ExtPred : PatLeaf<(i32 imm), [{
- int64_t v = (int64_t)N->getSExtValue();
- if (isInt<11>(v))
- return true;
-
- // Return true if extending this immediate is profitable and the value
- // can fit in a 32-bit signed field.
- return isConstExtProfitable(Node) && isInt<32>(v);
-}]>;
-
-def s11_1ExtPred : PatLeaf<(i32 imm), [{
- int64_t v = (int64_t)N->getSExtValue();
- if (isInt<12>(v))
- return isShiftedInt<11,1>(v);
-
- // Return true if extending this immediate is profitable and the low 1 bit
- // is zero (2-byte aligned).
- return isConstExtProfitable(Node) && isInt<32>(v) && ((v % 2) == 0);
-}]>;
-
-def s11_2ExtPred : PatLeaf<(i32 imm), [{
- int64_t v = (int64_t)N->getSExtValue();
- if (isInt<13>(v))
- return isShiftedInt<11,2>(v);
-
- // Return true if extending this immediate is profitable and the low 2-bits
- // are zero (4-byte aligned).
- return isConstExtProfitable(Node) && isInt<32>(v) && ((v % 4) == 0);
-}]>;
-
-def s11_3ExtPred : PatLeaf<(i32 imm), [{
- int64_t v = (int64_t)N->getSExtValue();
- if (isInt<14>(v))
- return isShiftedInt<11,3>(v);
-
- // Return true if extending this immediate is profitable and the low 3-bits
- // are zero (8-byte aligned).
- return isConstExtProfitable(Node) && isInt<32>(v) && ((v % 8) == 0);
-}]>;
-
-def u0AlwaysExtPred : PatLeaf<(i32 imm), [{
- // Predicate for an unsigned 32-bit value that always needs to be extended.
- if (isConstExtProfitable(Node)) {
- int64_t v = (int64_t)N->getSExtValue();
- return isUInt<32>(v);
- }
- return false;
-}]>;
-
-def u6ExtPred : PatLeaf<(i32 imm), [{
- int64_t v = (int64_t)N->getSExtValue();
- if (isUInt<6>(v))
- return true;
-
- // Return true if extending this immediate is profitable and the value
- // can fit in a 32-bit unsigned field.
- return isConstExtProfitable(Node) && isUInt<32>(v);
-}]>;
-
-def u7ExtPred : PatLeaf<(i32 imm), [{
- int64_t v = (int64_t)N->getSExtValue();
- if (isUInt<7>(v))
- return true;
-
- // Return true if extending this immediate is profitable and the value
- // can fit in a 32-bit unsigned field.
- return isConstExtProfitable(Node) && isUInt<32>(v);
-}]>;
-
-def u8ExtPred : PatLeaf<(i32 imm), [{
- int64_t v = (int64_t)N->getSExtValue();
- if (isUInt<8>(v))
- return true;
-
- // Return true if extending this immediate is profitable and the value
- // can fit in a 32-bit unsigned field.
- return isConstExtProfitable(Node) && isUInt<32>(v);
-}]>;
-
-def u9ExtPred : PatLeaf<(i32 imm), [{
- int64_t v = (int64_t)N->getSExtValue();
- if (isUInt<9>(v))
- return true;
-
- // Return true if extending this immediate is profitable and the value
- // can fit in a 32-bit unsigned field.
- return isConstExtProfitable(Node) && isUInt<32>(v);
-}]>;
-
-def u6_1ExtPred : PatLeaf<(i32 imm), [{
- int64_t v = (int64_t)N->getSExtValue();
- if (isUInt<7>(v))
- return isShiftedUInt<6,1>(v);
-
- // Return true if extending this immediate is profitable and the value
- // can fit in a 32-bit unsigned field.
- return isConstExtProfitable(Node) && isUInt<32>(v) && ((v % 2) == 0);
-}]>;
-
-def u6_2ExtPred : PatLeaf<(i32 imm), [{
- int64_t v = (int64_t)N->getSExtValue();
- if (isUInt<8>(v))
- return isShiftedUInt<6,2>(v);
-
- // Return true if extending this immediate is profitable and the value
- // can fit in a 32-bit unsigned field.
- return isConstExtProfitable(Node) && isUInt<32>(v) && ((v % 4) == 0);
-}]>;
-
-def u6_3ExtPred : PatLeaf<(i32 imm), [{
- int64_t v = (int64_t)N->getSExtValue();
- if (isUInt<9>(v))
- return isShiftedUInt<6,3>(v);
-
- // Return true if extending this immediate is profitable and the value
- // can fit in a 32-bit unsigned field.
- return isConstExtProfitable(Node) && isUInt<32>(v) && ((v % 8) == 0);
-}]>;
-
// This complex pattern exists only to create a machine instruction operand
// of type "frame index". There doesn't seem to be a way to do that directly
@@ -729,41 +472,8 @@ def AddrFI : ComplexPattern<i32, 1, "SelectAddrFI", [frameindex], []>;
def AddrGA : ComplexPattern<i32, 1, "SelectAddrGA", [], []>;
def AddrGP : ComplexPattern<i32, 1, "SelectAddrGP", [], []>;
-// Addressing modes.
-
-def ADDRrr : ComplexPattern<i32, 2, "SelectADDRrr", [], []>;
-def ADDRri : ComplexPattern<i32, 2, "SelectADDRri", [frameindex], []>;
-def ADDRriS11_0 : ComplexPattern<i32, 2, "SelectADDRriS11_0", [frameindex], []>;
-def ADDRriS11_1 : ComplexPattern<i32, 2, "SelectADDRriS11_1", [frameindex], []>;
-def ADDRriS11_2 : ComplexPattern<i32, 2, "SelectADDRriS11_2", [frameindex], []>;
-def ADDRriS11_3 : ComplexPattern<i32, 2, "SelectADDRriS11_3", [frameindex], []>;
-def ADDRriU6_0 : ComplexPattern<i32, 2, "SelectADDRriU6_0", [frameindex], []>;
-def ADDRriU6_1 : ComplexPattern<i32, 2, "SelectADDRriU6_1", [frameindex], []>;
-def ADDRriU6_2 : ComplexPattern<i32, 2, "SelectADDRriU6_2", [frameindex], []>;
-
// Address operands.
-def MEMrr : Operand<i32> {
- let PrintMethod = "printMEMrrOperand";
- let MIOperandInfo = (ops IntRegs, IntRegs);
-}
-
-def MEMri : Operand<i32> {
- let PrintMethod = "printMEMriOperand";
- let MIOperandInfo = (ops IntRegs, IntRegs);
-}
-
-def MEMri_s11_2 : Operand<i32>,
- ComplexPattern<i32, 2, "SelectMEMriS11_2", []> {
- let PrintMethod = "printMEMriOperand";
- let MIOperandInfo = (ops IntRegs, s11Imm);
-}
-
-def FrameIndex : Operand<i32> {
- let PrintMethod = "printFrameIndexOperand";
- let MIOperandInfo = (ops IntRegs, s11Imm);
-}
-
let PrintMethod = "printGlobalOperand" in {
def globaladdress : Operand<i32>;
def globaladdressExt : Operand<i32>;
diff --git a/lib/Target/Hexagon/HexagonPeephole.cpp b/lib/Target/Hexagon/HexagonPeephole.cpp
index afd3a17..503bfdb 100644
--- a/lib/Target/Hexagon/HexagonPeephole.cpp
+++ b/lib/Target/Hexagon/HexagonPeephole.cpp
@@ -271,15 +271,8 @@ bool HexagonPeephole::runOnMachineFunction(MachineFunction &MF) {
switch (Op) {
case Hexagon::C2_mux:
case Hexagon::C2_muxii:
- case Hexagon::TFR_condset_ii:
NewOp = Op;
break;
- case Hexagon::TFR_condset_ri:
- NewOp = Hexagon::TFR_condset_ir;
- break;
- case Hexagon::TFR_condset_ir:
- NewOp = Hexagon::TFR_condset_ri;
- break;
case Hexagon::C2_muxri:
NewOp = Hexagon::C2_muxir;
break;
diff --git a/lib/Target/Hexagon/HexagonRegisterInfo.cpp b/lib/Target/Hexagon/HexagonRegisterInfo.cpp
index 3df98d6..86eaee8 100644
--- a/lib/Target/Hexagon/HexagonRegisterInfo.cpp
+++ b/lib/Target/Hexagon/HexagonRegisterInfo.cpp
@@ -37,11 +37,8 @@
using namespace llvm;
-
-HexagonRegisterInfo::HexagonRegisterInfo(HexagonSubtarget &st)
- : HexagonGenRegisterInfo(Hexagon::R31),
- Subtarget(st) {
-}
+HexagonRegisterInfo::HexagonRegisterInfo()
+ : HexagonGenRegisterInfo(Hexagon::R31) {}
const MCPhysReg *
HexagonRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
@@ -51,7 +48,7 @@ HexagonRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
Hexagon::R24, Hexagon::R25, Hexagon::R26, Hexagon::R27, 0
};
- switch(Subtarget.getHexagonArchVersion()) {
+ switch (MF->getSubtarget<HexagonSubtarget>().getHexagonArchVersion()) {
case HexagonSubtarget::V4:
case HexagonSubtarget::V5:
return CalleeSavedRegsV3;
@@ -89,7 +86,7 @@ HexagonRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const {
&Hexagon::IntRegsRegClass, &Hexagon::IntRegsRegClass,
};
- switch(Subtarget.getHexagonArchVersion()) {
+ switch (MF->getSubtarget<HexagonSubtarget>().getHexagonArchVersion()) {
case HexagonSubtarget::V4:
case HexagonSubtarget::V5:
return CalleeSavedRegClassesV3;
@@ -122,7 +119,9 @@ void HexagonRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
Offset -= 2 * Hexagon_WordSize;
}
- const unsigned FrameSize = MFI.getStackSize();
+ unsigned FrameSize = MFI.getStackSize();
+ if (MI.getOpcode() == Hexagon::TFR_FI)
+ MI.setDesc(TII.get(Hexagon::A2_addi));
if (!MFI.hasVarSizedObjects() &&
TII.isValidOffset(MI.getOpcode(), (FrameSize+Offset)) &&
diff --git a/lib/Target/Hexagon/HexagonRegisterInfo.h b/lib/Target/Hexagon/HexagonRegisterInfo.h
index a83b502..dc6dd2a 100644
--- a/lib/Target/Hexagon/HexagonRegisterInfo.h
+++ b/lib/Target/Hexagon/HexagonRegisterInfo.h
@@ -37,19 +37,11 @@
#define HEXAGON_RESERVED_REG_2 Hexagon::R11
namespace llvm {
-
-class HexagonSubtarget;
-class HexagonInstrInfo;
-class Type;
-
struct HexagonRegisterInfo : public HexagonGenRegisterInfo {
- HexagonSubtarget &Subtarget;
-
- HexagonRegisterInfo(HexagonSubtarget &st);
+ HexagonRegisterInfo();
/// Code Generation virtual methods...
- const MCPhysReg *
- getCalleeSavedRegs(const MachineFunction *MF = nullptr) const override;
+ const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override;
const TargetRegisterClass* const*
getCalleeSavedRegClasses(const MachineFunction *MF = nullptr) const;
diff --git a/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp b/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp
index ce6a39a..1a4c7ae 100644
--- a/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp
+++ b/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp
@@ -71,6 +71,7 @@ bool HexagonSplitConst32AndConst64::runOnMachineFunction(MachineFunction &Fn) {
return true;
const TargetInstrInfo *TII = Fn.getSubtarget().getInstrInfo();
+ const TargetRegisterInfo *TRI = Fn.getSubtarget().getRegisterInfo();
// Loop over all of the basic blocks
for (MachineFunction::iterator MBBb = Fn.begin(), MBBe = Fn.end();
@@ -82,82 +83,78 @@ bool HexagonSplitConst32AndConst64::runOnMachineFunction(MachineFunction &Fn) {
while (MII != MIE) {
MachineInstr *MI = MII;
int Opc = MI->getOpcode();
- if (Opc == Hexagon::CONST32_set) {
+ if (Opc == Hexagon::CONST32_set_jt) {
int DestReg = MI->getOperand(0).getReg();
MachineOperand &Symbol = MI->getOperand (1);
-
- BuildMI (*MBB, MII, MI->getDebugLoc(),
- TII->get(Hexagon::LO), DestReg).addOperand(Symbol);
BuildMI (*MBB, MII, MI->getDebugLoc(),
- TII->get(Hexagon::HI), DestReg).addOperand(Symbol);
- // MBB->erase returns the iterator to the next instruction, which is the
- // one we want to process next
- MII = MBB->erase (MI);
- continue;
- }
- else if (Opc == Hexagon::CONST32_set_jt) {
- int DestReg = MI->getOperand(0).getReg();
- MachineOperand &Symbol = MI->getOperand (1);
+ TII->get(Hexagon::A2_tfrsi), DestReg).addOperand(Symbol);
- BuildMI (*MBB, MII, MI->getDebugLoc(),
- TII->get(Hexagon::LO_jt), DestReg).addOperand(Symbol);
- BuildMI (*MBB, MII, MI->getDebugLoc(),
- TII->get(Hexagon::HI_jt), DestReg).addOperand(Symbol);
// MBB->erase returns the iterator to the next instruction, which is the
// one we want to process next
MII = MBB->erase (MI);
continue;
}
- else if (Opc == Hexagon::CONST32_Label) {
+ else if (Opc == Hexagon::CONST32_Int_Real &&
+ MI->getOperand(1).isBlockAddress()) {
int DestReg = MI->getOperand(0).getReg();
MachineOperand &Symbol = MI->getOperand (1);
BuildMI (*MBB, MII, MI->getDebugLoc(),
- TII->get(Hexagon::LO_PIC), DestReg).addOperand(Symbol);
+ TII->get(Hexagon::LO), DestReg).addOperand(Symbol);
BuildMI (*MBB, MII, MI->getDebugLoc(),
- TII->get(Hexagon::HI_PIC), DestReg).addOperand(Symbol);
+ TII->get(Hexagon::HI), DestReg).addOperand(Symbol);
// MBB->erase returns the iterator to the next instruction, which is the
// one we want to process next
MII = MBB->erase (MI);
continue;
}
- else if (Opc == Hexagon::CONST32_Int_Real) {
+
+ else if (Opc == Hexagon::CONST32_Int_Real ||
+ Opc == Hexagon::CONST32_Float_Real) {
int DestReg = MI->getOperand(0).getReg();
- int64_t ImmValue = MI->getOperand(1).getImm ();
- BuildMI (*MBB, MII, MI->getDebugLoc(),
- TII->get(Hexagon::LOi), DestReg).addImm(ImmValue);
- BuildMI (*MBB, MII, MI->getDebugLoc(),
- TII->get(Hexagon::HIi), DestReg).addImm(ImmValue);
+ // We have to convert an FP immediate into its corresponding integer
+ // representation
+ int64_t ImmValue;
+ if (Opc == Hexagon::CONST32_Float_Real) {
+ APFloat Val = MI->getOperand(1).getFPImm()->getValueAPF();
+ ImmValue = *Val.bitcastToAPInt().getRawData();
+ }
+ else
+ ImmValue = MI->getOperand(1).getImm();
+
+ BuildMI(*MBB, MII, MI->getDebugLoc(),
+ TII->get(Hexagon::A2_tfrsi), DestReg).addImm(ImmValue);
MII = MBB->erase (MI);
continue;
}
- else if (Opc == Hexagon::CONST64_Int_Real) {
+ else if (Opc == Hexagon::CONST64_Int_Real ||
+ Opc == Hexagon::CONST64_Float_Real) {
int DestReg = MI->getOperand(0).getReg();
- int64_t ImmValue = MI->getOperand(1).getImm ();
- unsigned DestLo = Fn.getSubtarget().getRegisterInfo()->getSubReg(
- DestReg, Hexagon::subreg_loreg);
- unsigned DestHi = Fn.getSubtarget().getRegisterInfo()->getSubReg(
- DestReg, Hexagon::subreg_hireg);
+
+ // We have to convert an FP immediate into its corresponding integer
+ // representation
+ int64_t ImmValue;
+ if (Opc == Hexagon::CONST64_Float_Real) {
+ APFloat Val = MI->getOperand(1).getFPImm()->getValueAPF();
+ ImmValue = *Val.bitcastToAPInt().getRawData();
+ }
+ else
+ ImmValue = MI->getOperand(1).getImm();
+
+ unsigned DestLo = TRI->getSubReg(DestReg, Hexagon::subreg_loreg);
+ unsigned DestHi = TRI->getSubReg(DestReg, Hexagon::subreg_hireg);
int32_t LowWord = (ImmValue & 0xFFFFFFFF);
int32_t HighWord = (ImmValue >> 32) & 0xFFFFFFFF;
- // Lower Registers Lower Half
- BuildMI (*MBB, MII, MI->getDebugLoc(),
- TII->get(Hexagon::LOi), DestLo).addImm(LowWord);
- // Lower Registers Higher Half
+ BuildMI(*MBB, MII, MI->getDebugLoc(),
+ TII->get(Hexagon::A2_tfrsi), DestLo).addImm(LowWord);
BuildMI (*MBB, MII, MI->getDebugLoc(),
- TII->get(Hexagon::HIi), DestLo).addImm(LowWord);
- // Higher Registers Lower Half
- BuildMI (*MBB, MII, MI->getDebugLoc(),
- TII->get(Hexagon::LOi), DestHi).addImm(HighWord);
- // Higher Registers Higher Half.
- BuildMI (*MBB, MII, MI->getDebugLoc(),
- TII->get(Hexagon::HIi), DestHi).addImm(HighWord);
+ TII->get(Hexagon::A2_tfrsi), DestHi).addImm(HighWord);
MII = MBB->erase (MI);
continue;
- }
+ }
++MII;
}
}
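
A sketch of the CONST64 lowering above: bitcast an FP immediate to its integer bits, then feed each 32-bit half to its own A2_tfrsi (host-side illustration only):

    #include <cstdint>
    #include <cstring>

    static void splitConst64(double FPImm, int32_t &LowWord, int32_t &HighWord) {
      int64_t ImmValue;
      std::memcpy(&ImmValue, &FPImm, sizeof ImmValue);     // bitcastToAPInt()
      LowWord  = (int32_t)(ImmValue & 0xFFFFFFFF);         // -> subreg_loreg
      HighWord = (int32_t)((ImmValue >> 32) & 0xFFFFFFFF); // -> subreg_hireg
    }
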
diff --git a/lib/Target/Hexagon/HexagonSplitTFRCondSets.cpp b/lib/Target/Hexagon/HexagonSplitTFRCondSets.cpp
deleted file mode 100644
index 8873bb9..0000000
--- a/lib/Target/Hexagon/HexagonSplitTFRCondSets.cpp
+++ /dev/null
@@ -1,172 +0,0 @@
-//===-- HexagonSplitTFRCondSets.cpp - split TFR condsets into xfers -------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//
-//===----------------------------------------------------------------------===//
-// This pass tries to provide opportunities for better optimization of muxes.
-// The default code generated for something like: flag = (a == b) ? 1 : 3;
-// would be:
-//
-// {p0 = cmp.eq(r0,r1)}
-// {r3 = mux(p0,#1,#3)}
-//
-// This requires two packets. If we use .new predicated immediate transfers,
-// then we can do this in a single packet, e.g.:
-//
-// {p0 = cmp.eq(r0,r1)
-// if (p0.new) r3 = #1
-// if (!p0.new) r3 = #3}
-//
-// Note that the conditional assignments are not generated in .new form here.
-// We assume opptimisically that they will be formed later.
-//
-//===----------------------------------------------------------------------===//
-
-#include "Hexagon.h"
-#include "HexagonMachineFunctionInfo.h"
-#include "HexagonSubtarget.h"
-#include "HexagonTargetMachine.h"
-#include "llvm/CodeGen/LatencyPriorityQueue.h"
-#include "llvm/CodeGen/MachineDominators.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineLoopInfo.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
-#include "llvm/CodeGen/SchedulerRegistry.h"
-#include "llvm/Support/Compiler.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/MathExtras.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-
-using namespace llvm;
-
-#define DEBUG_TYPE "xfer"
-
-namespace llvm {
- void initializeHexagonSplitTFRCondSetsPass(PassRegistry&);
-}
-
-
-namespace {
-
-class HexagonSplitTFRCondSets : public MachineFunctionPass {
- public:
- static char ID;
- HexagonSplitTFRCondSets() : MachineFunctionPass(ID) {
- initializeHexagonSplitTFRCondSetsPass(*PassRegistry::getPassRegistry());
- }
-
- const char *getPassName() const override {
- return "Hexagon Split TFRCondSets";
- }
- bool runOnMachineFunction(MachineFunction &Fn) override;
-};
-
-
-char HexagonSplitTFRCondSets::ID = 0;
-
-
-bool HexagonSplitTFRCondSets::runOnMachineFunction(MachineFunction &Fn) {
-
- const TargetInstrInfo *TII = Fn.getSubtarget().getInstrInfo();
-
- // Loop over all of the basic blocks.
- for (MachineFunction::iterator MBBb = Fn.begin(), MBBe = Fn.end();
- MBBb != MBBe; ++MBBb) {
- MachineBasicBlock* MBB = MBBb;
- // Traverse the basic block.
- for (MachineBasicBlock::iterator MII = MBB->begin(); MII != MBB->end();
- ++MII) {
- MachineInstr *MI = MII;
- switch(MI->getOpcode()) {
- case Hexagon::TFR_condset_ri: {
- int DestReg = MI->getOperand(0).getReg();
- int SrcReg1 = MI->getOperand(2).getReg();
-
- // Do not emit the predicated copy if the source and the destination
- // is the same register.
- if (DestReg != SrcReg1) {
- BuildMI(*MBB, MII, MI->getDebugLoc(),
- TII->get(Hexagon::A2_tfrt), DestReg).
- addReg(MI->getOperand(1).getReg()).addReg(SrcReg1);
- }
- BuildMI(*MBB, MII, MI->getDebugLoc(),
- TII->get(Hexagon::C2_cmoveif), DestReg).
- addReg(MI->getOperand(1).getReg()).
- addImm(MI->getOperand(3).getImm());
-
- MII = MBB->erase(MI);
- --MII;
- break;
- }
- case Hexagon::TFR_condset_ir: {
- int DestReg = MI->getOperand(0).getReg();
- int SrcReg2 = MI->getOperand(3).getReg();
-
- BuildMI(*MBB, MII, MI->getDebugLoc(),
- TII->get(Hexagon::C2_cmoveit), DestReg).
- addReg(MI->getOperand(1).getReg()).
- addImm(MI->getOperand(2).getImm());
-
- // Do not emit the predicated copy if the source and
- // the destination is the same register.
- if (DestReg != SrcReg2) {
- BuildMI(*MBB, MII, MI->getDebugLoc(),
- TII->get(Hexagon::A2_tfrf), DestReg).
- addReg(MI->getOperand(1).getReg()).addReg(SrcReg2);
- }
- MII = MBB->erase(MI);
- --MII;
- break;
- }
- case Hexagon::TFR_condset_ii: {
- int DestReg = MI->getOperand(0).getReg();
- int SrcReg1 = MI->getOperand(1).getReg();
-
- int Immed1 = MI->getOperand(2).getImm();
- int Immed2 = MI->getOperand(3).getImm();
- BuildMI(*MBB, MII, MI->getDebugLoc(),
- TII->get(Hexagon::C2_cmoveit),
- DestReg).addReg(SrcReg1).addImm(Immed1);
- BuildMI(*MBB, MII, MI->getDebugLoc(),
- TII->get(Hexagon::C2_cmoveif),
- DestReg).addReg(SrcReg1).addImm(Immed2);
- MII = MBB->erase(MI);
- --MII;
- break;
- }
- }
- }
- }
- return true;
-}
-
-}
-
-//===----------------------------------------------------------------------===//
-// Public Constructor Functions
-//===----------------------------------------------------------------------===//
-
-static void initializePassOnce(PassRegistry &Registry) {
- const char *Name = "Hexagon Split TFRCondSets";
- PassInfo *PI = new PassInfo(Name, "hexagon-split-tfr",
- &HexagonSplitTFRCondSets::ID, nullptr, false,
- false);
- Registry.registerPass(*PI, true);
-}
-
-void llvm::initializeHexagonSplitTFRCondSetsPass(PassRegistry &Registry) {
- CALL_ONCE_INITIALIZATION(initializePassOnce)
-}
-
-FunctionPass *llvm::createHexagonSplitTFRCondSets() {
- return new HexagonSplitTFRCondSets();
-}
diff --git a/lib/Target/Hexagon/HexagonSubtarget.cpp b/lib/Target/Hexagon/HexagonSubtarget.cpp
index 380f023..1717ae3 100644
--- a/lib/Target/Hexagon/HexagonSubtarget.cpp
+++ b/lib/Target/Hexagon/HexagonSubtarget.cpp
@@ -48,6 +48,10 @@ EnableIEEERndNear(
cl::Hidden, cl::ZeroOrMore, cl::init(false),
cl::desc("Generate non-chopped conversion from fp to int."));
+static cl::opt<bool> DisableHexagonMISched("disable-hexagon-misched",
+ cl::Hidden, cl::ZeroOrMore, cl::init(false),
+ cl::desc("Disable Hexagon MI Scheduling"));
+
HexagonSubtarget &
HexagonSubtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS) {
// If the programmer has not specified a Hexagon version, default to -mv4.
@@ -91,3 +95,9 @@ HexagonSubtarget::HexagonSubtarget(StringRef TT, StringRef CPU, StringRef FS,
// Pin the vtable to this file.
void HexagonSubtarget::anchor() {}
+
+bool HexagonSubtarget::enableMachineScheduler() const {
+ if (DisableHexagonMISched.getNumOccurrences())
+ return !DisableHexagonMISched;
+ return true;
+}
diff --git a/lib/Target/Hexagon/HexagonSubtarget.h b/lib/Target/Hexagon/HexagonSubtarget.h
index 57de546..780567b 100644
--- a/lib/Target/Hexagon/HexagonSubtarget.h
+++ b/lib/Target/Hexagon/HexagonSubtarget.h
@@ -85,6 +85,11 @@ public:
bool hasV5TOps() const { return getHexagonArchVersion() >= V5; }
bool hasV5TOpsOnly() const { return getHexagonArchVersion() == V5; }
bool modeIEEERndNear() const { return ModeIEEERndNear; }
+ bool enableMachineScheduler() const override;
+ // Always use the TargetLowering default scheduler.
+ // FIXME: This will use the vliw scheduler which is probably just hurting
+ // compiler time and will be removed eventually anyway.
+ bool enableMachineSchedDefaultSched() const override { return false; }
const std::string &getCPUString () const { return CPUString; }
diff --git a/lib/Target/Hexagon/HexagonTargetMachine.cpp b/lib/Target/Hexagon/HexagonTargetMachine.cpp
index 64f75a3..48b0bc8 100644
--- a/lib/Target/Hexagon/HexagonTargetMachine.cpp
+++ b/lib/Target/Hexagon/HexagonTargetMachine.cpp
@@ -29,10 +29,6 @@ using namespace llvm;
static cl:: opt<bool> DisableHardwareLoops("disable-hexagon-hwloops",
cl::Hidden, cl::desc("Disable Hardware Loops for Hexagon target"));
-static cl::opt<bool> DisableHexagonMISched("disable-hexagon-misched",
- cl::Hidden, cl::ZeroOrMore, cl::init(false),
- cl::desc("Disable Hexagon MI Scheduling"));
-
static cl::opt<bool> DisableHexagonCFGOpt("disable-hexagon-cfgopt",
cl::Hidden, cl::ZeroOrMore, cl::init(false),
cl::desc("Disable Hexagon CFG Optimization"));
@@ -69,9 +65,10 @@ HexagonTargetMachine::HexagonTargetMachine(const Target &T, StringRef TT,
const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL)
- : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
+ : LLVMTargetMachine(T, "e-m:e-p:32:32-i1:32-i64:64-a:0-n32", TT, CPU, FS,
+ Options, RM, CM, OL),
TLOF(make_unique<HexagonTargetObjectFile>()),
- DL("e-m:e-p:32:32-i1:32-i64:64-a:0-n32"), Subtarget(TT, CPU, FS, *this) {
+ Subtarget(TT, CPU, FS, *this) {
initAsmInfo();
}
@@ -82,16 +79,7 @@ namespace {
class HexagonPassConfig : public TargetPassConfig {
public:
HexagonPassConfig(HexagonTargetMachine *TM, PassManagerBase &PM)
- : TargetPassConfig(TM, PM) {
- // FIXME: Rather than calling enablePass(&MachineSchedulerID) below, define
- // HexagonSubtarget::enableMachineScheduler() { return true; }.
- // That will bypass the SelectionDAG VLIW scheduler, which is probably just
- // hurting compile time and will be removed eventually anyway.
- if (DisableHexagonMISched)
- disablePass(&MachineSchedulerID);
- else
- enablePass(&MachineSchedulerID);
- }
+ : TargetPassConfig(TM, PM) {}
HexagonTargetMachine &getHexagonTargetMachine() const {
return getTM<HexagonTargetMachine>();
@@ -159,9 +147,6 @@ void HexagonPassConfig::addPreEmitPass() {
// Expand Spill code for predicate registers.
addPass(createHexagonExpandPredSpillCode(), false);
- // Split up TFRcondsets into conditional transfers.
- addPass(createHexagonSplitTFRCondSets(), false);
-
// Create Packets.
if (!NoOpt) {
if (!DisableHardwareLoops)
diff --git a/lib/Target/Hexagon/HexagonTargetMachine.h b/lib/Target/Hexagon/HexagonTargetMachine.h
index e0b3a9b..5774f7e 100644
--- a/lib/Target/Hexagon/HexagonTargetMachine.h
+++ b/lib/Target/Hexagon/HexagonTargetMachine.h
@@ -24,7 +24,6 @@ class Module;
class HexagonTargetMachine : public LLVMTargetMachine {
std::unique_ptr<TargetLoweringObjectFile> TLOF;
- const DataLayout DL; // Calculates type size & alignment.
HexagonSubtarget Subtarget;
public:
@@ -33,8 +32,7 @@ public:
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL);
~HexagonTargetMachine() override;
- const DataLayout *getDataLayout() const override { return &DL; }
- const HexagonSubtarget *getSubtargetImpl() const override {
+ const HexagonSubtarget *getSubtargetImpl(const Function &) const override {
return &Subtarget;
}
static unsigned getModuleMatchQuality(const Module &M);
diff --git a/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp b/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
index c123640..4ca628e 100644
--- a/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
+++ b/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
@@ -389,7 +389,9 @@ static bool IsLoopN(MachineInstr *MI) {
/// callee-saved register.
static bool DoesModifyCalleeSavedReg(MachineInstr *MI,
const TargetRegisterInfo *TRI) {
- for (const MCPhysReg *CSR = TRI->getCalleeSavedRegs(); *CSR; ++CSR) {
+ for (const MCPhysReg *CSR =
+ TRI->getCalleeSavedRegs(MI->getParent()->getParent());
+ *CSR; ++CSR) {
unsigned CalleeSavedReg = *CSR;
if (MI->modifiesRegister(CalleeSavedReg, TRI))
return true;
@@ -401,10 +403,7 @@ static bool DoesModifyCalleeSavedReg(MachineInstr *MI,
// or new-value store.
bool HexagonPacketizerList::isNewifiable(MachineInstr* MI) {
const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
- if ( isCondInst(MI) || QII->mayBeNewStore(MI))
- return true;
- else
- return false;
+ return isCondInst(MI) || QII->mayBeNewStore(MI);
}
bool HexagonPacketizerList::isCondInst (MachineInstr* MI) {
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp
index 56c9dc7..4a3ac8c 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp
@@ -11,6 +11,7 @@
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCELFObjectWriter.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#define DEBUG_TYPE "hexagon-elf-writer"
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp
index a5a09ba..eac7d6d 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp
@@ -49,9 +49,8 @@ void emitLittleEndian(uint64_t Binary, raw_ostream &OS) {
}
HexagonMCCodeEmitter::HexagonMCCodeEmitter(MCInstrInfo const &aMII,
- MCSubtargetInfo const &aMST,
MCContext &aMCT)
- : MST(aMST), MCT(aMCT), MCII (aMII) {}
+ : MCT(aMCT), MCII(aMII) {}
void HexagonMCCodeEmitter::EncodeInstruction(MCInst const &MI, raw_ostream &OS,
SmallVectorImpl<MCFixup> &Fixups,
@@ -75,15 +74,10 @@ HexagonMCCodeEmitter::getMachineOpValue(MCInst const &MI, MCOperand const &MO,
llvm_unreachable("Only Immediates and Registers implemented right now");
}
-MCSubtargetInfo const &HexagonMCCodeEmitter::getSubtargetInfo() const {
- return MST;
-}
-
MCCodeEmitter *llvm::createHexagonMCCodeEmitter(MCInstrInfo const &MII,
MCRegisterInfo const &MRI,
- MCSubtargetInfo const &MST,
MCContext &MCT) {
- return new HexagonMCCodeEmitter(MII, MST, MCT);
+ return new HexagonMCCodeEmitter(MII, MCT);
}
#include "HexagonGenMCCodeEmitter.inc"
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.h b/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.h
index db1d707..768c10e 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.h
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.h
@@ -26,13 +26,11 @@
namespace llvm {
class HexagonMCCodeEmitter : public MCCodeEmitter {
- MCSubtargetInfo const &MST;
MCContext &MCT;
MCInstrInfo const &MCII;
public:
- HexagonMCCodeEmitter(MCInstrInfo const &aMII, MCSubtargetInfo const &aMST,
- MCContext &aMCT);
+ HexagonMCCodeEmitter(MCInstrInfo const &aMII, MCContext &aMCT);
MCSubtargetInfo const &getSubtargetInfo() const;
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
index 09a305b..c63bf32 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
@@ -47,15 +47,6 @@ static MCRegisterInfo *createHexagonMCRegisterInfo(StringRef TT) {
return X;
}
-static MCStreamer *
-createHexagonELFStreamer(MCContext &Context, MCAsmBackend &MAB,
- raw_ostream &OS, MCCodeEmitter *CE,
- bool RelaxAll) {
- MCELFStreamer *ES = new MCELFStreamer(Context, MAB, OS, CE);
- return ES;
-}
-
-
static MCSubtargetInfo *
createHexagonMCSubtargetInfo(StringRef TT, StringRef CPU, StringRef FS) {
MCSubtargetInfo *X = new MCSubtargetInfo();
@@ -75,16 +66,6 @@ static MCAsmInfo *createHexagonMCAsmInfo(const MCRegisterInfo &MRI,
return MAI;
}
-static MCStreamer *createMCStreamer(Target const &T, StringRef TT,
- MCContext &Context, MCAsmBackend &MAB,
- raw_ostream &OS, MCCodeEmitter *Emitter,
- MCSubtargetInfo const &STI, bool RelaxAll) {
- MCStreamer *ES = createHexagonELFStreamer(Context, MAB, OS, Emitter, RelaxAll);
- new MCTargetStreamer(*ES);
- return ES;
-}
-
-
static MCCodeGenInfo *createHexagonMCCodeGenInfo(StringRef TT, Reloc::Model RM,
CodeModel::Model CM,
CodeGenOpt::Level OL) {
@@ -135,7 +116,4 @@ extern "C" void LLVMInitializeHexagonTargetMC() {
// Register the asm backend
TargetRegistry::RegisterMCAsmBackend(TheHexagonTarget,
createHexagonAsmBackend);
-
- // Register the obj streamer
- TargetRegistry::RegisterMCObjectStreamer(TheHexagonTarget, createMCStreamer);
}
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h
index f074b65..17072d9 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h
@@ -34,7 +34,6 @@ MCInstrInfo *createHexagonMCInstrInfo();
MCCodeEmitter *createHexagonMCCodeEmitter(MCInstrInfo const &MCII,
MCRegisterInfo const &MRI,
- MCSubtargetInfo const &MST,
MCContext &MCT);
MCAsmBackend *createHexagonAsmBackend(Target const &T,
diff --git a/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.h b/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.h
index 586f5d9..241f1d6 100644
--- a/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.h
+++ b/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.h
@@ -14,6 +14,8 @@
#ifndef LLVM_LIB_TARGET_MSP430_MCTARGETDESC_MSP430MCTARGETDESC_H
#define LLVM_LIB_TARGET_MSP430_MCTARGETDESC_MSP430MCTARGETDESC_H
+#include "llvm/Support/DataTypes.h"
+
namespace llvm {
class Target;
diff --git a/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp b/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
index 2f70cde..591ceb5 100644
--- a/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
+++ b/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
@@ -104,7 +104,7 @@ namespace {
bool MatchWrapper(SDValue N, MSP430ISelAddressMode &AM);
bool MatchAddressBase(SDValue N, MSP430ISelAddressMode &AM);
- bool SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode,
+ bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
std::vector<SDValue> &OutOps) override;
// Include the pieces autogenerated from the target description.
@@ -280,12 +280,12 @@ bool MSP430DAGToDAGISel::SelectAddr(SDValue N,
}
bool MSP430DAGToDAGISel::
-SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode,
+SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
std::vector<SDValue> &OutOps) {
SDValue Op0, Op1;
- switch (ConstraintCode) {
+ switch (ConstraintID) {
default: return true;
- case 'm': // memory
+ case InlineAsm::Constraint_m: // memory
if (!SelectAddr(Op, Op0, Op1))
return true;
break;
diff --git a/lib/Target/MSP430/MSP430ISelLowering.h b/lib/Target/MSP430/MSP430ISelLowering.h
index 9266c3b..68868b6 100644
--- a/lib/Target/MSP430/MSP430ISelLowering.h
+++ b/lib/Target/MSP430/MSP430ISelLowering.h
@@ -102,6 +102,12 @@ namespace llvm {
const std::string &Constraint,
MVT VT) const override;
+ unsigned getInlineAsmMemConstraint(
+ const std::string &ConstraintCode) const override {
+ // FIXME: Map different constraints differently.
+ return InlineAsm::Constraint_m;
+ }
+
/// isTruncateFree - Return true if it's free to truncate a value of type
/// Ty1 to type Ty2. e.g. On msp430 it's free to truncate a i16 value in
/// register R15W to i8 by referencing its sub-register R15B.
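Inline-asm memory operands are now dispatched on an unsigned InlineAsm::Constraint_* ID instead of the raw constraint letter, and getInlineAsmMemConstraint is the hook that maps constraint strings to IDs; MSP430 maps everything to Constraint_m for now, per the FIXME above. A sketch of what a finer-grained mapping would look like, assuming this revision's enum also provides Constraint_Unknown (the rejection path here is hypothetical):

    #include "llvm/IR/InlineAsm.h"
    #include <string>
    using namespace llvm;

    // Sketch only: mirrors the override above, plus a hypothetical fallback.
    static unsigned mapMemConstraint(const std::string &ConstraintCode) {
      if (ConstraintCode == "m")            // generic memory operand
        return InlineAsm::Constraint_m;
      return InlineAsm::Constraint_Unknown; // hypothetical: reject anything else
    }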
diff --git a/lib/Target/MSP430/MSP430RegisterInfo.h b/lib/Target/MSP430/MSP430RegisterInfo.h
index 3f88a69..0cfa4a4 100644
--- a/lib/Target/MSP430/MSP430RegisterInfo.h
+++ b/lib/Target/MSP430/MSP430RegisterInfo.h
@@ -26,8 +26,7 @@ public:
MSP430RegisterInfo();
/// Code Generation virtual methods...
- const MCPhysReg *
- getCalleeSavedRegs(const MachineFunction *MF = nullptr) const override;
+ const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override;
BitVector getReservedRegs(const MachineFunction &MF) const override;
const TargetRegisterClass*
diff --git a/lib/Target/MSP430/MSP430Subtarget.cpp b/lib/Target/MSP430/MSP430Subtarget.cpp
index 7468519..3dda3bf 100644
--- a/lib/Target/MSP430/MSP430Subtarget.cpp
+++ b/lib/Target/MSP430/MSP430Subtarget.cpp
@@ -25,7 +25,8 @@ using namespace llvm;
void MSP430Subtarget::anchor() { }
-MSP430Subtarget &MSP430Subtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS) {
+MSP430Subtarget &
+MSP430Subtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS) {
ParseSubtargetFeatures("generic", FS);
return *this;
}
diff --git a/lib/Target/MSP430/MSP430TargetMachine.cpp b/lib/Target/MSP430/MSP430TargetMachine.cpp
index 348e672..d6cc4ae 100644
--- a/lib/Target/MSP430/MSP430TargetMachine.cpp
+++ b/lib/Target/MSP430/MSP430TargetMachine.cpp
@@ -30,10 +30,11 @@ MSP430TargetMachine::MSP430TargetMachine(const Target &T, StringRef TT,
const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL)
- : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
+ : LLVMTargetMachine(T, "e-m:e-p:16:16-i32:16:32-a:16-n8:16", TT, CPU, FS,
+ Options, RM, CM, OL),
TLOF(make_unique<TargetLoweringObjectFileELF>()),
// FIXME: Check DataLayout string.
- DL("e-m:e-p:16:16-i32:16:32-a:16-n8:16"), Subtarget(TT, CPU, FS, *this) {
+ Subtarget(TT, CPU, FS, *this) {
initAsmInfo();
}
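The MSP430 data layout string now rides through the LLVMTargetMachine base constructor instead of a per-target DL member (the matching header change follows), so there is a single authoritative copy and getDataLayout() no longer needs an override. A sketch of a consumer, assuming the TargetMachine API of this tree, where the accessor returns a pointer:

    #include "llvm/IR/DataLayout.h"
    #include "llvm/Target/TargetMachine.h"
    using namespace llvm;

    // The accessor is now inherited from LLVMTargetMachine.
    static unsigned pointerWidth(const TargetMachine &TM) {
      const DataLayout *DL = TM.getDataLayout();
      return DL->getPointerSizeInBits(); // 16, given "p:16:16" in the string above
    }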
diff --git a/lib/Target/MSP430/MSP430TargetMachine.h b/lib/Target/MSP430/MSP430TargetMachine.h
index c6a6a70..6ccd30d 100644
--- a/lib/Target/MSP430/MSP430TargetMachine.h
+++ b/lib/Target/MSP430/MSP430TargetMachine.h
@@ -25,7 +25,6 @@ namespace llvm {
///
class MSP430TargetMachine : public LLVMTargetMachine {
std::unique_ptr<TargetLoweringObjectFile> TLOF;
- const DataLayout DL; // Calculates type size & alignment
MSP430Subtarget Subtarget;
public:
@@ -35,8 +34,7 @@ public:
CodeGenOpt::Level OL);
~MSP430TargetMachine() override;
- const DataLayout *getDataLayout() const override { return &DL; }
- const MSP430Subtarget *getSubtargetImpl() const override {
+ const MSP430Subtarget *getSubtargetImpl(const Function &F) const override {
return &Subtarget;
}
TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
diff --git a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
index 1040bf7..6401bc1 100644
--- a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
+++ b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
@@ -29,6 +29,7 @@
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
#include <memory>
using namespace llvm;
@@ -53,7 +54,13 @@ public:
}
unsigned getATRegNum() const { return ATReg; }
- bool setATReg(unsigned Reg);
+ bool setATReg(unsigned Reg) {
+ if (Reg > 31)
+ return false;
+
+ ATReg = Reg;
+ return true;
+ }
bool isReorder() const { return Reorder; }
void setReorder() { Reorder = true; }
@@ -193,6 +200,9 @@ class MipsAsmParser : public MCTargetAsmParser {
bool expandLoadStoreMultiple(MCInst &Inst, SMLoc IDLoc,
SmallVectorImpl<MCInst> &Instructions);
+ void createNop(bool hasShortDelaySlot, SMLoc IDLoc,
+ SmallVectorImpl<MCInst> &Instructions);
+
bool reportParseError(Twine ErrorMsg);
bool reportParseError(SMLoc Loc, Twine ErrorMsg);
@@ -236,6 +246,8 @@ class MipsAsmParser : public MCTargetAsmParser {
bool parseFpABIValue(MipsABIFlagsSection::FpABIKind &FpABI,
StringRef Directive);
+ bool parseInternalDirectiveReallowModule();
+
MCSymbolRefExpr::VariantKind getVariantKind(StringRef Symbol);
bool eatComma(StringRef ErrorStr);
@@ -1365,22 +1377,11 @@ bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc,
}
}
+ // If this instruction has a delay slot and .set reorder is active,
+ // emit a NOP after it.
if (MCID.hasDelaySlot() && AssemblerOptions.back()->isReorder()) {
- // If this instruction has a delay slot and .set reorder is active,
- // emit a NOP after it.
Instructions.push_back(Inst);
- MCInst NopInst;
- if (hasShortDelaySlot(Inst.getOpcode())) {
- NopInst.setOpcode(Mips::MOVE16_MM);
- NopInst.addOperand(MCOperand::CreateReg(Mips::ZERO));
- NopInst.addOperand(MCOperand::CreateReg(Mips::ZERO));
- } else {
- NopInst.setOpcode(Mips::SLL);
- NopInst.addOperand(MCOperand::CreateReg(Mips::ZERO));
- NopInst.addOperand(MCOperand::CreateReg(Mips::ZERO));
- NopInst.addOperand(MCOperand::CreateImm(0));
- }
- Instructions.push_back(NopInst);
+ createNop(hasShortDelaySlot(Inst.getOpcode()), IDLoc, Instructions);
return false;
}
@@ -1584,10 +1585,10 @@ bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc,
bool MipsAsmParser::needsExpansion(MCInst &Inst) {
switch (Inst.getOpcode()) {
- case Mips::LoadImm32Reg:
- case Mips::LoadAddr32Imm:
- case Mips::LoadAddr32Reg:
- case Mips::LoadImm64Reg:
+ case Mips::LoadImm32:
+ case Mips::LoadImm64:
+ case Mips::LoadAddrImm32:
+ case Mips::LoadAddrReg32:
case Mips::B_MM_Pseudo:
case Mips::LWM_MM:
case Mips::SWM_MM:
@@ -1603,17 +1604,17 @@ bool MipsAsmParser::expandInstruction(MCInst &Inst, SMLoc IDLoc,
SmallVectorImpl<MCInst> &Instructions) {
switch (Inst.getOpcode()) {
default: llvm_unreachable("unimplemented expansion");
- case Mips::LoadImm32Reg:
+ case Mips::LoadImm32:
return expandLoadImm(Inst, IDLoc, Instructions);
- case Mips::LoadImm64Reg:
+ case Mips::LoadImm64:
if (!isGP64bit()) {
Error(IDLoc, "instruction requires a 64-bit architecture");
return true;
}
return expandLoadImm(Inst, IDLoc, Instructions);
- case Mips::LoadAddr32Imm:
+ case Mips::LoadAddrImm32:
return expandLoadAddressImm(Inst, IDLoc, Instructions);
- case Mips::LoadAddr32Reg:
+ case Mips::LoadAddrReg32:
return expandLoadAddressReg(Inst, IDLoc, Instructions);
case Mips::B_MM_Pseudo:
return expandUncondBranchMMPseudo(Inst, IDLoc, Instructions);
@@ -1982,14 +1983,10 @@ bool MipsAsmParser::expandUncondBranchMMPseudo(
}
Instructions.push_back(Inst);
- if (AssemblerOptions.back()->isReorder()) {
- // If .set reorder is active, emit a NOP after the branch instruction.
- MCInst NopInst;
- NopInst.setOpcode(Mips::MOVE16_MM);
- NopInst.addOperand(MCOperand::CreateReg(Mips::ZERO));
- NopInst.addOperand(MCOperand::CreateReg(Mips::ZERO));
- Instructions.push_back(NopInst);
- }
+ // If .set reorder is active, emit a NOP after the branch instruction.
+ if (AssemblerOptions.back()->isReorder())
+ createNop(true, IDLoc, Instructions);
+
return false;
}
@@ -2132,6 +2129,22 @@ MipsAsmParser::expandLoadStoreMultiple(MCInst &Inst, SMLoc IDLoc,
return false;
}
+void MipsAsmParser::createNop(bool hasShortDelaySlot, SMLoc IDLoc,
+ SmallVectorImpl<MCInst> &Instructions) {
+ MCInst NopInst;
+ if (hasShortDelaySlot) {
+ NopInst.setOpcode(Mips::MOVE16_MM);
+ NopInst.addOperand(MCOperand::CreateReg(Mips::ZERO));
+ NopInst.addOperand(MCOperand::CreateReg(Mips::ZERO));
+ } else {
+ NopInst.setOpcode(Mips::SLL);
+ NopInst.addOperand(MCOperand::CreateReg(Mips::ZERO));
+ NopInst.addOperand(MCOperand::CreateReg(Mips::ZERO));
+ NopInst.addOperand(MCOperand::CreateImm(0));
+ }
+ Instructions.push_back(NopInst);
+}
+
unsigned MipsAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
// As described by the Mips32r2 spec, the registers Rd and Rs for
// jalr.hb must be different.
@@ -2370,14 +2383,6 @@ int MipsAsmParser::matchMSA128CtrlRegisterName(StringRef Name) {
return CC;
}
-bool MipsAssemblerOptions::setATReg(unsigned Reg) {
- if (Reg > 31)
- return false;
-
- ATReg = Reg;
- return true;
-}
-
int MipsAsmParser::getATReg(SMLoc Loc) {
int AT = AssemblerOptions.back()->getATRegNum();
if (AT == 0)
@@ -4429,9 +4434,25 @@ bool MipsAsmParser::ParseDirective(AsmToken DirectiveID) {
if (IDVal == ".module")
return parseDirectiveModule();
+ if (IDVal == ".llvm_internal_mips_reallow_module_directive")
+ return parseInternalDirectiveReallowModule();
+
return true;
}
+bool MipsAsmParser::parseInternalDirectiveReallowModule() {
+ // If this is not the end of the statement, report an error.
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ reportParseError("unexpected token, expected end of statement");
+ return false;
+ }
+
+ getTargetStreamer().reallowModuleDirective();
+
+ getParser().Lex(); // Eat EndOfStatement token.
+ return false;
+}
+
extern "C" void LLVMInitializeMipsAsmParser() {
RegisterMCAsmParser<MipsAsmParser> X(TheMipsTarget);
RegisterMCAsmParser<MipsAsmParser> Y(TheMipselTarget);
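Three cleanups run through the assembly parser: setATReg moves inline into MipsAssemblerOptions, the duplicated NOP-materialization code is hoisted into the new createNop helper, and an internal .llvm_internal_mips_reallow_module_directive directive lets the integrated assembler re-enable .module after it has been forbidden. With the helper in place, both former copies reduce to a single call each:

    // Delay-slot path: pick the NOP flavour from the branch's own opcode.
    createNop(hasShortDelaySlot(Inst.getOpcode()), IDLoc, Instructions);

    // microMIPS unconditional-branch pseudo: always a short delay slot.
    createNop(/*hasShortDelaySlot=*/true, IDLoc, Instructions);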
diff --git a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h
index dd0e54c..243b73d 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h
+++ b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h
@@ -32,10 +32,9 @@ class MipsAsmBackend : public MCAsmBackend {
bool Is64Bit; // 32 or 64 bit words
public:
- MipsAsmBackend(const Target &T, Triple::OSType _OSType, bool _isLittle,
- bool _is64Bit)
- : MCAsmBackend(), OSType(_OSType), IsLittle(_isLittle),
- Is64Bit(_is64Bit) {}
+ MipsAsmBackend(const Target &T, Triple::OSType OSType, bool IsLittle,
+ bool Is64Bit)
+ : MCAsmBackend(), OSType(OSType), IsLittle(IsLittle), Is64Bit(Is64Bit) {}
MCObjectWriter *createObjectWriter(raw_ostream &OS) const override;
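Besides tidying the initializer list, the parameter rename fixes a conformance nit: identifiers like _OSType, an underscore followed by an uppercase letter, are reserved to the implementation in C++. Reusing the member's own name is safe because a mem-initializer looks the member up first; a self-contained illustration:

    // Compiles cleanly: in "OSType(OSType)" the first name resolves to the
    // member, the second to the constructor parameter.
    struct Backend {
      unsigned OSType;
      bool IsLittle;
      Backend(unsigned OSType, bool IsLittle)
          : OSType(OSType), IsLittle(IsLittle) {}
    };

    int main() { return Backend(1, true).IsLittle ? 0 : 1; }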
diff --git a/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp b/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp
index e14dc8d..a68bf16 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp
@@ -38,9 +38,9 @@ namespace {
MipsELFObjectWriter::MipsELFObjectWriter(bool _is64Bit, uint8_t OSABI,
bool _isN64, bool IsLittleEndian)
- : MCELFObjectTargetWriter(_is64Bit, OSABI, ELF::EM_MIPS,
- /*HasRelocationAddend*/ (_isN64) ? true : false,
- /*IsN64*/ _isN64) {}
+ : MCELFObjectTargetWriter(_is64Bit, OSABI, ELF::EM_MIPS,
+ /*HasRelocationAddend*/ _isN64,
+ /*IsN64*/ _isN64) {}
MipsELFObjectWriter::~MipsELFObjectWriter() {}
@@ -54,9 +54,11 @@ unsigned MipsELFObjectWriter::GetRelocType(const MCValue &Target,
switch (Kind) {
default:
llvm_unreachable("invalid fixup kind!");
+ case Mips::fixup_Mips_32:
case FK_Data_4:
Type = ELF::R_MIPS_32;
break;
+ case Mips::fixup_Mips_64:
case FK_Data_8:
Type = ELF::R_MIPS_64;
break;
@@ -262,12 +264,10 @@ MipsELFObjectWriter::needsRelocateWithSymbol(const MCSymbolData &SD,
}
}
-MCObjectWriter *llvm::createMipsELFObjectWriter(raw_ostream &OS,
- uint8_t OSABI,
+MCObjectWriter *llvm::createMipsELFObjectWriter(raw_ostream &OS, uint8_t OSABI,
bool IsLittleEndian,
bool Is64Bit) {
- MCELFObjectTargetWriter *MOTW = new MipsELFObjectWriter(Is64Bit, OSABI,
- (Is64Bit) ? true : false,
- IsLittleEndian);
+ MCELFObjectTargetWriter *MOTW =
+ new MipsELFObjectWriter(Is64Bit, OSABI, Is64Bit, IsLittleEndian);
return createELFObjectWriter(MOTW, OS, IsLittleEndian);
}
diff --git a/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp b/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp
index 18c4a20..93f60df 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp
@@ -69,11 +69,9 @@ void MipsELFStreamer::EmitMipsOptionRecords() {
I->EmitMipsOptionRecord();
}
-namespace llvm {
-MCELFStreamer *createMipsELFStreamer(MCContext &Context, MCAsmBackend &MAB,
- raw_ostream &OS, MCCodeEmitter *Emitter,
- const MCSubtargetInfo &STI,
- bool RelaxAll) {
- return new MipsELFStreamer(Context, MAB, OS, Emitter, STI);
-}
+MCELFStreamer *llvm::createMipsELFStreamer(MCContext &Context,
+ MCAsmBackend &MAB, raw_ostream &OS,
+ MCCodeEmitter *Emitter,
+ bool RelaxAll) {
+ return new MipsELFStreamer(Context, MAB, OS, Emitter);
}
diff --git a/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h b/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h
index bc76d8a..6b834c6 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h
+++ b/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h
@@ -34,7 +34,7 @@ class MipsELFStreamer : public MCELFStreamer {
public:
MipsELFStreamer(MCContext &Context, MCAsmBackend &MAB, raw_ostream &OS,
- MCCodeEmitter *Emitter, const MCSubtargetInfo &STI)
+ MCCodeEmitter *Emitter)
: MCELFStreamer(Context, MAB, OS, Emitter) {
RegInfoRecord = new MipsRegInfoRecord(this, Context);
@@ -69,6 +69,6 @@ public:
MCELFStreamer *createMipsELFStreamer(MCContext &Context, MCAsmBackend &MAB,
raw_ostream &OS, MCCodeEmitter *Emitter,
- const MCSubtargetInfo &STI, bool RelaxAll);
+ bool RelaxAll);
} // namespace llvm.
#endif
diff --git a/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h b/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h
index fa8d6a6..e601963 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h
+++ b/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h
@@ -18,7 +18,7 @@ namespace Mips {
// one can have multiple fixup types for a given relocation and thus need
// to be uniquely named.
//
- // This table *must* be in the save order of
+ // This table *must* be in the same order as
// MCFixupKindInfo Infos[Mips::NumTargetFixupKinds]
// in MipsAsmBackend.cpp.
//
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
index 8208725..1c2f2da 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
@@ -35,14 +35,12 @@
namespace llvm {
MCCodeEmitter *createMipsMCCodeEmitterEB(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI,
MCContext &Ctx) {
return new MipsMCCodeEmitter(MCII, Ctx, false);
}
MCCodeEmitter *createMipsMCCodeEmitterEL(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI,
MCContext &Ctx) {
return new MipsMCCodeEmitter(MCII, Ctx, true);
}
@@ -451,7 +449,7 @@ getSImm9AddiuspValue(const MCInst &MI, unsigned OpNo,
}
unsigned MipsMCCodeEmitter::
-getExprOpValue(const MCExpr *Expr,SmallVectorImpl<MCFixup> &Fixups,
+getExprOpValue(const MCExpr *Expr, SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
int64_t Res;
@@ -500,6 +498,9 @@ getExprOpValue(const MCExpr *Expr,SmallVectorImpl<MCFixup> &Fixups,
switch(cast<MCSymbolRefExpr>(Expr)->getKind()) {
default: llvm_unreachable("Unknown fixup kind!");
break;
+ case MCSymbolRefExpr::VK_None:
+ FixupKind = Mips::fixup_Mips_32; // FIXME: This is ok for O32/N32 but not N64.
+ break;
case MCSymbolRefExpr::VK_Mips_GPOFF_HI :
FixupKind = Mips::fixup_Mips_GPOFF_HI;
break;
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCNaCl.h b/lib/Target/Mips/MCTargetDesc/MipsMCNaCl.h
index e756b47..e6b5be7 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCNaCl.h
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCNaCl.h
@@ -25,7 +25,6 @@ bool baseRegNeedsLoadStoreMask(unsigned Reg);
MCELFStreamer *createMipsNaClELFStreamer(MCContext &Context, MCAsmBackend &TAB,
raw_ostream &OS,
MCCodeEmitter *Emitter,
- const MCSubtargetInfo &STI,
bool RelaxAll);
}
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp
index 9b56067..6f3f37b 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp
@@ -106,96 +106,73 @@ static MCInstPrinter *createMipsMCInstPrinter(const Target &T,
return new MipsInstPrinter(MAI, MII, MRI);
}
-static MCStreamer *createMCStreamer(const Target &T, StringRef TT,
- MCContext &Context, MCAsmBackend &MAB,
- raw_ostream &OS, MCCodeEmitter *Emitter,
- const MCSubtargetInfo &STI, bool RelaxAll) {
+static MCStreamer *createMCStreamer(const Triple &T, MCContext &Context,
+ MCAsmBackend &MAB, raw_ostream &OS,
+ MCCodeEmitter *Emitter, bool RelaxAll) {
MCStreamer *S;
- if (!Triple(TT).isOSNaCl())
- S = createMipsELFStreamer(Context, MAB, OS, Emitter, STI, RelaxAll);
+ if (!T.isOSNaCl())
+ S = createMipsELFStreamer(Context, MAB, OS, Emitter, RelaxAll);
else
- S = createMipsNaClELFStreamer(Context, MAB, OS, Emitter, STI, RelaxAll);
- new MipsTargetELFStreamer(*S, STI);
+ S = createMipsNaClELFStreamer(Context, MAB, OS, Emitter, RelaxAll);
return S;
}
-static MCStreamer *
-createMCAsmStreamer(MCContext &Ctx, formatted_raw_ostream &OS,
- bool isVerboseAsm, bool useDwarfDirectory,
- MCInstPrinter *InstPrint, MCCodeEmitter *CE,
- MCAsmBackend *TAB, bool ShowInst) {
- MCStreamer *S = llvm::createAsmStreamer(
- Ctx, OS, isVerboseAsm, useDwarfDirectory, InstPrint, CE, TAB, ShowInst);
- new MipsTargetAsmStreamer(*S, OS);
- return S;
+static MCTargetStreamer *createMipsAsmTargetStreamer(MCStreamer &S,
+ formatted_raw_ostream &OS,
+ MCInstPrinter *InstPrint,
+ bool isVerboseAsm) {
+ return new MipsTargetAsmStreamer(S, OS);
}
static MCTargetStreamer *createMipsNullTargetStreamer(MCStreamer &S) {
return new MipsTargetStreamer(S);
}
+static MCTargetStreamer *
+createMipsObjectTargetStreamer(MCStreamer &S, const MCSubtargetInfo &STI) {
+ return new MipsTargetELFStreamer(S, STI);
+}
+
extern "C" void LLVMInitializeMipsTargetMC() {
- // Register the MC asm info.
- RegisterMCAsmInfoFn X(TheMipsTarget, createMipsMCAsmInfo);
- RegisterMCAsmInfoFn Y(TheMipselTarget, createMipsMCAsmInfo);
- RegisterMCAsmInfoFn A(TheMips64Target, createMipsMCAsmInfo);
- RegisterMCAsmInfoFn B(TheMips64elTarget, createMipsMCAsmInfo);
-
- // Register the MC codegen info.
- TargetRegistry::RegisterMCCodeGenInfo(TheMipsTarget,
- createMipsMCCodeGenInfo);
- TargetRegistry::RegisterMCCodeGenInfo(TheMipselTarget,
- createMipsMCCodeGenInfo);
- TargetRegistry::RegisterMCCodeGenInfo(TheMips64Target,
- createMipsMCCodeGenInfo);
- TargetRegistry::RegisterMCCodeGenInfo(TheMips64elTarget,
- createMipsMCCodeGenInfo);
-
- // Register the MC instruction info.
- TargetRegistry::RegisterMCInstrInfo(TheMipsTarget, createMipsMCInstrInfo);
- TargetRegistry::RegisterMCInstrInfo(TheMipselTarget, createMipsMCInstrInfo);
- TargetRegistry::RegisterMCInstrInfo(TheMips64Target, createMipsMCInstrInfo);
- TargetRegistry::RegisterMCInstrInfo(TheMips64elTarget,
- createMipsMCInstrInfo);
-
- // Register the MC register info.
- TargetRegistry::RegisterMCRegInfo(TheMipsTarget, createMipsMCRegisterInfo);
- TargetRegistry::RegisterMCRegInfo(TheMipselTarget, createMipsMCRegisterInfo);
- TargetRegistry::RegisterMCRegInfo(TheMips64Target, createMipsMCRegisterInfo);
- TargetRegistry::RegisterMCRegInfo(TheMips64elTarget,
- createMipsMCRegisterInfo);
+ for (Target *T : {&TheMipsTarget, &TheMipselTarget, &TheMips64Target,
+ &TheMips64elTarget}) {
+ // Register the MC asm info.
+ RegisterMCAsmInfoFn X(*T, createMipsMCAsmInfo);
+
+ // Register the MC codegen info.
+ TargetRegistry::RegisterMCCodeGenInfo(*T, createMipsMCCodeGenInfo);
+
+ // Register the MC instruction info.
+ TargetRegistry::RegisterMCInstrInfo(*T, createMipsMCInstrInfo);
+
+ // Register the MC register info.
+ TargetRegistry::RegisterMCRegInfo(*T, createMipsMCRegisterInfo);
+
+ // Register the elf streamer.
+ TargetRegistry::RegisterELFStreamer(*T, createMCStreamer);
+
+ // Register the asm target streamer.
+ TargetRegistry::RegisterAsmTargetStreamer(*T, createMipsAsmTargetStreamer);
+
+ TargetRegistry::RegisterNullTargetStreamer(*T,
+ createMipsNullTargetStreamer);
+
+ // Register the MC subtarget info.
+ TargetRegistry::RegisterMCSubtargetInfo(*T, createMipsMCSubtargetInfo);
+
+ // Register the MCInstPrinter.
+ TargetRegistry::RegisterMCInstPrinter(*T, createMipsMCInstPrinter);
+
+ TargetRegistry::RegisterObjectTargetStreamer(
+ *T, createMipsObjectTargetStreamer);
+ }
// Register the MC Code Emitter
- TargetRegistry::RegisterMCCodeEmitter(TheMipsTarget,
- createMipsMCCodeEmitterEB);
- TargetRegistry::RegisterMCCodeEmitter(TheMipselTarget,
- createMipsMCCodeEmitterEL);
- TargetRegistry::RegisterMCCodeEmitter(TheMips64Target,
- createMipsMCCodeEmitterEB);
- TargetRegistry::RegisterMCCodeEmitter(TheMips64elTarget,
- createMipsMCCodeEmitterEL);
-
- // Register the object streamer.
- TargetRegistry::RegisterMCObjectStreamer(TheMipsTarget, createMCStreamer);
- TargetRegistry::RegisterMCObjectStreamer(TheMipselTarget, createMCStreamer);
- TargetRegistry::RegisterMCObjectStreamer(TheMips64Target, createMCStreamer);
- TargetRegistry::RegisterMCObjectStreamer(TheMips64elTarget,
- createMCStreamer);
-
- // Register the asm streamer.
- TargetRegistry::RegisterAsmStreamer(TheMipsTarget, createMCAsmStreamer);
- TargetRegistry::RegisterAsmStreamer(TheMipselTarget, createMCAsmStreamer);
- TargetRegistry::RegisterAsmStreamer(TheMips64Target, createMCAsmStreamer);
- TargetRegistry::RegisterAsmStreamer(TheMips64elTarget, createMCAsmStreamer);
-
- TargetRegistry::RegisterNullTargetStreamer(TheMipsTarget,
- createMipsNullTargetStreamer);
- TargetRegistry::RegisterNullTargetStreamer(TheMipselTarget,
- createMipsNullTargetStreamer);
- TargetRegistry::RegisterNullTargetStreamer(TheMips64Target,
- createMipsNullTargetStreamer);
- TargetRegistry::RegisterNullTargetStreamer(TheMips64elTarget,
- createMipsNullTargetStreamer);
+ for (Target *T : {&TheMipsTarget, &TheMips64Target})
+ TargetRegistry::RegisterMCCodeEmitter(*T, createMipsMCCodeEmitterEB);
+
+ for (Target *T : {&TheMipselTarget, &TheMips64elTarget})
+ TargetRegistry::RegisterMCCodeEmitter(*T, createMipsMCCodeEmitterEL);
// Register the asm backend.
TargetRegistry::RegisterMCAsmBackend(TheMipsTarget,
@@ -207,23 +184,4 @@ extern "C" void LLVMInitializeMipsTargetMC() {
TargetRegistry::RegisterMCAsmBackend(TheMips64elTarget,
createMipsAsmBackendEL64);
- // Register the MC subtarget info.
- TargetRegistry::RegisterMCSubtargetInfo(TheMipsTarget,
- createMipsMCSubtargetInfo);
- TargetRegistry::RegisterMCSubtargetInfo(TheMipselTarget,
- createMipsMCSubtargetInfo);
- TargetRegistry::RegisterMCSubtargetInfo(TheMips64Target,
- createMipsMCSubtargetInfo);
- TargetRegistry::RegisterMCSubtargetInfo(TheMips64elTarget,
- createMipsMCSubtargetInfo);
-
- // Register the MCInstPrinter.
- TargetRegistry::RegisterMCInstPrinter(TheMipsTarget,
- createMipsMCInstPrinter);
- TargetRegistry::RegisterMCInstPrinter(TheMipselTarget,
- createMipsMCInstPrinter);
- TargetRegistry::RegisterMCInstPrinter(TheMips64Target,
- createMipsMCInstPrinter);
- TargetRegistry::RegisterMCInstPrinter(TheMips64elTarget,
- createMipsMCInstPrinter);
}
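The registration boilerplate collapses from four near-identical stanzas per MC component into one loop over the four Mips targets, leaving only the endianness-specific code emitters registered per target. A self-contained sketch of the loop idiom, with a stand-in Target type:

    #include <cstdio>
    #include <initializer_list>

    struct Target { const char *Name; };

    int main() {
      Target Mips{"mips"}, Mipsel{"mipsel"}, Mips64{"mips64"},
          Mips64el{"mips64el"};
      // One pass of the registration logic per target, as in the refactored
      // LLVMInitializeMipsTargetMC().
      for (Target *T : {&Mips, &Mipsel, &Mips64, &Mips64el})
        std::printf("registering MC components for %s\n", T->Name);
      return 0;
    }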
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h
index 9528b4e..92f394a 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h
@@ -35,11 +35,9 @@ extern Target TheMips64elTarget;
MCCodeEmitter *createMipsMCCodeEmitterEB(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI,
MCContext &Ctx);
MCCodeEmitter *createMipsMCCodeEmitterEL(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI,
MCContext &Ctx);
MCAsmBackend *createMipsAsmBackendEB32(const Target &T,
diff --git a/lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp b/lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp
index 92b8455..1adfdf9 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp
@@ -37,8 +37,8 @@ const unsigned LoadStoreStackMaskReg = Mips::T7;
class MipsNaClELFStreamer : public MipsELFStreamer {
public:
MipsNaClELFStreamer(MCContext &Context, MCAsmBackend &TAB, raw_ostream &OS,
- MCCodeEmitter *Emitter, const MCSubtargetInfo &STI)
- : MipsELFStreamer(Context, TAB, OS, Emitter, STI), PendingCall(false) {}
+ MCCodeEmitter *Emitter)
+ : MipsELFStreamer(Context, TAB, OS, Emitter), PendingCall(false) {}
~MipsNaClELFStreamer() {}
@@ -254,10 +254,8 @@ bool baseRegNeedsLoadStoreMask(unsigned Reg) {
MCELFStreamer *createMipsNaClELFStreamer(MCContext &Context, MCAsmBackend &TAB,
raw_ostream &OS,
MCCodeEmitter *Emitter,
- const MCSubtargetInfo &STI,
bool RelaxAll) {
- MipsNaClELFStreamer *S = new MipsNaClELFStreamer(Context, TAB, OS, Emitter,
- STI);
+ MipsNaClELFStreamer *S = new MipsNaClELFStreamer(Context, TAB, OS, Emitter);
if (RelaxAll)
S->getAssembler().setRelaxAll(true);
diff --git a/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp b/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp
index 64d7cab..5790a5c 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp
@@ -62,7 +62,7 @@ void MipsTargetStreamer::emitFMask(unsigned FPUBitmask, int FPUTopSavedRegOff) {
void MipsTargetStreamer::emitDirectiveSetArch(StringRef Arch) {
forbidModuleDirective();
}
-void MipsTargetStreamer::emitDirectiveSetMips0() {}
+void MipsTargetStreamer::emitDirectiveSetMips0() { forbidModuleDirective(); }
void MipsTargetStreamer::emitDirectiveSetMips1() { forbidModuleDirective(); }
void MipsTargetStreamer::emitDirectiveSetMips2() { forbidModuleDirective(); }
void MipsTargetStreamer::emitDirectiveSetMips3() { forbidModuleDirective(); }
@@ -78,8 +78,8 @@ void MipsTargetStreamer::emitDirectiveSetMips64R2() { forbidModuleDirective(); }
void MipsTargetStreamer::emitDirectiveSetMips64R3() { forbidModuleDirective(); }
void MipsTargetStreamer::emitDirectiveSetMips64R5() { forbidModuleDirective(); }
void MipsTargetStreamer::emitDirectiveSetMips64R6() { forbidModuleDirective(); }
-void MipsTargetStreamer::emitDirectiveSetPop() {}
-void MipsTargetStreamer::emitDirectiveSetPush() {}
+void MipsTargetStreamer::emitDirectiveSetPop() { forbidModuleDirective(); }
+void MipsTargetStreamer::emitDirectiveSetPush() { forbidModuleDirective(); }
void MipsTargetStreamer::emitDirectiveSetDsp() { forbidModuleDirective(); }
void MipsTargetStreamer::emitDirectiveSetNoDsp() { forbidModuleDirective(); }
void MipsTargetStreamer::emitDirectiveCpLoad(unsigned RegNo) {}
@@ -91,6 +91,10 @@ void MipsTargetStreamer::emitDirectiveModuleOddSPReg(bool Enabled,
if (!Enabled && !IsO32ABI)
report_fatal_error("+nooddspreg is only valid for O32");
}
+void MipsTargetStreamer::emitDirectiveSetFp(
+ MipsABIFlagsSection::FpABIKind Value) {
+ forbidModuleDirective();
+}
MipsTargetAsmStreamer::MipsTargetAsmStreamer(MCStreamer &S,
formatted_raw_ostream &OS)
@@ -198,7 +202,10 @@ void MipsTargetAsmStreamer::emitDirectiveSetArch(StringRef Arch) {
MipsTargetStreamer::emitDirectiveSetArch(Arch);
}
-void MipsTargetAsmStreamer::emitDirectiveSetMips0() { OS << "\t.set\tmips0\n"; }
+void MipsTargetAsmStreamer::emitDirectiveSetMips0() {
+ OS << "\t.set\tmips0\n";
+ MipsTargetStreamer::emitDirectiveSetMips0();
+}
void MipsTargetAsmStreamer::emitDirectiveSetMips1() {
OS << "\t.set\tmips1\n";
@@ -285,9 +292,15 @@ void MipsTargetAsmStreamer::emitDirectiveSetNoDsp() {
MipsTargetStreamer::emitDirectiveSetNoDsp();
}
-void MipsTargetAsmStreamer::emitDirectiveSetPop() { OS << "\t.set\tpop\n"; }
+void MipsTargetAsmStreamer::emitDirectiveSetPop() {
+ OS << "\t.set\tpop\n";
+ MipsTargetStreamer::emitDirectiveSetPop();
+}
-void MipsTargetAsmStreamer::emitDirectiveSetPush() { OS << "\t.set\tpush\n"; }
+void MipsTargetAsmStreamer::emitDirectiveSetPush() {
+ OS << "\t.set\tpush\n";
+ MipsTargetStreamer::emitDirectiveSetPush();
+}
// Print a 32 bit hex number with all numbers.
static void printHex32(unsigned Value, raw_ostream &OS) {
@@ -346,15 +359,13 @@ void MipsTargetAsmStreamer::emitDirectiveModuleFP(
void MipsTargetAsmStreamer::emitDirectiveSetFp(
MipsABIFlagsSection::FpABIKind Value) {
+ MipsTargetStreamer::emitDirectiveSetFp(Value);
+
StringRef ModuleValue;
OS << "\t.set\tfp=";
OS << ABIFlagsSection.getFpABIString(Value) << "\n";
}
-void MipsTargetAsmStreamer::emitMipsAbiFlags() {
- // No action required for text output.
-}
-
void MipsTargetAsmStreamer::emitDirectiveModuleOddSPReg(bool Enabled,
bool IsO32ABI) {
MipsTargetStreamer::emitDirectiveModuleOddSPReg(Enabled, IsO32ABI);
@@ -367,10 +378,7 @@ MipsTargetELFStreamer::MipsTargetELFStreamer(MCStreamer &S,
const MCSubtargetInfo &STI)
: MipsTargetStreamer(S), MicroMipsEnabled(false), STI(STI) {
MCAssembler &MCA = getStreamer().getAssembler();
- Triple T(STI.getTargetTriple());
- Pic = (MCA.getContext().getObjectFileInfo()->getRelocM() == Reloc::PIC_)
- ? true
- : false;
+ Pic = MCA.getContext().getObjectFileInfo()->getRelocM() == Reloc::PIC_;
uint64_t Features = STI.getFeatureBits();
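On the streamer side, .set mips0, .set push, .set pop and .set fp= now funnel through the base-class hooks so each of them updates the shared module-directive state; the asm streamer overrides print their text and then delegate upward. A self-contained sketch of the delegate-to-base pattern, with hypothetical names:

    #include <iostream>

    class TargetStreamerBase {
      bool ModuleDirectiveAllowed = true;
    public:
      virtual ~TargetStreamerBase() {}
      // The base hook owns the shared state transition.
      virtual void emitDirectiveSetPop() { ModuleDirectiveAllowed = false; }
      bool moduleDirectiveAllowed() const { return ModuleDirectiveAllowed; }
    };

    class AsmStreamer : public TargetStreamerBase {
    public:
      void emitDirectiveSetPop() override {
        std::cout << "\t.set\tpop\n";
        TargetStreamerBase::emitDirectiveSetPop(); // keep the flag in sync
      }
    };

    int main() {
      AsmStreamer S;
      S.emitDirectiveSetPop();
      return S.moduleDirectiveAllowed() ? 1 : 0; // 0: .module is now forbidden
    }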
diff --git a/lib/Target/Mips/MicroMipsInstrInfo.td b/lib/Target/Mips/MicroMipsInstrInfo.td
index e20df2f..2aab739 100644
--- a/lib/Target/Mips/MicroMipsInstrInfo.td
+++ b/lib/Target/Mips/MicroMipsInstrInfo.td
@@ -642,8 +642,10 @@ let DecoderNamespace = "MicroMips", Predicates = [InMicroMips] in {
LW_FM_MM<0xc>;
/// Arithmetic Instructions (3-Operand, R-Type)
- def ADDu_MM : MMRel, ArithLogicR<"addu", GPR32Opnd>, ADD_FM_MM<0, 0x150>;
- def SUBu_MM : MMRel, ArithLogicR<"subu", GPR32Opnd>, ADD_FM_MM<0, 0x1d0>;
+ def ADDu_MM : MMRel, ArithLogicR<"addu", GPR32Opnd, 1, II_ADDU, add>,
+ ADD_FM_MM<0, 0x150>;
+ def SUBu_MM : MMRel, ArithLogicR<"subu", GPR32Opnd, 0, II_SUBU, sub>,
+ ADD_FM_MM<0, 0x1d0>;
def MUL_MM : MMRel, ArithLogicR<"mul", GPR32Opnd>, ADD_FM_MM<0, 0x210>;
def ADD_MM : MMRel, ArithLogicR<"add", GPR32Opnd>, ADD_FM_MM<0, 0x110>;
def SUB_MM : MMRel, ArithLogicR<"sub", GPR32Opnd>, ADD_FM_MM<0, 0x190>;
@@ -883,6 +885,8 @@ def : MipsPat<(i32 immSExt16:$imm),
(ADDiu_MM ZERO, immSExt16:$imm)>;
def : MipsPat<(i32 immZExt16:$imm),
(ORi_MM ZERO, immZExt16:$imm)>;
+def : MipsPat<(not GPR32:$in),
+ (NOR_MM GPR32Opnd:$in, ZERO)>;
def : MipsPat<(add GPRMM16:$src, immSExtAddiur2:$imm),
(ADDIUR2_MM GPRMM16:$src, immSExtAddiur2:$imm)>;
diff --git a/lib/Target/Mips/Mips.h b/lib/Target/Mips/Mips.h
index cb09c1a..671d7a8 100644
--- a/lib/Target/Mips/Mips.h
+++ b/lib/Target/Mips/Mips.h
@@ -20,8 +20,13 @@
namespace llvm {
class MipsTargetMachine;
+ class ModulePass;
class FunctionPass;
+ ModulePass *createMipsOs16Pass(MipsTargetMachine &TM);
+ ModulePass *createMips16HardFloatPass(MipsTargetMachine &TM);
+
+ FunctionPass *createMipsModuleISelDagPass(MipsTargetMachine &TM);
FunctionPass *createMipsOptimizePICCallPass(MipsTargetMachine &TM);
FunctionPass *createMipsDelaySlotFillerPass(MipsTargetMachine &TM);
FunctionPass *createMipsLongBranchPass(MipsTargetMachine &TM);
diff --git a/lib/Target/Mips/Mips.td b/lib/Target/Mips/Mips.td
index 01c548e..ca24741 100644
--- a/lib/Target/Mips/Mips.td
+++ b/lib/Target/Mips/Mips.td
@@ -58,22 +58,22 @@ def MipsInstrInfo : InstrInfo;
//===----------------------------------------------------------------------===//
def FeatureNoABICalls : SubtargetFeature<"noabicalls", "NoABICalls", "true",
- "Disable SVR4-style position-independent code.">;
+ "Disable SVR4-style position-independent code">;
def FeatureGP64Bit : SubtargetFeature<"gp64", "IsGP64bit", "true",
- "General Purpose Registers are 64-bit wide.">;
+ "General Purpose Registers are 64-bit wide">;
def FeatureFP64Bit : SubtargetFeature<"fp64", "IsFP64bit", "true",
- "Support 64-bit FP registers.">;
+ "Support 64-bit FP registers">;
def FeatureFPXX : SubtargetFeature<"fpxx", "IsFPXX", "true",
- "Support for FPXX.">;
+ "Support for FPXX">;
def FeatureNaN2008 : SubtargetFeature<"nan2008", "IsNaN2008bit", "true",
- "IEEE 754-2008 NaN encoding.">;
+ "IEEE 754-2008 NaN encoding">;
def FeatureSingleFloat : SubtargetFeature<"single-float", "IsSingleFloat",
"true", "Only supports single precision float">;
def FeatureNoOddSPReg : SubtargetFeature<"nooddspreg", "UseOddSPReg", "false",
"Disable odd numbered single-precision "
"registers">;
def FeatureVFPU : SubtargetFeature<"vfpu", "HasVFPU",
- "true", "Enable vector FPU instructions.">;
+ "true", "Enable vector FPU instructions">;
def FeatureMips1 : SubtargetFeature<"mips1", "MipsArchVersion", "Mips1",
"Mips I ISA Support [highly experimental]">;
def FeatureMips2 : SubtargetFeature<"mips2", "MipsArchVersion", "Mips2",
diff --git a/lib/Target/Mips/Mips16HardFloat.cpp b/lib/Target/Mips/Mips16HardFloat.cpp
index 32dc90a..893fc7c 100644
--- a/lib/Target/Mips/Mips16HardFloat.cpp
+++ b/lib/Target/Mips/Mips16HardFloat.cpp
@@ -11,7 +11,7 @@
//
//===----------------------------------------------------------------------===//
-#include "Mips16HardFloat.h"
+#include "MipsTargetMachine.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Debug.h"
@@ -19,38 +19,51 @@
#include <algorithm>
#include <string>
-#define DEBUG_TYPE "mips16-hard-float"
+using namespace llvm;
-static void inlineAsmOut
- (LLVMContext &C, StringRef AsmString, BasicBlock *BB ) {
- std::vector<llvm::Type *> AsmArgTypes;
- std::vector<llvm::Value*> AsmArgs;
- llvm::FunctionType *AsmFTy =
- llvm::FunctionType::get(Type::getVoidTy(C),
- AsmArgTypes, false);
- llvm::InlineAsm *IA =
- llvm::InlineAsm::get(AsmFTy, AsmString, "", true,
- /* IsAlignStack */ false,
- llvm::InlineAsm::AD_ATT);
- CallInst::Create(IA, AsmArgs, "", BB);
-}
+#define DEBUG_TYPE "mips16-hard-float"
namespace {
+ class Mips16HardFloat : public ModulePass {
+ public:
+ static char ID;
-class InlineAsmHelper {
- LLVMContext &C;
- BasicBlock *BB;
-public:
- InlineAsmHelper(LLVMContext &C_, BasicBlock *BB_) :
- C(C_), BB(BB_) {
- }
+ Mips16HardFloat(MipsTargetMachine &TM_) : ModulePass(ID), TM(TM_) {}
- void Out(StringRef AsmString) {
- inlineAsmOut(C, AsmString, BB);
- }
+ const char *getPassName() const override {
+ return "MIPS16 Hard Float Pass";
+ }
-};
+ bool runOnModule(Module &M) override;
+
+ protected:
+ const MipsTargetMachine &TM;
+ };
+
+ class InlineAsmHelper {
+ LLVMContext &C;
+ BasicBlock *BB;
+ public:
+ InlineAsmHelper(LLVMContext &C_, BasicBlock *BB_) :
+ C(C_), BB(BB_) {
+ }
+
+ void Out(StringRef AsmString) {
+ std::vector<llvm::Type *> AsmArgTypes;
+ std::vector<llvm::Value*> AsmArgs;
+
+ llvm::FunctionType *AsmFTy = llvm::FunctionType::get(Type::getVoidTy(C),
+ AsmArgTypes, false);
+ llvm::InlineAsm *IA = llvm::InlineAsm::get(AsmFTy, AsmString, "", true,
+ /* IsAlignStack */ false,
+ llvm::InlineAsm::AD_ATT);
+ CallInst::Create(IA, AsmArgs, "", BB);
+ }
+ };
+
+ char Mips16HardFloat::ID = 0;
}
+
//
// Return types that matter for hard float are:
// float, double, complex float, and complex double
@@ -154,11 +167,11 @@ static bool needsFPStubFromParams(Function &F) {
if (F.arg_size() >=1) {
Type *ArgType = F.getFunctionType()->getParamType(0);
switch (ArgType->getTypeID()) {
- case Type::FloatTyID:
- case Type::DoubleTyID:
- return true;
- default:
- break;
+ case Type::FloatTyID:
+ case Type::DoubleTyID:
+ return true;
+ default:
+ break;
}
}
return false;
@@ -182,10 +195,8 @@ static bool needsFPHelperFromSig(Function &F) {
// We swap between FP and Integer registers to allow Mips16 and Mips32 to
// interoperate
//
-
-static void swapFPIntParams
- (FPParamVariant PV, Module *M, InlineAsmHelper &IAH,
- bool LE, bool ToFP) {
+static void swapFPIntParams(FPParamVariant PV, Module *M, InlineAsmHelper &IAH,
+ bool LE, bool ToFP) {
//LLVMContext &Context = M->getContext();
std::string MI = ToFP? "mtc1 ": "mfc1 ";
switch (PV) {
@@ -242,6 +253,7 @@ static void swapFPIntParams
return;
}
}
+
//
// Make sure that we know we already need a stub for this function.
// Having called needsFPHelperFromSig
@@ -297,8 +309,8 @@ static void assureFPCallStub(Function &F, Module *M,
break;
case CFRet:
if (LE) {
- IAH.Out("mfc1 $$2,$$f0");
- IAH.Out("mfc1 $$3,$$f2");
+ IAH.Out("mfc1 $$2,$$f0");
+ IAH.Out("mfc1 $$3,$$f2");
} else {
IAH.Out("mfc1 $$3,$$f0");
IAH.Out("mfc1 $$3,$$f2");
@@ -331,28 +343,27 @@ static void assureFPCallStub(Function &F, Module *M,
//
// Functions that are llvm intrinsics and don't need helpers.
//
-static const char *IntrinsicInline[] =
- {"fabs",
- "fabsf",
- "llvm.ceil.f32", "llvm.ceil.f64",
- "llvm.copysign.f32", "llvm.copysign.f64",
- "llvm.cos.f32", "llvm.cos.f64",
- "llvm.exp.f32", "llvm.exp.f64",
- "llvm.exp2.f32", "llvm.exp2.f64",
- "llvm.fabs.f32", "llvm.fabs.f64",
- "llvm.floor.f32", "llvm.floor.f64",
- "llvm.fma.f32", "llvm.fma.f64",
- "llvm.log.f32", "llvm.log.f64",
- "llvm.log10.f32", "llvm.log10.f64",
- "llvm.nearbyint.f32", "llvm.nearbyint.f64",
- "llvm.pow.f32", "llvm.pow.f64",
- "llvm.powi.f32", "llvm.powi.f64",
- "llvm.rint.f32", "llvm.rint.f64",
- "llvm.round.f32", "llvm.round.f64",
- "llvm.sin.f32", "llvm.sin.f64",
- "llvm.sqrt.f32", "llvm.sqrt.f64",
- "llvm.trunc.f32", "llvm.trunc.f64",
- };
+static const char *IntrinsicInline[] = {
+ "fabs", "fabsf",
+ "llvm.ceil.f32", "llvm.ceil.f64",
+ "llvm.copysign.f32", "llvm.copysign.f64",
+ "llvm.cos.f32", "llvm.cos.f64",
+ "llvm.exp.f32", "llvm.exp.f64",
+ "llvm.exp2.f32", "llvm.exp2.f64",
+ "llvm.fabs.f32", "llvm.fabs.f64",
+ "llvm.floor.f32", "llvm.floor.f64",
+ "llvm.fma.f32", "llvm.fma.f64",
+ "llvm.log.f32", "llvm.log.f64",
+ "llvm.log10.f32", "llvm.log10.f64",
+ "llvm.nearbyint.f32", "llvm.nearbyint.f64",
+ "llvm.pow.f32", "llvm.pow.f64",
+ "llvm.powi.f32", "llvm.powi.f64",
+ "llvm.rint.f32", "llvm.rint.f64",
+ "llvm.round.f32", "llvm.round.f64",
+ "llvm.sin.f32", "llvm.sin.f64",
+ "llvm.sqrt.f32", "llvm.sqrt.f64",
+ "llvm.trunc.f32", "llvm.trunc.f64",
+};
static bool isIntrinsicInline(Function *F) {
return std::binary_search(std::begin(IntrinsicInline),
@@ -384,9 +395,10 @@ static bool fixupFPReturnAndCall(Function &F, Module *M,
Type *T = RVal->getType();
FPReturnVariant RV = whichFPReturnVariant(T);
if (RV == NoFPRet) continue;
- static const char* Helper[NoFPRet] =
- {"__mips16_ret_sf", "__mips16_ret_df", "__mips16_ret_sc",
- "__mips16_ret_dc"};
+ static const char* Helper[NoFPRet] = {
+ "__mips16_ret_sf", "__mips16_ret_df", "__mips16_ret_sc",
+ "__mips16_ret_dc"
+ };
const char *Name = Helper[RV];
AttributeSet A;
Value *Params[] = {RVal};
@@ -406,33 +418,33 @@ static bool fixupFPReturnAndCall(Function &F, Module *M,
Value *F = (M->getOrInsertFunction(Name, A, MyVoid, T, nullptr));
CallInst::Create(F, Params, "", &Inst );
} else if (const CallInst *CI = dyn_cast<CallInst>(I)) {
- const Value* V = CI->getCalledValue();
- const Type* T = nullptr;
- if (V) T = V->getType();
- const PointerType *PFT=nullptr;
- if (T) PFT = dyn_cast<PointerType>(T);
- const FunctionType *FT=nullptr;
- if (PFT) FT = dyn_cast<FunctionType>(PFT->getElementType());
- Function *F_ = CI->getCalledFunction();
- if (FT && needsFPReturnHelper(*FT) &&
- !(F_ && isIntrinsicInline(F_))) {
+ const Value* V = CI->getCalledValue();
+ const Type* T = nullptr;
+ if (V) T = V->getType();
+ const PointerType *PFT=nullptr;
+ if (T) PFT = dyn_cast<PointerType>(T);
+ const FunctionType *FT=nullptr;
+ if (PFT) FT = dyn_cast<FunctionType>(PFT->getElementType());
+ Function *F_ = CI->getCalledFunction();
+ if (FT && needsFPReturnHelper(*FT) &&
+ !(F_ && isIntrinsicInline(F_))) {
+ Modified=true;
+ F.addFnAttr("saveS2");
+ }
+ if (F_ && !isIntrinsicInline(F_)) {
+ // pic mode calls are handled by already defined
+ // helper functions
+ if (needsFPReturnHelper(*F_)) {
Modified=true;
F.addFnAttr("saveS2");
}
- if (F_ && !isIntrinsicInline(F_)) {
- // pic mode calls are handled by already defined
- // helper functions
- if (needsFPReturnHelper(*F_)) {
+ if (TM.getRelocationModel() != Reloc::PIC_ ) {
+ if (needsFPHelperFromSig(*F_)) {
+ assureFPCallStub(*F_, M, TM);
Modified=true;
- F.addFnAttr("saveS2");
- }
- if (TM.getRelocationModel() != Reloc::PIC_ ) {
- if (needsFPHelperFromSig(*F_)) {
- assureFPCallStub(*F_, M, TM);
- Modified=true;
- }
}
}
+ }
}
}
return Modified;
@@ -489,7 +501,6 @@ static void removeUseSoftFloat(Function &F) {
F.addAttributes(AttributeSet::FunctionIndex, A);
}
-namespace llvm {
//
// This pass only makes sense when the underlying chip has floating point but
@@ -530,11 +541,7 @@ bool Mips16HardFloat::runOnModule(Module &M) {
return Modified;
}
-char Mips16HardFloat::ID = 0;
-
-}
-ModulePass *llvm::createMips16HardFloat(MipsTargetMachine &TM) {
+ModulePass *llvm::createMips16HardFloatPass(MipsTargetMachine &TM) {
return new Mips16HardFloat(TM);
}
-
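With Mips16HardFloat.h deleted below, the pass definition lives entirely in the .cpp: the class and its ID sit in an anonymous namespace and only the createMips16HardFloatPass factory is exported. A minimal sketch of that arrangement, assuming the pass API of this tree (the pass name here is hypothetical):

    #include "llvm/Pass.h"
    using namespace llvm;

    namespace {
    class ExamplePass : public ModulePass {
    public:
      static char ID;
      ExamplePass() : ModulePass(ID) {}
      const char *getPassName() const override { return "Example Pass"; }
      bool runOnModule(Module &M) override { return false; } // nothing changed
    };
    char ExamplePass::ID = 0;
    } // anonymous namespace: nothing above is visible to other TUs

    // The factory is the only externally visible symbol.
    ModulePass *createExamplePass() { return new ExamplePass(); }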
diff --git a/lib/Target/Mips/Mips16HardFloat.h b/lib/Target/Mips/Mips16HardFloat.h
deleted file mode 100644
index 586cc25..0000000
--- a/lib/Target/Mips/Mips16HardFloat.h
+++ /dev/null
@@ -1,43 +0,0 @@
-//===---- Mips16HardFloat.h for Mips16 Hard Float --------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines a phase which implements part of the floating point
-// interoperability between Mips16 and Mips32 code.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TARGET_MIPS_MIPS16HARDFLOAT_H
-#define LLVM_LIB_TARGET_MIPS_MIPS16HARDFLOAT_H
-
-#include "MCTargetDesc/MipsMCTargetDesc.h"
-#include "MipsTargetMachine.h"
-#include "llvm/Pass.h"
-#include "llvm/Target/TargetMachine.h"
-
-using namespace llvm;
-
-namespace llvm {
-
-class Mips16HardFloat : public ModulePass {
-public:
- static char ID;
-
- Mips16HardFloat(MipsTargetMachine &TM_) : ModulePass(ID), TM(TM_) {}
-
- const char *getPassName() const override { return "MIPS16 Hard Float Pass"; }
- bool runOnModule(Module &M) override;
-
-protected:
- const MipsTargetMachine &TM;
-};
-
-ModulePass *createMips16HardFloat(MipsTargetMachine &TM);
-
-}
-#endif
diff --git a/lib/Target/Mips/Mips16InstrInfo.cpp b/lib/Target/Mips/Mips16InstrInfo.cpp
index 976becc..00d4495 100644
--- a/lib/Target/Mips/Mips16InstrInfo.cpp
+++ b/lib/Target/Mips/Mips16InstrInfo.cpp
@@ -1,4 +1,3 @@
-
//===-- Mips16InstrInfo.cpp - Mips16 Instruction Information --------------===//
//
// The LLVM Compiler Infrastructure
@@ -25,6 +24,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
#include <cctype>
using namespace llvm;
@@ -32,7 +32,7 @@ using namespace llvm;
#define DEBUG_TYPE "mips16-instrinfo"
Mips16InstrInfo::Mips16InstrInfo(const MipsSubtarget &STI)
- : MipsInstrInfo(STI, Mips::Bimm16), RI(STI) {}
+ : MipsInstrInfo(STI, Mips::Bimm16), RI() {}
const MipsRegisterInfo &Mips16InstrInfo::getRegisterInfo() const {
return RI;
diff --git a/lib/Target/Mips/Mips16InstrInfo.h b/lib/Target/Mips/Mips16InstrInfo.h
index e7d0c07..f9b7387 100644
--- a/lib/Target/Mips/Mips16InstrInfo.h
+++ b/lib/Target/Mips/Mips16InstrInfo.h
@@ -18,7 +18,7 @@
#include "MipsInstrInfo.h"
namespace llvm {
-
+class MipsSubtarget;
class Mips16InstrInfo : public MipsInstrInfo {
const Mips16RegisterInfo RI;
diff --git a/lib/Target/Mips/Mips16RegisterInfo.cpp b/lib/Target/Mips/Mips16RegisterInfo.cpp
index c45acc4..ebd51d7 100644
--- a/lib/Target/Mips/Mips16RegisterInfo.cpp
+++ b/lib/Target/Mips/Mips16RegisterInfo.cpp
@@ -41,8 +41,7 @@ using namespace llvm;
#define DEBUG_TYPE "mips16-registerinfo"
-Mips16RegisterInfo::Mips16RegisterInfo(const MipsSubtarget &ST)
- : MipsRegisterInfo(ST) {}
+Mips16RegisterInfo::Mips16RegisterInfo() : MipsRegisterInfo() {}
bool Mips16RegisterInfo::requiresRegisterScavenging
(const MachineFunction &MF) const {
@@ -65,7 +64,7 @@ bool Mips16RegisterInfo::saveScavengerRegister
const TargetRegisterClass *RC,
unsigned Reg) const {
DebugLoc DL;
- const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
+ const TargetInstrInfo &TII = *MBB.getParent()->getSubtarget().getInstrInfo();
TII.copyPhysReg(MBB, I, DL, Mips::T0, Reg, true);
TII.copyPhysReg(MBB, UseMI, DL, Reg, Mips::T0, true);
return true;
@@ -106,7 +105,7 @@ void Mips16RegisterInfo::eliminateFI(MachineBasicBlock::iterator II,
if (FrameIndex >= MinCSFI && FrameIndex <= MaxCSFI)
FrameReg = Mips::SP;
else {
- const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
+ const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
if (TFI->hasFP(MF)) {
FrameReg = Mips::S0;
}
@@ -140,7 +139,7 @@ void Mips16RegisterInfo::eliminateFI(MachineBasicBlock::iterator II,
DebugLoc DL = II->getDebugLoc();
unsigned NewImm;
const Mips16InstrInfo &TII =
- *static_cast<const Mips16InstrInfo *>(Subtarget.getInstrInfo());
+ *static_cast<const Mips16InstrInfo *>(MF.getSubtarget().getInstrInfo());
FrameReg = TII.loadImmediate(FrameReg, Offset, MBB, II, DL, NewImm);
Offset = SignExtend64<16>(NewImm);
IsKill = true;
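Mips16RegisterInfo no longer stores a MipsSubtarget reference; each use site recovers the subtarget from the MachineFunction at hand, which stays correct even when one register-info object serves functions compiled for different subtargets. A sketch of the pattern, assuming the CodeGen API of this tree:

    #include "llvm/CodeGen/MachineFunction.h"
    #include "llvm/Target/TargetSubtargetInfo.h"
    using namespace llvm;

    // Derive per-function facilities at the point of use, not at construction.
    static const TargetInstrInfo *instrInfoFor(const MachineFunction &MF) {
      return MF.getSubtarget().getInstrInfo();
    }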
diff --git a/lib/Target/Mips/Mips16RegisterInfo.h b/lib/Target/Mips/Mips16RegisterInfo.h
index 3cdf836..d67a79b 100644
--- a/lib/Target/Mips/Mips16RegisterInfo.h
+++ b/lib/Target/Mips/Mips16RegisterInfo.h
@@ -21,7 +21,7 @@ class Mips16InstrInfo;
class Mips16RegisterInfo : public MipsRegisterInfo {
public:
- Mips16RegisterInfo(const MipsSubtarget &Subtarget);
+ Mips16RegisterInfo();
bool requiresRegisterScavenging(const MachineFunction &MF) const override;
diff --git a/lib/Target/Mips/Mips64InstrInfo.td b/lib/Target/Mips/Mips64InstrInfo.td
index 776e473..b1cb7f7 100644
--- a/lib/Target/Mips/Mips64InstrInfo.td
+++ b/lib/Target/Mips/Mips64InstrInfo.td
@@ -604,7 +604,7 @@ def : MipsInstAlias<"syncws", (SYNC 0x5), 0>;
// Assembler Pseudo Instructions
//===----------------------------------------------------------------------===//
-class LoadImm64<string instr_asm, Operand Od, RegisterOperand RO> :
+class LoadImmediate64<string instr_asm, Operand Od, RegisterOperand RO> :
MipsAsmPseudoInst<(outs RO:$rt), (ins Od:$imm64),
!strconcat(instr_asm, "\t$rt, $imm64")> ;
-def LoadImm64Reg : LoadImm64<"dli", imm64, GPR64Opnd>;
+def LoadImm64 : LoadImmediate64<"dli", imm64, GPR64Opnd>;
diff --git a/lib/Target/Mips/MipsAsmPrinter.cpp b/lib/Target/Mips/MipsAsmPrinter.cpp
index c662e13..1eb3b2c 100644
--- a/lib/Target/Mips/MipsAsmPrinter.cpp
+++ b/lib/Target/Mips/MipsAsmPrinter.cpp
@@ -252,6 +252,7 @@ void MipsAsmPrinter::printSavedRegsBitmask() {
// Set the CPU and FPU Bitmasks
const MachineFrameInfo *MFI = MF->getFrameInfo();
+ const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
// size of stack area to which FP callee-saved regs are saved.
unsigned CPURegSize = Mips::GPR32RegClass.getSize();
@@ -267,8 +268,7 @@ void MipsAsmPrinter::printSavedRegsBitmask() {
if (Mips::GPR32RegClass.contains(Reg))
break;
- unsigned RegNum =
- TM.getSubtargetImpl()->getRegisterInfo()->getEncodingValue(Reg);
+ unsigned RegNum = TRI->getEncodingValue(Reg);
if (Mips::AFGR64RegClass.contains(Reg)) {
FPUBitmask |= (3 << RegNum);
CSFPRegsSize += AFGR64RegSize;
@@ -283,8 +283,7 @@ void MipsAsmPrinter::printSavedRegsBitmask() {
// Set CPU Bitmask.
for (; i != e; ++i) {
unsigned Reg = CSI[i].getReg();
- unsigned RegNum =
- TM.getSubtargetImpl()->getRegisterInfo()->getEncodingValue(Reg);
+ unsigned RegNum = TRI->getEncodingValue(Reg);
CPUBitmask |= (1 << RegNum);
}
@@ -309,7 +308,7 @@ void MipsAsmPrinter::printSavedRegsBitmask() {
/// Frame Directive
void MipsAsmPrinter::emitFrameDirective() {
- const TargetRegisterInfo &RI = *TM.getSubtargetImpl()->getRegisterInfo();
+ const TargetRegisterInfo &RI = *MF->getSubtarget().getRegisterInfo();
unsigned stackReg = RI.getFrameRegister(*MF);
unsigned returnReg = RI.getRARegister();
@@ -438,7 +437,7 @@ bool MipsAsmPrinter::isBlockOnlyReachableByFallthrough(const MachineBasicBlock*
// Print out an operand for an inline asm expression.
bool MipsAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
- unsigned AsmVariant,const char *ExtraCode,
+ unsigned AsmVariant, const char *ExtraCode,
raw_ostream &O) {
// Does this asm operand have a single letter operand modifier?
if (ExtraCode && ExtraCode[0]) {
@@ -540,18 +539,24 @@ bool MipsAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
unsigned OpNum, unsigned AsmVariant,
const char *ExtraCode,
raw_ostream &O) {
- int Offset = 0;
+ assert(OpNum + 1 < MI->getNumOperands() && "Insufficient operands");
+ const MachineOperand &BaseMO = MI->getOperand(OpNum);
+ const MachineOperand &OffsetMO = MI->getOperand(OpNum + 1);
+ assert(BaseMO.isReg() && "Unexpected base pointer for inline asm memory operand.");
+ assert(OffsetMO.isImm() && "Unexpected offset for inline asm memory operand.");
+ int Offset = OffsetMO.getImm();
+
// Currently we are expecting either no ExtraCode or 'D'
if (ExtraCode) {
if (ExtraCode[0] == 'D')
- Offset = 4;
+ Offset += 4;
else
return true; // Unknown modifier.
+ // FIXME: M = high order bits
+ // FIXME: L = low order bits
}
- const MachineOperand &MO = MI->getOperand(OpNum);
- assert(MO.isReg() && "unexpected inline asm memory operand");
- O << Offset << "($" << MipsInstPrinter::getRegisterName(MO.getReg()) << ")";
+ O << Offset << "($" << MipsInstPrinter::getRegisterName(BaseMO.getReg()) << ")";
return false;
}
diff --git a/lib/Target/Mips/MipsCallingConv.td b/lib/Target/Mips/MipsCallingConv.td
index abee185..dcd88f2 100644
--- a/lib/Target/Mips/MipsCallingConv.td
+++ b/lib/Target/Mips/MipsCallingConv.td
@@ -123,7 +123,7 @@ def CC_MipsN_SoftFloat : CallingConv<[
]>;
def CC_MipsN : CallingConv<[
- CCIfType<[i8, i16, i32],
+ CCIfType<[i8, i16, i32, i64],
CCIfSubtargetNot<"isLittle()",
CCIfInReg<CCPromoteToUpperBitsInType<i64>>>>,
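CC_MipsN extends the in-register upper-bits promotion rule to i64 alongside the narrower integer types on big-endian targets. For a value narrower than the 64-bit slot, the promotion places it in the slot's high half; a host-side illustration of that placement:

    #include <cstdint>
    #include <cstdio>

    int main() {
      // An i32 argument passed in a 64-bit register on a big-endian N32/N64
      // target occupies the upper 32 bits of the slot.
      uint32_t Arg = 0x12345678u;
      uint64_t Slot = static_cast<uint64_t>(Arg) << 32;
      std::printf("%016llx\n", (unsigned long long)Slot); // 1234567800000000
      return 0;
    }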
diff --git a/lib/Target/Mips/MipsDelaySlotFiller.cpp b/lib/Target/Mips/MipsDelaySlotFiller.cpp
index ac03c0b..606964d 100644
--- a/lib/Target/Mips/MipsDelaySlotFiller.cpp
+++ b/lib/Target/Mips/MipsDelaySlotFiller.cpp
@@ -140,7 +140,7 @@ namespace {
/// memory instruction can be moved to a delay slot.
class MemDefsUses : public InspectMemInstr {
public:
- MemDefsUses(const MachineFrameInfo *MFI);
+ MemDefsUses(const DataLayout &DL, const MachineFrameInfo *MFI);
private:
typedef PointerUnion<const Value *, const PseudoSourceValue *> ValueType;
@@ -158,6 +158,7 @@ namespace {
const MachineFrameInfo *MFI;
SmallPtrSet<ValueType, 4> Uses, Defs;
+ const DataLayout &DL;
/// Flags indicating whether loads or stores with no underlying objects have
/// been seen.
@@ -212,8 +213,8 @@ namespace {
/// moved to the delay slot. Returns true on success.
template<typename IterTy>
bool searchRange(MachineBasicBlock &MBB, IterTy Begin, IterTy End,
- RegDefsUses &RegDU, InspectMemInstr &IM,
- IterTy &Filler, Iter Slot) const;
+ RegDefsUses &RegDU, InspectMemInstr &IM, Iter Slot,
+ IterTy &Filler) const;
/// This function searches in the backward direction for an instruction that
/// can be moved to the delay slot. Returns true on success.
@@ -320,7 +321,8 @@ void RegDefsUses::setCallerSaved(const MachineInstr &MI) {
CallerSavedRegs.reset(Mips::ZERO);
CallerSavedRegs.reset(Mips::ZERO_64);
- for (const MCPhysReg *R = TRI.getCalleeSavedRegs(); *R; ++R)
+ for (const MCPhysReg *R = TRI.getCalleeSavedRegs(MI.getParent()->getParent());
+ *R; ++R)
for (MCRegAliasIterator AI(*R, &TRI, true); AI.isValid(); ++AI)
CallerSavedRegs.reset(*AI);
@@ -427,9 +429,9 @@ bool LoadFromStackOrConst::hasHazard_(const MachineInstr &MI) {
return true;
}
-MemDefsUses::MemDefsUses(const MachineFrameInfo *MFI_)
- : InspectMemInstr(false), MFI(MFI_), SeenNoObjLoad(false),
- SeenNoObjStore(false) {}
+MemDefsUses::MemDefsUses(const DataLayout &DL, const MachineFrameInfo *MFI_)
+ : InspectMemInstr(false), MFI(MFI_), DL(DL), SeenNoObjLoad(false),
+ SeenNoObjStore(false) {}
bool MemDefsUses::hasHazard_(const MachineInstr &MI) {
bool HasHazard = false;
@@ -482,7 +484,7 @@ getUnderlyingObjects(const MachineInstr &MI,
const Value *V = (*MI.memoperands_begin())->getValue();
SmallVector<Value *, 4> Objs;
- GetUnderlyingObjects(const_cast<Value *>(V), Objs);
+ GetUnderlyingObjects(const_cast<Value *>(V), Objs, DL);
for (SmallVectorImpl<Value *>::iterator I = Objs.begin(), E = Objs.end();
I != E; ++I) {
@@ -639,8 +641,8 @@ FunctionPass *llvm::createMipsDelaySlotFillerPass(MipsTargetMachine &tm) {
template<typename IterTy>
bool Filler::searchRange(MachineBasicBlock &MBB, IterTy Begin, IterTy End,
- RegDefsUses &RegDU, InspectMemInstr& IM,
- IterTy &Filler, Iter Slot) const {
+ RegDefsUses &RegDU, InspectMemInstr& IM, Iter Slot,
+ IterTy &Filler) const {
for (IterTy I = Begin; I != End; ++I) {
// skip debug value
if (I->isDebugValue())
@@ -688,13 +690,13 @@ bool Filler::searchBackward(MachineBasicBlock &MBB, Iter Slot) const {
return false;
RegDefsUses RegDU(*MBB.getParent()->getSubtarget().getRegisterInfo());
- MemDefsUses MemDU(MBB.getParent()->getFrameInfo());
+ MemDefsUses MemDU(*TM.getDataLayout(), MBB.getParent()->getFrameInfo());
ReverseIter Filler;
RegDU.init(*Slot);
- if (!searchRange(MBB, ReverseIter(Slot), MBB.rend(), RegDU, MemDU, Filler,
- Slot))
+ if (!searchRange(MBB, ReverseIter(Slot), MBB.rend(), RegDU, MemDU, Slot,
+ Filler))
return false;
MBB.splice(std::next(Slot), &MBB, std::next(Filler).base());
@@ -714,7 +716,7 @@ bool Filler::searchForward(MachineBasicBlock &MBB, Iter Slot) const {
RegDU.setCallerSaved(*Slot);
- if (!searchRange(MBB, std::next(Slot), MBB.end(), RegDU, NM, Filler, Slot))
+ if (!searchRange(MBB, std::next(Slot), MBB.end(), RegDU, NM, Slot, Filler))
return false;
MBB.splice(std::next(Slot), &MBB, Filler);
@@ -754,11 +756,11 @@ bool Filler::searchSuccBBs(MachineBasicBlock &MBB, Iter Slot) const {
IM.reset(new LoadFromStackOrConst());
} else {
const MachineFrameInfo *MFI = MBB.getParent()->getFrameInfo();
- IM.reset(new MemDefsUses(MFI));
+ IM.reset(new MemDefsUses(*TM.getDataLayout(), MFI));
}
- if (!searchRange(MBB, SuccBB->begin(), SuccBB->end(), RegDU, *IM, Filler,
- Slot))
+ if (!searchRange(MBB, SuccBB->begin(), SuccBB->end(), RegDU, *IM, Slot,
+ Filler))
return false;
insertDelayFiller(Filler, BrMap);
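Two mechanical changes thread through the delay-slot filler: searchRange's parameters are reordered so the output iterator comes last, and MemDefsUses now carries the DataLayout that GetUnderlyingObjects requires at this revision. A sketch of the updated call, assuming the ValueTracking API of this tree:

    #include "llvm/ADT/SmallVector.h"
    #include "llvm/Analysis/ValueTracking.h"
    using namespace llvm;

    // Collecting the objects a pointer value may be based on now needs the
    // module's DataLayout.
    static void collectObjects(const Value *V, const DataLayout &DL,
                               SmallVectorImpl<Value *> &Objs) {
      GetUnderlyingObjects(const_cast<Value *>(V), Objs, DL);
    }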
diff --git a/lib/Target/Mips/MipsFastISel.cpp b/lib/Target/Mips/MipsFastISel.cpp
index 7d69659..7de0081 100644
--- a/lib/Target/Mips/MipsFastISel.cpp
+++ b/lib/Target/Mips/MipsFastISel.cpp
@@ -89,6 +89,7 @@ class MipsFastISel final : public FastISel {
private:
// Selection routines.
+ bool selectLogicalOp(const Instruction *I);
bool selectLoad(const Instruction *I);
bool selectStore(const Instruction *I);
bool selectBranch(const Instruction *I);
@@ -102,6 +103,7 @@ private:
// Utility helper routines.
bool isTypeLegal(Type *Ty, MVT &VT);
+ bool isTypeSupported(Type *Ty, MVT &VT);
bool isLoadTypeLegal(Type *Ty, MVT &VT);
bool computeAddress(const Value *Obj, Address &Addr);
bool computeCallAddress(const Value *V, Address &Addr);
@@ -129,6 +131,9 @@ private:
unsigned getRegEnsuringSimpleIntegerWidening(const Value *, bool IsUnsigned);
+ unsigned emitLogicalOp(unsigned ISDOpc, MVT RetVT, const Value *LHS,
+ const Value *RHS);
+
unsigned materializeFP(const ConstantFP *CFP, MVT VT);
unsigned materializeGV(const GlobalValue *GV, MVT VT);
unsigned materializeInt(const Constant *C, MVT VT);
@@ -210,6 +215,43 @@ CCAssignFn *MipsFastISel::CCAssignFnForCall(CallingConv::ID CC) const {
return CC_MipsO32;
}
+unsigned MipsFastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT,
+ const Value *LHS, const Value *RHS) {
+ // Canonicalize immediates to the RHS first.
+ if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS))
+ std::swap(LHS, RHS);
+
+ unsigned Opc;
+ if (ISDOpc == ISD::AND) {
+ Opc = Mips::AND;
+ } else if (ISDOpc == ISD::OR) {
+ Opc = Mips::OR;
+ } else if (ISDOpc == ISD::XOR) {
+ Opc = Mips::XOR;
+ } else
+ llvm_unreachable("unexpected opcode");
+
+ unsigned LHSReg = getRegForValue(LHS);
+ unsigned ResultReg = createResultReg(&Mips::GPR32RegClass);
+ if (!ResultReg)
+ return 0;
+
+ unsigned RHSReg;
+ if (!LHSReg)
+ return 0;
+
+ if (const auto *C = dyn_cast<ConstantInt>(RHS))
+ RHSReg = materializeInt(C, MVT::i32);
+ else
+ RHSReg = getRegForValue(RHS);
+
+ if (!RHSReg)
+ return 0;
+
+ emitInst(Opc, ResultReg).addReg(LHSReg).addReg(RHSReg);
+ return ResultReg;
+}
+
unsigned MipsFastISel::materializeInt(const Constant *C, MVT VT) {
if (VT != MVT::i32 && VT != MVT::i16 && VT != MVT::i8 && VT != MVT::i1)
return 0;
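
The new emitLogicalOp leans on commutativity: a constant on the left of and/or/xor can always be swapped to the right, so only the RHS path has to deal with immediates. A standalone sketch of that canonicalization, using plain values instead of LLVM IR (names illustrative):

#include <cassert>
#include <utility>

struct Operand { bool IsConstant; unsigned Value; };

// Commutativity means constants only ever need handling on the right,
// which keeps the materialization logic in one place.
static unsigned evalLogical(char Op, Operand LHS, Operand RHS) {
  if (LHS.IsConstant && !RHS.IsConstant)
    std::swap(LHS, RHS);
  switch (Op) {
  case '&': return LHS.Value & RHS.Value;
  case '|': return LHS.Value | RHS.Value;
  case '^': return LHS.Value ^ RHS.Value;
  }
  assert(false && "unexpected opcode");
  return 0;
}
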
@@ -421,6 +463,21 @@ bool MipsFastISel::isTypeLegal(Type *Ty, MVT &VT) {
return TLI.isTypeLegal(VT);
}
+bool MipsFastISel::isTypeSupported(Type *Ty, MVT &VT) {
+ if (Ty->isVectorTy())
+ return false;
+
+ if (isTypeLegal(Ty, VT))
+ return true;
+
+  // If this is a type that can be sign- or zero-extended to a basic operation,
+  // go ahead and accept it now.
+ if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)
+ return true;
+
+ return false;
+}
+
bool MipsFastISel::isLoadTypeLegal(Type *Ty, MVT &VT) {
if (isTypeLegal(Ty, VT))
return true;
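
isTypeSupported accepts i1/i8/i16 on top of the already-legal types because MIPS logical instructions operate on full 32-bit registers, so narrow integers can be sign- or zero-extended for free. A self-contained model of the predicate (the enum stands in for MVT; illustrative only):

enum class Ty { I1, I8, I16, I32, F32, Vector };

static bool isTypeSupportedModel(Ty T) {
  if (T == Ty::Vector)
    return false;                     // vectors are rejected outright
  if (T == Ty::I32 || T == Ty::F32)
    return true;                      // stand-ins for the "legal" case
  // Narrow integers widen to 32 bits at no cost.
  return T == Ty::I1 || T == Ty::I8 || T == Ty::I16;
}
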
@@ -671,6 +728,33 @@ bool MipsFastISel::emitStore(MVT VT, unsigned SrcReg, Address &Addr,
return false;
}
+bool MipsFastISel::selectLogicalOp(const Instruction *I) {
+ MVT VT;
+ if (!isTypeSupported(I->getType(), VT))
+ return false;
+
+ unsigned ResultReg;
+ switch (I->getOpcode()) {
+ default:
+ llvm_unreachable("Unexpected instruction.");
+ case Instruction::And:
+ ResultReg = emitLogicalOp(ISD::AND, VT, I->getOperand(0), I->getOperand(1));
+ break;
+ case Instruction::Or:
+ ResultReg = emitLogicalOp(ISD::OR, VT, I->getOperand(0), I->getOperand(1));
+ break;
+ case Instruction::Xor:
+ ResultReg = emitLogicalOp(ISD::XOR, VT, I->getOperand(0), I->getOperand(1));
+ break;
+ }
+
+ if (!ResultReg)
+ return false;
+
+ updateValueMap(I, ResultReg);
+ return true;
+}
+
bool MipsFastISel::selectLoad(const Instruction *I) {
// Atomic loads need special handling.
if (cast<LoadInst>(I)->isAtomic())
@@ -1083,7 +1167,7 @@ bool MipsFastISel::fastLowerCall(CallLoweringInfo &CLI) {
// Add a register mask with the call-preserved registers.
// Proper defs for return values will be added by setPhysRegsDeadExcept().
- MIB.addRegMask(TRI.getCallPreservedMask(CC));
+ MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC));
CLI.Call = MIB;
@@ -1312,6 +1396,10 @@ bool MipsFastISel::fastSelectInstruction(const Instruction *I) {
return selectLoad(I);
case Instruction::Store:
return selectStore(I);
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ return selectLogicalOp(I);
case Instruction::Br:
return selectBranch(I);
case Instruction::Ret:
@@ -1354,7 +1442,7 @@ unsigned MipsFastISel::getRegEnsuringSimpleIntegerWidening(const Value *V,
void MipsFastISel::simplifyAddress(Address &Addr) {
if (!isInt<16>(Addr.getOffset())) {
unsigned TempReg =
- materialize32BitInt(Addr.getOffset(), &Mips::GPR32RegClass);
+ materialize32BitInt(Addr.getOffset(), &Mips::GPR32RegClass);
unsigned DestReg = createResultReg(&Mips::GPR32RegClass);
emitInst(Mips::ADDu, DestReg).addReg(TempReg).addReg(Addr.getReg());
Addr.setReg(DestReg);
diff --git a/lib/Target/Mips/MipsISelDAGToDAG.cpp b/lib/Target/Mips/MipsISelDAGToDAG.cpp
index 21fc8ce..c78c329 100644
--- a/lib/Target/Mips/MipsISelDAGToDAG.cpp
+++ b/lib/Target/Mips/MipsISelDAGToDAG.cpp
@@ -230,9 +230,18 @@ SDNode* MipsDAGToDAGISel::Select(SDNode *Node) {
}
bool MipsDAGToDAGISel::
-SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode,
+SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
std::vector<SDValue> &OutOps) {
- assert(ConstraintCode == 'm' && "unexpected asm memory constraint");
- OutOps.push_back(Op);
- return false;
+ // All memory constraints can at least accept raw pointers.
+ switch(ConstraintID) {
+ default:
+ llvm_unreachable("Unexpected asm memory constraint");
+ case InlineAsm::Constraint_i:
+ case InlineAsm::Constraint_m:
+ case InlineAsm::Constraint_R:
+ case InlineAsm::Constraint_ZC:
+ OutOps.push_back(Op);
+ return false;
+ }
+ return true;
}
diff --git a/lib/Target/Mips/MipsISelDAGToDAG.h b/lib/Target/Mips/MipsISelDAGToDAG.h
index 6b72877..aec731e 100644
--- a/lib/Target/Mips/MipsISelDAGToDAG.h
+++ b/lib/Target/Mips/MipsISelDAGToDAG.h
@@ -125,7 +125,7 @@ private:
virtual void processFunctionAfterISel(MachineFunction &MF) = 0;
bool SelectInlineAsmMemoryOperand(const SDValue &Op,
- char ConstraintCode,
+ unsigned ConstraintID,
std::vector<SDValue> &OutOps) override;
};
}
diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp
index 9253b2e..e4bae03 100644
--- a/lib/Target/Mips/MipsISelLowering.cpp
+++ b/lib/Target/Mips/MipsISelLowering.cpp
@@ -617,6 +617,33 @@ static SDValue performSELECTCombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
+static SDValue performCMovFPCombine(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const MipsSubtarget &Subtarget) {
+ if (DCI.isBeforeLegalizeOps())
+ return SDValue();
+
+ SDValue ValueIfTrue = N->getOperand(0), ValueIfFalse = N->getOperand(2);
+
+ ConstantSDNode *FalseC = dyn_cast<ConstantSDNode>(ValueIfFalse);
+ if (!FalseC || FalseC->getZExtValue())
+ return SDValue();
+
+ // Since RHS (False) is 0, we swap the order of the True/False operands
+ // (obviously also inverting the condition) so that we can
+ // take advantage of conditional moves using the $0 register.
+ // Example:
+ // return (a != 0) ? x : 0;
+ // load $reg, x
+ // movz $reg, $0, a
+ unsigned Opc = (N->getOpcode() == MipsISD::CMovFP_T) ? MipsISD::CMovFP_F :
+ MipsISD::CMovFP_T;
+
+ SDValue FCC = N->getOperand(1), Glue = N->getOperand(3);
+ return DAG.getNode(Opc, SDLoc(N), ValueIfFalse.getValueType(),
+ ValueIfFalse, FCC, ValueIfTrue, Glue);
+}
+
static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const MipsSubtarget &Subtarget) {
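
The combine rests on a simple identity: when the false value is constant 0, cond ? x : 0 equals !cond ? 0 : x, which moves the zero into the operand slot where MIPS can substitute the hardwired $0 register. A scalar check of the identity, assuming nothing beyond C++ semantics:

#include <cassert>
#include <initializer_list>

static int selectDirect(bool c, int x)  { return c ? x : 0; }
static int selectSwapped(bool c, int x) { return !c ? 0 : x; } // inverted condition

int main() {
  for (int x : {0, 1, -7})
    for (bool c : {false, true})
      assert(selectDirect(c, x) == selectSwapped(c, x));
}
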
@@ -750,6 +777,9 @@ SDValue MipsTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI)
return performDivRemCombine(N, DAG, DCI, Subtarget);
case ISD::SELECT:
return performSELECTCombine(N, DAG, DCI, Subtarget);
+ case MipsISD::CMovFP_F:
+ case MipsISD::CMovFP_T:
+ return performCMovFPCombine(N, DAG, DCI, Subtarget);
case ISD::AND:
return performANDCombine(N, DAG, DCI, Subtarget);
case ISD::OR:
@@ -2451,7 +2481,8 @@ getOpndList(SmallVectorImpl<SDValue> &Ops,
// Add a register mask operand representing the call-preserved registers.
const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
- const uint32_t *Mask = TRI->getCallPreservedMask(CLI.CallConv);
+ const uint32_t *Mask =
+ TRI->getCallPreservedMask(CLI.DAG.getMachineFunction(), CLI.CallConv);
assert(Mask && "Missing call preserved mask for calling convention");
if (Subtarget.inMips16HardFloat()) {
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(CLI.Callee)) {
@@ -3001,6 +3032,15 @@ MipsTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
return CCInfo.CheckReturn(Outs, RetCC_Mips);
}
+bool
+MipsTargetLowering::shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const {
+ if (Subtarget.hasMips3() && Subtarget.abiUsesSoftFloat()) {
+ if (Type == MVT::i32)
+ return true;
+ }
+ return IsSigned;
+}
+
SDValue
MipsTargetLowering::LowerReturn(SDValue Chain,
CallingConv::ID CallConv, bool IsVarArg,
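
shouldSignExtendTypeInLibCall encodes an ABI rule for 64-bit-capable soft-float targets: i32 arguments to runtime library calls travel in 64-bit registers in sign-extended form, even when the C-level value is unsigned. A sketch of what the caller must guarantee (illustrative; the exact ABI clause is an assumption here):

#include <cstdint>

// Sign-extend bit 31 into the upper half, regardless of signedness.
static int64_t extendI32ForLibcall(uint32_t v) {
  return static_cast<int64_t>(static_cast<int32_t>(v));
}
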
@@ -3133,6 +3173,10 @@ getConstraintType(const std::string &Constraint) const
return C_Memory;
}
}
+
+ if (Constraint == "ZC")
+ return C_Memory;
+
return TargetLowering::getConstraintType(Constraint);
}
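
ZC is the memory constraint for addresses that ll/sc (and pref) can encode directly. A hypothetical use from GNU C, with illustrative names; the constraint string is the only MIPS-specific part:

static inline int load_linked(volatile int *p) {
  int v;
  __asm__ __volatile__("ll %0, %1" : "=r"(v) : "ZC"(*p));
  return v;
}
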
diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h
index 9f86a43..40b6661 100644
--- a/lib/Target/Mips/MipsISelLowering.h
+++ b/lib/Target/Mips/MipsISelLowering.h
@@ -475,6 +475,8 @@ namespace llvm {
const SmallVectorImpl<SDValue> &OutVals,
SDLoc dl, SelectionDAG &DAG) const override;
+ bool shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const override;
+
// Inline asm support
ConstraintType
getConstraintType(const std::string &Constraint) const override;
@@ -503,6 +505,15 @@ namespace llvm {
std::vector<SDValue> &Ops,
SelectionDAG &DAG) const override;
+ unsigned getInlineAsmMemConstraint(
+ const std::string &ConstraintCode) const override {
+ if (ConstraintCode == "R")
+ return InlineAsm::Constraint_R;
+ else if (ConstraintCode == "ZC")
+ return InlineAsm::Constraint_ZC;
+ return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
+ }
+
bool isLegalAddressingMode(const AddrMode &AM, Type *Ty) const override;
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
diff --git a/lib/Target/Mips/MipsInstrInfo.h b/lib/Target/Mips/MipsInstrInfo.h
index db149d4..7b2b289 100644
--- a/lib/Target/Mips/MipsInstrInfo.h
+++ b/lib/Target/Mips/MipsInstrInfo.h
@@ -29,7 +29,7 @@
#include "MipsGenInstrInfo.inc"
namespace llvm {
-
+class MipsSubtarget;
class MipsInstrInfo : public MipsGenInstrInfo {
virtual void anchor();
protected:
diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td
index 04a16b3..c937d2b 100644
--- a/lib/Target/Mips/MipsInstrInfo.td
+++ b/lib/Target/Mips/MipsInstrInfo.td
@@ -1000,7 +1000,7 @@ class ExtBase<string opstr, RegisterOperand RO, Operand PosOpnd,
SDPatternOperator Op = null_frag>:
InstSE<(outs RO:$rt), (ins RO:$rs, PosOpnd:$pos, size_ext:$size),
!strconcat(opstr, " $rt, $rs, $pos, $size"),
- [(set RO:$rt, (Op RO:$rs, imm:$pos, imm:$size))], NoItinerary,
+ [(set RO:$rt, (Op RO:$rs, imm:$pos, imm:$size))], II_EXT,
FrmR, opstr>, ISA_MIPS32R2;
class InsBase<string opstr, RegisterOperand RO, Operand PosOpnd,
@@ -1008,7 +1008,7 @@ class InsBase<string opstr, RegisterOperand RO, Operand PosOpnd,
InstSE<(outs RO:$rt), (ins RO:$rs, PosOpnd:$pos, size_ins:$size, RO:$src),
!strconcat(opstr, " $rt, $rs, $pos, $size"),
[(set RO:$rt, (Op RO:$rs, imm:$pos, imm:$size, RO:$src))],
- NoItinerary, FrmR, opstr>, ISA_MIPS32R2 {
+ II_INS, FrmR, opstr>, ISA_MIPS32R2 {
let Constraints = "$src = $rt";
}
@@ -1140,12 +1140,13 @@ def XORi : MMRel, ArithLogicI<"xori", uimm16, GPR32Opnd, II_XORI, immZExt16,
xor>,
ADDI_FM<0xe>;
def LUi : MMRel, LoadUpper<"lui", GPR32Opnd, uimm16>, LUI_FM;
-
+let AdditionalPredicates = [NotInMicroMips] in {
/// Arithmetic Instructions (3-Operand, R-Type)
def ADDu : MMRel, ArithLogicR<"addu", GPR32Opnd, 1, II_ADDU, add>,
ADD_FM<0, 0x21>;
def SUBu : MMRel, ArithLogicR<"subu", GPR32Opnd, 0, II_SUBU, sub>,
ADD_FM<0, 0x23>;
+}
let Defs = [HI0, LO0] in
def MUL : MMRel, ArithLogicR<"mul", GPR32Opnd, 1, II_MUL, mul>,
ADD_FM<0x1c, 2>, ISA_MIPS32_NOT_32R6_64R6;
@@ -1579,6 +1580,8 @@ def : MipsInstAlias<"sltu $rt, $rs, $imm",
(SLTiu GPR32Opnd:$rt, GPR32Opnd:$rs, simm16:$imm), 0>;
def : MipsInstAlias<"xor $rs, $rt, $imm",
(XORi GPR32Opnd:$rs, GPR32Opnd:$rt, uimm16:$imm), 0>;
+def : MipsInstAlias<"xor $rs, $imm",
+ (XORi GPR32Opnd:$rs, GPR32Opnd:$rs, uimm16:$imm), 0>;
def : MipsInstAlias<"or $rs, $rt, $imm",
(ORi GPR32Opnd:$rs, GPR32Opnd:$rt, uimm16:$imm), 0>;
def : MipsInstAlias<"or $rs, $imm",
@@ -1639,20 +1642,21 @@ def : MipsInstAlias<"sync",
// Assembler Pseudo Instructions
//===----------------------------------------------------------------------===//
-class LoadImm32<string instr_asm, Operand Od, RegisterOperand RO> :
+class LoadImmediate32<string instr_asm, Operand Od, RegisterOperand RO> :
MipsAsmPseudoInst<(outs RO:$rt), (ins Od:$imm32),
!strconcat(instr_asm, "\t$rt, $imm32")> ;
-def LoadImm32Reg : LoadImm32<"li", uimm5, GPR32Opnd>;
+def LoadImm32 : LoadImmediate32<"li", uimm5, GPR32Opnd>;
-class LoadAddress<string instr_asm, Operand MemOpnd, RegisterOperand RO> :
+class LoadAddressFromReg32<string instr_asm, Operand MemOpnd,
+ RegisterOperand RO> :
MipsAsmPseudoInst<(outs RO:$rt), (ins MemOpnd:$addr),
!strconcat(instr_asm, "\t$rt, $addr")> ;
-def LoadAddr32Reg : LoadAddress<"la", mem, GPR32Opnd>;
+def LoadAddrReg32 : LoadAddressFromReg32<"la", mem, GPR32Opnd>;
-class LoadAddressImm<string instr_asm, Operand Od, RegisterOperand RO> :
+class LoadAddressFromImm32<string instr_asm, Operand Od, RegisterOperand RO> :
MipsAsmPseudoInst<(outs RO:$rt), (ins Od:$imm32),
!strconcat(instr_asm, "\t$rt, $imm32")> ;
-def LoadAddr32Imm : LoadAddressImm<"la", uimm5, GPR32Opnd>;
+def LoadAddrImm32 : LoadAddressFromImm32<"la", uimm5, GPR32Opnd>;
def JalTwoReg : MipsAsmPseudoInst<(outs GPR32Opnd:$rd), (ins GPR32Opnd:$rs),
"jal\t$rd, $rs"> ;
@@ -1761,9 +1765,11 @@ def : WrapperPat<tblockaddress, ADDiu, GPR32>;
def : WrapperPat<tjumptable, ADDiu, GPR32>;
def : WrapperPat<tglobaltlsaddr, ADDiu, GPR32>;
+let AdditionalPredicates = [NotInMicroMips] in {
// Mips does not have "not", so we expand our way
def : MipsPat<(not GPR32:$in),
(NOR GPR32Opnd:$in, ZERO)>;
+}
// extended loads
def : MipsPat<(i32 (extloadi1 addr:$src)), (LBu addr:$src)>;
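
The NOR pattern exists because MIPS has no dedicated bitwise-not instruction: not x is encoded as nor x, $zero. The identity is easy to check in plain C++:

#include <cassert>
#include <cstdint>
#include <initializer_list>

int main() {
  for (uint32_t x : {0u, 1u, 0xdeadbeefu})
    assert(~x == ~(x | 0u)); // not x == nor(x, 0)
}
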
diff --git a/lib/Target/Mips/MipsMCInstLower.cpp b/lib/Target/Mips/MipsMCInstLower.cpp
index 821392e..5258181 100644
--- a/lib/Target/Mips/MipsMCInstLower.cpp
+++ b/lib/Target/Mips/MipsMCInstLower.cpp
@@ -22,6 +22,7 @@
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCStreamer.h"
using namespace llvm;
diff --git a/lib/Target/Mips/MipsMachineFunction.cpp b/lib/Target/Mips/MipsMachineFunction.cpp
index 30b93dc..09e722d 100644
--- a/lib/Target/Mips/MipsMachineFunction.cpp
+++ b/lib/Target/Mips/MipsMachineFunction.cpp
@@ -79,14 +79,19 @@ unsigned MipsFunctionInfo::getGlobalBaseReg() {
if (GlobalBaseReg)
return GlobalBaseReg;
+ MipsSubtarget const &STI =
+ static_cast<const MipsSubtarget &>(MF.getSubtarget());
+
const TargetRegisterClass *RC =
- static_cast<const MipsSubtarget &>(MF.getSubtarget()).inMips16Mode()
+ STI.inMips16Mode()
? &Mips::CPU16RegsRegClass
- : static_cast<const MipsTargetMachine &>(MF.getTarget())
- .getABI()
- .IsN64()
- ? &Mips::GPR64RegClass
- : &Mips::GPR32RegClass;
+ : STI.inMicroMipsMode()
+ ? &Mips::GPRMM16RegClass
+ : static_cast<const MipsTargetMachine &>(MF.getTarget())
+ .getABI()
+ .IsN64()
+ ? &Mips::GPR64RegClass
+ : &Mips::GPR32RegClass;
return GlobalBaseReg = MF.getRegInfo().createVirtualRegister(RC);
}
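
The nested conditional gains a third arm for microMIPS. The same selection, written as a flat if/else chain over booleans (a standalone model; the enum stands in for LLVM's register classes):

enum class RC { CPU16, GPRMM16, GPR64, GPR32 };

static RC pickGlobalBaseRegClass(bool mips16, bool microMips, bool n64) {
  if (mips16)    return RC::CPU16;   // Mips16 has its own GPR view
  if (microMips) return RC::GPRMM16; // new: the 16-bit-encodable GPR subset
  if (n64)       return RC::GPR64;
  return RC::GPR32;
}
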
diff --git a/lib/Target/Mips/MipsModuleISelDAGToDAG.cpp b/lib/Target/Mips/MipsModuleISelDAGToDAG.cpp
index b011e8f..b18a673 100644
--- a/lib/Target/Mips/MipsModuleISelDAGToDAG.cpp
+++ b/lib/Target/Mips/MipsModuleISelDAGToDAG.cpp
@@ -8,15 +8,36 @@
//
//===----------------------------------------------------------------------===//
-#include "MipsISelDAGToDAG.h"
-#include "MipsModuleISelDAGToDAG.h"
-#include "llvm/Support/Casting.h"
+#include "Mips.h"
+#include "MipsTargetMachine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
#define DEBUG_TYPE "mips-isel"
-namespace llvm {
+namespace {
+ class MipsModuleDAGToDAGISel : public MachineFunctionPass {
+ public:
+ static char ID;
+
+ explicit MipsModuleDAGToDAGISel(MipsTargetMachine &TM_)
+ : MachineFunctionPass(ID), TM(TM_) {}
+
+ // Pass Name
+ const char *getPassName() const override {
+ return "MIPS DAG->DAG Pattern Instruction Selection";
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ protected:
+ MipsTargetMachine &TM;
+ };
+
+ char MipsModuleDAGToDAGISel::ID = 0;
+}
bool MipsModuleDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
DEBUG(errs() << "In MipsModuleDAGToDAGISel::runMachineFunction\n");
@@ -24,13 +45,6 @@ bool MipsModuleDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
return false;
}
-char MipsModuleDAGToDAGISel::ID = 0;
-
-}
-
-
-llvm::FunctionPass *llvm::createMipsModuleISelDag(MipsTargetMachine &TM) {
+llvm::FunctionPass *llvm::createMipsModuleISelDagPass(MipsTargetMachine &TM) {
return new MipsModuleDAGToDAGISel(TM);
}
-
-
diff --git a/lib/Target/Mips/MipsModuleISelDAGToDAG.h b/lib/Target/Mips/MipsModuleISelDAGToDAG.h
deleted file mode 100644
index 85bae47..0000000
--- a/lib/Target/Mips/MipsModuleISelDAGToDAG.h
+++ /dev/null
@@ -1,58 +0,0 @@
-//===---- MipsModuleISelDAGToDAG.h - Change Subtarget --------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines a pass used to change the subtarget for the
-// Mips Instruction selector.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TARGET_MIPS_MIPSMODULEISELDAGTODAG_H
-#define LLVM_LIB_TARGET_MIPS_MIPSMODULEISELDAGTODAG_H
-
-#include "Mips.h"
-#include "MipsSubtarget.h"
-#include "MipsTargetMachine.h"
-#include "llvm/CodeGen/SelectionDAGISel.h"
-
-
-//===----------------------------------------------------------------------===//
-// Instruction Selector Implementation
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// MipsModuleDAGToDAGISel - MIPS specific code to select MIPS machine
-// instructions for SelectionDAG operations.
-//===----------------------------------------------------------------------===//
-namespace llvm {
-
-class MipsModuleDAGToDAGISel : public MachineFunctionPass {
-public:
-
- static char ID;
-
- explicit MipsModuleDAGToDAGISel(MipsTargetMachine &TM_)
- : MachineFunctionPass(ID), TM(TM_) {}
-
- // Pass Name
- const char *getPassName() const override {
- return "MIPS DAG->DAG Pattern Instruction Selection";
- }
-
- bool runOnMachineFunction(MachineFunction &MF) override;
-
-protected:
- MipsTargetMachine &TM;
-};
-
-/// createMipsISelDag - This pass converts a legalized DAG into a
-/// MIPS-specific DAG, ready for instruction scheduling.
-FunctionPass *createMipsModuleISelDag(MipsTargetMachine &TM);
-}
-
-#endif
diff --git a/lib/Target/Mips/MipsOs16.cpp b/lib/Target/Mips/MipsOs16.cpp
index 7aae964..b6cd791 100644
--- a/lib/Target/Mips/MipsOs16.cpp
+++ b/lib/Target/Mips/MipsOs16.cpp
@@ -11,14 +11,16 @@
//
//===----------------------------------------------------------------------===//
-#include "MipsOs16.h"
+#include "llvm/IR/Instructions.h"
+#include "Mips.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#define DEBUG_TYPE "mips-os16"
+using namespace llvm;
+#define DEBUG_TYPE "mips-os16"
static cl::opt<std::string> Mips32FunctionMask(
"mips32-function-mask",
@@ -27,70 +29,83 @@ static cl::opt<std::string> Mips32FunctionMask(
cl::Hidden);
namespace {
+ class MipsOs16 : public ModulePass {
+ public:
+ static char ID;
+
+ MipsOs16() : ModulePass(ID) {}
+
+ const char *getPassName() const override {
+ return "MIPS Os16 Optimization";
+ }
+
+ bool runOnModule(Module &M) override;
+ };
+
+ char MipsOs16::ID = 0;
+}
- // Figure out if we need float point based on the function signature.
- // We need to move variables in and/or out of floating point
- // registers because of the ABI
- //
- bool needsFPFromSig(Function &F) {
- Type* RetType = F.getReturnType();
- switch (RetType->getTypeID()) {
+// Figure out if we need floating point based on the function signature.
+// We need to move variables in and/or out of floating point
+// registers because of the ABI.
+//
+static bool needsFPFromSig(Function &F) {
+ Type* RetType = F.getReturnType();
+ switch (RetType->getTypeID()) {
+ case Type::FloatTyID:
+ case Type::DoubleTyID:
+ return true;
+ default:
+ ;
+ }
+ if (F.arg_size() >=1) {
+ Argument &Arg = F.getArgumentList().front();
+ switch (Arg.getType()->getTypeID()) {
case Type::FloatTyID:
case Type::DoubleTyID:
return true;
default:
;
}
- if (F.arg_size() >=1) {
- Argument &Arg = F.getArgumentList().front();
- switch (Arg.getType()->getTypeID()) {
- case Type::FloatTyID:
- case Type::DoubleTyID:
- return true;
- default:
- ;
- }
- }
- return false;
}
+ return false;
+}
- // Figure out if the function will need floating point operations
- //
- bool needsFP(Function &F) {
- if (needsFPFromSig(F))
- return true;
- for (Function::const_iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
- for (BasicBlock::const_iterator I = BB->begin(), E = BB->end();
+// Figure out if the function will need floating point operations
+//
+static bool needsFP(Function &F) {
+ if (needsFPFromSig(F))
+ return true;
+ for (Function::const_iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
+ for (BasicBlock::const_iterator I = BB->begin(), E = BB->end();
I != E; ++I) {
- const Instruction &Inst = *I;
- switch (Inst.getOpcode()) {
- case Instruction::FAdd:
- case Instruction::FSub:
- case Instruction::FMul:
- case Instruction::FDiv:
- case Instruction::FRem:
- case Instruction::FPToUI:
- case Instruction::FPToSI:
- case Instruction::UIToFP:
- case Instruction::SIToFP:
- case Instruction::FPTrunc:
- case Instruction::FPExt:
- case Instruction::FCmp:
+ const Instruction &Inst = *I;
+ switch (Inst.getOpcode()) {
+ case Instruction::FAdd:
+ case Instruction::FSub:
+ case Instruction::FMul:
+ case Instruction::FDiv:
+ case Instruction::FRem:
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ case Instruction::UIToFP:
+ case Instruction::SIToFP:
+ case Instruction::FPTrunc:
+ case Instruction::FPExt:
+ case Instruction::FCmp:
+ return true;
+ default:
+ ;
+ }
+ if (const CallInst *CI = dyn_cast<CallInst>(I)) {
+ DEBUG(dbgs() << "Working on call" << "\n");
+ Function &F_ = *CI->getCalledFunction();
+ if (needsFPFromSig(F_))
return true;
- default:
- ;
- }
- if (const CallInst *CI = dyn_cast<CallInst>(I)) {
- DEBUG(dbgs() << "Working on call" << "\n");
- Function &F_ = *CI->getCalledFunction();
- if (needsFPFromSig(F_))
- return true;
- }
}
- return false;
- }
+ }
+ return false;
}
-namespace llvm {
bool MipsOs16::runOnModule(Module &M) {
@@ -136,12 +151,6 @@ bool MipsOs16::runOnModule(Module &M) {
return modified;
}
-char MipsOs16::ID = 0;
-
-}
-
-ModulePass *llvm::createMipsOs16(MipsTargetMachine &TM) {
+ModulePass *llvm::createMipsOs16Pass(MipsTargetMachine &TM) {
return new MipsOs16;
}
-
-
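
Folding the class into an anonymous namespace and the helpers into static functions is what lets the header below be deleted: nothing is visible outside this translation unit except the create*Pass entry point. The two spellings of internal linkage, per LLVM convention (sketch):

namespace {                      // internal linkage for the type
class Os16LikePass { /* ... */ };
}

static bool needsFPLike() {      // internal linkage for the helper
  return false;                  // illustrative body
}
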
diff --git a/lib/Target/Mips/MipsOs16.h b/lib/Target/Mips/MipsOs16.h
deleted file mode 100644
index 77183ec..0000000
--- a/lib/Target/Mips/MipsOs16.h
+++ /dev/null
@@ -1,47 +0,0 @@
-//===---- MipsOs16.h for Mips Option -Os16 --------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines an optimization phase for the MIPS target.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TARGET_MIPS_MIPSOS16_H
-#define LLVM_LIB_TARGET_MIPS_MIPSOS16_H
-
-#include "MCTargetDesc/MipsMCTargetDesc.h"
-#include "MipsTargetMachine.h"
-#include "llvm/Pass.h"
-#include "llvm/Target/TargetMachine.h"
-
-using namespace llvm;
-
-namespace llvm {
-
-class MipsOs16 : public ModulePass {
-
-public:
- static char ID;
-
- MipsOs16() : ModulePass(ID) {
-
- }
-
- const char *getPassName() const override {
- return "MIPS Os16 Optimization";
- }
-
- bool runOnModule(Module &M) override;
-
-};
-
-ModulePass *createMipsOs16(MipsTargetMachine &TM);
-
-}
-
-#endif
diff --git a/lib/Target/Mips/MipsRegisterInfo.cpp b/lib/Target/Mips/MipsRegisterInfo.cpp
index 2110c03..0ea48b1 100644
--- a/lib/Target/Mips/MipsRegisterInfo.cpp
+++ b/lib/Target/Mips/MipsRegisterInfo.cpp
@@ -43,14 +43,14 @@ using namespace llvm;
#define GET_REGINFO_TARGET_DESC
#include "MipsGenRegisterInfo.inc"
-MipsRegisterInfo::MipsRegisterInfo(const MipsSubtarget &ST)
- : MipsGenRegisterInfo(Mips::RA), Subtarget(ST) {}
+MipsRegisterInfo::MipsRegisterInfo() : MipsGenRegisterInfo(Mips::RA) {}
unsigned MipsRegisterInfo::getPICCallReg() { return Mips::T9; }
const TargetRegisterClass *
MipsRegisterInfo::getPointerRegClass(const MachineFunction &MF,
unsigned Kind) const {
+ const MipsSubtarget &Subtarget = MF.getSubtarget<MipsSubtarget>();
return Subtarget.isABI_N64() ? &Mips::GPR64RegClass : &Mips::GPR32RegClass;
}
@@ -63,7 +63,7 @@ MipsRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
case Mips::GPR32RegClassID:
case Mips::GPR64RegClassID:
case Mips::DSPRRegClassID: {
- const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
+ const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
return 28 - TFI->hasFP(MF);
}
case Mips::FGR32RegClassID:
@@ -82,6 +82,7 @@ MipsRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
/// Mips Callee Saved Registers
const MCPhysReg *
MipsRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
+ const MipsSubtarget &Subtarget = MF->getSubtarget<MipsSubtarget>();
if (Subtarget.isSingleFloat())
return CSR_SingleFloatOnly_SaveList;
@@ -100,8 +101,10 @@ MipsRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
return CSR_O32_SaveList;
}
-const uint32_t*
-MipsRegisterInfo::getCallPreservedMask(CallingConv::ID) const {
+const uint32_t *
+MipsRegisterInfo::getCallPreservedMask(const MachineFunction &MF,
+ CallingConv::ID) const {
+ const MipsSubtarget &Subtarget = MF.getSubtarget<MipsSubtarget>();
if (Subtarget.isSingleFloat())
return CSR_SingleFloatOnly_RegMask;
@@ -135,6 +138,7 @@ getReservedRegs(const MachineFunction &MF) const {
};
BitVector Reserved(getNumRegs());
+ const MipsSubtarget &Subtarget = MF.getSubtarget<MipsSubtarget>();
typedef TargetRegisterClass::const_iterator RegIter;
for (unsigned I = 0; I < array_lengthof(ReservedGPR32); ++I)
@@ -257,6 +261,7 @@ eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
unsigned MipsRegisterInfo::
getFrameRegister(const MachineFunction &MF) const {
+ const MipsSubtarget &Subtarget = MF.getSubtarget<MipsSubtarget>();
const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
bool IsN64 =
static_cast<const MipsTargetMachine &>(MF.getTarget()).getABI().IsN64();
diff --git a/lib/Target/Mips/MipsRegisterInfo.h b/lib/Target/Mips/MipsRegisterInfo.h
index 9ec4a38..031b93e 100644
--- a/lib/Target/Mips/MipsRegisterInfo.h
+++ b/lib/Target/Mips/MipsRegisterInfo.h
@@ -21,15 +21,9 @@
#include "MipsGenRegisterInfo.inc"
namespace llvm {
-class MipsSubtarget;
-class Type;
-
class MipsRegisterInfo : public MipsGenRegisterInfo {
-protected:
- const MipsSubtarget &Subtarget;
-
public:
- MipsRegisterInfo(const MipsSubtarget &Subtarget);
+ MipsRegisterInfo();
/// getRegisterNumbering - Given the enum value for some register, e.g.
/// Mips::RA, return the number that it corresponds to (e.g. 31).
@@ -47,9 +41,9 @@ public:
unsigned getRegPressureLimit(const TargetRegisterClass *RC,
MachineFunction &MF) const override;
- const MCPhysReg *
- getCalleeSavedRegs(const MachineFunction *MF = nullptr) const override;
- const uint32_t *getCallPreservedMask(CallingConv::ID) const override;
+ const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override;
+ const uint32_t *getCallPreservedMask(const MachineFunction &MF,
+ CallingConv::ID) const override;
static const uint32_t *getMips16RetHelperMask();
BitVector getReservedRegs(const MachineFunction &MF) const override;
diff --git a/lib/Target/Mips/MipsSEISelDAGToDAG.cpp b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
index 0761ded..a598c3f 100644
--- a/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
+++ b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
@@ -258,8 +258,12 @@ SDNode *MipsSEDAGToDAGISel::selectAddESubE(unsigned MOp, SDValue InFlag,
CurDAG->getTargetConstant(Mips::sub_32, VT));
}
- SDNode *AddCarry = CurDAG->getMachineNode(ADDuOp, DL, VT,
- SDValue(Carry, 0), RHS);
+ // Generate a second addition only if we know that RHS is not a
+ // constant-zero node.
+ SDNode *AddCarry = Carry;
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS);
+ if (!C || C->getZExtValue())
+ AddCarry = CurDAG->getMachineNode(ADDuOp, DL, VT, SDValue(Carry, 0), RHS);
return CurDAG->SelectNodeTo(Node, MOp, VT, MVT::Glue, LHS,
SDValue(AddCarry, 0));
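
The guard avoids a dead ADDu: adde computes LHS + RHS + carry, and when RHS is a known zero the carry node is already the entire second operand of the final add. A scalar model of the lowering, with illustrative names:

#include <cstdint>

static uint32_t lowerAdde(uint32_t lhs, uint32_t rhs, uint32_t carry,
                          bool rhsKnownZero) {
  uint32_t addCarry = carry;
  if (!rhsKnownZero)
    addCarry += rhs;    // the ADDu that the patch now skips
  return lhs + addCarry;
}
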
@@ -378,6 +382,17 @@ bool MipsSEDAGToDAGISel::selectIntAddr(SDValue Addr, SDValue &Base,
selectAddrDefault(Addr, Base, Offset);
}
+bool MipsSEDAGToDAGISel::selectAddrRegImm9(SDValue Addr, SDValue &Base,
+ SDValue &Offset) const {
+ if (selectAddrFrameIndex(Addr, Base, Offset))
+ return true;
+
+ if (selectAddrFrameIndexOffset(Addr, Base, Offset, 9))
+ return true;
+
+ return false;
+}
+
bool MipsSEDAGToDAGISel::selectAddrRegImm10(SDValue Addr, SDValue &Base,
SDValue &Offset) const {
if (selectAddrFrameIndex(Addr, Base, Offset))
@@ -401,6 +416,17 @@ bool MipsSEDAGToDAGISel::selectAddrRegImm12(SDValue Addr, SDValue &Base,
return false;
}
+bool MipsSEDAGToDAGISel::selectAddrRegImm16(SDValue Addr, SDValue &Base,
+ SDValue &Offset) const {
+ if (selectAddrFrameIndex(Addr, Base, Offset))
+ return true;
+
+ if (selectAddrFrameIndexOffset(Addr, Base, Offset, 16))
+ return true;
+
+ return false;
+}
+
bool MipsSEDAGToDAGISel::selectIntAddrMM(SDValue Addr, SDValue &Base,
SDValue &Offset) const {
return selectAddrRegImm12(Addr, Base, Offset) ||
@@ -912,6 +938,60 @@ std::pair<bool, SDNode*> MipsSEDAGToDAGISel::selectNode(SDNode *Node) {
return std::make_pair(false, nullptr);
}
+bool MipsSEDAGToDAGISel::
+SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
+ std::vector<SDValue> &OutOps) {
+ SDValue Base, Offset;
+
+ switch(ConstraintID) {
+ default:
+ llvm_unreachable("Unexpected asm memory constraint");
+ // All memory constraints can at least accept raw pointers.
+ case InlineAsm::Constraint_i:
+ case InlineAsm::Constraint_R:
+ OutOps.push_back(Op);
+ OutOps.push_back(CurDAG->getTargetConstant(0, MVT::i32));
+ return false;
+ case InlineAsm::Constraint_m:
+ if (selectAddrRegImm16(Op, Base, Offset)) {
+ OutOps.push_back(Base);
+ OutOps.push_back(Offset);
+ return false;
+ }
+ OutOps.push_back(Op);
+ OutOps.push_back(CurDAG->getTargetConstant(0, MVT::i32));
+ return false;
+ case InlineAsm::Constraint_ZC:
+ // ZC matches whatever the pref, ll, and sc instructions can handle for the
+ // given subtarget.
+ if (Subtarget->inMicroMipsMode()) {
+ // On microMIPS, they can handle 12-bit offsets.
+ if (selectAddrRegImm12(Op, Base, Offset)) {
+ OutOps.push_back(Base);
+ OutOps.push_back(Offset);
+ return false;
+ }
+ } else if (Subtarget->hasMips32r6()) {
+ // On MIPS32r6/MIPS64r6, they can only handle 9-bit offsets.
+ if (selectAddrRegImm9(Op, Base, Offset)) {
+ OutOps.push_back(Base);
+ OutOps.push_back(Offset);
+ return false;
+ }
+ } else if (selectAddrRegImm16(Op, Base, Offset)) {
+ // Prior to MIPS32r6/MIPS64r6, they can handle 16-bit offsets.
+ OutOps.push_back(Base);
+ OutOps.push_back(Offset);
+ return false;
+ }
+ // In all cases, 0-bit offsets are acceptable.
+ OutOps.push_back(Op);
+ OutOps.push_back(CurDAG->getTargetConstant(0, MVT::i32));
+ return false;
+ }
+ return true;
+}
+
FunctionPass *llvm::createMipsSEISelDag(MipsTargetMachine &TM) {
return new MipsSEDAGToDAGISel(TM);
}
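
The three ZC branches encode how wide an offset ll/sc can take on each subtarget, with a 0-offset raw pointer as the universal fallback. The policy in one function (names illustrative; the widths come from the comments above):

static int zcOffsetBits(bool microMips, bool r6) {
  if (microMips) return 12; // microMIPS ll/sc take 12-bit offsets
  if (r6)        return 9;  // MIPS32r6/MIPS64r6 narrowed them to 9 bits
  return 16;                // pre-r6 ll/sc take full 16-bit offsets
}
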
diff --git a/lib/Target/Mips/MipsSEISelDAGToDAG.h b/lib/Target/Mips/MipsSEISelDAGToDAG.h
index 2d24eb4..a11fcf4 100644
--- a/lib/Target/Mips/MipsSEISelDAGToDAG.h
+++ b/lib/Target/Mips/MipsSEISelDAGToDAG.h
@@ -56,12 +56,18 @@ private:
bool selectIntAddr(SDValue Addr, SDValue &Base,
SDValue &Offset) const override;
+ bool selectAddrRegImm9(SDValue Addr, SDValue &Base,
+ SDValue &Offset) const;
+
bool selectAddrRegImm10(SDValue Addr, SDValue &Base,
SDValue &Offset) const;
bool selectAddrRegImm12(SDValue Addr, SDValue &Base,
SDValue &Offset) const;
+ bool selectAddrRegImm16(SDValue Addr, SDValue &Base,
+ SDValue &Offset) const;
+
bool selectIntAddrMM(SDValue Addr, SDValue &Base,
SDValue &Offset) const override;
@@ -111,6 +117,10 @@ private:
// Insert instructions to initialize the global base register in the
// first MBB of the function.
void initGlobalBaseReg(MachineFunction &MF);
+
+ bool SelectInlineAsmMemoryOperand(const SDValue &Op,
+ unsigned ConstraintID,
+ std::vector<SDValue> &OutOps) override;
};
FunctionPass *createMipsSEISelDag(MipsTargetMachine &TM);
diff --git a/lib/Target/Mips/MipsSEInstrInfo.cpp b/lib/Target/Mips/MipsSEInstrInfo.cpp
index 74f291f..b992579 100644
--- a/lib/Target/Mips/MipsSEInstrInfo.cpp
+++ b/lib/Target/Mips/MipsSEInstrInfo.cpp
@@ -27,7 +27,7 @@ using namespace llvm;
MipsSEInstrInfo::MipsSEInstrInfo(const MipsSubtarget &STI)
: MipsInstrInfo(STI, STI.getRelocationModel() == Reloc::PIC_ ? Mips::B
: Mips::J),
- RI(STI) {}
+ RI() {}
const MipsRegisterInfo &MipsSEInstrInfo::getRegisterInfo() const {
return RI;
diff --git a/lib/Target/Mips/MipsSERegisterInfo.cpp b/lib/Target/Mips/MipsSERegisterInfo.cpp
index 55c6638..b89207e 100644
--- a/lib/Target/Mips/MipsSERegisterInfo.cpp
+++ b/lib/Target/Mips/MipsSERegisterInfo.cpp
@@ -18,6 +18,7 @@
#include "MipsMachineFunction.h"
#include "MipsSEInstrInfo.h"
#include "MipsSubtarget.h"
+#include "MipsTargetMachine.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -41,8 +42,7 @@ using namespace llvm;
#define DEBUG_TYPE "mips-reg-info"
-MipsSERegisterInfo::MipsSERegisterInfo(const MipsSubtarget &ST)
- : MipsRegisterInfo(ST) {}
+MipsSERegisterInfo::MipsSERegisterInfo() : MipsRegisterInfo() {}
bool MipsSERegisterInfo::
requiresRegisterScavenging(const MachineFunction &MF) const {
@@ -110,6 +110,8 @@ void MipsSERegisterInfo::eliminateFI(MachineBasicBlock::iterator II,
MachineFunction &MF = *MI.getParent()->getParent();
MachineFrameInfo *MFI = MF.getFrameInfo();
MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
+ bool isN64 =
+ static_cast<const MipsTargetMachine &>(MF.getTarget()).getABI().IsN64();
const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
int MinCSFI = 0;
@@ -132,7 +134,7 @@ void MipsSERegisterInfo::eliminateFI(MachineBasicBlock::iterator II,
unsigned FrameReg;
if ((FrameIndex >= MinCSFI && FrameIndex <= MaxCSFI) || EhDataRegFI)
- FrameReg = Subtarget.isABI_N64() ? Mips::SP_64 : Mips::SP;
+ FrameReg = isN64 ? Mips::SP_64 : Mips::SP;
else
FrameReg = getFrameRegister(MF);
@@ -165,9 +167,9 @@ void MipsSERegisterInfo::eliminateFI(MachineBasicBlock::iterator II,
// (where n < 16) and doesn't, but does fit into 16-bits then use an ADDiu
MachineBasicBlock &MBB = *MI.getParent();
DebugLoc DL = II->getDebugLoc();
- unsigned ADDiu = Subtarget.isABI_N64() ? Mips::DADDiu : Mips::ADDiu;
+ unsigned ADDiu = isN64 ? Mips::DADDiu : Mips::ADDiu;
const TargetRegisterClass *RC =
- Subtarget.isABI_N64() ? &Mips::GPR64RegClass : &Mips::GPR32RegClass;
+ isN64 ? &Mips::GPR64RegClass : &Mips::GPR32RegClass;
MachineRegisterInfo &RegInfo = MBB.getParent()->getRegInfo();
unsigned Reg = RegInfo.createVirtualRegister(RC);
const MipsSEInstrInfo &TII =
@@ -183,7 +185,7 @@ void MipsSERegisterInfo::eliminateFI(MachineBasicBlock::iterator II,
// instructions.
MachineBasicBlock &MBB = *MI.getParent();
DebugLoc DL = II->getDebugLoc();
- unsigned ADDu = Subtarget.isABI_N64() ? Mips::DADDu : Mips::ADDu;
+ unsigned ADDu = isN64 ? Mips::DADDu : Mips::ADDu;
unsigned NewImm = 0;
const MipsSEInstrInfo &TII =
*static_cast<const MipsSEInstrInfo *>(
diff --git a/lib/Target/Mips/MipsSERegisterInfo.h b/lib/Target/Mips/MipsSERegisterInfo.h
index 6b70d07..ebae190 100644
--- a/lib/Target/Mips/MipsSERegisterInfo.h
+++ b/lib/Target/Mips/MipsSERegisterInfo.h
@@ -22,7 +22,7 @@ class MipsSEInstrInfo;
class MipsSERegisterInfo : public MipsRegisterInfo {
public:
- MipsSERegisterInfo(const MipsSubtarget &Subtarget);
+ MipsSERegisterInfo();
bool requiresRegisterScavenging(const MachineFunction &MF) const override;
diff --git a/lib/Target/Mips/MipsSchedule.td b/lib/Target/Mips/MipsSchedule.td
index ea98199..54b5d28 100644
--- a/lib/Target/Mips/MipsSchedule.td
+++ b/lib/Target/Mips/MipsSchedule.td
@@ -65,7 +65,9 @@ def II_DSRL32 : InstrItinClass;
def II_DSRLV : InstrItinClass;
def II_DSUBU : InstrItinClass;
def II_DSUB : InstrItinClass;
+def II_EXT : InstrItinClass; // Any EXT instruction
def II_FLOOR : InstrItinClass;
+def II_INS : InstrItinClass; // Any INS instruction
def II_LB : InstrItinClass;
def II_LBU : InstrItinClass;
def II_LD : InstrItinClass;
@@ -198,6 +200,8 @@ def MipsGenericItineraries : ProcessorItineraries<[ALU, IMULDIV], [], [
InstrItinData<II_DSUB , [InstrStage<1, [ALU]>]>,
InstrItinData<II_DROTR , [InstrStage<1, [ALU]>]>,
InstrItinData<II_DROTRV , [InstrStage<1, [ALU]>]>,
+ InstrItinData<II_EXT , [InstrStage<1, [ALU]>]>,
+ InstrItinData<II_INS , [InstrStage<1, [ALU]>]>,
InstrItinData<II_LUI , [InstrStage<1, [ALU]>]>,
InstrItinData<II_MOVF , [InstrStage<1, [ALU]>]>,
InstrItinData<II_MOVN , [InstrStage<1, [ALU]>]>,
diff --git a/lib/Target/Mips/MipsTargetMachine.cpp b/lib/Target/Mips/MipsTargetMachine.cpp
index 86c8931..79f6617 100644
--- a/lib/Target/Mips/MipsTargetMachine.cpp
+++ b/lib/Target/Mips/MipsTargetMachine.cpp
@@ -14,14 +14,11 @@
#include "MipsTargetMachine.h"
#include "Mips.h"
#include "Mips16FrameLowering.h"
-#include "Mips16HardFloat.h"
#include "Mips16ISelDAGToDAG.h"
#include "Mips16ISelLowering.h"
#include "Mips16InstrInfo.h"
#include "MipsFrameLowering.h"
#include "MipsInstrInfo.h"
-#include "MipsModuleISelDAGToDAG.h"
-#include "MipsOs16.h"
#include "MipsSEFrameLowering.h"
#include "MipsSEISelDAGToDAG.h"
#include "MipsSEISelLowering.h"
@@ -34,6 +31,7 @@
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
+
using namespace llvm;
#define DEBUG_TYPE "mips"
@@ -46,8 +44,12 @@ extern "C" void LLVMInitializeMipsTarget() {
RegisterTargetMachine<MipselTargetMachine> B(TheMips64elTarget);
}
-static std::string computeDataLayout(bool isLittle, MipsABIInfo &ABI) {
+static std::string computeDataLayout(StringRef TT, StringRef CPU,
+ const TargetOptions &Options,
+ bool isLittle) {
std::string Ret = "";
+ MipsABIInfo ABI =
+ MipsABIInfo::computeTargetABI(Triple(TT), CPU, Options.MCOptions);
// There are both little and big endian mips.
if (isLittle)
@@ -86,11 +88,11 @@ MipsTargetMachine::MipsTargetMachine(const Target &T, StringRef TT,
const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL, bool isLittle)
- : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
+ : LLVMTargetMachine(T, computeDataLayout(TT, CPU, Options, isLittle), TT,
+ CPU, FS, Options, RM, CM, OL),
isLittle(isLittle), TLOF(make_unique<MipsTargetObjectFile>()),
ABI(MipsABIInfo::computeTargetABI(Triple(TT), CPU, Options.MCOptions)),
- DL(computeDataLayout(isLittle, ABI)), Subtarget(nullptr),
- DefaultSubtarget(TT, CPU, FS, isLittle, *this),
+ Subtarget(nullptr), DefaultSubtarget(TT, CPU, FS, isLittle, *this),
NoMips16Subtarget(TT, CPU, FS.empty() ? "-mips16" : FS.str() + ",-mips16",
isLittle, *this),
Mips16Subtarget(TT, CPU, FS.empty() ? "+mips16" : FS.str() + ",+mips16",
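
The DataLayout string must now exist before LLVMTargetMachine's constructor runs, which is why computeDataLayout becomes a free function over the raw triple/CPU/options instead of reading members that are not constructed yet. A minimal model of the ordering constraint:

#include <string>
#include <utility>

struct BaseTM {                       // stands in for LLVMTargetMachine
  explicit BaseTM(std::string DL) : DataLayout(std::move(DL)) {}
  std::string DataLayout;
};

static std::string computeDL(bool littleEndian) {
  return littleEndian ? "e-m:m" : "E-m:m"; // illustrative fragment
}

struct MipsLikeTM : BaseTM {
  // Derived members do not exist yet at this point, so the helper may only
  // use the constructor arguments themselves.
  explicit MipsLikeTM(bool little) : BaseTM(computeDL(little)) {}
};
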
@@ -209,14 +211,14 @@ void MipsPassConfig::addIRPasses() {
TargetPassConfig::addIRPasses();
addPass(createAtomicExpandPass(&getMipsTargetMachine()));
if (getMipsSubtarget().os16())
- addPass(createMipsOs16(getMipsTargetMachine()));
+ addPass(createMipsOs16Pass(getMipsTargetMachine()));
if (getMipsSubtarget().inMips16HardFloat())
- addPass(createMips16HardFloat(getMipsTargetMachine()));
+ addPass(createMips16HardFloatPass(getMipsTargetMachine()));
}
// Install an instruction selector pass using
// the ISelDag to gen Mips code.
bool MipsPassConfig::addInstSelector() {
- addPass(createMipsModuleISelDag(getMipsTargetMachine()));
+ addPass(createMipsModuleISelDagPass(getMipsTargetMachine()));
addPass(createMips16ISelDag(getMipsTargetMachine()));
addPass(createMipsSEISelDag(getMipsTargetMachine()));
return false;
diff --git a/lib/Target/Mips/MipsTargetMachine.h b/lib/Target/Mips/MipsTargetMachine.h
index afd0cea..5427d6a 100644
--- a/lib/Target/Mips/MipsTargetMachine.h
+++ b/lib/Target/Mips/MipsTargetMachine.h
@@ -31,7 +31,6 @@ class MipsTargetMachine : public LLVMTargetMachine {
std::unique_ptr<TargetLoweringObjectFile> TLOF;
// Selected ABI
MipsABIInfo ABI;
- const DataLayout DL; // Calculates type size & alignment
MipsSubtarget *Subtarget;
MipsSubtarget DefaultSubtarget;
MipsSubtarget NoMips16Subtarget;
@@ -47,8 +46,7 @@ public:
TargetIRAnalysis getTargetIRAnalysis() override;
- const DataLayout *getDataLayout() const override { return &DL; }
- const MipsSubtarget *getSubtargetImpl() const override {
+ const MipsSubtarget *getSubtargetImpl() const {
if (Subtarget)
return Subtarget;
return &DefaultSubtarget;
diff --git a/lib/Target/Mips/MipsTargetObjectFile.cpp b/lib/Target/Mips/MipsTargetObjectFile.cpp
index c07693e..723b63b 100644
--- a/lib/Target/Mips/MipsTargetObjectFile.cpp
+++ b/lib/Target/Mips/MipsTargetObjectFile.cpp
@@ -9,6 +9,7 @@
#include "MipsTargetObjectFile.h"
#include "MipsSubtarget.h"
+#include "MipsTargetMachine.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalVariable.h"
@@ -44,7 +45,7 @@ void MipsTargetObjectFile::Initialize(MCContext &Ctx, const TargetMachine &TM){
SmallBSSSection = getContext().getELFSection(".sbss", ELF::SHT_NOBITS,
ELF::SHF_WRITE | ELF::SHF_ALLOC);
- this->TM = &TM;
+ this->TM = &static_cast<const MipsTargetMachine &>(TM);
}
// An address must be loaded from a small section if its size is less than the
@@ -84,7 +85,8 @@ IsGlobalInSmallSection(const GlobalValue *GV, const TargetMachine &TM,
bool MipsTargetObjectFile::
IsGlobalInSmallSectionImpl(const GlobalValue *GV,
const TargetMachine &TM) const {
- const MipsSubtarget &Subtarget = TM.getSubtarget<MipsSubtarget>();
+ const MipsSubtarget &Subtarget =
+ *static_cast<const MipsTargetMachine &>(TM).getSubtargetImpl();
// Return if small section is not available.
if (!Subtarget.useSmallSection())
@@ -127,9 +129,11 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
/// Return true if this constant should be placed into small data section.
bool MipsTargetObjectFile::
IsConstantInSmallSection(const Constant *CN, const TargetMachine &TM) const {
- return (
- TM.getSubtarget<MipsSubtarget>().useSmallSection() && LocalSData &&
- IsInSmallSection(TM.getDataLayout()->getTypeAllocSize(CN->getType())));
+ return (static_cast<const MipsTargetMachine &>(TM)
+ .getSubtargetImpl()
+ ->useSmallSection() &&
+ LocalSData && IsInSmallSection(TM.getDataLayout()->getTypeAllocSize(
+ CN->getType())));
}
const MCSection *MipsTargetObjectFile::
diff --git a/lib/Target/Mips/MipsTargetObjectFile.h b/lib/Target/Mips/MipsTargetObjectFile.h
index 3a2b298..45ed9d0 100644
--- a/lib/Target/Mips/MipsTargetObjectFile.h
+++ b/lib/Target/Mips/MipsTargetObjectFile.h
@@ -13,11 +13,11 @@
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
namespace llvm {
-
+class MipsTargetMachine;
class MipsTargetObjectFile : public TargetLoweringObjectFileELF {
const MCSection *SmallDataSection;
const MCSection *SmallBSSSection;
- const TargetMachine *TM;
+ const MipsTargetMachine *TM;
public:
void Initialize(MCContext &Ctx, const TargetMachine &TM) override;
diff --git a/lib/Target/Mips/MipsTargetStreamer.h b/lib/Target/Mips/MipsTargetStreamer.h
index b3b8296..1ff041d 100644
--- a/lib/Target/Mips/MipsTargetStreamer.h
+++ b/lib/Target/Mips/MipsTargetStreamer.h
@@ -92,9 +92,9 @@ public:
}
virtual void emitDirectiveModuleOddSPReg(bool Enabled, bool IsO32ABI);
- virtual void emitDirectiveSetFp(MipsABIFlagsSection::FpABIKind Value){};
- virtual void emitMipsAbiFlags(){};
+ virtual void emitDirectiveSetFp(MipsABIFlagsSection::FpABIKind Value);
void forbidModuleDirective() { ModuleDirectiveAllowed = false; }
+ void reallowModuleDirective() { ModuleDirectiveAllowed = true; }
bool isModuleDirectiveAllowed() { return ModuleDirectiveAllowed; }
// This method enables template classes to set internal abi flags
@@ -197,7 +197,6 @@ public:
bool Is32BitABI) override;
void emitDirectiveModuleOddSPReg(bool Enabled, bool IsO32ABI) override;
void emitDirectiveSetFp(MipsABIFlagsSection::FpABIKind Value) override;
- void emitMipsAbiFlags() override;
};
// This part is for ELF object output
@@ -240,7 +239,7 @@ public:
// ABI Flags
void emitDirectiveModuleOddSPReg(bool Enabled, bool IsO32ABI) override;
- void emitMipsAbiFlags() override;
+ void emitMipsAbiFlags();
};
}
#endif
diff --git a/lib/Target/NVPTX/CMakeLists.txt b/lib/Target/NVPTX/CMakeLists.txt
index 3a4a19d..cdd2f1f 100644
--- a/lib/Target/NVPTX/CMakeLists.txt
+++ b/lib/Target/NVPTX/CMakeLists.txt
@@ -29,7 +29,6 @@ set(NVPTXCodeGen_sources
NVPTXTargetMachine.cpp
NVPTXTargetTransformInfo.cpp
NVPTXUtilities.cpp
- NVPTXutil.cpp
NVVMReflect.cpp
)
diff --git a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp
index 11d737e..b9df3d1 100644
--- a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp
+++ b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp
@@ -39,6 +39,8 @@ NVPTXMCAsmInfo::NVPTXMCAsmInfo(StringRef TT) {
InlineAsmEnd = " inline asm";
SupportsDebugInformation = CompileForDebugging;
+ // PTX does not allow .align on functions.
+ HasFunctionAlignment = false;
HasDotTypeDotSizeDirective = false;
Data8bitsDirective = " .b8 ";
diff --git a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp
index 158ca90..2b4d864 100644
--- a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp
+++ b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp
@@ -71,35 +71,23 @@ static MCInstPrinter *createNVPTXMCInstPrinter(const Target &T,
// Force static initialization.
extern "C" void LLVMInitializeNVPTXTargetMC() {
- // Register the MC asm info.
- RegisterMCAsmInfo<NVPTXMCAsmInfo> X(TheNVPTXTarget32);
- RegisterMCAsmInfo<NVPTXMCAsmInfo> Y(TheNVPTXTarget64);
-
- // Register the MC codegen info.
- TargetRegistry::RegisterMCCodeGenInfo(TheNVPTXTarget32,
- createNVPTXMCCodeGenInfo);
- TargetRegistry::RegisterMCCodeGenInfo(TheNVPTXTarget64,
- createNVPTXMCCodeGenInfo);
-
- // Register the MC instruction info.
- TargetRegistry::RegisterMCInstrInfo(TheNVPTXTarget32, createNVPTXMCInstrInfo);
- TargetRegistry::RegisterMCInstrInfo(TheNVPTXTarget64, createNVPTXMCInstrInfo);
-
- // Register the MC register info.
- TargetRegistry::RegisterMCRegInfo(TheNVPTXTarget32,
- createNVPTXMCRegisterInfo);
- TargetRegistry::RegisterMCRegInfo(TheNVPTXTarget64,
- createNVPTXMCRegisterInfo);
-
- // Register the MC subtarget info.
- TargetRegistry::RegisterMCSubtargetInfo(TheNVPTXTarget32,
- createNVPTXMCSubtargetInfo);
- TargetRegistry::RegisterMCSubtargetInfo(TheNVPTXTarget64,
- createNVPTXMCSubtargetInfo);
-
- // Register the MCInstPrinter.
- TargetRegistry::RegisterMCInstPrinter(TheNVPTXTarget32,
- createNVPTXMCInstPrinter);
- TargetRegistry::RegisterMCInstPrinter(TheNVPTXTarget64,
- createNVPTXMCInstPrinter);
+ for (Target *T : {&TheNVPTXTarget32, &TheNVPTXTarget64}) {
+ // Register the MC asm info.
+ RegisterMCAsmInfo<NVPTXMCAsmInfo> X(*T);
+
+ // Register the MC codegen info.
+ TargetRegistry::RegisterMCCodeGenInfo(*T, createNVPTXMCCodeGenInfo);
+
+ // Register the MC instruction info.
+ TargetRegistry::RegisterMCInstrInfo(*T, createNVPTXMCInstrInfo);
+
+ // Register the MC register info.
+ TargetRegistry::RegisterMCRegInfo(*T, createNVPTXMCRegisterInfo);
+
+ // Register the MC subtarget info.
+ TargetRegistry::RegisterMCSubtargetInfo(*T, createNVPTXMCSubtargetInfo);
+
+ // Register the MCInstPrinter.
+ TargetRegistry::RegisterMCInstPrinter(*T, createNVPTXMCInstPrinter);
+ }
}
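
The refactor replaces two copies of the registration body with a ranged-for over a braced list of Target pointers. A self-contained sketch of the idiom:

#include <cstdio>
#include <initializer_list>

struct Target { const char *Name; };
static Target T32{"nvptx"}, T64{"nvptx64"};

int main() {
  for (Target *T : {&T32, &T64}) // one body, both targets
    std::printf("registering %s\n", T->Name);
}
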
diff --git a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.h b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.h
index 98821d2..bfd5123 100644
--- a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.h
+++ b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.h
@@ -14,6 +14,8 @@
#ifndef LLVM_LIB_TARGET_NVPTX_MCTARGETDESC_NVPTXMCTARGETDESC_H
#define LLVM_LIB_TARGET_NVPTX_MCTARGETDESC_NVPTXMCTARGETDESC_H
+#include <stdint.h>
+
namespace llvm {
class Target;
diff --git a/lib/Target/NVPTX/NVPTXAllocaHoisting.cpp b/lib/Target/NVPTX/NVPTXAllocaHoisting.cpp
index 1f37696..4f3ccf4 100644
--- a/lib/Target/NVPTX/NVPTXAllocaHoisting.cpp
+++ b/lib/Target/NVPTX/NVPTXAllocaHoisting.cpp
@@ -12,11 +12,33 @@
//===----------------------------------------------------------------------===//
#include "NVPTXAllocaHoisting.h"
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/CodeGen/StackProtector.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
+using namespace llvm;
-namespace llvm {
+namespace {
+// Hoisting the alloca instructions in the non-entry blocks to the entry
+// block.
+class NVPTXAllocaHoisting : public FunctionPass {
+public:
+ static char ID; // Pass ID
+ NVPTXAllocaHoisting() : FunctionPass(ID) {}
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addPreserved<MachineFunctionAnalysis>();
+ AU.addPreserved<StackProtector>();
+ }
+
+ const char *getPassName() const override {
+ return "NVPTX specific alloca hoisting";
+ }
+
+ bool runOnFunction(Function &function) override;
+};
+} // namespace
bool NVPTXAllocaHoisting::runOnFunction(Function &function) {
bool functionModified = false;
@@ -36,11 +58,15 @@ bool NVPTXAllocaHoisting::runOnFunction(Function &function) {
return functionModified;
}
-char NVPTXAllocaHoisting::ID = 1;
-static RegisterPass<NVPTXAllocaHoisting>
-X("alloca-hoisting", "Hoisting alloca instructions in non-entry "
- "blocks to the entry block");
+char NVPTXAllocaHoisting::ID = 0;
+
+namespace llvm {
+void initializeNVPTXAllocaHoistingPass(PassRegistry &);
+}
-FunctionPass *createAllocaHoisting() { return new NVPTXAllocaHoisting(); }
+INITIALIZE_PASS(
+ NVPTXAllocaHoisting, "alloca-hoisting",
+ "Hoisting alloca instructions in non-entry blocks to the entry block",
+ false, false)
-} // end namespace llvm
+FunctionPass *llvm::createAllocaHoisting() { return new NVPTXAllocaHoisting; }
diff --git a/lib/Target/NVPTX/NVPTXAllocaHoisting.h b/lib/Target/NVPTX/NVPTXAllocaHoisting.h
index c343980..7a6fc7d 100644
--- a/lib/Target/NVPTX/NVPTXAllocaHoisting.h
+++ b/lib/Target/NVPTX/NVPTXAllocaHoisting.h
@@ -14,38 +14,10 @@
#ifndef LLVM_LIB_TARGET_NVPTX_NVPTXALLOCAHOISTING_H
#define LLVM_LIB_TARGET_NVPTX_NVPTXALLOCAHOISTING_H
-#include "llvm/CodeGen/MachineFunctionAnalysis.h"
-#include "llvm/CodeGen/StackProtector.h"
-#include "llvm/IR/DataLayout.h"
-#include "llvm/Pass.h"
-
namespace llvm {
-
class FunctionPass;
-class Function;
-
-// Hoisting the alloca instructions in the non-entry blocks to the entry
-// block.
-class NVPTXAllocaHoisting : public FunctionPass {
-public:
- static char ID; // Pass ID
- NVPTXAllocaHoisting() : FunctionPass(ID) {}
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<DataLayoutPass>();
- AU.addPreserved<MachineFunctionAnalysis>();
- AU.addPreserved<StackProtector>();
- }
-
- const char *getPassName() const override {
- return "NVPTX specific alloca hoisting";
- }
-
- bool runOnFunction(Function &function) override;
-};
extern FunctionPass *createAllocaHoisting();
-
} // end namespace llvm
#endif
diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
index 833db04..cc58b07 100644
--- a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
+++ b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
@@ -504,8 +504,7 @@ void NVPTXAsmPrinter::EmitFunctionBodyEnd() {
void NVPTXAsmPrinter::emitImplicitDef(const MachineInstr *MI) const {
unsigned RegNo = MI->getOperand(0).getReg();
- const TargetRegisterInfo *TRI = nvptxSubtarget->getRegisterInfo();
- if (TRI->isVirtualRegister(RegNo)) {
+ if (TargetRegisterInfo::isVirtualRegister(RegNo)) {
OutStreamer.AddComment(Twine("implicit-def: ") +
getVirtualRegisterName(RegNo));
} else {
@@ -522,15 +521,15 @@ void NVPTXAsmPrinter::emitKernelFunctionDirectives(const Function &F,
// If none of reqntid* is specified, don't output reqntid directive.
unsigned reqntidx, reqntidy, reqntidz;
bool specified = false;
- if (llvm::getReqNTIDx(F, reqntidx) == false)
+ if (!llvm::getReqNTIDx(F, reqntidx))
reqntidx = 1;
else
specified = true;
- if (llvm::getReqNTIDy(F, reqntidy) == false)
+ if (!llvm::getReqNTIDy(F, reqntidy))
reqntidy = 1;
else
specified = true;
- if (llvm::getReqNTIDz(F, reqntidz) == false)
+ if (!llvm::getReqNTIDz(F, reqntidz))
reqntidz = 1;
else
specified = true;
@@ -544,15 +543,15 @@ void NVPTXAsmPrinter::emitKernelFunctionDirectives(const Function &F,
// If none of maxntid* is specified, don't output maxntid directive.
unsigned maxntidx, maxntidy, maxntidz;
specified = false;
- if (llvm::getMaxNTIDx(F, maxntidx) == false)
+ if (!llvm::getMaxNTIDx(F, maxntidx))
maxntidx = 1;
else
specified = true;
- if (llvm::getMaxNTIDy(F, maxntidy) == false)
+ if (!llvm::getMaxNTIDy(F, maxntidy))
maxntidy = 1;
else
specified = true;
- if (llvm::getMaxNTIDz(F, maxntidz) == false)
+ if (!llvm::getMaxNTIDz(F, maxntidz))
maxntidz = 1;
else
specified = true;
@@ -673,7 +672,7 @@ static bool usedInOneFunc(const User *U, Function const *&oneFunc) {
}
for (const User *UU : U->users())
- if (usedInOneFunc(UU, oneFunc) == false)
+ if (!usedInOneFunc(UU, oneFunc))
return false;
return true;
@@ -687,7 +686,7 @@ static bool usedInOneFunc(const User *U, Function const *&oneFunc) {
* 3. Is the global variable referenced only in one function?
*/
static bool canDemoteGlobalVar(const GlobalVariable *gv, Function const *&f) {
- if (gv->hasInternalLinkage() == false)
+ if (!gv->hasInternalLinkage())
return false;
const PointerType *Pty = gv->getType();
if (Pty->getAddressSpace() != llvm::ADDRESS_SPACE_SHARED)
@@ -696,7 +695,7 @@ static bool canDemoteGlobalVar(const GlobalVariable *gv, Function const *&f) {
const Function *oneFunc = nullptr;
bool flag = usedInOneFunc(gv, oneFunc);
- if (flag == false)
+ if (!flag)
return false;
if (!oneFunc)
return false;
@@ -1472,7 +1471,7 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
}
}
- if (PAL.hasAttribute(paramIndex + 1, Attribute::ByVal) == false) {
+ if (!PAL.hasAttribute(paramIndex + 1, Attribute::ByVal)) {
if (Ty->isAggregateType() || Ty->isVectorTy()) {
// Just print .param .align <a> .b8 .param[size];
// <a> = PAL.getparamalignment
@@ -1788,7 +1787,7 @@ void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes,
break;
} else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
if (const ConstantInt *constInt = dyn_cast<ConstantInt>(
- ConstantFoldConstantExpression(Cexpr, TD))) {
+ ConstantFoldConstantExpression(Cexpr, *TD))) {
int int32 = (int)(constInt->getZExtValue());
ptr = (unsigned char *)&int32;
aggBuffer->addBytes(ptr, 4, Bytes);
@@ -1810,7 +1809,7 @@ void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes,
break;
} else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
if (const ConstantInt *constInt = dyn_cast<ConstantInt>(
- ConstantFoldConstantExpression(Cexpr, TD))) {
+ ConstantFoldConstantExpression(Cexpr, *TD))) {
long long int64 = (long long)(constInt->getZExtValue());
ptr = (unsigned char *)&int64;
aggBuffer->addBytes(ptr, 8, Bytes);
@@ -2085,13 +2084,6 @@ void NVPTXAsmPrinter::printMemOperand(const MachineInstr *MI, int opNum,
}
}
-
-// Force static initialization.
-extern "C" void LLVMInitializeNVPTXBackendAsmPrinter() {
- RegisterAsmPrinter<NVPTXAsmPrinter> X(TheNVPTXTarget32);
- RegisterAsmPrinter<NVPTXAsmPrinter> Y(TheNVPTXTarget64);
-}
-
void NVPTXAsmPrinter::emitSrcInText(StringRef filename, unsigned line) {
std::stringstream temp;
LineReader *reader = this->getReader(filename.str());
diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.h b/lib/Target/NVPTX/NVPTXAsmPrinter.h
index 7e6b5e8..9b11e70 100644
--- a/lib/Target/NVPTX/NVPTXAsmPrinter.h
+++ b/lib/Target/NVPTX/NVPTXAsmPrinter.h
@@ -92,8 +92,8 @@ class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter {
bool EmitGeneric;
public:
- AggBuffer(unsigned _size, raw_ostream &_O, NVPTXAsmPrinter &_AP)
- : size(_size), buffer(_size), O(_O), AP(_AP) {
+ AggBuffer(unsigned size, raw_ostream &O, NVPTXAsmPrinter &AP)
+ : size(size), buffer(size), O(O), AP(AP) {
curpos = 0;
numSymbols = 0;
EmitGeneric = AP.EmitGeneric;
diff --git a/lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp b/lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp
index f3a095d..6d7c99c 100644
--- a/lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp
+++ b/lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp
@@ -123,10 +123,9 @@ bool NVPTXFavorNonGenericAddrSpaces::hoistAddrSpaceCastFromGEP(
// =>
// %0 = gep X, indices
// %1 = addrspacecast %0
- GetElementPtrInst *NewGEPI = GetElementPtrInst::Create(Cast->getOperand(0),
- Indices,
- GEP->getName(),
- GEPI);
+ GetElementPtrInst *NewGEPI = GetElementPtrInst::Create(
+ GEP->getSourceElementType(), Cast->getOperand(0), Indices,
+ GEP->getName(), GEPI);
NewGEPI->setIsInBounds(GEP->isInBounds());
GEP->replaceAllUsesWith(
new AddrSpaceCastInst(NewGEPI, GEP->getType(), "", GEPI));
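GetElementPtrInst::Create grows an explicit source-element-type parameter in this release (part of the move toward opaque pointer types), which is why the hoisting code above passes GEP->getSourceElementType() through. A sketch of the pattern in isolation; rebuildGEPSketch is a hypothetical helper:

    #include "llvm/ADT/SmallVector.h"
    #include "llvm/IR/Instructions.h"
    using namespace llvm;

    // Rebuild an existing GEP on a new pointer operand. The element type is
    // now stated explicitly rather than recovered from the pointer's type.
    static GetElementPtrInst *rebuildGEPSketch(GetElementPtrInst *GEP,
                                               Value *NewPtr,
                                               Instruction *InsertBefore) {
      SmallVector<Value *, 8> Indices(GEP->idx_begin(), GEP->idx_end());
      GetElementPtrInst *NewGEP = GetElementPtrInst::Create(
          GEP->getSourceElementType(), NewPtr, Indices, GEP->getName(),
          InsertBefore);
      NewGEP->setIsInBounds(GEP->isInBounds());
      return NewGEP;
    }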
diff --git a/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp b/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp
index 86d134b..850c020 100644
--- a/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp
+++ b/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp
@@ -343,6 +343,7 @@ Value *GenericToNVVM::remapConstantExpr(Module *M, Function *F, ConstantExpr *C,
// GetElementPtrConstantExpr
return cast<GEPOperator>(C)->isInBounds()
? Builder.CreateGEP(
+ cast<GEPOperator>(C)->getSourceElementType(),
NewOperands[0],
makeArrayRef(&NewOperands[1], NumOperands - 1))
: Builder.CreateInBoundsGEP(
diff --git a/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
index e01c780..52c5e1b 100644
--- a/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
+++ b/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
@@ -78,10 +78,7 @@ bool NVPTXDAGToDAGISel::usePrecSqrtF32() const {
return UsePrecSqrtF32;
} else {
// Otherwise, use sqrt.approx if fast math is enabled
- if (TM.Options.UnsafeFPMath)
- return false;
- else
- return true;
+ return !TM.Options.UnsafeFPMath;
}
}
@@ -5044,12 +5041,12 @@ bool NVPTXDAGToDAGISel::ChkMemSDNodeAddressSpace(SDNode *N,
/// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
/// inline asm expressions.
bool NVPTXDAGToDAGISel::SelectInlineAsmMemoryOperand(
- const SDValue &Op, char ConstraintCode, std::vector<SDValue> &OutOps) {
+ const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) {
SDValue Op0, Op1;
- switch (ConstraintCode) {
+ switch (ConstraintID) {
default:
return true;
- case 'm': // memory
+ case InlineAsm::Constraint_m: // memory
if (SelectDirectAddr(Op, Op0)) {
OutOps.push_back(Op0);
OutOps.push_back(CurDAG->getTargetConstant(0, MVT::i32));
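SelectInlineAsmMemoryOperand now keys on InlineAsm::Constraint_* IDs instead of raw constraint letters; the letter-to-ID mapping lives in the target's getInlineAsmMemConstraint hook (NVPTX's, added further down in NVPTXISelLowering.h, folds everything to Constraint_m for now). A hedged sketch of a more discriminating hook, with a constraint set that is illustrative rather than NVPTX's:

    #include "llvm/IR/InlineAsm.h"
    #include <string>

    // Illustrative only: translate memory-constraint strings to IDs.
    static unsigned getMemConstraintSketch(const std::string &Code) {
      if (Code == "m")
        return llvm::InlineAsm::Constraint_m; // generic memory operand
      if (Code == "o")
        return llvm::InlineAsm::Constraint_o; // offsettable memory operand
      return llvm::InlineAsm::Constraint_Unknown;
    }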
diff --git a/lib/Target/NVPTX/NVPTXISelDAGToDAG.h b/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
index ca432b5..6d845c9 100644
--- a/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
+++ b/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
@@ -48,7 +48,7 @@ public:
const NVPTXSubtarget *Subtarget;
bool SelectInlineAsmMemoryOperand(const SDValue &Op,
- char ConstraintCode,
+ unsigned ConstraintID,
std::vector<SDValue> &OutOps) override;
private:
// Include the pieces autogenerated from the target description.
diff --git a/lib/Target/NVPTX/NVPTXISelLowering.cpp b/lib/Target/NVPTX/NVPTXISelLowering.cpp
index 1dc81f7..ff74e6e 100644
--- a/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -930,7 +930,7 @@ NVPTXTargetLowering::getPrototype(Type *retTy, const ArgListTy &Args,
}
first = false;
- if (Outs[OIdx].Flags.isByVal() == false) {
+ if (!Outs[OIdx].Flags.isByVal()) {
if (Ty->isAggregateType() || Ty->isVectorTy()) {
unsigned align = 0;
const CallInst *CallI = cast<CallInst>(CS->getInstruction());
@@ -1075,7 +1075,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
EVT VT = Outs[OIdx].VT;
Type *Ty = Args[i].Ty;
- if (Outs[OIdx].Flags.isByVal() == false) {
+ if (!Outs[OIdx].Flags.isByVal()) {
if (Ty->isAggregateType()) {
// aggregate
SmallVector<EVT, 16> vtparts;
@@ -1459,7 +1459,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
ObjectVT) == NumElts &&
"Vector was not scalarized");
unsigned sz = EltVT.getSizeInBits();
- bool needTruncate = sz < 8 ? true : false;
+ bool needTruncate = sz < 8;
if (NumElts == 1) {
// Just a simple load
@@ -1577,7 +1577,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
unsigned sz = VTs[i].getSizeInBits();
unsigned AlignI = GreatestCommonDivisor64(RetAlign, Offsets[i]);
- bool needTruncate = sz < 8 ? true : false;
+ bool needTruncate = sz < 8;
if (VTs[i].isInteger() && (sz < 8))
sz = 8;
@@ -1940,9 +1940,7 @@ NVPTXTargetLowering::LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const {
}
// Then any remaining arguments
- for (unsigned i = 2, e = N->getNumOperands(); i != e; ++i) {
- Ops.push_back(N->getOperand(i));
- }
+ Ops.append(N->op_begin() + 2, N->op_end());
SDValue NewSt = DAG.getMemIntrinsicNode(
Opcode, DL, DAG.getVTList(MVT::Other), Ops,
@@ -2118,7 +2116,7 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
// to newly created nodes. The SDNodes for params have to
// appear in the same order as their order of appearance
// in the original function. "idx+1" holds that order.
- if (PAL.hasAttribute(i + 1, Attribute::ByVal) == false) {
+ if (!PAL.hasAttribute(i + 1, Attribute::ByVal)) {
if (Ty->isAggregateType()) {
SmallVector<EVT, 16> vtparts;
SmallVector<uint64_t, 16> offsets;
@@ -4494,7 +4492,6 @@ NVPTXTargetObjectFile::~NVPTXTargetObjectFile() {
delete DwarfLocSection;
delete DwarfARangesSection;
delete DwarfRangesSection;
- delete DwarfMacroInfoSection;
}
const MCSection *
diff --git a/lib/Target/NVPTX/NVPTXISelLowering.h b/lib/Target/NVPTX/NVPTXISelLowering.h
index 1b4da2c..8594364 100644
--- a/lib/Target/NVPTX/NVPTXISelLowering.h
+++ b/lib/Target/NVPTX/NVPTXISelLowering.h
@@ -497,6 +497,12 @@ public:
std::vector<SDValue> &Ops,
SelectionDAG &DAG) const override;
+ unsigned getInlineAsmMemConstraint(
+ const std::string &ConstraintCode) const override {
+ // FIXME: Map different constraints differently.
+ return InlineAsm::Constraint_m;
+ }
+
const NVPTXTargetMachine *nvTM;
// PTX always uses 32-bit shift amounts
diff --git a/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp b/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp
index f0c3663..578401a 100644
--- a/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp
+++ b/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp
@@ -12,6 +12,8 @@
//===----------------------------------------------------------------------===//
#include "NVPTXLowerAggrCopies.h"
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/CodeGen/StackProtector.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
@@ -22,10 +24,33 @@
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
+#include "llvm/Support/Debug.h"
+
+#define DEBUG_TYPE "nvptx"
using namespace llvm;
-namespace llvm { FunctionPass *createLowerAggrCopies(); }
+namespace {
+// Actual analysis class, which is a FunctionPass.
+struct NVPTXLowerAggrCopies : public FunctionPass {
+ static char ID;
+
+ NVPTXLowerAggrCopies() : FunctionPass(ID) {}
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addPreserved<MachineFunctionAnalysis>();
+ AU.addPreserved<StackProtector>();
+ }
+
+ bool runOnFunction(Function &F) override;
+
+ static const unsigned MaxAggrCopySize = 128;
+
+ const char *getPassName() const override {
+ return "Lower aggregate copies/intrinsics into loops";
+ }
+};
+} // namespace
char NVPTXLowerAggrCopies::ID = 0;
@@ -104,7 +129,7 @@ bool NVPTXLowerAggrCopies::runOnFunction(Function &F) {
SmallVector<MemTransferInst *, 4> aggrMemcpys;
SmallVector<MemSetInst *, 4> aggrMemsets;
- const DataLayout *DL = &getAnalysis<DataLayoutPass>().getDataLayout();
+ const DataLayout &DL = F.getParent()->getDataLayout();
LLVMContext &Context = F.getParent()->getContext();
//
@@ -117,10 +142,10 @@ bool NVPTXLowerAggrCopies::runOnFunction(Function &F) {
++II) {
if (LoadInst *load = dyn_cast<LoadInst>(II)) {
- if (load->hasOneUse() == false)
+ if (!load->hasOneUse())
continue;
- if (DL->getTypeStoreSize(load->getType()) < MaxAggrCopySize)
+ if (DL.getTypeStoreSize(load->getType()) < MaxAggrCopySize)
continue;
User *use = load->user_back();
@@ -166,7 +191,7 @@ bool NVPTXLowerAggrCopies::runOnFunction(Function &F) {
StoreInst *store = dyn_cast<StoreInst>(*load->user_begin());
Value *srcAddr = load->getOperand(0);
Value *dstAddr = store->getOperand(1);
- unsigned numLoads = DL->getTypeStoreSize(load->getType());
+ unsigned numLoads = DL.getTypeStoreSize(load->getType());
Value *len = ConstantInt::get(Type::getInt32Ty(Context), numLoads);
convertTransferToLoop(store, srcAddr, dstAddr, len, load->isVolatile(),
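With DataLayoutPass gone, the pass queries the layout straight off the Module, which is why the relocated getAnalysisUsage above no longer requires it. The pattern in isolation (a minimal sketch; the size check is just an illustrative use):

    #include "llvm/IR/DataLayout.h"
    #include "llvm/IR/Function.h"
    #include "llvm/IR/Module.h"
    using namespace llvm;

    static bool hasWidePointersSketch(Function &F) {
      // The layout is a property of the module; no analysis pass required.
      const DataLayout &DL = F.getParent()->getDataLayout();
      return DL.getPointerSizeInBits() == 64;
    }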
diff --git a/lib/Target/NVPTX/NVPTXLowerAggrCopies.h b/lib/Target/NVPTX/NVPTXLowerAggrCopies.h
index da301d5..3c39f53 100644
--- a/lib/Target/NVPTX/NVPTXLowerAggrCopies.h
+++ b/lib/Target/NVPTX/NVPTXLowerAggrCopies.h
@@ -15,35 +15,10 @@
#ifndef LLVM_LIB_TARGET_NVPTX_NVPTXLOWERAGGRCOPIES_H
#define LLVM_LIB_TARGET_NVPTX_NVPTXLOWERAGGRCOPIES_H
-#include "llvm/CodeGen/MachineFunctionAnalysis.h"
-#include "llvm/CodeGen/StackProtector.h"
-#include "llvm/IR/DataLayout.h"
-#include "llvm/Pass.h"
-
namespace llvm {
+class FunctionPass;
-// actual analysis class, which is a functionpass
-struct NVPTXLowerAggrCopies : public FunctionPass {
- static char ID;
-
- NVPTXLowerAggrCopies() : FunctionPass(ID) {}
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<DataLayoutPass>();
- AU.addPreserved<MachineFunctionAnalysis>();
- AU.addPreserved<StackProtector>();
- }
-
- bool runOnFunction(Function &F) override;
-
- static const unsigned MaxAggrCopySize = 128;
-
- const char *getPassName() const override {
- return "Lower aggregate copies/intrinsics into loops";
- }
-};
-
-extern FunctionPass *createLowerAggrCopies();
+FunctionPass *createLowerAggrCopies();
}
#endif
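The header shrinks to the usual LLVM shape: the pass class moves into an anonymous namespace in the .cpp, and the header keeps only a forward declaration plus the factory function. A compressed sketch of that split, with all names hypothetical:

    #include "llvm/Pass.h"

    // In the header: nothing but the factory.
    namespace llvm { class FunctionPass; FunctionPass *createExamplePass(); }

    // In the .cpp: the pass type is an implementation detail; nothing
    // outside the file can name it, only create it through the factory.
    namespace {
    struct ExamplePass : public llvm::FunctionPass {
      static char ID;
      ExamplePass() : llvm::FunctionPass(ID) {}
      bool runOnFunction(llvm::Function &F) override { return false; }
    };
    }
    char ExamplePass::ID = 0;

    llvm::FunctionPass *llvm::createExamplePass() { return new ExamplePass(); }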
diff --git a/lib/Target/NVPTX/NVPTXLowerStructArgs.cpp b/lib/Target/NVPTX/NVPTXLowerStructArgs.cpp
index 3149399..68dfbb7 100644
--- a/lib/Target/NVPTX/NVPTXLowerStructArgs.cpp
+++ b/lib/Target/NVPTX/NVPTXLowerStructArgs.cpp
@@ -35,7 +35,8 @@ namespace llvm {
void initializeNVPTXLowerStructArgsPass(PassRegistry &);
}
-class LLVM_LIBRARY_VISIBILITY NVPTXLowerStructArgs : public FunctionPass {
+namespace {
+class NVPTXLowerStructArgs : public FunctionPass {
bool runOnFunction(Function &F) override;
void handleStructPtrArgs(Function &);
@@ -48,6 +49,7 @@ public:
return "Copy structure (byval *) arguments to stack";
}
};
+} // namespace
char NVPTXLowerStructArgs::ID = 1;
diff --git a/lib/Target/NVPTX/NVPTXMCExpr.h b/lib/Target/NVPTX/NVPTXMCExpr.h
index d39a394..f075b8b 100644
--- a/lib/Target/NVPTX/NVPTXMCExpr.h
+++ b/lib/Target/NVPTX/NVPTXMCExpr.h
@@ -29,8 +29,8 @@ private:
const VariantKind Kind;
const APFloat Flt;
- explicit NVPTXFloatMCExpr(VariantKind _Kind, APFloat _Flt)
- : Kind(_Kind), Flt(_Flt) {}
+ explicit NVPTXFloatMCExpr(VariantKind Kind, APFloat Flt)
+ : Kind(Kind), Flt(Flt) {}
public:
/// @name Construction
diff --git a/lib/Target/NVPTX/NVPTXRegisterInfo.cpp b/lib/Target/NVPTX/NVPTXRegisterInfo.cpp
index 5ca96e4..6e97f9ef 100644
--- a/lib/Target/NVPTX/NVPTXRegisterInfo.cpp
+++ b/lib/Target/NVPTX/NVPTXRegisterInfo.cpp
@@ -78,7 +78,7 @@ NVPTXRegisterInfo::NVPTXRegisterInfo() : NVPTXGenRegisterInfo(0) {}
/// NVPTX Callee Saved Registers
const MCPhysReg *
-NVPTXRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
+NVPTXRegisterInfo::getCalleeSavedRegs(const MachineFunction *) const {
static const MCPhysReg CalleeSavedRegs[] = { 0 };
return CalleeSavedRegs;
}
diff --git a/lib/Target/NVPTX/NVPTXRegisterInfo.h b/lib/Target/NVPTX/NVPTXRegisterInfo.h
index 75b8f15..c310a9c 100644
--- a/lib/Target/NVPTX/NVPTXRegisterInfo.h
+++ b/lib/Target/NVPTX/NVPTXRegisterInfo.h
@@ -35,8 +35,7 @@ public:
//------------------------------------------------------
// NVPTX callee saved registers
- const MCPhysReg *
- getCalleeSavedRegs(const MachineFunction *MF = nullptr) const override;
+ const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override;
BitVector getReservedRegs(const MachineFunction &MF) const override;
diff --git a/lib/Target/NVPTX/NVPTXSection.h b/lib/Target/NVPTX/NVPTXSection.h
index f1d3cb4..0d2627d 100644
--- a/lib/Target/NVPTX/NVPTXSection.h
+++ b/lib/Target/NVPTX/NVPTXSection.h
@@ -26,7 +26,7 @@ namespace llvm {
class NVPTXSection : public MCSection {
virtual void anchor();
public:
- NVPTXSection(SectionVariant V, SectionKind K) : MCSection(V, K) {}
+ NVPTXSection(SectionVariant V, SectionKind K) : MCSection(V, K, nullptr) {}
virtual ~NVPTXSection() {}
/// Override this as NVPTX has its own way of printing switching
@@ -36,11 +36,8 @@ public:
const MCExpr *Subsection) const override {}
/// Base address of PTX sections is zero.
- bool isBaseAddressKnownZero() const override { return true; }
bool UseCodeAlign() const override { return false; }
bool isVirtualSection() const override { return false; }
- std::string getLabelBeginName() const override { return ""; }
- std::string getLabelEndName() const override { return ""; }
};
} // end namespace llvm
diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/lib/Target/NVPTX/NVPTXTargetMachine.cpp
index 1a267a6..1b6bc71 100644
--- a/lib/Target/NVPTX/NVPTXTargetMachine.cpp
+++ b/lib/Target/NVPTX/NVPTXTargetMachine.cpp
@@ -50,6 +50,7 @@ using namespace llvm;
namespace llvm {
void initializeNVVMReflectPass(PassRegistry&);
void initializeGenericToNVVMPass(PassRegistry&);
+void initializeNVPTXAllocaHoistingPass(PassRegistry &);
void initializeNVPTXAssignValidGlobalNamesPass(PassRegistry&);
void initializeNVPTXFavorNonGenericAddrSpacesPass(PassRegistry &);
void initializeNVPTXLowerStructArgsPass(PassRegistry &);
@@ -64,6 +65,7 @@ extern "C" void LLVMInitializeNVPTXTarget() {
// but it's very NVPTX-specific.
initializeNVVMReflectPass(*PassRegistry::getPassRegistry());
initializeGenericToNVVMPass(*PassRegistry::getPassRegistry());
+ initializeNVPTXAllocaHoistingPass(*PassRegistry::getPassRegistry());
initializeNVPTXAssignValidGlobalNamesPass(*PassRegistry::getPassRegistry());
initializeNVPTXFavorNonGenericAddrSpacesPass(
*PassRegistry::getPassRegistry());
@@ -86,9 +88,10 @@ NVPTXTargetMachine::NVPTXTargetMachine(const Target &T, StringRef TT,
const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL, bool is64bit)
- : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), is64bit(is64bit),
- TLOF(make_unique<NVPTXTargetObjectFile>()),
- DL(computeDataLayout(is64bit)), Subtarget(TT, CPU, FS, *this) {
+ : LLVMTargetMachine(T, computeDataLayout(is64bit), TT, CPU, FS, Options, RM,
+ CM, OL),
+ is64bit(is64bit), TLOF(make_unique<NVPTXTargetObjectFile>()),
+ Subtarget(TT, CPU, FS, *this) {
if (Triple(TT).getOS() == Triple::NVCL)
drvInterface = NVPTX::NVCL;
else
@@ -183,8 +186,7 @@ void NVPTXPassConfig::addIRPasses() {
}
bool NVPTXPassConfig::addInstSelector() {
- const NVPTXSubtarget &ST =
- getTM<NVPTXTargetMachine>().getSubtarget<NVPTXSubtarget>();
+ const NVPTXSubtarget &ST = *getTM<NVPTXTargetMachine>().getSubtargetImpl();
addPass(createLowerAggrCopies());
addPass(createAllocaHoisting());
diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.h b/lib/Target/NVPTX/NVPTXTargetMachine.h
index a81abfe..b8df5af 100644
--- a/lib/Target/NVPTX/NVPTXTargetMachine.h
+++ b/lib/Target/NVPTX/NVPTXTargetMachine.h
@@ -27,7 +27,6 @@ namespace llvm {
class NVPTXTargetMachine : public LLVMTargetMachine {
bool is64bit;
std::unique_ptr<TargetLoweringObjectFile> TLOF;
- const DataLayout DL; // Calculates type size & alignment
NVPTX::DrvInterface drvInterface;
NVPTXSubtarget Subtarget;
@@ -40,8 +39,10 @@ public:
CodeModel::Model CM, CodeGenOpt::Level OP, bool is64bit);
~NVPTXTargetMachine() override;
- const DataLayout *getDataLayout() const override { return &DL; }
- const NVPTXSubtarget *getSubtargetImpl() const override { return &Subtarget; }
+ const NVPTXSubtarget *getSubtargetImpl(const Function &) const override {
+ return &Subtarget;
+ }
+ const NVPTXSubtarget *getSubtargetImpl() const { return &Subtarget; }
bool is64Bit() const { return is64bit; }
NVPTX::DrvInterface getDrvInterface() const { return drvInterface; }
ManagedStringPool *getManagedStrPool() const {
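The DataLayout member migrates from NVPTXTargetMachine into LLVMTargetMachine itself: the subclass computes the layout string once, forwards it as the base constructor's second argument, and drops its own DL field and getDataLayout() override. A sketch of the layout-string side, mirroring what NVPTX's computeDataLayout plausibly produces (the exact string is an assumption here):

    #include <string>

    // Hypothetical stand-in for NVPTX's computeDataLayout(): little endian,
    // 32-bit pointers only in 32-bit mode, and NVPTX's native int widths.
    static std::string computeDataLayoutSketch(bool Is64Bit) {
      std::string Ret = "e";
      if (!Is64Bit)
        Ret += "-p:32:32";
      Ret += "-i64:64-v16:16-v32:32-n16:32:64";
      return Ret;
    }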
diff --git a/lib/Target/NVPTX/NVPTXTargetObjectFile.h b/lib/Target/NVPTX/NVPTXTargetObjectFile.h
index 00ceca5..5d9ab0d 100644
--- a/lib/Target/NVPTX/NVPTXTargetObjectFile.h
+++ b/lib/Target/NVPTX/NVPTXTargetObjectFile.h
@@ -41,7 +41,6 @@ public:
DwarfLocSection = nullptr;
DwarfARangesSection = nullptr;
DwarfRangesSection = nullptr;
- DwarfMacroInfoSection = nullptr;
}
virtual ~NVPTXTargetObjectFile();
@@ -83,8 +82,6 @@ public:
new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata());
DwarfRangesSection =
new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata());
- DwarfMacroInfoSection =
- new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata());
}
const MCSection *getSectionForConstant(SectionKind Kind,
diff --git a/lib/Target/NVPTX/NVPTXUtilities.cpp b/lib/Target/NVPTX/NVPTXUtilities.cpp
index cf1feac..1f178af 100644
--- a/lib/Target/NVPTX/NVPTXUtilities.cpp
+++ b/lib/Target/NVPTX/NVPTXUtilities.cpp
@@ -293,12 +293,9 @@ bool llvm::isKernelFunction(const Function &F) {
unsigned x = 0;
bool retval = llvm::findOneNVVMAnnotation(
&F, llvm::PropertyAnnotationNames[llvm::PROPERTY_ISKERNEL_FUNCTION], x);
- if (retval == false) {
+ if (!retval) {
// There is no NVVM metadata, check the calling convention
- if (F.getCallingConv() == llvm::CallingConv::PTX_Kernel)
- return true;
- else
- return false;
+ return F.getCallingConv() == llvm::CallingConv::PTX_Kernel;
}
return (x == 1);
}
@@ -307,7 +304,7 @@ bool llvm::getAlign(const Function &F, unsigned index, unsigned &align) {
std::vector<unsigned> Vs;
bool retval = llvm::findAllNVVMAnnotation(
&F, llvm::PropertyAnnotationNames[llvm::PROPERTY_ALIGN], Vs);
- if (retval == false)
+ if (!retval)
return false;
for (int i = 0, e = Vs.size(); i < e; i++) {
unsigned v = Vs[i];
diff --git a/lib/Target/NVPTX/NVPTXutil.cpp b/lib/Target/NVPTX/NVPTXutil.cpp
deleted file mode 100644
index 5f074b3..0000000
--- a/lib/Target/NVPTX/NVPTXutil.cpp
+++ /dev/null
@@ -1,90 +0,0 @@
-//===-- NVPTXutil.cpp - Functions exported to CodeGen --*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the functions that can be used in CodeGen.
-//
-//===----------------------------------------------------------------------===//
-
-#include "NVPTXutil.h"
-#include "NVPTX.h"
-
-using namespace llvm;
-
-namespace llvm {
-
-bool isParamLoad(const MachineInstr *MI) {
- if ((MI->getOpcode() != NVPTX::LD_i32_avar) &&
- (MI->getOpcode() != NVPTX::LD_i64_avar))
- return false;
- if (MI->getOperand(2).isImm() == false)
- return false;
- if (MI->getOperand(2).getImm() != NVPTX::PTXLdStInstCode::PARAM)
- return false;
- return true;
-}
-
-#define DATA_MASK 0x7f
-#define DIGIT_WIDTH 7
-#define MORE_BYTES 0x80
-
-static int encode_leb128(uint64_t val, int *nbytes, char *space, int splen) {
- char *a;
- char *end = space + splen;
-
- a = space;
- do {
- unsigned char uc;
-
- if (a >= end)
- return 1;
- uc = val & DATA_MASK;
- val >>= DIGIT_WIDTH;
- if (val != 0)
- uc |= MORE_BYTES;
- *a = uc;
- a++;
- } while (val);
- *nbytes = a - space;
- return 0;
-}
-
-#undef DATA_MASK
-#undef DIGIT_WIDTH
-#undef MORE_BYTES
-
-uint64_t encode_leb128(const char *str) {
- union {
- uint64_t x;
- char a[8];
- } temp64;
-
- temp64.x = 0;
-
- for (unsigned i = 0, e = strlen(str); i != e; ++i)
- temp64.a[i] = str[e - 1 - i];
-
- char encoded[16];
- int nbytes;
-
- int retval = encode_leb128(temp64.x, &nbytes, encoded, 16);
-
- (void) retval;
- assert(retval == 0 && "Encoding to leb128 failed");
-
- assert(nbytes <= 8 &&
- "Cannot support register names with leb128 encoding > 8 bytes");
-
- temp64.x = 0;
- for (int i = 0; i < nbytes; ++i)
- temp64.a[i] = encoded[i];
-
- return temp64.x;
-}
-
-} // end namespace llvm
diff --git a/lib/Target/NVPTX/NVPTXutil.h b/lib/Target/NVPTX/NVPTXutil.h
deleted file mode 100644
index 1915dac..0000000
--- a/lib/Target/NVPTX/NVPTXutil.h
+++ /dev/null
@@ -1,25 +0,0 @@
-//===-- NVPTXutil.h - Functions exported to CodeGen --*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the functions that can be used in CodeGen.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TARGET_NVPTX_NVPTXUTIL_H
-#define LLVM_LIB_TARGET_NVPTX_NVPTXUTIL_H
-
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineInstr.h"
-
-namespace llvm {
-bool isParamLoad(const MachineInstr *);
-uint64_t encode_leb128(const char *str);
-}
-
-#endif
diff --git a/lib/Target/NVPTX/NVVMReflect.cpp b/lib/Target/NVPTX/NVVMReflect.cpp
index a8d6b95..5e375b7 100644
--- a/lib/Target/NVPTX/NVVMReflect.cpp
+++ b/lib/Target/NVPTX/NVVMReflect.cpp
@@ -29,6 +29,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_os_ostream.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include <map>
#include <sstream>
@@ -137,6 +138,26 @@ bool NVVMReflect::handleFunction(Function *ReflectFunction) {
// ConstantArray can be found successfully, see if it can be
// found in VarMap. If so, replace the uses of CallInst with the
// value found in VarMap. If not, replace the use with value 0.
+
+ // IR for __nvvm_reflect calls differs between CUDA versions:
+ // CUDA 6.5 and earlier uses this sequence:
+ // %ptr = tail call i8* @llvm.nvvm.ptr.constant.to.gen.p0i8.p4i8
+ // (i8 addrspace(4)* getelementptr inbounds
+ // ([8 x i8], [8 x i8] addrspace(4)* @str, i32 0, i32 0))
+ // %reflect = tail call i32 @__nvvm_reflect(i8* %ptr)
+ //
+ // Value returned by Sym->getOperand(0) is a Constant with a
+ // ConstantDataSequential operand which can be converted to string and used
+ // for lookup.
+ //
+ // CUDA 7.0 does it slightly differently:
+ // %reflect = call i32 @__nvvm_reflect(i8* addrspacecast
+ // (i8 addrspace(1)* getelementptr inbounds
+ // ([8 x i8], [8 x i8] addrspace(1)* @str, i32 0, i32 0) to i8*))
+ //
+ // In this case, we get a Constant with a GlobalVariable operand and we need
+ // to dig deeper to find its initializer with the string we'll use for lookup.
+
for (User *U : ReflectFunction->users()) {
assert(isa<CallInst>(U) && "Only a call instruction can use _reflect");
CallInst *Reflect = cast<CallInst>(U);
@@ -158,16 +179,23 @@ bool NVVMReflect::handleFunction(Function *ReflectFunction) {
const Value *Sym = GEP->getOperand(0);
assert(isa<Constant>(Sym) && "Format of _reflect function not recognized");
- const Constant *SymStr = cast<Constant>(Sym);
+ const Value *Operand = cast<Constant>(Sym)->getOperand(0);
+ if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(Operand)) {
+ // For CUDA-7.0 style __nvvm_reflect calls we need to find operand's
+ // initializer.
+ assert(GV->hasInitializer() &&
+ "Format of _reflect function not recognized");
+ const Constant *Initializer = GV->getInitializer();
+ Operand = Initializer;
+ }
- assert(isa<ConstantDataSequential>(SymStr->getOperand(0)) &&
+ assert(isa<ConstantDataSequential>(Operand) &&
"Format of _reflect function not recognized");
-
- assert(cast<ConstantDataSequential>(SymStr->getOperand(0))->isCString() &&
+ assert(cast<ConstantDataSequential>(Operand)->isCString() &&
"Format of _reflect function not recognized");
std::string ReflectArg =
- cast<ConstantDataSequential>(SymStr->getOperand(0))->getAsString();
+ cast<ConstantDataSequential>(Operand)->getAsString();
ReflectArg = ReflectArg.substr(0, ReflectArg.size() - 1);
DEBUG(dbgs() << "Arg of _reflect : " << ReflectArg << "\n");
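Condensing the change above: for CUDA 7.0-style calls the interesting constant is a GlobalVariable, so the pass looks through to its initializer before the ConstantDataSequential checks. A standalone sketch of the string recovery (getReflectArgSketch is illustrative; the real code asserts instead of returning empty):

    #include "llvm/IR/Constants.h"
    #include "llvm/IR/GlobalVariable.h"
    #include <string>
    using namespace llvm;

    static std::string getReflectArgSketch(const Constant *Sym) {
      const Value *Operand = Sym->getOperand(0);
      // CUDA 7.0 shape: the operand is a global whose initializer holds
      // the string.
      if (const auto *GV = dyn_cast<GlobalVariable>(Operand))
        if (GV->hasInitializer())
          Operand = GV->getInitializer();
      if (const auto *CDS = dyn_cast<ConstantDataSequential>(Operand))
        if (CDS->isCString())
          return CDS->getAsString().drop_back().str(); // drop trailing NUL
      return std::string();
    }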
diff --git a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
index bf00e73..99a1633 100644
--- a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
+++ b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
@@ -271,9 +271,9 @@ class PPCAsmParser : public MCTargetAsmParser {
public:
- PPCAsmParser(MCSubtargetInfo &_STI, MCAsmParser &_Parser,
- const MCInstrInfo &_MII, const MCTargetOptions &Options)
- : MCTargetAsmParser(), STI(_STI), MII(_MII) {
+ PPCAsmParser(MCSubtargetInfo &STI, MCAsmParser &, const MCInstrInfo &MII,
+ const MCTargetOptions &Options)
+ : MCTargetAsmParser(), STI(STI), MII(MII) {
// Check for 64-bit vs. 32-bit pointer mode.
Triple TheTriple(STI.getTargetTriple());
IsPPC64 = (TheTriple.getArch() == Triple::ppc64 ||
@@ -425,7 +425,9 @@ public:
bool isToken() const override { return Kind == Token; }
bool isImm() const override { return Kind == Immediate || Kind == Expression; }
+ bool isU1Imm() const { return Kind == Immediate && isUInt<1>(getImm()); }
bool isU2Imm() const { return Kind == Immediate && isUInt<2>(getImm()); }
+ bool isU3Imm() const { return Kind == Immediate && isUInt<3>(getImm()); }
bool isU4Imm() const { return Kind == Immediate && isUInt<4>(getImm()); }
bool isU5Imm() const { return Kind == Immediate && isUInt<5>(getImm()); }
bool isS5Imm() const { return Kind == Immediate && isInt<5>(getImm()); }
diff --git a/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp b/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
index 0ed0723..a9f5fc7 100644
--- a/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
+++ b/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
@@ -189,6 +189,12 @@ static DecodeStatus DecodeCRRCRegisterClass(MCInst &Inst, uint64_t RegNo,
return decodeRegisterClass(Inst, RegNo, CRRegs);
}
+static DecodeStatus DecodeCRRC0RegisterClass(MCInst &Inst, uint64_t RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ return decodeRegisterClass(Inst, RegNo, CRRegs);
+}
+
static DecodeStatus DecodeCRBITRCRegisterClass(MCInst &Inst, uint64_t RegNo,
uint64_t Address,
const void *Decoder) {
diff --git a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
index c287fbe..311a4f2 100644
--- a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
+++ b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
@@ -214,6 +214,13 @@ void PPCInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNo,
printOperand(MI, OpNo+1, O);
}
+void PPCInstPrinter::printU1ImmOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ unsigned int Value = MI->getOperand(OpNo).getImm();
+ assert(Value <= 1 && "Invalid u1imm argument!");
+ O << (unsigned int)Value;
+}
+
void PPCInstPrinter::printU2ImmOperand(const MCInst *MI, unsigned OpNo,
raw_ostream &O) {
unsigned int Value = MI->getOperand(OpNo).getImm();
@@ -221,6 +228,13 @@ void PPCInstPrinter::printU2ImmOperand(const MCInst *MI, unsigned OpNo,
O << (unsigned int)Value;
}
+void PPCInstPrinter::printU3ImmOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ unsigned int Value = MI->getOperand(OpNo).getImm();
+  assert(Value <= 7 && "Invalid u3imm argument!");
+ O << (unsigned int)Value;
+}
+
void PPCInstPrinter::printU4ImmOperand(const MCInst *MI, unsigned OpNo,
raw_ostream &O) {
unsigned int Value = MI->getOperand(OpNo).getImm();
diff --git a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h
index 6ead19b..8718743 100644
--- a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h
+++ b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h
@@ -43,7 +43,9 @@ public:
void printPredicateOperand(const MCInst *MI, unsigned OpNo,
raw_ostream &O, const char *Modifier = nullptr);
+ void printU1ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printU2ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printU3ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printU4ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printS5ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printU5ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
index 2b4f2d8..d8fab5b 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
@@ -45,6 +45,10 @@ PPCMCAsmInfoDarwin::PPCMCAsmInfoDarwin(bool is64Bit, const Triple& T) {
void PPCELFMCAsmInfo::anchor() { }
PPCELFMCAsmInfo::PPCELFMCAsmInfo(bool is64Bit, const Triple& T) {
+ // FIXME: This is not always needed. For example, it is not needed in the
+  // v2 ABI.
+ NeedsLocalForSize = true;
+
if (is64Bit) {
PointerSize = CalleeSaveStackSlotSize = 8;
}
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
index 06d380e..b9f0afb 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
@@ -14,6 +14,7 @@
#include "MCTargetDesc/PPCMCTargetDesc.h"
#include "MCTargetDesc/PPCFixupKinds.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
@@ -39,10 +40,10 @@ class PPCMCCodeEmitter : public MCCodeEmitter {
bool IsLittleEndian;
public:
- PPCMCCodeEmitter(const MCInstrInfo &mcii, MCContext &ctx, bool isLittle)
- : MCII(mcii), CTX(ctx), IsLittleEndian(isLittle) {
- }
-
+ PPCMCCodeEmitter(const MCInstrInfo &mcii, MCContext &ctx)
+ : MCII(mcii), CTX(ctx),
+ IsLittleEndian(ctx.getAsmInfo()->isLittleEndian()) {}
+
~PPCMCCodeEmitter() {}
unsigned getDirectBrEncoding(const MCInst &MI, unsigned OpNo,
@@ -158,14 +159,11 @@ public:
};
} // end anonymous namespace
-
+
MCCodeEmitter *llvm::createPPCMCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI,
MCContext &Ctx) {
- Triple TT(STI.getTargetTriple());
- bool IsLittleEndian = TT.getArch() == Triple::ppc64le;
- return new PPCMCCodeEmitter(MCII, Ctx, IsLittleEndian);
+ return new PPCMCCodeEmitter(MCII, Ctx);
}
unsigned PPCMCCodeEmitter::
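The emitter's byte order now comes from the MCAsmInfo already attached to the MCContext, which is what lets the MCSubtargetInfo parameter drop out of createPPCMCCodeEmitter and its registration below. The lookup in isolation (a trivial sketch):

    #include "llvm/MC/MCAsmInfo.h"
    #include "llvm/MC/MCContext.h"
    using namespace llvm;

    static bool isLittleEndianSketch(const MCContext &Ctx) {
      // The context owns an MCAsmInfo, and the byte order lives there now.
      return Ctx.getAsmInfo()->isLittleEndian();
    }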
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h b/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h
index f0a6bb9..1c840d9 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h
@@ -36,9 +36,8 @@ private:
int64_t EvaluateAsInt64(int64_t Value) const;
- explicit PPCMCExpr(VariantKind _Kind, const MCExpr *_Expr,
- bool _IsDarwin)
- : Kind(_Kind), Expr(_Expr), IsDarwin(_IsDarwin) {}
+ explicit PPCMCExpr(VariantKind Kind, const MCExpr *Expr, bool IsDarwin)
+ : Kind(Kind), Expr(Expr), IsDarwin(IsDarwin) {}
public:
/// @name Construction
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
index f2da389..2f7a768 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
@@ -145,6 +145,7 @@ public:
}
void emitTCEntry(const MCSymbol &S) override {
// Creates a R_PPC64_TOC relocation
+ Streamer.EmitValueToAlignment(8);
Streamer.EmitSymbolValue(&S, 8);
}
void emitMachine(StringRef CPU) override {
@@ -222,32 +223,19 @@ public:
};
}
-// This is duplicated code. Refactor this.
-static MCStreamer *createMCStreamer(const Target &T, StringRef TT,
- MCContext &Ctx, MCAsmBackend &MAB,
- raw_ostream &OS, MCCodeEmitter *Emitter,
- const MCSubtargetInfo &STI, bool RelaxAll) {
- if (Triple(TT).isOSDarwin()) {
- MCStreamer *S = createMachOStreamer(Ctx, MAB, OS, Emitter, RelaxAll);
- new PPCTargetMachOStreamer(*S);
- return S;
- }
-
- MCStreamer *S = createELFStreamer(Ctx, MAB, OS, Emitter, RelaxAll);
- new PPCTargetELFStreamer(*S);
- return S;
+static MCTargetStreamer *createAsmTargetStreamer(MCStreamer &S,
+ formatted_raw_ostream &OS,
+ MCInstPrinter *InstPrint,
+ bool isVerboseAsm) {
+ return new PPCTargetAsmStreamer(S, OS);
}
-static MCStreamer *
-createMCAsmStreamer(MCContext &Ctx, formatted_raw_ostream &OS,
- bool isVerboseAsm, bool useDwarfDirectory,
- MCInstPrinter *InstPrint, MCCodeEmitter *CE,
- MCAsmBackend *TAB, bool ShowInst) {
-
- MCStreamer *S = llvm::createAsmStreamer(
- Ctx, OS, isVerboseAsm, useDwarfDirectory, InstPrint, CE, TAB, ShowInst);
- new PPCTargetAsmStreamer(*S, OS);
- return S;
+static MCTargetStreamer *
+createObjectTargetStreamer(MCStreamer &S, const MCSubtargetInfo &STI) {
+ Triple TT(STI.getTargetTriple());
+ if (TT.getObjectFormat() == Triple::ELF)
+ return new PPCTargetELFStreamer(S);
+ return new PPCTargetMachOStreamer(S);
}
static MCInstPrinter *createPPCMCInstPrinter(const Target &T,
@@ -261,60 +249,36 @@ static MCInstPrinter *createPPCMCInstPrinter(const Target &T,
}
extern "C" void LLVMInitializePowerPCTargetMC() {
- // Register the MC asm info.
- RegisterMCAsmInfoFn C(ThePPC32Target, createPPCMCAsmInfo);
- RegisterMCAsmInfoFn D(ThePPC64Target, createPPCMCAsmInfo);
- RegisterMCAsmInfoFn E(ThePPC64LETarget, createPPCMCAsmInfo);
-
- // Register the MC codegen info.
- TargetRegistry::RegisterMCCodeGenInfo(ThePPC32Target, createPPCMCCodeGenInfo);
- TargetRegistry::RegisterMCCodeGenInfo(ThePPC64Target, createPPCMCCodeGenInfo);
- TargetRegistry::RegisterMCCodeGenInfo(ThePPC64LETarget,
- createPPCMCCodeGenInfo);
-
- // Register the MC instruction info.
- TargetRegistry::RegisterMCInstrInfo(ThePPC32Target, createPPCMCInstrInfo);
- TargetRegistry::RegisterMCInstrInfo(ThePPC64Target, createPPCMCInstrInfo);
- TargetRegistry::RegisterMCInstrInfo(ThePPC64LETarget,
- createPPCMCInstrInfo);
-
- // Register the MC register info.
- TargetRegistry::RegisterMCRegInfo(ThePPC32Target, createPPCMCRegisterInfo);
- TargetRegistry::RegisterMCRegInfo(ThePPC64Target, createPPCMCRegisterInfo);
- TargetRegistry::RegisterMCRegInfo(ThePPC64LETarget, createPPCMCRegisterInfo);
-
- // Register the MC subtarget info.
- TargetRegistry::RegisterMCSubtargetInfo(ThePPC32Target,
- createPPCMCSubtargetInfo);
- TargetRegistry::RegisterMCSubtargetInfo(ThePPC64Target,
- createPPCMCSubtargetInfo);
- TargetRegistry::RegisterMCSubtargetInfo(ThePPC64LETarget,
- createPPCMCSubtargetInfo);
-
- // Register the MC Code Emitter
- TargetRegistry::RegisterMCCodeEmitter(ThePPC32Target, createPPCMCCodeEmitter);
- TargetRegistry::RegisterMCCodeEmitter(ThePPC64Target, createPPCMCCodeEmitter);
- TargetRegistry::RegisterMCCodeEmitter(ThePPC64LETarget,
- createPPCMCCodeEmitter);
-
+ for (Target *T : {&ThePPC32Target, &ThePPC64Target, &ThePPC64LETarget}) {
+ // Register the MC asm info.
+ RegisterMCAsmInfoFn C(*T, createPPCMCAsmInfo);
+
+ // Register the MC codegen info.
+ TargetRegistry::RegisterMCCodeGenInfo(*T, createPPCMCCodeGenInfo);
+
+ // Register the MC instruction info.
+ TargetRegistry::RegisterMCInstrInfo(*T, createPPCMCInstrInfo);
+
+ // Register the MC register info.
+ TargetRegistry::RegisterMCRegInfo(*T, createPPCMCRegisterInfo);
+
+ // Register the MC subtarget info.
+ TargetRegistry::RegisterMCSubtargetInfo(*T, createPPCMCSubtargetInfo);
+
+ // Register the MC Code Emitter
+ TargetRegistry::RegisterMCCodeEmitter(*T, createPPCMCCodeEmitter);
+
// Register the asm backend.
- TargetRegistry::RegisterMCAsmBackend(ThePPC32Target, createPPCAsmBackend);
- TargetRegistry::RegisterMCAsmBackend(ThePPC64Target, createPPCAsmBackend);
- TargetRegistry::RegisterMCAsmBackend(ThePPC64LETarget, createPPCAsmBackend);
-
- // Register the object streamer.
- TargetRegistry::RegisterMCObjectStreamer(ThePPC32Target, createMCStreamer);
- TargetRegistry::RegisterMCObjectStreamer(ThePPC64Target, createMCStreamer);
- TargetRegistry::RegisterMCObjectStreamer(ThePPC64LETarget, createMCStreamer);
-
- // Register the asm streamer.
- TargetRegistry::RegisterAsmStreamer(ThePPC32Target, createMCAsmStreamer);
- TargetRegistry::RegisterAsmStreamer(ThePPC64Target, createMCAsmStreamer);
- TargetRegistry::RegisterAsmStreamer(ThePPC64LETarget, createMCAsmStreamer);
-
- // Register the MCInstPrinter.
- TargetRegistry::RegisterMCInstPrinter(ThePPC32Target, createPPCMCInstPrinter);
- TargetRegistry::RegisterMCInstPrinter(ThePPC64Target, createPPCMCInstPrinter);
- TargetRegistry::RegisterMCInstPrinter(ThePPC64LETarget,
- createPPCMCInstPrinter);
+ TargetRegistry::RegisterMCAsmBackend(*T, createPPCAsmBackend);
+
+ // Register the object target streamer.
+ TargetRegistry::RegisterObjectTargetStreamer(*T,
+ createObjectTargetStreamer);
+
+ // Register the asm target streamer.
+ TargetRegistry::RegisterAsmTargetStreamer(*T, createAsmTargetStreamer);
+
+ // Register the MCInstPrinter.
+ TargetRegistry::RegisterMCInstPrinter(*T, createPPCMCInstPrinter);
+ }
}
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
index 68f7f7a..8b1e3b4 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
@@ -34,10 +34,9 @@ class raw_ostream;
extern Target ThePPC32Target;
extern Target ThePPC64Target;
extern Target ThePPC64LETarget;
-
+
MCCodeEmitter *createPPCMCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI,
MCContext &Ctx);
MCAsmBackend *createPPCAsmBackend(const Target &T, const MCRegisterInfo &MRI,
diff --git a/lib/Target/PowerPC/PPC.td b/lib/Target/PowerPC/PPC.td
index f53add5..f175f6d 100644
--- a/lib/Target/PowerPC/PPC.td
+++ b/lib/Target/PowerPC/PPC.td
@@ -18,7 +18,7 @@ include "llvm/Target/Target.td"
//===----------------------------------------------------------------------===//
// PowerPC Subtarget features.
//
-
+
//===----------------------------------------------------------------------===//
// CPU Directives //
//===----------------------------------------------------------------------===//
@@ -112,14 +112,21 @@ def FeatureVSX : SubtargetFeature<"vsx","HasVSX", "true",
def FeatureP8Altivec : SubtargetFeature<"power8-altivec", "HasP8Altivec", "true",
"Enable POWER8 Altivec instructions",
[FeatureAltivec]>;
+def FeatureP8Crypto : SubtargetFeature<"crypto", "HasP8Crypto", "true",
+ "Enable POWER8 Crypto instructions",
+ [FeatureP8Altivec]>;
def FeatureP8Vector : SubtargetFeature<"power8-vector", "HasP8Vector", "true",
"Enable POWER8 vector instructions",
[FeatureVSX, FeatureP8Altivec]>;
-
+def FeaturePartwordAtomic : SubtargetFeature<"partword-atomics",
+ "HasPartwordAtomics", "true",
+ "Enable l[bh]arx and st[bh]cx.">;
def FeatureInvariantFunctionDescriptors :
SubtargetFeature<"invariant-function-descriptors",
"HasInvariantFunctionDescriptors", "true",
"Assume function descriptors are invariant">;
+def FeatureHTM : SubtargetFeature<"htm", "HasHTM", "true",
+ "Enable Hardware Transactional Memory instructions">;
def DeprecatedMFTB : SubtargetFeature<"", "DeprecatedMFTB", "true",
"Treat mftb as deprecated">;
@@ -256,11 +263,11 @@ def ProcessorFeatures {
[DirectivePwr8, FeatureAltivec, FeatureP8Altivec, FeatureVSX,
FeatureP8Vector, FeatureMFOCRF, FeatureFCPSGN, FeatureFSqrt,
FeatureFRE, FeatureFRES, FeatureFRSQRTE, FeatureFRSQRTES,
- FeatureRecipPrec, FeatureSTFIWX, FeatureLFIWAX,
+ FeatureRecipPrec, FeatureSTFIWX, FeatureLFIWAX, FeatureHTM,
FeatureFPRND, FeatureFPCVT, FeatureISEL,
- FeaturePOPCNTD, FeatureCMPB, FeatureLDBRX,
+ FeaturePOPCNTD, FeatureCMPB, FeatureLDBRX, FeatureP8Crypto,
Feature64Bit /*, Feature64BitRegs */, FeatureICBT,
- DeprecatedMFTB, DeprecatedDST];
+ FeaturePartwordAtomic, DeprecatedMFTB, DeprecatedDST];
}
def : ProcessorModel<"970", G5Model,
@@ -339,7 +346,7 @@ def : ProcessorModel<"pwr7", P7Model,
FeatureRecipPrec, FeatureSTFIWX, FeatureLFIWAX,
FeatureFPRND, FeatureFPCVT, FeatureISEL,
FeaturePOPCNTD, FeatureCMPB, FeatureLDBRX,
- Feature64Bit /*, Feature64BitRegs */,
+ Feature64Bit /*, Feature64BitRegs */, FeaturePartwordAtomic,
DeprecatedMFTB, DeprecatedDST]>;
def : ProcessorModel<"pwr8", P8Model, ProcessorFeatures.Power8FeatureList>;
def : Processor<"ppc", G3Itineraries, [Directive32]>;
diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp
index 1327290..cd60906 100644
--- a/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -69,12 +69,11 @@ namespace {
protected:
MapVector<MCSymbol*, MCSymbol*> TOC;
const PPCSubtarget *Subtarget;
- uint64_t TOCLabelID;
StackMaps SM;
public:
explicit PPCAsmPrinter(TargetMachine &TM,
std::unique_ptr<MCStreamer> Streamer)
- : AsmPrinter(TM, std::move(Streamer)), TOCLabelID(0), SM(*this) {}
+ : AsmPrinter(TM, std::move(Streamer)), SM(*this) {}
const char *getPassName() const override {
return "PowerPC Assembly Printer";
@@ -321,17 +320,9 @@ bool PPCAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
/// exists for it. If not, create one. Then return a symbol that references
/// the TOC entry.
MCSymbol *PPCAsmPrinter::lookUpOrCreateTOCEntry(MCSymbol *Sym) {
- const DataLayout *DL = TM.getDataLayout();
MCSymbol *&TOCEntry = TOC[Sym];
-
- // To avoid name clash check if the name already exists.
- while (!TOCEntry) {
- if (OutContext.LookupSymbol(Twine(DL->getPrivateGlobalPrefix()) +
- "C" + Twine(TOCLabelID++)) == nullptr) {
- TOCEntry = GetTempSymbol("C", TOCLabelID);
- }
- }
-
+ if (!TOCEntry)
+ TOCEntry = createTempSymbol("C");
return TOCEntry;
}
@@ -1068,8 +1059,7 @@ void PPCLinuxAsmPrinter::EmitFunctionEntryLabel() {
OutStreamer.SwitchSection(Section);
OutStreamer.EmitLabel(CurrentFnSym);
OutStreamer.EmitValueToAlignment(8);
- MCSymbol *Symbol1 =
- OutContext.GetOrCreateSymbol(".L." + Twine(CurrentFnSym->getName()));
+ MCSymbol *Symbol1 = CurrentFnSymForSize;
// Generates a R_PPC64_ADDR64 (from FK_DATA_8) relocation for the function
// entry point.
OutStreamer.EmitValue(MCSymbolRefExpr::Create(Symbol1, OutContext),
@@ -1082,11 +1072,6 @@ void PPCLinuxAsmPrinter::EmitFunctionEntryLabel() {
// Emit a null environment pointer.
OutStreamer.EmitIntValue(0, 8 /* size */);
OutStreamer.SwitchSection(Current.first, Current.second);
-
- MCSymbol *RealFnSym = OutContext.GetOrCreateSymbol(
- ".L." + Twine(CurrentFnSym->getName()));
- OutStreamer.EmitLabel(RealFnSym);
- CurrentFnSymForSize = RealFnSym;
}
diff --git a/lib/Target/PowerPC/PPCCTRLoops.cpp b/lib/Target/PowerPC/PPCCTRLoops.cpp
index 5af8aab..c595f44 100644
--- a/lib/Target/PowerPC/PPCCTRLoops.cpp
+++ b/lib/Target/PowerPC/PPCCTRLoops.cpp
@@ -171,8 +171,7 @@ bool PPCCTRLoops::runOnFunction(Function &F) {
LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
SE = &getAnalysis<ScalarEvolution>();
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- DL = DLP ? &DLP->getDataLayout() : nullptr;
+ DL = &F.getParent()->getDataLayout();
auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
LibInfo = TLIP ? &TLIP->getTLI() : nullptr;
@@ -533,7 +532,7 @@ bool PPCCTRLoops::convertToCTRLoop(Loop *L) {
// selected branch.
MadeChange = true;
- SCEVExpander SCEVE(*SE, "loopcnt");
+ SCEVExpander SCEVE(*SE, Preheader->getModule()->getDataLayout(), "loopcnt");
LLVMContext &C = SE->getContext();
Type *CountType = TT.isArch64Bit() ? Type::getInt64Ty(C) :
Type::getInt32Ty(C);
diff --git a/lib/Target/PowerPC/PPCFastISel.cpp b/lib/Target/PowerPC/PPCFastISel.cpp
index 54532b5..fbd7b6d 100644
--- a/lib/Target/PowerPC/PPCFastISel.cpp
+++ b/lib/Target/PowerPC/PPCFastISel.cpp
@@ -675,8 +675,18 @@ bool PPCFastISel::PPCEmitStore(MVT VT, unsigned SrcReg, Address &Addr) {
case PPC::STFS: Opc = PPC::STFSX; break;
case PPC::STFD: Opc = IsVSFRC ? PPC::STXSDX : PPC::STFDX; break;
}
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
- .addReg(SrcReg).addReg(Addr.Base.Reg).addReg(IndexReg);
+
+ auto MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
+ .addReg(SrcReg);
+
+ // If we have an index register defined we use it in the store inst,
+ // otherwise we use X0 as base as it makes the vector instructions to
+ // use zero in the computation of the effective address regardless the
+ // content of the register.
+ if (IndexReg)
+ MIB.addReg(Addr.Base.Reg).addReg(IndexReg);
+ else
+ MIB.addReg(PPC::ZERO8).addReg(Addr.Base.Reg);
}
return true;
@@ -1532,7 +1542,7 @@ bool PPCFastISel::fastLowerCall(CallLoweringInfo &CLI) {
// Add a register mask with the call-preserved registers. Proper
// defs for return values will be added by setPhysRegsDeadExcept().
- MIB.addRegMask(TRI.getCallPreservedMask(CC));
+ MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC));
CLI.Call = MIB;
diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index b10e854..3ac8e94 100644
--- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -186,20 +186,34 @@ namespace {
/// register can be improved, but it is wrong to substitute Reg+Reg for
/// Reg in an asm, because the load or store opcode would have to change.
bool SelectInlineAsmMemoryOperand(const SDValue &Op,
- char ConstraintCode,
+ unsigned ConstraintID,
std::vector<SDValue> &OutOps) override {
- // We need to make sure that this one operand does not end up in r0
- // (because we might end up lowering this as 0(%op)).
- const TargetRegisterInfo *TRI = PPCSubTarget->getRegisterInfo();
- const TargetRegisterClass *TRC = TRI->getPointerRegClass(*MF, /*Kind=*/1);
- SDValue RC = CurDAG->getTargetConstant(TRC->getID(), MVT::i32);
- SDValue NewOp =
- SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
- SDLoc(Op), Op.getValueType(),
- Op, RC), 0);
-
- OutOps.push_back(NewOp);
- return false;
+
+ switch(ConstraintID) {
+ default:
+ errs() << "ConstraintID: " << ConstraintID << "\n";
+ llvm_unreachable("Unexpected asm memory constraint");
+ case InlineAsm::Constraint_es:
+ case InlineAsm::Constraint_i:
+ case InlineAsm::Constraint_m:
+ case InlineAsm::Constraint_o:
+ case InlineAsm::Constraint_Q:
+ case InlineAsm::Constraint_Z:
+ case InlineAsm::Constraint_Zy:
+ // We need to make sure that this one operand does not end up in r0
+ // (because we might end up lowering this as 0(%op)).
+ const TargetRegisterInfo *TRI = PPCSubTarget->getRegisterInfo();
+ const TargetRegisterClass *TRC = TRI->getPointerRegClass(*MF, /*Kind=*/1);
+ SDValue RC = CurDAG->getTargetConstant(TRC->getID(), MVT::i32);
+ SDValue NewOp =
+ SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
+ SDLoc(Op), Op.getValueType(),
+ Op, RC), 0);
+
+ OutOps.push_back(NewOp);
+ return false;
+ }
+ return true;
}
void InsertVRSaveCode(MachineFunction &MF);
@@ -2105,7 +2119,7 @@ static unsigned getCRIdxForSetCC(ISD::CondCode CC, bool &Invert) {
// getVCmpInst: return the vector compare instruction for the specified
// vector type and condition code. Since this is for altivec specific code,
-// only support the altivec types (v16i8, v8i16, v4i32, and v4f32).
+// only support the altivec types (v16i8, v8i16, v4i32, v2i64, and v4f32).
static unsigned int getVCmpInst(MVT VecVT, ISD::CondCode CC,
bool HasVSX, bool &Swap, bool &Negate) {
Swap = false;
@@ -2184,6 +2198,8 @@ static unsigned int getVCmpInst(MVT VecVT, ISD::CondCode CC,
return PPC::VCMPEQUH;
else if (VecVT == MVT::v4i32)
return PPC::VCMPEQUW;
+ else if (VecVT == MVT::v2i64)
+ return PPC::VCMPEQUD;
break;
case ISD::SETGT:
if (VecVT == MVT::v16i8)
@@ -2192,6 +2208,8 @@ static unsigned int getVCmpInst(MVT VecVT, ISD::CondCode CC,
return PPC::VCMPGTSH;
else if (VecVT == MVT::v4i32)
return PPC::VCMPGTSW;
+ else if (VecVT == MVT::v2i64)
+ return PPC::VCMPGTSD;
break;
case ISD::SETUGT:
if (VecVT == MVT::v16i8)
@@ -2200,6 +2218,8 @@ static unsigned int getVCmpInst(MVT VecVT, ISD::CondCode CC,
return PPC::VCMPGTUH;
else if (VecVT == MVT::v4i32)
return PPC::VCMPGTUW;
+ else if (VecVT == MVT::v2i64)
+ return PPC::VCMPGTUD;
break;
default:
break;
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index 147e94b..871531e 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -516,7 +516,12 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
}
- setOperationAction(ISD::MUL, MVT::v4i32, Custom);
+
+ if (Subtarget.hasP8Altivec())
+ setOperationAction(ISD::MUL, MVT::v4i32, Legal);
+ else
+ setOperationAction(ISD::MUL, MVT::v4i32, Custom);
+
setOperationAction(ISD::MUL, MVT::v8i16, Custom);
setOperationAction(ISD::MUL, MVT::v16i8, Custom);
@@ -574,15 +579,24 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
addRegisterClass(MVT::v4f32, &PPC::VSRCRegClass);
addRegisterClass(MVT::v2f64, &PPC::VSRCRegClass);
- // VSX v2i64 only supports non-arithmetic operations.
- setOperationAction(ISD::ADD, MVT::v2i64, Expand);
- setOperationAction(ISD::SUB, MVT::v2i64, Expand);
+ if (Subtarget.hasP8Altivec()) {
+ setOperationAction(ISD::SHL, MVT::v2i64, Legal);
+ setOperationAction(ISD::SRA, MVT::v2i64, Legal);
+ setOperationAction(ISD::SRL, MVT::v2i64, Legal);
- setOperationAction(ISD::SHL, MVT::v2i64, Expand);
- setOperationAction(ISD::SRA, MVT::v2i64, Expand);
- setOperationAction(ISD::SRL, MVT::v2i64, Expand);
+ setOperationAction(ISD::SETCC, MVT::v2i64, Legal);
+ }
+ else {
+ setOperationAction(ISD::SHL, MVT::v2i64, Expand);
+ setOperationAction(ISD::SRA, MVT::v2i64, Expand);
+ setOperationAction(ISD::SRL, MVT::v2i64, Expand);
- setOperationAction(ISD::SETCC, MVT::v2i64, Custom);
+ setOperationAction(ISD::SETCC, MVT::v2i64, Custom);
+
+ // VSX v2i64 only supports non-arithmetic operations.
+ setOperationAction(ISD::ADD, MVT::v2i64, Expand);
+ setOperationAction(ISD::SUB, MVT::v2i64, Expand);
+ }
setOperationAction(ISD::LOAD, MVT::v2i64, Promote);
AddPromotedToType (ISD::LOAD, MVT::v2i64, MVT::v2f64);
@@ -892,6 +906,13 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
MaxStoresPerMemcpyOptSize = 8;
MaxStoresPerMemmove = 32;
MaxStoresPerMemmoveOptSize = 8;
+ } else if (Subtarget.getDarwinDirective() == PPC::DIR_A2) {
+ // The A2 also benefits from (very) aggressive inlining of memcpy and
+ // friends. The overhead of a the function call, even when warm, can be
+ // over one hundred cycles.
+ MaxStoresPerMemset = 128;
+ MaxStoresPerMemcpy = 128;
+ MaxStoresPerMemmove = 128;
}
}
@@ -981,8 +1002,6 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::STBRX: return "PPCISD::STBRX";
case PPCISD::LFIWAX: return "PPCISD::LFIWAX";
case PPCISD::LFIWZX: return "PPCISD::LFIWZX";
- case PPCISD::LARX: return "PPCISD::LARX";
- case PPCISD::STCX: return "PPCISD::STCX";
case PPCISD::COND_BRANCH: return "PPCISD::COND_BRANCH";
case PPCISD::BDNZ: return "PPCISD::BDNZ";
case PPCISD::BDZ: return "PPCISD::BDZ";
@@ -1384,17 +1403,10 @@ SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
// immediate field for would be zero, and we prefer to use vxor for it.
if (ValSizeInBytes < ByteSize) return SDValue();
- // If the element value is larger than the splat value, cut it in half and
- // check to see if the two halves are equal. Continue doing this until we
- // get to ByteSize. This allows us to handle 0x01010101 as 0x01.
- while (ValSizeInBytes > ByteSize) {
- ValSizeInBytes >>= 1;
-
- // If the top half equals the bottom half, we're still ok.
- if (((Value >> (ValSizeInBytes*8)) & ((1 << (8*ValSizeInBytes))-1)) !=
- (Value & ((1 << (8*ValSizeInBytes))-1)))
- return SDValue();
- }
+ // If the element value is larger than the splat value, check if it consists
+ // of a repeated bit pattern of size ByteSize.
+ if (!APInt(ValSizeInBytes * 8, Value).isSplat(ByteSize * 8))
+ return SDValue();
// Properly sign extend the value.
int MaskVal = SignExtend32(Value, ByteSize * 8);
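The replaced loop halved the value repeatedly to test for a repeated pattern; APInt::isSplat asks the same question directly. For example, 0x01010101 is a splat of the 8-bit pattern 0x01, so it can still be materialized from a one-byte splat value. A small standalone check of that equivalence:

    #include "llvm/ADT/APInt.h"
    #include <cassert>
    using namespace llvm;

    int main() {
      // 0x01010101 repeats an 8-bit pattern, so the 4-byte value reduces
      // to the 1-byte splat 0x01.
      assert(APInt(32, 0x01010101).isSplat(8));
      // 0x01020102 repeats a 16-bit pattern, but not an 8-bit one.
      assert(APInt(32, 0x01020102).isSplat(16));
      assert(!APInt(32, 0x01020102).isSplat(8));
      return 0;
    }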
@@ -2436,27 +2448,16 @@ bool llvm::CC_PPC32_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT,
return false;
}
-/// GetFPR - Get the set of FP registers that should be allocated for arguments,
+/// FPR - The set of FP registers that should be allocated for arguments,
/// on Darwin.
-static const MCPhysReg *GetFPR() {
- static const MCPhysReg FPR[] = {
- PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
- PPC::F8, PPC::F9, PPC::F10, PPC::F11, PPC::F12, PPC::F13
- };
+static const MCPhysReg FPR[] = {PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5,
+ PPC::F6, PPC::F7, PPC::F8, PPC::F9, PPC::F10,
+ PPC::F11, PPC::F12, PPC::F13};
- return FPR;
-}
-
-/// GetQFPR - Get the set of QPX registers that should be allocated for
-/// arguments.
-static const MCPhysReg *GetQFPR() {
- static const MCPhysReg QFPR[] = {
- PPC::QF1, PPC::QF2, PPC::QF3, PPC::QF4, PPC::QF5, PPC::QF6, PPC::QF7,
- PPC::QF8, PPC::QF9, PPC::QF10, PPC::QF11, PPC::QF12, PPC::QF13
- };
-
- return QFPR;
-}
+/// QFPR - The set of QPX registers that should be allocated for arguments.
+static const MCPhysReg QFPR[] = {
+ PPC::QF1, PPC::QF2, PPC::QF3, PPC::QF4, PPC::QF5, PPC::QF6, PPC::QF7,
+ PPC::QF8, PPC::QF9, PPC::QF10, PPC::QF11, PPC::QF12, PPC::QF13};
/// CalculateStackSlotSize - Calculates the size reserved for this argument on
/// the stack.
@@ -2880,9 +2881,6 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
PPC::X3, PPC::X4, PPC::X5, PPC::X6,
PPC::X7, PPC::X8, PPC::X9, PPC::X10,
};
-
- static const MCPhysReg *FPR = GetFPR();
-
static const MCPhysReg VR[] = {
PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
@@ -2892,8 +2890,6 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
PPC::VSH9, PPC::VSH10, PPC::VSH11, PPC::VSH12, PPC::VSH13
};
- static const MCPhysReg *QFPR = GetQFPR();
-
const unsigned Num_GPR_Regs = array_lengthof(GPR);
const unsigned Num_FPR_Regs = 13;
const unsigned Num_VR_Regs = array_lengthof(VR);
@@ -3291,9 +3287,6 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
PPC::X3, PPC::X4, PPC::X5, PPC::X6,
PPC::X7, PPC::X8, PPC::X9, PPC::X10,
};
-
- static const MCPhysReg *FPR = GetFPR();
-
static const MCPhysReg VR[] = {
PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
@@ -4187,7 +4180,8 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, SDLoc dl,
// Add a register mask operand representing the call-preserved registers.
const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
- const uint32_t *Mask = TRI->getCallPreservedMask(CallConv);
+ const uint32_t *Mask =
+ TRI->getCallPreservedMask(DAG.getMachineFunction(), CallConv);
assert(Mask && "Missing call preserved mask for calling convention");
Ops.push_back(DAG.getRegisterMask(Mask));
@@ -4582,8 +4576,6 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
PPC::X3, PPC::X4, PPC::X5, PPC::X6,
PPC::X7, PPC::X8, PPC::X9, PPC::X10,
};
- static const MCPhysReg *FPR = GetFPR();
-
static const MCPhysReg VR[] = {
PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
@@ -4593,8 +4585,6 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
PPC::VSH9, PPC::VSH10, PPC::VSH11, PPC::VSH12, PPC::VSH13
};
- static const MCPhysReg *QFPR = GetQFPR();
-
const unsigned NumGPRs = array_lengthof(GPR);
const unsigned NumFPRs = 13;
const unsigned NumVRs = array_lengthof(VR);
@@ -5280,8 +5270,6 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
PPC::X3, PPC::X4, PPC::X5, PPC::X6,
PPC::X7, PPC::X8, PPC::X9, PPC::X10,
};
- static const MCPhysReg *FPR = GetFPR();
-
static const MCPhysReg VR[] = {
PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
@@ -6418,7 +6406,7 @@ static SDValue BuildSplatI(int Val, unsigned SplatSize, EVT VT,
SelectionDAG &DAG, SDLoc dl) {
assert(Val >= -16 && Val <= 15 && "vsplti is out of range!");
- static const EVT VTys[] = { // canonical VT to use for each size.
+ static const MVT VTys[] = { // canonical VT to use for each size.
MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32
};
@@ -7045,7 +7033,7 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
/// altivec comparison. If it is, return true and fill in Opc/isDot with
/// information about the intrinsic.
static bool getAltivecCompareInfo(SDValue Intrin, int &CompareOpc,
- bool &isDot) {
+ bool &isDot, const PPCSubtarget &Subtarget) {
unsigned IntrinsicID =
cast<ConstantSDNode>(Intrin.getOperand(0))->getZExtValue();
CompareOpc = -1;
@@ -7058,29 +7046,83 @@ static bool getAltivecCompareInfo(SDValue Intrin, int &CompareOpc,
case Intrinsic::ppc_altivec_vcmpequb_p: CompareOpc = 6; isDot = 1; break;
case Intrinsic::ppc_altivec_vcmpequh_p: CompareOpc = 70; isDot = 1; break;
case Intrinsic::ppc_altivec_vcmpequw_p: CompareOpc = 134; isDot = 1; break;
+ case Intrinsic::ppc_altivec_vcmpequd_p:
+ if (Subtarget.hasP8Altivec()) {
+ CompareOpc = 199;
+ isDot = 1;
+ }
+ else
+ return false;
+
+ break;
case Intrinsic::ppc_altivec_vcmpgefp_p: CompareOpc = 454; isDot = 1; break;
case Intrinsic::ppc_altivec_vcmpgtfp_p: CompareOpc = 710; isDot = 1; break;
case Intrinsic::ppc_altivec_vcmpgtsb_p: CompareOpc = 774; isDot = 1; break;
case Intrinsic::ppc_altivec_vcmpgtsh_p: CompareOpc = 838; isDot = 1; break;
case Intrinsic::ppc_altivec_vcmpgtsw_p: CompareOpc = 902; isDot = 1; break;
+ case Intrinsic::ppc_altivec_vcmpgtsd_p:
+ if (Subtarget.hasP8Altivec()) {
+ CompareOpc = 967;
+ isDot = 1;
+ }
+ else
+ return false;
+
+ break;
case Intrinsic::ppc_altivec_vcmpgtub_p: CompareOpc = 518; isDot = 1; break;
case Intrinsic::ppc_altivec_vcmpgtuh_p: CompareOpc = 582; isDot = 1; break;
case Intrinsic::ppc_altivec_vcmpgtuw_p: CompareOpc = 646; isDot = 1; break;
+  case Intrinsic::ppc_altivec_vcmpgtud_p:
+    if (Subtarget.hasP8Altivec()) {
+      CompareOpc = 711;
+      isDot = 1;
+    } else
+      return false;
+    break;
+
// Normal Comparisons.
case Intrinsic::ppc_altivec_vcmpbfp: CompareOpc = 966; isDot = 0; break;
case Intrinsic::ppc_altivec_vcmpeqfp: CompareOpc = 198; isDot = 0; break;
case Intrinsic::ppc_altivec_vcmpequb: CompareOpc = 6; isDot = 0; break;
case Intrinsic::ppc_altivec_vcmpequh: CompareOpc = 70; isDot = 0; break;
case Intrinsic::ppc_altivec_vcmpequw: CompareOpc = 134; isDot = 0; break;
+  case Intrinsic::ppc_altivec_vcmpequd:
+    if (Subtarget.hasP8Altivec()) {
+      CompareOpc = 199;
+      isDot = 0;
+    } else
+      return false;
+    break;
case Intrinsic::ppc_altivec_vcmpgefp: CompareOpc = 454; isDot = 0; break;
case Intrinsic::ppc_altivec_vcmpgtfp: CompareOpc = 710; isDot = 0; break;
case Intrinsic::ppc_altivec_vcmpgtsb: CompareOpc = 774; isDot = 0; break;
case Intrinsic::ppc_altivec_vcmpgtsh: CompareOpc = 838; isDot = 0; break;
case Intrinsic::ppc_altivec_vcmpgtsw: CompareOpc = 902; isDot = 0; break;
+  case Intrinsic::ppc_altivec_vcmpgtsd:
+    if (Subtarget.hasP8Altivec()) {
+      CompareOpc = 967;
+      isDot = 0;
+    } else
+      return false;
+    break;
case Intrinsic::ppc_altivec_vcmpgtub: CompareOpc = 518; isDot = 0; break;
case Intrinsic::ppc_altivec_vcmpgtuh: CompareOpc = 582; isDot = 0; break;
case Intrinsic::ppc_altivec_vcmpgtuw: CompareOpc = 646; isDot = 0; break;
+  case Intrinsic::ppc_altivec_vcmpgtud:
+    if (Subtarget.hasP8Altivec()) {
+      CompareOpc = 711;
+      isDot = 0;
+    } else
+      return false;
+    break;
}
return true;
}
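For context, a minimal sketch of source code that exercises the new vcmpequd_p path (assuming Clang/GCC with -maltivec and -mcpu=power8; vec_all_eq on 64-bit elements lowers to the dot-form compare, whose result is read back out of CR6 as modeled by getAltivecCompareInfo):

    #include <altivec.h>

    // vec_all_eq on v2i64 selects vcmpequd. on a POWER8 subtarget; the
    // predicate value comes from CR6.
    bool all_equal(__vector signed long long a, __vector signed long long b) {
      return vec_all_eq(a, b);
    }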
@@ -7094,7 +7136,7 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
SDLoc dl(Op);
int CompareOpc;
bool isDot;
- if (!getAltivecCompareInfo(Op, CompareOpc, isDot))
+ if (!getAltivecCompareInfo(Op, CompareOpc, isDot, Subtarget))
return SDValue(); // Don't custom lower most intrinsics.
// If this is a non-dot comparison, make the VCMP node and we are done.
@@ -7738,10 +7780,36 @@ Instruction* PPCTargetLowering::emitTrailingFence(IRBuilder<> &Builder,
MachineBasicBlock *
PPCTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
- bool is64bit, unsigned BinOpcode) const {
+ unsigned AtomicSize,
+ unsigned BinOpcode) const {
// This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
+ auto LoadMnemonic = PPC::LDARX;
+ auto StoreMnemonic = PPC::STDCX;
+ switch (AtomicSize) {
+ default:
+ llvm_unreachable("Unexpected size of atomic entity");
+ case 1:
+ LoadMnemonic = PPC::LBARX;
+ StoreMnemonic = PPC::STBCX;
+    assert(Subtarget.hasPartwordAtomics() && "No support for partword atomics");
+ break;
+ case 2:
+ LoadMnemonic = PPC::LHARX;
+ StoreMnemonic = PPC::STHCX;
+    assert(Subtarget.hasPartwordAtomics() && "No support for partword atomics");
+ break;
+ case 4:
+ LoadMnemonic = PPC::LWARX;
+ StoreMnemonic = PPC::STWCX;
+ break;
+ case 8:
+ LoadMnemonic = PPC::LDARX;
+ StoreMnemonic = PPC::STDCX;
+ break;
+ }
+
const BasicBlock *LLVM_BB = BB->getBasicBlock();
MachineFunction *F = BB->getParent();
MachineFunction::iterator It = BB;
@@ -7763,7 +7831,7 @@ PPCTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
MachineRegisterInfo &RegInfo = F->getRegInfo();
unsigned TmpReg = (!BinOpcode) ? incr :
- RegInfo.createVirtualRegister( is64bit ? &PPC::G8RCRegClass
+ RegInfo.createVirtualRegister( AtomicSize == 8 ? &PPC::G8RCRegClass
: &PPC::GPRCRegClass);
// thisMBB:
@@ -7778,11 +7846,11 @@ PPCTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
// bne- loopMBB
// fallthrough --> exitMBB
BB = loopMBB;
- BuildMI(BB, dl, TII->get(is64bit ? PPC::LDARX : PPC::LWARX), dest)
+ BuildMI(BB, dl, TII->get(LoadMnemonic), dest)
.addReg(ptrA).addReg(ptrB);
if (BinOpcode)
BuildMI(BB, dl, TII->get(BinOpcode), TmpReg).addReg(incr).addReg(dest);
- BuildMI(BB, dl, TII->get(is64bit ? PPC::STDCX : PPC::STWCX))
+ BuildMI(BB, dl, TII->get(StoreMnemonic))
.addReg(TmpReg).addReg(ptrA).addReg(ptrB);
BuildMI(BB, dl, TII->get(PPC::BCC))
.addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB);
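The loop emitted here is the classic load-reserved/store-conditional sequence sketched in the comment above. As an illustration, a hand-written equivalent of the 32-bit add case (a minimal sketch using GCC/Clang inline asm; the "Z" constraint with the "%y" operand modifier produces the indexed address form that lwarx/stwcx. require):

    // Minimal sketch of the expansion for ATOMIC_LOAD_ADD_I32 (BinOpcode = ADD4).
    static inline int atomic_fetch_add32(int *p, int inc) {
      int old, tmp;
      __asm__ __volatile__(
          "1: lwarx  %0, %y2\n"    // load-reserve the current value
          "   add    %1, %0, %3\n" // apply the binary op
          "   stwcx. %1, %y2\n"    // store-conditional; sets CR0
          "   bne-   1b\n"         // reservation lost -> retry
          : "=&r"(old), "=&r"(tmp), "+Z"(*p)
          : "r"(inc)
          : "cr0", "memory");
      return old;
    }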
@@ -7800,6 +7868,10 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr *MI,
MachineBasicBlock *BB,
bool is8bit, // operation
unsigned BinOpcode) const {
+  // If we support partword atomic mnemonics, just use them.
+ if (Subtarget.hasPartwordAtomics())
+ return EmitAtomicBinary(MI, BB, is8bit ? 1 : 2, BinOpcode);
+
// This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
// In 64 bit mode we have to use 64 bits for addresses, even though the
@@ -8365,68 +8437,96 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I16)
BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ADD4);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I32)
- BB = EmitAtomicBinary(MI, BB, false, PPC::ADD4);
+ BB = EmitAtomicBinary(MI, BB, 4, PPC::ADD4);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I64)
- BB = EmitAtomicBinary(MI, BB, true, PPC::ADD8);
+ BB = EmitAtomicBinary(MI, BB, 8, PPC::ADD8);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I8)
BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::AND);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I16)
BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::AND);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I32)
- BB = EmitAtomicBinary(MI, BB, false, PPC::AND);
+ BB = EmitAtomicBinary(MI, BB, 4, PPC::AND);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I64)
- BB = EmitAtomicBinary(MI, BB, true, PPC::AND8);
+ BB = EmitAtomicBinary(MI, BB, 8, PPC::AND8);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I8)
BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::OR);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I16)
BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::OR);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I32)
- BB = EmitAtomicBinary(MI, BB, false, PPC::OR);
+ BB = EmitAtomicBinary(MI, BB, 4, PPC::OR);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I64)
- BB = EmitAtomicBinary(MI, BB, true, PPC::OR8);
+ BB = EmitAtomicBinary(MI, BB, 8, PPC::OR8);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I8)
BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::XOR);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I16)
BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::XOR);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I32)
- BB = EmitAtomicBinary(MI, BB, false, PPC::XOR);
+ BB = EmitAtomicBinary(MI, BB, 4, PPC::XOR);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I64)
- BB = EmitAtomicBinary(MI, BB, true, PPC::XOR8);
+ BB = EmitAtomicBinary(MI, BB, 8, PPC::XOR8);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I8)
BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::NAND);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I16)
BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::NAND);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I32)
- BB = EmitAtomicBinary(MI, BB, false, PPC::NAND);
+ BB = EmitAtomicBinary(MI, BB, 4, PPC::NAND);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I64)
- BB = EmitAtomicBinary(MI, BB, true, PPC::NAND8);
+ BB = EmitAtomicBinary(MI, BB, 8, PPC::NAND8);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I8)
BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::SUBF);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I16)
BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::SUBF);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I32)
- BB = EmitAtomicBinary(MI, BB, false, PPC::SUBF);
+ BB = EmitAtomicBinary(MI, BB, 4, PPC::SUBF);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I64)
- BB = EmitAtomicBinary(MI, BB, true, PPC::SUBF8);
+ BB = EmitAtomicBinary(MI, BB, 8, PPC::SUBF8);
else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I8)
BB = EmitPartwordAtomicBinary(MI, BB, true, 0);
else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I16)
BB = EmitPartwordAtomicBinary(MI, BB, false, 0);
else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I32)
- BB = EmitAtomicBinary(MI, BB, false, 0);
+ BB = EmitAtomicBinary(MI, BB, 4, 0);
else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I64)
- BB = EmitAtomicBinary(MI, BB, true, 0);
+ BB = EmitAtomicBinary(MI, BB, 8, 0);
else if (MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I32 ||
- MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I64) {
+ MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I64 ||
+ (Subtarget.hasPartwordAtomics() &&
+ MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I8) ||
+ (Subtarget.hasPartwordAtomics() &&
+ MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I16)) {
bool is64bit = MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I64;
+ auto LoadMnemonic = PPC::LDARX;
+ auto StoreMnemonic = PPC::STDCX;
+    switch (MI->getOpcode()) {
+ default:
+ llvm_unreachable("Compare and swap of unknown size");
+ case PPC::ATOMIC_CMP_SWAP_I8:
+ LoadMnemonic = PPC::LBARX;
+ StoreMnemonic = PPC::STBCX;
+      assert(Subtarget.hasPartwordAtomics() && "No support for partword atomics");
+ break;
+ case PPC::ATOMIC_CMP_SWAP_I16:
+ LoadMnemonic = PPC::LHARX;
+ StoreMnemonic = PPC::STHCX;
+      assert(Subtarget.hasPartwordAtomics() && "No support for partword atomics");
+ break;
+ case PPC::ATOMIC_CMP_SWAP_I32:
+ LoadMnemonic = PPC::LWARX;
+ StoreMnemonic = PPC::STWCX;
+ break;
+ case PPC::ATOMIC_CMP_SWAP_I64:
+ LoadMnemonic = PPC::LDARX;
+ StoreMnemonic = PPC::STDCX;
+ break;
+ }
unsigned dest = MI->getOperand(0).getReg();
unsigned ptrA = MI->getOperand(1).getReg();
unsigned ptrB = MI->getOperand(2).getReg();
@@ -8452,18 +8552,18 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
BB->addSuccessor(loop1MBB);
// loop1MBB:
- // l[wd]arx dest, ptr
+ // l[bhwd]arx dest, ptr
// cmp[wd] dest, oldval
// bne- midMBB
// loop2MBB:
- // st[wd]cx. newval, ptr
+ // st[bhwd]cx. newval, ptr
// bne- loopMBB
// b exitBB
// midMBB:
- // st[wd]cx. dest, ptr
+ // st[bhwd]cx. dest, ptr
// exitBB:
BB = loop1MBB;
- BuildMI(BB, dl, TII->get(is64bit ? PPC::LDARX : PPC::LWARX), dest)
+ BuildMI(BB, dl, TII->get(LoadMnemonic), dest)
.addReg(ptrA).addReg(ptrB);
BuildMI(BB, dl, TII->get(is64bit ? PPC::CMPD : PPC::CMPW), PPC::CR0)
.addReg(oldval).addReg(dest);
@@ -8473,7 +8573,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
BB->addSuccessor(midMBB);
BB = loop2MBB;
- BuildMI(BB, dl, TII->get(is64bit ? PPC::STDCX : PPC::STWCX))
+ BuildMI(BB, dl, TII->get(StoreMnemonic))
.addReg(newval).addReg(ptrA).addReg(ptrB);
BuildMI(BB, dl, TII->get(PPC::BCC))
.addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loop1MBB);
@@ -8482,7 +8582,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
BB->addSuccessor(exitMBB);
BB = midMBB;
- BuildMI(BB, dl, TII->get(is64bit ? PPC::STDCX : PPC::STWCX))
+ BuildMI(BB, dl, TII->get(StoreMnemonic))
.addReg(dest).addReg(ptrA).addReg(ptrB);
BB->addSuccessor(exitMBB);
@@ -8682,6 +8782,12 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY),
MI->getOperand(0).getReg())
.addReg(isEQ ? PPC::CR0EQ : PPC::CR0GT);
+ } else if (MI->getOpcode() == PPC::TCHECK_RET) {
+ DebugLoc Dl = MI->getDebugLoc();
+ MachineRegisterInfo &RegInfo = F->getRegInfo();
+ unsigned CRReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
+ BuildMI(*BB, MI, Dl, TII->get(PPC::TCHECK), CRReg);
+ return BB;
} else {
llvm_unreachable("Unexpected instr type to insert");
}
@@ -10184,7 +10290,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
isa<ConstantSDNode>(RHS) && (CC == ISD::SETEQ || CC == ISD::SETNE) &&
- getAltivecCompareInfo(LHS, CompareOpc, isDot)) {
+ getAltivecCompareInfo(LHS, CompareOpc, isDot, Subtarget)) {
assert(isDot && "Can't compare against a vector result!");
// If this is a comparison against something other than 0/1, then we know
@@ -10297,14 +10403,17 @@ void PPCTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
case Intrinsic::ppc_altivec_vcmpequb_p:
case Intrinsic::ppc_altivec_vcmpequh_p:
case Intrinsic::ppc_altivec_vcmpequw_p:
+ case Intrinsic::ppc_altivec_vcmpequd_p:
case Intrinsic::ppc_altivec_vcmpgefp_p:
case Intrinsic::ppc_altivec_vcmpgtfp_p:
case Intrinsic::ppc_altivec_vcmpgtsb_p:
case Intrinsic::ppc_altivec_vcmpgtsh_p:
case Intrinsic::ppc_altivec_vcmpgtsw_p:
+ case Intrinsic::ppc_altivec_vcmpgtsd_p:
case Intrinsic::ppc_altivec_vcmpgtub_p:
case Intrinsic::ppc_altivec_vcmpgtuh_p:
case Intrinsic::ppc_altivec_vcmpgtuw_p:
+ case Intrinsic::ppc_altivec_vcmpgtud_p:
KnownZero = ~1U; // All bits but the low one are known to be zero.
break;
}
@@ -10914,11 +11023,27 @@ EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size,
bool IsMemset, bool ZeroMemset,
bool MemcpyStrSrc,
MachineFunction &MF) const {
+ const Function *F = MF.getFunction();
+ // When expanding a memset, require at least two QPX instructions to cover
+ // the cost of loading the value to be stored from the constant pool.
+ if (Subtarget.hasQPX() && Size >= 32 && (!IsMemset || Size >= 64) &&
+ (!SrcAlign || SrcAlign >= 32) && (!DstAlign || DstAlign >= 32) &&
+ !F->hasFnAttribute(Attribute::NoImplicitFloat)) {
+ return MVT::v4f64;
+ }
+
+ // We should use Altivec/VSX loads and stores when available. For unaligned
+ // addresses, unaligned VSX loads are only fast starting with the P8.
+ if (Subtarget.hasAltivec() && Size >= 16 &&
+ (((!SrcAlign || SrcAlign >= 16) && (!DstAlign || DstAlign >= 16)) ||
+ ((IsMemset && Subtarget.hasVSX()) || Subtarget.hasP8Vector())))
+ return MVT::v4i32;
+
if (Subtarget.isPPC64()) {
return MVT::i64;
- } else {
- return MVT::i32;
}
+
+ return MVT::i32;
}
/// \brief Returns true if it is beneficial to convert a load of a constant
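As an illustration of the getOptimalMemOpType change above (a hypothetical example; the exact expansion depends on the alignment the optimizer can prove and on the subtarget flags), a 32-byte aligned copy on an Altivec subtarget now goes through MVT::v4i32, i.e. two 16-byte vector load/store pairs instead of four 8-byte GPR pairs:

    // 32-byte, 16-byte-aligned block copy: with Altivec available this is
    // expanded through MVT::v4i32 (two vector load/store pairs).
    struct alignas(16) Block { char bytes[32]; };

    void copy_block(Block *dst, const Block *src) {
      __builtin_memcpy(dst, src, sizeof(Block));
    }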
diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h
index 04afe88..8afd7ef 100644
--- a/lib/Target/PowerPC/PPCISelLowering.h
+++ b/lib/Target/PowerPC/PPCISelLowering.h
@@ -166,14 +166,6 @@ namespace llvm {
/// F8RC = MFFS - This moves the FPSCR (not modeled) into the register.
MFFS,
-    /// LARX = This corresponds to PPC l{w|d}arx instruction: load and
- /// reserve indexed. This is used to implement atomic operations.
- LARX,
-
-    /// STCX = This corresponds to PPC stcx. instruction: store conditional
- /// indexed. This is used to implement atomic operations.
- STCX,
-
/// TC_RETURN - A tail call return.
/// operand #0 chain
/// operand #1 callee (register or absolute)
@@ -489,7 +481,8 @@ namespace llvm {
EmitInstrWithCustomInserter(MachineInstr *MI,
MachineBasicBlock *MBB) const override;
MachineBasicBlock *EmitAtomicBinary(MachineInstr *MI,
- MachineBasicBlock *MBB, bool is64Bit,
+ MachineBasicBlock *MBB,
+ unsigned AtomicSize,
unsigned BinOpcode) const;
MachineBasicBlock *EmitPartwordAtomicBinary(MachineInstr *MI,
MachineBasicBlock *MBB,
@@ -526,6 +519,21 @@ namespace llvm {
std::vector<SDValue> &Ops,
SelectionDAG &DAG) const override;
+ unsigned getInlineAsmMemConstraint(
+ const std::string &ConstraintCode) const override {
+ if (ConstraintCode == "es")
+ return InlineAsm::Constraint_es;
+ else if (ConstraintCode == "o")
+ return InlineAsm::Constraint_o;
+ else if (ConstraintCode == "Q")
+ return InlineAsm::Constraint_Q;
+ else if (ConstraintCode == "Z")
+ return InlineAsm::Constraint_Z;
+ else if (ConstraintCode == "Zy")
+ return InlineAsm::Constraint_Zy;
+ return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
+ }
+
/// isLegalAddressingMode - Return true if the addressing mode represented
/// by AM is legal for this target, for a load/store of the specified type.
bool isLegalAddressingMode(const AddrMode &AM, Type *Ty) const override;
diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td
index 69c0d7d..183d088 100644
--- a/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -235,15 +235,19 @@ let usesCustomInserter = 1 in {
}
// Instructions to support atomic operations
+let mayLoad = 1, hasSideEffects = 0 in {
def LDARX : XForm_1<31, 84, (outs g8rc:$rD), (ins memrr:$ptr),
- "ldarx $rD, $ptr", IIC_LdStLDARX,
- [(set i64:$rD, (PPClarx xoaddr:$ptr))]>;
+ "ldarx $rD, $ptr", IIC_LdStLDARX, []>;
+
+// Instruction to support lock versions of atomics
+// (EH=1 - see Power ISA 2.07 Book II 4.4.2)
+def LDARXL : XForm_1<31, 84, (outs g8rc:$rD), (ins memrr:$ptr),
+ "ldarx $rD, $ptr, 1", IIC_LdStLDARX, []>, isDOT;
+}
-let Defs = [CR0] in
+let Defs = [CR0], mayStore = 1, hasSideEffects = 0 in
def STDCX : XForm_1<31, 214, (outs), (ins g8rc:$rS, memrr:$dst),
- "stdcx. $rS, $dst", IIC_LdStSTDCX,
- [(PPCstcx i64:$rS, xoaddr:$dst)]>,
- isDOT;
+ "stdcx. $rS, $dst", IIC_LdStSTDCX, []>, isDOT;
let Interpretation64Bit = 1, isCodeGenOnly = 1 in {
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in
@@ -325,6 +329,12 @@ let hasSideEffects = 1, isBarrier = 1, usesCustomInserter = 1 in {
Requires<[In64BitMode]>;
}
+def MFSPR8 : XFXForm_1<31, 339, (outs g8rc:$RT), (ins i32imm:$SPR),
+ "mfspr $RT, $SPR", IIC_SprMFSPR>;
+def MTSPR8 : XFXForm_1<31, 467, (outs), (ins i32imm:$SPR, g8rc:$RT),
+ "mtspr $SPR, $RT", IIC_SprMTSPR>;
+
+
//===----------------------------------------------------------------------===//
// 64-bit SPR manipulation instrs.
@@ -696,7 +706,7 @@ def ISEL8 : AForm_4<31, 15,
// Sign extending loads.
-let canFoldAsLoad = 1, PPC970_Unit = 2 in {
+let PPC970_Unit = 2 in {
let Interpretation64Bit = 1, isCodeGenOnly = 1 in
def LHA8: DForm_1<42, (outs g8rc:$rD), (ins memri:$src),
"lha $rD, $src", IIC_LdStLHA,
@@ -752,7 +762,7 @@ def LWAUX : XForm_1<31, 373, (outs g8rc:$rD, ptr_rc_nor0:$ea_result),
let Interpretation64Bit = 1, isCodeGenOnly = 1 in {
// Zero extending loads.
-let canFoldAsLoad = 1, PPC970_Unit = 2 in {
+let PPC970_Unit = 2 in {
def LBZ8 : DForm_1<34, (outs g8rc:$rD), (ins memri:$src),
"lbz $rD, $src", IIC_LdStLoad,
[(set i64:$rD, (zextloadi8 iaddr:$src))]>;
@@ -810,7 +820,7 @@ def LWZUX8 : XForm_1<31, 55, (outs g8rc:$rD, ptr_rc_nor0:$ea_result),
// Full 8-byte loads.
-let canFoldAsLoad = 1, PPC970_Unit = 2 in {
+let PPC970_Unit = 2 in {
def LD : DSForm_1<58, 0, (outs g8rc:$rD), (ins memrix:$src),
"ld $rD, $src", IIC_LdStLD,
[(set i64:$rD, (aligned4load ixaddr:$src))]>, isPPC64;
diff --git a/lib/Target/PowerPC/PPCInstrAltivec.td b/lib/Target/PowerPC/PPCInstrAltivec.td
index f6acd6e..123808b 100644
--- a/lib/Target/PowerPC/PPCInstrAltivec.td
+++ b/lib/Target/PowerPC/PPCInstrAltivec.td
@@ -269,6 +269,16 @@ class VX2_Int_Ty2<bits<11> xo, string opc, Intrinsic IntID, ValueType OutTy,
!strconcat(opc, " $vD, $vB"), IIC_VecFP,
[(set OutTy:$vD, (IntID InTy:$vB))]>;
+class VXBX_Int_Ty<bits<11> xo, string opc, Intrinsic IntID, ValueType Ty>
+ : VXForm_BX<xo, (outs vrrc:$vD), (ins vrrc:$vA),
+ !strconcat(opc, " $vD, $vA"), IIC_VecFP,
+ [(set Ty:$vD, (IntID Ty:$vA))]>;
+
+class VXCR_Int_Ty<bits<11> xo, string opc, Intrinsic IntID, ValueType Ty>
+ : VXForm_CR<xo, (outs vrrc:$vD), (ins vrrc:$vA, u1imm:$ST, u4imm:$SIX),
+ !strconcat(opc, " $vD, $vA, $ST, $SIX"), IIC_VecFP,
+ [(set Ty:$vD, (IntID Ty:$vA, imm:$ST, imm:$SIX))]>;
+
//===----------------------------------------------------------------------===//
// Instruction Definitions.
@@ -342,7 +352,7 @@ def MTVSCR : VXForm_5<1604, (outs), (ins vrrc:$vB),
"mtvscr $vB", IIC_LdStLoad,
[(int_ppc_altivec_mtvscr v4i32:$vB)]>;
-let canFoldAsLoad = 1, PPC970_Unit = 2 in { // Loads.
+let PPC970_Unit = 2 in { // Loads.
def LVEBX: XForm_1<31, 7, (outs vrrc:$vD), (ins memrr:$src),
"lvebx $vD, $src", IIC_LdStLoad,
[(set v16i8:$vD, (int_ppc_altivec_lvebx xoaddr:$src))]>;
@@ -750,7 +760,7 @@ def VCMPGTSW : VCMP <902, "vcmpgtsw $vD, $vA, $vB" , v4i32>;
def VCMPGTSWo : VCMPo<902, "vcmpgtsw. $vD, $vA, $vB", v4i32>;
def VCMPGTUW : VCMP <646, "vcmpgtuw $vD, $vA, $vB" , v4i32>;
def VCMPGTUWo : VCMPo<646, "vcmpgtuw. $vD, $vA, $vB", v4i32>;
-
+
let isCodeGenOnly = 1 in {
def V_SET0B : VXForm_setzero<1220, (outs vrrc:$vD), (ins),
"vxor $vD, $vD, $vD", IIC_VecFP,
@@ -939,8 +949,50 @@ def : Pat<(v4f32 (fnearbyint v4f32:$vA)),
} // end HasAltivec
def HasP8Altivec : Predicate<"PPCSubTarget->hasP8Altivec()">;
+def HasP8Crypto : Predicate<"PPCSubTarget->hasP8Crypto()">;
let Predicates = [HasP8Altivec] in {
+let isCommutable = 1 in {
+def VMULESW : VX1_Int_Ty2<904, "vmulesw", int_ppc_altivec_vmulesw,
+ v2i64, v4i32>;
+def VMULEUW : VX1_Int_Ty2<648, "vmuleuw", int_ppc_altivec_vmuleuw,
+ v2i64, v4i32>;
+def VMULOSW : VX1_Int_Ty2<392, "vmulosw", int_ppc_altivec_vmulosw,
+ v2i64, v4i32>;
+def VMULOUW : VX1_Int_Ty2<136, "vmulouw", int_ppc_altivec_vmulouw,
+ v2i64, v4i32>;
+def VMULUWM : VXForm_1<137, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vmuluwm $vD, $vA, $vB", IIC_VecGeneral,
+ [(set v4i32:$vD, (mul v4i32:$vA, v4i32:$vB))]>;
+def VMAXSD : VX1_Int_Ty<450, "vmaxsd", int_ppc_altivec_vmaxsd, v2i64>;
+def VMAXUD : VX1_Int_Ty<194, "vmaxud", int_ppc_altivec_vmaxud, v2i64>;
+def VMINSD : VX1_Int_Ty<962, "vminsd", int_ppc_altivec_vminsd, v2i64>;
+def VMINUD : VX1_Int_Ty<706, "vminud", int_ppc_altivec_vminud, v2i64>;
+} // isCommutable
+
+// Vector shifts
+def VRLD : VX1_Int_Ty<196, "vrld", int_ppc_altivec_vrld, v2i64>;
+def VSLD : VXForm_1<1476, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vsld $vD, $vA, $vB", IIC_VecGeneral,
+ [(set v2i64:$vD, (shl v2i64:$vA, v2i64:$vB))]>;
+def VSRD : VXForm_1<1732, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vsrd $vD, $vA, $vB", IIC_VecGeneral,
+ [(set v2i64:$vD, (srl v2i64:$vA, v2i64:$vB))]>;
+def VSRAD : VXForm_1<964, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vsrad $vD, $vA, $vB", IIC_VecGeneral,
+ [(set v2i64:$vD, (sra v2i64:$vA, v2i64:$vB))]>;
+
+// Vector Integer Arithmetic Instructions
+let isCommutable = 1 in {
+def VADDUDM : VXForm_1<192, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vaddudm $vD, $vA, $vB", IIC_VecGeneral,
+ [(set v2i64:$vD, (add v2i64:$vA, v2i64:$vB))]>;
+} // isCommutable
+
+def VSUBUDM : VXForm_1<1216, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vsubudm $vD, $vA, $vB", IIC_VecGeneral,
+ [(set v2i64:$vD, (sub v2i64:$vA, v2i64:$vB))]>;
+
// Count Leading Zeros
def VCLZB : VXForm_2<1794, (outs vrrc:$vD), (ins vrrc:$vB),
"vclzb $vD, $vB", IIC_VecGeneral,
@@ -992,4 +1044,42 @@ def VORC : VXForm_1<1348, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
"vorc $vD, $vA, $vB", IIC_VecGeneral,
[(set v4i32:$vD, (or v4i32:$vA,
(vnot_ppc v4i32:$vB)))]>;
+
+// i64 element comparisons.
+def VCMPEQUD : VCMP <199, "vcmpequd $vD, $vA, $vB" , v2i64>;
+def VCMPEQUDo : VCMPo<199, "vcmpequd. $vD, $vA, $vB", v2i64>;
+def VCMPGTSD : VCMP <967, "vcmpgtsd $vD, $vA, $vB" , v2i64>;
+def VCMPGTSDo : VCMPo<967, "vcmpgtsd. $vD, $vA, $vB", v2i64>;
+def VCMPGTUD : VCMP <711, "vcmpgtud $vD, $vA, $vB" , v2i64>;
+def VCMPGTUDo : VCMPo<711, "vcmpgtud. $vD, $vA, $vB", v2i64>;
+
+// The cryptography instructions that do not require Category:Vector.Crypto
+def VPMSUMB : VX1_Int_Ty<1032, "vpmsumb",
+ int_ppc_altivec_crypto_vpmsumb, v16i8>;
+def VPMSUMH : VX1_Int_Ty<1096, "vpmsumh",
+ int_ppc_altivec_crypto_vpmsumh, v8i16>;
+def VPMSUMW : VX1_Int_Ty<1160, "vpmsumw",
+ int_ppc_altivec_crypto_vpmsumw, v4i32>;
+def VPMSUMD : VX1_Int_Ty<1224, "vpmsumd",
+ int_ppc_altivec_crypto_vpmsumd, v2i64>;
+def VPERMXOR : VA1a_Int_Ty<45, "vpermxor",
+ int_ppc_altivec_crypto_vpermxor, v16i8>;
+
} // end HasP8Altivec
+
+// Crypto instructions (from builtins)
+let Predicates = [HasP8Crypto] in {
+def VSHASIGMAW : VXCR_Int_Ty<1666, "vshasigmaw",
+ int_ppc_altivec_crypto_vshasigmaw, v4i32>;
+def VSHASIGMAD : VXCR_Int_Ty<1730, "vshasigmad",
+ int_ppc_altivec_crypto_vshasigmad, v2i64>;
+def VCIPHER : VX1_Int_Ty<1288, "vcipher", int_ppc_altivec_crypto_vcipher,
+ v2i64>;
+def VCIPHERLAST : VX1_Int_Ty<1289, "vcipherlast",
+ int_ppc_altivec_crypto_vcipherlast, v2i64>;
+def VNCIPHER : VX1_Int_Ty<1352, "vncipher",
+ int_ppc_altivec_crypto_vncipher, v2i64>;
+def VNCIPHERLAST : VX1_Int_Ty<1353, "vncipherlast",
+ int_ppc_altivec_crypto_vncipherlast, v2i64>;
+def VSBOX : VXBX_Int_Ty<1480, "vsbox", int_ppc_altivec_crypto_vsbox, v2i64>;
+} // HasP8Crypto
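A minimal sketch of how the new crypto patterns surface to users, assuming the GCC-style __builtin_crypto_vcipher builtin (Clang's altivec.h maps it onto __builtin_altivec_crypto_vcipher) and -mcpu=power8 with crypto enabled:

    #include <altivec.h>

    typedef __vector unsigned long long v2u64;

    // One AES encryption round; selects the VCIPHER pattern above.
    v2u64 aes_round(v2u64 state, v2u64 round_key) {
      return __builtin_crypto_vcipher(state, round_key);
    }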
diff --git a/lib/Target/PowerPC/PPCInstrFormats.td b/lib/Target/PowerPC/PPCInstrFormats.td
index 506a2d0..b7a7a1f 100644
--- a/lib/Target/PowerPC/PPCInstrFormats.td
+++ b/lib/Target/PowerPC/PPCInstrFormats.td
@@ -693,6 +693,60 @@ class XForm_16b<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
let A = 0;
}
+class XForm_htm0<bits<6> opcode, bits<10> xo, dag OOL, dag IOL,
+ string asmstr, InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bit R;
+
+ bit RC = 1;
+
+ let Inst{6-9} = 0;
+ let Inst{10} = R;
+ let Inst{11-20} = 0;
+ let Inst{21-30} = xo;
+ let Inst{31} = RC;
+}
+
+class XForm_htm1<bits<6> opcode, bits<10> xo, dag OOL, dag IOL,
+ string asmstr, InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bit A;
+
+ bit RC = 1;
+
+ let Inst{6} = A;
+ let Inst{7-20} = 0;
+ let Inst{21-30} = xo;
+ let Inst{31} = RC;
+}
+
+class XForm_htm2<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bit L;
+
+ bit RC = 0; // set by isDOT
+
+ let Inst{7-9} = 0;
+ let Inst{10} = L;
+ let Inst{11-20} = 0;
+ let Inst{21-30} = xo;
+ let Inst{31} = RC;
+}
+
+class XForm_htm3<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<3> BF;
+
+ bit RC = 0;
+
+ let Inst{6-8} = BF;
+ let Inst{9-20} = 0;
+ let Inst{21-30} = xo;
+ let Inst{31} = RC;
+}
+
// XX*-Form (VSX)
class XX1Form<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
InstrItinClass itin, list<dag> pattern>
@@ -1470,6 +1524,39 @@ class VXForm_5<bits<11> xo, dag OOL, dag IOL, string asmstr,
let Inst{21-31} = xo;
}
+/// VXForm_CR - VX crypto instructions with "VRT, VRA, ST, SIX"
+class VXForm_CR<bits<11> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<4, OOL, IOL, asmstr, itin> {
+ bits<5> VD;
+ bits<5> VA;
+ bits<1> ST;
+ bits<4> SIX;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = VD;
+ let Inst{11-15} = VA;
+ let Inst{16} = ST;
+ let Inst{17-20} = SIX;
+ let Inst{21-31} = xo;
+}
+
+/// VXForm_BX - VX crypto instructions with "VRT, VRA" and a zeroed field
+/// (like vsbox)
+class VXForm_BX<bits<11> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<4, OOL, IOL, asmstr, itin> {
+ bits<5> VD;
+ bits<5> VA;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = VD;
+ let Inst{11-15} = VA;
+ let Inst{16-20} = 0;
+ let Inst{21-31} = xo;
+}
+
// E-4 VXR-Form
class VXRForm_1<bits<10> xo, dag OOL, dag IOL, string asmstr,
InstrItinClass itin, list<dag> pattern>
diff --git a/lib/Target/PowerPC/PPCInstrHTM.td b/lib/Target/PowerPC/PPCInstrHTM.td
new file mode 100644
index 0000000..20e6a62
--- /dev/null
+++ b/lib/Target/PowerPC/PPCInstrHTM.td
@@ -0,0 +1,172 @@
+//===-- PPCInstrHTM.td - The PowerPC Hardware Transactional Memory -*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the Hardware Transactional Memory extension to the
+// PowerPC instruction set.
+//
+//===----------------------------------------------------------------------===//
+
+
+
+def HasHTM : Predicate<"PPCSubTarget->hasHTM()">;
+
+def HTM_get_imm : SDNodeXForm<imm, [{
+  return getI32Imm(N->getZExtValue());
+}]>;
+
+let hasSideEffects = 1, usesCustomInserter = 1 in {
+def TCHECK_RET : Pseudo<(outs crrc:$out), (ins), "#TCHECK_RET", []>;
+}
+
+
+let Predicates = [HasHTM] in {
+
+def TBEGIN : XForm_htm0 <31, 654,
+ (outs crrc0:$ret), (ins u1imm:$R), "tbegin. $R", IIC_SprMTSPR, []>;
+
+def TEND : XForm_htm1 <31, 686,
+ (outs crrc0:$ret), (ins u1imm:$A), "tend. $A", IIC_SprMTSPR, []>;
+
+def TABORT : XForm_base_r3xo <31, 910,
+ (outs crrc0:$ret), (ins gprc:$A), "tabort. $A", IIC_SprMTSPR,
+ []>, isDOT {
+ let RST = 0;
+ let B = 0;
+}
+
+def TABORTWC : XForm_base_r3xo <31, 782,
+ (outs crrc0:$ret), (ins u5imm:$RTS, gprc:$A, gprc:$B),
+ "tabortwc. $RTS, $A, $B", IIC_SprMTSPR, []>,
+ isDOT;
+
+def TABORTWCI : XForm_base_r3xo <31, 846,
+ (outs crrc0:$ret), (ins u5imm:$RTS, gprc:$A, u5imm:$B),
+ "tabortwci. $RTS, $A, $B", IIC_SprMTSPR, []>,
+ isDOT;
+
+def TABORTDC : XForm_base_r3xo <31, 814,
+ (outs crrc0:$ret), (ins u5imm:$RTS, gprc:$A, gprc:$B),
+ "tabortdc. $RTS, $A, $B", IIC_SprMTSPR, []>,
+ isDOT;
+
+def TABORTDCI : XForm_base_r3xo <31, 878,
+ (outs crrc0:$ret), (ins u5imm:$RTS, gprc:$A, u5imm:$B),
+ "tabortdci. $RTS, $A, $B", IIC_SprMTSPR, []>,
+ isDOT;
+
+def TSR : XForm_htm2 <31, 750,
+ (outs crrc0:$ret), (ins u1imm:$L), "tsr. $L", IIC_SprMTSPR, []>,
+ isDOT;
+
+def TCHECK : XForm_htm3 <31, 718,
+ (outs), (ins crrc:$BF), "tcheck $BF", IIC_SprMTSPR, []>;
+
+
+def TRECLAIM : XForm_base_r3xo <31, 942,
+ (outs crrc:$ret), (ins gprc:$A), "treclaim. $A",
+ IIC_SprMTSPR, []>,
+ isDOT {
+ let RST = 0;
+ let B = 0;
+}
+
+def TRECHKPT : XForm_base_r3xo <31, 1006,
+ (outs crrc:$ret), (ins), "trechkpt.", IIC_SprMTSPR, []>,
+ isDOT {
+ let RST = 0;
+ let A = 0;
+ let B = 0;
+}
+
+// Builtins
+
+// All HTM instructions, with the exception of tcheck, set CR0 with the
+// value of the MSR Transaction State (TS) bits that exist before the
+// instruction is executed. For tbegin., the EQ bit in CR0 can be used
+// to determine whether the transaction was successfully started (0) or
+// failed (1). We use an XORI pattern to 'flip' the bit to match the
+// tbegin builtin API, which defines a return value of 1 as success.
+
+def : Pat<(int_ppc_tbegin i32:$R),
+ (XORI
+ (EXTRACT_SUBREG (
+ TBEGIN (HTM_get_imm imm:$R)), sub_eq),
+ 1)>;
+
+def : Pat<(int_ppc_tend i32:$R),
+ (TEND (HTM_get_imm imm:$R))>;
+
+
+def : Pat<(int_ppc_tabort i32:$R),
+ (TABORT $R)>;
+
+def : Pat<(int_ppc_tabortwc i32:$TO, i32:$RA, i32:$RB),
+ (TABORTWC (HTM_get_imm imm:$TO), $RA, $RB)>;
+
+def : Pat<(int_ppc_tabortwci i32:$TO, i32:$RA, i32:$SI),
+ (TABORTWCI (HTM_get_imm imm:$TO), $RA, (HTM_get_imm imm:$SI))>;
+
+def : Pat<(int_ppc_tabortdc i32:$TO, i32:$RA, i32:$RB),
+ (TABORTDC (HTM_get_imm imm:$TO), $RA, $RB)>;
+
+def : Pat<(int_ppc_tabortdci i32:$TO, i32:$RA, i32:$SI),
+ (TABORTDCI (HTM_get_imm imm:$TO), $RA, (HTM_get_imm imm:$SI))>;
+
+def : Pat<(int_ppc_tcheck),
+ (TCHECK_RET)>;
+
+def : Pat<(int_ppc_treclaim i32:$RA),
+ (TRECLAIM $RA)>;
+
+def : Pat<(int_ppc_trechkpt),
+ (TRECHKPT)>;
+
+def : Pat<(int_ppc_tsr i32:$L),
+ (TSR (HTM_get_imm imm:$L))>;
+
+def : Pat<(int_ppc_get_texasr),
+ (MFSPR8 130)>;
+
+def : Pat<(int_ppc_get_texasru),
+ (MFSPR8 131)>;
+
+def : Pat<(int_ppc_get_tfhar),
+ (MFSPR8 128)>;
+
+def : Pat<(int_ppc_get_tfiar),
+ (MFSPR8 129)>;
+
+
+def : Pat<(int_ppc_set_texasr i64:$V),
+ (MTSPR8 130, $V)>;
+
+def : Pat<(int_ppc_set_texasru i64:$V),
+ (MTSPR8 131, $V)>;
+
+def : Pat<(int_ppc_set_tfhar i64:$V),
+ (MTSPR8 128, $V)>;
+
+def : Pat<(int_ppc_set_tfiar i64:$V),
+ (MTSPR8 129, $V)>;
+
+
+// Extended mnemonics
+def : Pat<(int_ppc_tendall),
+ (TEND 1)>;
+
+def : Pat<(int_ppc_tresume),
+ (TSR 1)>;
+
+def : Pat<(int_ppc_tsuspend),
+ (TSR 0)>;
+
+def : Pat<(i64 (int_ppc_ttest)),
+ (RLDICL (i64 (COPY (TABORTWCI 0, ZERO, 0))), 36, 28)>;
+
+} // [HasHTM]
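A minimal usage sketch for the patterns above, assuming the GCC/Clang-style PowerPC HTM builtins (__builtin_tbegin and friends) and a hypothetical shared counter; per the XORI flip in the comment above, __builtin_tbegin returns 1 when the transaction starts:

    static long counter;

    void increment_transactionally(void) {
      if (__builtin_tbegin(0)) {            // 1 == transaction started
        ++counter;                          // transactional body
        __builtin_tend(0);                  // commit
      } else {
        __sync_fetch_and_add(&counter, 1);  // fallback on failure/abort
      }
    }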
diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp
index fe9474a..c9c2949 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -61,7 +61,7 @@ void PPCInstrInfo::anchor() {}
PPCInstrInfo::PPCInstrInfo(PPCSubtarget &STI)
: PPCGenInstrInfo(PPC::ADJCALLSTACKDOWN, PPC::ADJCALLSTACKUP),
- Subtarget(STI), RI(STI) {}
+ Subtarget(STI), RI(STI.getTargetMachine()) {}
/// CreateTargetHazardRecognizer - Return the hazard recognizer to use for
/// this target when scheduling the DAG.
@@ -113,9 +113,8 @@ int PPCInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
const MachineOperand &DefMO = DefMI->getOperand(DefIdx);
unsigned Reg = DefMO.getReg();
- const TargetRegisterInfo *TRI = &getRegisterInfo();
bool IsRegCR;
- if (TRI->isVirtualRegister(Reg)) {
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
const MachineRegisterInfo *MRI =
&DefMI->getParent()->getParent()->getRegInfo();
IsRegCR = MRI->getRegClass(Reg)->hasSuperClassEq(&PPC::CRRCRegClass) ||
@@ -697,6 +696,33 @@ void PPCInstrInfo::insertSelect(MachineBasicBlock &MBB,
.addReg(Cond[1].getReg(), 0, SubIdx);
}
+static unsigned getCRBitValue(unsigned CRBit) {
+ unsigned Ret = 4;
+ if (CRBit == PPC::CR0LT || CRBit == PPC::CR1LT ||
+ CRBit == PPC::CR2LT || CRBit == PPC::CR3LT ||
+ CRBit == PPC::CR4LT || CRBit == PPC::CR5LT ||
+ CRBit == PPC::CR6LT || CRBit == PPC::CR7LT)
+ Ret = 3;
+ if (CRBit == PPC::CR0GT || CRBit == PPC::CR1GT ||
+ CRBit == PPC::CR2GT || CRBit == PPC::CR3GT ||
+ CRBit == PPC::CR4GT || CRBit == PPC::CR5GT ||
+ CRBit == PPC::CR6GT || CRBit == PPC::CR7GT)
+ Ret = 2;
+ if (CRBit == PPC::CR0EQ || CRBit == PPC::CR1EQ ||
+ CRBit == PPC::CR2EQ || CRBit == PPC::CR3EQ ||
+ CRBit == PPC::CR4EQ || CRBit == PPC::CR5EQ ||
+ CRBit == PPC::CR6EQ || CRBit == PPC::CR7EQ)
+ Ret = 1;
+ if (CRBit == PPC::CR0UN || CRBit == PPC::CR1UN ||
+ CRBit == PPC::CR2UN || CRBit == PPC::CR3UN ||
+ CRBit == PPC::CR4UN || CRBit == PPC::CR5UN ||
+ CRBit == PPC::CR6UN || CRBit == PPC::CR7UN)
+ Ret = 0;
+
+ assert(Ret != 4 && "Invalid CR bit register");
+ return Ret;
+}
+
void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I, DebugLoc DL,
unsigned DestReg, unsigned SrcReg,
@@ -742,6 +768,32 @@ void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
SrcReg = SuperReg;
}
+ // Different class register copy
+ if (PPC::CRBITRCRegClass.contains(SrcReg) &&
+ PPC::GPRCRegClass.contains(DestReg)) {
+ unsigned CRReg = getCRFromCRBit(SrcReg);
+ BuildMI(MBB, I, DL, get(PPC::MFOCRF), DestReg)
+      .addReg(CRReg, getKillRegState(KillSrc));
+ // Rotate the CR bit in the CR fields to be the least significant bit and
+ // then mask with 0x1 (MB = ME = 31).
+ BuildMI(MBB, I, DL, get(PPC::RLWINM), DestReg)
+ .addReg(DestReg, RegState::Kill)
+ .addImm(TRI->getEncodingValue(CRReg) * 4 + (4 - getCRBitValue(SrcReg)))
+ .addImm(31)
+ .addImm(31);
+ return;
+ } else if (PPC::CRRCRegClass.contains(SrcReg) &&
+ PPC::G8RCRegClass.contains(DestReg)) {
+ BuildMI(MBB, I, DL, get(PPC::MFOCRF8), DestReg)
+      .addReg(SrcReg, getKillRegState(KillSrc));
+ return;
+ } else if (PPC::CRRCRegClass.contains(SrcReg) &&
+ PPC::GPRCRegClass.contains(DestReg)) {
+ BuildMI(MBB, I, DL, get(PPC::MFOCRF), DestReg)
+      .addReg(SrcReg, getKillRegState(KillSrc));
+ return;
+ }
+
unsigned Opc;
if (PPC::GPRCRegClass.contains(DestReg, SrcReg))
Opc = PPC::OR;
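A worked instance of the CR-bit copy arithmetic above, with CR6GT chosen as an illustration: MFOCRF leaves the CR6GT bit at big-endian position 4*6 + 1 = 25, the RLWINM shift evaluates to 6*4 + (4 - 2) = 26, and rotating left by 26 moves bit 25 to (25 - 26) mod 32 = 31, the least significant bit, before MB = ME = 31 masks everything else off. As a compile-time sanity check (illustrative only; 6 is CR6's encoding value and 2 is getCRBitValue(CR6GT)):

    constexpr unsigned ShiftCR6GT = 6 * 4 + (4 - 2);  // == 26
    constexpr unsigned BitPosCR6GT = 4 * 6 + 1;       // == 25 (big-endian)
    static_assert((BitPosCR6GT - ShiftCR6GT) % 32 == 31,
                  "rotate lands the CR bit in the LSB");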
diff --git a/lib/Target/PowerPC/PPCInstrInfo.h b/lib/Target/PowerPC/PPCInstrInfo.h
index 4add6f9..7fd076a 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.h
+++ b/lib/Target/PowerPC/PPCInstrInfo.h
@@ -63,7 +63,7 @@ enum PPC970_Unit {
};
} // end namespace PPCII
-
+class PPCSubtarget;
class PPCInstrInfo : public PPCGenInstrInfo {
PPCSubtarget &Subtarget;
const PPCRegisterInfo RI;
diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td
index 1a045b1..5eff156 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/lib/Target/PowerPC/PPCInstrInfo.td
@@ -46,13 +46,6 @@ def SDT_PPCstbrx : SDTypeProfile<0, 3, [
SDTCisInt<0>, SDTCisPtrTy<1>, SDTCisVT<2, OtherVT>
]>;
-def SDT_PPClarx : SDTypeProfile<1, 1, [
- SDTCisInt<0>, SDTCisPtrTy<1>
-]>;
-def SDT_PPCstcx : SDTypeProfile<0, 2, [
- SDTCisInt<0>, SDTCisPtrTy<1>
-]>;
-
def SDT_PPCTC_ret : SDTypeProfile<0, 2, [
SDTCisPtrTy<0>, SDTCisVT<1, i32>
]>;
@@ -225,12 +218,6 @@ def PPCcr6set : SDNode<"PPCISD::CR6SET", SDTNone,
def PPCcr6unset : SDNode<"PPCISD::CR6UNSET", SDTNone,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
-// Instructions to support atomic operations
-def PPClarx : SDNode<"PPCISD::LARX", SDT_PPClarx,
- [SDNPHasChain, SDNPMayLoad]>;
-def PPCstcx : SDNode<"PPCISD::STCX", SDT_PPCstcx,
- [SDNPHasChain, SDNPMayStore]>;
-
// Instructions to support dynamic alloca.
def SDTDynOp : SDTypeProfile<1, 2, []>;
def PPCdynalloc : SDNode<"PPCISD::DYNALLOC", SDTDynOp, [SDNPHasChain]>;
@@ -445,6 +432,18 @@ def PPCRegCRRCAsmOperand : AsmOperandClass {
def crrc : RegisterOperand<CRRC> {
let ParserMatchClass = PPCRegCRRCAsmOperand;
}
+def crrc0 : RegisterOperand<CRRC0> {
+ let ParserMatchClass = PPCRegCRRCAsmOperand;
+}
+
+def PPCU1ImmAsmOperand : AsmOperandClass {
+ let Name = "U1Imm"; let PredicateMethod = "isU1Imm";
+ let RenderMethod = "addImmOperands";
+}
+def u1imm : Operand<i32> {
+ let PrintMethod = "printU1ImmOperand";
+ let ParserMatchClass = PPCU1ImmAsmOperand;
+}
def PPCU2ImmAsmOperand : AsmOperandClass {
let Name = "U2Imm"; let PredicateMethod = "isU2Imm";
@@ -455,6 +454,15 @@ def u2imm : Operand<i32> {
let ParserMatchClass = PPCU2ImmAsmOperand;
}
+def PPCU3ImmAsmOperand : AsmOperandClass {
+ let Name = "U3Imm"; let PredicateMethod = "isU3Imm";
+ let RenderMethod = "addImmOperands";
+}
+def u3imm : Operand<i32> {
+ let PrintMethod = "printU3ImmOperand";
+ let ParserMatchClass = PPCU3ImmAsmOperand;
+}
+
def PPCU4ImmAsmOperand : AsmOperandClass {
let Name = "U4Imm"; let PredicateMethod = "isU4Imm";
let RenderMethod = "addImmOperands";
@@ -715,7 +723,7 @@ def IsPPC6xx : Predicate<"PPCSubTarget->isPPC6xx()">;
def IsE500 : Predicate<"PPCSubTarget->isE500()">;
def HasSPE : Predicate<"PPCSubTarget->HasSPE()">;
def HasICBT : Predicate<"PPCSubTarget->hasICBT()">;
-
+def HasPartwordAtomics : Predicate<"PPCSubTarget->hasPartwordAtomics()">;
def NoNaNsFPMath : Predicate<"TM.Options.NoNaNsFPMath">;
def NaNsFPMath : Predicate<"!TM.Options.NoNaNsFPMath">;
@@ -1446,15 +1454,44 @@ let usesCustomInserter = 1 in {
}
// Instructions to support atomic operations
+let mayLoad = 1, hasSideEffects = 0 in {
+def LBARX : XForm_1<31, 52, (outs gprc:$rD), (ins memrr:$src),
+ "lbarx $rD, $src", IIC_LdStLWARX, []>,
+ Requires<[HasPartwordAtomics]>;
+
+def LHARX : XForm_1<31, 116, (outs gprc:$rD), (ins memrr:$src),
+ "lharx $rD, $src", IIC_LdStLWARX, []>,
+ Requires<[HasPartwordAtomics]>;
+
def LWARX : XForm_1<31, 20, (outs gprc:$rD), (ins memrr:$src),
- "lwarx $rD, $src", IIC_LdStLWARX,
- [(set i32:$rD, (PPClarx xoaddr:$src))]>;
+ "lwarx $rD, $src", IIC_LdStLWARX, []>;
+
+// Instructions to support lock versions of atomics
+// (EH=1 - see Power ISA 2.07 Book II 4.4.2)
+def LBARXL : XForm_1<31, 52, (outs gprc:$rD), (ins memrr:$src),
+ "lbarx $rD, $src, 1", IIC_LdStLWARX, []>, isDOT,
+ Requires<[HasPartwordAtomics]>;
+
+def LHARXL : XForm_1<31, 116, (outs gprc:$rD), (ins memrr:$src),
+ "lharx $rD, $src, 1", IIC_LdStLWARX, []>, isDOT,
+ Requires<[HasPartwordAtomics]>;
+
+def LWARXL : XForm_1<31, 20, (outs gprc:$rD), (ins memrr:$src),
+ "lwarx $rD, $src, 1", IIC_LdStLWARX, []>, isDOT;
+}
+
+let Defs = [CR0], mayStore = 1, hasSideEffects = 0 in {
+def STBCX : XForm_1<31, 694, (outs), (ins gprc:$rS, memrr:$dst),
+ "stbcx. $rS, $dst", IIC_LdStSTWCX, []>,
+ isDOT, Requires<[HasPartwordAtomics]>;
+
+def STHCX : XForm_1<31, 726, (outs), (ins gprc:$rS, memrr:$dst),
+ "sthcx. $rS, $dst", IIC_LdStSTWCX, []>,
+ isDOT, Requires<[HasPartwordAtomics]>;
-let Defs = [CR0] in
def STWCX : XForm_1<31, 150, (outs), (ins gprc:$rS, memrr:$dst),
- "stwcx. $rS, $dst", IIC_LdStSTWCX,
- [(PPCstcx i32:$rS, xoaddr:$dst)]>,
- isDOT;
+ "stwcx. $rS, $dst", IIC_LdStSTWCX, []>, isDOT;
+}
let isTerminator = 1, isBarrier = 1, hasCtrlDep = 1 in
def TRAP : XForm_24<31, 4, (outs), (ins), "trap", IIC_LdStLoad, [(trap)]>;
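With the LBARX/LHARX and STBCX/STHCX definitions above, a sub-word atomic RMW no longer needs the word-sized shift-and-mask expansion on targets with partword atomics (POWER8). A minimal sketch of source code that now selects those instructions directly:

    // 8-bit atomic fetch-or: with HasPartwordAtomics this maps onto an
    // lbarx/stbcx. loop instead of an lwarx/stwcx. shift/mask sequence.
    unsigned char fetch_or8(unsigned char *p, unsigned char v) {
      return __atomic_fetch_or(p, v, __ATOMIC_SEQ_CST);
    }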
@@ -1473,7 +1510,7 @@ def TD : XForm_1<31, 68, (outs), (ins u5imm:$to, g8rc:$rA, g8rc:$rB),
//
// Unindexed (r+i) Loads.
-let canFoldAsLoad = 1, PPC970_Unit = 2 in {
+let PPC970_Unit = 2 in {
def LBZ : DForm_1<34, (outs gprc:$rD), (ins memri:$src),
"lbz $rD, $src", IIC_LdStLoad,
[(set i32:$rD, (zextloadi8 iaddr:$src))]>;
@@ -1570,7 +1607,7 @@ def LFDUX : XForm_1<31, 631, (outs f8rc:$rD, ptr_rc_nor0:$ea_result),
// Indexed (r+r) Loads.
//
-let canFoldAsLoad = 1, PPC970_Unit = 2 in {
+let PPC970_Unit = 2 in {
def LBZX : XForm_1<31, 87, (outs gprc:$rD), (ins memrr:$src),
"lbzx $rD, $src", IIC_LdStLoad,
[(set i32:$rD, (zextloadi8 xaddr:$src))]>;
@@ -2683,6 +2720,7 @@ include "PPCInstrSPE.td"
include "PPCInstr64Bit.td"
include "PPCInstrVSX.td"
include "PPCInstrQPX.td"
+include "PPCInstrHTM.td"
def crnot : OutPatFrag<(ops node:$in),
(CRNOR $in, $in)>;
diff --git a/lib/Target/PowerPC/PPCInstrQPX.td b/lib/Target/PowerPC/PPCInstrQPX.td
index c984d46..5c66b42 100644
--- a/lib/Target/PowerPC/PPCInstrQPX.td
+++ b/lib/Target/PowerPC/PPCInstrQPX.td
@@ -501,7 +501,7 @@ let Uses = [RM] in {
"qvflogical $FRT, $FRA, $FRB, $tttt", IIC_VecPerm, []>;
// Load indexed instructions
- let mayLoad = 1, canFoldAsLoad = 1 in {
+ let mayLoad = 1 in {
def QVLFDX : XForm_1<31, 583,
(outs qfrc:$FRT), (ins memrr:$src),
"qvlfdx $FRT, $src", IIC_LdStLFD,
diff --git a/lib/Target/PowerPC/PPCInstrVSX.td b/lib/Target/PowerPC/PPCInstrVSX.td
index d6cb3a0..ec04da4 100644
--- a/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/lib/Target/PowerPC/PPCInstrVSX.td
@@ -66,7 +66,7 @@ let hasSideEffects = 0 in { // VSX instructions don't have side effects.
let Uses = [RM] in {
// Load indexed instructions
- let mayLoad = 1, canFoldAsLoad = 1 in {
+ let mayLoad = 1 in {
def LXSDX : XX1Form<31, 588,
(outs vsfrc:$XT), (ins memrr:$src),
"lxsdx $XT, $src", IIC_LdStLFD,
diff --git a/lib/Target/PowerPC/PPCLoopDataPrefetch.cpp b/lib/Target/PowerPC/PPCLoopDataPrefetch.cpp
index efd2d92..005bcaf 100644
--- a/lib/Target/PowerPC/PPCLoopDataPrefetch.cpp
+++ b/lib/Target/PowerPC/PPCLoopDataPrefetch.cpp
@@ -104,7 +104,7 @@ FunctionPass *llvm::createPPCLoopDataPrefetchPass() { return new PPCLoopDataPref
bool PPCLoopDataPrefetch::runOnFunction(Function &F) {
LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
SE = &getAnalysis<ScalarEvolution>();
- DL = F.getParent()->getDataLayout();
+ DL = &F.getParent()->getDataLayout();
AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
@@ -192,7 +192,7 @@ bool PPCLoopDataPrefetch::runOnLoop(Loop *L) {
const SCEV *PtrDiff = SE->getMinusSCEV(LSCEVAddRec, K->second);
if (const SCEVConstant *ConstPtrDiff =
dyn_cast<SCEVConstant>(PtrDiff)) {
- int64_t PD = abs64(ConstPtrDiff->getValue()->getSExtValue());
+ int64_t PD = std::abs(ConstPtrDiff->getValue()->getSExtValue());
if (PD < (int64_t) CacheLineSize) {
DupPref = true;
break;
@@ -211,7 +211,7 @@ bool PPCLoopDataPrefetch::runOnLoop(Loop *L) {
PrefLoads.push_back(std::make_pair(MemI, LSCEVAddRec));
Type *I8Ptr = Type::getInt8PtrTy((*I)->getContext(), PtrAddrSpace);
- SCEVExpander SCEVE(*SE, "prefaddr");
+ SCEVExpander SCEVE(*SE, J->getModule()->getDataLayout(), "prefaddr");
Value *PrefPtrValue = SCEVE.expandCodeFor(NextLSCEV, I8Ptr, MemI);
IRBuilder<> Builder(MemI);
diff --git a/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp b/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp
index df65227..092a4ef 100644
--- a/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp
+++ b/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp
@@ -36,6 +36,7 @@
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Transforms/Scalar.h"
@@ -84,7 +85,6 @@ namespace {
PPCTargetMachine *TM;
LoopInfo *LI;
ScalarEvolution *SE;
- const DataLayout *DL;
};
}
@@ -141,9 +141,6 @@ bool PPCLoopPreIncPrep::runOnFunction(Function &F) {
LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
SE = &getAnalysis<ScalarEvolution>();
- DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- DL = DLP ? &DLP->getDataLayout() : 0;
-
bool MadeChange = false;
for (LoopInfo::iterator I = LI->begin(), E = LI->end();
@@ -158,9 +155,6 @@ bool PPCLoopPreIncPrep::runOnFunction(Function &F) {
bool PPCLoopPreIncPrep::runOnLoop(Loop *L) {
bool MadeChange = false;
- if (!DL)
- return MadeChange;
-
// Only prep. the inner-most loop
if (!L->empty())
return MadeChange;
@@ -261,6 +255,7 @@ bool PPCLoopPreIncPrep::runOnLoop(Loop *L) {
Value *BasePtr = GetPointerOperand(MemI);
assert(BasePtr && "No pointer operand");
+ Type *I8Ty = Type::getInt8Ty(MemI->getParent()->getContext());
Type *I8PtrTy = Type::getInt8PtrTy(MemI->getParent()->getContext(),
BasePtr->getType()->getPointerAddressSpace());
@@ -280,7 +275,7 @@ bool PPCLoopPreIncPrep::runOnLoop(Loop *L) {
MemI->hasName() ? MemI->getName() + ".phi" : "",
Header->getFirstNonPHI());
- SCEVExpander SCEVE(*SE, "pistart");
+ SCEVExpander SCEVE(*SE, Header->getModule()->getDataLayout(), "pistart");
Value *BasePtrStart = SCEVE.expandCodeFor(BasePtrStartSCEV, I8PtrTy,
LoopPredecessor->getTerminator());
@@ -295,8 +290,8 @@ bool PPCLoopPreIncPrep::runOnLoop(Loop *L) {
}
Instruction *InsPoint = Header->getFirstInsertionPt();
- GetElementPtrInst *PtrInc =
- GetElementPtrInst::Create(NewPHI, BasePtrIncSCEV->getValue(),
+ GetElementPtrInst *PtrInc = GetElementPtrInst::Create(
+ I8Ty, NewPHI, BasePtrIncSCEV->getValue(),
MemI->hasName() ? MemI->getName() + ".inc" : "", InsPoint);
PtrInc->setIsInBounds(IsPtrInBounds(BasePtr));
for (pred_iterator PI = pred_begin(Header), PE = pred_end(Header);
@@ -341,9 +336,9 @@ bool PPCLoopPreIncPrep::runOnLoop(Loop *L) {
PtrIP = PtrIP->getParent()->getFirstInsertionPt();
else if (!PtrIP)
PtrIP = I->second;
-
- GetElementPtrInst *NewPtr =
- GetElementPtrInst::Create(PtrInc, Diff->getValue(),
+
+ GetElementPtrInst *NewPtr = GetElementPtrInst::Create(
+ I8Ty, PtrInc, Diff->getValue(),
I->second->hasName() ? I->second->getName() + ".off" : "", PtrIP);
if (!PtrIP)
NewPtr->insertAfter(cast<Instruction>(PtrInc));
diff --git a/lib/Target/PowerPC/PPCMCInstLower.cpp b/lib/Target/PowerPC/PPCMCInstLower.cpp
index 819738b..0965cb3 100644
--- a/lib/Target/PowerPC/PPCMCInstLower.cpp
+++ b/lib/Target/PowerPC/PPCMCInstLower.cpp
@@ -184,6 +184,9 @@ void llvm::LowerPPCMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
llvm_unreachable("unknown operand type");
case MachineOperand::MO_Register:
assert(!MO.getSubReg() && "Subregs should be eliminated!");
+ assert(MO.getReg() > PPC::NoRegister &&
+ MO.getReg() < PPC::NUM_TARGET_REGS &&
+ "Invalid register for this target!");
MCOp = MCOperand::CreateReg(MO.getReg());
break;
case MachineOperand::MO_Immediate:
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp
index c9a9684..0e568d3 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -18,6 +18,7 @@
#include "PPCInstrBuilder.h"
#include "PPCMachineFunctionInfo.h"
#include "PPCSubtarget.h"
+#include "PPCTargetMachine.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -56,11 +57,11 @@ static cl::opt<bool>
AlwaysBasePointer("ppc-always-use-base-pointer", cl::Hidden, cl::init(false),
cl::desc("Force the use of a base pointer in every function"));
-PPCRegisterInfo::PPCRegisterInfo(const PPCSubtarget &ST)
- : PPCGenRegisterInfo(ST.isPPC64() ? PPC::LR8 : PPC::LR,
- ST.isPPC64() ? 0 : 1,
- ST.isPPC64() ? 0 : 1),
- Subtarget(ST) {
+PPCRegisterInfo::PPCRegisterInfo(const PPCTargetMachine &TM)
+ : PPCGenRegisterInfo(TM.isPPC64() ? PPC::LR8 : PPC::LR,
+ TM.isPPC64() ? 0 : 1,
+ TM.isPPC64() ? 0 : 1),
+ TM(TM) {
ImmToIdxMap[PPC::LD] = PPC::LDX; ImmToIdxMap[PPC::STD] = PPC::STDX;
ImmToIdxMap[PPC::LBZ] = PPC::LBZX; ImmToIdxMap[PPC::STB] = PPC::STBX;
ImmToIdxMap[PPC::LHZ] = PPC::LHZX; ImmToIdxMap[PPC::LHA] = PPC::LHAX;
@@ -87,18 +88,19 @@ PPCRegisterInfo::getPointerRegClass(const MachineFunction &MF, unsigned Kind)
// Note that PPCInstrInfo::FoldImmediate also directly uses this Kind value
// when it checks for ZERO folding.
if (Kind == 1) {
- if (Subtarget.isPPC64())
+ if (TM.isPPC64())
return &PPC::G8RC_NOX0RegClass;
return &PPC::GPRC_NOR0RegClass;
}
- if (Subtarget.isPPC64())
+ if (TM.isPPC64())
return &PPC::G8RCRegClass;
return &PPC::GPRCRegClass;
}
const MCPhysReg*
PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
+ const PPCSubtarget &Subtarget = MF->getSubtarget<PPCSubtarget>();
if (MF->getFunction()->getCallingConv() == CallingConv::AnyReg) {
if (Subtarget.hasVSX())
return CSR_64_AllRegs_VSX_SaveList;
@@ -108,28 +110,28 @@ PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
}
if (Subtarget.isDarwinABI())
- return Subtarget.isPPC64() ? (Subtarget.hasAltivec() ?
- CSR_Darwin64_Altivec_SaveList :
- CSR_Darwin64_SaveList) :
- (Subtarget.hasAltivec() ?
- CSR_Darwin32_Altivec_SaveList :
- CSR_Darwin32_SaveList);
+ return TM.isPPC64()
+ ? (Subtarget.hasAltivec() ? CSR_Darwin64_Altivec_SaveList
+ : CSR_Darwin64_SaveList)
+ : (Subtarget.hasAltivec() ? CSR_Darwin32_Altivec_SaveList
+ : CSR_Darwin32_SaveList);
// On PPC64, we might need to save r2 (but only if it is not reserved).
bool SaveR2 = MF->getRegInfo().isAllocatable(PPC::X2);
- return Subtarget.isPPC64() ? (Subtarget.hasAltivec() ?
- (SaveR2 ? CSR_SVR464_R2_Altivec_SaveList :
- CSR_SVR464_Altivec_SaveList) :
- (SaveR2 ? CSR_SVR464_R2_SaveList :
- CSR_SVR464_SaveList)) :
- (Subtarget.hasAltivec() ?
- CSR_SVR432_Altivec_SaveList :
- CSR_SVR432_SaveList);
+ return TM.isPPC64()
+ ? (Subtarget.hasAltivec()
+ ? (SaveR2 ? CSR_SVR464_R2_Altivec_SaveList
+ : CSR_SVR464_Altivec_SaveList)
+ : (SaveR2 ? CSR_SVR464_R2_SaveList : CSR_SVR464_SaveList))
+ : (Subtarget.hasAltivec() ? CSR_SVR432_Altivec_SaveList
+ : CSR_SVR432_SaveList);
}
-const uint32_t*
-PPCRegisterInfo::getCallPreservedMask(CallingConv::ID CC) const {
+const uint32_t *
+PPCRegisterInfo::getCallPreservedMask(const MachineFunction &MF,
+ CallingConv::ID CC) const {
+ const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
if (CC == CallingConv::AnyReg) {
if (Subtarget.hasVSX())
return CSR_64_AllRegs_VSX_RegMask;
@@ -139,19 +141,15 @@ PPCRegisterInfo::getCallPreservedMask(CallingConv::ID CC) const {
}
if (Subtarget.isDarwinABI())
- return Subtarget.isPPC64() ? (Subtarget.hasAltivec() ?
- CSR_Darwin64_Altivec_RegMask :
- CSR_Darwin64_RegMask) :
- (Subtarget.hasAltivec() ?
- CSR_Darwin32_Altivec_RegMask :
- CSR_Darwin32_RegMask);
-
- return Subtarget.isPPC64() ? (Subtarget.hasAltivec() ?
- CSR_SVR464_Altivec_RegMask :
- CSR_SVR464_RegMask) :
- (Subtarget.hasAltivec() ?
- CSR_SVR432_Altivec_RegMask :
- CSR_SVR432_RegMask);
+ return TM.isPPC64() ? (Subtarget.hasAltivec() ? CSR_Darwin64_Altivec_RegMask
+ : CSR_Darwin64_RegMask)
+ : (Subtarget.hasAltivec() ? CSR_Darwin32_Altivec_RegMask
+ : CSR_Darwin32_RegMask);
+
+ return TM.isPPC64() ? (Subtarget.hasAltivec() ? CSR_SVR464_Altivec_RegMask
+ : CSR_SVR464_RegMask)
+ : (Subtarget.hasAltivec() ? CSR_SVR432_Altivec_RegMask
+ : CSR_SVR432_RegMask);
}
const uint32_t*
@@ -160,15 +158,13 @@ PPCRegisterInfo::getNoPreservedMask() const {
}
void PPCRegisterInfo::adjustStackMapLiveOutMask(uint32_t *Mask) const {
- unsigned PseudoRegs[] = { PPC::ZERO, PPC::ZERO8, PPC::RM };
- for (unsigned i = 0, ie = array_lengthof(PseudoRegs); i != ie; ++i) {
- unsigned Reg = PseudoRegs[i];
- Mask[Reg / 32] &= ~(1u << (Reg % 32));
- }
+ for (unsigned PseudoReg : {PPC::ZERO, PPC::ZERO8, PPC::RM})
+ Mask[PseudoReg / 32] &= ~(1u << (PseudoReg % 32));
}
BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
BitVector Reserved(getNumRegs());
+ const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
const PPCFrameLowering *PPCFI =
static_cast<const PPCFrameLowering *>(Subtarget.getFrameLowering());
@@ -207,7 +203,7 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
}
// On PPC64, r13 is the thread pointer. Never allocate this register.
- if (Subtarget.isPPC64()) {
+ if (TM.isPPC64()) {
Reserved.set(PPC::R13);
Reserved.set(PPC::X1);
@@ -238,15 +234,15 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
Reserved.set(PPC::R31);
if (hasBasePointer(MF)) {
- if (Subtarget.isSVR4ABI() && !Subtarget.isPPC64() &&
- MF.getTarget().getRelocationModel() == Reloc::PIC_)
+ if (Subtarget.isSVR4ABI() && !TM.isPPC64() &&
+ TM.getRelocationModel() == Reloc::PIC_)
Reserved.set(PPC::R29);
else
Reserved.set(PPC::R30);
}
- if (Subtarget.isSVR4ABI() && !Subtarget.isPPC64() &&
- MF.getTarget().getRelocationModel() == Reloc::PIC_)
+ if (Subtarget.isSVR4ABI() && !TM.isPPC64() &&
+ TM.getRelocationModel() == Reloc::PIC_)
Reserved.set(PPC::R30);
// Reserve Altivec registers when Altivec is unavailable.
@@ -260,6 +256,7 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
unsigned PPCRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
MachineFunction &MF) const {
+ const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
const unsigned DefaultSafety = 1;
@@ -291,8 +288,10 @@ unsigned PPCRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
}
}
-const TargetRegisterClass *PPCRegisterInfo::getLargestLegalSuperClass(
- const TargetRegisterClass *RC) const {
+const TargetRegisterClass *
+PPCRegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC,
+ const MachineFunction &MF) const {
+ const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
if (Subtarget.hasVSX()) {
// With VSX, we can inflate various sub-register classes to the full VSX
// register set.
@@ -303,7 +302,7 @@ const TargetRegisterClass *PPCRegisterInfo::getLargestLegalSuperClass(
return &PPC::VSRCRegClass;
}
- return TargetRegisterInfo::getLargestLegalSuperClass(RC);
+ return TargetRegisterInfo::getLargestLegalSuperClass(RC, MF);
}
//===----------------------------------------------------------------------===//
@@ -326,10 +325,11 @@ void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II) const {
MachineFunction &MF = *MBB.getParent();
// Get the frame info.
MachineFrameInfo *MFI = MF.getFrameInfo();
+ const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
// Get the instruction info.
const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
// Determine whether 64-bit pointers are used.
- bool LP64 = Subtarget.isPPC64();
+ bool LP64 = TM.isPPC64();
DebugLoc dl = MI.getDebugLoc();
// Get the maximum call stack size.
@@ -443,10 +443,11 @@ void PPCRegisterInfo::lowerCRSpilling(MachineBasicBlock::iterator II,
// Get the instruction's basic block.
MachineBasicBlock &MBB = *MI.getParent();
MachineFunction &MF = *MBB.getParent();
+ const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
DebugLoc dl = MI.getDebugLoc();
- bool LP64 = Subtarget.isPPC64();
+ bool LP64 = TM.isPPC64();
const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
@@ -487,10 +488,11 @@ void PPCRegisterInfo::lowerCRRestore(MachineBasicBlock::iterator II,
// Get the instruction's basic block.
MachineBasicBlock &MBB = *MI.getParent();
MachineFunction &MF = *MBB.getParent();
+ const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
DebugLoc dl = MI.getDebugLoc();
- bool LP64 = Subtarget.isPPC64();
+ bool LP64 = TM.isPPC64();
const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
@@ -522,37 +524,6 @@ void PPCRegisterInfo::lowerCRRestore(MachineBasicBlock::iterator II,
MBB.erase(II);
}
-static unsigned getCRFromCRBit(unsigned SrcReg) {
- unsigned Reg = 0;
- if (SrcReg == PPC::CR0LT || SrcReg == PPC::CR0GT ||
- SrcReg == PPC::CR0EQ || SrcReg == PPC::CR0UN)
- Reg = PPC::CR0;
- else if (SrcReg == PPC::CR1LT || SrcReg == PPC::CR1GT ||
- SrcReg == PPC::CR1EQ || SrcReg == PPC::CR1UN)
- Reg = PPC::CR1;
- else if (SrcReg == PPC::CR2LT || SrcReg == PPC::CR2GT ||
- SrcReg == PPC::CR2EQ || SrcReg == PPC::CR2UN)
- Reg = PPC::CR2;
- else if (SrcReg == PPC::CR3LT || SrcReg == PPC::CR3GT ||
- SrcReg == PPC::CR3EQ || SrcReg == PPC::CR3UN)
- Reg = PPC::CR3;
- else if (SrcReg == PPC::CR4LT || SrcReg == PPC::CR4GT ||
- SrcReg == PPC::CR4EQ || SrcReg == PPC::CR4UN)
- Reg = PPC::CR4;
- else if (SrcReg == PPC::CR5LT || SrcReg == PPC::CR5GT ||
- SrcReg == PPC::CR5EQ || SrcReg == PPC::CR5UN)
- Reg = PPC::CR5;
- else if (SrcReg == PPC::CR6LT || SrcReg == PPC::CR6GT ||
- SrcReg == PPC::CR6EQ || SrcReg == PPC::CR6UN)
- Reg = PPC::CR6;
- else if (SrcReg == PPC::CR7LT || SrcReg == PPC::CR7GT ||
- SrcReg == PPC::CR7EQ || SrcReg == PPC::CR7UN)
- Reg = PPC::CR7;
-
- assert(Reg != 0 && "Invalid CR bit register");
- return Reg;
-}
-
void PPCRegisterInfo::lowerCRBitSpilling(MachineBasicBlock::iterator II,
unsigned FrameIndex) const {
// Get the instruction.
@@ -560,10 +531,11 @@ void PPCRegisterInfo::lowerCRBitSpilling(MachineBasicBlock::iterator II,
// Get the instruction's basic block.
MachineBasicBlock &MBB = *MI.getParent();
MachineFunction &MF = *MBB.getParent();
+ const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
DebugLoc dl = MI.getDebugLoc();
- bool LP64 = Subtarget.isPPC64();
+ bool LP64 = TM.isPPC64();
const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
@@ -603,10 +575,11 @@ void PPCRegisterInfo::lowerCRBitRestore(MachineBasicBlock::iterator II,
// Get the instruction's basic block.
MachineBasicBlock &MBB = *MI.getParent();
MachineFunction &MF = *MBB.getParent();
+ const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
DebugLoc dl = MI.getDebugLoc();
- bool LP64 = Subtarget.isPPC64();
+ bool LP64 = TM.isPPC64();
const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
@@ -650,6 +623,7 @@ void PPCRegisterInfo::lowerVRSAVESpilling(MachineBasicBlock::iterator II,
// Get the instruction's basic block.
MachineBasicBlock &MBB = *MI.getParent();
MachineFunction &MF = *MBB.getParent();
+ const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
DebugLoc dl = MI.getDebugLoc();
@@ -675,6 +649,7 @@ void PPCRegisterInfo::lowerVRSAVERestore(MachineBasicBlock::iterator II,
// Get the instruction's basic block.
MachineBasicBlock &MBB = *MI.getParent();
MachineFunction &MF = *MBB.getParent();
+ const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
DebugLoc dl = MI.getDebugLoc();
@@ -697,14 +672,14 @@ void PPCRegisterInfo::lowerVRSAVERestore(MachineBasicBlock::iterator II,
bool
PPCRegisterInfo::hasReservedSpillSlot(const MachineFunction &MF,
unsigned Reg, int &FrameIdx) const {
-
+ const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
// For the nonvolatile condition registers (CR2, CR3, CR4) in an SVR4
// ABI, return true to prevent allocating an additional frame slot.
// For 64-bit, the CR save area is at SP+8; the value of FrameIdx = 0
// is arbitrary and will be subsequently ignored. For 32-bit, we have
// previously created the stack slot if needed, so return its FrameIdx.
if (Subtarget.isSVR4ABI() && PPC::CR2 <= Reg && Reg <= PPC::CR4) {
- if (Subtarget.isPPC64())
+ if (TM.isPPC64())
FrameIdx = 0;
else {
const PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
@@ -757,6 +732,7 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
MachineBasicBlock &MBB = *MI.getParent();
// Get the basic block's function.
MachineFunction &MF = *MBB.getParent();
+ const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
// Get the instruction info.
const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
// Get the frame info.
@@ -847,7 +823,7 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// The offset doesn't fit into a single register, scavenge one to build the
// offset in.
- bool is64Bit = Subtarget.isPPC64();
+ bool is64Bit = TM.isPPC64();
const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
const TargetRegisterClass *RC = is64Bit ? G8RC : GPRC;
@@ -885,23 +861,25 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
}
unsigned PPCRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
+ const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
- if (!Subtarget.isPPC64())
+ if (!TM.isPPC64())
return TFI->hasFP(MF) ? PPC::R31 : PPC::R1;
else
return TFI->hasFP(MF) ? PPC::X31 : PPC::X1;
}
unsigned PPCRegisterInfo::getBaseRegister(const MachineFunction &MF) const {
+ const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
if (!hasBasePointer(MF))
return getFrameRegister(MF);
- if (Subtarget.isPPC64())
+ if (TM.isPPC64())
return PPC::X30;
if (Subtarget.isSVR4ABI() &&
- MF.getTarget().getRelocationModel() == Reloc::PIC_)
+ TM.getRelocationModel() == Reloc::PIC_)
return PPC::R29;
return PPC::R30;
@@ -927,6 +905,7 @@ bool PPCRegisterInfo::canRealignStack(const MachineFunction &MF) const {
}
bool PPCRegisterInfo::needsStackRealignment(const MachineFunction &MF) const {
+ const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
const MachineFrameInfo *MFI = MF.getFrameInfo();
const Function *F = MF.getFunction();
unsigned StackAlign = Subtarget.getFrameLowering()->getStackAlignment();
@@ -964,7 +943,7 @@ needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const {
MachineBasicBlock &MBB = *MI->getParent();
MachineFunction &MF = *MBB.getParent();
-
+ const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
const PPCFrameLowering *PPCFI =
static_cast<const PPCFrameLowering *>(Subtarget.getFrameLowering());
unsigned StackEst =
@@ -983,7 +962,7 @@ needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const {
// The frame pointer will point to the end of the stack, so estimate the
// offset as the difference between the object offset and the FP location.
- return !isFrameOffsetLegal(MI, Offset);
+ return !isFrameOffsetLegal(MI, getBaseRegister(MF), Offset);
}
/// Insert defining instruction(s) for BaseReg to
@@ -992,7 +971,7 @@ void PPCRegisterInfo::
materializeFrameBaseRegister(MachineBasicBlock *MBB,
unsigned BaseReg, int FrameIdx,
int64_t Offset) const {
- unsigned ADDriOpc = Subtarget.isPPC64() ? PPC::ADDI8 : PPC::ADDI;
+ unsigned ADDriOpc = TM.isPPC64() ? PPC::ADDI8 : PPC::ADDI;
MachineBasicBlock::iterator Ins = MBB->begin();
DebugLoc DL; // Defaults to "unknown"
@@ -1000,6 +979,7 @@ materializeFrameBaseRegister(MachineBasicBlock *MBB,
DL = Ins->getDebugLoc();
const MachineFunction &MF = *MBB->getParent();
+ const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
const MCInstrDesc &MCID = TII.get(ADDriOpc);
MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
@@ -1025,6 +1005,7 @@ void PPCRegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
MachineBasicBlock &MBB = *MI.getParent();
MachineFunction &MF = *MBB.getParent();
+ const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
const MCInstrDesc &MCID = MI.getDesc();
MachineRegisterInfo &MRI = MF.getRegInfo();
@@ -1033,6 +1014,7 @@ void PPCRegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
}
bool PPCRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI,
+ unsigned BaseReg,
int64_t Offset) const {
unsigned FIOperandNum = 0;
while (!MI->getOperand(FIOperandNum).isFI()) {
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.h b/lib/Target/PowerPC/PPCRegisterInfo.h
index 4c2ef90..d304e1d 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.h
+++ b/lib/Target/PowerPC/PPCRegisterInfo.h
@@ -22,15 +22,44 @@
#include "PPCGenRegisterInfo.inc"
namespace llvm {
-class PPCSubtarget;
-class TargetInstrInfo;
-class Type;
+
+inline static unsigned getCRFromCRBit(unsigned SrcReg) {
+ unsigned Reg = 0;
+ if (SrcReg == PPC::CR0LT || SrcReg == PPC::CR0GT ||
+ SrcReg == PPC::CR0EQ || SrcReg == PPC::CR0UN)
+ Reg = PPC::CR0;
+ else if (SrcReg == PPC::CR1LT || SrcReg == PPC::CR1GT ||
+ SrcReg == PPC::CR1EQ || SrcReg == PPC::CR1UN)
+ Reg = PPC::CR1;
+ else if (SrcReg == PPC::CR2LT || SrcReg == PPC::CR2GT ||
+ SrcReg == PPC::CR2EQ || SrcReg == PPC::CR2UN)
+ Reg = PPC::CR2;
+ else if (SrcReg == PPC::CR3LT || SrcReg == PPC::CR3GT ||
+ SrcReg == PPC::CR3EQ || SrcReg == PPC::CR3UN)
+ Reg = PPC::CR3;
+ else if (SrcReg == PPC::CR4LT || SrcReg == PPC::CR4GT ||
+ SrcReg == PPC::CR4EQ || SrcReg == PPC::CR4UN)
+ Reg = PPC::CR4;
+ else if (SrcReg == PPC::CR5LT || SrcReg == PPC::CR5GT ||
+ SrcReg == PPC::CR5EQ || SrcReg == PPC::CR5UN)
+ Reg = PPC::CR5;
+ else if (SrcReg == PPC::CR6LT || SrcReg == PPC::CR6GT ||
+ SrcReg == PPC::CR6EQ || SrcReg == PPC::CR6UN)
+ Reg = PPC::CR6;
+ else if (SrcReg == PPC::CR7LT || SrcReg == PPC::CR7GT ||
+ SrcReg == PPC::CR7EQ || SrcReg == PPC::CR7UN)
+ Reg = PPC::CR7;
+
+ assert(Reg != 0 && "Invalid CR bit register");
+ return Reg;
+}
+
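
A minimal usage sketch of the helper above (the caller and harness are hypothetical;
it assumes compilation inside lib/Target/PowerPC so the generated PPC register
enumerators are visible):

    #include "PPCRegisterInfo.h"
    #include <cassert>

    void example() {
      // Each CR bit register maps back to its owning 4-bit CR field,
      // e.g. the GT bit of CR6 belongs to PPC::CR6.
      assert(llvm::getCRFromCRBit(llvm::PPC::CR6GT) == llvm::PPC::CR6);
    }
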
class PPCRegisterInfo : public PPCGenRegisterInfo {
DenseMap<unsigned, unsigned> ImmToIdxMap;
- const PPCSubtarget &Subtarget;
+ const PPCTargetMachine &TM;
public:
- PPCRegisterInfo(const PPCSubtarget &SubTarget);
+ PPCRegisterInfo(const PPCTargetMachine &TM);
/// getPointerRegClass - Return the register class to use to hold pointers.
/// This is used for addressing modes.
@@ -40,13 +69,14 @@ public:
unsigned getRegPressureLimit(const TargetRegisterClass *RC,
MachineFunction &MF) const override;
- const TargetRegisterClass*
- getLargestLegalSuperClass(const TargetRegisterClass *RC) const override;
+ const TargetRegisterClass *
+ getLargestLegalSuperClass(const TargetRegisterClass *RC,
+ const MachineFunction &MF) const override;
/// Code Generation virtual methods...
- const MCPhysReg *
- getCalleeSavedRegs(const MachineFunction* MF =nullptr) const override;
- const uint32_t *getCallPreservedMask(CallingConv::ID CC) const override;
+ const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override;
+ const uint32_t *getCallPreservedMask(const MachineFunction &MF,
+ CallingConv::ID CC) const override;
const uint32_t *getNoPreservedMask() const;
void adjustStackMapLiveOutMask(uint32_t *Mask) const override;
@@ -97,7 +127,7 @@ public:
int64_t Offset) const override;
void resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
int64_t Offset) const override;
- bool isFrameOffsetLegal(const MachineInstr *MI,
+ bool isFrameOffsetLegal(const MachineInstr *MI, unsigned BaseReg,
int64_t Offset) const override;
// Debug information queries.
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.td b/lib/Target/PowerPC/PPCRegisterInfo.td
index 9a7df96..6ca68ed 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.td
+++ b/lib/Target/PowerPC/PPCRegisterInfo.td
@@ -341,6 +341,8 @@ def CRBITRC : RegisterClass<"PPC", [i1], 32,
def CRRC : RegisterClass<"PPC", [i32], 32, (add CR0, CR1, CR5, CR6,
CR7, CR2, CR3, CR4)>;
+def CRRC0 : RegisterClass<"PPC", [i32], 32, (add CR0)>;
+
// The CTR registers are not allocatable because they're used by the
// decrement-and-branch instructions, and thus need to stay live across
// multiple basic blocks.
diff --git a/lib/Target/PowerPC/PPCSchedule.td b/lib/Target/PowerPC/PPCSchedule.td
index 2f3a1f9..d0954a1 100644
--- a/lib/Target/PowerPC/PPCSchedule.td
+++ b/lib/Target/PowerPC/PPCSchedule.td
@@ -124,401 +124,3 @@ include "PPCScheduleP8.td"
include "PPCScheduleA2.td"
include "PPCScheduleE500mc.td"
include "PPCScheduleE5500.td"
-
-//===----------------------------------------------------------------------===//
-// Instruction to itinerary class map - When add new opcodes to the supported
-// set, refer to the following table to determine which itinerary class the
-// opcode belongs.
-//
-// opcode itinerary class
-// ====== ===============
-// add IIC_IntSimple
-// addc IIC_IntGeneral
-// adde IIC_IntGeneral
-// addi IIC_IntSimple
-// addic IIC_IntGeneral
-// addic. IIC_IntGeneral
-// addis IIC_IntSimple
-// addme IIC_IntGeneral
-// addze IIC_IntGeneral
-// and IIC_IntSimple
-// andc IIC_IntSimple
-// andi. IIC_IntGeneral
-// andis. IIC_IntGeneral
-// b IIC_BrB
-// bc IIC_BrB
-// bcctr IIC_BrB
-// bclr IIC_BrB
-// cmp IIC_IntCompare
-// cmpi IIC_IntCompare
-// cmpl IIC_IntCompare
-// cmpli IIC_IntCompare
-// cntlzd IIC_IntRotateD
-// cntlzw IIC_IntGeneral
-// crand IIC_BrCR
-// crandc IIC_BrCR
-// creqv IIC_BrCR
-// crnand IIC_BrCR
-// crnor IIC_BrCR
-// cror IIC_BrCR
-// crorc IIC_BrCR
-// crxor IIC_BrCR
-// dcba IIC_LdStDCBA
-// dcbf IIC_LdStDCBF
-// dcbi IIC_LdStDCBI
-// dcbst IIC_LdStDCBF
-// dcbt IIC_LdStLoad
-// dcbtst IIC_LdStLoad
-// dcbz IIC_LdStDCBF
-// divd IIC_IntDivD
-// divdu IIC_IntDivD
-// divw IIC_IntDivW
-// divwu IIC_IntDivW
-// dss IIC_LdStDSS
-// dst IIC_LdStDSS
-// dstst IIC_LdStDSS
-// eciwx IIC_LdStLoad
-// ecowx IIC_LdStLoad
-// eieio IIC_LdStLoad
-// eqv IIC_IntSimple
-// extsb IIC_IntSimple
-// extsh IIC_IntSimple
-// extsw IIC_IntSimple
-// fabs IIC_FPGeneral
-// fadd IIC_FPAddSub
-// fadds IIC_FPGeneral
-// fcfid IIC_FPGeneral
-// fcmpo IIC_FPCompare
-// fcmpu IIC_FPCompare
-// fctid IIC_FPGeneral
-// fctidz IIC_FPGeneral
-// fctiw IIC_FPGeneral
-// fctiwz IIC_FPGeneral
-// fdiv IIC_FPDivD
-// fdivs IIC_FPDivS
-// fmadd IIC_FPFused
-// fmadds IIC_FPGeneral
-// fmr IIC_FPGeneral
-// fmsub IIC_FPFused
-// fmsubs IIC_FPGeneral
-// fmul IIC_FPFused
-// fmuls IIC_FPGeneral
-// fnabs IIC_FPGeneral
-// fneg IIC_FPGeneral
-// fnmadd IIC_FPFused
-// fnmadds IIC_FPGeneral
-// fnmsub IIC_FPFused
-// fnmsubs IIC_FPGeneral
-// fres IIC_FPRes
-// frsp IIC_FPGeneral
-// frsqrte IIC_FPGeneral
-// fsel IIC_FPGeneral
-// fsqrt IIC_FPSqrtD
-// fsqrts IIC_FPSqrtS
-// fsub IIC_FPAddSub
-// fsubs IIC_FPGeneral
-// icbi IIC_LdStICBI
-// isel IIC_IntISEL
-// isync IIC_SprISYNC
-// lbz IIC_LdStLoad
-// lbzu IIC_LdStLoadUpd
-// lbzux IIC_LdStLoadUpdX
-// lbzx IIC_LdStLoad
-// ld IIC_LdStLD
-// ldarx IIC_LdStLDARX
-// ldu IIC_LdStLDU
-// ldux IIC_LdStLDUX
-// ldx IIC_LdStLD
-// lfd IIC_LdStLFD
-// lfdu IIC_LdStLFDU
-// lfdux IIC_LdStLFDUX
-// lfdx IIC_LdStLFD
-// lfs IIC_LdStLFD
-// lfsu IIC_LdStLFDU
-// lfsux IIC_LdStLFDUX
-// lfsx IIC_LdStLFD
-// lha IIC_LdStLHA
-// lhau IIC_LdStLHAU
-// lhaux IIC_LdStLHAUX
-// lhax IIC_LdStLHA
-// lhbrx IIC_LdStLoad
-// lhz IIC_LdStLoad
-// lhzu IIC_LdStLoadUpd
-// lhzux IIC_LdStLoadUpdX
-// lhzx IIC_LdStLoad
-// lmw IIC_LdStLMW
-// lswi IIC_LdStLMW
-// lswx IIC_LdStLMW
-// lvebx IIC_LdStLVecX
-// lvehx IIC_LdStLVecX
-// lvewx IIC_LdStLVecX
-// lvsl IIC_LdStLVecX
-// lvsr IIC_LdStLVecX
-// lvx IIC_LdStLVecX
-// lvxl IIC_LdStLVecX
-// lwa IIC_LdStLWA
-// lwarx IIC_LdStLWARX
-// lwaux IIC_LdStLHAUX
-// lwax IIC_LdStLHA
-// lwbrx IIC_LdStLoad
-// lwz IIC_LdStLoad
-// lwzu IIC_LdStLoadUpd
-// lwzux IIC_LdStLoadUpdX
-// lwzx IIC_LdStLoad
-// mcrf IIC_BrMCR
-// mcrfs IIC_FPGeneral
-// mcrxr IIC_BrMCRX
-// mfcr IIC_SprMFCR
-// mffs IIC_IntMFFS
-// mfmsr IIC_SprMFMSR
-// mfspr IIC_SprMFSPR
-// mfsr IIC_SprMFSR
-// mfsrin IIC_SprMFSR
-// mftb IIC_SprMFTB
-// mfvscr IIC_IntMFVSCR
-// mtcrf IIC_BrMCRX
-// mtfsb0 IIC_IntMTFSB0
-// mtfsb1 IIC_IntMTFSB0
-// mtfsf IIC_IntMTFSB0
-// mtfsfi IIC_IntMTFSB0
-// mtmsr IIC_SprMTMSR
-// mtmsrd IIC_LdStLD
-// mtspr IIC_SprMTSPR
-// mtsr IIC_SprMTSR
-// mtsrd IIC_IntMTSRD
-// mtsrdin IIC_IntMTSRD
-// mtsrin IIC_SprMTSRIN
-// mtvscr IIC_IntMFVSCR
-// mulhd IIC_IntMulHD
-// mulhdu IIC_IntMulHD
-// mulhw IIC_IntMulHW
-// mulhwu IIC_IntMulHWU
-// mulld IIC_IntMulHD
-// mulli IIC_IntMulLI
-// mullw IIC_IntMulHW
-// nand IIC_IntSimple
-// neg IIC_IntSimple
-// nor IIC_IntSimple
-// or IIC_IntSimple
-// orc IIC_IntSimple
-// ori IIC_IntSimple
-// oris IIC_IntSimple
-// rfi IIC_SprRFI
-// rfid IIC_IntRFID
-// rldcl IIC_IntRotateD
-// rldcr IIC_IntRotateD
-// rldic IIC_IntRotateDI
-// rldicl IIC_IntRotateDI
-// rldicr IIC_IntRotateDI
-// rldimi IIC_IntRotateDI
-// rlwimi IIC_IntRotate
-// rlwinm IIC_IntGeneral
-// rlwnm IIC_IntGeneral
-// sc IIC_SprSC
-// slbia IIC_LdStSLBIA
-// slbie IIC_LdStSLBIE
-// sld IIC_IntRotateD
-// slw IIC_IntGeneral
-// srad IIC_IntRotateD
-// sradi IIC_IntRotateDI
-// sraw IIC_IntShift
-// srawi IIC_IntShift
-// srd IIC_IntRotateD
-// srw IIC_IntGeneral
-// stb IIC_LdStStore
-// stbu IIC_LdStStoreUpd
-// stbux IIC_LdStStoreUpd
-// stbx IIC_LdStStore
-// std IIC_LdStSTD
-// stdcx. IIC_LdStSTDCX
-// stdu IIC_LdStSTDU
-// stdux IIC_LdStSTDUX
-// stdx IIC_LdStSTD
-// stfd IIC_LdStSTFD
-// stfdu IIC_LdStSTFDU
-// stfdux IIC_LdStSTFDU
-// stfdx IIC_LdStSTFD
-// stfiwx IIC_LdStSTFD
-// stfs IIC_LdStSTFD
-// stfsu IIC_LdStSTFDU
-// stfsux IIC_LdStSTFDU
-// stfsx IIC_LdStSTFD
-// sth IIC_LdStStore
-// sthbrx IIC_LdStStore
-// sthu IIC_LdStStoreUpd
-// sthux IIC_LdStStoreUpd
-// sthx IIC_LdStStore
-// stmw IIC_LdStLMW
-// stswi IIC_LdStLMW
-// stswx IIC_LdStLMW
-// stvebx IIC_LdStSTVEBX
-// stvehx IIC_LdStSTVEBX
-// stvewx IIC_LdStSTVEBX
-// stvx IIC_LdStSTVEBX
-// stvxl IIC_LdStSTVEBX
-// stw IIC_LdStStore
-// stwbrx IIC_LdStStore
-// stwcx. IIC_LdStSTWCX
-// stwu IIC_LdStStoreUpd
-// stwux IIC_LdStStoreUpd
-// stwx IIC_LdStStore
-// subf IIC_IntGeneral
-// subfc IIC_IntGeneral
-// subfe IIC_IntGeneral
-// subfic IIC_IntGeneral
-// subfme IIC_IntGeneral
-// subfze IIC_IntGeneral
-// sync IIC_LdStSync
-// td IIC_IntTrapD
-// tdi IIC_IntTrapD
-// tlbia IIC_LdStSLBIA
-// tlbie IIC_LdStDCBF
-// tlbsync IIC_SprTLBSYNC
-// tw IIC_IntTrapW
-// twi IIC_IntTrapW
-// vaddcuw IIC_VecGeneral
-// vaddfp IIC_VecFP
-// vaddsbs IIC_VecGeneral
-// vaddshs IIC_VecGeneral
-// vaddsws IIC_VecGeneral
-// vaddubm IIC_VecGeneral
-// vaddubs IIC_VecGeneral
-// vadduhm IIC_VecGeneral
-// vadduhs IIC_VecGeneral
-// vadduwm IIC_VecGeneral
-// vadduws IIC_VecGeneral
-// vand IIC_VecGeneral
-// vandc IIC_VecGeneral
-// vavgsb IIC_VecGeneral
-// vavgsh IIC_VecGeneral
-// vavgsw IIC_VecGeneral
-// vavgub IIC_VecGeneral
-// vavguh IIC_VecGeneral
-// vavguw IIC_VecGeneral
-// vcfsx IIC_VecFP
-// vcfux IIC_VecFP
-// vcmpbfp IIC_VecFPCompare
-// vcmpeqfp IIC_VecFPCompare
-// vcmpequb IIC_VecGeneral
-// vcmpequh IIC_VecGeneral
-// vcmpequw IIC_VecGeneral
-// vcmpgefp IIC_VecFPCompare
-// vcmpgtfp IIC_VecFPCompare
-// vcmpgtsb IIC_VecGeneral
-// vcmpgtsh IIC_VecGeneral
-// vcmpgtsw IIC_VecGeneral
-// vcmpgtub IIC_VecGeneral
-// vcmpgtuh IIC_VecGeneral
-// vcmpgtuw IIC_VecGeneral
-// vctsxs IIC_VecFP
-// vctuxs IIC_VecFP
-// vexptefp IIC_VecFP
-// vlogefp IIC_VecFP
-// vmaddfp IIC_VecFP
-// vmaxfp IIC_VecFPCompare
-// vmaxsb IIC_VecGeneral
-// vmaxsh IIC_VecGeneral
-// vmaxsw IIC_VecGeneral
-// vmaxub IIC_VecGeneral
-// vmaxuh IIC_VecGeneral
-// vmaxuw IIC_VecGeneral
-// vmhaddshs IIC_VecComplex
-// vmhraddshs IIC_VecComplex
-// vminfp IIC_VecFPCompare
-// vminsb IIC_VecGeneral
-// vminsh IIC_VecGeneral
-// vminsw IIC_VecGeneral
-// vminub IIC_VecGeneral
-// vminuh IIC_VecGeneral
-// vminuw IIC_VecGeneral
-// vmladduhm IIC_VecComplex
-// vmrghb IIC_VecPerm
-// vmrghh IIC_VecPerm
-// vmrghw IIC_VecPerm
-// vmrglb IIC_VecPerm
-// vmrglh IIC_VecPerm
-// vmrglw IIC_VecPerm
-// vmsubfp IIC_VecFP
-// vmsummbm IIC_VecComplex
-// vmsumshm IIC_VecComplex
-// vmsumshs IIC_VecComplex
-// vmsumubm IIC_VecComplex
-// vmsumuhm IIC_VecComplex
-// vmsumuhs IIC_VecComplex
-// vmulesb IIC_VecComplex
-// vmulesh IIC_VecComplex
-// vmuleub IIC_VecComplex
-// vmuleuh IIC_VecComplex
-// vmulosb IIC_VecComplex
-// vmulosh IIC_VecComplex
-// vmuloub IIC_VecComplex
-// vmulouh IIC_VecComplex
-// vnor IIC_VecGeneral
-// vor IIC_VecGeneral
-// vperm IIC_VecPerm
-// vpkpx IIC_VecPerm
-// vpkshss IIC_VecPerm
-// vpkshus IIC_VecPerm
-// vpkswss IIC_VecPerm
-// vpkswus IIC_VecPerm
-// vpkuhum IIC_VecPerm
-// vpkuhus IIC_VecPerm
-// vpkuwum IIC_VecPerm
-// vpkuwus IIC_VecPerm
-// vrefp IIC_VecFPRound
-// vrfim IIC_VecFPRound
-// vrfin IIC_VecFPRound
-// vrfip IIC_VecFPRound
-// vrfiz IIC_VecFPRound
-// vrlb IIC_VecGeneral
-// vrlh IIC_VecGeneral
-// vrlw IIC_VecGeneral
-// vrsqrtefp IIC_VecFP
-// vsel IIC_VecGeneral
-// vsl IIC_VecVSL
-// vslb IIC_VecGeneral
-// vsldoi IIC_VecPerm
-// vslh IIC_VecGeneral
-// vslo IIC_VecPerm
-// vslw IIC_VecGeneral
-// vspltb IIC_VecPerm
-// vsplth IIC_VecPerm
-// vspltisb IIC_VecPerm
-// vspltish IIC_VecPerm
-// vspltisw IIC_VecPerm
-// vspltw IIC_VecPerm
-// vsr IIC_VecVSR
-// vsrab IIC_VecGeneral
-// vsrah IIC_VecGeneral
-// vsraw IIC_VecGeneral
-// vsrb IIC_VecGeneral
-// vsrh IIC_VecGeneral
-// vsro IIC_VecPerm
-// vsrw IIC_VecGeneral
-// vsubcuw IIC_VecGeneral
-// vsubfp IIC_VecFP
-// vsubsbs IIC_VecGeneral
-// vsubshs IIC_VecGeneral
-// vsubsws IIC_VecGeneral
-// vsububm IIC_VecGeneral
-// vsububs IIC_VecGeneral
-// vsubuhm IIC_VecGeneral
-// vsubuhs IIC_VecGeneral
-// vsubuwm IIC_VecGeneral
-// vsubuws IIC_VecGeneral
-// vsum2sws IIC_VecComplex
-// vsum4sbs IIC_VecComplex
-// vsum4shs IIC_VecComplex
-// vsum4ubs IIC_VecComplex
-// vsumsws IIC_VecComplex
-// vupkhpx IIC_VecPerm
-// vupkhsb IIC_VecPerm
-// vupkhsh IIC_VecPerm
-// vupklpx IIC_VecPerm
-// vupklsb IIC_VecPerm
-// vupklsh IIC_VecPerm
-// vxor IIC_VecGeneral
-// xor IIC_IntSimple
-// xori IIC_IntSimple
-// xoris IIC_IntSimple
-//
diff --git a/lib/Target/PowerPC/PPCSubtarget.cpp b/lib/Target/PowerPC/PPCSubtarget.cpp
index c91428d..ed88803 100644
--- a/lib/Target/PowerPC/PPCSubtarget.cpp
+++ b/lib/Target/PowerPC/PPCSubtarget.cpp
@@ -69,6 +69,7 @@ void PPCSubtarget::initializeEnvironment() {
HasVSX = false;
HasP8Vector = false;
HasP8Altivec = false;
+ HasP8Crypto = false;
HasFCPSGN = false;
HasFSQRT = false;
HasFRE = false;
@@ -94,7 +95,9 @@ void PPCSubtarget::initializeEnvironment() {
HasLazyResolverStubs = false;
HasICBT = false;
HasInvariantFunctionDescriptors = false;
+ HasPartwordAtomics = false;
IsQPXStackUnaligned = false;
+ HasHTM = false;
}
void PPCSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
diff --git a/lib/Target/PowerPC/PPCSubtarget.h b/lib/Target/PowerPC/PPCSubtarget.h
index 247a96d..b4c1bb1 100644
--- a/lib/Target/PowerPC/PPCSubtarget.h
+++ b/lib/Target/PowerPC/PPCSubtarget.h
@@ -90,6 +90,7 @@ protected:
bool HasVSX;
bool HasP8Vector;
bool HasP8Altivec;
+ bool HasP8Crypto;
bool HasFCPSGN;
bool HasFSQRT;
bool HasFRE, HasFRES, HasFRSQRTE, HasFRSQRTES;
@@ -113,6 +114,8 @@ protected:
bool IsLittleEndian;
bool HasICBT;
bool HasInvariantFunctionDescriptors;
+ bool HasPartwordAtomics;
+ bool HasHTM;
/// When targeting QPX running a stock PPC64 Linux kernel where the stack
/// alignment has not been changed, we need to keep the 16-byte alignment
@@ -218,6 +221,7 @@ public:
bool hasVSX() const { return HasVSX; }
bool hasP8Vector() const { return HasP8Vector; }
bool hasP8Altivec() const { return HasP8Altivec; }
+ bool hasP8Crypto() const { return HasP8Crypto; }
bool hasMFOCRF() const { return HasMFOCRF; }
bool hasISEL() const { return HasISEL; }
bool hasPOPCNTD() const { return HasPOPCNTD; }
@@ -234,6 +238,7 @@ public:
bool hasInvariantFunctionDescriptors() const {
return HasInvariantFunctionDescriptors;
}
+ bool hasPartwordAtomics() const { return HasPartwordAtomics; }
bool isQPXStackUnaligned() const { return IsQPXStackUnaligned; }
unsigned getPlatformStackAlignment() const {
@@ -242,6 +247,7 @@ public:
return 16;
}
+ bool hasHTM() const { return HasHTM; }
const Triple &getTargetTriple() const { return TargetTriple; }
diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp
index b219e93..7267529 100644
--- a/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -160,11 +160,10 @@ PPCTargetMachine::PPCTargetMachine(const Target &T, StringRef TT, StringRef CPU,
StringRef FS, const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL)
- : LLVMTargetMachine(T, TT, CPU, computeFSAdditions(FS, OL, TT), Options, RM,
- CM, OL),
+ : LLVMTargetMachine(T, getDataLayoutString(Triple(TT)), TT, CPU,
+ computeFSAdditions(FS, OL, TT), Options, RM, CM, OL),
TLOF(createTLOF(Triple(getTargetTriple()))),
- TargetABI(computeTargetABI(Triple(TT), Options)),
- DL(getDataLayoutString(Triple(TT))), Subtarget(TT, CPU, TargetFS, *this) {
+ TargetABI(computeTargetABI(Triple(TT), Options)) {
initAsmInfo();
}
@@ -208,7 +207,15 @@ PPCTargetMachine::getSubtargetImpl(const Function &F) const {
// creation will depend on the TM and the code generation flags on the
// function that reside in TargetOptions.
resetTargetOptions(F);
- I = llvm::make_unique<PPCSubtarget>(TargetTriple, CPU, FS, *this);
+ I = llvm::make_unique<PPCSubtarget>(
+ TargetTriple, CPU,
+      // FIXME: It would be good if the subtarget additions here were
+      // not necessary. Anything that turns them on/off (overrides) ends
+ // up being put at the end of the feature string, but the defaults
+ // shouldn't require adding them. Fixing this means pulling Feature64Bit
+ // out of most of the target cpus in the .td file and making it set only
+ // as part of initialization via the TargetTriple.
+ computeFSAdditions(FS, getOptLevel(), getTargetTriple()), *this);
}
return I.get();
}
diff --git a/lib/Target/PowerPC/PPCTargetMachine.h b/lib/Target/PowerPC/PPCTargetMachine.h
index 6508484..7a49058 100644
--- a/lib/Target/PowerPC/PPCTargetMachine.h
+++ b/lib/Target/PowerPC/PPCTargetMachine.h
@@ -29,10 +29,6 @@ public:
private:
std::unique_ptr<TargetLoweringObjectFile> TLOF;
PPCABI TargetABI;
- // Calculates type size & alignment
- const DataLayout DL;
- PPCSubtarget Subtarget;
-
mutable StringMap<std::unique_ptr<PPCSubtarget>> SubtargetMap;
public:
@@ -42,8 +38,6 @@ public:
~PPCTargetMachine() override;
- const DataLayout *getDataLayout() const override { return &DL; }
- const PPCSubtarget *getSubtargetImpl() const override { return &Subtarget; }
const PPCSubtarget *getSubtargetImpl(const Function &F) const override;
// Pass Pipeline Configuration
diff --git a/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
index 073bbb0..b46acd4 100644
--- a/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ b/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -192,6 +192,10 @@ void PPCTTIImpl::getUnrollingPreferences(Loop *L,
BaseT::getUnrollingPreferences(L, UP);
}
+bool PPCTTIImpl::enableAggressiveInterleaving(bool LoopHasReductions) {
+ return LoopHasReductions;
+}
+
unsigned PPCTTIImpl::getNumberOfRegisters(bool Vector) {
if (Vector && !ST->hasAltivec() && !ST->hasQPX())
return 0;
diff --git a/lib/Target/PowerPC/PPCTargetTransformInfo.h b/lib/Target/PowerPC/PPCTargetTransformInfo.h
index cef7079..21acea1 100644
--- a/lib/Target/PowerPC/PPCTargetTransformInfo.h
+++ b/lib/Target/PowerPC/PPCTargetTransformInfo.h
@@ -78,6 +78,7 @@ public:
/// \name Vector TTI Implementations
/// @{
+ bool enableAggressiveInterleaving(bool LoopHasReductions);
unsigned getNumberOfRegisters(bool Vector);
unsigned getRegisterBitWidth(bool Vector);
unsigned getMaxInterleaveFactor();
diff --git a/lib/Target/PowerPC/README.txt b/lib/Target/PowerPC/README.txt
index 4132b04..dfe988f 100644
--- a/lib/Target/PowerPC/README.txt
+++ b/lib/Target/PowerPC/README.txt
@@ -621,3 +621,10 @@ void foo() {
bar(x);
__asm__("" ::: "cr2");
}
+
+//===----------------------------------------------------------------------===//
+
+Instruction fusion was introduced in ISA 2.06, and more opportunities were added in
+ISA 2.07. LLVM needs to add infrastructure to recognize fusion opportunities
+and force instruction pairs to be scheduled together.
+
diff --git a/lib/Target/PowerPC/README_ALTIVEC.txt b/lib/Target/PowerPC/README_ALTIVEC.txt
index 1e4c6fb..43d87d3 100644
--- a/lib/Target/PowerPC/README_ALTIVEC.txt
+++ b/lib/Target/PowerPC/README_ALTIVEC.txt
@@ -209,3 +209,107 @@ vector float f(vector float a, vector float b) {
return b;
}
+//===----------------------------------------------------------------------===//
+
+We should do a little better with eliminating dead stores.
+The stores to the stack are dead since %a and %b are not needed.
+
+; Function Attrs: nounwind
+define <16 x i8> @test_vpmsumb() #0 {
+ entry:
+ %a = alloca <16 x i8>, align 16
+ %b = alloca <16 x i8>, align 16
+ store <16 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16>, <16 x i8>* %a, align 16
+ store <16 x i8> <i8 113, i8 114, i8 115, i8 116, i8 117, i8 118, i8 119, i8 120, i8 121, i8 122, i8 123, i8 124, i8 125, i8 126, i8 127, i8 112>, <16 x i8>* %b, align 16
+ %0 = load <16 x i8>* %a, align 16
+ %1 = load <16 x i8>* %b, align 16
+ %2 = call <16 x i8> @llvm.ppc.altivec.crypto.vpmsumb(<16 x i8> %0, <16 x i8> %1)
+ ret <16 x i8> %2
+}
+
+
+; Function Attrs: nounwind readnone
+declare <16 x i8> @llvm.ppc.altivec.crypto.vpmsumb(<16 x i8>, <16 x i8>) #1
+
+
+Produces the following code with -mtriple=powerpc64-unknown-linux-gnu:
+# BB#0: # %entry
+ addis 3, 2, .LCPI0_0@toc@ha
+ addis 4, 2, .LCPI0_1@toc@ha
+ addi 3, 3, .LCPI0_0@toc@l
+ addi 4, 4, .LCPI0_1@toc@l
+ lxvw4x 0, 0, 3
+ addi 3, 1, -16
+ lxvw4x 35, 0, 4
+ stxvw4x 0, 0, 3
+ ori 2, 2, 0
+ lxvw4x 34, 0, 3
+ addi 3, 1, -32
+ stxvw4x 35, 0, 3
+ vpmsumb 2, 2, 3
+ blr
+ .long 0
+ .quad 0
+
+The two stxvw4x instructions are not needed.
+With -mtriple=powerpc64le-unknown-linux-gnu, the associated permutes
+are present too.
+
+//===----------------------------------------------------------------------===//
+
+The following example is found in test/CodeGen/PowerPC/vec_add_sub_doubleword.ll:
+
+define <2 x i64> @increment_by_val(<2 x i64> %x, i64 %val) nounwind {
+ %tmpvec = insertelement <2 x i64> <i64 0, i64 0>, i64 %val, i32 0
+ %tmpvec2 = insertelement <2 x i64> %tmpvec, i64 %val, i32 1
+ %result = add <2 x i64> %x, %tmpvec2
+       ret <2 x i64> %result
+}
+
+This will generate the following instruction sequence:
+ std 5, -8(1)
+ std 5, -16(1)
+ addi 3, 1, -16
+ ori 2, 2, 0
+ lxvd2x 35, 0, 3
+ vaddudm 2, 2, 3
+ blr
+
+This will almost certainly cause a load-hit-store hazard.
+Since val is a value parameter, it should not need to be saved onto
+the stack, unless it's being done to set up the vector register. Instead,
+it would be better to splat the value into a vector register, and then
+remove the (dead) stores to the stack.
+
+//===----------------------------------------------------------------------===//
+
+At the moment we always generate a lxsdx in preference to lfd, or stxsdx in
+preference to stfd. When we have a reg-immediate addressing mode, this is a
+poor choice, since we have to load the address into an index register. This
+should be fixed for P7/P8.
+
+//===----------------------------------------------------------------------===//
+
+Right now, ShuffleKind 0 is supported only on BE, and ShuffleKind 2 only on LE.
+However, we could actually support both kinds on either endianness, if we check
+for the appropriate shufflevector pattern for each case ... this would cause
+some additional shufflevectors to be recognized and implemented via the
+"swapped" form.
+
+//===----------------------------------------------------------------------===//
+
+There is a utility program called PerfectShuffle that generates a table of the
+shortest instruction sequence for implementing a shufflevector operation on
+PowerPC. However, this was designed for big-endian code generation. We could
+modify this program to create a little endian version of the table. The table
+is used in PPCISelLowering.cpp, PPCTargetLowering::LowerVECTOR_SHUFFLE().
+
+//===----------------------------------------------------------------------===//
+
+Opportunities to use instructions from PPCInstrVSX.td during code gen
+ - Conversion instructions (Sections 7.6.1.5 and 7.6.1.6 of ISA 2.07)
+ - Scalar comparisons (xscmpodp and xscmpudp)
+ - Min and max (xsmaxdp, xsmindp, xvmaxdp, xvmindp, xvmaxsp, xvminsp)
+
+Related to this: we currently do not generate the lxvw4x instruction for either
+v4f32 or v4i32, probably because adding a DAG pattern to the recognizer requires
+a single target type. This should probably be addressed in the PPCISelDAGToDAG logic.
diff --git a/lib/Target/R600/AMDGPU.td b/lib/Target/R600/AMDGPU.td
index a7d48b3..e5d5ce2 100644
--- a/lib/Target/R600/AMDGPU.td
+++ b/lib/Target/R600/AMDGPU.td
@@ -103,6 +103,11 @@ def FeatureVGPRSpilling : SubtargetFeature<"vgpr-spilling",
"true",
"Enable spilling of VGPRs to scratch memory">;
+def FeatureSGPRInitBug : SubtargetFeature<"sgpr-init-bug",
+ "SGPRInitBug",
+ "true",
+ "VI SGPR initialization bug requiring a fixed SGPR allocation size">;
+
class SubtargetFeatureFetchLimit <string Value> :
SubtargetFeature <"fetch"#Value,
"TexVTXClauseSize",
diff --git a/lib/Target/R600/AMDGPUAsmPrinter.cpp b/lib/Target/R600/AMDGPUAsmPrinter.cpp
index 92bc314..d911014 100644
--- a/lib/Target/R600/AMDGPUAsmPrinter.cpp
+++ b/lib/Target/R600/AMDGPUAsmPrinter.cpp
@@ -105,8 +105,6 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
SetupMachineFunction(MF);
- EmitFunctionHeader();
-
MCContext &Context = getObjFileLowering().getContext();
const MCSectionELF *ConfigSection =
Context.getELFSection(".AMDGPU.config", ELF::SHT_PROGBITS, 0);
@@ -129,7 +127,6 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
HexLines.clear();
DisasmLineMaxLen = 0;
- OutStreamer.SwitchSection(getObjFileLowering().getTextSection());
EmitFunctionBody();
if (isVerbose()) {
@@ -339,6 +336,13 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
ProgInfo.NumVGPR = MaxVGPR + 1;
ProgInfo.NumSGPR = MaxSGPR + 1;
+ if (STM.hasSGPRInitBug()) {
+ if (ProgInfo.NumSGPR > AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG)
+ llvm_unreachable("Too many SGPRs used with the SGPR init bug");
+
+ ProgInfo.NumSGPR = AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG;
+ }
+
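
In effect, the guard above pins the reported SGPR count whenever the bug is
present; a small standalone sketch with an illustrative count (the constant 80
is FIXED_SGPR_COUNT_FOR_INIT_BUG, defined in AMDGPUSubtarget.h later in this
patch):

    #include <cassert>

    int main() {
      unsigned NumSGPR = 37;       // illustrative post-scan count
      const unsigned Fixed = 80;   // FIXED_SGPR_COUNT_FOR_INIT_BUG
      assert(NumSGPR <= Fixed && "Too many SGPRs used with the SGPR init bug");
      NumSGPR = Fixed;             // always report the fixed allocation size
      assert(NumSGPR == 80);
      return 0;
    }
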
ProgInfo.VGPRBlocks = (ProgInfo.NumVGPR - 1) / 4;
ProgInfo.SGPRBlocks = (ProgInfo.NumSGPR - 1) / 8;
// Set the value to initialize FP_ROUND and FP_DENORM parts of the mode
diff --git a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
index b5ab703..7341cd9 100644
--- a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
+++ b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
@@ -95,7 +95,8 @@ private:
SDValue &Idxen, SDValue &Addr64, SDValue &GLC, SDValue &SLC,
SDValue &TFE) const;
bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
- SDValue &SOffset, SDValue &Offset) const;
+ SDValue &SOffset, SDValue &Offset, SDValue &GLC,
+ SDValue &SLC, SDValue &TFE) const;
bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
SDValue &VAddr, SDValue &SOffset, SDValue &Offset,
SDValue &SLC) const;
@@ -120,6 +121,11 @@ private:
SDNode *SelectADD_SUB_I64(SDNode *N);
SDNode *SelectDIV_SCALE(SDNode *N);
+ SDNode *getS_BFE(unsigned Opcode, SDLoc DL, SDValue Val,
+ uint32_t Offset, uint32_t Width);
+ SDNode *SelectS_BFEFromShifts(SDNode *N);
+ SDNode *SelectS_BFE(SDNode *N);
+
// Include the pieces autogenerated from the target description.
#include "AMDGPUGenDAGISel.inc"
};
@@ -519,21 +525,11 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
bool Signed = Opc == AMDGPUISD::BFE_I32;
- // Transformation function, pack the offset and width of a BFE into
- // the format expected by the S_BFE_I32 / S_BFE_U32. In the second
- // source, bits [5:0] contain the offset and bits [22:16] the width.
-
uint32_t OffsetVal = Offset->getZExtValue();
uint32_t WidthVal = Width->getZExtValue();
- uint32_t PackedVal = OffsetVal | WidthVal << 16;
-
- SDValue PackedOffsetWidth = CurDAG->getTargetConstant(PackedVal, MVT::i32);
- return CurDAG->getMachineNode(Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32,
- SDLoc(N),
- MVT::i32,
- N->getOperand(0),
- PackedOffsetWidth);
+ return getS_BFE(Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32, SDLoc(N),
+ N->getOperand(0), OffsetVal, WidthVal);
}
case AMDGPUISD::DIV_SCALE: {
@@ -547,6 +543,14 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
}
case ISD::ADDRSPACECAST:
return SelectAddrSpaceCast(N);
+ case ISD::AND:
+ case ISD::SRL:
+ case ISD::SRA:
+ if (N->getValueType(0) != MVT::i32 ||
+ Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
+ break;
+
+ return SelectS_BFE(N);
}
return SelectCode(N);
@@ -966,8 +970,9 @@ void AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr,
bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
SDValue &VAddr, SDValue &SOffset,
- SDValue &Offset) const {
- SDValue Ptr, Offen, Idxen, Addr64, GLC, SLC, TFE;
+ SDValue &Offset, SDValue &GLC,
+ SDValue &SLC, SDValue &TFE) const {
+ SDValue Ptr, Offen, Idxen, Addr64;
SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
GLC, SLC, TFE);
@@ -991,8 +996,9 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
SDValue &Offset,
SDValue &SLC) const {
SLC = CurDAG->getTargetConstant(0, MVT::i1);
+ SDValue GLC, TFE;
- return SelectMUBUFAddr64(Addr, SRsrc, VAddr, SOffset, Offset);
+ return SelectMUBUFAddr64(Addr, SRsrc, VAddr, SOffset, Offset, GLC, SLC, TFE);
}
bool AMDGPUDAGToDAGISel::SelectMUBUFScratch(SDValue Addr, SDValue &Rsrc,
@@ -1147,6 +1153,95 @@ SDNode *AMDGPUDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) {
return CurDAG->getNode(ISD::BITCAST, DL, DestVT, Src).getNode();
}
+SDNode *AMDGPUDAGToDAGISel::getS_BFE(unsigned Opcode, SDLoc DL, SDValue Val,
+ uint32_t Offset, uint32_t Width) {
+ // Transformation function: pack the offset and width of a BFE into
+ // the format expected by the S_BFE_I32 / S_BFE_U32. In the second
+ // source, bits [5:0] contain the offset and bits [22:16] the width.
+ uint32_t PackedVal = Offset | (Width << 16);
+ SDValue PackedConst = CurDAG->getTargetConstant(PackedVal, MVT::i32);
+
+ return CurDAG->getMachineNode(Opcode, DL, MVT::i32, Val, PackedConst);
+}
+
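
The packing rule in the comment above can be checked in isolation; the
following is a hypothetical standalone harness, with the bit positions taken
from the comment (offset in bits [5:0], width in bits [22:16]):

    #include <cassert>
    #include <cstdint>

    // Mirrors the PackedVal computation in getS_BFE above.
    static uint32_t packBFE(uint32_t Offset, uint32_t Width) {
      return Offset | (Width << 16);
    }

    int main() {
      // Extracting 5 bits starting at bit 3 yields the packed
      // S_BFE_U32 source operand 0x50003.
      assert(packBFE(3, 5) == 0x50003);
      return 0;
    }
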
+SDNode *AMDGPUDAGToDAGISel::SelectS_BFEFromShifts(SDNode *N) {
+ // "((a << b) srl c)" ---> "BFE_U32 a, (c-b), (32-c)"
+ // "((a << b) sra c)" ---> "BFE_I32 a, (c-b), (32-c)"
+ // Predicate: 0 < b <= c < 32
+
+ const SDValue &Shl = N->getOperand(0);
+ ConstantSDNode *B = dyn_cast<ConstantSDNode>(Shl->getOperand(1));
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
+
+ if (B && C) {
+ uint32_t BVal = B->getZExtValue();
+ uint32_t CVal = C->getZExtValue();
+
+ if (0 < BVal && BVal <= CVal && CVal < 32) {
+ bool Signed = N->getOpcode() == ISD::SRA;
+ unsigned Opcode = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;
+
+ return getS_BFE(Opcode, SDLoc(N), Shl.getOperand(0),
+ CVal - BVal, 32 - CVal);
+ }
+ }
+ return SelectCode(N);
+}
+
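
A standalone check of the rewrite in the comment above, using illustrative
values b = 8 and c = 16 (chosen to satisfy the predicate 0 < b <= c < 32);
the harness itself is hypothetical:

    #include <cassert>
    #include <cstdint>

    int main() {
      uint32_t a = 0xDEADBEEF;
      uint32_t shifts = (a << 8) >> 16;  // "((a << b) srl c)"
      uint32_t bfe = (a >> 8) & 0xFFFF;  // BFE_U32 a, c-b = 8, 32-c = 16
      assert(shifts == bfe && bfe == 0xADBE);
      return 0;
    }
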
+SDNode *AMDGPUDAGToDAGISel::SelectS_BFE(SDNode *N) {
+ switch (N->getOpcode()) {
+ case ISD::AND:
+ if (N->getOperand(0).getOpcode() == ISD::SRL) {
+ // "(a srl b) & mask" ---> "BFE_U32 a, b, popcount(mask)"
+ // Predicate: isMask(mask)
+ const SDValue &Srl = N->getOperand(0);
+ ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(Srl.getOperand(1));
+ ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));
+
+ if (Shift && Mask) {
+ uint32_t ShiftVal = Shift->getZExtValue();
+ uint32_t MaskVal = Mask->getZExtValue();
+
+ if (isMask_32(MaskVal)) {
+ uint32_t WidthVal = countPopulation(MaskVal);
+
+ return getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N), Srl.getOperand(0),
+ ShiftVal, WidthVal);
+ }
+ }
+ }
+ break;
+ case ISD::SRL:
+ if (N->getOperand(0).getOpcode() == ISD::AND) {
+ // "((a & mask) srl b)" ---> "BFE_U32 a, b, popcount(mask >> b)"
+ // Predicate: isMask(mask >> b)
+ const SDValue &And = N->getOperand(0);
+ ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(And->getOperand(1));
+
+ if (Shift && Mask) {
+ uint32_t ShiftVal = Shift->getZExtValue();
+ uint32_t MaskVal = Mask->getZExtValue() >> ShiftVal;
+
+ if (isMask_32(MaskVal)) {
+ uint32_t WidthVal = countPopulation(MaskVal);
+
+ return getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N), And.getOperand(0),
+ ShiftVal, WidthVal);
+ }
+ }
+ } else if (N->getOperand(0).getOpcode() == ISD::SHL)
+ return SelectS_BFEFromShifts(N);
+ break;
+ case ISD::SRA:
+ if (N->getOperand(0).getOpcode() == ISD::SHL)
+ return SelectS_BFEFromShifts(N);
+ break;
+ }
+
+ return SelectCode(N);
+}
+
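
Both rewrites in the switch above can be checked with small constants; this
hypothetical harness uses mask 0xFF0 and shift 4, so popcount(mask >> 4) = 8:

    #include <cassert>
    #include <cstdint>

    int main() {
      uint32_t a = 0xDEADBEEF;
      // ISD::AND case: "(a srl 4) & 0xFF" == BFE_U32 a, 4, popcount(0xFF) = 8.
      assert(((a >> 4) & 0xFF) == 0xEE);
      // ISD::SRL case: "((a & 0xFF0) srl 4)" is the same bitfield; the
      // predicate holds because 0xFF0 >> 4 == 0xFF is a low-bits mask.
      assert(((a & 0xFF0u) >> 4) == 0xEE);
      return 0;
    }
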
bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src,
SDValue &SrcMods) const {
diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp
index 4707279..62a33fa 100644
--- a/lib/Target/R600/AMDGPUISelLowering.cpp
+++ b/lib/Target/R600/AMDGPUISelLowering.cpp
@@ -885,9 +885,6 @@ SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
return LowerIntrinsicIABS(Op, DAG);
case AMDGPUIntrinsic::AMDGPU_lrp:
return LowerIntrinsicLRP(Op, DAG);
- case AMDGPUIntrinsic::AMDGPU_fract:
- case AMDGPUIntrinsic::AMDIL_fraction: // Legacy name.
- return DAG.getNode(AMDGPUISD::FRACT, DL, VT, Op.getOperand(1));
case AMDGPUIntrinsic::AMDGPU_clamp:
case AMDGPUIntrinsic::AMDIL_clamp: // Legacy name.
diff --git a/lib/Target/R600/AMDGPUInstrInfo.cpp b/lib/Target/R600/AMDGPUInstrInfo.cpp
index f4de2d6..f0f10ca 100644
--- a/lib/Target/R600/AMDGPUInstrInfo.cpp
+++ b/lib/Target/R600/AMDGPUInstrInfo.cpp
@@ -31,7 +31,7 @@ using namespace llvm;
void AMDGPUInstrInfo::anchor() {}
AMDGPUInstrInfo::AMDGPUInstrInfo(const AMDGPUSubtarget &st)
- : AMDGPUGenInstrInfo(-1,-1), RI(st), ST(st) { }
+ : AMDGPUGenInstrInfo(-1, -1), ST(st) {}
const AMDGPURegisterInfo &AMDGPUInstrInfo::getRegisterInfo() const {
return RI;
@@ -152,26 +152,22 @@ bool AMDGPUInstrInfo::expandPostRAPseudo (MachineBasicBlock::iterator MI) const
return true;
}
-
-MachineInstr *
-AMDGPUInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
- MachineInstr *MI,
- const SmallVectorImpl<unsigned> &Ops,
- int FrameIndex) const {
+MachineInstr *AMDGPUInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
+ MachineInstr *MI,
+ ArrayRef<unsigned> Ops,
+ int FrameIndex) const {
// TODO: Implement this function
return nullptr;
}
-MachineInstr*
-AMDGPUInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
- MachineInstr *MI,
- const SmallVectorImpl<unsigned> &Ops,
- MachineInstr *LoadMI) const {
+MachineInstr *
+AMDGPUInstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
+ ArrayRef<unsigned> Ops,
+ MachineInstr *LoadMI) const {
// TODO: Implement this function
return nullptr;
}
-bool
-AMDGPUInstrInfo::canFoldMemoryOperand(const MachineInstr *MI,
- const SmallVectorImpl<unsigned> &Ops) const {
+bool AMDGPUInstrInfo::canFoldMemoryOperand(const MachineInstr *MI,
+ ArrayRef<unsigned> Ops) const {
// TODO: Implement this function
return false;
}
@@ -360,8 +356,8 @@ static enum SISubtarget AMDGPUSubtargetToSISubtarget(unsigned Gen) {
}
int AMDGPUInstrInfo::pseudoToMCOpcode(int Opcode) const {
- int MCOp = AMDGPU::getMCOpcode(Opcode,
- AMDGPUSubtargetToSISubtarget(RI.ST.getGeneration()));
+ int MCOp = AMDGPU::getMCOpcode(
+ Opcode, AMDGPUSubtargetToSISubtarget(ST.getGeneration()));
// -1 means that Opcode is already a native instruction.
if (MCOp == -1)
diff --git a/lib/Target/R600/AMDGPUInstrInfo.h b/lib/Target/R600/AMDGPUInstrInfo.h
index 202183c..07042b5 100644
--- a/lib/Target/R600/AMDGPUInstrInfo.h
+++ b/lib/Target/R600/AMDGPUInstrInfo.h
@@ -85,14 +85,13 @@ public:
const TargetRegisterInfo *TRI) const override;
protected:
- MachineInstr *foldMemoryOperandImpl(MachineFunction &MF,
- MachineInstr *MI,
- const SmallVectorImpl<unsigned> &Ops,
+ MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
+ ArrayRef<unsigned> Ops,
int FrameIndex) const override;
- MachineInstr *foldMemoryOperandImpl(MachineFunction &MF,
- MachineInstr *MI,
- const SmallVectorImpl<unsigned> &Ops,
+ MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
+ ArrayRef<unsigned> Ops,
MachineInstr *LoadMI) const override;
+
public:
/// \returns the smallest register index that will be accessed by an indirect
/// read or write or -1 if indirect addressing is not used by this program.
@@ -103,7 +102,7 @@ public:
int getIndirectIndexEnd(const MachineFunction &MF) const;
bool canFoldMemoryOperand(const MachineInstr *MI,
- const SmallVectorImpl<unsigned> &Ops) const override;
+ ArrayRef<unsigned> Ops) const override;
bool unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
unsigned Reg, bool UnfoldLoad, bool UnfoldStore,
SmallVectorImpl<MachineInstr *> &NewMIs) const override;
diff --git a/lib/Target/R600/AMDGPUInstructions.td b/lib/Target/R600/AMDGPUInstructions.td
index 849b241..4d08201 100644
--- a/lib/Target/R600/AMDGPUInstructions.td
+++ b/lib/Target/R600/AMDGPUInstructions.td
@@ -578,22 +578,20 @@ class SHA256MaPattern <Instruction BFI_INT, Instruction XOR> : Pat <
// Bitfield extract patterns
-/*
-
-XXX: The BFE pattern is not working correctly because the XForm is not being
-applied.
+def IMMZeroBasedBitfieldMask : PatLeaf <(imm), [{
+ return isMask_32(N->getZExtValue());
+}]>;
-def legalshift32 : ImmLeaf <i32, [{return Imm >=0 && Imm < 32;}]>;
-def bfemask : PatLeaf <(imm), [{return isMask_32(N->getZExtValue());}],
- SDNodeXForm<imm, [{ return CurDAG->getTargetConstant(countTrailingOnes(N->getZExtValue()), MVT::i32);}]>>;
+def IMMPopCount : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(countPopulation(N->getZExtValue()),
+ MVT::i32);
+}]>;
-class BFEPattern <Instruction BFE> : Pat <
- (and (srl i32:$x, legalshift32:$y), bfemask:$z),
- (BFE $x, $y, $z)
+class BFEPattern <Instruction BFE, Instruction MOV> : Pat <
+ (i32 (and (i32 (srl i32:$src, i32:$rshift)), IMMZeroBasedBitfieldMask:$mask)),
+ (BFE $src, $rshift, (MOV (i32 (IMMPopCount $mask))))
>;
-*/
-
// rotr pattern
class ROTRPattern <Instruction BIT_ALIGN> : Pat <
(rotr i32:$src0, i32:$src1),
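
The predicates behind IMMZeroBasedBitfieldMask and IMMPopCount above come from
llvm/Support/MathExtras.h (isMask_32 and countPopulation, both referenced in
this patch); a quick sanity sketch, assuming the LLVM headers are on the
include path:

    #include "llvm/Support/MathExtras.h"
    #include <cassert>

    int main() {
      assert(llvm::isMask_32(0x0000001Fu));       // zero-based mask: matches
      assert(!llvm::isMask_32(0x00000FF0u));      // shifted mask: rejected
      assert(llvm::countPopulation(0x1Fu) == 5);  // becomes the BFE width operand
      return 0;
    }
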
diff --git a/lib/Target/R600/AMDGPUIntrinsics.td b/lib/Target/R600/AMDGPUIntrinsics.td
index eee9c29..ab489cd 100644
--- a/lib/Target/R600/AMDGPUIntrinsics.td
+++ b/lib/Target/R600/AMDGPUIntrinsics.td
@@ -68,6 +68,7 @@ let TargetPrefix = "AMDGPU", isTarget = 1 in {
def int_AMDGPU_bfe_u32 : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
def int_AMDGPU_bfm : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
def int_AMDGPU_brev : Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>;
+ def int_AMDGPU_flbit_i32 : Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>;
def int_AMDGPU_barrier_local : Intrinsic<[], [], []>;
def int_AMDGPU_barrier_global : Intrinsic<[], [], []>;
}
diff --git a/lib/Target/R600/AMDGPUPromoteAlloca.cpp b/lib/Target/R600/AMDGPUPromoteAlloca.cpp
index b81fef4..175dcd8 100644
--- a/lib/Target/R600/AMDGPUPromoteAlloca.cpp
+++ b/lib/Target/R600/AMDGPUPromoteAlloca.cpp
@@ -18,6 +18,7 @@
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#define DEBUG_TYPE "amdgpu-promote-alloca"
@@ -87,7 +88,7 @@ bool AMDGPUPromoteAlloca::runOnFunction(Function &F) {
continue;
if (Use->getParent()->getParent() == &F)
LocalMemAvailable -=
- Mod->getDataLayout()->getTypeAllocSize(GVTy->getElementType());
+ Mod->getDataLayout().getTypeAllocSize(GVTy->getElementType());
}
}
}
@@ -276,8 +277,8 @@ void AMDGPUPromoteAlloca::visitAlloca(AllocaInst &I) {
// value from the reqd_work_group_size function attribute if it is
// available.
unsigned WorkGroupSize = 256;
- int AllocaSize = WorkGroupSize *
- Mod->getDataLayout()->getTypeAllocSize(AllocaTy);
+ int AllocaSize =
+ WorkGroupSize * Mod->getDataLayout().getTypeAllocSize(AllocaTy);
if (AllocaSize > LocalMemAvailable) {
DEBUG(dbgs() << " Not enough local memory to promote alloca.\n");
@@ -294,9 +295,9 @@ void AMDGPUPromoteAlloca::visitAlloca(AllocaInst &I) {
DEBUG(dbgs() << "Promoting alloca to local memory\n");
LocalMemAvailable -= AllocaSize;
+ Type *GVTy = ArrayType::get(I.getAllocatedType(), 256);
GlobalVariable *GV = new GlobalVariable(
- *Mod, ArrayType::get(I.getAllocatedType(), 256), false,
- GlobalValue::ExternalLinkage, 0, I.getName(), 0,
+ *Mod, GVTy, false, GlobalValue::ExternalLinkage, 0, I.getName(), 0,
GlobalVariable::NotThreadLocal, AMDGPUAS::LOCAL_ADDRESS);
FunctionType *FTy = FunctionType::get(
@@ -332,7 +333,7 @@ void AMDGPUPromoteAlloca::visitAlloca(AllocaInst &I) {
Indices.push_back(Constant::getNullValue(Type::getInt32Ty(Mod->getContext())));
Indices.push_back(TID);
- Value *Offset = Builder.CreateGEP(GV, Indices);
+ Value *Offset = Builder.CreateGEP(GVTy, GV, Indices);
I.mutateType(Offset->getType());
I.replaceAllUsesWith(Offset);
I.eraseFromParent();
diff --git a/lib/Target/R600/AMDGPURegisterInfo.cpp b/lib/Target/R600/AMDGPURegisterInfo.cpp
index 57b054b..3ca0eca 100644
--- a/lib/Target/R600/AMDGPURegisterInfo.cpp
+++ b/lib/Target/R600/AMDGPURegisterInfo.cpp
@@ -17,10 +17,7 @@
using namespace llvm;
-AMDGPURegisterInfo::AMDGPURegisterInfo(const AMDGPUSubtarget &st)
-: AMDGPUGenRegisterInfo(0),
- ST(st)
- { }
+AMDGPURegisterInfo::AMDGPURegisterInfo() : AMDGPUGenRegisterInfo(0) {}
//===----------------------------------------------------------------------===//
// Function handling callbacks - Functions are a seldom-used feature of GPUs, so
diff --git a/lib/Target/R600/AMDGPURegisterInfo.h b/lib/Target/R600/AMDGPURegisterInfo.h
index f27576a..cfd800b 100644
--- a/lib/Target/R600/AMDGPURegisterInfo.h
+++ b/lib/Target/R600/AMDGPURegisterInfo.h
@@ -30,9 +30,8 @@ class TargetInstrInfo;
struct AMDGPURegisterInfo : public AMDGPUGenRegisterInfo {
static const MCPhysReg CalleeSavedReg;
- const AMDGPUSubtarget &ST;
- AMDGPURegisterInfo(const AMDGPUSubtarget &st);
+ AMDGPURegisterInfo();
BitVector getReservedRegs(const MachineFunction &MF) const override {
assert(!"Unimplemented"); return BitVector();
diff --git a/lib/Target/R600/AMDGPUSubtarget.cpp b/lib/Target/R600/AMDGPUSubtarget.cpp
index 70c8525..0ead652 100644
--- a/lib/Target/R600/AMDGPUSubtarget.cpp
+++ b/lib/Target/R600/AMDGPUSubtarget.cpp
@@ -70,7 +70,7 @@ AMDGPUSubtarget::AMDGPUSubtarget(StringRef TT, StringRef GPU, StringRef FS,
CaymanISA(false), FlatAddressSpace(false), EnableIRStructurizer(true),
EnablePromoteAlloca(false), EnableIfCvt(true), EnableLoadStoreOpt(false),
WavefrontSize(0), CFALUBug(false), LocalMemorySize(0),
- EnableVGPRSpilling(false),
+ EnableVGPRSpilling(false), SGPRInitBug(false),
FrameLowering(TargetFrameLowering::StackGrowsUp,
64 * 16, // Maximum stack alignment (long16)
0),
diff --git a/lib/Target/R600/AMDGPUSubtarget.h b/lib/Target/R600/AMDGPUSubtarget.h
index 1b0122c..403a3e4 100644
--- a/lib/Target/R600/AMDGPUSubtarget.h
+++ b/lib/Target/R600/AMDGPUSubtarget.h
@@ -44,6 +44,10 @@ public:
VOLCANIC_ISLANDS,
};
+ enum {
+ FIXED_SGPR_COUNT_FOR_INIT_BUG = 80
+ };
+
private:
std::string DevName;
bool Is64bit;
@@ -66,6 +70,7 @@ private:
bool CFALUBug;
int LocalMemorySize;
bool EnableVGPRSpilling;
+ bool SGPRInitBug;
AMDGPUFrameLowering FrameLowering;
std::unique_ptr<AMDGPUTargetLowering> TLInfo;
@@ -206,6 +211,10 @@ public:
return LocalMemorySize;
}
+ bool hasSGPRInitBug() const {
+ return SGPRInitBug;
+ }
+
unsigned getAmdKernelCodeChipID() const;
bool enableMachineScheduler() const override {
diff --git a/lib/Target/R600/AMDGPUTargetMachine.cpp b/lib/Target/R600/AMDGPUTargetMachine.cpp
index a862f3c..cb95835 100644
--- a/lib/Target/R600/AMDGPUTargetMachine.cpp
+++ b/lib/Target/R600/AMDGPUTargetMachine.cpp
@@ -71,10 +71,10 @@ AMDGPUTargetMachine::AMDGPUTargetMachine(const Target &T, StringRef TT,
TargetOptions Options, Reloc::Model RM,
CodeModel::Model CM,
CodeGenOpt::Level OptLevel)
- : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OptLevel),
- DL(computeDataLayout(TT)),
- TLOF(new TargetLoweringObjectFileELF()),
- Subtarget(TT, CPU, FS, *this), IntrinsicInfo() {
+ : LLVMTargetMachine(T, computeDataLayout(TT), TT, CPU, FS, Options, RM, CM,
+ OptLevel),
+ TLOF(new TargetLoweringObjectFileELF()), Subtarget(TT, CPU, FS, *this),
+ IntrinsicInfo() {
setRequiresStructuredCFG(true);
initAsmInfo();
}
@@ -118,7 +118,7 @@ public:
ScheduleDAGInstrs *
createMachineScheduler(MachineSchedContext *C) const override {
- const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
+ const AMDGPUSubtarget &ST = *getAMDGPUTargetMachine().getSubtargetImpl();
if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS)
return createR600MachineScheduler(C);
return nullptr;
@@ -174,7 +174,7 @@ void AMDGPUPassConfig::addIRPasses() {
}
void AMDGPUPassConfig::addCodeGenPrepare() {
- const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
+ const AMDGPUSubtarget &ST = *getAMDGPUTargetMachine().getSubtargetImpl();
if (ST.isPromoteAllocaEnabled()) {
addPass(createAMDGPUPromoteAlloca(ST));
addPass(createSROAPass());
@@ -184,7 +184,7 @@ void AMDGPUPassConfig::addCodeGenPrepare() {
bool
AMDGPUPassConfig::addPreISel() {
- const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
+ const AMDGPUSubtarget &ST = *getAMDGPUTargetMachine().getSubtargetImpl();
addPass(createFlattenCFGPass());
if (ST.IsIRStructurizerEnabled())
addPass(createStructurizeCFGPass());
@@ -211,7 +211,7 @@ void R600PassConfig::addPreRegAlloc() {
}
void R600PassConfig::addPreSched2() {
- const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
+ const AMDGPUSubtarget &ST = *getAMDGPUTargetMachine().getSubtargetImpl();
addPass(createR600EmitClauseMarkers(), false);
if (ST.isIfCvtEnabled())
addPass(&IfConverterID, false);
@@ -251,15 +251,15 @@ bool GCNPassConfig::addInstSelector() {
}
void GCNPassConfig::addPreRegAlloc() {
- const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
+ const AMDGPUSubtarget &ST = *getAMDGPUTargetMachine().getSubtargetImpl();
if (getOptLevel() > CodeGenOpt::None && ST.loadStoreOptEnabled()) {
- // Don't do this with no optimizations since it throws away debug info by
- // merging nonadjacent loads.
+ // Don't do this with no optimizations since it throws away debug info by
+ // merging nonadjacent loads.
- // This should be run after scheduling, but before register allocation. It
- // also need extra copies to the address operand to be eliminated.
- initializeSILoadStoreOptimizerPass(*PassRegistry::getPassRegistry());
- insertPass(&MachineSchedulerID, &SILoadStoreOptimizerID);
+ // This should be run after scheduling, but before register allocation. It
+ // also needs extra copies to the address operand to be eliminated.
+ initializeSILoadStoreOptimizerPass(*PassRegistry::getPassRegistry());
+ insertPass(&MachineSchedulerID, &SILoadStoreOptimizerID);
}
addPass(createSIShrinkInstructionsPass(), false);
addPass(createSIFixSGPRLiveRangesPass(), false);
diff --git a/lib/Target/R600/AMDGPUTargetMachine.h b/lib/Target/R600/AMDGPUTargetMachine.h
index a691536..785c119 100644
--- a/lib/Target/R600/AMDGPUTargetMachine.h
+++ b/lib/Target/R600/AMDGPUTargetMachine.h
@@ -30,7 +30,6 @@ namespace llvm {
class AMDGPUTargetMachine : public LLVMTargetMachine {
private:
- const DataLayout DL;
protected:
TargetLoweringObjectFile *TLOF;
@@ -42,12 +41,9 @@ public:
StringRef CPU, TargetOptions Options, Reloc::Model RM,
CodeModel::Model CM, CodeGenOpt::Level OL);
~AMDGPUTargetMachine();
- // FIXME: This is currently broken, the DataLayout needs to move to
- // the target machine.
- const DataLayout *getDataLayout() const override {
- return &DL;
- }
- const AMDGPUSubtarget *getSubtargetImpl() const override {
+
+ const AMDGPUSubtarget *getSubtargetImpl() const { return &Subtarget; }
+ const AMDGPUSubtarget *getSubtargetImpl(const Function &) const override {
return &Subtarget;
}
const AMDGPUIntrinsicInfo *getIntrinsicInfo() const override {
diff --git a/lib/Target/R600/AMDGPUTargetTransformInfo.cpp b/lib/Target/R600/AMDGPUTargetTransformInfo.cpp
index 68f4600..96edc41 100644
--- a/lib/Target/R600/AMDGPUTargetTransformInfo.cpp
+++ b/lib/Target/R600/AMDGPUTargetTransformInfo.cpp
@@ -20,6 +20,7 @@
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/BasicTTIImpl.h"
+#include "llvm/IR/Module.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/CostTable.h"
#include "llvm/Target/TargetLowering.h"
@@ -36,13 +37,15 @@ void AMDGPUTTIImpl::getUnrollingPreferences(Loop *L,
// TODO: Do we want runtime unrolling?
for (const BasicBlock *BB : L->getBlocks()) {
+ const DataLayout &DL = BB->getModule()->getDataLayout();
for (const Instruction &I : *BB) {
const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(&I);
if (!GEP || GEP->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS)
continue;
const Value *Ptr = GEP->getPointerOperand();
- const AllocaInst *Alloca = dyn_cast<AllocaInst>(GetUnderlyingObject(Ptr));
+ const AllocaInst *Alloca =
+ dyn_cast<AllocaInst>(GetUnderlyingObject(Ptr, DL));
if (Alloca) {
// We want to do whatever we can to limit the number of alloca
// instructions that make it through to the code generator. allocas
diff --git a/lib/Target/R600/AMDILCFGStructurizer.cpp b/lib/Target/R600/AMDILCFGStructurizer.cpp
index ee6e8ec..ee6551b 100644
--- a/lib/Target/R600/AMDILCFGStructurizer.cpp
+++ b/lib/Target/R600/AMDILCFGStructurizer.cpp
@@ -10,8 +10,8 @@
#include "AMDGPU.h"
#include "AMDGPUInstrInfo.h"
-#include "R600InstrInfo.h"
#include "AMDGPUSubtarget.h"
+#include "R600InstrInfo.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/SCCIterator.h"
#include "llvm/ADT/SmallVector.h"
@@ -30,6 +30,7 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
+#include <deque>
using namespace llvm;
@@ -165,6 +166,7 @@ public:
TRI = &TII->getRegisterInfo();
DEBUG(MF.dump(););
OrderedBlks.clear();
+ Visited.clear();
FuncRep = &MF;
MLI = &getAnalysis<MachineLoopInfo>();
DEBUG(dbgs() << "LoopInfo:\n"; PrintLoopinfo(*MLI););
@@ -621,7 +623,7 @@ DebugLoc AMDGPUCFGStructurizer::getLastDebugLocInBB(MachineBasicBlock *MBB) {
for (MachineBasicBlock::iterator It = MBB->begin(); It != MBB->end();
++It) {
MachineInstr *instr = &(*It);
- if (instr->getDebugLoc().isUnknown() == false)
+ if (!instr->getDebugLoc().isUnknown())
DL = instr->getDebugLoc();
}
return DL;
@@ -1075,21 +1077,19 @@ int AMDGPUCFGStructurizer::ifPatternMatch(MachineBasicBlock *MBB) {
}
int AMDGPUCFGStructurizer::loopendPatternMatch() {
- std::vector<MachineLoop *> NestedLoops;
- for (MachineLoopInfo::iterator It = MLI->begin(), E = MLI->end(); It != E;
- ++It)
- for (MachineLoop *ML : depth_first(*It))
- NestedLoops.push_back(ML);
+ std::deque<MachineLoop *> NestedLoops;
+ for (auto &It: *MLI)
+ for (MachineLoop *ML : depth_first(It))
+ NestedLoops.push_front(ML);
if (NestedLoops.size() == 0)
return 0;
- // Process nested loop outside->inside, so "continue" to a outside loop won't
- // be mistaken as "break" of the current loop.
+ // Process nested loops outside->inside (we did push_front),
+ // so a "continue" to an outer loop won't be mistaken for a
+ // "break" of the current loop.
int Num = 0;
- for (std::vector<MachineLoop *>::reverse_iterator It = NestedLoops.rbegin(),
- E = NestedLoops.rend(); It != E; ++It) {
- MachineLoop *ExaminedLoop = *It;
+ for (MachineLoop *ExaminedLoop : NestedLoops) {
if (ExaminedLoop->getNumBlocks() == 0 || Visited[ExaminedLoop])
continue;
DEBUG(dbgs() << "Processing:\n"; ExaminedLoop->dump(););
@@ -1611,7 +1611,7 @@ void AMDGPUCFGStructurizer::settleLoopcontBlock(MachineBasicBlock *ContingMBB,
bool UseContinueLogical = ((&*ContingMBB->rbegin()) == MI);
- if (UseContinueLogical == false) {
+ if (!UseContinueLogical) {
int BranchOpcode =
TrueBranch == ContMBB ? getBranchNzeroOpcode(OldOpcode) :
getBranchZeroOpcode(OldOpcode);
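
The loopendPatternMatch hunk above replaces a vector filled with push_back and walked with reverse iterators by a deque filled with push_front and walked forwards; both visit the depth-first loop list in the same reversed order. A tiny self-contained check of that equivalence (illustrative only, not from the patch):

#include <algorithm>
#include <cassert>
#include <deque>
#include <initializer_list>
#include <vector>

int main() {
  std::deque<int> D;
  std::vector<int> V;
  for (int X : {1, 2, 3}) {
    D.push_front(X); // new style: forward iteration sees 3, 2, 1
    V.push_back(X);  // old style: rbegin()/rend() also sees 3, 2, 1
  }
  assert(std::equal(D.begin(), D.end(), V.rbegin()));
}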
diff --git a/lib/Target/R600/AsmParser/AMDGPUAsmParser.cpp b/lib/Target/R600/AsmParser/AMDGPUAsmParser.cpp
index 3b4ba1a..49f0f23 100644
--- a/lib/Target/R600/AsmParser/AMDGPUAsmParser.cpp
+++ b/lib/Target/R600/AsmParser/AMDGPUAsmParser.cpp
@@ -46,10 +46,9 @@ class AMDGPUAsmParser : public MCTargetAsmParser {
/// }
public:
- AMDGPUAsmParser(MCSubtargetInfo &_STI, MCAsmParser &_Parser,
- const MCInstrInfo &_MII,
- const MCTargetOptions &Options)
- : MCTargetAsmParser(), STI(_STI), Parser(_Parser) {
+ AMDGPUAsmParser(MCSubtargetInfo &STI, MCAsmParser &Parser,
+ const MCInstrInfo &MII, const MCTargetOptions &Options)
+ : MCTargetAsmParser(), STI(STI), Parser(Parser) {
setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
}
bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
diff --git a/lib/Target/R600/EvergreenInstructions.td b/lib/Target/R600/EvergreenInstructions.td
index 9f9472c..5560146 100644
--- a/lib/Target/R600/EvergreenInstructions.td
+++ b/lib/Target/R600/EvergreenInstructions.td
@@ -287,9 +287,8 @@ def BFE_INT_eg : R600_3OP <0x5, "BFE_INT",
VecALU
>;
-// XXX: This pattern is broken, disabling for now. See comment in
-// AMDGPUInstructions.td for more info.
-// def : BFEPattern <BFE_UINT_eg>;
+def : BFEPattern <BFE_UINT_eg, MOV_IMM_I32>;
+
def BFI_INT_eg : R600_3OP <0x06, "BFI_INT",
[(set i32:$dst, (AMDGPUbfi i32:$src0, i32:$src1, i32:$src2))],
VecALU
diff --git a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp
index b66ed10..d62fd3f 100644
--- a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp
+++ b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp
@@ -99,6 +99,12 @@ void AMDGPUInstPrinter::printDSOffset1(const MCInst *MI, unsigned OpNo,
printU8ImmDecOperand(MI, OpNo, O);
}
+void AMDGPUInstPrinter::printGDS(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ if (MI->getOperand(OpNo).getImm())
+ O << " gds";
+}
+
void AMDGPUInstPrinter::printGLC(const MCInst *MI, unsigned OpNo,
raw_ostream &O) {
if (MI->getOperand(OpNo).getImm())
@@ -208,6 +214,16 @@ void AMDGPUInstPrinter::printRegOperand(unsigned reg, raw_ostream &O) {
O << Type << '[' << RegIdx << ':' << (RegIdx + NumRegs - 1) << ']';
}
+void AMDGPUInstPrinter::printVOPDst(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ if (MII.get(MI->getOpcode()).TSFlags & SIInstrFlags::VOP3)
+ O << "_e64 ";
+ else
+ O << "_e32 ";
+
+ printOperand(MI, OpNo, O);
+}
+
void AMDGPUInstPrinter::printImmediate32(uint32_t Imm, raw_ostream &O) {
int32_t SImm = static_cast<int32_t>(Imm);
if (SImm >= -16 && SImm <= 64) {
diff --git a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h
index 1d43c7a..5289718 100644
--- a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h
+++ b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h
@@ -44,10 +44,12 @@ private:
void printDSOffset(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printDSOffset0(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printDSOffset1(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printGDS(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printGLC(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printSLC(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printTFE(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printRegOperand(unsigned RegNo, raw_ostream &O);
+ void printVOPDst(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printImmediate32(uint32_t I, raw_ostream &O);
void printImmediate64(uint64_t I, raw_ostream &O);
void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
diff --git a/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.cpp b/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.cpp
index 83403ba..fb2deef 100644
--- a/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.cpp
+++ b/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.cpp
@@ -17,6 +17,7 @@
#include "InstPrinter/AMDGPUInstPrinter.h"
#include "SIDefines.h"
#include "llvm/MC/MCCodeGenInfo.h"
+#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
@@ -72,50 +73,19 @@ static MCInstPrinter *createAMDGPUMCInstPrinter(const Target &T,
return new AMDGPUInstPrinter(MAI, MII, MRI);
}
-static MCCodeEmitter *createAMDGPUMCCodeEmitter(const MCInstrInfo &MCII,
- const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI,
- MCContext &Ctx) {
- if (STI.getFeatureBits() & AMDGPU::Feature64BitPtr) {
- return createSIMCCodeEmitter(MCII, MRI, STI, Ctx);
- } else {
- return createR600MCCodeEmitter(MCII, MRI, STI);
- }
-}
-
-static MCStreamer *createMCStreamer(const Target &T, StringRef TT,
- MCContext &Ctx, MCAsmBackend &MAB,
- raw_ostream &_OS, MCCodeEmitter *_Emitter,
- const MCSubtargetInfo &STI, bool RelaxAll) {
- return createELFStreamer(Ctx, MAB, _OS, _Emitter, false);
-}
-
extern "C" void LLVMInitializeR600TargetMC() {
+ for (Target *T : {&TheAMDGPUTarget, &TheGCNTarget}) {
+ RegisterMCAsmInfo<AMDGPUMCAsmInfo> X(*T);
+
+ TargetRegistry::RegisterMCCodeGenInfo(*T, createAMDGPUMCCodeGenInfo);
+ TargetRegistry::RegisterMCInstrInfo(*T, createAMDGPUMCInstrInfo);
+ TargetRegistry::RegisterMCRegInfo(*T, createAMDGPUMCRegisterInfo);
+ TargetRegistry::RegisterMCSubtargetInfo(*T, createAMDGPUMCSubtargetInfo);
+ TargetRegistry::RegisterMCInstPrinter(*T, createAMDGPUMCInstPrinter);
+ TargetRegistry::RegisterMCAsmBackend(*T, createAMDGPUAsmBackend);
+ }
- RegisterMCAsmInfo<AMDGPUMCAsmInfo> Y(TheAMDGPUTarget);
- RegisterMCAsmInfo<AMDGPUMCAsmInfo> Z(TheGCNTarget);
-
- TargetRegistry::RegisterMCCodeGenInfo(TheAMDGPUTarget, createAMDGPUMCCodeGenInfo);
- TargetRegistry::RegisterMCCodeGenInfo(TheGCNTarget, createAMDGPUMCCodeGenInfo);
-
- TargetRegistry::RegisterMCInstrInfo(TheAMDGPUTarget, createAMDGPUMCInstrInfo);
- TargetRegistry::RegisterMCInstrInfo(TheGCNTarget, createAMDGPUMCInstrInfo);
-
- TargetRegistry::RegisterMCRegInfo(TheAMDGPUTarget, createAMDGPUMCRegisterInfo);
- TargetRegistry::RegisterMCRegInfo(TheGCNTarget, createAMDGPUMCRegisterInfo);
-
- TargetRegistry::RegisterMCSubtargetInfo(TheAMDGPUTarget, createAMDGPUMCSubtargetInfo);
- TargetRegistry::RegisterMCSubtargetInfo(TheGCNTarget, createAMDGPUMCSubtargetInfo);
-
- TargetRegistry::RegisterMCInstPrinter(TheAMDGPUTarget, createAMDGPUMCInstPrinter);
- TargetRegistry::RegisterMCInstPrinter(TheGCNTarget, createAMDGPUMCInstPrinter);
-
- TargetRegistry::RegisterMCCodeEmitter(TheAMDGPUTarget, createAMDGPUMCCodeEmitter);
- TargetRegistry::RegisterMCCodeEmitter(TheGCNTarget, createAMDGPUMCCodeEmitter);
-
- TargetRegistry::RegisterMCAsmBackend(TheAMDGPUTarget, createAMDGPUAsmBackend);
- TargetRegistry::RegisterMCAsmBackend(TheGCNTarget, createAMDGPUAsmBackend);
-
- TargetRegistry::RegisterMCObjectStreamer(TheAMDGPUTarget, createMCStreamer);
- TargetRegistry::RegisterMCObjectStreamer(TheGCNTarget, createMCStreamer);
+ TargetRegistry::RegisterMCCodeEmitter(TheAMDGPUTarget,
+ createR600MCCodeEmitter);
+ TargetRegistry::RegisterMCCodeEmitter(TheGCNTarget, createSIMCCodeEmitter);
}
diff --git a/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.h b/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.h
index bc8cd53..23f0196 100644
--- a/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.h
+++ b/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.h
@@ -16,6 +16,7 @@
#ifndef LLVM_LIB_TARGET_R600_MCTARGETDESC_AMDGPUMCTARGETDESC_H
#define LLVM_LIB_TARGET_R600_MCTARGETDESC_AMDGPUMCTARGETDESC_H
+#include "llvm/Support/DataTypes.h"
#include "llvm/ADT/StringRef.h"
namespace llvm {
@@ -34,11 +35,10 @@ extern Target TheGCNTarget;
MCCodeEmitter *createR600MCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI);
+ MCContext &Ctx);
MCCodeEmitter *createSIMCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI,
MCContext &Ctx);
MCAsmBackend *createAMDGPUAsmBackend(const Target &T, const MCRegisterInfo &MRI,
diff --git a/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp b/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp
index 8a555ff..fa25f59 100644
--- a/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp
+++ b/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp
@@ -81,8 +81,8 @@ enum FCInstr {
};
MCCodeEmitter *llvm::createR600MCCodeEmitter(const MCInstrInfo &MCII,
- const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI) {
+ const MCRegisterInfo &MRI,
+ MCContext &Ctx) {
return new R600MCCodeEmitter(MCII, MRI);
}
diff --git a/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp b/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp
index 7e23772..760aa37 100644
--- a/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp
+++ b/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp
@@ -72,7 +72,6 @@ public:
MCCodeEmitter *llvm::createSIMCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI,
MCContext &Ctx) {
return new SIMCCodeEmitter(MCII, MRI, Ctx);
}
diff --git a/lib/Target/R600/Processors.td b/lib/Target/R600/Processors.td
index fb5aa61..82c6d13 100644
--- a/lib/Target/R600/Processors.td
+++ b/lib/Target/R600/Processors.td
@@ -119,8 +119,12 @@ def : ProcessorModel<"mullins", SIQuarterSpeedModel, [FeatureSeaIslands]>;
// Volcanic Islands
//===----------------------------------------------------------------------===//
-def : ProcessorModel<"tonga", SIQuarterSpeedModel, [FeatureVolcanicIslands]>;
+def : ProcessorModel<"tonga", SIQuarterSpeedModel,
+ [FeatureVolcanicIslands, FeatureSGPRInitBug]
+>;
-def : ProcessorModel<"iceland", SIQuarterSpeedModel, [FeatureVolcanicIslands]>;
+def : ProcessorModel<"iceland", SIQuarterSpeedModel,
+ [FeatureVolcanicIslands, FeatureSGPRInitBug]
+>;
def : ProcessorModel<"carrizo", SIQuarterSpeedModel, [FeatureVolcanicIslands]>;
diff --git a/lib/Target/R600/R600ClauseMergePass.cpp b/lib/Target/R600/R600ClauseMergePass.cpp
index f07be00..3cb9021 100644
--- a/lib/Target/R600/R600ClauseMergePass.cpp
+++ b/lib/Target/R600/R600ClauseMergePass.cpp
@@ -14,11 +14,11 @@
//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
+#include "AMDGPUSubtarget.h"
#include "R600Defines.h"
#include "R600InstrInfo.h"
#include "R600MachineFunctionInfo.h"
#include "R600RegisterInfo.h"
-#include "AMDGPUSubtarget.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
diff --git a/lib/Target/R600/R600ISelLowering.cpp b/lib/Target/R600/R600ISelLowering.cpp
index c738611..a34e2dc 100644
--- a/lib/Target/R600/R600ISelLowering.cpp
+++ b/lib/Target/R600/R600ISelLowering.cpp
@@ -837,6 +837,10 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
case Intrinsic::AMDGPU_rsq:
// XXX - I'm assuming SI's RSQ_LEGACY matches R600's behavior.
return DAG.getNode(AMDGPUISD::RSQ_LEGACY, DL, VT, Op.getOperand(1));
+
+ case AMDGPUIntrinsic::AMDGPU_fract:
+ case AMDGPUIntrinsic::AMDIL_fraction: // Legacy name.
+ return DAG.getNode(AMDGPUISD::FRACT, DL, VT, Op.getOperand(1));
}
// break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
break;
@@ -1479,8 +1483,8 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
// Lower loads constant address space global variable loads
if (LoadNode->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS &&
- isa<GlobalVariable>(
- GetUnderlyingObject(LoadNode->getMemOperand()->getValue()))) {
+ isa<GlobalVariable>(GetUnderlyingObject(
+ LoadNode->getMemOperand()->getValue(), *getDataLayout()))) {
SDValue Ptr = DAG.getZExtOrTrunc(LoadNode->getBasePtr(), DL,
getPointerTy(AMDGPUAS::PRIVATE_ADDRESS));
@@ -1867,7 +1871,7 @@ SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
SelectCC.getOperand(0), // LHS
SelectCC.getOperand(1), // RHS
DAG.getConstant(-1, MVT::i32), // True
- DAG.getConstant(0, MVT::i32), // Flase
+ DAG.getConstant(0, MVT::i32), // False
SelectCC.getOperand(4)); // CC
break;
diff --git a/lib/Target/R600/R600InstrInfo.cpp b/lib/Target/R600/R600InstrInfo.cpp
index 653fd0d..5f0bdf3 100644
--- a/lib/Target/R600/R600InstrInfo.cpp
+++ b/lib/Target/R600/R600InstrInfo.cpp
@@ -29,9 +29,7 @@ using namespace llvm;
#include "AMDGPUGenDFAPacketizer.inc"
R600InstrInfo::R600InstrInfo(const AMDGPUSubtarget &st)
- : AMDGPUInstrInfo(st),
- RI(st)
- { }
+ : AMDGPUInstrInfo(st), RI() {}
const R600RegisterInfo &R600InstrInfo::getRegisterInfo() const {
return RI;
@@ -268,9 +266,8 @@ int R600InstrInfo::getSrcIdx(unsigned Opcode, unsigned SrcNum) const {
return getOperandIdx(Opcode, OpTable[SrcNum]);
}
-#define SRC_SEL_ROWS 11
int R600InstrInfo::getSelIdx(unsigned Opcode, unsigned SrcIdx) const {
- static const unsigned SrcSelTable[SRC_SEL_ROWS][2] = {
+ static const unsigned SrcSelTable[][2] = {
{AMDGPU::OpName::src0, AMDGPU::OpName::src0_sel},
{AMDGPU::OpName::src1, AMDGPU::OpName::src1_sel},
{AMDGPU::OpName::src2, AMDGPU::OpName::src2_sel},
@@ -284,14 +281,13 @@ int R600InstrInfo::getSelIdx(unsigned Opcode, unsigned SrcIdx) const {
{AMDGPU::OpName::src1_W, AMDGPU::OpName::src1_sel_W}
};
- for (unsigned i = 0; i < SRC_SEL_ROWS; ++i) {
- if (getOperandIdx(Opcode, SrcSelTable[i][0]) == (int)SrcIdx) {
- return getOperandIdx(Opcode, SrcSelTable[i][1]);
+ for (const auto &Row : SrcSelTable) {
+ if (getOperandIdx(Opcode, Row[0]) == (int)SrcIdx) {
+ return getOperandIdx(Opcode, Row[1]);
}
}
return -1;
}
-#undef SRC_SEL_ROWS
SmallVector<std::pair<MachineOperand *, int64_t>, 3>
R600InstrInfo::getSrcs(MachineInstr *MI) const {
diff --git a/lib/Target/R600/R600OptimizeVectorRegisters.cpp b/lib/Target/R600/R600OptimizeVectorRegisters.cpp
index 742c0e0..0c06ccc 100644
--- a/lib/Target/R600/R600OptimizeVectorRegisters.cpp
+++ b/lib/Target/R600/R600OptimizeVectorRegisters.cpp
@@ -27,10 +27,9 @@
/// to reduce MOV count.
//===----------------------------------------------------------------------===//
-#include "llvm/Support/Debug.h"
#include "AMDGPU.h"
-#include "R600InstrInfo.h"
#include "AMDGPUSubtarget.h"
+#include "R600InstrInfo.h"
#include "llvm/CodeGen/DFAPacketizer.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
@@ -38,6 +37,7 @@
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
diff --git a/lib/Target/R600/R600RegisterInfo.cpp b/lib/Target/R600/R600RegisterInfo.cpp
index dc95675..fb0359c 100644
--- a/lib/Target/R600/R600RegisterInfo.cpp
+++ b/lib/Target/R600/R600RegisterInfo.cpp
@@ -20,14 +20,16 @@
using namespace llvm;
-R600RegisterInfo::R600RegisterInfo(const AMDGPUSubtarget &st)
-: AMDGPURegisterInfo(st)
- { RCW.RegWeight = 0; RCW.WeightLimit = 0;}
+R600RegisterInfo::R600RegisterInfo() : AMDGPURegisterInfo() {
+ RCW.RegWeight = 0;
+ RCW.WeightLimit = 0;
+}
BitVector R600RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
BitVector Reserved(getNumRegs());
- const R600InstrInfo *TII = static_cast<const R600InstrInfo*>(ST.getInstrInfo());
+ const R600InstrInfo *TII =
+ static_cast<const R600InstrInfo *>(MF.getSubtarget().getInstrInfo());
Reserved.set(AMDGPU::ZERO);
Reserved.set(AMDGPU::HALF);
diff --git a/lib/Target/R600/R600RegisterInfo.h b/lib/Target/R600/R600RegisterInfo.h
index f1a8a41..9713e60 100644
--- a/lib/Target/R600/R600RegisterInfo.h
+++ b/lib/Target/R600/R600RegisterInfo.h
@@ -24,7 +24,7 @@ class AMDGPUSubtarget;
struct R600RegisterInfo : public AMDGPURegisterInfo {
RegClassWeight RCW;
- R600RegisterInfo(const AMDGPUSubtarget &st);
+ R600RegisterInfo();
BitVector getReservedRegs(const MachineFunction &MF) const override;
diff --git a/lib/Target/R600/SIFixSGPRLiveRanges.cpp b/lib/Target/R600/SIFixSGPRLiveRanges.cpp
index f34c375..0c54446 100644
--- a/lib/Target/R600/SIFixSGPRLiveRanges.cpp
+++ b/lib/Target/R600/SIFixSGPRLiveRanges.cpp
@@ -54,6 +54,7 @@
#include "llvm/CodeGen/MachinePostDominators.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
using namespace llvm;
diff --git a/lib/Target/R600/SIFoldOperands.cpp b/lib/Target/R600/SIFoldOperands.cpp
index ae4b05d..7ba5a6d 100644
--- a/lib/Target/R600/SIFoldOperands.cpp
+++ b/lib/Target/R600/SIFoldOperands.cpp
@@ -17,9 +17,10 @@
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#define DEBUG_TYPE "si-fold-operands"
diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp
index 7d794b8..bd0c3c2 100644
--- a/lib/Target/R600/SIISelLowering.cpp
+++ b/lib/Target/R600/SIISelLowering.cpp
@@ -172,16 +172,12 @@ SITargetLowering::SITargetLowering(TargetMachine &TM,
setOperationAction(ISD::UDIV, MVT::i64, Expand);
setOperationAction(ISD::UREM, MVT::i64, Expand);
- // We only support LOAD/STORE and vector manipulation ops for vectors
- // with > 4 elements.
- MVT VecTypes[] = {
- MVT::v8i32, MVT::v8f32, MVT::v16i32, MVT::v16f32
- };
-
setOperationAction(ISD::SELECT_CC, MVT::i1, Expand);
setOperationAction(ISD::SELECT, MVT::i1, Promote);
- for (MVT VT : VecTypes) {
+ // We only support LOAD/STORE and vector manipulation ops for vectors
+ // with > 4 elements.
+ for (MVT VT : {MVT::v8i32, MVT::v8f32, MVT::v16i32, MVT::v16f32}) {
for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op) {
switch(Op) {
case ISD::LOAD:
@@ -206,10 +202,10 @@ SITargetLowering::SITargetLowering(TargetMachine &TM,
if (Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS) {
setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
setOperationAction(ISD::FCEIL, MVT::f64, Legal);
- setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
setOperationAction(ISD::FRINT, MVT::f64, Legal);
}
+ setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
setOperationAction(ISD::FDIV, MVT::f32, Custom);
setOperationAction(ISD::FDIV, MVT::f64, Custom);
@@ -932,6 +928,12 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
Op.getOperand(1),
Op.getOperand(2),
Op.getOperand(3));
+
+ case AMDGPUIntrinsic::AMDGPU_fract:
+ case AMDGPUIntrinsic::AMDIL_fraction: // Legacy name.
+ return DAG.getNode(ISD::FSUB, DL, VT, Op.getOperand(1),
+ DAG.getNode(ISD::FFLOOR, DL, VT, Op.getOperand(1)));
+
default:
return AMDGPUTargetLowering::LowerOperation(Op, DAG);
}
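
The new SI handling of AMDGPU_fract above expands the intrinsic as an FSUB of an FFLOOR, relying on the identity fract(x) = x - floor(x), which lies in [0, 1) for finite x. A small standalone sanity check (illustrative only):

#include <cassert>
#include <cmath>

int main() {
  for (double X : {2.75, -1.25, 0.0}) {
    double Fract = X - std::floor(X); // what the FSUB/FFLOOR pair computes
    assert(Fract >= 0.0 && Fract < 1.0);
  }
}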
@@ -1346,6 +1348,35 @@ SDValue SITargetLowering::performUCharToFloatCombine(SDNode *N,
return SDValue();
}
+/// \brief Return true if the given offset Size in bytes can be folded into
+/// the immediate offsets of a memory instruction for the given address space.
+static bool canFoldOffset(unsigned OffsetSize, unsigned AS,
+ const AMDGPUSubtarget &STI) {
+ switch (AS) {
+ case AMDGPUAS::GLOBAL_ADDRESS: {
+ // MUBUF instructions have a 12-bit offset in bytes.
+ return isUInt<12>(OffsetSize);
+ }
+ case AMDGPUAS::CONSTANT_ADDRESS: {
+ // SMRD instructions have an 8-bit offset in dwords on SI and
+ // a 20-bit offset in bytes on VI.
+ if (STI.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
+ return isUInt<20>(OffsetSize);
+ else
+ return (OffsetSize % 4 == 0) && isUInt<8>(OffsetSize / 4);
+ }
+ case AMDGPUAS::LOCAL_ADDRESS:
+ case AMDGPUAS::REGION_ADDRESS: {
+ // The single offset versions have a 16-bit offset in bytes.
+ return isUInt<16>(OffsetSize);
+ }
+ case AMDGPUAS::PRIVATE_ADDRESS:
+ // Indirect register addressing does not use any offsets.
+ default:
+ return false;
+ }
+}
+
// (shl (add x, c1), c2) -> add (shl x, c2), (shl c1, c2)
// This is a variant of
@@ -1377,13 +1408,10 @@ SDValue SITargetLowering::performSHLPtrCombine(SDNode *N,
if (!CAdd)
return SDValue();
- const SIInstrInfo *TII =
- static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
-
// If the resulting offset is too large, we can't fold it into the addressing
// mode offset.
APInt Offset = CAdd->getAPIntValue() << CN1->getAPIntValue();
- if (!TII->canFoldOffset(Offset.getZExtValue(), AddrSpace))
+ if (!canFoldOffset(Offset.getZExtValue(), AddrSpace, *Subtarget))
return SDValue();
SelectionDAG &DAG = DCI.DAG;
@@ -1595,6 +1623,7 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
case AMDGPUISD::UMAX:
case AMDGPUISD::UMIN: {
if (DCI.getDAGCombineLevel() >= AfterLegalizeDAG &&
+ N->getValueType(0) != MVT::f64 &&
getTargetMachine().getOptLevel() > CodeGenOpt::None)
return performMin3Max3Combine(N, DCI);
break;
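
Hoisting canFoldOffset out of SIInstrInfo lets the DAG combine above query offset legality without an instruction-info object. The CONSTANT_ADDRESS case is the subtle one: SI encodes an 8-bit offset in dwords, VI a 20-bit offset in bytes. A hedged sketch of just that rule (illustrative only; uses llvm::isUInt from Support/MathExtras.h):

#include "llvm/Support/MathExtras.h"

// Mirrors the SMRD branch of canFoldOffset: byte offsets on VI, dword
// offsets on SI (so the byte count must also be dword aligned).
static bool foldsIntoSMRD(unsigned OffsetSize, bool IsVI) {
  if (IsVI)
    return llvm::isUInt<20>(OffsetSize);
  return (OffsetSize % 4 == 0) && llvm::isUInt<8>(OffsetSize / 4);
}
// e.g. foldsIntoSMRD(1020, false) is true (255 dwords), while
// foldsIntoSMRD(1021, false) is false (not dword aligned).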
diff --git a/lib/Target/R600/SIInsertWaits.cpp b/lib/Target/R600/SIInsertWaits.cpp
index 50f20ac..90a37f1 100644
--- a/lib/Target/R600/SIInsertWaits.cpp
+++ b/lib/Target/R600/SIInsertWaits.cpp
@@ -259,7 +259,8 @@ void SIInsertWaits::pushInstruction(MachineBasicBlock &MBB,
return;
}
- if (TRI->ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
+ if (MBB.getParent()->getSubtarget<AMDGPUSubtarget>().getGeneration() >=
+ AMDGPUSubtarget::VOLCANIC_ISLANDS) {
// Any occurrence of consecutive VMEM or SMEM instructions forms a VMEM
// or SMEM clause, respectively.
//
@@ -412,7 +413,8 @@ Counters SIInsertWaits::handleOperands(MachineInstr &MI) {
void SIInsertWaits::handleSendMsg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) {
- if (TRI->ST.getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS)
+ if (MBB.getParent()->getSubtarget<AMDGPUSubtarget>().getGeneration() <
+ AMDGPUSubtarget::VOLCANIC_ISLANDS)
return;
// There must be "S_NOP 0" between an instruction writing M0 and S_SENDMSG.
diff --git a/lib/Target/R600/SIInstrFormats.td b/lib/Target/R600/SIInstrFormats.td
index c90c741..4167590 100644
--- a/lib/Target/R600/SIInstrFormats.td
+++ b/lib/Target/R600/SIInstrFormats.td
@@ -83,6 +83,9 @@ class Enc64 {
int Size = 8;
}
+class VOPDstOperand <RegisterClass rc> : RegisterOperand <rc, "printVOPDst">;
+def VOPDstVCC : VOPDstOperand <VCCReg>;
+
let Uses = [EXEC] in {
class VOPAnyCommon <dag outs, dag ins, string asm, list<dag> pattern> :
@@ -96,7 +99,7 @@ class VOPAnyCommon <dag outs, dag ins, string asm, list<dag> pattern> :
}
class VOPCCommon <dag ins, string asm, list<dag> pattern> :
- VOPAnyCommon <(outs VCCReg:$dst), ins, asm, pattern> {
+ VOPAnyCommon <(outs VOPDstVCC:$dst), ins, asm, pattern> {
let DisableEncoding = "$dst";
let VOPC = 1;
@@ -577,6 +580,12 @@ class DS <dag outs, dag ins, string asm, list<dag> pattern> :
let DS = 1;
let UseNamedOperandTable = 1;
let DisableEncoding = "$m0";
+
+ // Most instructions load and store data, so set this as the default.
+ let mayLoad = 1;
+ let mayStore = 1;
+
+ let hasSideEffects = 0;
let SchedRW = [WriteLDS];
}
diff --git a/lib/Target/R600/SIInstrInfo.cpp b/lib/Target/R600/SIInstrInfo.cpp
index 4f1e5ad..ba98ad7 100644
--- a/lib/Target/R600/SIInstrInfo.cpp
+++ b/lib/Target/R600/SIInstrInfo.cpp
@@ -28,7 +28,7 @@
using namespace llvm;
SIInstrInfo::SIInstrInfo(const AMDGPUSubtarget &st)
- : AMDGPUInstrInfo(st), RI(st) {}
+ : AMDGPUInstrInfo(st), RI() {}
//===----------------------------------------------------------------------===//
// TargetInstrInfo callbacks
@@ -120,12 +120,20 @@ bool SIInstrInfo::areLoadsFromSameBasePtr(SDNode *Load0, SDNode *Load1,
if (Load0->getOperand(0) != Load1->getOperand(0))
return false;
+ const ConstantSDNode *Load0Offset =
+ dyn_cast<ConstantSDNode>(Load0->getOperand(1));
+ const ConstantSDNode *Load1Offset =
+ dyn_cast<ConstantSDNode>(Load1->getOperand(1));
+
+ if (!Load0Offset || !Load1Offset)
+ return false;
+
// Check chain.
if (findChainOperand(Load0) != findChainOperand(Load1))
return false;
- Offset0 = cast<ConstantSDNode>(Load0->getOperand(1))->getZExtValue();
- Offset1 = cast<ConstantSDNode>(Load1->getOperand(1))->getZExtValue();
+ Offset0 = Load0Offset->getZExtValue();
+ Offset1 = Load1Offset->getZExtValue();
return true;
}
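
The areLoadsFromSameBasePtr hunk above swaps an unconditional cast<ConstantSDNode> for a dyn_cast guard: cast<> asserts when the operand is not a constant, while dyn_cast<> returns null so the query can decline instead of crashing. A self-contained sketch of the same pattern on IR values (illustrative only; the helper name is made up):

#include "llvm/IR/Constants.h"
#include "llvm/Support/Casting.h"
using namespace llvm;

static bool getConstOffset(const Value *V, uint64_t &Off) {
  // cast<ConstantInt>(V) would assert if V were not a ConstantInt;
  // dyn_cast returns null instead, letting us fail gracefully.
  if (const auto *CI = dyn_cast<ConstantInt>(V)) {
    Off = CI->getZExtValue();
    return true;
  }
  return false;
}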
@@ -418,7 +426,9 @@ SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
}
}
-unsigned SIInstrInfo::commuteOpcode(unsigned Opcode) const {
+unsigned SIInstrInfo::commuteOpcode(const MachineInstr &MI) const {
+ const unsigned Opcode = MI.getOpcode();
+
int NewOpc;
// Try to map original to commuted opcode
@@ -583,10 +593,7 @@ unsigned SIInstrInfo::calculateLDSSpillAddress(MachineBasicBlock &MBB,
unsigned TIDIGZReg = TRI->getPreloadedValue(*MF, SIRegisterInfo::TIDIG_Z);
unsigned InputPtrReg =
TRI->getPreloadedValue(*MF, SIRegisterInfo::INPUT_PTR);
- static const unsigned TIDIGRegs[3] = {
- TIDIGXReg, TIDIGYReg, TIDIGZReg
- };
- for (unsigned Reg : TIDIGRegs) {
+ for (unsigned Reg : {TIDIGXReg, TIDIGYReg, TIDIGZReg}) {
if (!Entry.isLiveIn(Reg))
Entry.addLiveIn(Reg);
}
@@ -720,6 +727,26 @@ bool SIInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
MI->eraseFromParent();
break;
}
+
+ case AMDGPU::V_CNDMASK_B64_PSEUDO: {
+ unsigned Dst = MI->getOperand(0).getReg();
+ unsigned DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
+ unsigned DstHi = RI.getSubReg(Dst, AMDGPU::sub1);
+ unsigned Src0 = MI->getOperand(1).getReg();
+ unsigned Src1 = MI->getOperand(2).getReg();
+ const MachineOperand &SrcCond = MI->getOperand(3);
+
+ BuildMI(MBB, MI, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstLo)
+ .addReg(RI.getSubReg(Src0, AMDGPU::sub0))
+ .addReg(RI.getSubReg(Src1, AMDGPU::sub0))
+ .addOperand(SrcCond);
+ BuildMI(MBB, MI, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstHi)
+ .addReg(RI.getSubReg(Src0, AMDGPU::sub1))
+ .addReg(RI.getSubReg(Src1, AMDGPU::sub1))
+ .addOperand(SrcCond);
+ MI->eraseFromParent();
+ break;
+ }
}
return true;
}
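
The V_CNDMASK_B64_PSEUDO expansion above works because a 64-bit conditional move decomposes into two independent 32-bit moves on the sub0/sub1 halves. A standalone check of that decomposition (illustrative only; assumes V_CNDMASK_B32 selects src1 when the condition bit is set):

#include <cassert>
#include <cstdint>

static uint64_t cndmask64(bool Cond, uint64_t Src0, uint64_t Src1) {
  uint32_t Lo = Cond ? uint32_t(Src1) : uint32_t(Src0);             // sub0
  uint32_t Hi = Cond ? uint32_t(Src1 >> 32) : uint32_t(Src0 >> 32); // sub1
  return (uint64_t(Hi) << 32) | Lo;
}

int main() {
  assert(cndmask64(true, 0x1111222233334444ULL, 0xAAAABBBBCCCCDDDDULL) ==
         0xAAAABBBBCCCCDDDDULL);
  assert(cndmask64(false, 0x1111222233334444ULL, 0xAAAABBBBCCCCDDDDULL) ==
         0x1111222233334444ULL);
}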
@@ -792,7 +819,7 @@ MachineInstr *SIInstrInfo::commuteInstruction(MachineInstr *MI,
}
if (MI)
- MI->setDesc(get(commuteOpcode(MI->getOpcode())));
+ MI->setDesc(get(commuteOpcode(*MI)));
return MI;
}
@@ -1172,32 +1199,6 @@ bool SIInstrInfo::isImmOperandLegal(const MachineInstr *MI, unsigned OpNo,
return RI.opCanUseInlineConstant(OpInfo.OperandType);
}
-bool SIInstrInfo::canFoldOffset(unsigned OffsetSize, unsigned AS) const {
- switch (AS) {
- case AMDGPUAS::GLOBAL_ADDRESS: {
- // MUBUF instructions a 12-bit offset in bytes.
- return isUInt<12>(OffsetSize);
- }
- case AMDGPUAS::CONSTANT_ADDRESS: {
- // SMRD instructions have an 8-bit offset in dwords on SI and
- // a 20-bit offset in bytes on VI.
- if (RI.ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
- return isUInt<20>(OffsetSize);
- else
- return (OffsetSize % 4 == 0) && isUInt<8>(OffsetSize / 4);
- }
- case AMDGPUAS::LOCAL_ADDRESS:
- case AMDGPUAS::REGION_ADDRESS: {
- // The single offset versions have a 16-bit offset in bytes.
- return isUInt<16>(OffsetSize);
- }
- case AMDGPUAS::PRIVATE_ADDRESS:
- // Indirect register addressing does not use any offsets.
- default:
- return 0;
- }
-}
-
bool SIInstrInfo::hasVALU32BitEncoding(unsigned Opcode) const {
int Op32 = AMDGPU::getVOPe32(Opcode);
if (Op32 == -1)
@@ -1405,6 +1406,7 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) {
case AMDGPU::S_SEXT_I32_I16: return AMDGPU::V_BFE_I32;
case AMDGPU::S_BFE_U32: return AMDGPU::V_BFE_U32;
case AMDGPU::S_BFE_I32: return AMDGPU::V_BFE_I32;
+ case AMDGPU::S_BFM_B32: return AMDGPU::V_BFM_B32_e64;
case AMDGPU::S_BREV_B32: return AMDGPU::V_BFREV_B32_e32;
case AMDGPU::S_NOT_B32: return AMDGPU::V_NOT_B32_e32;
case AMDGPU::S_NOT_B64: return AMDGPU::V_NOT_B32_e32;
@@ -1423,6 +1425,7 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) {
case AMDGPU::S_BCNT1_I32_B32: return AMDGPU::V_BCNT_U32_B32_e64;
case AMDGPU::S_FF1_I32_B32: return AMDGPU::V_FFBL_B32_e32;
case AMDGPU::S_FLBIT_I32_B32: return AMDGPU::V_FFBH_U32_e32;
+ case AMDGPU::S_FLBIT_I32: return AMDGPU::V_FFBH_I32_e64;
}
}
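
Among the new getVALUOp rows above, the S_BFM_B32 -> V_BFM_B32_e64 mapping is safe because both opcodes compute the same bitfield mask, ((1 << width) - 1) << offset, from the low five bits of each source (semantics assumed from the ISA documents; sketch illustrative only):

#include <cassert>
#include <cstdint>

static uint32_t bfm(uint32_t Width, uint32_t Offset) {
  return ((1u << (Width & 31)) - 1) << (Offset & 31);
}

int main() {
  assert(bfm(4, 8) == 0x00000F00u); // four bits, shifted up one byte
  assert(bfm(1, 0) == 0x00000001u);
}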
@@ -1865,12 +1868,15 @@ void SIInstrInfo::legalizeOperands(MachineInstr *MI) const {
MachineInstr *Addr64 =
BuildMI(MBB, MI, MI->getDebugLoc(), get(Addr64Opcode))
.addOperand(*VData)
- .addOperand(*SRsrc)
.addReg(AMDGPU::NoRegister) // Dummy value for vaddr.
// This will be replaced later
// with the new value of vaddr.
+ .addOperand(*SRsrc)
.addOperand(*SOffset)
- .addOperand(*Offset);
+ .addOperand(*Offset)
+ .addImm(0) // glc
+ .addImm(0) // slc
+ .addImm(0); // tfe
MI->removeFromParent();
MI = Addr64;
@@ -1914,14 +1920,20 @@ void SIInstrInfo::splitSMRD(MachineInstr *MI,
// The SMRD has an 8-bit offset in dwords on SI and a 20-bit offset in bytes
// on VI.
+
+ bool IsKill = SBase->isKill();
if (OffOp) {
- bool isVI = RI.ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS;
+ bool isVI =
+ MBB->getParent()->getSubtarget<AMDGPUSubtarget>().getGeneration() >=
+ AMDGPUSubtarget::VOLCANIC_ISLANDS;
unsigned OffScale = isVI ? 1 : 4;
// Handle the _IMM variant
unsigned LoOffset = OffOp->getImm() * OffScale;
unsigned HiOffset = LoOffset + HalfSize;
Lo = BuildMI(*MBB, MI, DL, get(HalfImmOp), RegLo)
- .addOperand(*SBase)
+ // Use addReg instead of addOperand
+ // to make sure the kill flag is cleared.
+ .addReg(SBase->getReg(), 0, SBase->getSubReg())
.addImm(LoOffset / OffScale);
if (!isUInt<20>(HiOffset) || (!isVI && !isUInt<8>(HiOffset / OffScale))) {
@@ -1930,25 +1942,28 @@ void SIInstrInfo::splitSMRD(MachineInstr *MI,
BuildMI(*MBB, MI, DL, get(AMDGPU::S_MOV_B32), OffsetSGPR)
.addImm(HiOffset); // The offset in register is in bytes.
Hi = BuildMI(*MBB, MI, DL, get(HalfSGPROp), RegHi)
- .addOperand(*SBase)
+ .addReg(SBase->getReg(), getKillRegState(IsKill),
+ SBase->getSubReg())
.addReg(OffsetSGPR);
} else {
Hi = BuildMI(*MBB, MI, DL, get(HalfImmOp), RegHi)
- .addOperand(*SBase)
+ .addReg(SBase->getReg(), getKillRegState(IsKill),
+ SBase->getSubReg())
.addImm(HiOffset / OffScale);
}
} else {
// Handle the _SGPR variant
MachineOperand *SOff = getNamedOperand(*MI, AMDGPU::OpName::soff);
Lo = BuildMI(*MBB, MI, DL, get(HalfSGPROp), RegLo)
- .addOperand(*SBase)
+ .addReg(SBase->getReg(), 0, SBase->getSubReg())
.addOperand(*SOff);
unsigned OffsetSGPR = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
BuildMI(*MBB, MI, DL, get(AMDGPU::S_ADD_I32), OffsetSGPR)
.addOperand(*SOff)
.addImm(HalfSize);
Hi = BuildMI(*MBB, MI, DL, get(HalfSGPROp))
- .addOperand(*SBase)
+ .addReg(SBase->getReg(), getKillRegState(IsKill),
+ SBase->getSubReg())
.addReg(OffsetSGPR);
}
@@ -2003,7 +2018,8 @@ void SIInstrInfo::moveSMRDToVALU(MachineInstr *MI, MachineRegisterInfo &MRI) con
// SMRD instructions take a dword offset on SI and a byte offset on VI
// and MUBUF instructions always take a byte offset.
ImmOffset = MI->getOperand(2).getImm();
- if (RI.ST.getGeneration() <= AMDGPUSubtarget::SEA_ISLANDS)
+ if (MBB->getParent()->getSubtarget<AMDGPUSubtarget>().getGeneration() <=
+ AMDGPUSubtarget::SEA_ISLANDS)
ImmOffset <<= 2;
RegOffset = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
@@ -2043,13 +2059,15 @@ void SIInstrInfo::moveSMRDToVALU(MachineInstr *MI, MachineRegisterInfo &MRI) con
.addImm(AMDGPU::sub3);
MI->setDesc(get(NewOpcode));
if (MI->getOperand(2).isReg()) {
- MI->getOperand(2).setReg(MI->getOperand(1).getReg());
+ MI->getOperand(2).setReg(SRsrc);
} else {
- MI->getOperand(2).ChangeToRegister(MI->getOperand(1).getReg(), false);
+ MI->getOperand(2).ChangeToRegister(SRsrc, false);
}
- MI->getOperand(1).setReg(SRsrc);
MI->addOperand(*MBB->getParent(), MachineOperand::CreateImm(0));
MI->addOperand(*MBB->getParent(), MachineOperand::CreateImm(ImmOffset));
+ MI->addOperand(*MBB->getParent(), MachineOperand::CreateImm(0)); // glc
+ MI->addOperand(*MBB->getParent(), MachineOperand::CreateImm(0)); // slc
+ MI->addOperand(*MBB->getParent(), MachineOperand::CreateImm(0)); // tfe
const TargetRegisterClass *NewDstRC =
RI.getRegClass(get(NewOpcode).OpInfo[0].RegClass);
diff --git a/lib/Target/R600/SIInstrInfo.h b/lib/Target/R600/SIInstrInfo.h
index 12dc3f3..a9aa99f 100644
--- a/lib/Target/R600/SIInstrInfo.h
+++ b/lib/Target/R600/SIInstrInfo.h
@@ -114,7 +114,7 @@ public:
// register. If there is no hardware instruction that can store to \p
// DstRC, then AMDGPU::COPY is returned.
unsigned getMovOpcode(const TargetRegisterClass *DstRC) const;
- unsigned commuteOpcode(unsigned Opcode) const;
+ unsigned commuteOpcode(const MachineInstr &MI) const;
MachineInstr *commuteInstruction(MachineInstr *MI,
bool NewMI = false) const override;
@@ -218,10 +218,6 @@ public:
bool isImmOperandLegal(const MachineInstr *MI, unsigned OpNo,
const MachineOperand &MO) const;
- /// \brief Return true if the given offset Size in bytes can be folded into
- /// the immediate offsets of a memory instruction for the given address space.
- bool canFoldOffset(unsigned OffsetSize, unsigned AS) const;
-
/// \brief Return true if this 64-bit VALU instruction has a 32-bit encoding.
/// This function will return false if you pass it a 32-bit instruction.
bool hasVALU32BitEncoding(unsigned Opcode) const;
diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td
index e2747dc..d603ecb 100644
--- a/lib/Target/R600/SIInstrInfo.td
+++ b/lib/Target/R600/SIInstrInfo.td
@@ -264,6 +264,9 @@ def ds_offset0 : Operand<i8> {
def ds_offset1 : Operand<i8> {
let PrintMethod = "printDSOffset1";
}
+def gds : Operand <i1> {
+ let PrintMethod = "printGDS";
+}
def glc : Operand <i1> {
let PrintMethod = "printGLC";
}
@@ -284,6 +287,8 @@ def ClampMod : Operand <i1> {
} // End OperandType = "OPERAND_IMMEDIATE"
+def VOPDstS64 : VOPDstOperand <SReg_64>;
+
//===----------------------------------------------------------------------===//
// Complex patterns
//===----------------------------------------------------------------------===//
@@ -292,7 +297,7 @@ def DS1Addr1Offset : ComplexPattern<i32, 2, "SelectDS1Addr1Offset">;
def DS64Bit4ByteAligned : ComplexPattern<i32, 3, "SelectDS64Bit4ByteAligned">;
def MUBUFAddr32 : ComplexPattern<i64, 9, "SelectMUBUFAddr32">;
-def MUBUFAddr64 : ComplexPattern<i64, 4, "SelectMUBUFAddr64">;
+def MUBUFAddr64 : ComplexPattern<i64, 7, "SelectMUBUFAddr64">;
def MUBUFAddr64Atomic : ComplexPattern<i64, 5, "SelectMUBUFAddr64">;
def MUBUFScratch : ComplexPattern<i64, 4, "SelectMUBUFScratch">;
def MUBUFOffset : ComplexPattern<i64, 6, "SelectMUBUFOffset">;
@@ -315,6 +320,7 @@ def SIOperand {
def SRCMODS {
int NONE = 0;
+ int NEG = 1;
}
def DSTCLAMP {
@@ -516,7 +522,7 @@ multiclass SOP2_64_32 <sop2 op, string opName, list<dag> pattern> : SOP2_m <
class SOPC_Helper <bits<7> op, RegisterOperand rc, ValueType vt,
string opName, PatLeaf cond> : SOPC <
op, (outs SCCReg:$dst), (ins rc:$src0, rc:$src1),
- opName#" $dst, $src0, $src1", []>;
+ opName#" $src0, $src1", []>;
class SOPC_32<bits<7> op, string opName, PatLeaf cond = COND_NULL>
: SOPC_Helper<op, SSrc_32, i32, opName, cond>;
@@ -637,9 +643,9 @@ class getNumSrcArgs<ValueType Src1, ValueType Src2> {
// Returns the register class to use for the destination of VOP[123C]
// instructions for the given VT.
class getVALUDstForVT<ValueType VT> {
- RegisterClass ret = !if(!eq(VT.Size, 32), VGPR_32,
- !if(!eq(VT.Size, 64), VReg_64,
- SReg_64)); // else VT == i1
+ RegisterOperand ret = !if(!eq(VT.Size, 32), VOPDstOperand<VGPR_32>,
+ !if(!eq(VT.Size, 64), VOPDstOperand<VReg_64>,
+ VOPDstOperand<SReg_64>)); // else VT == i1
}
// Returns the register class to use for source 0 of VOP[12C]
@@ -717,7 +723,7 @@ class getIns64 <RegisterOperand Src0RC, RegisterOperand Src1RC,
class getAsm32 <int NumSrcArgs> {
string src1 = ", $src1";
string src2 = ", $src2";
- string ret = " $dst, $src0"#
+ string ret = "$dst, $src0"#
!if(!eq(NumSrcArgs, 1), "", src1)#
!if(!eq(NumSrcArgs, 3), src2, "");
}
@@ -733,7 +739,7 @@ class getAsm64 <int NumSrcArgs, bit HasModifiers> {
string ret =
!if(!eq(HasModifiers, 0),
getAsm32<NumSrcArgs>.ret,
- " $dst, "#src0#src1#src2#"$clamp"#"$omod");
+ "$dst, "#src0#src1#src2#"$clamp"#"$omod");
}
@@ -745,7 +751,7 @@ class VOPProfile <list<ValueType> _ArgVT> {
field ValueType Src0VT = ArgVT[1];
field ValueType Src1VT = ArgVT[2];
field ValueType Src2VT = ArgVT[3];
- field RegisterClass DstRC = getVALUDstForVT<DstVT>.ret;
+ field RegisterOperand DstRC = getVALUDstForVT<DstVT>.ret;
field RegisterOperand Src0RC32 = getVOPSrc0ForVT<Src0VT>.ret;
field RegisterClass Src1RC32 = getVOPSrc1ForVT<Src1VT>.ret;
field RegisterOperand Src0RC64 = getVOP3SrcForVT<Src0VT>.ret;
@@ -761,7 +767,7 @@ class VOPProfile <list<ValueType> _ArgVT> {
field dag Ins64 = getIns64<Src0RC64, Src1RC64, Src2RC64, NumSrcArgs,
HasModifiers>.ret;
- field string Asm32 = "_e32"#getAsm32<NumSrcArgs>.ret;
+ field string Asm32 = getAsm32<NumSrcArgs>.ret;
field string Asm64 = getAsm64<NumSrcArgs, HasModifiers>.ret;
}
@@ -788,22 +794,27 @@ def VOP_I32_I32_I32_VCC : VOPProfile <[i32, i32, i32, untyped]> {
def VOP_I1_F32_I32 : VOPProfile <[i1, f32, i32, untyped]> {
let Ins64 = (ins InputModsNoDefault:$src0_modifiers, Src0RC64:$src0, Src1RC64:$src1);
- let Asm64 = " $dst, $src0_modifiers, $src1";
+ let Asm64 = "$dst, $src0_modifiers, $src1";
}
def VOP_I1_F64_I32 : VOPProfile <[i1, f64, i32, untyped]> {
let Ins64 = (ins InputModsNoDefault:$src0_modifiers, Src0RC64:$src0, Src1RC64:$src1);
- let Asm64 = " $dst, $src0_modifiers, $src1";
+ let Asm64 = "$dst, $src0_modifiers, $src1";
}
def VOP_I64_I64_I32 : VOPProfile <[i64, i64, i32, untyped]>;
def VOP_I64_I32_I64 : VOPProfile <[i64, i32, i64, untyped]>;
def VOP_I64_I64_I64 : VOPProfile <[i64, i64, i64, untyped]>;
+def VOP_CNDMASK : VOPProfile <[i32, i32, i32, untyped]> {
+ let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1, VCCReg:$src2);
+ let Ins64 = (ins Src0RC64:$src0, Src1RC64:$src1, SSrc_64:$src2);
+ let Asm64 = "$dst, $src0, $src1, $src2";
+}
def VOP_F32_F32_F32_F32 : VOPProfile <[f32, f32, f32, f32]>;
def VOP_MADK : VOPProfile <[f32, f32, f32, f32]> {
field dag Ins = (ins VCSrc_32:$src0, VGPR_32:$vsrc1, u32imm:$src2);
- field string Asm = " $dst, $src0, $vsrc1, $src2";
+ field string Asm = "$dst, $src0, $vsrc1, $src2";
}
def VOP_F64_F64_F64_F64 : VOPProfile <[f64, f64, f64, f64]>;
def VOP_I32_I32_I32_I32 : VOPProfile <[i32, i32, i32, i32]>;
@@ -835,23 +846,28 @@ class VOP1_Pseudo <dag outs, dag ins, list<dag> pattern, string opName> :
field bits<9> src0;
}
+class VOP1_Real_si <string opName, vop1 op, dag outs, dag ins, string asm> :
+ VOP1<op.SI, outs, ins, asm, []>,
+ SIMCInstr <opName#"_e32", SISubtarget.SI>;
+
+class VOP1_Real_vi <string opName, vop1 op, dag outs, dag ins, string asm> :
+ VOP1<op.VI, outs, ins, asm, []>,
+ SIMCInstr <opName#"_e32", SISubtarget.VI>;
+
multiclass VOP1_m <vop1 op, dag outs, dag ins, string asm, list<dag> pattern,
string opName> {
def "" : VOP1_Pseudo <outs, ins, pattern, opName>;
- def _si : VOP1<op.SI, outs, ins, asm, []>,
- SIMCInstr <opName#"_e32", SISubtarget.SI>;
- def _vi : VOP1<op.VI, outs, ins, asm, []>,
- SIMCInstr <opName#"_e32", SISubtarget.VI>;
+ def _si : VOP1_Real_si <opName, op, outs, ins, asm>;
+
+ def _vi : VOP1_Real_vi <opName, op, outs, ins, asm>;
}
multiclass VOP1SI_m <vop1 op, dag outs, dag ins, string asm, list<dag> pattern,
string opName> {
def "" : VOP1_Pseudo <outs, ins, pattern, opName>;
- def _si : VOP1<op.SI, outs, ins, asm, []>,
- SIMCInstr <opName#"_e32", SISubtarget.SI>;
- // No VI instruction. This class is for SI only.
+ def _si : VOP1_Real_si <opName, op, outs, ins, asm>;
}
class VOP2_Pseudo <dag outs, dag ins, list<dag> pattern, string opName> :
@@ -862,13 +878,20 @@ class VOP2_Pseudo <dag outs, dag ins, list<dag> pattern, string opName> :
let isCodeGenOnly = 1;
}
+class VOP2_Real_si <string opName, vop2 op, dag outs, dag ins, string asm> :
+ VOP2 <op.SI, outs, ins, opName#asm, []>,
+ SIMCInstr <opName#"_e32", SISubtarget.SI>;
+
+class VOP2_Real_vi <string opName, vop2 op, dag outs, dag ins, string asm> :
+ VOP2 <op.VI, outs, ins, opName#asm, []>,
+ SIMCInstr <opName#"_e32", SISubtarget.VI>;
+
multiclass VOP2SI_m <vop2 op, dag outs, dag ins, string asm, list<dag> pattern,
string opName, string revOp> {
def "" : VOP2_Pseudo <outs, ins, pattern, opName>,
VOP2_REV<revOp#"_e32", !eq(revOp, opName)>;
- def _si : VOP2 <op.SI, outs, ins, opName#asm, []>,
- SIMCInstr <opName#"_e32", SISubtarget.SI>;
+ def _si : VOP2_Real_si <opName, op, outs, ins, asm>;
}
multiclass VOP2_m <vop2 op, dag outs, dag ins, string asm, list<dag> pattern,
@@ -876,10 +899,10 @@ multiclass VOP2_m <vop2 op, dag outs, dag ins, string asm, list<dag> pattern,
def "" : VOP2_Pseudo <outs, ins, pattern, opName>,
VOP2_REV<revOp#"_e32", !eq(revOp, opName)>;
- def _si : VOP2 <op.SI, outs, ins, opName#asm, []>,
- SIMCInstr <opName#"_e32", SISubtarget.SI>;
- def _vi : VOP2 <op.VI, outs, ins, opName#asm, []>,
- SIMCInstr <opName#"_e32", SISubtarget.VI>;
+ def _si : VOP2_Real_si <opName, op, outs, ins, asm>;
+
+ def _vi : VOP2_Real_vi <opName, op, outs, ins, asm>;
+
}
class VOP3DisableFields <bit HasSrc1, bit HasSrc2, bit HasModifiers> {
@@ -1047,9 +1070,10 @@ multiclass VOP3b_3_m <vop op, dag outs, dag ins, string asm,
multiclass VOP3_C_m <vop op, dag outs, dag ins, string asm,
list<dag> pattern, string opName,
- bit HasMods, bit defExec> {
+ bit HasMods, bit defExec, string revOp> {
- def "" : VOP3_Pseudo <outs, ins, pattern, opName>;
+ def "" : VOP3_Pseudo <outs, ins, pattern, opName>,
+ VOP2_REV<revOp#"_e64", !eq(revOp, opName)>;
def _si : VOP3_Real_si <op.SI3, outs, ins, asm, opName>,
VOP3DisableFields<1, 0, HasMods> {
@@ -1086,7 +1110,7 @@ multiclass VOP1_Helper <vop1 op, string opName, dag outs,
defm _e32 : VOP1_m <op, outs, ins32, opName#asm32, pat32, opName>;
- defm _e64 : VOP3_1_m <op, outs, ins64, opName#"_e64"#asm64, pat64, opName, HasMods>;
+ defm _e64 : VOP3_1_m <op, outs, ins64, opName#asm64, pat64, opName, HasMods>;
}
multiclass VOP1Inst <vop1 op, string opName, VOPProfile P,
@@ -1121,7 +1145,7 @@ multiclass VOP2_Helper <vop2 op, string opName, dag outs,
defm _e32 : VOP2_m <op, outs, ins32, asm32, pat32, opName, revOp>;
defm _e64 : VOP3_2_m <op,
- outs, ins64, opName#"_e64"#asm64, pat64, opName, revOp, HasMods
+ outs, ins64, opName#asm64, pat64, opName, revOp, HasMods
>;
}
@@ -1145,7 +1169,7 @@ multiclass VOP2InstSI <vop2 op, string opName, VOPProfile P,
string revOp = opName> {
defm _e32 : VOP2SI_m <op, P.Outs, P.Ins32, P.Asm32, [], opName, revOp>;
- defm _e64 : VOP3SI_2_m <op, P.Outs, P.Ins64, opName#"_e64"#P.Asm64,
+ defm _e64 : VOP3SI_2_m <op, P.Outs, P.Ins64, opName#P.Asm64,
!if(P.HasModifiers,
[(set P.DstVT:$dst,
(node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers,
@@ -1163,7 +1187,7 @@ multiclass VOP2b_Helper <vop2 op, string opName, dag outs,
defm _e32 : VOP2_m <op, outs, ins32, asm32, pat32, opName, revOp>;
defm _e64 : VOP3b_2_m <op,
- outs, ins64, opName#"_e64"#asm64, pat64, opName, revOp, HasMods
+ outs, ins64, opName#asm64, pat64, opName, revOp, HasMods
>;
}
@@ -1189,7 +1213,7 @@ multiclass VOP2_VI3_Helper <vop23 op, string opName, dag outs,
string revOp, bit HasMods> {
defm _e32 : VOP2SI_m <op, outs, ins32, asm32, pat32, opName, revOp>;
- defm _e64 : VOP3_2_m <op, outs, ins64, opName#"_e64"#asm64, pat64, opName,
+ defm _e64 : VOP3_2_m <op, outs, ins64, opName#asm64, pat64, opName,
revOp, HasMods>;
}
@@ -1235,28 +1259,30 @@ class VOPC_Pseudo <dag outs, dag ins, list<dag> pattern, string opName> :
}
multiclass VOPC_m <vopc op, dag outs, dag ins, string asm, list<dag> pattern,
- string opName, bit DefExec> {
+ string opName, bit DefExec, string revOpName = ""> {
def "" : VOPC_Pseudo <outs, ins, pattern, opName>;
def _si : VOPC<op.SI, ins, asm, []>,
SIMCInstr <opName#"_e32", SISubtarget.SI> {
let Defs = !if(DefExec, [EXEC], []);
+ let hasSideEffects = DefExec;
}
def _vi : VOPC<op.VI, ins, asm, []>,
SIMCInstr <opName#"_e32", SISubtarget.VI> {
let Defs = !if(DefExec, [EXEC], []);
+ let hasSideEffects = DefExec;
}
}
multiclass VOPC_Helper <vopc op, string opName,
dag ins32, string asm32, list<dag> pat32,
dag out64, dag ins64, string asm64, list<dag> pat64,
- bit HasMods, bit DefExec> {
+ bit HasMods, bit DefExec, string revOp> {
defm _e32 : VOPC_m <op, (outs), ins32, opName#asm32, pat32, opName, DefExec>;
- defm _e64 : VOP3_C_m <op, out64, ins64, opName#"_e64"#asm64, pat64,
- opName, HasMods, DefExec>;
+ defm _e64 : VOP3_C_m <op, out64, ins64, opName#asm64, pat64,
+ opName, HasMods, DefExec, revOp>;
}
// Special case for class instructions which only have modifiers on
@@ -1264,20 +1290,21 @@ multiclass VOPC_Helper <vopc op, string opName,
multiclass VOPC_Class_Helper <vopc op, string opName,
dag ins32, string asm32, list<dag> pat32,
dag out64, dag ins64, string asm64, list<dag> pat64,
- bit HasMods, bit DefExec> {
+ bit HasMods, bit DefExec, string revOp> {
defm _e32 : VOPC_m <op, (outs), ins32, opName#asm32, pat32, opName, DefExec>;
- defm _e64 : VOP3_C_m <op, out64, ins64, opName#"_e64"#asm64, pat64,
- opName, HasMods, DefExec>,
+ defm _e64 : VOP3_C_m <op, out64, ins64, opName#asm64, pat64,
+ opName, HasMods, DefExec, revOp>,
VOP3DisableModFields<1, 0, 0>;
}
multiclass VOPCInst <vopc op, string opName,
VOPProfile P, PatLeaf cond = COND_NULL,
+ string revOp = opName,
bit DefExec = 0> : VOPC_Helper <
op, opName,
P.Ins32, P.Asm32, [],
- (outs SReg_64:$dst), P.Ins64, P.Asm64,
+ (outs VOPDstS64:$dst), P.Ins64, P.Asm64,
!if(P.HasModifiers,
[(set i1:$dst,
(setcc (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers,
@@ -1285,54 +1312,55 @@ multiclass VOPCInst <vopc op, string opName,
(P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers)),
cond))],
[(set i1:$dst, (setcc P.Src0VT:$src0, P.Src1VT:$src1, cond))]),
- P.HasModifiers, DefExec
+ P.HasModifiers, DefExec, revOp
>;
multiclass VOPCClassInst <vopc op, string opName, VOPProfile P,
bit DefExec = 0> : VOPC_Class_Helper <
op, opName,
P.Ins32, P.Asm32, [],
- (outs SReg_64:$dst), P.Ins64, P.Asm64,
+ (outs VOPDstS64:$dst), P.Ins64, P.Asm64,
!if(P.HasModifiers,
[(set i1:$dst,
(AMDGPUfp_class (P.Src0VT (VOP3Mods0Clamp0OMod P.Src0VT:$src0, i32:$src0_modifiers)), P.Src1VT:$src1))],
[(set i1:$dst, (AMDGPUfp_class P.Src0VT:$src0, P.Src1VT:$src1))]),
- P.HasModifiers, DefExec
+ P.HasModifiers, DefExec, opName
>;
-multiclass VOPC_F32 <vopc op, string opName, PatLeaf cond = COND_NULL> :
- VOPCInst <op, opName, VOP_F32_F32_F32, cond>;
+multiclass VOPC_F32 <vopc op, string opName, PatLeaf cond = COND_NULL, string revOp = opName> :
+ VOPCInst <op, opName, VOP_F32_F32_F32, cond, revOp>;
-multiclass VOPC_F64 <vopc op, string opName, PatLeaf cond = COND_NULL> :
- VOPCInst <op, opName, VOP_F64_F64_F64, cond>;
+multiclass VOPC_F64 <vopc op, string opName, PatLeaf cond = COND_NULL, string revOp = opName> :
+ VOPCInst <op, opName, VOP_F64_F64_F64, cond, revOp>;
-multiclass VOPC_I32 <vopc op, string opName, PatLeaf cond = COND_NULL> :
- VOPCInst <op, opName, VOP_I32_I32_I32, cond>;
+multiclass VOPC_I32 <vopc op, string opName, PatLeaf cond = COND_NULL, string revOp = opName> :
+ VOPCInst <op, opName, VOP_I32_I32_I32, cond, revOp>;
-multiclass VOPC_I64 <vopc op, string opName, PatLeaf cond = COND_NULL> :
- VOPCInst <op, opName, VOP_I64_I64_I64, cond>;
+multiclass VOPC_I64 <vopc op, string opName, PatLeaf cond = COND_NULL, string revOp = opName> :
+ VOPCInst <op, opName, VOP_I64_I64_I64, cond, revOp>;
multiclass VOPCX <vopc op, string opName, VOPProfile P,
- PatLeaf cond = COND_NULL>
- : VOPCInst <op, opName, P, cond, 1>;
+ PatLeaf cond = COND_NULL,
+ string revOp = "">
+ : VOPCInst <op, opName, P, cond, revOp, 1>;
-multiclass VOPCX_F32 <vopc op, string opName, PatLeaf cond = COND_NULL> :
- VOPCX <op, opName, VOP_F32_F32_F32, cond>;
+multiclass VOPCX_F32 <vopc op, string opName, string revOp = opName> :
+ VOPCX <op, opName, VOP_F32_F32_F32, COND_NULL, revOp>;
-multiclass VOPCX_F64 <vopc op, string opName, PatLeaf cond = COND_NULL> :
- VOPCX <op, opName, VOP_F64_F64_F64, cond>;
+multiclass VOPCX_F64 <vopc op, string opName, string revOp = opName> :
+ VOPCX <op, opName, VOP_F64_F64_F64, COND_NULL, revOp>;
-multiclass VOPCX_I32 <vopc op, string opName, PatLeaf cond = COND_NULL> :
- VOPCX <op, opName, VOP_I32_I32_I32, cond>;
+multiclass VOPCX_I32 <vopc op, string opName, string revOp = opName> :
+ VOPCX <op, opName, VOP_I32_I32_I32, COND_NULL, revOp>;
-multiclass VOPCX_I64 <vopc op, string opName, PatLeaf cond = COND_NULL> :
- VOPCX <op, opName, VOP_I64_I64_I64, cond>;
+multiclass VOPCX_I64 <vopc op, string opName, string revOp = opName> :
+ VOPCX <op, opName, VOP_I64_I64_I64, COND_NULL, revOp>;
multiclass VOP3_Helper <vop3 op, string opName, dag outs, dag ins, string asm,
list<dag> pat, int NumSrcArgs, bit HasMods> : VOP3_m <
- op, outs, ins, opName#asm, pat, opName, NumSrcArgs, HasMods
+ op, outs, ins, opName#" "#asm, pat, opName, NumSrcArgs, HasMods
>;
multiclass VOPC_CLASS_F32 <vopc op, string opName> :
@@ -1349,7 +1377,7 @@ multiclass VOPCX_CLASS_F64 <vopc op, string opName> :
multiclass VOP3Inst <vop3 op, string opName, VOPProfile P,
SDPatternOperator node = null_frag> : VOP3_Helper <
- op, opName, P.Outs, P.Ins64, P.Asm64,
+ op, opName, (outs P.DstRC.RegClass:$dst), P.Ins64, P.Asm64,
!if(!eq(P.NumSrcArgs, 3),
!if(P.HasModifiers,
[(set P.DstVT:$dst,
@@ -1381,7 +1409,7 @@ multiclass VOP3_VCC_Inst <vop3 op, string opName,
VOPProfile P,
SDPatternOperator node = null_frag> : VOP3_Helper <
op, opName,
- P.Outs,
+ (outs P.DstRC.RegClass:$dst),
(ins InputModsNoDefault:$src0_modifiers, P.Src0RC64:$src0,
InputModsNoDefault:$src1_modifiers, P.Src1RC64:$src1,
InputModsNoDefault:$src2_modifiers, P.Src2RC64:$src2,
@@ -1483,10 +1511,8 @@ class DS_Real_vi <bits<8> op, string opName, dag outs, dag ins, string asm> :
DSe_vi <op>,
SIMCInstr <opName, SISubtarget.VI>;
-class DS_1A_Real_si <bits<8> op, string opName, dag outs, dag ins, string asm> :
- DS <outs, ins, asm, []>,
- DSe <op>,
- SIMCInstr <opName, SISubtarget.SI> {
+class DS_Off16_Real_si <bits<8> op, string opName, dag outs, dag ins, string asm> :
+ DS_Real_si <op, opName, outs, ins, asm> {
// Single loads interpret the 2 i8imm operands as a single i16 offset.
bits<16> offset;
@@ -1494,10 +1520,8 @@ class DS_1A_Real_si <bits<8> op, string opName, dag outs, dag ins, string asm> :
let offset1 = offset{15-8};
}
-class DS_1A_Real_vi <bits<8> op, string opName, dag outs, dag ins, string asm> :
- DS <outs, ins, asm, []>,
- DSe_vi <op>,
- SIMCInstr <opName, SISubtarget.VI> {
+class DS_Off16_Real_vi <bits<8> op, string opName, dag outs, dag ins, string asm> :
+ DS_Real_vi <op, opName, outs, ins, asm> {
// Single loads interpret the 2 i8imm operands as a single i16 offset.
bits<16> offset;
@@ -1505,180 +1529,168 @@ class DS_1A_Real_vi <bits<8> op, string opName, dag outs, dag ins, string asm> :
let offset1 = offset{15-8};
}
-multiclass DS_1A_Load_m <bits<8> op, string opName, dag outs, dag ins, string asm,
- list<dag> pat> {
- let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in {
- def "" : DS_Pseudo <opName, outs, ins, pat>;
+multiclass DS_1A_RET <bits<8> op, string opName, RegisterClass rc,
+ dag outs = (outs rc:$vdst),
+ dag ins = (ins VGPR_32:$addr, ds_offset:$offset, gds:$gds, M0Reg:$m0),
+ string asm = opName#" $vdst, $addr"#"$offset$gds"> {
- let data0 = 0, data1 = 0 in {
- def _si : DS_1A_Real_si <op, opName, outs, ins, asm>;
- def _vi : DS_1A_Real_vi <op, opName, outs, ins, asm>;
- }
+ def "" : DS_Pseudo <opName, outs, ins, []>;
+
+ let data0 = 0, data1 = 0 in {
+ def _si : DS_Off16_Real_si <op, opName, outs, ins, asm>;
+ def _vi : DS_Off16_Real_vi <op, opName, outs, ins, asm>;
}
}
-multiclass DS_Load_Helper <bits<8> op, string asm, RegisterClass regClass>
- : DS_1A_Load_m <
- op,
- asm,
- (outs regClass:$vdst),
- (ins i1imm:$gds, VGPR_32:$addr, ds_offset:$offset, M0Reg:$m0),
- asm#" $vdst, $addr"#"$offset",
- []>;
-
-multiclass DS_Load2_m <bits<8> op, string opName, dag outs, dag ins, string asm,
- list<dag> pat> {
- let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in {
- def "" : DS_Pseudo <opName, outs, ins, pat>;
-
- let data0 = 0, data1 = 0 in {
- def _si : DS_Real_si <op, opName, outs, ins, asm>;
- def _vi : DS_Real_vi <op, opName, outs, ins, asm>;
- }
+multiclass DS_1A_Off8_RET <bits<8> op, string opName, RegisterClass rc,
+ dag outs = (outs rc:$vdst),
+ dag ins = (ins VGPR_32:$addr, ds_offset0:$offset0, ds_offset1:$offset1,
+ gds:$gds, M0Reg:$m0),
+ string asm = opName#" $vdst, $addr"#"$offset0"#"$offset1$gds"> {
+
+ def "" : DS_Pseudo <opName, outs, ins, []>;
+
+ let data0 = 0, data1 = 0 in {
+ def _si : DS_Real_si <op, opName, outs, ins, asm>;
+ def _vi : DS_Real_vi <op, opName, outs, ins, asm>;
}
}
-multiclass DS_Load2_Helper <bits<8> op, string asm, RegisterClass regClass>
- : DS_Load2_m <
- op,
- asm,
- (outs regClass:$vdst),
- (ins i1imm:$gds, VGPR_32:$addr, ds_offset0:$offset0, ds_offset1:$offset1,
- M0Reg:$m0),
- asm#" $vdst, $addr"#"$offset0"#"$offset1",
- []>;
-
-multiclass DS_1A_Store_m <bits<8> op, string opName, dag outs, dag ins,
- string asm, list<dag> pat> {
- let hasSideEffects = 0, mayLoad = 0, mayStore = 1 in {
- def "" : DS_Pseudo <opName, outs, ins, pat>;
-
- let data1 = 0, vdst = 0 in {
- def _si : DS_1A_Real_si <op, opName, outs, ins, asm>;
- def _vi : DS_1A_Real_vi <op, opName, outs, ins, asm>;
- }
+multiclass DS_1A1D_NORET <bits<8> op, string opName, RegisterClass rc,
+ dag outs = (outs),
+ dag ins = (ins VGPR_32:$addr, rc:$data0, ds_offset:$offset, gds:$gds,
+ M0Reg:$m0),
+ string asm = opName#" $addr, $data0"#"$offset$gds"> {
+
+ def "" : DS_Pseudo <opName, outs, ins, []>,
+ AtomicNoRet<opName, 0>;
+
+ let data1 = 0, vdst = 0 in {
+ def _si : DS_Off16_Real_si <op, opName, outs, ins, asm>;
+ def _vi : DS_Off16_Real_vi <op, opName, outs, ins, asm>;
}
}
-multiclass DS_Store_Helper <bits<8> op, string asm, RegisterClass regClass>
- : DS_1A_Store_m <
- op,
- asm,
- (outs),
- (ins i1imm:$gds, VGPR_32:$addr, regClass:$data0, ds_offset:$offset, M0Reg:$m0),
- asm#" $addr, $data0"#"$offset",
- []>;
-
-multiclass DS_Store_m <bits<8> op, string opName, dag outs, dag ins,
- string asm, list<dag> pat> {
- let hasSideEffects = 0, mayLoad = 0, mayStore = 1 in {
- def "" : DS_Pseudo <opName, outs, ins, pat>;
-
- let vdst = 0 in {
- def _si : DS_Real_si <op, opName, outs, ins, asm>;
- def _vi : DS_Real_vi <op, opName, outs, ins, asm>;
- }
+multiclass DS_1A1D_Off8_NORET <bits<8> op, string opName, RegisterClass rc,
+ dag outs = (outs),
+ dag ins = (ins VGPR_32:$addr, rc:$data0, rc:$data1,
+ ds_offset0:$offset0, ds_offset1:$offset1, gds:$gds, M0Reg:$m0),
+ string asm = opName#" $addr, $data0, $data1"#"$offset0"#"$offset1"#"$gds"> {
+
+ def "" : DS_Pseudo <opName, outs, ins, []>;
+
+ let vdst = 0 in {
+ def _si : DS_Real_si <op, opName, outs, ins, asm>;
+ def _vi : DS_Real_vi <op, opName, outs, ins, asm>;
}
}
-multiclass DS_Store2_Helper <bits<8> op, string asm, RegisterClass regClass>
- : DS_Store_m <
- op,
- asm,
- (outs),
- (ins i1imm:$gds, VGPR_32:$addr, regClass:$data0, regClass:$data1,
- ds_offset0:$offset0, ds_offset1:$offset1, M0Reg:$m0),
- asm#" $addr, $data0, $data1"#"$offset0"#"$offset1",
- []>;
-
-// 1 address, 1 data.
-multiclass DS_1A1D_RET_m <bits<8> op, string opName, dag outs, dag ins,
- string asm, list<dag> pat, string noRetOp> {
- let mayLoad = 1, mayStore = 1,
- hasPostISelHook = 1 // Adjusted to no return version.
- in {
- def "" : DS_Pseudo <opName, outs, ins, pat>,
- AtomicNoRet<noRetOp, 1>;
-
- let data1 = 0 in {
- def _si : DS_1A_Real_si <op, opName, outs, ins, asm>;
- def _vi : DS_1A_Real_vi <op, opName, outs, ins, asm>;
- }
+multiclass DS_1A1D_RET <bits<8> op, string opName, RegisterClass rc,
+ string noRetOp = "",
+ dag outs = (outs rc:$vdst),
+ dag ins = (ins VGPR_32:$addr, rc:$data0, ds_offset:$offset, gds:$gds,
+ M0Reg:$m0),
+ string asm = opName#" $vdst, $addr, $data0"#"$offset$gds"> {
+
+ def "" : DS_Pseudo <opName, outs, ins, []>,
+ AtomicNoRet<noRetOp, 1>;
+
+ let data1 = 0 in {
+ def _si : DS_Off16_Real_si <op, opName, outs, ins, asm>;
+ def _vi : DS_Off16_Real_vi <op, opName, outs, ins, asm>;
}
}
-multiclass DS_1A1D_RET <bits<8> op, string asm, RegisterClass rc,
- string noRetOp = ""> : DS_1A1D_RET_m <
- op, asm,
- (outs rc:$vdst),
- (ins i1imm:$gds, VGPR_32:$addr, rc:$data0, ds_offset:$offset, M0Reg:$m0),
- asm#" $vdst, $addr, $data0"#"$offset", [], noRetOp>;
-
-// 1 address, 2 data.
-multiclass DS_1A2D_RET_m <bits<8> op, string opName, dag outs, dag ins,
- string asm, list<dag> pat, string noRetOp> {
- let mayLoad = 1, mayStore = 1,
- hasPostISelHook = 1 // Adjusted to no return version.
- in {
- def "" : DS_Pseudo <opName, outs, ins, pat>,
- AtomicNoRet<noRetOp, 1>;
-
- def _si : DS_1A_Real_si <op, opName, outs, ins, asm>;
- def _vi : DS_1A_Real_vi <op, opName, outs, ins, asm>;
- }
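+// 1 address, 2 data. Returns the previous memory value.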
+multiclass DS_1A2D_RET_m <bits<8> op, string opName, RegisterClass rc,
+ string noRetOp = "", dag ins,
+ dag outs = (outs rc:$vdst),
+ string asm = opName#" $vdst, $addr, $data0, $data1"#"$offset"#"$gds"> {
+
+ def "" : DS_Pseudo <opName, outs, ins, []>,
+ AtomicNoRet<noRetOp, 1>;
+
+ def _si : DS_Off16_Real_si <op, opName, outs, ins, asm>;
+ def _vi : DS_Off16_Real_vi <op, opName, outs, ins, asm>;
}
multiclass DS_1A2D_RET <bits<8> op, string asm, RegisterClass rc,
- string noRetOp = ""> : DS_1A2D_RET_m <
- op, asm,
- (outs rc:$vdst),
- (ins i1imm:$gds, VGPR_32:$addr, rc:$data0, rc:$data1, ds_offset:$offset, M0Reg:$m0),
- asm#" $vdst, $addr, $data0, $data1"#"$offset",
- [], noRetOp>;
-
-// 1 address, 2 data.
-multiclass DS_1A2D_NORET_m <bits<8> op, string opName, dag outs, dag ins,
- string asm, list<dag> pat, string noRetOp> {
- let mayLoad = 1, mayStore = 1 in {
- def "" : DS_Pseudo <opName, outs, ins, pat>,
- AtomicNoRet<noRetOp, 0>;
+ string noRetOp = "", RegisterClass src = rc> :
+ DS_1A2D_RET_m <op, asm, rc, noRetOp,
+ (ins VGPR_32:$addr, src:$data0, src:$data1,
+ ds_offset:$offset, gds:$gds, M0Reg:$m0)
+>;
- let vdst = 0 in {
- def _si : DS_1A_Real_si <op, opName, outs, ins, asm>;
- def _vi : DS_1A_Real_vi <op, opName, outs, ins, asm>;
- }
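+// 1 address, 2 data. No return value.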
+multiclass DS_1A2D_NORET <bits<8> op, string opName, RegisterClass rc,
+ string noRetOp = opName,
+ dag outs = (outs),
+ dag ins = (ins VGPR_32:$addr, rc:$data0, rc:$data1,
+ ds_offset:$offset, gds:$gds, M0Reg:$m0),
+ string asm = opName#" $addr, $data0, $data1"#"$offset"#"$gds"> {
+
+ def "" : DS_Pseudo <opName, outs, ins, []>,
+ AtomicNoRet<noRetOp, 0>;
+
+ let vdst = 0 in {
+ def _si : DS_Off16_Real_si <op, opName, outs, ins, asm>;
+ def _vi : DS_Off16_Real_vi <op, opName, outs, ins, asm>;
}
}
-multiclass DS_1A2D_NORET <bits<8> op, string asm, RegisterClass rc,
- string noRetOp = asm> : DS_1A2D_NORET_m <
- op, asm,
- (outs),
- (ins i1imm:$gds, VGPR_32:$addr, rc:$data0, rc:$data1, ds_offset:$offset, M0Reg:$m0),
- asm#" $addr, $data0, $data1"#"$offset",
- [], noRetOp>;
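+// No address, no data. Returns a value (used for ds_consume/ds_append).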
+multiclass DS_0A_RET <bits<8> op, string opName,
+ dag outs = (outs VGPR_32:$vdst),
+ dag ins = (ins ds_offset:$offset, gds:$gds, M0Reg:$m0),
+ string asm = opName#" $vdst"#"$offset"#"$gds"> {
-// 1 address, 1 data.
-multiclass DS_1A1D_NORET_m <bits<8> op, string opName, dag outs, dag ins,
- string asm, list<dag> pat, string noRetOp> {
let mayLoad = 1, mayStore = 1 in {
- def "" : DS_Pseudo <opName, outs, ins, pat>,
- AtomicNoRet<noRetOp, 0>;
+ def "" : DS_Pseudo <opName, outs, ins, []>;
- let data1 = 0, vdst = 0 in {
- def _si : DS_1A_Real_si <op, opName, outs, ins, asm>;
- def _vi : DS_1A_Real_vi <op, opName, outs, ins, asm>;
- }
- }
+ let addr = 0, data0 = 0, data1 = 0 in {
+ def _si : DS_Off16_Real_si <op, opName, outs, ins, asm>;
+ def _vi : DS_Off16_Real_vi <op, opName, outs, ins, asm>;
+ } // end addr = 0, data0 = 0, data1 = 0
+ } // end mayLoad = 1, mayStore = 1
}
-multiclass DS_1A1D_NORET <bits<8> op, string asm, RegisterClass rc,
- string noRetOp = asm> : DS_1A1D_NORET_m <
- op, asm,
- (outs),
- (ins i1imm:$gds, VGPR_32:$addr, rc:$data0, ds_offset:$offset, M0Reg:$m0),
- asm#" $addr, $data0"#"$offset",
- [], noRetOp>;
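+// 1 address, no data. Returns a value; the gds bit is hardwired to 1.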
+multiclass DS_1A_RET_GDS <bits<8> op, string opName,
+ dag outs = (outs VGPR_32:$vdst),
+ dag ins = (ins VGPR_32:$addr, ds_offset:$offset, M0Reg:$m0),
+ string asm = opName#" $vdst, $addr"#"$offset gds"> {
+
+ def "" : DS_Pseudo <opName, outs, ins, []>;
+
+ let data0 = 0, data1 = 0, gds = 1 in {
+ def _si : DS_Off16_Real_si <op, opName, outs, ins, asm>;
+ def _vi : DS_Off16_Real_vi <op, opName, outs, ins, asm>;
+ } // end data0 = 0, data1 = 0, gds = 1
+}
+
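+// 1 address, no data, no offsets. GDS-only.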
+multiclass DS_1A_GDS <bits<8> op, string opName,
+ dag outs = (outs),
+ dag ins = (ins VGPR_32:$addr, M0Reg:$m0),
+ string asm = opName#" $addr gds"> {
+
+ def "" : DS_Pseudo <opName, outs, ins, []>;
+
+ let vdst = 0, data0 = 0, data1 = 0, offset0 = 0, offset1 = 0, gds = 1 in {
+ def _si : DS_Real_si <op, opName, outs, ins, asm>;
+ def _vi : DS_Real_vi <op, opName, outs, ins, asm>;
+  } // end vdst = 0, data0 = 0, data1 = 0, offset0 = 0, offset1 = 0, gds = 1
+}
+
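+// 1 address, no data (used for the *_src2_* instructions).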
+multiclass DS_1A <bits<8> op, string opName,
+ dag outs = (outs),
+ dag ins = (ins VGPR_32:$addr, ds_offset:$offset, M0Reg:$m0, gds:$gds),
+ string asm = opName#" $addr"#"$offset"#"$gds"> {
+
+ let mayLoad = 1, mayStore = 1 in {
+ def "" : DS_Pseudo <opName, outs, ins, []>;
+
+ let vdst = 0, data0 = 0, data1 = 0 in {
+ def _si : DS_Off16_Real_si <op, opName, outs, ins, asm>;
+ def _vi : DS_Off16_Real_vi <op, opName, outs, ins, asm>;
+  } // end vdst = 0, data0 = 0, data1 = 0
+ } // end mayLoad = 1, mayStore = 1
+}
//===----------------------------------------------------------------------===//
// MTBUF classes
@@ -1861,14 +1873,14 @@ multiclass MUBUF_Atomic <mubuf op, string name, RegisterClass rc,
defm _ADDR64 : MUBUFAtomicAddr64_m <
op, name#"_addr64", (outs),
(ins rc:$vdata, SReg_128:$srsrc, VReg_64:$vaddr,
- mbuf_offset:$offset, SCSrc_32:$soffset, slc:$slc),
+ SCSrc_32:$soffset, mbuf_offset:$offset, slc:$slc),
name#" $vdata, $vaddr, $srsrc, $soffset addr64"#"$offset"#"$slc", [], 0
>;
defm _OFFSET : MUBUFAtomicOffset_m <
op, name#"_offset", (outs),
- (ins rc:$vdata, SReg_128:$srsrc, mbuf_offset:$offset,
- SCSrc_32:$soffset, slc:$slc),
+ (ins rc:$vdata, SReg_128:$srsrc, SCSrc_32:$soffset, mbuf_offset:$offset,
+ slc:$slc),
name#" $vdata, $srsrc, $soffset"#"$offset"#"$slc", [], 0
>;
} // glc = 0
@@ -1880,7 +1892,7 @@ multiclass MUBUF_Atomic <mubuf op, string name, RegisterClass rc,
defm _RTN_ADDR64 : MUBUFAtomicAddr64_m <
op, name#"_rtn_addr64", (outs rc:$vdata),
(ins rc:$vdata_in, SReg_128:$srsrc, VReg_64:$vaddr,
- mbuf_offset:$offset, SSrc_32:$soffset, slc:$slc),
+ SCSrc_32:$soffset, mbuf_offset:$offset, slc:$slc),
name#" $vdata, $vaddr, $srsrc, $soffset addr64"#"$offset"#" glc"#"$slc",
[(set vt:$vdata,
(atomic (MUBUFAddr64Atomic v4i32:$srsrc, i64:$vaddr, i32:$soffset,
@@ -1889,8 +1901,8 @@ multiclass MUBUF_Atomic <mubuf op, string name, RegisterClass rc,
defm _RTN_OFFSET : MUBUFAtomicOffset_m <
op, name#"_rtn_offset", (outs rc:$vdata),
- (ins rc:$vdata_in, SReg_128:$srsrc, mbuf_offset:$offset,
- SCSrc_32:$soffset, slc:$slc),
+ (ins rc:$vdata_in, SReg_128:$srsrc, SCSrc_32:$soffset,
+ mbuf_offset:$offset, slc:$slc),
name#" $vdata, $srsrc, $soffset"#"$offset"#" glc $slc",
[(set vt:$vdata,
(atomic (MUBUFOffsetAtomic v4i32:$srsrc, i32:$soffset, i16:$offset,
@@ -1909,9 +1921,8 @@ multiclass MUBUF_Load_Helper <mubuf op, string name, RegisterClass regClass,
let mayLoad = 1, mayStore = 0 in {
let offen = 0, idxen = 0, vaddr = 0 in {
defm _OFFSET : MUBUF_m <op, name#"_offset", (outs regClass:$vdata),
- (ins SReg_128:$srsrc,
- mbuf_offset:$offset, SCSrc_32:$soffset, glc:$glc,
- slc:$slc, tfe:$tfe),
+ (ins SReg_128:$srsrc, SCSrc_32:$soffset,
+ mbuf_offset:$offset, glc:$glc, slc:$slc, tfe:$tfe),
name#" $vdata, $srsrc, $soffset"#"$offset"#"$glc"#"$slc"#"$tfe",
[(set load_vt:$vdata, (ld (MUBUFOffset v4i32:$srsrc,
i32:$soffset, i16:$offset,
@@ -1920,7 +1931,7 @@ multiclass MUBUF_Load_Helper <mubuf op, string name, RegisterClass regClass,
let offen = 1, idxen = 0 in {
defm _OFFEN : MUBUF_m <op, name#"_offen", (outs regClass:$vdata),
- (ins SReg_128:$srsrc, VGPR_32:$vaddr,
+ (ins VGPR_32:$vaddr, SReg_128:$srsrc,
SCSrc_32:$soffset, mbuf_offset:$offset, glc:$glc, slc:$slc,
tfe:$tfe),
name#" $vdata, $vaddr, $srsrc, $soffset offen"#"$offset"#"$glc"#"$slc"#"$tfe", []>;
@@ -1928,45 +1939,48 @@ multiclass MUBUF_Load_Helper <mubuf op, string name, RegisterClass regClass,
let offen = 0, idxen = 1 in {
defm _IDXEN : MUBUF_m <op, name#"_idxen", (outs regClass:$vdata),
- (ins SReg_128:$srsrc, VGPR_32:$vaddr,
- mbuf_offset:$offset, SCSrc_32:$soffset, glc:$glc,
+ (ins VGPR_32:$vaddr, SReg_128:$srsrc,
+ SCSrc_32:$soffset, mbuf_offset:$offset, glc:$glc,
slc:$slc, tfe:$tfe),
name#" $vdata, $vaddr, $srsrc, $soffset idxen"#"$offset"#"$glc"#"$slc"#"$tfe", []>;
}
let offen = 1, idxen = 1 in {
defm _BOTHEN : MUBUF_m <op, name#"_bothen", (outs regClass:$vdata),
- (ins SReg_128:$srsrc, VReg_64:$vaddr,
- SCSrc_32:$soffset, mbuf_offset:$offset, glc:$glc, slc:$slc, tfe:$tfe),
+ (ins VReg_64:$vaddr, SReg_128:$srsrc, SCSrc_32:$soffset,
+ mbuf_offset:$offset, glc:$glc, slc:$slc, tfe:$tfe),
name#" $vdata, $vaddr, $srsrc, $soffset idxen offen"#"$offset"#"$glc"#"$slc"#"$tfe", []>;
}
- let offen = 0, idxen = 0, glc = 0, slc = 0, tfe = 0 in {
+ let offen = 0, idxen = 0 in {
defm _ADDR64 : MUBUFAddr64_m <op, name#"_addr64", (outs regClass:$vdata),
- (ins SReg_128:$srsrc, VReg_64:$vaddr,
- SCSrc_32:$soffset, mbuf_offset:$offset),
- name#" $vdata, $vaddr, $srsrc, $soffset addr64"#"$offset",
+ (ins VReg_64:$vaddr, SReg_128:$srsrc,
+ SCSrc_32:$soffset, mbuf_offset:$offset,
+ glc:$glc, slc:$slc, tfe:$tfe),
+ name#" $vdata, $vaddr, $srsrc, $soffset addr64"#"$offset"#
+ "$glc"#"$slc"#"$tfe",
[(set load_vt:$vdata, (ld (MUBUFAddr64 v4i32:$srsrc,
i64:$vaddr, i32:$soffset,
- i16:$offset)))]>;
+ i16:$offset, i1:$glc, i1:$slc,
+ i1:$tfe)))]>;
}
}
}
multiclass MUBUF_Store_Helper <mubuf op, string name, RegisterClass vdataClass,
- ValueType store_vt, SDPatternOperator st> {
+ ValueType store_vt = i32, SDPatternOperator st = null_frag> {
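+  // Default store_vt/st allow instantiating this helper without a selection
+  // pattern, e.g. for the buffer_store_format_* instructions.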
let mayLoad = 0, mayStore = 1 in {
defm : MUBUF_m <op, name, (outs),
- (ins vdataClass:$vdata, SReg_128:$srsrc, VGPR_32:$vaddr, SCSrc_32:$soffset,
+ (ins vdataClass:$vdata, VGPR_32:$vaddr, SReg_128:$srsrc, SCSrc_32:$soffset,
mbuf_offset:$offset, offen:$offen, idxen:$idxen, glc:$glc, slc:$slc,
tfe:$tfe),
name#" $vdata, $vaddr, $srsrc, $soffset"#"$offen"#"$idxen"#"$offset"#
- "$glc"#"$slc"#"$tfe", []>;
+ "$glc"#"$slc"#"$tfe", []>;
let offen = 0, idxen = 0, vaddr = 0 in {
defm _OFFSET : MUBUF_m <op, name#"_offset",(outs),
- (ins vdataClass:$vdata, SReg_128:$srsrc, mbuf_offset:$offset,
- SCSrc_32:$soffset, glc:$glc, slc:$slc, tfe:$tfe),
+ (ins vdataClass:$vdata, SReg_128:$srsrc, SCSrc_32:$soffset,
+ mbuf_offset:$offset, glc:$glc, slc:$slc, tfe:$tfe),
name#" $vdata, $srsrc, $soffset"#"$offset"#"$glc"#"$slc"#"$tfe",
[(st store_vt:$vdata, (MUBUFOffset v4i32:$srsrc, i32:$soffset,
i16:$offset, i1:$glc, i1:$slc, i1:$tfe))]>;
@@ -1974,21 +1988,40 @@ multiclass MUBUF_Store_Helper <mubuf op, string name, RegisterClass vdataClass,
let offen = 1, idxen = 0 in {
defm _OFFEN : MUBUF_m <op, name#"_offen", (outs),
- (ins vdataClass:$vdata, SReg_128:$srsrc, VGPR_32:$vaddr, SCSrc_32:$soffset,
- mbuf_offset:$offset, glc:$glc, slc:$slc, tfe:$tfe),
+ (ins vdataClass:$vdata, VGPR_32:$vaddr, SReg_128:$srsrc,
+ SCSrc_32:$soffset, mbuf_offset:$offset, glc:$glc,
+ slc:$slc, tfe:$tfe),
name#" $vdata, $vaddr, $srsrc, $soffset offen"#"$offset"#
"$glc"#"$slc"#"$tfe", []>;
} // end offen = 1, idxen = 0
- let offen = 0, idxen = 0, glc = 0, slc = 0, tfe = 0 in {
+ let offen = 0, idxen = 1 in {
+ defm _IDXEN : MUBUF_m <op, name#"_idxen", (outs),
+ (ins vdataClass:$vdata, VGPR_32:$vaddr, SReg_128:$srsrc,
+ SCSrc_32:$soffset, mbuf_offset:$offset, glc:$glc,
+ slc:$slc, tfe:$tfe),
+ name#" $vdata, $vaddr, $srsrc, $soffset idxen"#"$offset"#"$glc"#"$slc"#"$tfe", []>;
+ }
+
+ let offen = 1, idxen = 1 in {
+ defm _BOTHEN : MUBUF_m <op, name#"_bothen", (outs),
+ (ins vdataClass:$vdata, VReg_64:$vaddr, SReg_128:$srsrc, SCSrc_32:$soffset,
+ mbuf_offset:$offset, glc:$glc, slc:$slc, tfe:$tfe),
+ name#" $vdata, $vaddr, $srsrc, $soffset idxen offen"#"$offset"#"$glc"#"$slc"#"$tfe", []>;
+ }
+
+ let offen = 0, idxen = 0 in {
defm _ADDR64 : MUBUFAddr64_m <op, name#"_addr64", (outs),
- (ins vdataClass:$vdata, SReg_128:$srsrc,
- VReg_64:$vaddr, SCSrc_32:$soffset,
- mbuf_offset:$offset),
- name#" $vdata, $vaddr, $srsrc, $soffset addr64"#"$offset",
+ (ins vdataClass:$vdata, VReg_64:$vaddr, SReg_128:$srsrc,
+ SCSrc_32:$soffset,
+ mbuf_offset:$offset, glc:$glc, slc:$slc,
+ tfe:$tfe),
+ name#" $vdata, $vaddr, $srsrc, $soffset addr64"#
+ "$offset"#"$glc"#"$slc"#"$tfe",
[(st store_vt:$vdata,
(MUBUFAddr64 v4i32:$srsrc, i64:$vaddr,
- i32:$soffset, i16:$offset))]>;
+ i32:$soffset, i16:$offset,
+ i1:$glc, i1:$slc, i1:$tfe))]>;
}
} // End mayLoad = 0, mayStore = 1
}
@@ -2182,15 +2215,6 @@ def getVOPe32 : InstrMapping {
let ValueCols = [["4"]];
}
-// Maps an original opcode to its commuted version
-def getCommuteRev : InstrMapping {
- let FilterClass = "VOP2_REV";
- let RowFields = ["RevOp"];
- let ColFields = ["IsOrig"];
- let KeyCol = ["1"];
- let ValueCols = [["0"]];
-}
-
def getMaskedMIMGOp : InstrMapping {
let FilterClass = "MIMG_Mask";
let RowFields = ["Op"];
@@ -2208,6 +2232,33 @@ def getCommuteOrig : InstrMapping {
let ValueCols = [["1"]];
}
+// Maps an original opcode to its commuted version
+def getCommuteRev : InstrMapping {
+ let FilterClass = "VOP2_REV";
+ let RowFields = ["RevOp"];
+ let ColFields = ["IsOrig"];
+ let KeyCol = ["1"];
+ let ValueCols = [["0"]];
+}
+
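+// Maps a commuted opcode back to its original version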
+def getCommuteCmpOrig : InstrMapping {
+ let FilterClass = "VOP2_REV";
+ let RowFields = ["RevOp"];
+ let ColFields = ["IsOrig"];
+ let KeyCol = ["0"];
+ let ValueCols = [["1"]];
+}
+
+// Maps an original opcode to its commuted version
+def getCommuteCmpRev : InstrMapping {
+ let FilterClass = "VOP2_REV";
+ let RowFields = ["RevOp"];
+ let ColFields = ["IsOrig"];
+ let KeyCol = ["1"];
+ let ValueCols = [["0"]];
+}
+
def getMCOpcodeGen : InstrMapping {
let FilterClass = "SIMCInstr";
let RowFields = ["PseudoInstr"];
diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td
index 4f72e99..95b2470 100644
--- a/lib/Target/R600/SIInstructions.td
+++ b/lib/Target/R600/SIInstructions.td
@@ -28,6 +28,8 @@ def SendMsgImm : Operand<i32> {
def isGCN : Predicate<"Subtarget->getGeneration() "
">= AMDGPUSubtarget::SOUTHERN_ISLANDS">;
+def isSI : Predicate<"Subtarget->getGeneration() "
+ "== AMDGPUSubtarget::SOUTHERN_ISLANDS">;
def isSICI : Predicate<
"Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS ||"
"Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS"
@@ -153,7 +155,9 @@ defm S_FLBIT_I32_B32 : SOP1_32 <sop1<0x15, 0x12>, "s_flbit_i32_b32",
>;
defm S_FLBIT_I32_B64 : SOP1_32_64 <sop1<0x16, 0x13>, "s_flbit_i32_b64", []>;
-defm S_FLBIT_I32 : SOP1_32 <sop1<0x17, 0x14>, "s_flbit_i32", []>;
+defm S_FLBIT_I32 : SOP1_32 <sop1<0x17, 0x14>, "s_flbit_i32",
+ [(set i32:$dst, (int_AMDGPU_flbit_i32 i32:$src0))]
+>;
defm S_FLBIT_I32_I64 : SOP1_32_64 <sop1<0x18, 0x15>, "s_flbit_i32_i64", []>;
defm S_SEXT_I32_I8 : SOP1_32 <sop1<0x19, 0x16>, "s_sext_i32_i8",
[(set i32:$dst, (sext_inreg i32:$src0, i8))]
@@ -304,7 +308,8 @@ defm S_ASHR_I64 : SOP2_64_32 <sop2<0x23, 0x21>, "s_ashr_i64",
>;
} // End Defs = [SCC]
-defm S_BFM_B32 : SOP2_32 <sop2<0x24, 0x22>, "s_bfm_b32", []>;
+defm S_BFM_B32 : SOP2_32 <sop2<0x24, 0x22>, "s_bfm_b32",
+ [(set i32:$dst, (AMDGPUbfm i32:$src0, i32:$src1))]>;
defm S_BFM_B64 : SOP2_64 <sop2<0x25, 0x23>, "s_bfm_b64", []>;
defm S_MUL_I32 : SOP2_32 <sop2<0x26, 0x24>, "s_mul_i32",
[(set i32:$dst, (mul i32:$src0, i32:$src1))]
@@ -505,31 +510,30 @@ def S_TTRACEDATA : SOPP <0x00000016, (ins), "s_ttracedata"> {
// VOPC Instructions
//===----------------------------------------------------------------------===//
-let isCompare = 1 in {
+let isCompare = 1, isCommutable = 1 in {
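+// The optional trailing string names the reversed comparison, which is used
+// when one of these compares is commuted.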
defm V_CMP_F_F32 : VOPC_F32 <vopc<0x0, 0x40>, "v_cmp_f_f32">;
-defm V_CMP_LT_F32 : VOPC_F32 <vopc<0x1, 0x41>, "v_cmp_lt_f32", COND_OLT>;
+defm V_CMP_LT_F32 : VOPC_F32 <vopc<0x1, 0x41>, "v_cmp_lt_f32", COND_OLT, "v_cmp_gt_f32">;
defm V_CMP_EQ_F32 : VOPC_F32 <vopc<0x2, 0x42>, "v_cmp_eq_f32", COND_OEQ>;
-defm V_CMP_LE_F32 : VOPC_F32 <vopc<0x3, 0x43>, "v_cmp_le_f32", COND_OLE>;
+defm V_CMP_LE_F32 : VOPC_F32 <vopc<0x3, 0x43>, "v_cmp_le_f32", COND_OLE, "v_cmp_ge_f32">;
defm V_CMP_GT_F32 : VOPC_F32 <vopc<0x4, 0x44>, "v_cmp_gt_f32", COND_OGT>;
defm V_CMP_LG_F32 : VOPC_F32 <vopc<0x5, 0x45>, "v_cmp_lg_f32", COND_ONE>;
defm V_CMP_GE_F32 : VOPC_F32 <vopc<0x6, 0x46>, "v_cmp_ge_f32", COND_OGE>;
defm V_CMP_O_F32 : VOPC_F32 <vopc<0x7, 0x47>, "v_cmp_o_f32", COND_O>;
defm V_CMP_U_F32 : VOPC_F32 <vopc<0x8, 0x48>, "v_cmp_u_f32", COND_UO>;
-defm V_CMP_NGE_F32 : VOPC_F32 <vopc<0x9, 0x49>, "v_cmp_nge_f32", COND_ULT>;
+defm V_CMP_NGE_F32 : VOPC_F32 <vopc<0x9, 0x49>, "v_cmp_nge_f32", COND_ULT, "v_cmp_nle_f32">;
defm V_CMP_NLG_F32 : VOPC_F32 <vopc<0xa, 0x4a>, "v_cmp_nlg_f32", COND_UEQ>;
-defm V_CMP_NGT_F32 : VOPC_F32 <vopc<0xb, 0x4b>, "v_cmp_ngt_f32", COND_ULE>;
+defm V_CMP_NGT_F32 : VOPC_F32 <vopc<0xb, 0x4b>, "v_cmp_ngt_f32", COND_ULE, "v_cmp_nlt_f32">;
defm V_CMP_NLE_F32 : VOPC_F32 <vopc<0xc, 0x4c>, "v_cmp_nle_f32", COND_UGT>;
defm V_CMP_NEQ_F32 : VOPC_F32 <vopc<0xd, 0x4d>, "v_cmp_neq_f32", COND_UNE>;
defm V_CMP_NLT_F32 : VOPC_F32 <vopc<0xe, 0x4e>, "v_cmp_nlt_f32", COND_UGE>;
defm V_CMP_TRU_F32 : VOPC_F32 <vopc<0xf, 0x4f>, "v_cmp_tru_f32">;
-let hasSideEffects = 1 in {
defm V_CMPX_F_F32 : VOPCX_F32 <vopc<0x10, 0x50>, "v_cmpx_f_f32">;
-defm V_CMPX_LT_F32 : VOPCX_F32 <vopc<0x11, 0x51>, "v_cmpx_lt_f32">;
+defm V_CMPX_LT_F32 : VOPCX_F32 <vopc<0x11, 0x51>, "v_cmpx_lt_f32", "v_cmpx_gt_f32">;
defm V_CMPX_EQ_F32 : VOPCX_F32 <vopc<0x12, 0x52>, "v_cmpx_eq_f32">;
-defm V_CMPX_LE_F32 : VOPCX_F32 <vopc<0x13, 0x53>, "v_cmpx_le_f32">;
+defm V_CMPX_LE_F32 : VOPCX_F32 <vopc<0x13, 0x53>, "v_cmpx_le_f32", "v_cmpx_ge_f32">;
defm V_CMPX_GT_F32 : VOPCX_F32 <vopc<0x14, 0x54>, "v_cmpx_gt_f32">;
defm V_CMPX_LG_F32 : VOPCX_F32 <vopc<0x15, 0x55>, "v_cmpx_lg_f32">;
defm V_CMPX_GE_F32 : VOPCX_F32 <vopc<0x16, 0x56>, "v_cmpx_ge_f32">;
@@ -543,233 +547,207 @@ defm V_CMPX_NEQ_F32 : VOPCX_F32 <vopc<0x1d, 0x5d>, "v_cmpx_neq_f32">;
defm V_CMPX_NLT_F32 : VOPCX_F32 <vopc<0x1e, 0x5e>, "v_cmpx_nlt_f32">;
defm V_CMPX_TRU_F32 : VOPCX_F32 <vopc<0x1f, 0x5f>, "v_cmpx_tru_f32">;
-} // End hasSideEffects = 1
defm V_CMP_F_F64 : VOPC_F64 <vopc<0x20, 0x60>, "v_cmp_f_f64">;
-defm V_CMP_LT_F64 : VOPC_F64 <vopc<0x21, 0x61>, "v_cmp_lt_f64", COND_OLT>;
+defm V_CMP_LT_F64 : VOPC_F64 <vopc<0x21, 0x61>, "v_cmp_lt_f64", COND_OLT, "v_cmp_gt_f64">;
defm V_CMP_EQ_F64 : VOPC_F64 <vopc<0x22, 0x62>, "v_cmp_eq_f64", COND_OEQ>;
-defm V_CMP_LE_F64 : VOPC_F64 <vopc<0x23, 0x63>, "v_cmp_le_f64", COND_OLE>;
+defm V_CMP_LE_F64 : VOPC_F64 <vopc<0x23, 0x63>, "v_cmp_le_f64", COND_OLE, "v_cmp_ge_f64">;
defm V_CMP_GT_F64 : VOPC_F64 <vopc<0x24, 0x64>, "v_cmp_gt_f64", COND_OGT>;
defm V_CMP_LG_F64 : VOPC_F64 <vopc<0x25, 0x65>, "v_cmp_lg_f64", COND_ONE>;
defm V_CMP_GE_F64 : VOPC_F64 <vopc<0x26, 0x66>, "v_cmp_ge_f64", COND_OGE>;
defm V_CMP_O_F64 : VOPC_F64 <vopc<0x27, 0x67>, "v_cmp_o_f64", COND_O>;
defm V_CMP_U_F64 : VOPC_F64 <vopc<0x28, 0x68>, "v_cmp_u_f64", COND_UO>;
-defm V_CMP_NGE_F64 : VOPC_F64 <vopc<0x29, 0x69>, "v_cmp_nge_f64", COND_ULT>;
+defm V_CMP_NGE_F64 : VOPC_F64 <vopc<0x29, 0x69>, "v_cmp_nge_f64", COND_ULT, "v_cmp_nle_f64">;
defm V_CMP_NLG_F64 : VOPC_F64 <vopc<0x2a, 0x6a>, "v_cmp_nlg_f64", COND_UEQ>;
-defm V_CMP_NGT_F64 : VOPC_F64 <vopc<0x2b, 0x6b>, "v_cmp_ngt_f64", COND_ULE>;
+defm V_CMP_NGT_F64 : VOPC_F64 <vopc<0x2b, 0x6b>, "v_cmp_ngt_f64", COND_ULE, "v_cmp_nlt_f64">;
defm V_CMP_NLE_F64 : VOPC_F64 <vopc<0x2c, 0x6c>, "v_cmp_nle_f64", COND_UGT>;
defm V_CMP_NEQ_F64 : VOPC_F64 <vopc<0x2d, 0x6d>, "v_cmp_neq_f64", COND_UNE>;
defm V_CMP_NLT_F64 : VOPC_F64 <vopc<0x2e, 0x6e>, "v_cmp_nlt_f64", COND_UGE>;
defm V_CMP_TRU_F64 : VOPC_F64 <vopc<0x2f, 0x6f>, "v_cmp_tru_f64">;
-let hasSideEffects = 1 in {
defm V_CMPX_F_F64 : VOPCX_F64 <vopc<0x30, 0x70>, "v_cmpx_f_f64">;
-defm V_CMPX_LT_F64 : VOPCX_F64 <vopc<0x31, 0x71>, "v_cmpx_lt_f64">;
+defm V_CMPX_LT_F64 : VOPCX_F64 <vopc<0x31, 0x71>, "v_cmpx_lt_f64", "v_cmpx_gt_f64">;
defm V_CMPX_EQ_F64 : VOPCX_F64 <vopc<0x32, 0x72>, "v_cmpx_eq_f64">;
-defm V_CMPX_LE_F64 : VOPCX_F64 <vopc<0x33, 0x73>, "v_cmpx_le_f64">;
+defm V_CMPX_LE_F64 : VOPCX_F64 <vopc<0x33, 0x73>, "v_cmpx_le_f64", "v_cmpx_ge_f64">;
defm V_CMPX_GT_F64 : VOPCX_F64 <vopc<0x34, 0x74>, "v_cmpx_gt_f64">;
defm V_CMPX_LG_F64 : VOPCX_F64 <vopc<0x35, 0x75>, "v_cmpx_lg_f64">;
defm V_CMPX_GE_F64 : VOPCX_F64 <vopc<0x36, 0x76>, "v_cmpx_ge_f64">;
defm V_CMPX_O_F64 : VOPCX_F64 <vopc<0x37, 0x77>, "v_cmpx_o_f64">;
defm V_CMPX_U_F64 : VOPCX_F64 <vopc<0x38, 0x78>, "v_cmpx_u_f64">;
-defm V_CMPX_NGE_F64 : VOPCX_F64 <vopc<0x39, 0x79>, "v_cmpx_nge_f64">;
+defm V_CMPX_NGE_F64 : VOPCX_F64 <vopc<0x39, 0x79>, "v_cmpx_nge_f64", "v_cmpx_nle_f64">;
defm V_CMPX_NLG_F64 : VOPCX_F64 <vopc<0x3a, 0x7a>, "v_cmpx_nlg_f64">;
-defm V_CMPX_NGT_F64 : VOPCX_F64 <vopc<0x3b, 0x7b>, "v_cmpx_ngt_f64">;
+defm V_CMPX_NGT_F64 : VOPCX_F64 <vopc<0x3b, 0x7b>, "v_cmpx_ngt_f64", "v_cmpx_nlt_f64">;
defm V_CMPX_NLE_F64 : VOPCX_F64 <vopc<0x3c, 0x7c>, "v_cmpx_nle_f64">;
defm V_CMPX_NEQ_F64 : VOPCX_F64 <vopc<0x3d, 0x7d>, "v_cmpx_neq_f64">;
defm V_CMPX_NLT_F64 : VOPCX_F64 <vopc<0x3e, 0x7e>, "v_cmpx_nlt_f64">;
defm V_CMPX_TRU_F64 : VOPCX_F64 <vopc<0x3f, 0x7f>, "v_cmpx_tru_f64">;
-} // End hasSideEffects = 1
let SubtargetPredicate = isSICI in {
defm V_CMPS_F_F32 : VOPC_F32 <vopc<0x40>, "v_cmps_f_f32">;
-defm V_CMPS_LT_F32 : VOPC_F32 <vopc<0x41>, "v_cmps_lt_f32">;
+defm V_CMPS_LT_F32 : VOPC_F32 <vopc<0x41>, "v_cmps_lt_f32", COND_NULL, "v_cmps_gt_f32">;
defm V_CMPS_EQ_F32 : VOPC_F32 <vopc<0x42>, "v_cmps_eq_f32">;
-defm V_CMPS_LE_F32 : VOPC_F32 <vopc<0x43>, "v_cmps_le_f32">;
+defm V_CMPS_LE_F32 : VOPC_F32 <vopc<0x43>, "v_cmps_le_f32", COND_NULL, "v_cmps_ge_f32">;
defm V_CMPS_GT_F32 : VOPC_F32 <vopc<0x44>, "v_cmps_gt_f32">;
defm V_CMPS_LG_F32 : VOPC_F32 <vopc<0x45>, "v_cmps_lg_f32">;
defm V_CMPS_GE_F32 : VOPC_F32 <vopc<0x46>, "v_cmps_ge_f32">;
defm V_CMPS_O_F32 : VOPC_F32 <vopc<0x47>, "v_cmps_o_f32">;
defm V_CMPS_U_F32 : VOPC_F32 <vopc<0x48>, "v_cmps_u_f32">;
-defm V_CMPS_NGE_F32 : VOPC_F32 <vopc<0x49>, "v_cmps_nge_f32">;
+defm V_CMPS_NGE_F32 : VOPC_F32 <vopc<0x49>, "v_cmps_nge_f32", COND_NULL, "v_cmps_nle_f32">;
defm V_CMPS_NLG_F32 : VOPC_F32 <vopc<0x4a>, "v_cmps_nlg_f32">;
-defm V_CMPS_NGT_F32 : VOPC_F32 <vopc<0x4b>, "v_cmps_ngt_f32">;
+defm V_CMPS_NGT_F32 : VOPC_F32 <vopc<0x4b>, "v_cmps_ngt_f32", COND_NULL, "v_cmps_nlt_f32">;
defm V_CMPS_NLE_F32 : VOPC_F32 <vopc<0x4c>, "v_cmps_nle_f32">;
defm V_CMPS_NEQ_F32 : VOPC_F32 <vopc<0x4d>, "v_cmps_neq_f32">;
defm V_CMPS_NLT_F32 : VOPC_F32 <vopc<0x4e>, "v_cmps_nlt_f32">;
defm V_CMPS_TRU_F32 : VOPC_F32 <vopc<0x4f>, "v_cmps_tru_f32">;
-let hasSideEffects = 1 in {
defm V_CMPSX_F_F32 : VOPCX_F32 <vopc<0x50>, "v_cmpsx_f_f32">;
-defm V_CMPSX_LT_F32 : VOPCX_F32 <vopc<0x51>, "v_cmpsx_lt_f32">;
+defm V_CMPSX_LT_F32 : VOPCX_F32 <vopc<0x51>, "v_cmpsx_lt_f32", "v_cmpsx_gt_f32">;
defm V_CMPSX_EQ_F32 : VOPCX_F32 <vopc<0x52>, "v_cmpsx_eq_f32">;
-defm V_CMPSX_LE_F32 : VOPCX_F32 <vopc<0x53>, "v_cmpsx_le_f32">;
+defm V_CMPSX_LE_F32 : VOPCX_F32 <vopc<0x53>, "v_cmpsx_le_f32", "v_cmpsx_ge_f32">;
defm V_CMPSX_GT_F32 : VOPCX_F32 <vopc<0x54>, "v_cmpsx_gt_f32">;
defm V_CMPSX_LG_F32 : VOPCX_F32 <vopc<0x55>, "v_cmpsx_lg_f32">;
defm V_CMPSX_GE_F32 : VOPCX_F32 <vopc<0x56>, "v_cmpsx_ge_f32">;
defm V_CMPSX_O_F32 : VOPCX_F32 <vopc<0x57>, "v_cmpsx_o_f32">;
defm V_CMPSX_U_F32 : VOPCX_F32 <vopc<0x58>, "v_cmpsx_u_f32">;
-defm V_CMPSX_NGE_F32 : VOPCX_F32 <vopc<0x59>, "v_cmpsx_nge_f32">;
+defm V_CMPSX_NGE_F32 : VOPCX_F32 <vopc<0x59>, "v_cmpsx_nge_f32", "v_cmpsx_nle_f32">;
defm V_CMPSX_NLG_F32 : VOPCX_F32 <vopc<0x5a>, "v_cmpsx_nlg_f32">;
-defm V_CMPSX_NGT_F32 : VOPCX_F32 <vopc<0x5b>, "v_cmpsx_ngt_f32">;
+defm V_CMPSX_NGT_F32 : VOPCX_F32 <vopc<0x5b>, "v_cmpsx_ngt_f32", "v_cmpsx_nlt_f32">;
defm V_CMPSX_NLE_F32 : VOPCX_F32 <vopc<0x5c>, "v_cmpsx_nle_f32">;
defm V_CMPSX_NEQ_F32 : VOPCX_F32 <vopc<0x5d>, "v_cmpsx_neq_f32">;
defm V_CMPSX_NLT_F32 : VOPCX_F32 <vopc<0x5e>, "v_cmpsx_nlt_f32">;
defm V_CMPSX_TRU_F32 : VOPCX_F32 <vopc<0x5f>, "v_cmpsx_tru_f32">;
-} // End hasSideEffects = 1
defm V_CMPS_F_F64 : VOPC_F64 <vopc<0x60>, "v_cmps_f_f64">;
-defm V_CMPS_LT_F64 : VOPC_F64 <vopc<0x61>, "v_cmps_lt_f64">;
+defm V_CMPS_LT_F64 : VOPC_F64 <vopc<0x61>, "v_cmps_lt_f64", COND_NULL, "v_cmps_gt_f64">;
defm V_CMPS_EQ_F64 : VOPC_F64 <vopc<0x62>, "v_cmps_eq_f64">;
-defm V_CMPS_LE_F64 : VOPC_F64 <vopc<0x63>, "v_cmps_le_f64">;
+defm V_CMPS_LE_F64 : VOPC_F64 <vopc<0x63>, "v_cmps_le_f64", COND_NULL, "v_cmps_ge_f64">;
defm V_CMPS_GT_F64 : VOPC_F64 <vopc<0x64>, "v_cmps_gt_f64">;
defm V_CMPS_LG_F64 : VOPC_F64 <vopc<0x65>, "v_cmps_lg_f64">;
defm V_CMPS_GE_F64 : VOPC_F64 <vopc<0x66>, "v_cmps_ge_f64">;
defm V_CMPS_O_F64 : VOPC_F64 <vopc<0x67>, "v_cmps_o_f64">;
defm V_CMPS_U_F64 : VOPC_F64 <vopc<0x68>, "v_cmps_u_f64">;
-defm V_CMPS_NGE_F64 : VOPC_F64 <vopc<0x69>, "v_cmps_nge_f64">;
+defm V_CMPS_NGE_F64 : VOPC_F64 <vopc<0x69>, "v_cmps_nge_f64", COND_NULL, "v_cmps_nle_f64">;
defm V_CMPS_NLG_F64 : VOPC_F64 <vopc<0x6a>, "v_cmps_nlg_f64">;
-defm V_CMPS_NGT_F64 : VOPC_F64 <vopc<0x6b>, "v_cmps_ngt_f64">;
+defm V_CMPS_NGT_F64 : VOPC_F64 <vopc<0x6b>, "v_cmps_ngt_f64", COND_NULL, "v_cmps_nlt_f64">;
defm V_CMPS_NLE_F64 : VOPC_F64 <vopc<0x6c>, "v_cmps_nle_f64">;
defm V_CMPS_NEQ_F64 : VOPC_F64 <vopc<0x6d>, "v_cmps_neq_f64">;
defm V_CMPS_NLT_F64 : VOPC_F64 <vopc<0x6e>, "v_cmps_nlt_f64">;
defm V_CMPS_TRU_F64 : VOPC_F64 <vopc<0x6f>, "v_cmps_tru_f64">;
-let hasSideEffects = 1, Defs = [EXEC] in {
-
-defm V_CMPSX_F_F64 : VOPC_F64 <vopc<0x70>, "v_cmpsx_f_f64">;
-defm V_CMPSX_LT_F64 : VOPC_F64 <vopc<0x71>, "v_cmpsx_lt_f64">;
-defm V_CMPSX_EQ_F64 : VOPC_F64 <vopc<0x72>, "v_cmpsx_eq_f64">;
-defm V_CMPSX_LE_F64 : VOPC_F64 <vopc<0x73>, "v_cmpsx_le_f64">;
-defm V_CMPSX_GT_F64 : VOPC_F64 <vopc<0x74>, "v_cmpsx_gt_f64">;
-defm V_CMPSX_LG_F64 : VOPC_F64 <vopc<0x75>, "v_cmpsx_lg_f64">;
-defm V_CMPSX_GE_F64 : VOPC_F64 <vopc<0x76>, "v_cmpsx_ge_f64">;
-defm V_CMPSX_O_F64 : VOPC_F64 <vopc<0x77>, "v_cmpsx_o_f64">;
-defm V_CMPSX_U_F64 : VOPC_F64 <vopc<0x78>, "v_cmpsx_u_f64">;
-defm V_CMPSX_NGE_F64 : VOPC_F64 <vopc<0x79>, "v_cmpsx_nge_f64">;
-defm V_CMPSX_NLG_F64 : VOPC_F64 <vopc<0x7a>, "v_cmpsx_nlg_f64">;
-defm V_CMPSX_NGT_F64 : VOPC_F64 <vopc<0x7b>, "v_cmpsx_ngt_f64">;
-defm V_CMPSX_NLE_F64 : VOPC_F64 <vopc<0x7c>, "v_cmpsx_nle_f64">;
-defm V_CMPSX_NEQ_F64 : VOPC_F64 <vopc<0x7d>, "v_cmpsx_neq_f64">;
-defm V_CMPSX_NLT_F64 : VOPC_F64 <vopc<0x7e>, "v_cmpsx_nlt_f64">;
-defm V_CMPSX_TRU_F64 : VOPC_F64 <vopc<0x7f>, "v_cmpsx_tru_f64">;
-
-} // End hasSideEffects = 1, Defs = [EXEC]
+
+defm V_CMPSX_F_F64 : VOPCX_F64 <vopc<0x70>, "v_cmpsx_f_f64">;
+defm V_CMPSX_LT_F64 : VOPCX_F64 <vopc<0x71>, "v_cmpsx_lt_f64", "v_cmpsx_gt_f64">;
+defm V_CMPSX_EQ_F64 : VOPCX_F64 <vopc<0x72>, "v_cmpsx_eq_f64">;
+defm V_CMPSX_LE_F64 : VOPCX_F64 <vopc<0x73>, "v_cmpsx_le_f64", "v_cmpsx_ge_f64">;
+defm V_CMPSX_GT_F64 : VOPCX_F64 <vopc<0x74>, "v_cmpsx_gt_f64">;
+defm V_CMPSX_LG_F64 : VOPCX_F64 <vopc<0x75>, "v_cmpsx_lg_f64">;
+defm V_CMPSX_GE_F64 : VOPCX_F64 <vopc<0x76>, "v_cmpsx_ge_f64">;
+defm V_CMPSX_O_F64 : VOPCX_F64 <vopc<0x77>, "v_cmpsx_o_f64">;
+defm V_CMPSX_U_F64 : VOPCX_F64 <vopc<0x78>, "v_cmpsx_u_f64">;
+defm V_CMPSX_NGE_F64 : VOPCX_F64 <vopc<0x79>, "v_cmpsx_nge_f64", "v_cmpsx_nle_f64">;
+defm V_CMPSX_NLG_F64 : VOPCX_F64 <vopc<0x7a>, "v_cmpsx_nlg_f64">;
+defm V_CMPSX_NGT_F64 : VOPCX_F64 <vopc<0x7b>, "v_cmpsx_ngt_f64", "v_cmpsx_nlt_f64">;
+defm V_CMPSX_NLE_F64 : VOPCX_F64 <vopc<0x7c>, "v_cmpsx_nle_f64">;
+defm V_CMPSX_NEQ_F64 : VOPCX_F64 <vopc<0x7d>, "v_cmpsx_neq_f64">;
+defm V_CMPSX_NLT_F64 : VOPCX_F64 <vopc<0x7e>, "v_cmpsx_nlt_f64">;
+defm V_CMPSX_TRU_F64 : VOPCX_F64 <vopc<0x7f>, "v_cmpsx_tru_f64">;
} // End SubtargetPredicate = isSICI
defm V_CMP_F_I32 : VOPC_I32 <vopc<0x80, 0xc0>, "v_cmp_f_i32">;
-defm V_CMP_LT_I32 : VOPC_I32 <vopc<0x81, 0xc1>, "v_cmp_lt_i32", COND_SLT>;
+defm V_CMP_LT_I32 : VOPC_I32 <vopc<0x81, 0xc1>, "v_cmp_lt_i32", COND_SLT, "v_cmp_gt_i32">;
defm V_CMP_EQ_I32 : VOPC_I32 <vopc<0x82, 0xc2>, "v_cmp_eq_i32", COND_EQ>;
-defm V_CMP_LE_I32 : VOPC_I32 <vopc<0x83, 0xc3>, "v_cmp_le_i32", COND_SLE>;
+defm V_CMP_LE_I32 : VOPC_I32 <vopc<0x83, 0xc3>, "v_cmp_le_i32", COND_SLE, "v_cmp_ge_i32">;
defm V_CMP_GT_I32 : VOPC_I32 <vopc<0x84, 0xc4>, "v_cmp_gt_i32", COND_SGT>;
defm V_CMP_NE_I32 : VOPC_I32 <vopc<0x85, 0xc5>, "v_cmp_ne_i32", COND_NE>;
defm V_CMP_GE_I32 : VOPC_I32 <vopc<0x86, 0xc6>, "v_cmp_ge_i32", COND_SGE>;
defm V_CMP_T_I32 : VOPC_I32 <vopc<0x87, 0xc7>, "v_cmp_t_i32">;
-let hasSideEffects = 1 in {
defm V_CMPX_F_I32 : VOPCX_I32 <vopc<0x90, 0xd0>, "v_cmpx_f_i32">;
-defm V_CMPX_LT_I32 : VOPCX_I32 <vopc<0x91, 0xd1>, "v_cmpx_lt_i32">;
+defm V_CMPX_LT_I32 : VOPCX_I32 <vopc<0x91, 0xd1>, "v_cmpx_lt_i32", "v_cmpx_gt_i32">;
defm V_CMPX_EQ_I32 : VOPCX_I32 <vopc<0x92, 0xd2>, "v_cmpx_eq_i32">;
-defm V_CMPX_LE_I32 : VOPCX_I32 <vopc<0x93, 0xd3>, "v_cmpx_le_i32">;
+defm V_CMPX_LE_I32 : VOPCX_I32 <vopc<0x93, 0xd3>, "v_cmpx_le_i32", "v_cmpx_ge_i32">;
defm V_CMPX_GT_I32 : VOPCX_I32 <vopc<0x94, 0xd4>, "v_cmpx_gt_i32">;
defm V_CMPX_NE_I32 : VOPCX_I32 <vopc<0x95, 0xd5>, "v_cmpx_ne_i32">;
defm V_CMPX_GE_I32 : VOPCX_I32 <vopc<0x96, 0xd6>, "v_cmpx_ge_i32">;
defm V_CMPX_T_I32 : VOPCX_I32 <vopc<0x97, 0xd7>, "v_cmpx_t_i32">;
-} // End hasSideEffects = 1
defm V_CMP_F_I64 : VOPC_I64 <vopc<0xa0, 0xe0>, "v_cmp_f_i64">;
-defm V_CMP_LT_I64 : VOPC_I64 <vopc<0xa1, 0xe1>, "v_cmp_lt_i64", COND_SLT>;
+defm V_CMP_LT_I64 : VOPC_I64 <vopc<0xa1, 0xe1>, "v_cmp_lt_i64", COND_SLT, "v_cmp_gt_i64">;
defm V_CMP_EQ_I64 : VOPC_I64 <vopc<0xa2, 0xe2>, "v_cmp_eq_i64", COND_EQ>;
-defm V_CMP_LE_I64 : VOPC_I64 <vopc<0xa3, 0xe3>, "v_cmp_le_i64", COND_SLE>;
+defm V_CMP_LE_I64 : VOPC_I64 <vopc<0xa3, 0xe3>, "v_cmp_le_i64", COND_SLE, "v_cmp_ge_i64">;
defm V_CMP_GT_I64 : VOPC_I64 <vopc<0xa4, 0xe4>, "v_cmp_gt_i64", COND_SGT>;
defm V_CMP_NE_I64 : VOPC_I64 <vopc<0xa5, 0xe5>, "v_cmp_ne_i64", COND_NE>;
defm V_CMP_GE_I64 : VOPC_I64 <vopc<0xa6, 0xe6>, "v_cmp_ge_i64", COND_SGE>;
defm V_CMP_T_I64 : VOPC_I64 <vopc<0xa7, 0xe7>, "v_cmp_t_i64">;
-let hasSideEffects = 1 in {
defm V_CMPX_F_I64 : VOPCX_I64 <vopc<0xb0, 0xf0>, "v_cmpx_f_i64">;
-defm V_CMPX_LT_I64 : VOPCX_I64 <vopc<0xb1, 0xf1>, "v_cmpx_lt_i64">;
+defm V_CMPX_LT_I64 : VOPCX_I64 <vopc<0xb1, 0xf1>, "v_cmpx_lt_i64", "v_cmpx_gt_i64">;
defm V_CMPX_EQ_I64 : VOPCX_I64 <vopc<0xb2, 0xf2>, "v_cmpx_eq_i64">;
-defm V_CMPX_LE_I64 : VOPCX_I64 <vopc<0xb3, 0xf3>, "v_cmpx_le_i64">;
+defm V_CMPX_LE_I64 : VOPCX_I64 <vopc<0xb3, 0xf3>, "v_cmpx_le_i64", "v_cmpx_ge_i64">;
defm V_CMPX_GT_I64 : VOPCX_I64 <vopc<0xb4, 0xf4>, "v_cmpx_gt_i64">;
defm V_CMPX_NE_I64 : VOPCX_I64 <vopc<0xb5, 0xf5>, "v_cmpx_ne_i64">;
defm V_CMPX_GE_I64 : VOPCX_I64 <vopc<0xb6, 0xf6>, "v_cmpx_ge_i64">;
defm V_CMPX_T_I64 : VOPCX_I64 <vopc<0xb7, 0xf7>, "v_cmpx_t_i64">;
-} // End hasSideEffects = 1
defm V_CMP_F_U32 : VOPC_I32 <vopc<0xc0, 0xc8>, "v_cmp_f_u32">;
-defm V_CMP_LT_U32 : VOPC_I32 <vopc<0xc1, 0xc9>, "v_cmp_lt_u32", COND_ULT>;
+defm V_CMP_LT_U32 : VOPC_I32 <vopc<0xc1, 0xc9>, "v_cmp_lt_u32", COND_ULT, "v_cmp_gt_u32">;
defm V_CMP_EQ_U32 : VOPC_I32 <vopc<0xc2, 0xca>, "v_cmp_eq_u32", COND_EQ>;
-defm V_CMP_LE_U32 : VOPC_I32 <vopc<0xc3, 0xcb>, "v_cmp_le_u32", COND_ULE>;
+defm V_CMP_LE_U32 : VOPC_I32 <vopc<0xc3, 0xcb>, "v_cmp_le_u32", COND_ULE, "v_cmp_ge_u32">;
defm V_CMP_GT_U32 : VOPC_I32 <vopc<0xc4, 0xcc>, "v_cmp_gt_u32", COND_UGT>;
defm V_CMP_NE_U32 : VOPC_I32 <vopc<0xc5, 0xcd>, "v_cmp_ne_u32", COND_NE>;
defm V_CMP_GE_U32 : VOPC_I32 <vopc<0xc6, 0xce>, "v_cmp_ge_u32", COND_UGE>;
defm V_CMP_T_U32 : VOPC_I32 <vopc<0xc7, 0xcf>, "v_cmp_t_u32">;
-let hasSideEffects = 1 in {
defm V_CMPX_F_U32 : VOPCX_I32 <vopc<0xd0, 0xd8>, "v_cmpx_f_u32">;
-defm V_CMPX_LT_U32 : VOPCX_I32 <vopc<0xd1, 0xd9>, "v_cmpx_lt_u32">;
+defm V_CMPX_LT_U32 : VOPCX_I32 <vopc<0xd1, 0xd9>, "v_cmpx_lt_u32", "v_cmpx_gt_u32">;
defm V_CMPX_EQ_U32 : VOPCX_I32 <vopc<0xd2, 0xda>, "v_cmpx_eq_u32">;
-defm V_CMPX_LE_U32 : VOPCX_I32 <vopc<0xd3, 0xdb>, "v_cmpx_le_u32">;
+defm V_CMPX_LE_U32 : VOPCX_I32 <vopc<0xd3, 0xdb>, "v_cmpx_le_u32", "v_cmpx_ge_u32">;
defm V_CMPX_GT_U32 : VOPCX_I32 <vopc<0xd4, 0xdc>, "v_cmpx_gt_u32">;
defm V_CMPX_NE_U32 : VOPCX_I32 <vopc<0xd5, 0xdd>, "v_cmpx_ne_u32">;
defm V_CMPX_GE_U32 : VOPCX_I32 <vopc<0xd6, 0xde>, "v_cmpx_ge_u32">;
defm V_CMPX_T_U32 : VOPCX_I32 <vopc<0xd7, 0xdf>, "v_cmpx_t_u32">;
-} // End hasSideEffects = 1
defm V_CMP_F_U64 : VOPC_I64 <vopc<0xe0, 0xe8>, "v_cmp_f_u64">;
-defm V_CMP_LT_U64 : VOPC_I64 <vopc<0xe1, 0xe9>, "v_cmp_lt_u64", COND_ULT>;
+defm V_CMP_LT_U64 : VOPC_I64 <vopc<0xe1, 0xe9>, "v_cmp_lt_u64", COND_ULT, "v_cmp_gt_u64">;
defm V_CMP_EQ_U64 : VOPC_I64 <vopc<0xe2, 0xea>, "v_cmp_eq_u64", COND_EQ>;
-defm V_CMP_LE_U64 : VOPC_I64 <vopc<0xe3, 0xeb>, "v_cmp_le_u64", COND_ULE>;
+defm V_CMP_LE_U64 : VOPC_I64 <vopc<0xe3, 0xeb>, "v_cmp_le_u64", COND_ULE, "v_cmp_ge_u64">;
defm V_CMP_GT_U64 : VOPC_I64 <vopc<0xe4, 0xec>, "v_cmp_gt_u64", COND_UGT>;
defm V_CMP_NE_U64 : VOPC_I64 <vopc<0xe5, 0xed>, "v_cmp_ne_u64", COND_NE>;
defm V_CMP_GE_U64 : VOPC_I64 <vopc<0xe6, 0xee>, "v_cmp_ge_u64", COND_UGE>;
defm V_CMP_T_U64 : VOPC_I64 <vopc<0xe7, 0xef>, "v_cmp_t_u64">;
-let hasSideEffects = 1 in {
-
defm V_CMPX_F_U64 : VOPCX_I64 <vopc<0xf0, 0xf8>, "v_cmpx_f_u64">;
-defm V_CMPX_LT_U64 : VOPCX_I64 <vopc<0xf1, 0xf9>, "v_cmpx_lt_u64">;
+defm V_CMPX_LT_U64 : VOPCX_I64 <vopc<0xf1, 0xf9>, "v_cmpx_lt_u64", "v_cmpx_gt_u64">;
defm V_CMPX_EQ_U64 : VOPCX_I64 <vopc<0xf2, 0xfa>, "v_cmpx_eq_u64">;
-defm V_CMPX_LE_U64 : VOPCX_I64 <vopc<0xf3, 0xfb>, "v_cmpx_le_u64">;
+defm V_CMPX_LE_U64 : VOPCX_I64 <vopc<0xf3, 0xfb>, "v_cmpx_le_u64", "v_cmpx_ge_u64">;
defm V_CMPX_GT_U64 : VOPCX_I64 <vopc<0xf4, 0xfc>, "v_cmpx_gt_u64">;
defm V_CMPX_NE_U64 : VOPCX_I64 <vopc<0xf5, 0xfd>, "v_cmpx_ne_u64">;
defm V_CMPX_GE_U64 : VOPCX_I64 <vopc<0xf6, 0xfe>, "v_cmpx_ge_u64">;
defm V_CMPX_T_U64 : VOPCX_I64 <vopc<0xf7, 0xff>, "v_cmpx_t_u64">;
-} // End hasSideEffects = 1
+} // End isCompare = 1, isCommutable = 1
defm V_CMP_CLASS_F32 : VOPC_CLASS_F32 <vopc<0x88, 0x10>, "v_cmp_class_f32">;
-
-let hasSideEffects = 1 in {
defm V_CMPX_CLASS_F32 : VOPCX_CLASS_F32 <vopc<0x98, 0x11>, "v_cmpx_class_f32">;
-} // End hasSideEffects = 1
-
defm V_CMP_CLASS_F64 : VOPC_CLASS_F64 <vopc<0xa8, 0x12>, "v_cmp_class_f64">;
-
-let hasSideEffects = 1 in {
defm V_CMPX_CLASS_F64 : VOPCX_CLASS_F64 <vopc<0xb8, 0x13>, "v_cmpx_class_f64">;
-} // End hasSideEffects = 1
-
-} // End isCompare = 1
//===----------------------------------------------------------------------===//
// DS Instructions
//===----------------------------------------------------------------------===//
-
defm DS_ADD_U32 : DS_1A1D_NORET <0x0, "ds_add_u32", VGPR_32>;
defm DS_SUB_U32 : DS_1A1D_NORET <0x1, "ds_sub_u32", VGPR_32>;
defm DS_RSUB_U32 : DS_1A1D_NORET <0x2, "ds_rsub_u32", VGPR_32>;
@@ -782,12 +760,26 @@ defm DS_MAX_U32 : DS_1A1D_NORET <0x8, "ds_max_u32", VGPR_32>;
defm DS_AND_B32 : DS_1A1D_NORET <0x9, "ds_and_b32", VGPR_32>;
defm DS_OR_B32 : DS_1A1D_NORET <0xa, "ds_or_b32", VGPR_32>;
defm DS_XOR_B32 : DS_1A1D_NORET <0xb, "ds_xor_b32", VGPR_32>;
-defm DS_MSKOR_B32 : DS_1A1D_NORET <0xc, "ds_mskor_b32", VGPR_32>;
+defm DS_MSKOR_B32 : DS_1A2D_NORET <0xc, "ds_mskor_b32", VGPR_32>;
+let mayLoad = 0 in {
+defm DS_WRITE_B32 : DS_1A1D_NORET <0xd, "ds_write_b32", VGPR_32>;
+defm DS_WRITE2_B32 : DS_1A1D_Off8_NORET <0xe, "ds_write2_b32", VGPR_32>;
+defm DS_WRITE2ST64_B32 : DS_1A1D_Off8_NORET <0xf, "ds_write2st64_b32", VGPR_32>;
+}
defm DS_CMPST_B32 : DS_1A2D_NORET <0x10, "ds_cmpst_b32", VGPR_32>;
defm DS_CMPST_F32 : DS_1A2D_NORET <0x11, "ds_cmpst_f32", VGPR_32>;
-defm DS_MIN_F32 : DS_1A1D_NORET <0x12, "ds_min_f32", VGPR_32>;
-defm DS_MAX_F32 : DS_1A1D_NORET <0x13, "ds_max_f32", VGPR_32>;
-
+defm DS_MIN_F32 : DS_1A2D_NORET <0x12, "ds_min_f32", VGPR_32>;
+defm DS_MAX_F32 : DS_1A2D_NORET <0x13, "ds_max_f32", VGPR_32>;
+
+defm DS_GWS_INIT : DS_1A_GDS <0x19, "ds_gws_init">;
+defm DS_GWS_SEMA_V : DS_1A_GDS <0x1a, "ds_gws_sema_v">;
+defm DS_GWS_SEMA_BR : DS_1A_GDS <0x1b, "ds_gws_sema_br">;
+defm DS_GWS_SEMA_P : DS_1A_GDS <0x1c, "ds_gws_sema_p">;
+defm DS_GWS_BARRIER : DS_1A_GDS <0x1d, "ds_gws_barrier">;
+let mayLoad = 0 in {
+defm DS_WRITE_B8 : DS_1A1D_NORET <0x1e, "ds_write_b8", VGPR_32>;
+defm DS_WRITE_B16 : DS_1A1D_NORET <0x1f, "ds_write_b16", VGPR_32>;
+}
defm DS_ADD_RTN_U32 : DS_1A1D_RET <0x20, "ds_add_rtn_u32", VGPR_32, "ds_add_u32">;
defm DS_SUB_RTN_U32 : DS_1A1D_RET <0x21, "ds_sub_rtn_u32", VGPR_32, "ds_sub_u32">;
defm DS_RSUB_RTN_U32 : DS_1A1D_RET <0x22, "ds_rsub_rtn_u32", VGPR_32, "ds_rsub_u32">;
@@ -800,20 +792,34 @@ defm DS_MAX_RTN_U32 : DS_1A1D_RET <0x28, "ds_max_rtn_u32", VGPR_32, "ds_max_u32"
defm DS_AND_RTN_B32 : DS_1A1D_RET <0x29, "ds_and_rtn_b32", VGPR_32, "ds_and_b32">;
defm DS_OR_RTN_B32 : DS_1A1D_RET <0x2a, "ds_or_rtn_b32", VGPR_32, "ds_or_b32">;
defm DS_XOR_RTN_B32 : DS_1A1D_RET <0x2b, "ds_xor_rtn_b32", VGPR_32, "ds_xor_b32">;
-defm DS_MSKOR_RTN_B32 : DS_1A1D_RET <0x2c, "ds_mskor_rtn_b32", VGPR_32, "ds_mskor_b32">;
+defm DS_MSKOR_RTN_B32 : DS_1A2D_RET <0x2c, "ds_mskor_rtn_b32", VGPR_32, "ds_mskor_b32">;
defm DS_WRXCHG_RTN_B32 : DS_1A1D_RET <0x2d, "ds_wrxchg_rtn_b32", VGPR_32>;
-//def DS_WRXCHG2_RTN_B32 : DS_2A0D_RET <0x2e, "ds_wrxchg2_rtn_b32", VGPR_32, "ds_wrxchg2_b32">;
-//def DS_WRXCHG2ST64_RTN_B32 : DS_2A0D_RET <0x2f, "ds_wrxchg2_rtn_b32", VGPR_32, "ds_wrxchg2st64_b32">;
+defm DS_WRXCHG2_RTN_B32 : DS_1A2D_RET <
+ 0x2e, "ds_wrxchg2_rtn_b32", VReg_64, "", VGPR_32
+>;
+defm DS_WRXCHG2ST64_RTN_B32 : DS_1A2D_RET <
+ 0x2f, "ds_wrxchg2st64_rtn_b32", VReg_64, "", VGPR_32
+>;
defm DS_CMPST_RTN_B32 : DS_1A2D_RET <0x30, "ds_cmpst_rtn_b32", VGPR_32, "ds_cmpst_b32">;
defm DS_CMPST_RTN_F32 : DS_1A2D_RET <0x31, "ds_cmpst_rtn_f32", VGPR_32, "ds_cmpst_f32">;
-defm DS_MIN_RTN_F32 : DS_1A1D_RET <0x32, "ds_min_rtn_f32", VGPR_32, "ds_min_f32">;
-defm DS_MAX_RTN_F32 : DS_1A1D_RET <0x33, "ds_max_rtn_f32", VGPR_32, "ds_max_f32">;
-
+defm DS_MIN_RTN_F32 : DS_1A2D_RET <0x32, "ds_min_rtn_f32", VGPR_32, "ds_min_f32">;
+defm DS_MAX_RTN_F32 : DS_1A2D_RET <0x33, "ds_max_rtn_f32", VGPR_32, "ds_max_f32">;
let SubtargetPredicate = isCI in {
defm DS_WRAP_RTN_F32 : DS_1A1D_RET <0x34, "ds_wrap_rtn_f32", VGPR_32, "ds_wrap_f32">;
} // End isCI
-
-
+defm DS_SWIZZLE_B32 : DS_1A_RET <0x35, "ds_swizzle_b32", VGPR_32>;
+let mayStore = 0 in {
+defm DS_READ_B32 : DS_1A_RET <0x36, "ds_read_b32", VGPR_32>;
+defm DS_READ2_B32 : DS_1A_Off8_RET <0x37, "ds_read2_b32", VReg_64>;
+defm DS_READ2ST64_B32 : DS_1A_Off8_RET <0x38, "ds_read2st64_b32", VReg_64>;
+defm DS_READ_I8 : DS_1A_RET <0x39, "ds_read_i8", VGPR_32>;
+defm DS_READ_U8 : DS_1A_RET <0x3a, "ds_read_u8", VGPR_32>;
+defm DS_READ_I16 : DS_1A_RET <0x3b, "ds_read_i16", VGPR_32>;
+defm DS_READ_U16 : DS_1A_RET <0x3c, "ds_read_u16", VGPR_32>;
+}
+defm DS_CONSUME : DS_0A_RET <0x3d, "ds_consume">;
+defm DS_APPEND : DS_0A_RET <0x3e, "ds_append">;
+defm DS_ORDERED_COUNT : DS_1A_RET_GDS <0x3f, "ds_ordered_count">;
defm DS_ADD_U64 : DS_1A1D_NORET <0x40, "ds_add_u64", VReg_64>;
defm DS_SUB_U64 : DS_1A1D_NORET <0x41, "ds_sub_u64", VReg_64>;
defm DS_RSUB_U64 : DS_1A1D_NORET <0x42, "ds_rsub_u64", VReg_64>;
@@ -826,7 +832,12 @@ defm DS_MAX_U64 : DS_1A1D_NORET <0x48, "ds_max_u64", VReg_64>;
defm DS_AND_B64 : DS_1A1D_NORET <0x49, "ds_and_b64", VReg_64>;
defm DS_OR_B64 : DS_1A1D_NORET <0x4a, "ds_or_b64", VReg_64>;
defm DS_XOR_B64 : DS_1A1D_NORET <0x4b, "ds_xor_b64", VReg_64>;
-defm DS_MSKOR_B64 : DS_1A1D_NORET <0x4c, "ds_mskor_b64", VReg_64>;
+defm DS_MSKOR_B64 : DS_1A2D_NORET <0x4c, "ds_mskor_b64", VReg_64>;
+let mayLoad = 0 in {
+defm DS_WRITE_B64 : DS_1A1D_NORET <0x4d, "ds_write_b64", VReg_64>;
+defm DS_WRITE2_B64 : DS_1A1D_Off8_NORET <0x4e, "ds_write2_b64", VReg_64>;
+defm DS_WRITE2ST64_B64 : DS_1A1D_Off8_NORET <0x4f, "ds_write2st64_b64", VReg_64>;
+}
defm DS_CMPST_B64 : DS_1A2D_NORET <0x50, "ds_cmpst_b64", VReg_64>;
defm DS_CMPST_F64 : DS_1A2D_NORET <0x51, "ds_cmpst_f64", VReg_64>;
defm DS_MIN_F64 : DS_1A1D_NORET <0x52, "ds_min_f64", VReg_64>;
@@ -844,57 +855,88 @@ defm DS_MAX_RTN_U64 : DS_1A1D_RET <0x68, "ds_max_rtn_u64", VReg_64, "ds_max_u64"
defm DS_AND_RTN_B64 : DS_1A1D_RET <0x69, "ds_and_rtn_b64", VReg_64, "ds_and_b64">;
defm DS_OR_RTN_B64 : DS_1A1D_RET <0x6a, "ds_or_rtn_b64", VReg_64, "ds_or_b64">;
defm DS_XOR_RTN_B64 : DS_1A1D_RET <0x6b, "ds_xor_rtn_b64", VReg_64, "ds_xor_b64">;
-defm DS_MSKOR_RTN_B64 : DS_1A1D_RET <0x6c, "ds_mskor_rtn_b64", VReg_64, "ds_mskor_b64">;
+defm DS_MSKOR_RTN_B64 : DS_1A2D_RET <0x6c, "ds_mskor_rtn_b64", VReg_64, "ds_mskor_b64">;
defm DS_WRXCHG_RTN_B64 : DS_1A1D_RET <0x6d, "ds_wrxchg_rtn_b64", VReg_64, "ds_wrxchg_b64">;
-//def DS_WRXCHG2_RTN_B64 : DS_2A0D_RET <0x6e, "ds_wrxchg2_rtn_b64", VReg_64, "ds_wrxchg2_b64">;
-//def DS_WRXCHG2ST64_RTN_B64 : DS_2A0D_RET <0x6f, "ds_wrxchg2_rtn_b64", VReg_64, "ds_wrxchg2st64_b64">;
+defm DS_WRXCHG2_RTN_B64 : DS_1A2D_RET <0x6e, "ds_wrxchg2_rtn_b64", VReg_128, "ds_wrxchg2_b64", VReg_64>;
+defm DS_WRXCHG2ST64_RTN_B64 : DS_1A2D_RET <0x6f, "ds_wrxchg2st64_rtn_b64", VReg_128, "ds_wrxchg2st64_b64", VReg_64>;
defm DS_CMPST_RTN_B64 : DS_1A2D_RET <0x70, "ds_cmpst_rtn_b64", VReg_64, "ds_cmpst_b64">;
defm DS_CMPST_RTN_F64 : DS_1A2D_RET <0x71, "ds_cmpst_rtn_f64", VReg_64, "ds_cmpst_f64">;
defm DS_MIN_RTN_F64 : DS_1A1D_RET <0x72, "ds_min_rtn_f64", VReg_64, "ds_min_f64">;
defm DS_MAX_RTN_F64 : DS_1A1D_RET <0x73, "ds_max_rtn_f64", VReg_64, "ds_max_f64">;
+let mayStore = 0 in {
+defm DS_READ_B64 : DS_1A_RET <0x76, "ds_read_b64", VReg_64>;
+defm DS_READ2_B64 : DS_1A_Off8_RET <0x77, "ds_read2_b64", VReg_128>;
+defm DS_READ2ST64_B64 : DS_1A_Off8_RET <0x78, "ds_read2st64_b64", VReg_128>;
+}
+
+defm DS_ADD_SRC2_U32 : DS_1A <0x80, "ds_add_src2_u32">;
+defm DS_SUB_SRC2_U32 : DS_1A <0x81, "ds_sub_src2_u32">;
+defm DS_RSUB_SRC2_U32 : DS_1A <0x82, "ds_rsub_src2_u32">;
+defm DS_INC_SRC2_U32 : DS_1A <0x83, "ds_inc_src2_u32">;
+defm DS_DEC_SRC2_U32 : DS_1A <0x84, "ds_dec_src2_u32">;
+defm DS_MIN_SRC2_I32 : DS_1A <0x85, "ds_min_src2_i32">;
+defm DS_MAX_SRC2_I32 : DS_1A <0x86, "ds_max_src2_i32">;
+defm DS_MIN_SRC2_U32 : DS_1A <0x87, "ds_min_src2_u32">;
+defm DS_MAX_SRC2_U32 : DS_1A <0x88, "ds_max_src2_u32">;
+defm DS_AND_SRC2_B32 : DS_1A <0x89, "ds_and_src2_b32">;
+defm DS_OR_SRC2_B32 : DS_1A <0x8a, "ds_or_src2_b32">;
+defm DS_XOR_SRC2_B32 : DS_1A <0x8b, "ds_xor_src2_b32">;
+defm DS_WRITE_SRC2_B32 : DS_1A <0x8c, "ds_write_src2_b32">;
+
+defm DS_MIN_SRC2_F32 : DS_1A <0x92, "ds_min_src2_f32">;
+defm DS_MAX_SRC2_F32 : DS_1A <0x93, "ds_max_src2_f32">;
+
+defm DS_ADD_SRC2_U64 : DS_1A <0xc0, "ds_add_src2_u64">;
+defm DS_SUB_SRC2_U64 : DS_1A <0xc1, "ds_sub_src2_u64">;
+defm DS_RSUB_SRC2_U64 : DS_1A <0xc2, "ds_rsub_src2_u64">;
+defm DS_INC_SRC2_U64 : DS_1A <0xc3, "ds_inc_src2_u64">;
+defm DS_DEC_SRC2_U64 : DS_1A <0xc4, "ds_dec_src2_u64">;
+defm DS_MIN_SRC2_I64 : DS_1A <0xc5, "ds_min_src2_i64">;
+defm DS_MAX_SRC2_I64 : DS_1A <0xc6, "ds_max_src2_i64">;
+defm DS_MIN_SRC2_U64 : DS_1A <0xc7, "ds_min_src2_u64">;
+defm DS_MAX_SRC2_U64 : DS_1A <0xc8, "ds_max_src2_u64">;
+defm DS_AND_SRC2_B64 : DS_1A <0xc9, "ds_and_src2_b64">;
+defm DS_OR_SRC2_B64 : DS_1A <0xca, "ds_or_src2_b64">;
+defm DS_XOR_SRC2_B64 : DS_1A <0xcb, "ds_xor_src2_b64">;
+defm DS_WRITE_SRC2_B64 : DS_1A <0xcc, "ds_write_src2_b64">;
+
+defm DS_MIN_SRC2_F64 : DS_1A <0xd2, "ds_min_src2_f64">;
+defm DS_MAX_SRC2_F64 : DS_1A <0xd3, "ds_max_src2_f64">;
+
//let SubtargetPredicate = isCI in {
// DS_CONDXCHG32_RTN_B64
// DS_CONDXCHG32_RTN_B128
//} // End isCI
-// TODO: _SRC2_* forms
-
-defm DS_WRITE_B32 : DS_Store_Helper <0x0000000d, "ds_write_b32", VGPR_32>;
-defm DS_WRITE_B8 : DS_Store_Helper <0x00000001e, "ds_write_b8", VGPR_32>;
-defm DS_WRITE_B16 : DS_Store_Helper <0x00000001f, "ds_write_b16", VGPR_32>;
-defm DS_WRITE_B64 : DS_Store_Helper <0x00000004d, "ds_write_b64", VReg_64>;
-
-defm DS_READ_B32 : DS_Load_Helper <0x00000036, "ds_read_b32", VGPR_32>;
-defm DS_READ_I8 : DS_Load_Helper <0x00000039, "ds_read_i8", VGPR_32>;
-defm DS_READ_U8 : DS_Load_Helper <0x0000003a, "ds_read_u8", VGPR_32>;
-defm DS_READ_I16 : DS_Load_Helper <0x0000003b, "ds_read_i16", VGPR_32>;
-defm DS_READ_U16 : DS_Load_Helper <0x0000003c, "ds_read_u16", VGPR_32>;
-defm DS_READ_B64 : DS_Load_Helper <0x00000076, "ds_read_b64", VReg_64>;
-
-// 2 forms.
-defm DS_WRITE2_B32 : DS_Store2_Helper <0x0000000E, "ds_write2_b32", VGPR_32>;
-defm DS_WRITE2ST64_B32 : DS_Store2_Helper <0x0000000F, "ds_write2st64_b32", VGPR_32>;
-defm DS_WRITE2_B64 : DS_Store2_Helper <0x0000004E, "ds_write2_b64", VReg_64>;
-defm DS_WRITE2ST64_B64 : DS_Store2_Helper <0x0000004F, "ds_write2st64_b64", VReg_64>;
-
-defm DS_READ2_B32 : DS_Load2_Helper <0x00000037, "ds_read2_b32", VReg_64>;
-defm DS_READ2ST64_B32 : DS_Load2_Helper <0x00000038, "ds_read2st64_b32", VReg_64>;
-defm DS_READ2_B64 : DS_Load2_Helper <0x00000075, "ds_read2_b64", VReg_128>;
-defm DS_READ2ST64_B64 : DS_Load2_Helper <0x00000076, "ds_read2st64_b64", VReg_128>;
-
//===----------------------------------------------------------------------===//
// MUBUF Instructions
//===----------------------------------------------------------------------===//
-//def BUFFER_LOAD_FORMAT_X : MUBUF_ <mubuf<0x00>, "buffer_load_format_x", []>;
-//def BUFFER_LOAD_FORMAT_XY : MUBUF_ <mubuf<0x01>, "buffer_load_format_xy", []>;
-//def BUFFER_LOAD_FORMAT_XYZ : MUBUF_ <mubuf<0x02>, "buffer_load_format_xyz", []>;
-defm BUFFER_LOAD_FORMAT_XYZW : MUBUF_Load_Helper <mubuf<0x03>, "buffer_load_format_xyzw", VReg_128>;
-//def BUFFER_STORE_FORMAT_X : MUBUF_ <mubuf<0x04>, "buffer_store_format_x", []>;
-//def BUFFER_STORE_FORMAT_XY : MUBUF_ <mubuf<0x05>, "buffer_store_format_xy", []>;
-//def BUFFER_STORE_FORMAT_XYZ : MUBUF_ <mubuf<0x06>, "buffer_store_format_xyz", []>;
-//def BUFFER_STORE_FORMAT_XYZW : MUBUF_ <mubuf<0x07>, "buffer_store_format_xyzw", []>;
+defm BUFFER_LOAD_FORMAT_X : MUBUF_Load_Helper <
+ mubuf<0x00>, "buffer_load_format_x", VGPR_32
+>;
+defm BUFFER_LOAD_FORMAT_XY : MUBUF_Load_Helper <
+ mubuf<0x01>, "buffer_load_format_xy", VReg_64
+>;
+defm BUFFER_LOAD_FORMAT_XYZ : MUBUF_Load_Helper <
+ mubuf<0x02>, "buffer_load_format_xyz", VReg_96
+>;
+defm BUFFER_LOAD_FORMAT_XYZW : MUBUF_Load_Helper <
+ mubuf<0x03>, "buffer_load_format_xyzw", VReg_128
+>;
+defm BUFFER_STORE_FORMAT_X : MUBUF_Store_Helper <
+ mubuf<0x04>, "buffer_store_format_x", VGPR_32
+>;
+defm BUFFER_STORE_FORMAT_XY : MUBUF_Store_Helper <
+ mubuf<0x05>, "buffer_store_format_xy", VReg_64
+>;
+defm BUFFER_STORE_FORMAT_XYZ : MUBUF_Store_Helper <
+ mubuf<0x06>, "buffer_store_format_xyz", VReg_96
+>;
+defm BUFFER_STORE_FORMAT_XYZW : MUBUF_Store_Helper <
+ mubuf<0x07>, "buffer_store_format_xyzw", VReg_128
+>;
defm BUFFER_LOAD_UBYTE : MUBUF_Load_Helper <
mubuf<0x08, 0x10>, "buffer_load_ubyte", VGPR_32, i32, az_extloadi8_global
>;
@@ -1418,13 +1460,17 @@ defm V_INTERP_MOV_F32 : VINTRP_m <
// VOP2 Instructions
//===----------------------------------------------------------------------===//
-defm V_CNDMASK_B32_e64 : VOP3_m_nomods <vop3<0x100>, (outs VGPR_32:$dst),
- (ins VSrc_32:$src0, VSrc_32:$src1, SSrc_64:$src2),
- "v_cndmask_b32_e64 $dst, $src0, $src1, $src2",
- [(set i32:$dst, (select i1:$src2, i32:$src1, i32:$src0))],
- "v_cndmask_b32_e64", 3
->;
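+// v_cndmask_b32 gets both a VOP2 encoding (e32, implicit VCC condition) and a
+// VOP3 encoding (e64, explicit SGPR-pair condition).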
+multiclass V_CNDMASK <vop2 op, string name> {
+ defm _e32 : VOP2_m <
+ op, VOP_CNDMASK.Outs, VOP_CNDMASK.Ins32, VOP_CNDMASK.Asm32, [],
+ name, name>;
+
+ defm _e64 : VOP3_m <
+ op, VOP_CNDMASK.Outs, VOP_CNDMASK.Ins64,
+ name#!cast<string>(VOP_CNDMASK.Asm64), [], name, 3>;
+}
+defm V_CNDMASK_B32 : V_CNDMASK<vop2<0x0>, "v_cndmask_b32">;
let isCommutable = 1 in {
defm V_ADD_F32 : VOP2Inst <vop2<0x3, 0x1>, "v_add_f32",
@@ -1568,8 +1614,8 @@ defm V_MAC_LEGACY_F32 : VOP2_VI3_Inst <vop23<0x6, 0x28e>, "v_mac_legacy_f32",
>;
} // End isCommutable = 1
-defm V_BFM_B32 : VOP2_VI3_Inst <vop23<0x1e, 0x293>, "v_bfm_b32", VOP_I32_I32_I32,
- AMDGPUbfm
+defm V_BFM_B32 : VOP2_VI3_Inst <vop23<0x1e, 0x293>, "v_bfm_b32",
+ VOP_I32_I32_I32
>;
defm V_BCNT_U32_B32 : VOP2_VI3_Inst <vop23<0x22, 0x28b>, "v_bcnt_u32_b32",
VOP_I32_I32_I32
@@ -1638,14 +1684,12 @@ defm V_CUBEMA_F32 : VOP3Inst <vop3<0x147, 0x1c7>, "v_cubema_f32",
VOP_F32_F32_F32_F32
>;
-let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in {
defm V_BFE_U32 : VOP3Inst <vop3<0x148, 0x1c8>, "v_bfe_u32",
VOP_I32_I32_I32_I32, AMDGPUbfe_u32
>;
defm V_BFE_I32 : VOP3Inst <vop3<0x149, 0x1c9>, "v_bfe_i32",
VOP_I32_I32_I32_I32, AMDGPUbfe_i32
>;
-}
defm V_BFI_B32 : VOP3Inst <vop3<0x14a, 0x1ca>, "v_bfi_b32",
VOP_I32_I32_I32_I32, AMDGPUbfi
@@ -1833,6 +1877,11 @@ defm V_ASHRREV_I64 : VOP3Inst <vop3<0, 0x291>, "v_ashrrev_i64",
//===----------------------------------------------------------------------===//
let isCodeGenOnly = 1, isPseudo = 1 in {
+// Only for use in selection patterns; expanded to real instructions later.
+def V_CNDMASK_B64_PSEUDO : VOP3Common <(outs VReg_64:$dst),
+ (ins VSrc_64:$src0, VSrc_64:$src1, SSrc_64:$src2), "", []
+>;
+
let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in {
// 64-bit vector move instruction. This is mainly used by the SIFoldOperands
// pass to enable folding of inline immediates.
@@ -2049,7 +2098,7 @@ def : Pat <
/* int_SI_vs_load_input */
def : Pat<
(SIload_input v4i32:$tlst, imm:$attr_offset, i32:$buf_idx_vgpr),
- (BUFFER_LOAD_FORMAT_XYZW_IDXEN $tlst, $buf_idx_vgpr, imm:$attr_offset, 0, 0, 0, 0)
+ (BUFFER_LOAD_FORMAT_XYZW_IDXEN $buf_idx_vgpr, $tlst, 0, imm:$attr_offset, 0, 0, 0)
>;
/* int_SI_export */
@@ -2196,6 +2245,11 @@ def : Pat <
(V_BCNT_U32_B32_e64 $popcnt, $val)
>;
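+// v_cndmask_b32 selects src1 when the condition bit is set, so the select
+// operands are passed in reverse: select(cc, a, b) -> v_cndmask_b32_e64 b, a, cc.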
+def : Pat <
+ (i32 (select i1:$src0, i32:$src1, i32:$src2)),
+ (V_CNDMASK_B32_e64 $src2, $src1, $src0)
+>;
+
/********** ======================= **********/
/********** Image sampling patterns **********/
/********** ======================= **********/
@@ -2738,7 +2792,7 @@ def : Ext32Pat <anyext>;
// Offset in a 32-bit VGPR
def : Pat <
(SIload_constant v4i32:$sbase, i32:$voff),
- (BUFFER_LOAD_DWORD_OFFEN $sbase, $voff, 0, 0, 0, 0, 0)
+ (BUFFER_LOAD_DWORD_OFFEN $voff, $sbase, 0, 0, 0, 0, 0)
>;
// The multiplication scales from [0,1] to the unsigned integer range
@@ -2781,7 +2835,7 @@ def : ROTRPattern <V_ALIGNBIT_B32>;
class DSReadPat <DS inst, ValueType vt, PatFrag frag> : Pat <
(vt (frag (DS1Addr1Offset i32:$ptr, i32:$offset))),
- (inst (i1 0), $ptr, (as_i16imm $offset), (S_MOV_B32 -1))
+ (inst $ptr, (as_i16imm $offset), (i1 0), (S_MOV_B32 -1))
>;
def : DSReadPat <DS_READ_I8, i32, sextloadi8_local>;
@@ -2799,12 +2853,12 @@ def : DSReadPat <DS_READ_B64, v2i32, local_load_aligned8bytes>;
def : Pat <
(v2i32 (local_load (DS64Bit4ByteAligned i32:$ptr, i8:$offset0,
i8:$offset1))),
- (DS_READ2_B32 (i1 0), $ptr, $offset0, $offset1, (S_MOV_B32 -1))
+ (DS_READ2_B32 $ptr, $offset0, $offset1, (i1 0), (S_MOV_B32 -1))
>;
class DSWritePat <DS inst, ValueType vt, PatFrag frag> : Pat <
(frag vt:$value, (DS1Addr1Offset i32:$ptr, i32:$offset)),
- (inst (i1 0), $ptr, $value, (as_i16imm $offset), (S_MOV_B32 -1))
+ (inst $ptr, $value, (as_i16imm $offset), (i1 0), (S_MOV_B32 -1))
>;
def : DSWritePat <DS_WRITE_B8, i32, truncstorei8_local>;
@@ -2819,14 +2873,14 @@ def : DSWritePat <DS_WRITE_B64, v2i32, local_store_aligned8bytes>;
def : Pat <
(local_store v2i32:$value, (DS64Bit4ByteAligned i32:$ptr, i8:$offset0,
i8:$offset1)),
- (DS_WRITE2_B32 (i1 0), $ptr, (EXTRACT_SUBREG $value, sub0),
- (EXTRACT_SUBREG $value, sub1), $offset0, $offset1,
- (S_MOV_B32 -1))
+ (DS_WRITE2_B32 $ptr, (EXTRACT_SUBREG $value, sub0),
+ (EXTRACT_SUBREG $value, sub1), $offset0, $offset1,
+ (i1 0), (S_MOV_B32 -1))
>;
class DSAtomicRetPat<DS inst, ValueType vt, PatFrag frag> : Pat <
(frag (DS1Addr1Offset i32:$ptr, i32:$offset), vt:$value),
- (inst (i1 0), $ptr, $value, (as_i16imm $offset), (S_MOV_B32 -1))
+ (inst $ptr, $value, (as_i16imm $offset), (i1 0), (S_MOV_B32 -1))
>;
// Special case of DSAtomicRetPat for add / sub 1 -> inc / dec
@@ -2842,13 +2896,13 @@ class DSAtomicRetPat<DS inst, ValueType vt, PatFrag frag> : Pat <
class DSAtomicIncRetPat<DS inst, ValueType vt,
Instruction LoadImm, PatFrag frag> : Pat <
(frag (DS1Addr1Offset i32:$ptr, i32:$offset), (vt 1)),
- (inst (i1 0), $ptr, (LoadImm (vt -1)), (as_i16imm $offset), (S_MOV_B32 -1))
+ (inst $ptr, (LoadImm (vt -1)), (as_i16imm $offset), (i1 0), (S_MOV_B32 -1))
>;
class DSAtomicCmpXChg <DS inst, ValueType vt, PatFrag frag> : Pat <
(frag (DS1Addr1Offset i32:$ptr, i32:$offset), vt:$cmp, vt:$swap),
- (inst (i1 0), $ptr, $cmp, $swap, (as_i16imm $offset), (S_MOV_B32 -1))
+ (inst $ptr, $cmp, $swap, (as_i16imm $offset), (i1 0), (S_MOV_B32 -1))
>;
@@ -2898,8 +2952,9 @@ def : DSAtomicCmpXChg<DS_CMPST_RTN_B64, i64, atomic_cmp_swap_64_local>;
multiclass MUBUFLoad_Pattern <MUBUF Instr_ADDR64, ValueType vt,
PatFrag constant_ld> {
def : Pat <
- (vt (constant_ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset, i16:$offset))),
- (Instr_ADDR64 $srsrc, $vaddr, $soffset, $offset)
+ (vt (constant_ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset,
+ i16:$offset, i1:$glc, i1:$slc, i1:$tfe))),
+ (Instr_ADDR64 $vaddr, $srsrc, $soffset, $offset, $glc, $slc, $tfe)
>;
}
@@ -2916,7 +2971,7 @@ defm : MUBUFLoad_Pattern <BUFFER_LOAD_DWORDX4_ADDR64, v4i32, constant_load>;
class MUBUFScratchLoadPat <MUBUF Instr, ValueType vt, PatFrag ld> : Pat <
(vt (ld (MUBUFScratch v4i32:$srsrc, i32:$vaddr,
i32:$soffset, u16imm:$offset))),
- (Instr $srsrc, $vaddr, $soffset, $offset, 0, 0, 0)
+ (Instr $vaddr, $srsrc, $soffset, $offset, 0, 0, 0)
>;
def : MUBUFScratchLoadPat <BUFFER_LOAD_SBYTE_OFFEN, i32, sextloadi8_private>;
@@ -2935,7 +2990,7 @@ multiclass MUBUF_Load_Dword <ValueType vt, MUBUF offset, MUBUF offen, MUBUF idxe
(vt (int_SI_buffer_load_dword v4i32:$rsrc, (i32 imm), i32:$soffset,
imm:$offset, 0, 0, imm:$glc, imm:$slc,
imm:$tfe)),
- (offset $rsrc, (as_i16imm $offset), $soffset, (as_i1imm $glc),
+ (offset $rsrc, $soffset, (as_i16imm $offset), (as_i1imm $glc),
(as_i1imm $slc), (as_i1imm $tfe))
>;
@@ -2943,7 +2998,7 @@ multiclass MUBUF_Load_Dword <ValueType vt, MUBUF offset, MUBUF offen, MUBUF idxe
(vt (int_SI_buffer_load_dword v4i32:$rsrc, i32:$vaddr, i32:$soffset,
imm:$offset, 1, 0, imm:$glc, imm:$slc,
imm:$tfe)),
- (offen $rsrc, $vaddr, $soffset, (as_i16imm $offset), (as_i1imm $glc), (as_i1imm $slc),
+ (offen $vaddr, $rsrc, $soffset, (as_i16imm $offset), (as_i1imm $glc), (as_i1imm $slc),
(as_i1imm $tfe))
>;
@@ -2951,7 +3006,7 @@ multiclass MUBUF_Load_Dword <ValueType vt, MUBUF offset, MUBUF offen, MUBUF idxe
(vt (int_SI_buffer_load_dword v4i32:$rsrc, i32:$vaddr, i32:$soffset,
imm:$offset, 0, 1, imm:$glc, imm:$slc,
imm:$tfe)),
- (idxen $rsrc, $vaddr, (as_i16imm $offset), $soffset, (as_i1imm $glc),
+ (idxen $vaddr, $rsrc, $soffset, (as_i16imm $offset), (as_i1imm $glc),
(as_i1imm $slc), (as_i1imm $tfe))
>;
@@ -2959,7 +3014,7 @@ multiclass MUBUF_Load_Dword <ValueType vt, MUBUF offset, MUBUF offen, MUBUF idxe
(vt (int_SI_buffer_load_dword v4i32:$rsrc, v2i32:$vaddr, i32:$soffset,
imm:$offset, 1, 1, imm:$glc, imm:$slc,
imm:$tfe)),
- (bothen $rsrc, $vaddr, $soffset, (as_i16imm $offset), (as_i1imm $glc), (as_i1imm $slc),
+ (bothen $vaddr, $rsrc, $soffset, (as_i16imm $offset), (as_i1imm $glc), (as_i1imm $slc),
(as_i1imm $tfe))
>;
}
@@ -2974,7 +3029,7 @@ defm : MUBUF_Load_Dword <v4i32, BUFFER_LOAD_DWORDX4_OFFSET, BUFFER_LOAD_DWORDX4_
class MUBUFScratchStorePat <MUBUF Instr, ValueType vt, PatFrag st> : Pat <
(st vt:$value, (MUBUFScratch v4i32:$srsrc, i32:$vaddr, i32:$soffset,
u16imm:$offset)),
- (Instr $value, $srsrc, $vaddr, $soffset, $offset, 0, 0, 0)
+ (Instr $value, $vaddr, $srsrc, $soffset, $offset, 0, 0, 0)
>;
def : MUBUFScratchStorePat <BUFFER_STORE_BYTE_OFFEN, i32, truncstorei8_private>;
@@ -3104,26 +3159,26 @@ multiclass SI_INDIRECT_Pattern <ValueType vt, ValueType eltvt, SI_INDIRECT_DST I
// 1. Extract with offset
def : Pat<
- (vector_extract vt:$vec, (add i32:$idx, imm:$off)),
- (eltvt (SI_INDIRECT_SRC (IMPLICIT_DEF), $vec, $idx, imm:$off))
+ (eltvt (vector_extract vt:$vec, (add i32:$idx, imm:$off))),
+ (SI_INDIRECT_SRC $vec, $idx, imm:$off)
>;
// 2. Extract without offset
def : Pat<
- (vector_extract vt:$vec, i32:$idx),
- (eltvt (SI_INDIRECT_SRC (IMPLICIT_DEF), $vec, $idx, 0))
+ (eltvt (vector_extract vt:$vec, i32:$idx)),
+ (SI_INDIRECT_SRC $vec, $idx, 0)
>;
// 3. Insert with offset
def : Pat<
(vector_insert vt:$vec, eltvt:$val, (add i32:$idx, imm:$off)),
- (IndDst (IMPLICIT_DEF), $vec, $idx, imm:$off, $val)
+ (IndDst $vec, $idx, imm:$off, $val)
>;
// 4. Insert without offset
def : Pat<
(vector_insert vt:$vec, eltvt:$val, i32:$idx),
- (IndDst (IMPLICIT_DEF), $vec, $idx, 0, $val)
+ (IndDst $vec, $idx, 0, $val)
>;
}
@@ -3269,6 +3324,89 @@ def : Pat <
(V_CNDMASK_B32_e64 $src0, $src1, $src2)
>;
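+
+// s_bfm computes ((1 << width) - 1) << offset, i.e. a bitfield mask, so it
+// matches ((1 << a) - 1) << b directly; e.g. a = 8, b = 4 gives 0xff0.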
+multiclass BFMPatterns <ValueType vt, InstSI BFM, InstSI MOV> {
+ def : Pat <
+ (vt (shl (vt (add (vt (shl 1, vt:$a)), -1)), vt:$b)),
+ (BFM $a, $b)
+ >;
+
+ def : Pat <
+ (vt (add (vt (shl 1, vt:$a)), -1)),
+ (BFM $a, (MOV 0))
+ >;
+}
+
+defm : BFMPatterns <i32, S_BFM_B32, S_MOV_B32>;
+// FIXME: defm : BFMPatterns <i64, S_BFM_B64, S_MOV_B64>;
+
+def : BFEPattern <V_BFE_U32, S_MOV_B32>;
+
+//===----------------------------------------------------------------------===//
+// Fract Patterns
+//===----------------------------------------------------------------------===//
+
+let Predicates = [isSI] in {
+
+// V_FRACT is buggy on SI, so the F32 variant is never selected and (x - floor(x))
+// is used instead. However, SI has no V_FLOOR_F64, so the most efficient way to
+// implement f64 floor is via V_FRACT_F64.
+// The workaround for the V_FRACT bug is:
+// fract(x) = isnan(x) ? x : min(V_FRACT(x), 0.99999999999999999)
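+// (0x3fefffffffffffff is the largest f64 value strictly less than 1.0)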
+
+// Convert (x + (-floor(x))) to fract(x)
+def : Pat <
+ (f64 (fadd (f64 (VOP3Mods f64:$x, i32:$mods)),
+ (f64 (fneg (f64 (ffloor (f64 (VOP3Mods f64:$x, i32:$mods)))))))),
+ (V_CNDMASK_B64_PSEUDO
+ $x,
+ (V_MIN_F64
+ SRCMODS.NONE,
+ (V_FRACT_F64_e64 $mods, $x, DSTCLAMP.NONE, DSTOMOD.NONE),
+ SRCMODS.NONE,
+ (V_MOV_B64_PSEUDO 0x3fefffffffffffff),
+ DSTCLAMP.NONE, DSTOMOD.NONE),
+ (V_CMP_CLASS_F64_e64 SRCMODS.NONE, $x, 3/*NaN*/))
+>;
+
+// Convert floor(x) to (x - fract(x))
+def : Pat <
+ (f64 (ffloor (f64 (VOP3Mods f64:$x, i32:$mods)))),
+ (V_ADD_F64
+ $mods,
+ $x,
+ SRCMODS.NEG,
+ (V_CNDMASK_B64_PSEUDO
+ $x,
+ (V_MIN_F64
+ SRCMODS.NONE,
+ (V_FRACT_F64_e64 $mods, $x, DSTCLAMP.NONE, DSTOMOD.NONE),
+ SRCMODS.NONE,
+ (V_MOV_B64_PSEUDO 0x3fefffffffffffff),
+ DSTCLAMP.NONE, DSTOMOD.NONE),
+ (V_CMP_CLASS_F64_e64 SRCMODS.NONE, $x, 3/*NaN*/)),
+ DSTCLAMP.NONE, DSTOMOD.NONE)
+>;
+
+} // End Predicates = [isSI]
+
+let Predicates = [isCI] in {
+
+// Convert (x - floor(x)) to fract(x)
+def : Pat <
+ (f32 (fsub (f32 (VOP3Mods f32:$x, i32:$mods)),
+ (f32 (ffloor (f32 (VOP3Mods f32:$x, i32:$mods)))))),
+ (V_FRACT_F32_e64 $mods, $x, DSTCLAMP.NONE, DSTOMOD.NONE)
+>;
+
+// Convert (x + (-floor(x))) to fract(x)
+def : Pat <
+ (f64 (fadd (f64 (VOP3Mods f64:$x, i32:$mods)),
+ (f64 (fneg (f64 (ffloor (f64 (VOP3Mods f64:$x, i32:$mods)))))))),
+ (V_FRACT_F64_e64 $mods, $x, DSTCLAMP.NONE, DSTOMOD.NONE)
+>;
+
+} // End Predicates = [isCI]
+
//============================================================================//
// Miscellaneous Optimization Patterns
//============================================================================//
diff --git a/lib/Target/R600/SILoadStoreOptimizer.cpp b/lib/Target/R600/SILoadStoreOptimizer.cpp
index 46630d0..a927ad8 100644
--- a/lib/Target/R600/SILoadStoreOptimizer.cpp
+++ b/lib/Target/R600/SILoadStoreOptimizer.cpp
@@ -45,6 +45,7 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
using namespace llvm;
@@ -249,10 +250,10 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeRead2Pair(
DebugLoc DL = I->getDebugLoc();
MachineInstrBuilder Read2
= BuildMI(*MBB, I, DL, Read2Desc, DestReg)
- .addImm(0) // gds
.addOperand(*AddrReg) // addr
.addImm(NewOffset0) // offset0
.addImm(NewOffset1) // offset1
+ .addImm(0) // gds
.addOperand(*M0Reg) // M0
.addMemOperand(*I->memoperands_begin())
.addMemOperand(*Paired->memoperands_begin());
@@ -332,12 +333,12 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeWrite2Pair(
MachineInstrBuilder Write2
= BuildMI(*MBB, I, DL, Write2Desc)
- .addImm(0) // gds
.addOperand(*Addr) // addr
.addOperand(*Data0) // data0
.addOperand(*Data1) // data1
.addImm(NewOffset0) // offset0
.addImm(NewOffset1) // offset1
+ .addImm(0) // gds
.addOperand(*M0Reg) // m0
.addMemOperand(*I->memoperands_begin())
.addMemOperand(*Paired->memoperands_begin());
diff --git a/lib/Target/R600/SIRegisterInfo.cpp b/lib/Target/R600/SIRegisterInfo.cpp
index 9224e14..13a8974 100644
--- a/lib/Target/R600/SIRegisterInfo.cpp
+++ b/lib/Target/R600/SIRegisterInfo.cpp
@@ -14,7 +14,6 @@
#include "SIRegisterInfo.h"
-#include "AMDGPUSubtarget.h"
#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -25,9 +24,7 @@
using namespace llvm;
-SIRegisterInfo::SIRegisterInfo(const AMDGPUSubtarget &st)
-: AMDGPURegisterInfo(st)
- { }
+SIRegisterInfo::SIRegisterInfo() : AMDGPURegisterInfo() {}
BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
BitVector Reserved(getNumRegs());
@@ -47,14 +44,34 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
Reserved.set(AMDGPU::VGPR255);
Reserved.set(AMDGPU::VGPR254);
+ // Tonga and Iceland can only allocate a fixed number of SGPRs due
+ // to a hw bug.
+ if (MF.getSubtarget<AMDGPUSubtarget>().hasSGPRInitBug()) {
+ unsigned NumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs();
+ // Reserve some SGPRs for FLAT_SCRATCH and VCC (4 SGPRs).
+ // Assume XNACK_MASK is unused.
+ unsigned Limit = AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG - 4;
+
+ for (unsigned i = Limit; i < NumSGPRs; ++i) {
+ unsigned Reg = AMDGPU::SGPR_32RegClass.getRegister(i);
+ MCRegAliasIterator R = MCRegAliasIterator(Reg, this, true);
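+ // Iterating aliases with IncludeSelf = true marks the SGPR itself and
+ // every wider SGPR tuple (64/128/... bit) that overlaps it as reserved.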
+
+ for (; R.isValid(); ++R)
+ Reserved.set(*R);
+ }
+ }
+
return Reserved;
}
-unsigned SIRegisterInfo::getRegPressureSetLimit(unsigned Idx) const {
+unsigned SIRegisterInfo::getRegPressureSetLimit(const MachineFunction &MF,
+ unsigned Idx) const {
+ const AMDGPUSubtarget &STI = MF.getSubtarget<AMDGPUSubtarget>();
// FIXME: We should adjust the max number of waves based on LDS size.
- unsigned SGPRLimit = getNumSGPRsAllowed(ST.getMaxWavesPerCU());
- unsigned VGPRLimit = getNumVGPRsAllowed(ST.getMaxWavesPerCU());
+ unsigned SGPRLimit = getNumSGPRsAllowed(STI.getGeneration(),
+ STI.getMaxWavesPerCU());
+ unsigned VGPRLimit = getNumVGPRsAllowed(STI.getMaxWavesPerCU());
for (regclass_iterator I = regclass_begin(), E = regclass_end();
I != E; ++I) {
@@ -125,9 +142,10 @@ void SIRegisterInfo::buildScratchLoadStore(MachineBasicBlock::iterator MI,
int64_t Offset,
RegScavenger *RS) const {
- const SIInstrInfo *TII = static_cast<const SIInstrInfo*>(ST.getInstrInfo());
MachineBasicBlock *MBB = MI->getParent();
const MachineFunction *MF = MI->getParent()->getParent();
+ const SIInstrInfo *TII =
+ static_cast<const SIInstrInfo *>(MF->getSubtarget().getInstrInfo());
LLVMContext &Ctx = MF->getFunction()->getContext();
DebugLoc DL = MI->getDebugLoc();
bool IsLoad = TII->get(LoadStoreOp).mayLoad();
@@ -162,8 +180,8 @@ void SIRegisterInfo::buildScratchLoadStore(MachineBasicBlock::iterator MI,
BuildMI(*MBB, MI, DL, TII->get(LoadStoreOp))
.addReg(SubReg, getDefRegState(IsLoad))
.addReg(ScratchRsrcReg, getKillRegState(IsKill))
- .addImm(Offset)
.addReg(SOffset)
+ .addImm(Offset)
.addImm(0) // glc
.addImm(0) // slc
.addImm(0) // tfe
@@ -178,7 +196,8 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
MachineBasicBlock *MBB = MI->getParent();
SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
MachineFrameInfo *FrameInfo = MF->getFrameInfo();
- const SIInstrInfo *TII = static_cast<const SIInstrInfo*>(ST.getInstrInfo());
+ const SIInstrInfo *TII =
+ static_cast<const SIInstrInfo *>(MF->getSubtarget().getInstrInfo());
DebugLoc DL = MI->getDebugLoc();
MachineOperand &FIOp = MI->getOperand(FIOperandNum);
@@ -249,7 +268,22 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
.addReg(SubReg);
}
}
- TII->insertNOPs(MI, 3);
+
+ // TODO: only do this when it is needed
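+ // Concretely: the VALU instructions emitted above may write SGPRs that a
+ // following scalar-memory (SI) or vector-memory (VI) instruction reads, so
+ // the NOPs below provide the required wait states between write and read.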
+ switch (MF->getSubtarget<AMDGPUSubtarget>().getGeneration()) {
+ case AMDGPUSubtarget::SOUTHERN_ISLANDS:
+ // "VALU writes SGPR" -> "SMRD reads that SGPR" needs "S_NOP 3" on SI
+ TII->insertNOPs(MI, 3);
+ break;
+ case AMDGPUSubtarget::SEA_ISLANDS:
+ break;
+ default: // VOLCANIC_ISLANDS and later
+ // "VALU writes SGPR -> VMEM reads that SGPR" needs "S_NOP 4" on VI
+ // and later. This also applies to VALUs which write VCC, but we're
+ // unlikely to see VMEM use VCC.
+ TII->insertNOPs(MI, 4);
+ }
+
MI->eraseFromParent();
break;
}
@@ -494,14 +528,24 @@ unsigned SIRegisterInfo::getNumVGPRsAllowed(unsigned WaveCount) const {
}
}
-unsigned SIRegisterInfo::getNumSGPRsAllowed(unsigned WaveCount) const {
- switch(WaveCount) {
- case 10: return 48;
- case 9: return 56;
- case 8: return 64;
- case 7: return 72;
- case 6: return 80;
- case 5: return 96;
- default: return 103;
+unsigned SIRegisterInfo::getNumSGPRsAllowed(AMDGPUSubtarget::Generation gen,
+ unsigned WaveCount) const {
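+ // Per the tables below: at WaveCount == 10, VOLCANIC_ISLANDS parts may
+ // still use 80 SGPRs, while earlier generations are limited to 48.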
+ if (gen >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
+ switch (WaveCount) {
+ case 10: return 80;
+ case 9: return 80;
+ case 8: return 96;
+ default: return 102;
+ }
+ } else {
+ switch(WaveCount) {
+ case 10: return 48;
+ case 9: return 56;
+ case 8: return 64;
+ case 7: return 72;
+ case 6: return 80;
+ case 5: return 96;
+ default: return 103;
+ }
}
}
diff --git a/lib/Target/R600/SIRegisterInfo.h b/lib/Target/R600/SIRegisterInfo.h
index d908ffd..bfdb67c 100644
--- a/lib/Target/R600/SIRegisterInfo.h
+++ b/lib/Target/R600/SIRegisterInfo.h
@@ -17,17 +17,19 @@
#define LLVM_LIB_TARGET_R600_SIREGISTERINFO_H
#include "AMDGPURegisterInfo.h"
+#include "AMDGPUSubtarget.h"
#include "llvm/Support/Debug.h"
namespace llvm {
struct SIRegisterInfo : public AMDGPURegisterInfo {
- SIRegisterInfo(const AMDGPUSubtarget &st);
+ SIRegisterInfo();
BitVector getReservedRegs(const MachineFunction &MF) const override;
- unsigned getRegPressureSetLimit(unsigned Idx) const override;
+ unsigned getRegPressureSetLimit(const MachineFunction &MF,
+ unsigned Idx) const override;
bool requiresRegisterScavenging(const MachineFunction &Fn) const override;
@@ -111,7 +113,8 @@ struct SIRegisterInfo : public AMDGPURegisterInfo {
/// \brief Give the maximum number of SGPRs that can be used by \p WaveCount
/// concurrent waves.
- unsigned getNumSGPRsAllowed(unsigned WaveCount) const;
+ unsigned getNumSGPRsAllowed(AMDGPUSubtarget::Generation gen,
+ unsigned WaveCount) const;
unsigned findUnusedRegister(const MachineRegisterInfo &MRI,
const TargetRegisterClass *RC) const;
diff --git a/lib/Target/R600/SIRegisterInfo.td b/lib/Target/R600/SIRegisterInfo.td
index 8b25e95..7bb5dc2 100644
--- a/lib/Target/R600/SIRegisterInfo.td
+++ b/lib/Target/R600/SIRegisterInfo.td
@@ -256,10 +256,3 @@ def VSrc_64 : RegImmOperand<VS_64>;
def VCSrc_32 : RegInlineOperand<VS_32>;
def VCSrc_64 : RegInlineOperand<VS_64>;
-
-//===----------------------------------------------------------------------===//
-// SGPR and VGPR register classes
-//===----------------------------------------------------------------------===//
-
-def VSrc_128 : RegisterClass<"AMDGPU", [v4i32, v4f32], 128,
- (add VReg_128, SReg_128)>;
diff --git a/lib/Target/R600/SIShrinkInstructions.cpp b/lib/Target/R600/SIShrinkInstructions.cpp
index 97bbd78..51e72cd 100644
--- a/lib/Target/R600/SIShrinkInstructions.cpp
+++ b/lib/Target/R600/SIShrinkInstructions.cpp
@@ -18,9 +18,10 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Constants.h"
-#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#define DEBUG_TYPE "si-shrink-instructions"
@@ -88,6 +89,11 @@ static bool canShrink(MachineInstr &MI, const SIInstrInfo *TII,
const MachineOperand *Src2 = TII->getNamedOperand(MI, AMDGPU::OpName::src2);
// Can't shrink instruction with three operands.
+ // FIXME: v_cndmask_b32 has 3 operands and is shrinkable, but we need to add
+ // a special case for it. It can only be shrunk if the third operand
+ // is vcc. We should handle this the same way we handle vopc, by adding
+ // a register allocation hint pre-regalloc and then do the shrinking
+ // post-regalloc.
if (Src2)
return false;
diff --git a/lib/Target/README.txt b/lib/Target/README.txt
index 0fa56e6..282d923 100644
--- a/lib/Target/README.txt
+++ b/lib/Target/README.txt
@@ -1268,7 +1268,8 @@ int foo (void) {
..
else if (strchr ("<>", *intel_parser.op_string)
-Those should be turned into a switch.
+Those should be turned into a switch. SimplifyLibCalls only gets the second
+case.
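+(Roughly, strchr ("<>", c) != 0 is equivalent to
+c == '<' || c == '>' || c == '\0', since strchr also matches the
+terminating NUL; that extra '\0' case is easy to miss when switching.)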
//===---------------------------------------------------------------------===//
@@ -1843,44 +1844,6 @@ we remove checking in code like
//===---------------------------------------------------------------------===//
-This code (from Benchmarks/Dhrystone/dry.c):
-
-define i32 @Func1(i32, i32) nounwind readnone optsize ssp {
-entry:
- %sext = shl i32 %0, 24
- %conv = ashr i32 %sext, 24
- %sext6 = shl i32 %1, 24
- %conv4 = ashr i32 %sext6, 24
- %cmp = icmp eq i32 %conv, %conv4
- %. = select i1 %cmp, i32 10000, i32 0
- ret i32 %.
-}
-
-Should be simplified into something like:
-
-define i32 @Func1(i32, i32) nounwind readnone optsize ssp {
-entry:
- %sext = shl i32 %0, 24
- %conv = and i32 %sext, 0xFF000000
- %sext6 = shl i32 %1, 24
- %conv4 = and i32 %sext6, 0xFF000000
- %cmp = icmp eq i32 %conv, %conv4
- %. = select i1 %cmp, i32 10000, i32 0
- ret i32 %.
-}
-
-and then to:
-
-define i32 @Func1(i32, i32) nounwind readnone optsize ssp {
-entry:
- %conv = and i32 %0, 0xFF
- %conv4 = and i32 %1, 0xFF
- %cmp = icmp eq i32 %conv, %conv4
- %. = select i1 %cmp, i32 10000, i32 0
- ret i32 %.
-}
-//===---------------------------------------------------------------------===//
-
clang -O3 currently compiles this code
int g(unsigned int a) {
diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCCodeEmitter.cpp b/lib/Target/Sparc/MCTargetDesc/SparcMCCodeEmitter.cpp
index 5128843..598856f 100644
--- a/lib/Target/Sparc/MCTargetDesc/SparcMCCodeEmitter.cpp
+++ b/lib/Target/Sparc/MCTargetDesc/SparcMCCodeEmitter.cpp
@@ -74,7 +74,6 @@ public:
MCCodeEmitter *llvm::createSparcMCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI,
MCContext &Ctx) {
return new SparcMCCodeEmitter(Ctx);
}
diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.h b/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.h
index f72c6c4..3a6f508 100644
--- a/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.h
+++ b/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.h
@@ -62,8 +62,8 @@ private:
const VariantKind Kind;
const MCExpr *Expr;
- explicit SparcMCExpr(VariantKind _Kind, const MCExpr *_Expr)
- : Kind(_Kind), Expr(_Expr) {}
+ explicit SparcMCExpr(VariantKind Kind, const MCExpr *Expr)
+ : Kind(Kind), Expr(Expr) {}
public:
/// @name Construction
diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp b/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp
index 3cc4314..630ed1b 100644
--- a/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp
+++ b/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp
@@ -122,25 +122,16 @@ static MCCodeGenInfo *createSparcV9MCCodeGenInfo(StringRef TT, Reloc::Model RM,
return X;
}
-static MCStreamer *createMCStreamer(const Target &T, StringRef TT,
- MCContext &Context, MCAsmBackend &MAB,
- raw_ostream &OS, MCCodeEmitter *Emitter,
- const MCSubtargetInfo &STI, bool RelaxAll) {
- MCStreamer *S = createELFStreamer(Context, MAB, OS, Emitter, RelaxAll);
- new SparcTargetELFStreamer(*S);
- return S;
+static MCTargetStreamer *
+createObjectTargetStreamer(MCStreamer &S, const MCSubtargetInfo &STI) {
+ return new SparcTargetELFStreamer(S);
}
-static MCStreamer *
-createMCAsmStreamer(MCContext &Ctx, formatted_raw_ostream &OS,
- bool isVerboseAsm, bool useDwarfDirectory,
- MCInstPrinter *InstPrint, MCCodeEmitter *CE,
- MCAsmBackend *TAB, bool ShowInst) {
-
- MCStreamer *S = llvm::createAsmStreamer(
- Ctx, OS, isVerboseAsm, useDwarfDirectory, InstPrint, CE, TAB, ShowInst);
- new SparcTargetAsmStreamer(*S, OS);
- return S;
+static MCTargetStreamer *createTargetAsmStreamer(MCStreamer &S,
+ formatted_raw_ostream &OS,
+ MCInstPrinter *InstPrint,
+ bool isVerboseAsm) {
+ return new SparcTargetAsmStreamer(S, OS);
}
static MCInstPrinter *createSparcMCInstPrinter(const Target &T,
@@ -157,54 +148,37 @@ extern "C" void LLVMInitializeSparcTargetMC() {
RegisterMCAsmInfoFn X(TheSparcTarget, createSparcMCAsmInfo);
RegisterMCAsmInfoFn Y(TheSparcV9Target, createSparcV9MCAsmInfo);
+ for (Target *T : {&TheSparcTarget, &TheSparcV9Target}) {
+ // Register the MC instruction info.
+ TargetRegistry::RegisterMCInstrInfo(*T, createSparcMCInstrInfo);
+
+ // Register the MC register info.
+ TargetRegistry::RegisterMCRegInfo(*T, createSparcMCRegisterInfo);
+
+ // Register the MC subtarget info.
+ TargetRegistry::RegisterMCSubtargetInfo(*T, createSparcMCSubtargetInfo);
+
+ // Register the MC Code Emitter.
+ TargetRegistry::RegisterMCCodeEmitter(*T, createSparcMCCodeEmitter);
+
+ // Register the asm backend.
+ TargetRegistry::RegisterMCAsmBackend(*T, createSparcAsmBackend);
+
+ // Register the object target streamer.
+ TargetRegistry::RegisterObjectTargetStreamer(*T,
+ createObjectTargetStreamer);
+
+ // Register the asm streamer.
+ TargetRegistry::RegisterAsmTargetStreamer(*T, createTargetAsmStreamer);
+
+ // Register the MCInstPrinter
+ TargetRegistry::RegisterMCInstPrinter(*T, createSparcMCInstPrinter);
+ }
+
// Register the MC codegen info.
TargetRegistry::RegisterMCCodeGenInfo(TheSparcTarget,
createSparcMCCodeGenInfo);
TargetRegistry::RegisterMCCodeGenInfo(TheSparcV9Target,
createSparcV9MCCodeGenInfo);
- // Register the MC instruction info.
- TargetRegistry::RegisterMCInstrInfo(TheSparcTarget, createSparcMCInstrInfo);
- TargetRegistry::RegisterMCInstrInfo(TheSparcV9Target, createSparcMCInstrInfo);
-
- // Register the MC register info.
- TargetRegistry::RegisterMCRegInfo(TheSparcTarget, createSparcMCRegisterInfo);
- TargetRegistry::RegisterMCRegInfo(TheSparcV9Target,
- createSparcMCRegisterInfo);
-
- // Register the MC subtarget info.
- TargetRegistry::RegisterMCSubtargetInfo(TheSparcTarget,
- createSparcMCSubtargetInfo);
- TargetRegistry::RegisterMCSubtargetInfo(TheSparcV9Target,
- createSparcMCSubtargetInfo);
-
- // Register the MC Code Emitter.
- TargetRegistry::RegisterMCCodeEmitter(TheSparcTarget,
- createSparcMCCodeEmitter);
- TargetRegistry::RegisterMCCodeEmitter(TheSparcV9Target,
- createSparcMCCodeEmitter);
-
- //Register the asm backend.
- TargetRegistry::RegisterMCAsmBackend(TheSparcTarget,
- createSparcAsmBackend);
- TargetRegistry::RegisterMCAsmBackend(TheSparcV9Target,
- createSparcAsmBackend);
-
- // Register the object streamer.
- TargetRegistry::RegisterMCObjectStreamer(TheSparcTarget,
- createMCStreamer);
- TargetRegistry::RegisterMCObjectStreamer(TheSparcV9Target,
- createMCStreamer);
-
- // Register the asm streamer.
- TargetRegistry::RegisterAsmStreamer(TheSparcTarget,
- createMCAsmStreamer);
- TargetRegistry::RegisterAsmStreamer(TheSparcV9Target,
- createMCAsmStreamer);
-
- // Register the MCInstPrinter
- TargetRegistry::RegisterMCInstPrinter(TheSparcTarget,
- createSparcMCInstPrinter);
- TargetRegistry::RegisterMCInstPrinter(TheSparcV9Target,
- createSparcMCInstPrinter);
}
diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h b/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h
index c31943d..d2ec991 100644
--- a/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h
+++ b/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h
@@ -33,7 +33,6 @@ extern Target TheSparcV9Target;
MCCodeEmitter *createSparcMCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI,
MCContext &Ctx);
MCAsmBackend *createSparcAsmBackend(const Target &T,
const MCRegisterInfo &MRI,
diff --git a/lib/Target/Sparc/SparcISelDAGToDAG.cpp b/lib/Target/Sparc/SparcISelDAGToDAG.cpp
index 9f03b04..1cf5ccf 100644
--- a/lib/Target/Sparc/SparcISelDAGToDAG.cpp
+++ b/lib/Target/Sparc/SparcISelDAGToDAG.cpp
@@ -50,7 +50,7 @@ public:
/// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
/// inline asm expressions.
bool SelectInlineAsmMemoryOperand(const SDValue &Op,
- char ConstraintCode,
+ unsigned ConstraintID,
std::vector<SDValue> &OutOps) override;
const char *getPassName() const override {
@@ -195,12 +195,13 @@ SDNode *SparcDAGToDAGISel::Select(SDNode *N) {
/// inline asm expressions.
bool
SparcDAGToDAGISel::SelectInlineAsmMemoryOperand(const SDValue &Op,
- char ConstraintCode,
+ unsigned ConstraintID,
std::vector<SDValue> &OutOps) {
SDValue Op0, Op1;
- switch (ConstraintCode) {
+ switch (ConstraintID) {
default: return true;
- case 'm': // memory
+ case InlineAsm::Constraint_i:
+ case InlineAsm::Constraint_m: // memory
if (!SelectADDRrr(Op, Op0, Op1))
SelectADDRri(Op, Op0, Op1);
break;
diff --git a/lib/Target/Sparc/SparcISelLowering.cpp b/lib/Target/Sparc/SparcISelLowering.cpp
index 6774977..c8b0570 100644
--- a/lib/Target/Sparc/SparcISelLowering.cpp
+++ b/lib/Target/Sparc/SparcISelLowering.cpp
@@ -915,9 +915,10 @@ SparcTargetLowering::LowerCall_32(TargetLowering::CallLoweringInfo &CLI,
// Add a register mask operand representing the call-preserved registers.
const SparcRegisterInfo *TRI = Subtarget->getRegisterInfo();
- const uint32_t *Mask = ((hasReturnsTwice)
- ? TRI->getRTCallPreservedMask(CallConv)
- : TRI->getCallPreservedMask(CallConv));
+ const uint32_t *Mask =
+ ((hasReturnsTwice)
+ ? TRI->getRTCallPreservedMask(CallConv)
+ : TRI->getCallPreservedMask(DAG.getMachineFunction(), CallConv));
assert(Mask && "Missing call preserved mask for calling convention");
Ops.push_back(DAG.getRegisterMask(Mask));
@@ -1229,7 +1230,8 @@ SparcTargetLowering::LowerCall_64(TargetLowering::CallLoweringInfo &CLI,
const SparcRegisterInfo *TRI = Subtarget->getRegisterInfo();
const uint32_t *Mask =
((hasReturnsTwice) ? TRI->getRTCallPreservedMask(CLI.CallConv)
- : TRI->getCallPreservedMask(CLI.CallConv));
+ : TRI->getCallPreservedMask(DAG.getMachineFunction(),
+ CLI.CallConv));
assert(Mask && "Missing call preserved mask for calling convention");
Ops.push_back(DAG.getRegisterMask(Mask));
@@ -1904,8 +1906,8 @@ SDValue SparcTargetLowering::LowerGlobalTLSAddress(SDValue Op,
Ops.push_back(Callee);
Ops.push_back(Symbol);
Ops.push_back(DAG.getRegister(SP::O0, PtrVT));
- const uint32_t *Mask =
- Subtarget->getRegisterInfo()->getCallPreservedMask(CallingConv::C);
+ const uint32_t *Mask = Subtarget->getRegisterInfo()->getCallPreservedMask(
+ DAG.getMachineFunction(), CallingConv::C);
assert(Mask && "Missing call preserved mask for calling convention");
Ops.push_back(DAG.getRegisterMask(Mask));
Ops.push_back(InFlag);
diff --git a/lib/Target/Sparc/SparcInstrInfo.cpp b/lib/Target/Sparc/SparcInstrInfo.cpp
index 8b2e6bc..4b70f16 100644
--- a/lib/Target/Sparc/SparcInstrInfo.cpp
+++ b/lib/Target/Sparc/SparcInstrInfo.cpp
@@ -33,9 +33,8 @@ using namespace llvm;
void SparcInstrInfo::anchor() {}
SparcInstrInfo::SparcInstrInfo(SparcSubtarget &ST)
- : SparcGenInstrInfo(SP::ADJCALLSTACKDOWN, SP::ADJCALLSTACKUP),
- RI(ST), Subtarget(ST) {
-}
+ : SparcGenInstrInfo(SP::ADJCALLSTACKDOWN, SP::ADJCALLSTACKUP), RI(),
+ Subtarget(ST) {}
/// isLoadFromStackSlot - If the specified machine instruction is a direct
/// load from a stack slot, return the virtual or physical register number of
diff --git a/lib/Target/Sparc/SparcInstrInfo.h b/lib/Target/Sparc/SparcInstrInfo.h
index fe93ed7..6e08418 100644
--- a/lib/Target/Sparc/SparcInstrInfo.h
+++ b/lib/Target/Sparc/SparcInstrInfo.h
@@ -22,6 +22,8 @@
namespace llvm {
+class SparcSubtarget;
+
/// SPII - This namespace holds all of the target specific flags that
/// instruction info tracks.
///
diff --git a/lib/Target/Sparc/SparcRegisterInfo.cpp b/lib/Target/Sparc/SparcRegisterInfo.cpp
index 3cca98f..9667bc0 100644
--- a/lib/Target/Sparc/SparcRegisterInfo.cpp
+++ b/lib/Target/Sparc/SparcRegisterInfo.cpp
@@ -34,17 +34,16 @@ static cl::opt<bool>
ReserveAppRegisters("sparc-reserve-app-registers", cl::Hidden, cl::init(false),
cl::desc("Reserve application registers (%g2-%g4)"));
-SparcRegisterInfo::SparcRegisterInfo(SparcSubtarget &st)
- : SparcGenRegisterInfo(SP::O7), Subtarget(st) {
-}
+SparcRegisterInfo::SparcRegisterInfo() : SparcGenRegisterInfo(SP::O7) {}
const MCPhysReg*
SparcRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
return CSR_SaveList;
}
-const uint32_t*
-SparcRegisterInfo::getCallPreservedMask(CallingConv::ID CC) const {
+const uint32_t *
+SparcRegisterInfo::getCallPreservedMask(const MachineFunction &MF,
+ CallingConv::ID CC) const {
return CSR_RegMask;
}
@@ -55,6 +54,7 @@ SparcRegisterInfo::getRTCallPreservedMask(CallingConv::ID CC) const {
BitVector SparcRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
BitVector Reserved(getNumRegs());
+ const SparcSubtarget &Subtarget = MF.getSubtarget<SparcSubtarget>();
// FIXME: G1 reserved for now for large imm generation by frame code.
Reserved.set(SP::G1);
@@ -89,6 +89,7 @@ BitVector SparcRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
const TargetRegisterClass*
SparcRegisterInfo::getPointerRegClass(const MachineFunction &MF,
unsigned Kind) const {
+ const SparcSubtarget &Subtarget = MF.getSubtarget<SparcSubtarget>();
return Subtarget.is64Bit() ? &SP::I64RegsRegClass : &SP::IntRegsRegClass;
}
@@ -160,6 +161,7 @@ SparcRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// Addressable stack objects are accessed using neg. offsets from %fp
MachineFunction &MF = *MI.getParent()->getParent();
+ const SparcSubtarget &Subtarget = MF.getSubtarget<SparcSubtarget>();
int64_t Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex) +
MI.getOperand(FIOperandNum + 1).getImm() +
Subtarget.getStackPointerBias();
@@ -174,7 +176,7 @@ SparcRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
if (!Subtarget.isV9() || !Subtarget.hasHardQuad()) {
if (MI.getOpcode() == SP::STQFri) {
- const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
+ const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
unsigned SrcReg = MI.getOperand(2).getReg();
unsigned SrcEvenReg = getSubReg(SrcReg, SP::sub_even64);
unsigned SrcOddReg = getSubReg(SrcReg, SP::sub_odd64);
@@ -186,7 +188,7 @@ SparcRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
MI.getOperand(2).setReg(SrcOddReg);
Offset += 8;
} else if (MI.getOpcode() == SP::LDQFri) {
- const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
+ const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
unsigned DestReg = MI.getOperand(0).getReg();
unsigned DestEvenReg = getSubReg(DestReg, SP::sub_even64);
unsigned DestOddReg = getSubReg(DestReg, SP::sub_odd64);
diff --git a/lib/Target/Sparc/SparcRegisterInfo.h b/lib/Target/Sparc/SparcRegisterInfo.h
index 63567b0..764a894 100644
--- a/lib/Target/Sparc/SparcRegisterInfo.h
+++ b/lib/Target/Sparc/SparcRegisterInfo.h
@@ -20,20 +20,13 @@
#include "SparcGenRegisterInfo.inc"
namespace llvm {
-
-class SparcSubtarget;
-class TargetInstrInfo;
-class Type;
-
struct SparcRegisterInfo : public SparcGenRegisterInfo {
- SparcSubtarget &Subtarget;
-
- SparcRegisterInfo(SparcSubtarget &st);
+ SparcRegisterInfo();
/// Code Generation virtual methods...
- const MCPhysReg *
- getCalleeSavedRegs(const MachineFunction *MF =nullptr) const override;
- const uint32_t* getCallPreservedMask(CallingConv::ID CC) const override;
+ const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override;
+ const uint32_t *getCallPreservedMask(const MachineFunction &MF,
+ CallingConv::ID CC) const override;
const uint32_t* getRTCallPreservedMask(CallingConv::ID CC) const;
diff --git a/lib/Target/Sparc/SparcTargetMachine.cpp b/lib/Target/Sparc/SparcTargetMachine.cpp
index 1c423dc..6979a17 100644
--- a/lib/Target/Sparc/SparcTargetMachine.cpp
+++ b/lib/Target/Sparc/SparcTargetMachine.cpp
@@ -56,12 +56,11 @@ SparcTargetMachine::SparcTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
- CodeGenOpt::Level OL,
- bool is64bit)
- : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
- TLOF(make_unique<SparcELFTargetObjectFile>()),
- DL(computeDataLayout(is64bit)),
- Subtarget(TT, CPU, FS, *this, is64bit) {
+ CodeGenOpt::Level OL, bool is64bit)
+ : LLVMTargetMachine(T, computeDataLayout(is64bit), TT, CPU, FS, Options, RM,
+ CM, OL),
+ TLOF(make_unique<SparcELFTargetObjectFile>()),
+ Subtarget(TT, CPU, FS, *this, is64bit) {
initAsmInfo();
}
diff --git a/lib/Target/Sparc/SparcTargetMachine.h b/lib/Target/Sparc/SparcTargetMachine.h
index 4f93980..30a8ebf 100644
--- a/lib/Target/Sparc/SparcTargetMachine.h
+++ b/lib/Target/Sparc/SparcTargetMachine.h
@@ -22,7 +22,6 @@ namespace llvm {
class SparcTargetMachine : public LLVMTargetMachine {
std::unique_ptr<TargetLoweringObjectFile> TLOF;
- const DataLayout DL;
SparcSubtarget Subtarget;
public:
SparcTargetMachine(const Target &T, StringRef TT,
@@ -31,8 +30,9 @@ public:
CodeGenOpt::Level OL, bool is64bit);
~SparcTargetMachine() override;
- const DataLayout *getDataLayout() const override { return &DL; }
- const SparcSubtarget *getSubtargetImpl() const override { return &Subtarget; }
+ const SparcSubtarget *getSubtargetImpl(const Function &) const override {
+ return &Subtarget;
+ }
// Pass Pipeline Configuration
TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp
index d9bb916..40dc48e 100644
--- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp
+++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp
@@ -110,7 +110,6 @@ private:
MCCodeEmitter *llvm::createSystemZMCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
- const MCSubtargetInfo &MCSTI,
MCContext &Ctx) {
return new SystemZMCCodeEmitter(MCII, Ctx);
}
diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp
index 6e82b6d..ffd05a9 100644
--- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp
+++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp
@@ -181,14 +181,6 @@ static MCInstPrinter *createSystemZMCInstPrinter(const Target &T,
return new SystemZInstPrinter(MAI, MII, MRI);
}
-static MCStreamer *
-createSystemZMCObjectStreamer(const Target &T, StringRef TT, MCContext &Ctx,
- MCAsmBackend &MAB, raw_ostream &OS,
- MCCodeEmitter *Emitter,
- const MCSubtargetInfo &STI, bool RelaxAll) {
- return createELFStreamer(Ctx, MAB, OS, Emitter, RelaxAll);
-}
-
extern "C" void LLVMInitializeSystemZTargetMC() {
// Register the MCAsmInfo.
TargetRegistry::RegisterMCAsmInfo(TheSystemZTarget,
@@ -221,8 +213,4 @@ extern "C" void LLVMInitializeSystemZTargetMC() {
// Register the MCInstPrinter.
TargetRegistry::RegisterMCInstPrinter(TheSystemZTarget,
createSystemZMCInstPrinter);
-
- // Register the MCObjectStreamer;
- TargetRegistry::RegisterMCObjectStreamer(TheSystemZTarget,
- createSystemZMCObjectStreamer);
}
diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h b/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h
index 5eb6526..962c950 100644
--- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h
+++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h
@@ -71,7 +71,6 @@ inline unsigned getRegAsGRH32(unsigned Reg) {
MCCodeEmitter *createSystemZMCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI,
MCContext &Ctx);
MCAsmBackend *createSystemZMCAsmBackend(const Target &T,
diff --git a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
index b8b0db9..a52aa25 100644
--- a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
+++ b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
@@ -328,7 +328,7 @@ public:
// Override SelectionDAGISel.
SDNode *Select(SDNode *Node) override;
- bool SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode,
+ bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
std::vector<SDValue> &OutOps) override;
// Include the pieces autogenerated from the target description.
@@ -1129,18 +1129,29 @@ SDNode *SystemZDAGToDAGISel::Select(SDNode *Node) {
bool SystemZDAGToDAGISel::
SelectInlineAsmMemoryOperand(const SDValue &Op,
- char ConstraintCode,
+ unsigned ConstraintID,
std::vector<SDValue> &OutOps) {
- assert(ConstraintCode == 'm' && "Unexpected constraint code");
- // Accept addresses with short displacements, which are compatible
- // with Q, R, S and T. But keep the index operand for future expansion.
- SDValue Base, Disp, Index;
- if (!selectBDXAddr(SystemZAddressingMode::FormBD,
- SystemZAddressingMode::Disp12Only,
- Op, Base, Disp, Index))
- return true;
- OutOps.push_back(Base);
- OutOps.push_back(Disp);
- OutOps.push_back(Index);
- return false;
+ switch(ConstraintID) {
+ default:
+ llvm_unreachable("Unexpected asm memory constraint");
+ case InlineAsm::Constraint_i:
+ case InlineAsm::Constraint_m:
+ case InlineAsm::Constraint_Q:
+ case InlineAsm::Constraint_R:
+ case InlineAsm::Constraint_S:
+ case InlineAsm::Constraint_T:
+ // Accept addresses with short displacements, which are compatible
+ // with Q, R, S and T. But keep the index operand for future expansion.
+ SDValue Base, Disp, Index;
+ if (selectBDXAddr(SystemZAddressingMode::FormBD,
+ SystemZAddressingMode::Disp12Only,
+ Op, Base, Disp, Index)) {
+ OutOps.push_back(Base);
+ OutOps.push_back(Disp);
+ OutOps.push_back(Index);
+ return false;
+ }
+ break;
+ }
+ return true;
}
diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp
index e96398d..0ca8bcd 100644
--- a/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -920,7 +920,7 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
// Add a register mask operand representing the call-preserved registers.
const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
- const uint32_t *Mask = TRI->getCallPreservedMask(CallConv);
+ const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
assert(Mask && "Missing call preserved mask for calling convention");
Ops.push_back(DAG.getRegisterMask(Mask));
@@ -1858,7 +1858,8 @@ SDValue SystemZTargetLowering::lowerTLSGetOffset(GlobalAddressSDNode *Node,
// Add a register mask operand representing the call-preserved registers.
const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
- const uint32_t *Mask = TRI->getCallPreservedMask(CallingConv::C);
+ const uint32_t *Mask =
+ TRI->getCallPreservedMask(DAG.getMachineFunction(), CallingConv::C);
assert(Mask && "Missing call preserved mask for calling convention");
Ops.push_back(DAG.getRegisterMask(Mask));
diff --git a/lib/Target/SystemZ/SystemZISelLowering.h b/lib/Target/SystemZ/SystemZISelLowering.h
index a2b10b0..23c62c9 100644
--- a/lib/Target/SystemZ/SystemZISelLowering.h
+++ b/lib/Target/SystemZ/SystemZISelLowering.h
@@ -233,6 +233,26 @@ public:
std::string &Constraint,
std::vector<SDValue> &Ops,
SelectionDAG &DAG) const override;
+
+ unsigned getInlineAsmMemConstraint(
+ const std::string &ConstraintCode) const override {
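+ // For example, an inline-asm memory operand written with the "Q"
+ // constraint maps to InlineAsm::Constraint_Q here and is then selected
+ // as a base + 12-bit-displacement address in SelectInlineAsmMemoryOperand.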
+ if (ConstraintCode.size() == 1) {
+ switch(ConstraintCode[0]) {
+ default:
+ break;
+ case 'Q':
+ return InlineAsm::Constraint_Q;
+ case 'R':
+ return InlineAsm::Constraint_R;
+ case 'S':
+ return InlineAsm::Constraint_S;
+ case 'T':
+ return InlineAsm::Constraint_T;
+ }
+ }
+ return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
+ }
+
MachineBasicBlock *EmitInstrWithCustomInserter(MachineInstr *MI,
MachineBasicBlock *BB) const
override;
diff --git a/lib/Target/SystemZ/SystemZInstrInfo.cpp b/lib/Target/SystemZ/SystemZInstrInfo.cpp
index 8488ec8..5128993 100644
--- a/lib/Target/SystemZ/SystemZInstrInfo.cpp
+++ b/lib/Target/SystemZ/SystemZInstrInfo.cpp
@@ -743,11 +743,10 @@ SystemZInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
return nullptr;
}
-MachineInstr *
-SystemZInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
- MachineInstr *MI,
- const SmallVectorImpl<unsigned> &Ops,
- int FrameIndex) const {
+MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
+ MachineInstr *MI,
+ ArrayRef<unsigned> Ops,
+ int FrameIndex) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
unsigned Size = MFI->getObjectSize(FrameIndex);
unsigned Opcode = MI->getOpcode();
@@ -862,9 +861,9 @@ SystemZInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
}
MachineInstr *
-SystemZInstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineInstr* MI,
- const SmallVectorImpl<unsigned> &Ops,
- MachineInstr* LoadMI) const {
+SystemZInstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
+ ArrayRef<unsigned> Ops,
+ MachineInstr *LoadMI) const {
return nullptr;
}
diff --git a/lib/Target/SystemZ/SystemZInstrInfo.h b/lib/Target/SystemZ/SystemZInstrInfo.h
index e711f89..b55810b 100644
--- a/lib/Target/SystemZ/SystemZInstrInfo.h
+++ b/lib/Target/SystemZ/SystemZInstrInfo.h
@@ -186,11 +186,11 @@ public:
MachineBasicBlock::iterator &MBBI,
LiveVariables *LV) const override;
MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
- const SmallVectorImpl<unsigned> &Ops,
+ ArrayRef<unsigned> Ops,
int FrameIndex) const override;
- MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr* MI,
- const SmallVectorImpl<unsigned> &Ops,
- MachineInstr* LoadMI) const override;
+ MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
+ ArrayRef<unsigned> Ops,
+ MachineInstr *LoadMI) const override;
bool expandPostRAPseudo(MachineBasicBlock::iterator MBBI) const override;
bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const
override;
diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.cpp b/lib/Target/SystemZ/SystemZRegisterInfo.cpp
index 64f5eeb..7cabea9 100644
--- a/lib/Target/SystemZ/SystemZRegisterInfo.cpp
+++ b/lib/Target/SystemZ/SystemZRegisterInfo.cpp
@@ -28,7 +28,8 @@ SystemZRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
}
const uint32_t *
-SystemZRegisterInfo::getCallPreservedMask(CallingConv::ID CC) const {
+SystemZRegisterInfo::getCallPreservedMask(const MachineFunction &MF,
+ CallingConv::ID CC) const {
return CSR_SystemZ_RegMask;
}
diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.h b/lib/Target/SystemZ/SystemZRegisterInfo.h
index 212fe91..a0db5a9 100644
--- a/lib/Target/SystemZ/SystemZRegisterInfo.h
+++ b/lib/Target/SystemZ/SystemZRegisterInfo.h
@@ -43,9 +43,9 @@ public:
bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const override {
return true;
}
- const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF = nullptr) const
- override;
- const uint32_t *getCallPreservedMask(CallingConv::ID CC) const override;
+ const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override;
+ const uint32_t *getCallPreservedMask(const MachineFunction &MF,
+ CallingConv::ID CC) const override;
BitVector getReservedRegs(const MachineFunction &MF) const override;
void eliminateFrameIndex(MachineBasicBlock::iterator MI,
int SPAdj, unsigned FIOperandNum,
diff --git a/lib/Target/SystemZ/SystemZTargetMachine.cpp b/lib/Target/SystemZ/SystemZTargetMachine.cpp
index 73198b1..86baccb 100644
--- a/lib/Target/SystemZ/SystemZTargetMachine.cpp
+++ b/lib/Target/SystemZ/SystemZTargetMachine.cpp
@@ -25,12 +25,12 @@ SystemZTargetMachine::SystemZTargetMachine(const Target &T, StringRef TT,
const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL)
- : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
+ // Make sure that global data has at least 16 bits of alignment by
+ // default, so that we can refer to it using LARL. We don't have any
+ // special requirements for stack variables though.
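+ // In the layout string: "E" selects big-endian, "m:e" ELF name mangling,
+ // and each "iN:ABI:Pref" entry gives alignments in bits -- the 16-bit
+ // preferred alignments (e.g. "i8:8:16") are what keep globals LARL-addressable.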
+ : LLVMTargetMachine(T, "E-m:e-i1:8:16-i8:8:16-i64:64-f128:64-a:8:16-n32:64",
+ TT, CPU, FS, Options, RM, CM, OL),
TLOF(make_unique<TargetLoweringObjectFileELF>()),
- // Make sure that global data has at least 16 bits of alignment by
- // default, so that we can refer to it using LARL. We don't have any
- // special requirements for stack variables though.
- DL("E-m:e-i1:8:16-i8:8:16-i64:64-f128:64-a:8:16-n32:64"),
Subtarget(TT, CPU, FS, *this) {
initAsmInfo();
}
diff --git a/lib/Target/SystemZ/SystemZTargetMachine.h b/lib/Target/SystemZ/SystemZTargetMachine.h
index 52ccc5a..181b926 100644
--- a/lib/Target/SystemZ/SystemZTargetMachine.h
+++ b/lib/Target/SystemZ/SystemZTargetMachine.h
@@ -24,7 +24,6 @@ class TargetFrameLowering;
class SystemZTargetMachine : public LLVMTargetMachine {
std::unique_ptr<TargetLoweringObjectFile> TLOF;
- const DataLayout DL;
SystemZSubtarget Subtarget;
public:
@@ -34,9 +33,8 @@ public:
CodeGenOpt::Level OL);
~SystemZTargetMachine() override;
- // Override TargetMachine.
- const DataLayout *getDataLayout() const override { return &DL; }
- const SystemZSubtarget *getSubtargetImpl() const override {
+ const SystemZSubtarget *getSubtargetImpl() const { return &Subtarget; }
+ const SystemZSubtarget *getSubtargetImpl(const Function &) const override {
return &Subtarget;
}
// Override LLVMTargetMachine
diff --git a/lib/Target/Target.cpp b/lib/Target/Target.cpp
index 5b7953d..1b74e8c 100644
--- a/lib/Target/Target.cpp
+++ b/lib/Target/Target.cpp
@@ -34,7 +34,6 @@ inline LLVMTargetLibraryInfoRef wrap(const TargetLibraryInfoImpl *P) {
}
void llvm::initializeTarget(PassRegistry &Registry) {
- initializeDataLayoutPassPass(Registry);
initializeTargetLibraryInfoWrapperPassPass(Registry);
initializeTargetTransformInfoWrapperPassPass(Registry);
}
@@ -48,9 +47,6 @@ LLVMTargetDataRef LLVMCreateTargetData(const char *StringRep) {
}
void LLVMAddTargetData(LLVMTargetDataRef TD, LLVMPassManagerRef PM) {
- // The DataLayoutPass must now be in sync with the module. Unfortunatelly we
- // cannot enforce that from the C api.
- unwrap(PM)->add(new DataLayoutPass());
}
void LLVMAddTargetLibraryInfo(LLVMTargetLibraryInfoRef TLI,
diff --git a/lib/Target/TargetLoweringObjectFile.cpp b/lib/Target/TargetLoweringObjectFile.cpp
index faa6fbe..75100fb 100644
--- a/lib/Target/TargetLoweringObjectFile.cpp
+++ b/lib/Target/TargetLoweringObjectFile.cpp
@@ -343,3 +343,9 @@ const MCExpr *TargetLoweringObjectFile::getDebugThreadLocalSymbol(const MCSymbol
// null return could mean 'no location' & we should just do that here.
return MCSymbolRefExpr::Create(Sym, *Ctx);
}
+
+void TargetLoweringObjectFile::getNameWithPrefix(
+ SmallVectorImpl<char> &OutName, const GlobalValue *GV,
+ bool CannotUsePrivateLabel, Mangler &Mang, const TargetMachine &TM) const {
+ Mang.getNameWithPrefix(OutName, GV, CannotUsePrivateLabel);
+}
diff --git a/lib/Target/TargetMachine.cpp b/lib/Target/TargetMachine.cpp
index 307e93c..dd07f81 100644
--- a/lib/Target/TargetMachine.cpp
+++ b/lib/Target/TargetMachine.cpp
@@ -22,6 +22,7 @@
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCCodeGenInfo.h"
#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCTargetOptions.h"
#include "llvm/MC/SectionKind.h"
@@ -36,18 +37,20 @@ using namespace llvm;
// TargetMachine Class
//
-TargetMachine::TargetMachine(const Target &T,
+TargetMachine::TargetMachine(const Target &T, StringRef DataLayoutString,
StringRef TT, StringRef CPU, StringRef FS,
const TargetOptions &Options)
- : TheTarget(T), TargetTriple(TT), TargetCPU(CPU), TargetFS(FS),
- CodeGenInfo(nullptr), AsmInfo(nullptr),
- RequireStructuredCFG(false),
- Options(Options) {
-}
+ : TheTarget(T), DL(DataLayoutString), TargetTriple(TT), TargetCPU(CPU),
+ TargetFS(FS), CodeGenInfo(nullptr), AsmInfo(nullptr), MRI(nullptr),
+ MII(nullptr), STI(nullptr), RequireStructuredCFG(false),
+ Options(Options) {}
TargetMachine::~TargetMachine() {
delete CodeGenInfo;
delete AsmInfo;
+ delete MRI;
+ delete MII;
+ delete STI;
}
/// \brief Reset the target options based on the function's attributes.
@@ -177,7 +180,7 @@ void TargetMachine::getNameWithPrefix(SmallVectorImpl<char> &Name,
const TargetLoweringObjectFile *TLOF = getObjFileLowering();
const MCSection *TheSection = TLOF->SectionForGlobal(GV, GVKind, Mang, *this);
bool CannotUsePrivateLabel = !canUsePrivateLabel(*AsmInfo, *TheSection);
- Mang.getNameWithPrefix(Name, GV, CannotUsePrivateLabel);
+ TLOF->getNameWithPrefix(Name, GV, CannotUsePrivateLabel, Mang, *this);
}
MCSymbol *TargetMachine::getSymbol(const GlobalValue *GV, Mangler &Mang) const {
diff --git a/lib/Target/TargetMachineC.cpp b/lib/Target/TargetMachineC.cpp
index c7838a9..236cb1b 100644
--- a/lib/Target/TargetMachineC.cpp
+++ b/lib/Target/TargetMachineC.cpp
@@ -198,8 +198,7 @@ static LLVMBool LLVMTargetMachineEmit(LLVMTargetMachineRef T, LLVMModuleRef M,
*ErrorMessage = strdup(error.c_str());
return true;
}
- Mod->setDataLayout(td);
- pass.add(new DataLayoutPass());
+ Mod->setDataLayout(*td);
TargetMachine::CodeGenFileType ft;
switch (codegen) {
diff --git a/lib/Target/TargetSubtargetInfo.cpp b/lib/Target/TargetSubtargetInfo.cpp
index 10597a8..b2bb59e 100644
--- a/lib/Target/TargetSubtargetInfo.cpp
+++ b/lib/Target/TargetSubtargetInfo.cpp
@@ -23,22 +23,6 @@ TargetSubtargetInfo::TargetSubtargetInfo() {}
TargetSubtargetInfo::~TargetSubtargetInfo() {}
-// Temporary option to compare overall performance change when moving from the
-// SD scheduler to the MachineScheduler pass pipeline. This is convenient for
-// benchmarking during the transition from SD to MI scheduling. Once armv7 makes
-// the switch, it should go away. The normal way to enable/disable the
-// MachineScheduling pass itself is by using -enable-misched. For targets that
-// already use MI sched (via MySubTarget::enableMachineScheduler())
-// -misched-bench=false negates the subtarget hook.
-static cl::opt<bool> BenchMachineSched("misched-bench", cl::Hidden,
- cl::desc("Migrate from the target's default SD scheduler to MI scheduler"));
-
-bool TargetSubtargetInfo::useMachineScheduler() const {
- if (BenchMachineSched.getNumOccurrences())
- return BenchMachineSched;
- return enableMachineScheduler();
-}
-
bool TargetSubtargetInfo::enableAtomicExpand() const {
return true;
}
@@ -47,6 +31,10 @@ bool TargetSubtargetInfo::enableMachineScheduler() const {
return false;
}
+bool TargetSubtargetInfo::enableJoinGlobalCopies() const {
+ return enableMachineScheduler();
+}
+
bool TargetSubtargetInfo::enableRALocalReassignment(
CodeGenOpt::Level OptLevel) const {
return true;
diff --git a/lib/Target/X86/Android.mk b/lib/Target/X86/Android.mk
index 08646d0..7194dd3 100644
--- a/lib/Target/X86/Android.mk
+++ b/lib/Target/X86/Android.mk
@@ -1,8 +1,10 @@
LOCAL_PATH := $(call my-dir)
x86_codegen_TBLGEN_TABLES := \
+ X86GenAsmMatcher.inc \
X86GenAsmWriter.inc \
X86GenAsmWriter1.inc \
+ X86GenDisassemblerTables.inc \
X86GenRegisterInfo.inc \
X86GenInstrInfo.inc \
X86GenDAGISel.inc \
diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp
index 0b6fb52..c24805a 100644
--- a/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -11,6 +11,7 @@
#include "X86AsmInstrumentation.h"
#include "X86AsmParserCommon.h"
#include "X86Operand.h"
+#include "X86ISelLowering.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallString.h"
@@ -664,6 +665,7 @@ private:
ParseIntelSegmentOverride(unsigned SegReg, SMLoc Start, unsigned Size);
std::unique_ptr<X86Operand>
ParseIntelMemOperand(int64_t ImmDisp, SMLoc StartLoc, unsigned Size);
+ std::unique_ptr<X86Operand> ParseRoundingModeOp(SMLoc Start, SMLoc End);
bool ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End);
std::unique_ptr<X86Operand> ParseIntelBracExpression(unsigned SegReg,
SMLoc Start,
@@ -1407,6 +1409,35 @@ X86AsmParser::ParseIntelSegmentOverride(unsigned SegReg, SMLoc Start,
/*Scale=*/1, Start, End, Size, Identifier, Info);
}
+/// ParseRoundingModeOp - Parse an AVX-512 rounding mode operand.
+std::unique_ptr<X86Operand>
+X86AsmParser::ParseRoundingModeOp(SMLoc Start, SMLoc End) {
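+ // For example, in Intel syntax this parses the trailing operand in
+ // "vaddps zmm0, zmm1, zmm2, {rd-sae}", mapping "rd" to
+ // X86::STATIC_ROUNDING::TO_NEG_INF and emitting it as an immediate.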
+ MCAsmParser &Parser = getParser();
+ const AsmToken &Tok = Parser.getTok();
+ consumeToken(); // Eat "{"
+ if (Tok.getIdentifier().startswith("r")) {
+ int rndMode = StringSwitch<int>(Tok.getIdentifier())
+ .Case("rn", X86::STATIC_ROUNDING::TO_NEAREST_INT)
+ .Case("rd", X86::STATIC_ROUNDING::TO_NEG_INF)
+ .Case("ru", X86::STATIC_ROUNDING::TO_POS_INF)
+ .Case("rz", X86::STATIC_ROUNDING::TO_ZERO)
+ .Default(-1);
+ if (-1 == rndMode)
+ return ErrorOperand(Tok.getLoc(), "Invalid rounding mode.");
+ Parser.Lex(); // Eat "r*" of r*-sae
+ if (!getLexer().is(AsmToken::Minus))
+ return ErrorOperand(Tok.getLoc(), "Expected - at this point");
+ Parser.Lex(); // Eat "-"
+ Parser.Lex(); // Eat the sae
+ if (!getLexer().is(AsmToken::RCurly))
+ return ErrorOperand(Tok.getLoc(), "Expected } at this point");
+ Parser.Lex(); // Eat "}"
+ const MCExpr *RndModeOp =
+ MCConstantExpr::Create(rndMode, Parser.getContext());
+ return X86Operand::CreateImm(RndModeOp, Start, End);
+ }
+ return ErrorOperand(Tok.getLoc(), "unknown token in expression");
+}
/// ParseIntelMemOperand - Parse intel style memory operand.
std::unique_ptr<X86Operand> X86AsmParser::ParseIntelMemOperand(int64_t ImmDisp,
SMLoc Start,
@@ -1656,6 +1687,11 @@ std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOperand() {
return ParseIntelMemOperand(Imm, Start, Size);
}
+ // rounding mode token
+ if (STI.getFeatureBits() & X86::FeatureAVX512 &&
+ getLexer().is(AsmToken::LCurly))
+ return ParseRoundingModeOp(Start, End);
+
// Register.
unsigned RegNo = 0;
if (!ParseRegister(RegNo, Start, End)) {
@@ -1708,6 +1744,12 @@ std::unique_ptr<X86Operand> X86AsmParser::ParseATTOperand() {
return nullptr;
return X86Operand::CreateImm(Val, Start, End);
}
+ case AsmToken::LCurly: {
+ SMLoc Start = Parser.getTok().getLoc(), End;
+ if (STI.getFeatureBits() & X86::FeatureAVX512)
+ return ParseRoundingModeOp(Start, End);
+ return ErrorOperand(Start, "unknown token in expression");
+ }
}
}
diff --git a/lib/Target/X86/AsmParser/X86Operand.h b/lib/Target/X86/AsmParser/X86Operand.h
index d67e119..94dbedb 100644
--- a/lib/Target/X86/AsmParser/X86Operand.h
+++ b/lib/Target/X86/AsmParser/X86Operand.h
@@ -260,6 +260,9 @@ struct X86Operand : public MCParsedAsmOperand {
return Kind == Memory && !getMemSegReg() && !getMemBaseReg() &&
!getMemIndexReg() && getMemScale() == 1;
}
+ bool isAVX512RC() const {
+ return isImm();
+ }
bool isAbsMem16() const {
return isAbsMem() && Mem.ModeSize == 16;
@@ -394,7 +397,10 @@ struct X86Operand : public MCParsedAsmOperand {
RegNo = getGR32FromGR64(RegNo);
Inst.addOperand(MCOperand::CreateReg(RegNo));
}
-
+ void addAVX512RCOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ addExpr(Inst, getImm());
+ }
void addImmOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
addExpr(Inst, getImm());
diff --git a/lib/Target/X86/Disassembler/X86Disassembler.cpp b/lib/Target/X86/Disassembler/X86Disassembler.cpp
index 99fb1ab..e8c5475 100644
--- a/lib/Target/X86/Disassembler/X86Disassembler.cpp
+++ b/lib/Target/X86/Disassembler/X86Disassembler.cpp
@@ -378,26 +378,28 @@ static void translateImmediate(MCInst &mcInst, uint64_t immediate,
unsigned NewOpc;
switch (mcInst.getOpcode()) {
default: llvm_unreachable("unexpected opcode");
- case X86::VCMPPDrmi: NewOpc = X86::VCMPPDrmi_alt; break;
- case X86::VCMPPDrri: NewOpc = X86::VCMPPDrri_alt; break;
- case X86::VCMPPSrmi: NewOpc = X86::VCMPPSrmi_alt; break;
- case X86::VCMPPSrri: NewOpc = X86::VCMPPSrri_alt; break;
- case X86::VCMPSDrm: NewOpc = X86::VCMPSDrm_alt; break;
- case X86::VCMPSDrr: NewOpc = X86::VCMPSDrr_alt; break;
- case X86::VCMPSSrm: NewOpc = X86::VCMPSSrm_alt; break;
- case X86::VCMPSSrr: NewOpc = X86::VCMPSSrr_alt; break;
- case X86::VCMPPDYrmi: NewOpc = X86::VCMPPDYrmi_alt; break;
- case X86::VCMPPDYrri: NewOpc = X86::VCMPPDYrri_alt; break;
- case X86::VCMPPSYrmi: NewOpc = X86::VCMPPSYrmi_alt; break;
- case X86::VCMPPSYrri: NewOpc = X86::VCMPPSYrri_alt; break;
- case X86::VCMPPDZrmi: NewOpc = X86::VCMPPDZrmi_alt; break;
- case X86::VCMPPDZrri: NewOpc = X86::VCMPPDZrri_alt; break;
- case X86::VCMPPSZrmi: NewOpc = X86::VCMPPSZrmi_alt; break;
- case X86::VCMPPSZrri: NewOpc = X86::VCMPPSZrri_alt; break;
- case X86::VCMPSDZrm: NewOpc = X86::VCMPSDZrmi_alt; break;
- case X86::VCMPSDZrr: NewOpc = X86::VCMPSDZrri_alt; break;
- case X86::VCMPSSZrm: NewOpc = X86::VCMPSSZrmi_alt; break;
- case X86::VCMPSSZrr: NewOpc = X86::VCMPSSZrri_alt; break;
+ case X86::VCMPPDrmi: NewOpc = X86::VCMPPDrmi_alt; break;
+ case X86::VCMPPDrri: NewOpc = X86::VCMPPDrri_alt; break;
+ case X86::VCMPPSrmi: NewOpc = X86::VCMPPSrmi_alt; break;
+ case X86::VCMPPSrri: NewOpc = X86::VCMPPSrri_alt; break;
+ case X86::VCMPSDrm: NewOpc = X86::VCMPSDrm_alt; break;
+ case X86::VCMPSDrr: NewOpc = X86::VCMPSDrr_alt; break;
+ case X86::VCMPSSrm: NewOpc = X86::VCMPSSrm_alt; break;
+ case X86::VCMPSSrr: NewOpc = X86::VCMPSSrr_alt; break;
+ case X86::VCMPPDYrmi: NewOpc = X86::VCMPPDYrmi_alt; break;
+ case X86::VCMPPDYrri: NewOpc = X86::VCMPPDYrri_alt; break;
+ case X86::VCMPPSYrmi: NewOpc = X86::VCMPPSYrmi_alt; break;
+ case X86::VCMPPSYrri: NewOpc = X86::VCMPPSYrri_alt; break;
+ case X86::VCMPPDZrmi: NewOpc = X86::VCMPPDZrmi_alt; break;
+ case X86::VCMPPDZrri: NewOpc = X86::VCMPPDZrri_alt; break;
+ case X86::VCMPPDZrrib: NewOpc = X86::VCMPPDZrrib_alt; break;
+ case X86::VCMPPSZrmi: NewOpc = X86::VCMPPSZrmi_alt; break;
+ case X86::VCMPPSZrri: NewOpc = X86::VCMPPSZrri_alt; break;
+ case X86::VCMPPSZrrib: NewOpc = X86::VCMPPSZrrib_alt; break;
+ case X86::VCMPSDZrm: NewOpc = X86::VCMPSDZrmi_alt; break;
+ case X86::VCMPSDZrr: NewOpc = X86::VCMPSDZrri_alt; break;
+ case X86::VCMPSSZrm: NewOpc = X86::VCMPSSZrmi_alt; break;
+ case X86::VCMPSSZrr: NewOpc = X86::VCMPSSZrri_alt; break;
}
// Switch opcode to the one that doesn't get special printing.
mcInst.setOpcode(NewOpc);
diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp
index 619a0d4..7c9e012 100644
--- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp
+++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp
@@ -310,11 +310,8 @@ static bool isPrefixAtLocation(struct InternalInstruction* insn,
uint8_t prefix,
uint64_t location)
{
- if (insn->prefixPresent[prefix] == 1 &&
- insn->prefixLocations[prefix] == location)
- return true;
- else
- return false;
+ return insn->prefixPresent[prefix] == 1 &&
+ insn->prefixLocations[prefix] == location;
}
/*
@@ -1458,6 +1455,8 @@ static int readModRM(struct InternalInstruction* insn) {
case TYPE_VK1: \
case TYPE_VK8: \
case TYPE_VK16: \
+ if (index > 7) \
+ *valid = 0; \
return prefix##_K0 + index; \
case TYPE_MM64: \
return prefix##_MM0 + (index & 0x7); \
diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h b/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h
index 70c6042..9e65050 100644
--- a/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h
+++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h
@@ -485,18 +485,6 @@ struct OperandSpecifier {
uint8_t type;
};
-// Indicates where the opcode modifier (if any) is to be found. Extended
-// opcodes with AddRegFrm have the opcode modifier in the ModR/M byte.
-#define MODIFIER_TYPES \
- ENUM_ENTRY(MODIFIER_NONE)
-
-#define ENUM_ENTRY(n) n,
-enum ModifierType {
- MODIFIER_TYPES
- MODIFIER_max
-};
-#undef ENUM_ENTRY
-
static const unsigned X86_MAX_OPERANDS = 6;
/// Decoding mode for the Intel disassembler. 16-bit, 32-bit, and 64-bit mode
diff --git a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
index 719b761..a400d46 100644
--- a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
@@ -76,8 +76,8 @@ class X86AsmBackend : public MCAsmBackend {
bool HasNopl;
const uint64_t MaxNopLength;
public:
- X86AsmBackend(const Target &T, StringRef _CPU)
- : MCAsmBackend(), CPU(_CPU), MaxNopLength(_CPU == "slm" ? 7 : 15) {
+ X86AsmBackend(const Target &T, StringRef CPU)
+ : MCAsmBackend(), CPU(CPU), MaxNopLength(CPU == "slm" ? 7 : 15) {
HasNopl = CPU != "generic" && CPU != "i386" && CPU != "i486" &&
CPU != "i586" && CPU != "pentium" && CPU != "pentium-mmx" &&
CPU != "i686" && CPU != "k6" && CPU != "k6-2" && CPU != "k6-3" &&
@@ -351,8 +351,8 @@ namespace {
class ELFX86AsmBackend : public X86AsmBackend {
public:
uint8_t OSABI;
- ELFX86AsmBackend(const Target &T, uint8_t _OSABI, StringRef CPU)
- : X86AsmBackend(T, CPU), OSABI(_OSABI) {}
+ ELFX86AsmBackend(const Target &T, uint8_t OSABI, StringRef CPU)
+ : X86AsmBackend(T, CPU), OSABI(OSABI) {}
};
class ELFX86_32AsmBackend : public ELFX86AsmBackend {
diff --git a/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp b/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp
index e8b0b4c..76a9d2b 100644
--- a/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp
@@ -38,231 +38,214 @@ X86ELFObjectWriter::X86ELFObjectWriter(bool IsELF64, uint8_t OSABI,
X86ELFObjectWriter::~X86ELFObjectWriter()
{}
-unsigned X86ELFObjectWriter::GetRelocType(const MCValue &Target,
- const MCFixup &Fixup,
- bool IsPCRel) const {
- // determine the type of the relocation
+enum X86_64RelType { RT64_64, RT64_32, RT64_32S, RT64_16, RT64_8 };
- MCSymbolRefExpr::VariantKind Modifier = Target.getAccessVariant();
- unsigned Type;
- if (getEMachine() == ELF::EM_X86_64) {
- if (IsPCRel) {
- switch ((unsigned)Fixup.getKind()) {
- default: llvm_unreachable("invalid fixup kind!");
-
- case FK_Data_8: Type = ELF::R_X86_64_PC64; break;
- case FK_Data_4: Type = ELF::R_X86_64_PC32; break;
- case FK_Data_2: Type = ELF::R_X86_64_PC16; break;
- case FK_Data_1: Type = ELF::R_X86_64_PC8; break;
+static X86_64RelType getType64(unsigned Kind,
+ MCSymbolRefExpr::VariantKind &Modifier,
+ bool &IsPCRel) {
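+ // The relocation choice is now two-staged: e.g. an FK_Data_4 fixup first
+ // classifies as RT64_32 here, then getRelocType64(VK_None, RT64_32, IsPCRel)
+ // resolves it to R_X86_64_PC32 or R_X86_64_32.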
+ switch (Kind) {
+ default:
+ llvm_unreachable("Unimplemented");
+ case X86::reloc_global_offset_table8:
+ Modifier = MCSymbolRefExpr::VK_GOT;
+ IsPCRel = true;
+ return RT64_64;
+ case FK_Data_8:
+ return RT64_64;
+ case X86::reloc_signed_4byte:
+ if (Modifier == MCSymbolRefExpr::VK_None && !IsPCRel)
+ return RT64_32S;
+ return RT64_32;
+ case X86::reloc_global_offset_table:
+ Modifier = MCSymbolRefExpr::VK_GOT;
+ IsPCRel = true;
+ return RT64_32;
+ case FK_Data_4:
+ case FK_PCRel_4:
+ case X86::reloc_riprel_4byte:
+ case X86::reloc_riprel_4byte_movq_load:
+ return RT64_32;
+ case FK_Data_2:
+ return RT64_16;
+ case FK_PCRel_1:
+ case FK_Data_1:
+ return RT64_8;
+ }
+}
- case FK_PCRel_8:
- assert(Modifier == MCSymbolRefExpr::VK_None);
- Type = ELF::R_X86_64_PC64;
- break;
- case X86::reloc_signed_4byte:
- case X86::reloc_riprel_4byte_movq_load:
- case X86::reloc_riprel_4byte:
- case FK_PCRel_4:
- switch (Modifier) {
- default:
- llvm_unreachable("Unimplemented");
- case MCSymbolRefExpr::VK_None:
- Type = ELF::R_X86_64_PC32;
- break;
- case MCSymbolRefExpr::VK_PLT:
- Type = ELF::R_X86_64_PLT32;
- break;
- case MCSymbolRefExpr::VK_GOTPCREL:
- Type = ELF::R_X86_64_GOTPCREL;
- break;
- case MCSymbolRefExpr::VK_GOTTPOFF:
- Type = ELF::R_X86_64_GOTTPOFF;
- break;
- case MCSymbolRefExpr::VK_TLSGD:
- Type = ELF::R_X86_64_TLSGD;
- break;
- case MCSymbolRefExpr::VK_TLSLD:
- Type = ELF::R_X86_64_TLSLD;
- break;
- }
- break;
- case FK_PCRel_2:
- assert(Modifier == MCSymbolRefExpr::VK_None);
- Type = ELF::R_X86_64_PC16;
- break;
- case FK_PCRel_1:
- assert(Modifier == MCSymbolRefExpr::VK_None);
- Type = ELF::R_X86_64_PC8;
- break;
- }
- } else {
- switch ((unsigned)Fixup.getKind()) {
- default: llvm_unreachable("invalid fixup kind!");
- case X86::reloc_global_offset_table8:
- Type = ELF::R_X86_64_GOTPC64;
- break;
- case X86::reloc_global_offset_table:
- Type = ELF::R_X86_64_GOTPC32;
- break;
- case FK_Data_8:
- switch (Modifier) {
- default:
- llvm_unreachable("Unimplemented");
- case MCSymbolRefExpr::VK_None:
- Type = ELF::R_X86_64_64;
- break;
- case MCSymbolRefExpr::VK_GOT:
- Type = ELF::R_X86_64_GOT64;
- break;
- case MCSymbolRefExpr::VK_GOTOFF:
- Type = ELF::R_X86_64_GOTOFF64;
- break;
- case MCSymbolRefExpr::VK_TPOFF:
- Type = ELF::R_X86_64_TPOFF64;
- break;
- case MCSymbolRefExpr::VK_DTPOFF:
- Type = ELF::R_X86_64_DTPOFF64;
- break;
- }
- break;
- case X86::reloc_signed_4byte:
- switch (Modifier) {
- default:
- llvm_unreachable("Unimplemented");
- case MCSymbolRefExpr::VK_None:
- Type = ELF::R_X86_64_32S;
- break;
- case MCSymbolRefExpr::VK_GOT:
- Type = ELF::R_X86_64_GOT32;
- break;
- case MCSymbolRefExpr::VK_GOTPCREL:
- Type = ELF::R_X86_64_GOTPCREL;
- break;
- case MCSymbolRefExpr::VK_TPOFF:
- Type = ELF::R_X86_64_TPOFF32;
- break;
- case MCSymbolRefExpr::VK_DTPOFF:
- Type = ELF::R_X86_64_DTPOFF32;
- break;
- }
- break;
- case FK_Data_4:
- Type = ELF::R_X86_64_32;
- break;
- case FK_Data_2: Type = ELF::R_X86_64_16; break;
- case FK_PCRel_1:
- case FK_Data_1: Type = ELF::R_X86_64_8; break;
- }
+static unsigned getRelocType64(MCSymbolRefExpr::VariantKind Modifier,
+ X86_64RelType Type, bool IsPCRel) {
+ switch (Modifier) {
+ default:
+ llvm_unreachable("Unimplemented");
+ case MCSymbolRefExpr::VK_None:
+ switch (Type) {
+ case RT64_64:
+ return IsPCRel ? ELF::R_X86_64_PC64 : ELF::R_X86_64_64;
+ case RT64_32:
+ return IsPCRel ? ELF::R_X86_64_PC32 : ELF::R_X86_64_32;
+ case RT64_32S:
+ return ELF::R_X86_64_32S;
+ case RT64_16:
+ return IsPCRel ? ELF::R_X86_64_PC16 : ELF::R_X86_64_16;
+ case RT64_8:
+ return IsPCRel ? ELF::R_X86_64_PC8 : ELF::R_X86_64_8;
}
- } else if (getEMachine() == ELF::EM_386) {
- if (IsPCRel) {
- switch ((unsigned)Fixup.getKind()) {
- default: llvm_unreachable("invalid fixup kind!");
-
- case X86::reloc_global_offset_table:
- Type = ELF::R_386_GOTPC;
- break;
-
- case FK_PCRel_1:
- case FK_Data_1:
- switch (Modifier) {
- default:
- llvm_unreachable("Unimplemented");
- case MCSymbolRefExpr::VK_None:
- Type = ELF::R_386_PC8;
- break;
- }
- break;
-
- case FK_PCRel_2:
- case FK_Data_2:
- switch (Modifier) {
- default:
- llvm_unreachable("Unimplemented");
- case MCSymbolRefExpr::VK_None:
- Type = ELF::R_386_PC16;
- break;
- }
- break;
+ case MCSymbolRefExpr::VK_GOT:
+ switch (Type) {
+ case RT64_64:
+ return IsPCRel ? ELF::R_X86_64_GOTPC64 : ELF::R_X86_64_GOT64;
+ case RT64_32:
+ return IsPCRel ? ELF::R_X86_64_GOTPC32 : ELF::R_X86_64_GOT32;
+ case RT64_32S:
+ case RT64_16:
+ case RT64_8:
+ llvm_unreachable("Unimplemented");
+ }
+ case MCSymbolRefExpr::VK_GOTOFF:
+ assert(Type == RT64_64);
+ assert(!IsPCRel);
+ return ELF::R_X86_64_GOTOFF64;
+ case MCSymbolRefExpr::VK_TPOFF:
+ assert(!IsPCRel);
+ switch (Type) {
+ case RT64_64:
+ return ELF::R_X86_64_TPOFF64;
+ case RT64_32:
+ return ELF::R_X86_64_TPOFF32;
+ case RT64_32S:
+ case RT64_16:
+ case RT64_8:
+ llvm_unreachable("Unimplemented");
+ }
+ case MCSymbolRefExpr::VK_DTPOFF:
+ assert(!IsPCRel);
+ switch (Type) {
+ case RT64_64:
+ return ELF::R_X86_64_DTPOFF64;
+ case RT64_32:
+ return ELF::R_X86_64_DTPOFF32;
+ case RT64_32S:
+ case RT64_16:
+ case RT64_8:
+ llvm_unreachable("Unimplemented");
+ }
+ case MCSymbolRefExpr::VK_SIZE:
+ assert(!IsPCRel);
+ switch (Type) {
+ case RT64_64:
+ return ELF::R_X86_64_SIZE64;
+ case RT64_32:
+ return ELF::R_X86_64_SIZE32;
+ case RT64_32S:
+ case RT64_16:
+ case RT64_8:
+ llvm_unreachable("Unimplemented");
+ }
+ case MCSymbolRefExpr::VK_TLSGD:
+ assert(Type == RT64_32);
+ return ELF::R_X86_64_TLSGD;
+ case MCSymbolRefExpr::VK_GOTTPOFF:
+ assert(Type == RT64_32);
+ return ELF::R_X86_64_GOTTPOFF;
+ case MCSymbolRefExpr::VK_TLSLD:
+ assert(Type == RT64_32);
+ return ELF::R_X86_64_TLSLD;
+ case MCSymbolRefExpr::VK_PLT:
+ assert(Type == RT64_32);
+ return ELF::R_X86_64_PLT32;
+ case MCSymbolRefExpr::VK_GOTPCREL:
+ assert(Type == RT64_32);
+ return ELF::R_X86_64_GOTPCREL;
+ }
+}
- case X86::reloc_signed_4byte:
- case FK_PCRel_4:
- case FK_Data_4:
- switch (Modifier) {
- default:
- llvm_unreachable("Unimplemented");
- case MCSymbolRefExpr::VK_None:
- Type = ELF::R_386_PC32;
- break;
- case MCSymbolRefExpr::VK_PLT:
- Type = ELF::R_386_PLT32;
- break;
- }
- break;
- }
- } else {
- switch ((unsigned)Fixup.getKind()) {
- default: llvm_unreachable("invalid fixup kind!");
+enum X86_32RelType { RT32_32, RT32_16, RT32_8 };
- case X86::reloc_global_offset_table:
- Type = ELF::R_386_GOTPC;
- break;
+static X86_32RelType getType32(X86_64RelType T) {
+ switch (T) {
+ case RT64_64:
+ llvm_unreachable("Unimplemented");
+ case RT64_32:
+ case RT64_32S:
+ return RT32_32;
+ case RT64_16:
+ return RT32_16;
+ case RT64_8:
+ return RT32_8;
+ }
+ llvm_unreachable("unexpected relocation type!");
+}
- // FIXME: Should we avoid selecting reloc_signed_4byte in 32 bit mode
- // instead?
- case X86::reloc_signed_4byte:
- case FK_PCRel_4:
- case FK_Data_4:
- switch (Modifier) {
- default:
- llvm_unreachable("Unimplemented");
- case MCSymbolRefExpr::VK_None:
- Type = ELF::R_386_32;
- break;
- case MCSymbolRefExpr::VK_GOT:
- Type = ELF::R_386_GOT32;
- break;
- case MCSymbolRefExpr::VK_PLT:
- Type = ELF::R_386_PLT32;
- break;
- case MCSymbolRefExpr::VK_GOTOFF:
- Type = ELF::R_386_GOTOFF;
- break;
- case MCSymbolRefExpr::VK_TLSGD:
- Type = ELF::R_386_TLS_GD;
- break;
- case MCSymbolRefExpr::VK_TPOFF:
- Type = ELF::R_386_TLS_LE_32;
- break;
- case MCSymbolRefExpr::VK_INDNTPOFF:
- Type = ELF::R_386_TLS_IE;
- break;
- case MCSymbolRefExpr::VK_NTPOFF:
- Type = ELF::R_386_TLS_LE;
- break;
- case MCSymbolRefExpr::VK_GOTNTPOFF:
- Type = ELF::R_386_TLS_GOTIE;
- break;
- case MCSymbolRefExpr::VK_TLSLDM:
- Type = ELF::R_386_TLS_LDM;
- break;
- case MCSymbolRefExpr::VK_DTPOFF:
- Type = ELF::R_386_TLS_LDO_32;
- break;
- case MCSymbolRefExpr::VK_GOTTPOFF:
- Type = ELF::R_386_TLS_IE_32;
- break;
- }
- break;
- case FK_Data_2: Type = ELF::R_386_16; break;
- case FK_PCRel_1:
- case FK_Data_1: Type = ELF::R_386_8; break;
- }
+static unsigned getRelocType32(MCSymbolRefExpr::VariantKind Modifier,
+ X86_32RelType Type, bool IsPCRel) {
+ switch (Modifier) {
+ default:
+ llvm_unreachable("Unimplemented");
+ case MCSymbolRefExpr::VK_None:
+ switch (Type) {
+ case RT32_32:
+ return IsPCRel ? ELF::R_386_PC32 : ELF::R_386_32;
+ case RT32_16:
+ return IsPCRel ? ELF::R_386_PC16 : ELF::R_386_16;
+ case RT32_8:
+ return IsPCRel ? ELF::R_386_PC8 : ELF::R_386_8;
}
- } else
- llvm_unreachable("Unsupported ELF machine type.");
+ case MCSymbolRefExpr::VK_GOT:
+ assert(Type == RT32_32);
+ return IsPCRel ? ELF::R_386_GOTPC : ELF::R_386_GOT32;
+ case MCSymbolRefExpr::VK_GOTOFF:
+ assert(Type == RT32_32);
+ assert(!IsPCRel);
+ return ELF::R_386_GOTOFF;
+ case MCSymbolRefExpr::VK_TPOFF:
+ assert(Type == RT32_32);
+ assert(!IsPCRel);
+ return ELF::R_386_TLS_LE_32;
+ case MCSymbolRefExpr::VK_DTPOFF:
+ assert(Type == RT32_32);
+ assert(!IsPCRel);
+ return ELF::R_386_TLS_LDO_32;
+ case MCSymbolRefExpr::VK_TLSGD:
+ assert(Type == RT32_32);
+ assert(!IsPCRel);
+ return ELF::R_386_TLS_GD;
+ case MCSymbolRefExpr::VK_GOTTPOFF:
+ assert(Type == RT32_32);
+ assert(!IsPCRel);
+ return ELF::R_386_TLS_IE_32;
+ case MCSymbolRefExpr::VK_PLT:
+ assert(Type == RT32_32);
+ return ELF::R_386_PLT32;
+ case MCSymbolRefExpr::VK_INDNTPOFF:
+ assert(Type == RT32_32);
+ assert(!IsPCRel);
+ return ELF::R_386_TLS_IE;
+ case MCSymbolRefExpr::VK_NTPOFF:
+ assert(Type == RT32_32);
+ assert(!IsPCRel);
+ return ELF::R_386_TLS_LE;
+ case MCSymbolRefExpr::VK_GOTNTPOFF:
+ assert(Type == RT32_32);
+ assert(!IsPCRel);
+ return ELF::R_386_TLS_GOTIE;
+ case MCSymbolRefExpr::VK_TLSLDM:
+ assert(Type == RT32_32);
+ assert(!IsPCRel);
+ return ELF::R_386_TLS_LDM;
+ }
+}
+
+unsigned X86ELFObjectWriter::GetRelocType(const MCValue &Target,
+ const MCFixup &Fixup,
+ bool IsPCRel) const {
+ MCSymbolRefExpr::VariantKind Modifier = Target.getAccessVariant();
+ X86_64RelType Type = getType64(Fixup.getKind(), Modifier, IsPCRel);
+ if (getEMachine() == ELF::EM_X86_64)
+ return getRelocType64(Modifier, Type, IsPCRel);
- return Type;
+ assert(getEMachine() == ELF::EM_386 && "Unsupported ELF machine type.");
+ return getRelocType32(Modifier, getType32(Type), IsPCRel);
}
MCObjectWriter *llvm::createX86ELFObjectWriter(raw_ostream &OS,
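
A rough standalone model of the two-step scheme this rewrite introduces: first classify the fixup into a size class (getType64), then combine that class with the symbol modifier and PC-relativity to pick the relocation (getRelocType64). The enums and names below are illustrative reductions, not the LLVM types:

#include <cstdio>

enum RelSize { R64, R32, R32S, R16, R8 }; // mirrors X86_64RelType
enum Modifier { None, PLT, GOTPCREL };    // a small subset of the VariantKinds

// Step two of the patch's scheme: modifier x size class x PC-relativity
// picks the final relocation. Only a few rows of the table are shown.
static const char *relocName64(Modifier M, RelSize T, bool IsPCRel) {
  switch (M) {
  case None:
    switch (T) {
    case R64:  return IsPCRel ? "R_X86_64_PC64" : "R_X86_64_64";
    case R32:  return IsPCRel ? "R_X86_64_PC32" : "R_X86_64_32";
    case R32S: return "R_X86_64_32S";
    case R16:  return IsPCRel ? "R_X86_64_PC16" : "R_X86_64_16";
    case R8:   return IsPCRel ? "R_X86_64_PC8"  : "R_X86_64_8";
    }
    break;
  case PLT:      return "R_X86_64_PLT32";   // asserts T == R32 in the patch
  case GOTPCREL: return "R_X86_64_GOTPCREL";
  }
  return "unknown";
}

int main() {
  // A 4-byte PC-relative fixup with no modifier resolves to PC32...
  std::printf("%s\n", relocName64(None, R32, true)); // R_X86_64_PC32
  // ...and the same fixup through the PLT resolves to PLT32.
  std::printf("%s\n", relocName64(PLT, R32, true));  // R_X86_64_PLT32
}
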
diff --git a/lib/Target/X86/MCTargetDesc/X86ELFRelocationInfo.cpp b/lib/Target/X86/MCTargetDesc/X86ELFRelocationInfo.cpp
index b679316..10b83f4 100644
--- a/lib/Target/X86/MCTargetDesc/X86ELFRelocationInfo.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86ELFRelocationInfo.cpp
@@ -36,7 +36,7 @@ public:
MCSymbol *Sym = Ctx.GetOrCreateSymbol(SymName);
// FIXME: check that the value is actually the same.
- if (Sym->isVariable() == false)
+ if (!Sym->isVariable())
Sym->setVariableValue(MCConstantExpr::Create(SymAddr, Ctx));
const MCExpr *Expr = nullptr;
diff --git a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
index 3ad8ab1..9b98a3e 100644
--- a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
@@ -168,10 +168,8 @@ public:
} // end anonymous namespace
-
MCCodeEmitter *llvm::createX86MCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI,
MCContext &Ctx) {
return new X86MCCodeEmitter(MCII, Ctx);
}
diff --git a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
index 0e7b4e5..0946326 100644
--- a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
@@ -55,143 +55,6 @@ std::string X86_MC::ParseX86Triple(StringRef TT) {
return FS;
}
-/// GetCpuIDAndInfo - Execute the specified cpuid and return the 4 values in the
-/// specified arguments. If we can't run cpuid on the host, return true.
-bool X86_MC::GetCpuIDAndInfo(unsigned value, unsigned *rEAX,
- unsigned *rEBX, unsigned *rECX, unsigned *rEDX) {
-#if defined(__x86_64__) || defined(_M_AMD64) || defined (_M_X64)
- #if defined(__GNUC__)
- // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually.
- asm ("movq\t%%rbx, %%rsi\n\t"
- "cpuid\n\t"
- "xchgq\t%%rbx, %%rsi\n\t"
- : "=a" (*rEAX),
- "=S" (*rEBX),
- "=c" (*rECX),
- "=d" (*rEDX)
- : "a" (value));
- return false;
- #elif defined(_MSC_VER)
- int registers[4];
- __cpuid(registers, value);
- *rEAX = registers[0];
- *rEBX = registers[1];
- *rECX = registers[2];
- *rEDX = registers[3];
- return false;
- #else
- return true;
- #endif
-#elif defined(i386) || defined(__i386__) || defined(__x86__) || defined(_M_IX86)
- #if defined(__GNUC__)
- asm ("movl\t%%ebx, %%esi\n\t"
- "cpuid\n\t"
- "xchgl\t%%ebx, %%esi\n\t"
- : "=a" (*rEAX),
- "=S" (*rEBX),
- "=c" (*rECX),
- "=d" (*rEDX)
- : "a" (value));
- return false;
- #elif defined(_MSC_VER)
- __asm {
- mov eax,value
- cpuid
- mov esi,rEAX
- mov dword ptr [esi],eax
- mov esi,rEBX
- mov dword ptr [esi],ebx
- mov esi,rECX
- mov dword ptr [esi],ecx
- mov esi,rEDX
- mov dword ptr [esi],edx
- }
- return false;
- #else
- return true;
- #endif
-#else
- return true;
-#endif
-}
-
-/// GetCpuIDAndInfoEx - Execute the specified cpuid with subleaf and return the
-/// 4 values in the specified arguments. If we can't run cpuid on the host,
-/// return true.
-bool X86_MC::GetCpuIDAndInfoEx(unsigned value, unsigned subleaf, unsigned *rEAX,
- unsigned *rEBX, unsigned *rECX, unsigned *rEDX) {
-#if defined(__x86_64__) || defined(_M_AMD64) || defined (_M_X64)
- #if defined(__GNUC__)
- // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually.
- asm ("movq\t%%rbx, %%rsi\n\t"
- "cpuid\n\t"
- "xchgq\t%%rbx, %%rsi\n\t"
- : "=a" (*rEAX),
- "=S" (*rEBX),
- "=c" (*rECX),
- "=d" (*rEDX)
- : "a" (value),
- "c" (subleaf));
- return false;
- #elif defined(_MSC_VER)
- int registers[4];
- __cpuidex(registers, value, subleaf);
- *rEAX = registers[0];
- *rEBX = registers[1];
- *rECX = registers[2];
- *rEDX = registers[3];
- return false;
- #else
- return true;
- #endif
-#elif defined(i386) || defined(__i386__) || defined(__x86__) || defined(_M_IX86)
- #if defined(__GNUC__)
- asm ("movl\t%%ebx, %%esi\n\t"
- "cpuid\n\t"
- "xchgl\t%%ebx, %%esi\n\t"
- : "=a" (*rEAX),
- "=S" (*rEBX),
- "=c" (*rECX),
- "=d" (*rEDX)
- : "a" (value),
- "c" (subleaf));
- return false;
- #elif defined(_MSC_VER)
- __asm {
- mov eax,value
- mov ecx,subleaf
- cpuid
- mov esi,rEAX
- mov dword ptr [esi],eax
- mov esi,rEBX
- mov dword ptr [esi],ebx
- mov esi,rECX
- mov dword ptr [esi],ecx
- mov esi,rEDX
- mov dword ptr [esi],edx
- }
- return false;
- #else
- return true;
- #endif
-#else
- return true;
-#endif
-}
-
-void X86_MC::DetectFamilyModel(unsigned EAX, unsigned &Family,
- unsigned &Model) {
- Family = (EAX >> 8) & 0xf; // Bits 8 - 11
- Model = (EAX >> 4) & 0xf; // Bits 4 - 7
- if (Family == 6 || Family == 0xf) {
- if (Family == 0xf)
- // Examine extended family ID if family ID is F.
- Family += (EAX >> 20) & 0xff; // Bits 20 - 27
- // Examine extended model ID if family ID is 6 or F.
- Model += ((EAX >> 16) & 0xf) << 4; // Bits 16 - 19
- }
-}
-
unsigned X86_MC::getDwarfRegFlavour(Triple TT, bool isEH) {
if (TT.getArch() == Triple::x86_64)
return DWARFFlavour::X86_64;
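
The deleted helpers wrapped the raw cpuid instruction; host feature detection now lives elsewhere, so they leave the MC layer. For reference, the same rbx-preserving pattern in standalone form (assumes GCC or Clang on x86-64; this mirrors the removed code, it is not an LLVM API):

#include <cstdio>

// Execute cpuid for the given leaf. rbx is callee-saved in the SysV ABI and
// older gcc did not model it as clobbered, so the removed helpers stashed it
// in rsi around the instruction; the same dance is kept here.
static void cpuid(unsigned Leaf, unsigned &A, unsigned &B, unsigned &C,
                  unsigned &D) {
  asm("movq\t%%rbx, %%rsi\n\t"
      "cpuid\n\t"
      "xchgq\t%%rbx, %%rsi\n\t"
      : "=a"(A), "=S"(B), "=c"(C), "=d"(D)
      : "a"(Leaf));
}

int main() {
  unsigned A, B, C, D;
  cpuid(0, A, B, C, D); // leaf 0: max leaf in eax, vendor id in ebx/edx/ecx
  std::printf("max cpuid leaf: %u\n", A);
}
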
@@ -344,24 +207,6 @@ static MCCodeGenInfo *createX86MCCodeGenInfo(StringRef TT, Reloc::Model RM,
return X;
}
-static MCStreamer *createMCStreamer(const Target &T, StringRef TT,
- MCContext &Ctx, MCAsmBackend &MAB,
- raw_ostream &_OS, MCCodeEmitter *_Emitter,
- const MCSubtargetInfo &STI, bool RelaxAll) {
- Triple TheTriple(TT);
-
- switch (TheTriple.getObjectFormat()) {
- default: llvm_unreachable("unsupported object format");
- case Triple::MachO:
- return createMachOStreamer(Ctx, MAB, _OS, _Emitter, RelaxAll);
- case Triple::COFF:
- assert(TheTriple.isOSWindows() && "only Windows COFF is supported");
- return createX86WinCOFFStreamer(Ctx, MAB, _Emitter, _OS, RelaxAll);
- case Triple::ELF:
- return createELFStreamer(Ctx, MAB, _OS, _Emitter, RelaxAll);
- }
-}
-
static MCInstPrinter *createX86MCInstPrinter(const Target &T,
unsigned SyntaxVariant,
const MCAsmInfo &MAI,
@@ -392,61 +237,42 @@ static MCInstrAnalysis *createX86MCInstrAnalysis(const MCInstrInfo *Info) {
// Force static initialization.
extern "C" void LLVMInitializeX86TargetMC() {
- // Register the MC asm info.
- RegisterMCAsmInfoFn A(TheX86_32Target, createX86MCAsmInfo);
- RegisterMCAsmInfoFn B(TheX86_64Target, createX86MCAsmInfo);
-
- // Register the MC codegen info.
- RegisterMCCodeGenInfoFn C(TheX86_32Target, createX86MCCodeGenInfo);
- RegisterMCCodeGenInfoFn D(TheX86_64Target, createX86MCCodeGenInfo);
-
- // Register the MC instruction info.
- TargetRegistry::RegisterMCInstrInfo(TheX86_32Target, createX86MCInstrInfo);
- TargetRegistry::RegisterMCInstrInfo(TheX86_64Target, createX86MCInstrInfo);
-
- // Register the MC register info.
- TargetRegistry::RegisterMCRegInfo(TheX86_32Target, createX86MCRegisterInfo);
- TargetRegistry::RegisterMCRegInfo(TheX86_64Target, createX86MCRegisterInfo);
-
- // Register the MC subtarget info.
- TargetRegistry::RegisterMCSubtargetInfo(TheX86_32Target,
- X86_MC::createX86MCSubtargetInfo);
- TargetRegistry::RegisterMCSubtargetInfo(TheX86_64Target,
- X86_MC::createX86MCSubtargetInfo);
-
- // Register the MC instruction analyzer.
- TargetRegistry::RegisterMCInstrAnalysis(TheX86_32Target,
- createX86MCInstrAnalysis);
- TargetRegistry::RegisterMCInstrAnalysis(TheX86_64Target,
- createX86MCInstrAnalysis);
-
- // Register the code emitter.
- TargetRegistry::RegisterMCCodeEmitter(TheX86_32Target,
- createX86MCCodeEmitter);
- TargetRegistry::RegisterMCCodeEmitter(TheX86_64Target,
- createX86MCCodeEmitter);
+ for (Target *T : {&TheX86_32Target, &TheX86_64Target}) {
+ // Register the MC asm info.
+ RegisterMCAsmInfoFn X(*T, createX86MCAsmInfo);
+
+ // Register the MC codegen info.
+ RegisterMCCodeGenInfoFn Y(*T, createX86MCCodeGenInfo);
+
+ // Register the MC instruction info.
+ TargetRegistry::RegisterMCInstrInfo(*T, createX86MCInstrInfo);
+
+ // Register the MC register info.
+ TargetRegistry::RegisterMCRegInfo(*T, createX86MCRegisterInfo);
+
+ // Register the MC subtarget info.
+ TargetRegistry::RegisterMCSubtargetInfo(*T,
+ X86_MC::createX86MCSubtargetInfo);
+
+ // Register the MC instruction analyzer.
+ TargetRegistry::RegisterMCInstrAnalysis(*T, createX86MCInstrAnalysis);
+
+ // Register the code emitter.
+ TargetRegistry::RegisterMCCodeEmitter(*T, createX86MCCodeEmitter);
+
+ // Register the object streamer.
+ TargetRegistry::RegisterCOFFStreamer(*T, createX86WinCOFFStreamer);
+
+ // Register the MCInstPrinter.
+ TargetRegistry::RegisterMCInstPrinter(*T, createX86MCInstPrinter);
+
+ // Register the MC relocation info.
+ TargetRegistry::RegisterMCRelocationInfo(*T, createX86MCRelocationInfo);
+ }
// Register the asm backend.
TargetRegistry::RegisterMCAsmBackend(TheX86_32Target,
createX86_32AsmBackend);
TargetRegistry::RegisterMCAsmBackend(TheX86_64Target,
createX86_64AsmBackend);
-
- // Register the object streamer.
- TargetRegistry::RegisterMCObjectStreamer(TheX86_32Target,
- createMCStreamer);
- TargetRegistry::RegisterMCObjectStreamer(TheX86_64Target,
- createMCStreamer);
-
- // Register the MCInstPrinter.
- TargetRegistry::RegisterMCInstPrinter(TheX86_32Target,
- createX86MCInstPrinter);
- TargetRegistry::RegisterMCInstPrinter(TheX86_64Target,
- createX86MCInstPrinter);
-
- // Register the MC relocation info.
- TargetRegistry::RegisterMCRelocationInfo(TheX86_32Target,
- createX86MCRelocationInfo);
- TargetRegistry::RegisterMCRelocationInfo(TheX86_64Target,
- createX86MCRelocationInfo);
}
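
A stripped-down illustration of the refactor above: the per-target duplicate registration calls collapse into one range-for over a brace-initialized list of target pointers, so adding a component costs one line instead of one per target. The types here are placeholders, not TargetRegistry:

#include <cstdio>
#include <initializer_list>

struct Target { const char *Name; };

static void registerAsmInfo(Target &T)   { std::printf("asm info: %s\n", T.Name); }
static void registerInstrInfo(Target &T) { std::printf("instr info: %s\n", T.Name); }

int main() {
  Target X86_32{"x86"}, X86_64{"x86-64"};
  // One loop body registers every component for both targets.
  for (Target *T : {&X86_32, &X86_64}) {
    registerAsmInfo(*T);
    registerInstrInfo(*T);
  }
}
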
diff --git a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h
index d8320b9..6f50f11 100644
--- a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h
+++ b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h
@@ -53,18 +53,6 @@ namespace N86 {
namespace X86_MC {
std::string ParseX86Triple(StringRef TT);
- /// GetCpuIDAndInfo - Execute the specified cpuid and return the 4 values in
- /// the specified arguments. If we can't run cpuid on the host, return true.
- bool GetCpuIDAndInfo(unsigned value, unsigned *rEAX,
- unsigned *rEBX, unsigned *rECX, unsigned *rEDX);
- /// GetCpuIDAndInfoEx - Execute the specified cpuid with subleaf and return
- /// the 4 values in the specified arguments. If we can't run cpuid on the
- /// host, return true.
- bool GetCpuIDAndInfoEx(unsigned value, unsigned subleaf, unsigned *rEAX,
- unsigned *rEBX, unsigned *rECX, unsigned *rEDX);
-
- void DetectFamilyModel(unsigned EAX, unsigned &Family, unsigned &Model);
-
unsigned getDwarfRegFlavour(Triple TT, bool isEH);
void InitLLVM2SEHRegisterMapping(MCRegisterInfo *MRI);
@@ -78,7 +66,6 @@ namespace X86_MC {
MCCodeEmitter *createX86MCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI,
MCContext &Ctx);
MCAsmBackend *createX86_32AsmBackend(const Target &T, const MCRegisterInfo &MRI,
@@ -86,12 +73,12 @@ MCAsmBackend *createX86_32AsmBackend(const Target &T, const MCRegisterInfo &MRI,
MCAsmBackend *createX86_64AsmBackend(const Target &T, const MCRegisterInfo &MRI,
StringRef TT, StringRef CPU);
-/// createX86WinCOFFStreamer - Construct an X86 Windows COFF machine code
-/// streamer which will generate PE/COFF format object files.
+/// Construct an X86 Windows COFF machine code streamer which will generate
+/// PE/COFF format object files.
///
/// Takes ownership of \p AB and \p CE.
MCStreamer *createX86WinCOFFStreamer(MCContext &C, MCAsmBackend &AB,
- MCCodeEmitter *CE, raw_ostream &OS,
+ raw_ostream &OS, MCCodeEmitter *CE,
bool RelaxAll);
/// createX86MachObjectWriter - Construct an X86 Mach-O object writer.
diff --git a/lib/Target/X86/MCTargetDesc/X86MachORelocationInfo.cpp b/lib/Target/X86/MCTargetDesc/X86MachORelocationInfo.cpp
index 3b81d53..81749fc 100644
--- a/lib/Target/X86/MCTargetDesc/X86MachORelocationInfo.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MachORelocationInfo.cpp
@@ -38,7 +38,7 @@ public:
MCSymbol *Sym = Ctx.GetOrCreateSymbol(SymName);
// FIXME: check that the value is actually the same.
- if (Sym->isVariable() == false)
+ if (!Sym->isVariable())
Sym->setVariableValue(MCConstantExpr::Create(SymAddr, Ctx));
const MCExpr *Expr = nullptr;
@@ -93,7 +93,7 @@ public:
RSymI->getName(RSymName);
MCSymbol *RSym = Ctx.GetOrCreateSymbol(RSymName);
- if (RSym->isVariable() == false)
+ if (!RSym->isVariable())
RSym->setVariableValue(MCConstantExpr::Create(RSymAddr, Ctx));
const MCExpr *RHS = MCSymbolRefExpr::Create(RSym, Ctx);
diff --git a/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp b/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp
index 5f1596c..5690efe 100644
--- a/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp
@@ -48,13 +48,11 @@ void X86WinCOFFStreamer::FinishImpl() {
}
}
-namespace llvm {
-MCStreamer *createX86WinCOFFStreamer(MCContext &C, MCAsmBackend &AB,
- MCCodeEmitter *CE, raw_ostream &OS,
- bool RelaxAll) {
+MCStreamer *llvm::createX86WinCOFFStreamer(MCContext &C, MCAsmBackend &AB,
+ raw_ostream &OS, MCCodeEmitter *CE,
+ bool RelaxAll) {
X86WinCOFFStreamer *S = new X86WinCOFFStreamer(C, AB, CE, OS);
S->getAssembler().setRelaxAll(RelaxAll);
return S;
}
-}
diff --git a/lib/Target/X86/README-SSE.txt b/lib/Target/X86/README-SSE.txt
index 71329b0..e6896e8 100644
--- a/lib/Target/X86/README-SSE.txt
+++ b/lib/Target/X86/README-SSE.txt
@@ -93,36 +93,6 @@ The pattern isel got this one right.
//===---------------------------------------------------------------------===//
-SSE should implement 'select_cc' using 'emulated conditional moves' that use
-pcmp/pand/pandn/por to do a selection instead of a conditional branch:
-
-double %X(double %Y, double %Z, double %A, double %B) {
- %C = setlt double %A, %B
- %z = fadd double %Z, 0.0 ;; select operand is not a load
- %D = select bool %C, double %Y, double %z
- ret double %D
-}
-
-We currently emit:
-
-_X:
- subl $12, %esp
- xorpd %xmm0, %xmm0
- addsd 24(%esp), %xmm0
- movsd 32(%esp), %xmm1
- movsd 16(%esp), %xmm2
- ucomisd 40(%esp), %xmm1
- jb LBB_X_2
-LBB_X_1:
- movsd %xmm0, %xmm2
-LBB_X_2:
- movsd %xmm2, (%esp)
- fldl (%esp)
- addl $12, %esp
- ret
-
-//===---------------------------------------------------------------------===//
-
Lower memcpy / memset to a series of SSE 128 bit move instructions when it's
feasible.
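
On the note above, a sketch of what a series of SSE 128-bit moves looks like when written with intrinsics (assumes SSE2 and a size that is a multiple of 16; purely illustrative, not what the backend would emit):

#include <cstddef>
#include <cstdio>
#include <emmintrin.h> // SSE2

// Copy N bytes (assumed to be a multiple of 16) as unaligned 128-bit moves.
static void copySSE(void *Dst, const void *Src, size_t N) {
  char *D = static_cast<char *>(Dst);
  const char *S = static_cast<const char *>(Src);
  for (size_t I = 0; I != N; I += 16) {
    __m128i V = _mm_loadu_si128(reinterpret_cast<const __m128i *>(S + I));
    _mm_storeu_si128(reinterpret_cast<__m128i *>(D + I), V);
  }
}

int main() {
  char Src[32] = "0123456789abcdef0123456789abcde";
  char Dst[32] = {};
  copySSE(Dst, Src, sizeof(Src));
  std::printf("%s\n", Dst);
}
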
@@ -787,25 +757,6 @@ cheaper to do fld1 than load from a constant pool for example, so
//===---------------------------------------------------------------------===//
-The X86 backend should be able to if-convert SSE comparisons like "ucomisd" to
-"cmpsd". For example, this code:
-
-double d1(double x) { return x == x ? x : x + x; }
-
-Compiles into:
-
-_d1:
- ucomisd %xmm0, %xmm0
- jnp LBB1_2
- addsd %xmm0, %xmm0
- ret
-LBB1_2:
- ret
-
-Also, the 'ret's should be shared. This is PR6032.
-
-//===---------------------------------------------------------------------===//
-
These should compile into the same code (PR6214): Perhaps instcombine should
canonicalize the former into the latter?
@@ -858,35 +809,6 @@ doing a shuffle from v[1] to v[0] then a float store.
//===---------------------------------------------------------------------===//
-On SSE4 machines, we compile this code:
-
-define <2 x float> @test2(<2 x float> %Q, <2 x float> %R,
- <2 x float> *%P) nounwind {
- %Z = fadd <2 x float> %Q, %R
-
- store <2 x float> %Z, <2 x float> *%P
- ret <2 x float> %Z
-}
-
-into:
-
-_test2: ## @test2
-## BB#0:
- insertps $0, %xmm2, %xmm2
- insertps $16, %xmm3, %xmm2
- insertps $0, %xmm0, %xmm3
- insertps $16, %xmm1, %xmm3
- addps %xmm2, %xmm3
- movq %xmm3, (%rdi)
- movaps %xmm3, %xmm0
- pshufd $1, %xmm3, %xmm1
- ## kill: XMM1<def> XMM1<kill>
- ret
-
-The insertps's of $0 are pointless complex copies.
-
-//===---------------------------------------------------------------------===//
-
[UNSAFE FP]
void foo(double, double, double);
diff --git a/lib/Target/X86/X86AsmPrinter.cpp b/lib/Target/X86/X86AsmPrinter.cpp
index bb0b9ce..f6033a7 100644
--- a/lib/Target/X86/X86AsmPrinter.cpp
+++ b/lib/Target/X86/X86AsmPrinter.cpp
@@ -63,9 +63,6 @@ bool X86AsmPrinter::runOnMachineFunction(MachineFunction &MF) {
OutStreamer.EndCOFFSymbolDef();
}
- // Have common code print out the function header with linkage info etc.
- EmitFunctionHeader();
-
// Emit the rest of the function body.
EmitFunctionBody();
diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp
index a17f052..cba140f 100644
--- a/lib/Target/X86/X86FastISel.cpp
+++ b/lib/Target/X86/X86FastISel.cpp
@@ -84,7 +84,7 @@ private:
bool X86FastEmitCompare(const Value *LHS, const Value *RHS, EVT VT, DebugLoc DL);
bool X86FastEmitLoad(EVT VT, const X86AddressMode &AM, MachineMemOperand *MMO,
- unsigned &ResultReg);
+ unsigned &ResultReg, unsigned Alignment = 1);
bool X86FastEmitStore(EVT VT, const Value *Val, const X86AddressMode &AM,
MachineMemOperand *MMO = nullptr, bool Aligned = false);
@@ -327,7 +327,8 @@ bool X86FastISel::isTypeLegal(Type *Ty, MVT &VT, bool AllowI1) {
/// The address is either pre-computed, i.e. Ptr, or a GlobalAddress, i.e. GV.
/// Return true and the result register by reference if it is possible.
bool X86FastISel::X86FastEmitLoad(EVT VT, const X86AddressMode &AM,
- MachineMemOperand *MMO, unsigned &ResultReg) {
+ MachineMemOperand *MMO, unsigned &ResultReg,
+ unsigned Alignment) {
// Get opcode and regclass of the output for the given load instruction.
unsigned Opc = 0;
const TargetRegisterClass *RC = nullptr;
@@ -372,6 +373,30 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, const X86AddressMode &AM,
case MVT::f80:
// No f80 support yet.
return false;
+ case MVT::v4f32:
+ if (Alignment >= 16)
+ Opc = Subtarget->hasAVX() ? X86::VMOVAPSrm : X86::MOVAPSrm;
+ else
+ Opc = Subtarget->hasAVX() ? X86::VMOVUPSrm : X86::MOVUPSrm;
+ RC = &X86::VR128RegClass;
+ break;
+ case MVT::v2f64:
+ if (Alignment >= 16)
+ Opc = Subtarget->hasAVX() ? X86::VMOVAPDrm : X86::MOVAPDrm;
+ else
+ Opc = Subtarget->hasAVX() ? X86::VMOVUPDrm : X86::MOVUPDrm;
+ RC = &X86::VR128RegClass;
+ break;
+ case MVT::v4i32:
+ case MVT::v2i64:
+ case MVT::v8i16:
+ case MVT::v16i8:
+ if (Alignment >= 16)
+ Opc = Subtarget->hasAVX() ? X86::VMOVDQArm : X86::MOVDQArm;
+ else
+ Opc = Subtarget->hasAVX() ? X86::VMOVDQUrm : X86::MOVDQUrm;
+ RC = &X86::VR128RegClass;
+ break;
}
ResultReg = createResultReg(RC);
@@ -1068,8 +1093,14 @@ bool X86FastISel::X86SelectLoad(const Instruction *I) {
if (!X86SelectAddress(Ptr, AM))
return false;
+ unsigned Alignment = LI->getAlignment();
+ unsigned ABIAlignment = DL.getABITypeAlignment(LI->getType());
+ if (Alignment == 0) // Ensure that codegen never sees alignment 0
+ Alignment = ABIAlignment;
+
unsigned ResultReg = 0;
- if (!X86FastEmitLoad(VT, AM, createMachineMemOperandFor(LI), ResultReg))
+ if (!X86FastEmitLoad(VT, AM, createMachineMemOperandFor(LI), ResultReg,
+ Alignment))
return false;
updateValueMap(I, ResultReg);
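
The zero-alignment fallback matters because an IR load with alignment 0 means "use the ABI alignment of the type"; only after normalizing can the new vector cases safely prefer the aligned move. A small model of that decision, with opcode names as strings for illustration:

#include <cstdio>

// Alignment 0 on an IR load means "use the ABI alignment of the type";
// normalize it first, then pick the aligned move only when 16-byte
// alignment is actually known.
static const char *chooseV4F32Load(unsigned Align, unsigned ABIAlign,
                                   bool HasAVX) {
  if (Align == 0)
    Align = ABIAlign;
  if (Align >= 16)
    return HasAVX ? "VMOVAPSrm" : "MOVAPSrm";
  return HasAVX ? "VMOVUPSrm" : "MOVUPSrm";
}

int main() {
  std::printf("%s\n", chooseV4F32Load(0, 16, false)); // MOVAPSrm
  std::printf("%s\n", chooseV4F32Load(4, 16, true));  // VMOVUPSrm
}
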
@@ -1094,20 +1125,30 @@ static unsigned X86ChooseCmpOpcode(EVT VT, const X86Subtarget *Subtarget) {
}
}
-/// X86ChooseCmpImmediateOpcode - If we have a comparison with RHS as the RHS
-/// of the comparison, return an opcode that works for the compare (e.g.
-/// CMP32ri) otherwise return 0.
+/// If we have a constant RHS for the comparison, return an opcode that works
+/// for the compare (e.g. CMP32ri); otherwise return 0.
static unsigned X86ChooseCmpImmediateOpcode(EVT VT, const ConstantInt *RHSC) {
+ int64_t Val = RHSC->getSExtValue();
switch (VT.getSimpleVT().SimpleTy) {
// Otherwise, we can't fold the immediate into this comparison.
- default: return 0;
- case MVT::i8: return X86::CMP8ri;
- case MVT::i16: return X86::CMP16ri;
- case MVT::i32: return X86::CMP32ri;
+ default:
+ return 0;
+ case MVT::i8:
+ return X86::CMP8ri;
+ case MVT::i16:
+ if (isInt<8>(Val))
+ return X86::CMP16ri8;
+ return X86::CMP16ri;
+ case MVT::i32:
+ if (isInt<8>(Val))
+ return X86::CMP32ri8;
+ return X86::CMP32ri;
case MVT::i64:
+ if (isInt<8>(Val))
+ return X86::CMP64ri8;
// 64-bit comparisons are only valid if the immediate fits in a 32-bit sext
// field.
- if ((int)RHSC->getSExtValue() == RHSC->getSExtValue())
+ if (isInt<32>(Val))
return X86::CMP64ri32;
return 0;
}
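
A minimal model of the isInt<N> checks added here: an immediate representable as a signed 8-bit value can use the shorter CMPxxri8 encoding (one immediate byte instead of four). isIntN below is a hand-rolled stand-in for llvm::isInt:

#include <cstdint>
#include <cstdio>

// True if Val is representable as a signed N-bit integer, i.e.
// -2^(N-1) <= Val < 2^(N-1).
template <unsigned N> static bool isIntN(int64_t Val) {
  return Val >= -(INT64_C(1) << (N - 1)) && Val < (INT64_C(1) << (N - 1));
}

static const char *chooseCmp32(int64_t Imm) {
  // The sign-extended 8-bit form saves three immediate bytes when it fits.
  return isIntN<8>(Imm) ? "CMP32ri8" : "CMP32ri";
}

int main() {
  std::printf("%s\n", chooseCmp32(100));  // CMP32ri8
  std::printf("%s\n", chooseCmp32(1000)); // CMP32ri
}
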
@@ -1810,11 +1851,11 @@ bool X86FastISel::X86FastEmitCMoveSelect(MVT RetVT, const Instruction *I) {
return true;
}
-/// \brief Emit SSE instructions to lower the select.
+/// \brief Emit SSE or AVX instructions to lower the select.
///
/// Try to use SSE1/SSE2 instructions to simulate a select without branches.
/// This lowers fp selects into a CMP/AND/ANDN/OR sequence when the necessary
-/// SSE instructions are available.
+/// SSE instructions are available. If AVX is available, try to use a VBLENDV.
bool X86FastISel::X86FastEmitSSESelect(MVT RetVT, const Instruction *I) {
// Optimize conditions coming from a compare if both instructions are in the
// same basic block (values defined in other basic blocks may not have
@@ -1850,19 +1891,17 @@ bool X86FastISel::X86FastEmitSSESelect(MVT RetVT, const Instruction *I) {
if (NeedSwap)
std::swap(CmpLHS, CmpRHS);
- static unsigned OpcTable[2][2][4] = {
- { { X86::CMPSSrr, X86::FsANDPSrr, X86::FsANDNPSrr, X86::FsORPSrr },
- { X86::VCMPSSrr, X86::VFsANDPSrr, X86::VFsANDNPSrr, X86::VFsORPSrr } },
- { { X86::CMPSDrr, X86::FsANDPDrr, X86::FsANDNPDrr, X86::FsORPDrr },
- { X86::VCMPSDrr, X86::VFsANDPDrr, X86::VFsANDNPDrr, X86::VFsORPDrr } }
+ // Choose the SSE instruction sequence based on data type (float or double).
+ static unsigned OpcTable[2][4] = {
+ { X86::CMPSSrr, X86::FsANDPSrr, X86::FsANDNPSrr, X86::FsORPSrr },
+ { X86::CMPSDrr, X86::FsANDPDrr, X86::FsANDNPDrr, X86::FsORPDrr }
};
- bool HasAVX = Subtarget->hasAVX();
unsigned *Opc = nullptr;
switch (RetVT.SimpleTy) {
default: return false;
- case MVT::f32: Opc = &OpcTable[0][HasAVX][0]; break;
- case MVT::f64: Opc = &OpcTable[1][HasAVX][0]; break;
+ case MVT::f32: Opc = &OpcTable[0][0]; break;
+ case MVT::f64: Opc = &OpcTable[1][0]; break;
}
const Value *LHS = I->getOperand(1);
@@ -1884,14 +1923,33 @@ bool X86FastISel::X86FastEmitSSESelect(MVT RetVT, const Instruction *I) {
return false;
const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT);
- unsigned CmpReg = fastEmitInst_rri(Opc[0], RC, CmpLHSReg, CmpLHSIsKill,
- CmpRHSReg, CmpRHSIsKill, CC);
- unsigned AndReg = fastEmitInst_rr(Opc[1], RC, CmpReg, /*IsKill=*/false,
- LHSReg, LHSIsKill);
- unsigned AndNReg = fastEmitInst_rr(Opc[2], RC, CmpReg, /*IsKill=*/true,
- RHSReg, RHSIsKill);
- unsigned ResultReg = fastEmitInst_rr(Opc[3], RC, AndNReg, /*IsKill=*/true,
- AndReg, /*IsKill=*/true);
+ unsigned ResultReg;
+
+ if (Subtarget->hasAVX()) {
+ // If we have AVX, create 1 blendv instead of 3 logic instructions.
+ // Blendv was introduced with SSE 4.1, but the 2 register form implicitly
+ // uses XMM0 as the selection register. That may need just as many
+ // instructions as the AND/ANDN/OR sequence due to register moves, so
+ // don't bother.
+ unsigned CmpOpcode =
+ (RetVT.SimpleTy == MVT::f32) ? X86::VCMPSSrr : X86::VCMPSDrr;
+ unsigned BlendOpcode =
+ (RetVT.SimpleTy == MVT::f32) ? X86::VBLENDVPSrr : X86::VBLENDVPDrr;
+
+ unsigned CmpReg = fastEmitInst_rri(CmpOpcode, RC, CmpLHSReg, CmpLHSIsKill,
+ CmpRHSReg, CmpRHSIsKill, CC);
+ ResultReg = fastEmitInst_rrr(BlendOpcode, RC, RHSReg, RHSIsKill,
+ LHSReg, LHSIsKill, CmpReg, true);
+ } else {
+ unsigned CmpReg = fastEmitInst_rri(Opc[0], RC, CmpLHSReg, CmpLHSIsKill,
+ CmpRHSReg, CmpRHSIsKill, CC);
+ unsigned AndReg = fastEmitInst_rr(Opc[1], RC, CmpReg, /*IsKill=*/false,
+ LHSReg, LHSIsKill);
+ unsigned AndNReg = fastEmitInst_rr(Opc[2], RC, CmpReg, /*IsKill=*/true,
+ RHSReg, RHSIsKill);
+ ResultReg = fastEmitInst_rr(Opc[3], RC, AndNReg, /*IsKill=*/true,
+ AndReg, /*IsKill=*/true);
+ }
updateValueMap(I, ResultReg);
return true;
}
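
The non-AVX path above is the classic branchless select: the compare produces an all-ones or all-zeros mask, and AND/ANDN/OR blend the two operands through it. The same idea rendered on scalar integers, illustrative only:

#include <cstdint>
#include <cstdio>

// select(cond, x, y) without a branch: the compare yields all-ones or
// all-zeros, and (mask & x) | (~mask & y) picks one operand. This is what
// CMPSS + ANDPS + ANDNPS + ORPS do lane-wise on XMM registers.
static uint32_t branchlessSelect(uint32_t A, uint32_t B, uint32_t X,
                                 uint32_t Y) {
  uint32_t Mask = (A < B) ? UINT32_C(0xFFFFFFFF) : 0;
  return (Mask & X) | (~Mask & Y);
}

int main() {
  std::printf("%u\n", branchlessSelect(1, 2, 10, 20)); // 10
  std::printf("%u\n", branchlessSelect(3, 2, 10, 20)); // 20
}
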
@@ -2015,38 +2073,30 @@ bool X86FastISel::X86SelectSIToFP(const Instruction *I) {
if (OpReg == 0)
return false;
- bool HasAVX = Subtarget->hasAVX();
const TargetRegisterClass *RC = nullptr;
unsigned Opcode;
- if (I->getType()->isDoubleTy() && X86ScalarSSEf64) {
+ if (I->getType()->isDoubleTy()) {
// sitofp int -> double
- Opcode = HasAVX ? X86::VCVTSI2SDrr : X86::CVTSI2SDrr;
+ Opcode = X86::VCVTSI2SDrr;
RC = &X86::FR64RegClass;
- } else if (I->getType()->isFloatTy() && X86ScalarSSEf32) {
+ } else if (I->getType()->isFloatTy()) {
// sitofp int -> float
- Opcode = HasAVX ? X86::VCVTSI2SSrr : X86::CVTSI2SSrr;
+ Opcode = X86::VCVTSI2SSrr;
RC = &X86::FR32RegClass;
} else
return false;
+ // The target-independent selection algorithm in FastISel already knows how
+ // to select a SINT_TO_FP if the target is SSE but not AVX. This code is only
+ // reachable if the subtarget has AVX.
+ assert(Subtarget->hasAVX() && "Expected a subtarget with AVX!");
- unsigned ImplicitDefReg = 0;
- if (HasAVX) {
- ImplicitDefReg = createResultReg(RC);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
- TII.get(TargetOpcode::IMPLICIT_DEF), ImplicitDefReg);
- }
-
- const MCInstrDesc &II = TII.get(Opcode);
- OpReg = constrainOperandRegClass(II, OpReg, (HasAVX ? 2 : 1));
-
- unsigned ResultReg = createResultReg(RC);
- MachineInstrBuilder MIB;
- MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg);
- if (ImplicitDefReg)
- MIB.addReg(ImplicitDefReg, RegState::Kill);
- MIB.addReg(OpReg);
+ unsigned ImplicitDefReg = createResultReg(RC);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::IMPLICIT_DEF), ImplicitDefReg);
+ unsigned ResultReg =
+ fastEmitInst_rr(Opcode, RC, ImplicitDefReg, true, OpReg, false);
updateValueMap(I, ResultReg);
return true;
}
@@ -3053,7 +3103,7 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) {
// Add a register mask operand representing the call-preserved registers.
// Proper defs for return values will be added by setPhysRegsDeadExcept().
- MIB.addRegMask(TRI.getCallPreservedMask(CC));
+ MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC));
// Add an implicit use GOT pointer in EBX.
if (Subtarget->isPICStyleGOT())
diff --git a/lib/Target/X86/X86FloatingPoint.cpp b/lib/Target/X86/X86FloatingPoint.cpp
index c8e5f64..3b0bd03 100644
--- a/lib/Target/X86/X86FloatingPoint.cpp
+++ b/lib/Target/X86/X86FloatingPoint.cpp
@@ -32,10 +32,10 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/EdgeBundles.h"
+#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/Support/Debug.h"
@@ -300,7 +300,7 @@ bool FPS::runOnMachineFunction(MachineFunction &MF) {
// function. If it is all integer, there is nothing for us to do!
bool FPIsUsed = false;
- assert(X86::FP6 == X86::FP0+6 && "Register enums aren't sorted right!");
+ static_assert(X86::FP6 == X86::FP0+6, "Register enums aren't sorted right!");
for (unsigned i = 0; i <= 6; ++i)
if (MF.getRegInfo().isPhysRegUsed(X86::FP0+i)) {
FPIsUsed = true;
@@ -438,7 +438,7 @@ bool FPS::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) {
// Rewind to first instruction newly inserted.
while (Start != BB.begin() && std::prev(Start) != PrevI) --Start;
dbgs() << "Inserted instructions:\n\t";
- Start->print(dbgs(), &MF.getTarget());
+ Start->print(dbgs());
while (++Start != std::next(I)) {}
}
dumpStack();
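
One note on the assert-to-static_assert change above: the register-enum layout is a compile-time property, so the check can fail the build instead of a debug run, at zero runtime cost. A trivial illustration:

enum Reg { FP0, FP1, FP2, FP3, FP4, FP5, FP6 };

// Checked by the compiler, not at runtime: reordering the enum fails the
// build here instead of tripping an assert in a debug run.
static_assert(FP6 == FP0 + 6, "Register enums aren't sorted right!");

int main() { return 0; }
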
diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp
index cead099..1d2c73c 100644
--- a/lib/Target/X86/X86FrameLowering.cpp
+++ b/lib/Target/X86/X86FrameLowering.cpp
@@ -581,7 +581,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
bool Is64Bit = STI.is64Bit();
// Standard x86_64 and NaCl use 64-bit frame/stack pointers; x32 uses 32-bit.
const bool Uses64BitFramePtr = STI.isTarget64BitLP64() || STI.isTargetNaCl64();
- bool IsWin64 = STI.isTargetWin64();
+ bool IsWin64 = STI.isCallingConvWin64(Fn->getCallingConv());
// Not necessarily synonymous with IsWin64.
bool IsWinEH = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
bool NeedsWinEH = IsWinEH && Fn->needsUnwindTableEntry();
diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp
index 8d50ae1..fb12ce5 100644
--- a/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -228,7 +228,7 @@ namespace {
/// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
/// inline asm expressions.
bool SelectInlineAsmMemoryOperand(const SDValue &Op,
- char ConstraintCode,
+ unsigned ConstraintID,
std::vector<SDValue> &OutOps) override;
void EmitSpecialCodeForMain();
@@ -1004,6 +1004,15 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
switch (N.getOpcode()) {
default: break;
+ case ISD::FRAME_ALLOC_RECOVER: {
+ if (!AM.hasSymbolicDisplacement())
+ if (const auto *ESNode = dyn_cast<ExternalSymbolSDNode>(N.getOperand(0)))
+ if (ESNode->getOpcode() == ISD::TargetExternalSymbol) {
+ AM.ES = ESNode->getSymbol();
+ return false;
+ }
+ break;
+ }
case ISD::Constant: {
uint64_t Val = cast<ConstantSDNode>(N)->getSExtValue();
if (!FoldOffsetIntoAddress(Val, AM))
@@ -2805,14 +2814,14 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
}
bool X86DAGToDAGISel::
-SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode,
+SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
std::vector<SDValue> &OutOps) {
SDValue Op0, Op1, Op2, Op3, Op4;
- switch (ConstraintCode) {
- case 'o': // offsetable ??
- case 'v': // not offsetable ??
+ switch (ConstraintID) {
+ case InlineAsm::Constraint_o: // offsetable ??
+ case InlineAsm::Constraint_v: // not offsetable ??
default: return true;
- case 'm': // memory
+ case InlineAsm::Constraint_m: // memory
if (!SelectAddr(nullptr, Op, Op0, Op1, Op2, Op3, Op4))
return true;
break;
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 6866be7..8b92e70 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -25,7 +25,6 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringSwitch.h"
-#include "llvm/ADT/VariadicFunction.h"
#include "llvm/CodeGen/IntrinsicLowering.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -77,119 +76,6 @@ static cl::opt<int> ReciprocalEstimateRefinementSteps(
static SDValue getMOVL(SelectionDAG &DAG, SDLoc dl, EVT VT, SDValue V1,
SDValue V2);
-static SDValue ExtractSubVector(SDValue Vec, unsigned IdxVal,
- SelectionDAG &DAG, SDLoc dl,
- unsigned vectorWidth) {
- assert((vectorWidth == 128 || vectorWidth == 256) &&
- "Unsupported vector width");
- EVT VT = Vec.getValueType();
- EVT ElVT = VT.getVectorElementType();
- unsigned Factor = VT.getSizeInBits()/vectorWidth;
- EVT ResultVT = EVT::getVectorVT(*DAG.getContext(), ElVT,
- VT.getVectorNumElements()/Factor);
-
- // Extract from UNDEF is UNDEF.
- if (Vec.getOpcode() == ISD::UNDEF)
- return DAG.getUNDEF(ResultVT);
-
- // Extract the relevant vectorWidth bits. Generate an EXTRACT_SUBVECTOR
- unsigned ElemsPerChunk = vectorWidth / ElVT.getSizeInBits();
-
- // This is the index of the first element of the vectorWidth-bit chunk
- // we want.
- unsigned NormalizedIdxVal = (((IdxVal * ElVT.getSizeInBits()) / vectorWidth)
- * ElemsPerChunk);
-
- // If the input is a buildvector just emit a smaller one.
- if (Vec.getOpcode() == ISD::BUILD_VECTOR)
- return DAG.getNode(ISD::BUILD_VECTOR, dl, ResultVT,
- makeArrayRef(Vec->op_begin() + NormalizedIdxVal,
- ElemsPerChunk));
-
- SDValue VecIdx = DAG.getIntPtrConstant(NormalizedIdxVal);
- return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ResultVT, Vec, VecIdx);
-}
-
-/// Generate a DAG to grab 128-bits from a vector > 128 bits. This
-/// sets things up to match to an AVX VEXTRACTF128 / VEXTRACTI128
-/// or AVX-512 VEXTRACTF32x4 / VEXTRACTI32x4
-/// instructions or a simple subregister reference. Idx is an index in the
-/// 128 bits we want. It need not be aligned to a 128-bit boundary. That makes
-/// lowering EXTRACT_VECTOR_ELT operations easier.
-static SDValue Extract128BitVector(SDValue Vec, unsigned IdxVal,
- SelectionDAG &DAG, SDLoc dl) {
- assert((Vec.getValueType().is256BitVector() ||
- Vec.getValueType().is512BitVector()) && "Unexpected vector size!");
- return ExtractSubVector(Vec, IdxVal, DAG, dl, 128);
-}
-
-/// Generate a DAG to grab 256-bits from a 512-bit vector.
-static SDValue Extract256BitVector(SDValue Vec, unsigned IdxVal,
- SelectionDAG &DAG, SDLoc dl) {
- assert(Vec.getValueType().is512BitVector() && "Unexpected vector size!");
- return ExtractSubVector(Vec, IdxVal, DAG, dl, 256);
-}
-
-static SDValue InsertSubVector(SDValue Result, SDValue Vec,
- unsigned IdxVal, SelectionDAG &DAG,
- SDLoc dl, unsigned vectorWidth) {
- assert((vectorWidth == 128 || vectorWidth == 256) &&
- "Unsupported vector width");
- // Inserting UNDEF is Result
- if (Vec.getOpcode() == ISD::UNDEF)
- return Result;
- EVT VT = Vec.getValueType();
- EVT ElVT = VT.getVectorElementType();
- EVT ResultVT = Result.getValueType();
-
- // Insert the relevant vectorWidth bits.
- unsigned ElemsPerChunk = vectorWidth/ElVT.getSizeInBits();
-
- // This is the index of the first element of the vectorWidth-bit chunk
- // we want.
- unsigned NormalizedIdxVal = (((IdxVal * ElVT.getSizeInBits())/vectorWidth)
- * ElemsPerChunk);
-
- SDValue VecIdx = DAG.getIntPtrConstant(NormalizedIdxVal);
- return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResultVT, Result, Vec, VecIdx);
-}
-
-/// Generate a DAG to put 128-bits into a vector > 128 bits. This
-/// sets things up to match to an AVX VINSERTF128/VINSERTI128 or
-/// AVX-512 VINSERTF32x4/VINSERTI32x4 instructions or a
-/// simple superregister reference. Idx is an index in the 128 bits
-/// we want. It need not be aligned to a 128-bit boundary. That makes
-/// lowering INSERT_VECTOR_ELT operations easier.
-static SDValue Insert128BitVector(SDValue Result, SDValue Vec, unsigned IdxVal,
- SelectionDAG &DAG,SDLoc dl) {
- assert(Vec.getValueType().is128BitVector() && "Unexpected vector size!");
- return InsertSubVector(Result, Vec, IdxVal, DAG, dl, 128);
-}
-
-static SDValue Insert256BitVector(SDValue Result, SDValue Vec, unsigned IdxVal,
- SelectionDAG &DAG, SDLoc dl) {
- assert(Vec.getValueType().is256BitVector() && "Unexpected vector size!");
- return InsertSubVector(Result, Vec, IdxVal, DAG, dl, 256);
-}
-
-/// Concat two 128-bit vectors into a 256 bit vector using VINSERTF128
-/// instructions. This is used because creating CONCAT_VECTOR nodes of
-/// BUILD_VECTORS returns a larger BUILD_VECTOR while we're trying to lower
-/// large BUILD_VECTORS.
-static SDValue Concat128BitVectors(SDValue V1, SDValue V2, EVT VT,
- unsigned NumElems, SelectionDAG &DAG,
- SDLoc dl) {
- SDValue V = Insert128BitVector(DAG.getUNDEF(VT), V1, 0, DAG, dl);
- return Insert128BitVector(V, V2, NumElems/2, DAG, dl);
-}
-
-static SDValue Concat256BitVectors(SDValue V1, SDValue V2, EVT VT,
- unsigned NumElems, SelectionDAG &DAG,
- SDLoc dl) {
- SDValue V = Insert256BitVector(DAG.getUNDEF(VT), V1, 0, DAG, dl);
- return Insert256BitVector(V, V2, NumElems/2, DAG, dl);
-}
-
X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
const X86Subtarget &STI)
: TargetLowering(TM), Subtarget(&STI) {
@@ -871,35 +757,16 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
// MMX-sized vectors (other than x86mmx) are expected to be expanded
// into smaller operations.
- setOperationAction(ISD::MULHS, MVT::v8i8, Expand);
- setOperationAction(ISD::MULHS, MVT::v4i16, Expand);
- setOperationAction(ISD::MULHS, MVT::v2i32, Expand);
- setOperationAction(ISD::MULHS, MVT::v1i64, Expand);
- setOperationAction(ISD::AND, MVT::v8i8, Expand);
- setOperationAction(ISD::AND, MVT::v4i16, Expand);
- setOperationAction(ISD::AND, MVT::v2i32, Expand);
- setOperationAction(ISD::AND, MVT::v1i64, Expand);
- setOperationAction(ISD::OR, MVT::v8i8, Expand);
- setOperationAction(ISD::OR, MVT::v4i16, Expand);
- setOperationAction(ISD::OR, MVT::v2i32, Expand);
- setOperationAction(ISD::OR, MVT::v1i64, Expand);
- setOperationAction(ISD::XOR, MVT::v8i8, Expand);
- setOperationAction(ISD::XOR, MVT::v4i16, Expand);
- setOperationAction(ISD::XOR, MVT::v2i32, Expand);
- setOperationAction(ISD::XOR, MVT::v1i64, Expand);
- setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i8, Expand);
- setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i16, Expand);
- setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2i32, Expand);
- setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v1i64, Expand);
+ for (MVT MMXTy : {MVT::v8i8, MVT::v4i16, MVT::v2i32, MVT::v1i64}) {
+ setOperationAction(ISD::MULHS, MMXTy, Expand);
+ setOperationAction(ISD::AND, MMXTy, Expand);
+ setOperationAction(ISD::OR, MMXTy, Expand);
+ setOperationAction(ISD::XOR, MMXTy, Expand);
+ setOperationAction(ISD::SCALAR_TO_VECTOR, MMXTy, Expand);
+ setOperationAction(ISD::SELECT, MMXTy, Expand);
+ setOperationAction(ISD::BITCAST, MMXTy, Expand);
+ }
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v1i64, Expand);
- setOperationAction(ISD::SELECT, MVT::v8i8, Expand);
- setOperationAction(ISD::SELECT, MVT::v4i16, Expand);
- setOperationAction(ISD::SELECT, MVT::v2i32, Expand);
- setOperationAction(ISD::SELECT, MVT::v1i64, Expand);
- setOperationAction(ISD::BITCAST, MVT::v8i8, Expand);
- setOperationAction(ISD::BITCAST, MVT::v4i16, Expand);
- setOperationAction(ISD::BITCAST, MVT::v2i32, Expand);
- setOperationAction(ISD::BITCAST, MVT::v1i64, Expand);
if (!TM.Options.UseSoftFloat && Subtarget->hasSSE1()) {
addRegisterClass(MVT::v4f32, &X86::VR128RegClass);
@@ -1065,27 +932,13 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
}
if (!TM.Options.UseSoftFloat && Subtarget->hasSSE41()) {
- setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
- setOperationAction(ISD::FCEIL, MVT::f32, Legal);
- setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
- setOperationAction(ISD::FRINT, MVT::f32, Legal);
- setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);
- setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
- setOperationAction(ISD::FCEIL, MVT::f64, Legal);
- setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
- setOperationAction(ISD::FRINT, MVT::f64, Legal);
- setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);
-
- setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
- setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
- setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
- setOperationAction(ISD::FRINT, MVT::v4f32, Legal);
- setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);
- setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
- setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
- setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
- setOperationAction(ISD::FRINT, MVT::v2f64, Legal);
- setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal);
+ for (MVT RoundedTy : {MVT::f32, MVT::f64, MVT::v4f32, MVT::v2f64}) {
+ setOperationAction(ISD::FFLOOR, RoundedTy, Legal);
+ setOperationAction(ISD::FCEIL, RoundedTy, Legal);
+ setOperationAction(ISD::FTRUNC, RoundedTy, Legal);
+ setOperationAction(ISD::FRINT, RoundedTy, Legal);
+ setOperationAction(ISD::FNEARBYINT, RoundedTy, Legal);
+ }
// FIXME: Do we need to handle scalar-to-vector here?
setOperationAction(ISD::MUL, MVT::v4i32, Legal);
@@ -1474,7 +1327,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i64, Custom);
setOperationAction(ISD::CONCAT_VECTORS, MVT::v16f32, Custom);
setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i32, Custom);
- setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i1, Custom);
setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i1, Legal);
setOperationAction(ISD::SETCC, MVT::v16i1, Custom);
@@ -1576,6 +1428,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::SUB, MVT::v32i16, Legal);
setOperationAction(ISD::SUB, MVT::v64i8, Legal);
setOperationAction(ISD::MUL, MVT::v32i16, Legal);
+ setOperationAction(ISD::CONCAT_VECTORS, MVT::v32i1, Custom);
+ setOperationAction(ISD::CONCAT_VECTORS, MVT::v64i1, Custom);
+ setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v32i1, Custom);
+ setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v64i1, Custom);
for (int i = MVT::v32i8; i != MVT::v8i64; ++i) {
const MVT VT = (MVT::SimpleValueType)i;
@@ -1599,7 +1455,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::SETCC, MVT::v4i1, Custom);
setOperationAction(ISD::SETCC, MVT::v2i1, Custom);
- setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v8i1, Legal);
+ setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i1, Custom);
+ setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i1, Custom);
+ setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v8i1, Custom);
+ setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v4i1, Custom);
setOperationAction(ISD::AND, MVT::v8i32, Legal);
setOperationAction(ISD::OR, MVT::v8i32, Legal);
@@ -3189,7 +3048,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// Add a register mask operand representing the call-preserved registers.
const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
- const uint32_t *Mask = TRI->getCallPreservedMask(CallConv);
+ const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
assert(Mask && "Missing call preserved mask for calling convention");
Ops.push_back(DAG.getRegisterMask(Mask));
@@ -3906,21 +3765,6 @@ static bool isSequentialOrUndefInRange(ArrayRef<int> Mask,
return true;
}
-/// CommuteVectorShuffleMask - Change values in a shuffle permute mask assuming
-/// the two vector operands have swapped position.
-static void CommuteVectorShuffleMask(SmallVectorImpl<int> &Mask,
- unsigned NumElems) {
- for (unsigned i = 0; i != NumElems; ++i) {
- int idx = Mask[i];
- if (idx < 0)
- continue;
- else if (idx < (int)NumElems)
- Mask[i] = idx + NumElems;
- else
- Mask[i] = idx - NumElems;
- }
-}
-
/// isVEXTRACTIndex - Return true if the specified
/// EXTRACT_SUBVECTOR operand specifies a vector extract that is
/// suitable for instruction that extract 128 or 256 bit vectors
@@ -4083,9 +3927,13 @@ static SDValue getZeroVector(EVT VT, const X86Subtarget *Subtarget,
Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst };
Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i32, Ops);
} else if (VT.getScalarType() == MVT::i1) {
- assert(VT.getVectorNumElements() <= 16 && "Unexpected vector type");
+
+ assert((Subtarget->hasBWI() || VT.getVectorNumElements() <= 16)
+ && "Unexpected vector type");
+ assert((Subtarget->hasVLX() || VT.getVectorNumElements() >= 8)
+ && "Unexpected vector type");
SDValue Cst = DAG.getConstant(0, MVT::i1);
- SmallVector<SDValue, 16> Ops(VT.getVectorNumElements(), Cst);
+ SmallVector<SDValue, 64> Ops(VT.getVectorNumElements(), Cst);
return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops);
} else
llvm_unreachable("Unexpected vector type");
@@ -4093,6 +3941,162 @@ static SDValue getZeroVector(EVT VT, const X86Subtarget *Subtarget,
return DAG.getNode(ISD::BITCAST, dl, VT, Vec);
}
+static SDValue ExtractSubVector(SDValue Vec, unsigned IdxVal,
+ SelectionDAG &DAG, SDLoc dl,
+ unsigned vectorWidth) {
+ assert((vectorWidth == 128 || vectorWidth == 256) &&
+ "Unsupported vector width");
+ EVT VT = Vec.getValueType();
+ EVT ElVT = VT.getVectorElementType();
+ unsigned Factor = VT.getSizeInBits()/vectorWidth;
+ EVT ResultVT = EVT::getVectorVT(*DAG.getContext(), ElVT,
+ VT.getVectorNumElements()/Factor);
+
+ // Extract from UNDEF is UNDEF.
+ if (Vec.getOpcode() == ISD::UNDEF)
+ return DAG.getUNDEF(ResultVT);
+
+ // Extract the relevant vectorWidth bits. Generate an EXTRACT_SUBVECTOR
+ unsigned ElemsPerChunk = vectorWidth / ElVT.getSizeInBits();
+
+ // This is the index of the first element of the vectorWidth-bit chunk
+ // we want.
+ unsigned NormalizedIdxVal = (((IdxVal * ElVT.getSizeInBits()) / vectorWidth)
+ * ElemsPerChunk);
+
+ // If the input is a buildvector just emit a smaller one.
+ if (Vec.getOpcode() == ISD::BUILD_VECTOR)
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, ResultVT,
+ makeArrayRef(Vec->op_begin() + NormalizedIdxVal,
+ ElemsPerChunk));
+
+ SDValue VecIdx = DAG.getIntPtrConstant(NormalizedIdxVal);
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ResultVT, Vec, VecIdx);
+}
+
+/// Generate a DAG to grab 128-bits from a vector > 128 bits. This
+/// sets things up to match to an AVX VEXTRACTF128 / VEXTRACTI128
+/// or AVX-512 VEXTRACTF32x4 / VEXTRACTI32x4
+/// instructions or a simple subregister reference. Idx is an index in the
+/// 128 bits we want. It need not be aligned to a 128-bit boundary. That makes
+/// lowering EXTRACT_VECTOR_ELT operations easier.
+static SDValue Extract128BitVector(SDValue Vec, unsigned IdxVal,
+ SelectionDAG &DAG, SDLoc dl) {
+ assert((Vec.getValueType().is256BitVector() ||
+ Vec.getValueType().is512BitVector()) && "Unexpected vector size!");
+ return ExtractSubVector(Vec, IdxVal, DAG, dl, 128);
+}
+
+/// Generate a DAG to grab 256-bits from a 512-bit vector.
+static SDValue Extract256BitVector(SDValue Vec, unsigned IdxVal,
+ SelectionDAG &DAG, SDLoc dl) {
+ assert(Vec.getValueType().is512BitVector() && "Unexpected vector size!");
+ return ExtractSubVector(Vec, IdxVal, DAG, dl, 256);
+}
+
+static SDValue InsertSubVector(SDValue Result, SDValue Vec,
+ unsigned IdxVal, SelectionDAG &DAG,
+ SDLoc dl, unsigned vectorWidth) {
+ assert((vectorWidth == 128 || vectorWidth == 256) &&
+ "Unsupported vector width");
+ // Inserting UNDEF is Result
+ if (Vec.getOpcode() == ISD::UNDEF)
+ return Result;
+ EVT VT = Vec.getValueType();
+ EVT ElVT = VT.getVectorElementType();
+ EVT ResultVT = Result.getValueType();
+
+ // Insert the relevant vectorWidth bits.
+ unsigned ElemsPerChunk = vectorWidth/ElVT.getSizeInBits();
+
+ // This is the index of the first element of the vectorWidth-bit chunk
+ // we want.
+ unsigned NormalizedIdxVal = (((IdxVal * ElVT.getSizeInBits())/vectorWidth)
+ * ElemsPerChunk);
+
+ SDValue VecIdx = DAG.getIntPtrConstant(NormalizedIdxVal);
+ return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResultVT, Result, Vec, VecIdx);
+}
+
+/// Generate a DAG to put 128-bits into a vector > 128 bits. This
+/// sets things up to match to an AVX VINSERTF128/VINSERTI128 or
+/// AVX-512 VINSERTF32x4/VINSERTI32x4 instructions or a
+/// simple superregister reference. Idx is an index in the 128 bits
+/// we want. It need not be aligned to a 128-bit boundary. That makes
+/// lowering INSERT_VECTOR_ELT operations easier.
+static SDValue Insert128BitVector(SDValue Result, SDValue Vec, unsigned IdxVal,
+ SelectionDAG &DAG, SDLoc dl) {
+ assert(Vec.getValueType().is128BitVector() && "Unexpected vector size!");
+
+ // For insertion into the zero index (low half) of a 256-bit vector, it is
+ // more efficient to generate a blend with immediate instead of an insert*128.
+ // We are still creating an INSERT_SUBVECTOR below with an undef node to
+ // extend the subvector to the size of the result vector. Make sure that
+ // we are not recursing on that node by checking for undef here.
+ if (IdxVal == 0 && Result.getValueType().is256BitVector() &&
+ Result.getOpcode() != ISD::UNDEF) {
+ EVT ResultVT = Result.getValueType();
+ SDValue ZeroIndex = DAG.getIntPtrConstant(0);
+ SDValue Undef = DAG.getUNDEF(ResultVT);
+ SDValue Vec256 = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResultVT, Undef,
+ Vec, ZeroIndex);
+
+ // The blend instruction, and therefore its mask, depend on the data type.
+ MVT ScalarType = ResultVT.getScalarType().getSimpleVT();
+ if (ScalarType.isFloatingPoint()) {
+ // Choose either vblendps (float) or vblendpd (double).
+ unsigned ScalarSize = ScalarType.getSizeInBits();
+ assert((ScalarSize == 64 || ScalarSize == 32) && "Unknown float type");
+ unsigned MaskVal = (ScalarSize == 64) ? 0x03 : 0x0f;
+ SDValue Mask = DAG.getConstant(MaskVal, MVT::i8);
+ return DAG.getNode(X86ISD::BLENDI, dl, ResultVT, Result, Vec256, Mask);
+ }
+
+ const X86Subtarget &Subtarget =
+ static_cast<const X86Subtarget &>(DAG.getSubtarget());
+
+ // AVX2 is needed for 256-bit integer blend support.
+ // Integers must be cast to 32-bit because there is only vpblendd;
+ // vpblendw can't be used for this because it has a handicapped mask.
+
+ // If we don't have AVX2, then cast to float. Using a wrong domain blend
+ // is still more efficient than using the wrong domain vinsertf128 that
+ // will be created by InsertSubVector().
+ MVT CastVT = Subtarget.hasAVX2() ? MVT::v8i32 : MVT::v8f32;
+
+ SDValue Mask = DAG.getConstant(0x0f, MVT::i8);
+ Vec256 = DAG.getNode(ISD::BITCAST, dl, CastVT, Vec256);
+ Vec256 = DAG.getNode(X86ISD::BLENDI, dl, CastVT, Result, Vec256, Mask);
+ return DAG.getNode(ISD::BITCAST, dl, ResultVT, Vec256);
+ }
+
+ return InsertSubVector(Result, Vec, IdxVal, DAG, dl, 128);
+}
+
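The blend masks above use one selection bit per element, set for every element of the low 128-bit half of the 256-bit result. A hedged sketch of where 0x03 and 0x0f come from (illustrative helper, not LLVM API):

    // One mask bit per element; set the bits covering the low half.
    static unsigned lowHalfBlendMask(unsigned ScalarSizeInBits) {
      unsigned EltsPerHalf = 128 / ScalarSizeInBits;
      return (1u << EltsPerHalf) - 1; // 0x03 for f64, 0x0f for f32/i32
    }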
+static SDValue Insert256BitVector(SDValue Result, SDValue Vec, unsigned IdxVal,
+ SelectionDAG &DAG, SDLoc dl) {
+ assert(Vec.getValueType().is256BitVector() && "Unexpected vector size!");
+ return InsertSubVector(Result, Vec, IdxVal, DAG, dl, 256);
+}
+
+/// Concat two 128-bit vectors into a 256-bit vector using VINSERTF128
+/// instructions. This is used because creating a CONCAT_VECTORS node of
+/// BUILD_VECTORs returns a larger BUILD_VECTOR while we're trying to lower
+/// large BUILD_VECTORs.
+static SDValue Concat128BitVectors(SDValue V1, SDValue V2, EVT VT,
+ unsigned NumElems, SelectionDAG &DAG,
+ SDLoc dl) {
+ SDValue V = Insert128BitVector(DAG.getUNDEF(VT), V1, 0, DAG, dl);
+ return Insert128BitVector(V, V2, NumElems/2, DAG, dl);
+}
+
+static SDValue Concat256BitVectors(SDValue V1, SDValue V2, EVT VT,
+ unsigned NumElems, SelectionDAG &DAG,
+ SDLoc dl) {
+ SDValue V = Insert256BitVector(DAG.getUNDEF(VT), V1, 0, DAG, dl);
+ return Insert256BitVector(V, V2, NumElems/2, DAG, dl);
+}
+
/// getOnesVector - Returns a vector of specified type with all bits set.
/// Always build ones vectors as <4 x i32> or <8 x i32>. For 256-bit types with
/// no AVX2 support, use two <4 x i32> inserted in an <8 x i32> appropriately.
@@ -5567,8 +5571,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
return getOnesVector(VT, Subtarget->hasInt256(), DAG, dl);
}
- SDValue Broadcast = LowerVectorBroadcast(Op, Subtarget, DAG);
- if (Broadcast.getNode())
+ if (SDValue Broadcast = LowerVectorBroadcast(Op, Subtarget, DAG))
return Broadcast;
unsigned EVTBits = ExtVT.getSizeInBits();
@@ -5635,12 +5638,13 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
if (ExtVT == MVT::i32 || ExtVT == MVT::f32 || ExtVT == MVT::f64 ||
(ExtVT == MVT::i64 && Subtarget->is64Bit())) {
- if (VT.is256BitVector() || VT.is512BitVector()) {
+ if (VT.is512BitVector()) {
SDValue ZeroVec = getZeroVector(VT, Subtarget, DAG, dl);
return DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, ZeroVec,
Item, DAG.getIntPtrConstant(0));
}
- assert(VT.is128BitVector() && "Expected an SSE value type!");
+ assert((VT.is128BitVector() || VT.is256BitVector()) &&
+ "Expected an SSE value type!");
Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item);
// Turn it into a MOVL (i.e. movss, movsd, or movd) to a zero vector.
return getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget, DAG);
@@ -5742,24 +5746,20 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
}
// If element VT is < 32 bits, convert it to inserts into a zero vector.
- if (EVTBits == 8 && NumElems == 16) {
- SDValue V = LowerBuildVectorv16i8(Op, NonZeros,NumNonZero,NumZero, DAG,
- Subtarget, *this);
- if (V.getNode()) return V;
- }
+ if (EVTBits == 8 && NumElems == 16)
+ if (SDValue V = LowerBuildVectorv16i8(Op, NonZeros,NumNonZero,NumZero, DAG,
+ Subtarget, *this))
+ return V;
- if (EVTBits == 16 && NumElems == 8) {
- SDValue V = LowerBuildVectorv8i16(Op, NonZeros,NumNonZero,NumZero, DAG,
- Subtarget, *this);
- if (V.getNode()) return V;
- }
+ if (EVTBits == 16 && NumElems == 8)
+ if (SDValue V = LowerBuildVectorv8i16(Op, NonZeros,NumNonZero,NumZero, DAG,
+ Subtarget, *this))
+ return V;
// If element VT is == 32 bits and has 4 elems, try to generate an INSERTPS
- if (EVTBits == 32 && NumElems == 4) {
- SDValue V = LowerBuildVectorv4x32(Op, DAG, Subtarget, *this);
- if (V.getNode())
+ if (EVTBits == 32 && NumElems == 4)
+ if (SDValue V = LowerBuildVectorv4x32(Op, DAG, Subtarget, *this))
return V;
- }
// If element VT is == 32 bits, turn it into a number of shuffles.
SmallVector<SDValue, 8> V(NumElems);
@@ -5807,13 +5807,11 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
V[i] = Op.getOperand(i);
// Check for elements which are consecutive loads.
- SDValue LD = EltsFromConsecutiveLoads(VT, V, dl, DAG, false);
- if (LD.getNode())
+ if (SDValue LD = EltsFromConsecutiveLoads(VT, V, dl, DAG, false))
return LD;
// Check for a build vector from mostly shuffle plus few inserting.
- SDValue Sh = buildFromShuffleMostly(Op, DAG);
- if (Sh.getNode())
+ if (SDValue Sh = buildFromShuffleMostly(Op, DAG))
return Sh;
// For SSE 4.1, use insertps to put the high elements into the low element.
@@ -5893,8 +5891,64 @@ static SDValue LowerAVXCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) {
return Concat256BitVectors(V1, V2, ResVT, NumElems, DAG, dl);
}
-static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) {
- MVT LLVM_ATTRIBUTE_UNUSED VT = Op.getSimpleValueType();
+static SDValue LowerCONCAT_VECTORSvXi1(SDValue Op,
+ const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
+ SDLoc dl(Op);
+ MVT ResVT = Op.getSimpleValueType();
+ unsigned NumOfOperands = Op.getNumOperands();
+
+ assert(isPowerOf2_32(NumOfOperands) &&
+ "Unexpected number of operands in CONCAT_VECTORS");
+
+ if (NumOfOperands > 2) {
+ MVT HalfVT = MVT::getVectorVT(ResVT.getScalarType(),
+ ResVT.getVectorNumElements()/2);
+ SmallVector<SDValue, 2> Ops;
+ for (unsigned i = 0; i < NumOfOperands/2; i++)
+ Ops.push_back(Op.getOperand(i));
+ SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, dl, HalfVT, Ops);
+ Ops.clear();
+ for (unsigned i = NumOfOperands/2; i < NumOfOperands; i++)
+ Ops.push_back(Op.getOperand(i));
+ SDValue Hi = DAG.getNode(ISD::CONCAT_VECTORS, dl, HalfVT, Ops);
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, ResVT, Lo, Hi);
+ }
+
+ SDValue V1 = Op.getOperand(0);
+ SDValue V2 = Op.getOperand(1);
+ bool IsZeroV1 = ISD::isBuildVectorAllZeros(V1.getNode());
+ bool IsZeroV2 = ISD::isBuildVectorAllZeros(V2.getNode());
+
+ if (IsZeroV1 && IsZeroV2)
+ return getZeroVector(ResVT, Subtarget, DAG, dl);
+
+ SDValue ZeroIdx = DAG.getIntPtrConstant(0);
+ SDValue Undef = DAG.getUNDEF(ResVT);
+ unsigned NumElems = ResVT.getVectorNumElements();
+ SDValue ShiftBits = DAG.getConstant(NumElems/2, MVT::i8);
+
+ V2 = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, Undef, V2, ZeroIdx);
+ V2 = DAG.getNode(X86ISD::VSHLI, dl, ResVT, V2, ShiftBits);
+ if (IsZeroV1)
+ return V2;
+
+ V1 = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, Undef, V1, ZeroIdx);
+ // Zero the upper bits of V1
+ V1 = DAG.getNode(X86ISD::VSHLI, dl, ResVT, V1, ShiftBits);
+ V1 = DAG.getNode(X86ISD::VSRLI, dl, ResVT, V1, ShiftBits);
+ if (IsZeroV2)
+ return V1;
+ return DAG.getNode(ISD::OR, dl, ResVT, V1, V2);
+}
+
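Because vXi1 vectors live in mask registers, the shift/OR sequence above is effectively bit-mask concatenation. A scalar analogy, assuming a 16-element result built from two 8-element halves:

    #include <cstdint>

    // V2 is shifted into the high half; V1 keeps the low half; OR merges.
    static uint16_t concatMaskHalves(uint8_t V1, uint8_t V2) {
      return (uint16_t)((uint16_t)V1 | ((uint16_t)V2 << 8));
    }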
+static SDValue LowerCONCAT_VECTORS(SDValue Op,
+ const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
+ MVT VT = Op.getSimpleValueType();
+ if (VT.getVectorElementType() == MVT::i1)
+ return LowerCONCAT_VECTORSvXi1(Op, Subtarget, DAG);
+
assert((VT.is256BitVector() && Op.getNumOperands() == 2) ||
(VT.is512BitVector() && (Op.getNumOperands() == 2 ||
Op.getNumOperands() == 4)));
@@ -6935,8 +6989,8 @@ static SDValue lowerVectorShuffleAsBroadcast(SDLoc DL, MVT VT, SDValue V,
"a sorted mask where the broadcast "
"comes from V1.");
- // Go up the chain of (vector) values to try and find a scalar load that
- // we can combine with the broadcast.
+ // Go up the chain of (vector) values to find a scalar load that we can
+ // combine with the broadcast.
for (;;) {
switch (V.getOpcode()) {
case ISD::CONCAT_VECTORS: {
@@ -6973,12 +7027,12 @@ static SDValue lowerVectorShuffleAsBroadcast(SDLoc DL, MVT VT, SDValue V,
(V.getOpcode() == ISD::SCALAR_TO_VECTOR && BroadcastIdx == 0)) {
V = V.getOperand(BroadcastIdx);
- // If the scalar isn't a load we can't broadcast from it in AVX1, only with
- // AVX2.
+ // If the scalar isn't a load, we can't broadcast from it in AVX1.
+ // Only AVX2 has register broadcasts.
if (!Subtarget->hasAVX2() && !isShuffleFoldableLoad(V))
return SDValue();
} else if (BroadcastIdx != 0 || !Subtarget->hasAVX2()) {
- // We can't broadcast from a vector register w/o AVX2, and we can only
+ // We can't broadcast from a vector register without AVX2, and we can only
// broadcast from the zero-element of a vector register.
return SDValue();
}
@@ -7689,10 +7743,18 @@ static SDValue lowerV4I32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
/// The exact breakdown of how to form these dword pairs and align them on the
/// correct sides is really tricky. See the comments within the function for
/// more of the details.
+///
+/// This code also handles repeated 128-bit lanes of v8i16 shuffles, but each
+/// lane must shuffle the *exact* same way. In fact, you must pass a v8 Mask to
+/// this routine for it to work correctly. To shuffle a 256-bit or 512-bit i16
+/// vector, form the analogous 128-bit 8-element Mask.
static SDValue lowerV8I16GeneralSingleInputVectorShuffle(
- SDLoc DL, SDValue V, MutableArrayRef<int> Mask,
+ SDLoc DL, MVT VT, SDValue V, MutableArrayRef<int> Mask,
const X86Subtarget *Subtarget, SelectionDAG &DAG) {
- assert(V.getSimpleValueType() == MVT::v8i16 && "Bad input type!");
+ assert(VT.getScalarType() == MVT::i16 && "Bad input type!");
+ MVT PSHUFDVT = MVT::getVectorVT(MVT::i32, VT.getVectorNumElements() / 2);
+
+ assert(Mask.size() == 8 && "Shuffle mask length doesn't match!");
MutableArrayRef<int> LoMask = Mask.slice(0, 4);
MutableArrayRef<int> HiMask = Mask.slice(4, 4);
@@ -7845,9 +7907,9 @@ static SDValue lowerV8I16GeneralSingleInputVectorShuffle(
int PSHUFDMask[] = {0, 1, 2, 3};
PSHUFDMask[ADWord] = BDWord;
PSHUFDMask[BDWord] = ADWord;
- V = DAG.getNode(ISD::BITCAST, DL, MVT::v8i16,
- DAG.getNode(X86ISD::PSHUFD, DL, MVT::v4i32,
- DAG.getNode(ISD::BITCAST, DL, MVT::v4i32, V),
+ V = DAG.getNode(ISD::BITCAST, DL, VT,
+ DAG.getNode(X86ISD::PSHUFD, DL, PSHUFDVT,
+ DAG.getNode(ISD::BITCAST, DL, PSHUFDVT, V),
getV4X86ShuffleImm8ForMask(PSHUFDMask, DAG)));
// Adjust the mask to match the new locations of A and B.
@@ -7859,8 +7921,8 @@ static SDValue lowerV8I16GeneralSingleInputVectorShuffle(
// Recurse back into this routine to re-compute state now that this isn't
// a 3 and 1 problem.
- return DAG.getVectorShuffle(MVT::v8i16, DL, V, DAG.getUNDEF(MVT::v8i16),
- Mask);
+ return lowerV8I16GeneralSingleInputVectorShuffle(DL, VT, V, Mask, Subtarget,
+ DAG);
};
if ((NumLToL == 3 && NumHToL == 1) || (NumLToL == 1 && NumHToL == 3))
return balanceSides(LToLInputs, HToLInputs, HToHInputs, LToHInputs, 0, 4);
@@ -8083,15 +8145,15 @@ static SDValue lowerV8I16GeneralSingleInputVectorShuffle(
// Now enact all the shuffles we've computed to move the inputs into their
// target half.
if (!isNoopShuffleMask(PSHUFLMask))
- V = DAG.getNode(X86ISD::PSHUFLW, DL, MVT::v8i16, V,
+ V = DAG.getNode(X86ISD::PSHUFLW, DL, VT, V,
getV4X86ShuffleImm8ForMask(PSHUFLMask, DAG));
if (!isNoopShuffleMask(PSHUFHMask))
- V = DAG.getNode(X86ISD::PSHUFHW, DL, MVT::v8i16, V,
+ V = DAG.getNode(X86ISD::PSHUFHW, DL, VT, V,
getV4X86ShuffleImm8ForMask(PSHUFHMask, DAG));
if (!isNoopShuffleMask(PSHUFDMask))
- V = DAG.getNode(ISD::BITCAST, DL, MVT::v8i16,
- DAG.getNode(X86ISD::PSHUFD, DL, MVT::v4i32,
- DAG.getNode(ISD::BITCAST, DL, MVT::v4i32, V),
+ V = DAG.getNode(ISD::BITCAST, DL, VT,
+ DAG.getNode(X86ISD::PSHUFD, DL, PSHUFDVT,
+ DAG.getNode(ISD::BITCAST, DL, PSHUFDVT, V),
getV4X86ShuffleImm8ForMask(PSHUFDMask, DAG)));
// At this point, each half should contain all its inputs, and we can then
@@ -8105,7 +8167,7 @@ static SDValue lowerV8I16GeneralSingleInputVectorShuffle(
// Do a half shuffle for the low mask.
if (!isNoopShuffleMask(LoMask))
- V = DAG.getNode(X86ISD::PSHUFLW, DL, MVT::v8i16, V,
+ V = DAG.getNode(X86ISD::PSHUFLW, DL, VT, V,
getV4X86ShuffleImm8ForMask(LoMask, DAG));
// Do a half shuffle with the high mask after shifting its values down.
@@ -8113,7 +8175,7 @@ static SDValue lowerV8I16GeneralSingleInputVectorShuffle(
if (M >= 0)
M -= 4;
if (!isNoopShuffleMask(HiMask))
- V = DAG.getNode(X86ISD::PSHUFHW, DL, MVT::v8i16, V,
+ V = DAG.getNode(X86ISD::PSHUFHW, DL, VT, V,
getV4X86ShuffleImm8ForMask(HiMask, DAG));
return V;
@@ -8232,8 +8294,8 @@ static SDValue lowerV8I16VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
Mask, Subtarget, DAG))
return Rotate;
- return lowerV8I16GeneralSingleInputVectorShuffle(DL, V1, Mask, Subtarget,
- DAG);
+ return lowerV8I16GeneralSingleInputVectorShuffle(DL, MVT::v8i16, V1, Mask,
+ Subtarget, DAG);
}
assert(std::any_of(Mask.begin(), Mask.end(), isV1) &&
@@ -8946,7 +9008,7 @@ static SDValue lowerVectorShuffleAsLanePermuteAndBlend(SDLoc DL, MVT VT,
int LaneSize = Mask.size() / 2;
// If there are only inputs from one 128-bit lane, splitting will in fact be
- // less expensive. The flags track wether the given lane contains an element
+ // less expensive. The flags track whether the given lane contains an element
// that crosses to another lane.
bool LaneCrossing[2] = {false, false};
for (int i = 0, Size = Mask.size(); i < Size; ++i)
@@ -8986,34 +9048,78 @@ static SDValue lowerV2X128VectorShuffle(SDLoc DL, MVT VT, SDValue V1,
SDValue V2, ArrayRef<int> Mask,
const X86Subtarget *Subtarget,
SelectionDAG &DAG) {
+ // TODO: If minimizing size and one of the inputs is a zero vector and the
+ // zero vector has only one use, we could use a VPERM2X128 to save the
+ // instruction bytes needed to explicitly generate the zero vector.
+
// Blends are faster and handle all the non-lane-crossing cases.
if (SDValue Blend = lowerVectorShuffleAsBlend(DL, VT, V1, V2, Mask,
Subtarget, DAG))
return Blend;
- MVT SubVT = MVT::getVectorVT(VT.getVectorElementType(),
- VT.getVectorNumElements() / 2);
- // Check for patterns which can be matched with a single insert of a 128-bit
- // subvector.
- if (isShuffleEquivalent(V1, V2, Mask, {0, 1, 0, 1}) ||
- isShuffleEquivalent(V1, V2, Mask, {0, 1, 4, 5})) {
- SDValue LoV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, V1,
- DAG.getIntPtrConstant(0));
- SDValue HiV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT,
- Mask[2] < 4 ? V1 : V2, DAG.getIntPtrConstant(0));
- return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, LoV, HiV);
- }
- if (isShuffleEquivalent(V1, V2, Mask, {0, 1, 6, 7})) {
- SDValue LoV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, V1,
- DAG.getIntPtrConstant(0));
- SDValue HiV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, V2,
- DAG.getIntPtrConstant(2));
- return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, LoV, HiV);
+ bool IsV1Zero = ISD::isBuildVectorAllZeros(V1.getNode());
+ bool IsV2Zero = ISD::isBuildVectorAllZeros(V2.getNode());
+
+ // If either input operand is a zero vector, use VPERM2X128 because its mask
+ // allows us to replace the zero input with an implicit zero.
+ if (!IsV1Zero && !IsV2Zero) {
+ // Check for patterns which can be matched with a single insert of a 128-bit
+ // subvector.
+ bool OnlyUsesV1 = isShuffleEquivalent(V1, V2, Mask, {0, 1, 0, 1});
+ if (OnlyUsesV1 || isShuffleEquivalent(V1, V2, Mask, {0, 1, 4, 5})) {
+ MVT SubVT = MVT::getVectorVT(VT.getVectorElementType(),
+ VT.getVectorNumElements() / 2);
+ SDValue LoV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, V1,
+ DAG.getIntPtrConstant(0));
+ SDValue HiV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT,
+ OnlyUsesV1 ? V1 : V2, DAG.getIntPtrConstant(0));
+ return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, LoV, HiV);
+ }
+ }
+
+ // Otherwise form a 128-bit permutation. After accounting for undefs,
+ // convert the 64-bit shuffle mask selection values into 128-bit
+ // selection bits by dividing the indexes by 2 and shifting into positions
+ // defined by a vperm2*128 instruction's immediate control byte.
+
+ // The immediate permute control byte looks like this:
+ // [1:0] - select 128 bits from sources for low half of destination
+ // [2] - ignore
+ // [3] - zero low half of destination
+ // [5:4] - select 128 bits from sources for high half of destination
+ // [6] - ignore
+ // [7] - zero high half of destination
+
+ int MaskLO = Mask[0];
+ if (MaskLO == SM_SentinelUndef)
+ MaskLO = Mask[1] == SM_SentinelUndef ? 0 : Mask[1];
+
+ int MaskHI = Mask[2];
+ if (MaskHI == SM_SentinelUndef)
+ MaskHI = Mask[3] == SM_SentinelUndef ? 0 : Mask[3];
+
+ unsigned PermMask = MaskLO / 2 | (MaskHI / 2) << 4;
+
+ // If either input is a zero vector, replace it with an undef input.
+ // Shuffle mask values < 4 are selecting elements of V1.
+ // Shuffle mask values >= 4 are selecting elements of V2.
+ // Adjust each half of the permute mask by clearing the half that was
+ // selecting the zero vector and setting the zero mask bit.
+ if (IsV1Zero) {
+ V1 = DAG.getUNDEF(VT);
+ if (MaskLO < 4)
+ PermMask = (PermMask & 0xf0) | 0x08;
+ if (MaskHI < 4)
+ PermMask = (PermMask & 0x0f) | 0x80;
+ }
+ if (IsV2Zero) {
+ V2 = DAG.getUNDEF(VT);
+ if (MaskLO >= 4)
+ PermMask = (PermMask & 0xf0) | 0x08;
+ if (MaskHI >= 4)
+ PermMask = (PermMask & 0x0f) | 0x80;
}
- // Otherwise form a 128-bit permutation.
- // FIXME: Detect zero-vector inputs and use the VPERM2X128 to zero that half.
- unsigned PermMask = Mask[0] / 2 | (Mask[2] / 2) << 4;
return DAG.getNode(X86ISD::VPERM2X128, DL, VT, V1, V2,
DAG.getConstant(PermMask, MVT::i8));
}
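Tracing the encoding above with a hypothetical mask: Mask = {0, 1, 6, 7} gives MaskLO = 0 and MaskHI = 6, so PermMask = 0/2 | (6/2) << 4 = 0x30, i.e. low half from V1 and high half from V2. A sketch of just the arithmetic:

    // Build a vperm2x128 immediate from two 128-bit chunk selectors.
    static unsigned vperm2x128Imm(int MaskLO, int MaskHI) {
      return (unsigned)(MaskLO / 2) | ((unsigned)(MaskHI / 2) << 4);
    }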
@@ -9326,6 +9432,15 @@ static SDValue lowerV8F32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
ArrayRef<int> Mask = SVOp->getMask();
assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!");
+ // If we have a single input to the zero element, insert that into V1 if we
+ // can do so cheaply.
+ int NumV2Elements =
+ std::count_if(Mask.begin(), Mask.end(), [](int M) { return M >= 8; });
+ if (NumV2Elements == 1 && Mask[0] >= 8)
+ if (SDValue Insertion = lowerVectorShuffleAsElementInsertion(
+ DL, MVT::v8f32, V1, V2, Mask, Subtarget, DAG))
+ return Insertion;
+
if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v8f32, V1, V2, Mask,
Subtarget, DAG))
return Blend;
@@ -9557,6 +9672,15 @@ static SDValue lowerV16I16VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
return lowerVectorShuffleAsLanePermuteAndBlend(DL, MVT::v16i16, V1, V2,
Mask, DAG);
+ SmallVector<int, 8> RepeatedMask;
+ if (is128BitLaneRepeatedShuffleMask(MVT::v16i16, Mask, RepeatedMask)) {
+ // As this is a single-input shuffle, the repeated mask should be
+ // a strictly valid v8i16 mask that we can pass through to the v8i16
+ // lowering to handle even the v16 case.
+ return lowerV8I16GeneralSingleInputVectorShuffle(
+ DL, MVT::v16i16, V1, RepeatedMask, Subtarget, DAG);
+ }
+
SDValue PSHUFBMask[32];
for (int i = 0; i < 16; ++i) {
if (Mask[i] == -1) {
@@ -10118,8 +10242,7 @@ SDValue X86TargetLowering::LowerVSELECT(SDValue Op, SelectionDAG &DAG) const {
// Try to lower this to a blend-style vector shuffle. This can handle all
// constant condition cases.
- SDValue BlendOp = lowerVSELECTtoVectorShuffle(Op, Subtarget, DAG);
- if (BlendOp.getNode())
+ if (SDValue BlendOp = lowerVSELECTtoVectorShuffle(Op, Subtarget, DAG))
return BlendOp;
// Variable blends are only legal from SSE4.1 onward.
@@ -10421,17 +10544,31 @@ SDValue X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
// If the vector is wider than 128 bits, extract the 128-bit subvector, insert
// into that, and then insert the subvector back into the result.
if (VT.is256BitVector() || VT.is512BitVector()) {
- // Get the desired 128-bit vector half.
+ // With a 256-bit vector, we can insert into the zero element efficiently
+ // using a blend if we have AVX or AVX2 and the right data type.
+ if (VT.is256BitVector() && IdxVal == 0) {
+ // TODO: It is worthwhile to cast integer to floating point and back
+ // and incur a domain crossing penalty if that's what we'll end up
+ // doing anyway after extracting to a 128-bit vector.
+ if ((Subtarget->hasAVX() && (EltVT == MVT::f64 || EltVT == MVT::f32)) ||
+ (Subtarget->hasAVX2() && EltVT == MVT::i32)) {
+ SDValue N1Vec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, N1);
+ N2 = DAG.getIntPtrConstant(1);
+ return DAG.getNode(X86ISD::BLENDI, dl, VT, N0, N1Vec, N2);
+ }
+ }
+
+ // Get the desired 128-bit vector chunk.
SDValue V = Extract128BitVector(N0, IdxVal, DAG, dl);
- // Insert the element into the desired half.
+ // Insert the element into the desired chunk.
unsigned NumEltsIn128 = 128 / EltVT.getSizeInBits();
unsigned IdxIn128 = IdxVal - (IdxVal / NumEltsIn128) * NumEltsIn128;
V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, V.getValueType(), V, N1,
DAG.getConstant(IdxIn128, MVT::i32));
- // Insert the changed part back to the 256-bit vector
+ // Insert the changed part back into the bigger vector
return Insert128BitVector(N0, V, IdxVal, DAG, dl);
}
assert(VT.is128BitVector() && "Only 128-bit vector types should be left!");
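The chunk-relative index computed above is just IdxVal modulo the number of elements per 128-bit chunk. A short sketch with hypothetical values:

    // Position of an element inside its 128-bit chunk.
    static unsigned idxIn128(unsigned IdxVal, unsigned EltBits) {
      unsigned NumEltsIn128 = 128 / EltBits;
      return IdxVal % NumEltsIn128; // element 5 of a v8i32 -> index 1
    }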
@@ -10456,16 +10593,29 @@ SDValue X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
}
if (EltVT == MVT::f32) {
- // Bits [7:6] of the constant are the source select. This will always be
- // zero here. The DAG Combiner may combine an extract_elt index into
- // these
- // bits. For example (insert (extract, 3), 2) could be matched by
- // putting
- // the '3' into bits [7:6] of X86ISD::INSERTPS.
- // Bits [5:4] of the constant are the destination select. This is the
- // value of the incoming immediate.
- // Bits [3:0] of the constant are the zero mask. The DAG Combiner may
+ // Bits [7:6] of the constant are the source select. This will always be
+ // zero here. The DAG Combiner may combine an extract_elt index into
+ // these bits. For example (insert (extract, 3), 2) could be matched by
+ // putting the '3' into bits [7:6] of X86ISD::INSERTPS.
+ // Bits [5:4] of the constant are the destination select. This is the
+ // value of the incoming immediate.
+ // Bits [3:0] of the constant are the zero mask. The DAG Combiner may
// combine either bitwise AND or insert of float 0.0 to set these bits.
+
+ const Function *F = DAG.getMachineFunction().getFunction();
+ bool MinSize = F->hasFnAttribute(Attribute::MinSize);
+ if (IdxVal == 0 && (!MinSize || !MayFoldLoad(N1))) {
+ // If this is an insertion of 32-bits into the low 32-bits of
+ // a vector, we prefer to generate a blend with immediate rather
+ // than an insertps. Blends are simpler operations in hardware and so
+ // will always have equal or better performance than insertps.
+ // But if optimizing for size and there's a load folding opportunity,
+ // generate insertps because blendps does not have a 32-bit memory
+ // operand form.
+ N2 = DAG.getIntPtrConstant(1);
+ N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4f32, N1);
+ return DAG.getNode(X86ISD::BLENDI, dl, VT, N0, N1, N2);
+ }
N2 = DAG.getIntPtrConstant(IdxVal << 4);
// Create this as a scalar to vector.
N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4f32, N1);
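A sketch of the INSERTPS immediate layout spelled out in the comment above (hypothetical helper; the bit positions follow that comment):

    // [7:6] source lane, [5:4] destination lane, [3:0] zero mask.
    static unsigned insertpsImm(unsigned SrcLane, unsigned DstLane,
                                unsigned ZeroMask) {
      return (SrcLane << 6) | (DstLane << 4) | (ZeroMask & 0xf);
    }
    // (insert (extract, 3), 2) could match insertpsImm(3, 2, 0), and the
    // IdxVal << 4 above corresponds to insertpsImm(0, IdxVal, 0).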
@@ -10593,6 +10743,37 @@ static SDValue LowerINSERT_SUBVECTOR(SDValue Op, const X86Subtarget *Subtarget,
if (OpVT.is512BitVector() && SubVecVT.is256BitVector())
return Insert256BitVector(Vec, SubVec, IdxVal, DAG, dl);
+ if (OpVT.getVectorElementType() == MVT::i1) {
+ if (IdxVal == 0 && Vec.getOpcode() == ISD::UNDEF) // the operation is legal
+ return Op;
+ SDValue ZeroIdx = DAG.getIntPtrConstant(0);
+ SDValue Undef = DAG.getUNDEF(OpVT);
+ unsigned NumElems = OpVT.getVectorNumElements();
+ SDValue ShiftBits = DAG.getConstant(NumElems/2, MVT::i8);
+
+ if (IdxVal == OpVT.getVectorNumElements() / 2) {
+ // Zero upper bits of the Vec
+ Vec = DAG.getNode(X86ISD::VSHLI, dl, OpVT, Vec, ShiftBits);
+ Vec = DAG.getNode(X86ISD::VSRLI, dl, OpVT, Vec, ShiftBits);
+
+ SDValue Vec2 = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, OpVT, Undef,
+ SubVec, ZeroIdx);
+ Vec2 = DAG.getNode(X86ISD::VSHLI, dl, OpVT, Vec2, ShiftBits);
+ return DAG.getNode(ISD::OR, dl, OpVT, Vec, Vec2);
+ }
+ if (IdxVal == 0) {
+ SDValue Vec2 = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, OpVT, Undef,
+ SubVec, ZeroIdx);
+ // Zero upper bits of the Vec2
+ Vec2 = DAG.getNode(X86ISD::VSHLI, dl, OpVT, Vec2, ShiftBits);
+ Vec2 = DAG.getNode(X86ISD::VSRLI, dl, OpVT, Vec2, ShiftBits);
+ // Zero lower bits of the Vec
+ Vec = DAG.getNode(X86ISD::VSRLI, dl, OpVT, Vec, ShiftBits);
+ Vec = DAG.getNode(X86ISD::VSHLI, dl, OpVT, Vec, ShiftBits);
+ // Merge them together
+ return DAG.getNode(ISD::OR, dl, OpVT, Vec, Vec2);
+ }
+ }
return SDValue();
}
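As in the vXi1 concat lowering earlier, the shift pairs above zero out one half of a mask register so that an OR can merge the halves. A scalar analogy for the IdxVal == 0 case, assuming a 16-bit mask whose low 8 bits are replaced:

    #include <cstdint>

    static uint16_t insertLowHalf(uint16_t Vec, uint16_t Sub) {
      uint16_t Lo = (uint16_t)((uint16_t)(Sub << 8) >> 8); // zero Sub's upper bits
      uint16_t Hi = (uint16_t)((uint16_t)(Vec >> 8) << 8); // zero Vec's lower bits
      return (uint16_t)(Hi | Lo);                          // merge them together
    }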
@@ -13149,9 +13330,9 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op1.getValueType();
SDValue CC;
- // Lower fp selects into a CMP/AND/ANDN/OR sequence when the necessary SSE ops
- // are available. Otherwise fp cmovs get lowered into a less efficient branch
- // sequence later on.
+ // Lower FP selects into a CMP/AND/ANDN/OR sequence when the necessary SSE ops
+ // are available or VBLENDV if AVX is available.
+ // Otherwise FP cmovs get lowered into a less efficient branch sequence later.
if (Cond.getOpcode() == ISD::SETCC &&
((Subtarget->hasSSE2() && (VT == MVT::f32 || VT == MVT::f64)) ||
(Subtarget->hasSSE1() && VT == MVT::f32)) &&
@@ -13166,8 +13347,42 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
DAG.getConstant(SSECC, MVT::i8));
return DAG.getNode(X86ISD::SELECT, DL, VT, Cmp, Op1, Op2);
}
+
SDValue Cmp = DAG.getNode(X86ISD::FSETCC, DL, VT, CondOp0, CondOp1,
DAG.getConstant(SSECC, MVT::i8));
+
+ // If we have AVX, we can use a variable vector select (VBLENDV) instead
+ // of 3 logic instructions for size savings and potentially speed.
+ // Unfortunately, there is no scalar form of VBLENDV.
+
+ // If either operand is a constant, don't try this. We can expect to
+ // optimize away at least one of the logic instructions later in that
+ // case, so that sequence would be faster than a variable blend.
+
+ // BLENDV was introduced with SSE 4.1, but the 2 register form implicitly
+ // uses XMM0 as the selection register. That may need just as many
+ // instructions as the AND/ANDN/OR sequence due to register moves, so
+ // don't bother.
+
+ if (Subtarget->hasAVX() &&
+ !isa<ConstantFPSDNode>(Op1) && !isa<ConstantFPSDNode>(Op2)) {
+
+ // Convert to vectors, do a VSELECT, and convert back to scalar.
+ // All of the conversions should be optimized away.
+
+ EVT VecVT = VT == MVT::f32 ? MVT::v4f32 : MVT::v2f64;
+ SDValue VOp1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecVT, Op1);
+ SDValue VOp2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecVT, Op2);
+ SDValue VCmp = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecVT, Cmp);
+
+ EVT VCmpVT = VT == MVT::f32 ? MVT::v4i32 : MVT::v2i64;
+ VCmp = DAG.getNode(ISD::BITCAST, DL, VCmpVT, VCmp);
+
+ SDValue VSel = DAG.getNode(ISD::VSELECT, DL, VecVT, VCmp, VOp1, VOp2);
+
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT,
+ VSel, DAG.getIntPtrConstant(0));
+ }
SDValue AndN = DAG.getNode(X86ISD::FANDN, DL, VT, Cmp, Op2);
SDValue And = DAG.getNode(X86ISD::FAND, DL, VT, Cmp, Op1);
return DAG.getNode(X86ISD::FOR, DL, VT, AndN, And);
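The FAND/FANDN/FOR fallback is a branchless select keyed off the all-ones or all-zeros mask that FSETCC produces. A plain-integer C sketch of the idea (a model, not the DAG form):

    #include <cstdint>
    #include <cstring>

    static float selectF32(uint32_t Mask, float A, float B) {
      // Mask is 0xffffffff (take A) or 0 (take B), as FSETCC yields.
      uint32_t Ai, Bi, Ri;
      std::memcpy(&Ai, &A, 4);
      std::memcpy(&Bi, &B, 4);
      Ri = (Mask & Ai) | (~Mask & Bi); // FAND / FANDN / FOR
      float R;
      std::memcpy(&R, &Ri, 4);
      return R;
    }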
@@ -14595,6 +14810,13 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget
switch (IntNo) {
default: return SDValue(); // Don't custom lower most intrinsics.
+ case Intrinsic::x86_avx2_permd:
+ case Intrinsic::x86_avx2_permps:
+ // Operands intentionally swapped. Mask is last operand to intrinsic,
+ // but second operand for node/instruction.
+ return DAG.getNode(X86ISD::VPERMV, dl, Op.getValueType(),
+ Op.getOperand(2), Op.getOperand(1));
+
case Intrinsic::x86_avx512_mask_valign_q_512:
case Intrinsic::x86_avx512_mask_valign_d_512:
// Vector source operands are swapped.
@@ -16039,21 +16261,19 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget* Subtarget,
SDLoc dl(Op);
SDValue R = Op.getOperand(0);
SDValue Amt = Op.getOperand(1);
- SDValue V;
assert(VT.isVector() && "Custom lowering only for vector shifts!");
assert(Subtarget->hasSSE2() && "Only custom lower when we have SSE2!");
- V = LowerScalarImmediateShift(Op, DAG, Subtarget);
- if (V.getNode())
+ if (SDValue V = LowerScalarImmediateShift(Op, DAG, Subtarget))
return V;
- V = LowerScalarVariableShift(Op, DAG, Subtarget);
- if (V.getNode())
+ if (SDValue V = LowerScalarVariableShift(Op, DAG, Subtarget))
return V;
if (Subtarget->hasAVX512() && (VT == MVT::v16i32 || VT == MVT::v8i64))
return Op;
+
// AVX2 has VPSLLV/VPSRAV/VPSRLV.
if (Subtarget->hasInt256()) {
if (Op.getOpcode() == ISD::SRL &&
@@ -16068,6 +16288,17 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget* Subtarget,
return Op;
}
+ // v2i64 vector logical shifts can efficiently avoid scalarization - do the
+ // shifts per-lane and then shuffle the partial results back together.
+ if (VT == MVT::v2i64 && Op.getOpcode() != ISD::SRA) {
+ // Splat the shift amounts so the scalar shifts above will catch it.
+ SDValue Amt0 = DAG.getVectorShuffle(VT, dl, Amt, Amt, {0, 0});
+ SDValue Amt1 = DAG.getVectorShuffle(VT, dl, Amt, Amt, {1, 1});
+ SDValue R0 = DAG.getNode(Op->getOpcode(), dl, VT, R, Amt0);
+ SDValue R1 = DAG.getNode(Op->getOpcode(), dl, VT, R, Amt1);
+ return DAG.getVectorShuffle(VT, dl, R0, R1, {0, 3});
+ }
+
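A scalar model of the v2i64 trick above: perform the whole-vector shift twice, once per splatted amount, then keep lane 0 of the first result and lane 1 of the second (the {0, 3} shuffle):

    #include <cstdint>

    static void shlV2i64(uint64_t R[2], const uint64_t Amt[2]) {
      uint64_t R0[2] = {R[0] << Amt[0], R[1] << Amt[0]}; // Amt splat {0, 0}
      uint64_t R1[2] = {R[0] << Amt[1], R[1] << Amt[1]}; // Amt splat {1, 1}
      R[0] = R0[0]; // shuffle {0, 3}
      R[1] = R1[1];
    }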
// If possible, lower this packed shift into a vector multiply instead of
// expanding it into a sequence of scalar shifts.
// Do this only if the vector shift count is a constant build_vector.
@@ -16238,7 +16469,7 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget* Subtarget,
Amt = DAG.getNode(ISD::ANY_EXTEND, dl, NewVT, Amt);
return DAG.getNode(ISD::TRUNCATE, dl, VT,
DAG.getNode(Op.getOpcode(), dl, NewVT, R, Amt));
- }
+ }
// Decompose 256-bit shifts into smaller 128-bit shifts.
if (VT.is256BitVector()) {
@@ -16254,12 +16485,9 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget* Subtarget,
SDValue Amt1, Amt2;
if (Amt.getOpcode() == ISD::BUILD_VECTOR) {
// Constant shift amount
- SmallVector<SDValue, 4> Amt1Csts;
- SmallVector<SDValue, 4> Amt2Csts;
- for (unsigned i = 0; i != NumElems/2; ++i)
- Amt1Csts.push_back(Amt->getOperand(i));
- for (unsigned i = NumElems/2; i != NumElems; ++i)
- Amt2Csts.push_back(Amt->getOperand(i));
+ SmallVector<SDValue, 8> Ops(Amt->op_begin(), Amt->op_begin() + NumElems);
+ ArrayRef<SDValue> Amt1Csts = makeArrayRef(Ops).slice(0, NumElems / 2);
+ ArrayRef<SDValue> Amt2Csts = makeArrayRef(Ops).slice(NumElems / 2);
Amt1 = DAG.getNode(ISD::BUILD_VECTOR, dl, NewVT, Amt1Csts);
Amt2 = DAG.getNode(ISD::BUILD_VECTOR, dl, NewVT, Amt2Csts);
@@ -16386,14 +16614,17 @@ bool X86TargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
return needsCmpXchgNb(PTy->getElementType());
}
-bool X86TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
+TargetLoweringBase::AtomicRMWExpansionKind
+X86TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
unsigned NativeWidth = Subtarget->is64Bit() ? 64 : 32;
const Type *MemType = AI->getType();
// If the operand is too big, we must see if cmpxchg8/16b is available
// and default to library calls otherwise.
- if (MemType->getPrimitiveSizeInBits() > NativeWidth)
- return needsCmpXchgNb(MemType);
+ if (MemType->getPrimitiveSizeInBits() > NativeWidth) {
+ return needsCmpXchgNb(MemType) ? AtomicRMWExpansionKind::CmpXChg
+ : AtomicRMWExpansionKind::None;
+ }
AtomicRMWInst::BinOp Op = AI->getOperation();
switch (Op) {
@@ -16403,13 +16634,14 @@ bool X86TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
case AtomicRMWInst::Add:
case AtomicRMWInst::Sub:
// It's better to use xadd, xsub or xchg for these in all cases.
- return false;
+ return AtomicRMWExpansionKind::None;
case AtomicRMWInst::Or:
case AtomicRMWInst::And:
case AtomicRMWInst::Xor:
// If the atomicrmw's result isn't actually used, we can just add a "lock"
// prefix to a normal instruction for these operations.
- return !AI->use_empty();
+ return !AI->use_empty() ? AtomicRMWExpansionKind::CmpXChg
+ : AtomicRMWExpansionKind::None;
case AtomicRMWInst::Nand:
case AtomicRMWInst::Max:
case AtomicRMWInst::Min:
@@ -16417,7 +16649,7 @@ bool X86TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
case AtomicRMWInst::UMin:
// These always require a non-trivial set of data operations on x86. We must
// use a cmpxchg loop.
- return true;
+ return AtomicRMWExpansionKind::CmpXChg;
}
}
@@ -16874,7 +17106,7 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::ATOMIC_LOAD_SUB: return LowerLOAD_SUB(Op,DAG);
case ISD::ATOMIC_STORE: return LowerATOMIC_STORE(Op,DAG);
case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);
- case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG);
+ case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, Subtarget, DAG);
case ISD::VECTOR_SHUFFLE: return lowerVectorShuffle(Op, Subtarget, DAG);
case ISD::VSELECT: return LowerVSELECT(Op, DAG);
case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
@@ -17719,7 +17951,8 @@ X86TargetLowering::EmitVAARG64WithCustomInserter(MachineInstr *MI,
// 9 ) EFLAGS (implicit-def)
assert(MI->getNumOperands() == 10 && "VAARG_64 should have 10 operands!");
- assert(X86::AddrNumOperands == 5 && "VAARG_64 assumes 5 address operands");
+ static_assert(X86::AddrNumOperands == 5,
+ "VAARG_64 assumes 5 address operands");
unsigned DestReg = MI->getOperand(0).getReg();
MachineOperand &Base = MI->getOperand(1);
@@ -18095,6 +18328,92 @@ X86TargetLowering::EmitLoweredSelect(MachineInstr *MI,
// fallthrough --> copy0MBB
MachineBasicBlock *thisMBB = BB;
MachineFunction *F = BB->getParent();
+
+ // We also lower double CMOVs:
+ // (CMOV (CMOV F, T, cc1), T, cc2)
+ // to two successive branches. For that, we look for another CMOV as the
+ // following instruction.
+ //
+ // Without this, we would add a PHI between the two jumps, which ends up
+ // creating a few copies all around. For instance, for
+ //
+ // (sitofp (zext (fcmp une)))
+ //
+ // we would generate:
+ //
+ // ucomiss %xmm1, %xmm0
+ // movss <1.0f>, %xmm0
+ // movaps %xmm0, %xmm1
+ // jne .LBB5_2
+ // xorps %xmm1, %xmm1
+ // .LBB5_2:
+ // jp .LBB5_4
+ // movaps %xmm1, %xmm0
+ // .LBB5_4:
+ // retq
+ //
+ // because this custom-inserter would have generated:
+ //
+ // A
+ // | \
+ // | B
+ // | /
+ // C
+ // | \
+ // | D
+ // | /
+ // E
+ //
+ // A: X = ...; Y = ...
+ // B: empty
+ // C: Z = PHI [X, A], [Y, B]
+ // D: empty
+ // E: PHI [X, C], [Z, D]
+ //
+ // If we lower both CMOVs in a single step, we can instead generate:
+ //
+ // A
+ // | \
+ // | C
+ // | /|
+ // |/ |
+ // | |
+ // | D
+ // | /
+ // E
+ //
+ // A: X = ...; Y = ...
+ // D: empty
+ // E: PHI [X, A], [X, C], [Y, D]
+ //
+ // Which, in our sitofp/fcmp example, gives us something like:
+ //
+ // ucomiss %xmm1, %xmm0
+ // movss <1.0f>, %xmm0
+ // jne .LBB5_4
+ // jp .LBB5_4
+ // xorps %xmm0, %xmm0
+ // .LBB5_4:
+ // retq
+ //
+ MachineInstr *NextCMOV = nullptr;
+ MachineBasicBlock::iterator NextMIIt =
+ std::next(MachineBasicBlock::iterator(MI));
+ if (NextMIIt != BB->end() && NextMIIt->getOpcode() == MI->getOpcode() &&
+ NextMIIt->getOperand(2).getReg() == MI->getOperand(2).getReg() &&
+ NextMIIt->getOperand(1).getReg() == MI->getOperand(0).getReg())
+ NextCMOV = &*NextMIIt;
+
+ MachineBasicBlock *jcc1MBB = nullptr;
+
+ // If we have a double CMOV, we lower it to two successive branches to
+ // the same block. EFLAGS is used by both, so mark it as live in the second.
+ if (NextCMOV) {
+ jcc1MBB = F->CreateMachineBasicBlock(LLVM_BB);
+ F->insert(It, jcc1MBB);
+ jcc1MBB->addLiveIn(X86::EFLAGS);
+ }
+
MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
F->insert(It, copy0MBB);
@@ -18103,8 +18422,10 @@ X86TargetLowering::EmitLoweredSelect(MachineInstr *MI,
// If the EFLAGS register isn't dead in the terminator, then claim that it's
// live into the sink and copy blocks.
const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
- if (!MI->killsRegister(X86::EFLAGS) &&
- !checkAndUpdateEFLAGSKill(MI, BB, TRI)) {
+
+ MachineInstr *LastEFLAGSUser = NextCMOV ? NextCMOV : MI;
+ if (!LastEFLAGSUser->killsRegister(X86::EFLAGS) &&
+ !checkAndUpdateEFLAGSKill(LastEFLAGSUser, BB, TRI)) {
copy0MBB->addLiveIn(X86::EFLAGS);
sinkMBB->addLiveIn(X86::EFLAGS);
}
@@ -18115,7 +18436,19 @@ X86TargetLowering::EmitLoweredSelect(MachineInstr *MI,
sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
// Add the true and fallthrough blocks as its successors.
- BB->addSuccessor(copy0MBB);
+ if (NextCMOV) {
+ // The fallthrough block may be jcc1MBB, if we have a double CMOV.
+ BB->addSuccessor(jcc1MBB);
+
+ // In that case, jcc1MBB will itself fall through to copy0MBB, and
+ // jump to the sinkMBB.
+ jcc1MBB->addSuccessor(copy0MBB);
+ jcc1MBB->addSuccessor(sinkMBB);
+ } else {
+ BB->addSuccessor(copy0MBB);
+ }
+
+ // The true block target of the first (or only) branch is always sinkMBB.
BB->addSuccessor(sinkMBB);
// Create the conditional branch instruction.
@@ -18123,6 +18456,12 @@ X86TargetLowering::EmitLoweredSelect(MachineInstr *MI,
X86::GetCondBranchFromCond((X86::CondCode)MI->getOperand(3).getImm());
BuildMI(BB, DL, TII->get(Opc)).addMBB(sinkMBB);
+ if (NextCMOV) {
+ unsigned Opc2 = X86::GetCondBranchFromCond(
+ (X86::CondCode)NextCMOV->getOperand(3).getImm());
+ BuildMI(jcc1MBB, DL, TII->get(Opc2)).addMBB(sinkMBB);
+ }
+
// copy0MBB:
// %FalseValue = ...
// # fallthrough to sinkMBB
@@ -18131,10 +18470,22 @@ X86TargetLowering::EmitLoweredSelect(MachineInstr *MI,
// sinkMBB:
// %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
// ...
- BuildMI(*sinkMBB, sinkMBB->begin(), DL,
- TII->get(X86::PHI), MI->getOperand(0).getReg())
- .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB)
- .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);
+ MachineInstrBuilder MIB =
+ BuildMI(*sinkMBB, sinkMBB->begin(), DL, TII->get(X86::PHI),
+ MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB)
+ .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);
+
+ // If we have a double CMOV, the second Jcc provides the same incoming
+ // value as the first Jcc (the True operand of the SELECT_CC/CMOV nodes).
+ if (NextCMOV) {
+ MIB.addReg(MI->getOperand(2).getReg()).addMBB(jcc1MBB);
+ // Copy the PHI result to the register defined by the second CMOV.
+ BuildMI(*sinkMBB, std::next(MachineBasicBlock::iterator(MIB.getInstr())),
+ DL, TII->get(TargetOpcode::COPY), NextCMOV->getOperand(0).getReg())
+ .addReg(MI->getOperand(0).getReg());
+ NextCMOV->eraseFromParent();
+ }
MI->eraseFromParent(); // The pseudo instruction is gone now.
return sinkMBB;
@@ -18218,7 +18569,7 @@ X86TargetLowering::EmitLoweredSegAlloca(MachineInstr *MI,
// Calls into a routine in libgcc to allocate more space from the heap.
const uint32_t *RegMask =
- Subtarget->getRegisterInfo()->getCallPreservedMask(CallingConv::C);
+ Subtarget->getRegisterInfo()->getCallPreservedMask(*MF, CallingConv::C);
if (IsLP64) {
BuildMI(mallocMBB, DL, TII->get(X86::MOV64rr), X86::RDI)
.addReg(sizeVReg);
@@ -18303,7 +18654,7 @@ X86TargetLowering::EmitLoweredTLSCall(MachineInstr *MI,
// FIXME: The 32-bit calls have non-standard calling conventions. Use a
// proper register mask.
const uint32_t *RegMask =
- Subtarget->getRegisterInfo()->getCallPreservedMask(CallingConv::C);
+ Subtarget->getRegisterInfo()->getCallPreservedMask(*F, CallingConv::C);
if (Subtarget->is64Bit()) {
MachineInstrBuilder MIB = BuildMI(*BB, MI, DL,
TII->get(X86::MOV64rm), X86::RDI)
@@ -19132,9 +19483,11 @@ static bool combineX86ShuffleChain(SDValue Op, SDValue Root, ArrayRef<int> Mask,
// Note that even with AVX we prefer the PSHUFD form of shuffle for integer
// vectors because it can have a load folded into it that UNPCK cannot. This
// doesn't preclude something switching to the shorter encoding post-RA.
- if (FloatDomain) {
- if (Mask.equals(0, 0) || Mask.equals(1, 1)) {
- bool Lo = Mask.equals(0, 0);
+ //
+ // FIXME: Should teach these routines about AVX vector widths.
+ if (FloatDomain && VT.getSizeInBits() == 128) {
+ if (Mask.equals({0, 0}) || Mask.equals({1, 1})) {
+ bool Lo = Mask.equals({0, 0});
unsigned Shuffle;
MVT ShuffleVT;
// Check if we have SSE3 which will let us use MOVDDUP. That instruction
@@ -19163,8 +19516,8 @@ static bool combineX86ShuffleChain(SDValue Op, SDValue Root, ArrayRef<int> Mask,
return true;
}
if (Subtarget->hasSSE3() &&
- (Mask.equals(0, 0, 2, 2) || Mask.equals(1, 1, 3, 3))) {
- bool Lo = Mask.equals(0, 0, 2, 2);
+ (Mask.equals({0, 0, 2, 2}) || Mask.equals({1, 1, 3, 3}))) {
+ bool Lo = Mask.equals({0, 0, 2, 2});
unsigned Shuffle = Lo ? X86ISD::MOVSLDUP : X86ISD::MOVSHDUP;
MVT ShuffleVT = MVT::v4f32;
if (Depth == 1 && Root->getOpcode() == Shuffle)
@@ -19177,8 +19530,8 @@ static bool combineX86ShuffleChain(SDValue Op, SDValue Root, ArrayRef<int> Mask,
/*AddTo*/ true);
return true;
}
- if (Mask.equals(0, 0, 1, 1) || Mask.equals(2, 2, 3, 3)) {
- bool Lo = Mask.equals(0, 0, 1, 1);
+ if (Mask.equals({0, 0, 1, 1}) || Mask.equals({2, 2, 3, 3})) {
+ bool Lo = Mask.equals({0, 0, 1, 1});
unsigned Shuffle = Lo ? X86ISD::UNPCKL : X86ISD::UNPCKH;
MVT ShuffleVT = MVT::v4f32;
if (Depth == 1 && Root->getOpcode() == Shuffle)
@@ -19196,12 +19549,12 @@ static bool combineX86ShuffleChain(SDValue Op, SDValue Root, ArrayRef<int> Mask,
// We always canonicalize the 8 x i16 and 16 x i8 shuffles into their UNPCK
// variants as none of these have single-instruction variants that are
// superior to the UNPCK formulation.
- if (!FloatDomain &&
- (Mask.equals(0, 0, 1, 1, 2, 2, 3, 3) ||
- Mask.equals(4, 4, 5, 5, 6, 6, 7, 7) ||
- Mask.equals(0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7) ||
- Mask.equals(8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15,
- 15))) {
+ if (!FloatDomain && VT.getSizeInBits() == 128 &&
+ (Mask.equals({0, 0, 1, 1, 2, 2, 3, 3}) ||
+ Mask.equals({4, 4, 5, 5, 6, 6, 7, 7}) ||
+ Mask.equals({0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7}) ||
+ Mask.equals(
+ {8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15}))) {
bool Lo = Mask[0] == 0;
unsigned Shuffle = Lo ? X86ISD::UNPCKL : X86ISD::UNPCKH;
if (Depth == 1 && Root->getOpcode() == Shuffle)
@@ -19237,9 +19590,9 @@ static bool combineX86ShuffleChain(SDValue Op, SDValue Root, ArrayRef<int> Mask,
// in practice PSHUFB tends to be *very* fast so we're more aggressive.
if ((Depth >= 3 || HasPSHUFB) && Subtarget->hasSSSE3()) {
SmallVector<SDValue, 16> PSHUFBMask;
- assert(Mask.size() <= 16 && "Can't shuffle elements smaller than bytes!");
- int Ratio = 16 / Mask.size();
- for (unsigned i = 0; i < 16; ++i) {
+ int NumBytes = VT.getSizeInBits() / 8;
+ int Ratio = NumBytes / Mask.size();
+ for (int i = 0; i < NumBytes; ++i) {
if (Mask[i / Ratio] == SM_SentinelUndef) {
PSHUFBMask.push_back(DAG.getUNDEF(MVT::i8));
continue;
@@ -19249,12 +19602,13 @@ static bool combineX86ShuffleChain(SDValue Op, SDValue Root, ArrayRef<int> Mask,
: 255;
PSHUFBMask.push_back(DAG.getConstant(M, MVT::i8));
}
- Op = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Input);
+ MVT ByteVT = MVT::getVectorVT(MVT::i8, NumBytes);
+ Op = DAG.getNode(ISD::BITCAST, DL, ByteVT, Input);
DCI.AddToWorklist(Op.getNode());
SDValue PSHUFBMaskOp =
- DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v16i8, PSHUFBMask);
+ DAG.getNode(ISD::BUILD_VECTOR, DL, ByteVT, PSHUFBMask);
DCI.AddToWorklist(PSHUFBMaskOp.getNode());
- Op = DAG.getNode(X86ISD::PSHUFB, DL, MVT::v16i8, Op, PSHUFBMaskOp);
+ Op = DAG.getNode(X86ISD::PSHUFB, DL, ByteVT, Op, PSHUFBMaskOp);
DCI.AddToWorklist(Op.getNode());
DCI.CombineTo(Root.getNode(), DAG.getNode(ISD::BITCAST, DL, RootVT, Op),
/*AddTo*/ true);
@@ -19312,10 +19666,6 @@ static bool combineX86ShufflesRecursively(SDValue Op, SDValue Root,
MVT VT = Op.getSimpleValueType();
if (!VT.isVector())
return false; // Bail if we hit a non-vector.
- // FIXME: This routine should be taught about 256-bit shuffles, or a 256-bit
- // version should be added.
- if (VT.getSizeInBits() != 128)
- return false;
assert(Root.getSimpleValueType().isVector() &&
"Shuffles operate on vector types!");
@@ -19418,12 +19768,26 @@ static bool combineX86ShufflesRecursively(SDValue Op, SDValue Root,
/// This is a very minor wrapper around getTargetShuffleMask to ease forming v4
/// PSHUF-style masks that can be reused with such instructions.
static SmallVector<int, 4> getPSHUFShuffleMask(SDValue N) {
+ MVT VT = N.getSimpleValueType();
SmallVector<int, 4> Mask;
bool IsUnary;
- bool HaveMask = getTargetShuffleMask(N.getNode(), N.getSimpleValueType(), Mask, IsUnary);
+ bool HaveMask = getTargetShuffleMask(N.getNode(), VT, Mask, IsUnary);
(void)HaveMask;
assert(HaveMask);
+ // If we have more than 128 bits, only the low 128 bits of the shuffle mask
+ // matter. Check that the upper masks are repeats and remove them.
+ if (VT.getSizeInBits() > 128) {
+ int LaneElts = 128 / VT.getScalarSizeInBits();
+#ifndef NDEBUG
+ for (int i = 1, NumLanes = VT.getSizeInBits() / 128; i < NumLanes; ++i)
+ for (int j = 0; j < LaneElts; ++j)
+ assert(Mask[j] == Mask[i * LaneElts + j] - LaneElts &&
+ "Mask doesn't repeat in high 128-bit lanes!");
+#endif
+ Mask.resize(LaneElts);
+ }
+
switch (N.getOpcode()) {
case X86ISD::PSHUFD:
return Mask;
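A standalone sketch of the repeat property the new assert checks, generalized to any lane count (an assumption; the code above ships with the two-lane 256-bit case in mind):

    // True if every 128-bit lane applies the same per-lane shuffle,
    // i.e. lane k's entries equal lane 0's entries plus k * LaneElts.
    static bool laneMaskRepeats(const int *Mask, int NumElts, int LaneElts) {
      for (int i = LaneElts; i < NumElts; i += LaneElts)
        for (int j = 0; j < LaneElts; ++j)
          if (Mask[i + j] - i != Mask[j])
            return false;
      return true;
    }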
@@ -19496,7 +19860,8 @@ combineRedundantDWordShuffle(SDValue N, MutableArrayRef<int> Mask,
case X86ISD::UNPCKH:
// For either i8 -> i16 or i16 -> i32 unpacks, we can combine a dword
// shuffle into a preceding word shuffle.
- if (V.getValueType() != MVT::v16i8 && V.getValueType() != MVT::v8i16)
+ if (V.getSimpleValueType().getScalarType() != MVT::i8 &&
+ V.getSimpleValueType().getScalarType() != MVT::i16)
return SDValue();
// Search for a half-shuffle which we can combine with.
@@ -19670,8 +20035,7 @@ static SDValue PerformTargetShuffleCombine(SDValue N, SelectionDAG &DAG,
break;
case X86ISD::PSHUFLW:
case X86ISD::PSHUFHW:
- assert(VT == MVT::v8i16);
- (void)VT;
+ assert(VT.getScalarType() == MVT::i16 && "Bad word shuffle type!");
if (combineRedundantHalfShuffle(N, Mask, DAG, DCI))
return SDValue(); // We combined away this shuffle, so we're done.
@@ -19679,17 +20043,18 @@ static SDValue PerformTargetShuffleCombine(SDValue N, SelectionDAG &DAG,
// See if this reduces to a PSHUFD which is no more expensive and can
// combine with more operations. Note that it has to at least flip the
// dwords as otherwise it would have been removed as a no-op.
- if (Mask[0] == 2 && Mask[1] == 3 && Mask[2] == 0 && Mask[3] == 1) {
+ if (makeArrayRef(Mask).equals({2, 3, 0, 1})) {
int DMask[] = {0, 1, 2, 3};
int DOffset = N.getOpcode() == X86ISD::PSHUFLW ? 0 : 2;
DMask[DOffset + 0] = DOffset + 1;
DMask[DOffset + 1] = DOffset + 0;
- V = DAG.getNode(ISD::BITCAST, DL, MVT::v4i32, V);
+ MVT DVT = MVT::getVectorVT(MVT::i32, VT.getVectorNumElements() / 2);
+ V = DAG.getNode(ISD::BITCAST, DL, DVT, V);
DCI.AddToWorklist(V.getNode());
- V = DAG.getNode(X86ISD::PSHUFD, DL, MVT::v4i32, V,
+ V = DAG.getNode(X86ISD::PSHUFD, DL, DVT, V,
getV4X86ShuffleImm8ForMask(DMask, DAG));
DCI.AddToWorklist(V.getNode());
- return DAG.getNode(ISD::BITCAST, DL, MVT::v8i16, V);
+ return DAG.getNode(ISD::BITCAST, DL, VT, V);
}
// Look for shuffle patterns which can be implemented as a single unpack.
@@ -19717,18 +20082,14 @@ static SDValue PerformTargetShuffleCombine(SDValue N, SelectionDAG &DAG,
int MappedMask[8];
for (int i = 0; i < 8; ++i)
MappedMask[i] = 2 * DMask[WordMask[i] / 2] + WordMask[i] % 2;
- const int UnpackLoMask[] = {0, 0, 1, 1, 2, 2, 3, 3};
- const int UnpackHiMask[] = {4, 4, 5, 5, 6, 6, 7, 7};
- if (std::equal(std::begin(MappedMask), std::end(MappedMask),
- std::begin(UnpackLoMask)) ||
- std::equal(std::begin(MappedMask), std::end(MappedMask),
- std::begin(UnpackHiMask))) {
+ if (makeArrayRef(MappedMask).equals({0, 0, 1, 1, 2, 2, 3, 3}) ||
+ makeArrayRef(MappedMask).equals({4, 4, 5, 5, 6, 6, 7, 7})) {
// We can replace all three shuffles with an unpack.
- V = DAG.getNode(ISD::BITCAST, DL, MVT::v8i16, D.getOperand(0));
+ V = DAG.getNode(ISD::BITCAST, DL, VT, D.getOperand(0));
DCI.AddToWorklist(V.getNode());
return DAG.getNode(MappedMask[0] == 0 ? X86ISD::UNPCKL
: X86ISD::UNPCKH,
- DL, MVT::v8i16, V, V);
+ DL, VT, V, V);
}
}
}
@@ -19876,10 +20237,6 @@ static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG,
}
}
- // Only handle 128 wide vector from here on.
- if (!VT.is128BitVector())
- return SDValue();
-
// Combine a vector_shuffle that is equal to build_vector load1, load2, load3,
// load4, <0, 1, 2, 3> into a 128-bit load if the load addresses are
// consecutive, non-overlapping, and in the right order.
@@ -20987,6 +21344,49 @@ static SDValue checkBoolTestSetCCCombine(SDValue Cmp, X86::CondCode &CC) {
return SDValue();
}
+/// Check whether Cond is an AND/OR of SETCCs off of the same EFLAGS.
+/// Match:
+/// (X86or (X86setcc) (X86setcc))
+/// (X86cmp (and (X86setcc) (X86setcc)), 0)
+static bool checkBoolTestAndOrSetCCCombine(SDValue Cond, X86::CondCode &CC0,
+ X86::CondCode &CC1, SDValue &Flags,
+ bool &isAnd) {
+ if (Cond->getOpcode() == X86ISD::CMP) {
+ ConstantSDNode *CondOp1C = dyn_cast<ConstantSDNode>(Cond->getOperand(1));
+ if (!CondOp1C || !CondOp1C->isNullValue())
+ return false;
+
+ Cond = Cond->getOperand(0);
+ }
+
+ isAnd = false;
+
+ SDValue SetCC0, SetCC1;
+ switch (Cond->getOpcode()) {
+ default: return false;
+ case ISD::AND:
+ case X86ISD::AND:
+ isAnd = true;
+ // fallthru
+ case ISD::OR:
+ case X86ISD::OR:
+ SetCC0 = Cond->getOperand(0);
+ SetCC1 = Cond->getOperand(1);
+ break;
+ }
+
+ // Make sure we have SETCC nodes, using the same flags value.
+ if (SetCC0.getOpcode() != X86ISD::SETCC ||
+ SetCC1.getOpcode() != X86ISD::SETCC ||
+ SetCC0->getOperand(1) != SetCC1->getOperand(1))
+ return false;
+
+ CC0 = (X86::CondCode)SetCC0->getConstantOperandVal(0);
+ CC1 = (X86::CondCode)SetCC1->getConstantOperandVal(0);
+ Flags = SetCC0->getOperand(1);
+ return true;
+}
+
/// Optimize X86ISD::CMOV [LHS, RHS, CONDCODE (e.g. X86::COND_NE), CONDVAL]
static SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
@@ -21156,6 +21556,44 @@ static SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG,
}
}
+ // Fold and/or of setcc's to double CMOV:
+ // (CMOV F, T, ((cc1 | cc2) != 0)) -> (CMOV (CMOV F, T, cc1), T, cc2)
+ // (CMOV F, T, ((cc1 & cc2) != 0)) -> (CMOV (CMOV T, F, !cc1), F, !cc2)
+ //
+ // This combine lets us generate:
+ // cmovcc1 (jcc1 if we don't have CMOV)
+ // cmovcc2 (same)
+ // instead of:
+ // setcc1
+ // setcc2
+ // and/or
+ // cmovne (jne if we don't have CMOV)
+ // When we can't use the CMOV instruction, it might increase branch
+ // mispredicts.
+ // When we can use CMOV, or when there is no mispredict, this improves
+ // throughput and reduces register pressure.
+ //
+ if (CC == X86::COND_NE) {
+ SDValue Flags;
+ X86::CondCode CC0, CC1;
+ bool isAndSetCC;
+ if (checkBoolTestAndOrSetCCCombine(Cond, CC0, CC1, Flags, isAndSetCC)) {
+ if (isAndSetCC) {
+ std::swap(FalseOp, TrueOp);
+ CC0 = X86::GetOppositeBranchCondition(CC0);
+ CC1 = X86::GetOppositeBranchCondition(CC1);
+ }
+
+ SDValue LOps[] = {FalseOp, TrueOp, DAG.getConstant(CC0, MVT::i8),
+ Flags};
+ SDValue LCMOV = DAG.getNode(X86ISD::CMOV, DL, N->getVTList(), LOps);
+ SDValue Ops[] = {LCMOV, TrueOp, DAG.getConstant(CC1, MVT::i8), Flags};
+ SDValue CMOV = DAG.getNode(X86ISD::CMOV, DL, N->getVTList(), Ops);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), SDValue(CMOV.getNode(), 1));
+ return CMOV;
+ }
+ }
+
return SDValue();
}
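The AND case leans on De Morgan: select(cc1 & cc2, T, F) equals select(!cc1 | !cc2, F, T), so swapping the arms and inverting both condition codes reduces it to the OR form. A scalar model of the resulting two-CMOV chain:

    // (CMOV F, T, (cc0 | cc1)) lowered as two CMOVs on the shared flags.
    static int selectOr(bool cc0, bool cc1, int T, int F) {
      int Lo = cc0 ? T : F; // first CMOV
      return cc1 ? T : Lo;  // second CMOV reuses the same flags
    }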
@@ -21166,24 +21604,16 @@ static SDValue PerformINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG,
default: return SDValue();
// SSE/AVX/AVX2 blend intrinsics.
case Intrinsic::x86_avx2_pblendvb:
- case Intrinsic::x86_avx2_pblendw:
- case Intrinsic::x86_avx2_pblendd_128:
- case Intrinsic::x86_avx2_pblendd_256:
// Don't try to simplify this intrinsic if we don't have AVX2.
if (!Subtarget->hasAVX2())
return SDValue();
// FALL-THROUGH
- case Intrinsic::x86_avx_blend_pd_256:
- case Intrinsic::x86_avx_blend_ps_256:
case Intrinsic::x86_avx_blendv_pd_256:
case Intrinsic::x86_avx_blendv_ps_256:
// Don't try to simplify this intrinsic if we don't have AVX.
if (!Subtarget->hasAVX())
return SDValue();
// FALL-THROUGH
- case Intrinsic::x86_sse41_pblendw:
- case Intrinsic::x86_sse41_blendpd:
- case Intrinsic::x86_sse41_blendps:
case Intrinsic::x86_sse41_blendvps:
case Intrinsic::x86_sse41_blendvpd:
case Intrinsic::x86_sse41_pblendvb: {
@@ -21640,7 +22070,7 @@ static SDValue VectorZextCombine(SDNode *N, SelectionDAG &DAG,
// an and with a mask.
// We'd like to try to combine that into a shuffle with zero
// plus a bitcast, removing the and.
- if (N0.getOpcode() != ISD::BITCAST ||
+ if (N0.getOpcode() != ISD::BITCAST ||
N0.getOperand(0).getOpcode() != ISD::VECTOR_SHUFFLE)
return SDValue();
@@ -21670,7 +22100,7 @@ static SDValue VectorZextCombine(SDNode *N, SelectionDAG &DAG,
unsigned ResSize = N1.getValueType().getScalarSizeInBits();
// Make sure the splat matches the mask we expect
- if (SplatBitSize > ResSize ||
+ if (SplatBitSize > ResSize ||
(SplatValue + 1).exactLogBase2() != (int)SrcSize)
return SDValue();
@@ -21724,12 +22154,10 @@ static SDValue PerformAndCombine(SDNode *N, SelectionDAG &DAG,
if (DCI.isBeforeLegalizeOps())
return SDValue();
- SDValue Zext = VectorZextCombine(N, DAG, DCI, Subtarget);
- if (Zext.getNode())
+ if (SDValue Zext = VectorZextCombine(N, DAG, DCI, Subtarget))
return Zext;
- SDValue R = CMPEQCombine(N, DAG, DCI, Subtarget);
- if (R.getNode())
+ if (SDValue R = CMPEQCombine(N, DAG, DCI, Subtarget))
return R;
EVT VT = N->getValueType(0);
@@ -22521,7 +22949,7 @@ static bool isHorizontalBinOp(SDValue &LHS, SDValue &RHS, bool IsCommutative) {
// If A and B occur in reverse order in RHS, then "swap" them (which means
// rewriting the mask).
if (A != C)
- CommuteVectorShuffleMask(RMask, NumElts);
+ ShuffleVectorSDNode::commuteMask(RMask);
// At this point LHS and RHS are equivalent to
// LHS = VECTOR_SHUFFLE A, B, LMask
@@ -22630,7 +23058,7 @@ static SDValue PerformFANDCombine(SDNode *N, SelectionDAG &DAG) {
if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N->getOperand(1)))
if (C->getValueAPF().isPosZero())
return N->getOperand(1);
-
+
return SDValue();
}
@@ -22864,45 +23292,51 @@ static SDValue PerformISDSETCCCombine(SDNode *N, SelectionDAG &DAG,
if ((CC == ISD::SETNE || CC == ISD::SETEQ) && LHS.getOpcode() == ISD::SUB)
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(LHS.getOperand(0)))
if (C->getAPIntValue() == 0 && LHS.hasOneUse()) {
- SDValue addV = DAG.getNode(ISD::ADD, SDLoc(N),
- LHS.getValueType(), RHS, LHS.getOperand(1));
- return DAG.getSetCC(SDLoc(N), N->getValueType(0),
- addV, DAG.getConstant(0, addV.getValueType()), CC);
+ SDValue addV = DAG.getNode(ISD::ADD, SDLoc(N), LHS.getValueType(), RHS,
+ LHS.getOperand(1));
+ return DAG.getSetCC(SDLoc(N), N->getValueType(0), addV,
+ DAG.getConstant(0, addV.getValueType()), CC);
}
if ((CC == ISD::SETNE || CC == ISD::SETEQ) && RHS.getOpcode() == ISD::SUB)
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS.getOperand(0)))
if (C->getAPIntValue() == 0 && RHS.hasOneUse()) {
- SDValue addV = DAG.getNode(ISD::ADD, SDLoc(N),
- RHS.getValueType(), LHS, RHS.getOperand(1));
- return DAG.getSetCC(SDLoc(N), N->getValueType(0),
- addV, DAG.getConstant(0, addV.getValueType()), CC);
+ SDValue addV = DAG.getNode(ISD::ADD, SDLoc(N), RHS.getValueType(), LHS,
+ RHS.getOperand(1));
+ return DAG.getSetCC(SDLoc(N), N->getValueType(0), addV,
+ DAG.getConstant(0, addV.getValueType()), CC);
}
- if (VT.getScalarType() == MVT::i1) {
- bool IsSEXT0 = (LHS.getOpcode() == ISD::SIGN_EXTEND) &&
- (LHS.getOperand(0).getValueType().getScalarType() == MVT::i1);
- bool IsVZero0 = ISD::isBuildVectorAllZeros(LHS.getNode());
- if (!IsSEXT0 && !IsVZero0)
- return SDValue();
- bool IsSEXT1 = (RHS.getOpcode() == ISD::SIGN_EXTEND) &&
- (RHS.getOperand(0).getValueType().getScalarType() == MVT::i1);
+ if (VT.getScalarType() == MVT::i1 &&
+ (CC == ISD::SETNE || CC == ISD::SETEQ || ISD::isSignedIntSetCC(CC))) {
+ bool IsSEXT0 =
+ (LHS.getOpcode() == ISD::SIGN_EXTEND) &&
+ (LHS.getOperand(0).getValueType().getScalarType() == MVT::i1);
bool IsVZero1 = ISD::isBuildVectorAllZeros(RHS.getNode());
- if (!IsSEXT1 && !IsVZero1)
- return SDValue();
+ if (!IsSEXT0 || !IsVZero1) {
+ // Swap the operands and update the condition code.
+ std::swap(LHS, RHS);
+ CC = ISD::getSetCCSwappedOperands(CC);
+
+ IsSEXT0 = (LHS.getOpcode() == ISD::SIGN_EXTEND) &&
+ (LHS.getOperand(0).getValueType().getScalarType() == MVT::i1);
+ IsVZero1 = ISD::isBuildVectorAllZeros(RHS.getNode());
+ }
if (IsSEXT0 && IsVZero1) {
- assert(VT == LHS.getOperand(0).getValueType() && "Uexpected operand type");
- if (CC == ISD::SETEQ)
+ assert(VT == LHS.getOperand(0).getValueType() &&
+ "Uexpected operand type");
+ if (CC == ISD::SETGT)
+ return DAG.getConstant(0, VT);
+ if (CC == ISD::SETLE)
+ return DAG.getConstant(1, VT);
+ if (CC == ISD::SETEQ || CC == ISD::SETGE)
return DAG.getNOT(DL, LHS.getOperand(0), VT);
+
+ assert((CC == ISD::SETNE || CC == ISD::SETLT) &&
+ "Unexpected condition code!");
return LHS.getOperand(0);
}
- if (IsSEXT1 && IsVZero0) {
- assert(VT == RHS.getOperand(0).getValueType() && "Uexpected operand type");
- if (CC == ISD::SETEQ)
- return DAG.getNOT(DL, RHS.getOperand(0), VT);
- return RHS.getOperand(0);
- }
}
return SDValue();
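
The rewritten combine canonicalizes setcc(sext(x:i1), 0, cc) by swapping operands when the sext is not on the left, then folds using the fact that sign-extending an i1 gives 0 or -1. A scalar model of every branch taken above (a sketch of the reasoning, not the DAG code):

// X is the i1 operand; sextOfI1(X) is 0 for false, -1 for true, compared
// signed against zero. Each function corresponds to one fold above.
int sextOfI1(bool X) { return X ? -1 : 0; }

bool foldGT(bool X) { return sextOfI1(X) >  0; }  // always false -> constant 0
bool foldLE(bool X) { return sextOfI1(X) <= 0; }  // always true  -> constant 1
bool foldEQ(bool X) { return sextOfI1(X) == 0; }  // equals !X -> getNOT(...)
bool foldGE(bool X) { return sextOfI1(X) >= 0; }  // equals !X -> getNOT(...)
bool foldNE(bool X) { return sextOfI1(X) != 0; }  // equals  X -> LHS operand
bool foldLT(bool X) { return sextOfI1(X) <  0; }  // equals  X -> LHS operand
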
@@ -22940,7 +23374,7 @@ static SDValue PerformINSERTPSCombine(SDNode *N, SelectionDAG &DAG,
// countS and just gets an f32 from that address.
unsigned DestIndex =
cast<ConstantSDNode>(N->getOperand(2))->getZExtValue() >> 6;
-
+
Ld = NarrowVectorLoadToElement(cast<LoadSDNode>(Ld), DestIndex, DAG);
// Create this as a scalar to vector to match the instruction pattern.
@@ -22964,7 +23398,7 @@ static SDValue PerformBLENDICombine(SDNode *N, SelectionDAG &DAG) {
// pattern-matching possibilities related to scalar math ops in SSE/AVX.
// x86InstrInfo knows how to commute this back after instruction selection
// if it would help register allocation.
-
+
// TODO: If optimizing for size or a processor that doesn't suffer from
// partial register update stalls, this should be transformed into a MOVSD
// instruction because a MOVSD is 1-2 bytes smaller than a BLENDPD.
@@ -23503,27 +23937,23 @@ bool X86TargetLowering::IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const {
// X86 Inline Assembly Support
//===----------------------------------------------------------------------===//
-namespace {
- // Helper to match a string separated by whitespace.
- bool matchAsmImpl(StringRef s, ArrayRef<const StringRef *> args) {
- s = s.substr(s.find_first_not_of(" \t")); // Skip leading whitespace.
-
- for (unsigned i = 0, e = args.size(); i != e; ++i) {
- StringRef piece(*args[i]);
- if (!s.startswith(piece)) // Check if the piece matches.
- return false;
+// Helper to match a string separated by whitespace.
+static bool matchAsm(StringRef S, ArrayRef<const char *> Pieces) {
+ S = S.substr(S.find_first_not_of(" \t")); // Skip leading whitespace.
- s = s.substr(piece.size());
- StringRef::size_type pos = s.find_first_not_of(" \t");
- if (pos == 0) // We matched a prefix.
- return false;
+ for (StringRef Piece : Pieces) {
+ if (!S.startswith(Piece)) // Check if the piece matches.
+ return false;
- s = s.substr(pos);
- }
+ S = S.substr(Piece.size());
+ StringRef::size_type Pos = S.find_first_not_of(" \t");
+ if (Pos == 0) // We matched a prefix.
+ return false;
- return s.empty();
+ S = S.substr(Pos);
}
- const VariadicFunction1<bool, StringRef, StringRef, matchAsmImpl> matchAsm={};
+
+ return S.empty();
}
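
The refactored matcher is now a plain static function over an ArrayRef of pieces. A self-contained restatement with std::string_view, showing the matching rules: pieces must appear in order, separated by whitespace, with nothing left over:

#include <algorithm>
#include <initializer_list>
#include <string_view>

static bool matchAsmSketch(std::string_view S,
                           std::initializer_list<std::string_view> Pieces) {
  S.remove_prefix(std::min(S.find_first_not_of(" \t"), S.size()));
  for (std::string_view Piece : Pieces) {
    if (S.substr(0, Piece.size()) != Piece)
      return false;                          // piece must match here
    S.remove_prefix(Piece.size());
    size_t Pos = S.find_first_not_of(" \t");
    if (Pos == 0)
      return false;                          // only matched a token prefix
    S.remove_prefix(std::min(Pos, S.size()));
  }
  return S.empty();                          // no trailing text allowed
}

// matchAsmSketch("  bswap  $0", {"bswap", "$0"}) == true
// matchAsmSketch("bswapl $0",   {"bswap", "$0"}) == false (prefix of a token)
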
static bool clobbersFlagRegisters(const SmallVector<StringRef, 4> &AsmPieces) {
@@ -23563,12 +23993,12 @@ bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const {
// ops instead of emitting the bswap asm. For now, we don't support 486 or
// lower so don't worry about this.
// bswap $0
- if (matchAsm(AsmPieces[0], "bswap", "$0") ||
- matchAsm(AsmPieces[0], "bswapl", "$0") ||
- matchAsm(AsmPieces[0], "bswapq", "$0") ||
- matchAsm(AsmPieces[0], "bswap", "${0:q}") ||
- matchAsm(AsmPieces[0], "bswapl", "${0:q}") ||
- matchAsm(AsmPieces[0], "bswapq", "${0:q}")) {
+ if (matchAsm(AsmPieces[0], {"bswap", "$0"}) ||
+ matchAsm(AsmPieces[0], {"bswapl", "$0"}) ||
+ matchAsm(AsmPieces[0], {"bswapq", "$0"}) ||
+ matchAsm(AsmPieces[0], {"bswap", "${0:q}"}) ||
+ matchAsm(AsmPieces[0], {"bswapl", "${0:q}"}) ||
+ matchAsm(AsmPieces[0], {"bswapq", "${0:q}"})) {
// No need to check constraints, nothing other than the equivalent of
// "=r,0" would be valid here.
return IntrinsicLowering::LowerToByteSwap(CI);
@@ -23577,8 +24007,8 @@ bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const {
// rorw $$8, ${0:w} --> llvm.bswap.i16
if (CI->getType()->isIntegerTy(16) &&
IA->getConstraintString().compare(0, 5, "=r,0,") == 0 &&
- (matchAsm(AsmPieces[0], "rorw", "$$8,", "${0:w}") ||
- matchAsm(AsmPieces[0], "rolw", "$$8,", "${0:w}"))) {
+ (matchAsm(AsmPieces[0], {"rorw", "$$8,", "${0:w}"}) ||
+ matchAsm(AsmPieces[0], {"rolw", "$$8,", "${0:w}"}))) {
AsmPieces.clear();
const std::string &ConstraintsStr = IA->getConstraintString();
SplitString(StringRef(ConstraintsStr).substr(5), AsmPieces, ",");
@@ -23590,9 +24020,9 @@ bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const {
case 3:
if (CI->getType()->isIntegerTy(32) &&
IA->getConstraintString().compare(0, 5, "=r,0,") == 0 &&
- matchAsm(AsmPieces[0], "rorw", "$$8,", "${0:w}") &&
- matchAsm(AsmPieces[1], "rorl", "$$16,", "$0") &&
- matchAsm(AsmPieces[2], "rorw", "$$8,", "${0:w}")) {
+ matchAsm(AsmPieces[0], {"rorw", "$$8,", "${0:w}"}) &&
+ matchAsm(AsmPieces[1], {"rorl", "$$16,", "$0"}) &&
+ matchAsm(AsmPieces[2], {"rorw", "$$8,", "${0:w}"})) {
AsmPieces.clear();
const std::string &ConstraintsStr = IA->getConstraintString();
SplitString(StringRef(ConstraintsStr).substr(5), AsmPieces, ",");
@@ -23607,9 +24037,9 @@ bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const {
Constraints[0].Codes.size() == 1 && Constraints[0].Codes[0] == "A" &&
Constraints[1].Codes.size() == 1 && Constraints[1].Codes[0] == "0") {
// bswap %eax / bswap %edx / xchgl %eax, %edx -> llvm.bswap.i64
- if (matchAsm(AsmPieces[0], "bswap", "%eax") &&
- matchAsm(AsmPieces[1], "bswap", "%edx") &&
- matchAsm(AsmPieces[2], "xchgl", "%eax,", "%edx"))
+ if (matchAsm(AsmPieces[0], {"bswap", "%eax"}) &&
+ matchAsm(AsmPieces[1], {"bswap", "%edx"}) &&
+ matchAsm(AsmPieces[2], {"xchgl", "%eax,", "%edx"}))
return IntrinsicLowering::LowerToByteSwap(CI);
}
}
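
A hedged user-level illustration of what ExpandInlineAsm recognizes: GNU inline asm whose entire body is a byte swap. Clang lowers the first function to the IR asm string "bswap $0", which the code above replaces with a call to llvm.bswap.i32; the rorw/rolw forms and the three-instruction eax/edx sequence map to llvm.bswap.i16 and llvm.bswap.i64 the same way:

static inline unsigned bswap_u32(unsigned X) {
  __asm__("bswap %0" : "=r"(X) : "0"(X));      // IR form: "bswap $0"
  return X;
}

static inline unsigned short bswap_u16(unsigned short X) {
  __asm__("rorw $8, %w0" : "=r"(X) : "0"(X));  // IR form: "rorw $$8, ${0:w}"
  return X;
}
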
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index 4423015..dd20ec2 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -30,37 +30,37 @@ namespace llvm {
// Start the numbering where the builtin ops leave off.
FIRST_NUMBER = ISD::BUILTIN_OP_END,
- /// BSF - Bit scan forward.
- /// BSR - Bit scan reverse.
+ /// Bit scan forward.
BSF,
+ /// Bit scan reverse.
BSR,
- /// SHLD, SHRD - Double shift instructions. These correspond to
+ /// Double shift instructions. These correspond to
/// X86::SHLDxx and X86::SHRDxx instructions.
SHLD,
SHRD,
- /// FAND - Bitwise logical AND of floating point values. This corresponds
+ /// Bitwise logical AND of floating point values. This corresponds
/// to X86::ANDPS or X86::ANDPD.
FAND,
- /// FOR - Bitwise logical OR of floating point values. This corresponds
+ /// Bitwise logical OR of floating point values. This corresponds
/// to X86::ORPS or X86::ORPD.
FOR,
- /// FXOR - Bitwise logical XOR of floating point values. This corresponds
+ /// Bitwise logical XOR of floating point values. This corresponds
/// to X86::XORPS or X86::XORPD.
FXOR,
- /// FANDN - Bitwise logical ANDNOT of floating point values. This
+ /// Bitwise logical ANDNOT of floating point values. This
/// corresponds to X86::ANDNPS or X86::ANDNPD.
FANDN,
- /// FSRL - Bitwise logical right shift of floating point values. These
+ /// Bitwise logical right shift of floating point values. This
/// corresponds to X86::PSRLDQ.
FSRL,
- /// CALL - These operations represent an abstract X86 call
+ /// These operations represent an abstract X86 call
/// instruction, which includes a bunch of information. In particular the
/// operands of these node are:
///
@@ -79,8 +79,7 @@ namespace llvm {
///
CALL,
- /// RDTSC_DAG - This operation implements the lowering for
- /// readcyclecounter
+ /// This operation implements the lowering for readcyclecounter
RDTSC_DAG,
/// X86 Read Time-Stamp Counter and Processor ID.
@@ -131,187 +130,186 @@ namespace llvm {
/// 1 is the number of bytes of stack to pop.
RET_FLAG,
- /// REP_STOS - Repeat fill, corresponds to X86::REP_STOSx.
+ /// Repeat fill, corresponds to X86::REP_STOSx.
REP_STOS,
- /// REP_MOVS - Repeat move, corresponds to X86::REP_MOVSx.
+ /// Repeat move, corresponds to X86::REP_MOVSx.
REP_MOVS,
- /// GlobalBaseReg - On Darwin, this node represents the result of the popl
+ /// On Darwin, this node represents the result of the popl
/// at function entry, used for PIC code.
GlobalBaseReg,
- /// Wrapper - A wrapper node for TargetConstantPool,
+ /// A wrapper node for TargetConstantPool,
/// TargetExternalSymbol, and TargetGlobalAddress.
Wrapper,
- /// WrapperRIP - Special wrapper used under X86-64 PIC mode for RIP
+ /// Special wrapper used under X86-64 PIC mode for RIP
/// relative displacements.
WrapperRIP,
- /// MOVDQ2Q - Copies a 64-bit value from the low word of an XMM vector
+ /// Copies a 64-bit value from the low word of an XMM vector
/// to an MMX vector. If you think this is too close to the previous
/// mnemonic, so do I; blame Intel.
MOVDQ2Q,
- /// MMX_MOVD2W - Copies a 32-bit value from the low word of a MMX
+ /// Copies a 32-bit value from the low word of a MMX
/// vector to a GPR.
MMX_MOVD2W,
- /// MMX_MOVW2D - Copies a GPR into the low 32-bit word of a MMX vector
+ /// Copies a GPR into the low 32-bit word of a MMX vector
/// and zeroes out the high word.
MMX_MOVW2D,
- /// PEXTRB - Extract an 8-bit value from a vector and zero extend it to
+ /// Extract an 8-bit value from a vector and zero extend it to
/// i32, corresponds to X86::PEXTRB.
PEXTRB,
- /// PEXTRW - Extract a 16-bit value from a vector and zero extend it to
+ /// Extract a 16-bit value from a vector and zero extend it to
/// i32, corresponds to X86::PEXTRW.
PEXTRW,
- /// INSERTPS - Insert any element of a 4 x float vector into any element
+ /// Insert any element of a 4 x float vector into any element
/// of a destination 4 x float vector.
INSERTPS,
- /// PINSRB - Insert the lower 8-bits of a 32-bit value to a vector,
+ /// Insert the lower 8 bits of a 32-bit value into a vector,
/// corresponds to X86::PINSRB.
PINSRB,
- /// PINSRW - Insert the lower 16-bits of a 32-bit value to a vector,
+ /// Insert the lower 16-bits of a 32-bit value to a vector,
/// corresponds to X86::PINSRW.
PINSRW, MMX_PINSRW,
- /// PSHUFB - Shuffle 16 8-bit values within a vector.
+ /// Shuffle 16 8-bit values within a vector.
PSHUFB,
- /// ANDNP - Bitwise Logical AND NOT of Packed FP values.
+ /// Bitwise Logical AND NOT of Packed FP values.
ANDNP,
- /// PSIGN - Copy integer sign.
+ /// Copy integer sign.
PSIGN,
- /// BLENDI - Blend where the selector is an immediate.
+ /// Blend where the selector is an immediate.
BLENDI,
- /// SHRUNKBLEND - Blend where the condition has been shrunk.
+ /// Blend where the condition has been shrunk.
/// This is used to emphasize that the condition mask is
/// no longer valid for generic VSELECT optimizations.
SHRUNKBLEND,
- /// ADDSUB - Combined add and sub on an FP vector.
+ /// Combined add and sub on an FP vector.
ADDSUB,
- // FADD, FSUB, FMUL, FDIV, FMIN, FMAX - FP vector ops with rounding mode.
+ // FP vector ops with rounding mode.
FADD_RND,
FSUB_RND,
FMUL_RND,
FDIV_RND,
- // SUBUS - Integer sub with unsigned saturation.
+ // Integer sub with unsigned saturation.
SUBUS,
- /// HADD - Integer horizontal add.
+ /// Integer horizontal add.
HADD,
- /// HSUB - Integer horizontal sub.
+ /// Integer horizontal sub.
HSUB,
- /// FHADD - Floating point horizontal add.
+ /// Floating point horizontal add.
FHADD,
- /// FHSUB - Floating point horizontal sub.
+ /// Floating point horizontal sub.
FHSUB,
- /// UMAX, UMIN - Unsigned integer max and min.
+ /// Unsigned integer max and min.
UMAX, UMIN,
- /// SMAX, SMIN - Signed integer max and min.
+ /// Signed integer max and min.
SMAX, SMIN,
- /// FMAX, FMIN - Floating point max and min.
- ///
+ /// Floating point max and min.
FMAX, FMIN,
- /// FMAXC, FMINC - Commutative FMIN and FMAX.
+ /// Commutative FMIN and FMAX.
FMAXC, FMINC,
- /// FRSQRT, FRCP - Floating point reciprocal-sqrt and reciprocal
- /// approximation. Note that these typically require refinement
+ /// Floating point reciprocal-sqrt and reciprocal approximation.
+ /// Note that these typically require refinement
/// in order to obtain suitable precision.
FRSQRT, FRCP,
- // TLSADDR - Thread Local Storage.
+ // Thread Local Storage.
TLSADDR,
- // TLSBASEADDR - Thread Local Storage. A call to get the start address
+ // Thread Local Storage. A call to get the start address
// of the TLS block for the current module.
TLSBASEADDR,
- // TLSCALL - Thread Local Storage. When calling to an OS provided
+ // Thread Local Storage. When calling an OS-provided
// thunk at the address from an earlier relocation.
TLSCALL,
- // EH_RETURN - Exception Handling helpers.
+ // Exception Handling helpers.
EH_RETURN,
- // EH_SJLJ_SETJMP - SjLj exception handling setjmp.
+ // SjLj exception handling setjmp.
EH_SJLJ_SETJMP,
- // EH_SJLJ_LONGJMP - SjLj exception handling longjmp.
+ // SjLj exception handling longjmp.
EH_SJLJ_LONGJMP,
- /// TC_RETURN - Tail call return. See X86TargetLowering::LowerCall for
+ /// Tail call return. See X86TargetLowering::LowerCall for
/// the list of operands.
TC_RETURN,
- // VZEXT_MOVL - Vector move to low scalar and zero higher vector elements.
+ // Vector move to low scalar and zero higher vector elements.
VZEXT_MOVL,
- // VZEXT - Vector integer zero-extend.
+ // Vector integer zero-extend.
VZEXT,
- // VSEXT - Vector integer signed-extend.
+ // Vector integer signed-extend.
VSEXT,
- // VTRUNC - Vector integer truncate.
+ // Vector integer truncate.
VTRUNC,
- // VTRUNC - Vector integer truncate with mask.
+ // Vector integer truncate with mask.
VTRUNCM,
- // VFPEXT - Vector FP extend.
+ // Vector FP extend.
VFPEXT,
- // VFPROUND - Vector FP round.
+ // Vector FP round.
VFPROUND,
- // VSHL, VSRL - 128-bit vector logical left / right shift
+ // 128-bit vector logical left / right shift
VSHLDQ, VSRLDQ,
- // VSHL, VSRL, VSRA - Vector shift elements
+ // Vector shift elements
VSHL, VSRL, VSRA,
- // VSHLI, VSRLI, VSRAI - Vector shift elements by immediate
+ // Vector shift elements by immediate
VSHLI, VSRLI, VSRAI,
- // CMPP - Vector packed double/float comparison.
+ // Vector packed double/float comparison.
CMPP,
- // PCMP* - Vector integer comparisons.
+ // Vector integer comparisons.
PCMPEQ, PCMPGT,
- // PCMP*M - Vector integer comparisons, the result is in a mask vector.
+ // Vector integer comparisons, the result is in a mask vector.
PCMPEQM, PCMPGTM,
- /// CMPM, CMPMU - Vector comparison generating mask bits for fp and
+ /// Vector comparison generating mask bits for fp and
/// integer signed and unsigned data types.
CMPM,
CMPMU,
- // ADD, SUB, SMUL, etc. - Arithmetic operations with FLAGS results.
+ // Arithmetic operations with FLAGS results.
ADD, SUB, ADC, SBB, SMUL,
INC, DEC, OR, XOR, AND,
- BEXTR, // BEXTR - Bit field extract
+ BEXTR, // Bit field extract
UMUL, // LOW, HI, FLAGS = umul LHS, RHS
@@ -322,16 +320,16 @@ namespace llvm {
UDIVREM8_ZEXT_HREG,
SDIVREM8_SEXT_HREG,
- // MUL_IMM - X86 specific multiply by immediate.
+ // X86-specific multiply by immediate.
MUL_IMM,
- // PTEST - Vector bitwise comparisons.
+ // Vector bitwise comparisons.
PTEST,
- // TESTP - Vector packed fp sign bitwise comparisons.
+ // Vector packed fp sign bitwise comparisons.
TESTP,
- // TESTM, TESTNM - Vector "test" in AVX-512, the result is in a mask vector.
+ // Vector "test" in AVX-512, the result is in a mask vector.
TESTM,
TESTNM,
@@ -697,6 +695,12 @@ namespace llvm {
std::vector<SDValue> &Ops,
SelectionDAG &DAG) const override;
+ unsigned getInlineAsmMemConstraint(
+ const std::string &ConstraintCode) const override {
+ // FIXME: Map different constraints differently.
+ return InlineAsm::Constraint_m;
+ }
+
/// Given a physical register constraint
/// (e.g. {edx}), return the register number and the register class for the
/// register. This should only be used for C_Register constraints. On
@@ -993,7 +997,8 @@ namespace llvm {
bool shouldExpandAtomicLoadInIR(LoadInst *SI) const override;
bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
- bool shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
+ TargetLoweringBase::AtomicRMWExpansionKind
+ shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
LoadInst *
lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const override;
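
The atomic hook above now returns an expansion strategy instead of a plain yes/no. A sketch of the shape of the change; the enumerator names below are assumed from TargetLoweringBase of this era and are illustrative, not verified against this revision:

// Illustrative stand-ins; the real types live in TargetLoweringBase.
enum class AtomicRMWExpansionKind { None, LLSC, CmpXChg };

struct RMWOp { bool HasNativeEncoding; };

AtomicRMWExpansionKind shouldExpandAtomicRMWSketch(const RMWOp &AI) {
  // x86 has no LL/SC; operations without a native encoding get expanded
  // into a compare-exchange loop rather than simply "true".
  return AI.HasNativeEncoding ? AtomicRMWExpansionKind::None
                              : AtomicRMWExpansionKind::CmpXChg;
}
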
diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td
index 4923bc5..509602f 100644
--- a/lib/Target/X86/X86InstrAVX512.td
+++ b/lib/Target/X86/X86InstrAVX512.td
@@ -74,6 +74,15 @@ class X86VectorVTInfo<int numelts, ValueType eltvt, RegisterClass rc,
!if (!eq (Size, 128), "v2i64",
!if (!eq (Size, 256), "v4i64",
VTName)), VTName));
+
+ PatFrag AlignedLdFrag = !cast<PatFrag>("alignedload" #
+ !if (!eq (TypeVariantName, "i"),
+ !if (!eq (Size, 128), "v2i64",
+ !if (!eq (Size, 256), "v4i64",
+ !if (!eq (Size, 512),
+ !if (!eq (EltSize, 64), "v8i64", "v16i32"),
+ VTName))), VTName));
+
PatFrag ScalarLdFrag = !cast<PatFrag>("load" # EltVT);
// The corresponding float type, e.g. v16f32 for v16i32
@@ -107,6 +116,9 @@ class X86VectorVTInfo<int numelts, ValueType eltvt, RegisterClass rc,
// create the canonical constant zero node ImmAllZerosV.
ValueType i32VT = !cast<ValueType>("v" # !srl(Size, 5) # "i32");
dag ImmAllZerosV = (VT (bitconvert (i32VT immAllZerosV)));
+
+ string ZSuffix = !if (!eq (Size, 128), "Z128",
+ !if (!eq (Size, 256), "Z256", "Z"));
}
def v64i8_info : X86VectorVTInfo<64, i8, VR512, "b">;
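
The new AlignedLdFrag and ZSuffix fields are pure functions of the record's size and element type: integer vectors of 128/256 bits reuse the v2i64/v4i64 load fragments, and 512-bit integer vectors pick v8i64 or v16i32 by element size. The same !if cascades restated in C++:

#include <string>

std::string alignedLdFragName(bool IsInt, unsigned Size, unsigned EltSize,
                              const std::string &VTName) {
  std::string Suffix = VTName;                 // float types keep their VT
  if (IsInt) {
    if (Size == 128)      Suffix = "v2i64";
    else if (Size == 256) Suffix = "v4i64";
    else if (Size == 512) Suffix = EltSize == 64 ? "v8i64" : "v16i32";
  }
  return "alignedload" + Suffix;
}

std::string zSuffix(unsigned Size) {
  return Size == 128 ? "Z128" : Size == 256 ? "Z256" : "Z";
}
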
@@ -1559,6 +1571,11 @@ multiclass avx512_cmp_packed<RegisterClass KRC, RegisterClass RC,
(outs KRC:$dst), (ins RC:$src1, RC:$src2, u8imm:$cc),
!strconcat("vcmp", suffix,
"\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"), [], d>;
+ def rrib_alt : AVX512PIi8<0xC2, MRMSrcReg,
+ (outs KRC:$dst), (ins RC:$src1, RC:$src2, u8imm:$cc),
+ !strconcat("vcmp", suffix,
+ "\t{{sae}, $cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc, {sae}}"),
+ [], d>, EVEX_B;
let mayLoad = 1 in
def rmi_alt : AVX512PIi8<0xC2, MRMSrcMem,
(outs KRC:$dst), (ins RC:$src1, x86memop:$src2, u8imm:$cc),
@@ -2047,6 +2064,8 @@ let Predicates = [HasVLX] in {
(v8i1 (COPY_TO_REGCLASS VK4:$src, VK8))>;
def : Pat<(v8i1 (insert_subvector undef, (v2i1 VK2:$src), (iPTR 0))),
(v8i1 (COPY_TO_REGCLASS VK2:$src, VK8))>;
+ def : Pat<(v4i1 (insert_subvector undef, (v2i1 VK2:$src), (iPTR 0))),
+ (v4i1 (COPY_TO_REGCLASS VK2:$src, VK4))>;
def : Pat<(v4i1 (extract_subvector (v8i1 VK8:$src), (iPTR 0))),
(v4i1 (COPY_TO_REGCLASS VK8:$src, VK4))>;
def : Pat<(v2i1 (extract_subvector (v8i1 VK8:$src), (iPTR 0))),
@@ -2062,177 +2081,193 @@ def : Pat<(v8i1 (X86vsrli VK8:$src, (i8 imm:$imm))),
(v8i1 (COPY_TO_REGCLASS
(KSHIFTRWri (COPY_TO_REGCLASS VK8:$src, VK16),
(I8Imm $imm)), VK8))>, Requires<[HasAVX512, NoDQI]>;
+
+def : Pat<(v4i1 (X86vshli VK4:$src, (i8 imm:$imm))),
+ (v4i1 (COPY_TO_REGCLASS
+ (KSHIFTLWri (COPY_TO_REGCLASS VK4:$src, VK16),
+ (I8Imm $imm)), VK4))>, Requires<[HasAVX512]>;
+
+def : Pat<(v4i1 (X86vsrli VK4:$src, (i8 imm:$imm))),
+ (v4i1 (COPY_TO_REGCLASS
+ (KSHIFTRWri (COPY_TO_REGCLASS VK4:$src, VK16),
+ (I8Imm $imm)), VK4))>, Requires<[HasAVX512]>;
+
//===----------------------------------------------------------------------===//
// AVX-512 - Aligned and unaligned load and store
//
-multiclass avx512_load<bits<8> opc, string OpcodeStr, PatFrag ld_frag,
- RegisterClass KRC, RegisterClass RC,
- ValueType vt, ValueType zvt, X86MemOperand memop,
- Domain d, bit IsReMaterializable = 1> {
-let hasSideEffects = 0 in {
- def rr : AVX512PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
+
+multiclass avx512_load<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
+ PatFrag ld_frag, PatFrag mload,
+ bit IsReMaterializable = 1> {
+ let hasSideEffects = 0 in {
+ def rr : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst), (ins _.RC:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [],
- d>, EVEX;
- def rrkz : AVX512PI<opc, MRMSrcReg, (outs RC:$dst), (ins KRC:$mask, RC:$src),
+ _.ExeDomain>, EVEX;
+ def rrkz : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst),
+ (ins _.KRCWM:$mask, _.RC:$src),
!strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
- "${dst} {${mask}} {z}, $src}"), [], d>, EVEX, EVEX_KZ;
- }
+ "${dst} {${mask}} {z}, $src}"), [], _.ExeDomain>,
+ EVEX, EVEX_KZ;
+
let canFoldAsLoad = 1, isReMaterializable = IsReMaterializable,
SchedRW = [WriteLoad] in
- def rm : AVX512PI<opc, MRMSrcMem, (outs RC:$dst), (ins memop:$src),
+ def rm : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst), (ins _.MemOp:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set RC:$dst, (vt (bitconvert (ld_frag addr:$src))))],
- d>, EVEX;
-
- let AddedComplexity = 20 in {
- let Constraints = "$src0 = $dst", hasSideEffects = 0 in {
- let hasSideEffects = 0 in
- def rrk : AVX512PI<opc, MRMSrcReg, (outs RC:$dst),
- (ins RC:$src0, KRC:$mask, RC:$src1),
- !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|",
- "${dst} {${mask}}, $src1}"),
- [(set RC:$dst, (vt (vselect KRC:$mask,
- (vt RC:$src1),
- (vt RC:$src0))))],
- d>, EVEX, EVEX_K;
+ [(set _.RC:$dst, (_.VT (bitconvert (ld_frag addr:$src))))],
+ _.ExeDomain>, EVEX;
+
+ let Constraints = "$src0 = $dst" in {
+ def rrk : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst),
+ (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1),
+ !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|",
+ "${dst} {${mask}}, $src1}"),
+ [(set _.RC:$dst, (_.VT (vselect _.KRCWM:$mask,
+ (_.VT _.RC:$src1),
+ (_.VT _.RC:$src0))))], _.ExeDomain>,
+ EVEX, EVEX_K;
let mayLoad = 1, SchedRW = [WriteLoad] in
- def rmk : AVX512PI<opc, MRMSrcMem, (outs RC:$dst),
- (ins RC:$src0, KRC:$mask, memop:$src1),
+ def rmk : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst),
+ (ins _.RC:$src0, _.KRCWM:$mask, _.MemOp:$src1),
!strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|",
"${dst} {${mask}}, $src1}"),
- [(set RC:$dst, (vt
- (vselect KRC:$mask,
- (vt (bitconvert (ld_frag addr:$src1))),
- (vt RC:$src0))))],
- d>, EVEX, EVEX_K;
+ [(set _.RC:$dst, (_.VT
+ (vselect _.KRCWM:$mask,
+ (_.VT (bitconvert (ld_frag addr:$src1))),
+ (_.VT _.RC:$src0))))], _.ExeDomain>, EVEX, EVEX_K;
}
let mayLoad = 1, SchedRW = [WriteLoad] in
- def rmkz : AVX512PI<opc, MRMSrcMem, (outs RC:$dst),
- (ins KRC:$mask, memop:$src),
- !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
- "${dst} {${mask}} {z}, $src}"),
- [(set RC:$dst, (vt
- (vselect KRC:$mask,
- (vt (bitconvert (ld_frag addr:$src))),
- (vt (bitconvert (zvt immAllZerosV))))))],
- d>, EVEX, EVEX_KZ;
+ def rmkz : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst),
+ (ins _.KRCWM:$mask, _.MemOp:$src),
+ OpcodeStr #"\t{$src, ${dst} {${mask}} {z}|"#
+ "${dst} {${mask}} {z}, $src}",
+ [(set _.RC:$dst, (_.VT (vselect _.KRCWM:$mask,
+ (_.VT (bitconvert (ld_frag addr:$src))), _.ImmAllZerosV)))],
+ _.ExeDomain>, EVEX, EVEX_KZ;
}
+ def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, undef)),
+ (!cast<Instruction>(NAME#_.ZSuffix##rmkz) _.KRCWM:$mask, addr:$ptr)>;
+
+ def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, _.ImmAllZerosV)),
+ (!cast<Instruction>(NAME#_.ZSuffix##rmkz) _.KRCWM:$mask, addr:$ptr)>;
+
+ def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, (_.VT _.RC:$src0))),
+ (!cast<Instruction>(NAME#_.ZSuffix##rmk) _.RC:$src0,
+ _.KRCWM:$mask, addr:$ptr)>;
}
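
The rewritten avx512_load multiclass now also attaches masked-load selection patterns: rmkz zeroes the inactive lanes (for an undef or all-zero pass-through) and rmk merges into $src0. A scalar model of the semantics those patterns implement:

#include <cstddef>

// rmk form: inactive lanes read the pass-through operand.
// rmkz form: PassThru is all zeros, matching the {z} assembly suffix.
void maskedLoadF32(float *Dst, const float *Ptr, const bool *Mask,
                   const float *PassThru, size_t N) {
  for (size_t I = 0; I != N; ++I)
    Dst[I] = Mask[I] ? Ptr[I] : PassThru[I];   // no fault on masked lanes
}
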
-multiclass avx512_load_vl<bits<8> opc, string OpcodeStr, string ld_pat,
- string elty, string elsz, string vsz512,
- string vsz256, string vsz128, Domain d,
- Predicate prd, bit IsReMaterializable = 1> {
+multiclass avx512_alignedload_vl<bits<8> opc, string OpcodeStr,
+ AVX512VLVectorVTInfo _,
+ Predicate prd,
+ bit IsReMaterializable = 1> {
let Predicates = [prd] in
- defm Z : avx512_load<opc, OpcodeStr,
- !cast<PatFrag>(ld_pat##"v"##vsz512##elty##elsz),
- !cast<RegisterClass>("VK"##vsz512##"WM"), VR512,
- !cast<ValueType>("v"##vsz512##elty##elsz), v16i32,
- !cast<X86MemOperand>(elty##"512mem"), d,
- IsReMaterializable>, EVEX_V512;
+ defm Z : avx512_load<opc, OpcodeStr, _.info512, _.info512.AlignedLdFrag,
+ masked_load_aligned512, IsReMaterializable>, EVEX_V512;
let Predicates = [prd, HasVLX] in {
- defm Z256 : avx512_load<opc, OpcodeStr,
- !cast<PatFrag>(ld_pat##!if(!eq(elty,"f"),
- "v"##vsz256##elty##elsz, "v4i64")),
- !cast<RegisterClass>("VK"##vsz256##"WM"), VR256X,
- !cast<ValueType>("v"##vsz256##elty##elsz), v8i32,
- !cast<X86MemOperand>(elty##"256mem"), d,
- IsReMaterializable>, EVEX_V256;
-
- defm Z128 : avx512_load<opc, OpcodeStr,
- !cast<PatFrag>(ld_pat##!if(!eq(elty,"f"),
- "v"##vsz128##elty##elsz, "v2i64")),
- !cast<RegisterClass>("VK"##vsz128##"WM"), VR128X,
- !cast<ValueType>("v"##vsz128##elty##elsz), v4i32,
- !cast<X86MemOperand>(elty##"128mem"), d,
- IsReMaterializable>, EVEX_V128;
+ defm Z256 : avx512_load<opc, OpcodeStr, _.info256, _.info256.AlignedLdFrag,
+ masked_load_aligned256, IsReMaterializable>, EVEX_V256;
+ defm Z128 : avx512_load<opc, OpcodeStr, _.info128, _.info128.AlignedLdFrag,
+ masked_load_aligned128, IsReMaterializable>, EVEX_V128;
}
}
+multiclass avx512_load_vl<bits<8> opc, string OpcodeStr,
+ AVX512VLVectorVTInfo _,
+ Predicate prd,
+ bit IsReMaterializable = 1> {
+ let Predicates = [prd] in
+ defm Z : avx512_load<opc, OpcodeStr, _.info512, _.info512.LdFrag,
+ masked_load_unaligned, IsReMaterializable>, EVEX_V512;
-multiclass avx512_store<bits<8> opc, string OpcodeStr, PatFrag st_frag,
- ValueType OpVT, RegisterClass KRC, RegisterClass RC,
- X86MemOperand memop, Domain d> {
+ let Predicates = [prd, HasVLX] in {
+ defm Z256 : avx512_load<opc, OpcodeStr, _.info256, _.info256.LdFrag,
+ masked_load_unaligned, IsReMaterializable>, EVEX_V256;
+ defm Z128 : avx512_load<opc, OpcodeStr, _.info128, _.info128.LdFrag,
+ masked_load_unaligned, IsReMaterializable>, EVEX_V128;
+ }
+}
+
+multiclass avx512_store<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
+ PatFrag st_frag, PatFrag mstore> {
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in {
- def rr_alt : AVX512PI<opc, MRMDestReg, (outs RC:$dst), (ins RC:$src),
- !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [], d>,
- EVEX;
+ def rr_alt : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst), (ins _.RC:$src),
+ OpcodeStr # "\t{$src, $dst|$dst, $src}", [],
+ _.ExeDomain>, EVEX;
let Constraints = "$src1 = $dst" in
- def rrk_alt : AVX512PI<opc, MRMDestReg, (outs RC:$dst),
- (ins RC:$src1, KRC:$mask, RC:$src2),
- !strconcat(OpcodeStr,
- "\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"), [], d>,
- EVEX, EVEX_K;
- def rrkz_alt : AVX512PI<opc, MRMDestReg, (outs RC:$dst),
- (ins KRC:$mask, RC:$src),
- !strconcat(OpcodeStr,
- "\t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}"),
- [], d>, EVEX, EVEX_KZ;
+ def rrk_alt : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst),
+ (ins _.RC:$src1, _.KRCWM:$mask, _.RC:$src2),
+ OpcodeStr #
+ "\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}",
+ [], _.ExeDomain>, EVEX, EVEX_K;
+ def rrkz_alt : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst),
+ (ins _.KRCWM:$mask, _.RC:$src),
+ OpcodeStr #
+ "\t{$src, ${dst} {${mask}} {z}|" #
+ "${dst} {${mask}} {z}, $src}",
+ [], _.ExeDomain>, EVEX, EVEX_KZ;
}
let mayStore = 1 in {
- def mr : AVX512PI<opc, MRMDestMem, (outs), (ins memop:$dst, RC:$src),
+ def mr : AVX512PI<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.RC:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(st_frag (OpVT RC:$src), addr:$dst)], d>, EVEX;
+ [(st_frag (_.VT _.RC:$src), addr:$dst)], _.ExeDomain>, EVEX;
def mrk : AVX512PI<opc, MRMDestMem, (outs),
- (ins memop:$dst, KRC:$mask, RC:$src),
- !strconcat(OpcodeStr,
- "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}"),
- [], d>, EVEX, EVEX_K;
+ (ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src),
+ OpcodeStr # "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}",
+ [], _.ExeDomain>, EVEX, EVEX_K;
}
+
+ def: Pat<(mstore addr:$ptr, _.KRCWM:$mask, (_.VT _.RC:$src)),
+ (!cast<Instruction>(NAME#_.ZSuffix##mrk) addr:$ptr,
+ _.KRCWM:$mask, _.RC:$src)>;
}
-multiclass avx512_store_vl<bits<8> opc, string OpcodeStr, string st_pat,
- string st_suff_512, string st_suff_256,
- string st_suff_128, string elty, string elsz,
- string vsz512, string vsz256, string vsz128,
- Domain d, Predicate prd> {
+multiclass avx512_store_vl<bits<8> opc, string OpcodeStr,
+ AVX512VLVectorVTInfo _, Predicate prd> {
let Predicates = [prd] in
- defm Z : avx512_store<opc, OpcodeStr, !cast<PatFrag>(st_pat##st_suff_512),
- !cast<ValueType>("v"##vsz512##elty##elsz),
- !cast<RegisterClass>("VK"##vsz512##"WM"), VR512,
- !cast<X86MemOperand>(elty##"512mem"), d>, EVEX_V512;
+ defm Z : avx512_store<opc, OpcodeStr, _.info512, store,
+ masked_store_unaligned>, EVEX_V512;
let Predicates = [prd, HasVLX] in {
- defm Z256 : avx512_store<opc, OpcodeStr, !cast<PatFrag>(st_pat##st_suff_256),
- !cast<ValueType>("v"##vsz256##elty##elsz),
- !cast<RegisterClass>("VK"##vsz256##"WM"), VR256X,
- !cast<X86MemOperand>(elty##"256mem"), d>, EVEX_V256;
-
- defm Z128 : avx512_store<opc, OpcodeStr, !cast<PatFrag>(st_pat##st_suff_128),
- !cast<ValueType>("v"##vsz128##elty##elsz),
- !cast<RegisterClass>("VK"##vsz128##"WM"), VR128X,
- !cast<X86MemOperand>(elty##"128mem"), d>, EVEX_V128;
+ defm Z256 : avx512_store<opc, OpcodeStr, _.info256, store,
+ masked_store_unaligned>, EVEX_V256;
+ defm Z128 : avx512_store<opc, OpcodeStr, _.info128, store,
+ masked_store_unaligned>, EVEX_V128;
}
}
-defm VMOVAPS : avx512_load_vl<0x28, "vmovaps", "alignedload", "f", "32",
- "16", "8", "4", SSEPackedSingle, HasAVX512>,
- avx512_store_vl<0x29, "vmovaps", "alignedstore",
- "512", "256", "", "f", "32", "16", "8", "4",
- SSEPackedSingle, HasAVX512>,
- PS, EVEX_CD8<32, CD8VF>;
+multiclass avx512_alignedstore_vl<bits<8> opc, string OpcodeStr,
+ AVX512VLVectorVTInfo _, Predicate prd> {
+ let Predicates = [prd] in
+ defm Z : avx512_store<opc, OpcodeStr, _.info512, alignedstore512,
+ masked_store_aligned512>, EVEX_V512;
+
+ let Predicates = [prd, HasVLX] in {
+ defm Z256 : avx512_store<opc, OpcodeStr, _.info256, alignedstore256,
+ masked_store_aligned256>, EVEX_V256;
+ defm Z128 : avx512_store<opc, OpcodeStr, _.info128, alignedstore,
+ masked_store_aligned128>, EVEX_V128;
+ }
+}
-defm VMOVAPD : avx512_load_vl<0x28, "vmovapd", "alignedload", "f", "64",
- "8", "4", "2", SSEPackedDouble, HasAVX512>,
- avx512_store_vl<0x29, "vmovapd", "alignedstore",
- "512", "256", "", "f", "64", "8", "4", "2",
- SSEPackedDouble, HasAVX512>,
- PD, VEX_W, EVEX_CD8<64, CD8VF>;
-
-defm VMOVUPS : avx512_load_vl<0x10, "vmovups", "load", "f", "32",
- "16", "8", "4", SSEPackedSingle, HasAVX512>,
- avx512_store_vl<0x11, "vmovups", "store", "", "", "", "f", "32",
- "16", "8", "4", SSEPackedSingle, HasAVX512>,
+defm VMOVAPS : avx512_alignedload_vl<0x28, "vmovaps", avx512vl_f32_info,
+ HasAVX512>,
+ avx512_alignedstore_vl<0x29, "vmovaps", avx512vl_f32_info,
+ HasAVX512>, PS, EVEX_CD8<32, CD8VF>;
+
+defm VMOVAPD : avx512_alignedload_vl<0x28, "vmovapd", avx512vl_f64_info,
+ HasAVX512>,
+ avx512_alignedstore_vl<0x29, "vmovapd", avx512vl_f64_info,
+ HasAVX512>, PD, VEX_W, EVEX_CD8<64, CD8VF>;
+
+defm VMOVUPS : avx512_load_vl<0x10, "vmovups", avx512vl_f32_info, HasAVX512>,
+ avx512_store_vl<0x11, "vmovups", avx512vl_f32_info, HasAVX512>,
PS, EVEX_CD8<32, CD8VF>;
-defm VMOVUPD : avx512_load_vl<0x10, "vmovupd", "load", "f", "64",
- "8", "4", "2", SSEPackedDouble, HasAVX512, 0>,
- avx512_store_vl<0x11, "vmovupd", "store", "", "", "", "f", "64",
- "8", "4", "2", SSEPackedDouble, HasAVX512>,
- PD, VEX_W, EVEX_CD8<64, CD8VF>;
+defm VMOVUPD : avx512_load_vl<0x10, "vmovupd", avx512vl_f64_info, HasAVX512, 0>,
+ avx512_store_vl<0x11, "vmovupd", avx512vl_f64_info, HasAVX512>,
+ PD, VEX_W, EVEX_CD8<64, CD8VF>;
def: Pat<(v8f64 (int_x86_avx512_mask_loadu_pd_512 addr:$ptr,
(bc_v8f64 (v16i32 immAllZerosV)), GR8:$mask)),
@@ -2276,6 +2311,7 @@ def: Pat<(int_x86_avx512_mask_store_pd_512 addr:$ptr, (v8f64 VR512:$src),
(VMOVAPDZmrk addr:$ptr, (v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)),
VR512:$src)>;
+let Predicates = [HasAVX512, NoVLX] in {
def: Pat<(masked_store addr:$ptr, VK8WM:$mask, (v8f32 VR256:$src)),
(VMOVUPSZmrk addr:$ptr,
(v16i1 (COPY_TO_REGCLASS VK8WM:$mask, VK16WM)),
@@ -2285,73 +2321,36 @@ def: Pat<(v8f32 (masked_load addr:$ptr, VK8WM:$mask, undef)),
(v8f32 (EXTRACT_SUBREG (v16f32 (VMOVUPSZrmkz
(v16i1 (COPY_TO_REGCLASS VK8WM:$mask, VK16WM)), addr:$ptr)), sub_ymm))>;
-def: Pat<(masked_store addr:$ptr, VK16WM:$mask, (v16f32 VR512:$src)),
- (VMOVUPSZmrk addr:$ptr, VK16WM:$mask, VR512:$src)>;
-
-def: Pat<(masked_store addr:$ptr, VK8WM:$mask, (v8f64 VR512:$src)),
- (VMOVUPDZmrk addr:$ptr, VK8WM:$mask, VR512:$src)>;
-
-def: Pat<(v16f32 (masked_load addr:$ptr, VK16WM:$mask, undef)),
- (VMOVUPSZrmkz VK16WM:$mask, addr:$ptr)>;
-
-def: Pat<(v16f32 (masked_load addr:$ptr, VK16WM:$mask,
- (bc_v16f32 (v16i32 immAllZerosV)))),
- (VMOVUPSZrmkz VK16WM:$mask, addr:$ptr)>;
-
-def: Pat<(v16f32 (masked_load addr:$ptr, VK16WM:$mask, (v16f32 VR512:$src0))),
- (VMOVUPSZrmk VR512:$src0, VK16WM:$mask, addr:$ptr)>;
-
-def: Pat<(v8f64 (masked_load addr:$ptr, VK8WM:$mask, undef)),
- (VMOVUPDZrmkz VK8WM:$mask, addr:$ptr)>;
-
-def: Pat<(v8f64 (masked_load addr:$ptr, VK8WM:$mask,
- (bc_v8f64 (v16i32 immAllZerosV)))),
- (VMOVUPDZrmkz VK8WM:$mask, addr:$ptr)>;
-
-def: Pat<(v8f64 (masked_load addr:$ptr, VK8WM:$mask, (v8f64 VR512:$src0))),
- (VMOVUPDZrmk VR512:$src0, VK8WM:$mask, addr:$ptr)>;
-
def: Pat<(v8f32 (masked_load addr:$ptr, VK8WM:$mask, (v8f32 VR256:$src0))),
(v8f32 (EXTRACT_SUBREG (v16f32 (VMOVUPSZrmk
(INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR256:$src0, sub_ymm),
(v16i1 (COPY_TO_REGCLASS VK8WM:$mask, VK16WM)), addr:$ptr)), sub_ymm))>;
+}
-defm VMOVDQA32 : avx512_load_vl<0x6F, "vmovdqa32", "alignedload", "i", "32",
- "16", "8", "4", SSEPackedInt, HasAVX512>,
- avx512_store_vl<0x7F, "vmovdqa32", "alignedstore",
- "512", "256", "", "i", "32", "16", "8", "4",
- SSEPackedInt, HasAVX512>,
- PD, EVEX_CD8<32, CD8VF>;
-
-defm VMOVDQA64 : avx512_load_vl<0x6F, "vmovdqa64", "alignedload", "i", "64",
- "8", "4", "2", SSEPackedInt, HasAVX512>,
- avx512_store_vl<0x7F, "vmovdqa64", "alignedstore",
- "512", "256", "", "i", "64", "8", "4", "2",
- SSEPackedInt, HasAVX512>,
- PD, VEX_W, EVEX_CD8<64, CD8VF>;
-
-defm VMOVDQU8 : avx512_load_vl<0x6F, "vmovdqu8", "load", "i", "8",
- "64", "32", "16", SSEPackedInt, HasBWI>,
- avx512_store_vl<0x7F, "vmovdqu8", "store", "", "", "",
- "i", "8", "64", "32", "16", SSEPackedInt,
+defm VMOVDQA32 : avx512_alignedload_vl<0x6F, "vmovdqa32", avx512vl_i32_info,
+ HasAVX512>,
+ avx512_alignedstore_vl<0x7F, "vmovdqa32", avx512vl_i32_info,
+ HasAVX512>, PD, EVEX_CD8<32, CD8VF>;
+
+defm VMOVDQA64 : avx512_alignedload_vl<0x6F, "vmovdqa64", avx512vl_i64_info,
+ HasAVX512>,
+ avx512_alignedstore_vl<0x7F, "vmovdqa64", avx512vl_i64_info,
+ HasAVX512>, PD, VEX_W, EVEX_CD8<64, CD8VF>;
+
+defm VMOVDQU8 : avx512_load_vl<0x6F, "vmovdqu8", avx512vl_i8_info, HasBWI>,
+ avx512_store_vl<0x7F, "vmovdqu8", avx512vl_i8_info,
HasBWI>, XD, EVEX_CD8<8, CD8VF>;
-defm VMOVDQU16 : avx512_load_vl<0x6F, "vmovdqu16", "load", "i", "16",
- "32", "16", "8", SSEPackedInt, HasBWI>,
- avx512_store_vl<0x7F, "vmovdqu16", "store", "", "", "",
- "i", "16", "32", "16", "8", SSEPackedInt,
+defm VMOVDQU16 : avx512_load_vl<0x6F, "vmovdqu16", avx512vl_i16_info, HasBWI>,
+ avx512_store_vl<0x7F, "vmovdqu16", avx512vl_i16_info,
HasBWI>, XD, VEX_W, EVEX_CD8<16, CD8VF>;
-defm VMOVDQU32 : avx512_load_vl<0x6F, "vmovdqu32", "load", "i", "32",
- "16", "8", "4", SSEPackedInt, HasAVX512>,
- avx512_store_vl<0x7F, "vmovdqu32", "store", "", "", "",
- "i", "32", "16", "8", "4", SSEPackedInt,
+defm VMOVDQU32 : avx512_load_vl<0x6F, "vmovdqu32", avx512vl_i32_info, HasAVX512>,
+ avx512_store_vl<0x7F, "vmovdqu32", avx512vl_i32_info,
HasAVX512>, XS, EVEX_CD8<32, CD8VF>;
-defm VMOVDQU64 : avx512_load_vl<0x6F, "vmovdqu64", "load", "i", "64",
- "8", "4", "2", SSEPackedInt, HasAVX512>,
- avx512_store_vl<0x7F, "vmovdqu64", "store", "", "", "",
- "i", "64", "8", "4", "2", SSEPackedInt,
+defm VMOVDQU64 : avx512_load_vl<0x6F, "vmovdqu64", avx512vl_i64_info, HasAVX512>,
+ avx512_store_vl<0x7F, "vmovdqu64", avx512vl_i64_info,
HasAVX512>, XS, VEX_W, EVEX_CD8<64, CD8VF>;
def: Pat<(v16i32 (int_x86_avx512_mask_loadu_d_512 addr:$ptr,
@@ -2389,37 +2388,8 @@ def : Pat<(v16i32 (vselect VK16WM:$mask, (v16i32 immAllZerosV),
(v16i32 VR512:$src))),
(VMOVDQU32Zrrkz (KNOTWrr VK16WM:$mask), VR512:$src)>;
}
-
-def: Pat<(v16i32 (masked_load addr:$ptr, VK16WM:$mask, (v16i32 immAllZerosV))),
- (VMOVDQU32Zrmkz VK16WM:$mask, addr:$ptr)>;
-
-def: Pat<(v16i32 (masked_load addr:$ptr, VK16WM:$mask, undef)),
- (VMOVDQU32Zrmkz VK16WM:$mask, addr:$ptr)>;
-
-def: Pat<(v16i32 (masked_load addr:$ptr, VK16WM:$mask, (v16i32 VR512:$src0))),
- (VMOVDQU32Zrmk VR512:$src0, VK16WM:$mask, addr:$ptr)>;
-
-def: Pat<(v8i64 (masked_load addr:$ptr, VK8WM:$mask,
- (bc_v8i64 (v16i32 immAllZerosV)))),
- (VMOVDQU64Zrmkz VK8WM:$mask, addr:$ptr)>;
-
-def: Pat<(v8i64 (masked_load addr:$ptr, VK8WM:$mask, undef)),
- (VMOVDQU64Zrmkz VK8WM:$mask, addr:$ptr)>;
-
-def: Pat<(v8i64 (masked_load addr:$ptr, VK8WM:$mask, (v8i64 VR512:$src0))),
- (VMOVDQU64Zrmk VR512:$src0, VK8WM:$mask, addr:$ptr)>;
-
-def: Pat<(masked_store addr:$ptr, VK16WM:$mask, (v16i32 VR512:$src)),
- (VMOVDQU32Zmrk addr:$ptr, VK16WM:$mask, VR512:$src)>;
-
-def: Pat<(masked_store addr:$ptr, VK8WM:$mask, (v8i64 VR512:$src)),
- (VMOVDQU64Zmrk addr:$ptr, VK8WM:$mask, VR512:$src)>;
-
-// SKX replacement
-def: Pat<(masked_store addr:$ptr, VK8WM:$mask, (v8i32 VR256:$src)),
- (VMOVDQU32Z256mrk addr:$ptr, VK8WM:$mask, VR256:$src)>;
-
-// KNL replacement
+// NoVLX patterns
+let Predicates = [HasAVX512, NoVLX] in {
def: Pat<(masked_store addr:$ptr, VK8WM:$mask, (v8i32 VR256:$src)),
(VMOVDQU32Zmrk addr:$ptr,
(v16i1 (COPY_TO_REGCLASS VK8WM:$mask, VK16WM)),
@@ -2428,7 +2398,7 @@ def: Pat<(masked_store addr:$ptr, VK8WM:$mask, (v8i32 VR256:$src)),
def: Pat<(v8i32 (masked_load addr:$ptr, VK8WM:$mask, undef)),
(v8i32 (EXTRACT_SUBREG (v16i32 (VMOVDQU32Zrmkz
(v16i1 (COPY_TO_REGCLASS VK8WM:$mask, VK16WM)), addr:$ptr)), sub_ymm))>;
-
+}
// Move Int Doubleword to Packed Double Int
//
@@ -3243,28 +3213,95 @@ defm VPANDN : avx512_binop_rm_vl_dq<0xDF, 0xDF, "vpandn", X86andnp,
//===----------------------------------------------------------------------===//
// AVX-512 FP arithmetic
//===----------------------------------------------------------------------===//
+multiclass avx512_fp_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
+ SDNode OpNode, SDNode VecNode, OpndItins itins,
+ bit IsCommutable> {
-multiclass avx512_binop_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
- SizeItins itins> {
- defm SSZ : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "ss"), OpNode, FR32X,
- f32mem, itins.s, 0>, XS, EVEX_4V, VEX_LIG,
- EVEX_CD8<32, CD8VT1>;
- defm SDZ : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "sd"), OpNode, FR64X,
- f64mem, itins.d, 0>, XD, VEX_W, EVEX_4V, VEX_LIG,
- EVEX_CD8<64, CD8VT1>;
+ defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
+ "$src2, $src1", "$src1, $src2",
+ (VecNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
+ (i32 FROUND_CURRENT)),
+ "", itins.rr, IsCommutable>;
+
+ defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
+ "$src2, $src1", "$src1, $src2",
+ (VecNode (_.VT _.RC:$src1),
+ (_.VT (scalar_to_vector (_.ScalarLdFrag addr:$src2))),
+ (i32 FROUND_CURRENT)),
+ "", itins.rm, IsCommutable>;
+ let isCodeGenOnly = 1, isCommutable = IsCommutable,
+ Predicates = [HasAVX512] in {
+ def rr : I<opc, MRMSrcReg, (outs _.FRC:$dst),
+ (ins _.FRC:$src1, _.FRC:$src2),
+ OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))],
+ itins.rr>;
+ def rm : I<opc, MRMSrcMem, (outs _.FRC:$dst),
+ (ins _.FRC:$src1, _.ScalarMemOp:$src2),
+ OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set _.FRC:$dst, (OpNode _.FRC:$src1,
+ (_.ScalarLdFrag addr:$src2)))], itins.rr>;
+ }
}
-let isCommutable = 1 in {
-defm VADD : avx512_binop_s<0x58, "add", fadd, SSE_ALU_ITINS_S>;
-defm VMUL : avx512_binop_s<0x59, "mul", fmul, SSE_ALU_ITINS_S>;
-defm VMIN : avx512_binop_s<0x5D, "min", X86fmin, SSE_ALU_ITINS_S>;
-defm VMAX : avx512_binop_s<0x5F, "max", X86fmax, SSE_ALU_ITINS_S>;
+multiclass avx512_fp_scalar_round<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
+ SDNode VecNode, OpndItins itins, bit IsCommutable> {
+
+ defm rrb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr,
+ "$rc, $src2, $src1", "$src1, $src2, $rc",
+ (VecNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
+ (i32 imm:$rc)), "", itins.rr, IsCommutable>,
+ EVEX_B, EVEX_RC;
}
-let isCommutable = 0 in {
-defm VSUB : avx512_binop_s<0x5C, "sub", fsub, SSE_ALU_ITINS_S>;
-defm VDIV : avx512_binop_s<0x5E, "div", fdiv, SSE_ALU_ITINS_S>;
+multiclass avx512_fp_scalar_sae<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
+ SDNode VecNode, OpndItins itins, bit IsCommutable> {
+
+ defm rrb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
+ "$src2, $src1", "$src1, $src2",
+ (VecNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
+ (i32 FROUND_NO_EXC)), "{sae}">, EVEX_B;
}
+multiclass avx512_binop_s_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ SDNode VecNode,
+ SizeItins itins, bit IsCommutable> {
+ defm SSZ : avx512_fp_scalar<opc, OpcodeStr#"ss", f32x_info, OpNode, VecNode,
+ itins.s, IsCommutable>,
+ avx512_fp_scalar_round<opc, OpcodeStr#"ss", f32x_info, VecNode,
+ itins.s, IsCommutable>,
+ XS, EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>;
+ defm SDZ : avx512_fp_scalar<opc, OpcodeStr#"sd", f64x_info, OpNode, VecNode,
+ itins.d, IsCommutable>,
+ avx512_fp_scalar_round<opc, OpcodeStr#"sd", f64x_info, VecNode,
+ itins.d, IsCommutable>,
+ XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>;
+}
+
+multiclass avx512_binop_s_sae<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ SDNode VecNode,
+ SizeItins itins, bit IsCommutable> {
+ defm SSZ : avx512_fp_scalar<opc, OpcodeStr#"ss", f32x_info, OpNode, VecNode,
+ itins.s, IsCommutable>,
+ avx512_fp_scalar_sae<opc, OpcodeStr#"ss", f32x_info, VecNode,
+ itins.s, IsCommutable>,
+ XS, EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>;
+ defm SDZ : avx512_fp_scalar<opc, OpcodeStr#"sd", f64x_info, OpNode, VecNode,
+ itins.d, IsCommutable>,
+ avx512_fp_scalar_sae<opc, OpcodeStr#"sd", f64x_info, VecNode,
+ itins.d, IsCommutable>,
+ XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>;
+}
+defm VADD : avx512_binop_s_round<0x58, "vadd", fadd, X86faddRnd, SSE_ALU_ITINS_S, 1>;
+defm VMUL : avx512_binop_s_round<0x59, "vmul", fmul, X86fmulRnd, SSE_ALU_ITINS_S, 1>;
+defm VSUB : avx512_binop_s_round<0x5C, "vsub", fsub, X86fsubRnd, SSE_ALU_ITINS_S, 0>;
+defm VDIV : avx512_binop_s_round<0x5E, "vdiv", fdiv, X86fdivRnd, SSE_ALU_ITINS_S, 0>;
+defm VMIN : avx512_binop_s_sae <0x5D, "vmin", X86fmin, X86fminRnd, SSE_ALU_ITINS_S, 1>;
+defm VMAX : avx512_binop_s_sae <0x5F, "vmax", X86fmax, X86fmaxRnd, SSE_ALU_ITINS_S, 1>;
+
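
The scalar FP arithmetic above now comes in three register forms: the plain ones evaluate under the current MXCSR mode (the FROUND_CURRENT operand), the _round variants embed a static rounding mode (AVX512RC:$rc), and the _sae variants only suppress exceptions, which is why min/max get SAE while add/sub/mul/div get full rounding control. A hedged scalar analogy for the embedded-rounding form (real hardware applies the mode per instruction without touching MXCSR, and fesetround here needs FENV_ACCESS to be strictly conforming):

#include <cfenv>

double addWithStaticRounding(double A, double B, int Mode /* FE_* macro */) {
  int Old = std::fegetround();
  std::fesetround(Mode);        // models {rn-sae}/{rd-sae}/{ru-sae}/{rz-sae}
  double R = A + B;             // vaddsd with an embedded rounding mode
  std::fesetround(Old);
  return R;
}
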
multiclass avx512_fp_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86VectorVTInfo _, bit IsCommutable> {
defm rr: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
@@ -3411,15 +3448,27 @@ multiclass avx512_shift_rmi<bits<8> opc, Format ImmFormR, Format ImmFormM,
"$src2, $src1", "$src1, $src2",
(_.VT (OpNode _.RC:$src1, (i8 imm:$src2))),
" ", SSE_INTSHIFT_ITINS_P.rr>, AVX512BIi8Base, EVEX_4V;
+ let mayLoad = 1 in
defm mi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst),
(ins _.MemOp:$src1, u8imm:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
- (_.VT (OpNode (_.LdFrag addr:$src1), (i8 imm:$src2))),
+ (_.VT (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
+ (i8 imm:$src2))),
" ", SSE_INTSHIFT_ITINS_P.rm>, AVX512BIi8Base, EVEX_4V;
}
+multiclass avx512_shift_rmbi<bits<8> opc, Format ImmFormM,
+ string OpcodeStr, SDNode OpNode, X86VectorVTInfo _> {
+ let mayLoad = 1 in
+ defm mbi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst),
+ (ins _.ScalarMemOp:$src1, u8imm:$src2), OpcodeStr,
+ "$src2, ${src1}"##_.BroadcastStr, "${src1}"##_.BroadcastStr##", $src2",
+ (_.VT (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src1)), (i8 imm:$src2))),
+ " ", SSE_INTSHIFT_ITINS_P.rm>, AVX512BIi8Base, EVEX_4V, EVEX_B;
+}
+
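
The new mbi form shifts by an immediate but reads its data operand as a single scalar broadcast from memory to every lane (the ${src1}{1toN} assembly syntax). A scalar model of the pattern it selects:

#include <cstddef>
#include <cstdint>

void vpslldBroadcastSketch(uint32_t *Dst, const uint32_t *ScalarPtr,
                           unsigned Imm, size_t NumElts) {
  uint32_t V = *ScalarPtr;            // X86VBroadcast(ScalarLdFrag addr:$src1)
  for (size_t I = 0; I != NumElts; ++I)
    Dst[I] = V << Imm;                // same immediate for every lane
}
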
multiclass avx512_shift_rrm<bits<8> opc, string OpcodeStr, SDNode OpNode,
- ValueType SrcVT, PatFrag bc_frag, X86VectorVTInfo _> {
+ ValueType SrcVT, PatFrag bc_frag, X86VectorVTInfo _> {
// src2 is always 128-bit
defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, VR128X:$src2), OpcodeStr,
@@ -3430,46 +3479,95 @@ multiclass avx512_shift_rrm<bits<8> opc, string OpcodeStr, SDNode OpNode,
(ins _.RC:$src1, i128mem:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(_.VT (OpNode _.RC:$src1, (bc_frag (loadv2i64 addr:$src2)))),
- " ", SSE_INTSHIFT_ITINS_P.rm>, AVX512BIBase, EVEX_4V;
+ " ", SSE_INTSHIFT_ITINS_P.rm>, AVX512BIBase,
+ EVEX_4V;
}
multiclass avx512_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
- ValueType SrcVT, PatFrag bc_frag, X86VectorVTInfo _> {
- defm Z : avx512_shift_rrm<opc, OpcodeStr, OpNode, SrcVT, bc_frag, _>, EVEX_V512;
+ ValueType SrcVT, PatFrag bc_frag,
+ AVX512VLVectorVTInfo VTInfo, Predicate prd> {
+ let Predicates = [prd] in
+ defm Z : avx512_shift_rrm<opc, OpcodeStr, OpNode, SrcVT, bc_frag,
+ VTInfo.info512>, EVEX_V512,
+ EVEX_CD8<VTInfo.info512.EltSize, CD8VQ>;
+ let Predicates = [prd, HasVLX] in {
+ defm Z256 : avx512_shift_rrm<opc, OpcodeStr, OpNode, SrcVT, bc_frag,
+ VTInfo.info256>, EVEX_V256,
+ EVEX_CD8<VTInfo.info256.EltSize, CD8VH>;
+ defm Z128 : avx512_shift_rrm<opc, OpcodeStr, OpNode, SrcVT, bc_frag,
+ VTInfo.info128>, EVEX_V128,
+ EVEX_CD8<VTInfo.info128.EltSize, CD8VF>;
+ }
}
-multiclass avx512_shift_types<bits<8> opcd, bits<8> opcq, string OpcodeStr,
- SDNode OpNode> {
+multiclass avx512_shift_types<bits<8> opcd, bits<8> opcq, bits<8> opcw,
+ string OpcodeStr, SDNode OpNode> {
defm D : avx512_shift_sizes<opcd, OpcodeStr#"d", OpNode, v4i32, bc_v4i32,
- v16i32_info>, EVEX_CD8<32, CD8VQ>;
+ avx512vl_i32_info, HasAVX512>;
defm Q : avx512_shift_sizes<opcq, OpcodeStr#"q", OpNode, v2i64, bc_v2i64,
- v8i64_info>, EVEX_CD8<64, CD8VQ>, VEX_W;
+ avx512vl_i64_info, HasAVX512>, VEX_W;
+ defm W : avx512_shift_sizes<opcw, OpcodeStr#"w", OpNode, v8i16, bc_v8i16,
+ avx512vl_i16_info, HasBWI>;
+}
+
+multiclass avx512_shift_rmi_sizes<bits<8> opc, Format ImmFormR, Format ImmFormM,
+ string OpcodeStr, SDNode OpNode,
+ AVX512VLVectorVTInfo VTInfo> {
+ let Predicates = [HasAVX512] in
+ defm Z: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
+ VTInfo.info512>,
+ avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
+ VTInfo.info512>, EVEX_V512;
+ let Predicates = [HasAVX512, HasVLX] in {
+ defm Z256: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
+ VTInfo.info256>,
+ avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
+ VTInfo.info256>, EVEX_V256;
+ defm Z128: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
+ VTInfo.info128>,
+ avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
+ VTInfo.info128>, EVEX_V128;
+ }
}
-defm VPSRLDZ : avx512_shift_rmi<0x72, MRM2r, MRM2m, "vpsrld", X86vsrli,
- v16i32_info>,
- EVEX_V512, EVEX_CD8<32, CD8VF>;
-defm VPSRLQZ : avx512_shift_rmi<0x73, MRM2r, MRM2m, "vpsrlq", X86vsrli,
- v8i64_info>, EVEX_V512,
- EVEX_CD8<64, CD8VF>, VEX_W;
+multiclass avx512_shift_rmi_w<bits<8> opcw,
+ Format ImmFormR, Format ImmFormM,
+ string OpcodeStr, SDNode OpNode> {
+ let Predicates = [HasBWI] in
+ defm WZ: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
+ v32i16_info>, EVEX_V512;
+ let Predicates = [HasVLX, HasBWI] in {
+ defm WZ256: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
+ v16i16x_info>, EVEX_V256;
+ defm WZ128: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
+ v8i16x_info>, EVEX_V128;
+ }
+}
-defm VPSLLDZ : avx512_shift_rmi<0x72, MRM6r, MRM6m, "vpslld", X86vshli,
- v16i32_info>, EVEX_V512,
- EVEX_CD8<32, CD8VF>;
-defm VPSLLQZ : avx512_shift_rmi<0x73, MRM6r, MRM6m, "vpsllq", X86vshli,
- v8i64_info>, EVEX_V512,
- EVEX_CD8<64, CD8VF>, VEX_W;
+multiclass avx512_shift_rmi_dq<bits<8> opcd, bits<8> opcq,
+ Format ImmFormR, Format ImmFormM,
+ string OpcodeStr, SDNode OpNode> {
+ defm D: avx512_shift_rmi_sizes<opcd, ImmFormR, ImmFormM, OpcodeStr#"d", OpNode,
+ avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
+ defm Q: avx512_shift_rmi_sizes<opcq, ImmFormR, ImmFormM, OpcodeStr#"q", OpNode,
+ avx512vl_i64_info>, EVEX_CD8<64, CD8VF>, VEX_W;
+}
-defm VPSRADZ : avx512_shift_rmi<0x72, MRM4r, MRM4m, "vpsrad", X86vsrai,
- v16i32_info>,
- EVEX_V512, EVEX_CD8<32, CD8VF>;
-defm VPSRAQZ : avx512_shift_rmi<0x72, MRM4r, MRM4m, "vpsraq", X86vsrai,
- v8i64_info>, EVEX_V512,
- EVEX_CD8<64, CD8VF>, VEX_W;
+defm VPSRL : avx512_shift_rmi_dq<0x72, 0x73, MRM2r, MRM2m, "vpsrl", X86vsrli>,
+ avx512_shift_rmi_w<0x71, MRM2r, MRM2m, "vpsrlw", X86vsrli>;
-defm VPSLL : avx512_shift_types<0xF2, 0xF3, "vpsll", X86vshl>;
-defm VPSRA : avx512_shift_types<0xE2, 0xE2, "vpsra", X86vsra>;
-defm VPSRL : avx512_shift_types<0xD2, 0xD3, "vpsrl", X86vsrl>;
+defm VPSLL : avx512_shift_rmi_dq<0x72, 0x73, MRM6r, MRM6m, "vpsll", X86vshli>,
+ avx512_shift_rmi_w<0x71, MRM6r, MRM6m, "vpsllw", X86vshli>;
+
+defm VPSRA : avx512_shift_rmi_dq<0x72, 0x73, MRM4r, MRM4m, "vpsra", X86vsrai>,
+ avx512_shift_rmi_w<0x71, MRM4r, MRM4m, "vpsraw", X86vsrai>;
+
+defm VPROR : avx512_shift_rmi_dq<0x72, 0x72, MRM0r, MRM0m, "vpror", rotr>;
+defm VPROL : avx512_shift_rmi_dq<0x72, 0x72, MRM1r, MRM1m, "vprol", rotl>;
+
+defm VPSLL : avx512_shift_types<0xF2, 0xF3, 0xF1, "vpsll", X86vshl>;
+defm VPSRA : avx512_shift_types<0xE2, 0xE2, 0xE1, "vpsra", X86vsra>;
+defm VPSRL : avx512_shift_types<0xD2, 0xD3, 0xD1, "vpsrl", X86vsrl>;
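
VPROR/VPROL are wired directly to the generic rotr/rotl nodes, so they are per-element rotates by an immediate. The scalar operation, for 32-bit elements:

#include <cstdint>

uint32_t rotr32(uint32_t X, unsigned R) {
  R &= 31;
  return (X >> R) | (X << ((32 - R) & 31));   // vprord dst, src, imm8
}

uint32_t rotl32(uint32_t X, unsigned R) {
  R &= 31;
  return (X << R) | (X >> ((32 - R) & 31));   // vprold dst, src, imm8
}
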
//===-------------------------------------------------------------------===//
// Variable Bit Shifts
@@ -3481,29 +3579,71 @@ multiclass avx512_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode,
"$src2, $src1", "$src1, $src2",
(_.VT (OpNode _.RC:$src1, (_.VT _.RC:$src2))),
" ", SSE_INTSHIFT_ITINS_P.rr>, AVX5128IBase, EVEX_4V;
+ let mayLoad = 1 in
defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(_.VT (OpNode _.RC:$src1, (_.LdFrag addr:$src2))),
- " ", SSE_INTSHIFT_ITINS_P.rm>, AVX5128IBase, EVEX_4V;
+ " ", SSE_INTSHIFT_ITINS_P.rm>, AVX5128IBase, EVEX_4V,
+ EVEX_CD8<_.EltSize, CD8VF>;
}
+multiclass avx512_var_shift_mb<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ X86VectorVTInfo _> {
+ let mayLoad = 1 in
+ defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
+ "${src2}"##_.BroadcastStr##", $src1",
+ "$src1, ${src2}"##_.BroadcastStr,
+ (_.VT (OpNode _.RC:$src1, (_.VT (X86VBroadcast
+ (_.ScalarLdFrag addr:$src2))))),
+ " ", SSE_INTSHIFT_ITINS_P.rm>, AVX5128IBase, EVEX_B,
+ EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>;
+}
multiclass avx512_var_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
AVX512VLVectorVTInfo _> {
- defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, _.info512>, EVEX_V512;
+ let Predicates = [HasAVX512] in
+ defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, _.info512>,
+ avx512_var_shift_mb<opc, OpcodeStr, OpNode, _.info512>, EVEX_V512;
+
+ let Predicates = [HasAVX512, HasVLX] in {
+ defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, _.info256>,
+ avx512_var_shift_mb<opc, OpcodeStr, OpNode, _.info256>, EVEX_V256;
+ defm Z128 : avx512_var_shift<opc, OpcodeStr, OpNode, _.info128>,
+ avx512_var_shift_mb<opc, OpcodeStr, OpNode, _.info128>, EVEX_V128;
+ }
}
multiclass avx512_var_shift_types<bits<8> opc, string OpcodeStr,
SDNode OpNode> {
defm D : avx512_var_shift_sizes<opc, OpcodeStr#"d", OpNode,
- avx512vl_i32_info>, EVEX_CD8<32, CD8VQ>;
+ avx512vl_i32_info>;
defm Q : avx512_var_shift_sizes<opc, OpcodeStr#"q", OpNode,
- avx512vl_i64_info>, EVEX_CD8<64, CD8VQ>, VEX_W;
+ avx512vl_i64_info>, VEX_W;
}
-defm VPSLLV : avx512_var_shift_types<0x47, "vpsllv", shl>;
-defm VPSRAV : avx512_var_shift_types<0x46, "vpsrav", sra>;
-defm VPSRLV : avx512_var_shift_types<0x45, "vpsrlv", srl>;
+multiclass avx512_var_shift_w<bits<8> opc, string OpcodeStr,
+ SDNode OpNode> {
+ let Predicates = [HasBWI] in
+ defm WZ: avx512_var_shift<opc, OpcodeStr, OpNode, v32i16_info>,
+ EVEX_V512, VEX_W;
+ let Predicates = [HasVLX, HasBWI] in {
+
+ defm WZ256: avx512_var_shift<opc, OpcodeStr, OpNode, v16i16x_info>,
+ EVEX_V256, VEX_W;
+ defm WZ128: avx512_var_shift<opc, OpcodeStr, OpNode, v8i16x_info>,
+ EVEX_V128, VEX_W;
+ }
+}
+
+defm VPSLLV : avx512_var_shift_types<0x47, "vpsllv", shl>,
+ avx512_var_shift_w<0x12, "vpsllvw", shl>;
+defm VPSRAV : avx512_var_shift_types<0x46, "vpsrav", sra>,
+ avx512_var_shift_w<0x11, "vpsravw", sra>;
+defm VPSRLV : avx512_var_shift_types<0x45, "vpsrlv", srl>,
+ avx512_var_shift_w<0x10, "vpsrlvw", srl>;
+defm VPRORV : avx512_var_shift_types<0x14, "vprorv", rotr>;
+defm VPROLV : avx512_var_shift_types<0x15, "vprolv", rotl>;
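
Unlike the uniform-count shifts, the variable-shift family gives every lane its own count from the second vector operand; VPRORV/VPROLV rotate per lane the same way. A scalar model of vpsllvd, noting that x86 defines oversized counts to produce zero for logical shifts:

#include <cstddef>
#include <cstdint>

void vpsllvdSketch(uint32_t *Dst, const uint32_t *Src, const uint32_t *Cnt,
                   size_t NumElts) {
  for (size_t I = 0; I != NumElts; ++I)
    Dst[I] = Cnt[I] < 32 ? Src[I] << Cnt[I] : 0;  // count >= 32 yields 0
}
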
//===----------------------------------------------------------------------===//
// AVX-512 - MOVDDUP
@@ -4919,81 +5059,74 @@ defm VPMOVSXDQZ: avx512_extend<0x25, "vpmovsxdq", VK8WM, VR512, VR256X, X86vsext
//===----------------------------------------------------------------------===//
// GATHER - SCATTER Operations
-multiclass avx512_gather<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
- X86MemOperand memop, PatFrag GatherNode> {
-let mayLoad = 1, hasTwoExplicitDefs = 1,
+multiclass avx512_gather<bits<8> opc, string OpcodeStr, RegisterClass KRC,
+ RegisterClass RC, X86MemOperand memop> {
+let mayLoad = 1,
Constraints = "@earlyclobber $dst, $src1 = $dst, $mask = $mask_wb" in
- def rm : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst, _.KRCWM:$mask_wb),
- (ins _.RC:$src1, _.KRCWM:$mask, memop:$src2),
+ def rm : AVX5128I<opc, MRMSrcMem, (outs RC:$dst, KRC:$mask_wb),
+ (ins RC:$src1, KRC:$mask, memop:$src2),
!strconcat(OpcodeStr,
"\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
- [(set _.RC:$dst, _.KRCWM:$mask_wb,
- (_.VT (GatherNode (_.VT _.RC:$src1), _.KRCWM:$mask,
- vectoraddr:$src2)))]>, EVEX, EVEX_K,
- EVEX_CD8<_.EltSize, CD8VT1>;
+ []>, EVEX, EVEX_K;
}
let ExeDomain = SSEPackedDouble in {
-defm VGATHERDPDZ : avx512_gather<0x92, "vgatherdpd", v8f64_info, vy64xmem,
- mgatherv8i32>, EVEX_V512, VEX_W;
-defm VGATHERQPDZ : avx512_gather<0x93, "vgatherqpd", v8f64_info, vz64mem,
- mgatherv8i64>, EVEX_V512, VEX_W;
+defm VGATHERDPDZ : avx512_gather<0x92, "vgatherdpd", VK8WM, VR512, vy64xmem>,
+ EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
+defm VGATHERQPDZ : avx512_gather<0x93, "vgatherqpd", VK8WM, VR512, vz64mem>,
+ EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
}
let ExeDomain = SSEPackedSingle in {
-defm VGATHERDPSZ : avx512_gather<0x92, "vgatherdps", v16f32_info, vz32mem,
- mgatherv16i32>, EVEX_V512;
-defm VGATHERQPSZ : avx512_gather<0x93, "vgatherqps", v8f32x_info, vz64mem,
- mgatherv8i64>, EVEX_V512;
+defm VGATHERDPSZ : avx512_gather<0x92, "vgatherdps", VK16WM, VR512, vz32mem>,
+ EVEX_V512, EVEX_CD8<32, CD8VT1>;
+defm VGATHERQPSZ : avx512_gather<0x93, "vgatherqps", VK8WM, VR256X, vz64mem>,
+ EVEX_V512, EVEX_CD8<32, CD8VT1>;
}
-defm VPGATHERDQZ : avx512_gather<0x90, "vpgatherdq", v8i64_info, vy64xmem,
- mgatherv8i32>, EVEX_V512, VEX_W;
-defm VPGATHERDDZ : avx512_gather<0x90, "vpgatherdd", v16i32_info, vz32mem,
- mgatherv16i32>, EVEX_V512;
-
-defm VPGATHERQQZ : avx512_gather<0x91, "vpgatherqq", v8i64_info, vz64mem,
- mgatherv8i64>, EVEX_V512, VEX_W;
-defm VPGATHERQDZ : avx512_gather<0x91, "vpgatherqd", v8i32x_info, vz64mem,
- mgatherv8i64>, EVEX_V512;
+defm VPGATHERDQZ : avx512_gather<0x90, "vpgatherdq", VK8WM, VR512, vy64xmem>,
+ EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
+defm VPGATHERDDZ : avx512_gather<0x90, "vpgatherdd", VK16WM, VR512, vz32mem>,
+ EVEX_V512, EVEX_CD8<32, CD8VT1>;
-multiclass avx512_scatter<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
- X86MemOperand memop, PatFrag ScatterNode> {
+defm VPGATHERQQZ : avx512_gather<0x91, "vpgatherqq", VK8WM, VR512, vz64mem>,
+ EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
+defm VPGATHERQDZ : avx512_gather<0x91, "vpgatherqd", VK8WM, VR256X, vz64mem>,
+ EVEX_V512, EVEX_CD8<32, CD8VT1>;
+multiclass avx512_scatter<bits<8> opc, string OpcodeStr, RegisterClass KRC,
+ RegisterClass RC, X86MemOperand memop> {
let mayStore = 1, Constraints = "$mask = $mask_wb" in
-
- def mr : AVX5128I<opc, MRMDestMem, (outs _.KRCWM:$mask_wb),
- (ins memop:$dst, _.KRCWM:$mask, _.RC:$src),
+ def mr : AVX5128I<opc, MRMDestMem, (outs KRC:$mask_wb),
+ (ins memop:$dst, KRC:$mask, RC:$src2),
!strconcat(OpcodeStr,
- "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}"),
- [(set _.KRCWM:$mask_wb, (ScatterNode (_.VT _.RC:$src),
- _.KRCWM:$mask, vectoraddr:$dst))]>,
- EVEX, EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>;
+ "\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
+ []>, EVEX, EVEX_K;
}
let ExeDomain = SSEPackedDouble in {
-defm VSCATTERDPDZ : avx512_scatter<0xA2, "vscatterdpd", v8f64_info, vy64xmem,
- mscatterv8i32>, EVEX_V512, VEX_W;
-defm VSCATTERQPDZ : avx512_scatter<0xA3, "vscatterqpd", v8f64_info, vz64mem,
- mscatterv8i64>, EVEX_V512, VEX_W;
+defm VSCATTERDPDZ : avx512_scatter<0xA2, "vscatterdpd", VK8WM, VR512, vy64xmem>,
+ EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
+defm VSCATTERQPDZ : avx512_scatter<0xA3, "vscatterqpd", VK8WM, VR512, vz64mem>,
+ EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
}
let ExeDomain = SSEPackedSingle in {
-defm VSCATTERDPSZ : avx512_scatter<0xA2, "vscatterdps", v16f32_info, vz32mem,
- mscatterv16i32>, EVEX_V512;
-defm VSCATTERQPSZ : avx512_scatter<0xA3, "vscatterqps", v8f32x_info, vz64mem,
- mscatterv8i64>, EVEX_V512;
+defm VSCATTERDPSZ : avx512_scatter<0xA2, "vscatterdps", VK16WM, VR512, vz32mem>,
+ EVEX_V512, EVEX_CD8<32, CD8VT1>;
+defm VSCATTERQPSZ : avx512_scatter<0xA3, "vscatterqps", VK8WM, VR256X, vz64mem>,
+ EVEX_V512, EVEX_CD8<32, CD8VT1>;
}
-defm VPSCATTERDQZ : avx512_scatter<0xA0, "vpscatterdq", v8i64_info, vy64xmem,
- mscatterv8i32>, EVEX_V512, VEX_W;
-defm VPSCATTERDDZ : avx512_scatter<0xA0, "vpscatterdd", v16i32_info, vz32mem,
- mscatterv16i32>, EVEX_V512;
+defm VPSCATTERDQZ : avx512_scatter<0xA0, "vpscatterdq", VK8WM, VR512, vy64xmem>,
+ EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
+defm VPSCATTERDDZ : avx512_scatter<0xA0, "vpscatterdd", VK16WM, VR512, vz32mem>,
+                                 EVEX_V512, EVEX_CD8<32, CD8VT1>;
 
-defm VPSCATTERQQZ : avx512_scatter<0xA1, "vpscatterqq", v8i64_info, vz64mem,
- mscatterv8i64>, EVEX_V512, VEX_W;
-defm VPSCATTERQDZ : avx512_scatter<0xA1, "vpscatterqd", v8i32x_info, vz64mem,
- mscatterv8i64>, EVEX_V512;
+defm VPSCATTERQQZ : avx512_scatter<0xA1, "vpscatterqq", VK8WM, VR512, vz64mem>,
+ EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
+defm VPSCATTERQDZ : avx512_scatter<0xA1, "vpscatterqd", VK8WM, VR256X, vz64mem>,
+ EVEX_V512, EVEX_CD8<32, CD8VT1>;
// prefetch
multiclass avx512_gather_scatter_prefetch<bits<8> opc, Format F, string OpcodeStr,
diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td
index bf515a8..0bdabdf 100644
--- a/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -282,6 +282,8 @@ def X86faddRnd : SDNode<"X86ISD::FADD_RND", SDTFPBinOpRound>;
def X86fsubRnd : SDNode<"X86ISD::FSUB_RND", SDTFPBinOpRound>;
def X86fmulRnd : SDNode<"X86ISD::FMUL_RND", SDTFPBinOpRound>;
def X86fdivRnd : SDNode<"X86ISD::FDIV_RND", SDTFPBinOpRound>;
+def X86fmaxRnd : SDNode<"X86ISD::FMAX", SDTFPBinOpRound>;
+def X86fminRnd : SDNode<"X86ISD::FMIN", SDTFPBinOpRound>;
def X86Fmadd : SDNode<"X86ISD::FMADD", SDTFma>;
def X86Fnmadd : SDNode<"X86ISD::FNMADD", SDTFma>;
@@ -304,8 +306,6 @@ def X86exp2 : SDNode<"X86ISD::EXP2", STDFp1SrcRm>;
def X86rsqrt28s : SDNode<"X86ISD::RSQRT28", STDFp2SrcRm>;
def X86rcp28s : SDNode<"X86ISD::RCP28", STDFp2SrcRm>;
def X86RndScale : SDNode<"X86ISD::RNDSCALE", STDFp3SrcRm>;
-def X86mgather : SDNode<"X86ISD::GATHER", SDTypeProfile<1, 3,
- [SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>]>>;
def SDT_PCMPISTRI : SDTypeProfile<2, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>,
SDTCisVT<2, v16i8>, SDTCisVT<3, v16i8>,
@@ -526,58 +526,6 @@ def unalignednontemporalstore : PatFrag<(ops node:$val, node:$ptr),
return false;
}]>;
-def mgatherv8i32 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
- (masked_gather node:$src1, node:$src2, node:$src3) , [{
- //if (MaskedGatherSDNode *Mgt = dyn_cast<MaskedGatherSDNode>(N))
- // return (Mgt->getIndex().getValueType() == MVT::v8i32 ||
- // Mgt->getBasePtr().getValueType() == MVT::v8i32);
- //return false;
- return N != 0;
-}]>;
-
-def mgatherv8i64 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
- (masked_gather node:$src1, node:$src2, node:$src3) , [{
- //if (MaskedGatherSDNode *Mgt = dyn_cast<MaskedGatherSDNode>(N))
- // return (Mgt->getIndex().getValueType() == MVT::v8i64 ||
- // Mgt->getBasePtr().getValueType() == MVT::v8i64);
- //return false;
- return N != 0;
-}]>;
-def mgatherv16i32 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
- (masked_gather node:$src1, node:$src2, node:$src3) , [{
- //if (MaskedGatherSDNode *Mgt = dyn_cast<MaskedGatherSDNode>(N))
- // return (Mgt->getIndex().getValueType() == MVT::v16i32 ||
- // Mgt->getBasePtr().getValueType() == MVT::v16i32);
- //return false;
- return N != 0;
-}]>;
-
-def mscatterv8i32 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
- (masked_scatter node:$src1, node:$src2, node:$src3) , [{
- //if (MaskedScatterSDNode *Sc = dyn_cast<MaskedScatterSDNode>(N))
- // return (Sc->getIndex().getValueType() == MVT::v8i32 ||
- // Sc->getBasePtr().getValueType() == MVT::v8i32);
- //return false;
- return N != 0;
-}]>;
-
-def mscatterv8i64 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
- (masked_scatter node:$src1, node:$src2, node:$src3) , [{
- //if (MaskedScatterSDNode *Sc = dyn_cast<MaskedScatterSDNode>(N))
- // return (Sc->getIndex().getValueType() == MVT::v8i64 ||
- // Sc->getBasePtr().getValueType() == MVT::v8i64);
- //return false;
- return N != 0;
-}]>;
-def mscatterv16i32 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
- (masked_scatter node:$src1, node:$src2, node:$src3) , [{
- //if (MaskedScatterSDNode *Sc = dyn_cast<MaskedScatterSDNode>(N))
- // return (Sc->getIndex().getValueType() == MVT::v16i32 ||
- // Sc->getBasePtr().getValueType() == MVT::v16i32);
- //return false;
- return N != 0;
-}]>;
-
// 128-bit bitconvert pattern fragments
def bc_v4f32 : PatFrag<(ops node:$in), (v4f32 (bitconvert node:$in))>;
def bc_v2f64 : PatFrag<(ops node:$in), (v2f64 (bitconvert node:$in))>;
@@ -681,3 +629,55 @@ def vinsert256_insert : PatFrag<(ops node:$bigvec, node:$smallvec,
return X86::isVINSERT256Index(N);
}], INSERT_get_vinsert256_imm>;
+def masked_load_aligned128 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
+ (masked_load node:$src1, node:$src2, node:$src3), [{
+ if (dyn_cast<MaskedLoadSDNode>(N))
+ return cast<MaskedLoadSDNode>(N)->getAlignment() >= 16;
+ return false;
+}]>;
+
+def masked_load_aligned256 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
+ (masked_load node:$src1, node:$src2, node:$src3), [{
+ if (dyn_cast<MaskedLoadSDNode>(N))
+ return cast<MaskedLoadSDNode>(N)->getAlignment() >= 32;
+ return false;
+}]>;
+
+def masked_load_aligned512 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
+ (masked_load node:$src1, node:$src2, node:$src3), [{
+ if (dyn_cast<MaskedLoadSDNode>(N))
+ return cast<MaskedLoadSDNode>(N)->getAlignment() >= 64;
+ return false;
+}]>;
+
+def masked_load_unaligned : PatFrag<(ops node:$src1, node:$src2, node:$src3),
+ (masked_load node:$src1, node:$src2, node:$src3), [{
+ return (dyn_cast<MaskedLoadSDNode>(N) != 0);
+}]>;
+
+def masked_store_aligned128 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
+ (masked_store node:$src1, node:$src2, node:$src3), [{
+ if (dyn_cast<MaskedStoreSDNode>(N))
+ return cast<MaskedStoreSDNode>(N)->getAlignment() >= 16;
+ return false;
+}]>;
+
+def masked_store_aligned256 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
+ (masked_store node:$src1, node:$src2, node:$src3), [{
+ if (dyn_cast<MaskedStoreSDNode>(N))
+ return cast<MaskedStoreSDNode>(N)->getAlignment() >= 32;
+ return false;
+}]>;
+
+def masked_store_aligned512 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
+ (masked_store node:$src1, node:$src2, node:$src3), [{
+ if (dyn_cast<MaskedStoreSDNode>(N))
+ return cast<MaskedStoreSDNode>(N)->getAlignment() >= 64;
+ return false;
+}]>;
+
+def masked_store_unaligned : PatFrag<(ops node:$src1, node:$src2, node:$src3),
+ (masked_store node:$src1, node:$src2, node:$src3), [{
+ return (dyn_cast<MaskedStoreSDNode>(N) != 0);
+}]>;
+
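// Aside (a sketch for illustration, not code from this patch): all eight
// PatFrags above share one predicate shape -- accept the node only when it
// is the right kind of masked memory node and, for the aligned variants,
// when its alignment covers the full vector width. The 512-bit load case,
// rendered as stand-alone C++:
#include "llvm/CodeGen/SelectionDAGNodes.h"
static bool isAligned512MaskedLoad(const llvm::SDNode *N) {
  // 512 bits = 64 bytes; the unaligned variants skip this check entirely.
  if (const auto *Ld = llvm::dyn_cast<llvm::MaskedLoadSDNode>(N))
    return Ld->getAlignment() >= 64;
  return false;
}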
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index f5b9680..538ec1c 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -104,7 +104,7 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
: X86GenInstrInfo(
(STI.isTarget64BitLP64() ? X86::ADJCALLSTACKDOWN64 : X86::ADJCALLSTACKDOWN32),
(STI.isTarget64BitLP64() ? X86::ADJCALLSTACKUP64 : X86::ADJCALLSTACKUP32)),
- Subtarget(STI), RI(STI) {
+ Subtarget(STI), RI(STI.getTargetTriple()) {
static const X86MemoryFoldTableEntry MemoryFoldTable2Addr[] = {
{ X86::ADC32ri, X86::ADC32mi, 0 },
@@ -4573,9 +4573,7 @@ MachineInstr *X86InstrInfo::optimizeLoadInstr(MachineInstr *MI,
return nullptr;
// Check whether we can fold the def into SrcOperandId.
- SmallVector<unsigned, 8> Ops;
- Ops.push_back(SrcOperandId);
- MachineInstr *FoldMI = foldMemoryOperand(MI, Ops, DefMI);
+ MachineInstr *FoldMI = foldMemoryOperand(MI, SrcOperandId, DefMI);
if (FoldMI) {
FoldAsLoadDefReg = 0;
return FoldMI;
@@ -4670,7 +4668,7 @@ bool X86InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
}
static MachineInstr *FuseTwoAddrInst(MachineFunction &MF, unsigned Opcode,
- const SmallVectorImpl<MachineOperand> &MOs,
+ ArrayRef<MachineOperand> MOs,
MachineInstr *MI,
const TargetInstrInfo &TII) {
// Create the base instruction with the memory operand as the first part.
@@ -4697,9 +4695,8 @@ static MachineInstr *FuseTwoAddrInst(MachineFunction &MF, unsigned Opcode,
return MIB;
}
-static MachineInstr *FuseInst(MachineFunction &MF,
- unsigned Opcode, unsigned OpNo,
- const SmallVectorImpl<MachineOperand> &MOs,
+static MachineInstr *FuseInst(MachineFunction &MF, unsigned Opcode,
+ unsigned OpNo, ArrayRef<MachineOperand> MOs,
MachineInstr *MI, const TargetInstrInfo &TII) {
// Omit the implicit operands, something BuildMI can't do.
MachineInstr *NewMI = MF.CreateMachineInstr(TII.get(Opcode),
@@ -4723,7 +4720,7 @@ static MachineInstr *FuseInst(MachineFunction &MF,
}
static MachineInstr *MakeM0Inst(const TargetInstrInfo &TII, unsigned Opcode,
- const SmallVectorImpl<MachineOperand> &MOs,
+ ArrayRef<MachineOperand> MOs,
MachineInstr *MI) {
MachineFunction &MF = *MI->getParent()->getParent();
MachineInstrBuilder MIB = BuildMI(MF, MI->getDebugLoc(), TII.get(Opcode));
@@ -4736,12 +4733,12 @@ static MachineInstr *MakeM0Inst(const TargetInstrInfo &TII, unsigned Opcode,
return MIB.addImm(0);
}
-MachineInstr*
-X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
- MachineInstr *MI, unsigned OpNum,
- const SmallVectorImpl<MachineOperand> &MOs,
- unsigned Size, unsigned Align,
- bool AllowCommute) const {
+MachineInstr *X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
+ MachineInstr *MI,
+ unsigned OpNum,
+ ArrayRef<MachineOperand> MOs,
+ unsigned Size, unsigned Align,
+ bool AllowCommute) const {
const DenseMap<unsigned,
std::pair<unsigned,unsigned> > *OpcodeTablePtr = nullptr;
bool isCallRegIndirect = Subtarget.callRegIndirect();
@@ -5104,10 +5101,10 @@ breakPartialRegDependency(MachineBasicBlock::iterator MI, unsigned OpNum,
MI->addRegisterKilled(Reg, TRI, true);
}
-MachineInstr*
-X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
- const SmallVectorImpl<unsigned> &Ops,
- int FrameIndex) const {
+MachineInstr *X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
+ MachineInstr *MI,
+ ArrayRef<unsigned> Ops,
+ int FrameIndex) const {
// Check switch flag
if (NoFusing) return nullptr;
@@ -5145,10 +5142,9 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
} else if (Ops.size() != 1)
return nullptr;
- SmallVector<MachineOperand,4> MOs;
- MOs.push_back(MachineOperand::CreateFI(FrameIndex));
- return foldMemoryOperandImpl(MF, MI, Ops[0], MOs,
- Size, Alignment, /*AllowCommute=*/true);
+ return foldMemoryOperandImpl(MF, MI, Ops[0],
+ MachineOperand::CreateFI(FrameIndex), Size,
+ Alignment, /*AllowCommute=*/true);
}
static bool isPartialRegisterLoad(const MachineInstr &LoadMI,
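// Aside (a sketch for illustration, not code from this patch): the temporary
// SmallVector can go away above because ArrayRef<T> has an implicit
// one-element constructor, so a single operand index or a single
// MachineOperand binds directly to an ArrayRef parameter:
#include "llvm/ADT/ArrayRef.h"
static int sum(llvm::ArrayRef<int> Xs) {
  int S = 0;
  for (int X : Xs)
    S += X;
  return S;
}
// sum(42) == 42        -- one value converts to a one-element ArrayRef
// sum({1, 2, 3}) == 6  -- an initializer list converts as well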
@@ -5170,9 +5166,9 @@ static bool isPartialRegisterLoad(const MachineInstr &LoadMI,
return false;
}
-MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
+MachineInstr *X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
MachineInstr *MI,
- const SmallVectorImpl<unsigned> &Ops,
+ ArrayRef<unsigned> Ops,
MachineInstr *LoadMI) const {
// If loading from a FrameIndex, fold directly from the FrameIndex.
unsigned NumOps = LoadMI->getDesc().getNumOperands();
@@ -5295,8 +5291,8 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
return nullptr;
// Folding a normal load. Just copy the load's address operands.
- for (unsigned i = NumOps - X86::AddrNumOperands; i != NumOps; ++i)
- MOs.push_back(LoadMI->getOperand(i));
+ MOs.append(LoadMI->operands_begin() + NumOps - X86::AddrNumOperands,
+ LoadMI->operands_begin() + NumOps);
break;
}
}
@@ -5304,9 +5300,8 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
/*Size=*/0, Alignment, /*AllowCommute=*/true);
}
-
bool X86InstrInfo::canFoldMemoryOperand(const MachineInstr *MI,
- const SmallVectorImpl<unsigned> &Ops) const {
+ ArrayRef<unsigned> Ops) const {
// Check switch flag
if (NoFusing) return 0;
@@ -5559,7 +5554,7 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
}
if (Load)
BeforeOps.push_back(SDValue(Load, 0));
- std::copy(AfterOps.begin(), AfterOps.end(), std::back_inserter(BeforeOps));
+ BeforeOps.insert(BeforeOps.end(), AfterOps.begin(), AfterOps.end());
SDNode *NewNode= DAG.getMachineNode(Opc, dl, VTs, BeforeOps);
NewNodes.push_back(NewNode);
diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h
index 4d15467..0dd8101 100644
--- a/lib/Target/X86/X86InstrInfo.h
+++ b/lib/Target/X86/X86InstrInfo.h
@@ -305,23 +305,21 @@ public:
/// folding and return true, otherwise it should return false. If it folds
/// the instruction, it is likely that the MachineInstruction the iterator
/// references has been changed.
- MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
- MachineInstr* MI,
- const SmallVectorImpl<unsigned> &Ops,
+ MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
+ ArrayRef<unsigned> Ops,
int FrameIndex) const override;
/// foldMemoryOperand - Same as the previous version except it allows folding
/// of any load and store from / to any address, not just from a specific
/// stack slot.
- MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
- MachineInstr* MI,
- const SmallVectorImpl<unsigned> &Ops,
- MachineInstr* LoadMI) const override;
+ MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
+ ArrayRef<unsigned> Ops,
+ MachineInstr *LoadMI) const override;
  /// canFoldMemoryOperand - Returns true for the specified load / store if
  /// folding is possible.
- bool canFoldMemoryOperand(const MachineInstr*,
- const SmallVectorImpl<unsigned> &) const override;
+ bool canFoldMemoryOperand(const MachineInstr *,
+ ArrayRef<unsigned>) const override;
  /// unfoldMemoryOperand - Separate a single instruction which folded a load or
  /// a store or a load and a store into two or more instructions. If this is
@@ -406,10 +404,9 @@ public:
void breakPartialRegDependency(MachineBasicBlock::iterator MI, unsigned OpNum,
const TargetRegisterInfo *TRI) const override;
- MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
- MachineInstr* MI,
+ MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
unsigned OpNum,
- const SmallVectorImpl<MachineOperand> &MOs,
+ ArrayRef<MachineOperand> MOs,
unsigned Size, unsigned Alignment,
bool AllowCommute) const;
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index 9881caf..e9a0431 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -572,10 +572,13 @@ def X86GR32orGR64AsmOperand : AsmOperandClass {
def GR32orGR64 : RegisterOperand<GR32> {
let ParserMatchClass = X86GR32orGR64AsmOperand;
}
-
+def AVX512RCOperand : AsmOperandClass {
+ let Name = "AVX512RC";
+}
def AVX512RC : Operand<i32> {
let PrintMethod = "printRoundingControl";
let OperandType = "OPERAND_IMMEDIATE";
+ let ParserMatchClass = AVX512RCOperand;
}
// Sign-extended immediate classes. We don't need to define the full lattice
@@ -713,9 +716,6 @@ def tls64addr : ComplexPattern<i64, 5, "SelectTLSADDRAddr",
def tls64baseaddr : ComplexPattern<i64, 5, "SelectTLSADDRAddr",
[tglobaltlsaddr], []>;
-def vectoraddr : ComplexPattern<iPTR, 5, "SelectAddr", [],[SDNPWantParent]>;
-//def vectoraddr : ComplexPattern<iPTR, 5, "SelectVectorAddr", [],[SDNPWantParent]>;
-
//===----------------------------------------------------------------------===//
// X86 Instruction Predicate Definitions.
def HasCMov : Predicate<"Subtarget->hasCMov()">;
@@ -855,11 +855,11 @@ def X86_COND_E_OR_NE : ImmLeaf<i8, [{
return (Imm == X86::COND_E) || (Imm == X86::COND_NE);
}]>;
-let FastIselShouldIgnore = 1 in { // FastIsel should ignore all simm8 instrs.
- def i16immSExt8 : ImmLeaf<i16, [{ return Imm == (int8_t)Imm; }]>;
- def i32immSExt8 : ImmLeaf<i32, [{ return Imm == (int8_t)Imm; }]>;
- def i64immSExt8 : ImmLeaf<i64, [{ return Imm == (int8_t)Imm; }]>;
-}
+
+def i16immSExt8 : ImmLeaf<i16, [{ return Imm == (int8_t)Imm; }]>;
+def i32immSExt8 : ImmLeaf<i32, [{ return Imm == (int8_t)Imm; }]>;
+def i64immSExt8 : ImmLeaf<i64, [{ return Imm == (int8_t)Imm; }]>;
+
def i64immSExt32 : ImmLeaf<i64, [{ return Imm == (int32_t)Imm; }]>;
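// Aside (a sketch for illustration, not code from this patch): the simm8
// ImmLeaf predicates above read the same way in plain C++ -- an immediate
// fits the sign-extended 8-bit encoding exactly when truncating it to int8_t
// and widening back is lossless:
#include <cstdint>
static bool isSExt8(int64_t Imm) { return Imm == (int8_t)Imm; }
// isSExt8(127) and isSExt8(-128) hold; isSExt8(128) does not.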
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index d2929d2..ccdbf0e 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -3567,7 +3567,7 @@ multiclass sse1_fp_unop_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
defm V#NAME#SS : avx_fp_unop_s<opc, "v"#OpcodeStr##ss, FR32, v4f32, f32,
f32mem, ssmem, sse_load_f32,
!cast<Intrinsic>("int_x86_sse_"##OpcodeStr##_ss), OpNode,
- itins, HasAVX, "SS">, XS, VEX_4V, VEX_LIG;
+ itins, UseAVX, "SS">, XS, VEX_4V, VEX_LIG;
}
multiclass sse2_fp_unop_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
@@ -3579,7 +3579,7 @@ multiclass sse2_fp_unop_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
defm V#NAME#SD : avx_fp_unop_s<opc, "v"#OpcodeStr##sd, FR64, v2f64, f64,
f64mem, sdmem, sse_load_f64,
!cast<Intrinsic>("int_x86_sse2_"##OpcodeStr##_sd),
- OpNode, itins, HasAVX, "SD">, XD, VEX_4V, VEX_LIG;
+ OpNode, itins, UseAVX, "SD">, XD, VEX_4V, VEX_LIG;
}
// Square root.
@@ -4077,7 +4077,7 @@ defm PMULUDQ : PDI_binop_rm2<0xF4, "pmuludq", X86pmuludq, v2i64, v4i32, VR128,
// SSE2 - Packed Integer Logical Instructions
//===---------------------------------------------------------------------===//
-let Predicates = [HasAVX] in {
+let Predicates = [HasAVX, NoVLX] in {
defm VPSLLW : PDI_binop_rmi<0xF1, 0x71, MRM6r, "vpsllw", X86vshl, X86vshli,
VR128, v8i16, v8i16, bc_v8i16, loadv2i64,
SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;
@@ -4123,7 +4123,7 @@ let ExeDomain = SSEPackedInt, SchedRW = [WriteVecShift] in {
}
} // Predicates = [HasAVX]
-let Predicates = [HasAVX2] in {
+let Predicates = [HasAVX2, NoVLX] in {
defm VPSLLWY : PDI_binop_rmi<0xF1, 0x71, MRM6r, "vpsllw", X86vshl, X86vshli,
VR256, v16i16, v8i16, bc_v8i16, loadv2i64,
SSE_INTSHIFT_ITINS_P, 0>, VEX_4V, VEX_L;
@@ -5902,7 +5902,6 @@ multiclass SS41I_pmovx_avx2_patterns<string OpcPrefix, string ExtTy, SDNode ExtO
(!cast<I>(OpcPrefix#DQYrr) VR128:$src)>;
// On AVX2, we also support 256bit inputs.
- // FIXME: remove these patterns when the old shuffle lowering goes away.
def : Pat<(v16i16 (ExtOp (v32i8 VR256:$src))),
(!cast<I>(OpcPrefix#BWYrr) (EXTRACT_SUBREG VR256:$src, sub_xmm))>;
def : Pat<(v8i32 (ExtOp (v32i8 VR256:$src))),
@@ -6955,6 +6954,34 @@ multiclass SS41I_binop_rmi_int<bits<8> opc, string OpcodeStr,
Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
+/// SS41I_binop_rmi - SSE 4.1 binary operator with 8-bit immediate
+multiclass SS41I_binop_rmi<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ ValueType OpVT, RegisterClass RC, PatFrag memop_frag,
+ X86MemOperand x86memop, bit Is2Addr = 1,
+ OpndItins itins = DEFAULT_ITINS> {
+ let isCommutable = 1 in
+ def rri : SS4AIi8<opc, MRMSrcReg, (outs RC:$dst),
+ (ins RC:$src1, RC:$src2, u8imm:$src3),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
+ [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2, imm:$src3)))],
+ itins.rr>, Sched<[itins.Sched]>;
+ def rmi : SS4AIi8<opc, MRMSrcMem, (outs RC:$dst),
+ (ins RC:$src1, x86memop:$src2, u8imm:$src3),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
+ [(set RC:$dst,
+ (OpVT (OpNode RC:$src1,
+ (bitconvert (memop_frag addr:$src2)), imm:$src3)))], itins.rm>,
+ Sched<[itins.Sched.Folded, ReadAfterLd]>;
+}
+
let Predicates = [HasAVX] in {
let isCommutable = 0 in {
defm VMPSADBW : SS41I_binop_rmi_int<0x42, "vmpsadbw", int_x86_sse41_mpsadbw,
@@ -6963,26 +6990,24 @@ let Predicates = [HasAVX] in {
}
let ExeDomain = SSEPackedSingle in {
- defm VBLENDPS : SS41I_binop_rmi_int<0x0C, "vblendps", int_x86_sse41_blendps,
- VR128, loadv4f32, f128mem, 0,
- DEFAULT_ITINS_FBLENDSCHED>, VEX_4V;
- defm VBLENDPSY : SS41I_binop_rmi_int<0x0C, "vblendps",
- int_x86_avx_blend_ps_256, VR256, loadv8f32,
- f256mem, 0, DEFAULT_ITINS_FBLENDSCHED>,
- VEX_4V, VEX_L;
+ defm VBLENDPS : SS41I_binop_rmi<0x0C, "vblendps", X86Blendi, v4f32,
+ VR128, loadv4f32, f128mem, 0,
+ DEFAULT_ITINS_FBLENDSCHED>, VEX_4V;
+ defm VBLENDPSY : SS41I_binop_rmi<0x0C, "vblendps", X86Blendi, v8f32,
+ VR256, loadv8f32, f256mem, 0,
+ DEFAULT_ITINS_FBLENDSCHED>, VEX_4V, VEX_L;
}
let ExeDomain = SSEPackedDouble in {
- defm VBLENDPD : SS41I_binop_rmi_int<0x0D, "vblendpd", int_x86_sse41_blendpd,
- VR128, loadv2f64, f128mem, 0,
- DEFAULT_ITINS_FBLENDSCHED>, VEX_4V;
- defm VBLENDPDY : SS41I_binop_rmi_int<0x0D, "vblendpd",
- int_x86_avx_blend_pd_256,VR256, loadv4f64,
- f256mem, 0, DEFAULT_ITINS_FBLENDSCHED>,
- VEX_4V, VEX_L;
+ defm VBLENDPD : SS41I_binop_rmi<0x0D, "vblendpd", X86Blendi, v2f64,
+ VR128, loadv2f64, f128mem, 0,
+ DEFAULT_ITINS_FBLENDSCHED>, VEX_4V;
+ defm VBLENDPDY : SS41I_binop_rmi<0x0D, "vblendpd", X86Blendi, v4f64,
+ VR256, loadv4f64, f256mem, 0,
+ DEFAULT_ITINS_FBLENDSCHED>, VEX_4V, VEX_L;
}
- defm VPBLENDW : SS41I_binop_rmi_int<0x0E, "vpblendw", int_x86_sse41_pblendw,
- VR128, loadv2i64, i128mem, 0,
- DEFAULT_ITINS_BLENDSCHED>, VEX_4V;
+ defm VPBLENDW : SS41I_binop_rmi<0x0E, "vpblendw", X86Blendi, v8i16,
+ VR128, loadv2i64, i128mem, 0,
+ DEFAULT_ITINS_BLENDSCHED>, VEX_4V;
let ExeDomain = SSEPackedSingle in
defm VDPPS : SS41I_binop_rmi_int<0x40, "vdpps", int_x86_sse41_dpps,
@@ -7004,9 +7029,9 @@ let Predicates = [HasAVX2] in {
VR256, loadv4i64, i256mem, 0,
DEFAULT_ITINS_MPSADSCHED>, VEX_4V, VEX_L;
}
- defm VPBLENDWY : SS41I_binop_rmi_int<0x0E, "vpblendw", int_x86_avx2_pblendw,
- VR256, loadv4i64, i256mem, 0,
- DEFAULT_ITINS_BLENDSCHED>, VEX_4V, VEX_L;
+ defm VPBLENDWY : SS41I_binop_rmi<0x0E, "vpblendw", X86Blendi, v16i16,
+ VR256, loadv4i64, i256mem, 0,
+ DEFAULT_ITINS_BLENDSCHED>, VEX_4V, VEX_L;
}
let Constraints = "$src1 = $dst" in {
@@ -7016,16 +7041,16 @@ let Constraints = "$src1 = $dst" in {
1, SSE_MPSADBW_ITINS>;
}
let ExeDomain = SSEPackedSingle in
- defm BLENDPS : SS41I_binop_rmi_int<0x0C, "blendps", int_x86_sse41_blendps,
- VR128, memopv4f32, f128mem,
- 1, SSE_INTALU_ITINS_FBLEND_P>;
+ defm BLENDPS : SS41I_binop_rmi<0x0C, "blendps", X86Blendi, v4f32,
+ VR128, memopv4f32, f128mem,
+ 1, SSE_INTALU_ITINS_FBLEND_P>;
let ExeDomain = SSEPackedDouble in
- defm BLENDPD : SS41I_binop_rmi_int<0x0D, "blendpd", int_x86_sse41_blendpd,
- VR128, memopv2f64, f128mem,
- 1, SSE_INTALU_ITINS_FBLEND_P>;
- defm PBLENDW : SS41I_binop_rmi_int<0x0E, "pblendw", int_x86_sse41_pblendw,
- VR128, memopv2i64, i128mem,
- 1, SSE_INTALU_ITINS_BLEND_P>;
+ defm BLENDPD : SS41I_binop_rmi<0x0D, "blendpd", X86Blendi, v2f64,
+ VR128, memopv2f64, f128mem,
+ 1, SSE_INTALU_ITINS_FBLEND_P>;
+ defm PBLENDW : SS41I_binop_rmi<0x0E, "pblendw", X86Blendi, v8i16,
+ VR128, memopv2i64, i128mem,
+ 1, SSE_INTALU_ITINS_BLEND_P>;
let ExeDomain = SSEPackedSingle in
defm DPPS : SS41I_binop_rmi_int<0x40, "dpps", int_x86_sse41_dpps,
VR128, memopv4f32, f128mem, 1,
@@ -7116,32 +7141,12 @@ let Predicates = [HasAVX] in {
def : Pat<(v4f64 (vselect (v4i64 VR256:$mask), (v4f64 VR256:$src1),
(v4f64 VR256:$src2))),
(VBLENDVPDYrr VR256:$src2, VR256:$src1, VR256:$mask)>;
-
- def : Pat<(v8f32 (X86Blendi (v8f32 VR256:$src1), (v8f32 VR256:$src2),
- (imm:$mask))),
- (VBLENDPSYrri VR256:$src1, VR256:$src2, imm:$mask)>;
- def : Pat<(v4f64 (X86Blendi (v4f64 VR256:$src1), (v4f64 VR256:$src2),
- (imm:$mask))),
- (VBLENDPDYrri VR256:$src1, VR256:$src2, imm:$mask)>;
-
- def : Pat<(v8i16 (X86Blendi (v8i16 VR128:$src1), (v8i16 VR128:$src2),
- (imm:$mask))),
- (VPBLENDWrri VR128:$src1, VR128:$src2, imm:$mask)>;
- def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$src1), (v4f32 VR128:$src2),
- (imm:$mask))),
- (VBLENDPSrri VR128:$src1, VR128:$src2, imm:$mask)>;
- def : Pat<(v2f64 (X86Blendi (v2f64 VR128:$src1), (v2f64 VR128:$src2),
- (imm:$mask))),
- (VBLENDPDrri VR128:$src1, VR128:$src2, imm:$mask)>;
}
let Predicates = [HasAVX2] in {
def : Pat<(v32i8 (vselect (v32i8 VR256:$mask), (v32i8 VR256:$src1),
(v32i8 VR256:$src2))),
(VPBLENDVBYrr VR256:$src2, VR256:$src1, VR256:$mask)>;
- def : Pat<(v16i16 (X86Blendi (v16i16 VR256:$src1), (v16i16 VR256:$src2),
- (imm:$mask))),
- (VPBLENDWYrri VR256:$src1, VR256:$src2, imm:$mask)>;
}
// Patterns
@@ -7260,17 +7265,6 @@ let Predicates = [UseSSE41] in {
def : Pat<(v2f64 (vselect (v2i64 XMM0), (v2f64 VR128:$src1),
(v2f64 VR128:$src2))),
(BLENDVPDrr0 VR128:$src2, VR128:$src1)>;
-
- def : Pat<(v8i16 (X86Blendi (v8i16 VR128:$src1), (v8i16 VR128:$src2),
- (imm:$mask))),
- (PBLENDWrri VR128:$src1, VR128:$src2, imm:$mask)>;
- def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$src1), (v4f32 VR128:$src2),
- (imm:$mask))),
- (BLENDPSrri VR128:$src1, VR128:$src2, imm:$mask)>;
- def : Pat<(v2f64 (X86Blendi (v2f64 VR128:$src1), (v2f64 VR128:$src2),
- (imm:$mask))),
- (BLENDPDrri VR128:$src1, VR128:$src2, imm:$mask)>;
-
}
let SchedRW = [WriteLoad] in {
@@ -7840,9 +7834,9 @@ def VBROADCASTSDYrr : avx2_broadcast_reg<0x19, "vbroadcastsd", VR256,
WriteFShuffle256>, VEX_L;
let Predicates = [HasAVX2] in
-def VBROADCASTI128 : avx_broadcast<0x5A, "vbroadcasti128", VR256, i128mem,
- int_x86_avx2_vbroadcasti128, WriteLoad>,
- VEX_L;
+def VBROADCASTI128 : avx_broadcast_no_int<0x5A, "vbroadcasti128", VR256,
+ i128mem, v4i64, loadv2i64,
+ WriteLoad>, VEX_L;
let Predicates = [HasAVX] in
def : Pat<(int_x86_avx_vbroadcastf128_ps_256 addr:$src),
@@ -8238,38 +8232,31 @@ let Predicates = [HasF16C] in {
// AVX2 Instructions
//===----------------------------------------------------------------------===//
-/// AVX2_binop_rmi_int - AVX2 binary operator with 8-bit immediate
-multiclass AVX2_binop_rmi_int<bits<8> opc, string OpcodeStr,
- Intrinsic IntId, RegisterClass RC, PatFrag memop_frag,
- X86MemOperand x86memop> {
+/// AVX2_binop_rmi - AVX2 binary operator with 8-bit immediate
+multiclass AVX2_binop_rmi<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ ValueType OpVT, RegisterClass RC, PatFrag memop_frag,
+ X86MemOperand x86memop> {
let isCommutable = 1 in
def rri : AVX2AIi8<opc, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, RC:$src2, u8imm:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
- [(set RC:$dst, (IntId RC:$src1, RC:$src2, imm:$src3))]>,
+ [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2, imm:$src3)))]>,
Sched<[WriteBlend]>, VEX_4V;
def rmi : AVX2AIi8<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, x86memop:$src2, u8imm:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set RC:$dst,
- (IntId RC:$src1,
- (bitconvert (memop_frag addr:$src2)), imm:$src3))]>,
+ (OpVT (OpNode RC:$src1,
+ (bitconvert (memop_frag addr:$src2)), imm:$src3)))]>,
Sched<[WriteBlendLd, ReadAfterLd]>, VEX_4V;
}
-defm VPBLENDD : AVX2_binop_rmi_int<0x02, "vpblendd", int_x86_avx2_pblendd_128,
- VR128, loadv2i64, i128mem>;
-defm VPBLENDDY : AVX2_binop_rmi_int<0x02, "vpblendd", int_x86_avx2_pblendd_256,
- VR256, loadv4i64, i256mem>, VEX_L;
-
-def : Pat<(v4i32 (X86Blendi (v4i32 VR128:$src1), (v4i32 VR128:$src2),
- imm:$mask)),
- (VPBLENDDrri VR128:$src1, VR128:$src2, imm:$mask)>;
-def : Pat<(v8i32 (X86Blendi (v8i32 VR256:$src1), (v8i32 VR256:$src2),
- imm:$mask)),
- (VPBLENDDYrri VR256:$src1, VR256:$src2, imm:$mask)>;
+defm VPBLENDD : AVX2_binop_rmi<0x02, "vpblendd", X86Blendi, v4i32,
+ VR128, loadv2i64, i128mem>;
+defm VPBLENDDY : AVX2_binop_rmi<0x02, "vpblendd", X86Blendi, v8i32,
+ VR256, loadv4i64, i256mem>, VEX_L;
//===----------------------------------------------------------------------===//
// VPBROADCAST - Load from memory and broadcast to all elements of the
@@ -8608,9 +8595,7 @@ def : Pat<(vinsert128_insert:$ins (v16i16 VR256:$src1),
//
def VEXTRACTI128rr : AVX2AIi8<0x39, MRMDestReg, (outs VR128:$dst),
(ins VR256:$src1, u8imm:$src2),
- "vextracti128\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set VR128:$dst,
- (int_x86_avx2_vextracti128 VR256:$src1, imm:$src2))]>,
+ "vextracti128\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
Sched<[WriteShuffle256]>, VEX, VEX_L;
let hasSideEffects = 0, mayStore = 1 in
def VEXTRACTI128mr : AVX2AIi8<0x39, MRMDestMem, (outs),
diff --git a/lib/Target/X86/X86IntrinsicsInfo.h b/lib/Target/X86/X86IntrinsicsInfo.h
index e436811..42256b2 100644
--- a/lib/Target/X86/X86IntrinsicsInfo.h
+++ b/lib/Target/X86/X86IntrinsicsInfo.h
@@ -175,8 +175,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx2_packsswb, INTR_TYPE_2OP, X86ISD::PACKSS, 0),
X86_INTRINSIC_DATA(avx2_packusdw, INTR_TYPE_2OP, X86ISD::PACKUS, 0),
X86_INTRINSIC_DATA(avx2_packuswb, INTR_TYPE_2OP, X86ISD::PACKUS, 0),
- X86_INTRINSIC_DATA(avx2_permd, INTR_TYPE_2OP, X86ISD::VPERMV, 0),
- X86_INTRINSIC_DATA(avx2_permps, INTR_TYPE_2OP, X86ISD::VPERMV, 0),
X86_INTRINSIC_DATA(avx2_phadd_d, INTR_TYPE_2OP, X86ISD::HADD, 0),
X86_INTRINSIC_DATA(avx2_phadd_w, INTR_TYPE_2OP, X86ISD::HADD, 0),
X86_INTRINSIC_DATA(avx2_phsub_d, INTR_TYPE_2OP, X86ISD::HSUB, 0),
diff --git a/lib/Target/X86/X86MCInstLower.cpp b/lib/Target/X86/X86MCInstLower.cpp
index 6af59d4..cd3076d 100644
--- a/lib/Target/X86/X86MCInstLower.cpp
+++ b/lib/Target/X86/X86MCInstLower.cpp
@@ -77,8 +77,8 @@ namespace llvm {
X86AsmPrinter::StackMapShadowTracker::startFunction(MachineFunction &F) {
MF = &F;
CodeEmitter.reset(TM.getTarget().createMCCodeEmitter(
- *MF->getSubtarget().getInstrInfo(), *MF->getSubtarget().getRegisterInfo(),
- MF->getSubtarget(), MF->getContext()));
+ *MF->getSubtarget().getInstrInfo(),
+ *MF->getSubtarget().getRegisterInfo(), MF->getContext()));
}
void X86AsmPrinter::StackMapShadowTracker::count(MCInst &Inst,
diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp
index cab7ce8..06545bc 100644
--- a/lib/Target/X86/X86RegisterInfo.cpp
+++ b/lib/Target/X86/X86RegisterInfo.cpp
@@ -14,6 +14,7 @@
//===----------------------------------------------------------------------===//
#include "X86RegisterInfo.h"
+#include "X86FrameLowering.h"
#include "X86InstrBuilder.h"
#include "X86MachineFunctionInfo.h"
#include "X86Subtarget.h"
@@ -53,26 +54,26 @@ static cl::opt<bool>
EnableBasePointer("x86-use-base-pointer", cl::Hidden, cl::init(true),
cl::desc("Enable use of a base pointer for complex stack frames"));
-X86RegisterInfo::X86RegisterInfo(const X86Subtarget &STI)
- : X86GenRegisterInfo(
- (STI.is64Bit() ? X86::RIP : X86::EIP),
- X86_MC::getDwarfRegFlavour(STI.getTargetTriple(), false),
- X86_MC::getDwarfRegFlavour(STI.getTargetTriple(), true),
- (STI.is64Bit() ? X86::RIP : X86::EIP)),
- Subtarget(STI) {
+X86RegisterInfo::X86RegisterInfo(const Triple &TT)
+ : X86GenRegisterInfo((TT.isArch64Bit() ? X86::RIP : X86::EIP),
+ X86_MC::getDwarfRegFlavour(TT, false),
+ X86_MC::getDwarfRegFlavour(TT, true),
+ (TT.isArch64Bit() ? X86::RIP : X86::EIP)) {
X86_MC::InitLLVM2SEHRegisterMapping(this);
// Cache some information.
- Is64Bit = Subtarget.is64Bit();
- IsWin64 = Subtarget.isTargetWin64();
+ Is64Bit = TT.isArch64Bit();
+ IsWin64 = Is64Bit && TT.isOSWindows();
// Use a callee-saved register as the base pointer. These registers must
// not conflict with any ABI requirements. For example, in 32-bit mode PIC
// requires GOT in the EBX register before function calls via PLT GOT pointer.
if (Is64Bit) {
SlotSize = 8;
- bool Use64BitReg =
- Subtarget.isTarget64BitLP64() || Subtarget.isTargetNaCl64();
+ // This matches the simplified 32-bit pointer code in the data layout
+ // computation.
+ // FIXME: Should use the data layout?
+ bool Use64BitReg = TT.getEnvironment() != Triple::GNUX32;
StackPtr = Use64BitReg ? X86::RSP : X86::ESP;
FramePtr = Use64BitReg ? X86::RBP : X86::EBP;
BasePtr = Use64BitReg ? X86::RBX : X86::EBX;
@@ -120,8 +121,9 @@ X86RegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A,
return X86GenRegisterInfo::getMatchingSuperRegClass(A, B, SubIdx);
}
-const TargetRegisterClass*
-X86RegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC) const{
+const TargetRegisterClass *
+X86RegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC,
+ const MachineFunction &MF) const {
// Don't allow super-classes of GR8_NOREX. This class is only used after
// extracting sub_8bit_hi sub-registers. The H sub-registers cannot be copied
// to the full GR8 register class in 64-bit mode, so we cannot allow the
@@ -161,6 +163,7 @@ X86RegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC) const{
const TargetRegisterClass *
X86RegisterInfo::getPointerRegClass(const MachineFunction &MF,
unsigned Kind) const {
+ const X86Subtarget &Subtarget = MF.getSubtarget<X86Subtarget>();
switch (Kind) {
default: llvm_unreachable("Unexpected Kind in getPointerRegClass!");
case 0: // Normal GPRs.
@@ -172,9 +175,9 @@ X86RegisterInfo::getPointerRegClass(const MachineFunction &MF,
return &X86::GR64_NOSPRegClass;
return &X86::GR32_NOSPRegClass;
case 2: // Available for tailcall (not callee-saved GPRs).
- if (Subtarget.isTargetWin64())
+ if (IsWin64)
return &X86::GR64_TCW64RegClass;
- else if (Subtarget.is64Bit())
+ else if (Is64Bit)
return &X86::GR64_TCRegClass;
const Function *F = MF.getFunction();
@@ -210,7 +213,7 @@ X86RegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
case X86::GR64RegClassID:
return 12 - FPDiff;
case X86::VR128RegClassID:
- return Subtarget.is64Bit() ? 10 : 4;
+ return Is64Bit ? 10 : 4;
case X86::VR64RegClassID:
return 4;
}
@@ -218,8 +221,10 @@ X86RegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
const MCPhysReg *
X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
+ const X86Subtarget &Subtarget = MF->getSubtarget<X86Subtarget>();
bool HasAVX = Subtarget.hasAVX();
bool HasAVX512 = Subtarget.hasAVX512();
+ bool CallsEHReturn = MF->getMMI().callsEHReturn();
assert(MF && "MachineFunction required");
switch (MF->getFunction()->getCallingConv()) {
@@ -253,11 +258,16 @@ X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
if (Is64Bit)
return CSR_64_MostRegs_SaveList;
break;
+ case CallingConv::X86_64_Win64:
+ return CSR_Win64_SaveList;
+ case CallingConv::X86_64_SysV:
+ if (CallsEHReturn)
+ return CSR_64EHRet_SaveList;
+ return CSR_64_SaveList;
default:
break;
}
- bool CallsEHReturn = MF->getMMI().callsEHReturn();
if (Is64Bit) {
if (IsWin64)
return CSR_Win64_SaveList;
@@ -270,8 +280,10 @@ X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
return CSR_32_SaveList;
}
-const uint32_t*
-X86RegisterInfo::getCallPreservedMask(CallingConv::ID CC) const {
+const uint32_t *
+X86RegisterInfo::getCallPreservedMask(const MachineFunction &MF,
+ CallingConv::ID CC) const {
+ const X86Subtarget &Subtarget = MF.getSubtarget<X86Subtarget>();
bool HasAVX = Subtarget.hasAVX();
bool HasAVX512 = Subtarget.hasAVX512();
@@ -308,6 +320,10 @@ X86RegisterInfo::getCallPreservedMask(CallingConv::ID CC) const {
break;
default:
break;
+ case CallingConv::X86_64_Win64:
+ return CSR_Win64_RegMask;
+ case CallingConv::X86_64_SysV:
+ return CSR_64_RegMask;
}
// Unlike getCalleeSavedRegs(), we don't have MMI so we can't check
@@ -349,7 +365,7 @@ BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
// Set the base-pointer register and its aliases as reserved if needed.
if (hasBasePointer(MF)) {
CallingConv::ID CC = MF.getFunction()->getCallingConv();
- const uint32_t* RegMask = getCallPreservedMask(CC);
+ const uint32_t *RegMask = getCallPreservedMask(MF, CC);
if (MachineOperand::clobbersPhysReg(RegMask, getBaseRegister()))
report_fatal_error(
"Stack realignment in presence of dynamic allocas is not supported with"
@@ -393,7 +409,7 @@ BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
Reserved.set(*AI);
}
}
- if (!Is64Bit || !Subtarget.hasAVX512()) {
+ if (!Is64Bit || !MF.getSubtarget<X86Subtarget>().hasAVX512()) {
for (unsigned n = 16; n != 32; ++n) {
for (MCRegAliasIterator AI(X86::XMM0 + n, this, true); AI.isValid(); ++AI)
Reserved.set(*AI);
@@ -486,6 +502,24 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
else
BasePtr = (TFI->hasFP(MF) ? FramePtr : StackPtr);
+ // FRAME_ALLOC uses a single offset, with no register. It only works in the
+ // simple FP case, and doesn't work with stack realignment. On 32-bit, the
+ // offset is from the traditional base pointer location. On 64-bit, the
+ // offset is from the SP at the end of the prologue, not the FP location. This
+ // matches the behavior of llvm.frameaddress.
+ if (Opc == TargetOpcode::FRAME_ALLOC) {
+ MachineOperand &FI = MI.getOperand(FIOperandNum);
+ bool IsWinEH = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
+ int Offset;
+ if (IsWinEH)
+ Offset = static_cast<const X86FrameLowering *>(TFI)
+ ->getFrameIndexOffsetFromSP(MF, FrameIndex);
+ else
+ Offset = TFI->getFrameIndexOffset(MF, FrameIndex);
+ FI.ChangeToImmediate(Offset);
+ return;
+ }
+
// For LEA64_32r when BasePtr is 32-bits (X32) we can use full-size 64-bit
// register as source operand, semantic is the same and destination is
// 32-bits. It saves one byte per lea in code since 0x67 prefix is avoided.
@@ -537,8 +571,9 @@ unsigned X86RegisterInfo::getFrameRegister(const MachineFunction &MF) const {
return TFI->hasFP(MF) ? FramePtr : StackPtr;
}
-unsigned X86RegisterInfo::getPtrSizedFrameRegister(
- const MachineFunction &MF) const {
+unsigned
+X86RegisterInfo::getPtrSizedFrameRegister(const MachineFunction &MF) const {
+ const X86Subtarget &Subtarget = MF.getSubtarget<X86Subtarget>();
unsigned FrameReg = getFrameRegister(MF);
if (Subtarget.isTarget64BitILP32())
FrameReg = getX86SubSuperRegister(FrameReg, MVT::i32, false);
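// Aside (a sketch for illustration, not code from this patch): with the
// Subtarget member gone, everything the constructor caches must now be
// derivable from the Triple alone, e.g. the two flags it sets up front:
#include "llvm/ADT/Triple.h"
static bool isWin64Triple(const llvm::Triple &TT) {
  return TT.isArch64Bit() && TT.isOSWindows();
}
// llvm::Triple("x86_64-pc-windows-msvc") -> true
// llvm::Triple("i686-pc-linux-gnu")      -> false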
diff --git a/lib/Target/X86/X86RegisterInfo.h b/lib/Target/X86/X86RegisterInfo.h
index 406b1fc..74edab9 100644
--- a/lib/Target/X86/X86RegisterInfo.h
+++ b/lib/Target/X86/X86RegisterInfo.h
@@ -20,14 +20,7 @@
#include "X86GenRegisterInfo.inc"
namespace llvm {
- class Type;
- class TargetInstrInfo;
- class X86Subtarget;
-
class X86RegisterInfo final : public X86GenRegisterInfo {
-public:
- const X86Subtarget &Subtarget;
-
private:
/// Is64Bit - Is the target 64-bits.
///
@@ -55,7 +48,7 @@ private:
unsigned BasePtr;
public:
- X86RegisterInfo(const X86Subtarget &STI);
+ X86RegisterInfo(const Triple &TT);
// FIXME: This should be tablegen'd like getDwarfRegNum is
int getSEHRegNum(unsigned i) const;
@@ -76,8 +69,9 @@ public:
getSubClassWithSubReg(const TargetRegisterClass *RC,
unsigned Idx) const override;
- const TargetRegisterClass*
- getLargestLegalSuperClass(const TargetRegisterClass *RC) const override;
+ const TargetRegisterClass *
+ getLargestLegalSuperClass(const TargetRegisterClass *RC,
+ const MachineFunction &MF) const override;
/// getPointerRegClass - Returns a TargetRegisterClass used for pointer
/// values.
@@ -98,7 +92,8 @@ public:
/// callee-save registers on this target.
const MCPhysReg *
getCalleeSavedRegs(const MachineFunction* MF) const override;
- const uint32_t *getCallPreservedMask(CallingConv::ID) const override;
+ const uint32_t *getCallPreservedMask(const MachineFunction &MF,
+ CallingConv::ID) const override;
const uint32_t *getNoPreservedMask() const;
/// getReservedRegs - Returns a bitset indexed by physical register number
diff --git a/lib/Target/X86/X86SchedHaswell.td b/lib/Target/X86/X86SchedHaswell.td
index 61c0600..677e824 100644
--- a/lib/Target/X86/X86SchedHaswell.td
+++ b/lib/Target/X86/X86SchedHaswell.td
@@ -2014,7 +2014,7 @@ def : InstRW<[WriteFMADDr],
// 3p forms.
"VF(N?)M(ADD|SUB|ADDSUB|SUBADD)P(S|D)(r213|r132|r231)r(Y)?",
// 3s forms.
- "VF(N?)M(ADD|SUB)S(S|D)(r132|231|213)r",
+ "VF(N?)M(ADD|SUB)S(S|D)(r132|r231|r213)r",
// 4s/4s_int forms.
"VF(N?)M(ADD|SUB)S(S|D)4rr(_REV|_Int)?",
// 4p forms.
@@ -2031,7 +2031,7 @@ def : InstRW<[WriteFMADDm],
// 3p forms.
"VF(N?)M(ADD|SUB|ADDSUB|SUBADD)P(S|D)(r213|r132|r231)m(Y)?",
// 3s forms.
- "VF(N?)M(ADD|SUB)S(S|D)(r132|231|213)m",
+ "VF(N?)M(ADD|SUB)S(S|D)(r132|r231|r213)m",
// 4s/4s_int forms.
"VF(N?)M(ADD|SUB)S(S|D)4(rm|mr)(_Int)?",
// 4p forms.
diff --git a/lib/Target/X86/X86SelectionDAGInfo.cpp b/lib/Target/X86/X86SelectionDAGInfo.cpp
index 7feabf6..ca8fc9c 100644
--- a/lib/Target/X86/X86SelectionDAGInfo.cpp
+++ b/lib/Target/X86/X86SelectionDAGInfo.cpp
@@ -62,8 +62,8 @@ X86SelectionDAGInfo::EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl,
#ifndef NDEBUG
// If the base register might conflict with our physical registers, bail out.
- unsigned ClobberSet[] = {X86::RCX, X86::RAX, X86::RDI,
- X86::ECX, X86::EAX, X86::EDI};
+ const unsigned ClobberSet[] = {X86::RCX, X86::RAX, X86::RDI,
+ X86::ECX, X86::EAX, X86::EDI};
assert(!isBaseRegConflictPossible(DAG, ClobberSet));
#endif
@@ -228,8 +228,8 @@ SDValue X86SelectionDAGInfo::EmitTargetCodeForMemcpy(
return SDValue();
// If the base register might conflict with our physical registers, bail out.
- unsigned ClobberSet[] = {X86::RCX, X86::RSI, X86::RDI,
- X86::ECX, X86::ESI, X86::EDI};
+ const unsigned ClobberSet[] = {X86::RCX, X86::RSI, X86::RDI,
+ X86::ECX, X86::ESI, X86::EDI};
if (isBaseRegConflictPossible(DAG, ClobberSet))
return SDValue();
diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp
index 4bde053..43d3895 100644
--- a/lib/Target/X86/X86TargetMachine.cpp
+++ b/lib/Target/X86/X86TargetMachine.cpp
@@ -37,10 +37,10 @@ static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
return make_unique<TargetLoweringObjectFileMachO>();
}
- if (TT.isOSLinux())
- return make_unique<X86LinuxTargetObjectFile>();
+ if (TT.isOSLinux() || TT.isOSNaCl())
+ return make_unique<X86LinuxNaClTargetObjectFile>();
if (TT.isOSBinFormatELF())
- return make_unique<TargetLoweringObjectFileELF>();
+ return make_unique<X86ELFTargetObjectFile>();
if (TT.isKnownWindowsMSVCEnvironment())
return make_unique<X86WindowsTargetObjectFile>();
if (TT.isOSBinFormatCOFF())
@@ -94,9 +94,9 @@ X86TargetMachine::X86TargetMachine(const Target &T, StringRef TT, StringRef CPU,
StringRef FS, const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL)
- : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
+ : LLVMTargetMachine(T, computeDataLayout(Triple(TT)), TT, CPU, FS, Options,
+ RM, CM, OL),
TLOF(createTLOF(Triple(getTargetTriple()))),
- DL(computeDataLayout(Triple(TT))),
Subtarget(TT, CPU, FS, *this, Options.StackAlignmentOverride) {
// default to hard float ABI
if (Options.FloatABIType == FloatABI::Default)
diff --git a/lib/Target/X86/X86TargetMachine.h b/lib/Target/X86/X86TargetMachine.h
index 283858d..c9833ed 100644
--- a/lib/Target/X86/X86TargetMachine.h
+++ b/lib/Target/X86/X86TargetMachine.h
@@ -24,8 +24,6 @@ class StringRef;
class X86TargetMachine final : public LLVMTargetMachine {
std::unique_ptr<TargetLoweringObjectFile> TLOF;
- // Calculates type size & alignment
- const DataLayout DL;
X86Subtarget Subtarget;
mutable StringMap<std::unique_ptr<X86Subtarget>> SubtargetMap;
@@ -35,8 +33,6 @@ public:
const TargetOptions &Options, Reloc::Model RM,
CodeModel::Model CM, CodeGenOpt::Level OL);
~X86TargetMachine() override;
- const DataLayout *getDataLayout() const override { return &DL; }
- const X86Subtarget *getSubtargetImpl() const override { return &Subtarget; }
const X86Subtarget *getSubtargetImpl(const Function &F) const override;
TargetIRAnalysis getTargetIRAnalysis() override;
diff --git a/lib/Target/X86/X86TargetObjectFile.cpp b/lib/Target/X86/X86TargetObjectFile.cpp
index 1d1c32e..d65d3b0 100644
--- a/lib/Target/X86/X86TargetObjectFile.cpp
+++ b/lib/Target/X86/X86TargetObjectFile.cpp
@@ -15,17 +15,13 @@
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCSectionCOFF.h"
#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCValue.h"
#include "llvm/Support/Dwarf.h"
#include "llvm/Target/TargetLowering.h"
using namespace llvm;
using namespace dwarf;
-X86_64MachoTargetObjectFile::X86_64MachoTargetObjectFile()
- : TargetLoweringObjectFileMachO() {
- SupportIndirectSymViaGOTPCRel = true;
-}
-
const MCExpr *X86_64MachoTargetObjectFile::getTTypeGlobalReference(
const GlobalValue *GV, unsigned Encoding, Mangler &Mang,
const TargetMachine &TM, MachineModuleInfo *MMI,
@@ -52,28 +48,30 @@ MCSymbol *X86_64MachoTargetObjectFile::getCFIPersonalitySymbol(
}
const MCExpr *X86_64MachoTargetObjectFile::getIndirectSymViaGOTPCRel(
- const MCSymbol *Sym, int64_t Offset) const {
+ const MCSymbol *Sym, const MCValue &MV, int64_t Offset,
+ MachineModuleInfo *MMI, MCStreamer &Streamer) const {
  // On Darwin/X86-64, we need to use foo@GOTPCREL+4 to access the GOT entry
  // from a data section. If there's an additional offset, use
// foo@GOTPCREL+4+<offset>.
+ unsigned FinalOff = Offset+MV.getConstant()+4;
const MCExpr *Res =
MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_GOTPCREL, getContext());
- const MCExpr *Off = MCConstantExpr::Create(Offset+4, getContext());
+ const MCExpr *Off = MCConstantExpr::Create(FinalOff, getContext());
return MCBinaryExpr::CreateAdd(Res, Off, getContext());
}
+const MCExpr *X86ELFTargetObjectFile::getDebugThreadLocalSymbol(
+ const MCSymbol *Sym) const {
+ return MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_DTPOFF, getContext());
+}
+
void
-X86LinuxTargetObjectFile::Initialize(MCContext &Ctx, const TargetMachine &TM) {
+X86LinuxNaClTargetObjectFile::Initialize(MCContext &Ctx,
+ const TargetMachine &TM) {
TargetLoweringObjectFileELF::Initialize(Ctx, TM);
InitializeELF(TM.Options.UseInitArray);
}
-const MCExpr *
-X86LinuxTargetObjectFile::getDebugThreadLocalSymbol(
- const MCSymbol *Sym) const {
- return MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_DTPOFF, getContext());
-}
-
const MCExpr *X86WindowsTargetObjectFile::getExecutableRelativeSymbol(
const ConstantExpr *CE, Mangler &Mang, const TargetMachine &TM) const {
// We are looking for the difference of two symbols, need a subtraction
@@ -97,14 +95,12 @@ const MCExpr *X86WindowsTargetObjectFile::getExecutableRelativeSymbol(
SubRHS->getPointerAddressSpace() != 0)
return nullptr;
- // Both ptrtoint instructions must wrap global variables:
+ // Both ptrtoint instructions must wrap global objects:
// - Only global variables are eligible for image relative relocations.
- // - The subtrahend refers to the special symbol __ImageBase, a global.
- const GlobalVariable *GVLHS =
- dyn_cast<GlobalVariable>(SubLHS->getPointerOperand());
- const GlobalVariable *GVRHS =
- dyn_cast<GlobalVariable>(SubRHS->getPointerOperand());
- if (!GVLHS || !GVRHS)
+ // - The subtrahend refers to the special symbol __ImageBase, a GlobalVariable.
+ const auto *GOLHS = dyn_cast<GlobalObject>(SubLHS->getPointerOperand());
+ const auto *GVRHS = dyn_cast<GlobalVariable>(SubRHS->getPointerOperand());
+ if (!GOLHS || !GVRHS)
return nullptr;
// We expect __ImageBase to be a global variable without a section, externally
@@ -117,10 +113,10 @@ const MCExpr *X86WindowsTargetObjectFile::getExecutableRelativeSymbol(
return nullptr;
// An image-relative, thread-local, symbol makes no sense.
- if (GVLHS->isThreadLocal())
+ if (GOLHS->isThreadLocal())
return nullptr;
- return MCSymbolRefExpr::Create(TM.getSymbol(GVLHS, Mang),
+ return MCSymbolRefExpr::Create(TM.getSymbol(GOLHS, Mang),
MCSymbolRefExpr::VK_COFF_IMGREL32,
getContext());
}
diff --git a/lib/Target/X86/X86TargetObjectFile.h b/lib/Target/X86/X86TargetObjectFile.h
index f745538..2e25fb2 100644
--- a/lib/Target/X86/X86TargetObjectFile.h
+++ b/lib/Target/X86/X86TargetObjectFile.h
@@ -19,8 +19,6 @@ namespace llvm {
/// x86-64.
class X86_64MachoTargetObjectFile : public TargetLoweringObjectFileMachO {
public:
- X86_64MachoTargetObjectFile();
-
const MCExpr *
getTTypeGlobalReference(const GlobalValue *GV, unsigned Encoding,
Mangler &Mang, const TargetMachine &TM,
@@ -33,20 +31,25 @@ namespace llvm {
const TargetMachine &TM,
MachineModuleInfo *MMI) const override;
- const MCExpr *
- getIndirectSymViaGOTPCRel(const MCSymbol *Sym,
- int64_t Offset) const override;
+ const MCExpr *getIndirectSymViaGOTPCRel(const MCSymbol *Sym,
+ const MCValue &MV, int64_t Offset,
+ MachineModuleInfo *MMI,
+ MCStreamer &Streamer) const override;
};
- /// X86LinuxTargetObjectFile - This implementation is used for linux x86
- /// and x86-64.
- class X86LinuxTargetObjectFile : public TargetLoweringObjectFileELF {
- void Initialize(MCContext &Ctx, const TargetMachine &TM) override;
-
+  /// \brief This implementation is used for X86 ELF targets that don't
+ /// have a further specialization.
+ class X86ELFTargetObjectFile : public TargetLoweringObjectFileELF {
/// \brief Describe a TLS variable address within debug info.
const MCExpr *getDebugThreadLocalSymbol(const MCSymbol *Sym) const override;
};
+ /// X86LinuxNaClTargetObjectFile - This implementation is used for linux and
+ /// Native Client on x86 and x86-64.
+ class X86LinuxNaClTargetObjectFile : public X86ELFTargetObjectFile {
+ void Initialize(MCContext &Ctx, const TargetMachine &TM) override;
+ };
+
/// \brief This implementation is used for Windows targets on x86 and x86-64.
class X86WindowsTargetObjectFile : public TargetLoweringObjectFileCOFF {
const MCExpr *
diff --git a/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp b/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp
index 4073549..d0a09b2 100644
--- a/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp
+++ b/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp
@@ -126,15 +126,11 @@ void XCoreTargetAsmStreamer::emitCCBottomFunction(StringRef Name) {
}
}
-static MCStreamer *
-createXCoreMCAsmStreamer(MCContext &Ctx, formatted_raw_ostream &OS,
- bool isVerboseAsm, bool useDwarfDirectory,
- MCInstPrinter *InstPrint, MCCodeEmitter *CE,
- MCAsmBackend *TAB, bool ShowInst) {
- MCStreamer *S = llvm::createAsmStreamer(
- Ctx, OS, isVerboseAsm, useDwarfDirectory, InstPrint, CE, TAB, ShowInst);
- new XCoreTargetAsmStreamer(*S, OS);
- return S;
+static MCTargetStreamer *createTargetAsmStreamer(MCStreamer &S,
+ formatted_raw_ostream &OS,
+ MCInstPrinter *InstPrint,
+ bool isVerboseAsm) {
+ return new XCoreTargetAsmStreamer(S, OS);
}
// Force static initialization.
@@ -160,5 +156,6 @@ extern "C" void LLVMInitializeXCoreTargetMC() {
TargetRegistry::RegisterMCInstPrinter(TheXCoreTarget,
createXCoreMCInstPrinter);
- TargetRegistry::RegisterAsmStreamer(TheXCoreTarget, createXCoreMCAsmStreamer);
+ TargetRegistry::RegisterAsmTargetStreamer(TheXCoreTarget,
+ createTargetAsmStreamer);
}
diff --git a/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.h b/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.h
index 0ff5961..28e0275 100644
--- a/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.h
+++ b/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.h
@@ -14,6 +14,8 @@
#ifndef LLVM_LIB_TARGET_XCORE_MCTARGETDESC_XCOREMCTARGETDESC_H
#define LLVM_LIB_TARGET_XCORE_MCTARGETDESC_XCOREMCTARGETDESC_H
+#include "llvm/Support/DataTypes.h"
+
namespace llvm {
class Target;
diff --git a/lib/Target/XCore/XCoreISelDAGToDAG.cpp b/lib/Target/XCore/XCoreISelDAGToDAG.cpp
index f79b78b..5c7ea5e 100644
--- a/lib/Target/XCore/XCoreISelDAGToDAG.cpp
+++ b/lib/Target/XCore/XCoreISelDAGToDAG.cpp
@@ -65,7 +65,7 @@ namespace {
// Complex Pattern Selectors.
bool SelectADDRspii(SDValue Addr, SDValue &Base, SDValue &Offset);
- bool SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode,
+ bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
std::vector<SDValue> &OutOps) override;
const char *getPassName() const override {
@@ -108,12 +108,12 @@ bool XCoreDAGToDAGISel::SelectADDRspii(SDValue Addr, SDValue &Base,
}
bool XCoreDAGToDAGISel::
-SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode,
+SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
std::vector<SDValue> &OutOps) {
SDValue Reg;
- switch (ConstraintCode) {
+ switch (ConstraintID) {
default: return true;
- case 'm': // Memory.
+ case InlineAsm::Constraint_m: // Memory.
switch (Op.getOpcode()) {
default: return true;
case XCoreISD::CPRelativeWrapper:
diff --git a/lib/Target/XCore/XCoreISelLowering.h b/lib/Target/XCore/XCoreISelLowering.h
index 213ae4a..b20fc01 100644
--- a/lib/Target/XCore/XCoreISelLowering.h
+++ b/lib/Target/XCore/XCoreISelLowering.h
@@ -177,6 +177,12 @@ namespace llvm {
const std::string &Constraint,
MVT VT) const override;
+ unsigned getInlineAsmMemConstraint(
+ const std::string &ConstraintCode) const override {
+ // FIXME: Map different constraints differently.
+ return InlineAsm::Constraint_m;
+ }
+
// Expand specifics
SDValue TryExpandADDWithMul(SDNode *Op, SelectionDAG &DAG) const;
SDValue ExpandADDSUB(SDNode *Op, SelectionDAG &DAG) const;
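// Aside (a sketch for illustration, not code from this patch): inline-asm
// memory constraints are moving from raw chars ('m') to numeric IDs, so a
// target translates the string form once, up front. XCore's mapping above,
// spelled out:
#include "llvm/IR/InlineAsm.h"
#include <string>
static unsigned mapMemConstraint(const std::string &Code) {
  // Every memory constraint funnels to Constraint_m for now (see FIXME).
  return llvm::InlineAsm::Constraint_m;
}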
diff --git a/lib/Target/XCore/XCoreRegisterInfo.cpp b/lib/Target/XCore/XCoreRegisterInfo.cpp
index 5c666ae..1d569e8 100644
--- a/lib/Target/XCore/XCoreRegisterInfo.cpp
+++ b/lib/Target/XCore/XCoreRegisterInfo.cpp
@@ -208,8 +208,8 @@ bool XCoreRegisterInfo::needsFrameMoves(const MachineFunction &MF) {
MF.getFunction()->needsUnwindTableEntry();
}
-const MCPhysReg* XCoreRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF)
- const {
+const MCPhysReg *
+XCoreRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
// The callee saved registers LR & FP are explicitly handled during
// emitPrologue & emitEpilogue and related functions.
static const MCPhysReg CalleeSavedRegs[] = {
diff --git a/lib/Target/XCore/XCoreRegisterInfo.h b/lib/Target/XCore/XCoreRegisterInfo.h
index 5d7721c..010fccd 100644
--- a/lib/Target/XCore/XCoreRegisterInfo.h
+++ b/lib/Target/XCore/XCoreRegisterInfo.h
@@ -29,8 +29,7 @@ public:
/// Code Generation virtual methods...
- const MCPhysReg *
- getCalleeSavedRegs(const MachineFunction *MF =nullptr) const override;
+ const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override;
BitVector getReservedRegs(const MachineFunction &MF) const override;
diff --git a/lib/Target/XCore/XCoreTargetMachine.cpp b/lib/Target/XCore/XCoreTargetMachine.cpp
index 7998fc1..228dc1c 100644
--- a/lib/Target/XCore/XCoreTargetMachine.cpp
+++ b/lib/Target/XCore/XCoreTargetMachine.cpp
@@ -27,9 +27,10 @@ XCoreTargetMachine::XCoreTargetMachine(const Target &T, StringRef TT,
const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL)
- : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
+ : LLVMTargetMachine(
+ T, "e-m:e-p:32:32-i1:8:32-i8:8:32-i16:16:32-i64:32-f64:32-a:0:32-n32",
+ TT, CPU, FS, Options, RM, CM, OL),
TLOF(make_unique<XCoreTargetObjectFile>()),
- DL("e-m:e-p:32:32-i1:8:32-i8:8:32-i16:16:32-i64:32-f64:32-a:0:32-n32"),
Subtarget(TT, CPU, FS, *this) {
initAsmInfo();
}
diff --git a/lib/Target/XCore/XCoreTargetMachine.h b/lib/Target/XCore/XCoreTargetMachine.h
index c5df07c..0d324ab 100644
--- a/lib/Target/XCore/XCoreTargetMachine.h
+++ b/lib/Target/XCore/XCoreTargetMachine.h
@@ -21,7 +21,6 @@ namespace llvm {
class XCoreTargetMachine : public LLVMTargetMachine {
std::unique_ptr<TargetLoweringObjectFile> TLOF;
- const DataLayout DL; // Calculates type size & alignment
XCoreSubtarget Subtarget;
public:
XCoreTargetMachine(const Target &T, StringRef TT,
@@ -30,8 +29,10 @@ public:
CodeGenOpt::Level OL);
~XCoreTargetMachine() override;
- const DataLayout *getDataLayout() const override { return &DL; }
- const XCoreSubtarget *getSubtargetImpl() const override { return &Subtarget; }
+ const XCoreSubtarget *getSubtargetImpl() const { return &Subtarget; }
+ const XCoreSubtarget *getSubtargetImpl(const Function &) const override {
+ return &Subtarget;
+ }
// Pass Pipeline Configuration
TargetPassConfig *createPassConfig(PassManagerBase &PM) override;