author     Stephen Hines <srhines@google.com>  2014-07-21 00:45:20 -0700
committer  Stephen Hines <srhines@google.com>  2014-07-21 00:45:20 -0700
commit     c6a4f5e819217e1e12c458aed8e7b122e23a3a58 (patch)
tree       81b7dd2bb4370a392f31d332a566c903b5744764 /lib/Target
parent     19c6fbb3e8aaf74093afa08013134b61fa08f245 (diff)
Update LLVM for rebase to r212749.
Includes a cherry-pick of: r212948 - fixes a small issue with atomic calls

Change-Id: Ib97bd980b59f18142a69506400911a6009d9df18
Diffstat (limited to 'lib/Target')
-rw-r--r--  lib/Target/AArch64/AArch64.td | 3
-rw-r--r--  lib/Target/AArch64/AArch64AddressTypePromotion.cpp | 21
-rw-r--r--  lib/Target/AArch64/AArch64AsmPrinter.cpp | 2
-rw-r--r--  lib/Target/AArch64/AArch64BranchRelaxation.cpp | 6
-rw-r--r--  lib/Target/AArch64/AArch64CallingConvention.td | 14
-rw-r--r--  lib/Target/AArch64/AArch64FastISel.cpp | 51
-rw-r--r--  lib/Target/AArch64/AArch64FrameLowering.cpp | 7
-rw-r--r--  lib/Target/AArch64/AArch64FrameLowering.h | 11
-rw-r--r--  lib/Target/AArch64/AArch64ISelDAGToDAG.cpp | 10
-rw-r--r--  lib/Target/AArch64/AArch64ISelLowering.cpp | 279
-rw-r--r--  lib/Target/AArch64/AArch64ISelLowering.h | 5
-rw-r--r--  lib/Target/AArch64/AArch64InstrFormats.td | 65
-rw-r--r--  lib/Target/AArch64/AArch64InstrInfo.cpp | 48
-rw-r--r--  lib/Target/AArch64/AArch64InstrInfo.h | 4
-rw-r--r--  lib/Target/AArch64/AArch64InstrInfo.td | 89
-rw-r--r--  lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp | 81
-rw-r--r--  lib/Target/AArch64/AArch64MCInstLower.cpp | 4
-rw-r--r--  lib/Target/AArch64/AArch64RegisterInfo.td | 2
-rw-r--r--  lib/Target/AArch64/AArch64SchedA53.td | 4
-rw-r--r--  lib/Target/AArch64/AArch64SchedA57.td | 304
-rw-r--r--  lib/Target/AArch64/AArch64SchedA57WriteRes.td | 512
-rw-r--r--  lib/Target/AArch64/AArch64SelectionDAGInfo.cpp | 11
-rw-r--r--  lib/Target/AArch64/AArch64SelectionDAGInfo.h | 6
-rw-r--r--  lib/Target/AArch64/AArch64Subtarget.cpp | 28
-rw-r--r--  lib/Target/AArch64/AArch64Subtarget.h | 34
-rw-r--r--  lib/Target/AArch64/AArch64TargetMachine.cpp | 36
-rw-r--r--  lib/Target/AArch64/AArch64TargetMachine.h | 27
-rw-r--r--  lib/Target/AArch64/AArch64TargetTransformInfo.cpp | 64
-rw-r--r--  lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp | 461
-rw-r--r--  lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.cpp | 3
-rw-r--r--  lib/Target/AArch64/Disassembler/CMakeLists.txt | 8
-rw-r--r--  lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp | 4
-rw-r--r--  lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp | 2
-rw-r--r--  lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp | 2
-rw-r--r--  lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp | 8
-rw-r--r--  lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp | 33
-rw-r--r--  lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h | 2
-rw-r--r--  lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp | 2
-rw-r--r--  lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp | 40
-rw-r--r--  lib/Target/AArch64/MCTargetDesc/Android.mk | 3
-rw-r--r--  lib/Target/AArch64/MCTargetDesc/CMakeLists.txt | 1
-rw-r--r--  lib/Target/AArch64/Utils/AArch64BaseInfo.h | 20
-rw-r--r--  lib/Target/ARM/A15SDOptimizer.cpp | 3
-rw-r--r--  lib/Target/ARM/ARMAsmPrinter.cpp | 89
-rw-r--r--  lib/Target/ARM/ARMBaseInstrInfo.cpp | 38
-rw-r--r--  lib/Target/ARM/ARMBaseInstrInfo.h | 9
-rw-r--r--  lib/Target/ARM/ARMBaseRegisterInfo.cpp | 9
-rw-r--r--  lib/Target/ARM/ARMCodeEmitter.cpp | 1
-rw-r--r--  lib/Target/ARM/ARMExpandPseudoInsts.cpp | 14
-rw-r--r--  lib/Target/ARM/ARMFastISel.cpp | 2
-rw-r--r--  lib/Target/ARM/ARMFrameLowering.cpp | 18
-rw-r--r--  lib/Target/ARM/ARMFrameLowering.h | 6
-rw-r--r--  lib/Target/ARM/ARMISelDAGToDAG.cpp | 17
-rw-r--r--  lib/Target/ARM/ARMISelLowering.cpp | 205
-rw-r--r--  lib/Target/ARM/ARMISelLowering.h | 6
-rw-r--r--  lib/Target/ARM/ARMInstrInfo.td | 35
-rw-r--r--  lib/Target/ARM/ARMInstrNEON.td | 165
-rw-r--r--  lib/Target/ARM/ARMInstrThumb2.td | 9
-rw-r--r--  lib/Target/ARM/ARMJITInfo.cpp | 8
-rw-r--r--  lib/Target/ARM/ARMJITInfo.h | 8
-rw-r--r--  lib/Target/ARM/ARMLoadStoreOptimizer.cpp | 20
-rw-r--r--  lib/Target/ARM/ARMMCInstLower.cpp | 2
-rw-r--r--  lib/Target/ARM/ARMMachineFunctionInfo.cpp | 10
-rw-r--r--  lib/Target/ARM/ARMMachineFunctionInfo.h | 13
-rw-r--r--  lib/Target/ARM/ARMSelectionDAGInfo.cpp | 18
-rw-r--r--  lib/Target/ARM/ARMSelectionDAGInfo.h | 6
-rw-r--r--  lib/Target/ARM/ARMSubtarget.cpp | 126
-rw-r--r--  lib/Target/ARM/ARMSubtarget.h | 52
-rw-r--r--  lib/Target/ARM/ARMTargetMachine.cpp | 164
-rw-r--r--  lib/Target/ARM/ARMTargetMachine.h | 119
-rw-r--r--  lib/Target/ARM/ARMTargetTransformInfo.cpp | 67
-rw-r--r--  lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 773
-rw-r--r--  lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp | 4
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h | 7
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp | 11
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp | 3
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp | 31
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMMCExpr.h | 2
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp | 6
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h | 2
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp | 10
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp | 235
-rw-r--r--  lib/Target/ARM/Thumb1FrameLowering.cpp | 3
-rw-r--r--  lib/Target/ARM/Thumb1FrameLowering.h | 9
-rw-r--r--  lib/Target/ARM/Thumb2SizeReduction.cpp | 3
-rw-r--r--  lib/Target/CppBackend/CPPBackend.cpp | 6
-rw-r--r--  lib/Target/Hexagon/HexagonFrameLowering.cpp | 4
-rw-r--r--  lib/Target/Hexagon/HexagonFrameLowering.h | 6
-rw-r--r--  lib/Target/Hexagon/HexagonISelLowering.cpp | 767
-rw-r--r--  lib/Target/Hexagon/HexagonISelLowering.h | 5
-rw-r--r--  lib/Target/Hexagon/HexagonInstrInfo.cpp | 7
-rw-r--r--  lib/Target/Hexagon/HexagonMachineScheduler.cpp | 4
-rw-r--r--  lib/Target/Hexagon/HexagonMachineScheduler.h | 2
-rw-r--r--  lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp | 6
-rw-r--r--  lib/Target/Hexagon/HexagonSelectionDAGInfo.h | 4
-rw-r--r--  lib/Target/Hexagon/HexagonSubtarget.cpp | 15
-rw-r--r--  lib/Target/Hexagon/HexagonSubtarget.h | 27
-rw-r--r--  lib/Target/Hexagon/HexagonTargetMachine.cpp | 11
-rw-r--r--  lib/Target/Hexagon/HexagonTargetMachine.h | 32
-rw-r--r--  lib/Target/MSP430/MSP430FrameLowering.h | 9
-rw-r--r--  lib/Target/MSP430/MSP430ISelLowering.cpp | 11
-rw-r--r--  lib/Target/MSP430/MSP430ISelLowering.h | 8
-rw-r--r--  lib/Target/MSP430/MSP430InstrInfo.cpp | 4
-rw-r--r--  lib/Target/MSP430/MSP430InstrInfo.h | 4
-rw-r--r--  lib/Target/MSP430/MSP430RegisterInfo.cpp | 6
-rw-r--r--  lib/Target/MSP430/MSP430RegisterInfo.h | 11
-rw-r--r--  lib/Target/MSP430/MSP430SelectionDAGInfo.cpp | 5
-rw-r--r--  lib/Target/MSP430/MSP430SelectionDAGInfo.h | 2
-rw-r--r--  lib/Target/MSP430/MSP430Subtarget.cpp | 19
-rw-r--r--  lib/Target/MSP430/MSP430Subtarget.h | 25
-rw-r--r--  lib/Target/MSP430/MSP430TargetMachine.cpp | 14
-rw-r--r--  lib/Target/MSP430/MSP430TargetMachine.h | 36
-rw-r--r--  lib/Target/Mips/Android.mk | 1
-rw-r--r--  lib/Target/Mips/AsmParser/MipsAsmParser.cpp | 781
-rw-r--r--  lib/Target/Mips/Disassembler/MipsDisassembler.cpp | 172
-rw-r--r--  lib/Target/Mips/MCTargetDesc/Android.mk | 1
-rw-r--r--  lib/Target/Mips/MCTargetDesc/CMakeLists.txt | 1
-rw-r--r--  lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.cpp | 60
-rw-r--r--  lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.h | 237
-rw-r--r--  lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp | 18
-rw-r--r--  lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h | 2
-rw-r--r--  lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp | 6
-rw-r--r--  lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h | 6
-rw-r--r--  lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp | 2
-rw-r--r--  lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp | 41
-rw-r--r--  lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.h | 4
-rw-r--r--  lib/Target/Mips/MCTargetDesc/MipsMCExpr.cpp | 32
-rw-r--r--  lib/Target/Mips/MCTargetDesc/MipsMCExpr.h | 2
-rw-r--r--  lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp | 12
-rw-r--r--  lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp | 36
-rw-r--r--  lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp | 171
-rw-r--r--  lib/Target/Mips/MicroMipsInstrFPU.td | 12
-rw-r--r--  lib/Target/Mips/MicroMipsInstrInfo.td | 1
-rw-r--r--  lib/Target/Mips/Mips.td | 5
-rw-r--r--  lib/Target/Mips/Mips16FrameLowering.cpp | 4
-rw-r--r--  lib/Target/Mips/Mips16FrameLowering.h | 3
-rw-r--r--  lib/Target/Mips/Mips16ISelDAGToDAG.cpp | 6
-rw-r--r--  lib/Target/Mips/Mips16ISelLowering.cpp | 7
-rw-r--r--  lib/Target/Mips/Mips16ISelLowering.h | 4
-rw-r--r--  lib/Target/Mips/Mips16InstrInfo.td | 8
-rw-r--r--  lib/Target/Mips/Mips32r6InstrFormats.td | 177
-rw-r--r--  lib/Target/Mips/Mips32r6InstrInfo.td | 441
-rw-r--r--  lib/Target/Mips/Mips64InstrInfo.td | 97
-rw-r--r--  lib/Target/Mips/Mips64r6InstrInfo.td | 163
-rw-r--r--  lib/Target/Mips/MipsAsmPrinter.cpp | 56
-rw-r--r--  lib/Target/Mips/MipsAsmPrinter.h | 6
-rw-r--r--  lib/Target/Mips/MipsCallingConv.td | 5
-rw-r--r--  lib/Target/Mips/MipsCodeEmitter.cpp | 7
-rw-r--r--  lib/Target/Mips/MipsCondMov.td | 146
-rw-r--r--  lib/Target/Mips/MipsDSPInstrFormats.td | 4
-rw-r--r--  lib/Target/Mips/MipsDelaySlotFiller.cpp | 8
-rw-r--r--  lib/Target/Mips/MipsFastISel.cpp | 175
-rw-r--r--  lib/Target/Mips/MipsFrameLowering.h | 1
-rw-r--r--  lib/Target/Mips/MipsISelDAGToDAG.cpp | 3
-rw-r--r--  lib/Target/Mips/MipsISelDAGToDAG.h | 4
-rw-r--r--  lib/Target/Mips/MipsISelLowering.cpp | 247
-rw-r--r--  lib/Target/Mips/MipsISelLowering.h | 15
-rw-r--r--  lib/Target/Mips/MipsInstrFPU.td | 116
-rw-r--r--  lib/Target/Mips/MipsInstrFormats.td | 52
-rw-r--r--  lib/Target/Mips/MipsInstrInfo.td | 313
-rw-r--r--  lib/Target/Mips/MipsLongBranch.cpp | 62
-rw-r--r--  lib/Target/Mips/MipsMSAInstrFormats.td | 2
-rw-r--r--  lib/Target/Mips/MipsMachineFunction.h | 1
-rw-r--r--  lib/Target/Mips/MipsRegisterInfo.cpp | 11
-rw-r--r--  lib/Target/Mips/MipsRegisterInfo.td | 16
-rw-r--r--  lib/Target/Mips/MipsSEFrameLowering.cpp | 4
-rw-r--r--  lib/Target/Mips/MipsSEFrameLowering.h | 3
-rw-r--r--  lib/Target/Mips/MipsSEISelDAGToDAG.cpp | 24
-rw-r--r--  lib/Target/Mips/MipsSEISelLowering.cpp | 99
-rw-r--r--  lib/Target/Mips/MipsSEISelLowering.h | 12
-rw-r--r--  lib/Target/Mips/MipsSEInstrInfo.cpp | 57
-rw-r--r--  lib/Target/Mips/MipsSEInstrInfo.h | 3
-rw-r--r--  lib/Target/Mips/MipsSelectionDAGInfo.cpp | 5
-rw-r--r--  lib/Target/Mips/MipsSelectionDAGInfo.h | 2
-rw-r--r--  lib/Target/Mips/MipsSubtarget.cpp | 145
-rw-r--r--  lib/Target/Mips/MipsSubtarget.h | 72
-rw-r--r--  lib/Target/Mips/MipsTargetMachine.cpp | 86
-rw-r--r--  lib/Target/Mips/MipsTargetMachine.h | 71
-rw-r--r--  lib/Target/Mips/MipsTargetStreamer.h | 108
-rw-r--r--  lib/Target/NVPTX/NVPTX.td | 7
-rw-r--r--  lib/Target/NVPTX/NVPTXAsmPrinter.cpp | 72
-rw-r--r--  lib/Target/NVPTX/NVPTXFrameLowering.cpp | 24
-rw-r--r--  lib/Target/NVPTX/NVPTXFrameLowering.h | 8
-rw-r--r--  lib/Target/NVPTX/NVPTXGenericToNVVM.cpp | 2
-rw-r--r--  lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp | 532
-rw-r--r--  lib/Target/NVPTX/NVPTXISelDAGToDAG.h | 4
-rw-r--r--  lib/Target/NVPTX/NVPTXISelLowering.cpp | 797
-rw-r--r--  lib/Target/NVPTX/NVPTXISelLowering.h | 19
-rw-r--r--  lib/Target/NVPTX/NVPTXImageOptimizer.cpp | 2
-rw-r--r--  lib/Target/NVPTX/NVPTXInstrInfo.cpp | 4
-rw-r--r--  lib/Target/NVPTX/NVPTXInstrInfo.h | 3
-rw-r--r--  lib/Target/NVPTX/NVPTXInstrInfo.td | 348
-rw-r--r--  lib/Target/NVPTX/NVPTXIntrinsics.td | 418
-rw-r--r--  lib/Target/NVPTX/NVPTXMCExpr.h | 2
-rw-r--r--  lib/Target/NVPTX/NVPTXRegisterInfo.td | 7
-rw-r--r--  lib/Target/NVPTX/NVPTXSubtarget.cpp | 57
-rw-r--r--  lib/Target/NVPTX/NVPTXSubtarget.h | 35
-rw-r--r--  lib/Target/NVPTX/NVPTXTargetMachine.cpp | 66
-rw-r--r--  lib/Target/NVPTX/NVPTXTargetMachine.h | 42
-rw-r--r--  lib/Target/NVPTX/NVVMReflect.cpp | 69
-rw-r--r--  lib/Target/NVPTX/cl_common_defines.h | 6
-rw-r--r--  lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp | 103
-rw-r--r--  lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp | 46
-rw-r--r--  lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp | 32
-rw-r--r--  lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h | 2
-rw-r--r--  lib/Target/PowerPC/PPC.td | 10
-rw-r--r--  lib/Target/PowerPC/PPCAsmPrinter.cpp | 24
-rw-r--r--  lib/Target/PowerPC/PPCFastISel.cpp | 38
-rw-r--r--  lib/Target/PowerPC/PPCFrameLowering.cpp | 168
-rw-r--r--  lib/Target/PowerPC/PPCFrameLowering.h | 198
-rw-r--r--  lib/Target/PowerPC/PPCHazardRecognizers.cpp | 11
-rw-r--r--  lib/Target/PowerPC/PPCHazardRecognizers.h | 4
-rw-r--r--  lib/Target/PowerPC/PPCISelDAGToDAG.cpp | 14
-rw-r--r--  lib/Target/PowerPC/PPCISelLowering.cpp | 1002
-rw-r--r--  lib/Target/PowerPC/PPCISelLowering.h | 34
-rw-r--r--  lib/Target/PowerPC/PPCInstr64Bit.td | 16
-rw-r--r--  lib/Target/PowerPC/PPCInstrAltivec.td | 62
-rw-r--r--  lib/Target/PowerPC/PPCInstrFormats.td | 14
-rw-r--r--  lib/Target/PowerPC/PPCInstrInfo.cpp | 58
-rw-r--r--  lib/Target/PowerPC/PPCInstrInfo.h | 6
-rw-r--r--  lib/Target/PowerPC/PPCInstrInfo.td | 3
-rw-r--r--  lib/Target/PowerPC/PPCJITInfo.cpp | 9
-rw-r--r--  lib/Target/PowerPC/PPCJITInfo.h | 43
-rw-r--r--  lib/Target/PowerPC/PPCRegisterInfo.cpp | 8
-rw-r--r--  lib/Target/PowerPC/PPCSelectionDAGInfo.cpp | 8
-rw-r--r--  lib/Target/PowerPC/PPCSelectionDAGInfo.h | 2
-rw-r--r--  lib/Target/PowerPC/PPCSubtarget.cpp | 58
-rw-r--r--  lib/Target/PowerPC/PPCSubtarget.h | 29
-rw-r--r--  lib/Target/PowerPC/PPCTargetMachine.cpp | 51
-rw-r--r--  lib/Target/PowerPC/PPCTargetMachine.h | 35
-rw-r--r--  lib/Target/R600/AMDGPU.h | 8
-rw-r--r--  lib/Target/R600/AMDGPU.td | 46
-rw-r--r--  lib/Target/R600/AMDGPUAsmPrinter.cpp | 70
-rw-r--r--  lib/Target/R600/AMDGPUAsmPrinter.h | 23
-rw-r--r--  lib/Target/R600/AMDGPUConvertToISA.cpp | 62
-rw-r--r--  lib/Target/R600/AMDGPUFrameLowering.cpp | 2
-rw-r--r--  lib/Target/R600/AMDGPUISelDAGToDAG.cpp | 209
-rw-r--r--  lib/Target/R600/AMDGPUISelLowering.cpp | 987
-rw-r--r--  lib/Target/R600/AMDGPUISelLowering.h | 95
-rw-r--r--  lib/Target/R600/AMDGPUInstrInfo.cpp | 28
-rw-r--r--  lib/Target/R600/AMDGPUInstrInfo.h | 19
-rw-r--r--  lib/Target/R600/AMDGPUInstrInfo.td | 69
-rw-r--r--  lib/Target/R600/AMDGPUInstructions.td | 97
-rw-r--r--  lib/Target/R600/AMDGPUIntrinsicInfo.cpp (renamed from lib/Target/R600/AMDILIntrinsicInfo.cpp) | 40
-rw-r--r--  lib/Target/R600/AMDGPUIntrinsicInfo.h (renamed from lib/Target/R600/AMDILIntrinsicInfo.h) | 21
-rw-r--r--  lib/Target/R600/AMDGPUIntrinsics.td | 29
-rw-r--r--  lib/Target/R600/AMDGPUMCInstLower.cpp | 1
-rw-r--r--  lib/Target/R600/AMDGPUMCInstLower.h | 4
-rw-r--r--  lib/Target/R600/AMDGPUPromoteAlloca.cpp | 387
-rw-r--r--  lib/Target/R600/AMDGPURegisterInfo.cpp | 4
-rw-r--r--  lib/Target/R600/AMDGPURegisterInfo.h | 14
-rw-r--r--  lib/Target/R600/AMDGPUSubtarget.cpp | 100
-rw-r--r--  lib/Target/R600/AMDGPUSubtarget.h | 100
-rw-r--r--  lib/Target/R600/AMDGPUTargetMachine.cpp | 13
-rw-r--r--  lib/Target/R600/AMDGPUTargetMachine.h | 7
-rw-r--r--  lib/Target/R600/AMDILBase.td | 25
-rw-r--r--  lib/Target/R600/AMDILISelLowering.cpp | 560
-rw-r--r--  lib/Target/R600/AMDILInstrInfo.td | 150
-rw-r--r--  lib/Target/R600/AMDILIntrinsics.td | 224
-rw-r--r--  lib/Target/R600/AMDILRegisterInfo.td | 107
-rw-r--r--  lib/Target/R600/CMakeLists.txt | 6
-rw-r--r--  lib/Target/R600/EvergreenInstructions.td | 11
-rw-r--r--  lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp | 15
-rw-r--r--  lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp | 14
-rw-r--r--  lib/Target/R600/R600ControlFlowFinalizer.cpp | 1
-rw-r--r--  lib/Target/R600/R600ISelLowering.cpp | 314
-rw-r--r--  lib/Target/R600/R600ISelLowering.h | 9
-rw-r--r--  lib/Target/R600/R600InstrInfo.cpp | 90
-rw-r--r--  lib/Target/R600/R600InstrInfo.h | 18
-rw-r--r--  lib/Target/R600/R600Instructions.td | 158
-rw-r--r--  lib/Target/R600/R600MachineScheduler.cpp | 1
-rw-r--r--  lib/Target/R600/R600Packetizer.cpp | 1
-rw-r--r--  lib/Target/R600/R600RegisterInfo.cpp | 17
-rw-r--r--  lib/Target/R600/R600RegisterInfo.h | 12
-rw-r--r--  lib/Target/R600/R600RegisterInfo.td | 48
-rw-r--r--  lib/Target/R600/SIAnnotateControlFlow.cpp | 46
-rw-r--r--  lib/Target/R600/SIDefines.h | 50
-rw-r--r--  lib/Target/R600/SIFixSGPRLiveRanges.cpp | 110
-rw-r--r--  lib/Target/R600/SIISelLowering.cpp | 303
-rw-r--r--  lib/Target/R600/SIISelLowering.h | 10
-rw-r--r--  lib/Target/R600/SIInsertWaits.cpp | 2
-rw-r--r--  lib/Target/R600/SIInstrFormats.td | 26
-rw-r--r--  lib/Target/R600/SIInstrInfo.cpp | 223
-rw-r--r--  lib/Target/R600/SIInstrInfo.h | 12
-rw-r--r--  lib/Target/R600/SIInstrInfo.td | 152
-rw-r--r--  lib/Target/R600/SIInstructions.td | 868
-rw-r--r--  lib/Target/R600/SIIntrinsics.td | 50
-rw-r--r--  lib/Target/R600/SILowerControlFlow.cpp | 87
-rw-r--r--  lib/Target/R600/SIMachineFunctionInfo.cpp | 6
-rw-r--r--  lib/Target/R600/SIRegisterInfo.cpp | 34
-rw-r--r--  lib/Target/R600/SIRegisterInfo.h | 19
-rw-r--r--  lib/Target/R600/SIRegisterInfo.td | 2
-rw-r--r--  lib/Target/R600/SITypeRewriter.cpp | 3
-rw-r--r--  lib/Target/Sparc/AsmParser/SparcAsmParser.cpp | 153
-rw-r--r--  lib/Target/Sparc/InstPrinter/SparcInstPrinter.cpp | 2
-rw-r--r--  lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp | 4
-rw-r--r--  lib/Target/Sparc/MCTargetDesc/SparcMCCodeEmitter.cpp | 2
-rw-r--r--  lib/Target/Sparc/MCTargetDesc/SparcMCExpr.cpp | 36
-rw-r--r--  lib/Target/Sparc/MCTargetDesc/SparcMCExpr.h | 2
-rw-r--r--  lib/Target/Sparc/SparcFrameLowering.cpp | 11
-rw-r--r--  lib/Target/Sparc/SparcFrameLowering.h | 9
-rw-r--r--  lib/Target/Sparc/SparcISelLowering.cpp | 4
-rw-r--r--  lib/Target/Sparc/SparcJITInfo.cpp | 3
-rw-r--r--  lib/Target/Sparc/SparcSelectionDAGInfo.cpp | 6
-rw-r--r--  lib/Target/Sparc/SparcSelectionDAGInfo.h | 2
-rw-r--r--  lib/Target/Sparc/SparcSubtarget.cpp | 52
-rw-r--r--  lib/Target/Sparc/SparcSubtarget.h | 26
-rw-r--r--  lib/Target/Sparc/SparcTargetMachine.cpp | 32
-rw-r--r--  lib/Target/Sparc/SparcTargetMachine.h | 36
-rw-r--r--  lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp | 172
-rw-r--r--  lib/Target/SystemZ/SystemZCallingConv.td | 10
-rw-r--r--  lib/Target/SystemZ/SystemZFrameLowering.cpp | 22
-rw-r--r--  lib/Target/SystemZ/SystemZFrameLowering.h | 8
-rw-r--r--  lib/Target/SystemZ/SystemZISelLowering.cpp | 63
-rw-r--r--  lib/Target/SystemZ/SystemZISelLowering.h | 3
-rw-r--r--  lib/Target/SystemZ/SystemZInstrFP.td | 7
-rw-r--r--  lib/Target/SystemZ/SystemZInstrFormats.td | 119
-rw-r--r--  lib/Target/SystemZ/SystemZInstrInfo.cpp | 12
-rw-r--r--  lib/Target/SystemZ/SystemZInstrInfo.h | 5
-rw-r--r--  lib/Target/SystemZ/SystemZInstrInfo.td | 44
-rw-r--r--  lib/Target/SystemZ/SystemZOperands.td | 29
-rw-r--r--  lib/Target/SystemZ/SystemZOperators.td | 12
-rw-r--r--  lib/Target/SystemZ/SystemZPatterns.td | 8
-rw-r--r--  lib/Target/SystemZ/SystemZRegisterInfo.cpp | 29
-rw-r--r--  lib/Target/SystemZ/SystemZRegisterInfo.h | 9
-rw-r--r--  lib/Target/SystemZ/SystemZRegisterInfo.td | 25
-rw-r--r--  lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp | 6
-rw-r--r--  lib/Target/SystemZ/SystemZSelectionDAGInfo.h | 2
-rw-r--r--  lib/Target/SystemZ/SystemZSubtarget.cpp | 28
-rw-r--r--  lib/Target/SystemZ/SystemZSubtarget.h | 26
-rw-r--r--  lib/Target/SystemZ/SystemZTargetMachine.cpp | 16
-rw-r--r--  lib/Target/SystemZ/SystemZTargetMachine.h | 26
-rw-r--r--  lib/Target/TargetMachine.cpp | 15
-rw-r--r--  lib/Target/TargetSubtargetInfo.cpp | 13
-rw-r--r--  lib/Target/X86/Android.mk | 1
-rw-r--r--  lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp | 371
-rw-r--r--  lib/Target/X86/AsmParser/X86AsmInstrumentation.h | 9
-rw-r--r--  lib/Target/X86/AsmParser/X86AsmParser.cpp | 287
-rw-r--r--  lib/Target/X86/AsmParser/X86Operand.h | 42
-rw-r--r--  lib/Target/X86/CMakeLists.txt | 1
-rw-r--r--  lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp | 3
-rw-r--r--  lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp | 46
-rw-r--r--  lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp | 17
-rw-r--r--  lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp | 13
-rw-r--r--  lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h | 3
-rw-r--r--  lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp | 6
-rw-r--r--  lib/Target/X86/X86.h | 4
-rw-r--r--  lib/Target/X86/X86.td | 7
-rw-r--r--  lib/Target/X86/X86AtomicExpandPass.cpp | 287
-rw-r--r--  lib/Target/X86/X86CodeEmitter.cpp | 17
-rw-r--r--  lib/Target/X86/X86FastISel.cpp | 1265
-rw-r--r--  lib/Target/X86/X86FixupLEAs.cpp | 175
-rw-r--r--  lib/Target/X86/X86FrameLowering.cpp | 407
-rw-r--r--  lib/Target/X86/X86FrameLowering.h | 20
-rw-r--r--  lib/Target/X86/X86ISelDAGToDAG.cpp | 32
-rw-r--r--  lib/Target/X86/X86ISelLowering.cpp | 3517
-rw-r--r--  lib/Target/X86/X86ISelLowering.h | 31
-rw-r--r--  lib/Target/X86/X86InstrAVX512.td | 301
-rw-r--r--  lib/Target/X86/X86InstrArithmetic.td | 6
-rw-r--r--  lib/Target/X86/X86InstrCompiler.td | 139
-rw-r--r--  lib/Target/X86/X86InstrFragmentsSIMD.td | 4
-rw-r--r--  lib/Target/X86/X86InstrInfo.cpp | 136
-rw-r--r--  lib/Target/X86/X86InstrInfo.h | 24
-rw-r--r--  lib/Target/X86/X86InstrInfo.td | 24
-rw-r--r--  lib/Target/X86/X86InstrSSE.td | 252
-rw-r--r--  lib/Target/X86/X86InstrSystem.td | 5
-rw-r--r--  lib/Target/X86/X86JITInfo.cpp | 6
-rw-r--r--  lib/Target/X86/X86JITInfo.h | 8
-rw-r--r--  lib/Target/X86/X86MCInstLower.cpp | 35
-rw-r--r--  lib/Target/X86/X86RegisterInfo.cpp | 54
-rw-r--r--  lib/Target/X86/X86RegisterInfo.h | 10
-rw-r--r--  lib/Target/X86/X86SelectionDAGInfo.cpp | 57
-rw-r--r--  lib/Target/X86/X86SelectionDAGInfo.h | 8
-rw-r--r--  lib/Target/X86/X86Subtarget.cpp | 59
-rw-r--r--  lib/Target/X86/X86Subtarget.h | 32
-rw-r--r--  lib/Target/X86/X86TargetMachine.cpp | 62
-rw-r--r--  lib/Target/X86/X86TargetMachine.h | 30
-rw-r--r--  lib/Target/X86/X86TargetTransformInfo.cpp | 167
-rw-r--r--  lib/Target/XCore/XCoreFrameLowering.cpp | 6
-rw-r--r--  lib/Target/XCore/XCoreISelLowering.cpp | 49
-rw-r--r--  lib/Target/XCore/XCoreISelLowering.h | 5
-rw-r--r--  lib/Target/XCore/XCoreInstrInfo.cpp | 9
-rw-r--r--  lib/Target/XCore/XCoreSelectionDAGInfo.cpp | 7
-rw-r--r--  lib/Target/XCore/XCoreSelectionDAGInfo.h | 2
-rw-r--r--  lib/Target/XCore/XCoreSubtarget.cpp | 10
-rw-r--r--  lib/Target/XCore/XCoreSubtarget.h | 21
-rw-r--r--  lib/Target/XCore/XCoreTargetMachine.cpp | 9
-rw-r--r--  lib/Target/XCore/XCoreTargetMachine.h | 28
388 files changed, 20335 insertions, 9870 deletions
diff --git a/lib/Target/AArch64/AArch64.td b/lib/Target/AArch64/AArch64.td
index 1ad5ac8..e6a27c3 100644
--- a/lib/Target/AArch64/AArch64.td
+++ b/lib/Target/AArch64/AArch64.td
@@ -60,6 +60,7 @@ def AArch64InstrInfo : InstrInfo;
// AArch64 Processors supported.
//
include "AArch64SchedA53.td"
+include "AArch64SchedA57.td"
include "AArch64SchedCyclone.td"
def ProcA53 : SubtargetFeature<"a53", "ARMProcFamily", "CortexA53",
@@ -89,7 +90,7 @@ def : ProcessorModel<"generic", NoSchedModel, [FeatureFPARMv8,
FeatureCRC]>;
def : ProcessorModel<"cortex-a53", CortexA53Model, [ProcA53]>;
-def : ProcessorModel<"cortex-a57", NoSchedModel, [ProcA57]>;
+def : ProcessorModel<"cortex-a57", CortexA57Model, [ProcA57]>;
def : ProcessorModel<"cyclone", CycloneModel, [ProcCyclone]>;
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/AArch64/AArch64AddressTypePromotion.cpp b/lib/Target/AArch64/AArch64AddressTypePromotion.cpp
index 04906f6..ab2c4b7 100644
--- a/lib/Target/AArch64/AArch64AddressTypePromotion.cpp
+++ b/lib/Target/AArch64/AArch64AddressTypePromotion.cpp
@@ -214,8 +214,8 @@ AArch64AddressTypePromotion::shouldConsiderSExt(const Instruction *SExt) const {
if (SExt->getType() != ConsideredSExtType)
return false;
- for (const Use &U : SExt->uses()) {
- if (isa<GetElementPtrInst>(*U))
+ for (const User *U : SExt->users()) {
+ if (isa<GetElementPtrInst>(U))
return true;
}
@@ -267,8 +267,7 @@ AArch64AddressTypePromotion::propagateSignExtension(Instructions &SExtInsts) {
}
// Now try to get through the chain of definitions.
- while (isa<Instruction>(SExt->getOperand(0))) {
- Instruction *Inst = dyn_cast<Instruction>(SExt->getOperand(0));
+ while (auto *Inst = dyn_cast<Instruction>(SExt->getOperand(0))) {
DEBUG(dbgs() << "Try to get through:\n" << *Inst << '\n');
if (!canGetThrough(Inst) || !shouldGetThrough(Inst)) {
// We cannot get through something that is not an Instruction
@@ -285,10 +284,10 @@ AArch64AddressTypePromotion::propagateSignExtension(Instructions &SExtInsts) {
// assertion on the type as all involved sext operation may have not
// been moved yet.
while (!Inst->use_empty()) {
- Value::use_iterator UseIt = Inst->use_begin();
- Instruction *UseInst = dyn_cast<Instruction>(*UseIt);
- assert(UseInst && "Use of sext is not an Instruction!");
- UseInst->setOperand(UseIt->getOperandNo(), SExt);
+ Use &U = *Inst->use_begin();
+ Instruction *User = dyn_cast<Instruction>(U.getUser());
+ assert(User && "User of sext is not an Instruction!");
+ User->setOperand(U.getOperandNo(), SExt);
}
ToRemove.insert(Inst);
SExt->setOperand(0, Inst->getOperand(0));
@@ -385,11 +384,11 @@ void AArch64AddressTypePromotion::mergeSExts(ValueToInsts &ValToSExtendedUses,
if (ToRemove.count(Inst))
continue;
bool inserted = false;
- for (auto Pt : CurPts) {
+ for (auto &Pt : CurPts) {
if (DT.dominates(Inst, Pt)) {
DEBUG(dbgs() << "Replace all uses of:\n" << *Pt << "\nwith:\n"
<< *Inst << '\n');
- (Pt)->replaceAllUsesWith(Inst);
+ Pt->replaceAllUsesWith(Inst);
ToRemove.insert(Pt);
Pt = Inst;
inserted = true;
@@ -436,7 +435,7 @@ void AArch64AddressTypePromotion::analyzeSExtension(Instructions &SExtInsts) {
bool insert = false;
// #1.
- for (const Use &U : SExt->uses()) {
+ for (const User *U : SExt->users()) {
const Instruction *Inst = dyn_cast<GetElementPtrInst>(U);
if (Inst && Inst->getNumOperands() > 2) {
DEBUG(dbgs() << "Interesting use in GetElementPtrInst\n" << *Inst
diff --git a/lib/Target/AArch64/AArch64AsmPrinter.cpp b/lib/Target/AArch64/AArch64AsmPrinter.cpp
index c3ee9bb..cd94e24 100644
--- a/lib/Target/AArch64/AArch64AsmPrinter.cpp
+++ b/lib/Target/AArch64/AArch64AsmPrinter.cpp
@@ -211,7 +211,7 @@ void AArch64AsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNum,
const MachineOperand &MO = MI->getOperand(OpNum);
switch (MO.getType()) {
default:
- assert(0 && "<unknown operand type>");
+ llvm_unreachable("<unknown operand type>");
case MachineOperand::MO_Register: {
unsigned Reg = MO.getReg();
assert(TargetRegisterInfo::isPhysicalRegister(Reg));
diff --git a/lib/Target/AArch64/AArch64BranchRelaxation.cpp b/lib/Target/AArch64/AArch64BranchRelaxation.cpp
index 5209452..484e7e8 100644
--- a/lib/Target/AArch64/AArch64BranchRelaxation.cpp
+++ b/lib/Target/AArch64/AArch64BranchRelaxation.cpp
@@ -291,7 +291,7 @@ static bool isConditionalBranch(unsigned Opc) {
static MachineBasicBlock *getDestBlock(MachineInstr *MI) {
switch (MI->getOpcode()) {
default:
- assert(0 && "unexpected opcode!");
+ llvm_unreachable("unexpected opcode!");
case AArch64::TBZW:
case AArch64::TBNZW:
case AArch64::TBZX:
@@ -309,7 +309,7 @@ static MachineBasicBlock *getDestBlock(MachineInstr *MI) {
static unsigned getOppositeConditionOpcode(unsigned Opc) {
switch (Opc) {
default:
- assert(0 && "unexpected opcode!");
+ llvm_unreachable("unexpected opcode!");
case AArch64::TBNZW: return AArch64::TBZW;
case AArch64::TBNZX: return AArch64::TBZX;
case AArch64::TBZW: return AArch64::TBNZW;
@@ -325,7 +325,7 @@ static unsigned getOppositeConditionOpcode(unsigned Opc) {
static unsigned getBranchDisplacementBits(unsigned Opc) {
switch (Opc) {
default:
- assert(0 && "unexpected opcode!");
+ llvm_unreachable("unexpected opcode!");
case AArch64::TBNZW:
case AArch64::TBZW:
case AArch64::TBNZX:
diff --git a/lib/Target/AArch64/AArch64CallingConvention.td b/lib/Target/AArch64/AArch64CallingConvention.td
index ded2e17..8e8bd3d 100644
--- a/lib/Target/AArch64/AArch64CallingConvention.td
+++ b/lib/Target/AArch64/AArch64CallingConvention.td
@@ -18,9 +18,6 @@ class CCIfAlign<string Align, CCAction A> :
class CCIfBigEndian<CCAction A> :
CCIf<"State.getTarget().getDataLayout()->isBigEndian()", A>;
-class CCIfUnallocated<string Reg, CCAction A> :
- CCIf<"!State.isAllocated(AArch64::" # Reg # ")", A>;
-
//===----------------------------------------------------------------------===//
// ARM AAPCS64 Calling Convention
//===----------------------------------------------------------------------===//
@@ -45,7 +42,7 @@ def CC_AArch64_AAPCS : CallingConv<[
// Handle i1, i8, i16, i32, i64, f32, f64 and v2f64 by passing in registers,
// up to eight each of GPR and FPR.
- CCIfType<[i1, i8, i16], CCIfUnallocated<"X7", CCPromoteToType<i32>>>,
+ CCIfType<[i1, i8, i16], CCPromoteToType<i32>>,
CCIfType<[i32], CCAssignToRegWithShadow<[W0, W1, W2, W3, W4, W5, W6, W7],
[X0, X1, X2, X3, X4, X5, X6, X7]>>,
// i128 is split to two i64s, we can't fit half to register X7.
@@ -120,7 +117,7 @@ def CC_AArch64_DarwinPCS : CallingConv<[
// Handle i1, i8, i16, i32, i64, f32, f64 and v2f64 by passing in registers,
// up to eight each of GPR and FPR.
- CCIfType<[i1, i8, i16], CCIfUnallocated<"X7", CCPromoteToType<i32>>>,
+ CCIfType<[i1, i8, i16], CCPromoteToType<i32>>,
CCIfType<[i32], CCAssignToRegWithShadow<[W0, W1, W2, W3, W4, W5, W6, W7],
[X0, X1, X2, X3, X4, X5, X6, X7]>>,
// i128 is split to two i64s, we can't fit half to register X7.
@@ -143,8 +140,8 @@ def CC_AArch64_DarwinPCS : CallingConv<[
CCAssignToReg<[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
// If more than will fit in registers, pass them on the stack instead.
- CCIfType<[i1, i8], CCAssignToStack<1, 1>>,
- CCIfType<[i16], CCAssignToStack<2, 2>>,
+ CCIf<"ValVT == MVT::i1 || ValVT == MVT::i8", CCAssignToStack<1, 1>>,
+ CCIf<"ValVT == MVT::i16", CCAssignToStack<2, 2>>,
CCIfType<[i32, f32], CCAssignToStack<4, 4>>,
CCIfType<[i64, f64, v1f64, v2f32, v1i64, v2i32, v4i16, v8i8],
CCAssignToStack<8, 8>>,
@@ -172,12 +169,11 @@ def CC_AArch64_DarwinPCS_VarArg : CallingConv<[
// 32bit quantity as undef.
def CC_AArch64_WebKit_JS : CallingConv<[
// Handle i1, i8, i16, i32, and i64 passing in register X0 (W0).
- CCIfType<[i1, i8, i16], CCIfUnallocated<"X0", CCPromoteToType<i32>>>,
+ CCIfType<[i1, i8, i16], CCPromoteToType<i32>>,
CCIfType<[i32], CCAssignToRegWithShadow<[W0], [X0]>>,
CCIfType<[i64], CCAssignToRegWithShadow<[X0], [W0]>>,
// Pass the remaining arguments on the stack instead.
- CCIfType<[i1, i8, i16], CCAssignToStack<4, 4>>,
CCIfType<[i32, f32], CCAssignToStack<4, 4>>,
CCIfType<[i64, f64], CCAssignToStack<8, 8>>
]>;
diff --git a/lib/Target/AArch64/AArch64FastISel.cpp b/lib/Target/AArch64/AArch64FastISel.cpp
index c3b5369..2164d77 100644
--- a/lib/Target/AArch64/AArch64FastISel.cpp
+++ b/lib/Target/AArch64/AArch64FastISel.cpp
@@ -240,21 +240,15 @@ unsigned AArch64FastISel::AArch64MaterializeFP(const ConstantFP *CFP, MVT VT) {
}
unsigned AArch64FastISel::AArch64MaterializeGV(const GlobalValue *GV) {
- // We can't handle thread-local variables quickly yet. Unfortunately we have
- // to peer through any aliases to find out if that rule applies.
- const GlobalValue *TLSGV = GV;
- if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
- TLSGV = GA->getAliasee();
+ // We can't handle thread-local variables quickly yet.
+ if (GV->isThreadLocal())
+ return 0;
// MachO still uses GOT for large code-model accesses, but ELF requires
// movz/movk sequences, which FastISel doesn't handle yet.
if (TM.getCodeModel() != CodeModel::Small && !Subtarget->isTargetMachO())
return 0;
- if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(TLSGV))
- if (GVar->isThreadLocal())
- return 0;
-
unsigned char OpFlags = Subtarget->ClassifyGlobalReference(GV, TM);
EVT DestEVT = TLI.getValueType(GV->getType(), true);
@@ -469,11 +463,18 @@ bool AArch64FastISel::SimplifyAddress(Address &Addr, MVT VT,
break;
}
- // FIXME: If this is a stack pointer and the offset needs to be simplified
- // then put the alloca address into a register, set the base type back to
- // register and continue. This should almost never happen.
+ //If this is a stack pointer and the offset needs to be simplified then put
+ // the alloca address into a register, set the base type back to register and
+ // continue. This should almost never happen.
if (needsLowering && Addr.getKind() == Address::FrameIndexBase) {
- return false;
+ unsigned ResultReg = createResultReg(&AArch64::GPR64RegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
+ ResultReg)
+ .addFrameIndex(Addr.getFI())
+ .addImm(0)
+ .addImm(0);
+ Addr.setKind(Address::RegBase);
+ Addr.setReg(ResultReg);
}
// Since the offset is too large for the load/store instruction get the
@@ -1224,7 +1225,6 @@ bool AArch64FastISel::ProcessCallArgs(
Arg = EmitIntExt(SrcVT, Arg, DestVT, /*isZExt*/ false);
if (Arg == 0)
return false;
- ArgVT = DestVT;
break;
}
case CCValAssign::AExt:
@@ -1235,7 +1235,6 @@ bool AArch64FastISel::ProcessCallArgs(
Arg = EmitIntExt(SrcVT, Arg, DestVT, /*isZExt*/ true);
if (Arg == 0)
return false;
- ArgVT = DestVT;
break;
}
default:
@@ -1254,7 +1253,7 @@ bool AArch64FastISel::ProcessCallArgs(
assert(VA.isMemLoc() && "Assuming store on stack.");
// Need to store on the stack.
- unsigned ArgSize = VA.getLocVT().getSizeInBits() / 8;
+ unsigned ArgSize = (ArgVT.getSizeInBits() + 7) / 8;
unsigned BEAlign = 0;
if (ArgSize < 8 && !Subtarget->isLittleEndian())
@@ -1468,10 +1467,12 @@ bool AArch64FastISel::TryEmitSmallMemCpy(Address Dest, Address Src,
bool RV;
unsigned ResultReg;
RV = EmitLoad(VT, ResultReg, Src);
- assert(RV == true && "Should be able to handle this load.");
+ if (!RV)
+ return false;
+
RV = EmitStore(VT, ResultReg, Dest);
- assert(RV == true && "Should be able to handle this store.");
- (void)RV;
+ if (!RV)
+ return false;
int64_t Size = VT.getSizeInBits() / 8;
Len -= Size;
@@ -1749,6 +1750,17 @@ unsigned AArch64FastISel::Emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt) {
unsigned AArch64FastISel::EmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
bool isZExt) {
assert(DestVT != MVT::i1 && "ZeroExt/SignExt an i1?");
+
+ // FastISel does not have plumbing to deal with extensions where the SrcVT or
+ // DestVT are odd things, so test to make sure that they are both types we can
+ // handle (i1/i8/i16/i32 for SrcVT and i8/i16/i32/i64 for DestVT), otherwise
+ // bail out to SelectionDAG.
+ if (((DestVT != MVT::i8) && (DestVT != MVT::i16) &&
+ (DestVT != MVT::i32) && (DestVT != MVT::i64)) ||
+ ((SrcVT != MVT::i1) && (SrcVT != MVT::i8) &&
+ (SrcVT != MVT::i16) && (SrcVT != MVT::i32)))
+ return 0;
+
unsigned Opc;
unsigned Imm = 0;
@@ -1895,6 +1907,7 @@ bool AArch64FastISel::SelectMul(const Instruction *I) {
case MVT::i32:
ZReg = AArch64::WZR;
Opc = AArch64::MADDWrrr;
+ SrcVT = MVT::i32;
break;
case MVT::i64:
ZReg = AArch64::XZR;
diff --git a/lib/Target/AArch64/AArch64FrameLowering.cpp b/lib/Target/AArch64/AArch64FrameLowering.cpp
index deb306a..9c33717 100644
--- a/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -158,7 +158,7 @@ void AArch64FrameLowering::emitCalleeSavedFrameMoves(
MachineFrameInfo *MFI = MF.getFrameInfo();
MachineModuleInfo &MMI = MF.getMMI();
const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
- const AArch64InstrInfo *TII = TM.getInstrInfo();
+ const TargetInstrInfo *TII = MF.getTarget().getInstrInfo();
DebugLoc DL = MBB.findDebugLoc(MBBI);
// Add callee saved registers to move list.
@@ -204,8 +204,9 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF) const {
MachineBasicBlock::iterator MBBI = MBB.begin();
const MachineFrameInfo *MFI = MF.getFrameInfo();
const Function *Fn = MF.getFunction();
- const AArch64RegisterInfo *RegInfo = TM.getRegisterInfo();
- const AArch64InstrInfo *TII = TM.getInstrInfo();
+ const AArch64RegisterInfo *RegInfo = static_cast<const AArch64RegisterInfo *>(
+ MF.getTarget().getRegisterInfo());
+ const TargetInstrInfo *TII = MF.getTarget().getInstrInfo();
MachineModuleInfo &MMI = MF.getMMI();
AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
bool needsFrameMoves = MMI.hasDebugInfo() || Fn->needsUnwindTableEntry();
diff --git a/lib/Target/AArch64/AArch64FrameLowering.h b/lib/Target/AArch64/AArch64FrameLowering.h
index 0e00d16..7686e6f 100644
--- a/lib/Target/AArch64/AArch64FrameLowering.h
+++ b/lib/Target/AArch64/AArch64FrameLowering.h
@@ -18,18 +18,11 @@
namespace llvm {
-class AArch64Subtarget;
-class AArch64TargetMachine;
-
class AArch64FrameLowering : public TargetFrameLowering {
- const AArch64TargetMachine &TM;
-
public:
- explicit AArch64FrameLowering(const AArch64TargetMachine &TM,
- const AArch64Subtarget &STI)
+ explicit AArch64FrameLowering()
: TargetFrameLowering(StackGrowsDown, 16, 0, 16,
- false /*StackRealignable*/),
- TM(TM) {}
+ false /*StackRealignable*/) {}
void emitCalleeSavedFrameMoves(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
diff --git a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index 7007ffc..3f49fab 100644
--- a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -153,9 +153,6 @@ public:
SDNode *SelectStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
SDNode *SelectPostStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
- SDNode *SelectSIMDAddSubNarrowing(unsigned IntNo, SDNode *Node);
- SDNode *SelectSIMDXtnNarrowing(unsigned IntNo, SDNode *Node);
-
SDNode *SelectBitfieldExtractOp(SDNode *N);
SDNode *SelectBitfieldInsertOp(SDNode *N);
@@ -596,8 +593,9 @@ bool AArch64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size,
const GlobalValue *GV = GAN->getGlobal();
unsigned Alignment = GV->getAlignment();
const DataLayout *DL = TLI->getDataLayout();
- if (Alignment == 0 && !Subtarget->isTargetDarwin())
- Alignment = DL->getABITypeAlignment(GV->getType()->getElementType());
+ Type *Ty = GV->getType()->getElementType();
+ if (Alignment == 0 && Ty->isSized() && !Subtarget->isTargetDarwin())
+ Alignment = DL->getABITypeAlignment(Ty);
if (Alignment >= Size)
return true;
@@ -2111,7 +2109,7 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
.getVectorElementType()
.getSizeInBits()) {
default:
- assert(0 && "Unexpected vector element type!");
+ llvm_unreachable("Unexpected vector element type!");
case 64:
SubReg = AArch64::dsub;
break;
diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp
index 80d6669..28d0035 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -67,15 +67,15 @@ EnableAArch64SlrGeneration("aarch64-shift-insert-generation", cl::Hidden,
//===----------------------------------------------------------------------===//
// AArch64 Lowering public interface.
//===----------------------------------------------------------------------===//
-static TargetLoweringObjectFile *createTLOF(TargetMachine &TM) {
- if (TM.getSubtarget<AArch64Subtarget>().isTargetDarwin())
+static TargetLoweringObjectFile *createTLOF(const Triple &TT) {
+ if (TT.isOSBinFormatMachO())
return new AArch64_MachoTargetObjectFile();
return new AArch64_ELFTargetObjectFile();
}
-AArch64TargetLowering::AArch64TargetLowering(AArch64TargetMachine &TM)
- : TargetLowering(TM, createTLOF(TM)) {
+AArch64TargetLowering::AArch64TargetLowering(TargetMachine &TM)
+ : TargetLowering(TM, createTLOF(Triple(TM.getTargetTriple()))) {
Subtarget = &TM.getSubtarget<AArch64Subtarget>();
// AArch64 doesn't have comparisons which set GPRs or setcc instructions, so
@@ -627,7 +627,7 @@ MVT AArch64TargetLowering::getScalarShiftAmountTy(EVT LHSTy) const {
unsigned AArch64TargetLowering::getMaximalGlobalOffset() const {
// FIXME: On AArch64, this depends on the type.
- // Basically, the addressable offsets are o to 4095 * Ty.getSizeInBytes().
+ // Basically, the addressable offsets are up to 4095 * Ty.getSizeInBytes().
// and the offset has to be a multiple of the related size in bytes.
return 4095;
}
@@ -823,8 +823,7 @@ AArch64TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
#ifndef NDEBUG
MI->dump();
#endif
- assert(0 && "Unexpected instruction for custom inserter!");
- break;
+ llvm_unreachable("Unexpected instruction for custom inserter!");
case AArch64::F128CSEL:
return EmitF128CSEL(MI, BB);
@@ -833,7 +832,6 @@ AArch64TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
case TargetOpcode::PATCHPOINT:
return emitPatchPoint(MI, BB);
}
- llvm_unreachable("Unexpected instruction for custom inserter!");
}
//===----------------------------------------------------------------------===//
@@ -1273,7 +1271,7 @@ static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) {
bool ExtraOp = false;
switch (Op.getOpcode()) {
default:
- assert(0 && "Invalid code");
+ llvm_unreachable("Invalid code");
case ISD::ADDC:
Opc = AArch64ISD::ADDS;
break;
@@ -1387,24 +1385,22 @@ static SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) {
EVT InVT = Op.getOperand(0).getValueType();
EVT VT = Op.getValueType();
- // FP_TO_XINT conversion from the same type are legal.
- if (VT.getSizeInBits() == InVT.getSizeInBits())
- return Op;
-
- if (InVT == MVT::v2f64 || InVT == MVT::v4f32) {
+ if (VT.getSizeInBits() < InVT.getSizeInBits()) {
SDLoc dl(Op);
SDValue Cv =
DAG.getNode(Op.getOpcode(), dl, InVT.changeVectorElementTypeToInteger(),
Op.getOperand(0));
return DAG.getNode(ISD::TRUNCATE, dl, VT, Cv);
- } else if (InVT == MVT::v2f32) {
+ }
+
+ if (VT.getSizeInBits() > InVT.getSizeInBits()) {
SDLoc dl(Op);
SDValue Ext = DAG.getNode(ISD::FP_EXTEND, dl, MVT::v2f64, Op.getOperand(0));
return DAG.getNode(Op.getOpcode(), dl, VT, Ext);
}
// Type changing conversions are illegal.
- return SDValue();
+ return Op;
}
SDValue AArch64TargetLowering::LowerFP_TO_INT(SDValue Op,
@@ -1440,32 +1436,23 @@ static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
SDValue In = Op.getOperand(0);
EVT InVT = In.getValueType();
- // v2i32 to v2f32 is legal.
- if (VT == MVT::v2f32 && InVT == MVT::v2i32)
- return Op;
-
- // This function only handles v2f64 outputs.
- if (VT == MVT::v2f64) {
- // Extend the input argument to a v2i64 that we can feed into the
- // floating point conversion. Zero or sign extend based on whether
- // we're doing a signed or unsigned float conversion.
- unsigned Opc =
- Op.getOpcode() == ISD::UINT_TO_FP ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND;
- assert(Op.getNumOperands() == 1 && "FP conversions take one argument");
- SDValue Promoted = DAG.getNode(Opc, dl, MVT::v2i64, Op.getOperand(0));
- return DAG.getNode(Op.getOpcode(), dl, Op.getValueType(), Promoted);
+ if (VT.getSizeInBits() < InVT.getSizeInBits()) {
+ MVT CastVT =
+ MVT::getVectorVT(MVT::getFloatingPointVT(InVT.getScalarSizeInBits()),
+ InVT.getVectorNumElements());
+ In = DAG.getNode(Op.getOpcode(), dl, CastVT, In);
+ return DAG.getNode(ISD::FP_ROUND, dl, VT, In, DAG.getIntPtrConstant(0));
}
- // Scalarize v2i64 to v2f32 conversions.
- std::vector<SDValue> BuildVectorOps;
- for (unsigned i = 0; i < VT.getVectorNumElements(); ++i) {
- SDValue Sclr = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, In,
- DAG.getConstant(i, MVT::i64));
- Sclr = DAG.getNode(Op->getOpcode(), dl, MVT::f32, Sclr);
- BuildVectorOps.push_back(Sclr);
+ if (VT.getSizeInBits() > InVT.getSizeInBits()) {
+ unsigned CastOpc =
+ Op.getOpcode() == ISD::SINT_TO_FP ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
+ EVT CastVT = VT.changeVectorElementTypeToInteger();
+ In = DAG.getNode(CastOpc, dl, CastVT, In);
+ return DAG.getNode(Op.getOpcode(), dl, VT, In);
}
- return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, BuildVectorOps);
+ return Op;
}
SDValue AArch64TargetLowering::LowerINT_TO_FP(SDValue Op,
@@ -1516,7 +1503,7 @@ SDValue AArch64TargetLowering::LowerFSINCOS(SDValue Op,
StructType *RetTy = StructType::get(ArgTy, ArgTy, NULL);
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(dl).setChain(DAG.getEntryNode())
- .setCallee(CallingConv::Fast, RetTy, Callee, &Args, 0);
+ .setCallee(CallingConv::Fast, RetTy, Callee, std::move(Args), 0);
std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
return CallResult.first;
@@ -1711,7 +1698,9 @@ SDValue AArch64TargetLowering::LowerFormalArguments(
InVals.push_back(FrameIdxN);
continue;
- } if (VA.isRegLoc()) {
+ }
+
+ if (VA.isRegLoc()) {
// Arguments stored in registers.
EVT RegVT = VA.getLocVT();
@@ -1772,10 +1761,16 @@ SDValue AArch64TargetLowering::LowerFormalArguments(
SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
SDValue ArgValue;
+ // For NON_EXTLOAD, generic code in getLoad assert(ValVT == MemVT)
ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
+ MVT MemVT = VA.getValVT();
+
switch (VA.getLocInfo()) {
default:
break;
+ case CCValAssign::BCvt:
+ MemVT = VA.getLocVT();
+ break;
case CCValAssign::SExt:
ExtType = ISD::SEXTLOAD;
break;
@@ -1787,10 +1782,9 @@ SDValue AArch64TargetLowering::LowerFormalArguments(
break;
}
- ArgValue = DAG.getExtLoad(ExtType, DL, VA.getValVT(), Chain, FIN,
+ ArgValue = DAG.getExtLoad(ExtType, DL, VA.getLocVT(), Chain, FIN,
MachinePointerInfo::getFixedStack(FI),
- VA.getLocVT(),
- false, false, false, 0);
+ MemVT, false, false, false, nullptr);
InVals.push_back(ArgValue);
}
@@ -2339,11 +2333,9 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
// Since we pass i1/i8/i16 as i1/i8/i16 on stack and Arg is already
// promoted to a legal register type i32, we should truncate Arg back to
// i1/i8/i16.
- if (Arg.getValueType().isSimple() &&
- Arg.getValueType().getSimpleVT() == MVT::i32 &&
- (VA.getLocVT() == MVT::i1 || VA.getLocVT() == MVT::i8 ||
- VA.getLocVT() == MVT::i16))
- Arg = DAG.getNode(ISD::TRUNCATE, DL, VA.getLocVT(), Arg);
+ if (VA.getValVT() == MVT::i1 || VA.getValVT() == MVT::i8 ||
+ VA.getValVT() == MVT::i16)
+ Arg = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Arg);
SDValue Store =
DAG.getStore(Chain, DL, Arg, DstAddr, DstInfo, false, false, 0);
@@ -4116,6 +4108,7 @@ static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG) {
// shuffle in combination with VEXTs.
SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
SelectionDAG &DAG) const {
+ assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unknown opcode!");
SDLoc dl(Op);
EVT VT = Op.getValueType();
unsigned NumElts = VT.getVectorNumElements();
@@ -4164,35 +4157,47 @@ SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
SDValue ShuffleSrcs[2] = { DAG.getUNDEF(VT), DAG.getUNDEF(VT) };
int VEXTOffsets[2] = { 0, 0 };
+ int OffsetMultipliers[2] = { 1, 1 };
// This loop extracts the usage patterns of the source vectors
// and prepares appropriate SDValues for a shuffle if possible.
for (unsigned i = 0; i < SourceVecs.size(); ++i) {
- if (SourceVecs[i].getValueType() == VT) {
+ unsigned NumSrcElts = SourceVecs[i].getValueType().getVectorNumElements();
+ SDValue CurSource = SourceVecs[i];
+ if (SourceVecs[i].getValueType().getVectorElementType() !=
+ VT.getVectorElementType()) {
+ // It may hit this case if SourceVecs[i] is AssertSext/AssertZext.
+ // Then bitcast it to the vector which holds asserted element type,
+ // and record the multiplier of element width between SourceVecs and
+ // Build_vector which is needed to extract the correct lanes later.
+ EVT CastVT =
+ EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(),
+ SourceVecs[i].getValueSizeInBits() /
+ VT.getVectorElementType().getSizeInBits());
+
+ CurSource = DAG.getNode(ISD::BITCAST, dl, CastVT, SourceVecs[i]);
+ OffsetMultipliers[i] = CastVT.getVectorNumElements() / NumSrcElts;
+ NumSrcElts *= OffsetMultipliers[i];
+ MaxElts[i] *= OffsetMultipliers[i];
+ MinElts[i] *= OffsetMultipliers[i];
+ }
+
+ if (CurSource.getValueType() == VT) {
// No VEXT necessary
- ShuffleSrcs[i] = SourceVecs[i];
+ ShuffleSrcs[i] = CurSource;
VEXTOffsets[i] = 0;
continue;
- } else if (SourceVecs[i].getValueType().getVectorNumElements() < NumElts) {
+ } else if (NumSrcElts < NumElts) {
// We can pad out the smaller vector for free, so if it's part of a
// shuffle...
- ShuffleSrcs[i] = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, SourceVecs[i],
- DAG.getUNDEF(SourceVecs[i].getValueType()));
+ ShuffleSrcs[i] = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, CurSource,
+ DAG.getUNDEF(CurSource.getValueType()));
continue;
}
- // Don't attempt to extract subvectors from BUILD_VECTOR sources
- // that expand or trunc the original value.
- // TODO: We can try to bitcast and ANY_EXTEND the result but
- // we need to consider the cost of vector ANY_EXTEND, and the
- // legality of all the types.
- if (SourceVecs[i].getValueType().getVectorElementType() !=
- VT.getVectorElementType())
- return SDValue();
-
// Since only 64-bit and 128-bit vectors are legal on ARM and
// we've eliminated the other cases...
- assert(SourceVecs[i].getValueType().getVectorNumElements() == 2 * NumElts &&
+ assert(NumSrcElts == 2 * NumElts &&
"unexpected vector sizes in ReconstructShuffle");
if (MaxElts[i] - MinElts[i] >= NumElts) {
@@ -4203,22 +4208,20 @@ SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
if (MinElts[i] >= NumElts) {
// The extraction can just take the second half
VEXTOffsets[i] = NumElts;
- ShuffleSrcs[i] =
- DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, SourceVecs[i],
- DAG.getIntPtrConstant(NumElts));
+ ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, CurSource,
+ DAG.getIntPtrConstant(NumElts));
} else if (MaxElts[i] < NumElts) {
// The extraction can just take the first half
VEXTOffsets[i] = 0;
- ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,
- SourceVecs[i], DAG.getIntPtrConstant(0));
+ ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, CurSource,
+ DAG.getIntPtrConstant(0));
} else {
// An actual VEXT is needed
VEXTOffsets[i] = MinElts[i];
- SDValue VEXTSrc1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,
- SourceVecs[i], DAG.getIntPtrConstant(0));
- SDValue VEXTSrc2 =
- DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, SourceVecs[i],
- DAG.getIntPtrConstant(NumElts));
+ SDValue VEXTSrc1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, CurSource,
+ DAG.getIntPtrConstant(0));
+ SDValue VEXTSrc2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, CurSource,
+ DAG.getIntPtrConstant(NumElts));
unsigned Imm = VEXTOffsets[i] * getExtFactor(VEXTSrc1);
ShuffleSrcs[i] = DAG.getNode(AArch64ISD::EXT, dl, VT, VEXTSrc1, VEXTSrc2,
DAG.getConstant(Imm, MVT::i32));
@@ -4238,9 +4241,10 @@ SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
int ExtractElt =
cast<ConstantSDNode>(Op.getOperand(i).getOperand(1))->getSExtValue();
if (ExtractVec == SourceVecs[0]) {
- Mask.push_back(ExtractElt - VEXTOffsets[0]);
+ Mask.push_back(ExtractElt * OffsetMultipliers[0] - VEXTOffsets[0]);
} else {
- Mask.push_back(ExtractElt + NumElts - VEXTOffsets[1]);
+ Mask.push_back(ExtractElt * OffsetMultipliers[1] + NumElts -
+ VEXTOffsets[1]);
}
}
@@ -5177,11 +5181,37 @@ FailedModImm:
return Op;
}
+// Normalize the operands of BUILD_VECTOR. The value of constant operands will
+// be truncated to fit element width.
+static SDValue NormalizeBuildVector(SDValue Op,
+ SelectionDAG &DAG) {
+ assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unknown opcode!");
+ SDLoc dl(Op);
+ EVT VT = Op.getValueType();
+ EVT EltTy= VT.getVectorElementType();
+
+ if (EltTy.isFloatingPoint() || EltTy.getSizeInBits() > 16)
+ return Op;
+
+ SmallVector<SDValue, 16> Ops;
+ for (unsigned I = 0, E = VT.getVectorNumElements(); I != E; ++I) {
+ SDValue Lane = Op.getOperand(I);
+ if (Lane.getOpcode() == ISD::Constant) {
+ APInt LowBits(EltTy.getSizeInBits(),
+ cast<ConstantSDNode>(Lane)->getZExtValue());
+ Lane = DAG.getConstant(LowBits.getZExtValue(), MVT::i32);
+ }
+ Ops.push_back(Lane);
+ }
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops);
+}
+
SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
SelectionDAG &DAG) const {
- BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode());
SDLoc dl(Op);
EVT VT = Op.getValueType();
+ Op = NormalizeBuildVector(Op, DAG);
+ BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode());
APInt CnstBits(VT.getSizeInBits(), 0);
APInt UndefBits(VT.getSizeInBits(), 0);
@@ -6047,18 +6077,14 @@ bool AArch64TargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
return false;
unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
- if (NumBits1 <= NumBits2)
- return false;
- return true;
+ return NumBits1 > NumBits2;
}
bool AArch64TargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
- if (!VT1.isInteger() || !VT2.isInteger())
+ if (VT1.isVector() || VT2.isVector() || !VT1.isInteger() || !VT2.isInteger())
return false;
unsigned NumBits1 = VT1.getSizeInBits();
unsigned NumBits2 = VT2.getSizeInBits();
- if (NumBits1 <= NumBits2)
- return false;
- return true;
+ return NumBits1 > NumBits2;
}
// All 32-bit GPR operations implicitly zero the high-half of the corresponding
@@ -6068,18 +6094,14 @@ bool AArch64TargetLowering::isZExtFree(Type *Ty1, Type *Ty2) const {
return false;
unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
- if (NumBits1 == 32 && NumBits2 == 64)
- return true;
- return false;
+ return NumBits1 == 32 && NumBits2 == 64;
}
bool AArch64TargetLowering::isZExtFree(EVT VT1, EVT VT2) const {
- if (!VT1.isInteger() || !VT2.isInteger())
+ if (VT1.isVector() || VT2.isVector() || !VT1.isInteger() || !VT2.isInteger())
return false;
unsigned NumBits1 = VT1.getSizeInBits();
unsigned NumBits2 = VT2.getSizeInBits();
- if (NumBits1 == 32 && NumBits2 == 64)
- return true;
- return false;
+ return NumBits1 == 32 && NumBits2 == 64;
}
bool AArch64TargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
@@ -6092,8 +6114,9 @@ bool AArch64TargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
return false;
// 8-, 16-, and 32-bit integer loads all implicitly zero-extend.
- return (VT1.isSimple() && VT1.isInteger() && VT2.isSimple() &&
- VT2.isInteger() && VT1.getSizeInBits() <= 32);
+ return (VT1.isSimple() && !VT1.isVector() && VT1.isInteger() &&
+ VT2.isSimple() && !VT2.isVector() && VT2.isInteger() &&
+ VT1.getSizeInBits() <= 32);
}
bool AArch64TargetLowering::hasPairedLoad(Type *LoadedType,
@@ -6346,23 +6369,45 @@ static SDValue performMulCombine(SDNode *N, SelectionDAG &DAG,
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
APInt Value = C->getAPIntValue();
EVT VT = N->getValueType(0);
- APInt VP1 = Value + 1;
- if (VP1.isPowerOf2()) {
- // Multiplying by one less than a power of two, replace with a shift
- // and a subtract.
- SDValue ShiftedVal =
- DAG.getNode(ISD::SHL, SDLoc(N), VT, N->getOperand(0),
- DAG.getConstant(VP1.logBase2(), MVT::i64));
- return DAG.getNode(ISD::SUB, SDLoc(N), VT, ShiftedVal, N->getOperand(0));
- }
- APInt VM1 = Value - 1;
- if (VM1.isPowerOf2()) {
- // Multiplying by one more than a power of two, replace with a shift
- // and an add.
- SDValue ShiftedVal =
- DAG.getNode(ISD::SHL, SDLoc(N), VT, N->getOperand(0),
- DAG.getConstant(VM1.logBase2(), MVT::i64));
- return DAG.getNode(ISD::ADD, SDLoc(N), VT, ShiftedVal, N->getOperand(0));
+ if (Value.isNonNegative()) {
+ // (mul x, 2^N + 1) => (add (shl x, N), x)
+ APInt VM1 = Value - 1;
+ if (VM1.isPowerOf2()) {
+ SDValue ShiftedVal =
+ DAG.getNode(ISD::SHL, SDLoc(N), VT, N->getOperand(0),
+ DAG.getConstant(VM1.logBase2(), MVT::i64));
+ return DAG.getNode(ISD::ADD, SDLoc(N), VT, ShiftedVal,
+ N->getOperand(0));
+ }
+ // (mul x, 2^N - 1) => (sub (shl x, N), x)
+ APInt VP1 = Value + 1;
+ if (VP1.isPowerOf2()) {
+ SDValue ShiftedVal =
+ DAG.getNode(ISD::SHL, SDLoc(N), VT, N->getOperand(0),
+ DAG.getConstant(VP1.logBase2(), MVT::i64));
+ return DAG.getNode(ISD::SUB, SDLoc(N), VT, ShiftedVal,
+ N->getOperand(0));
+ }
+ } else {
+ // (mul x, -(2^N + 1)) => - (add (shl x, N), x)
+ APInt VNM1 = -Value - 1;
+ if (VNM1.isPowerOf2()) {
+ SDValue ShiftedVal =
+ DAG.getNode(ISD::SHL, SDLoc(N), VT, N->getOperand(0),
+ DAG.getConstant(VNM1.logBase2(), MVT::i64));
+ SDValue Add =
+ DAG.getNode(ISD::ADD, SDLoc(N), VT, ShiftedVal, N->getOperand(0));
+ return DAG.getNode(ISD::SUB, SDLoc(N), VT, DAG.getConstant(0, VT), Add);
+ }
+ // (mul x, -(2^N - 1)) => (sub x, (shl x, N))
+ APInt VNP1 = -Value + 1;
+ if (VNP1.isPowerOf2()) {
+ SDValue ShiftedVal =
+ DAG.getNode(ISD::SHL, SDLoc(N), VT, N->getOperand(0),
+ DAG.getConstant(VNP1.logBase2(), MVT::i64));
+ return DAG.getNode(ISD::SUB, SDLoc(N), VT, N->getOperand(0),
+ ShiftedVal);
+ }
}
}
return SDValue();
@@ -6687,7 +6732,7 @@ static SDValue tryCombineFixedPointConvert(SDNode *N,
else if (Vec.getValueType() == MVT::v2i64)
VecResTy = MVT::v2f64;
else
- assert(0 && "unexpected vector type!");
+ llvm_unreachable("unexpected vector type!");
SDValue Convert =
DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VecResTy, IID, Vec, Shift);
@@ -7020,7 +7065,7 @@ static SDValue tryCombineShiftImm(unsigned IID, SDNode *N, SelectionDAG &DAG) {
if (IsRightShift && ShiftAmount <= -1 && ShiftAmount >= -(int)ElemBits)
return DAG.getNode(Opcode, SDLoc(N), N->getValueType(0), N->getOperand(1),
DAG.getConstant(-ShiftAmount, MVT::i32));
- else if (!IsRightShift && ShiftAmount >= 0 && ShiftAmount <= ElemBits)
+ else if (!IsRightShift && ShiftAmount >= 0 && ShiftAmount < ElemBits)
return DAG.getNode(Opcode, SDLoc(N), N->getValueType(0), N->getOperand(1),
DAG.getConstant(ShiftAmount, MVT::i32));
@@ -7867,6 +7912,18 @@ bool AArch64TargetLowering::shouldExpandAtomicInIR(Instruction *Inst) const {
return Inst->getType()->getPrimitiveSizeInBits() <= 128;
}
+TargetLoweringBase::LegalizeTypeAction
+AArch64TargetLowering::getPreferredVectorAction(EVT VT) const {
+ MVT SVT = VT.getSimpleVT();
+ // During type legalization, we prefer to widen v1i8, v1i16, v1i32 to v8i8,
+ // v4i16, v2i32 instead of to promote.
+ if (SVT == MVT::v1i8 || SVT == MVT::v1i16 || SVT == MVT::v1i32
+ || SVT == MVT::v1f32)
+ return TypeWidenVector;
+
+ return TargetLoweringBase::getPreferredVectorAction(VT);
+}
+
Value *AArch64TargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
AtomicOrdering Ord) const {
Module *M = Builder.GetInsertBlock()->getParent()->getParent();
diff --git a/lib/Target/AArch64/AArch64ISelLowering.h b/lib/Target/AArch64/AArch64ISelLowering.h
index de16c4d..cb0b9ef 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/lib/Target/AArch64/AArch64ISelLowering.h
@@ -197,7 +197,7 @@ class AArch64TargetLowering : public TargetLowering {
bool RequireStrictAlign;
public:
- explicit AArch64TargetLowering(AArch64TargetMachine &TM);
+ explicit AArch64TargetLowering(TargetMachine &TM);
/// Selects the correct CCAssignFn for a the given CallingConvention
/// value.
@@ -324,6 +324,9 @@ public:
bool shouldExpandAtomicInIR(Instruction *Inst) const override;
+ TargetLoweringBase::LegalizeTypeAction
+ getPreferredVectorAction(EVT VT) const override;
+
private:
/// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
/// make the right decision when generating code for different targets.
diff --git a/lib/Target/AArch64/AArch64InstrFormats.td b/lib/Target/AArch64/AArch64InstrFormats.td
index d455d7e..5007172 100644
--- a/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/lib/Target/AArch64/AArch64InstrFormats.td
@@ -448,13 +448,19 @@ def logical_imm64_XFORM : SDNodeXForm<imm, [{
return CurDAG->getTargetConstant(enc, MVT::i32);
}]>;
-def LogicalImm32Operand : AsmOperandClass {
- let Name = "LogicalImm32";
- let DiagnosticType = "LogicalSecondSource";
-}
-def LogicalImm64Operand : AsmOperandClass {
- let Name = "LogicalImm64";
- let DiagnosticType = "LogicalSecondSource";
+let DiagnosticType = "LogicalSecondSource" in {
+ def LogicalImm32Operand : AsmOperandClass {
+ let Name = "LogicalImm32";
+ }
+ def LogicalImm64Operand : AsmOperandClass {
+ let Name = "LogicalImm64";
+ }
+ def LogicalImm32NotOperand : AsmOperandClass {
+ let Name = "LogicalImm32Not";
+ }
+ def LogicalImm64NotOperand : AsmOperandClass {
+ let Name = "LogicalImm64Not";
+ }
}
def logical_imm32 : Operand<i32>, PatLeaf<(imm), [{
return AArch64_AM::isLogicalImmediate(N->getZExtValue(), 32);
@@ -468,6 +474,12 @@ def logical_imm64 : Operand<i64>, PatLeaf<(imm), [{
let PrintMethod = "printLogicalImm64";
let ParserMatchClass = LogicalImm64Operand;
}
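+// Operands for the "bic/bics/orn/eon" aliases defined below, which accept an
+// immediate whose bitwise NOT is a valid logical immediate.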
+def logical_imm32_not : Operand<i32> {
+ let ParserMatchClass = LogicalImm32NotOperand;
+}
+def logical_imm64_not : Operand<i64> {
+ let ParserMatchClass = LogicalImm64NotOperand;
+}
// imm0_65535 predicate - True if the immediate is in the range [0,65535].
def Imm0_65535Operand : AsmImmRange<0, 65535>;
@@ -963,8 +975,14 @@ def ccode : Operand<i32> {
let ParserMatchClass = CondCode;
}
def inv_ccode : Operand<i32> {
+ // AL and NV are invalid in the aliases which use inv_ccode
let PrintMethod = "printInverseCondCode";
let ParserMatchClass = CondCode;
+ let MCOperandPredicate = [{
+ return MCOp.isImm() &&
+ MCOp.getImm() != AArch64CC::AL &&
+ MCOp.getImm() != AArch64CC::NV;
+ }];
}
// Conditional branch target. 19-bit immediate. The low two bits of the target
@@ -1323,13 +1341,13 @@ class BaseMulAccum<bit isSub, bits<3> opc, RegisterClass multype,
multiclass MulAccum<bit isSub, string asm, SDNode AccNode> {
def Wrrr : BaseMulAccum<isSub, 0b000, GPR32, GPR32, asm,
[(set GPR32:$Rd, (AccNode GPR32:$Ra, (mul GPR32:$Rn, GPR32:$Rm)))]>,
- Sched<[WriteIM32, ReadIMA, ReadIM, ReadIM]> {
+ Sched<[WriteIM32, ReadIM, ReadIM, ReadIMA]> {
let Inst{31} = 0;
}
def Xrrr : BaseMulAccum<isSub, 0b000, GPR64, GPR64, asm,
[(set GPR64:$Rd, (AccNode GPR64:$Ra, (mul GPR64:$Rn, GPR64:$Rm)))]>,
- Sched<[WriteIM64, ReadIMA, ReadIM, ReadIM]> {
+ Sched<[WriteIM64, ReadIM, ReadIM, ReadIMA]> {
let Inst{31} = 1;
}
}
@@ -1339,7 +1357,7 @@ class WideMulAccum<bit isSub, bits<3> opc, string asm,
: BaseMulAccum<isSub, opc, GPR32, GPR64, asm,
[(set GPR64:$Rd, (AccNode GPR64:$Ra,
(mul (ExtNode GPR32:$Rn), (ExtNode GPR32:$Rm))))]>,
- Sched<[WriteIM32, ReadIMA, ReadIM, ReadIM]> {
+ Sched<[WriteIM32, ReadIM, ReadIM, ReadIMA]> {
let Inst{31} = 1;
}
@@ -1738,6 +1756,10 @@ multiclass AddSubS<bit isSub, string mnemonic, SDNode OpNode, string cmp> {
WZR, GPR32:$src1, GPR32:$src2, 0), 5>;
def : InstAlias<cmp#" $src1, $src2", (!cast<Instruction>(NAME#"Xrs")
XZR, GPR64:$src1, GPR64:$src2, 0), 5>;
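+  // When the first source operand is the stack pointer, use the
+  // extended-register form, since SP is not valid in the shifted-register form.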
+ def : InstAlias<cmp#" $src1, $src2", (!cast<Instruction>(NAME#"Wrx")
+ WZR, GPR32sponly:$src1, GPR32:$src2, 16), 5>;
+ def : InstAlias<cmp#" $src1, $src2", (!cast<Instruction>(NAME#"Xrx64")
+ XZR, GPR64sponly:$src1, GPR64:$src2, 24), 5>;
// Register/register aliases with no shift when SP is not used.
def : AddSubRegAlias<mnemonic, !cast<Instruction>(NAME#"Wrs"),
@@ -1925,22 +1947,32 @@ class LogicalRegAlias<string asm, Instruction inst, RegisterClass regtype>
: InstAlias<asm#" $dst, $src1, $src2",
(inst regtype:$dst, regtype:$src1, regtype:$src2, 0)>;
-let AddedComplexity = 6 in
-multiclass LogicalImm<bits<2> opc, string mnemonic, SDNode OpNode> {
+multiclass LogicalImm<bits<2> opc, string mnemonic, SDNode OpNode,
+ string Alias> {
+ let AddedComplexity = 6 in
def Wri : BaseLogicalImm<opc, GPR32sp, GPR32, logical_imm32, mnemonic,
[(set GPR32sp:$Rd, (OpNode GPR32:$Rn,
logical_imm32:$imm))]> {
let Inst{31} = 0;
let Inst{22} = 0; // 64-bit version has an additional bit of immediate.
}
+ let AddedComplexity = 6 in
def Xri : BaseLogicalImm<opc, GPR64sp, GPR64, logical_imm64, mnemonic,
[(set GPR64sp:$Rd, (OpNode GPR64:$Rn,
logical_imm64:$imm))]> {
let Inst{31} = 1;
}
+
+ def : InstAlias<Alias # " $Rd, $Rn, $imm",
+ (!cast<Instruction>(NAME # "Wri") GPR32sp:$Rd, GPR32:$Rn,
+ logical_imm32_not:$imm), 0>;
+ def : InstAlias<Alias # " $Rd, $Rn, $imm",
+ (!cast<Instruction>(NAME # "Xri") GPR64sp:$Rd, GPR64:$Rn,
+ logical_imm64_not:$imm), 0>;
}
-multiclass LogicalImmS<bits<2> opc, string mnemonic, SDNode OpNode> {
+multiclass LogicalImmS<bits<2> opc, string mnemonic, SDNode OpNode,
+ string Alias> {
let isCompare = 1, Defs = [NZCV] in {
def Wri : BaseLogicalImm<opc, GPR32, GPR32, logical_imm32, mnemonic,
[(set GPR32:$Rd, (OpNode GPR32:$Rn, logical_imm32:$imm))]> {
@@ -1952,6 +1984,13 @@ multiclass LogicalImmS<bits<2> opc, string mnemonic, SDNode OpNode> {
let Inst{31} = 1;
}
} // end Defs = [NZCV]
+
+ def : InstAlias<Alias # " $Rd, $Rn, $imm",
+ (!cast<Instruction>(NAME # "Wri") GPR32:$Rd, GPR32:$Rn,
+ logical_imm32_not:$imm), 0>;
+ def : InstAlias<Alias # " $Rd, $Rn, $imm",
+ (!cast<Instruction>(NAME # "Xri") GPR64:$Rd, GPR64:$Rn,
+ logical_imm64_not:$imm), 0>;
}
class BaseLogicalRegPseudo<RegisterClass regtype, SDPatternOperator OpNode>
diff --git a/lib/Target/AArch64/AArch64InstrInfo.cpp b/lib/Target/AArch64/AArch64InstrInfo.cpp
index ff115c0..ce85b2c 100644
--- a/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -35,8 +35,14 @@ AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI)
/// GetInstSize - Return the number of bytes of code the specified
/// instruction may occupy. This returns the maximum number of bytes.
unsigned AArch64InstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
- const MCInstrDesc &Desc = MI->getDesc();
+ const MachineBasicBlock &MBB = *MI->getParent();
+ const MachineFunction *MF = MBB.getParent();
+ const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();
+
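+  // Inline assembly has no fixed encoding; its size can only be estimated
+  // from the asm string.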
+ if (MI->getOpcode() == AArch64::INLINEASM)
+ return getInlineAsmLength(MI->getOperand(0).getSymbolName(), *MAI);
+ const MCInstrDesc &Desc = MI->getDesc();
switch (Desc.getOpcode()) {
default:
    // Anything not explicitly designated otherwise is a normal 4-byte insn.
@@ -1224,7 +1230,7 @@ void AArch64InstrInfo::copyPhysRegTuple(
MachineBasicBlock &MBB, MachineBasicBlock::iterator I, DebugLoc DL,
unsigned DestReg, unsigned SrcReg, bool KillSrc, unsigned Opcode,
llvm::ArrayRef<unsigned> Indices) const {
- assert(getSubTarget().hasNEON() &&
+ assert(Subtarget.hasNEON() &&
"Unexpected register copy without NEON");
const TargetRegisterInfo *TRI = &getRegisterInfo();
uint16_t DestEncoding = TRI->getEncodingValue(DestReg);
@@ -1385,7 +1391,7 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
if (AArch64::FPR128RegClass.contains(DestReg) &&
AArch64::FPR128RegClass.contains(SrcReg)) {
- if(getSubTarget().hasNEON()) {
+ if(Subtarget.hasNEON()) {
BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
.addReg(SrcReg)
.addReg(SrcReg, getKillRegState(KillSrc));
@@ -1406,7 +1412,7 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
if (AArch64::FPR64RegClass.contains(DestReg) &&
AArch64::FPR64RegClass.contains(SrcReg)) {
- if(getSubTarget().hasNEON()) {
+ if(Subtarget.hasNEON()) {
DestReg = RI.getMatchingSuperReg(DestReg, AArch64::dsub,
&AArch64::FPR128RegClass);
SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::dsub,
@@ -1423,7 +1429,7 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
if (AArch64::FPR32RegClass.contains(DestReg) &&
AArch64::FPR32RegClass.contains(SrcReg)) {
- if(getSubTarget().hasNEON()) {
+ if(Subtarget.hasNEON()) {
DestReg = RI.getMatchingSuperReg(DestReg, AArch64::ssub,
&AArch64::FPR128RegClass);
SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::ssub,
@@ -1440,7 +1446,7 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
if (AArch64::FPR16RegClass.contains(DestReg) &&
AArch64::FPR16RegClass.contains(SrcReg)) {
- if(getSubTarget().hasNEON()) {
+ if(Subtarget.hasNEON()) {
DestReg = RI.getMatchingSuperReg(DestReg, AArch64::hsub,
&AArch64::FPR128RegClass);
SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::hsub,
@@ -1461,7 +1467,7 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
if (AArch64::FPR8RegClass.contains(DestReg) &&
AArch64::FPR8RegClass.contains(SrcReg)) {
- if(getSubTarget().hasNEON()) {
+ if(Subtarget.hasNEON()) {
DestReg = RI.getMatchingSuperReg(DestReg, AArch64::bsub,
&AArch64::FPR128RegClass);
SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::bsub,
@@ -1577,39 +1583,39 @@ void AArch64InstrInfo::storeRegToStackSlot(
if (AArch64::FPR128RegClass.hasSubClassEq(RC))
Opc = AArch64::STRQui;
else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
- assert(getSubTarget().hasNEON() &&
+ assert(Subtarget.hasNEON() &&
"Unexpected register store without NEON");
Opc = AArch64::ST1Twov1d, Offset = false;
}
break;
case 24:
if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
- assert(getSubTarget().hasNEON() &&
+ assert(Subtarget.hasNEON() &&
"Unexpected register store without NEON");
Opc = AArch64::ST1Threev1d, Offset = false;
}
break;
case 32:
if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
- assert(getSubTarget().hasNEON() &&
+ assert(Subtarget.hasNEON() &&
"Unexpected register store without NEON");
Opc = AArch64::ST1Fourv1d, Offset = false;
} else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
- assert(getSubTarget().hasNEON() &&
+ assert(Subtarget.hasNEON() &&
"Unexpected register store without NEON");
Opc = AArch64::ST1Twov2d, Offset = false;
}
break;
case 48:
if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
- assert(getSubTarget().hasNEON() &&
+ assert(Subtarget.hasNEON() &&
"Unexpected register store without NEON");
Opc = AArch64::ST1Threev2d, Offset = false;
}
break;
case 64:
if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
- assert(getSubTarget().hasNEON() &&
+ assert(Subtarget.hasNEON() &&
"Unexpected register store without NEON");
Opc = AArch64::ST1Fourv2d, Offset = false;
}
@@ -1675,39 +1681,39 @@ void AArch64InstrInfo::loadRegFromStackSlot(
if (AArch64::FPR128RegClass.hasSubClassEq(RC))
Opc = AArch64::LDRQui;
else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
- assert(getSubTarget().hasNEON() &&
+ assert(Subtarget.hasNEON() &&
"Unexpected register load without NEON");
Opc = AArch64::LD1Twov1d, Offset = false;
}
break;
case 24:
if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
- assert(getSubTarget().hasNEON() &&
+ assert(Subtarget.hasNEON() &&
"Unexpected register load without NEON");
Opc = AArch64::LD1Threev1d, Offset = false;
}
break;
case 32:
if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
- assert(getSubTarget().hasNEON() &&
+ assert(Subtarget.hasNEON() &&
"Unexpected register load without NEON");
Opc = AArch64::LD1Fourv1d, Offset = false;
} else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
- assert(getSubTarget().hasNEON() &&
+ assert(Subtarget.hasNEON() &&
"Unexpected register load without NEON");
Opc = AArch64::LD1Twov2d, Offset = false;
}
break;
case 48:
if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
- assert(getSubTarget().hasNEON() &&
+ assert(Subtarget.hasNEON() &&
"Unexpected register load without NEON");
Opc = AArch64::LD1Threev2d, Offset = false;
}
break;
case 64:
if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
- assert(getSubTarget().hasNEON() &&
+ assert(Subtarget.hasNEON() &&
"Unexpected register load without NEON");
Opc = AArch64::LD1Fourv2d, Offset = false;
}
@@ -1726,7 +1732,7 @@ void AArch64InstrInfo::loadRegFromStackSlot(
void llvm::emitFrameOffset(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI, DebugLoc DL,
unsigned DestReg, unsigned SrcReg, int Offset,
- const AArch64InstrInfo *TII,
+ const TargetInstrInfo *TII,
MachineInstr::MIFlag Flag, bool SetNZCV) {
if (DestReg == SrcReg && Offset == 0)
return;
@@ -1835,7 +1841,7 @@ int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI, int &Offset,
*OutUnscaledOp = 0;
switch (MI.getOpcode()) {
default:
- assert(0 && "unhandled opcode in rewriteAArch64FrameIndex");
+ llvm_unreachable("unhandled opcode in rewriteAArch64FrameIndex");
// Vector spills/fills can't take an immediate offset.
case AArch64::LD1Twov2d:
case AArch64::LD1Threev2d:
diff --git a/lib/Target/AArch64/AArch64InstrInfo.h b/lib/Target/AArch64/AArch64InstrInfo.h
index 90ce75f..f70b82b 100644
--- a/lib/Target/AArch64/AArch64InstrInfo.h
+++ b/lib/Target/AArch64/AArch64InstrInfo.h
@@ -44,8 +44,6 @@ public:
/// always be able to get register info as well (through this method).
const AArch64RegisterInfo &getRegisterInfo() const { return RI; }
- const AArch64Subtarget &getSubTarget() const { return Subtarget; }
-
unsigned GetInstSizeInBytes(const MachineInstr *MI) const;
bool isCoalescableExtInstr(const MachineInstr &MI, unsigned &SrcReg,
@@ -168,7 +166,7 @@ private:
/// if necessary, to be replaced by the scavenger at the end of PEI.
void emitFrameOffset(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
DebugLoc DL, unsigned DestReg, unsigned SrcReg, int Offset,
- const AArch64InstrInfo *TII,
+ const TargetInstrInfo *TII,
MachineInstr::MIFlag = MachineInstr::NoFlags,
bool SetNZCV = false);
diff --git a/lib/Target/AArch64/AArch64InstrInfo.td b/lib/Target/AArch64/AArch64InstrInfo.td
index 9ad36e8..1211fba 100644
--- a/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/lib/Target/AArch64/AArch64InstrInfo.td
@@ -323,7 +323,7 @@ def : Pat<(AArch64LOADgot tconstpool:$addr),
// System instructions.
//===----------------------------------------------------------------------===//
-def HINT : HintI<"hint">;
+def HINT : HintI<"hint">;
def : InstAlias<"nop", (HINT 0b000)>;
def : InstAlias<"yield",(HINT 0b001)>;
def : InstAlias<"wfe", (HINT 0b010)>;
@@ -671,10 +671,10 @@ def CRC32CXrr : BaseCRC32<1, 0b11, 1, GPR64, int_aarch64_crc32cx, "crc32cx">;
//===----------------------------------------------------------------------===//
// (immediate)
-defm ANDS : LogicalImmS<0b11, "ands", AArch64and_flag>;
-defm AND : LogicalImm<0b00, "and", and>;
-defm EOR : LogicalImm<0b10, "eor", xor>;
-defm ORR : LogicalImm<0b01, "orr", or>;
+defm ANDS : LogicalImmS<0b11, "ands", AArch64and_flag, "bics">;
+defm AND : LogicalImm<0b00, "and", and, "bic">;
+defm EOR : LogicalImm<0b10, "eor", xor, "eon">;
+defm ORR : LogicalImm<0b01, "orr", or, "orn">;
// FIXME: these aliases *are* canonical sometimes (when movz can't be
// used). Actually, it seems to be working right now, but putting logical_immXX
@@ -737,6 +737,10 @@ def : Pat<(not GPR64:$Xm), (ORNXrr XZR, GPR64:$Xm)>;
defm CLS : OneOperandData<0b101, "cls">;
defm CLZ : OneOperandData<0b100, "clz", ctlz>;
defm RBIT : OneOperandData<0b000, "rbit">;
+
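+// Match the bit-reverse intrinsic directly to RBIT.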
+def : Pat<(int_aarch64_rbit GPR32:$Rn), (RBITWr $Rn)>;
+def : Pat<(int_aarch64_rbit GPR64:$Rn), (RBITXr $Rn)>;
+
def REV16Wr : OneWRegData<0b001, "rev16",
UnOpFrag<(rotr (bswap node:$LHS), (i64 16))>>;
def REV16Xr : OneXRegData<0b001, "rev16", null_frag>;
@@ -2238,6 +2242,81 @@ def : Pat<(f32_to_f16 FPR32:$Rn),
def FCVTSHpseudo : Pseudo<(outs FPR32:$Rd), (ins FPR32:$Rn),
[(set (f32 FPR32:$Rd), (f16_to_f32 i32:$Rn))]>;
+// When converting from f16 coming directly from a load, make sure we
+// load into the FPR16 registers rather than going through the GPRs.
+// f16->f32
+def : Pat<(f32 (f16_to_f32 (i32
+ (zextloadi16 (ro_Windexed16 GPR64sp:$Rn, GPR32:$Rm,
+ ro_Wextend16:$extend))))),
+ (FCVTSHr (LDRHroW GPR64sp:$Rn, GPR32:$Rm, ro_Wextend16:$extend))>;
+def : Pat<(f32 (f16_to_f32 (i32
+ (zextloadi16 (ro_Xindexed16 GPR64sp:$Rn, GPR64:$Rm,
+ ro_Xextend16:$extend))))),
+ (FCVTSHr (LDRHroX GPR64sp:$Rn, GPR64:$Rm, ro_Xextend16:$extend))>;
+def : Pat <(f32 (f16_to_f32 (i32
+ (zextloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))),
+ (FCVTSHr (LDRHui GPR64sp:$Rn, uimm12s2:$offset))>;
+def : Pat <(f32 (f16_to_f32 (i32
+ (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))))),
+ (FCVTSHr (LDURHi GPR64sp:$Rn, simm9:$offset))>;
+
+// f16->f64
+def : Pat<(f64 (fextend (f32 (f16_to_f32 (i32
+ (zextloadi16 (ro_Windexed16 GPR64sp:$Rn, GPR32:$Rm,
+ ro_Wextend16:$extend))))))),
+ (FCVTDHr (LDRHroW GPR64sp:$Rn, GPR32:$Rm, ro_Wextend16:$extend))>;
+def : Pat<(f64 (fextend (f32 (f16_to_f32 (i32
+ (zextloadi16 (ro_Xindexed16 GPR64sp:$Rn, GPR64:$Rm,
+ ro_Xextend16:$extend))))))),
+ (FCVTDHr (LDRHroX GPR64sp:$Rn, GPR64:$Rm, ro_Xextend16:$extend))>;
+def : Pat <(f64 (fextend (f32 (f16_to_f32 (i32
+ (zextloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))))),
+ (FCVTDHr (LDRHui GPR64sp:$Rn, uimm12s2:$offset))>;
+def : Pat <(f64 (fextend (f32 (f16_to_f32 (i32
+ (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))))))),
+ (FCVTDHr (LDURHi GPR64sp:$Rn, simm9:$offset))>;
+
+// When converting to f16 going directly to a store, make sure we use the
+// appropriate direct conversion instructions and store via the FPR16
+// registers rather than going through the GPRs.
+let AddedComplexity = 10 in {
+// f32->f16
+def : Pat< (truncstorei16 (assertzext (i32 (f32_to_f16 FPR32:$Rt))),
+ (ro_Windexed16 GPR64sp:$Rn, GPR32:$Rm,
+ ro_Wextend16:$extend)),
+ (STRHroW (FCVTHSr FPR32:$Rt), GPR64sp:$Rn, GPR32:$Rm,
+ ro_Wextend16:$extend)>;
+def : Pat< (truncstorei16 (assertzext (i32 (f32_to_f16 FPR32:$Rt))),
+ (ro_Xindexed16 GPR64sp:$Rn, GPR64:$Rm,
+ ro_Xextend16:$extend)),
+ (STRHroX (FCVTHSr FPR32:$Rt), GPR64sp:$Rn, GPR64:$Rm,
+ ro_Xextend16:$extend)>;
+def : Pat <(truncstorei16 (assertzext (i32 (f32_to_f16 FPR32:$Rt))),
+ (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset)),
+ (STRHui (FCVTHSr FPR32:$Rt), GPR64sp:$Rn, uimm12s2:$offset)>;
+def : Pat <(truncstorei16 (assertzext (i32 (f32_to_f16 FPR32:$Rt))),
+ (am_unscaled16 GPR64sp:$Rn, simm9:$offset)),
+ (STURHi (FCVTHSr FPR32:$Rt), GPR64sp:$Rn, simm9:$offset)>;
+// f64->f16
+def : Pat< (truncstorei16 (assertzext (i32 (f32_to_f16 (f32 (fround FPR64:$Rt))))),
+ (ro_Windexed16 GPR64sp:$Rn, GPR32:$Rm,
+ ro_Wextend16:$extend)),
+ (STRHroW (FCVTHDr FPR64:$Rt), GPR64sp:$Rn, GPR32:$Rm,
+ ro_Wextend16:$extend)>;
+def : Pat< (truncstorei16 (assertzext (i32 (f32_to_f16 (f32 (fround FPR64:$Rt))))),
+ (ro_Xindexed16 GPR64sp:$Rn, GPR64:$Rm,
+ ro_Xextend16:$extend)),
+ (STRHroX (FCVTHDr FPR64:$Rt), GPR64sp:$Rn, GPR64:$Rm,
+ ro_Xextend16:$extend)>;
+def : Pat <(truncstorei16 (assertzext (i32 (f32_to_f16 (f32 (fround FPR64:$Rt))))),
+ (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset)),
+ (STRHui (FCVTHDr FPR64:$Rt), GPR64sp:$Rn, uimm12s2:$offset)>;
+def : Pat <(truncstorei16 (assertzext (i32 (f32_to_f16 (f32 (fround FPR64:$Rt))))),
+ (am_unscaled16 GPR64sp:$Rn, simm9:$offset)),
+ (STURHi (FCVTHDr FPR64:$Rt), GPR64sp:$Rn, simm9:$offset)>;
+}
+
+
//===----------------------------------------------------------------------===//
// Floating point single operand instructions.
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
index e7454be..3df9c4f 100644
--- a/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
+++ b/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
@@ -40,14 +40,13 @@ STATISTIC(NumPreFolded, "Number of pre-index updates folded");
STATISTIC(NumUnscaledPairCreated,
"Number of load/store from unscaled generated");
-static cl::opt<unsigned> ScanLimit("aarch64-load-store-scan-limit", cl::init(20),
- cl::Hidden);
+static cl::opt<unsigned> ScanLimit("aarch64-load-store-scan-limit",
+ cl::init(20), cl::Hidden);
// Placeholder while testing unscaled load/store combining
-static cl::opt<bool>
-EnableAArch64UnscaledMemOp("aarch64-unscaled-mem-op", cl::Hidden,
- cl::desc("Allow AArch64 unscaled load/store combining"),
- cl::init(true));
+static cl::opt<bool> EnableAArch64UnscaledMemOp(
+ "aarch64-unscaled-mem-op", cl::Hidden,
+ cl::desc("Allow AArch64 unscaled load/store combining"), cl::init(true));
namespace {
struct AArch64LoadStoreOpt : public MachineFunctionPass {
@@ -60,19 +59,19 @@ struct AArch64LoadStoreOpt : public MachineFunctionPass {
// Scan the instructions looking for a load/store that can be combined
// with the current instruction into a load/store pair.
// Return the matching instruction if one is found, else MBB->end().
- // If a matching instruction is found, mergeForward is set to true if the
+ // If a matching instruction is found, MergeForward is set to true if the
// merge is to remove the first instruction and replace the second with
// a pair-wise insn, and false if the reverse is true.
MachineBasicBlock::iterator findMatchingInsn(MachineBasicBlock::iterator I,
- bool &mergeForward,
+ bool &MergeForward,
unsigned Limit);
// Merge the two instructions indicated into a single pair-wise instruction.
- // If mergeForward is true, erase the first instruction and fold its
+ // If MergeForward is true, erase the first instruction and fold its
// operation into the second. If false, the reverse. Return the instruction
// following the first instruction (which may change during processing).
MachineBasicBlock::iterator
mergePairedInsns(MachineBasicBlock::iterator I,
- MachineBasicBlock::iterator Paired, bool mergeForward);
+ MachineBasicBlock::iterator Paired, bool MergeForward);
// Scan the instruction list to find a base register update that can
// be combined with the current instruction (a load or store) using
@@ -142,7 +141,7 @@ static bool isUnscaledLdst(unsigned Opc) {
int AArch64LoadStoreOpt::getMemSize(MachineInstr *MemMI) {
switch (MemMI->getOpcode()) {
default:
- llvm_unreachable("Opcode has has unknown size!");
+ llvm_unreachable("Opcode has unknown size!");
case AArch64::STRSui:
case AArch64::STURSi:
return 4;
@@ -217,16 +216,26 @@ static unsigned getPreIndexedOpcode(unsigned Opc) {
switch (Opc) {
default:
llvm_unreachable("Opcode has no pre-indexed equivalent!");
- case AArch64::STRSui: return AArch64::STRSpre;
- case AArch64::STRDui: return AArch64::STRDpre;
- case AArch64::STRQui: return AArch64::STRQpre;
- case AArch64::STRWui: return AArch64::STRWpre;
- case AArch64::STRXui: return AArch64::STRXpre;
- case AArch64::LDRSui: return AArch64::LDRSpre;
- case AArch64::LDRDui: return AArch64::LDRDpre;
- case AArch64::LDRQui: return AArch64::LDRQpre;
- case AArch64::LDRWui: return AArch64::LDRWpre;
- case AArch64::LDRXui: return AArch64::LDRXpre;
+ case AArch64::STRSui:
+ return AArch64::STRSpre;
+ case AArch64::STRDui:
+ return AArch64::STRDpre;
+ case AArch64::STRQui:
+ return AArch64::STRQpre;
+ case AArch64::STRWui:
+ return AArch64::STRWpre;
+ case AArch64::STRXui:
+ return AArch64::STRXpre;
+ case AArch64::LDRSui:
+ return AArch64::LDRSpre;
+ case AArch64::LDRDui:
+ return AArch64::LDRDpre;
+ case AArch64::LDRQui:
+ return AArch64::LDRQpre;
+ case AArch64::LDRWui:
+ return AArch64::LDRWpre;
+ case AArch64::LDRXui:
+ return AArch64::LDRXpre;
}
}
@@ -260,7 +269,7 @@ static unsigned getPostIndexedOpcode(unsigned Opc) {
MachineBasicBlock::iterator
AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
MachineBasicBlock::iterator Paired,
- bool mergeForward) {
+ bool MergeForward) {
MachineBasicBlock::iterator NextI = I;
++NextI;
// If NextI is the second of the two instructions to be merged, we need
@@ -276,12 +285,12 @@ AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
unsigned NewOpc = getMatchingPairOpcode(I->getOpcode());
// Insert our new paired instruction after whichever of the paired
- // instructions mergeForward indicates.
- MachineBasicBlock::iterator InsertionPoint = mergeForward ? Paired : I;
- // Also based on mergeForward is from where we copy the base register operand
+ // instructions MergeForward indicates.
+ MachineBasicBlock::iterator InsertionPoint = MergeForward ? Paired : I;
+ // Also based on MergeForward is from where we copy the base register operand
// so we get the flags compatible with the input code.
MachineOperand &BaseRegOp =
- mergeForward ? Paired->getOperand(1) : I->getOperand(1);
+ MergeForward ? Paired->getOperand(1) : I->getOperand(1);
// Which register is Rt and which is Rt2 depends on the offset order.
MachineInstr *RtMI, *Rt2MI;
@@ -355,8 +364,8 @@ static bool inBoundsForPair(bool IsUnscaled, int Offset, int OffsetStride) {
if (IsUnscaled) {
// Convert the byte-offset used by unscaled into an "element" offset used
// by the scaled pair load/store instructions.
- int elemOffset = Offset / OffsetStride;
- if (elemOffset > 63 || elemOffset < -64)
+ int ElemOffset = Offset / OffsetStride;
+ if (ElemOffset > 63 || ElemOffset < -64)
return false;
}
return true;
@@ -374,14 +383,14 @@ static int alignTo(int Num, int PowOf2) {
/// be combined with the current instruction into a load/store pair.
MachineBasicBlock::iterator
AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
- bool &mergeForward, unsigned Limit) {
+ bool &MergeForward, unsigned Limit) {
MachineBasicBlock::iterator E = I->getParent()->end();
MachineBasicBlock::iterator MBBI = I;
MachineInstr *FirstMI = I;
++MBBI;
int Opc = FirstMI->getOpcode();
- bool mayLoad = FirstMI->mayLoad();
+ bool MayLoad = FirstMI->mayLoad();
bool IsUnscaled = isUnscaledLdst(Opc);
unsigned Reg = FirstMI->getOperand(0).getReg();
unsigned BaseReg = FirstMI->getOperand(1).getReg();
@@ -453,7 +462,7 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
// If the destination register of the loads is the same register, bail
// and keep looking. A load-pair instruction with both destination
// registers the same is UNPREDICTABLE and will result in an exception.
- if (mayLoad && Reg == MI->getOperand(0).getReg()) {
+ if (MayLoad && Reg == MI->getOperand(0).getReg()) {
trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI);
continue;
}
@@ -462,7 +471,7 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
// the two instructions, we can combine the second into the first.
if (!ModifiedRegs[MI->getOperand(0).getReg()] &&
!UsedRegs[MI->getOperand(0).getReg()]) {
- mergeForward = false;
+ MergeForward = false;
return MBBI;
}
@@ -471,7 +480,7 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
// second.
if (!ModifiedRegs[FirstMI->getOperand(0).getReg()] &&
!UsedRegs[FirstMI->getOperand(0).getReg()]) {
- mergeForward = true;
+ MergeForward = true;
return MBBI;
}
// Unable to combine these instructions due to interference in between.
@@ -798,14 +807,14 @@ bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB) {
break;
}
// Look ahead up to ScanLimit instructions for a pairable instruction.
- bool mergeForward = false;
+ bool MergeForward = false;
MachineBasicBlock::iterator Paired =
- findMatchingInsn(MBBI, mergeForward, ScanLimit);
+ findMatchingInsn(MBBI, MergeForward, ScanLimit);
if (Paired != E) {
// Merge the loads into a pair. Keeping the iterator straight is a
// pain, so we let the merge routine tell us what the next instruction
// is after it's done mucking about.
- MBBI = mergePairedInsns(MBBI, Paired, mergeForward);
+ MBBI = mergePairedInsns(MBBI, Paired, MergeForward);
Modified = true;
++NumPairCreated;
diff --git a/lib/Target/AArch64/AArch64MCInstLower.cpp b/lib/Target/AArch64/AArch64MCInstLower.cpp
index ab6d375..75a17b9 100644
--- a/lib/Target/AArch64/AArch64MCInstLower.cpp
+++ b/lib/Target/AArch64/AArch64MCInstLower.cpp
@@ -51,7 +51,7 @@ MCOperand AArch64MCInstLower::lowerSymbolOperandDarwin(const MachineOperand &MO,
AArch64II::MO_PAGEOFF)
RefKind = MCSymbolRefExpr::VK_GOTPAGEOFF;
else
- assert(0 && "Unexpected target flags with MO_GOT on GV operand");
+ llvm_unreachable("Unexpected target flags with MO_GOT on GV operand");
} else if ((MO.getTargetFlags() & AArch64II::MO_TLS) != 0) {
if ((MO.getTargetFlags() & AArch64II::MO_FRAGMENT) == AArch64II::MO_PAGE)
RefKind = MCSymbolRefExpr::VK_TLVPPAGE;
@@ -154,7 +154,7 @@ bool AArch64MCInstLower::lowerOperand(const MachineOperand &MO,
MCOperand &MCOp) const {
switch (MO.getType()) {
default:
- assert(0 && "unknown operand type");
+ llvm_unreachable("unknown operand type");
case MachineOperand::MO_Register:
// Ignore all implicit register operands.
if (MO.isImplicit())
diff --git a/lib/Target/AArch64/AArch64RegisterInfo.td b/lib/Target/AArch64/AArch64RegisterInfo.td
index 21c927f..a30e4ad 100644
--- a/lib/Target/AArch64/AArch64RegisterInfo.td
+++ b/lib/Target/AArch64/AArch64RegisterInfo.td
@@ -175,7 +175,7 @@ def GPR64all : RegisterClass<"AArch64", [i64], 64, (add GPR64common, XZR, SP)>;
// This is for indirect tail calls to store the address of the destination.
def tcGPR64 : RegisterClass<"AArch64", [i64], 64, (sub GPR64common, X19, X20, X21,
X22, X23, X24, X25, X26,
- X27, X28)>;
+ X27, X28, FP, LR)>;
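+// FP and LR are excluded as well, since both must still be intact when the
+// tail-call branch is taken.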
// GPR register classes for post increment amount of vector load/store that
// has alternate printing when Rm=31 and prints a constant immediate value
diff --git a/lib/Target/AArch64/AArch64SchedA53.td b/lib/Target/AArch64/AArch64SchedA53.td
index 0c3949e..d709bee 100644
--- a/lib/Target/AArch64/AArch64SchedA53.td
+++ b/lib/Target/AArch64/AArch64SchedA53.td
@@ -148,9 +148,9 @@ def : ReadAdvance<ReadVLD, 0>;
// ALU - Most operands in the ALU pipes are not needed for two cycles. Shiftable
// operands are needed one cycle later if and only if they are to be
-// shifted. Otherwise, they too are needed two cycle later. This same
+// shifted. Otherwise, they too are needed two cycles later. This same
// ReadAdvance applies to Extended registers as well, even though there is
-// a seperate SchedPredicate for them.
+// a separate SchedPredicate for them.
def : ReadAdvance<ReadI, 2, [WriteImm,WriteI,
WriteISReg, WriteIEReg,WriteIS,
WriteID32,WriteID64,
diff --git a/lib/Target/AArch64/AArch64SchedA57.td b/lib/Target/AArch64/AArch64SchedA57.td
new file mode 100644
index 0000000..8209f96
--- /dev/null
+++ b/lib/Target/AArch64/AArch64SchedA57.td
@@ -0,0 +1,304 @@
+//=- AArch64SchedA57.td - ARM Cortex-A57 Scheduling Defs -----*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the machine model for ARM Cortex-A57 to support
+// instruction scheduling and other instruction cost heuristics.
+//
+//===----------------------------------------------------------------------===//
+
+def CortexA57Model : SchedMachineModel {
+ let IssueWidth = 8; // 3-way decode and 8-way issue
+ let MicroOpBufferSize = 128; // 128 micro-op re-order buffer
+ let LoadLatency = 4; // Optimistic load latency
+ let MispredictPenalty = 14; // Fetch + Decode/Rename/Dispatch + Branch
+}
+
+//===----------------------------------------------------------------------===//
+// Define each kind of processor resource and number available on Cortex-A57.
+// Cortex-A57 has 8 pipelines, each with its own 8-entry queue, where
+// micro-ops wait for their operands and then issue out-of-order.
+
+def A57UnitB : ProcResource<1> { let BufferSize = 8; } // Type B micro-ops
+def A57UnitI : ProcResource<2> { let BufferSize = 8; } // Type I micro-ops
+def A57UnitM : ProcResource<1> { let BufferSize = 8; } // Type M micro-ops
+def A57UnitL : ProcResource<1> { let BufferSize = 8; } // Type L micro-ops
+def A57UnitS : ProcResource<1> { let BufferSize = 8; } // Type S micro-ops
+def A57UnitX : ProcResource<1> { let BufferSize = 8; } // Type X micro-ops
+def A57UnitW : ProcResource<1> { let BufferSize = 8; } // Type W micro-ops
+let SchedModel = CortexA57Model in {
+ def A57UnitV : ProcResGroup<[A57UnitX, A57UnitW]>; // Type V micro-ops
+}
+
+
+let SchedModel = CortexA57Model in {
+
+//===----------------------------------------------------------------------===//
+// Define customized scheduler read/write types specific to the Cortex-A57.
+
+include "AArch64SchedA57WriteRes.td"
+
+//===----------------------------------------------------------------------===//
+// Map the target-defined scheduler read/write resources and latency for
+// Cortex-A57. The Cortex-A57 types are directly associated with resources, so
+// defining the aliases precludes the need for mapping them using WriteRes. The
+// aliases are sufficient for creating a coarse, working model. As the model
+// evolves, InstRWs will be used to override these SchedAliases.
+
+def : SchedAlias<WriteImm, A57Write_1cyc_1I>;
+def : SchedAlias<WriteI, A57Write_1cyc_1I>;
+def : SchedAlias<WriteISReg, A57Write_2cyc_1M>;
+def : SchedAlias<WriteIEReg, A57Write_2cyc_1M>;
+def : SchedAlias<WriteExtr, A57Write_1cyc_1I>;
+def : SchedAlias<WriteIS, A57Write_1cyc_1I>;
+def : SchedAlias<WriteID32, A57Write_19cyc_1M>;
+def : SchedAlias<WriteID64, A57Write_35cyc_1M>;
+def : SchedAlias<WriteIM32, A57Write_3cyc_1M>;
+def : SchedAlias<WriteIM64, A57Write_5cyc_1M>;
+def : SchedAlias<WriteBr, A57Write_1cyc_1B>;
+def : SchedAlias<WriteBrReg, A57Write_1cyc_1B>;
+def : SchedAlias<WriteLD, A57Write_4cyc_1L>;
+def : SchedAlias<WriteST, A57Write_1cyc_1S>;
+def : SchedAlias<WriteSTP, A57Write_1cyc_1S>;
+def : SchedAlias<WriteAdr, A57Write_1cyc_1I>;
+def : SchedAlias<WriteLDIdx, A57Write_4cyc_1I_1L>;
+def : SchedAlias<WriteSTIdx, A57Write_1cyc_1I_1S>;
+def : SchedAlias<WriteF, A57Write_3cyc_1V>;
+def : SchedAlias<WriteFCmp, A57Write_3cyc_1V>;
+def : SchedAlias<WriteFCvt, A57Write_5cyc_1V>;
+def : SchedAlias<WriteFCopy, A57Write_3cyc_1V>;
+def : SchedAlias<WriteFImm, A57Write_3cyc_1V>;
+def : SchedAlias<WriteFMul, A57Write_5cyc_1V>;
+def : SchedAlias<WriteFDiv, A57Write_18cyc_1X>;
+def : SchedAlias<WriteV, A57Write_3cyc_1V>;
+def : SchedAlias<WriteVLD, A57Write_5cyc_1L>;
+def : SchedAlias<WriteVST, A57Write_1cyc_1S>;
+
+def : WriteRes<WriteSys, []> { let Latency = 1; }
+def : WriteRes<WriteBarrier, []> { let Latency = 1; }
+def : WriteRes<WriteHint, []> { let Latency = 1; }
+
+def : WriteRes<WriteLDHi, []> { let Latency = 4; }
+
+// Forwarding logic is not [yet] explicitly modeled beyond what is captured
+// in the latencies of the A57 Generic SchedWriteRes's.
+def : ReadAdvance<ReadI, 0>;
+def : ReadAdvance<ReadISReg, 0>;
+def : ReadAdvance<ReadIEReg, 0>;
+def : ReadAdvance<ReadIM, 0>;
+def : ReadAdvance<ReadIMA, 0>;
+def : ReadAdvance<ReadID, 0>;
+def : ReadAdvance<ReadExtrHi, 0>;
+def : ReadAdvance<ReadAdrBase, 0>;
+def : ReadAdvance<ReadVLD, 0>;
+
+
+//===----------------------------------------------------------------------===//
+// Specialize the coarse model by associating instruction groups with the
+// subtarget-defined types. As the model is refined, this will override most
+// of the above SchedAlias mappings.
+
+// Miscellaneous
+// -----------------------------------------------------------------------------
+
+def : InstRW<[WriteI], (instrs COPY)>;
+
+
+// Branch Instructions
+// -----------------------------------------------------------------------------
+
+def : InstRW<[A57Write_1cyc_1B_1I], (instrs BL)>;
+def : InstRW<[A57Write_2cyc_1B_1I], (instrs BLR)>;
+
+
+// Divide and Multiply Instructions
+// -----------------------------------------------------------------------------
+
+// Multiply high
+def : InstRW<[A57Write_6cyc_1M], (instrs SMULHrr, UMULHrr)>;
+
+
+// Miscellaneous Data-Processing Instructions
+// -----------------------------------------------------------------------------
+
+def : InstRW<[A57Write_1cyc_1I], (instrs EXTRWrri)>;
+def : InstRW<[A57Write_3cyc_1I_1M], (instrs EXTRXrri)>;
+def : InstRW<[A57Write_2cyc_1M], (instregex "BFM")>;
+
+
+// Cryptography Extensions
+// -----------------------------------------------------------------------------
+
+def : InstRW<[A57Write_3cyc_1W], (instregex "CRC32")>;
+
+
+// Vector Load
+// -----------------------------------------------------------------------------
+
+def : InstRW<[A57Write_8cyc_1L_1V], (instregex "LD1i(8|16|32)$")>;
+def : InstRW<[A57Write_8cyc_1L_1V, WriteAdr], (instregex "LD1i(8|16|32)_POST$")>;
+def : InstRW<[A57Write_5cyc_1L], (instregex "LD1i(64)$")>;
+def : InstRW<[A57Write_5cyc_1L, WriteAdr], (instregex "LD1i(64)_POST$")>;
+
+def : InstRW<[A57Write_8cyc_1L_1V], (instregex "LD1Rv(8b|4h|2s)$")>;
+def : InstRW<[A57Write_8cyc_1L_1V, WriteAdr], (instregex "LD1Rv(8b|4h|2s)_POST$")>;
+def : InstRW<[A57Write_5cyc_1L], (instregex "LD1Rv(1d)$")>;
+def : InstRW<[A57Write_5cyc_1L, WriteAdr], (instregex "LD1Rv(1d)_POST$")>;
+def : InstRW<[A57Write_8cyc_1L_1V], (instregex "LD1Rv(16b|8h|4s|2d)$")>;
+def : InstRW<[A57Write_8cyc_1L_1V, WriteAdr], (instregex "LD1Rv(16b|8h|4s|2d)_POST$")>;
+
+def : InstRW<[A57Write_5cyc_1L], (instregex "LD1Onev(8b|4h|2s|1d)$")>;
+def : InstRW<[A57Write_5cyc_1L, WriteAdr], (instregex "LD1Onev(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[A57Write_5cyc_1L], (instregex "LD1Onev(16b|8h|4s|2d)$")>;
+def : InstRW<[A57Write_5cyc_1L, WriteAdr], (instregex "LD1Onev(16b|8h|4s|2d)_POST$")>;
+def : InstRW<[A57Write_5cyc_1L], (instregex "LD1Twov(8b|4h|2s|1d)$")>;
+def : InstRW<[A57Write_5cyc_1L, WriteAdr], (instregex "LD1Twov(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[A57Write_6cyc_2L], (instregex "LD1Twov(16b|8h|4s|2d)$")>;
+def : InstRW<[A57Write_6cyc_2L, WriteAdr], (instregex "LD1Twov(16b|8h|4s|2d)_POST$")>;
+def : InstRW<[A57Write_6cyc_2L], (instregex "LD1Threev(8b|4h|2s|1d)$")>;
+def : InstRW<[A57Write_6cyc_2L, WriteAdr], (instregex "LD1Threev(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[A57Write_7cyc_3L], (instregex "LD1Threev(16b|8h|4s|2d)$")>;
+def : InstRW<[A57Write_7cyc_3L, WriteAdr], (instregex "LD1Threev(16b|8h|4s|2d)_POST$")>;
+def : InstRW<[A57Write_6cyc_2L], (instregex "LD1Fourv(8b|4h|2s|1d)$")>;
+def : InstRW<[A57Write_6cyc_2L, WriteAdr], (instregex "LD1Fourv(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[A57Write_8cyc_4L], (instregex "LD1Fourv(16b|8h|4s|2d)$")>;
+def : InstRW<[A57Write_8cyc_4L, WriteAdr], (instregex "LD1Fourv(16b|8h|4s|2d)_POST$")>;
+
+def : InstRW<[A57Write_8cyc_1L_2V], (instregex "LD2i(8|16)$")>;
+def : InstRW<[A57Write_8cyc_1L_2V, WriteAdr], (instregex "LD2i(8|16)_POST$")>;
+def : InstRW<[A57Write_6cyc_2L], (instregex "LD2i(32)$")>;
+def : InstRW<[A57Write_6cyc_2L, WriteAdr], (instregex "LD2i(32)_POST$")>;
+def : InstRW<[A57Write_8cyc_1L_1V], (instregex "LD2i(64)$")>;
+def : InstRW<[A57Write_8cyc_1L_1V, WriteAdr], (instregex "LD2i(64)_POST$")>;
+
+def : InstRW<[A57Write_8cyc_1L_1V], (instregex "LD2Rv(8b|4h|2s)$")>;
+def : InstRW<[A57Write_8cyc_1L_1V, WriteAdr], (instregex "LD2Rv(8b|4h|2s)_POST$")>;
+def : InstRW<[A57Write_5cyc_1L], (instregex "LD2Rv(1d)$")>;
+def : InstRW<[A57Write_5cyc_1L, WriteAdr], (instregex "LD2Rv(1d)_POST$")>;
+def : InstRW<[A57Write_8cyc_1L_2V], (instregex "LD2Rv(16b|8h|4s|2d)$")>;
+def : InstRW<[A57Write_8cyc_1L_2V, WriteAdr], (instregex "LD2Rv(16b|8h|4s|2d)_POST$")>;
+
+def : InstRW<[A57Write_8cyc_1L_1V], (instregex "LD2Twov(8b|4h|2s)$")>;
+def : InstRW<[A57Write_8cyc_1L_1V, WriteAdr], (instregex "LD2Twov(8b|4h|2s)_POST$")>;
+def : InstRW<[A57Write_9cyc_2L_2V], (instregex "LD2Twov(16b|8h|4s)$")>;
+def : InstRW<[A57Write_9cyc_2L_2V, WriteAdr], (instregex "LD2Twov(16b|8h|4s)_POST$")>;
+def : InstRW<[A57Write_6cyc_2L], (instregex "LD2Twov(2d)$")>;
+def : InstRW<[A57Write_6cyc_2L, WriteAdr], (instregex "LD2Twov(2d)_POST$")>;
+
+def : InstRW<[A57Write_9cyc_1L_3V], (instregex "LD3i(8|16)$")>;
+def : InstRW<[A57Write_9cyc_1L_3V, WriteAdr], (instregex "LD3i(8|16)_POST$")>;
+def : InstRW<[A57Write_8cyc_1L_2V], (instregex "LD3i(32)$")>;
+def : InstRW<[A57Write_8cyc_1L_2V, WriteAdr], (instregex "LD3i(32)_POST$")>;
+def : InstRW<[A57Write_6cyc_2L], (instregex "LD3i(64)$")>;
+def : InstRW<[A57Write_6cyc_2L, WriteAdr], (instregex "LD3i(64)_POST$")>;
+
+def : InstRW<[A57Write_8cyc_1L_2V], (instregex "LD3Rv(8b|4h|2s)$")>;
+def : InstRW<[A57Write_8cyc_1L_2V, WriteAdr], (instregex "LD3Rv(8b|4h|2s)_POST$")>;
+def : InstRW<[A57Write_6cyc_2L], (instregex "LD3Rv(1d)$")>;
+def : InstRW<[A57Write_6cyc_2L, WriteAdr], (instregex "LD3Rv(1d)_POST$")>;
+def : InstRW<[A57Write_9cyc_1L_3V], (instregex "LD3Rv(16b|8h|4s)$")>;
+def : InstRW<[A57Write_9cyc_1L_3V, WriteAdr], (instregex "LD3Rv(16b|8h|4s)_POST$")>;
+def : InstRW<[A57Write_9cyc_2L_3V], (instregex "LD3Rv(2d)$")>;
+def : InstRW<[A57Write_9cyc_2L_3V, WriteAdr], (instregex "LD3Rv(2d)_POST$")>;
+
+def : InstRW<[A57Write_9cyc_2L_2V], (instregex "LD3Threev(8b|4h|2s)$")>;
+def : InstRW<[A57Write_9cyc_2L_2V, WriteAdr], (instregex "LD3Threev(8b|4h|2s)_POST$")>;
+def : InstRW<[A57Write_10cyc_3L_4V], (instregex "LD3Threev(16b|8h|4s)$")>;
+def : InstRW<[A57Write_10cyc_3L_4V, WriteAdr], (instregex "LD3Threev(16b|8h|4s)_POST$")>;
+def : InstRW<[A57Write_8cyc_4L], (instregex "LD3Threev(2d)$")>;
+def : InstRW<[A57Write_8cyc_4L, WriteAdr], (instregex "LD3Threev(2d)_POST$")>;
+
+def : InstRW<[A57Write_9cyc_2L_3V], (instregex "LD4i(8|16)$")>;
+def : InstRW<[A57Write_9cyc_2L_3V, WriteAdr], (instregex "LD4i(8|16)_POST$")>;
+def : InstRW<[A57Write_8cyc_1L_2V], (instregex "LD4i(32)$")>;
+def : InstRW<[A57Write_8cyc_1L_2V, WriteAdr], (instregex "LD4i(32)_POST$")>;
+def : InstRW<[A57Write_9cyc_2L_3V], (instregex "LD4i(64)$")>;
+def : InstRW<[A57Write_9cyc_2L_3V, WriteAdr], (instregex "LD4i(64)_POST$")>;
+
+def : InstRW<[A57Write_8cyc_1L_2V], (instregex "LD4Rv(8b|4h|2s)$")>;
+def : InstRW<[A57Write_8cyc_1L_2V, WriteAdr], (instregex "LD4Rv(8b|4h|2s)_POST$")>;
+def : InstRW<[A57Write_6cyc_2L], (instregex "LD4Rv(1d)$")>;
+def : InstRW<[A57Write_6cyc_2L, WriteAdr], (instregex "LD4Rv(1d)_POST$")>;
+def : InstRW<[A57Write_9cyc_2L_3V], (instregex "LD4Rv(16b|8h|4s)$")>;
+def : InstRW<[A57Write_9cyc_2L_3V, WriteAdr], (instregex "LD4Rv(16b|8h|4s)_POST$")>;
+def : InstRW<[A57Write_9cyc_2L_4V], (instregex "LD4Rv(2d)$")>;
+def : InstRW<[A57Write_9cyc_2L_4V, WriteAdr], (instregex "LD4Rv(2d)_POST$")>;
+
+def : InstRW<[A57Write_9cyc_2L_2V], (instregex "LD4Fourv(8b|4h|2s)$")>;
+def : InstRW<[A57Write_9cyc_2L_2V, WriteAdr], (instregex "LD4Fourv(8b|4h|2s)_POST$")>;
+def : InstRW<[A57Write_11cyc_4L_4V], (instregex "LD4Fourv(16b|8h|4s)$")>;
+def : InstRW<[A57Write_11cyc_4L_4V, WriteAdr], (instregex "LD4Fourv(16b|8h|4s)_POST$")>;
+def : InstRW<[A57Write_8cyc_4L], (instregex "LD4Fourv(2d)$")>;
+def : InstRW<[A57Write_8cyc_4L, WriteAdr], (instregex "LD4Fourv(2d)_POST$")>;
+
+// Vector Store
+// -----------------------------------------------------------------------------
+
+def : InstRW<[A57Write_1cyc_1S], (instregex "ST1i(8|16|32)$")>;
+def : InstRW<[A57Write_1cyc_1S, WriteAdr], (instregex "ST1i(8|16|32)_POST$")>;
+def : InstRW<[A57Write_3cyc_1S_1V], (instregex "ST1i(64)$")>;
+def : InstRW<[A57Write_3cyc_1S_1V, WriteAdr], (instregex "ST1i(64)_POST$")>;
+
+def : InstRW<[A57Write_1cyc_1S], (instregex "ST1Onev(8b|4h|2s|1d)$")>;
+def : InstRW<[A57Write_1cyc_1S, WriteAdr], (instregex "ST1Onev(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[A57Write_2cyc_2S], (instregex "ST1Onev(16b|8h|4s|2d)$")>;
+def : InstRW<[A57Write_2cyc_2S, WriteAdr], (instregex "ST1Onev(16b|8h|4s|2d)_POST$")>;
+def : InstRW<[A57Write_2cyc_2S], (instregex "ST1Twov(8b|4h|2s|1d)$")>;
+def : InstRW<[A57Write_2cyc_2S, WriteAdr], (instregex "ST1Twov(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[A57Write_4cyc_4S], (instregex "ST1Twov(16b|8h|4s|2d)$")>;
+def : InstRW<[A57Write_4cyc_4S, WriteAdr], (instregex "ST1Twov(16b|8h|4s|2d)_POST$")>;
+def : InstRW<[A57Write_3cyc_3S], (instregex "ST1Threev(8b|4h|2s|1d)$")>;
+def : InstRW<[A57Write_3cyc_3S, WriteAdr], (instregex "ST1Threev(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[A57Write_6cyc_6S], (instregex "ST1Threev(16b|8h|4s|2d)$")>;
+def : InstRW<[A57Write_6cyc_6S, WriteAdr], (instregex "ST1Threev(16b|8h|4s|2d)_POST$")>;
+def : InstRW<[A57Write_4cyc_4S], (instregex "ST1Fourv(8b|4h|2s|1d)$")>;
+def : InstRW<[A57Write_4cyc_4S, WriteAdr], (instregex "ST1Fourv(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[A57Write_8cyc_8S], (instregex "ST1Fourv(16b|8h|4s|2d)$")>;
+def : InstRW<[A57Write_8cyc_8S, WriteAdr], (instregex "ST1Fourv(16b|8h|4s|2d)_POST$")>;
+
+def : InstRW<[A57Write_3cyc_1S_1V], (instregex "ST2i(8|16|32)$")>;
+def : InstRW<[A57Write_3cyc_1S_1V, WriteAdr], (instregex "ST2i(8|16|32)_POST$")>;
+def : InstRW<[A57Write_2cyc_2S], (instregex "ST2i(64)$")>;
+def : InstRW<[A57Write_2cyc_2S, WriteAdr], (instregex "ST2i(64)_POST$")>;
+
+def : InstRW<[A57Write_3cyc_2S_1V], (instregex "ST2Twov(8b|4h|2s)$")>;
+def : InstRW<[A57Write_3cyc_2S_1V, WriteAdr], (instregex "ST2Twov(8b|4h|2s)_POST$")>;
+def : InstRW<[A57Write_4cyc_4S_2V], (instregex "ST2Twov(16b|8h|4s)$")>;
+def : InstRW<[A57Write_4cyc_4S_2V, WriteAdr], (instregex "ST2Twov(16b|8h|4s)_POST$")>;
+def : InstRW<[A57Write_4cyc_4S], (instregex "ST2Twov(2d)$")>;
+def : InstRW<[A57Write_4cyc_4S, WriteAdr], (instregex "ST2Twov(2d)_POST$")>;
+
+def : InstRW<[A57Write_3cyc_1S_1V], (instregex "ST3i(8|16)$")>;
+def : InstRW<[A57Write_3cyc_1S_1V, WriteAdr], (instregex "ST3i(8|16)_POST$")>;
+def : InstRW<[A57Write_3cyc_3S], (instregex "ST3i(32)$")>;
+def : InstRW<[A57Write_3cyc_3S, WriteAdr], (instregex "ST3i(32)_POST$")>;
+def : InstRW<[A57Write_3cyc_2S_1V], (instregex "ST3i(64)$")>;
+def : InstRW<[A57Write_3cyc_2S_1V, WriteAdr], (instregex "ST3i(64)_POST$")>;
+
+def : InstRW<[A57Write_3cyc_3S_2V], (instregex "ST3Threev(8b|4h|2s)$")>;
+def : InstRW<[A57Write_3cyc_3S_2V, WriteAdr], (instregex "ST3Threev(8b|4h|2s)_POST$")>;
+def : InstRW<[A57Write_6cyc_6S_4V], (instregex "ST3Threev(16b|8h|4s)$")>;
+def : InstRW<[A57Write_6cyc_6S_4V, WriteAdr], (instregex "ST3Threev(16b|8h|4s)_POST$")>;
+def : InstRW<[A57Write_6cyc_6S], (instregex "ST3Threev(2d)$")>;
+def : InstRW<[A57Write_6cyc_6S, WriteAdr], (instregex "ST3Threev(2d)_POST$")>;
+
+def : InstRW<[A57Write_3cyc_1S_1V], (instregex "ST4i(8|16)$")>;
+def : InstRW<[A57Write_3cyc_1S_1V, WriteAdr], (instregex "ST4i(8|16)_POST$")>;
+def : InstRW<[A57Write_4cyc_4S], (instregex "ST4i(32)$")>;
+def : InstRW<[A57Write_4cyc_4S, WriteAdr], (instregex "ST4i(32)_POST$")>;
+def : InstRW<[A57Write_3cyc_2S_1V], (instregex "ST4i(64)$")>;
+def : InstRW<[A57Write_3cyc_2S_1V, WriteAdr], (instregex "ST4i(64)_POST$")>;
+
+def : InstRW<[A57Write_4cyc_4S_2V], (instregex "ST4Fourv(8b|4h|2s)$")>;
+def : InstRW<[A57Write_4cyc_4S_2V, WriteAdr], (instregex "ST4Fourv(8b|4h|2s)_POST$")>;
+def : InstRW<[A57Write_8cyc_8S_4V], (instregex "ST4Fourv(16b|8h|4s)$")>;
+def : InstRW<[A57Write_8cyc_8S_4V, WriteAdr], (instregex "ST4Fourv(16b|8h|4s)_POST$")>;
+def : InstRW<[A57Write_8cyc_8S], (instregex "ST4Fourv(2d)$")>;
+def : InstRW<[A57Write_8cyc_8S, WriteAdr], (instregex "ST4Fourv(2d)_POST$")>;
+
+} // SchedModel = CortexA57Model
diff --git a/lib/Target/AArch64/AArch64SchedA57WriteRes.td b/lib/Target/AArch64/AArch64SchedA57WriteRes.td
new file mode 100644
index 0000000..a8f421b
--- /dev/null
+++ b/lib/Target/AArch64/AArch64SchedA57WriteRes.td
@@ -0,0 +1,512 @@
+//=- AArch64SchedA57WriteRes.td - ARM Cortex-A57 Write Res ---*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Contains all of the Cortex-A57 specific SchedWriteRes types. The approach
+// below is to define a generic SchedWriteRes for every combination of
+// latency and microOps. The naming conventions is to use a prefix, one field
+// for latency, and one or more microOp count/type designators.
+// Prefix: A57Write
+// Latency: #cyc
+// MicroOp Count/Types: #(B|I|M|L|S|X|W|V)
+//
+// e.g. A57Write_6cyc_1I_6S_4V means the total latency is 6 and there are
+// 11 micro-ops to be issued down one I pipe, six S pipes and four V pipes.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Define Generic 1 micro-op types
+
+def A57Write_5cyc_1L : SchedWriteRes<[A57UnitL]> { let Latency = 5; }
+def A57Write_5cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 5; }
+def A57Write_5cyc_1V : SchedWriteRes<[A57UnitV]> { let Latency = 5; }
+def A57Write_5cyc_1W : SchedWriteRes<[A57UnitW]> { let Latency = 5; }
+def A57Write_10cyc_1V : SchedWriteRes<[A57UnitV]> { let Latency = 10; }
+def A57Write_18cyc_1X : SchedWriteRes<[A57UnitX]> { let Latency = 18; }
+def A57Write_19cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 19; }
+def A57Write_1cyc_1B : SchedWriteRes<[A57UnitB]> { let Latency = 1; }
+def A57Write_1cyc_1I : SchedWriteRes<[A57UnitI]> { let Latency = 1; }
+def A57Write_1cyc_1S : SchedWriteRes<[A57UnitS]> { let Latency = 1; }
+def A57Write_2cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 2; }
+def A57Write_32cyc_1X : SchedWriteRes<[A57UnitX]> { let Latency = 32; }
+def A57Write_35cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 35; }
+def A57Write_3cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 3; }
+def A57Write_3cyc_1V : SchedWriteRes<[A57UnitV]> { let Latency = 3; }
+def A57Write_3cyc_1W : SchedWriteRes<[A57UnitW]> { let Latency = 3; }
+def A57Write_3cyc_1X : SchedWriteRes<[A57UnitX]> { let Latency = 3; }
+def A57Write_4cyc_1L : SchedWriteRes<[A57UnitL]> { let Latency = 4; }
+def A57Write_4cyc_1X : SchedWriteRes<[A57UnitX]> { let Latency = 4; }
+def A57Write_9cyc_1V : SchedWriteRes<[A57UnitV]> { let Latency = 9; }
+def A57Write_6cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 6; }
+def A57Write_6cyc_1V : SchedWriteRes<[A57UnitV]> { let Latency = 6; }
+
+
+//===----------------------------------------------------------------------===//
+// Define Generic 2 micro-op types
+
+def A57Write_64cyc_2X : SchedWriteRes<[A57UnitX, A57UnitX]> {
+ let Latency = 64;
+ let NumMicroOps = 2;
+}
+def A57Write_6cyc_1I_1L : SchedWriteRes<[A57UnitI,
+ A57UnitL]> {
+ let Latency = 6;
+ let NumMicroOps = 2;
+}
+def A57Write_7cyc_1V_1X : SchedWriteRes<[A57UnitV,
+ A57UnitX]> {
+ let Latency = 7;
+ let NumMicroOps = 2;
+}
+def A57Write_8cyc_1L_1V : SchedWriteRes<[A57UnitL,
+ A57UnitV]> {
+ let Latency = 8;
+ let NumMicroOps = 2;
+}
+def A57Write_9cyc_2V : SchedWriteRes<[A57UnitV, A57UnitV]> {
+ let Latency = 9;
+ let NumMicroOps = 2;
+}
+def A57Write_8cyc_2X : SchedWriteRes<[A57UnitX, A57UnitX]> {
+ let Latency = 8;
+ let NumMicroOps = 2;
+}
+def A57Write_6cyc_2L : SchedWriteRes<[A57UnitL, A57UnitL]> {
+ let Latency = 6;
+ let NumMicroOps = 2;
+}
+def A57Write_6cyc_2V : SchedWriteRes<[A57UnitV, A57UnitV]> {
+ let Latency = 6;
+ let NumMicroOps = 2;
+}
+def A57Write_6cyc_2W : SchedWriteRes<[A57UnitW, A57UnitW]> {
+ let Latency = 6;
+ let NumMicroOps = 2;
+}
+def A57Write_5cyc_1I_1L : SchedWriteRes<[A57UnitI,
+ A57UnitL]> {
+ let Latency = 5;
+ let NumMicroOps = 2;
+}
+def A57Write_5cyc_2V : SchedWriteRes<[A57UnitV, A57UnitV]> {
+ let Latency = 5;
+ let NumMicroOps = 2;
+}
+def A57Write_5cyc_2X : SchedWriteRes<[A57UnitX, A57UnitX]> {
+ let Latency = 5;
+ let NumMicroOps = 2;
+}
+def A57Write_10cyc_1L_1V : SchedWriteRes<[A57UnitL,
+ A57UnitV]> {
+ let Latency = 10;
+ let NumMicroOps = 2;
+}
+def A57Write_10cyc_2V : SchedWriteRes<[A57UnitV, A57UnitV]> {
+ let Latency = 10;
+ let NumMicroOps = 2;
+}
+def A57Write_1cyc_1B_1I : SchedWriteRes<[A57UnitB,
+ A57UnitI]> {
+ let Latency = 1;
+ let NumMicroOps = 2;
+}
+def A57Write_1cyc_1I_1S : SchedWriteRes<[A57UnitI,
+ A57UnitS]> {
+ let Latency = 1;
+ let NumMicroOps = 2;
+}
+def A57Write_2cyc_1B_1I : SchedWriteRes<[A57UnitB,
+ A57UnitI]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+}
+def A57Write_2cyc_2S : SchedWriteRes<[A57UnitS, A57UnitS]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+}
+def A57Write_2cyc_2V : SchedWriteRes<[A57UnitV, A57UnitV]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+}
+def A57Write_36cyc_2X : SchedWriteRes<[A57UnitX, A57UnitX]> {
+ let Latency = 36;
+ let NumMicroOps = 2;
+}
+def A57Write_3cyc_1I_1M : SchedWriteRes<[A57UnitI,
+ A57UnitM]> {
+ let Latency = 3;
+ let NumMicroOps = 2;
+}
+def A57Write_3cyc_1I_1S : SchedWriteRes<[A57UnitI,
+ A57UnitS]> {
+ let Latency = 3;
+ let NumMicroOps = 2;
+}
+def A57Write_3cyc_1S_1V : SchedWriteRes<[A57UnitS,
+ A57UnitV]> {
+ let Latency = 3;
+ let NumMicroOps = 2;
+}
+def A57Write_4cyc_1I_1L : SchedWriteRes<[A57UnitI,
+ A57UnitL]> {
+ let Latency = 4;
+ let NumMicroOps = 2;
+}
+def A57Write_4cyc_2X : SchedWriteRes<[A57UnitX, A57UnitX]> {
+ let Latency = 4;
+ let NumMicroOps = 2;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Define Generic 3 micro-op types
+
+def A57Write_10cyc_3V : SchedWriteRes<[A57UnitV, A57UnitV, A57UnitV]> {
+ let Latency = 10;
+ let NumMicroOps = 3;
+}
+def A57Write_2cyc_1I_2S : SchedWriteRes<[A57UnitI,
+ A57UnitS, A57UnitS]> {
+ let Latency = 2;
+ let NumMicroOps = 3;
+}
+def A57Write_3cyc_1I_1S_1V : SchedWriteRes<[A57UnitI,
+ A57UnitS,
+ A57UnitV]> {
+ let Latency = 3;
+ let NumMicroOps = 3;
+}
+def A57Write_3cyc_1M_2S : SchedWriteRes<[A57UnitM,
+ A57UnitS, A57UnitS]> {
+ let Latency = 3;
+ let NumMicroOps = 3;
+}
+def A57Write_3cyc_3S : SchedWriteRes<[A57UnitS, A57UnitS, A57UnitS]> {
+ let Latency = 3;
+ let NumMicroOps = 3;
+}
+def A57Write_3cyc_2S_1V : SchedWriteRes<[A57UnitS, A57UnitS,
+ A57UnitV]> {
+ let Latency = 3;
+ let NumMicroOps = 3;
+}
+def A57Write_5cyc_1I_2L : SchedWriteRes<[A57UnitI,
+ A57UnitL, A57UnitL]> {
+ let Latency = 5;
+ let NumMicroOps = 3;
+}
+def A57Write_6cyc_1I_2L : SchedWriteRes<[A57UnitI,
+ A57UnitL, A57UnitL]> {
+ let Latency = 6;
+ let NumMicroOps = 3;
+}
+def A57Write_6cyc_3V : SchedWriteRes<[A57UnitV, A57UnitV, A57UnitV]> {
+ let Latency = 6;
+ let NumMicroOps = 3;
+}
+def A57Write_7cyc_3L : SchedWriteRes<[A57UnitL, A57UnitL, A57UnitL]> {
+ let Latency = 7;
+ let NumMicroOps = 3;
+}
+def A57Write_8cyc_1I_1L_1V : SchedWriteRes<[A57UnitI,
+ A57UnitL,
+ A57UnitV]> {
+ let Latency = 8;
+ let NumMicroOps = 3;
+}
+def A57Write_8cyc_1L_2V : SchedWriteRes<[A57UnitL,
+ A57UnitV, A57UnitV]> {
+ let Latency = 8;
+ let NumMicroOps = 3;
+}
+def A57Write_8cyc_3V : SchedWriteRes<[A57UnitV, A57UnitV, A57UnitV]> {
+ let Latency = 8;
+ let NumMicroOps = 3;
+}
+def A57Write_9cyc_3V : SchedWriteRes<[A57UnitV, A57UnitV, A57UnitV]> {
+ let Latency = 9;
+ let NumMicroOps = 3;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Define Generic 4 micro-op types
+
+def A57Write_2cyc_2I_2S : SchedWriteRes<[A57UnitI, A57UnitI,
+ A57UnitS, A57UnitS]> {
+ let Latency = 2;
+ let NumMicroOps = 4;
+}
+def A57Write_3cyc_2I_2S : SchedWriteRes<[A57UnitI, A57UnitI,
+ A57UnitS, A57UnitS]> {
+ let Latency = 3;
+ let NumMicroOps = 4;
+}
+def A57Write_3cyc_1I_3S : SchedWriteRes<[A57UnitI,
+ A57UnitS, A57UnitS, A57UnitS]> {
+ let Latency = 3;
+ let NumMicroOps = 4;
+}
+def A57Write_3cyc_1I_2S_1V : SchedWriteRes<[A57UnitI,
+ A57UnitS, A57UnitS,
+ A57UnitV]> {
+ let Latency = 3;
+ let NumMicroOps = 4;
+}
+def A57Write_4cyc_4S : SchedWriteRes<[A57UnitS, A57UnitS,
+ A57UnitS, A57UnitS]> {
+ let Latency = 4;
+ let NumMicroOps = 4;
+}
+def A57Write_7cyc_1I_3L : SchedWriteRes<[A57UnitI,
+ A57UnitL, A57UnitL, A57UnitL]> {
+ let Latency = 7;
+ let NumMicroOps = 4;
+}
+def A57Write_5cyc_2I_2L : SchedWriteRes<[A57UnitI, A57UnitI,
+ A57UnitL, A57UnitL]> {
+ let Latency = 5;
+ let NumMicroOps = 4;
+}
+def A57Write_8cyc_1I_1L_2V : SchedWriteRes<[A57UnitI,
+ A57UnitL,
+ A57UnitV, A57UnitV]> {
+ let Latency = 8;
+ let NumMicroOps = 4;
+}
+def A57Write_8cyc_4L : SchedWriteRes<[A57UnitL, A57UnitL,
+ A57UnitL, A57UnitL]> {
+ let Latency = 8;
+ let NumMicroOps = 4;
+}
+def A57Write_9cyc_2L_2V : SchedWriteRes<[A57UnitL, A57UnitL,
+ A57UnitV, A57UnitV]> {
+ let Latency = 9;
+ let NumMicroOps = 4;
+}
+def A57Write_9cyc_1L_3V : SchedWriteRes<[A57UnitL,
+ A57UnitV, A57UnitV, A57UnitV]> {
+ let Latency = 9;
+ let NumMicroOps = 4;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Define Generic 5 micro-op types
+
+def A57Write_3cyc_3S_2V : SchedWriteRes<[A57UnitS, A57UnitS, A57UnitS,
+ A57UnitV, A57UnitV]> {
+ let Latency = 3;
+ let NumMicroOps = 5;
+}
+def A57Write_8cyc_1I_4L : SchedWriteRes<[A57UnitI,
+ A57UnitL, A57UnitL,
+ A57UnitL, A57UnitL]> {
+ let Latency = 8;
+ let NumMicroOps = 5;
+}
+def A57Write_4cyc_1I_4S : SchedWriteRes<[A57UnitI,
+ A57UnitS, A57UnitS,
+ A57UnitS, A57UnitS]> {
+ let Latency = 4;
+ let NumMicroOps = 5;
+}
+def A57Write_9cyc_1I_2L_2V : SchedWriteRes<[A57UnitI,
+ A57UnitL, A57UnitL,
+ A57UnitV, A57UnitV]> {
+ let Latency = 9;
+ let NumMicroOps = 5;
+}
+def A57Write_9cyc_1I_1L_3V : SchedWriteRes<[A57UnitI,
+ A57UnitL,
+ A57UnitV, A57UnitV, A57UnitV]> {
+ let Latency = 9;
+ let NumMicroOps = 5;
+}
+def A57Write_9cyc_2L_3V : SchedWriteRes<[A57UnitL, A57UnitL,
+ A57UnitV, A57UnitV, A57UnitV]> {
+ let Latency = 9;
+ let NumMicroOps = 5;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Define Generic 6 micro-op types
+
+def A57Write_3cyc_1I_3S_2V : SchedWriteRes<[A57UnitI,
+ A57UnitS, A57UnitS, A57UnitS,
+ A57UnitV, A57UnitV]> {
+ let Latency = 3;
+ let NumMicroOps = 6;
+}
+def A57Write_4cyc_2I_4S : SchedWriteRes<[A57UnitI, A57UnitI,
+ A57UnitS, A57UnitS,
+ A57UnitS, A57UnitS]> {
+ let Latency = 4;
+ let NumMicroOps = 6;
+}
+def A57Write_4cyc_4S_2V : SchedWriteRes<[A57UnitS, A57UnitS,
+ A57UnitS, A57UnitS,
+ A57UnitV, A57UnitV]> {
+ let Latency = 4;
+ let NumMicroOps = 6;
+}
+def A57Write_6cyc_6S : SchedWriteRes<[A57UnitS, A57UnitS, A57UnitS,
+ A57UnitS, A57UnitS, A57UnitS]> {
+ let Latency = 6;
+ let NumMicroOps = 6;
+}
+def A57Write_9cyc_1I_2L_3V : SchedWriteRes<[A57UnitI,
+ A57UnitL, A57UnitL,
+ A57UnitV, A57UnitV, A57UnitV]> {
+ let Latency = 9;
+ let NumMicroOps = 6;
+}
+def A57Write_9cyc_1I_1L_4V : SchedWriteRes<[A57UnitI,
+ A57UnitL,
+ A57UnitV, A57UnitV,
+ A57UnitV, A57UnitV]> {
+ let Latency = 9;
+ let NumMicroOps = 6;
+}
+def A57Write_9cyc_2L_4V : SchedWriteRes<[A57UnitL, A57UnitL,
+ A57UnitV, A57UnitV,
+ A57UnitV, A57UnitV]> {
+ let Latency = 9;
+ let NumMicroOps = 6;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Define Generic 7 micro-op types
+
+def A57Write_10cyc_3L_4V : SchedWriteRes<[A57UnitL, A57UnitL, A57UnitL,
+ A57UnitV, A57UnitV,
+ A57UnitV, A57UnitV]> {
+ let Latency = 10;
+ let NumMicroOps = 7;
+}
+def A57Write_4cyc_1I_4S_2V : SchedWriteRes<[A57UnitI,
+ A57UnitS, A57UnitS,
+ A57UnitS, A57UnitS,
+ A57UnitV, A57UnitV]> {
+ let Latency = 4;
+ let NumMicroOps = 7;
+}
+def A57Write_6cyc_1I_6S : SchedWriteRes<[A57UnitI,
+ A57UnitS, A57UnitS, A57UnitS,
+ A57UnitS, A57UnitS, A57UnitS]> {
+ let Latency = 6;
+ let NumMicroOps = 7;
+}
+def A57Write_9cyc_1I_2L_4V : SchedWriteRes<[A57UnitI,
+ A57UnitL, A57UnitL,
+ A57UnitV, A57UnitV,
+ A57UnitV, A57UnitV]> {
+ let Latency = 9;
+ let NumMicroOps = 7;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Define Generic 8 micro-op types
+
+def A57Write_10cyc_1I_3L_4V : SchedWriteRes<[A57UnitI,
+ A57UnitL, A57UnitL, A57UnitL,
+ A57UnitV, A57UnitV,
+ A57UnitV, A57UnitV]> {
+ let Latency = 10;
+ let NumMicroOps = 8;
+}
+def A57Write_11cyc_4L_4V : SchedWriteRes<[A57UnitL, A57UnitL,
+ A57UnitL, A57UnitL,
+ A57UnitV, A57UnitV,
+ A57UnitV, A57UnitV]> {
+ let Latency = 11;
+ let NumMicroOps = 8;
+}
+def A57Write_8cyc_8S : SchedWriteRes<[A57UnitS, A57UnitS,
+ A57UnitS, A57UnitS,
+ A57UnitS, A57UnitS,
+ A57UnitS, A57UnitS]> {
+ let Latency = 8;
+ let NumMicroOps = 8;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Define Generic 9 micro-op types
+
+def A57Write_8cyc_1I_8S : SchedWriteRes<[A57UnitI,
+ A57UnitS, A57UnitS,
+ A57UnitS, A57UnitS,
+ A57UnitS, A57UnitS,
+ A57UnitS, A57UnitS]> {
+ let Latency = 8;
+ let NumMicroOps = 9;
+}
+def A57Write_11cyc_1I_4L_4V : SchedWriteRes<[A57UnitI,
+ A57UnitL, A57UnitL,
+ A57UnitL, A57UnitL,
+ A57UnitV, A57UnitV,
+ A57UnitV, A57UnitV]> {
+ let Latency = 11;
+ let NumMicroOps = 9;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Define Generic 10 micro-op types
+
+def A57Write_6cyc_6S_4V : SchedWriteRes<[A57UnitS, A57UnitS, A57UnitS,
+ A57UnitS, A57UnitS, A57UnitS,
+ A57UnitV, A57UnitV,
+ A57UnitV, A57UnitV]> {
+ let Latency = 6;
+ let NumMicroOps = 10;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Define Generic 11 micro-op types
+
+def A57Write_6cyc_1I_6S_4V : SchedWriteRes<[A57UnitI,
+ A57UnitS, A57UnitS, A57UnitS,
+ A57UnitS, A57UnitS, A57UnitS,
+ A57UnitV, A57UnitV,
+ A57UnitV, A57UnitV]> {
+ let Latency = 6;
+ let NumMicroOps = 11;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Define Generic 12 micro-op types
+
+def A57Write_8cyc_8S_4V : SchedWriteRes<[A57UnitS, A57UnitS, A57UnitS, A57UnitS,
+ A57UnitS, A57UnitS, A57UnitS, A57UnitS,
+ A57UnitV, A57UnitV,
+ A57UnitV, A57UnitV]> {
+ let Latency = 8;
+ let NumMicroOps = 12;
+}
+
+//===----------------------------------------------------------------------===//
+// Define Generic 13 micro-op types
+
+def A57Write_8cyc_1I_8S_4V : SchedWriteRes<[A57UnitI,
+ A57UnitS, A57UnitS, A57UnitS,
+ A57UnitS, A57UnitS, A57UnitS,
+ A57UnitS, A57UnitS,
+ A57UnitV, A57UnitV,
+ A57UnitV, A57UnitV]> {
+ let Latency = 8;
+ let NumMicroOps = 13;
+}
+
diff --git a/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp b/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
index 5c65b75..1bf64fc 100644
--- a/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
+++ b/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
@@ -16,9 +16,8 @@ using namespace llvm;
#define DEBUG_TYPE "aarch64-selectiondag-info"
-AArch64SelectionDAGInfo::AArch64SelectionDAGInfo(const TargetMachine &TM)
- : TargetSelectionDAGInfo(TM),
- Subtarget(&TM.getSubtarget<AArch64Subtarget>()) {}
+AArch64SelectionDAGInfo::AArch64SelectionDAGInfo(const DataLayout *DL)
+ : TargetSelectionDAGInfo(DL) {}
AArch64SelectionDAGInfo::~AArch64SelectionDAGInfo() {}
@@ -30,7 +29,9 @@ SDValue AArch64SelectionDAGInfo::EmitTargetCodeForMemset(
ConstantSDNode *V = dyn_cast<ConstantSDNode>(Src);
ConstantSDNode *SizeValue = dyn_cast<ConstantSDNode>(Size);
const char *bzeroEntry =
- (V && V->isNullValue()) ? Subtarget->getBZeroEntry() : nullptr;
+ (V && V->isNullValue())
+ ? DAG.getTarget().getSubtarget<AArch64Subtarget>().getBZeroEntry()
+ : nullptr;
// For small size (< 256), it is not beneficial to use bzero
// instead of memset.
if (bzeroEntry && (!SizeValue || SizeValue->getZExtValue() > 256)) {
@@ -50,7 +51,7 @@ SDValue AArch64SelectionDAGInfo::EmitTargetCodeForMemset(
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(dl).setChain(Chain)
.setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()),
- DAG.getExternalSymbol(bzeroEntry, IntPtr), &Args, 0)
+ DAG.getExternalSymbol(bzeroEntry, IntPtr), std::move(Args), 0)
.setDiscardResult();
std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI);
return CallResult.second;
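
A note on the memset hook above: the lowering only forwards a fill to __bzero when the stored value is known to be zero and the size is either unknown or larger than 256 bytes; everything else stays on the generic memset path. A minimal stand-alone sketch of that decision, with illustrative names rather than the LLVM API:

#include <cstdint>

// Returns the callee to use for a memset-like fill, or nullptr to keep the
// ordinary memset expansion. BZeroEntry is nullptr when the subtarget
// provides no __bzero symbol (the getBZeroEntry() query used above).
const char *pickFillCallee(bool ValueIsKnownZero, bool SizeIsKnown,
                           uint64_t Size, const char *BZeroEntry) {
  if (!ValueIsKnownZero || !BZeroEntry)
    return nullptr;
  if (SizeIsKnown && Size <= 256)
    return nullptr;     // small known fills: a libcall is not worth it
  return BZeroEntry;    // e.g. "__bzero" on Darwin targets
}
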
diff --git a/lib/Target/AArch64/AArch64SelectionDAGInfo.h b/lib/Target/AArch64/AArch64SelectionDAGInfo.h
index 8381f99..1180eea 100644
--- a/lib/Target/AArch64/AArch64SelectionDAGInfo.h
+++ b/lib/Target/AArch64/AArch64SelectionDAGInfo.h
@@ -19,12 +19,8 @@
namespace llvm {
class AArch64SelectionDAGInfo : public TargetSelectionDAGInfo {
- /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
- /// make the right decision when generating code for different targets.
- const AArch64Subtarget *Subtarget;
-
public:
- explicit AArch64SelectionDAGInfo(const TargetMachine &TM);
+ explicit AArch64SelectionDAGInfo(const DataLayout *DL);
~AArch64SelectionDAGInfo();
SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl, SDValue Chain,
diff --git a/lib/Target/AArch64/AArch64Subtarget.cpp b/lib/Target/AArch64/AArch64Subtarget.cpp
index cd69994..bb0b72c 100644
--- a/lib/Target/AArch64/AArch64Subtarget.cpp
+++ b/lib/Target/AArch64/AArch64Subtarget.cpp
@@ -30,21 +30,35 @@ static cl::opt<bool>
EnableEarlyIfConvert("aarch64-early-ifcvt", cl::desc("Enable the early if "
"converter pass"), cl::init(true), cl::Hidden);
-AArch64Subtarget::AArch64Subtarget(const std::string &TT,
- const std::string &CPU,
- const std::string &FS, bool LittleEndian)
- : AArch64GenSubtargetInfo(TT, CPU, FS), ARMProcFamily(Others),
- HasFPARMv8(false), HasNEON(false), HasCrypto(false), HasCRC(false),
- HasZeroCycleRegMove(false), HasZeroCycleZeroing(false), CPUString(CPU),
- TargetTriple(TT), IsLittleEndian(LittleEndian) {
+AArch64Subtarget &
+AArch64Subtarget::initializeSubtargetDependencies(StringRef FS) {
// Determine default and user-specified characteristics
if (CPUString.empty())
CPUString = "generic";
ParseSubtargetFeatures(CPUString, FS);
+ return *this;
}
+AArch64Subtarget::AArch64Subtarget(const std::string &TT,
+ const std::string &CPU,
+ const std::string &FS, TargetMachine &TM,
+ bool LittleEndian)
+ : AArch64GenSubtargetInfo(TT, CPU, FS), ARMProcFamily(Others),
+ HasFPARMv8(false), HasNEON(false), HasCrypto(false), HasCRC(false),
+ HasZeroCycleRegMove(false), HasZeroCycleZeroing(false), CPUString(CPU),
+ TargetTriple(TT),
+ // This nested ternary is horrible, but DL needs to be properly
+ // initialized before TLInfo is constructed.
+ DL(isTargetMachO()
+ ? "e-m:o-i64:64-i128:128-n32:64-S128"
+ : (LittleEndian ? "e-m:e-i64:64-i128:128-n32:64-S128"
+ : "E-m:e-i64:64-i128:128-n32:64-S128")),
+ FrameLowering(), InstrInfo(initializeSubtargetDependencies(FS)),
+ TSInfo(&DL), TLInfo(TM) {}
+
/// ClassifyGlobalReference - Find the target operand flags that describe
/// how a global value should be referenced for the current subtarget.
unsigned char
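
The constructor rewrite above hinges on initializeSubtargetDependencies() parsing the feature string and returning *this, so members such as InstrInfo that need an already-parsed subtarget can still be built from the initializer list. A self-contained sketch of the idiom, using hypothetical class names rather than the real LLVM types:

#include <string>

struct SubtargetSketch;

struct InstrInfoSketch {
  explicit InstrInfoSketch(SubtargetSketch &ST) : Owner(&ST) {}
  SubtargetSketch *Owner;
};

struct SubtargetSketch {
  std::string CPUString;    // declared first, so it is initialized first
  InstrInfoSketch InstrInfo;

  SubtargetSketch &initializeDeps(const std::string &FS) {
    if (CPUString.empty())
      CPUString = "generic";   // pick a default CPU before feature parsing
    // ... ParseSubtargetFeatures(CPUString, FS) would run here ...
    (void)FS;
    return *this;              // usable directly inside an init list
  }

  SubtargetSketch(const std::string &CPU, const std::string &FS)
      : CPUString(CPU), InstrInfo(initializeDeps(FS)) {}
};
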
diff --git a/lib/Target/AArch64/AArch64Subtarget.h b/lib/Target/AArch64/AArch64Subtarget.h
index 590ea05..52124f6 100644
--- a/lib/Target/AArch64/AArch64Subtarget.h
+++ b/lib/Target/AArch64/AArch64Subtarget.h
@@ -14,8 +14,13 @@
#ifndef AArch64SUBTARGET_H
#define AArch64SUBTARGET_H
-#include "llvm/Target/TargetSubtargetInfo.h"
+#include "AArch64InstrInfo.h"
+#include "AArch64FrameLowering.h"
+#include "AArch64ISelLowering.h"
#include "AArch64RegisterInfo.h"
+#include "AArch64SelectionDAGInfo.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
#include <string>
#define GET_SUBTARGETINFO_HEADER
@@ -49,15 +54,32 @@ protected:
/// TargetTriple - What processor and OS we're targeting.
Triple TargetTriple;
- /// IsLittleEndian - Is the target little endian?
- bool IsLittleEndian;
+ const DataLayout DL;
+ AArch64FrameLowering FrameLowering;
+ AArch64InstrInfo InstrInfo;
+ AArch64SelectionDAGInfo TSInfo;
+ AArch64TargetLowering TLInfo;
+private:
+ /// initializeSubtargetDependencies - Initializes using CPUString and the
+ /// passed in feature string so that we can use initializer lists for
+ /// subtarget initialization.
+ AArch64Subtarget &initializeSubtargetDependencies(StringRef FS);
public:
/// This constructor initializes the data members to match that
/// of the specified triple.
AArch64Subtarget(const std::string &TT, const std::string &CPU,
- const std::string &FS, bool LittleEndian);
-
+ const std::string &FS, TargetMachine &TM, bool LittleEndian);
+
+ const AArch64SelectionDAGInfo *getSelectionDAGInfo() const { return &TSInfo; }
+ const AArch64FrameLowering *getFrameLowering() const {
+ return &FrameLowering;
+ }
+ const AArch64TargetLowering *getTargetLowering() const {
+ return &TLInfo;
+ }
+ const AArch64InstrInfo *getInstrInfo() const { return &InstrInfo; }
+ const DataLayout *getDataLayout() const { return &DL; }
bool enableMachineScheduler() const override { return true; }
bool hasZeroCycleRegMove() const { return HasZeroCycleRegMove; }
@@ -69,7 +91,7 @@ public:
bool hasCrypto() const { return HasCrypto; }
bool hasCRC() const { return HasCRC; }
- bool isLittleEndian() const { return IsLittleEndian; }
+ bool isLittleEndian() const { return DL.isLittleEndian(); }
bool isTargetDarwin() const { return TargetTriple.isOSDarwin(); }
diff --git a/lib/Target/AArch64/AArch64TargetMachine.cpp b/lib/Target/AArch64/AArch64TargetMachine.cpp
index 0b5dd2f..f99b90b 100644
--- a/lib/Target/AArch64/AArch64TargetMachine.cpp
+++ b/lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -53,6 +53,12 @@ static cl::opt<bool>
EnableLoadStoreOpt("aarch64-load-store-opt", cl::desc("Enable the load/store pair"
" optimization pass"), cl::init(true), cl::Hidden);
+static cl::opt<bool>
+EnableAtomicTidy("aarch64-atomic-cfg-tidy", cl::Hidden,
+ cl::desc("Run SimplifyCFG after expanding atomic operations"
+ " to make use of cmpxchg flow-based information"),
+ cl::init(true));
+
extern "C" void LLVMInitializeAArch64Target() {
// Register the target.
RegisterTargetMachine<AArch64leTargetMachine> X(TheAArch64leTarget);
@@ -71,16 +77,7 @@ AArch64TargetMachine::AArch64TargetMachine(const Target &T, StringRef TT,
CodeGenOpt::Level OL,
bool LittleEndian)
: LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
- Subtarget(TT, CPU, FS, LittleEndian),
- // This nested ternary is horrible, but DL needs to be properly
- // initialized
- // before TLInfo is constructed.
- DL(Subtarget.isTargetMachO()
- ? "e-m:o-i64:64-i128:128-n32:64-S128"
- : (LittleEndian ? "e-m:e-i64:64-i128:128-n32:64-S128"
- : "E-m:e-i64:64-i128:128-n32:64-S128")),
- InstrInfo(Subtarget), TLInfo(*this), FrameLowering(*this, Subtarget),
- TSInfo(*this) {
+ Subtarget(TT, CPU, FS, *this, LittleEndian) {
initAsmInfo();
}
@@ -113,6 +110,7 @@ public:
return getTM<AArch64TargetMachine>();
}
+ void addIRPasses() override;
bool addPreISel() override;
bool addInstSelector() override;
bool addILPOpts() override;
@@ -135,6 +133,20 @@ TargetPassConfig *AArch64TargetMachine::createPassConfig(PassManagerBase &PM) {
return new AArch64PassConfig(this, PM);
}
+void AArch64PassConfig::addIRPasses() {
+ // Always expand atomic operations, we don't deal with atomicrmw or cmpxchg
+ // ourselves.
+ addPass(createAtomicExpandLoadLinkedPass(TM));
+
+ // Cmpxchg instructions are often used with a subsequent comparison to
+ // determine whether it succeeded. We can exploit existing control-flow in
+ // ldrex/strex loops to simplify this, but it needs tidying up.
+ if (TM->getOptLevel() != CodeGenOpt::None && EnableAtomicTidy)
+ addPass(createCFGSimplificationPass());
+
+ TargetPassConfig::addIRPasses();
+}
+
// Pass Pipeline Configuration
bool AArch64PassConfig::addPreISel() {
// Run promote constant before global merge, so that the promoted constants
@@ -146,10 +158,6 @@ bool AArch64PassConfig::addPreISel() {
if (TM->getOptLevel() != CodeGenOpt::None)
addPass(createAArch64AddressTypePromotionPass());
- // Always expand atomic operations, we don't deal with atomicrmw or cmpxchg
- // ourselves.
- addPass(createAtomicExpandLoadLinkedPass(TM));
-
return false;
}
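
With the addIRPasses() override above, atomic expansion moves out of addPreISel() and into the IR pipeline, and SimplifyCFG is re-run immediately afterwards (unless disabled or at -O0) so the branches created by the ldxr/stxr loops can fold into surrounding control flow. A rough sketch of the resulting ordering, using a toy pass list rather than the real pass manager:

#include <string>
#include <vector>

enum class OptLevel { None, Default };

struct PassPipelineSketch {
  std::vector<std::string> Passes;

  void addIRPasses(OptLevel OL, bool AtomicTidy) {
    // 1. Always expand atomicrmw/cmpxchg first; instruction selection
    //    never has to deal with them directly.
    Passes.push_back("atomic-expand-load-linked");
    // 2. Optionally clean up the control flow the expansion introduced, so
    //    a user-written "did the cmpxchg succeed?" compare can fold into
    //    the ldxr/stxr loop's own branches.
    if (OL != OptLevel::None && AtomicTidy)
      Passes.push_back("simplifycfg");
    // 3. Then whatever the generic TargetPassConfig::addIRPasses() adds.
    Passes.push_back("<generic IR passes>");
  }
};
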
diff --git a/lib/Target/AArch64/AArch64TargetMachine.h b/lib/Target/AArch64/AArch64TargetMachine.h
index 079b19b..852cb3f 100644
--- a/lib/Target/AArch64/AArch64TargetMachine.h
+++ b/lib/Target/AArch64/AArch64TargetMachine.h
@@ -15,13 +15,9 @@
#define AArch64TARGETMACHINE_H
#include "AArch64InstrInfo.h"
-#include "AArch64ISelLowering.h"
#include "AArch64Subtarget.h"
-#include "AArch64FrameLowering.h"
-#include "AArch64SelectionDAGInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/MC/MCStreamer.h"
namespace llvm {
@@ -29,13 +25,6 @@ class AArch64TargetMachine : public LLVMTargetMachine {
protected:
AArch64Subtarget Subtarget;
-private:
- const DataLayout DL;
- AArch64InstrInfo InstrInfo;
- AArch64TargetLowering TLInfo;
- AArch64FrameLowering FrameLowering;
- AArch64SelectionDAGInfo TSInfo;
-
public:
AArch64TargetMachine(const Target &T, StringRef TT, StringRef CPU,
StringRef FS, const TargetOptions &Options,
@@ -46,18 +35,22 @@ public:
return &Subtarget;
}
const AArch64TargetLowering *getTargetLowering() const override {
- return &TLInfo;
+ return getSubtargetImpl()->getTargetLowering();
+ }
+ const DataLayout *getDataLayout() const override {
+ return getSubtargetImpl()->getDataLayout();
}
- const DataLayout *getDataLayout() const override { return &DL; }
const AArch64FrameLowering *getFrameLowering() const override {
- return &FrameLowering;
+ return getSubtargetImpl()->getFrameLowering();
+ }
+ const AArch64InstrInfo *getInstrInfo() const override {
+ return getSubtargetImpl()->getInstrInfo();
}
- const AArch64InstrInfo *getInstrInfo() const override { return &InstrInfo; }
const AArch64RegisterInfo *getRegisterInfo() const override {
- return &InstrInfo.getRegisterInfo();
+ return &getInstrInfo()->getRegisterInfo();
}
const AArch64SelectionDAGInfo *getSelectionDAGInfo() const override {
- return &TSInfo;
+ return getSubtargetImpl()->getSelectionDAGInfo();
}
// Pass Pipeline Configuration
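
The header changes above are pure delegation: the target machine no longer owns the data layout, instruction info, frame lowering, or selection-DAG info, and every accessor forwards to the subtarget, which now constructs and holds those objects. A compressed illustration of that ownership move, with stand-in types rather than the real classes:

#include <string>

struct FrameLoweringSketch { int StackAlignment = 16; };

struct OwningSubtargetSketch {
  FrameLoweringSketch FrameLowering;            // owned by the subtarget now
  std::string DataLayout = "<layout string>";
  const FrameLoweringSketch *getFrameLowering() const { return &FrameLowering; }
  const std::string *getDataLayout() const { return &DataLayout; }
};

struct ForwardingMachineSketch {
  OwningSubtargetSketch Subtarget;
  const OwningSubtargetSketch *getSubtargetImpl() const { return &Subtarget; }
  // Accessors forward to the subtarget; the machine keeps no duplicates.
  const FrameLoweringSketch *getFrameLowering() const {
    return getSubtargetImpl()->getFrameLowering();
  }
  const std::string *getDataLayout() const {
    return getSubtargetImpl()->getDataLayout();
  }
};
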
diff --git a/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 33e482a..1dac14b 100644
--- a/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -306,28 +306,64 @@ unsigned AArch64TTI::getCastInstrCost(unsigned Opcode, Type *Dst,
static const TypeConversionCostTblEntry<MVT> ConversionTbl[] = {
// LowerVectorINT_TO_FP:
{ ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i32, 1 },
- { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i8, 1 },
- { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i16, 1 },
- { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i32, 1 },
+ { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 },
{ ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i64, 1 },
{ ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i32, 1 },
- { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i8, 1 },
- { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i16, 1 },
- { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i32, 1 },
+ { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 },
{ ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 1 },
+
+ // Complex: to v2f32
+ { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i8, 3 },
+ { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i16, 3 },
+ { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i64, 2 },
+ { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i8, 3 },
+ { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i16, 3 },
+ { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i64, 2 },
+
+ // Complex: to v4f32
+ { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i8, 4 },
+ { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i16, 2 },
+ { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i8, 3 },
+ { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i16, 2 },
+
+ // Complex: to v2f64
+ { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i8, 4 },
+ { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i16, 4 },
+ { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 },
+ { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i8, 4 },
+ { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i16, 4 },
+ { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 },
+
+
// LowerVectorFP_TO_INT
+ { ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f32, 1 },
{ ISD::FP_TO_SINT, MVT::v4i32, MVT::v4f32, 1 },
{ ISD::FP_TO_SINT, MVT::v2i64, MVT::v2f64, 1 },
+ { ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f32, 1 },
{ ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f32, 1 },
{ ISD::FP_TO_UINT, MVT::v2i64, MVT::v2f64, 1 },
- { ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f64, 1 },
- { ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f64, 1 },
- { ISD::FP_TO_UINT, MVT::v2i64, MVT::v2f32, 4 },
- { ISD::FP_TO_SINT, MVT::v2i64, MVT::v2f32, 4 },
- { ISD::FP_TO_UINT, MVT::v4i16, MVT::v4f32, 4 },
- { ISD::FP_TO_SINT, MVT::v4i16, MVT::v4f32, 4 },
- { ISD::FP_TO_UINT, MVT::v2i64, MVT::v2f64, 4 },
- { ISD::FP_TO_SINT, MVT::v2i64, MVT::v2f64, 4 },
+
+ // Complex, from v2f32: legal type is v2i32 (no cost) or v2i64 (1 ext).
+ { ISD::FP_TO_SINT, MVT::v2i64, MVT::v2f32, 2 },
+ { ISD::FP_TO_SINT, MVT::v2i16, MVT::v2f32, 1 },
+ { ISD::FP_TO_SINT, MVT::v2i8, MVT::v2f32, 1 },
+ { ISD::FP_TO_UINT, MVT::v2i64, MVT::v2f32, 2 },
+ { ISD::FP_TO_UINT, MVT::v2i16, MVT::v2f32, 1 },
+ { ISD::FP_TO_UINT, MVT::v2i8, MVT::v2f32, 1 },
+
+ // Complex, from v4f32: legal type is v4i16, 1 narrowing => ~2
+ { ISD::FP_TO_SINT, MVT::v4i16, MVT::v4f32, 2 },
+ { ISD::FP_TO_SINT, MVT::v4i8, MVT::v4f32, 2 },
+ { ISD::FP_TO_UINT, MVT::v4i16, MVT::v4f32, 2 },
+ { ISD::FP_TO_UINT, MVT::v4i8, MVT::v4f32, 2 },
+
+ // Complex, from v2f64: legal type is v2i32, 1 narrowing => ~2.
+ { ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f64, 2 },
+ { ISD::FP_TO_SINT, MVT::v2i16, MVT::v2f64, 2 },
+ { ISD::FP_TO_SINT, MVT::v2i8, MVT::v2f64, 2 },
+ { ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f64, 2 },
+ { ISD::FP_TO_UINT, MVT::v2i16, MVT::v2f64, 2 },
+ { ISD::FP_TO_UINT, MVT::v2i8, MVT::v2f64, 2 },
};
int Idx = ConvertCostTableLookup<MVT>(
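
Each entry in the conversion table above keys on an (opcode, destination type, source type) triple and records an approximate NEON instruction count; ConvertCostTableLookup then just scans for a matching triple. A stripped-down version of that lookup pattern, not the LLVM helper itself:

struct ConversionCostEntry { int Opcode; int DstTy; int SrcTy; unsigned Cost; };

int lookupConversionCost(const ConversionCostEntry *Table, unsigned Len,
                         int Opcode, int DstTy, int SrcTy) {
  for (unsigned I = 0; I != Len; ++I)
    if (Table[I].Opcode == Opcode && Table[I].DstTy == DstTy &&
        Table[I].SrcTy == SrcTy)
      return static_cast<int>(Table[I].Cost);
  return -1; // no entry: the caller falls back to the generic cost model
}
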
diff --git a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
index 65b77c5..c42d11e 100644
--- a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
+++ b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
@@ -38,14 +38,19 @@ namespace {
class AArch64Operand;
class AArch64AsmParser : public MCTargetAsmParser {
-public:
- typedef SmallVectorImpl<MCParsedAsmOperand *> OperandVector;
-
private:
StringRef Mnemonic; ///< Instruction mnemonic.
MCSubtargetInfo &STI;
MCAsmParser &Parser;
+ // Map of register aliases registered via the .req directive.
+ StringMap<std::pair<bool, unsigned> > RegisterReqs;
+
+ AArch64TargetStreamer &getTargetStreamer() {
+ MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
+ return static_cast<AArch64TargetStreamer &>(TS);
+ }
+
MCAsmParser &getParser() const { return Parser; }
MCAsmLexer &getLexer() const { return Parser.getLexer(); }
@@ -54,6 +59,7 @@ private:
bool parseSysAlias(StringRef Name, SMLoc NameLoc, OperandVector &Operands);
AArch64CC::CondCode parseCondCodeString(StringRef Cond);
bool parseCondCode(OperandVector &Operands, bool invertCondCode);
+ unsigned matchRegisterNameAlias(StringRef Name, bool isVector);
int tryParseRegister();
int tryMatchVectorRegister(StringRef &Kind, bool expected);
bool parseRegister(OperandVector &Operands);
@@ -70,6 +76,10 @@ private:
bool parseDirectiveTLSDescCall(SMLoc L);
bool parseDirectiveLOH(StringRef LOH, SMLoc L);
+ bool parseDirectiveLtorg(SMLoc L);
+
+ bool parseDirectiveReq(StringRef Name, SMLoc L);
+ bool parseDirectiveUnreq(SMLoc L);
bool validateInstruction(MCInst &Inst, SmallVectorImpl<SMLoc> &Loc);
bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
@@ -108,6 +118,8 @@ public:
const MCTargetOptions &Options)
: MCTargetAsmParser(), STI(_STI), Parser(_Parser) {
MCAsmParserExtension::Initialize(_Parser);
+ if (Parser.getStreamer().getTargetStreamer() == nullptr)
+ new AArch64TargetStreamer(Parser.getStreamer());
// Initialize the set of available features.
setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
@@ -117,7 +129,7 @@ public:
SMLoc NameLoc, OperandVector &Operands) override;
bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
bool ParseDirective(AsmToken DirectiveID) override;
- unsigned validateTargetOperandClass(MCParsedAsmOperand *Op,
+ unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
unsigned Kind) override;
static bool classifySymbolRef(const MCExpr *Expr,
@@ -240,10 +252,10 @@ private:
// the add<>Operands() calls.
MCContext &Ctx;
+public:
AArch64Operand(KindTy K, MCContext &_Ctx)
: MCParsedAsmOperand(), Kind(K), Ctx(_Ctx) {}
-public:
AArch64Operand(const AArch64Operand &o) : MCParsedAsmOperand(), Ctx(o.Ctx) {
Kind = o.Kind;
StartLoc = o.StartLoc;
@@ -607,7 +619,11 @@ public:
const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
if (!MCE)
return false;
- return AArch64_AM::isLogicalImmediate(MCE->getValue(), 32);
+ int64_t Val = MCE->getValue();
+ if (Val >> 32 != 0 && Val >> 32 != ~0LL)
+ return false;
+ Val &= 0xFFFFFFFF;
+ return AArch64_AM::isLogicalImmediate(Val, 32);
}
bool isLogicalImm64() const {
if (!isImm())
@@ -617,6 +633,23 @@ public:
return false;
return AArch64_AM::isLogicalImmediate(MCE->getValue(), 64);
}
+ bool isLogicalImm32Not() const {
+ if (!isImm())
+ return false;
+ const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
+ if (!MCE)
+ return false;
+ int64_t Val = ~MCE->getValue() & 0xFFFFFFFF;
+ return AArch64_AM::isLogicalImmediate(Val, 32);
+ }
+ bool isLogicalImm64Not() const {
+ if (!isImm())
+ return false;
+ const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
+ if (!MCE)
+ return false;
+ return AArch64_AM::isLogicalImmediate(~MCE->getValue(), 64);
+ }
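
The reworked isLogicalImm32() accepts a 64-bit constant only when its upper 32 bits are all zero or all one (a zero- or sign-extended 32-bit value) and then tests just the low half; the new *Not variants apply the same idea to the bitwise complement. A stand-alone sketch of the acceptance pre-filter, assuming the real bit-pattern test (AArch64_AM::isLogicalImmediate) runs afterwards:

#include <cstdint>

// Accepts Val as a candidate 32-bit logical immediate: the upper 32 bits
// must be all zero or all one, and only the low half is then encoded.
bool fitsIn32BitLogicalField(int64_t Val) {
  if ((Val >> 32) != 0 && (Val >> 32) != ~0LL)
    return false;
  uint32_t Low = static_cast<uint32_t>(Val & 0xFFFFFFFF);
  // The AArch64 logical-immediate encoding cannot represent all-zeros or
  // all-ones; the real check would now run the bit-pattern test on Low.
  return Low != 0 && Low != 0xFFFFFFFFu;
}
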
bool isShiftedImm() const { return Kind == k_ShiftedImm; }
bool isAddSubImm() const {
if (!isShiftedImm() && !isImm())
@@ -1348,7 +1381,8 @@ public:
assert(N == 1 && "Invalid number of operands!");
const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
assert(MCE && "Invalid logical immediate operand!");
- uint64_t encoding = AArch64_AM::encodeLogicalImmediate(MCE->getValue(), 32);
+ uint64_t encoding =
+ AArch64_AM::encodeLogicalImmediate(MCE->getValue() & 0xFFFFFFFF, 32);
Inst.addOperand(MCOperand::CreateImm(encoding));
}
@@ -1360,6 +1394,22 @@ public:
Inst.addOperand(MCOperand::CreateImm(encoding));
}
+ void addLogicalImm32NotOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ const MCConstantExpr *MCE = cast<MCConstantExpr>(getImm());
+ int64_t Val = ~MCE->getValue() & 0xFFFFFFFF;
+ uint64_t encoding = AArch64_AM::encodeLogicalImmediate(Val, 32);
+ Inst.addOperand(MCOperand::CreateImm(encoding));
+ }
+
+ void addLogicalImm64NotOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ const MCConstantExpr *MCE = cast<MCConstantExpr>(getImm());
+ uint64_t encoding =
+ AArch64_AM::encodeLogicalImmediate(~MCE->getValue(), 64);
+ Inst.addOperand(MCOperand::CreateImm(encoding));
+ }
+
void addSIMDImmType10Operands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
@@ -1523,9 +1573,9 @@ public:
void print(raw_ostream &OS) const override;
- static AArch64Operand *CreateToken(StringRef Str, bool IsSuffix, SMLoc S,
- MCContext &Ctx) {
- AArch64Operand *Op = new AArch64Operand(k_Token, Ctx);
+ static std::unique_ptr<AArch64Operand>
+ CreateToken(StringRef Str, bool IsSuffix, SMLoc S, MCContext &Ctx) {
+ auto Op = make_unique<AArch64Operand>(k_Token, Ctx);
Op->Tok.Data = Str.data();
Op->Tok.Length = Str.size();
Op->Tok.IsSuffix = IsSuffix;
@@ -1534,9 +1584,9 @@ public:
return Op;
}
- static AArch64Operand *CreateReg(unsigned RegNum, bool isVector, SMLoc S,
- SMLoc E, MCContext &Ctx) {
- AArch64Operand *Op = new AArch64Operand(k_Register, Ctx);
+ static std::unique_ptr<AArch64Operand>
+ CreateReg(unsigned RegNum, bool isVector, SMLoc S, SMLoc E, MCContext &Ctx) {
+ auto Op = make_unique<AArch64Operand>(k_Register, Ctx);
Op->Reg.RegNum = RegNum;
Op->Reg.isVector = isVector;
Op->StartLoc = S;
@@ -1544,10 +1594,10 @@ public:
return Op;
}
- static AArch64Operand *CreateVectorList(unsigned RegNum, unsigned Count,
- unsigned NumElements, char ElementKind,
- SMLoc S, SMLoc E, MCContext &Ctx) {
- AArch64Operand *Op = new AArch64Operand(k_VectorList, Ctx);
+ static std::unique_ptr<AArch64Operand>
+ CreateVectorList(unsigned RegNum, unsigned Count, unsigned NumElements,
+ char ElementKind, SMLoc S, SMLoc E, MCContext &Ctx) {
+ auto Op = make_unique<AArch64Operand>(k_VectorList, Ctx);
Op->VectorList.RegNum = RegNum;
Op->VectorList.Count = Count;
Op->VectorList.NumElements = NumElements;
@@ -1557,28 +1607,29 @@ public:
return Op;
}
- static AArch64Operand *CreateVectorIndex(unsigned Idx, SMLoc S, SMLoc E,
- MCContext &Ctx) {
- AArch64Operand *Op = new AArch64Operand(k_VectorIndex, Ctx);
+ static std::unique_ptr<AArch64Operand>
+ CreateVectorIndex(unsigned Idx, SMLoc S, SMLoc E, MCContext &Ctx) {
+ auto Op = make_unique<AArch64Operand>(k_VectorIndex, Ctx);
Op->VectorIndex.Val = Idx;
Op->StartLoc = S;
Op->EndLoc = E;
return Op;
}
- static AArch64Operand *CreateImm(const MCExpr *Val, SMLoc S, SMLoc E,
- MCContext &Ctx) {
- AArch64Operand *Op = new AArch64Operand(k_Immediate, Ctx);
+ static std::unique_ptr<AArch64Operand> CreateImm(const MCExpr *Val, SMLoc S,
+ SMLoc E, MCContext &Ctx) {
+ auto Op = make_unique<AArch64Operand>(k_Immediate, Ctx);
Op->Imm.Val = Val;
Op->StartLoc = S;
Op->EndLoc = E;
return Op;
}
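
The Create* changes in this block all follow one pattern: each factory now returns std::unique_ptr<AArch64Operand> instead of a raw new'ed pointer, so replacing or discarding an operand no longer needs the explicit deletes removed later in this patch. A generic sketch of that ownership style, with a toy operand type rather than the real class:

#include <memory>
#include <string>
#include <utility>

struct OperandSketch {
  std::string Token;
  static std::unique_ptr<OperandSketch> createToken(std::string Str) {
    auto Op = std::make_unique<OperandSketch>();
    Op->Token = std::move(Str);
    return Op; // ownership transfers to the caller
  }
};

// Usage: Operands[0] = OperandSketch::createToken("ubfm");
// the assignment destroys the previous operand, so no explicit delete.
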
- static AArch64Operand *CreateShiftedImm(const MCExpr *Val,
- unsigned ShiftAmount, SMLoc S,
- SMLoc E, MCContext &Ctx) {
- AArch64Operand *Op = new AArch64Operand(k_ShiftedImm, Ctx);
+ static std::unique_ptr<AArch64Operand> CreateShiftedImm(const MCExpr *Val,
+ unsigned ShiftAmount,
+ SMLoc S, SMLoc E,
+ MCContext &Ctx) {
+ auto Op = make_unique<AArch64Operand>(k_ShiftedImm, Ctx);
+ Op->ShiftedImm.Val = Val;
Op->ShiftedImm.ShiftAmount = ShiftAmount;
Op->StartLoc = S;
@@ -1586,34 +1637,36 @@ public:
return Op;
}
- static AArch64Operand *CreateCondCode(AArch64CC::CondCode Code, SMLoc S,
- SMLoc E, MCContext &Ctx) {
- AArch64Operand *Op = new AArch64Operand(k_CondCode, Ctx);
+ static std::unique_ptr<AArch64Operand>
+ CreateCondCode(AArch64CC::CondCode Code, SMLoc S, SMLoc E, MCContext &Ctx) {
+ auto Op = make_unique<AArch64Operand>(k_CondCode, Ctx);
Op->CondCode.Code = Code;
Op->StartLoc = S;
Op->EndLoc = E;
return Op;
}
- static AArch64Operand *CreateFPImm(unsigned Val, SMLoc S, MCContext &Ctx) {
- AArch64Operand *Op = new AArch64Operand(k_FPImm, Ctx);
+ static std::unique_ptr<AArch64Operand> CreateFPImm(unsigned Val, SMLoc S,
+ MCContext &Ctx) {
+ auto Op = make_unique<AArch64Operand>(k_FPImm, Ctx);
Op->FPImm.Val = Val;
Op->StartLoc = S;
Op->EndLoc = S;
return Op;
}
- static AArch64Operand *CreateBarrier(unsigned Val, SMLoc S, MCContext &Ctx) {
- AArch64Operand *Op = new AArch64Operand(k_Barrier, Ctx);
+ static std::unique_ptr<AArch64Operand> CreateBarrier(unsigned Val, SMLoc S,
+ MCContext &Ctx) {
+ auto Op = make_unique<AArch64Operand>(k_Barrier, Ctx);
Op->Barrier.Val = Val;
Op->StartLoc = S;
Op->EndLoc = S;
return Op;
}
- static AArch64Operand *CreateSysReg(StringRef Str, SMLoc S,
- uint64_t FeatureBits, MCContext &Ctx) {
- AArch64Operand *Op = new AArch64Operand(k_SysReg, Ctx);
+ static std::unique_ptr<AArch64Operand>
+ CreateSysReg(StringRef Str, SMLoc S, uint64_t FeatureBits, MCContext &Ctx) {
+ auto Op = make_unique<AArch64Operand>(k_SysReg, Ctx);
Op->SysReg.Data = Str.data();
Op->SysReg.Length = Str.size();
Op->SysReg.FeatureBits = FeatureBits;
@@ -1622,27 +1675,28 @@ public:
return Op;
}
- static AArch64Operand *CreateSysCR(unsigned Val, SMLoc S, SMLoc E,
- MCContext &Ctx) {
- AArch64Operand *Op = new AArch64Operand(k_SysCR, Ctx);
+ static std::unique_ptr<AArch64Operand> CreateSysCR(unsigned Val, SMLoc S,
+ SMLoc E, MCContext &Ctx) {
+ auto Op = make_unique<AArch64Operand>(k_SysCR, Ctx);
Op->SysCRImm.Val = Val;
Op->StartLoc = S;
Op->EndLoc = E;
return Op;
}
- static AArch64Operand *CreatePrefetch(unsigned Val, SMLoc S, MCContext &Ctx) {
- AArch64Operand *Op = new AArch64Operand(k_Prefetch, Ctx);
+ static std::unique_ptr<AArch64Operand> CreatePrefetch(unsigned Val, SMLoc S,
+ MCContext &Ctx) {
+ auto Op = make_unique<AArch64Operand>(k_Prefetch, Ctx);
Op->Prefetch.Val = Val;
Op->StartLoc = S;
Op->EndLoc = S;
return Op;
}
- static AArch64Operand *CreateShiftExtend(AArch64_AM::ShiftExtendType ShOp,
- unsigned Val, bool HasExplicitAmount,
- SMLoc S, SMLoc E, MCContext &Ctx) {
- AArch64Operand *Op = new AArch64Operand(k_ShiftExtend, Ctx);
+ static std::unique_ptr<AArch64Operand>
+ CreateShiftExtend(AArch64_AM::ShiftExtendType ShOp, unsigned Val,
+ bool HasExplicitAmount, SMLoc S, SMLoc E, MCContext &Ctx) {
+ auto Op = make_unique<AArch64Operand>(k_ShiftExtend, Ctx);
Op->ShiftExtend.Type = ShOp;
Op->ShiftExtend.Amount = Val;
Op->ShiftExtend.HasExplicitAmount = HasExplicitAmount;
@@ -1816,6 +1870,26 @@ bool AArch64AsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
return (RegNo == (unsigned)-1);
}
+// Matches a register name or register alias previously defined by '.req'
+unsigned AArch64AsmParser::matchRegisterNameAlias(StringRef Name,
+ bool isVector) {
+ unsigned RegNum = isVector ? matchVectorRegName(Name)
+ : MatchRegisterName(Name);
+
+ if (RegNum == 0) {
+ // Check for aliases registered via .req. Canonicalize to lower case.
+ // That's more consistent since register names are case insensitive, and
+ // it's how the original entry was passed in from MC/MCParser/AsmParser.
+ auto Entry = RegisterReqs.find(Name.lower());
+ if (Entry == RegisterReqs.end())
+ return 0;
+ // Set RegNum if the match is the right kind of register.
+ if (isVector == Entry->getValue().first)
+ RegNum = Entry->getValue().second;
+ }
+ return RegNum;
+}
+
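
matchRegisterNameAlias() above consults the real register tables first and falls back to the .req alias map, keyed on the lower-cased name, honouring an alias only when its scalar/vector kind matches what the caller asked for. A simplified model of that lookup using standard containers in place of llvm::StringMap:

#include <map>
#include <string>
#include <utility>

// {isVector, RegNum} keyed by the lower-cased alias name.
using AliasMapSketch = std::map<std::string, std::pair<bool, unsigned>>;

unsigned lookupRegOrAlias(unsigned DirectMatch, const std::string &LowerName,
                          bool WantVector, const AliasMapSketch &Reqs) {
  if (DirectMatch != 0)
    return DirectMatch; // a real register name matched first
  auto It = Reqs.find(LowerName);
  if (It == Reqs.end())
    return 0; // unknown name
  // Only honour the alias if it names the requested kind of register.
  return It->second.first == WantVector ? It->second.second : 0;
}
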
/// tryParseRegister - Try to parse a register name. The token must be an
/// Identifier when called, and if it is a register name the token is eaten and
/// the register is added to the operand list.
@@ -1824,7 +1898,7 @@ int AArch64AsmParser::tryParseRegister() {
assert(Tok.is(AsmToken::Identifier) && "Token is not an Identifier");
std::string lowerCase = Tok.getString().lower();
- unsigned RegNum = MatchRegisterName(lowerCase);
+ unsigned RegNum = matchRegisterNameAlias(lowerCase, false);
// Also handle a few aliases of registers.
if (RegNum == 0)
RegNum = StringSwitch<unsigned>(lowerCase)
@@ -1854,7 +1928,8 @@ int AArch64AsmParser::tryMatchVectorRegister(StringRef &Kind, bool expected) {
// a '.'.
size_t Start = 0, Next = Name.find('.');
StringRef Head = Name.slice(Start, Next);
- unsigned RegNum = matchVectorRegName(Head);
+ unsigned RegNum = matchRegisterNameAlias(Head, true);
+
if (RegNum) {
if (Next != StringRef::npos) {
Kind = Name.slice(Next, StringRef::npos);
@@ -2183,8 +2258,11 @@ bool AArch64AsmParser::parseCondCode(OperandVector &Operands,
return TokError("invalid condition code");
Parser.Lex(); // Eat identifier token.
- if (invertCondCode)
+ if (invertCondCode) {
+ if (CC == AArch64CC::AL || CC == AArch64CC::NV)
+ return TokError("condition codes AL and NV are invalid for this instruction");
CC = AArch64CC::getInvertedCondCode(AArch64CC::CondCode(CC));
+ }
Operands.push_back(
AArch64Operand::CreateCondCode(CC, S, getLoc(), getContext()));
@@ -2849,7 +2927,7 @@ AArch64AsmParser::tryParseGPR64sp0Operand(OperandVector &Operands) {
if (!Tok.is(AsmToken::Identifier))
return MatchOperand_NoMatch;
- unsigned RegNum = MatchRegisterName(Tok.getString().lower());
+ unsigned RegNum = matchRegisterNameAlias(Tok.getString().lower(), false);
MCContext &Ctx = getContext();
const MCRegisterInfo *RI = Ctx.getRegisterInfo();
@@ -3000,6 +3078,43 @@ bool AArch64AsmParser::parseOperand(OperandVector &Operands, bool isCondCode,
Operands.push_back(AArch64Operand::CreateImm(ImmVal, S, E, getContext()));
return false;
}
+ case AsmToken::Equal: {
+ SMLoc Loc = Parser.getTok().getLoc();
+ if (Mnemonic != "ldr") // only parse for ldr pseudo (e.g. ldr r0, =val)
+ return Error(Loc, "unexpected token in operand");
+ Parser.Lex(); // Eat '='
+ const MCExpr *SubExprVal;
+ if (getParser().parseExpression(SubExprVal))
+ return true;
+
+ MCContext& Ctx = getContext();
+ E = SMLoc::getFromPointer(Loc.getPointer() - 1);
+ // If the op is an immediate that fits in a movz, replace the ldr with a movz.
+ if (isa<MCConstantExpr>(SubExprVal) && Operands.size() >= 2 &&
+ static_cast<AArch64Operand &>(*Operands[1]).isReg()) {
+ bool IsXReg = AArch64MCRegisterClasses[AArch64::GPR64allRegClassID].contains(
+ Operands[1]->getReg());
+ uint64_t Imm = (cast<MCConstantExpr>(SubExprVal))->getValue();
+ uint32_t ShiftAmt = 0, MaxShiftAmt = IsXReg ? 48 : 16;
+ while(Imm > 0xFFFF && countTrailingZeros(Imm) >= 16) {
+ ShiftAmt += 16;
+ Imm >>= 16;
+ }
+ if (ShiftAmt <= MaxShiftAmt && Imm <= 0xFFFF) {
+ Operands[0] = AArch64Operand::CreateToken("movz", false, Loc, Ctx);
+ Operands.push_back(AArch64Operand::CreateImm(
+ MCConstantExpr::Create(Imm, Ctx), S, E, Ctx));
+ if (ShiftAmt)
+ Operands.push_back(AArch64Operand::CreateShiftExtend(AArch64_AM::LSL,
+ ShiftAmt, true, S, E, Ctx));
+ return false;
+ }
+ }
+ // If it is a label or an imm that cannot fit in a movz, put it into CP.
+ const MCExpr *CPLoc = getTargetStreamer().addConstantPoolEntry(SubExprVal);
+ Operands.push_back(AArch64Operand::CreateImm(CPLoc, S, E, Ctx));
+ return false;
+ }
}
}
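
The new AsmToken::Equal case implements the `ldr <reg>, =<expr>` pseudo-instruction: a constant that is a single 16-bit chunk at a 16-bit-aligned position (shift of at most 48 for X registers, 16 for W registers) is rewritten into movz with an LSL amount, and anything else is handed to the constant pool for the next .ltorg/.pool. A self-contained sketch of the "does it fit a movz?" test the case performs:

#include <cstdint>

// True if Imm can be materialised by a single movz with an LSL shift for the
// given register class; ShiftOut/ImmOut receive the operands to emit.
bool fitsMovz(uint64_t Imm, bool IsXReg, uint32_t &ShiftOut, uint64_t &ImmOut) {
  uint32_t Shift = 0;
  const uint32_t MaxShift = IsXReg ? 48 : 16;
  while (Imm > 0xFFFF && (Imm & 0xFFFF) == 0) { // low 16 bits clear: shift on
    Shift += 16;
    Imm >>= 16;
  }
  if (Shift > MaxShift || Imm > 0xFFFF)
    return false; // needs more than one 16-bit chunk: use the constant pool
  ShiftOut = Shift; // e.g. Imm 0x12340000 -> ImmOut 0x1234, ShiftOut 16
  ImmOut = Imm;
  return true;
}
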
@@ -3029,6 +3144,15 @@ bool AArch64AsmParser::ParseInstruction(ParseInstructionInfo &Info,
.Case("bnv", "b.nv")
.Default(Name);
+ // First check for the AArch64-specific .req directive.
+ if (Parser.getTok().is(AsmToken::Identifier) &&
+ Parser.getTok().getIdentifier() == ".req") {
+ parseDirectiveReq(Name, NameLoc);
+ // We always return 'error' for this, as we're done with this
+ // statement and don't need to match the instruction.
+ return true;
+ }
+
// Create the leading tokens for the mnemonic, split by '.' characters.
size_t Start = 0, Next = Name.find('.');
StringRef Head = Name.slice(Start, Next);
@@ -3443,8 +3567,7 @@ bool AArch64AsmParser::showMatchError(SMLoc Loc, unsigned ErrCode) {
case Match_MnemonicFail:
return Error(Loc, "unrecognized instruction mnemonic");
default:
- assert(0 && "unexpected error code!");
- return Error(Loc, "invalid instruction format");
+ llvm_unreachable("unexpected error code!");
}
}
@@ -3456,23 +3579,23 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
unsigned &ErrorInfo,
bool MatchingInlineAsm) {
assert(!Operands.empty() && "Unexpect empty operand list!");
- AArch64Operand *Op = static_cast<AArch64Operand *>(Operands[0]);
- assert(Op->isToken() && "Leading operand should always be a mnemonic!");
+ AArch64Operand &Op = static_cast<AArch64Operand &>(*Operands[0]);
+ assert(Op.isToken() && "Leading operand should always be a mnemonic!");
- StringRef Tok = Op->getToken();
+ StringRef Tok = Op.getToken();
unsigned NumOperands = Operands.size();
if (NumOperands == 4 && Tok == "lsl") {
- AArch64Operand *Op2 = static_cast<AArch64Operand *>(Operands[2]);
- AArch64Operand *Op3 = static_cast<AArch64Operand *>(Operands[3]);
- if (Op2->isReg() && Op3->isImm()) {
- const MCConstantExpr *Op3CE = dyn_cast<MCConstantExpr>(Op3->getImm());
+ AArch64Operand &Op2 = static_cast<AArch64Operand &>(*Operands[2]);
+ AArch64Operand &Op3 = static_cast<AArch64Operand &>(*Operands[3]);
+ if (Op2.isReg() && Op3.isImm()) {
+ const MCConstantExpr *Op3CE = dyn_cast<MCConstantExpr>(Op3.getImm());
if (Op3CE) {
uint64_t Op3Val = Op3CE->getValue();
uint64_t NewOp3Val = 0;
uint64_t NewOp4Val = 0;
if (AArch64MCRegisterClasses[AArch64::GPR32allRegClassID].contains(
- Op2->getReg())) {
+ Op2.getReg())) {
NewOp3Val = (32 - Op3Val) & 0x1f;
NewOp4Val = 31 - Op3Val;
} else {
@@ -3484,26 +3607,24 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
const MCExpr *NewOp4 = MCConstantExpr::Create(NewOp4Val, getContext());
Operands[0] = AArch64Operand::CreateToken(
- "ubfm", false, Op->getStartLoc(), getContext());
- Operands[3] = AArch64Operand::CreateImm(NewOp3, Op3->getStartLoc(),
- Op3->getEndLoc(), getContext());
+ "ubfm", false, Op.getStartLoc(), getContext());
Operands.push_back(AArch64Operand::CreateImm(
- NewOp4, Op3->getStartLoc(), Op3->getEndLoc(), getContext()));
- delete Op3;
- delete Op;
+ NewOp4, Op3.getStartLoc(), Op3.getEndLoc(), getContext()));
+ Operands[3] = AArch64Operand::CreateImm(NewOp3, Op3.getStartLoc(),
+ Op3.getEndLoc(), getContext());
}
}
} else if (NumOperands == 5) {
// FIXME: Horrible hack to handle the BFI -> BFM, SBFIZ->SBFM, and
// UBFIZ -> UBFM aliases.
if (Tok == "bfi" || Tok == "sbfiz" || Tok == "ubfiz") {
- AArch64Operand *Op1 = static_cast<AArch64Operand *>(Operands[1]);
- AArch64Operand *Op3 = static_cast<AArch64Operand *>(Operands[3]);
- AArch64Operand *Op4 = static_cast<AArch64Operand *>(Operands[4]);
+ AArch64Operand &Op1 = static_cast<AArch64Operand &>(*Operands[1]);
+ AArch64Operand &Op3 = static_cast<AArch64Operand &>(*Operands[3]);
+ AArch64Operand &Op4 = static_cast<AArch64Operand &>(*Operands[4]);
- if (Op1->isReg() && Op3->isImm() && Op4->isImm()) {
- const MCConstantExpr *Op3CE = dyn_cast<MCConstantExpr>(Op3->getImm());
- const MCConstantExpr *Op4CE = dyn_cast<MCConstantExpr>(Op4->getImm());
+ if (Op1.isReg() && Op3.isImm() && Op4.isImm()) {
+ const MCConstantExpr *Op3CE = dyn_cast<MCConstantExpr>(Op3.getImm());
+ const MCConstantExpr *Op4CE = dyn_cast<MCConstantExpr>(Op4.getImm());
if (Op3CE && Op4CE) {
uint64_t Op3Val = Op3CE->getValue();
@@ -3511,21 +3632,21 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
uint64_t RegWidth = 0;
if (AArch64MCRegisterClasses[AArch64::GPR64allRegClassID].contains(
- Op1->getReg()))
+ Op1.getReg()))
RegWidth = 64;
else
RegWidth = 32;
if (Op3Val >= RegWidth)
- return Error(Op3->getStartLoc(),
+ return Error(Op3.getStartLoc(),
"expected integer in range [0, 31]");
if (Op4Val < 1 || Op4Val > RegWidth)
- return Error(Op4->getStartLoc(),
+ return Error(Op4.getStartLoc(),
"expected integer in range [1, 32]");
uint64_t NewOp3Val = 0;
if (AArch64MCRegisterClasses[AArch64::GPR32allRegClassID].contains(
- Op1->getReg()))
+ Op1.getReg()))
NewOp3Val = (32 - Op3Val) & 0x1f;
else
NewOp3Val = (64 - Op3Val) & 0x3f;
@@ -3533,7 +3654,7 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
uint64_t NewOp4Val = Op4Val - 1;
if (NewOp3Val != 0 && NewOp4Val >= NewOp3Val)
- return Error(Op4->getStartLoc(),
+ return Error(Op4.getStartLoc(),
"requested insert overflows register");
const MCExpr *NewOp3 =
@@ -3541,24 +3662,20 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
const MCExpr *NewOp4 =
MCConstantExpr::Create(NewOp4Val, getContext());
Operands[3] = AArch64Operand::CreateImm(
- NewOp3, Op3->getStartLoc(), Op3->getEndLoc(), getContext());
+ NewOp3, Op3.getStartLoc(), Op3.getEndLoc(), getContext());
Operands[4] = AArch64Operand::CreateImm(
- NewOp4, Op4->getStartLoc(), Op4->getEndLoc(), getContext());
+ NewOp4, Op4.getStartLoc(), Op4.getEndLoc(), getContext());
if (Tok == "bfi")
Operands[0] = AArch64Operand::CreateToken(
- "bfm", false, Op->getStartLoc(), getContext());
+ "bfm", false, Op.getStartLoc(), getContext());
else if (Tok == "sbfiz")
Operands[0] = AArch64Operand::CreateToken(
- "sbfm", false, Op->getStartLoc(), getContext());
+ "sbfm", false, Op.getStartLoc(), getContext());
else if (Tok == "ubfiz")
Operands[0] = AArch64Operand::CreateToken(
- "ubfm", false, Op->getStartLoc(), getContext());
+ "ubfm", false, Op.getStartLoc(), getContext());
else
llvm_unreachable("No valid mnemonic for alias?");
-
- delete Op;
- delete Op3;
- delete Op4;
}
}
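
The rewrite above lowers the BFI/SBFIZ/UBFIZ aliases onto BFM/SBFM/UBFM after range-checking the lsb/width operands; the immediates are remapped as in this small worked sketch (RegWidth is 32 or 64):

#include <cstdint>

// Maps BFI-style (lsb, width) operands onto the BFM immr/imms fields for a
// register of RegWidth bits, mirroring the operand rewrite above.
void bfiToBfm(uint64_t Lsb, uint64_t Width, unsigned RegWidth,
              uint64_t &ImmR, uint64_t &ImmS) {
  ImmR = (RegWidth - Lsb) & (RegWidth - 1); // rotate-right amount
  ImmS = Width - 1;                         // index of the last bit copied
}

// e.g. the 32-bit alias "bfi w0, w1, #8, #4" becomes "bfm w0, w1, #24, #3":
// bfiToBfm(8, 4, 32, r, s) yields r == 24, s == 3.
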
@@ -3566,13 +3683,13 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
// UBFX -> UBFM aliases.
} else if (NumOperands == 5 &&
(Tok == "bfxil" || Tok == "sbfx" || Tok == "ubfx")) {
- AArch64Operand *Op1 = static_cast<AArch64Operand *>(Operands[1]);
- AArch64Operand *Op3 = static_cast<AArch64Operand *>(Operands[3]);
- AArch64Operand *Op4 = static_cast<AArch64Operand *>(Operands[4]);
+ AArch64Operand &Op1 = static_cast<AArch64Operand &>(*Operands[1]);
+ AArch64Operand &Op3 = static_cast<AArch64Operand &>(*Operands[3]);
+ AArch64Operand &Op4 = static_cast<AArch64Operand &>(*Operands[4]);
- if (Op1->isReg() && Op3->isImm() && Op4->isImm()) {
- const MCConstantExpr *Op3CE = dyn_cast<MCConstantExpr>(Op3->getImm());
- const MCConstantExpr *Op4CE = dyn_cast<MCConstantExpr>(Op4->getImm());
+ if (Op1.isReg() && Op3.isImm() && Op4.isImm()) {
+ const MCConstantExpr *Op3CE = dyn_cast<MCConstantExpr>(Op3.getImm());
+ const MCConstantExpr *Op4CE = dyn_cast<MCConstantExpr>(Op4.getImm());
if (Op3CE && Op4CE) {
uint64_t Op3Val = Op3CE->getValue();
@@ -3580,42 +3697,39 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
uint64_t RegWidth = 0;
if (AArch64MCRegisterClasses[AArch64::GPR64allRegClassID].contains(
- Op1->getReg()))
+ Op1.getReg()))
RegWidth = 64;
else
RegWidth = 32;
if (Op3Val >= RegWidth)
- return Error(Op3->getStartLoc(),
+ return Error(Op3.getStartLoc(),
"expected integer in range [0, 31]");
if (Op4Val < 1 || Op4Val > RegWidth)
- return Error(Op4->getStartLoc(),
+ return Error(Op4.getStartLoc(),
"expected integer in range [1, 32]");
uint64_t NewOp4Val = Op3Val + Op4Val - 1;
if (NewOp4Val >= RegWidth || NewOp4Val < Op3Val)
- return Error(Op4->getStartLoc(),
+ return Error(Op4.getStartLoc(),
"requested extract overflows register");
const MCExpr *NewOp4 =
MCConstantExpr::Create(NewOp4Val, getContext());
Operands[4] = AArch64Operand::CreateImm(
- NewOp4, Op4->getStartLoc(), Op4->getEndLoc(), getContext());
+ NewOp4, Op4.getStartLoc(), Op4.getEndLoc(), getContext());
if (Tok == "bfxil")
Operands[0] = AArch64Operand::CreateToken(
- "bfm", false, Op->getStartLoc(), getContext());
+ "bfm", false, Op.getStartLoc(), getContext());
else if (Tok == "sbfx")
Operands[0] = AArch64Operand::CreateToken(
- "sbfm", false, Op->getStartLoc(), getContext());
+ "sbfm", false, Op.getStartLoc(), getContext());
else if (Tok == "ubfx")
Operands[0] = AArch64Operand::CreateToken(
- "ubfm", false, Op->getStartLoc(), getContext());
+ "ubfm", false, Op.getStartLoc(), getContext());
else
llvm_unreachable("No valid mnemonic for alias?");
-
- delete Op;
- delete Op4;
}
}
}
@@ -3626,63 +3740,58 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
if (NumOperands == 3 && (Tok == "sxtw" || Tok == "uxtw")) {
// The source register can be Wn here, but the matcher expects a
// GPR64. Twiddle it here if necessary.
- AArch64Operand *Op = static_cast<AArch64Operand *>(Operands[2]);
- if (Op->isReg()) {
- unsigned Reg = getXRegFromWReg(Op->getReg());
- Operands[2] = AArch64Operand::CreateReg(Reg, false, Op->getStartLoc(),
- Op->getEndLoc(), getContext());
- delete Op;
+ AArch64Operand &Op = static_cast<AArch64Operand &>(*Operands[2]);
+ if (Op.isReg()) {
+ unsigned Reg = getXRegFromWReg(Op.getReg());
+ Operands[2] = AArch64Operand::CreateReg(Reg, false, Op.getStartLoc(),
+ Op.getEndLoc(), getContext());
}
}
// FIXME: Likewise for sxt[bh] with a Xd dst operand
else if (NumOperands == 3 && (Tok == "sxtb" || Tok == "sxth")) {
- AArch64Operand *Op = static_cast<AArch64Operand *>(Operands[1]);
- if (Op->isReg() &&
+ AArch64Operand &Op = static_cast<AArch64Operand &>(*Operands[1]);
+ if (Op.isReg() &&
AArch64MCRegisterClasses[AArch64::GPR64allRegClassID].contains(
- Op->getReg())) {
+ Op.getReg())) {
// The source register can be Wn here, but the matcher expects a
// GPR64. Twiddle it here if necessary.
- AArch64Operand *Op = static_cast<AArch64Operand *>(Operands[2]);
- if (Op->isReg()) {
- unsigned Reg = getXRegFromWReg(Op->getReg());
- Operands[2] = AArch64Operand::CreateReg(Reg, false, Op->getStartLoc(),
- Op->getEndLoc(), getContext());
- delete Op;
+ AArch64Operand &Op = static_cast<AArch64Operand &>(*Operands[2]);
+ if (Op.isReg()) {
+ unsigned Reg = getXRegFromWReg(Op.getReg());
+ Operands[2] = AArch64Operand::CreateReg(Reg, false, Op.getStartLoc(),
+ Op.getEndLoc(), getContext());
}
}
}
// FIXME: Likewise for uxt[bh] with a Xd dst operand
else if (NumOperands == 3 && (Tok == "uxtb" || Tok == "uxth")) {
- AArch64Operand *Op = static_cast<AArch64Operand *>(Operands[1]);
- if (Op->isReg() &&
+ AArch64Operand &Op = static_cast<AArch64Operand &>(*Operands[1]);
+ if (Op.isReg() &&
AArch64MCRegisterClasses[AArch64::GPR64allRegClassID].contains(
- Op->getReg())) {
+ Op.getReg())) {
// The source register can be Wn here, but the matcher expects a
// GPR32. Twiddle it here if necessary.
- AArch64Operand *Op = static_cast<AArch64Operand *>(Operands[1]);
- if (Op->isReg()) {
- unsigned Reg = getWRegFromXReg(Op->getReg());
- Operands[1] = AArch64Operand::CreateReg(Reg, false, Op->getStartLoc(),
- Op->getEndLoc(), getContext());
- delete Op;
+ AArch64Operand &Op = static_cast<AArch64Operand &>(*Operands[1]);
+ if (Op.isReg()) {
+ unsigned Reg = getWRegFromXReg(Op.getReg());
+ Operands[1] = AArch64Operand::CreateReg(Reg, false, Op.getStartLoc(),
+ Op.getEndLoc(), getContext());
}
}
}
// Yet another horrible hack to handle FMOV Rd, #0.0 using [WX]ZR.
if (NumOperands == 3 && Tok == "fmov") {
- AArch64Operand *RegOp = static_cast<AArch64Operand *>(Operands[1]);
- AArch64Operand *ImmOp = static_cast<AArch64Operand *>(Operands[2]);
- if (RegOp->isReg() && ImmOp->isFPImm() &&
- ImmOp->getFPImm() == (unsigned)-1) {
+ AArch64Operand &RegOp = static_cast<AArch64Operand &>(*Operands[1]);
+ AArch64Operand &ImmOp = static_cast<AArch64Operand &>(*Operands[2]);
+ if (RegOp.isReg() && ImmOp.isFPImm() && ImmOp.getFPImm() == (unsigned)-1) {
unsigned zreg =
AArch64MCRegisterClasses[AArch64::FPR32RegClassID].contains(
- RegOp->getReg())
+ RegOp.getReg())
? AArch64::WZR
: AArch64::XZR;
- Operands[2] = AArch64Operand::CreateReg(zreg, false, Op->getStartLoc(),
- Op->getEndLoc(), getContext());
- delete ImmOp;
+ Operands[2] = AArch64Operand::CreateReg(zreg, false, Op.getStartLoc(),
+ Op.getEndLoc(), getContext());
}
}
@@ -3735,14 +3844,14 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
if (ErrorInfo >= Operands.size())
return Error(IDLoc, "too few operands for instruction");
- ErrorLoc = ((AArch64Operand *)Operands[ErrorInfo])->getStartLoc();
+ ErrorLoc = ((AArch64Operand &)*Operands[ErrorInfo]).getStartLoc();
if (ErrorLoc == SMLoc())
ErrorLoc = IDLoc;
}
// If the match failed on a suffix token operand, tweak the diagnostic
// accordingly.
- if (((AArch64Operand *)Operands[ErrorInfo])->isToken() &&
- ((AArch64Operand *)Operands[ErrorInfo])->isTokenSuffix())
+ if (((AArch64Operand &)*Operands[ErrorInfo]).isToken() &&
+ ((AArch64Operand &)*Operands[ErrorInfo]).isTokenSuffix())
MatchResult = Match_InvalidSuffix;
return showMatchError(ErrorLoc, MatchResult);
@@ -3794,9 +3903,11 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
case Match_InvalidLabel:
case Match_MSR:
case Match_MRS: {
+ if (ErrorInfo >= Operands.size())
+ return Error(IDLoc, "too few operands for instruction");
// Any time we get here, there's nothing fancy to do. Just get the
// operand SMLoc and display the diagnostic.
- SMLoc ErrorLoc = ((AArch64Operand *)Operands[ErrorInfo])->getStartLoc();
+ SMLoc ErrorLoc = ((AArch64Operand &)*Operands[ErrorInfo]).getStartLoc();
if (ErrorLoc == SMLoc())
ErrorLoc = IDLoc;
return showMatchError(ErrorLoc, MatchResult);
@@ -3819,6 +3930,10 @@ bool AArch64AsmParser::ParseDirective(AsmToken DirectiveID) {
return parseDirectiveWord(8, Loc);
if (IDVal == ".tlsdesccall")
return parseDirectiveTLSDescCall(Loc);
+ if (IDVal == ".ltorg" || IDVal == ".pool")
+ return parseDirectiveLtorg(Loc);
+ if (IDVal == ".unreq")
+ return parseDirectiveUnreq(DirectiveID.getLoc());
return parseDirectiveLOH(IDVal, Loc);
}
@@ -3920,6 +4035,66 @@ bool AArch64AsmParser::parseDirectiveLOH(StringRef IDVal, SMLoc Loc) {
return false;
}
+/// parseDirectiveLtorg
+/// ::= .ltorg | .pool
+bool AArch64AsmParser::parseDirectiveLtorg(SMLoc L) {
+ getTargetStreamer().emitCurrentConstantPool();
+ return false;
+}
+
+/// parseDirectiveReq
+/// ::= name .req registername
+bool AArch64AsmParser::parseDirectiveReq(StringRef Name, SMLoc L) {
+ Parser.Lex(); // Eat the '.req' token.
+ SMLoc SRegLoc = getLoc();
+ unsigned RegNum = tryParseRegister();
+ bool IsVector = false;
+
+ if (RegNum == static_cast<unsigned>(-1)) {
+ StringRef Kind;
+ RegNum = tryMatchVectorRegister(Kind, false);
+ if (!Kind.empty()) {
+ Error(SRegLoc, "vector register without type specifier expected");
+ return false;
+ }
+ IsVector = true;
+ }
+
+ if (RegNum == static_cast<unsigned>(-1)) {
+ Parser.eatToEndOfStatement();
+ Error(SRegLoc, "register name or alias expected");
+ return false;
+ }
+
+ // Shouldn't be anything else.
+ if (Parser.getTok().isNot(AsmToken::EndOfStatement)) {
+ Error(Parser.getTok().getLoc(), "unexpected input in .req directive");
+ Parser.eatToEndOfStatement();
+ return false;
+ }
+
+ Parser.Lex(); // Consume the EndOfStatement
+
+ auto pair = std::make_pair(IsVector, RegNum);
+ if (RegisterReqs.GetOrCreateValue(Name, pair).getValue() != pair)
+ Warning(L, "ignoring redefinition of register alias '" + Name + "'");
+
+ return true;
+}
+
+/// parseDirectiveUnreq
+/// ::= .unreq registername
+bool AArch64AsmParser::parseDirectiveUnreq(SMLoc L) {
+ if (Parser.getTok().isNot(AsmToken::Identifier)) {
+ Error(Parser.getTok().getLoc(), "unexpected input in .unreq directive.");
+ Parser.eatToEndOfStatement();
+ return false;
+ }
+ RegisterReqs.erase(Parser.getTok().getIdentifier().lower());
+ Parser.Lex(); // Eat the identifier.
+ return false;
+}
+
bool
AArch64AsmParser::classifySymbolRef(const MCExpr *Expr,
AArch64MCExpr::VariantKind &ELFRefKind,
@@ -3986,9 +4161,9 @@ extern "C" void LLVMInitializeAArch64AsmParser() {
// Define this matcher function after the auto-generated include so we
// have the match class enum definitions.
-unsigned AArch64AsmParser::validateTargetOperandClass(MCParsedAsmOperand *AsmOp,
+unsigned AArch64AsmParser::validateTargetOperandClass(MCParsedAsmOperand &AsmOp,
unsigned Kind) {
- AArch64Operand *Op = static_cast<AArch64Operand *>(AsmOp);
+ AArch64Operand &Op = static_cast<AArch64Operand &>(AsmOp);
// If the kind is a token for a literal immediate, check if our asm
// operand matches. This is for InstAliases which have a fixed-value
// immediate in the syntax.
@@ -4036,9 +4211,9 @@ unsigned AArch64AsmParser::validateTargetOperandClass(MCParsedAsmOperand *AsmOp,
ExpectedVal = 8;
break;
}
- if (!Op->isImm())
+ if (!Op.isImm())
return Match_InvalidOperand;
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Op->getImm());
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Op.getImm());
if (!CE)
return Match_InvalidOperand;
if (CE->getValue() == ExpectedVal)
diff --git a/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.cpp b/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.cpp
index 2466368..2057c51 100644
--- a/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.cpp
+++ b/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.cpp
@@ -37,8 +37,7 @@ getVariant(uint64_t LLVMDisassembler_VariantKind) {
case LLVMDisassembler_VariantKind_ARM64_TLVP:
case LLVMDisassembler_VariantKind_ARM64_TLVOFF:
default:
- assert(0 && "bad LLVMDisassembler_VariantKind");
- return MCSymbolRefExpr::VK_None;
+ llvm_unreachable("bad LLVMDisassembler_VariantKind");
}
}
diff --git a/lib/Target/AArch64/Disassembler/CMakeLists.txt b/lib/Target/AArch64/Disassembler/CMakeLists.txt
index be4ccad..d64c05b 100644
--- a/lib/Target/AArch64/Disassembler/CMakeLists.txt
+++ b/lib/Target/AArch64/Disassembler/CMakeLists.txt
@@ -4,11 +4,5 @@ add_llvm_library(LLVMAArch64Disassembler
AArch64Disassembler.cpp
AArch64ExternalSymbolizer.cpp
)
-# workaround for hanging compilation on MSVC8, 9 and 10
-#if( MSVC_VERSION EQUAL 1400 OR MSVC_VERSION EQUAL 1500 OR MSVC_VERSION EQUAL 1600 )
-#set_property(
-# SOURCE ARMDisassembler.cpp
-# PROPERTY COMPILE_FLAGS "/Od"
-# )
-#endif()
+
add_dependencies(LLVMAArch64Disassembler AArch64CommonTableGen)
diff --git a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp
index f484a5b..8a21f06 100644
--- a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp
+++ b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp
@@ -918,7 +918,7 @@ void AArch64InstPrinter::printPostIncOperand(const MCInst *MI, unsigned OpNo,
else
O << getRegisterName(Reg);
} else
- assert(0 && "unknown operand kind in printPostIncOperand64");
+ llvm_unreachable("unknown operand kind in printPostIncOperand64");
}
void AArch64InstPrinter::printVRegOperand(const MCInst *MI, unsigned OpNo,
@@ -1109,7 +1109,7 @@ static unsigned getNextVectorRegister(unsigned Reg, unsigned Stride = 1) {
while (Stride--) {
switch (Reg) {
default:
- assert(0 && "Vector register expected!");
+ llvm_unreachable("Vector register expected!");
case AArch64::Q0: Reg = AArch64::Q1; break;
case AArch64::Q1: Reg = AArch64::Q2; break;
case AArch64::Q2: Reg = AArch64::Q3; break;
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
index d8900d4..a917616 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
@@ -86,7 +86,7 @@ public:
static unsigned getFixupKindNumBytes(unsigned Kind) {
switch (Kind) {
default:
- assert(0 && "Unknown fixup kind!");
+ llvm_unreachable("Unknown fixup kind!");
case AArch64::fixup_aarch64_tlsdesc_call:
return 0;
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp
index dc4a8bf..1763b40 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp
@@ -96,4 +96,6 @@ AArch64MCAsmInfoELF::AArch64MCAsmInfoELF(StringRef TT) {
ExceptionsType = ExceptionHandling::DwarfCFI;
UseIntegratedAssembler = true;
+
+ HasIdentDirective = true;
}
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp
index 464a18c..f051357 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp
@@ -218,13 +218,9 @@ AArch64MCCodeEmitter::getMachineOpValue(const MCInst &MI, const MCOperand &MO,
const MCSubtargetInfo &STI) const {
if (MO.isReg())
return Ctx.getRegisterInfo()->getEncodingValue(MO.getReg());
- else {
- assert(MO.isImm() && "did not expect relocated expression");
- return static_cast<unsigned>(MO.getImm());
- }
- assert(0 && "Unable to encode MCOperand!");
- return 0;
+ assert(MO.isImm() && "did not expect relocated expression");
+ return static_cast<unsigned>(MO.getImm());
}
template<unsigned FixupKind> uint32_t
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp
index 85c3ec7..42a6787 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp
@@ -81,37 +81,8 @@ void AArch64MCExpr::PrintImpl(raw_ostream &OS) const {
OS << *Expr;
}
-// FIXME: This basically copies MCObjectStreamer::AddValueSymbols. Perhaps
-// that method should be made public?
-// FIXME: really do above: now that two backends are using it.
-static void AddValueSymbolsImpl(const MCExpr *Value, MCAssembler *Asm) {
- switch (Value->getKind()) {
- case MCExpr::Target:
- llvm_unreachable("Can't handle nested target expr!");
- break;
-
- case MCExpr::Constant:
- break;
-
- case MCExpr::Binary: {
- const MCBinaryExpr *BE = cast<MCBinaryExpr>(Value);
- AddValueSymbolsImpl(BE->getLHS(), Asm);
- AddValueSymbolsImpl(BE->getRHS(), Asm);
- break;
- }
-
- case MCExpr::SymbolRef:
- Asm->getOrCreateSymbolData(cast<MCSymbolRefExpr>(Value)->getSymbol());
- break;
-
- case MCExpr::Unary:
- AddValueSymbolsImpl(cast<MCUnaryExpr>(Value)->getSubExpr(), Asm);
- break;
- }
-}
-
-void AArch64MCExpr::AddValueSymbols(MCAssembler *Asm) const {
- AddValueSymbolsImpl(getSubExpr(), Asm);
+void AArch64MCExpr::visitUsedExpr(MCStreamer &Streamer) const {
+ Streamer.visitUsedExpr(*getSubExpr());
}
const MCSection *AArch64MCExpr::FindAssociatedSection() const {
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h b/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h
index e869ed0..5422f9d 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h
@@ -147,7 +147,7 @@ public:
void PrintImpl(raw_ostream &OS) const override;
- void AddValueSymbols(MCAssembler *) const override;
+ void visitUsedExpr(MCStreamer &Streamer) const override;
const MCSection *FindAssociatedSection() const override;
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp
index 5c86189..ba95366 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp
@@ -75,7 +75,7 @@ bool AArch64MachObjectWriter::getAArch64FixupKindMachOInfo(
Log2Size = llvm::Log2_32(4);
switch (Sym->getKind()) {
default:
- assert(0 && "Unexpected symbol reference variant kind!");
+ llvm_unreachable("Unexpected symbol reference variant kind!");
case MCSymbolRefExpr::VK_PAGEOFF:
RelocType = unsigned(MachO::ARM64_RELOC_PAGEOFF12);
return true;
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp
new file mode 100644
index 0000000..f9aeb35
--- /dev/null
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp
@@ -0,0 +1,40 @@
+//===- AArch64TargetStreamer.cpp - AArch64TargetStreamer class --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the AArch64TargetStreamer class.
+//
+//===----------------------------------------------------------------------===//
+#include "llvm/ADT/MapVector.h"
+#include "llvm/MC/ConstantPools.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCStreamer.h"
+
+using namespace llvm;
+
+//
+// AArch64TargetStreamer Implementation
+//
+AArch64TargetStreamer::AArch64TargetStreamer(MCStreamer &S)
+ : MCTargetStreamer(S), ConstantPools(new AssemblerConstantPools()) {}
+
+AArch64TargetStreamer::~AArch64TargetStreamer() {}
+
+// The constant pool handling is shared by all AArch64TargetStreamer
+// implementations.
+const MCExpr *AArch64TargetStreamer::addConstantPoolEntry(const MCExpr *Expr) {
+ return ConstantPools->addEntry(Streamer, Expr);
+}
+
+void AArch64TargetStreamer::emitCurrentConstantPool() {
+ ConstantPools->emitForCurrentSection(Streamer);
+}
+
+// finish() - write out any non-empty assembler constant pools.
+void AArch64TargetStreamer::finish() { ConstantPools->emitAll(Streamer); }
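
Illustrative aside (not part of the patch): the three hooks above are meant to be driven by the assembler front end, e.g. the ldr-pseudo ("ldr x0, =<expr>") and ".ltorg" handling in the asm parser. A minimal sketch of that usage, assuming the AArch64TargetStreamer declaration is already in scope; the two helper functions are hypothetical.

// Sketch only: how an assembly parser might drive the streamer hooks above.
#include "llvm/MC/MCExpr.h"

using namespace llvm;

static const MCExpr *handleLdrPseudo(AArch64TargetStreamer &TS,
                                     const MCExpr *Literal) {
  // "ldr x0, =<expr>": park the literal in the current section's pool and use
  // the returned label expression as the load's pc-relative operand.
  return TS.addConstantPoolEntry(Literal);
}

static void handleLtorg(AArch64TargetStreamer &TS) {
  // ".ltorg": dump the pool accumulated for the current section.
  TS.emitCurrentConstantPool();
}

// At the end of the translation unit the streamer calls finish(), which emits
// any pools that were never flushed by an explicit ".ltorg".
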
diff --git a/lib/Target/AArch64/MCTargetDesc/Android.mk b/lib/Target/AArch64/MCTargetDesc/Android.mk
index e9d2323..a23c0e5 100644
--- a/lib/Target/AArch64/MCTargetDesc/Android.mk
+++ b/lib/Target/AArch64/MCTargetDesc/Android.mk
@@ -14,7 +14,8 @@ aarch64_mc_desc_SRC_FILES := \
AArch64MCAsmInfo.cpp \
AArch64MCCodeEmitter.cpp \
AArch64MCExpr.cpp \
- AArch64MCTargetDesc.cpp
+ AArch64MCTargetDesc.cpp \
+ AArch64TargetStreamer.cpp
# For the host
# =====================================================
diff --git a/lib/Target/AArch64/MCTargetDesc/CMakeLists.txt b/lib/Target/AArch64/MCTargetDesc/CMakeLists.txt
index 7d5bced..6d8be5e 100644
--- a/lib/Target/AArch64/MCTargetDesc/CMakeLists.txt
+++ b/lib/Target/AArch64/MCTargetDesc/CMakeLists.txt
@@ -7,6 +7,7 @@ add_llvm_library(LLVMAArch64Desc
AArch64MCExpr.cpp
AArch64MCTargetDesc.cpp
AArch64MachObjectWriter.cpp
+ AArch64TargetStreamer.cpp
)
add_dependencies(LLVMAArch64Desc AArch64CommonTableGen)
diff --git a/lib/Target/AArch64/Utils/AArch64BaseInfo.h b/lib/Target/AArch64/Utils/AArch64BaseInfo.h
index 9e4c389..9d2ce21 100644
--- a/lib/Target/AArch64/Utils/AArch64BaseInfo.h
+++ b/lib/Target/AArch64/Utils/AArch64BaseInfo.h
@@ -233,23 +233,9 @@ inline static const char *getCondCodeName(CondCode Code) {
}
inline static CondCode getInvertedCondCode(CondCode Code) {
- switch (Code) {
- default: llvm_unreachable("Unknown condition code");
- case EQ: return NE;
- case NE: return EQ;
- case HS: return LO;
- case LO: return HS;
- case MI: return PL;
- case PL: return MI;
- case VS: return VC;
- case VC: return VS;
- case HI: return LS;
- case LS: return HI;
- case GE: return LT;
- case LT: return GE;
- case GT: return LE;
- case LE: return GT;
- }
+  // Inverting a condition only requires flipping the low bit of its encoding:
+
+ return static_cast<CondCode>(static_cast<unsigned>(Code) ^ 0x1);
}
/// Given a condition code, return NZCV flags that would satisfy that condition.
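
For illustration only (not part of the patch): the XOR works because the AArch64 condition-code encoding places each condition next to its inverse, differing only in bit 0. A self-contained check, with the architectural encodings written out explicitly (a subset of the full enum):

// Standalone illustration of the low-bit inversion used by
// getInvertedCondCode(). The enum mirrors the architectural encodings.
#include <cassert>

enum CondCode { EQ = 0x0, NE = 0x1, HS = 0x2, LO = 0x3, MI = 0x4, PL = 0x5,
                VS = 0x6, VC = 0x7, HI = 0x8, LS = 0x9, GE = 0xa, LT = 0xb,
                GT = 0xc, LE = 0xd };

static CondCode invert(CondCode C) {
  // Adjacent codes are inverses of each other, so flipping bit 0 suffices.
  return static_cast<CondCode>(static_cast<unsigned>(C) ^ 0x1);
}

int main() {
  assert(invert(EQ) == NE && invert(NE) == EQ);
  assert(invert(GE) == LT && invert(GT) == LE);
  assert(invert(HI) == LS && invert(MI) == PL);
  return 0;
}
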
diff --git a/lib/Target/ARM/A15SDOptimizer.cpp b/lib/Target/ARM/A15SDOptimizer.cpp
index 94faf6f..92eaf9e 100644
--- a/lib/Target/ARM/A15SDOptimizer.cpp
+++ b/lib/Target/ARM/A15SDOptimizer.cpp
@@ -321,8 +321,7 @@ unsigned A15SDOptimizer::optimizeSDPattern(MachineInstr *MI) {
return optimizeAllLanesPattern(MI, MI->getOperand(0).getReg());
}
- assert(0 && "Unhandled update pattern!");
- return 0;
+ llvm_unreachable("Unhandled update pattern!");
}
// Return true if this MachineInstr inserts a scalar (SPR) value into
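
Reviewer note (illustration, not part of the patch): many hunks in this change make the same mechanical substitution, so here is the before/after idiom in isolation. The enum and functions are invented; llvm_unreachable comes from llvm/Support/ErrorHandling.h.

#include "llvm/Support/ErrorHandling.h"
#include <cassert>

enum Pattern { PatternA, PatternB }; // illustrative only

// Before: in release builds assert(0) compiles away, so a bogus value is
// silently returned and a dead "return 0" must be kept to satisfy the
// compiler.
unsigned handlePatternOld(Pattern P) {
  switch (P) {
  case PatternA: return 1;
  case PatternB: return 2;
  }
  assert(0 && "Unhandled pattern!");
  return 0;
}

// After: llvm_unreachable aborts with the message in +Asserts builds and is
// annotated as noreturn, so no dummy return value is needed and the optimizer
// may treat the path as dead.
unsigned handlePatternNew(Pattern P) {
  switch (P) {
  case PatternA: return 1;
  case PatternB: return 2;
  }
  llvm_unreachable("Unhandled pattern!");
}
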
diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp
index 55e9fe5..28d2610 100644
--- a/lib/Target/ARM/ARMAsmPrinter.cpp
+++ b/lib/Target/ARM/ARMAsmPrinter.cpp
@@ -82,7 +82,8 @@ void ARMAsmPrinter::EmitXXStructor(const Constant *CV) {
const GlobalValue *GV = dyn_cast<GlobalValue>(CV->stripPointerCasts());
assert(GV && "C++ constructor pointer was not a GlobalValue!");
- const MCExpr *E = MCSymbolRefExpr::Create(getSymbol(GV),
+ const MCExpr *E = MCSymbolRefExpr::Create(GetARMGVSymbol(GV,
+ ARMII::MO_NO_FLAG),
(Subtarget->isTargetELF()
? MCSymbolRefExpr::VK_ARM_TARGET1
: MCSymbolRefExpr::VK_None),
@@ -164,7 +165,7 @@ void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum,
else if ((Modifier && strcmp(Modifier, "hi16") == 0) ||
(TF & ARMII::MO_HI16))
O << ":upper16:";
- O << *getSymbol(GV);
+ O << *GetARMGVSymbol(GV, TF);
printOffset(MO.getOffset(), O);
if (TF == ARMII::MO_PLT)
@@ -730,6 +731,32 @@ void ARMAsmPrinter::emitAttributes() {
if (Subtarget->hasDivideInARMMode() && !Subtarget->hasV8Ops())
ATS.emitAttribute(ARMBuildAttrs::DIV_use, ARMBuildAttrs::AllowDIVExt);
+ if (MMI) {
+ if (const Module *SourceModule = MMI->getModule()) {
+ // ABI_PCS_wchar_t to indicate wchar_t width
+ // FIXME: There is no way to emit value 0 (wchar_t prohibited).
+ if (auto WCharWidthValue = cast_or_null<ConstantInt>(
+ SourceModule->getModuleFlag("wchar_size"))) {
+ int WCharWidth = WCharWidthValue->getZExtValue();
+ assert((WCharWidth == 2 || WCharWidth == 4) &&
+ "wchar_t width must be 2 or 4 bytes");
+ ATS.emitAttribute(ARMBuildAttrs::ABI_PCS_wchar_t, WCharWidth);
+ }
+
+ // ABI_enum_size to indicate enum width
+ // FIXME: There is no way to emit value 0 (enums prohibited) or value 3
+ // (all enums contain a value needing 32 bits to encode).
+ if (auto EnumWidthValue = cast_or_null<ConstantInt>(
+ SourceModule->getModuleFlag("min_enum_size"))) {
+ int EnumWidth = EnumWidthValue->getZExtValue();
+ assert((EnumWidth == 1 || EnumWidth == 4) &&
+ "Minimum enum width must be 1 or 4 bytes");
+ int EnumBuildAttr = EnumWidth == 1 ? 1 : 2;
+ ATS.emitAttribute(ARMBuildAttrs::ABI_enum_size, EnumBuildAttr);
+ }
+ }
+ }
+
if (Subtarget->hasTrustZone() && Subtarget->hasVirtualization())
ATS.emitAttribute(ARMBuildAttrs::Virtualization_use,
ARMBuildAttrs::AllowTZVirtualization);
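
Illustrative aside (not part of the patch): the ABI_PCS_wchar_t / ABI_enum_size hunk above reads IR module flags rather than subtarget state. A hedged sketch of the producer side, i.e. how a front end could attach the flags that emitAttributes() consumes; the key names match the getModuleFlag() calls above, the values are arbitrary examples.

#include "llvm/IR/Module.h"

using namespace llvm;

// Sketch: attach the module flags that ARMAsmPrinter::emitAttributes() above
// translates into ABI_PCS_wchar_t and ABI_enum_size build attributes.
static void tagModuleForEABI(Module &M) {
  // wchar_t is 4 bytes on this (hypothetical) target...
  M.addModuleFlag(Module::Error, "wchar_size", 4);
  // ...and enums use the smallest container (1-byte minimum).
  M.addModuleFlag(Module::Error, "min_enum_size", 1);
}

With these flags, the code above would emit ABI_PCS_wchar_t = 4 and ABI_enum_size = 1 (an EnumWidth of 1 maps to attribute value 1, an EnumWidth of 4 to value 2).
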
@@ -768,23 +795,41 @@ getModifierVariantKind(ARMCP::ARMCPModifier Modifier) {
MCSymbol *ARMAsmPrinter::GetARMGVSymbol(const GlobalValue *GV,
unsigned char TargetFlags) {
- bool isIndirect = Subtarget->isTargetMachO() &&
- (TargetFlags & ARMII::MO_NONLAZY) &&
- Subtarget->GVIsIndirectSymbol(GV, TM.getRelocationModel());
- if (!isIndirect)
- return getSymbol(GV);
+ if (Subtarget->isTargetMachO()) {
+ bool IsIndirect = (TargetFlags & ARMII::MO_NONLAZY) &&
+ Subtarget->GVIsIndirectSymbol(GV, TM.getRelocationModel());
+
+ if (!IsIndirect)
+ return getSymbol(GV);
- // FIXME: Remove this when Darwin transition to @GOT like syntax.
- MCSymbol *MCSym = getSymbolWithGlobalValueBase(GV, "$non_lazy_ptr");
- MachineModuleInfoMachO &MMIMachO =
- MMI->getObjFileInfo<MachineModuleInfoMachO>();
- MachineModuleInfoImpl::StubValueTy &StubSym =
- GV->hasHiddenVisibility() ? MMIMachO.getHiddenGVStubEntry(MCSym) :
- MMIMachO.getGVStubEntry(MCSym);
- if (!StubSym.getPointer())
- StubSym = MachineModuleInfoImpl::
- StubValueTy(getSymbol(GV), !GV->hasInternalLinkage());
- return MCSym;
+ // FIXME: Remove this when Darwin transition to @GOT like syntax.
+ MCSymbol *MCSym = getSymbolWithGlobalValueBase(GV, "$non_lazy_ptr");
+ MachineModuleInfoMachO &MMIMachO =
+ MMI->getObjFileInfo<MachineModuleInfoMachO>();
+ MachineModuleInfoImpl::StubValueTy &StubSym =
+ GV->hasHiddenVisibility() ? MMIMachO.getHiddenGVStubEntry(MCSym)
+ : MMIMachO.getGVStubEntry(MCSym);
+ if (!StubSym.getPointer())
+ StubSym = MachineModuleInfoImpl::StubValueTy(getSymbol(GV),
+ !GV->hasInternalLinkage());
+ return MCSym;
+ } else if (Subtarget->isTargetCOFF()) {
+ assert(Subtarget->isTargetWindows() &&
+ "Windows is the only supported COFF target");
+
+ bool IsIndirect = (TargetFlags & ARMII::MO_DLLIMPORT);
+ if (!IsIndirect)
+ return getSymbol(GV);
+
+ SmallString<128> Name;
+ Name = "__imp_";
+ getNameWithPrefix(Name, GV);
+
+ return OutContext.GetOrCreateSymbol(Name);
+ } else if (Subtarget->isTargetELF()) {
+ return getSymbol(GV);
+ }
+ llvm_unreachable("unexpected target");
}
void ARMAsmPrinter::
@@ -928,7 +973,7 @@ void ARMAsmPrinter::EmitJump2Table(const MachineInstr *MI) {
for (unsigned i = 0, e = JTBBs.size(); i != e; ++i) {
MachineBasicBlock *MBB = JTBBs[i];
const MCExpr *MBBSymbolExpr = MCSymbolRefExpr::Create(MBB->getSymbol(),
- OutContext);
+ OutContext);
// If this isn't a TBB or TBH, the entries are direct branch instructions.
if (OffsetWidth == 4) {
EmitToStreamer(OutStreamer, MCInstBuilder(ARM::t2B)
@@ -1225,8 +1270,10 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
// Add 's' bit operand (always reg0 for this)
.addReg(0));
- const GlobalValue *GV = MI->getOperand(0).getGlobal();
- MCSymbol *GVSym = getSymbol(GV);
+ const MachineOperand &Op = MI->getOperand(0);
+ const GlobalValue *GV = Op.getGlobal();
+ const unsigned TF = Op.getTargetFlags();
+ MCSymbol *GVSym = GetARMGVSymbol(GV, TF);
const MCExpr *GVSymExpr = MCSymbolRefExpr::Create(GVSym, OutContext);
EmitToStreamer(OutStreamer, MCInstBuilder(ARM::Bcc)
.addExpr(GVSymExpr)
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp
index bc266e8..0288db9 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -32,6 +32,7 @@
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCExpr.h"
#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
@@ -102,14 +103,15 @@ ARMBaseInstrInfo::ARMBaseInstrInfo(const ARMSubtarget& STI)
// Use a ScoreboardHazardRecognizer for prepass ARM scheduling. TargetInstrImpl
// currently defaults to no prepass hazard recognizer.
-ScheduleHazardRecognizer *ARMBaseInstrInfo::
-CreateTargetHazardRecognizer(const TargetMachine *TM,
- const ScheduleDAG *DAG) const {
+ScheduleHazardRecognizer *
+ARMBaseInstrInfo::CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI,
+ const ScheduleDAG *DAG) const {
if (usePreRAHazardRecognizer()) {
- const InstrItineraryData *II = TM->getInstrItineraryData();
+ const InstrItineraryData *II =
+ &static_cast<const ARMSubtarget *>(STI)->getInstrItineraryData();
return new ScoreboardHazardRecognizer(II, DAG, "pre-RA-sched");
}
- return TargetInstrInfo::CreateTargetHazardRecognizer(TM, DAG);
+ return TargetInstrInfo::CreateTargetHazardRecognizer(STI, DAG);
}
ScheduleHazardRecognizer *ARMBaseInstrInfo::
@@ -1885,7 +1887,8 @@ bool llvm::tryFoldSPUpdateIntoPushPop(const ARMSubtarget &Subtarget,
unsigned NumBytes) {
// This optimisation potentially adds lots of load and store
// micro-operations, it's only really a great benefit to code-size.
- if (!Subtarget.isMinSize())
+ if (!MF.getFunction()->getAttributes().hasAttribute(
+ AttributeSet::FunctionIndex, Attribute::MinSize))
return false;
// If only one register is pushed/popped, LLVM can use an LDR/STR
@@ -4358,6 +4361,29 @@ breakPartialRegDependency(MachineBasicBlock::iterator MI,
MI->addRegisterKilled(DReg, TRI, true);
}
+void ARMBaseInstrInfo::getUnconditionalBranch(
+ MCInst &Branch, const MCSymbolRefExpr *BranchTarget) const {
+ if (Subtarget.isThumb())
+ Branch.setOpcode(ARM::tB);
+ else if (Subtarget.isThumb2())
+ Branch.setOpcode(ARM::t2B);
+ else
+ Branch.setOpcode(ARM::Bcc);
+
+ Branch.addOperand(MCOperand::CreateExpr(BranchTarget));
+ Branch.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ Branch.addOperand(MCOperand::CreateReg(0));
+}
+
+void ARMBaseInstrInfo::getTrap(MCInst &MI) const {
+ if (Subtarget.isThumb())
+ MI.setOpcode(ARM::tTRAP);
+ else if (Subtarget.useNaClTrap())
+ MI.setOpcode(ARM::TRAPNaCl);
+ else
+ MI.setOpcode(ARM::TRAP);
+}
+
bool ARMBaseInstrInfo::hasNOP() const {
return (Subtarget.getFeatureBits() & ARM::HasV6T2Ops) != 0;
}
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h
index 4b3e740..b8d6758 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -50,7 +50,7 @@ public:
const ARMSubtarget &getSubtarget() const { return Subtarget; }
ScheduleHazardRecognizer *
- CreateTargetHazardRecognizer(const TargetMachine *TM,
+ CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI,
const ScheduleDAG *DAG) const override;
ScheduleHazardRecognizer *
@@ -229,6 +229,13 @@ public:
const TargetRegisterInfo*) const override;
void breakPartialRegDependency(MachineBasicBlock::iterator, unsigned,
const TargetRegisterInfo *TRI) const override;
+
+ void
+ getUnconditionalBranch(MCInst &Branch,
+ const MCSymbolRefExpr *BranchTarget) const override;
+
+ void getTrap(MCInst &MI) const override;
+
/// Get the number of addresses by LDM or VLDM or zero for unknown.
unsigned getNumLDMAddresses(const MachineInstr *MI) const;
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index a2eee9f..cdd91c7 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -45,9 +45,12 @@ using namespace llvm;
ARMBaseRegisterInfo::ARMBaseRegisterInfo(const ARMSubtarget &sti)
: ARMGenRegisterInfo(ARM::LR, 0, 0, ARM::PC), STI(sti), BasePtr(ARM::R6) {
- if (STI.isTargetMachO())
- FramePtr = ARM::R7;
- else if (STI.isTargetWindows())
+ if (STI.isTargetMachO()) {
+ if (STI.isTargetDarwin() || STI.isThumb1Only())
+ FramePtr = ARM::R7;
+ else
+ FramePtr = ARM::R11;
+ } else if (STI.isTargetWindows())
FramePtr = ARM::R11;
else // ARM EABI
FramePtr = STI.isThumb() ? ARM::R7 : ARM::R11;
diff --git a/lib/Target/ARM/ARMCodeEmitter.cpp b/lib/Target/ARM/ARMCodeEmitter.cpp
index 2fd7edd..5fb6ebf 100644
--- a/lib/Target/ARM/ARMCodeEmitter.cpp
+++ b/lib/Target/ARM/ARMCodeEmitter.cpp
@@ -15,6 +15,7 @@
#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMConstantPoolValue.h"
+#include "ARMMachineFunctionInfo.h"
#include "ARMRelocations.h"
#include "ARMSubtarget.h"
#include "ARMTargetMachine.h"
diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
index 6045738..51d3dbb 100644
--- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp
+++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -927,10 +927,16 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
}
case ARM::tTPsoft:
case ARM::TPsoft: {
- MachineInstrBuilder MIB =
- BuildMI(MBB, MBBI, MI.getDebugLoc(),
- TII->get(Opcode == ARM::tTPsoft ? ARM::tBL : ARM::BL))
- .addExternalSymbol("__aeabi_read_tp", 0);
+ MachineInstrBuilder MIB;
+ if (Opcode == ARM::tTPsoft)
+ MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ TII->get( ARM::tBL))
+ .addImm((unsigned)ARMCC::AL).addReg(0)
+ .addExternalSymbol("__aeabi_read_tp", 0);
+ else
+ MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ TII->get( ARM::BL))
+ .addExternalSymbol("__aeabi_read_tp", 0);
MIB->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
TransferImpOps(MI, MIB, MIB);
diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp
index 6f8fb1a..e2d90cd 100644
--- a/lib/Target/ARM/ARMFastISel.cpp
+++ b/lib/Target/ARM/ARMFastISel.cpp
@@ -590,7 +590,7 @@ unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, MVT VT) {
// Use movw+movt when possible, it avoids constant pool entries.
// Non-darwin targets only support static movt relocations in FastISel.
- if (Subtarget->useMovt() &&
+ if (Subtarget->useMovt(*FuncInfo.MF) &&
(Subtarget->isTargetMachO() || RelocM == Reloc::Static)) {
unsigned Opc;
unsigned char TF = 0;
diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp
index 0caf4bf..a67b360 100644
--- a/lib/Target/ARM/ARMFrameLowering.cpp
+++ b/lib/Target/ARM/ARMFrameLowering.cpp
@@ -39,6 +39,10 @@ static MachineBasicBlock::iterator
skipAlignedDPRCS2Spills(MachineBasicBlock::iterator MI,
unsigned NumAlignedDPRCS2Regs);
+ARMFrameLowering::ARMFrameLowering(const ARMSubtarget &sti)
+ : TargetFrameLowering(StackGrowsDown, sti.getStackAlignment(), 0, 4),
+ STI(sti) {}
+
/// hasFP - Return true if the specified function should have a dedicated frame
/// pointer register. This is true if the function has variable sized allocas
/// or if frame pointer elimination is disabled.
@@ -220,7 +224,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
case ARM::R10:
case ARM::R11:
case ARM::R12:
- if (STI.isTargetMachO()) {
+ if (STI.isTargetDarwin()) {
GPRCS2Size += 4;
break;
}
@@ -380,7 +384,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
case ARM::R10:
case ARM::R11:
case ARM::R12:
- if (STI.isTargetMachO())
+ if (STI.isTargetDarwin())
break;
// fallthrough
case ARM::R0:
@@ -445,7 +449,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
case ARM::R10:
case ARM::R11:
case ARM::R12:
- if (STI.isTargetMachO()) {
+ if (STI.isTargetDarwin()) {
unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);
unsigned Offset = MFI->getObjectOffset(FI);
unsigned CFIIndex = MMI.addFrameInst(
@@ -810,7 +814,7 @@ void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB,
unsigned LastReg = 0;
for (; i != 0; --i) {
unsigned Reg = CSI[i-1].getReg();
- if (!(Func)(Reg, STI.isTargetMachO())) continue;
+ if (!(Func)(Reg, STI.isTargetDarwin())) continue;
// D-registers in the aligned area DPRCS2 are NOT spilled here.
if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs)
@@ -888,7 +892,7 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
bool DeleteRet = false;
for (; i != 0; --i) {
unsigned Reg = CSI[i-1].getReg();
- if (!(Func)(Reg, STI.isTargetMachO())) continue;
+ if (!(Func)(Reg, STI.isTargetDarwin())) continue;
// The aligned reloads from area DPRCS2 are not inserted here.
if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs)
@@ -1438,7 +1442,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
if (Spilled) {
NumGPRSpills++;
- if (!STI.isTargetMachO()) {
+ if (!STI.isTargetDarwin()) {
if (Reg == ARM::LR)
LRSpilled = true;
CS1Spilled = true;
@@ -1460,7 +1464,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
break;
}
} else {
- if (!STI.isTargetMachO()) {
+ if (!STI.isTargetDarwin()) {
UnspilledCS1GPRs.push_back(Reg);
continue;
}
diff --git a/lib/Target/ARM/ARMFrameLowering.h b/lib/Target/ARM/ARMFrameLowering.h
index 981d320..709afbc 100644
--- a/lib/Target/ARM/ARMFrameLowering.h
+++ b/lib/Target/ARM/ARMFrameLowering.h
@@ -14,7 +14,6 @@
#ifndef ARM_FRAMEINFO_H
#define ARM_FRAMEINFO_H
-#include "ARMSubtarget.h"
#include "llvm/Target/TargetFrameLowering.h"
namespace llvm {
@@ -25,10 +24,7 @@ protected:
const ARMSubtarget &STI;
public:
- explicit ARMFrameLowering(const ARMSubtarget &sti)
- : TargetFrameLowering(StackGrowsDown, sti.getStackAlignment(), 0, 4),
- STI(sti) {
- }
+ explicit ARMFrameLowering(const ARMSubtarget &sti);
/// emitProlog/emitEpilog - These methods insert prolog and epilog code into
/// the function.
diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp
index 08d598d..38547cf 100644
--- a/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -60,22 +60,17 @@ enum AddrMode2Type {
};
class ARMDAGToDAGISel : public SelectionDAGISel {
- ARMBaseTargetMachine &TM;
-
/// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
/// make the right decision when generating code for different targets.
const ARMSubtarget *Subtarget;
public:
- explicit ARMDAGToDAGISel(ARMBaseTargetMachine &tm,
- CodeGenOpt::Level OptLevel)
- : SelectionDAGISel(tm, OptLevel), TM(tm),
- Subtarget(&TM.getSubtarget<ARMSubtarget>()) {
- }
+ explicit ARMDAGToDAGISel(ARMBaseTargetMachine &tm, CodeGenOpt::Level OptLevel)
+ : SelectionDAGISel(tm, OptLevel) {}
bool runOnMachineFunction(MachineFunction &MF) override {
// Reset the subtarget each time through.
- Subtarget = &TM.getSubtarget<ARMSubtarget>();
+ Subtarget = &MF.getTarget().getSubtarget<ARMSubtarget>();
SelectionDAGISel::runOnMachineFunction(MF);
return true;
}
@@ -429,8 +424,8 @@ bool ARMDAGToDAGISel::hasNoVMLxHazardUse(SDNode *N) const {
if (Use->getOpcode() == ISD::CopyToReg)
return true;
if (Use->isMachineOpcode()) {
- const ARMBaseInstrInfo *TII =
- static_cast<const ARMBaseInstrInfo*>(TM.getInstrInfo());
+ const ARMBaseInstrInfo *TII = static_cast<const ARMBaseInstrInfo *>(
+ CurDAG->getTarget().getInstrInfo());
const MCInstrDesc &MCID = TII->get(Use->getMachineOpcode());
if (MCID.mayStore())
@@ -2444,7 +2439,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
case ISD::Constant: {
unsigned Val = cast<ConstantSDNode>(N)->getZExtValue();
bool UseCP = true;
- if (Subtarget->useMovt())
+ if (Subtarget->useMovt(*MF))
// Thumb2-aware targets have the MOVT instruction, so all immediates can
// be done with MOV + MOVT, at worst.
UseCP = false;
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index 00d07e8..4bfa5a8 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -155,16 +155,16 @@ void ARMTargetLowering::addQRTypeForNEON(MVT VT) {
addTypeForNEON(VT, MVT::v2f64, MVT::v4i32);
}
-static TargetLoweringObjectFile *createTLOF(TargetMachine &TM) {
- if (TM.getSubtarget<ARMSubtarget>().isTargetMachO())
+static TargetLoweringObjectFile *createTLOF(const Triple &TT) {
+ if (TT.isOSBinFormatMachO())
return new TargetLoweringObjectFileMachO();
- if (TM.getSubtarget<ARMSubtarget>().isTargetWindows())
+ if (TT.isOSWindows())
return new TargetLoweringObjectFileCOFF();
return new ARMElfTargetObjectFile();
}
ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
- : TargetLowering(TM, createTLOF(TM)) {
+ : TargetLowering(TM, createTLOF(Triple(TM.getTargetTriple()))) {
Subtarget = &TM.getSubtarget<ARMSubtarget>();
RegInfo = TM.getRegisterInfo();
Itins = TM.getInstrItineraryData();
@@ -710,7 +710,11 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setExceptionSelectorRegister(ARM::R1);
}
- setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
+ if (Subtarget->getTargetTriple().isWindowsItaniumEnvironment())
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
+ else
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
+
// ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use
// the default expansion.
if (Subtarget->hasAnyDataBarrier() && !Subtarget->isThumb1Only()) {
@@ -983,6 +987,8 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
case ARMISD::PRELOAD: return "ARMISD::PRELOAD";
+  case ARMISD::WIN__CHKSTK: return "ARMISD::WIN__CHKSTK";
+
case ARMISD::VCEQ: return "ARMISD::VCEQ";
case ARMISD::VCEQZ: return "ARMISD::VCEQZ";
case ARMISD::VCGE: return "ARMISD::VCGE";
@@ -1199,7 +1205,7 @@ ARMTargetLowering::getEffectiveCallingConv(CallingConv::ID CC,
case CallingConv::C:
if (!Subtarget->isAAPCS_ABI())
return CallingConv::ARM_APCS;
- else if (Subtarget->hasVFP2() &&
+ else if (Subtarget->hasVFP2() && !Subtarget->isThumb1Only() &&
getTargetMachine().Options.FloatABIType == FloatABI::Hard &&
!isVarArg)
return CallingConv::ARM_AAPCS_VFP;
@@ -1207,10 +1213,10 @@ ARMTargetLowering::getEffectiveCallingConv(CallingConv::ID CC,
return CallingConv::ARM_AAPCS;
case CallingConv::Fast:
if (!Subtarget->isAAPCS_ABI()) {
- if (Subtarget->hasVFP2() && !isVarArg)
+ if (Subtarget->hasVFP2() && !Subtarget->isThumb1Only() && !isVarArg)
return CallingConv::Fast;
return CallingConv::ARM_APCS;
- } else if (Subtarget->hasVFP2() && !isVarArg)
+ } else if (Subtarget->hasVFP2() && !Subtarget->isThumb1Only() && !isVarArg)
return CallingConv::ARM_AAPCS_VFP;
else
return CallingConv::ARM_AAPCS;
@@ -1598,8 +1604,9 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
if (EnableARMLongCalls) {
- assert (getTargetMachine().getRelocationModel() == Reloc::Static
- && "long-calls with non-static relocation model!");
+ assert((Subtarget->isTargetWindows() ||
+ getTargetMachine().getRelocationModel() == Reloc::Static) &&
+ "long-calls with non-static relocation model!");
// Handle a global address or an external symbol. If it's not one of
// those, the target's already in a register, so we don't need to do
// anything extra.
@@ -1647,6 +1654,19 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
assert(Subtarget->isTargetMachO() && "WrapperPIC use on non-MachO?");
Callee = DAG.getNode(ARMISD::WrapperPIC, dl, getPointerTy(),
DAG.getTargetGlobalAddress(GV, dl, getPointerTy()));
+ } else if (Subtarget->isTargetCOFF()) {
+ assert(Subtarget->isTargetWindows() &&
+ "Windows is the only supported COFF target");
+ unsigned TargetFlags = GV->hasDLLImportStorageClass()
+ ? ARMII::MO_DLLIMPORT
+ : ARMII::MO_NO_FLAG;
+ Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), /*Offset=*/0,
+ TargetFlags);
+ if (GV->hasDLLImportStorageClass())
+ Callee = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
+ DAG.getNode(ARMISD::Wrapper, dl, getPointerTy(),
+ Callee), MachinePointerInfo::getGOT(),
+ false, false, false, 0);
} else {
// On ELF targets for PIC code, direct calls should go through the PLT
unsigned OpFlags = 0;
@@ -1688,7 +1708,8 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// FIXME: handle tail calls differently.
unsigned CallOpc;
- bool HasMinSizeAttr = Subtarget->isMinSize();
+ bool HasMinSizeAttr = MF.getFunction()->getAttributes().hasAttribute(
+ AttributeSet::FunctionIndex, Attribute::MinSize);
if (Subtarget->isThumb()) {
if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps())
CallOpc = ARMISD::CALL_NOLINK;
@@ -2326,7 +2347,8 @@ ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(dl).setChain(Chain)
.setCallee(CallingConv::C, Type::getInt32Ty(*DAG.getContext()),
- DAG.getExternalSymbol("__tls_get_addr", PtrVT), &Args, 0);
+ DAG.getExternalSymbol("__tls_get_addr", PtrVT), std::move(Args),
+ 0);
std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
return CallResult.first;
@@ -2434,7 +2456,7 @@ SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
// If we have T2 ops, we can materialize the address directly via movt/movw
// pair. This is always cheaper.
- if (Subtarget->useMovt()) {
+ if (Subtarget->useMovt(DAG.getMachineFunction())) {
++NumMovwMovt;
// FIXME: Once remat is capable of dealing with instructions with register
// operands, expand this into two nodes.
@@ -2456,7 +2478,7 @@ SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
Reloc::Model RelocM = getTargetMachine().getRelocationModel();
- if (Subtarget->useMovt())
+ if (Subtarget->useMovt(DAG.getMachineFunction()))
++NumMovwMovt;
// FIXME: Once remat is capable of dealing with instructions with register
@@ -2476,18 +2498,27 @@ SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
SDValue ARMTargetLowering::LowerGlobalAddressWindows(SDValue Op,
SelectionDAG &DAG) const {
assert(Subtarget->isTargetWindows() && "non-Windows COFF is not supported");
- assert(Subtarget->useMovt() && "Windows on ARM expects to use movw/movt");
+ assert(Subtarget->useMovt(DAG.getMachineFunction()) &&
+ "Windows on ARM expects to use movw/movt");
const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
+ const ARMII::TOF TargetFlags =
+ (GV->hasDLLImportStorageClass() ? ARMII::MO_DLLIMPORT : ARMII::MO_NO_FLAG);
EVT PtrVT = getPointerTy();
+ SDValue Result;
SDLoc DL(Op);
++NumMovwMovt;
// FIXME: Once remat is capable of dealing with instructions with register
// operands, expand this into two nodes.
- return DAG.getNode(ARMISD::Wrapper, DL, PtrVT,
- DAG.getTargetGlobalAddress(GV, DL, PtrVT));
+ Result = DAG.getNode(ARMISD::Wrapper, DL, PtrVT,
+ DAG.getTargetGlobalAddress(GV, DL, PtrVT, /*Offset=*/0,
+ TargetFlags));
+ if (GV->hasDLLImportStorageClass())
+ Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
+ MachinePointerInfo::getGOT(), false, false, false, 0);
+ return Result;
}
SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op,
@@ -2535,6 +2566,11 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
SDLoc dl(Op);
switch (IntNo) {
default: return SDValue(); // Don't custom lower most intrinsics.
+ case Intrinsic::arm_rbit: {
+ assert(Op.getOperand(0).getValueType() == MVT::i32 &&
+ "RBIT intrinsic must have i32 type!");
+ return DAG.getNode(ARMISD::RBIT, dl, MVT::i32, Op.getOperand(0));
+ }
case Intrinsic::arm_thread_pointer: {
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
return DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
@@ -4492,6 +4528,11 @@ static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef,
BitMask <<= 8;
ImmMask <<= 1;
}
+
+ if (DAG.getTargetLoweringInfo().isBigEndian())
+ // swap higher and lower 32 bit word
+ Imm = ((Imm & 0xf) << 4) | ((Imm & 0xf0) >> 4);
+
// Op=1, Cmode=1110.
OpCmode = 0x1e;
VT = is128Bits ? MVT::v2i64 : MVT::v1i64;
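
For illustration only (not part of the patch): in the 64-bit case above, Imm is an 8-bit mask in which bit i selects an all-ones byte i of the element, so exchanging its two nibbles is exactly a swap of the element's 32-bit words, which is what big-endian storage requires. A standalone check of that equivalence:

// Check that swapping the two nibbles of the 8-bit VMOV byte mask is the same
// as swapping the two 32-bit halves of the expanded 64-bit value.
#include <cassert>
#include <cstdint>

static uint64_t expandByteMask(uint8_t Imm) {
  uint64_t Val = 0;
  for (unsigned i = 0; i != 8; ++i)
    if (Imm & (1u << i))
      Val |= UINT64_C(0xff) << (8 * i); // bit i selects byte i
  return Val;
}

int main() {
  uint8_t Imm = 0x2d; // arbitrary mask
  uint8_t Swapped = ((Imm & 0x0f) << 4) | ((Imm & 0xf0) >> 4);
  uint64_t Val = expandByteMask(Imm);
  uint64_t Expect = (Val >> 32) | (Val << 32); // 32-bit words exchanged
  assert(expandByteMask(Swapped) == Expect);
  return 0;
}
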
@@ -6078,7 +6119,7 @@ SDValue ARMTargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const {
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(dl).setChain(DAG.getEntryNode())
.setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()), Callee,
- &Args, 0)
+ std::move(Args), 0)
.setDiscardResult();
std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
@@ -6213,6 +6254,10 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::FSINCOS: return LowerFSINCOS(Op, DAG);
case ISD::SDIVREM:
case ISD::UDIVREM: return LowerDivRem(Op, DAG);
+ case ISD::DYNAMIC_STACKALLOC:
+ if (Subtarget->getTargetTriple().isWindowsItaniumEnvironment())
+ return LowerDYNAMIC_STACKALLOC(Op, DAG);
+ llvm_unreachable("Don't know how to custom lower this!");
}
}
@@ -7112,6 +7157,73 @@ ARMTargetLowering::EmitStructByval(MachineInstr *MI,
}
MachineBasicBlock *
+ARMTargetLowering::EmitLowered__chkstk(MachineInstr *MI,
+ MachineBasicBlock *MBB) const {
+ const TargetMachine &TM = getTargetMachine();
+ const TargetInstrInfo &TII = *TM.getInstrInfo();
+ DebugLoc DL = MI->getDebugLoc();
+
+ assert(Subtarget->isTargetWindows() &&
+ "__chkstk is only supported on Windows");
+ assert(Subtarget->isThumb2() && "Windows on ARM requires Thumb-2 mode");
+
+ // __chkstk takes the number of words to allocate on the stack in R4, and
+ // returns the stack adjustment in number of bytes in R4. This will not
+  // clobber any other registers (other than the obvious LR).
+  //
+  // Although, technically, IP should be considered a register which may be
+  // clobbered, the call itself will not touch it. Windows on ARM is a pure
+  // Thumb-2 environment, so there is no interworking required. As a result, we
+ // do not expect a veneer to be emitted by the linker, clobbering IP.
+ //
+ // Each module receives its own copy of __chkstk, so no import thunk is
+ // required, again, ensuring that IP is not clobbered.
+ //
+ // Finally, although some linkers may theoretically provide a trampoline for
+ // out of range calls (which is quite common due to a 32M range limitation of
+ // branches for Thumb), we can generate the long-call version via
+ // -mcmodel=large, alleviating the need for the trampoline which may clobber
+ // IP.
+
+ switch (TM.getCodeModel()) {
+ case CodeModel::Small:
+ case CodeModel::Medium:
+ case CodeModel::Default:
+ case CodeModel::Kernel:
+ BuildMI(*MBB, MI, DL, TII.get(ARM::tBL))
+ .addImm((unsigned)ARMCC::AL).addReg(0)
+ .addExternalSymbol("__chkstk")
+ .addReg(ARM::R4, RegState::Implicit | RegState::Kill)
+ .addReg(ARM::R4, RegState::Implicit | RegState::Define)
+ .addReg(ARM::R12, RegState::Implicit | RegState::Define | RegState::Dead);
+ break;
+ case CodeModel::Large:
+ case CodeModel::JITDefault: {
+ MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
+ unsigned Reg = MRI.createVirtualRegister(&ARM::rGPRRegClass);
+
+ BuildMI(*MBB, MI, DL, TII.get(ARM::t2MOVi32imm), Reg)
+ .addExternalSymbol("__chkstk");
+ BuildMI(*MBB, MI, DL, TII.get(ARM::tBLXr))
+ .addImm((unsigned)ARMCC::AL).addReg(0)
+ .addReg(Reg, RegState::Kill)
+ .addReg(ARM::R4, RegState::Implicit | RegState::Kill)
+ .addReg(ARM::R4, RegState::Implicit | RegState::Define)
+ .addReg(ARM::R12, RegState::Implicit | RegState::Define | RegState::Dead);
+ break;
+ }
+ }
+
+ AddDefaultCC(AddDefaultPred(BuildMI(*MBB, MI, DL, TII.get(ARM::t2SUBrr),
+ ARM::SP)
+ .addReg(ARM::SP, RegState::Define)
+ .addReg(ARM::R4, RegState::Kill)));
+
+ MI->eraseFromParent();
+ return MBB;
+}
+
+MachineBasicBlock *
ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
MachineBasicBlock *BB) const {
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
@@ -7360,6 +7472,8 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
case ARM::COPY_STRUCT_BYVAL_I32:
++NumLoopByVals;
return EmitStructByval(MI, BB);
+ case ARM::WIN__CHKSTK:
+ return EmitLowered__chkstk(MI, BB);
}
}
@@ -8315,6 +8429,8 @@ static SDValue PerformVMOVRRDCombine(SDNode *N,
std::min(4U, LD->getAlignment() / 2));
DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewLD2.getValue(1));
+ if (DCI.DAG.getTargetLoweringInfo().isBigEndian())
+ std::swap (NewLD1, NewLD2);
SDValue Result = DCI.CombineTo(N, NewLD1, NewLD2);
DCI.RemoveFromWorklist(LD);
DAG.DeleteNode(LD);
@@ -8382,7 +8498,8 @@ static SDValue PerformSTORECombine(SDNode *N,
SDLoc DL(St);
SDValue WideVec = DAG.getNode(ISD::BITCAST, DL, WideVecVT, StVal);
SmallVector<int, 8> ShuffleVec(NumElems * SizeRatio, -1);
- for (unsigned i = 0; i < NumElems; ++i) ShuffleVec[i] = i * SizeRatio;
+ for (unsigned i = 0; i < NumElems; ++i)
+ ShuffleVec[i] = TLI.isBigEndian() ? (i+1) * SizeRatio - 1 : i * SizeRatio;
// Can't shuffle using an illegal type.
if (!TLI.isTypeLegal(WideVecVT)) return SDValue();
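
Reviewer note (illustration, not part of the patch): the shuffle built above picks, for each narrow element, one sub-lane of the bitcast wide vector; little-endian takes the first sub-lane of each group, while big-endian takes the last one, where the truncated low-order bits live. Reproducing the index arithmetic for NumElems = 4, SizeRatio = 2:

// Standalone computation of the shuffle indices chosen above for a truncating
// vector store (NumElems narrow elements, SizeRatio sub-lanes per element).
#include <cassert>
#include <vector>

static std::vector<int> truncStoreShuffle(unsigned NumElems, unsigned SizeRatio,
                                          bool BigEndian) {
  std::vector<int> ShuffleVec(NumElems * SizeRatio, -1);
  for (unsigned i = 0; i < NumElems; ++i)
    ShuffleVec[i] = BigEndian ? (i + 1) * SizeRatio - 1 : i * SizeRatio;
  return ShuffleVec;
}

int main() {
  // e.g. v4i32 truncated and stored as v4i16: SizeRatio = 2.
  std::vector<int> LE = truncStoreShuffle(4, 2, /*BigEndian=*/false);
  std::vector<int> BE = truncStoreShuffle(4, 2, /*BigEndian=*/true);
  assert(LE[0] == 0 && LE[1] == 2 && LE[2] == 4 && LE[3] == 6);
  assert(BE[0] == 1 && BE[1] == 3 && BE[2] == 5 && BE[3] == 7);
  return 0;
}
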
@@ -10471,13 +10588,39 @@ SDValue ARMTargetLowering::LowerDivRem(SDValue Op, SelectionDAG &DAG) const {
SDLoc dl(Op);
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(dl).setChain(InChain)
- .setCallee(getLibcallCallingConv(LC), RetTy, Callee, &Args, 0)
+ .setCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args), 0)
.setInRegister().setSExtResult(isSigned).setZExtResult(!isSigned);
std::pair<SDValue, SDValue> CallInfo = LowerCallTo(CLI);
return CallInfo.first;
}
+SDValue
+ARMTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const {
+ assert(Subtarget->isTargetWindows() && "unsupported target platform");
+ SDLoc DL(Op);
+
+ // Get the inputs.
+ SDValue Chain = Op.getOperand(0);
+ SDValue Size = Op.getOperand(1);
+
+ SDValue Words = DAG.getNode(ISD::SRL, DL, MVT::i32, Size,
+ DAG.getConstant(2, MVT::i32));
+
+ SDValue Flag;
+ Chain = DAG.getCopyToReg(Chain, DL, ARM::R4, Words, Flag);
+ Flag = Chain.getValue(1);
+
+ SDVTList NodeTys = DAG.getVTList(MVT::i32, MVT::Glue);
+ Chain = DAG.getNode(ARMISD::WIN__CHKSTK, DL, NodeTys, Chain, Flag);
+
+ SDValue NewSP = DAG.getCopyFromReg(Chain, DL, ARM::SP, MVT::i32);
+ Chain = NewSP.getValue(1);
+
+ SDValue Ops[2] = { NewSP, Chain };
+ return DAG.getMergeValues(Ops, DL);
+}
+
bool
ARMTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
// The ARM target isn't yet aware of offsets.
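
Illustrative aside (not part of the patch): combining the LowerDYNAMIC_STACKALLOC hunk above with the __chkstk register convention described in EmitLowered__chkstk earlier in this patch, the flow is: the size in bytes is shifted right by two into R4, __chkstk probes and returns the byte adjustment in R4, and SP is then lowered by R4. A toy model of that round trip (numbers only, no MC involved):

// Toy model of the Windows-on-ARM __chkstk contract as described above:
// the caller puts a word count in R4, the helper probes the stack and hands
// back the byte adjustment in R4, and the caller does SP -= R4.
#include <cassert>
#include <cstdint>

static uint32_t chkstkModel(uint32_t WordsInR4) {
  // The real helper also touches each 4K page so guard pages get committed.
  return WordsInR4 * 4; // byte adjustment handed back in R4
}

int main() {
  uint32_t AllocBytes = 0x3000;  // dynamic alloca of 12 KiB
  uint32_t R4 = AllocBytes >> 2; // ISD::SRL by 2 in the lowering above
  R4 = chkstkModel(R4);          // bl __chkstk
  uint32_t SP = 0x80000;
  SP -= R4;                      // t2SUBrr SP, SP, R4
  assert(SP == 0x80000 - 0x3000);
  return 0;
}
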
@@ -10635,14 +10778,20 @@ bool ARMTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
bool ARMTargetLowering::shouldExpandAtomicInIR(Instruction *Inst) const {
// Loads and stores less than 64-bits are already atomic; ones above that
// are doomed anyway, so defer to the default libcall and blame the OS when
- // things go wrong:
- if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
- return SI->getValueOperand()->getType()->getPrimitiveSizeInBits() == 64;
- else if (LoadInst *LI = dyn_cast<LoadInst>(Inst))
- return LI->getType()->getPrimitiveSizeInBits() == 64;
-
- // For the real atomic operations, we have ldrex/strex up to 64 bits.
- return Inst->getType()->getPrimitiveSizeInBits() <= 64;
+  // things go wrong. Cortex-M doesn't have ldrexd/strexd though, so don't emit
+ // anything for those.
+ bool IsMClass = Subtarget->isMClass();
+ if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
+ unsigned Size = SI->getValueOperand()->getType()->getPrimitiveSizeInBits();
+ return Size == 64 && !IsMClass;
+ } else if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
+ return LI->getType()->getPrimitiveSizeInBits() == 64 && !IsMClass;
+ }
+
+ // For the real atomic operations, we have ldrex/strex up to 32 bits,
+ // and up to 64 bits on the non-M profiles
+ unsigned AtomicLimit = IsMClass ? 32 : 64;
+ return Inst->getType()->getPrimitiveSizeInBits() <= AtomicLimit;
}
Value *ARMTargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
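
For illustration only (not part of the patch): the new shouldExpandAtomicInIR() logic reduces to a small decision table. 64-bit plain loads and stores are expanded only off M-class (M-class lacks LDREXD/STREXD and falls back to the libcall path), and the LL/SC limit for the remaining atomic operations is 32 bits on M-class and 64 bits elsewhere. Restated as standalone code:

// Self-contained restatement of shouldExpandAtomicInIR()'s size logic above.
#include <cassert>

enum class AtomicKind { Load, Store, RMWOrCmpXchg };

static bool shouldExpand(AtomicKind K, unsigned Bits, bool IsMClass) {
  if (K == AtomicKind::Load || K == AtomicKind::Store)
    // Only 64-bit plain loads/stores need an LL/SC expansion, and only on
    // profiles that actually have LDREXD/STREXD.
    return Bits == 64 && !IsMClass;
  // Real atomic ops: LDREX/STREX up to 32 bits everywhere, 64 bits off M-class.
  unsigned Limit = IsMClass ? 32 : 64;
  return Bits <= Limit;
}

int main() {
  assert(shouldExpand(AtomicKind::Store, 64, /*IsMClass=*/false));
  assert(!shouldExpand(AtomicKind::Store, 64, /*IsMClass=*/true));
  assert(shouldExpand(AtomicKind::RMWOrCmpXchg, 32, /*IsMClass=*/true));
  assert(!shouldExpand(AtomicKind::RMWOrCmpXchg, 64, /*IsMClass=*/true));
  return 0;
}
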
diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h
index c15305c..1ace0f3 100644
--- a/lib/Target/ARM/ARMISelLowering.h
+++ b/lib/Target/ARM/ARMISelLowering.h
@@ -95,6 +95,8 @@ namespace llvm {
PRELOAD, // Preload
+ WIN__CHKSTK, // Windows' __chkstk call to do stack probing.
+
VCEQ, // Vector compare equal.
VCEQZ, // Vector compare equal to zero.
VCGE, // Vector compare greater than or equal.
@@ -470,6 +472,7 @@ namespace llvm {
const ARMSubtarget *ST) const;
SDValue LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerDivRem(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
unsigned getRegisterByName(const char* RegName, EVT VT) const override;
@@ -578,6 +581,9 @@ namespace llvm {
MachineBasicBlock *EmitStructByval(MachineInstr *MI,
MachineBasicBlock *MBB) const;
+
+ MachineBasicBlock *EmitLowered__chkstk(MachineInstr *MI,
+ MachineBasicBlock *MBB) const;
};
enum NEONModImmType {
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index 718d5da..2bb8976 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -270,8 +270,8 @@ def UseNaClTrap : Predicate<"Subtarget->useNaClTrap()">,
def DontUseNaClTrap : Predicate<"!Subtarget->useNaClTrap()">;
// FIXME: Eventually this will be just "hasV6T2Ops".
-def UseMovt : Predicate<"Subtarget->useMovt()">;
-def DontUseMovt : Predicate<"!Subtarget->useMovt()">;
+def UseMovt : Predicate<"Subtarget->useMovt(*MF)">;
+def DontUseMovt : Predicate<"!Subtarget->useMovt(*MF)">;
def UseFPVMLx : Predicate<"Subtarget->useFPVMLx()">;
def UseMulOps : Predicate<"Subtarget->useMulOps()">;
@@ -493,7 +493,7 @@ def neon_vcvt_imm32 : Operand<i32> {
// rot_imm: An integer that encodes a rotate amount. Must be 8, 16, or 24.
def rot_imm_XFORM: SDNodeXForm<imm, [{
switch (N->getZExtValue()){
- default: assert(0);
+ default: llvm_unreachable(nullptr);
case 0: return CurDAG->getTargetConstant(0, MVT::i32);
case 8: return CurDAG->getTargetConstant(1, MVT::i32);
case 16: return CurDAG->getTargetConstant(2, MVT::i32);
@@ -594,7 +594,7 @@ def so_imm2part : PatLeaf<(imm), [{
/// arm_i32imm - True for +V6T2, or true only if so_imm2part is true.
///
def arm_i32imm : PatLeaf<(imm), [{
- if (Subtarget->useMovt())
+ if (Subtarget->useMovt(*MF))
return true;
return ARM_AM::isSOImmTwoPartVal((unsigned)N->getZExtValue());
}]>;
@@ -3334,8 +3334,8 @@ def SBFX : I<(outs GPRnopc:$Rd),
let Inst{3-0} = Rn;
}
-def UBFX : I<(outs GPR:$Rd),
- (ins GPR:$Rn, imm0_31:$lsb, imm1_32:$width),
+def UBFX : I<(outs GPRnopc:$Rd),
+ (ins GPRnopc:$Rn, imm0_31:$lsb, imm1_32:$width),
AddrMode1, 4, IndexModeNone, DPFrm, IIC_iUNAsi,
"ubfx", "\t$Rd, $Rn, $lsb, $width", "", []>,
Requires<[IsARM, HasV6T2]> {
@@ -4443,7 +4443,7 @@ def instsyncb_opt : Operand<i32> {
let DecoderMethod = "DecodeInstSyncBarrierOption";
}
-// memory barriers protect the atomic sequences
+// Memory barriers protect the atomic sequences
let hasSideEffects = 1 in {
def DMB : AInoP<(outs), (ins memb_opt:$opt), MiscFrm, NoItinerary,
"dmb", "\t$opt", [(int_arm_dmb (i32 imm0_15:$opt))]>,
@@ -4452,7 +4452,6 @@ def DMB : AInoP<(outs), (ins memb_opt:$opt), MiscFrm, NoItinerary,
let Inst{31-4} = 0xf57ff05;
let Inst{3-0} = opt;
}
-}
def DSB : AInoP<(outs), (ins memb_opt:$opt), MiscFrm, NoItinerary,
"dsb", "\t$opt", [(int_arm_dsb (i32 imm0_15:$opt))]>,
@@ -4464,12 +4463,13 @@ def DSB : AInoP<(outs), (ins memb_opt:$opt), MiscFrm, NoItinerary,
// ISB has only full system option
def ISB : AInoP<(outs), (ins instsyncb_opt:$opt), MiscFrm, NoItinerary,
- "isb", "\t$opt", []>,
+ "isb", "\t$opt", [(int_arm_isb (i32 imm0_15:$opt))]>,
Requires<[IsARM, HasDB]> {
bits<4> opt;
let Inst{31-4} = 0xf57ff06;
let Inst{3-0} = opt;
}
+}
let usesCustomInserter = 1, Defs = [CPSR] in {
@@ -5093,6 +5093,19 @@ def MSRi : ABI<0b0011, (outs), (ins msr_mask:$mask, so_imm:$a), NoItinerary,
let Inst{11-0} = a;
}
+// Dynamic stack allocation yields a __chkstk call for Windows targets. These
+// calls are needed to probe the stack when allocating more than 4K bytes in
+// one go. Touching the stack at 4K increments is necessary to ensure that the
+// guard pages used by the OS virtual memory manager are allocated in the
+// correct sequence.
+// The main point of having a separate instruction is the extra unmodelled
+// effects (compared to an ordinary call), such as the change to SP.
+
+def win__chkstk : SDNode<"ARMISD::WIN__CHKSTK", SDTNone,
+ [SDNPHasChain, SDNPSideEffect]>;
+let usesCustomInserter = 1, Uses = [R4], Defs = [R4, SP] in
+ def WIN__CHKSTK : PseudoInst<(outs), (ins), NoItinerary, [(win__chkstk)]>;
+
//===----------------------------------------------------------------------===//
// TLS Instructions
//
@@ -5100,9 +5113,11 @@ def MSRi : ABI<0b0011, (outs), (ins msr_mask:$mask, so_imm:$a), NoItinerary,
// __aeabi_read_tp preserves the registers r1-r3.
// This is a pseudo inst so that we can get the encoding right,
// complete with fixup for the aeabi_read_tp function.
+// TPsoft is valid for ARM mode only; for Thumb mode, a tTPsoft pattern is
+// defined in "ARMInstrThumb.td".
let isCall = 1,
Defs = [R0, R12, LR, CPSR], Uses = [SP] in {
- def TPsoft : PseudoInst<(outs), (ins), IIC_Br,
+ def TPsoft : ARMPseudoInst<(outs), (ins), 4, IIC_Br,
[(set R0, ARMthread_pointer)]>, Sched<[WriteBr]>;
}
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td
index b32b5d2..c02bb3b 100644
--- a/lib/Target/ARM/ARMInstrNEON.td
+++ b/lib/Target/ARM/ARMInstrNEON.td
@@ -6372,6 +6372,32 @@ multiclass Lengthen_HalfSingle<string DestLanes, string DestTy, string SrcTy,
dsub_0)>;
}
+// The following class definition is basically a copy of the
+// Lengthen_HalfSingle definition above, however with an additional parameter
+// "RevLanes" to select the correct VREV32dXX instruction. This is to convert
+// data loaded by VLD1LN into proper vector format in big endian mode.
+multiclass Lengthen_HalfSingle_Big_Endian<string DestLanes, string DestTy, string SrcTy,
+ string InsnLanes, string InsnTy, string RevLanes> {
+ def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
+ (!cast<PatFrag>("extloadv" # SrcTy) addrmode6oneL32:$addr)),
+ (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy)
+ (!cast<Instruction>("VREV32d" # RevLanes)
+ (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
+ dsub_0)>;
+ def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
+ (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6oneL32:$addr)),
+ (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy)
+ (!cast<Instruction>("VREV32d" # RevLanes)
+ (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
+ dsub_0)>;
+ def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
+ (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6oneL32:$addr)),
+ (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # InsnLanes # InsnTy)
+ (!cast<Instruction>("VREV32d" # RevLanes)
+ (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
+ dsub_0)>;
+}
+
// extload, zextload and sextload for a lengthening load followed by another
// lengthening load, to quadruple the initial length.
//
@@ -6406,6 +6432,36 @@ multiclass Lengthen_Double<string DestLanes, string DestTy, string SrcTy,
dsub_0))>;
}
+// The following class definition is basically a copy of the
+// Lengthen_Double definition above, however with an additional parameter
+// "RevLanes" to select the correct VREV32dXX instruction. This is to convert
+// data loaded by VLD1LN into proper vector format in big endian mode.
+multiclass Lengthen_Double_Big_Endian<string DestLanes, string DestTy, string SrcTy,
+ string Insn1Lanes, string Insn1Ty, string Insn2Lanes,
+ string Insn2Ty, string RevLanes> {
+ def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
+ (!cast<PatFrag>("extloadv" # SrcTy) addrmode6oneL32:$addr)),
+ (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
+ (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
+ (!cast<Instruction>("VREV32d" # RevLanes)
+ (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
+ dsub_0))>;
+ def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
+ (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6oneL32:$addr)),
+ (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
+ (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
+ (!cast<Instruction>("VREV32d" # RevLanes)
+ (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
+ dsub_0))>;
+ def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
+ (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6oneL32:$addr)),
+ (!cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty)
+ (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty)
+ (!cast<Instruction>("VREV32d" # RevLanes)
+ (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
+ dsub_0))>;
+}
+
// extload, zextload and sextload for a lengthening load followed by another
// lengthening load, to quadruple the initial length, but which ends up only
// requiring half the available lanes (a 64-bit outcome instead of a 128-bit).
@@ -6443,33 +6499,102 @@ multiclass Lengthen_HalfDouble<string DestLanes, string DestTy, string SrcTy,
dsub_0)>;
}
+// The following class definition is basically a copy of the
+// Lengthen_HalfDouble definition above, however with an additional VREV16d8
+// instruction to convert data loaded by VLD1LN into proper vector format
+// in big endian mode.
+multiclass Lengthen_HalfDouble_Big_Endian<string DestLanes, string DestTy, string SrcTy,
+ string Insn1Lanes, string Insn1Ty, string Insn2Lanes,
+ string Insn2Ty> {
+ def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
+ (!cast<PatFrag>("extloadv" # SrcTy) addrmode6:$addr)),
+ (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
+ (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
+ (!cast<Instruction>("VREV16d8")
+ (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
+ dsub_0)),
+ dsub_0)>;
+ def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
+ (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6:$addr)),
+ (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
+ (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
+ (!cast<Instruction>("VREV16d8")
+ (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
+ dsub_0)),
+ dsub_0)>;
+ def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
+ (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6:$addr)),
+ (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty)
+ (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty)
+ (!cast<Instruction>("VREV16d8")
+ (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
+ dsub_0)),
+ dsub_0)>;
+}
+
defm : Lengthen_Single<"8", "i16", "8">; // v8i8 -> v8i16
defm : Lengthen_Single<"4", "i32", "16">; // v4i16 -> v4i32
defm : Lengthen_Single<"2", "i64", "32">; // v2i32 -> v2i64
-defm : Lengthen_HalfSingle<"4", "i16", "i8", "8", "i16">; // v4i8 -> v4i16
-defm : Lengthen_HalfSingle<"2", "i32", "i16", "4", "i32">; // v2i16 -> v2i32
+let Predicates = [IsLE] in {
+ defm : Lengthen_HalfSingle<"4", "i16", "i8", "8", "i16">; // v4i8 -> v4i16
+ defm : Lengthen_HalfSingle<"2", "i32", "i16", "4", "i32">; // v2i16 -> v2i32
-// Double lengthening - v4i8 -> v4i16 -> v4i32
-defm : Lengthen_Double<"4", "i32", "i8", "8", "i16", "4", "i32">;
-// v2i8 -> v2i16 -> v2i32
-defm : Lengthen_HalfDouble<"2", "i32", "i8", "8", "i16", "4", "i32">;
-// v2i16 -> v2i32 -> v2i64
-defm : Lengthen_Double<"2", "i64", "i16", "4", "i32", "2", "i64">;
+ // Double lengthening - v4i8 -> v4i16 -> v4i32
+ defm : Lengthen_Double<"4", "i32", "i8", "8", "i16", "4", "i32">;
+ // v2i8 -> v2i16 -> v2i32
+ defm : Lengthen_HalfDouble<"2", "i32", "i8", "8", "i16", "4", "i32">;
+ // v2i16 -> v2i32 -> v2i64
+ defm : Lengthen_Double<"2", "i64", "i16", "4", "i32", "2", "i64">;
+}
+
+let Predicates = [IsBE] in {
+ defm : Lengthen_HalfSingle_Big_Endian<"4", "i16", "i8", "8", "i16", "8">; // v4i8 -> v4i16
+ defm : Lengthen_HalfSingle_Big_Endian<"2", "i32", "i16", "4", "i32", "16">; // v2i16 -> v2i32
+
+ // Double lengthening - v4i8 -> v4i16 -> v4i32
+ defm : Lengthen_Double_Big_Endian<"4", "i32", "i8", "8", "i16", "4", "i32", "8">;
+ // v2i8 -> v2i16 -> v2i32
+ defm : Lengthen_HalfDouble_Big_Endian<"2", "i32", "i8", "8", "i16", "4", "i32">;
+ // v2i16 -> v2i32 -> v2i64
+ defm : Lengthen_Double_Big_Endian<"2", "i64", "i16", "4", "i32", "2", "i64", "16">;
+}
// Triple lengthening - v2i8 -> v2i16 -> v2i32 -> v2i64
-def : Pat<(v2i64 (extloadvi8 addrmode6:$addr)),
- (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16
- (VLD1LNd16 addrmode6:$addr,
- (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>;
-def : Pat<(v2i64 (zextloadvi8 addrmode6:$addr)),
- (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16
- (VLD1LNd16 addrmode6:$addr,
- (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>;
-def : Pat<(v2i64 (sextloadvi8 addrmode6:$addr)),
- (VMOVLsv2i64 (EXTRACT_SUBREG (VMOVLsv4i32 (EXTRACT_SUBREG (VMOVLsv8i16
- (VLD1LNd16 addrmode6:$addr,
- (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>;
+let Predicates = [IsLE] in {
+ def : Pat<(v2i64 (extloadvi8 addrmode6:$addr)),
+ (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16
+ (VLD1LNd16 addrmode6:$addr,
+ (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>;
+ def : Pat<(v2i64 (zextloadvi8 addrmode6:$addr)),
+ (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16
+ (VLD1LNd16 addrmode6:$addr,
+ (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>;
+ def : Pat<(v2i64 (sextloadvi8 addrmode6:$addr)),
+ (VMOVLsv2i64 (EXTRACT_SUBREG (VMOVLsv4i32 (EXTRACT_SUBREG (VMOVLsv8i16
+ (VLD1LNd16 addrmode6:$addr,
+ (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>;
+}
+// The following patterns are basically a copy of the patterns above,
+// however with an additional VREV16d instruction to convert data
+// loaded by VLD1LN into proper vector format in big endian mode.
+let Predicates = [IsBE] in {
+ def : Pat<(v2i64 (extloadvi8 addrmode6:$addr)),
+ (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16
+ (!cast<Instruction>("VREV16d8")
+ (VLD1LNd16 addrmode6:$addr,
+ (f64 (IMPLICIT_DEF)), (i32 0)))), dsub_0)), dsub_0))>;
+ def : Pat<(v2i64 (zextloadvi8 addrmode6:$addr)),
+ (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16
+ (!cast<Instruction>("VREV16d8")
+ (VLD1LNd16 addrmode6:$addr,
+ (f64 (IMPLICIT_DEF)), (i32 0)))), dsub_0)), dsub_0))>;
+ def : Pat<(v2i64 (sextloadvi8 addrmode6:$addr)),
+ (VMOVLsv2i64 (EXTRACT_SUBREG (VMOVLsv4i32 (EXTRACT_SUBREG (VMOVLsv8i16
+ (!cast<Instruction>("VREV16d8")
+ (VLD1LNd16 addrmode6:$addr,
+ (f64 (IMPLICIT_DEF)), (i32 0)))), dsub_0)), dsub_0))>;
+}
//===----------------------------------------------------------------------===//
// Assembler aliases
diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td
index c30d6ab..85e9351 100644
--- a/lib/Target/ARM/ARMInstrThumb2.td
+++ b/lib/Target/ARM/ARMInstrThumb2.td
@@ -3209,27 +3209,28 @@ def t2MOVCCi32imm
let hasSideEffects = 1 in {
def t2DMB : T2I<(outs), (ins memb_opt:$opt), NoItinerary,
"dmb", "\t$opt", [(int_arm_dmb (i32 imm0_15:$opt))]>,
- Requires<[HasDB]> {
+ Requires<[IsThumb, HasDB]> {
bits<4> opt;
let Inst{31-4} = 0xf3bf8f5;
let Inst{3-0} = opt;
}
-}
def t2DSB : T2I<(outs), (ins memb_opt:$opt), NoItinerary,
"dsb", "\t$opt", [(int_arm_dsb (i32 imm0_15:$opt))]>,
- Requires<[HasDB]> {
+ Requires<[IsThumb, HasDB]> {
bits<4> opt;
let Inst{31-4} = 0xf3bf8f4;
let Inst{3-0} = opt;
}
def t2ISB : T2I<(outs), (ins instsyncb_opt:$opt), NoItinerary,
- "isb", "\t$opt", []>, Requires<[HasDB]> {
+ "isb", "\t$opt", [(int_arm_isb (i32 imm0_15:$opt))]>,
+ Requires<[IsThumb, HasDB]> {
bits<4> opt;
let Inst{31-4} = 0xf3bf8f6;
let Inst{3-0} = opt;
}
+}
class T2I_ldrex<bits<4> opcod, dag oops, dag iops, AddrMode am, int sz,
InstrItinClass itin, string opc, string asm, string cstr,
diff --git a/lib/Target/ARM/ARMJITInfo.cpp b/lib/Target/ARM/ARMJITInfo.cpp
index 8821c2d..6d1114d 100644
--- a/lib/Target/ARM/ARMJITInfo.cpp
+++ b/lib/Target/ARM/ARMJITInfo.cpp
@@ -13,6 +13,7 @@
#include "ARMJITInfo.h"
#include "ARMConstantPoolValue.h"
+#include "ARMMachineFunctionInfo.h"
#include "ARMRelocations.h"
#include "MCTargetDesc/ARMBaseInfo.h"
#include "llvm/CodeGen/JITCodeEmitter.h"
@@ -334,3 +335,10 @@ void ARMJITInfo::relocate(void *Function, MachineRelocation *MR,
}
}
}
+
+void ARMJITInfo::Initialize(const MachineFunction &MF, bool isPIC) {
+ const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ ConstPoolId2AddrMap.resize(AFI->getNumPICLabels());
+ JumpTableId2AddrMap.resize(AFI->getNumJumpTables());
+ IsPIC = isPIC;
+}
diff --git a/lib/Target/ARM/ARMJITInfo.h b/lib/Target/ARM/ARMJITInfo.h
index ee4c863..27e2a20 100644
--- a/lib/Target/ARM/ARMJITInfo.h
+++ b/lib/Target/ARM/ARMJITInfo.h
@@ -14,7 +14,6 @@
#ifndef ARMJITINFO_H
#define ARMJITINFO_H
-#include "ARMMachineFunctionInfo.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineConstantPool.h"
@@ -103,12 +102,7 @@ namespace llvm {
/// Resize constant pool ids to CONSTPOOL_ENTRY addresses map; resize
/// jump table ids to jump table bases map; remember if codegen relocation
/// model is PIC.
- void Initialize(const MachineFunction &MF, bool isPIC) {
- const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
- ConstPoolId2AddrMap.resize(AFI->getNumPICLabels());
- JumpTableId2AddrMap.resize(AFI->getNumJumpTables());
- IsPIC = isPIC;
- }
+ void Initialize(const MachineFunction &MF, bool isPIC);
/// getConstantPoolEntryAddr - The ARM target puts all constant
/// pool entries into constant islands. This returns the address of the
diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
index ee7df54..a03bcdb 100644
--- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -505,7 +505,7 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
// Exception: If the base register is in the input reglist, Thumb1 LDM is
// non-writeback. Check for this.
- if (Opcode == ARM::tLDRi && isThumb1)
+ if (Opcode == ARM::tLDMIA && isThumb1)
for (unsigned I = 0; I < NumRegs; ++I)
if (Base == Regs[I].first) {
Writeback = false;
@@ -519,17 +519,17 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
// Update tLDMIA with writeback if necessary.
Opcode = ARM::tLDMIA_UPD;
- // The base isn't dead after a merged instruction with writeback. Update
- // future uses of the base with the added offset (if possible), or reset
- // the base register as necessary.
- if (!BaseKill)
- UpdateBaseRegUses(MBB, MBBI, dl, Base, NumRegs, Pred, PredReg);
-
MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode));
// Thumb1: we might need to set base writeback when building the MI.
MIB.addReg(Base, getDefRegState(true))
.addReg(Base, getKillRegState(BaseKill));
+
+ // The base isn't dead after a merged instruction with writeback. Update
+ // future uses of the base with the added offset (if possible), or reset
+ // the base register as necessary.
+ if (!BaseKill)
+ UpdateBaseRegUses(MBB, MBBI, dl, Base, NumRegs, Pred, PredReg);
} else {
// No writeback, simply build the MachineInstr.
MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode));
@@ -1734,6 +1734,12 @@ bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
isThumb2 = AFI->isThumb2Function();
isThumb1 = AFI->isThumbFunction() && !isThumb2;
+ // FIXME: Temporarily disabling for Thumb-1 due to miscompiles
+ if (isThumb1) {
+ delete RS;
+ return false;
+ }
+
bool Modified = false;
for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
++MFI) {
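
The corrected opcode check above (tLDMIA rather than tLDRi) guards the rule that a Thumb1 LDM whose base register also appears in the register list must not use the writeback form. A standalone sketch of that rule, using a hypothetical helper name and plain containers instead of the pass's own types:

#include <algorithm>
#include <vector>

// Returns true when a merged Thumb1 LDM may write the incremented address
// back into the base register. If the base is reloaded by the list itself,
// e.g. "ldm r0, {r0, r2, r3}", the load into r0 wins and writeback must be
// suppressed.
bool thumb1LdmMayWriteBack(unsigned BaseReg, const std::vector<unsigned> &Regs) {
  return std::find(Regs.begin(), Regs.end(), BaseReg) == Regs.end();
}
// thumb1LdmMayWriteBack(0, {0, 2, 3}) == false; thumb1LdmMayWriteBack(0, {2, 3}) == true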
diff --git a/lib/Target/ARM/ARMMCInstLower.cpp b/lib/Target/ARM/ARMMCInstLower.cpp
index 48141b1..023f5f8 100644
--- a/lib/Target/ARM/ARMMCInstLower.cpp
+++ b/lib/Target/ARM/ARMMCInstLower.cpp
@@ -34,7 +34,7 @@ MCOperand ARMAsmPrinter::GetSymbolRef(const MachineOperand &MO,
OutContext);
switch (Option) {
default: llvm_unreachable("Unknown target flag on symbol operand");
- case 0:
+ case ARMII::MO_NO_FLAG:
break;
case ARMII::MO_LO16:
Expr = MCSymbolRefExpr::Create(Symbol, MCSymbolRefExpr::VK_None,
diff --git a/lib/Target/ARM/ARMMachineFunctionInfo.cpp b/lib/Target/ARM/ARMMachineFunctionInfo.cpp
index af445e2..892b269 100644
--- a/lib/Target/ARM/ARMMachineFunctionInfo.cpp
+++ b/lib/Target/ARM/ARMMachineFunctionInfo.cpp
@@ -12,3 +12,13 @@
using namespace llvm;
void ARMFunctionInfo::anchor() { }
+
+ARMFunctionInfo::ARMFunctionInfo(MachineFunction &MF)
+ : isThumb(MF.getTarget().getSubtarget<ARMSubtarget>().isThumb()),
+ hasThumb2(MF.getTarget().getSubtarget<ARMSubtarget>().hasThumb2()),
+ StByValParamsPadding(0), ArgRegsSaveSize(0), HasStackFrame(false),
+ RestoreSPFromFP(false), LRSpilledForFarJump(false),
+ FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0),
+ GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0), JumpTableUId(0),
+ PICLabelUId(0), VarArgsFrameIndex(0), HasITBlocks(false),
+ GlobalBaseReg(0) {}
diff --git a/lib/Target/ARM/ARMMachineFunctionInfo.h b/lib/Target/ARM/ARMMachineFunctionInfo.h
index d7ec6eb..44a9e34 100644
--- a/lib/Target/ARM/ARMMachineFunctionInfo.h
+++ b/lib/Target/ARM/ARMMachineFunctionInfo.h
@@ -130,16 +130,7 @@ public:
JumpTableUId(0), PICLabelUId(0),
VarArgsFrameIndex(0), HasITBlocks(false), GlobalBaseReg(0) {}
- explicit ARMFunctionInfo(MachineFunction &MF) :
- isThumb(MF.getTarget().getSubtarget<ARMSubtarget>().isThumb()),
- hasThumb2(MF.getTarget().getSubtarget<ARMSubtarget>().hasThumb2()),
- StByValParamsPadding(0),
- ArgRegsSaveSize(0), HasStackFrame(false), RestoreSPFromFP(false),
- LRSpilledForFarJump(false),
- FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0),
- GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0),
- JumpTableUId(0), PICLabelUId(0),
- VarArgsFrameIndex(0), HasITBlocks(false), GlobalBaseReg(0) {}
+ explicit ARMFunctionInfo(MachineFunction &MF);
bool isThumbFunction() const { return isThumb; }
bool isThumb1OnlyFunction() const { return isThumb && !hasThumb2; }
@@ -220,7 +211,7 @@ public:
void recordCPEClone(unsigned CPIdx, unsigned CPCloneIdx) {
if (!CPEClones.insert(std::make_pair(CPCloneIdx, CPIdx)).second)
- assert(0 && "Duplicate entries!");
+ llvm_unreachable("Duplicate entries!");
}
unsigned getOriginalCPIdx(unsigned CloneIdx) const {
diff --git a/lib/Target/ARM/ARMSelectionDAGInfo.cpp b/lib/Target/ARM/ARMSelectionDAGInfo.cpp
index 008ad64..3dcc0df 100644
--- a/lib/Target/ARM/ARMSelectionDAGInfo.cpp
+++ b/lib/Target/ARM/ARMSelectionDAGInfo.cpp
@@ -18,10 +18,8 @@ using namespace llvm;
#define DEBUG_TYPE "arm-selectiondag-info"
-ARMSelectionDAGInfo::ARMSelectionDAGInfo(const TargetMachine &TM)
- : TargetSelectionDAGInfo(TM),
- Subtarget(&TM.getSubtarget<ARMSubtarget>()) {
-}
+ARMSelectionDAGInfo::ARMSelectionDAGInfo(const DataLayout &DL)
+ : TargetSelectionDAGInfo(&DL) {}
ARMSelectionDAGInfo::~ARMSelectionDAGInfo() {
}
@@ -34,6 +32,7 @@ ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl,
bool isVolatile, bool AlwaysInline,
MachinePointerInfo DstPtrInfo,
MachinePointerInfo SrcPtrInfo) const {
+ const ARMSubtarget &Subtarget = DAG.getTarget().getSubtarget<ARMSubtarget>();
// Do repeated 4-byte loads and stores. To be improved.
// This requires 4-byte alignment.
if ((Align & 3) != 0)
@@ -44,7 +43,7 @@ ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl,
if (!ConstantSize)
return SDValue();
uint64_t SizeVal = ConstantSize->getZExtValue();
- if (!AlwaysInline && SizeVal > Subtarget->getMaxInlineSizeThreshold())
+ if (!AlwaysInline && SizeVal > Subtarget.getMaxInlineSizeThreshold())
return SDValue();
unsigned BytesLeft = SizeVal & 3;
@@ -54,7 +53,7 @@ ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl,
unsigned VTSize = 4;
unsigned i = 0;
// Emit a maximum of 4 loads in Thumb1 since we have fewer registers
- const unsigned MAX_LOADS_IN_LDM = Subtarget->isThumb1Only() ? 4 : 6;
+ const unsigned MAX_LOADS_IN_LDM = Subtarget.isThumb1Only() ? 4 : 6;
SDValue TFOps[6];
SDValue Loads[6];
uint64_t SrcOff = 0, DstOff = 0;
@@ -151,9 +150,10 @@ EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl,
SDValue Src, SDValue Size,
unsigned Align, bool isVolatile,
MachinePointerInfo DstPtrInfo) const {
+ const ARMSubtarget &Subtarget = DAG.getTarget().getSubtarget<ARMSubtarget>();
// Use default for non-AAPCS (or MachO) subtargets
- if (!Subtarget->isAAPCS_ABI() || Subtarget->isTargetMachO() ||
- Subtarget->isTargetWindows())
+ if (!Subtarget.isAAPCS_ABI() || Subtarget.isTargetMachO() ||
+ Subtarget.isTargetWindows())
return SDValue();
const ARMTargetLowering &TLI =
@@ -191,7 +191,7 @@ EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl,
.setCallee(TLI.getLibcallCallingConv(RTLIB::MEMSET),
Type::getVoidTy(*DAG.getContext()),
DAG.getExternalSymbol(TLI.getLibcallName(RTLIB::MEMSET),
- TLI.getPointerTy()), &Args, 0)
+ TLI.getPointerTy()), std::move(Args), 0)
.setDiscardResult();
std::pair<SDValue,SDValue> CallResult = TLI.LowerCallTo(CLI);
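
As a worked example of the inline-memcpy splitting in EmitTargetCodeForMemcpy above: the copy is only inlined when it is 4-byte aligned and below the subtarget threshold, and it is then carved into word loads grouped into LDM batches (at most 4 on Thumb1, 6 elsewhere) with the remaining 1-3 bytes handled separately. A small self-contained sketch; the function name and printout are illustrative, not part of the patch:

#include <cstdint>
#include <cstdio>

void describeInlineMemcpy(uint64_t SizeVal, bool IsThumb1Only) {
  const unsigned MaxLoadsInLDM = IsThumb1Only ? 4 : 6; // fewer registers on Thumb1
  uint64_t BytesLeft = SizeVal & 3;   // tail handled with byte/halfword ops
  uint64_t NumWords  = SizeVal >> 2;  // 4-byte loads/stores
  uint64_t Batches   = (NumWords + MaxLoadsInLDM - 1) / MaxLoadsInLDM;
  std::printf("%llu bytes -> %llu word copies in %llu batches, %llu tail bytes\n",
              (unsigned long long)SizeVal, (unsigned long long)NumWords,
              (unsigned long long)Batches, (unsigned long long)BytesLeft);
}
// e.g. describeInlineMemcpy(23, /*IsThumb1Only=*/true)
//   -> 23 bytes -> 5 word copies in 2 batches, 3 tail bytes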
diff --git a/lib/Target/ARM/ARMSelectionDAGInfo.h b/lib/Target/ARM/ARMSelectionDAGInfo.h
index 8c2397b..13769dc 100644
--- a/lib/Target/ARM/ARMSelectionDAGInfo.h
+++ b/lib/Target/ARM/ARMSelectionDAGInfo.h
@@ -36,12 +36,8 @@ namespace ARM_AM {
} // end namespace ARM_AM
class ARMSelectionDAGInfo : public TargetSelectionDAGInfo {
- /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
- /// make the right decision when generating code for different targets.
- const ARMSubtarget *Subtarget;
-
public:
- explicit ARMSelectionDAGInfo(const TargetMachine &TM);
+ explicit ARMSelectionDAGInfo(const DataLayout &DL);
~ARMSelectionDAGInfo();
SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl,
diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp
index 5b204f6..0eb24ef 100644
--- a/lib/Target/ARM/ARMSubtarget.cpp
+++ b/lib/Target/ARM/ARMSubtarget.cpp
@@ -12,8 +12,15 @@
//===----------------------------------------------------------------------===//
#include "ARMSubtarget.h"
-#include "ARMBaseInstrInfo.h"
-#include "ARMBaseRegisterInfo.h"
+#include "ARMFrameLowering.h"
+#include "ARMISelLowering.h"
+#include "ARMInstrInfo.h"
+#include "ARMJITInfo.h"
+#include "ARMSelectionDAGInfo.h"
+#include "ARMSubtarget.h"
+#include "Thumb1FrameLowering.h"
+#include "Thumb1InstrInfo.h"
+#include "Thumb2InstrInfo.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
@@ -76,22 +83,89 @@ IT(cl::desc("IT block support"), cl::Hidden, cl::init(DefaultIT),
"Allow IT blocks based on ARMv7"),
clEnumValEnd));
-ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &CPU,
- const std::string &FS, bool IsLittle,
- const TargetOptions &Options)
- : ARMGenSubtargetInfo(TT, CPU, FS)
- , ARMProcFamily(Others)
- , ARMProcClass(None)
- , stackAlignment(4)
- , CPUString(CPU)
- , IsLittle(IsLittle)
- , TargetTriple(TT)
- , Options(Options)
- , TargetABI(ARM_ABI_UNKNOWN) {
+static std::string computeDataLayout(ARMSubtarget &ST) {
+ std::string Ret = "";
+
+ if (ST.isLittle())
+ // Little endian.
+ Ret += "e";
+ else
+ // Big endian.
+ Ret += "E";
+
+ Ret += DataLayout::getManglingComponent(ST.getTargetTriple());
+
+ // Pointers are 32 bits and aligned to 32 bits.
+ Ret += "-p:32:32";
+
+  // On thumb, i16, i8 and i1 have natural alignment requirements, but we try to
+ // align to 32.
+ if (ST.isThumb())
+ Ret += "-i1:8:32-i8:8:32-i16:16:32";
+
+ // ABIs other than APCS have 64 bit integers with natural alignment.
+ if (!ST.isAPCS_ABI())
+ Ret += "-i64:64";
+
+  // We have 64 bit floats. The APCS ABI requires them to be aligned to 32
+ // bits, others to 64 bits. We always try to align to 64 bits.
+ if (ST.isAPCS_ABI())
+ Ret += "-f64:32:64";
+
+ // We have 128 and 64 bit vectors. The APCS ABI aligns them to 32 bits, others
+  // to 64. We always try to give them natural alignment.
+ if (ST.isAPCS_ABI())
+ Ret += "-v64:32:64-v128:32:128";
+ else
+ Ret += "-v128:64:128";
+
+ // On thumb and APCS, only try to align aggregates to 32 bits (the default is
+ // 64 bits).
+ if (ST.isThumb() || ST.isAPCS_ABI())
+ Ret += "-a:0:32";
+
+ // Integer registers are 32 bits.
+ Ret += "-n32";
+
+ // The stack is 128 bit aligned on NaCl, 64 bit aligned on AAPCS and 32 bit
+ // aligned everywhere else.
+ if (ST.isTargetNaCl())
+ Ret += "-S128";
+ else if (ST.isAAPCS_ABI())
+ Ret += "-S64";
+ else
+ Ret += "-S32";
+
+ return Ret;
+}
+
+/// initializeSubtargetDependencies - Initializes using a CPU and feature string
+/// so that we can use initializer lists for subtarget initialization.
+ARMSubtarget &ARMSubtarget::initializeSubtargetDependencies(StringRef CPU,
+ StringRef FS) {
initializeEnvironment();
resetSubtargetFeatures(CPU, FS);
+ return *this;
}
+ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &CPU,
+ const std::string &FS, TargetMachine &TM,
+ bool IsLittle, const TargetOptions &Options)
+ : ARMGenSubtargetInfo(TT, CPU, FS), ARMProcFamily(Others),
+ ARMProcClass(None), stackAlignment(4), CPUString(CPU), IsLittle(IsLittle),
+ TargetTriple(TT), Options(Options), TargetABI(ARM_ABI_UNKNOWN),
+ DL(computeDataLayout(initializeSubtargetDependencies(CPU, FS))),
+ TSInfo(DL), JITInfo(),
+ InstrInfo(isThumb1Only()
+ ? (ARMBaseInstrInfo *)new Thumb1InstrInfo(*this)
+ : !isThumb()
+ ? (ARMBaseInstrInfo *)new ARMInstrInfo(*this)
+ : (ARMBaseInstrInfo *)new Thumb2InstrInfo(*this)),
+ TLInfo(TM),
+ FrameLowering(!isThumb1Only()
+ ? new ARMFrameLowering(*this)
+ : (ARMFrameLowering *)new Thumb1FrameLowering(*this)) {}
+
void ARMSubtarget::initializeEnvironment() {
HasV4TOps = false;
HasV5TOps = false;
@@ -106,7 +180,6 @@ void ARMSubtarget::initializeEnvironment() {
HasVFPv4 = false;
HasFPARMv8 = false;
HasNEON = false;
- MinSize = false;
UseNEONForSinglePrecisionFP = false;
UseMulOps = UseFusedMulOps;
SlowFPVMLx = false;
@@ -158,9 +231,6 @@ void ARMSubtarget::resetSubtargetFeatures(const MachineFunction *MF) {
initializeEnvironment();
resetSubtargetFeatures(CPU, FS);
}
-
- MinSize =
- FnAttrs.hasAttribute(AttributeSet::FunctionIndex, Attribute::MinSize);
}
void ARMSubtarget::resetSubtargetFeatures(StringRef CPU, StringRef FS) {
@@ -353,6 +423,17 @@ bool ARMSubtarget::hasSinCos() const {
!getTargetTriple().isOSVersionLT(7, 0);
}
+// Enable the PostMachineScheduler if the target selects it instead of
+// PostRAScheduler. Currently only available on the command line via
+// -misched-postra.
+bool ARMSubtarget::enablePostMachineScheduler() const {
+ return PostRAScheduler;
+}
+
+bool ARMSubtarget::enableAtomicExpandLoadLinked() const {
+ return hasAnyDataBarrier() && !isThumb1Only();
+}
+
bool ARMSubtarget::enablePostRAScheduler(
CodeGenOpt::Level OptLevel,
TargetSubtargetInfo::AntiDepBreakMode& Mode,
@@ -360,3 +441,12 @@ bool ARMSubtarget::enablePostRAScheduler(
Mode = TargetSubtargetInfo::ANTIDEP_NONE;
return PostRAScheduler && OptLevel >= CodeGenOpt::Default;
}
+
+bool ARMSubtarget::useMovt(const MachineFunction &MF) const {
+ // NOTE Windows on ARM needs to use mov.w/mov.t pairs to materialise 32-bit
+ // immediates as it is inherently position independent, and may be out of
+ // range otherwise.
+ return UseMovt && (isTargetWindows() ||
+ !MF.getFunction()->getAttributes().hasAttribute(
+ AttributeSet::FunctionIndex, Attribute::MinSize));
+}
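
computeDataLayout, moved into the subtarget above, assembles the module data layout string piece by piece. As a worked example, and only a sketch tracing the branches rather than the function itself, a hypothetical little-endian, AAPCS, Thumb, ELF subtarget (whose mangling component is "-m:e") would take the following path:

#include <iostream>
#include <string>

std::string thumbAAPCSLittleEndianLayout() {
  std::string Ret = "e";                   // little endian
  Ret += "-m:e";                           // ELF mangling component
  Ret += "-p:32:32";                       // 32-bit pointers, 32-bit aligned
  Ret += "-i1:8:32-i8:8:32-i16:16:32";     // Thumb: prefer 32-bit alignment
  Ret += "-i64:64";                        // non-APCS: naturally aligned i64
  Ret += "-v128:64:128";                   // non-APCS vector alignment
  Ret += "-a:0:32";                        // Thumb: aggregates to 32 bits
  Ret += "-n32";                           // 32-bit integer registers
  Ret += "-S64";                           // AAPCS: 64-bit aligned stack
  return Ret;
}

int main() {
  // Expected: e-m:e-p:32:32-i1:8:32-i8:8:32-i16:16:32-i64:64-v128:64:128-a:0:32-n32-S64
  std::cout << thumbAAPCSLittleEndianLayout() << "\n";
}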
diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h
index 38536b2..8f6c165 100644
--- a/lib/Target/ARM/ARMSubtarget.h
+++ b/lib/Target/ARM/ARMSubtarget.h
@@ -14,8 +14,20 @@
#ifndef ARMSUBTARGET_H
#define ARMSUBTARGET_H
+
+#include "ARMFrameLowering.h"
+#include "ARMISelLowering.h"
+#include "ARMInstrInfo.h"
+#include "ARMJITInfo.h"
+#include "ARMSelectionDAGInfo.h"
+#include "ARMSubtarget.h"
+#include "Thumb1FrameLowering.h"
+#include "Thumb1InstrInfo.h"
+#include "Thumb2InstrInfo.h"
+#include "ARMJITInfo.h"
#include "MCTargetDesc/ARMMCTargetDesc.h"
#include "llvm/ADT/Triple.h"
+#include "llvm/IR/DataLayout.h"
#include "llvm/MC/MCInstrItineraries.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include <string>
@@ -64,10 +76,6 @@ protected:
bool HasFPARMv8;
bool HasNEON;
- /// MinSize - True if the function being compiled has the "minsize" attribute
- /// and should be optimised for size at the expense of speed.
- bool MinSize;
-
/// UseNEONForSinglePrecisionFP - if the NEONFP attribute has been
/// specified. Use the method useNEONForSinglePrecisionFP() to
/// determine if NEON should actually be used.
@@ -236,7 +244,7 @@ protected:
/// of the specified triple.
///
ARMSubtarget(const std::string &TT, const std::string &CPU,
- const std::string &FS, bool IsLittle,
+ const std::string &FS, TargetMachine &TM, bool IsLittle,
const TargetOptions &Options);
/// getMaxInlineSizeThreshold - Returns the maximum memset / memcpy size
@@ -250,7 +258,31 @@ protected:
/// \brief Reset the features for the ARM target.
void resetSubtargetFeatures(const MachineFunction *MF) override;
+
+ /// initializeSubtargetDependencies - Initializes using a CPU and feature string
+ /// so that we can use initializer lists for subtarget initialization.
+ ARMSubtarget &initializeSubtargetDependencies(StringRef CPU, StringRef FS);
+
+ const DataLayout *getDataLayout() const { return &DL; }
+ const ARMSelectionDAGInfo *getSelectionDAGInfo() const { return &TSInfo; }
+ ARMJITInfo *getJITInfo() { return &JITInfo; }
+ const ARMBaseInstrInfo *getInstrInfo() const { return InstrInfo.get(); }
+ const ARMTargetLowering *getTargetLowering() const { return &TLInfo; }
+ const ARMFrameLowering *getFrameLowering() const { return FrameLowering.get(); }
+ const ARMBaseRegisterInfo *getRegisterInfo() const {
+ return &InstrInfo->getRegisterInfo();
+ }
+
private:
+ const DataLayout DL;
+ ARMSelectionDAGInfo TSInfo;
+ ARMJITInfo JITInfo;
+ // Either Thumb1InstrInfo or Thumb2InstrInfo.
+ std::unique_ptr<ARMBaseInstrInfo> InstrInfo;
+ ARMTargetLowering TLInfo;
+ // Either Thumb1FrameLowering or ARMFrameLowering.
+ std::unique_ptr<ARMFrameLowering> FrameLowering;
+
void initializeEnvironment();
void resetSubtargetFeatures(StringRef CPU, StringRef FS);
public:
@@ -286,7 +318,6 @@ public:
bool hasCrypto() const { return HasCrypto; }
bool hasCRC() const { return HasCRC; }
bool hasVirtualization() const { return HasVirtualization; }
- bool isMinSize() const { return MinSize; }
bool useNEONForSinglePrecisionFP() const {
return hasNEON() && UseNEONForSinglePrecisionFP; }
@@ -382,7 +413,8 @@ public:
bool isR9Reserved() const { return IsR9Reserved; }
- bool useMovt() const { return UseMovt && !isMinSize(); }
+ bool useMovt(const MachineFunction &MF) const;
+
bool supportsTailCall() const { return SupportsTailCall; }
bool allowsUnalignedMem() const { return AllowsUnalignedMem; }
@@ -399,11 +431,17 @@ public:
/// compiler runtime or math libraries.
bool hasSinCos() const;
+ /// True for some subtargets at > -O0.
+ bool enablePostMachineScheduler() const;
+
/// enablePostRAScheduler - True at 'More' optimization.
bool enablePostRAScheduler(CodeGenOpt::Level OptLevel,
TargetSubtargetInfo::AntiDepBreakMode& Mode,
RegClassVector& CriticalPathRCs) const override;
+ // enableAtomicExpandLoadLinked - True if we need to expand our atomics.
+ bool enableAtomicExpandLoadLinked() const override;
+
/// getInstrItins - Return the instruction itineraies based on subtarget
/// selection.
const InstrItineraryData &getInstrItineraryData() const { return InstrItins; }
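
useMovt now takes the MachineFunction so the minsize decision can be made per function instead of being cached on the subtarget (the removed MinSize flag above). A simplified sketch of the predicate, with plain bools standing in for the subtarget and attribute queries; the helper name is hypothetical:

// movw/movt pairs are avoided under minsize, except on Windows, where they
// are required to materialise 32-bit immediates position-independently.
bool shouldUseMovt(bool SubtargetUseMovt, bool IsTargetWindows, bool FnHasMinSize) {
  return SubtargetUseMovt && (IsTargetWindows || !FnHasMinSize);
}
// shouldUseMovt(true, false, true) == false; shouldUseMovt(true, true, true) == true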
diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp
index 8876227..d85194b 100644
--- a/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/lib/Target/ARM/ARMTargetMachine.cpp
@@ -28,6 +28,12 @@ DisableA15SDOptimization("disable-a15-sd-optimization", cl::Hidden,
cl::desc("Inhibit optimization of S->D register accesses on A15"),
cl::init(false));
+static cl::opt<bool>
+EnableAtomicTidy("arm-atomic-cfg-tidy", cl::Hidden,
+ cl::desc("Run SimplifyCFG after expanding atomic operations"
+ " to make use of cmpxchg flow-based information"),
+ cl::init(true));
+
extern "C" void LLVMInitializeARMTarget() {
// Register the target.
RegisterTargetMachine<ARMLETargetMachine> X(TheARMLETarget);
@@ -43,12 +49,9 @@ ARMBaseTargetMachine::ARMBaseTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
- CodeGenOpt::Level OL,
- bool isLittle)
- : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
- Subtarget(TT, CPU, FS, isLittle, Options),
- JITInfo(),
- InstrItins(Subtarget.getInstrItineraryData()) {
+ CodeGenOpt::Level OL, bool isLittle)
+ : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
+ Subtarget(TT, CPU, FS, *this, isLittle, Options) {
// Default to triple-appropriate float ABI
if (Options.FloatABIType == FloatABI::Default)
@@ -67,74 +70,11 @@ void ARMBaseTargetMachine::addAnalysisPasses(PassManagerBase &PM) {
void ARMTargetMachine::anchor() { }
-static std::string computeDataLayout(ARMSubtarget &ST) {
- std::string Ret = "";
-
- if (ST.isLittle())
- // Little endian.
- Ret += "e";
- else
- // Big endian.
- Ret += "E";
-
- Ret += DataLayout::getManglingComponent(ST.getTargetTriple());
-
- // Pointers are 32 bits and aligned to 32 bits.
- Ret += "-p:32:32";
-
-  // On thumb, i16, i8 and i1 have natural alignment requirements, but we try to
- // align to 32.
- if (ST.isThumb())
- Ret += "-i1:8:32-i8:8:32-i16:16:32";
-
- // ABIs other than APCS have 64 bit integers with natural alignment.
- if (!ST.isAPCS_ABI())
- Ret += "-i64:64";
-
-  // We have 64 bit floats. The APCS ABI requires them to be aligned to 32
- // bits, others to 64 bits. We always try to align to 64 bits.
- if (ST.isAPCS_ABI())
- Ret += "-f64:32:64";
-
- // We have 128 and 64 bit vectors. The APCS ABI aligns them to 32 bits, others
-  // to 64. We always try to give them natural alignment.
- if (ST.isAPCS_ABI())
- Ret += "-v64:32:64-v128:32:128";
- else
- Ret += "-v128:64:128";
-
- // On thumb and APCS, only try to align aggregates to 32 bits (the default is
- // 64 bits).
- if (ST.isThumb() || ST.isAPCS_ABI())
- Ret += "-a:0:32";
-
- // Integer registers are 32 bits.
- Ret += "-n32";
-
- // The stack is 128 bit aligned on NaCl, 64 bit aligned on AAPCS and 32 bit
- // aligned everywhere else.
- if (ST.isTargetNaCl())
- Ret += "-S128";
- else if (ST.isAAPCS_ABI())
- Ret += "-S64";
- else
- Ret += "-S32";
-
- return Ret;
-}
-
-ARMTargetMachine::ARMTargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
- const TargetOptions &Options,
+ARMTargetMachine::ARMTargetMachine(const Target &T, StringRef TT, StringRef CPU,
+ StringRef FS, const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
- CodeGenOpt::Level OL,
- bool isLittle)
- : ARMBaseTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, isLittle),
- InstrInfo(Subtarget),
- DL(computeDataLayout(Subtarget)),
- TLInfo(*this),
- TSInfo(*this),
- FrameLowering(Subtarget) {
+ CodeGenOpt::Level OL, bool isLittle)
+ : ARMBaseTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, isLittle) {
initAsmInfo();
if (!Subtarget.hasARMOps())
report_fatal_error("CPU: '" + Subtarget.getCPUString() + "' does not "
@@ -143,21 +83,21 @@ ARMTargetMachine::ARMTargetMachine(const Target &T, StringRef TT,
void ARMLETargetMachine::anchor() { }
-ARMLETargetMachine::
-ARMLETargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS, const TargetOptions &Options,
- Reloc::Model RM, CodeModel::Model CM,
- CodeGenOpt::Level OL)
- : ARMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {}
+ARMLETargetMachine::ARMLETargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL)
+ : ARMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {}
void ARMBETargetMachine::anchor() { }
-ARMBETargetMachine::
-ARMBETargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS, const TargetOptions &Options,
- Reloc::Model RM, CodeModel::Model CM,
- CodeGenOpt::Level OL)
- : ARMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {}
+ARMBETargetMachine::ARMBETargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL)
+ : ARMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {}
void ThumbTargetMachine::anchor() { }
@@ -165,38 +105,29 @@ ThumbTargetMachine::ThumbTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
- CodeGenOpt::Level OL,
- bool isLittle)
- : ARMBaseTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, isLittle),
- InstrInfo(Subtarget.hasThumb2()
- ? ((ARMBaseInstrInfo*)new Thumb2InstrInfo(Subtarget))
- : ((ARMBaseInstrInfo*)new Thumb1InstrInfo(Subtarget))),
- DL(computeDataLayout(Subtarget)),
- TLInfo(*this),
- TSInfo(*this),
- FrameLowering(Subtarget.hasThumb2()
- ? new ARMFrameLowering(Subtarget)
- : (ARMFrameLowering*)new Thumb1FrameLowering(Subtarget)) {
+ CodeGenOpt::Level OL, bool isLittle)
+ : ARMBaseTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL,
+ isLittle) {
initAsmInfo();
}
void ThumbLETargetMachine::anchor() { }
-ThumbLETargetMachine::
-ThumbLETargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS, const TargetOptions &Options,
- Reloc::Model RM, CodeModel::Model CM,
- CodeGenOpt::Level OL)
- : ThumbTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {}
+ThumbLETargetMachine::ThumbLETargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL)
+ : ThumbTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {}
void ThumbBETargetMachine::anchor() { }
-ThumbBETargetMachine::
-ThumbBETargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS, const TargetOptions &Options,
- Reloc::Model RM, CodeModel::Model CM,
- CodeGenOpt::Level OL)
- : ThumbTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {}
+ThumbBETargetMachine::ThumbBETargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL)
+ : ThumbTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {}
namespace {
/// ARM Code Generator Pass Configuration Options.
@@ -213,6 +144,7 @@ public:
return *getARMTargetMachine().getSubtargetImpl();
}
+ void addIRPasses() override;
bool addPreISel() override;
bool addInstSelector() override;
bool addPreRegAlloc() override;
@@ -225,11 +157,21 @@ TargetPassConfig *ARMBaseTargetMachine::createPassConfig(PassManagerBase &PM) {
return new ARMPassConfig(this, PM);
}
-bool ARMPassConfig::addPreISel() {
+void ARMPassConfig::addIRPasses() {
+ addPass(createAtomicExpandLoadLinkedPass(TM));
+
+ // Cmpxchg instructions are often used with a subsequent comparison to
+ // determine whether it succeeded. We can exploit existing control-flow in
+ // ldrex/strex loops to simplify this, but it needs tidying up.
const ARMSubtarget *Subtarget = &getARMSubtarget();
if (Subtarget->hasAnyDataBarrier() && !Subtarget->isThumb1Only())
- addPass(createAtomicExpandLoadLinkedPass(TM));
+ if (TM->getOptLevel() != CodeGenOpt::None && EnableAtomicTidy)
+ addPass(createCFGSimplificationPass());
+ TargetPassConfig::addIRPasses();
+}
+
+bool ARMPassConfig::addPreISel() {
if (TM->getOptLevel() != CodeGenOpt::None)
addPass(createGlobalMergePass(TM));
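
The reordered addIRPasses above always runs the AtomicExpandLoadLinked pass first and then, when the subtarget supports ldrex/strex, -arm-atomic-cfg-tidy is left enabled, and the build is at -O1 or higher, a SimplifyCFG run so the success check of a cmpxchg can reuse the control flow of the loop it expands to. A small example of the kind of C++ source whose lowering benefits; nothing here is from the patch:

#include <atomic>

// compare_exchange_strong expands to an ldrex/strex retry loop on ARM; the
// caller's "did it succeed?" branch can share that loop's own control flow
// once the CFG is tidied after atomic expansion.
bool tryClaim(std::atomic<int> &Lock) {
  int Expected = 0;
  return Lock.compare_exchange_strong(Expected, 1);
}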
diff --git a/lib/Target/ARM/ARMTargetMachine.h b/lib/Target/ARM/ARMTargetMachine.h
index 664c992..b72b1df 100644
--- a/lib/Target/ARM/ARMTargetMachine.h
+++ b/lib/Target/ARM/ARMTargetMachine.h
@@ -14,17 +14,9 @@
#ifndef ARMTARGETMACHINE_H
#define ARMTARGETMACHINE_H
-#include "ARMFrameLowering.h"
-#include "ARMISelLowering.h"
#include "ARMInstrInfo.h"
-#include "ARMJITInfo.h"
-#include "ARMSelectionDAGInfo.h"
#include "ARMSubtarget.h"
-#include "Thumb1FrameLowering.h"
-#include "Thumb1InstrInfo.h"
-#include "Thumb2InstrInfo.h"
#include "llvm/IR/DataLayout.h"
-#include "llvm/MC/MCStreamer.h"
#include "llvm/Target/TargetMachine.h"
namespace llvm {
@@ -32,10 +24,6 @@ namespace llvm {
class ARMBaseTargetMachine : public LLVMTargetMachine {
protected:
ARMSubtarget Subtarget;
-private:
- ARMJITInfo JITInfo;
- InstrItineraryData InstrItins;
-
public:
ARMBaseTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
@@ -44,15 +32,29 @@ public:
CodeGenOpt::Level OL,
bool isLittle);
- ARMJITInfo *getJITInfo() override { return &JITInfo; }
const ARMSubtarget *getSubtargetImpl() const override { return &Subtarget; }
+ const ARMBaseRegisterInfo *getRegisterInfo() const override {
+ return getSubtargetImpl()->getRegisterInfo();
+ }
const ARMTargetLowering *getTargetLowering() const override {
- // Implemented by derived classes
- llvm_unreachable("getTargetLowering not implemented");
+ return getSubtargetImpl()->getTargetLowering();
+ }
+ const ARMSelectionDAGInfo *getSelectionDAGInfo() const override {
+ return getSubtargetImpl()->getSelectionDAGInfo();
+ }
+ const ARMBaseInstrInfo *getInstrInfo() const override {
+ return getSubtargetImpl()->getInstrInfo();
+ }
+ const ARMFrameLowering *getFrameLowering() const override {
+ return getSubtargetImpl()->getFrameLowering();
}
const InstrItineraryData *getInstrItineraryData() const override {
- return &InstrItins;
+ return &getSubtargetImpl()->getInstrItineraryData();
}
+ const DataLayout *getDataLayout() const override {
+ return getSubtargetImpl()->getDataLayout();
+ }
+ ARMJITInfo *getJITInfo() override { return Subtarget.getJITInfo(); }
/// \brief Register ARM analysis passes with a pass manager.
void addAnalysisPasses(PassManagerBase &PM) override;
@@ -67,35 +69,10 @@ public:
///
class ARMTargetMachine : public ARMBaseTargetMachine {
virtual void anchor();
- ARMInstrInfo InstrInfo;
- const DataLayout DL; // Calculates type size & alignment
- ARMTargetLowering TLInfo;
- ARMSelectionDAGInfo TSInfo;
- ARMFrameLowering FrameLowering;
public:
- ARMTargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
- const TargetOptions &Options,
- Reloc::Model RM, CodeModel::Model CM,
- CodeGenOpt::Level OL,
- bool isLittle);
-
- const ARMRegisterInfo *getRegisterInfo() const override {
- return &InstrInfo.getRegisterInfo();
- }
-
- const ARMTargetLowering *getTargetLowering() const override {
- return &TLInfo;
- }
-
- const ARMSelectionDAGInfo *getSelectionDAGInfo() const override {
- return &TSInfo;
- }
- const ARMFrameLowering *getFrameLowering() const override {
- return &FrameLowering;
- }
- const ARMInstrInfo *getInstrInfo() const override { return &InstrInfo; }
- const DataLayout *getDataLayout() const override { return &DL; }
+ ARMTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS,
+ const TargetOptions &Options, Reloc::Model RM,
+ CodeModel::Model CM, CodeGenOpt::Level OL, bool isLittle);
};
/// ARMLETargetMachine - ARM little endian target machine.
@@ -114,10 +91,9 @@ public:
class ARMBETargetMachine : public ARMTargetMachine {
void anchor() override;
public:
- ARMBETargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS, const TargetOptions &Options,
- Reloc::Model RM, CodeModel::Model CM,
- CodeGenOpt::Level OL);
+ ARMBETargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS,
+ const TargetOptions &Options, Reloc::Model RM,
+ CodeModel::Model CM, CodeGenOpt::Level OL);
};
/// ThumbTargetMachine - Thumb target machine.
@@ -126,43 +102,10 @@ public:
///
class ThumbTargetMachine : public ARMBaseTargetMachine {
virtual void anchor();
- // Either Thumb1InstrInfo or Thumb2InstrInfo.
- std::unique_ptr<ARMBaseInstrInfo> InstrInfo;
- const DataLayout DL; // Calculates type size & alignment
- ARMTargetLowering TLInfo;
- ARMSelectionDAGInfo TSInfo;
- // Either Thumb1FrameLowering or ARMFrameLowering.
- std::unique_ptr<ARMFrameLowering> FrameLowering;
public:
- ThumbTargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
- const TargetOptions &Options,
- Reloc::Model RM, CodeModel::Model CM,
- CodeGenOpt::Level OL,
- bool isLittle);
-
- /// returns either Thumb1RegisterInfo or Thumb2RegisterInfo
- const ARMBaseRegisterInfo *getRegisterInfo() const override {
- return &InstrInfo->getRegisterInfo();
- }
-
- const ARMTargetLowering *getTargetLowering() const override {
- return &TLInfo;
- }
-
- const ARMSelectionDAGInfo *getSelectionDAGInfo() const override {
- return &TSInfo;
- }
-
- /// returns either Thumb1InstrInfo or Thumb2InstrInfo
- const ARMBaseInstrInfo *getInstrInfo() const override {
- return InstrInfo.get();
- }
- /// returns either Thumb1FrameLowering or ARMFrameLowering
- const ARMFrameLowering *getFrameLowering() const override {
- return FrameLowering.get();
- }
- const DataLayout *getDataLayout() const override { return &DL; }
+ ThumbTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS,
+ const TargetOptions &Options, Reloc::Model RM,
+ CodeModel::Model CM, CodeGenOpt::Level OL, bool isLittle);
};
/// ThumbLETargetMachine - Thumb little endian target machine.
@@ -170,10 +113,10 @@ public:
class ThumbLETargetMachine : public ThumbTargetMachine {
void anchor() override;
public:
- ThumbLETargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS, const TargetOptions &Options,
- Reloc::Model RM, CodeModel::Model CM,
- CodeGenOpt::Level OL);
+ ThumbLETargetMachine(const Target &T, StringRef TT, StringRef CPU,
+ StringRef FS, const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL);
};
/// ThumbBETargetMachine - Thumb big endian target machine.
diff --git a/lib/Target/ARM/ARMTargetTransformInfo.cpp b/lib/Target/ARM/ARMTargetTransformInfo.cpp
index 57df7da..a2ace62 100644
--- a/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -443,31 +443,58 @@ unsigned ARMTTI::getAddressComputationCost(Type *Ty, bool IsComplex) const {
unsigned ARMTTI::getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
Type *SubTp) const {
- // We only handle costs of reverse shuffles for now.
- if (Kind != SK_Reverse)
+ // We only handle costs of reverse and alternate shuffles for now.
+ if (Kind != SK_Reverse && Kind != SK_Alternate)
return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp);
- static const CostTblEntry<MVT::SimpleValueType> NEONShuffleTbl[] = {
- // Reverse shuffle cost one instruction if we are shuffling within a double
- // word (vrev) or two if we shuffle a quad word (vrev, vext).
- { ISD::VECTOR_SHUFFLE, MVT::v2i32, 1 },
- { ISD::VECTOR_SHUFFLE, MVT::v2f32, 1 },
- { ISD::VECTOR_SHUFFLE, MVT::v2i64, 1 },
- { ISD::VECTOR_SHUFFLE, MVT::v2f64, 1 },
-
- { ISD::VECTOR_SHUFFLE, MVT::v4i32, 2 },
- { ISD::VECTOR_SHUFFLE, MVT::v4f32, 2 },
- { ISD::VECTOR_SHUFFLE, MVT::v8i16, 2 },
- { ISD::VECTOR_SHUFFLE, MVT::v16i8, 2 }
- };
+ if (Kind == SK_Reverse) {
+ static const CostTblEntry<MVT::SimpleValueType> NEONShuffleTbl[] = {
+ // Reverse shuffle cost one instruction if we are shuffling within a
+ // double word (vrev) or two if we shuffle a quad word (vrev, vext).
+ {ISD::VECTOR_SHUFFLE, MVT::v2i32, 1},
+ {ISD::VECTOR_SHUFFLE, MVT::v2f32, 1},
+ {ISD::VECTOR_SHUFFLE, MVT::v2i64, 1},
+ {ISD::VECTOR_SHUFFLE, MVT::v2f64, 1},
- std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Tp);
+ {ISD::VECTOR_SHUFFLE, MVT::v4i32, 2},
+ {ISD::VECTOR_SHUFFLE, MVT::v4f32, 2},
+ {ISD::VECTOR_SHUFFLE, MVT::v8i16, 2},
+ {ISD::VECTOR_SHUFFLE, MVT::v16i8, 2}};
- int Idx = CostTableLookup(NEONShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second);
- if (Idx == -1)
- return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp);
+ std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Tp);
+
+ int Idx = CostTableLookup(NEONShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second);
+ if (Idx == -1)
+ return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp);
- return LT.first * NEONShuffleTbl[Idx].Cost;
+ return LT.first * NEONShuffleTbl[Idx].Cost;
+ }
+ if (Kind == SK_Alternate) {
+ static const CostTblEntry<MVT::SimpleValueType> NEONAltShuffleTbl[] = {
+ // Alt shuffle cost table for ARM. Cost is the number of instructions
+ // required to create the shuffled vector.
+
+ {ISD::VECTOR_SHUFFLE, MVT::v2f32, 1},
+ {ISD::VECTOR_SHUFFLE, MVT::v2i64, 1},
+ {ISD::VECTOR_SHUFFLE, MVT::v2f64, 1},
+ {ISD::VECTOR_SHUFFLE, MVT::v2i32, 1},
+
+ {ISD::VECTOR_SHUFFLE, MVT::v4i32, 2},
+ {ISD::VECTOR_SHUFFLE, MVT::v4f32, 2},
+ {ISD::VECTOR_SHUFFLE, MVT::v4i16, 2},
+
+ {ISD::VECTOR_SHUFFLE, MVT::v8i16, 16},
+
+ {ISD::VECTOR_SHUFFLE, MVT::v16i8, 32}};
+
+ std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Tp);
+ int Idx =
+ CostTableLookup(NEONAltShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second);
+ if (Idx == -1)
+ return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp);
+ return LT.first * NEONAltShuffleTbl[Idx].Cost;
+ }
+ return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp);
}
unsigned ARMTTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
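
getShuffleCost above now prices alternate (even/odd interleaving) shuffles as well as reverses, by looking up the legalized type in a per-kind cost table and scaling by the legalization factor. A sketch of that final lookup step, with the alternate-shuffle costs copied from the table above and a plain map standing in for CostTableLookup:

#include <map>
#include <string>

// Returns LegalizationFactor * table cost, or 0 to signal "fall back to the
// generic TargetTransformInfo estimate" (the real code returns the base
// implementation's cost rather than a sentinel).
unsigned armAltShuffleCost(const std::string &LegalizedVT,
                           unsigned LegalizationFactor) {
  static const std::map<std::string, unsigned> AltCost = {
      {"v2f32", 1}, {"v2i64", 1}, {"v2f64", 1}, {"v2i32", 1},
      {"v4i32", 2}, {"v4f32", 2}, {"v4i16", 2},
      {"v8i16", 16}, {"v16i8", 32}};
  auto It = AltCost.find(LegalizedVT);
  return It == AltCost.end() ? 0 : LegalizationFactor * It->second;
}
// e.g. armAltShuffleCost("v8i16", 1) == 16 instructions.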
diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index 5cdf394..b62706c 100644
--- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -190,11 +190,11 @@ class ARMAsmParser : public MCTargetAsmParser {
}
int tryParseRegister();
- bool tryParseRegisterWithWriteBack(SmallVectorImpl<MCParsedAsmOperand*> &);
- int tryParseShiftRegister(SmallVectorImpl<MCParsedAsmOperand*> &);
- bool parseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &);
- bool parseMemory(SmallVectorImpl<MCParsedAsmOperand*> &);
- bool parseOperand(SmallVectorImpl<MCParsedAsmOperand*> &, StringRef Mnemonic);
+ bool tryParseRegisterWithWriteBack(OperandVector &);
+ int tryParseShiftRegister(OperandVector &);
+ bool parseRegisterList(OperandVector &);
+ bool parseMemory(OperandVector &);
+ bool parseOperand(OperandVector &, StringRef Mnemonic);
bool parsePrefix(ARMMCExpr::VariantKind &RefKind);
bool parseMemRegOffsetShift(ARM_AM::ShiftOpc &ShiftType,
unsigned &ShiftAmount);
@@ -282,54 +282,42 @@ class ARMAsmParser : public MCTargetAsmParser {
/// }
- OperandMatchResultTy parseITCondCode(SmallVectorImpl<MCParsedAsmOperand*>&);
- OperandMatchResultTy parseCoprocNumOperand(
- SmallVectorImpl<MCParsedAsmOperand*>&);
- OperandMatchResultTy parseCoprocRegOperand(
- SmallVectorImpl<MCParsedAsmOperand*>&);
- OperandMatchResultTy parseCoprocOptionOperand(
- SmallVectorImpl<MCParsedAsmOperand*>&);
- OperandMatchResultTy parseMemBarrierOptOperand(
- SmallVectorImpl<MCParsedAsmOperand*>&);
- OperandMatchResultTy parseInstSyncBarrierOptOperand(
- SmallVectorImpl<MCParsedAsmOperand*>&);
- OperandMatchResultTy parseProcIFlagsOperand(
- SmallVectorImpl<MCParsedAsmOperand*>&);
- OperandMatchResultTy parseMSRMaskOperand(
- SmallVectorImpl<MCParsedAsmOperand*>&);
- OperandMatchResultTy parsePKHImm(SmallVectorImpl<MCParsedAsmOperand*> &O,
- StringRef Op, int Low, int High);
- OperandMatchResultTy parsePKHLSLImm(SmallVectorImpl<MCParsedAsmOperand*> &O) {
+ OperandMatchResultTy parseITCondCode(OperandVector &);
+ OperandMatchResultTy parseCoprocNumOperand(OperandVector &);
+ OperandMatchResultTy parseCoprocRegOperand(OperandVector &);
+ OperandMatchResultTy parseCoprocOptionOperand(OperandVector &);
+ OperandMatchResultTy parseMemBarrierOptOperand(OperandVector &);
+ OperandMatchResultTy parseInstSyncBarrierOptOperand(OperandVector &);
+ OperandMatchResultTy parseProcIFlagsOperand(OperandVector &);
+ OperandMatchResultTy parseMSRMaskOperand(OperandVector &);
+ OperandMatchResultTy parsePKHImm(OperandVector &O, StringRef Op, int Low,
+ int High);
+ OperandMatchResultTy parsePKHLSLImm(OperandVector &O) {
return parsePKHImm(O, "lsl", 0, 31);
}
- OperandMatchResultTy parsePKHASRImm(SmallVectorImpl<MCParsedAsmOperand*> &O) {
+ OperandMatchResultTy parsePKHASRImm(OperandVector &O) {
return parsePKHImm(O, "asr", 1, 32);
}
- OperandMatchResultTy parseSetEndImm(SmallVectorImpl<MCParsedAsmOperand*>&);
- OperandMatchResultTy parseShifterImm(SmallVectorImpl<MCParsedAsmOperand*>&);
- OperandMatchResultTy parseRotImm(SmallVectorImpl<MCParsedAsmOperand*>&);
- OperandMatchResultTy parseBitfield(SmallVectorImpl<MCParsedAsmOperand*>&);
- OperandMatchResultTy parsePostIdxReg(SmallVectorImpl<MCParsedAsmOperand*>&);
- OperandMatchResultTy parseAM3Offset(SmallVectorImpl<MCParsedAsmOperand*>&);
- OperandMatchResultTy parseFPImm(SmallVectorImpl<MCParsedAsmOperand*>&);
- OperandMatchResultTy parseVectorList(SmallVectorImpl<MCParsedAsmOperand*>&);
+ OperandMatchResultTy parseSetEndImm(OperandVector &);
+ OperandMatchResultTy parseShifterImm(OperandVector &);
+ OperandMatchResultTy parseRotImm(OperandVector &);
+ OperandMatchResultTy parseBitfield(OperandVector &);
+ OperandMatchResultTy parsePostIdxReg(OperandVector &);
+ OperandMatchResultTy parseAM3Offset(OperandVector &);
+ OperandMatchResultTy parseFPImm(OperandVector &);
+ OperandMatchResultTy parseVectorList(OperandVector &);
OperandMatchResultTy parseVectorLane(VectorLaneTy &LaneKind, unsigned &Index,
SMLoc &EndLoc);
// Asm Match Converter Methods
- void cvtThumbMultiply(MCInst &Inst,
- const SmallVectorImpl<MCParsedAsmOperand*> &);
- void cvtThumbBranches(MCInst &Inst,
- const SmallVectorImpl<MCParsedAsmOperand*> &);
-
- bool validateInstruction(MCInst &Inst,
- const SmallVectorImpl<MCParsedAsmOperand*> &Ops);
- bool processInstruction(MCInst &Inst,
- const SmallVectorImpl<MCParsedAsmOperand*> &Ops);
- bool shouldOmitCCOutOperand(StringRef Mnemonic,
- SmallVectorImpl<MCParsedAsmOperand*> &Operands);
- bool shouldOmitPredicateOperand(StringRef Mnemonic,
- SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+ void cvtThumbMultiply(MCInst &Inst, const OperandVector &);
+ void cvtThumbBranches(MCInst &Inst, const OperandVector &);
+
+ bool validateInstruction(MCInst &Inst, const OperandVector &Ops);
+ bool processInstruction(MCInst &Inst, const OperandVector &Ops);
+ bool shouldOmitCCOutOperand(StringRef Mnemonic, OperandVector &Operands);
+ bool shouldOmitPredicateOperand(StringRef Mnemonic, OperandVector &Operands);
+
public:
enum ARMMatchResultTy {
Match_RequiresITBlock = FIRST_TARGET_MATCH_RESULT_TY,
@@ -361,19 +349,17 @@ public:
// Implementation of the MCTargetAsmParser interface:
bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
- bool
- ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
- SMLoc NameLoc,
- SmallVectorImpl<MCParsedAsmOperand*> &Operands) override;
+ bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
+ SMLoc NameLoc, OperandVector &Operands) override;
bool ParseDirective(AsmToken DirectiveID) override;
- unsigned validateTargetOperandClass(MCParsedAsmOperand *Op,
+ unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
unsigned Kind) override;
unsigned checkTargetMatchPredicate(MCInst &Inst) override;
bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
- SmallVectorImpl<MCParsedAsmOperand*> &Operands,
- MCStreamer &Out, unsigned &ErrorInfo,
+ OperandVector &Operands, MCStreamer &Out,
+ unsigned &ErrorInfo,
bool MatchingInlineAsm) override;
void onLabelParsed(MCSymbol *Symbol) override;
};
@@ -545,8 +531,8 @@ class ARMOperand : public MCParsedAsmOperand {
struct BitfieldOp Bitfield;
};
- ARMOperand(KindTy K) : MCParsedAsmOperand(), Kind(K) {}
public:
+ ARMOperand(KindTy K) : MCParsedAsmOperand(), Kind(K) {}
ARMOperand(const ARMOperand &o) : MCParsedAsmOperand() {
Kind = o.Kind;
StartLoc = o.StartLoc;
@@ -2481,56 +2467,58 @@ public:
void print(raw_ostream &OS) const override;
- static ARMOperand *CreateITMask(unsigned Mask, SMLoc S) {
- ARMOperand *Op = new ARMOperand(k_ITCondMask);
+ static std::unique_ptr<ARMOperand> CreateITMask(unsigned Mask, SMLoc S) {
+ auto Op = make_unique<ARMOperand>(k_ITCondMask);
Op->ITMask.Mask = Mask;
Op->StartLoc = S;
Op->EndLoc = S;
return Op;
}
- static ARMOperand *CreateCondCode(ARMCC::CondCodes CC, SMLoc S) {
- ARMOperand *Op = new ARMOperand(k_CondCode);
+ static std::unique_ptr<ARMOperand> CreateCondCode(ARMCC::CondCodes CC,
+ SMLoc S) {
+ auto Op = make_unique<ARMOperand>(k_CondCode);
Op->CC.Val = CC;
Op->StartLoc = S;
Op->EndLoc = S;
return Op;
}
- static ARMOperand *CreateCoprocNum(unsigned CopVal, SMLoc S) {
- ARMOperand *Op = new ARMOperand(k_CoprocNum);
+ static std::unique_ptr<ARMOperand> CreateCoprocNum(unsigned CopVal, SMLoc S) {
+ auto Op = make_unique<ARMOperand>(k_CoprocNum);
Op->Cop.Val = CopVal;
Op->StartLoc = S;
Op->EndLoc = S;
return Op;
}
- static ARMOperand *CreateCoprocReg(unsigned CopVal, SMLoc S) {
- ARMOperand *Op = new ARMOperand(k_CoprocReg);
+ static std::unique_ptr<ARMOperand> CreateCoprocReg(unsigned CopVal, SMLoc S) {
+ auto Op = make_unique<ARMOperand>(k_CoprocReg);
Op->Cop.Val = CopVal;
Op->StartLoc = S;
Op->EndLoc = S;
return Op;
}
- static ARMOperand *CreateCoprocOption(unsigned Val, SMLoc S, SMLoc E) {
- ARMOperand *Op = new ARMOperand(k_CoprocOption);
+ static std::unique_ptr<ARMOperand> CreateCoprocOption(unsigned Val, SMLoc S,
+ SMLoc E) {
+ auto Op = make_unique<ARMOperand>(k_CoprocOption);
Op->Cop.Val = Val;
Op->StartLoc = S;
Op->EndLoc = E;
return Op;
}
- static ARMOperand *CreateCCOut(unsigned RegNum, SMLoc S) {
- ARMOperand *Op = new ARMOperand(k_CCOut);
+ static std::unique_ptr<ARMOperand> CreateCCOut(unsigned RegNum, SMLoc S) {
+ auto Op = make_unique<ARMOperand>(k_CCOut);
Op->Reg.RegNum = RegNum;
Op->StartLoc = S;
Op->EndLoc = S;
return Op;
}
- static ARMOperand *CreateToken(StringRef Str, SMLoc S) {
- ARMOperand *Op = new ARMOperand(k_Token);
+ static std::unique_ptr<ARMOperand> CreateToken(StringRef Str, SMLoc S) {
+ auto Op = make_unique<ARMOperand>(k_Token);
Op->Tok.Data = Str.data();
Op->Tok.Length = Str.size();
Op->StartLoc = S;
@@ -2538,20 +2526,20 @@ public:
return Op;
}
- static ARMOperand *CreateReg(unsigned RegNum, SMLoc S, SMLoc E) {
- ARMOperand *Op = new ARMOperand(k_Register);
+ static std::unique_ptr<ARMOperand> CreateReg(unsigned RegNum, SMLoc S,
+ SMLoc E) {
+ auto Op = make_unique<ARMOperand>(k_Register);
Op->Reg.RegNum = RegNum;
Op->StartLoc = S;
Op->EndLoc = E;
return Op;
}
- static ARMOperand *CreateShiftedRegister(ARM_AM::ShiftOpc ShTy,
- unsigned SrcReg,
- unsigned ShiftReg,
- unsigned ShiftImm,
- SMLoc S, SMLoc E) {
- ARMOperand *Op = new ARMOperand(k_ShiftedRegister);
+ static std::unique_ptr<ARMOperand>
+ CreateShiftedRegister(ARM_AM::ShiftOpc ShTy, unsigned SrcReg,
+ unsigned ShiftReg, unsigned ShiftImm, SMLoc S,
+ SMLoc E) {
+ auto Op = make_unique<ARMOperand>(k_ShiftedRegister);
Op->RegShiftedReg.ShiftTy = ShTy;
Op->RegShiftedReg.SrcReg = SrcReg;
Op->RegShiftedReg.ShiftReg = ShiftReg;
@@ -2561,11 +2549,10 @@ public:
return Op;
}
- static ARMOperand *CreateShiftedImmediate(ARM_AM::ShiftOpc ShTy,
- unsigned SrcReg,
- unsigned ShiftImm,
- SMLoc S, SMLoc E) {
- ARMOperand *Op = new ARMOperand(k_ShiftedImmediate);
+ static std::unique_ptr<ARMOperand>
+ CreateShiftedImmediate(ARM_AM::ShiftOpc ShTy, unsigned SrcReg,
+ unsigned ShiftImm, SMLoc S, SMLoc E) {
+ auto Op = make_unique<ARMOperand>(k_ShiftedImmediate);
Op->RegShiftedImm.ShiftTy = ShTy;
Op->RegShiftedImm.SrcReg = SrcReg;
Op->RegShiftedImm.ShiftImm = ShiftImm;
@@ -2574,9 +2561,9 @@ public:
return Op;
}
- static ARMOperand *CreateShifterImm(bool isASR, unsigned Imm,
- SMLoc S, SMLoc E) {
- ARMOperand *Op = new ARMOperand(k_ShifterImmediate);
+ static std::unique_ptr<ARMOperand> CreateShifterImm(bool isASR, unsigned Imm,
+ SMLoc S, SMLoc E) {
+ auto Op = make_unique<ARMOperand>(k_ShifterImmediate);
Op->ShifterImm.isASR = isASR;
Op->ShifterImm.Imm = Imm;
Op->StartLoc = S;
@@ -2584,17 +2571,18 @@ public:
return Op;
}
- static ARMOperand *CreateRotImm(unsigned Imm, SMLoc S, SMLoc E) {
- ARMOperand *Op = new ARMOperand(k_RotateImmediate);
+ static std::unique_ptr<ARMOperand> CreateRotImm(unsigned Imm, SMLoc S,
+ SMLoc E) {
+ auto Op = make_unique<ARMOperand>(k_RotateImmediate);
Op->RotImm.Imm = Imm;
Op->StartLoc = S;
Op->EndLoc = E;
return Op;
}
- static ARMOperand *CreateBitfield(unsigned LSB, unsigned Width,
- SMLoc S, SMLoc E) {
- ARMOperand *Op = new ARMOperand(k_BitfieldDescriptor);
+ static std::unique_ptr<ARMOperand>
+ CreateBitfield(unsigned LSB, unsigned Width, SMLoc S, SMLoc E) {
+ auto Op = make_unique<ARMOperand>(k_BitfieldDescriptor);
Op->Bitfield.LSB = LSB;
Op->Bitfield.Width = Width;
Op->StartLoc = S;
@@ -2602,8 +2590,8 @@ public:
return Op;
}
- static ARMOperand *
- CreateRegList(SmallVectorImpl<std::pair<unsigned, unsigned> > &Regs,
+ static std::unique_ptr<ARMOperand>
+ CreateRegList(SmallVectorImpl<std::pair<unsigned, unsigned>> &Regs,
SMLoc StartLoc, SMLoc EndLoc) {
assert (Regs.size() > 0 && "RegList contains no registers?");
KindTy Kind = k_RegisterList;
@@ -2617,7 +2605,7 @@ public:
// Sort based on the register encoding values.
array_pod_sort(Regs.begin(), Regs.end());
- ARMOperand *Op = new ARMOperand(Kind);
+ auto Op = make_unique<ARMOperand>(Kind);
for (SmallVectorImpl<std::pair<unsigned, unsigned> >::const_iterator
I = Regs.begin(), E = Regs.end(); I != E; ++I)
Op->Registers.push_back(I->second);
@@ -2626,9 +2614,11 @@ public:
return Op;
}
- static ARMOperand *CreateVectorList(unsigned RegNum, unsigned Count,
- bool isDoubleSpaced, SMLoc S, SMLoc E) {
- ARMOperand *Op = new ARMOperand(k_VectorList);
+ static std::unique_ptr<ARMOperand> CreateVectorList(unsigned RegNum,
+ unsigned Count,
+ bool isDoubleSpaced,
+ SMLoc S, SMLoc E) {
+ auto Op = make_unique<ARMOperand>(k_VectorList);
Op->VectorList.RegNum = RegNum;
Op->VectorList.Count = Count;
Op->VectorList.isDoubleSpaced = isDoubleSpaced;
@@ -2637,10 +2627,10 @@ public:
return Op;
}
- static ARMOperand *CreateVectorListAllLanes(unsigned RegNum, unsigned Count,
- bool isDoubleSpaced,
- SMLoc S, SMLoc E) {
- ARMOperand *Op = new ARMOperand(k_VectorListAllLanes);
+ static std::unique_ptr<ARMOperand>
+ CreateVectorListAllLanes(unsigned RegNum, unsigned Count, bool isDoubleSpaced,
+ SMLoc S, SMLoc E) {
+ auto Op = make_unique<ARMOperand>(k_VectorListAllLanes);
Op->VectorList.RegNum = RegNum;
Op->VectorList.Count = Count;
Op->VectorList.isDoubleSpaced = isDoubleSpaced;
@@ -2649,11 +2639,10 @@ public:
return Op;
}
- static ARMOperand *CreateVectorListIndexed(unsigned RegNum, unsigned Count,
- unsigned Index,
- bool isDoubleSpaced,
- SMLoc S, SMLoc E) {
- ARMOperand *Op = new ARMOperand(k_VectorListIndexed);
+ static std::unique_ptr<ARMOperand>
+ CreateVectorListIndexed(unsigned RegNum, unsigned Count, unsigned Index,
+ bool isDoubleSpaced, SMLoc S, SMLoc E) {
+ auto Op = make_unique<ARMOperand>(k_VectorListIndexed);
Op->VectorList.RegNum = RegNum;
Op->VectorList.Count = Count;
Op->VectorList.LaneIndex = Index;
@@ -2663,33 +2652,30 @@ public:
return Op;
}
- static ARMOperand *CreateVectorIndex(unsigned Idx, SMLoc S, SMLoc E,
- MCContext &Ctx) {
- ARMOperand *Op = new ARMOperand(k_VectorIndex);
+ static std::unique_ptr<ARMOperand>
+ CreateVectorIndex(unsigned Idx, SMLoc S, SMLoc E, MCContext &Ctx) {
+ auto Op = make_unique<ARMOperand>(k_VectorIndex);
Op->VectorIndex.Val = Idx;
Op->StartLoc = S;
Op->EndLoc = E;
return Op;
}
- static ARMOperand *CreateImm(const MCExpr *Val, SMLoc S, SMLoc E) {
- ARMOperand *Op = new ARMOperand(k_Immediate);
+ static std::unique_ptr<ARMOperand> CreateImm(const MCExpr *Val, SMLoc S,
+ SMLoc E) {
+ auto Op = make_unique<ARMOperand>(k_Immediate);
Op->Imm.Val = Val;
Op->StartLoc = S;
Op->EndLoc = E;
return Op;
}
- static ARMOperand *CreateMem(unsigned BaseRegNum,
- const MCConstantExpr *OffsetImm,
- unsigned OffsetRegNum,
- ARM_AM::ShiftOpc ShiftType,
- unsigned ShiftImm,
- unsigned Alignment,
- bool isNegative,
- SMLoc S, SMLoc E,
- SMLoc AlignmentLoc = SMLoc()) {
- ARMOperand *Op = new ARMOperand(k_Memory);
+ static std::unique_ptr<ARMOperand>
+ CreateMem(unsigned BaseRegNum, const MCConstantExpr *OffsetImm,
+ unsigned OffsetRegNum, ARM_AM::ShiftOpc ShiftType,
+ unsigned ShiftImm, unsigned Alignment, bool isNegative, SMLoc S,
+ SMLoc E, SMLoc AlignmentLoc = SMLoc()) {
+ auto Op = make_unique<ARMOperand>(k_Memory);
Op->Memory.BaseRegNum = BaseRegNum;
Op->Memory.OffsetImm = OffsetImm;
Op->Memory.OffsetRegNum = OffsetRegNum;
@@ -2703,11 +2689,10 @@ public:
return Op;
}
- static ARMOperand *CreatePostIdxReg(unsigned RegNum, bool isAdd,
- ARM_AM::ShiftOpc ShiftTy,
- unsigned ShiftImm,
- SMLoc S, SMLoc E) {
- ARMOperand *Op = new ARMOperand(k_PostIndexRegister);
+ static std::unique_ptr<ARMOperand>
+ CreatePostIdxReg(unsigned RegNum, bool isAdd, ARM_AM::ShiftOpc ShiftTy,
+ unsigned ShiftImm, SMLoc S, SMLoc E) {
+ auto Op = make_unique<ARMOperand>(k_PostIndexRegister);
Op->PostIdxReg.RegNum = RegNum;
Op->PostIdxReg.isAdd = isAdd;
Op->PostIdxReg.ShiftTy = ShiftTy;
@@ -2717,33 +2702,35 @@ public:
return Op;
}
- static ARMOperand *CreateMemBarrierOpt(ARM_MB::MemBOpt Opt, SMLoc S) {
- ARMOperand *Op = new ARMOperand(k_MemBarrierOpt);
+ static std::unique_ptr<ARMOperand> CreateMemBarrierOpt(ARM_MB::MemBOpt Opt,
+ SMLoc S) {
+ auto Op = make_unique<ARMOperand>(k_MemBarrierOpt);
Op->MBOpt.Val = Opt;
Op->StartLoc = S;
Op->EndLoc = S;
return Op;
}
- static ARMOperand *CreateInstSyncBarrierOpt(ARM_ISB::InstSyncBOpt Opt,
- SMLoc S) {
- ARMOperand *Op = new ARMOperand(k_InstSyncBarrierOpt);
+ static std::unique_ptr<ARMOperand>
+ CreateInstSyncBarrierOpt(ARM_ISB::InstSyncBOpt Opt, SMLoc S) {
+ auto Op = make_unique<ARMOperand>(k_InstSyncBarrierOpt);
Op->ISBOpt.Val = Opt;
Op->StartLoc = S;
Op->EndLoc = S;
return Op;
}
- static ARMOperand *CreateProcIFlags(ARM_PROC::IFlags IFlags, SMLoc S) {
- ARMOperand *Op = new ARMOperand(k_ProcIFlags);
+ static std::unique_ptr<ARMOperand> CreateProcIFlags(ARM_PROC::IFlags IFlags,
+ SMLoc S) {
+ auto Op = make_unique<ARMOperand>(k_ProcIFlags);
Op->IFlags.Val = IFlags;
Op->StartLoc = S;
Op->EndLoc = S;
return Op;
}
- static ARMOperand *CreateMSRMask(unsigned MMask, SMLoc S) {
- ARMOperand *Op = new ARMOperand(k_MSRMask);
+ static std::unique_ptr<ARMOperand> CreateMSRMask(unsigned MMask, SMLoc S) {
+ auto Op = make_unique<ARMOperand>(k_MSRMask);
Op->MMask.Val = MMask;
Op->StartLoc = S;
Op->EndLoc = S;
@@ -2947,8 +2934,7 @@ int ARMAsmParser::tryParseRegister() {
// occurs, return -1. An irrecoverable error is one where tokens have been
// consumed in the process of trying to parse the shifter (i.e., when it is
// indeed a shifter operand, but malformed).
-int ARMAsmParser::tryParseShiftRegister(
- SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+int ARMAsmParser::tryParseShiftRegister(OperandVector &Operands) {
SMLoc S = Parser.getTok().getLoc();
const AsmToken &Tok = Parser.getTok();
if (Tok.isNot(AsmToken::Identifier))
@@ -2972,7 +2958,8 @@ int ARMAsmParser::tryParseShiftRegister(
// The source register for the shift has already been added to the
// operand list, so we need to pop it off and combine it into the shifted
// register operand instead.
- std::unique_ptr<ARMOperand> PrevOp((ARMOperand*)Operands.pop_back_val());
+ std::unique_ptr<ARMOperand> PrevOp(
+ (ARMOperand *)Operands.pop_back_val().release());
if (!PrevOp->isReg())
return Error(PrevOp->getStartLoc(), "shift must be of a register");
int SrcReg = PrevOp->getReg();
@@ -3049,8 +3036,7 @@ int ARMAsmParser::tryParseShiftRegister(
///
/// TODO this is likely to change to allow different register types and or to
/// parse for a specific register type.
-bool ARMAsmParser::
-tryParseRegisterWithWriteBack(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+bool ARMAsmParser::tryParseRegisterWithWriteBack(OperandVector &Operands) {
const AsmToken &RegTok = Parser.getTok();
int RegNo = tryParseRegister();
if (RegNo == -1)
@@ -3096,17 +3082,25 @@ tryParseRegisterWithWriteBack(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
}
/// MatchCoprocessorOperandName - Try to parse a coprocessor related
-/// instruction with a symbolic operand name. Example: "p1", "p7", "c3",
-/// "c5", ...
+/// instruction with a symbolic operand name.
+/// We accept "crN" syntax for GAS compatibility.
+/// <operand-name> ::= <prefix><number>
+/// If CoprocOp is 'c', then:
+/// <prefix> ::= c | cr
+/// If CoprocOp is 'p', then :
+/// <prefix> ::= p
+/// <number> ::= integer in range [0, 15]
static int MatchCoprocessorOperandName(StringRef Name, char CoprocOp) {
// Use the same layout as the tablegen'erated register name matcher. Ugly,
// but efficient.
+ if (Name.size() < 2 || Name[0] != CoprocOp)
+ return -1;
+ Name = (Name[1] == 'r') ? Name.drop_front(2) : Name.drop_front();
+
switch (Name.size()) {
default: return -1;
- case 2:
- if (Name[0] != CoprocOp)
- return -1;
- switch (Name[1]) {
+ case 1:
+ switch (Name[0]) {
default: return -1;
case '0': return 0;
case '1': return 1;
@@ -3119,10 +3113,10 @@ static int MatchCoprocessorOperandName(StringRef Name, char CoprocOp) {
case '8': return 8;
case '9': return 9;
}
- case 3:
- if (Name[0] != CoprocOp || Name[1] != '1')
+ case 2:
+ if (Name[0] != '1')
return -1;
- switch (Name[2]) {
+ switch (Name[1]) {
default: return -1;
// p10 and p11 are invalid for coproc instructions (reserved for FP/NEON)
case '0': return CoprocOp == 'p'? -1: 10;
@@ -3136,8 +3130,8 @@ static int MatchCoprocessorOperandName(StringRef Name, char CoprocOp) {
}
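
The rewritten matcher above strips an optional GAS-compatible "cr" prefix before dispatching on the remaining digits. A self-contained restatement of the accepted grammar, as a sketch with std::string instead of StringRef and a loop in place of the switch used above:

#include <string>

int matchCoprocName(const std::string &Name, char CoprocOp) {
  if (Name.size() < 2 || Name[0] != CoprocOp)
    return -1;
  // "crN" is accepted as an alias for "cN" (GAS compatibility).
  std::string Digits = (Name[1] == 'r') ? Name.substr(2) : Name.substr(1);
  if (Digits.empty() || Digits.size() > 2)
    return -1;
  int Val = 0;
  for (char C : Digits) {
    if (C < '0' || C > '9')
      return -1;
    Val = Val * 10 + (C - '0');
  }
  if (Digits.size() == 2 && (Digits[0] != '1' || Val > 15))
    return -1;                        // only 10..15 are valid two-digit names
  if (CoprocOp == 'p' && (Val == 10 || Val == 11))
    return -1;                        // p10/p11 are reserved for FP/NEON
  return Val;
}
// matchCoprocName("c3", 'c') == 3, matchCoprocName("cr15", 'c') == 15,
// matchCoprocName("p10", 'p') == -1.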
/// parseITCondCode - Try to parse a condition code for an IT instruction.
-ARMAsmParser::OperandMatchResultTy ARMAsmParser::
-parseITCondCode(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ARMAsmParser::OperandMatchResultTy
+ARMAsmParser::parseITCondCode(OperandVector &Operands) {
SMLoc S = Parser.getTok().getLoc();
const AsmToken &Tok = Parser.getTok();
if (!Tok.is(AsmToken::Identifier))
@@ -3173,8 +3167,8 @@ parseITCondCode(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
/// parseCoprocNumOperand - Try to parse a coprocessor number operand. The
/// token must be an Identifier when called, and if it is a coprocessor
/// number, the token is eaten and the operand is added to the operand list.
-ARMAsmParser::OperandMatchResultTy ARMAsmParser::
-parseCoprocNumOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ARMAsmParser::OperandMatchResultTy
+ARMAsmParser::parseCoprocNumOperand(OperandVector &Operands) {
SMLoc S = Parser.getTok().getLoc();
const AsmToken &Tok = Parser.getTok();
if (Tok.isNot(AsmToken::Identifier))
@@ -3192,8 +3186,8 @@ parseCoprocNumOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
/// parseCoprocRegOperand - Try to parse a coprocessor register operand. The
/// token must be an Identifier when called, and if it is a coprocessor
/// number, the token is eaten and the operand is added to the operand list.
-ARMAsmParser::OperandMatchResultTy ARMAsmParser::
-parseCoprocRegOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ARMAsmParser::OperandMatchResultTy
+ARMAsmParser::parseCoprocRegOperand(OperandVector &Operands) {
SMLoc S = Parser.getTok().getLoc();
const AsmToken &Tok = Parser.getTok();
if (Tok.isNot(AsmToken::Identifier))
@@ -3210,8 +3204,8 @@ parseCoprocRegOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
/// parseCoprocOptionOperand - Try to parse a coprocessor option operand.
/// coproc_option : '{' imm0_255 '}'
-ARMAsmParser::OperandMatchResultTy ARMAsmParser::
-parseCoprocOptionOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ARMAsmParser::OperandMatchResultTy
+ARMAsmParser::parseCoprocOptionOperand(OperandVector &Operands) {
SMLoc S = Parser.getTok().getLoc();
// If this isn't a '{', this isn't a coprocessor immediate operand.
@@ -3288,8 +3282,7 @@ static unsigned getDRegFromQReg(unsigned QReg) {
}
/// Parse a register list.
-bool ARMAsmParser::
-parseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+bool ARMAsmParser::parseRegisterList(OperandVector &Operands) {
assert(Parser.getTok().is(AsmToken::LCurly) &&
"Token is not a Left Curly Brace");
SMLoc S = Parser.getTok().getLoc();
@@ -3470,8 +3463,8 @@ parseVectorLane(VectorLaneTy &LaneKind, unsigned &Index, SMLoc &EndLoc) {
}
// parse a vector register list
-ARMAsmParser::OperandMatchResultTy ARMAsmParser::
-parseVectorList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ARMAsmParser::OperandMatchResultTy
+ARMAsmParser::parseVectorList(OperandVector &Operands) {
VectorLaneTy LaneKind;
unsigned LaneIndex;
SMLoc S = Parser.getTok().getLoc();
@@ -3721,8 +3714,8 @@ parseVectorList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
}
/// parseMemBarrierOptOperand - Try to parse DSB/DMB data barrier options.
-ARMAsmParser::OperandMatchResultTy ARMAsmParser::
-parseMemBarrierOptOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ARMAsmParser::OperandMatchResultTy
+ARMAsmParser::parseMemBarrierOptOperand(OperandVector &Operands) {
SMLoc S = Parser.getTok().getLoc();
const AsmToken &Tok = Parser.getTok();
unsigned Opt;
@@ -3792,8 +3785,8 @@ parseMemBarrierOptOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
}
/// parseInstSyncBarrierOptOperand - Try to parse ISB inst sync barrier options.
-ARMAsmParser::OperandMatchResultTy ARMAsmParser::
-parseInstSyncBarrierOptOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ARMAsmParser::OperandMatchResultTy
+ARMAsmParser::parseInstSyncBarrierOptOperand(OperandVector &Operands) {
SMLoc S = Parser.getTok().getLoc();
const AsmToken &Tok = Parser.getTok();
unsigned Opt;
@@ -3843,8 +3836,8 @@ parseInstSyncBarrierOptOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
/// parseProcIFlagsOperand - Try to parse iflags from CPS instruction.
-ARMAsmParser::OperandMatchResultTy ARMAsmParser::
-parseProcIFlagsOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ARMAsmParser::OperandMatchResultTy
+ARMAsmParser::parseProcIFlagsOperand(OperandVector &Operands) {
SMLoc S = Parser.getTok().getLoc();
const AsmToken &Tok = Parser.getTok();
if (!Tok.is(AsmToken::Identifier))
@@ -3877,8 +3870,8 @@ parseProcIFlagsOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
}
/// parseMSRMaskOperand - Try to parse mask flags from MSR instruction.
-ARMAsmParser::OperandMatchResultTy ARMAsmParser::
-parseMSRMaskOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ARMAsmParser::OperandMatchResultTy
+ARMAsmParser::parseMSRMaskOperand(OperandVector &Operands) {
SMLoc S = Parser.getTok().getLoc();
const AsmToken &Tok = Parser.getTok();
if (!Tok.is(AsmToken::Identifier))
@@ -4005,9 +3998,9 @@ parseMSRMaskOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
return MatchOperand_Success;
}
-ARMAsmParser::OperandMatchResultTy ARMAsmParser::
-parsePKHImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands, StringRef Op,
- int Low, int High) {
+ARMAsmParser::OperandMatchResultTy
+ARMAsmParser::parsePKHImm(OperandVector &Operands, StringRef Op, int Low,
+ int High) {
const AsmToken &Tok = Parser.getTok();
if (Tok.isNot(AsmToken::Identifier)) {
Error(Parser.getTok().getLoc(), Op + " operand expected.");
@@ -4053,8 +4046,8 @@ parsePKHImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands, StringRef Op,
return MatchOperand_Success;
}
-ARMAsmParser::OperandMatchResultTy ARMAsmParser::
-parseSetEndImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ARMAsmParser::OperandMatchResultTy
+ARMAsmParser::parseSetEndImm(OperandVector &Operands) {
const AsmToken &Tok = Parser.getTok();
SMLoc S = Tok.getLoc();
if (Tok.isNot(AsmToken::Identifier)) {
@@ -4082,8 +4075,8 @@ parseSetEndImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
/// lsl #n 'n' in [0,31]
/// asr #n 'n' in [1,32]
/// n == 32 encoded as n == 0.
-ARMAsmParser::OperandMatchResultTy ARMAsmParser::
-parseShifterImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ARMAsmParser::OperandMatchResultTy
+ARMAsmParser::parseShifterImm(OperandVector &Operands) {
const AsmToken &Tok = Parser.getTok();
SMLoc S = Tok.getLoc();
if (Tok.isNot(AsmToken::Identifier)) {
@@ -4152,8 +4145,8 @@ parseShifterImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
/// parseRotImm - Parse the shifter immediate operand for SXTB/UXTB family
/// of instructions. Legal values are:
/// ror #n 'n' in {0, 8, 16, 24}
-ARMAsmParser::OperandMatchResultTy ARMAsmParser::
-parseRotImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ARMAsmParser::OperandMatchResultTy
+ARMAsmParser::parseRotImm(OperandVector &Operands) {
const AsmToken &Tok = Parser.getTok();
SMLoc S = Tok.getLoc();
if (Tok.isNot(AsmToken::Identifier))
@@ -4198,8 +4191,8 @@ parseRotImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
return MatchOperand_Success;
}
-ARMAsmParser::OperandMatchResultTy ARMAsmParser::
-parseBitfield(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ARMAsmParser::OperandMatchResultTy
+ARMAsmParser::parseBitfield(OperandVector &Operands) {
SMLoc S = Parser.getTok().getLoc();
// The bitfield descriptor is really two operands, the LSB and the width.
if (Parser.getTok().isNot(AsmToken::Hash) &&
@@ -4266,8 +4259,8 @@ parseBitfield(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
return MatchOperand_Success;
}
-ARMAsmParser::OperandMatchResultTy ARMAsmParser::
-parsePostIdxReg(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ARMAsmParser::OperandMatchResultTy
+ARMAsmParser::parsePostIdxReg(OperandVector &Operands) {
// Check for a post-index addressing register operand. Specifically:
// postidx_reg := '+' register {, shift}
// | '-' register {, shift}
@@ -4315,8 +4308,8 @@ parsePostIdxReg(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
return MatchOperand_Success;
}
-ARMAsmParser::OperandMatchResultTy ARMAsmParser::
-parseAM3Offset(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ARMAsmParser::OperandMatchResultTy
+ARMAsmParser::parseAM3Offset(OperandVector &Operands) {
// Check for a post-index addressing register operand. Specifically:
// am3offset := '+' register
// | '-' register
@@ -4388,26 +4381,24 @@ parseAM3Offset(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
/// Convert parsed operands to MCInst. Needed here because this instruction
/// only has two register operands, but multiplication is commutative so
/// assemblers should accept both "mul rD, rN, rD" and "mul rD, rD, rN".
-void ARMAsmParser::
-cvtThumbMultiply(MCInst &Inst,
- const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
- ((ARMOperand*)Operands[3])->addRegOperands(Inst, 1);
- ((ARMOperand*)Operands[1])->addCCOutOperands(Inst, 1);
+void ARMAsmParser::cvtThumbMultiply(MCInst &Inst,
+ const OperandVector &Operands) {
+ ((ARMOperand &)*Operands[3]).addRegOperands(Inst, 1);
+ ((ARMOperand &)*Operands[1]).addCCOutOperands(Inst, 1);
// If we have a three-operand form, make sure to set Rn to be the operand
// that isn't the same as Rd.
unsigned RegOp = 4;
if (Operands.size() == 6 &&
- ((ARMOperand*)Operands[4])->getReg() ==
- ((ARMOperand*)Operands[3])->getReg())
+ ((ARMOperand &)*Operands[4]).getReg() ==
+ ((ARMOperand &)*Operands[3]).getReg())
RegOp = 5;
- ((ARMOperand*)Operands[RegOp])->addRegOperands(Inst, 1);
+ ((ARMOperand &)*Operands[RegOp]).addRegOperands(Inst, 1);
Inst.addOperand(Inst.getOperand(0));
- ((ARMOperand*)Operands[2])->addCondCodeOperands(Inst, 2);
+ ((ARMOperand &)*Operands[2]).addCondCodeOperands(Inst, 2);
}
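cvtThumbMultiply exploits the commutativity noted in the comment: in the three-operand form, the source operand that differs from Rd is taken as Rn so the two-register 16-bit encoding can be used. A toy version of that selection on plain register numbers (pickRn is an invented name, and the single check it asserts is assumed to have been established earlier in the parser):

    #include <cassert>

    // Given "mul rd, srcA, srcB" where one source is expected to equal rd,
    // pick the other source as Rn. Sketch only.
    unsigned pickRn(unsigned Rd, unsigned SrcA, unsigned SrcB) {
      assert((SrcA == Rd || SrcB == Rd) &&
             "assumed: one source equals Rd, checked before conversion");
      return (SrcA == Rd) ? SrcB : SrcA;
    }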
-void ARMAsmParser::
-cvtThumbBranches(MCInst &Inst,
- const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+void ARMAsmParser::cvtThumbBranches(MCInst &Inst,
+ const OperandVector &Operands) {
int CondOp = -1, ImmOp = -1;
switch(Inst.getOpcode()) {
case ARM::tB:
@@ -4430,7 +4421,7 @@ cvtThumbBranches(MCInst &Inst,
} else {
// outside IT blocks we can only have unconditional branches with AL
// condition code or conditional branches with non-AL condition code
- unsigned Cond = static_cast<ARMOperand*>(Operands[CondOp])->getCondCode();
+ unsigned Cond = static_cast<ARMOperand &>(*Operands[CondOp]).getCondCode();
switch(Inst.getOpcode()) {
case ARM::tB:
case ARM::tBcc:
@@ -4447,27 +4438,26 @@ cvtThumbBranches(MCInst &Inst,
switch(Inst.getOpcode()) {
// classify tB as either t2B or t1B based on range of immediate operand
case ARM::tB: {
- ARMOperand* op = static_cast<ARMOperand*>(Operands[ImmOp]);
- if(!op->isSignedOffset<11, 1>() && isThumbTwo())
+ ARMOperand &op = static_cast<ARMOperand &>(*Operands[ImmOp]);
+ if (!op.isSignedOffset<11, 1>() && isThumbTwo())
Inst.setOpcode(ARM::t2B);
break;
}
// classify tBcc as either t2Bcc or t1Bcc based on range of immediate operand
case ARM::tBcc: {
- ARMOperand* op = static_cast<ARMOperand*>(Operands[ImmOp]);
- if(!op->isSignedOffset<8, 1>() && isThumbTwo())
+ ARMOperand &op = static_cast<ARMOperand &>(*Operands[ImmOp]);
+ if (!op.isSignedOffset<8, 1>() && isThumbTwo())
Inst.setOpcode(ARM::t2Bcc);
break;
}
}
- ((ARMOperand*)Operands[ImmOp])->addImmOperands(Inst, 1);
- ((ARMOperand*)Operands[CondOp])->addCondCodeOperands(Inst, 2);
+ ((ARMOperand &)*Operands[ImmOp]).addImmOperands(Inst, 1);
+ ((ARMOperand &)*Operands[CondOp]).addCondCodeOperands(Inst, 2);
}
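cvtThumbBranches, together with the later validateInstruction checks, chooses between the narrow and wide Thumb branch encodings by testing whether the target offset fits a signed, halfword-aligned field: tB and tBcc use isSignedOffset<11, 1> and <8, 1>, t2B and t2Bcc use <24, 1> and <20, 1>. A rough standalone version of that range test, assuming isSignedOffset<Bits, Scale> means "a multiple of 2^Scale that fits a Bits-wide signed field after scaling":

    #include <cstdint>

    // Does Offset fit a Bits-wide signed field whose value is scaled by
    // 2^Scale bytes? Assumed semantics of isSignedOffset<Bits, Scale>.
    constexpr bool fitsSignedOffset(int64_t Offset, unsigned Bits,
                                    unsigned Scale) {
      const int64_t Align = int64_t(1) << Scale;
      const int64_t Min = -Align * (int64_t(1) << (Bits - 1));
      const int64_t Max = Align * ((int64_t(1) << (Bits - 1)) - 1);
      return Offset % Align == 0 && Offset >= Min && Offset <= Max;
    }

    // A conditional branch of +2000 bytes is out of range for the 16-bit
    // tBcc but fits the 32-bit t2Bcc, so the parser widens the opcode.
    static_assert(!fitsSignedOffset(2000, 8, 1), "too far for tBcc");
    static_assert(fitsSignedOffset(2000, 20, 1), "fits t2Bcc");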
/// Parse an ARM memory expression; return false if successful, otherwise
/// return true and emit an error. The first token must be a '[' when called.
-bool ARMAsmParser::
-parseMemory(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+bool ARMAsmParser::parseMemory(OperandVector &Operands) {
SMLoc S, E;
assert(Parser.getTok().is(AsmToken::LBrac) &&
"Token is not a Left Bracket");
@@ -4717,8 +4707,8 @@ bool ARMAsmParser::parseMemRegOffsetShift(ARM_AM::ShiftOpc &St,
}
/// parseFPImm - A floating point immediate expression operand.
-ARMAsmParser::OperandMatchResultTy ARMAsmParser::
-parseFPImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ARMAsmParser::OperandMatchResultTy
+ARMAsmParser::parseFPImm(OperandVector &Operands) {
// Anything that can accept a floating point constant as an operand
// needs to go through here, as the regular parseExpression is
// integer only.
@@ -4744,12 +4734,12 @@ parseFPImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
// integer constant. Make sure we don't try to parse an FPImm
// for these:
// vmov.i{8|16|32|64} <dreg|qreg>, #imm
- ARMOperand *TyOp = static_cast<ARMOperand*>(Operands[2]);
- bool isVmovf = TyOp->isToken() && (TyOp->getToken() == ".f32" ||
- TyOp->getToken() == ".f64");
- ARMOperand *Mnemonic = static_cast<ARMOperand*>(Operands[0]);
- bool isFconst = Mnemonic->isToken() && (Mnemonic->getToken() == "fconstd" ||
- Mnemonic->getToken() == "fconsts");
+ ARMOperand &TyOp = static_cast<ARMOperand &>(*Operands[2]);
+ bool isVmovf = TyOp.isToken() &&
+ (TyOp.getToken() == ".f32" || TyOp.getToken() == ".f64");
+ ARMOperand &Mnemonic = static_cast<ARMOperand &>(*Operands[0]);
+ bool isFconst = Mnemonic.isToken() && (Mnemonic.getToken() == "fconstd" ||
+ Mnemonic.getToken() == "fconsts");
if (!(isVmovf || isFconst))
return MatchOperand_NoMatch;
@@ -4798,8 +4788,7 @@ parseFPImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
/// Parse an ARM instruction operand. For now this parses the operand regardless
/// of the mnemonic.
-bool ARMAsmParser::parseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
- StringRef Mnemonic) {
+bool ARMAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
SMLoc S, E;
// Check if the current operand has a custom associated parser, if so, try to
@@ -5125,7 +5114,7 @@ getMnemonicAcceptInfo(StringRef Mnemonic, StringRef FullInst,
}
bool ARMAsmParser::shouldOmitCCOutOperand(StringRef Mnemonic,
- SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ OperandVector &Operands) {
// FIXME: This is all horribly hacky. We really need a better way to deal
// with optional operands like this in the matcher table.
@@ -5138,17 +5127,17 @@ bool ARMAsmParser::shouldOmitCCOutOperand(StringRef Mnemonic,
// conditionally adding the cc_out in the first place because we need
// to check the type of the parsed immediate operand.
if (Mnemonic == "mov" && Operands.size() > 4 && !isThumb() &&
- !static_cast<ARMOperand*>(Operands[4])->isARMSOImm() &&
- static_cast<ARMOperand*>(Operands[4])->isImm0_65535Expr() &&
- static_cast<ARMOperand*>(Operands[1])->getReg() == 0)
+ !static_cast<ARMOperand &>(*Operands[4]).isARMSOImm() &&
+ static_cast<ARMOperand &>(*Operands[4]).isImm0_65535Expr() &&
+ static_cast<ARMOperand &>(*Operands[1]).getReg() == 0)
return true;
// Register-register 'add' for thumb does not have a cc_out operand
// when there are only two register operands.
if (isThumb() && Mnemonic == "add" && Operands.size() == 5 &&
- static_cast<ARMOperand*>(Operands[3])->isReg() &&
- static_cast<ARMOperand*>(Operands[4])->isReg() &&
- static_cast<ARMOperand*>(Operands[1])->getReg() == 0)
+ static_cast<ARMOperand &>(*Operands[3]).isReg() &&
+ static_cast<ARMOperand &>(*Operands[4]).isReg() &&
+ static_cast<ARMOperand &>(*Operands[1]).getReg() == 0)
return true;
// Register-register 'add' for thumb does not have a cc_out operand
// when it's an ADD Rdm, SP, {Rdm|#imm0_255} instruction. We do
@@ -5156,13 +5145,12 @@ bool ARMAsmParser::shouldOmitCCOutOperand(StringRef Mnemonic,
// that can handle a different range and has a cc_out operand.
if (((isThumb() && Mnemonic == "add") ||
(isThumbTwo() && Mnemonic == "sub")) &&
- Operands.size() == 6 &&
- static_cast<ARMOperand*>(Operands[3])->isReg() &&
- static_cast<ARMOperand*>(Operands[4])->isReg() &&
- static_cast<ARMOperand*>(Operands[4])->getReg() == ARM::SP &&
- static_cast<ARMOperand*>(Operands[1])->getReg() == 0 &&
- ((Mnemonic == "add" &&static_cast<ARMOperand*>(Operands[5])->isReg()) ||
- static_cast<ARMOperand*>(Operands[5])->isImm0_1020s4()))
+ Operands.size() == 6 && static_cast<ARMOperand &>(*Operands[3]).isReg() &&
+ static_cast<ARMOperand &>(*Operands[4]).isReg() &&
+ static_cast<ARMOperand &>(*Operands[4]).getReg() == ARM::SP &&
+ static_cast<ARMOperand &>(*Operands[1]).getReg() == 0 &&
+ ((Mnemonic == "add" && static_cast<ARMOperand &>(*Operands[5]).isReg()) ||
+ static_cast<ARMOperand &>(*Operands[5]).isImm0_1020s4()))
return true;
// For Thumb2, add/sub immediate does not have a cc_out operand for the
// imm0_4095 variant. That's the least-preferred variant when
@@ -5170,23 +5158,22 @@ bool ARMAsmParser::shouldOmitCCOutOperand(StringRef Mnemonic,
// should remove the cc_out operand, we have to explicitly check that
// it's not one of the other variants. Ugh.
if (isThumbTwo() && (Mnemonic == "add" || Mnemonic == "sub") &&
- Operands.size() == 6 &&
- static_cast<ARMOperand*>(Operands[3])->isReg() &&
- static_cast<ARMOperand*>(Operands[4])->isReg() &&
- static_cast<ARMOperand*>(Operands[5])->isImm()) {
+ Operands.size() == 6 && static_cast<ARMOperand &>(*Operands[3]).isReg() &&
+ static_cast<ARMOperand &>(*Operands[4]).isReg() &&
+ static_cast<ARMOperand &>(*Operands[5]).isImm()) {
// Nest conditions rather than one big 'if' statement for readability.
//
// If both registers are low, we're in an IT block, and the immediate is
// in range, we should use encoding T1 instead, which has a cc_out.
if (inITBlock() &&
- isARMLowRegister(static_cast<ARMOperand*>(Operands[3])->getReg()) &&
- isARMLowRegister(static_cast<ARMOperand*>(Operands[4])->getReg()) &&
- static_cast<ARMOperand*>(Operands[5])->isImm0_7())
+ isARMLowRegister(static_cast<ARMOperand &>(*Operands[3]).getReg()) &&
+ isARMLowRegister(static_cast<ARMOperand &>(*Operands[4]).getReg()) &&
+ static_cast<ARMOperand &>(*Operands[5]).isImm0_7())
return false;
// Check against T3. If the second register is the PC, this is an
// alternate form of ADR, which uses encoding T4, so check for that too.
- if (static_cast<ARMOperand*>(Operands[4])->getReg() != ARM::PC &&
- static_cast<ARMOperand*>(Operands[5])->isT2SOImm())
+ if (static_cast<ARMOperand &>(*Operands[4]).getReg() != ARM::PC &&
+ static_cast<ARMOperand &>(*Operands[5]).isT2SOImm())
return false;
// Otherwise, we use encoding T4, which does not have a cc_out
@@ -5198,35 +5185,34 @@ bool ARMAsmParser::shouldOmitCCOutOperand(StringRef Mnemonic,
// if we have a "mul" mnemonic in Thumb mode, check if we'll be able to
// use the 16-bit encoding or not.
if (isThumbTwo() && Mnemonic == "mul" && Operands.size() == 6 &&
- static_cast<ARMOperand*>(Operands[1])->getReg() == 0 &&
- static_cast<ARMOperand*>(Operands[3])->isReg() &&
- static_cast<ARMOperand*>(Operands[4])->isReg() &&
- static_cast<ARMOperand*>(Operands[5])->isReg() &&
+ static_cast<ARMOperand &>(*Operands[1]).getReg() == 0 &&
+ static_cast<ARMOperand &>(*Operands[3]).isReg() &&
+ static_cast<ARMOperand &>(*Operands[4]).isReg() &&
+ static_cast<ARMOperand &>(*Operands[5]).isReg() &&
// If the registers aren't low regs, the destination reg isn't the
// same as one of the source regs, or the cc_out operand is zero
// outside of an IT block, we have to use the 32-bit encoding, so
// remove the cc_out operand.
- (!isARMLowRegister(static_cast<ARMOperand*>(Operands[3])->getReg()) ||
- !isARMLowRegister(static_cast<ARMOperand*>(Operands[4])->getReg()) ||
- !isARMLowRegister(static_cast<ARMOperand*>(Operands[5])->getReg()) ||
- !inITBlock() ||
- (static_cast<ARMOperand*>(Operands[3])->getReg() !=
- static_cast<ARMOperand*>(Operands[5])->getReg() &&
- static_cast<ARMOperand*>(Operands[3])->getReg() !=
- static_cast<ARMOperand*>(Operands[4])->getReg())))
+ (!isARMLowRegister(static_cast<ARMOperand &>(*Operands[3]).getReg()) ||
+ !isARMLowRegister(static_cast<ARMOperand &>(*Operands[4]).getReg()) ||
+ !isARMLowRegister(static_cast<ARMOperand &>(*Operands[5]).getReg()) ||
+ !inITBlock() || (static_cast<ARMOperand &>(*Operands[3]).getReg() !=
+ static_cast<ARMOperand &>(*Operands[5]).getReg() &&
+ static_cast<ARMOperand &>(*Operands[3]).getReg() !=
+ static_cast<ARMOperand &>(*Operands[4]).getReg())))
return true;
// Also check the 'mul' syntax variant that doesn't specify an explicit
// destination register.
if (isThumbTwo() && Mnemonic == "mul" && Operands.size() == 5 &&
- static_cast<ARMOperand*>(Operands[1])->getReg() == 0 &&
- static_cast<ARMOperand*>(Operands[3])->isReg() &&
- static_cast<ARMOperand*>(Operands[4])->isReg() &&
+ static_cast<ARMOperand &>(*Operands[1]).getReg() == 0 &&
+ static_cast<ARMOperand &>(*Operands[3]).isReg() &&
+ static_cast<ARMOperand &>(*Operands[4]).isReg() &&
// If the registers aren't low regs or the cc_out operand is zero
// outside of an IT block, we have to use the 32-bit encoding, so
// remove the cc_out operand.
- (!isARMLowRegister(static_cast<ARMOperand*>(Operands[3])->getReg()) ||
- !isARMLowRegister(static_cast<ARMOperand*>(Operands[4])->getReg()) ||
+ (!isARMLowRegister(static_cast<ARMOperand &>(*Operands[3]).getReg()) ||
+ !isARMLowRegister(static_cast<ARMOperand &>(*Operands[4]).getReg()) ||
!inITBlock()))
return true;
@@ -5239,32 +5225,32 @@ bool ARMAsmParser::shouldOmitCCOutOperand(StringRef Mnemonic,
// anyway.
if (isThumb() && (Mnemonic == "add" || Mnemonic == "sub") &&
(Operands.size() == 5 || Operands.size() == 6) &&
- static_cast<ARMOperand*>(Operands[3])->isReg() &&
- static_cast<ARMOperand*>(Operands[3])->getReg() == ARM::SP &&
- static_cast<ARMOperand*>(Operands[1])->getReg() == 0 &&
- (static_cast<ARMOperand*>(Operands[4])->isImm() ||
+ static_cast<ARMOperand &>(*Operands[3]).isReg() &&
+ static_cast<ARMOperand &>(*Operands[3]).getReg() == ARM::SP &&
+ static_cast<ARMOperand &>(*Operands[1]).getReg() == 0 &&
+ (static_cast<ARMOperand &>(*Operands[4]).isImm() ||
(Operands.size() == 6 &&
- static_cast<ARMOperand*>(Operands[5])->isImm())))
+ static_cast<ARMOperand &>(*Operands[5]).isImm())))
return true;
return false;
}
-bool ARMAsmParser::shouldOmitPredicateOperand(
- StringRef Mnemonic, SmallVectorImpl<MCParsedAsmOperand *> &Operands) {
+bool ARMAsmParser::shouldOmitPredicateOperand(StringRef Mnemonic,
+ OperandVector &Operands) {
// VRINT{Z, R, X} have a predicate operand in VFP, but not in NEON
unsigned RegIdx = 3;
if ((Mnemonic == "vrintz" || Mnemonic == "vrintx" || Mnemonic == "vrintr") &&
- static_cast<ARMOperand *>(Operands[2])->getToken() == ".f32") {
- if (static_cast<ARMOperand *>(Operands[3])->isToken() &&
- static_cast<ARMOperand *>(Operands[3])->getToken() == ".f32")
+ static_cast<ARMOperand &>(*Operands[2]).getToken() == ".f32") {
+ if (static_cast<ARMOperand &>(*Operands[3]).isToken() &&
+ static_cast<ARMOperand &>(*Operands[3]).getToken() == ".f32")
RegIdx = 4;
- if (static_cast<ARMOperand *>(Operands[RegIdx])->isReg() &&
- (ARMMCRegisterClasses[ARM::DPRRegClassID]
- .contains(static_cast<ARMOperand *>(Operands[RegIdx])->getReg()) ||
- ARMMCRegisterClasses[ARM::QPRRegClassID]
- .contains(static_cast<ARMOperand *>(Operands[RegIdx])->getReg())))
+ if (static_cast<ARMOperand &>(*Operands[RegIdx]).isReg() &&
+ (ARMMCRegisterClasses[ARM::DPRRegClassID].contains(
+ static_cast<ARMOperand &>(*Operands[RegIdx]).getReg()) ||
+ ARMMCRegisterClasses[ARM::QPRRegClassID].contains(
+ static_cast<ARMOperand &>(*Operands[RegIdx]).getReg())))
return true;
}
return false;
@@ -5309,8 +5295,7 @@ static bool RequiresVFPRegListValidation(StringRef Inst,
/// Parse an ARM instruction mnemonic followed by its operands.
bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
- SMLoc NameLoc,
- SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ SMLoc NameLoc, OperandVector &Operands) {
// FIXME: Can this be done via tablegen in some fashion?
bool RequireVFPRegisterListCheck;
bool AcceptSinglePrecisionOnly;
@@ -5489,12 +5474,12 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
Parser.Lex(); // Consume the EndOfStatement
if (RequireVFPRegisterListCheck) {
- ARMOperand *Op = static_cast<ARMOperand*>(Operands.back());
- if (AcceptSinglePrecisionOnly && !Op->isSPRRegList())
- return Error(Op->getStartLoc(),
+ ARMOperand &Op = static_cast<ARMOperand &>(*Operands.back());
+ if (AcceptSinglePrecisionOnly && !Op.isSPRRegList())
+ return Error(Op.getStartLoc(),
"VFP/Neon single precision register expected");
- if (AcceptDoublePrecisionOnly && !Op->isDPRRegList())
- return Error(Op->getStartLoc(),
+ if (AcceptDoublePrecisionOnly && !Op.isDPRRegList())
+ return Error(Op.getStartLoc(),
"VFP/Neon double precision register expected");
}
@@ -5505,20 +5490,14 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
  // try to remove a cc_out operand that was explicitly set on the
// mnemonic, of course (CarrySetting == true). Reason number #317 the
// table driven matcher doesn't fit well with the ARM instruction set.
- if (!CarrySetting && shouldOmitCCOutOperand(Mnemonic, Operands)) {
- ARMOperand *Op = static_cast<ARMOperand*>(Operands[1]);
+ if (!CarrySetting && shouldOmitCCOutOperand(Mnemonic, Operands))
Operands.erase(Operands.begin() + 1);
- delete Op;
- }
// Some instructions have the same mnemonic, but don't always
// have a predicate. Distinguish them here and delete the
// predicate if needed.
- if (shouldOmitPredicateOperand(Mnemonic, Operands)) {
- ARMOperand *Op = static_cast<ARMOperand*>(Operands[1]);
+ if (shouldOmitPredicateOperand(Mnemonic, Operands))
Operands.erase(Operands.begin() + 1);
- delete Op;
- }
  // ARM mode 'blx' needs special handling, as the register operand version
// is predicable, but the label operand version is not. So, we can't rely
@@ -5526,11 +5505,8 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
// a k_CondCode operand in the list. If we're trying to match the label
// version, remove the k_CondCode operand here.
if (!isThumb() && Mnemonic == "blx" && Operands.size() == 3 &&
- static_cast<ARMOperand*>(Operands[2])->isImm()) {
- ARMOperand *Op = static_cast<ARMOperand*>(Operands[1]);
+ static_cast<ARMOperand &>(*Operands[2]).isImm())
Operands.erase(Operands.begin() + 1);
- delete Op;
- }
// Adjust operands of ldrexd/strexd to MCK_GPRPair.
// ldrexd/strexd require even/odd GPR pair. To enforce this constraint,
@@ -5543,53 +5519,50 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
Mnemonic == "stlexd")) {
bool isLoad = (Mnemonic == "ldrexd" || Mnemonic == "ldaexd");
unsigned Idx = isLoad ? 2 : 3;
- ARMOperand* Op1 = static_cast<ARMOperand*>(Operands[Idx]);
- ARMOperand* Op2 = static_cast<ARMOperand*>(Operands[Idx+1]);
+ ARMOperand &Op1 = static_cast<ARMOperand &>(*Operands[Idx]);
+ ARMOperand &Op2 = static_cast<ARMOperand &>(*Operands[Idx + 1]);
const MCRegisterClass& MRC = MRI->getRegClass(ARM::GPRRegClassID);
// Adjust only if Op1 and Op2 are GPRs.
- if (Op1->isReg() && Op2->isReg() && MRC.contains(Op1->getReg()) &&
- MRC.contains(Op2->getReg())) {
- unsigned Reg1 = Op1->getReg();
- unsigned Reg2 = Op2->getReg();
+ if (Op1.isReg() && Op2.isReg() && MRC.contains(Op1.getReg()) &&
+ MRC.contains(Op2.getReg())) {
+ unsigned Reg1 = Op1.getReg();
+ unsigned Reg2 = Op2.getReg();
unsigned Rt = MRI->getEncodingValue(Reg1);
unsigned Rt2 = MRI->getEncodingValue(Reg2);
// Rt2 must be Rt + 1 and Rt must be even.
if (Rt + 1 != Rt2 || (Rt & 1)) {
- Error(Op2->getStartLoc(), isLoad ?
- "destination operands must be sequential" :
- "source operands must be sequential");
+ Error(Op2.getStartLoc(), isLoad
+ ? "destination operands must be sequential"
+ : "source operands must be sequential");
return true;
}
unsigned NewReg = MRI->getMatchingSuperReg(Reg1, ARM::gsub_0,
&(MRI->getRegClass(ARM::GPRPairRegClassID)));
- Operands.erase(Operands.begin() + Idx, Operands.begin() + Idx + 2);
- Operands.insert(Operands.begin() + Idx, ARMOperand::CreateReg(
- NewReg, Op1->getStartLoc(), Op2->getEndLoc()));
- delete Op1;
- delete Op2;
+ Operands[Idx] =
+ ARMOperand::CreateReg(NewReg, Op1.getStartLoc(), Op2.getEndLoc());
+ Operands.erase(Operands.begin() + Idx + 1);
}
}
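The ldrexd/strexd adjustment above enforces the architectural constraint before folding the two GPRs into a single GPRPair operand: the first register's encoding must be even and the second must be exactly the next register. A standalone sketch of that check on raw encoding values (the helper name is invented):

    // Rt must be even and Rt2 must be Rt + 1 (e.g. r0/r1, r2/r3) before the
    // parser can rewrite the pair as one GPRPair operand. Mirrors the
    // "Rt + 1 != Rt2 || (Rt & 1)" error condition in the hunk above.
    bool isValidExclusivePair(unsigned Rt, unsigned Rt2) {
      return (Rt % 2 == 0) && (Rt2 == Rt + 1);
    }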
// GNU Assembler extension (compatibility)
if ((Mnemonic == "ldrd" || Mnemonic == "strd")) {
- ARMOperand *Op2 = static_cast<ARMOperand *>(Operands[2]);
- ARMOperand *Op3 = static_cast<ARMOperand *>(Operands[3]);
- if (Op3->isMem()) {
- assert(Op2->isReg() && "expected register argument");
+ ARMOperand &Op2 = static_cast<ARMOperand &>(*Operands[2]);
+ ARMOperand &Op3 = static_cast<ARMOperand &>(*Operands[3]);
+ if (Op3.isMem()) {
+ assert(Op2.isReg() && "expected register argument");
unsigned SuperReg = MRI->getMatchingSuperReg(
- Op2->getReg(), ARM::gsub_0, &MRI->getRegClass(ARM::GPRPairRegClassID));
+ Op2.getReg(), ARM::gsub_0, &MRI->getRegClass(ARM::GPRPairRegClassID));
assert(SuperReg && "expected register pair");
unsigned PairedReg = MRI->getSubReg(SuperReg, ARM::gsub_1);
- Operands.insert(Operands.begin() + 3,
- ARMOperand::CreateReg(PairedReg,
- Op2->getStartLoc(),
- Op2->getEndLoc()));
+ Operands.insert(
+ Operands.begin() + 3,
+ ARMOperand::CreateReg(PairedReg, Op2.getStartLoc(), Op2.getEndLoc()));
}
}
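For the GAS-compatible ldrd/strd form that names only the first register, the parser synthesizes the second operand by going through the GPRPair super-register and taking its gsub_1 half. Numerically that paired register is simply the next one up, which a shortcut sketch can show (the real code uses MCRegisterInfo rather than arithmetic on encoding values):

    // Given the even first register of an ldrd/strd pair (by encoding value),
    // the implied second operand is the next register: "ldrd r4, [addr]"
    // behaves like "ldrd r4, r5, [addr]". Illustrative shortcut only.
    unsigned impliedPairedReg(unsigned EvenRegEncoding) {
      return EvenRegEncoding + 1;
    }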
@@ -5599,19 +5572,13 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
// so the Mnemonic is the original name "subs" and delete the predicate
// operand so it will match the table entry.
if (isThumbTwo() && Mnemonic == "sub" && Operands.size() == 6 &&
- static_cast<ARMOperand*>(Operands[3])->isReg() &&
- static_cast<ARMOperand*>(Operands[3])->getReg() == ARM::PC &&
- static_cast<ARMOperand*>(Operands[4])->isReg() &&
- static_cast<ARMOperand*>(Operands[4])->getReg() == ARM::LR &&
- static_cast<ARMOperand*>(Operands[5])->isImm()) {
- ARMOperand *Op0 = static_cast<ARMOperand*>(Operands[0]);
- Operands.erase(Operands.begin());
- delete Op0;
- Operands.insert(Operands.begin(), ARMOperand::CreateToken(Name, NameLoc));
-
- ARMOperand *Op1 = static_cast<ARMOperand*>(Operands[1]);
+ static_cast<ARMOperand &>(*Operands[3]).isReg() &&
+ static_cast<ARMOperand &>(*Operands[3]).getReg() == ARM::PC &&
+ static_cast<ARMOperand &>(*Operands[4]).isReg() &&
+ static_cast<ARMOperand &>(*Operands[4]).getReg() == ARM::LR &&
+ static_cast<ARMOperand &>(*Operands[5]).isImm()) {
+ Operands.front() = ARMOperand::CreateToken(Name, NameLoc);
Operands.erase(Operands.begin() + 1);
- delete Op1;
}
return false;
}
@@ -5657,9 +5624,8 @@ static bool instIsBreakpoint(const MCInst &Inst) {
}
// FIXME: We would really like to be able to tablegen'erate this.
-bool ARMAsmParser::
-validateInstruction(MCInst &Inst,
- const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+bool ARMAsmParser::validateInstruction(MCInst &Inst,
+ const OperandVector &Operands) {
const MCInstrDesc &MCID = MII.get(Inst.getOpcode());
SMLoc Loc = Operands[0]->getStartLoc();
@@ -5682,7 +5648,7 @@ validateInstruction(MCInst &Inst,
// Find the condition code Operand to get its SMLoc information.
SMLoc CondLoc;
for (unsigned I = 1; I < Operands.size(); ++I)
- if (static_cast<ARMOperand*>(Operands[I])->isCondCode())
+ if (static_cast<ARMOperand &>(*Operands[I]).isCondCode())
CondLoc = Operands[I]->getStartLoc();
return Error(CondLoc, "incorrect condition in IT block; got '" +
StringRef(ARMCondCodeToString(ARMCC::CondCodes(Cond))) +
@@ -5782,8 +5748,8 @@ validateInstruction(MCInst &Inst,
// in the register list.
unsigned Rn = Inst.getOperand(0).getReg();
bool HasWritebackToken =
- (static_cast<ARMOperand*>(Operands[3])->isToken() &&
- static_cast<ARMOperand*>(Operands[3])->getToken() == "!");
+ (static_cast<ARMOperand &>(*Operands[3]).isToken() &&
+ static_cast<ARMOperand &>(*Operands[3]).getToken() == "!");
bool ListContainsBase;
if (checkLowRegisterList(Inst, 3, Rn, 0, ListContainsBase) && !isThumbTwo())
return Error(Operands[3 + HasWritebackToken]->getStartLoc(),
@@ -5843,11 +5809,10 @@ validateInstruction(MCInst &Inst,
// this first statement is always true for the new Inst. Essentially, the
// destination is unconditionally copied into the second source operand
// without checking to see if it matches what we actually parsed.
- if (Operands.size() == 6 &&
- (((ARMOperand*)Operands[3])->getReg() !=
- ((ARMOperand*)Operands[5])->getReg()) &&
- (((ARMOperand*)Operands[3])->getReg() !=
- ((ARMOperand*)Operands[4])->getReg())) {
+ if (Operands.size() == 6 && (((ARMOperand &)*Operands[3]).getReg() !=
+ ((ARMOperand &)*Operands[5]).getReg()) &&
+ (((ARMOperand &)*Operands[3]).getReg() !=
+ ((ARMOperand &)*Operands[4]).getReg())) {
return Error(Operands[3]->getStartLoc(),
"destination register must match source register");
}
@@ -5900,23 +5865,23 @@ validateInstruction(MCInst &Inst,
}
// Final range checking for Thumb unconditional branch instructions.
case ARM::tB:
- if (!(static_cast<ARMOperand*>(Operands[2]))->isSignedOffset<11, 1>())
+ if (!(static_cast<ARMOperand &>(*Operands[2])).isSignedOffset<11, 1>())
return Error(Operands[2]->getStartLoc(), "branch target out of range");
break;
case ARM::t2B: {
int op = (Operands[2]->isImm()) ? 2 : 3;
- if (!(static_cast<ARMOperand*>(Operands[op]))->isSignedOffset<24, 1>())
+ if (!static_cast<ARMOperand &>(*Operands[op]).isSignedOffset<24, 1>())
return Error(Operands[op]->getStartLoc(), "branch target out of range");
break;
}
// Final range checking for Thumb conditional branch instructions.
case ARM::tBcc:
- if (!(static_cast<ARMOperand*>(Operands[2]))->isSignedOffset<8, 1>())
+ if (!static_cast<ARMOperand &>(*Operands[2]).isSignedOffset<8, 1>())
return Error(Operands[2]->getStartLoc(), "branch target out of range");
break;
case ARM::t2Bcc: {
int Op = (Operands[2]->isImm()) ? 2 : 3;
- if (!(static_cast<ARMOperand*>(Operands[Op]))->isSignedOffset<20, 1>())
+ if (!static_cast<ARMOperand &>(*Operands[Op]).isSignedOffset<20, 1>())
return Error(Operands[Op]->getStartLoc(), "branch target out of range");
break;
}
@@ -5931,19 +5896,19 @@ validateInstruction(MCInst &Inst,
// lead to bugs that are difficult to find since this is an easy mistake
// to make.
int i = (Operands[3]->isImm()) ? 3 : 4;
- ARMOperand *Op = static_cast<ARMOperand*>(Operands[i]);
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Op->getImm());
+ ARMOperand &Op = static_cast<ARMOperand &>(*Operands[i]);
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Op.getImm());
if (CE) break;
- const MCExpr *E = dyn_cast<MCExpr>(Op->getImm());
+ const MCExpr *E = dyn_cast<MCExpr>(Op.getImm());
if (!E) break;
const ARMMCExpr *ARM16Expr = dyn_cast<ARMMCExpr>(E);
if (!ARM16Expr || (ARM16Expr->getKind() != ARMMCExpr::VK_ARM_HI16 &&
- ARM16Expr->getKind() != ARMMCExpr::VK_ARM_LO16)) {
- return Error(Op->getStartLoc(),
- "immediate expression for mov requires :lower16: or :upper16");
- break;
- }
- }
+ ARM16Expr->getKind() != ARMMCExpr::VK_ARM_LO16))
+ return Error(
+ Op.getStartLoc(),
+ "immediate expression for mov requires :lower16: or :upper16");
+ break;
+ }
}
return false;
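The mov16 case above rejects relocatable expressions that lack a :lower16: or :upper16: qualifier, because MOVW and MOVT each materialize only half of a 32-bit value, so a bare symbol reference would be ambiguous. For a known constant the split is straightforward, as a small sketch shows:

    #include <cstdint>

    // MOVW loads the low 16 bits and MOVT the high 16 bits; together they
    // build a full 32-bit value. For symbols, the :lower16:/:upper16:
    // qualifiers tell the assembler which half a relocation should produce.
    constexpr uint16_t lower16(uint32_t V) { return uint16_t(V & 0xffff); }
    constexpr uint16_t upper16(uint32_t V) { return uint16_t(V >> 16); }

    static_assert(lower16(0x12345678) == 0x5678, "low half for MOVW");
    static_assert(upper16(0x12345678) == 0x1234, "high half for MOVT");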
@@ -6205,9 +6170,8 @@ static unsigned getRealVLDOpcode(unsigned Opc, unsigned &Spacing) {
}
}
-bool ARMAsmParser::
-processInstruction(MCInst &Inst,
- const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+bool ARMAsmParser::processInstruction(MCInst &Inst,
+ const OperandVector &Operands) {
switch (Inst.getOpcode()) {
// Alias for alternate form of 'ldr{,b}t Rt, [Rn], #imm' instruction.
case ARM::LDRT_POST:
@@ -6264,8 +6228,8 @@ processInstruction(MCInst &Inst,
// Select the narrow version if the immediate will fit.
if (Inst.getOperand(1).getImm() > 0 &&
Inst.getOperand(1).getImm() <= 0xff &&
- !(static_cast<ARMOperand*>(Operands[2])->isToken() &&
- static_cast<ARMOperand*>(Operands[2])->getToken() == ".w"))
+ !(static_cast<ARMOperand &>(*Operands[2]).isToken() &&
+ static_cast<ARMOperand &>(*Operands[2]).getToken() == ".w"))
Inst.setOpcode(ARM::tLDRpci);
else
Inst.setOpcode(ARM::t2LDRpci);
@@ -7355,8 +7319,8 @@ processInstruction(MCInst &Inst,
if (isARMLowRegister(Inst.getOperand(0).getReg()) &&
Inst.getOperand(0).getReg() == Inst.getOperand(1).getReg() &&
Inst.getOperand(5).getReg() == (inITBlock() ? 0 : ARM::CPSR) &&
- !(static_cast<ARMOperand*>(Operands[3])->isToken() &&
- static_cast<ARMOperand*>(Operands[3])->getToken() == ".w")) {
+ !(static_cast<ARMOperand &>(*Operands[3]).isToken() &&
+ static_cast<ARMOperand &>(*Operands[3]).getToken() == ".w")) {
unsigned NewOpc;
switch (Inst.getOpcode()) {
default: llvm_unreachable("unexpected opcode");
@@ -7559,7 +7523,7 @@ processInstruction(MCInst &Inst,
case ARM::LDMIA_UPD:
// If this is a load of a single register via a 'pop', then we should use
// a post-indexed LDR instruction instead, per the ARM ARM.
- if (static_cast<ARMOperand*>(Operands[0])->getToken() == "pop" &&
+ if (static_cast<ARMOperand &>(*Operands[0]).getToken() == "pop" &&
Inst.getNumOperands() == 5) {
MCInst TmpInst;
TmpInst.setOpcode(ARM::LDR_POST_IMM);
@@ -7577,7 +7541,7 @@ processInstruction(MCInst &Inst,
case ARM::STMDB_UPD:
// If this is a store of a single register via a 'push', then we should use
// a pre-indexed STR instruction instead, per the ARM ARM.
- if (static_cast<ARMOperand*>(Operands[0])->getToken() == "push" &&
+ if (static_cast<ARMOperand &>(*Operands[0]).getToken() == "push" &&
Inst.getNumOperands() == 5) {
MCInst TmpInst;
TmpInst.setOpcode(ARM::STR_PRE_IMM);
@@ -7593,7 +7557,7 @@ processInstruction(MCInst &Inst,
case ARM::t2ADDri12:
// If the immediate fits for encoding T3 (t2ADDri) and the generic "add"
// mnemonic was used (not "addw"), encoding T3 is preferred.
- if (static_cast<ARMOperand*>(Operands[0])->getToken() != "add" ||
+ if (static_cast<ARMOperand &>(*Operands[0]).getToken() != "add" ||
ARM_AM::getT2SOImmVal(Inst.getOperand(2).getImm()) == -1)
break;
Inst.setOpcode(ARM::t2ADDri);
@@ -7602,7 +7566,7 @@ processInstruction(MCInst &Inst,
case ARM::t2SUBri12:
// If the immediate fits for encoding T3 (t2SUBri) and the generic "sub"
// mnemonic was used (not "subw"), encoding T3 is preferred.
- if (static_cast<ARMOperand*>(Operands[0])->getToken() != "sub" ||
+ if (static_cast<ARMOperand &>(*Operands[0]).getToken() != "sub" ||
ARM_AM::getT2SOImmVal(Inst.getOperand(2).getImm()) == -1)
break;
Inst.setOpcode(ARM::t2SUBri);
@@ -7638,9 +7602,9 @@ processInstruction(MCInst &Inst,
!isARMLowRegister(Inst.getOperand(0).getReg()) ||
(unsigned)Inst.getOperand(2).getImm() > 255 ||
((!inITBlock() && Inst.getOperand(5).getReg() != ARM::CPSR) ||
- (inITBlock() && Inst.getOperand(5).getReg() != 0)) ||
- (static_cast<ARMOperand*>(Operands[3])->isToken() &&
- static_cast<ARMOperand*>(Operands[3])->getToken() == ".w"))
+ (inITBlock() && Inst.getOperand(5).getReg() != 0)) ||
+ (static_cast<ARMOperand &>(*Operands[3]).isToken() &&
+ static_cast<ARMOperand &>(*Operands[3]).getToken() == ".w"))
break;
MCInst TmpInst;
TmpInst.setOpcode(Inst.getOpcode() == ARM::t2ADDri ?
@@ -7661,8 +7625,8 @@ processInstruction(MCInst &Inst,
// 'as' behaviour. Make sure the wide encoding wasn't explicit.
if (Inst.getOperand(0).getReg() != Inst.getOperand(1).getReg() ||
Inst.getOperand(5).getReg() != 0 ||
- (static_cast<ARMOperand*>(Operands[3])->isToken() &&
- static_cast<ARMOperand*>(Operands[3])->getToken() == ".w"))
+ (static_cast<ARMOperand &>(*Operands[3]).isToken() &&
+ static_cast<ARMOperand &>(*Operands[3]).getToken() == ".w"))
break;
MCInst TmpInst;
TmpInst.setOpcode(ARM::tADDhirr);
@@ -7719,8 +7683,8 @@ processInstruction(MCInst &Inst,
// an error in validateInstruction().
unsigned Rn = Inst.getOperand(0).getReg();
bool hasWritebackToken =
- (static_cast<ARMOperand*>(Operands[3])->isToken() &&
- static_cast<ARMOperand*>(Operands[3])->getToken() == "!");
+ (static_cast<ARMOperand &>(*Operands[3]).isToken() &&
+ static_cast<ARMOperand &>(*Operands[3]).getToken() == "!");
bool listContainsBase;
if (checkLowRegisterList(Inst, 3, Rn, 0, listContainsBase) ||
(!listContainsBase && !hasWritebackToken) ||
@@ -7782,10 +7746,10 @@ processInstruction(MCInst &Inst,
if (isARMLowRegister(Inst.getOperand(0).getReg()) &&
(unsigned)Inst.getOperand(1).getImm() <= 255 &&
((!inITBlock() && Inst.getOperand(2).getImm() == ARMCC::AL &&
- Inst.getOperand(4).getReg() == ARM::CPSR) ||
- (inITBlock() && Inst.getOperand(4).getReg() == 0)) &&
- (!static_cast<ARMOperand*>(Operands[2])->isToken() ||
- static_cast<ARMOperand*>(Operands[2])->getToken() != ".w")) {
+ Inst.getOperand(4).getReg() == ARM::CPSR) ||
+ (inITBlock() && Inst.getOperand(4).getReg() == 0)) &&
+ (!static_cast<ARMOperand &>(*Operands[2]).isToken() ||
+ static_cast<ARMOperand &>(*Operands[2]).getToken() != ".w")) {
// The operands aren't in the same order for tMOVi8...
MCInst TmpInst;
TmpInst.setOpcode(ARM::tMOVi8);
@@ -7806,8 +7770,8 @@ processInstruction(MCInst &Inst,
isARMLowRegister(Inst.getOperand(1).getReg()) &&
Inst.getOperand(2).getImm() == ARMCC::AL &&
Inst.getOperand(4).getReg() == ARM::CPSR &&
- (!static_cast<ARMOperand*>(Operands[2])->isToken() ||
- static_cast<ARMOperand*>(Operands[2])->getToken() != ".w")) {
+ (!static_cast<ARMOperand &>(*Operands[2]).isToken() ||
+ static_cast<ARMOperand &>(*Operands[2]).getToken() != ".w")) {
// The operands aren't the same for tMOV[S]r... (no cc_out)
MCInst TmpInst;
TmpInst.setOpcode(Inst.getOperand(4).getReg() ? ARM::tMOVSr : ARM::tMOVr);
@@ -7829,8 +7793,8 @@ processInstruction(MCInst &Inst,
if (isARMLowRegister(Inst.getOperand(0).getReg()) &&
isARMLowRegister(Inst.getOperand(1).getReg()) &&
Inst.getOperand(2).getImm() == 0 &&
- (!static_cast<ARMOperand*>(Operands[2])->isToken() ||
- static_cast<ARMOperand*>(Operands[2])->getToken() != ".w")) {
+ (!static_cast<ARMOperand &>(*Operands[2]).isToken() ||
+ static_cast<ARMOperand &>(*Operands[2]).getToken() != ".w")) {
unsigned NewOpc;
switch (Inst.getOpcode()) {
default: llvm_unreachable("Illegal opcode!");
@@ -7942,9 +7906,10 @@ processInstruction(MCInst &Inst,
isARMLowRegister(Inst.getOperand(2).getReg())) &&
Inst.getOperand(0).getReg() == Inst.getOperand(1).getReg() &&
((!inITBlock() && Inst.getOperand(5).getReg() == ARM::CPSR) ||
- (inITBlock() && Inst.getOperand(5).getReg() != ARM::CPSR)) &&
- (!static_cast<ARMOperand*>(Operands[3])->isToken() ||
- !static_cast<ARMOperand*>(Operands[3])->getToken().equals_lower(".w"))) {
+ (inITBlock() && Inst.getOperand(5).getReg() != ARM::CPSR)) &&
+ (!static_cast<ARMOperand &>(*Operands[3]).isToken() ||
+ !static_cast<ARMOperand &>(*Operands[3]).getToken().equals_lower(
+ ".w"))) {
unsigned NewOpc;
switch (Inst.getOpcode()) {
default: llvm_unreachable("unexpected opcode");
@@ -7981,9 +7946,10 @@ processInstruction(MCInst &Inst,
(Inst.getOperand(0).getReg() == Inst.getOperand(1).getReg() ||
Inst.getOperand(0).getReg() == Inst.getOperand(2).getReg()) &&
((!inITBlock() && Inst.getOperand(5).getReg() == ARM::CPSR) ||
- (inITBlock() && Inst.getOperand(5).getReg() != ARM::CPSR)) &&
- (!static_cast<ARMOperand*>(Operands[3])->isToken() ||
- !static_cast<ARMOperand*>(Operands[3])->getToken().equals_lower(".w"))) {
+ (inITBlock() && Inst.getOperand(5).getReg() != ARM::CPSR)) &&
+ (!static_cast<ARMOperand &>(*Operands[3]).isToken() ||
+ !static_cast<ARMOperand &>(*Operands[3]).getToken().equals_lower(
+ ".w"))) {
unsigned NewOpc;
switch (Inst.getOpcode()) {
default: llvm_unreachable("unexpected opcode");
@@ -8063,11 +8029,10 @@ template <> inline bool IsCPSRDead<MCInst>(MCInst *Instr) {
}
static const char *getSubtargetFeatureName(unsigned Val);
-bool ARMAsmParser::
-MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
- SmallVectorImpl<MCParsedAsmOperand*> &Operands,
- MCStreamer &Out, unsigned &ErrorInfo,
- bool MatchingInlineAsm) {
+bool ARMAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
+ OperandVector &Operands,
+ MCStreamer &Out, unsigned &ErrorInfo,
+ bool MatchingInlineAsm) {
MCInst Inst;
unsigned MatchResult;
@@ -8136,7 +8101,7 @@ MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
if (ErrorInfo >= Operands.size())
return Error(IDLoc, "too few operands for instruction");
- ErrorLoc = ((ARMOperand*)Operands[ErrorInfo])->getStartLoc();
+ ErrorLoc = ((ARMOperand &)*Operands[ErrorInfo]).getStartLoc();
if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc;
}
@@ -8144,7 +8109,7 @@ MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
}
case Match_MnemonicFail:
return Error(IDLoc, "invalid instruction",
- ((ARMOperand*)Operands[0])->getLocRange());
+ ((ARMOperand &)*Operands[0]).getLocRange());
case Match_RequiresNotITBlock:
return Error(IDLoc, "flag setting instruction only valid outside IT block");
case Match_RequiresITBlock:
@@ -8154,12 +8119,12 @@ MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
case Match_RequiresThumb2:
return Error(IDLoc, "instruction variant requires Thumb2");
case Match_ImmRange0_15: {
- SMLoc ErrorLoc = ((ARMOperand*)Operands[ErrorInfo])->getStartLoc();
+ SMLoc ErrorLoc = ((ARMOperand &)*Operands[ErrorInfo]).getStartLoc();
if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc;
return Error(ErrorLoc, "immediate operand must be in the range [0,15]");
}
case Match_ImmRange0_239: {
- SMLoc ErrorLoc = ((ARMOperand*)Operands[ErrorInfo])->getStartLoc();
+ SMLoc ErrorLoc = ((ARMOperand &)*Operands[ErrorInfo]).getStartLoc();
if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc;
return Error(ErrorLoc, "immediate operand must be in the range [0,239]");
}
@@ -8175,7 +8140,7 @@ MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
case Match_DupAlignedMemoryRequires64or128:
case Match_AlignedMemoryRequires64or128or256:
{
- SMLoc ErrorLoc = ((ARMOperand*)Operands[ErrorInfo])->getAlignmentLoc();
+ SMLoc ErrorLoc = ((ARMOperand &)*Operands[ErrorInfo]).getAlignmentLoc();
if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc;
switch (MatchResult) {
default:
@@ -8923,28 +8888,22 @@ bool ARMAsmParser::parseDirectiveRegSave(SMLoc L, bool IsVector) {
}
// RAII object to make sure parsed operands are deleted.
- struct CleanupObject {
- SmallVector<MCParsedAsmOperand *, 1> Operands;
- ~CleanupObject() {
- for (unsigned I = 0, E = Operands.size(); I != E; ++I)
- delete Operands[I];
- }
- } CO;
+ SmallVector<std::unique_ptr<MCParsedAsmOperand>, 1> Operands;
// Parse the register list
- if (parseRegisterList(CO.Operands))
+ if (parseRegisterList(Operands))
return false;
- ARMOperand *Op = (ARMOperand*)CO.Operands[0];
- if (!IsVector && !Op->isRegList()) {
+ ARMOperand &Op = (ARMOperand &)*Operands[0];
+ if (!IsVector && !Op.isRegList()) {
Error(L, ".save expects GPR registers");
return false;
}
- if (IsVector && !Op->isDPRRegList()) {
+ if (IsVector && !Op.isDPRRegList()) {
Error(L, ".vsave expects DPR registers");
return false;
}
- getTargetStreamer().emitRegSave(Op->getRegList(), IsVector);
+ getTargetStreamer().emitRegSave(Op.getRegList(), IsVector);
return false;
}
@@ -9468,23 +9427,23 @@ bool ARMAsmParser::parseDirectiveArchExtension(SMLoc L) {
// Define this matcher function after the auto-generated include so we
// have the match class enum definitions.
-unsigned ARMAsmParser::validateTargetOperandClass(MCParsedAsmOperand *AsmOp,
+unsigned ARMAsmParser::validateTargetOperandClass(MCParsedAsmOperand &AsmOp,
unsigned Kind) {
- ARMOperand *Op = static_cast<ARMOperand*>(AsmOp);
+ ARMOperand &Op = static_cast<ARMOperand &>(AsmOp);
// If the kind is a token for a literal immediate, check if our asm
// operand matches. This is for InstAliases which have a fixed-value
// immediate in the syntax.
switch (Kind) {
default: break;
case MCK__35_0:
- if (Op->isImm())
- if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Op->getImm()))
+ if (Op.isImm())
+ if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Op.getImm()))
if (CE->getValue() == 0)
return Match_Success;
break;
case MCK_ARMSOImm:
- if (Op->isImm()) {
- const MCExpr *SOExpr = Op->getImm();
+ if (Op.isImm()) {
+ const MCExpr *SOExpr = Op.getImm();
int64_t Value;
if (!SOExpr->EvaluateAsAbsolute(Value))
return Match_Success;
@@ -9493,8 +9452,8 @@ unsigned ARMAsmParser::validateTargetOperandClass(MCParsedAsmOperand *AsmOp,
}
break;
case MCK_GPRPair:
- if (Op->isReg() &&
- MRI->getRegClass(ARM::GPRRegClassID).contains(Op->getReg()))
+ if (Op.isReg() &&
+ MRI->getRegClass(ARM::GPRRegClassID).contains(Op.getReg()))
return Match_Success;
break;
}
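The MCK_ARMSOImm case above falls back to Match_Success for expressions that cannot yet be evaluated; the operand class itself models ARM's classic "modified immediate", an 8-bit value rotated right by an even amount. A self-contained sketch of that predicate (the helper name is invented; the real matcher goes through ARM_AM helpers):

    #include <cstdint>

    // ARM so_imm / modified immediate: an 8-bit value rotated right by an
    // even amount in [0, 30]. Sketch of the predicate the operand class
    // represents.
    bool isARMModifiedImm(uint32_t V) {
      if (V <= 0xff)
        return true;
      for (unsigned Rot = 2; Rot < 32; Rot += 2) {
        // Rotate left by Rot to undo a right-rotation by Rot.
        uint32_t Undone = (V << Rot) | (V >> (32 - Rot));
        if (Undone <= 0xff)
          return true;
      }
      return false;
    }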
diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
index e4b785d..228fb57 100644
--- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
+++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
@@ -1092,13 +1092,13 @@ void ARMInstPrinter::printAddrModeImm12Operand(const MCInst *MI, unsigned OpNum,
if (isSub) {
O << ", "
<< markup("<imm:")
- << "#-" << -OffImm
+ << "#-" << formatImm(-OffImm)
<< markup(">");
}
else if (AlwaysPrintImm0 || OffImm > 0) {
O << ", "
<< markup("<imm:")
- << "#" << OffImm
+ << "#" << formatImm(OffImm)
<< markup(">");
}
O << "]" << markup(">");
diff --git a/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h b/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
index 42a1cbb..1686d76 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
@@ -295,7 +295,12 @@ namespace ARMII {
/// MO_OPTION_MASK - Most flags are mutually exclusive; this mask selects
/// just that part of the flag set.
- MO_OPTION_MASK = 0x7f,
+ MO_OPTION_MASK = 0x3f,
+
+ /// MO_DLLIMPORT - On a symbol operand, this represents that the reference
+ /// to the symbol is for an import stub. This is used for DLL import
+ /// storage class indication on Windows.
+ MO_DLLIMPORT = 0x40,
/// MO_NONLAZY - This is an independent flag, on a symbol operand "FOO" it
/// represents a symbol which, if indirect, will get special Darwin mangling
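MO_OPTION_MASK shrinks from 0x7f to 0x3f precisely so that bit 0x40 becomes free for the new, independent MO_DLLIMPORT flag while the mutually exclusive option values keep fitting under the mask. A compile-time sketch of that bit budget, restating only the values shown in the hunk (other independent flags are omitted):

    // Mutually exclusive options live in the low six bits; independent flags
    // occupy their own bits above the mask.
    enum : unsigned {
      MO_OPTION_MASK = 0x3f,
      MO_DLLIMPORT   = 0x40,
    };

    // The new flag must not alias any option value.
    static_assert((MO_DLLIMPORT & MO_OPTION_MASK) == 0,
                  "independent flag overlaps the option field");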
diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
index a4d13ed..7b5d8b0 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
@@ -992,7 +992,8 @@ void ARMTargetELFStreamer::emitLabel(MCSymbol *Symbol) {
return;
const MCSymbolData &SD = Streamer.getOrCreateSymbolData(Symbol);
- if (MCELF::GetType(SD) & (ELF::STT_FUNC << ELF_STT_Shift))
+ unsigned Type = MCELF::GetType(SD);
+ if (Type == ELF_STT_Func || Type == ELF_STT_GnuIFunc)
Streamer.EmitThumbFunc(Symbol);
}
@@ -1160,7 +1161,7 @@ void ARMELFStreamer::EmitPersonalityFixup(StringRef Name) {
const MCSymbolRefExpr *PersonalityRef = MCSymbolRefExpr::Create(
PersonalitySym, MCSymbolRefExpr::VK_ARM_NONE, getContext());
- AddValueSymbols(PersonalityRef);
+ visitUsedExpr(*PersonalityRef);
MCDataFragment *DF = getOrCreateDataFragment();
DF->getFixups().push_back(MCFixup::Create(DF->getContents().size(),
PersonalityRef,
@@ -1332,6 +1333,12 @@ MCStreamer *createMCAsmStreamer(MCContext &Ctx, formatted_raw_ostream &OS,
return S;
}
+MCStreamer *createARMNullStreamer(MCContext &Ctx) {
+ MCStreamer *S = llvm::createNullStreamer(Ctx);
+ new ARMTargetStreamer(*S);
+ return S;
+}
+
MCELFStreamer* createARMELFStreamer(MCContext &Context, MCAsmBackend &TAB,
raw_ostream &OS, MCCodeEmitter *Emitter,
bool RelaxAll, bool NoExecStack,
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
index 5b51a52..b8ee555 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
@@ -1047,8 +1047,7 @@ ARMMCCodeEmitter::getHiLo16ImmOpValue(const MCInst &MI, unsigned OpIdx,
   // we have a movt or a movw, but that led to misleading results.
   // This is now disallowed in the AsmParser in validateInstruction()
// so this should never happen.
- assert(0 && "expression without :upper16: or :lower16:");
- return 0;
+ llvm_unreachable("expression without :upper16: or :lower16:");
}
uint32_t ARMMCCodeEmitter::
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp
index 87ea875..e545e3c 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp
@@ -41,33 +41,6 @@ ARMMCExpr::EvaluateAsRelocatableImpl(MCValue &Res,
return false;
}
-// FIXME: This basically copies MCObjectStreamer::AddValueSymbols. Perhaps
-// that method should be made public?
-static void AddValueSymbols_(const MCExpr *Value, MCAssembler *Asm) {
- switch (Value->getKind()) {
- case MCExpr::Target:
- llvm_unreachable("Can't handle nested target expr!");
-
- case MCExpr::Constant:
- break;
-
- case MCExpr::Binary: {
- const MCBinaryExpr *BE = cast<MCBinaryExpr>(Value);
- AddValueSymbols_(BE->getLHS(), Asm);
- AddValueSymbols_(BE->getRHS(), Asm);
- break;
- }
-
- case MCExpr::SymbolRef:
- Asm->getOrCreateSymbolData(cast<MCSymbolRefExpr>(Value)->getSymbol());
- break;
-
- case MCExpr::Unary:
- AddValueSymbols_(cast<MCUnaryExpr>(Value)->getSubExpr(), Asm);
- break;
- }
-}
-
-void ARMMCExpr::AddValueSymbols(MCAssembler *Asm) const {
- AddValueSymbols_(getSubExpr(), Asm);
+void ARMMCExpr::visitUsedExpr(MCStreamer &Streamer) const {
+ Streamer.visitUsedExpr(*getSubExpr());
}
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h b/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h
index d819139..c5c0b10 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h
@@ -59,7 +59,7 @@ public:
void PrintImpl(raw_ostream &OS) const override;
bool EvaluateAsRelocatableImpl(MCValue &Res,
const MCAsmLayout *Layout) const override;
- void AddValueSymbols(MCAssembler *) const override;
+ void visitUsedExpr(MCStreamer &Streamer) const override;
const MCSection *FindAssociatedSection() const override {
return getSubExpr()->FindAssociatedSection();
}
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
index 04d63a7..2b3855d 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
@@ -427,6 +427,12 @@ extern "C" void LLVMInitializeARMTargetMC() {
TargetRegistry::RegisterAsmStreamer(TheThumbLETarget, createMCAsmStreamer);
TargetRegistry::RegisterAsmStreamer(TheThumbBETarget, createMCAsmStreamer);
+ // Register the null streamer.
+ TargetRegistry::RegisterNullStreamer(TheARMLETarget, createARMNullStreamer);
+ TargetRegistry::RegisterNullStreamer(TheARMBETarget, createARMNullStreamer);
+ TargetRegistry::RegisterNullStreamer(TheThumbLETarget, createARMNullStreamer);
+ TargetRegistry::RegisterNullStreamer(TheThumbBETarget, createARMNullStreamer);
+
// Register the MCInstPrinter.
TargetRegistry::RegisterMCInstPrinter(TheARMLETarget, createARMMCInstPrinter);
TargetRegistry::RegisterMCInstPrinter(TheARMBETarget, createARMMCInstPrinter);
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h
index 8853a8c..5326e56 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h
@@ -51,6 +51,8 @@ MCStreamer *createMCAsmStreamer(MCContext &Ctx, formatted_raw_ostream &OS,
MCInstPrinter *InstPrint, MCCodeEmitter *CE,
MCAsmBackend *TAB, bool ShowInst);
+MCStreamer *createARMNullStreamer(MCContext &Ctx);
+
MCCodeEmitter *createARMLEMCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
const MCSubtargetInfo &STI,
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
index ecfa4e5..186776a 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
@@ -32,6 +32,7 @@ class ARMMachObjectWriter : public MCMachObjectTargetWriter {
const MCFragment *Fragment,
const MCFixup &Fixup,
MCValue Target,
+ unsigned Type,
unsigned Log2Size,
uint64_t &FixedValue);
void RecordARMScatteredHalfRelocation(MachObjectWriter *Writer,
@@ -251,11 +252,11 @@ void ARMMachObjectWriter::RecordARMScatteredRelocation(MachObjectWriter *Writer,
const MCFragment *Fragment,
const MCFixup &Fixup,
MCValue Target,
+ unsigned Type,
unsigned Log2Size,
uint64_t &FixedValue) {
uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset();
unsigned IsPCRel = Writer->isFixupKindPCRel(Asm, Fixup.getKind());
- unsigned Type = MachO::ARM_RELOC_VANILLA;
// See <reloc.h>.
const MCSymbol *A = &Target.getSymA()->getSymbol();
@@ -272,6 +273,7 @@ void ARMMachObjectWriter::RecordARMScatteredRelocation(MachObjectWriter *Writer,
uint32_t Value2 = 0;
if (const MCSymbolRefExpr *B = Target.getSymB()) {
+ assert(Type == MachO::ARM_RELOC_VANILLA && "invalid reloc for 2 symbols");
const MCSymbolData *B_SD = &Asm.getSymbolData(B->getSymbol());
if (!B_SD->getFragment())
@@ -374,7 +376,8 @@ void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer,
return RecordARMScatteredHalfRelocation(Writer, Asm, Layout, Fragment,
Fixup, Target, FixedValue);
return RecordARMScatteredRelocation(Writer, Asm, Layout, Fragment, Fixup,
- Target, Log2Size, FixedValue);
+ Target, RelocType, Log2Size,
+ FixedValue);
}
// Get the symbol data, if any.
@@ -392,7 +395,8 @@ void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer,
Offset += 1 << Log2Size;
if (Offset && SD && !Writer->doesSymbolRequireExternRelocation(SD))
return RecordARMScatteredRelocation(Writer, Asm, Layout, Fragment, Fixup,
- Target, Log2Size, FixedValue);
+ Target, RelocType, Log2Size,
+ FixedValue);
// See <reloc.h>.
uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset();
diff --git a/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp b/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp
index e3cfb05..ad3f1ca 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp
@@ -11,147 +11,12 @@
//
//===----------------------------------------------------------------------===//
#include "llvm/ADT/MapVector.h"
+#include "llvm/MC/ConstantPools.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCStreamer.h"
using namespace llvm;
-
-namespace {
-// A class to keep track of assembler-generated constant pools that are use to
-// implement the ldr-pseudo.
-class ConstantPool {
- typedef SmallVector<std::pair<MCSymbol *, const MCExpr *>, 4> EntryVecTy;
- EntryVecTy Entries;
-
-public:
- // Initialize a new empty constant pool
- ConstantPool() {}
-
- // Add a new entry to the constant pool in the next slot.
- // \param Value is the new entry to put in the constant pool.
- //
- // \returns a MCExpr that references the newly inserted value
- const MCExpr *addEntry(const MCExpr *Value, MCContext &Context);
-
- // Emit the contents of the constant pool using the provided streamer.
- void emitEntries(MCStreamer &Streamer);
-
- // Return true if the constant pool is empty
- bool empty();
-};
-}
-
-namespace llvm {
-class AssemblerConstantPools {
- // Map type used to keep track of per-Section constant pools used by the
- // ldr-pseudo opcode. The map associates a section to its constant pool. The
- // constant pool is a vector of (label, value) pairs. When the ldr
- // pseudo is parsed we insert a new (label, value) pair into the constant pool
- // for the current section and add MCSymbolRefExpr to the new label as
- // an opcode to the ldr. After we have parsed all the user input we
- // output the (label, value) pairs in each constant pool at the end of the
- // section.
- //
- // We use the MapVector for the map type to ensure stable iteration of
- // the sections at the end of the parse. We need to iterate over the
- // sections in a stable order to ensure that we print the
- // constant pools in a deterministic order when printing an assembly
- // file.
- typedef MapVector<const MCSection *, ConstantPool> ConstantPoolMapTy;
- ConstantPoolMapTy ConstantPools;
-
-public:
- AssemblerConstantPools() {}
- ~AssemblerConstantPools() {}
-
- void emitAll(MCStreamer &Streamer);
- void emitForCurrentSection(MCStreamer &Streamer);
- const MCExpr *addEntry(MCStreamer &Streamer, const MCExpr *Expr);
-
-private:
- ConstantPool *getConstantPool(const MCSection *Section);
- ConstantPool &getOrCreateConstantPool(const MCSection *Section);
-};
-}
-
-//
-// ConstantPool implementation
-//
-// Emit the contents of the constant pool using the provided streamer.
-void ConstantPool::emitEntries(MCStreamer &Streamer) {
- if (Entries.empty())
- return;
- Streamer.EmitCodeAlignment(4); // align to 4-byte address
- Streamer.EmitDataRegion(MCDR_DataRegion);
- for (EntryVecTy::const_iterator I = Entries.begin(), E = Entries.end();
- I != E; ++I) {
- Streamer.EmitLabel(I->first);
- Streamer.EmitValue(I->second, 4);
- }
- Streamer.EmitDataRegion(MCDR_DataRegionEnd);
- Entries.clear();
-}
-
-const MCExpr *ConstantPool::addEntry(const MCExpr *Value, MCContext &Context) {
- MCSymbol *CPEntryLabel = Context.CreateTempSymbol();
-
- Entries.push_back(std::make_pair(CPEntryLabel, Value));
- return MCSymbolRefExpr::Create(CPEntryLabel, Context);
-}
-
-bool ConstantPool::empty() { return Entries.empty(); }
-
-//
-// AssemblerConstantPools implementation
-//
-ConstantPool *
-AssemblerConstantPools::getConstantPool(const MCSection *Section) {
- ConstantPoolMapTy::iterator CP = ConstantPools.find(Section);
- if (CP == ConstantPools.end())
- return nullptr;
-
- return &CP->second;
-}
-
-ConstantPool &
-AssemblerConstantPools::getOrCreateConstantPool(const MCSection *Section) {
- return ConstantPools[Section];
-}
-
-static void emitConstantPool(MCStreamer &Streamer, const MCSection *Section,
- ConstantPool &CP) {
- if (!CP.empty()) {
- Streamer.SwitchSection(Section);
- CP.emitEntries(Streamer);
- }
-}
-
-void AssemblerConstantPools::emitAll(MCStreamer &Streamer) {
- // Dump contents of assembler constant pools.
- for (ConstantPoolMapTy::iterator CPI = ConstantPools.begin(),
- CPE = ConstantPools.end();
- CPI != CPE; ++CPI) {
- const MCSection *Section = CPI->first;
- ConstantPool &CP = CPI->second;
-
- emitConstantPool(Streamer, Section, CP);
- }
-}
-
-void AssemblerConstantPools::emitForCurrentSection(MCStreamer &Streamer) {
- const MCSection *Section = Streamer.getCurrentSection().first;
- if (ConstantPool *CP = getConstantPool(Section)) {
- emitConstantPool(Streamer, Section, *CP);
- }
-}
-
-const MCExpr *AssemblerConstantPools::addEntry(MCStreamer &Streamer,
- const MCExpr *Expr) {
- const MCSection *Section = Streamer.getCurrentSection().first;
- return getOrCreateConstantPool(Section).addEntry(Expr, Streamer.getContext());
-}
-
//
// ARMTargetStreamer Implementation
//
@@ -175,78 +40,34 @@ void ARMTargetStreamer::finish() { ConstantPools->emitAll(Streamer); }
// The remaining callbacks should be handled separately by each
// streamer.
-void ARMTargetStreamer::emitFnStart() {
- llvm_unreachable("unimplemented");
-}
-void ARMTargetStreamer::emitFnEnd() {
- llvm_unreachable("unimplemented");
-}
-void ARMTargetStreamer::emitCantUnwind() {
- llvm_unreachable("unimplemented");
-}
-void ARMTargetStreamer::emitPersonality(const MCSymbol *Personality) {
- llvm_unreachable("unimplemented");
-}
-void ARMTargetStreamer::emitPersonalityIndex(unsigned Index) {
- llvm_unreachable("unimplemented");
-}
-void ARMTargetStreamer::emitHandlerData() {
- llvm_unreachable("unimplemented");
-}
+void ARMTargetStreamer::emitFnStart() {}
+void ARMTargetStreamer::emitFnEnd() {}
+void ARMTargetStreamer::emitCantUnwind() {}
+void ARMTargetStreamer::emitPersonality(const MCSymbol *Personality) {}
+void ARMTargetStreamer::emitPersonalityIndex(unsigned Index) {}
+void ARMTargetStreamer::emitHandlerData() {}
void ARMTargetStreamer::emitSetFP(unsigned FpReg, unsigned SpReg,
- int64_t Offset) {
- llvm_unreachable("unimplemented");
-}
-void ARMTargetStreamer::emitMovSP(unsigned Reg, int64_t Offset) {
- llvm_unreachable("unimplemented");
-}
-void ARMTargetStreamer::emitPad(int64_t Offset) {
- llvm_unreachable("unimplemented");
-}
-void
-ARMTargetStreamer::emitRegSave(const SmallVectorImpl<unsigned> &RegList,
- bool isVector) {
- llvm_unreachable("unimplemented");
-}
-void ARMTargetStreamer::emitUnwindRaw(
- int64_t StackOffset, const SmallVectorImpl<uint8_t> &Opcodes) {
- llvm_unreachable("unimplemented");
-}
-void ARMTargetStreamer::switchVendor(StringRef Vendor) {
- llvm_unreachable("unimplemented");
-}
-void ARMTargetStreamer::emitAttribute(unsigned Attribute, unsigned Value) {
- llvm_unreachable("unimplemented");
-}
+ int64_t Offset) {}
+void ARMTargetStreamer::emitMovSP(unsigned Reg, int64_t Offset) {}
+void ARMTargetStreamer::emitPad(int64_t Offset) {}
+void ARMTargetStreamer::emitRegSave(const SmallVectorImpl<unsigned> &RegList,
+ bool isVector) {}
+void ARMTargetStreamer::emitUnwindRaw(int64_t StackOffset,
+ const SmallVectorImpl<uint8_t> &Opcodes) {
+}
+void ARMTargetStreamer::switchVendor(StringRef Vendor) {}
+void ARMTargetStreamer::emitAttribute(unsigned Attribute, unsigned Value) {}
void ARMTargetStreamer::emitTextAttribute(unsigned Attribute,
- StringRef String) {
- llvm_unreachable("unimplemented");
-}
+ StringRef String) {}
void ARMTargetStreamer::emitIntTextAttribute(unsigned Attribute,
- unsigned IntValue,
- StringRef StringValue) {
- llvm_unreachable("unimplemented");
-}
-void ARMTargetStreamer::emitArch(unsigned Arch) {
- llvm_unreachable("unimplemented");
-}
-void ARMTargetStreamer::emitObjectArch(unsigned Arch) {
- llvm_unreachable("unimplemented");
-}
-void ARMTargetStreamer::emitFPU(unsigned FPU) {
- llvm_unreachable("unimplemented");
-}
-void ARMTargetStreamer::finishAttributeSection() {
- llvm_unreachable("unimplemented");
-}
-void ARMTargetStreamer::emitInst(uint32_t Inst, char Suffix) {
- llvm_unreachable("unimplemented");
-}
-void ARMTargetStreamer::AnnotateTLSDescriptorSequence(
- const MCSymbolRefExpr *SRE) {
- llvm_unreachable("unimplemented");
-}
+ unsigned IntValue,
+ StringRef StringValue) {}
+void ARMTargetStreamer::emitArch(unsigned Arch) {}
+void ARMTargetStreamer::emitObjectArch(unsigned Arch) {}
+void ARMTargetStreamer::emitFPU(unsigned FPU) {}
+void ARMTargetStreamer::finishAttributeSection() {}
+void ARMTargetStreamer::emitInst(uint32_t Inst, char Suffix) {}
+void
+ARMTargetStreamer::AnnotateTLSDescriptorSequence(const MCSymbolRefExpr *SRE) {}
-void ARMTargetStreamer::emitThumbSet(MCSymbol *Symbol, const MCExpr *Value) {
- llvm_unreachable("unimplemented");
-}
+void ARMTargetStreamer::emitThumbSet(MCSymbol *Symbol, const MCExpr *Value) {}
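
The classes deleted above are replaced by the new llvm/MC/ConstantPools.h include, and the unimplemented target-streamer callbacks become empty defaults rather than llvm_unreachable calls. For the ldr-pseudo bookkeeping that the deleted comments describe, here is a small self-contained sketch of the same idea using only standard containers (std::map and std::string stand in for MapVector and the MC section/symbol types; this is an illustration, not the relocated LLVM code):

```cpp
#include <map>
#include <string>
#include <utility>
#include <vector>

// Per-section constant pool: the ldr-pseudo adds (label, value) pairs while
// parsing; at the end of each section the pairs are emitted after the code.
struct ConstantPoolSketch {
  std::vector<std::pair<std::string, long>> Entries; // (label, value)

  // Returns the label that the ldr should reference.
  std::string addEntry(long Value, unsigned &TmpCounter) {
    std::string Label = ".Ltmp" + std::to_string(TmpCounter++);
    Entries.emplace_back(Label, Value);
    return Label;
  }
  bool empty() const { return Entries.empty(); }
};

// Section name -> pool. The real code uses MapVector for deterministic
// emission order; std::map is used here only to keep the sketch short.
struct AssemblerConstantPoolsSketch {
  std::map<std::string, ConstantPoolSketch> Pools;
  unsigned TmpCounter = 0;

  std::string addEntry(const std::string &Section, long Value) {
    return Pools[Section].addEntry(Value, TmpCounter);
  }
  // Emit (label: .word value) for every pool at the end of the parse.
  template <typename EmitFn> void emitAll(EmitFn Emit) {
    for (auto &P : Pools)
      for (auto &E : P.second.Entries)
        Emit(P.first, E.first, E.second);
  }
};
```

The Emit callback here plays the role of the Streamer.EmitLabel/EmitValue calls in the real emitEntries shown above.
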
diff --git a/lib/Target/ARM/Thumb1FrameLowering.cpp b/lib/Target/ARM/Thumb1FrameLowering.cpp
index be29dc5..baa97a7 100644
--- a/lib/Target/ARM/Thumb1FrameLowering.cpp
+++ b/lib/Target/ARM/Thumb1FrameLowering.cpp
@@ -21,6 +21,9 @@
using namespace llvm;
+Thumb1FrameLowering::Thumb1FrameLowering(const ARMSubtarget &sti)
+ : ARMFrameLowering(sti) {}
+
bool Thumb1FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const{
const MachineFrameInfo *FFI = MF.getFrameInfo();
unsigned CFSize = FFI->getMaxCallFrameSize();
diff --git a/lib/Target/ARM/Thumb1FrameLowering.h b/lib/Target/ARM/Thumb1FrameLowering.h
index f61874b..a227f8e 100644
--- a/lib/Target/ARM/Thumb1FrameLowering.h
+++ b/lib/Target/ARM/Thumb1FrameLowering.h
@@ -11,11 +11,10 @@
//
//===----------------------------------------------------------------------===//
-#ifndef __THUMB_FRAMEINFO_H_
-#define __THUMB_FRAMEINFO_H_
+#ifndef LLVM_ARM_THUMB1FRAMELOWERING_H
+#define LLVM_ARM_THUMB1FRAMELOWERING_H
#include "ARMFrameLowering.h"
-#include "ARMSubtarget.h"
#include "Thumb1InstrInfo.h"
#include "Thumb1RegisterInfo.h"
#include "llvm/Target/TargetFrameLowering.h"
@@ -24,9 +23,7 @@ namespace llvm {
class Thumb1FrameLowering : public ARMFrameLowering {
public:
- explicit Thumb1FrameLowering(const ARMSubtarget &sti)
- : ARMFrameLowering(sti) {
- }
+ explicit Thumb1FrameLowering(const ARMSubtarget &sti);
/// emitProlog/emitEpilog - These methods insert prolog and epilog code into
/// the function.
diff --git a/lib/Target/ARM/Thumb2SizeReduction.cpp b/lib/Target/ARM/Thumb2SizeReduction.cpp
index 6267ecf..09debe7 100644
--- a/lib/Target/ARM/Thumb2SizeReduction.cpp
+++ b/lib/Target/ARM/Thumb2SizeReduction.cpp
@@ -1010,7 +1010,8 @@ bool Thumb2SizeReduce::runOnMachineFunction(MachineFunction &MF) {
AttributeSet FnAttrs = MF.getFunction()->getAttributes();
OptimizeSize = FnAttrs.hasAttribute(AttributeSet::FunctionIndex,
Attribute::OptimizeForSize);
- MinimizeSize = STI->isMinSize();
+ MinimizeSize =
+ FnAttrs.hasAttribute(AttributeSet::FunctionIndex, Attribute::MinSize);
BlockInfo.clear();
BlockInfo.resize(MF.getNumBlockIDs());
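
The Thumb2SizeReduction hunk above stops caching MinimizeSize on the subtarget and reads the function's minsize attribute directly, so the flag is recomputed for each function handled by runOnMachineFunction. A short sketch of that query, using only the LLVM 3.5-era calls that already appear in the hunk (the helper name is invented for illustration):

```cpp
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Function.h"

// Derive both size flags from the IR function rather than from subtarget state.
static void getSizeFlags(const llvm::MachineFunction &MF, bool &OptimizeSize,
                         bool &MinimizeSize) {
  llvm::AttributeSet FnAttrs = MF.getFunction()->getAttributes();
  OptimizeSize = FnAttrs.hasAttribute(llvm::AttributeSet::FunctionIndex,
                                      llvm::Attribute::OptimizeForSize);
  MinimizeSize = FnAttrs.hasAttribute(llvm::AttributeSet::FunctionIndex,
                                      llvm::Attribute::MinSize);
}
```
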
diff --git a/lib/Target/CppBackend/CPPBackend.cpp b/lib/Target/CppBackend/CPPBackend.cpp
index 15b574d..f610fbb 100644
--- a/lib/Target/CppBackend/CPPBackend.cpp
+++ b/lib/Target/CppBackend/CPPBackend.cpp
@@ -1577,6 +1577,10 @@ void CppWriter::printInstruction(const Instruction *I,
nl(Out) << iName << "->setName(\"";
printEscapedString(cxi->getName());
Out << "\");";
+ nl(Out) << iName << "->setVolatile("
+ << (cxi->isVolatile() ? "true" : "false") << ");";
+ nl(Out) << iName << "->setWeak("
+ << (cxi->isWeak() ? "true" : "false") << ");";
break;
}
case Instruction::AtomicRMW: {
@@ -1607,6 +1611,8 @@ void CppWriter::printInstruction(const Instruction *I,
nl(Out) << iName << "->setName(\"";
printEscapedString(rmwi->getName());
Out << "\");";
+ nl(Out) << iName << "->setVolatile("
+ << (rmwi->isVolatile() ? "true" : "false") << ");";
break;
}
case Instruction::LandingPad: {
diff --git a/lib/Target/Hexagon/HexagonFrameLowering.cpp b/lib/Target/Hexagon/HexagonFrameLowering.cpp
index d551ca9..21df12f 100644
--- a/lib/Target/Hexagon/HexagonFrameLowering.cpp
+++ b/lib/Target/Hexagon/HexagonFrameLowering.cpp
@@ -165,8 +165,8 @@ void HexagonFrameLowering::emitEpilogue(MachineFunction &MF,
}
// Replace 'jumpr r31' instruction with dealloc_return for V4 and higher
// versions.
- if (STI.hasV4TOps() && MBBI->getOpcode() == Hexagon::JMPret
- && !DisableDeallocRet) {
+ if (MF.getTarget().getSubtarget<HexagonSubtarget>().hasV4TOps() &&
+ MBBI->getOpcode() == Hexagon::JMPret && !DisableDeallocRet) {
// Check for RESTORE_DEALLOC_RET_JMP_V4 call. Don't emit an extra DEALLOC
// instruction if we encounter it.
MachineBasicBlock::iterator BeforeJMPR =
diff --git a/lib/Target/Hexagon/HexagonFrameLowering.h b/lib/Target/Hexagon/HexagonFrameLowering.h
index 446af16..2d4b0b9 100644
--- a/lib/Target/Hexagon/HexagonFrameLowering.h
+++ b/lib/Target/Hexagon/HexagonFrameLowering.h
@@ -11,20 +11,16 @@
#define HEXAGON_FRAMEINFO_H
#include "Hexagon.h"
-#include "HexagonSubtarget.h"
#include "llvm/Target/TargetFrameLowering.h"
namespace llvm {
class HexagonFrameLowering : public TargetFrameLowering {
private:
- const HexagonSubtarget &STI;
void determineFrameLayout(MachineFunction &MF) const;
public:
- explicit HexagonFrameLowering(const HexagonSubtarget &sti)
- : TargetFrameLowering(StackGrowsDown, 8, 0), STI(sti) {
- }
+ explicit HexagonFrameLowering() : TargetFrameLowering(StackGrowsDown, 8, 0) {}
/// emitProlog/emitEpilog - These methods insert prolog and epilog code into
/// the function.
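
Together, the two Hexagon frame-lowering hunks drop the cached STI member from HexagonFrameLowering and query the subtarget at the point of use, as emitEpilogue now does with MF.getTarget().getSubtarget<HexagonSubtarget>(). A stripped-down illustration of that pattern with placeholder types (not the LLVM classes):

```cpp
struct SubtargetLike {
  bool hasV4TOps() const { return true; }
};

struct TargetLike {
  SubtargetLike ST;
  template <typename T> const T &getSubtarget() const { return ST; }
};

struct MachineFunctionLike {
  TargetLike TM;
  const TargetLike &getTarget() const { return TM; }
};

// Before: the frame lowering stored a subtarget reference at construction,
// which tied it to a particular construction order. After: each use queries
// the function's target, so the lowering object carries no subtarget state.
bool wantsDeallocReturn(const MachineFunctionLike &MF) {
  return MF.getTarget().getSubtarget<SubtargetLike>().hasV4TOps();
}
```
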
diff --git a/lib/Target/Hexagon/HexagonISelLowering.cpp b/lib/Target/Hexagon/HexagonISelLowering.cpp
index b8e5d24..a460ea4 100644
--- a/lib/Target/Hexagon/HexagonISelLowering.cpp
+++ b/lib/Target/Hexagon/HexagonISelLowering.cpp
@@ -463,9 +463,10 @@ HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
SmallVector<std::pair<unsigned, SDValue>, 16> RegsToPass;
SmallVector<SDValue, 8> MemOpChains;
+ const HexagonRegisterInfo *QRI = static_cast<const HexagonRegisterInfo *>(
+ DAG.getTarget().getRegisterInfo());
SDValue StackPtr =
- DAG.getCopyFromReg(Chain, dl, TM.getRegisterInfo()->getStackRegister(),
- getPointerTy());
+ DAG.getCopyFromReg(Chain, dl, QRI->getStackRegister(), getPointerTy());
// Walk the register/memloc assignments, inserting copies/loads.
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
@@ -720,7 +721,10 @@ SDValue HexagonTargetLowering::LowerINLINEASM(SDValue Op,
cast<RegisterSDNode>(Node->getOperand(i))->getReg();
// Check it to be lr
- if (Reg == TM.getRegisterInfo()->getRARegister()) {
+ const HexagonRegisterInfo *QRI =
+ static_cast<const HexagonRegisterInfo *>(
+ DAG.getTarget().getRegisterInfo());
+ if (Reg == QRI->getRARegister()) {
FuncInfo->setHasClobberLR(true);
break;
}
@@ -812,9 +816,9 @@ HexagonTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
// The Sub result contains the new stack start address, so it
// must be placed in the stack pointer register.
- SDValue CopyChain = DAG.getCopyToReg(Chain, dl,
- TM.getRegisterInfo()->getStackRegister(),
- Sub);
+ const HexagonRegisterInfo *QRI = static_cast<const HexagonRegisterInfo *>(
+ DAG.getTarget().getRegisterInfo());
+ SDValue CopyChain = DAG.getCopyToReg(Chain, dl, QRI->getStackRegister(), Sub);
SDValue Ops[2] = { ArgAdjust, CopyChain };
return DAG.getMergeValues(Ops, dl);
@@ -944,21 +948,6 @@ HexagonTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
}
SDValue
-HexagonTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
- SDValue LHS = Op.getOperand(0);
- SDValue RHS = Op.getOperand(1);
- SDValue CC = Op.getOperand(4);
- SDValue TrueVal = Op.getOperand(2);
- SDValue FalseVal = Op.getOperand(3);
- SDLoc dl(Op);
- SDNode* OpNode = Op.getNode();
- EVT SVT = OpNode->getValueType(0);
-
- SDValue Cond = DAG.getNode(ISD::SETCC, dl, MVT::i1, LHS, RHS, CC);
- return DAG.getNode(ISD::SELECT, dl, SVT, Cond, TrueVal, FalseVal);
-}
-
-SDValue
HexagonTargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const {
EVT ValTy = Op.getValueType();
SDLoc dl(Op);
@@ -975,7 +964,7 @@ HexagonTargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const {
SDValue
HexagonTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const {
- const TargetRegisterInfo *TRI = TM.getRegisterInfo();
+ const TargetRegisterInfo *TRI = DAG.getTarget().getRegisterInfo();
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
MFI->setReturnAddressIsTaken(true);
@@ -1001,7 +990,8 @@ HexagonTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const {
SDValue
HexagonTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
- const HexagonRegisterInfo *TRI = TM.getRegisterInfo();
+ const HexagonRegisterInfo *TRI =
+ static_cast<const HexagonRegisterInfo *>(DAG.getTarget().getRegisterInfo());
MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
MFI->setFrameAddressIsTaken(true);
@@ -1053,429 +1043,422 @@ HexagonTargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const {
// TargetLowering Implementation
//===----------------------------------------------------------------------===//
-HexagonTargetLowering::HexagonTargetLowering(HexagonTargetMachine
- &targetmachine)
- : TargetLowering(targetmachine, new HexagonTargetObjectFile()),
- TM(targetmachine) {
-
- const HexagonRegisterInfo* QRI = TM.getRegisterInfo();
+HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &targetmachine)
+ : TargetLowering(targetmachine, new HexagonTargetObjectFile()),
+ TM(targetmachine) {
- // Set up the register classes.
- addRegisterClass(MVT::i32, &Hexagon::IntRegsRegClass);
- addRegisterClass(MVT::i64, &Hexagon::DoubleRegsRegClass);
-
- if (QRI->Subtarget.hasV5TOps()) {
- addRegisterClass(MVT::f32, &Hexagon::IntRegsRegClass);
- addRegisterClass(MVT::f64, &Hexagon::DoubleRegsRegClass);
- }
+ const HexagonSubtarget &Subtarget = TM.getSubtarget<HexagonSubtarget>();
- addRegisterClass(MVT::i1, &Hexagon::PredRegsRegClass);
+ // Set up the register classes.
+ addRegisterClass(MVT::i32, &Hexagon::IntRegsRegClass);
+ addRegisterClass(MVT::i64, &Hexagon::DoubleRegsRegClass);
- computeRegisterProperties();
+ if (Subtarget.hasV5TOps()) {
+ addRegisterClass(MVT::f32, &Hexagon::IntRegsRegClass);
+ addRegisterClass(MVT::f64, &Hexagon::DoubleRegsRegClass);
+ }
- // Align loop entry
- setPrefLoopAlignment(4);
+ addRegisterClass(MVT::i1, &Hexagon::PredRegsRegClass);
- // Limits for inline expansion of memcpy/memmove
- MaxStoresPerMemcpy = 6;
- MaxStoresPerMemmove = 6;
+ computeRegisterProperties();
- //
- // Library calls for unsupported operations
- //
+ // Align loop entry
+ setPrefLoopAlignment(4);
- setLibcallName(RTLIB::SINTTOFP_I128_F64, "__hexagon_floattidf");
- setLibcallName(RTLIB::SINTTOFP_I128_F32, "__hexagon_floattisf");
-
- setLibcallName(RTLIB::FPTOUINT_F32_I128, "__hexagon_fixunssfti");
- setLibcallName(RTLIB::FPTOUINT_F64_I128, "__hexagon_fixunsdfti");
-
- setLibcallName(RTLIB::FPTOSINT_F32_I128, "__hexagon_fixsfti");
- setLibcallName(RTLIB::FPTOSINT_F64_I128, "__hexagon_fixdfti");
-
- setLibcallName(RTLIB::SDIV_I32, "__hexagon_divsi3");
- setOperationAction(ISD::SDIV, MVT::i32, Expand);
- setLibcallName(RTLIB::SREM_I32, "__hexagon_umodsi3");
- setOperationAction(ISD::SREM, MVT::i32, Expand);
-
- setLibcallName(RTLIB::SDIV_I64, "__hexagon_divdi3");
- setOperationAction(ISD::SDIV, MVT::i64, Expand);
- setLibcallName(RTLIB::SREM_I64, "__hexagon_moddi3");
- setOperationAction(ISD::SREM, MVT::i64, Expand);
-
- setLibcallName(RTLIB::UDIV_I32, "__hexagon_udivsi3");
- setOperationAction(ISD::UDIV, MVT::i32, Expand);
-
- setLibcallName(RTLIB::UDIV_I64, "__hexagon_udivdi3");
- setOperationAction(ISD::UDIV, MVT::i64, Expand);
-
- setLibcallName(RTLIB::UREM_I32, "__hexagon_umodsi3");
- setOperationAction(ISD::UREM, MVT::i32, Expand);
-
- setLibcallName(RTLIB::UREM_I64, "__hexagon_umoddi3");
- setOperationAction(ISD::UREM, MVT::i64, Expand);
-
- setLibcallName(RTLIB::DIV_F32, "__hexagon_divsf3");
- setOperationAction(ISD::FDIV, MVT::f32, Expand);
-
- setLibcallName(RTLIB::DIV_F64, "__hexagon_divdf3");
- setOperationAction(ISD::FDIV, MVT::f64, Expand);
-
- setOperationAction(ISD::FSQRT, MVT::f32, Expand);
- setOperationAction(ISD::FSQRT, MVT::f64, Expand);
- setOperationAction(ISD::FSIN, MVT::f32, Expand);
- setOperationAction(ISD::FSIN, MVT::f64, Expand);
-
- if (QRI->Subtarget.hasV5TOps()) {
- // Hexagon V5 Support.
- setOperationAction(ISD::FADD, MVT::f32, Legal);
- setOperationAction(ISD::FADD, MVT::f64, Legal);
- setOperationAction(ISD::FP_EXTEND, MVT::f32, Legal);
- setCondCodeAction(ISD::SETOEQ, MVT::f32, Legal);
- setCondCodeAction(ISD::SETOEQ, MVT::f64, Legal);
- setCondCodeAction(ISD::SETUEQ, MVT::f32, Legal);
- setCondCodeAction(ISD::SETUEQ, MVT::f64, Legal);
-
- setCondCodeAction(ISD::SETOGE, MVT::f32, Legal);
- setCondCodeAction(ISD::SETOGE, MVT::f64, Legal);
- setCondCodeAction(ISD::SETUGE, MVT::f32, Legal);
- setCondCodeAction(ISD::SETUGE, MVT::f64, Legal);
-
- setCondCodeAction(ISD::SETOGT, MVT::f32, Legal);
- setCondCodeAction(ISD::SETOGT, MVT::f64, Legal);
- setCondCodeAction(ISD::SETUGT, MVT::f32, Legal);
- setCondCodeAction(ISD::SETUGT, MVT::f64, Legal);
-
- setCondCodeAction(ISD::SETOLE, MVT::f32, Legal);
- setCondCodeAction(ISD::SETOLE, MVT::f64, Legal);
- setCondCodeAction(ISD::SETOLT, MVT::f32, Legal);
- setCondCodeAction(ISD::SETOLT, MVT::f64, Legal);
-
- setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
- setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
-
- setOperationAction(ISD::FP_TO_UINT, MVT::i1, Promote);
- setOperationAction(ISD::FP_TO_SINT, MVT::i1, Promote);
- setOperationAction(ISD::UINT_TO_FP, MVT::i1, Promote);
- setOperationAction(ISD::SINT_TO_FP, MVT::i1, Promote);
-
- setOperationAction(ISD::FP_TO_UINT, MVT::i8, Promote);
- setOperationAction(ISD::FP_TO_SINT, MVT::i8, Promote);
- setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
- setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
-
- setOperationAction(ISD::FP_TO_UINT, MVT::i16, Promote);
- setOperationAction(ISD::FP_TO_SINT, MVT::i16, Promote);
- setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
- setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
-
- setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
- setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
- setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);
- setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
-
- setOperationAction(ISD::FP_TO_UINT, MVT::i64, Legal);
- setOperationAction(ISD::FP_TO_SINT, MVT::i64, Legal);
- setOperationAction(ISD::UINT_TO_FP, MVT::i64, Legal);
- setOperationAction(ISD::SINT_TO_FP, MVT::i64, Legal);
-
- setOperationAction(ISD::FABS, MVT::f32, Legal);
- setOperationAction(ISD::FABS, MVT::f64, Expand);
-
- setOperationAction(ISD::FNEG, MVT::f32, Legal);
- setOperationAction(ISD::FNEG, MVT::f64, Expand);
- } else {
+ // Limits for inline expansion of memcpy/memmove
+ MaxStoresPerMemcpy = 6;
+ MaxStoresPerMemmove = 6;
- // Expand fp<->uint.
- setOperationAction(ISD::FP_TO_SINT, MVT::i32, Expand);
- setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
+ //
+ // Library calls for unsupported operations
+ //
- setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand);
- setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);
+ setLibcallName(RTLIB::SINTTOFP_I128_F64, "__hexagon_floattidf");
+ setLibcallName(RTLIB::SINTTOFP_I128_F32, "__hexagon_floattisf");
+
+ setLibcallName(RTLIB::FPTOUINT_F32_I128, "__hexagon_fixunssfti");
+ setLibcallName(RTLIB::FPTOUINT_F64_I128, "__hexagon_fixunsdfti");
+
+ setLibcallName(RTLIB::FPTOSINT_F32_I128, "__hexagon_fixsfti");
+ setLibcallName(RTLIB::FPTOSINT_F64_I128, "__hexagon_fixdfti");
+
+ setLibcallName(RTLIB::SDIV_I32, "__hexagon_divsi3");
+ setOperationAction(ISD::SDIV, MVT::i32, Expand);
+ setLibcallName(RTLIB::SREM_I32, "__hexagon_umodsi3");
+ setOperationAction(ISD::SREM, MVT::i32, Expand);
+
+ setLibcallName(RTLIB::SDIV_I64, "__hexagon_divdi3");
+ setOperationAction(ISD::SDIV, MVT::i64, Expand);
+ setLibcallName(RTLIB::SREM_I64, "__hexagon_moddi3");
+ setOperationAction(ISD::SREM, MVT::i64, Expand);
+
+ setLibcallName(RTLIB::UDIV_I32, "__hexagon_udivsi3");
+ setOperationAction(ISD::UDIV, MVT::i32, Expand);
+
+ setLibcallName(RTLIB::UDIV_I64, "__hexagon_udivdi3");
+ setOperationAction(ISD::UDIV, MVT::i64, Expand);
+
+ setLibcallName(RTLIB::UREM_I32, "__hexagon_umodsi3");
+ setOperationAction(ISD::UREM, MVT::i32, Expand);
+
+ setLibcallName(RTLIB::UREM_I64, "__hexagon_umoddi3");
+ setOperationAction(ISD::UREM, MVT::i64, Expand);
+
+ setLibcallName(RTLIB::DIV_F32, "__hexagon_divsf3");
+ setOperationAction(ISD::FDIV, MVT::f32, Expand);
+
+ setLibcallName(RTLIB::DIV_F64, "__hexagon_divdf3");
+ setOperationAction(ISD::FDIV, MVT::f64, Expand);
+
+ setOperationAction(ISD::FSQRT, MVT::f32, Expand);
+ setOperationAction(ISD::FSQRT, MVT::f64, Expand);
+ setOperationAction(ISD::FSIN, MVT::f32, Expand);
+ setOperationAction(ISD::FSIN, MVT::f64, Expand);
+
+ if (Subtarget.hasV5TOps()) {
+ // Hexagon V5 Support.
+ setOperationAction(ISD::FADD, MVT::f32, Legal);
+ setOperationAction(ISD::FADD, MVT::f64, Legal);
+ setOperationAction(ISD::FP_EXTEND, MVT::f32, Legal);
+ setCondCodeAction(ISD::SETOEQ, MVT::f32, Legal);
+ setCondCodeAction(ISD::SETOEQ, MVT::f64, Legal);
+ setCondCodeAction(ISD::SETUEQ, MVT::f32, Legal);
+ setCondCodeAction(ISD::SETUEQ, MVT::f64, Legal);
+
+ setCondCodeAction(ISD::SETOGE, MVT::f32, Legal);
+ setCondCodeAction(ISD::SETOGE, MVT::f64, Legal);
+ setCondCodeAction(ISD::SETUGE, MVT::f32, Legal);
+ setCondCodeAction(ISD::SETUGE, MVT::f64, Legal);
+
+ setCondCodeAction(ISD::SETOGT, MVT::f32, Legal);
+ setCondCodeAction(ISD::SETOGT, MVT::f64, Legal);
+ setCondCodeAction(ISD::SETUGT, MVT::f32, Legal);
+ setCondCodeAction(ISD::SETUGT, MVT::f64, Legal);
+
+ setCondCodeAction(ISD::SETOLE, MVT::f32, Legal);
+ setCondCodeAction(ISD::SETOLE, MVT::f64, Legal);
+ setCondCodeAction(ISD::SETOLT, MVT::f32, Legal);
+ setCondCodeAction(ISD::SETOLT, MVT::f64, Legal);
+
+ setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
+ setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
+
+ setOperationAction(ISD::FP_TO_UINT, MVT::i1, Promote);
+ setOperationAction(ISD::FP_TO_SINT, MVT::i1, Promote);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i1, Promote);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i1, Promote);
+
+ setOperationAction(ISD::FP_TO_UINT, MVT::i8, Promote);
+ setOperationAction(ISD::FP_TO_SINT, MVT::i8, Promote);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
+
+ setOperationAction(ISD::FP_TO_UINT, MVT::i16, Promote);
+ setOperationAction(ISD::FP_TO_SINT, MVT::i16, Promote);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
+
+ setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
+ setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
+
+ setOperationAction(ISD::FP_TO_UINT, MVT::i64, Legal);
+ setOperationAction(ISD::FP_TO_SINT, MVT::i64, Legal);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i64, Legal);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i64, Legal);
+
+ setOperationAction(ISD::FABS, MVT::f32, Legal);
+ setOperationAction(ISD::FABS, MVT::f64, Expand);
+
+ setOperationAction(ISD::FNEG, MVT::f32, Legal);
+ setOperationAction(ISD::FNEG, MVT::f64, Expand);
+ } else {
- setLibcallName(RTLIB::SINTTOFP_I64_F32, "__hexagon_floatdisf");
- setLibcallName(RTLIB::UINTTOFP_I64_F32, "__hexagon_floatundisf");
+ // Expand fp<->uint.
+ setOperationAction(ISD::FP_TO_SINT, MVT::i32, Expand);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
- setLibcallName(RTLIB::UINTTOFP_I32_F32, "__hexagon_floatunsisf");
- setLibcallName(RTLIB::SINTTOFP_I32_F32, "__hexagon_floatsisf");
+ setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);
- setLibcallName(RTLIB::SINTTOFP_I64_F64, "__hexagon_floatdidf");
- setLibcallName(RTLIB::UINTTOFP_I64_F64, "__hexagon_floatundidf");
+ setLibcallName(RTLIB::SINTTOFP_I64_F32, "__hexagon_floatdisf");
+ setLibcallName(RTLIB::UINTTOFP_I64_F32, "__hexagon_floatundisf");
- setLibcallName(RTLIB::UINTTOFP_I32_F64, "__hexagon_floatunsidf");
- setLibcallName(RTLIB::SINTTOFP_I32_F64, "__hexagon_floatsidf");
+ setLibcallName(RTLIB::UINTTOFP_I32_F32, "__hexagon_floatunsisf");
+ setLibcallName(RTLIB::SINTTOFP_I32_F32, "__hexagon_floatsisf");
- setLibcallName(RTLIB::FPTOUINT_F32_I32, "__hexagon_fixunssfsi");
- setLibcallName(RTLIB::FPTOUINT_F32_I64, "__hexagon_fixunssfdi");
+ setLibcallName(RTLIB::SINTTOFP_I64_F64, "__hexagon_floatdidf");
+ setLibcallName(RTLIB::UINTTOFP_I64_F64, "__hexagon_floatundidf");
- setLibcallName(RTLIB::FPTOSINT_F64_I64, "__hexagon_fixdfdi");
- setLibcallName(RTLIB::FPTOSINT_F32_I64, "__hexagon_fixsfdi");
+ setLibcallName(RTLIB::UINTTOFP_I32_F64, "__hexagon_floatunsidf");
+ setLibcallName(RTLIB::SINTTOFP_I32_F64, "__hexagon_floatsidf");
- setLibcallName(RTLIB::FPTOUINT_F64_I32, "__hexagon_fixunsdfsi");
- setLibcallName(RTLIB::FPTOUINT_F64_I64, "__hexagon_fixunsdfdi");
+ setLibcallName(RTLIB::FPTOUINT_F32_I32, "__hexagon_fixunssfsi");
+ setLibcallName(RTLIB::FPTOUINT_F32_I64, "__hexagon_fixunssfdi");
- setLibcallName(RTLIB::ADD_F64, "__hexagon_adddf3");
- setOperationAction(ISD::FADD, MVT::f64, Expand);
+ setLibcallName(RTLIB::FPTOSINT_F64_I64, "__hexagon_fixdfdi");
+ setLibcallName(RTLIB::FPTOSINT_F32_I64, "__hexagon_fixsfdi");
- setLibcallName(RTLIB::ADD_F32, "__hexagon_addsf3");
- setOperationAction(ISD::FADD, MVT::f32, Expand);
+ setLibcallName(RTLIB::FPTOUINT_F64_I32, "__hexagon_fixunsdfsi");
+ setLibcallName(RTLIB::FPTOUINT_F64_I64, "__hexagon_fixunsdfdi");
- setLibcallName(RTLIB::FPEXT_F32_F64, "__hexagon_extendsfdf2");
- setOperationAction(ISD::FP_EXTEND, MVT::f32, Expand);
+ setLibcallName(RTLIB::ADD_F64, "__hexagon_adddf3");
+ setOperationAction(ISD::FADD, MVT::f64, Expand);
- setLibcallName(RTLIB::OEQ_F32, "__hexagon_eqsf2");
- setCondCodeAction(ISD::SETOEQ, MVT::f32, Expand);
+ setLibcallName(RTLIB::ADD_F32, "__hexagon_addsf3");
+ setOperationAction(ISD::FADD, MVT::f32, Expand);
- setLibcallName(RTLIB::OEQ_F64, "__hexagon_eqdf2");
- setCondCodeAction(ISD::SETOEQ, MVT::f64, Expand);
+ setLibcallName(RTLIB::FPEXT_F32_F64, "__hexagon_extendsfdf2");
+ setOperationAction(ISD::FP_EXTEND, MVT::f32, Expand);
- setLibcallName(RTLIB::OGE_F32, "__hexagon_gesf2");
- setCondCodeAction(ISD::SETOGE, MVT::f32, Expand);
+ setLibcallName(RTLIB::OEQ_F32, "__hexagon_eqsf2");
+ setCondCodeAction(ISD::SETOEQ, MVT::f32, Expand);
- setLibcallName(RTLIB::OGE_F64, "__hexagon_gedf2");
- setCondCodeAction(ISD::SETOGE, MVT::f64, Expand);
+ setLibcallName(RTLIB::OEQ_F64, "__hexagon_eqdf2");
+ setCondCodeAction(ISD::SETOEQ, MVT::f64, Expand);
- setLibcallName(RTLIB::OGT_F32, "__hexagon_gtsf2");
- setCondCodeAction(ISD::SETOGT, MVT::f32, Expand);
+ setLibcallName(RTLIB::OGE_F32, "__hexagon_gesf2");
+ setCondCodeAction(ISD::SETOGE, MVT::f32, Expand);
- setLibcallName(RTLIB::OGT_F64, "__hexagon_gtdf2");
- setCondCodeAction(ISD::SETOGT, MVT::f64, Expand);
+ setLibcallName(RTLIB::OGE_F64, "__hexagon_gedf2");
+ setCondCodeAction(ISD::SETOGE, MVT::f64, Expand);
- setLibcallName(RTLIB::FPTOSINT_F64_I32, "__hexagon_fixdfsi");
- setOperationAction(ISD::FP_TO_SINT, MVT::f64, Expand);
+ setLibcallName(RTLIB::OGT_F32, "__hexagon_gtsf2");
+ setCondCodeAction(ISD::SETOGT, MVT::f32, Expand);
- setLibcallName(RTLIB::FPTOSINT_F32_I32, "__hexagon_fixsfsi");
- setOperationAction(ISD::FP_TO_SINT, MVT::f32, Expand);
+ setLibcallName(RTLIB::OGT_F64, "__hexagon_gtdf2");
+ setCondCodeAction(ISD::SETOGT, MVT::f64, Expand);
- setLibcallName(RTLIB::OLE_F64, "__hexagon_ledf2");
- setCondCodeAction(ISD::SETOLE, MVT::f64, Expand);
+ setLibcallName(RTLIB::FPTOSINT_F64_I32, "__hexagon_fixdfsi");
+ setOperationAction(ISD::FP_TO_SINT, MVT::f64, Expand);
- setLibcallName(RTLIB::OLE_F32, "__hexagon_lesf2");
- setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
+ setLibcallName(RTLIB::FPTOSINT_F32_I32, "__hexagon_fixsfsi");
+ setOperationAction(ISD::FP_TO_SINT, MVT::f32, Expand);
- setLibcallName(RTLIB::OLT_F64, "__hexagon_ltdf2");
- setCondCodeAction(ISD::SETOLT, MVT::f64, Expand);
+ setLibcallName(RTLIB::OLE_F64, "__hexagon_ledf2");
+ setCondCodeAction(ISD::SETOLE, MVT::f64, Expand);
- setLibcallName(RTLIB::OLT_F32, "__hexagon_ltsf2");
- setCondCodeAction(ISD::SETOLT, MVT::f32, Expand);
+ setLibcallName(RTLIB::OLE_F32, "__hexagon_lesf2");
+ setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
- setLibcallName(RTLIB::MUL_F64, "__hexagon_muldf3");
- setOperationAction(ISD::FMUL, MVT::f64, Expand);
+ setLibcallName(RTLIB::OLT_F64, "__hexagon_ltdf2");
+ setCondCodeAction(ISD::SETOLT, MVT::f64, Expand);
- setLibcallName(RTLIB::MUL_F32, "__hexagon_mulsf3");
- setOperationAction(ISD::MUL, MVT::f32, Expand);
+ setLibcallName(RTLIB::OLT_F32, "__hexagon_ltsf2");
+ setCondCodeAction(ISD::SETOLT, MVT::f32, Expand);
- setLibcallName(RTLIB::UNE_F64, "__hexagon_nedf2");
- setCondCodeAction(ISD::SETUNE, MVT::f64, Expand);
+ setLibcallName(RTLIB::MUL_F64, "__hexagon_muldf3");
+ setOperationAction(ISD::FMUL, MVT::f64, Expand);
- setLibcallName(RTLIB::UNE_F32, "__hexagon_nesf2");
+ setLibcallName(RTLIB::MUL_F32, "__hexagon_mulsf3");
+ setOperationAction(ISD::MUL, MVT::f32, Expand);
- setLibcallName(RTLIB::SUB_F64, "__hexagon_subdf3");
- setOperationAction(ISD::SUB, MVT::f64, Expand);
+ setLibcallName(RTLIB::UNE_F64, "__hexagon_nedf2");
+ setCondCodeAction(ISD::SETUNE, MVT::f64, Expand);
- setLibcallName(RTLIB::SUB_F32, "__hexagon_subsf3");
- setOperationAction(ISD::SUB, MVT::f32, Expand);
+ setLibcallName(RTLIB::UNE_F32, "__hexagon_nesf2");
- setLibcallName(RTLIB::FPROUND_F64_F32, "__hexagon_truncdfsf2");
- setOperationAction(ISD::FP_ROUND, MVT::f64, Expand);
+ setLibcallName(RTLIB::SUB_F64, "__hexagon_subdf3");
+ setOperationAction(ISD::SUB, MVT::f64, Expand);
- setLibcallName(RTLIB::UO_F64, "__hexagon_unorddf2");
- setCondCodeAction(ISD::SETUO, MVT::f64, Expand);
+ setLibcallName(RTLIB::SUB_F32, "__hexagon_subsf3");
+ setOperationAction(ISD::SUB, MVT::f32, Expand);
- setLibcallName(RTLIB::O_F64, "__hexagon_unorddf2");
- setCondCodeAction(ISD::SETO, MVT::f64, Expand);
+ setLibcallName(RTLIB::FPROUND_F64_F32, "__hexagon_truncdfsf2");
+ setOperationAction(ISD::FP_ROUND, MVT::f64, Expand);
- setLibcallName(RTLIB::O_F32, "__hexagon_unordsf2");
- setCondCodeAction(ISD::SETO, MVT::f32, Expand);
+ setLibcallName(RTLIB::UO_F64, "__hexagon_unorddf2");
+ setCondCodeAction(ISD::SETUO, MVT::f64, Expand);
- setLibcallName(RTLIB::UO_F32, "__hexagon_unordsf2");
- setCondCodeAction(ISD::SETUO, MVT::f32, Expand);
+ setLibcallName(RTLIB::O_F64, "__hexagon_unorddf2");
+ setCondCodeAction(ISD::SETO, MVT::f64, Expand);
- setOperationAction(ISD::FABS, MVT::f32, Expand);
- setOperationAction(ISD::FABS, MVT::f64, Expand);
- setOperationAction(ISD::FNEG, MVT::f32, Expand);
- setOperationAction(ISD::FNEG, MVT::f64, Expand);
- }
+ setLibcallName(RTLIB::O_F32, "__hexagon_unordsf2");
+ setCondCodeAction(ISD::SETO, MVT::f32, Expand);
- setLibcallName(RTLIB::SREM_I32, "__hexagon_modsi3");
- setOperationAction(ISD::SREM, MVT::i32, Expand);
-
- setIndexedLoadAction(ISD::POST_INC, MVT::i8, Legal);
- setIndexedLoadAction(ISD::POST_INC, MVT::i16, Legal);
- setIndexedLoadAction(ISD::POST_INC, MVT::i32, Legal);
- setIndexedLoadAction(ISD::POST_INC, MVT::i64, Legal);
-
- setIndexedStoreAction(ISD::POST_INC, MVT::i8, Legal);
- setIndexedStoreAction(ISD::POST_INC, MVT::i16, Legal);
- setIndexedStoreAction(ISD::POST_INC, MVT::i32, Legal);
- setIndexedStoreAction(ISD::POST_INC, MVT::i64, Legal);
-
- setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
-
- // Turn FP extload into load/fextend.
- setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
- // Hexagon has an i1 sign-extending load.
- setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Expand);
- // Turn FP truncstore into trunc + store.
- setTruncStoreAction(MVT::f64, MVT::f32, Expand);
-
- // Custom legalize GlobalAddress nodes into CONST32.
- setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
- setOperationAction(ISD::GlobalAddress, MVT::i8, Custom);
- setOperationAction(ISD::BlockAddress, MVT::i32, Custom);
- // Truncate action?
- setOperationAction(ISD::TRUNCATE, MVT::i64, Expand);
-
- // Hexagon doesn't have sext_inreg, replace them with shl/sra.
- setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1 , Expand);
-
- // Hexagon has no REM or DIVREM operations.
- setOperationAction(ISD::UREM, MVT::i32, Expand);
- setOperationAction(ISD::SREM, MVT::i32, Expand);
- setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
- setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
- setOperationAction(ISD::SREM, MVT::i64, Expand);
- setOperationAction(ISD::SDIVREM, MVT::i64, Expand);
- setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
-
- setOperationAction(ISD::BSWAP, MVT::i64, Expand);
-
- // Lower SELECT_CC to SETCC and SELECT.
- setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
- setOperationAction(ISD::SELECT_CC, MVT::i64, Custom);
-
- if (QRI->Subtarget.hasV5TOps()) {
-
- // We need to make the operation type of SELECT node to be Custom,
- // such that we don't go into the infinite loop of
- // select -> setcc -> select_cc -> select loop.
- setOperationAction(ISD::SELECT, MVT::f32, Custom);
- setOperationAction(ISD::SELECT, MVT::f64, Custom);
-
- setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
- setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
- setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
+ setLibcallName(RTLIB::UO_F32, "__hexagon_unordsf2");
+ setCondCodeAction(ISD::SETUO, MVT::f32, Expand);
- } else {
-
- // Hexagon has no select or setcc: expand to SELECT_CC.
- setOperationAction(ISD::SELECT, MVT::f32, Expand);
- setOperationAction(ISD::SELECT, MVT::f64, Expand);
+ setOperationAction(ISD::FABS, MVT::f32, Expand);
+ setOperationAction(ISD::FABS, MVT::f64, Expand);
+ setOperationAction(ISD::FNEG, MVT::f32, Expand);
+ setOperationAction(ISD::FNEG, MVT::f64, Expand);
+ }
- // This is a workaround documented in DAGCombiner.cpp:2892 We don't
- // support SELECT_CC on every type.
- setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
+ setLibcallName(RTLIB::SREM_I32, "__hexagon_modsi3");
+ setOperationAction(ISD::SREM, MVT::i32, Expand);
+
+ setIndexedLoadAction(ISD::POST_INC, MVT::i8, Legal);
+ setIndexedLoadAction(ISD::POST_INC, MVT::i16, Legal);
+ setIndexedLoadAction(ISD::POST_INC, MVT::i32, Legal);
+ setIndexedLoadAction(ISD::POST_INC, MVT::i64, Legal);
+
+ setIndexedStoreAction(ISD::POST_INC, MVT::i8, Legal);
+ setIndexedStoreAction(ISD::POST_INC, MVT::i16, Legal);
+ setIndexedStoreAction(ISD::POST_INC, MVT::i32, Legal);
+ setIndexedStoreAction(ISD::POST_INC, MVT::i64, Legal);
+
+ setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
+
+ // Turn FP extload into load/fextend.
+ setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
+ // Hexagon has an i1 sign-extending load.
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Expand);
+ // Turn FP truncstore into trunc + store.
+ setTruncStoreAction(MVT::f64, MVT::f32, Expand);
+
+ // Custom legalize GlobalAddress nodes into CONST32.
+ setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
+ setOperationAction(ISD::GlobalAddress, MVT::i8, Custom);
+ setOperationAction(ISD::BlockAddress, MVT::i32, Custom);
+ // Truncate action?
+ setOperationAction(ISD::TRUNCATE, MVT::i64, Expand);
+
+ // Hexagon doesn't have sext_inreg, replace them with shl/sra.
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
+
+ // Hexagon has no REM or DIVREM operations.
+ setOperationAction(ISD::UREM, MVT::i32, Expand);
+ setOperationAction(ISD::SREM, MVT::i32, Expand);
+ setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
+ setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
+ setOperationAction(ISD::SREM, MVT::i64, Expand);
+ setOperationAction(ISD::SDIVREM, MVT::i64, Expand);
+ setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
+
+ setOperationAction(ISD::BSWAP, MVT::i64, Expand);
+
+ // Lower SELECT_CC to SETCC and SELECT.
+ setOperationAction(ISD::SELECT_CC, MVT::i1, Expand);
+ setOperationAction(ISD::SELECT_CC, MVT::i32, Expand);
+ setOperationAction(ISD::SELECT_CC, MVT::i64, Expand);
+
+ if (Subtarget.hasV5TOps()) {
+
+ // We need to make the operation type of SELECT node to be Custom,
+ // such that we don't go into the infinite loop of
+ // select -> setcc -> select_cc -> select loop.
+ setOperationAction(ISD::SELECT, MVT::f32, Custom);
+ setOperationAction(ISD::SELECT, MVT::f64, Custom);
+
+ setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
+ setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
- }
+ } else {
- if (EmitJumpTables) {
- setOperationAction(ISD::BR_JT, MVT::Other, Custom);
- } else {
- setOperationAction(ISD::BR_JT, MVT::Other, Expand);
- }
- // Increase jump tables cutover to 5, was 4.
- setMinimumJumpTableEntries(5);
-
- setOperationAction(ISD::BR_CC, MVT::f32, Expand);
- setOperationAction(ISD::BR_CC, MVT::f64, Expand);
- setOperationAction(ISD::BR_CC, MVT::i1, Expand);
- setOperationAction(ISD::BR_CC, MVT::i32, Expand);
- setOperationAction(ISD::BR_CC, MVT::i64, Expand);
-
- setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
-
- setOperationAction(ISD::FSIN , MVT::f64, Expand);
- setOperationAction(ISD::FCOS , MVT::f64, Expand);
- setOperationAction(ISD::FREM , MVT::f64, Expand);
- setOperationAction(ISD::FSIN , MVT::f32, Expand);
- setOperationAction(ISD::FCOS , MVT::f32, Expand);
- setOperationAction(ISD::FREM , MVT::f32, Expand);
- setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
- setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
-
- // In V4, we have double word add/sub with carry. The problem with
- // modelling this instruction is that it produces 2 results - Rdd and Px.
- // To model update of Px, we will have to use Defs[p0..p3] which will
- // cause any predicate live range to spill. So, we pretend we don't
- // have these instructions.
- setOperationAction(ISD::ADDE, MVT::i8, Expand);
- setOperationAction(ISD::ADDE, MVT::i16, Expand);
- setOperationAction(ISD::ADDE, MVT::i32, Expand);
- setOperationAction(ISD::ADDE, MVT::i64, Expand);
- setOperationAction(ISD::SUBE, MVT::i8, Expand);
- setOperationAction(ISD::SUBE, MVT::i16, Expand);
- setOperationAction(ISD::SUBE, MVT::i32, Expand);
- setOperationAction(ISD::SUBE, MVT::i64, Expand);
- setOperationAction(ISD::ADDC, MVT::i8, Expand);
- setOperationAction(ISD::ADDC, MVT::i16, Expand);
- setOperationAction(ISD::ADDC, MVT::i32, Expand);
- setOperationAction(ISD::ADDC, MVT::i64, Expand);
- setOperationAction(ISD::SUBC, MVT::i8, Expand);
- setOperationAction(ISD::SUBC, MVT::i16, Expand);
- setOperationAction(ISD::SUBC, MVT::i32, Expand);
- setOperationAction(ISD::SUBC, MVT::i64, Expand);
-
- setOperationAction(ISD::CTPOP, MVT::i32, Expand);
- setOperationAction(ISD::CTPOP, MVT::i64, Expand);
- setOperationAction(ISD::CTTZ , MVT::i32, Expand);
- setOperationAction(ISD::CTTZ , MVT::i64, Expand);
- setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand);
- setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand);
- setOperationAction(ISD::CTLZ , MVT::i32, Expand);
- setOperationAction(ISD::CTLZ , MVT::i64, Expand);
- setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand);
- setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Expand);
- setOperationAction(ISD::ROTL , MVT::i32, Expand);
- setOperationAction(ISD::ROTR , MVT::i32, Expand);
- setOperationAction(ISD::BSWAP, MVT::i32, Expand);
- setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
- setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
- setOperationAction(ISD::FPOW , MVT::f64, Expand);
- setOperationAction(ISD::FPOW , MVT::f32, Expand);
-
- setOperationAction(ISD::SHL_PARTS, MVT::i32, Expand);
- setOperationAction(ISD::SRA_PARTS, MVT::i32, Expand);
- setOperationAction(ISD::SRL_PARTS, MVT::i32, Expand);
-
- setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
- setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
-
- setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
- setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
-
- setOperationAction(ISD::EH_RETURN, MVT::Other, Custom);
-
- if (TM.getSubtargetImpl()->isSubtargetV2()) {
- setExceptionPointerRegister(Hexagon::R20);
- setExceptionSelectorRegister(Hexagon::R21);
- } else {
- setExceptionPointerRegister(Hexagon::R0);
- setExceptionSelectorRegister(Hexagon::R1);
- }
+ // Hexagon has no select or setcc: expand to SELECT_CC.
+ setOperationAction(ISD::SELECT, MVT::f32, Expand);
+ setOperationAction(ISD::SELECT, MVT::f64, Expand);
+ }
- // VASTART needs to be custom lowered to use the VarArgsFrameIndex.
- setOperationAction(ISD::VASTART , MVT::Other, Custom);
+ if (EmitJumpTables) {
+ setOperationAction(ISD::BR_JT, MVT::Other, Custom);
+ } else {
+ setOperationAction(ISD::BR_JT, MVT::Other, Expand);
+ }
+ // Increase jump tables cutover to 5, was 4.
+ setMinimumJumpTableEntries(5);
+
+ setOperationAction(ISD::BR_CC, MVT::f32, Expand);
+ setOperationAction(ISD::BR_CC, MVT::f64, Expand);
+ setOperationAction(ISD::BR_CC, MVT::i1, Expand);
+ setOperationAction(ISD::BR_CC, MVT::i32, Expand);
+ setOperationAction(ISD::BR_CC, MVT::i64, Expand);
+
+ setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
+
+ setOperationAction(ISD::FSIN, MVT::f64, Expand);
+ setOperationAction(ISD::FCOS, MVT::f64, Expand);
+ setOperationAction(ISD::FREM, MVT::f64, Expand);
+ setOperationAction(ISD::FSIN, MVT::f32, Expand);
+ setOperationAction(ISD::FCOS, MVT::f32, Expand);
+ setOperationAction(ISD::FREM, MVT::f32, Expand);
+ setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
+ setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
+
+ // In V4, we have double word add/sub with carry. The problem with
+ // modelling this instruction is that it produces 2 results - Rdd and Px.
+ // To model update of Px, we will have to use Defs[p0..p3] which will
+ // cause any predicate live range to spill. So, we pretend we don't
+ // have these instructions.
+ setOperationAction(ISD::ADDE, MVT::i8, Expand);
+ setOperationAction(ISD::ADDE, MVT::i16, Expand);
+ setOperationAction(ISD::ADDE, MVT::i32, Expand);
+ setOperationAction(ISD::ADDE, MVT::i64, Expand);
+ setOperationAction(ISD::SUBE, MVT::i8, Expand);
+ setOperationAction(ISD::SUBE, MVT::i16, Expand);
+ setOperationAction(ISD::SUBE, MVT::i32, Expand);
+ setOperationAction(ISD::SUBE, MVT::i64, Expand);
+ setOperationAction(ISD::ADDC, MVT::i8, Expand);
+ setOperationAction(ISD::ADDC, MVT::i16, Expand);
+ setOperationAction(ISD::ADDC, MVT::i32, Expand);
+ setOperationAction(ISD::ADDC, MVT::i64, Expand);
+ setOperationAction(ISD::SUBC, MVT::i8, Expand);
+ setOperationAction(ISD::SUBC, MVT::i16, Expand);
+ setOperationAction(ISD::SUBC, MVT::i32, Expand);
+ setOperationAction(ISD::SUBC, MVT::i64, Expand);
+
+ setOperationAction(ISD::CTPOP, MVT::i32, Expand);
+ setOperationAction(ISD::CTPOP, MVT::i64, Expand);
+ setOperationAction(ISD::CTTZ, MVT::i32, Expand);
+ setOperationAction(ISD::CTTZ, MVT::i64, Expand);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand);
+ setOperationAction(ISD::CTLZ, MVT::i32, Expand);
+ setOperationAction(ISD::CTLZ, MVT::i64, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Expand);
+ setOperationAction(ISD::ROTL, MVT::i32, Expand);
+ setOperationAction(ISD::ROTR, MVT::i32, Expand);
+ setOperationAction(ISD::BSWAP, MVT::i32, Expand);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
+ setOperationAction(ISD::FPOW, MVT::f64, Expand);
+ setOperationAction(ISD::FPOW, MVT::f32, Expand);
+
+ setOperationAction(ISD::SHL_PARTS, MVT::i32, Expand);
+ setOperationAction(ISD::SRA_PARTS, MVT::i32, Expand);
+ setOperationAction(ISD::SRL_PARTS, MVT::i32, Expand);
+
+ setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
+ setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
+
+ setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
+ setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
+
+ setOperationAction(ISD::EH_RETURN, MVT::Other, Custom);
+
+ if (Subtarget.isSubtargetV2()) {
+ setExceptionPointerRegister(Hexagon::R20);
+ setExceptionSelectorRegister(Hexagon::R21);
+ } else {
+ setExceptionPointerRegister(Hexagon::R0);
+ setExceptionSelectorRegister(Hexagon::R1);
+ }
- // Use the default implementation.
- setOperationAction(ISD::VAARG , MVT::Other, Expand);
- setOperationAction(ISD::VACOPY , MVT::Other, Expand);
- setOperationAction(ISD::VAEND , MVT::Other, Expand);
- setOperationAction(ISD::STACKSAVE , MVT::Other, Expand);
- setOperationAction(ISD::STACKRESTORE , MVT::Other, Expand);
+ // VASTART needs to be custom lowered to use the VarArgsFrameIndex.
+ setOperationAction(ISD::VASTART, MVT::Other, Custom);
+ // Use the default implementation.
+ setOperationAction(ISD::VAARG, MVT::Other, Expand);
+ setOperationAction(ISD::VACOPY, MVT::Other, Expand);
+ setOperationAction(ISD::VAEND, MVT::Other, Expand);
+ setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
+ setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
- setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Custom);
- setOperationAction(ISD::INLINEASM , MVT::Other, Custom);
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
+ setOperationAction(ISD::INLINEASM, MVT::Other, Custom);
- setMinFunctionAlignment(2);
+ setMinFunctionAlignment(2);
- // Needed for DYNAMIC_STACKALLOC expansion.
- unsigned StackRegister = TM.getRegisterInfo()->getStackRegister();
- setStackPointerRegisterToSaveRestore(StackRegister);
- setSchedulingPreference(Sched::VLIW);
+ // Needed for DYNAMIC_STACKALLOC expansion.
+ const HexagonRegisterInfo *QRI =
+ static_cast<const HexagonRegisterInfo *>(TM.getRegisterInfo());
+ setStackPointerRegisterToSaveRestore(QRI->getStackRegister());
+ setSchedulingPreference(Sched::VLIW);
}
-
const char*
HexagonTargetLowering::getTargetNodeName(unsigned Opcode) const {
switch (Opcode) {
@@ -1577,7 +1560,6 @@ HexagonTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::BR_JT: return LowerBR_JT(Op, DAG);
case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
- case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
case ISD::SELECT: return Op;
case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
case ISD::INLINEASM: return LowerINLINEASM(Op, DAG);
@@ -1641,8 +1623,7 @@ HexagonTargetLowering::getRegForInlineAsmConstraint(const
/// specified FP immediate natively. If false, the legalizer will
/// materialize the FP immediate as a load from a constant pool.
bool HexagonTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
- const HexagonRegisterInfo* QRI = TM.getRegisterInfo();
- return QRI->Subtarget.hasV5TOps();
+ return TM.getSubtarget<HexagonSubtarget>().hasV5TOps();
}
/// isLegalAddressingMode - Return true if the addressing mode represented by
diff --git a/lib/Target/Hexagon/HexagonISelLowering.h b/lib/Target/Hexagon/HexagonISelLowering.h
index 4f27c27..ec16cc8 100644
--- a/lib/Target/Hexagon/HexagonISelLowering.h
+++ b/lib/Target/Hexagon/HexagonISelLowering.h
@@ -74,8 +74,8 @@ namespace llvm {
unsigned& RetSize) const;
public:
- HexagonTargetMachine &TM;
- explicit HexagonTargetLowering(HexagonTargetMachine &targetmachine);
+ const TargetMachine &TM;
+ explicit HexagonTargetLowering(const TargetMachine &targetmachine);
/// IsEligibleForTailCallOptimization - Check whether the call is eligible
/// for tail call optimization. Targets which want to do tail call
@@ -124,7 +124,6 @@ namespace llvm {
const SmallVectorImpl<SDValue> &OutVals,
SDValue Callee) const;
- SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG& DAG) const;
SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
diff --git a/lib/Target/Hexagon/HexagonInstrInfo.cpp b/lib/Target/Hexagon/HexagonInstrInfo.cpp
index ea6367a..1c95e06 100644
--- a/lib/Target/Hexagon/HexagonInstrInfo.cpp
+++ b/lib/Target/Hexagon/HexagonInstrInfo.cpp
@@ -1538,14 +1538,13 @@ int HexagonInstrInfo::GetDotOldOp(const int opc) const {
int NewOp = opc;
if (isPredicated(NewOp) && isPredicatedNew(NewOp)) { // Get predicate old form
NewOp = Hexagon::getPredOldOpcode(NewOp);
- if (NewOp < 0)
- assert(0 && "Couldn't change predicate new instruction to its old form.");
+ assert(NewOp >= 0 &&
+ "Couldn't change predicate new instruction to its old form.");
}
if (isNewValueStore(NewOp)) { // Convert into non-new-value format
NewOp = Hexagon::getNonNVStore(NewOp);
- if (NewOp < 0)
- assert(0 && "Couldn't change new-value store to its old form.");
+ assert(NewOp >= 0 && "Couldn't change new-value store to its old form.");
}
return NewOp;
}
diff --git a/lib/Target/Hexagon/HexagonMachineScheduler.cpp b/lib/Target/Hexagon/HexagonMachineScheduler.cpp
index 7dd6e95..6fcaa20 100644
--- a/lib/Target/Hexagon/HexagonMachineScheduler.cpp
+++ b/lib/Target/Hexagon/HexagonMachineScheduler.cpp
@@ -20,7 +20,7 @@ using namespace llvm;
#define DEBUG_TYPE "misched"
-/// Platform specific modifications to DAG.
+/// Platform-specific modifications to DAG.
void VLIWMachineScheduler::postprocessDAG() {
SUnit* LastSequentialCall = nullptr;
// Currently we only catch the situation when compare gets scheduled
@@ -150,7 +150,7 @@ void VLIWMachineScheduler::schedule() {
buildDAGWithRegPressure();
- // Postprocess the DAG to add platform specific artificial dependencies.
+ // Postprocess the DAG to add platform-specific artificial dependencies.
postprocessDAG();
SmallVector<SUnit*, 8> TopRoots, BotRoots;
diff --git a/lib/Target/Hexagon/HexagonMachineScheduler.h b/lib/Target/Hexagon/HexagonMachineScheduler.h
index 99100a1..8c41086 100644
--- a/lib/Target/Hexagon/HexagonMachineScheduler.h
+++ b/lib/Target/Hexagon/HexagonMachineScheduler.h
@@ -100,7 +100,7 @@ public:
/// Schedule - This is called back from ScheduleDAGInstrs::Run() when it's
/// time to do some work.
virtual void schedule() override;
- /// Perform platform specific DAG postprocessing.
+ /// Perform platform-specific DAG postprocessing.
void postprocessDAG();
};
diff --git a/lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp b/lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp
index 9e1e0fd..b5db997 100644
--- a/lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp
+++ b/lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp
@@ -18,10 +18,8 @@ using namespace llvm;
bool llvm::flag_aligned_memcpy;
-HexagonSelectionDAGInfo::HexagonSelectionDAGInfo(const HexagonTargetMachine
- &TM)
- : TargetSelectionDAGInfo(TM) {
-}
+HexagonSelectionDAGInfo::HexagonSelectionDAGInfo(const DataLayout &DL)
+ : TargetSelectionDAGInfo(&DL) {}
HexagonSelectionDAGInfo::~HexagonSelectionDAGInfo() {
}
diff --git a/lib/Target/Hexagon/HexagonSelectionDAGInfo.h b/lib/Target/Hexagon/HexagonSelectionDAGInfo.h
index 8ba6108..b40b303 100644
--- a/lib/Target/Hexagon/HexagonSelectionDAGInfo.h
+++ b/lib/Target/Hexagon/HexagonSelectionDAGInfo.h
@@ -18,11 +18,9 @@
namespace llvm {
-class HexagonTargetMachine;
-
class HexagonSelectionDAGInfo : public TargetSelectionDAGInfo {
public:
- explicit HexagonSelectionDAGInfo(const HexagonTargetMachine &TM);
+ explicit HexagonSelectionDAGInfo(const DataLayout &DL);
~HexagonSelectionDAGInfo();
SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl,
diff --git a/lib/Target/Hexagon/HexagonSubtarget.cpp b/lib/Target/Hexagon/HexagonSubtarget.cpp
index 70c87fa..657893f 100644
--- a/lib/Target/Hexagon/HexagonSubtarget.cpp
+++ b/lib/Target/Hexagon/HexagonSubtarget.cpp
@@ -48,10 +48,8 @@ EnableIEEERndNear(
cl::Hidden, cl::ZeroOrMore, cl::init(false),
cl::desc("Generate non-chopped conversion from fp to int."));
-HexagonSubtarget::HexagonSubtarget(StringRef TT, StringRef CPU, StringRef FS):
- HexagonGenSubtargetInfo(TT, CPU, FS),
- CPUString(CPU.str()) {
-
+HexagonSubtarget &
+HexagonSubtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS) {
// If the programmer has not specified a Hexagon version, default to -mv4.
if (CPUString.empty())
CPUString = "hexagonv4";
@@ -70,6 +68,15 @@ HexagonSubtarget::HexagonSubtarget(StringRef TT, StringRef CPU, StringRef FS):
}
ParseSubtargetFeatures(CPUString, FS);
+ return *this;
+}
+
+HexagonSubtarget::HexagonSubtarget(StringRef TT, StringRef CPU, StringRef FS,
+ const TargetMachine &TM)
+ : HexagonGenSubtargetInfo(TT, CPU, FS), CPUString(CPU.str()),
+ DL("e-m:e-p:32:32-i1:32-i64:64-a:0-n32"),
+ InstrInfo(initializeSubtargetDependencies(CPU, FS)), TLInfo(TM),
+ TSInfo(DL), FrameLowering() {
// Initialize scheduling itinerary for the specified CPU.
InstrItins = getInstrItineraryForCPU(CPUString);
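
The new HexagonSubtarget constructor above relies on the initializeSubtargetDependencies idiom: the helper parses the CPU/feature strings, returns *this, and is invoked from the member initializer list so that the CPU string is valid by the time InstrInfo (which takes the subtarget by reference) is constructed. A minimal self-contained sketch of the idiom with placeholder classes (not the real Hexagon types):

```cpp
#include <string>

struct SubtargetLike;

struct InstrInfoLike {
  explicit InstrInfoLike(const SubtargetLike &ST);
};

struct SubtargetLike {
  std::string CPUString;   // declared (and therefore initialized) first
  InstrInfoLike InstrInfo; // initialized after CPUString

  // Parses the CPU string and returns *this so it can be called from the
  // member initializer list, guaranteeing it runs before InstrInfo does.
  SubtargetLike &initializeSubtargetDependencies(const std::string &CPU) {
    CPUString = CPU.empty() ? std::string("hexagonv4") : CPU; // default, as in the hunk
    return *this;
  }

  explicit SubtargetLike(const std::string &CPU)
      : InstrInfo(initializeSubtargetDependencies(CPU)) {}
};

InstrInfoLike::InstrInfoLike(const SubtargetLike &ST) {
  (void)ST.CPUString; // safe: the CPU string was set before this runs
}
```
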
diff --git a/lib/Target/Hexagon/HexagonSubtarget.h b/lib/Target/Hexagon/HexagonSubtarget.h
index 690bef0..b184e62 100644
--- a/lib/Target/Hexagon/HexagonSubtarget.h
+++ b/lib/Target/Hexagon/HexagonSubtarget.h
@@ -14,6 +14,11 @@
#ifndef Hexagon_SUBTARGET_H
#define Hexagon_SUBTARGET_H
+#include "HexagonFrameLowering.h"
+#include "HexagonInstrInfo.h"
+#include "HexagonISelLowering.h"
+#include "HexagonSelectionDAGInfo.h"
+#include "llvm/IR/DataLayout.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include <string>
@@ -28,6 +33,7 @@ namespace llvm {
class HexagonSubtarget : public HexagonGenSubtargetInfo {
virtual void anchor();
+
bool UseMemOps;
bool ModeIEEERndNear;
@@ -37,16 +43,35 @@ public:
};
HexagonArchEnum HexagonArchVersion;
+private:
std::string CPUString;
+ const DataLayout DL; // Calculates type size & alignment.
+ HexagonInstrInfo InstrInfo;
+ HexagonTargetLowering TLInfo;
+ HexagonSelectionDAGInfo TSInfo;
+ HexagonFrameLowering FrameLowering;
InstrItineraryData InstrItins;
public:
- HexagonSubtarget(StringRef TT, StringRef CPU, StringRef FS);
+ HexagonSubtarget(StringRef TT, StringRef CPU, StringRef FS,
+ const TargetMachine &TM);
/// getInstrItins - Return the instruction itineraries based on subtarget
/// selection.
const InstrItineraryData &getInstrItineraryData() const { return InstrItins; }
+ const HexagonInstrInfo *getInstrInfo() const { return &InstrInfo; }
+ const HexagonRegisterInfo *getRegisterInfo() const {
+ return &InstrInfo.getRegisterInfo();
+ }
+ const HexagonTargetLowering *getTargetLowering() const { return &TLInfo; }
+ const HexagonFrameLowering *getFrameLowering() const {
+ return &FrameLowering;
+ }
+ const HexagonSelectionDAGInfo *getSelectionDAGInfo() const { return &TSInfo; }
+ const DataLayout *getDataLayout() const { return &DL; }
+ HexagonSubtarget &initializeSubtargetDependencies(StringRef CPU,
+ StringRef FS);
/// ParseSubtargetFeatures - Parses features string setting specified
/// subtarget options. Definition of function is auto generated by tblgen.
diff --git a/lib/Target/Hexagon/HexagonTargetMachine.cpp b/lib/Target/Hexagon/HexagonTargetMachine.cpp
index b923764..7831410 100644
--- a/lib/Target/Hexagon/HexagonTargetMachine.cpp
+++ b/lib/Target/Hexagon/HexagonTargetMachine.cpp
@@ -67,15 +67,10 @@ SchedCustomRegistry("hexagon", "Run Hexagon's custom scheduler",
HexagonTargetMachine::HexagonTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
const TargetOptions &Options,
- Reloc::Model RM,
- CodeModel::Model CM,
+ Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL)
- : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
- DL("e-m:e-p:32:32-i1:32-i64:64-a:0-n32") ,
- Subtarget(TT, CPU, FS), InstrInfo(Subtarget), TLInfo(*this),
- TSInfo(*this),
- FrameLowering(Subtarget),
- InstrItins(&Subtarget.getInstrItineraryData()) {
+ : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
+ Subtarget(TT, CPU, FS, *this) {
initAsmInfo();
}
diff --git a/lib/Target/Hexagon/HexagonTargetMachine.h b/lib/Target/Hexagon/HexagonTargetMachine.h
index 70b835e..d88178e 100644
--- a/lib/Target/Hexagon/HexagonTargetMachine.h
+++ b/lib/Target/Hexagon/HexagonTargetMachine.h
@@ -14,12 +14,8 @@
#ifndef HexagonTARGETMACHINE_H
#define HexagonTARGETMACHINE_H
-#include "HexagonFrameLowering.h"
-#include "HexagonISelLowering.h"
#include "HexagonInstrInfo.h"
-#include "HexagonSelectionDAGInfo.h"
#include "HexagonSubtarget.h"
-#include "llvm/IR/DataLayout.h"
#include "llvm/Target/TargetMachine.h"
namespace llvm {
@@ -27,13 +23,7 @@ namespace llvm {
class Module;
class HexagonTargetMachine : public LLVMTargetMachine {
- const DataLayout DL; // Calculates type size & alignment.
HexagonSubtarget Subtarget;
- HexagonInstrInfo InstrInfo;
- HexagonTargetLowering TLInfo;
- HexagonSelectionDAGInfo TSInfo;
- HexagonFrameLowering FrameLowering;
- const InstrItineraryData* InstrItins;
public:
HexagonTargetMachine(const Target &T, StringRef TT,StringRef CPU,
@@ -42,33 +32,29 @@ public:
CodeGenOpt::Level OL);
const HexagonInstrInfo *getInstrInfo() const override {
- return &InstrInfo;
+ return getSubtargetImpl()->getInstrInfo();
}
const HexagonSubtarget *getSubtargetImpl() const override {
return &Subtarget;
}
const HexagonRegisterInfo *getRegisterInfo() const override {
- return &InstrInfo.getRegisterInfo();
+ return getSubtargetImpl()->getRegisterInfo();
}
-
const InstrItineraryData* getInstrItineraryData() const override {
- return InstrItins;
+ return &getSubtargetImpl()->getInstrItineraryData();
}
-
-
const HexagonTargetLowering* getTargetLowering() const override {
- return &TLInfo;
+ return getSubtargetImpl()->getTargetLowering();
}
-
const HexagonFrameLowering* getFrameLowering() const override {
- return &FrameLowering;
+ return getSubtargetImpl()->getFrameLowering();
}
-
const HexagonSelectionDAGInfo* getSelectionDAGInfo() const override {
- return &TSInfo;
+ return getSubtargetImpl()->getSelectionDAGInfo();
+ }
+ const DataLayout *getDataLayout() const override {
+ return getSubtargetImpl()->getDataLayout();
}
-
- const DataLayout *getDataLayout() const override { return &DL; }
static unsigned getModuleMatchQuality(const Module &M);
TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
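
The Hexagon hunks above, and the MSP430 hunks that follow, apply the same refactoring: the DataLayout and the codegen helper objects move from the TargetMachine into the subtarget, the TargetMachine accessors delegate through getSubtargetImpl(), and an initializeSubtargetDependencies() helper parses the CPU/feature strings before any member that depends on them is constructed. A minimal standalone sketch of that initialization-order idiom, using simplified stand-in types rather than the real LLVM classes:

    #include <string>

    struct Subtarget;  // forward declaration for the InstrInfo constructor

    struct InstrInfo {
      explicit InstrInfo(const Subtarget &ST);  // needs an already-parsed subtarget
    };

    struct Subtarget {
      std::string CPUString;  // declared (and therefore constructed) before InstrInfo_
      InstrInfo InstrInfo_;   // constructed only after the features are parsed

      // Runs from the member-init list below, before InstrInfo_ is built, so
      // CPUString (and, in the real code, the feature bits) are valid by the
      // time InstrInfo_ sees this subtarget.
      Subtarget &initializeSubtargetDependencies(const std::string &CPU,
                                                 const std::string &FS) {
        CPUString = CPU.empty() ? "generic" : CPU;
        (void)FS;  // ParseSubtargetFeatures(CPUString, FS) would run here
        return *this;
      }

      Subtarget(const std::string &CPU, const std::string &FS)
          : CPUString(), InstrInfo_(initializeSubtargetDependencies(CPU, FS)) {}
    };

    inline InstrInfo::InstrInfo(const Subtarget &ST) { (void)ST; }

Constructing InstrInfo (and TLInfo, TSInfo, FrameLowering) inside the subtarget in this order keeps their view of the target consistent with the parsed feature string, which is the ordering the Hexagon and MSP430 constructors in this patch rely on.
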
diff --git a/lib/Target/MSP430/MSP430FrameLowering.h b/lib/Target/MSP430/MSP430FrameLowering.h
index d464dd9..fadfeed 100644
--- a/lib/Target/MSP430/MSP430FrameLowering.h
+++ b/lib/Target/MSP430/MSP430FrameLowering.h
@@ -15,20 +15,15 @@
#define MSP430_FRAMEINFO_H
#include "MSP430.h"
-#include "MSP430Subtarget.h"
#include "llvm/Target/TargetFrameLowering.h"
namespace llvm {
- class MSP430Subtarget;
-
class MSP430FrameLowering : public TargetFrameLowering {
protected:
- const MSP430Subtarget &STI;
public:
- explicit MSP430FrameLowering(const MSP430Subtarget &sti)
- : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 2, -2, 2),
- STI(sti) {}
+ explicit MSP430FrameLowering()
+ : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 2, -2, 2) {}
/// emitProlog/emitEpilog - These methods insert prolog and epilog code into
/// the function.
diff --git a/lib/Target/MSP430/MSP430ISelLowering.cpp b/lib/Target/MSP430/MSP430ISelLowering.cpp
index c5901bc..3d3ee92 100644
--- a/lib/Target/MSP430/MSP430ISelLowering.cpp
+++ b/lib/Target/MSP430/MSP430ISelLowering.cpp
@@ -57,11 +57,8 @@ HWMultMode("msp430-hwmult-mode", cl::Hidden,
"Assume hardware multiplier cannot be used inside interrupts"),
clEnumValEnd));
-MSP430TargetLowering::MSP430TargetLowering(MSP430TargetMachine &tm) :
- TargetLowering(tm, new TargetLoweringObjectFileELF()),
- Subtarget(*tm.getSubtargetImpl()) {
-
- TD = getDataLayout();
+MSP430TargetLowering::MSP430TargetLowering(const TargetMachine &TM)
+ : TargetLowering(TM, new TargetLoweringObjectFileELF()) {
// Set up the register classes.
addRegisterClass(MVT::i8, &MSP430::GR8RegClass);
@@ -1032,7 +1029,7 @@ MSP430TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) const {
if (ReturnAddrIndex == 0) {
// Set up a frame object for the return address.
- uint64_t SlotSize = TD->getPointerSize();
+ uint64_t SlotSize = getDataLayout()->getPointerSize();
ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(SlotSize, -SlotSize,
true);
FuncInfo->setRAIndex(ReturnAddrIndex);
@@ -1055,7 +1052,7 @@ SDValue MSP430TargetLowering::LowerRETURNADDR(SDValue Op,
if (Depth > 0) {
SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
SDValue Offset =
- DAG.getConstant(TD->getPointerSize(), MVT::i16);
+ DAG.getConstant(getDataLayout()->getPointerSize(), MVT::i16);
return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
DAG.getNode(ISD::ADD, dl, getPointerTy(),
FrameAddr, Offset),
diff --git a/lib/Target/MSP430/MSP430ISelLowering.h b/lib/Target/MSP430/MSP430ISelLowering.h
index 3ced61d..3e2f344 100644
--- a/lib/Target/MSP430/MSP430ISelLowering.h
+++ b/lib/Target/MSP430/MSP430ISelLowering.h
@@ -66,12 +66,9 @@ namespace llvm {
};
}
- class MSP430Subtarget;
- class MSP430TargetMachine;
-
class MSP430TargetLowering : public TargetLowering {
public:
- explicit MSP430TargetLowering(MSP430TargetMachine &TM);
+ explicit MSP430TargetLowering(const TargetMachine &TM);
MVT getScalarShiftAmountTy(EVT LHSTy) const override { return MVT::i8; }
@@ -170,9 +167,6 @@ namespace llvm {
SDValue &Offset,
ISD::MemIndexedMode &AM,
SelectionDAG &DAG) const override;
-
- const MSP430Subtarget &Subtarget;
- const DataLayout *TD;
};
} // namespace llvm
diff --git a/lib/Target/MSP430/MSP430InstrInfo.cpp b/lib/Target/MSP430/MSP430InstrInfo.cpp
index 0c04ddb..ccb6c09 100644
--- a/lib/Target/MSP430/MSP430InstrInfo.cpp
+++ b/lib/Target/MSP430/MSP430InstrInfo.cpp
@@ -30,9 +30,9 @@ using namespace llvm;
// Pin the vtable to this file.
void MSP430InstrInfo::anchor() {}
-MSP430InstrInfo::MSP430InstrInfo(MSP430TargetMachine &tm)
+MSP430InstrInfo::MSP430InstrInfo(MSP430Subtarget &STI)
: MSP430GenInstrInfo(MSP430::ADJCALLSTACKDOWN, MSP430::ADJCALLSTACKUP),
- RI(tm) {}
+ RI() {}
void MSP430InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
diff --git a/lib/Target/MSP430/MSP430InstrInfo.h b/lib/Target/MSP430/MSP430InstrInfo.h
index 1ffcebb..e6baaef 100644
--- a/lib/Target/MSP430/MSP430InstrInfo.h
+++ b/lib/Target/MSP430/MSP430InstrInfo.h
@@ -22,7 +22,7 @@
namespace llvm {
-class MSP430TargetMachine;
+class MSP430Subtarget;
/// MSP430II - This namespace holds all of the target specific flags that
/// instruction info tracks.
@@ -44,7 +44,7 @@ class MSP430InstrInfo : public MSP430GenInstrInfo {
const MSP430RegisterInfo RI;
virtual void anchor();
public:
- explicit MSP430InstrInfo(MSP430TargetMachine &TM);
+ explicit MSP430InstrInfo(MSP430Subtarget &STI);
/// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
/// such, whenever a client has an instance of instruction info, it should
diff --git a/lib/Target/MSP430/MSP430RegisterInfo.cpp b/lib/Target/MSP430/MSP430RegisterInfo.cpp
index 341fb64..691bcee 100644
--- a/lib/Target/MSP430/MSP430RegisterInfo.cpp
+++ b/lib/Target/MSP430/MSP430RegisterInfo.cpp
@@ -32,10 +32,8 @@ using namespace llvm;
#include "MSP430GenRegisterInfo.inc"
// FIXME: Provide proper call frame setup / destroy opcodes.
-MSP430RegisterInfo::MSP430RegisterInfo(MSP430TargetMachine &tm)
- : MSP430GenRegisterInfo(MSP430::PCW), TM(tm) {
- StackAlign = TM.getFrameLowering()->getStackAlignment();
-}
+MSP430RegisterInfo::MSP430RegisterInfo()
+ : MSP430GenRegisterInfo(MSP430::PCW) {}
const MCPhysReg*
MSP430RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
diff --git a/lib/Target/MSP430/MSP430RegisterInfo.h b/lib/Target/MSP430/MSP430RegisterInfo.h
index a607528..cb01961 100644
--- a/lib/Target/MSP430/MSP430RegisterInfo.h
+++ b/lib/Target/MSP430/MSP430RegisterInfo.h
@@ -21,18 +21,9 @@
namespace llvm {
-class TargetInstrInfo;
-class MSP430TargetMachine;
-
struct MSP430RegisterInfo : public MSP430GenRegisterInfo {
-private:
- MSP430TargetMachine &TM;
-
- /// StackAlign - Default stack alignment.
- ///
- unsigned StackAlign;
public:
- MSP430RegisterInfo(MSP430TargetMachine &tm);
+ MSP430RegisterInfo();
/// Code Generation virtual methods...
const MCPhysReg *
diff --git a/lib/Target/MSP430/MSP430SelectionDAGInfo.cpp b/lib/Target/MSP430/MSP430SelectionDAGInfo.cpp
index c700383..3897ef6 100644
--- a/lib/Target/MSP430/MSP430SelectionDAGInfo.cpp
+++ b/lib/Target/MSP430/MSP430SelectionDAGInfo.cpp
@@ -16,9 +16,8 @@ using namespace llvm;
#define DEBUG_TYPE "msp430-selectiondag-info"
-MSP430SelectionDAGInfo::MSP430SelectionDAGInfo(const MSP430TargetMachine &TM)
- : TargetSelectionDAGInfo(TM) {
-}
+MSP430SelectionDAGInfo::MSP430SelectionDAGInfo(const DataLayout &DL)
+ : TargetSelectionDAGInfo(&DL) {}
MSP430SelectionDAGInfo::~MSP430SelectionDAGInfo() {
}
diff --git a/lib/Target/MSP430/MSP430SelectionDAGInfo.h b/lib/Target/MSP430/MSP430SelectionDAGInfo.h
index fa81948..cb04adc 100644
--- a/lib/Target/MSP430/MSP430SelectionDAGInfo.h
+++ b/lib/Target/MSP430/MSP430SelectionDAGInfo.h
@@ -22,7 +22,7 @@ class MSP430TargetMachine;
class MSP430SelectionDAGInfo : public TargetSelectionDAGInfo {
public:
- explicit MSP430SelectionDAGInfo(const MSP430TargetMachine &TM);
+ explicit MSP430SelectionDAGInfo(const DataLayout &DL);
~MSP430SelectionDAGInfo();
};
diff --git a/lib/Target/MSP430/MSP430Subtarget.cpp b/lib/Target/MSP430/MSP430Subtarget.cpp
index 68ad091..dbddc52 100644
--- a/lib/Target/MSP430/MSP430Subtarget.cpp
+++ b/lib/Target/MSP430/MSP430Subtarget.cpp
@@ -25,12 +25,15 @@ using namespace llvm;
void MSP430Subtarget::anchor() { }
-MSP430Subtarget::MSP430Subtarget(const std::string &TT,
- const std::string &CPU,
- const std::string &FS) :
- MSP430GenSubtargetInfo(TT, CPU, FS) {
- std::string CPUName = "generic";
-
- // Parse features string.
- ParseSubtargetFeatures(CPUName, FS);
+MSP430Subtarget &MSP430Subtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS) {
+ ParseSubtargetFeatures("generic", FS);
+ return *this;
}
+
+MSP430Subtarget::MSP430Subtarget(const std::string &TT, const std::string &CPU,
+ const std::string &FS, const TargetMachine &TM)
+ : MSP430GenSubtargetInfo(TT, CPU, FS),
+ // FIXME: Check DataLayout string.
+ DL("e-m:e-p:16:16-i32:16:32-n8:16"), FrameLowering(),
+ InstrInfo(initializeSubtargetDependencies(CPU, FS)), TLInfo(TM),
+ TSInfo(DL) {}
diff --git a/lib/Target/MSP430/MSP430Subtarget.h b/lib/Target/MSP430/MSP430Subtarget.h
index 4d8792e..0152ad1 100644
--- a/lib/Target/MSP430/MSP430Subtarget.h
+++ b/lib/Target/MSP430/MSP430Subtarget.h
@@ -14,6 +14,12 @@
#ifndef LLVM_TARGET_MSP430_SUBTARGET_H
#define LLVM_TARGET_MSP430_SUBTARGET_H
+#include "MSP430FrameLowering.h"
+#include "MSP430InstrInfo.h"
+#include "MSP430ISelLowering.h"
+#include "MSP430RegisterInfo.h"
+#include "MSP430SelectionDAGInfo.h"
+#include "llvm/IR/DataLayout.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include <string>
@@ -26,16 +32,33 @@ class StringRef;
class MSP430Subtarget : public MSP430GenSubtargetInfo {
virtual void anchor();
bool ExtendedInsts;
+ const DataLayout DL; // Calculates type size & alignment
+ MSP430FrameLowering FrameLowering;
+ MSP430InstrInfo InstrInfo;
+ MSP430TargetLowering TLInfo;
+ MSP430SelectionDAGInfo TSInfo;
+
public:
/// This constructor initializes the data members to match that
/// of the specified triple.
///
MSP430Subtarget(const std::string &TT, const std::string &CPU,
- const std::string &FS);
+ const std::string &FS, const TargetMachine &TM);
+
+ MSP430Subtarget &initializeSubtargetDependencies(StringRef CPU, StringRef FS);
/// ParseSubtargetFeatures - Parses features string setting specified
/// subtarget options. Definition of function is auto generated by tblgen.
void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
+
+ const TargetFrameLowering *getFrameLowering() const { return &FrameLowering; }
+ const MSP430InstrInfo *getInstrInfo() const { return &InstrInfo; }
+ const DataLayout *getDataLayout() const { return &DL; }
+ const TargetRegisterInfo *getRegisterInfo() const {
+ return &InstrInfo.getRegisterInfo();
+ }
+ const MSP430TargetLowering *getTargetLowering() const { return &TLInfo; }
+ const MSP430SelectionDAGInfo *getSelectionDAGInfo() const { return &TSInfo; }
};
} // End llvm namespace
diff --git a/lib/Target/MSP430/MSP430TargetMachine.cpp b/lib/Target/MSP430/MSP430TargetMachine.cpp
index 50be2be..5ca36f2 100644
--- a/lib/Target/MSP430/MSP430TargetMachine.cpp
+++ b/lib/Target/MSP430/MSP430TargetMachine.cpp
@@ -24,19 +24,13 @@ extern "C" void LLVMInitializeMSP430Target() {
RegisterTargetMachine<MSP430TargetMachine> X(TheMSP430Target);
}
-MSP430TargetMachine::MSP430TargetMachine(const Target &T,
- StringRef TT,
- StringRef CPU,
- StringRef FS,
+MSP430TargetMachine::MSP430TargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL)
- : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
- Subtarget(TT, CPU, FS),
- // FIXME: Check DataLayout string.
- DL("e-m:e-p:16:16-i32:16:32-n8:16"),
- InstrInfo(*this), TLInfo(*this), TSInfo(*this),
- FrameLowering(Subtarget) {
+ : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
+ Subtarget(TT, CPU, FS, *this) {
initAsmInfo();
}
diff --git a/lib/Target/MSP430/MSP430TargetMachine.h b/lib/Target/MSP430/MSP430TargetMachine.h
index ea5d407..efa8403 100644
--- a/lib/Target/MSP430/MSP430TargetMachine.h
+++ b/lib/Target/MSP430/MSP430TargetMachine.h
@@ -15,13 +15,7 @@
#ifndef LLVM_TARGET_MSP430_TARGETMACHINE_H
#define LLVM_TARGET_MSP430_TARGETMACHINE_H
-#include "MSP430FrameLowering.h"
-#include "MSP430ISelLowering.h"
-#include "MSP430InstrInfo.h"
-#include "MSP430RegisterInfo.h"
-#include "MSP430SelectionDAGInfo.h"
#include "MSP430Subtarget.h"
-#include "llvm/IR/DataLayout.h"
#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetMachine.h"
@@ -31,11 +25,6 @@ namespace llvm {
///
class MSP430TargetMachine : public LLVMTargetMachine {
MSP430Subtarget Subtarget;
- const DataLayout DL; // Calculates type size & alignment
- MSP430InstrInfo InstrInfo;
- MSP430TargetLowering TLInfo;
- MSP430SelectionDAGInfo TSInfo;
- MSP430FrameLowering FrameLowering;
public:
MSP430TargetMachine(const Target &T, StringRef TT,
@@ -44,22 +33,25 @@ public:
CodeGenOpt::Level OL);
const TargetFrameLowering *getFrameLowering() const override {
- return &FrameLowering;
+ return getSubtargetImpl()->getFrameLowering();
+ }
+ const MSP430InstrInfo *getInstrInfo() const override {
+ return getSubtargetImpl()->getInstrInfo();
+ }
+ const DataLayout *getDataLayout() const override {
+ return getSubtargetImpl()->getDataLayout();
+ }
+ const MSP430Subtarget *getSubtargetImpl() const override {
+ return &Subtarget;
}
- const MSP430InstrInfo *getInstrInfo() const override { return &InstrInfo; }
- const DataLayout *getDataLayout() const override { return &DL;}
- const MSP430Subtarget *getSubtargetImpl() const override { return &Subtarget; }
-
const TargetRegisterInfo *getRegisterInfo() const override {
- return &InstrInfo.getRegisterInfo();
+ return getSubtargetImpl()->getRegisterInfo();
}
-
const MSP430TargetLowering *getTargetLowering() const override {
- return &TLInfo;
+ return getSubtargetImpl()->getTargetLowering();
}
-
- const MSP430SelectionDAGInfo* getSelectionDAGInfo() const override {
- return &TSInfo;
+ const MSP430SelectionDAGInfo *getSelectionDAGInfo() const override {
+ return getSubtargetImpl()->getSelectionDAGInfo();
}
TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
}; // MSP430TargetMachine.
diff --git a/lib/Target/Mips/Android.mk b/lib/Target/Mips/Android.mk
index 4e8831c..9f437f8 100644
--- a/lib/Target/Mips/Android.mk
+++ b/lib/Target/Mips/Android.mk
@@ -8,6 +8,7 @@ mips_codegen_TBLGEN_TABLES := \
MipsGenMCPseudoLowering.inc \
MipsGenAsmWriter.inc \
MipsGenDAGISel.inc \
+ MipsGenFastISel.inc \
MipsGenCallingConv.inc \
MipsGenSubtargetInfo.inc
diff --git a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
index 86fd386..0c06be8 100644
--- a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
+++ b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
@@ -72,72 +72,69 @@ class MipsAsmParser : public MCTargetAsmParser {
#define GET_ASSEMBLER_HEADER
#include "MipsGenAsmMatcher.inc"
+ unsigned checkTargetMatchPredicate(MCInst &Inst) override;
+
bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
- SmallVectorImpl<MCParsedAsmOperand *> &Operands,
- MCStreamer &Out, unsigned &ErrorInfo,
+ OperandVector &Operands, MCStreamer &Out,
+ unsigned &ErrorInfo,
bool MatchingInlineAsm) override;
/// Parse a register as used in CFI directives
bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
- bool ParseParenSuffix(StringRef Name,
- SmallVectorImpl<MCParsedAsmOperand *> &Operands);
+ bool ParseParenSuffix(StringRef Name, OperandVector &Operands);
- bool ParseBracketSuffix(StringRef Name,
- SmallVectorImpl<MCParsedAsmOperand *> &Operands);
+ bool ParseBracketSuffix(StringRef Name, OperandVector &Operands);
- bool
- ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc,
- SmallVectorImpl<MCParsedAsmOperand *> &Operands) override;
+ bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
+ SMLoc NameLoc, OperandVector &Operands) override;
bool ParseDirective(AsmToken DirectiveID) override;
- MipsAsmParser::OperandMatchResultTy
- parseMemOperand(SmallVectorImpl<MCParsedAsmOperand *> &Operands);
-
- MipsAsmParser::OperandMatchResultTy MatchAnyRegisterNameWithoutDollar(
- SmallVectorImpl<MCParsedAsmOperand *> &Operands, StringRef Identifier,
- SMLoc S);
+ MipsAsmParser::OperandMatchResultTy parseMemOperand(OperandVector &Operands);
MipsAsmParser::OperandMatchResultTy
- MatchAnyRegisterWithoutDollar(SmallVectorImpl<MCParsedAsmOperand *> &Operands,
- SMLoc S);
+ MatchAnyRegisterNameWithoutDollar(OperandVector &Operands,
+ StringRef Identifier, SMLoc S);
MipsAsmParser::OperandMatchResultTy
- ParseAnyRegister(SmallVectorImpl<MCParsedAsmOperand *> &Operands);
+ MatchAnyRegisterWithoutDollar(OperandVector &Operands, SMLoc S);
- MipsAsmParser::OperandMatchResultTy
- ParseImm(SmallVectorImpl<MCParsedAsmOperand *> &Operands);
+ MipsAsmParser::OperandMatchResultTy ParseAnyRegister(OperandVector &Operands);
- MipsAsmParser::OperandMatchResultTy
- ParseJumpTarget(SmallVectorImpl<MCParsedAsmOperand *> &Operands);
+ MipsAsmParser::OperandMatchResultTy ParseImm(OperandVector &Operands);
- MipsAsmParser::OperandMatchResultTy
- parseInvNum(SmallVectorImpl<MCParsedAsmOperand *> &Operands);
+ MipsAsmParser::OperandMatchResultTy ParseJumpTarget(OperandVector &Operands);
- MipsAsmParser::OperandMatchResultTy
- ParseLSAImm(SmallVectorImpl<MCParsedAsmOperand *> &Operands);
+ MipsAsmParser::OperandMatchResultTy parseInvNum(OperandVector &Operands);
- bool searchSymbolAlias(SmallVectorImpl<MCParsedAsmOperand *> &Operands);
+ MipsAsmParser::OperandMatchResultTy ParseLSAImm(OperandVector &Operands);
- bool ParseOperand(SmallVectorImpl<MCParsedAsmOperand *> &,
- StringRef Mnemonic);
+ bool searchSymbolAlias(OperandVector &Operands);
+
+ bool ParseOperand(OperandVector &, StringRef Mnemonic);
bool needsExpansion(MCInst &Inst);
- void expandInstruction(MCInst &Inst, SMLoc IDLoc,
+ // Expands assembly pseudo instructions.
+ // Returns false on success, true otherwise.
+ bool expandInstruction(MCInst &Inst, SMLoc IDLoc,
SmallVectorImpl<MCInst> &Instructions);
- void expandLoadImm(MCInst &Inst, SMLoc IDLoc,
+
+ bool expandLoadImm(MCInst &Inst, SMLoc IDLoc,
SmallVectorImpl<MCInst> &Instructions);
- void expandLoadAddressImm(MCInst &Inst, SMLoc IDLoc,
+
+ bool expandLoadAddressImm(MCInst &Inst, SMLoc IDLoc,
SmallVectorImpl<MCInst> &Instructions);
- void expandLoadAddressReg(MCInst &Inst, SMLoc IDLoc,
+
+ bool expandLoadAddressReg(MCInst &Inst, SMLoc IDLoc,
SmallVectorImpl<MCInst> &Instructions);
+
void expandMemInst(MCInst &Inst, SMLoc IDLoc,
SmallVectorImpl<MCInst> &Instructions, bool isLoad,
bool isImmOpnd);
- bool reportParseError(StringRef ErrorMsg);
- bool reportParseError(SMLoc Loc, StringRef ErrorMsg);
+ bool reportParseError(Twine ErrorMsg);
+ bool reportParseError(SMLoc Loc, Twine ErrorMsg);
bool parseMemOffset(const MCExpr *&Res, bool isParenExpr);
bool parseRelocOperand(const MCExpr *&Res);
@@ -159,32 +156,20 @@ class MipsAsmParser : public MCTargetAsmParser {
bool parseSetReorderDirective();
bool parseSetNoReorderDirective();
bool parseSetNoMips16Directive();
+ bool parseSetFpDirective();
bool parseSetAssignment();
bool parseDataDirective(unsigned Size, SMLoc L);
bool parseDirectiveGpWord();
bool parseDirectiveGpDWord();
+ bool parseDirectiveModule();
+ bool parseDirectiveModuleFP();
+ bool parseFpABIValue(MipsABIFlagsSection::FpABIKind &FpABI,
+ StringRef Directive);
MCSymbolRefExpr::VariantKind getVariantKind(StringRef Symbol);
- bool isGP64() const {
- return (STI.getFeatureBits() & Mips::FeatureGP64Bit) != 0;
- }
-
- bool isFP64() const {
- return (STI.getFeatureBits() & Mips::FeatureFP64Bit) != 0;
- }
-
- bool isN32() const { return STI.getFeatureBits() & Mips::FeatureN32; }
- bool isN64() const { return STI.getFeatureBits() & Mips::FeatureN64; }
-
- bool isMicroMips() const {
- return STI.getFeatureBits() & Mips::FeatureMicroMips;
- }
-
- bool parseRegister(unsigned &RegNum);
-
bool eatComma(StringRef ErrorStr);
int matchCPURegisterName(StringRef Symbol);
@@ -205,7 +190,7 @@ class MipsAsmParser : public MCTargetAsmParser {
unsigned getGPR(int RegNo);
- int getATReg();
+ int getATReg(SMLoc Loc);
bool processInstruction(MCInst &Inst, SMLoc IDLoc,
SmallVectorImpl<MCInst> &Instructions);
@@ -230,23 +215,85 @@ class MipsAsmParser : public MCTargetAsmParser {
}
public:
+ enum MipsMatchResultTy {
+ Match_RequiresDifferentSrcAndDst = FIRST_TARGET_MATCH_RESULT_TY
+#define GET_OPERAND_DIAGNOSTIC_TYPES
+#include "MipsGenAsmMatcher.inc"
+#undef GET_OPERAND_DIAGNOSTIC_TYPES
+
+ };
+
MipsAsmParser(MCSubtargetInfo &sti, MCAsmParser &parser,
- const MCInstrInfo &MII,
- const MCTargetOptions &Options)
+ const MCInstrInfo &MII, const MCTargetOptions &Options)
: MCTargetAsmParser(), STI(sti), Parser(parser) {
// Initialize the set of available features.
setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
+ getTargetStreamer().updateABIInfo(*this);
+
// Assert exactly one ABI was chosen.
assert((((STI.getFeatureBits() & Mips::FeatureO32) != 0) +
((STI.getFeatureBits() & Mips::FeatureEABI) != 0) +
((STI.getFeatureBits() & Mips::FeatureN32) != 0) +
((STI.getFeatureBits() & Mips::FeatureN64) != 0)) == 1);
+
+ if (!isABI_O32() && !allowOddSPReg() != 0)
+ report_fatal_error("-mno-odd-spreg requires the O32 ABI");
}
MCAsmParser &getParser() const { return Parser; }
MCAsmLexer &getLexer() const { return Parser.getLexer(); }
+ /// True if all of $fcc0 - $fcc7 exist for the current ISA.
+ bool hasEightFccRegisters() const { return hasMips4() || hasMips32(); }
+
+ bool isGP64bit() const { return STI.getFeatureBits() & Mips::FeatureGP64Bit; }
+ bool isFP64bit() const { return STI.getFeatureBits() & Mips::FeatureFP64Bit; }
+ bool isABI_N32() const { return STI.getFeatureBits() & Mips::FeatureN32; }
+ bool isABI_N64() const { return STI.getFeatureBits() & Mips::FeatureN64; }
+ bool isABI_O32() const { return STI.getFeatureBits() & Mips::FeatureO32; }
+ bool isABI_FPXX() const { return false; } // TODO: add check for FeatureXX
+
+ bool allowOddSPReg() const {
+ return !(STI.getFeatureBits() & Mips::FeatureNoOddSPReg);
+ }
+
+ bool inMicroMipsMode() const {
+ return STI.getFeatureBits() & Mips::FeatureMicroMips;
+ }
+ bool hasMips1() const { return STI.getFeatureBits() & Mips::FeatureMips1; }
+ bool hasMips2() const { return STI.getFeatureBits() & Mips::FeatureMips2; }
+ bool hasMips3() const { return STI.getFeatureBits() & Mips::FeatureMips3; }
+ bool hasMips4() const { return STI.getFeatureBits() & Mips::FeatureMips4; }
+ bool hasMips5() const { return STI.getFeatureBits() & Mips::FeatureMips5; }
+ bool hasMips32() const {
+ return (STI.getFeatureBits() & Mips::FeatureMips32);
+ }
+ bool hasMips64() const {
+ return (STI.getFeatureBits() & Mips::FeatureMips64);
+ }
+ bool hasMips32r2() const {
+ return (STI.getFeatureBits() & Mips::FeatureMips32r2);
+ }
+ bool hasMips64r2() const {
+ return (STI.getFeatureBits() & Mips::FeatureMips64r2);
+ }
+ bool hasMips32r6() const {
+ return (STI.getFeatureBits() & Mips::FeatureMips32r6);
+ }
+ bool hasMips64r6() const {
+ return (STI.getFeatureBits() & Mips::FeatureMips64r6);
+ }
+ bool hasDSP() const { return (STI.getFeatureBits() & Mips::FeatureDSP); }
+ bool hasDSPR2() const { return (STI.getFeatureBits() & Mips::FeatureDSPR2); }
+ bool hasMSA() const { return (STI.getFeatureBits() & Mips::FeatureMSA); }
+
+ bool inMips16Mode() const {
+ return STI.getFeatureBits() & Mips::FeatureMips16;
+ }
+  // TODO: see how we can get this info.
+ bool mipsSEUsesSoftFloat() const { return false; }
+
/// Warn if RegNo is the current assembler temporary.
void WarnIfAssemblerTemporary(int RegNo, SMLoc Loc);
};
@@ -261,9 +308,9 @@ public:
/// Broad categories of register classes
/// The exact class is finalized by the render method.
enum RegKind {
- RegKind_GPR = 1, /// GPR32 and GPR64 (depending on isGP64())
+ RegKind_GPR = 1, /// GPR32 and GPR64 (depending on isGP64bit())
RegKind_FGR = 2, /// FGR32, FGR64, AFGR64 (depending on context and
- /// isFP64())
+ /// isFP64bit())
RegKind_FCC = 4, /// FCC
RegKind_MSA128 = 8, /// MSA128[BHWD] (makes no difference which)
RegKind_MSACtrl = 16, /// MSA control registers
@@ -289,9 +336,11 @@ private:
k_Token /// A simple token
} Kind;
+public:
MipsOperand(KindTy K, MipsAsmParser &Parser)
: MCParsedAsmOperand(), Kind(K), AsmParser(Parser) {}
+private:
/// For diagnostics, and checking the assembler temporary
MipsAsmParser &AsmParser;
@@ -330,10 +379,11 @@ private:
SMLoc StartLoc, EndLoc;
/// Internal constructor for register kinds
- static MipsOperand *CreateReg(unsigned Index, RegKind RegKind,
- const MCRegisterInfo *RegInfo, SMLoc S, SMLoc E,
- MipsAsmParser &Parser) {
- MipsOperand *Op = new MipsOperand(k_RegisterIndex, Parser);
+ static std::unique_ptr<MipsOperand> CreateReg(unsigned Index, RegKind RegKind,
+ const MCRegisterInfo *RegInfo,
+ SMLoc S, SMLoc E,
+ MipsAsmParser &Parser) {
+ auto Op = make_unique<MipsOperand>(k_RegisterIndex, Parser);
Op->RegIdx.Index = Index;
Op->RegIdx.RegInfo = RegInfo;
Op->RegIdx.Kind = RegKind;
@@ -521,6 +571,10 @@ public:
void addFGR32AsmRegOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
Inst.addOperand(MCOperand::CreateReg(getFGR32Reg()));
+ // FIXME: We ought to do this for -integrated-as without -via-file-asm too.
+ if (!AsmParser.allowOddSPReg() && RegIdx.Index & 1)
+ AsmParser.Error(StartLoc, "-mno-odd-spreg prohibits the use of odd FPU "
+ "registers");
}
void addFGRH32AsmRegOperands(MCInst &Inst, unsigned N) const {
@@ -612,6 +666,12 @@ public:
return Kind == k_Token;
}
bool isMem() const override { return Kind == k_Memory; }
+ bool isConstantMemOff() const {
+ return isMem() && dyn_cast<MCConstantExpr>(getMemOff());
+ }
+ template <unsigned Bits> bool isMemWithSimmOffset() const {
+ return isMem() && isConstantMemOff() && isInt<Bits>(getConstantMemOff());
+ }
bool isInvNum() const { return Kind == k_Immediate; }
bool isLSAImm() const {
if (!isConstantImm())
@@ -656,9 +716,13 @@ public:
return Mem.Off;
}
- static MipsOperand *CreateToken(StringRef Str, SMLoc S,
- MipsAsmParser &Parser) {
- MipsOperand *Op = new MipsOperand(k_Token, Parser);
+ int64_t getConstantMemOff() const {
+ return static_cast<const MCConstantExpr *>(getMemOff())->getValue();
+ }
+
+ static std::unique_ptr<MipsOperand> CreateToken(StringRef Str, SMLoc S,
+ MipsAsmParser &Parser) {
+ auto Op = make_unique<MipsOperand>(k_Token, Parser);
Op->Tok.Data = Str.data();
Op->Tok.Length = Str.size();
Op->StartLoc = S;
@@ -668,74 +732,75 @@ public:
/// Create a numeric register (e.g. $1). The exact register remains
/// unresolved until an instruction successfully matches
- static MipsOperand *CreateNumericReg(unsigned Index,
- const MCRegisterInfo *RegInfo, SMLoc S,
- SMLoc E, MipsAsmParser &Parser) {
+ static std::unique_ptr<MipsOperand>
+ CreateNumericReg(unsigned Index, const MCRegisterInfo *RegInfo, SMLoc S,
+ SMLoc E, MipsAsmParser &Parser) {
DEBUG(dbgs() << "CreateNumericReg(" << Index << ", ...)\n");
return CreateReg(Index, RegKind_Numeric, RegInfo, S, E, Parser);
}
/// Create a register that is definitely a GPR.
/// This is typically only used for named registers such as $gp.
- static MipsOperand *CreateGPRReg(unsigned Index,
- const MCRegisterInfo *RegInfo, SMLoc S,
- SMLoc E, MipsAsmParser &Parser) {
+ static std::unique_ptr<MipsOperand>
+ CreateGPRReg(unsigned Index, const MCRegisterInfo *RegInfo, SMLoc S, SMLoc E,
+ MipsAsmParser &Parser) {
return CreateReg(Index, RegKind_GPR, RegInfo, S, E, Parser);
}
/// Create a register that is definitely a FGR.
/// This is typically only used for named registers such as $f0.
- static MipsOperand *CreateFGRReg(unsigned Index,
- const MCRegisterInfo *RegInfo, SMLoc S,
- SMLoc E, MipsAsmParser &Parser) {
+ static std::unique_ptr<MipsOperand>
+ CreateFGRReg(unsigned Index, const MCRegisterInfo *RegInfo, SMLoc S, SMLoc E,
+ MipsAsmParser &Parser) {
return CreateReg(Index, RegKind_FGR, RegInfo, S, E, Parser);
}
/// Create a register that is definitely an FCC.
/// This is typically only used for named registers such as $fcc0.
- static MipsOperand *CreateFCCReg(unsigned Index,
- const MCRegisterInfo *RegInfo, SMLoc S,
- SMLoc E, MipsAsmParser &Parser) {
+ static std::unique_ptr<MipsOperand>
+ CreateFCCReg(unsigned Index, const MCRegisterInfo *RegInfo, SMLoc S, SMLoc E,
+ MipsAsmParser &Parser) {
return CreateReg(Index, RegKind_FCC, RegInfo, S, E, Parser);
}
/// Create a register that is definitely an ACC.
/// This is typically only used for named registers such as $ac0.
- static MipsOperand *CreateACCReg(unsigned Index,
- const MCRegisterInfo *RegInfo, SMLoc S,
- SMLoc E, MipsAsmParser &Parser) {
+ static std::unique_ptr<MipsOperand>
+ CreateACCReg(unsigned Index, const MCRegisterInfo *RegInfo, SMLoc S, SMLoc E,
+ MipsAsmParser &Parser) {
return CreateReg(Index, RegKind_ACC, RegInfo, S, E, Parser);
}
/// Create a register that is definitely an MSA128.
/// This is typically only used for named registers such as $w0.
- static MipsOperand *CreateMSA128Reg(unsigned Index,
- const MCRegisterInfo *RegInfo, SMLoc S,
- SMLoc E, MipsAsmParser &Parser) {
+ static std::unique_ptr<MipsOperand>
+ CreateMSA128Reg(unsigned Index, const MCRegisterInfo *RegInfo, SMLoc S,
+ SMLoc E, MipsAsmParser &Parser) {
return CreateReg(Index, RegKind_MSA128, RegInfo, S, E, Parser);
}
/// Create a register that is definitely an MSACtrl.
/// This is typically only used for named registers such as $msaaccess.
- static MipsOperand *CreateMSACtrlReg(unsigned Index,
- const MCRegisterInfo *RegInfo, SMLoc S,
- SMLoc E, MipsAsmParser &Parser) {
+ static std::unique_ptr<MipsOperand>
+ CreateMSACtrlReg(unsigned Index, const MCRegisterInfo *RegInfo, SMLoc S,
+ SMLoc E, MipsAsmParser &Parser) {
return CreateReg(Index, RegKind_MSACtrl, RegInfo, S, E, Parser);
}
- static MipsOperand *CreateImm(const MCExpr *Val, SMLoc S, SMLoc E,
- MipsAsmParser &Parser) {
- MipsOperand *Op = new MipsOperand(k_Immediate, Parser);
+ static std::unique_ptr<MipsOperand>
+ CreateImm(const MCExpr *Val, SMLoc S, SMLoc E, MipsAsmParser &Parser) {
+ auto Op = make_unique<MipsOperand>(k_Immediate, Parser);
Op->Imm.Val = Val;
Op->StartLoc = S;
Op->EndLoc = E;
return Op;
}
- static MipsOperand *CreateMem(MipsOperand *Base, const MCExpr *Off, SMLoc S,
- SMLoc E, MipsAsmParser &Parser) {
- MipsOperand *Op = new MipsOperand(k_Memory, Parser);
- Op->Mem.Base = Base;
+ static std::unique_ptr<MipsOperand>
+ CreateMem(std::unique_ptr<MipsOperand> Base, const MCExpr *Off, SMLoc S,
+ SMLoc E, MipsAsmParser &Parser) {
+ auto Op = make_unique<MipsOperand>(k_Memory, Parser);
+ Op->Mem.Base = Base.release();
Op->Mem.Off = Off;
Op->StartLoc = S;
Op->EndLoc = E;
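
The operand factories above switch from returning raw MipsOperand pointers to std::unique_ptr<MipsOperand>, and CreateMem now takes its base operand by unique_ptr and release()s it into the k_Memory union, so every hand-off of ownership is visible at the call site. A hedged, self-contained sketch of that convention, with simplified types rather than the real MC classes:

    #include <memory>

    struct Operand {
      struct MemOp { Operand *Base; long Off; };  // raw pointer lives inside a union
      enum Kind { Reg, Mem } K;
      union {
        unsigned RegIdx;
        MemOp MemVal;
      };

      Operand() : K(Reg), RegIdx(0) {}
      Operand(const Operand &) = delete;
      Operand &operator=(const Operand &) = delete;
      ~Operand() {
        if (K == Mem)
          delete MemVal.Base;  // the memory operand owns its base
      }

      static std::unique_ptr<Operand> createReg(unsigned Idx) {
        auto Op = std::unique_ptr<Operand>(new Operand());
        Op->K = Reg;
        Op->RegIdx = Idx;
        return Op;
      }

      // Takes ownership of Base; release() hands the raw pointer to the union,
      // mirroring what CreateMem does with Mem.Base above.
      static std::unique_ptr<Operand> createMem(std::unique_ptr<Operand> Base,
                                                long Off) {
        auto Op = std::unique_ptr<Operand>(new Operand());
        Op->K = Mem;
        Op->MemVal.Base = Base.release();
        Op->MemVal.Off = Off;
        return Op;
      }

      // Call-site shape: ownership of Base visibly moves into the memory operand.
      //   auto Base = Operand::createReg(0);
      //   auto M    = Operand::createMem(std::move(Base), 8);
    };
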
@@ -756,7 +821,11 @@ public:
return isRegIdx() && RegIdx.Kind & RegKind_CCR && RegIdx.Index <= 31;
}
bool isFCCAsmReg() const {
- return isRegIdx() && RegIdx.Kind & RegKind_FCC && RegIdx.Index <= 7;
+ if (!(isRegIdx() && RegIdx.Kind & RegKind_FCC))
+ return false;
+ if (!AsmParser.hasEightFccRegisters())
+ return RegIdx.Index == 0;
+ return RegIdx.Index <= 7;
}
bool isACCAsmReg() const {
return isRegIdx() && RegIdx.Kind & RegKind_ACC && RegIdx.Index <= 3;
@@ -849,9 +918,10 @@ bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc,
Offset = Inst.getOperand(2);
if (!Offset.isImm())
break; // We'll deal with this situation later on when applying fixups.
- if (!isIntN(isMicroMips() ? 17 : 18, Offset.getImm()))
+ if (!isIntN(inMicroMipsMode() ? 17 : 18, Offset.getImm()))
return Error(IDLoc, "branch target out of range");
- if (OffsetToAlignment(Offset.getImm(), 1LL << (isMicroMips() ? 1 : 2)))
+ if (OffsetToAlignment(Offset.getImm(),
+ 1LL << (inMicroMipsMode() ? 1 : 2)))
return Error(IDLoc, "branch to misaligned address");
break;
case Mips::BGEZ:
@@ -874,14 +944,23 @@ bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc,
Offset = Inst.getOperand(1);
if (!Offset.isImm())
break; // We'll deal with this situation later on when applying fixups.
- if (!isIntN(isMicroMips() ? 17 : 18, Offset.getImm()))
+ if (!isIntN(inMicroMipsMode() ? 17 : 18, Offset.getImm()))
return Error(IDLoc, "branch target out of range");
- if (OffsetToAlignment(Offset.getImm(), 1LL << (isMicroMips() ? 1 : 2)))
+ if (OffsetToAlignment(Offset.getImm(),
+ 1LL << (inMicroMipsMode() ? 1 : 2)))
return Error(IDLoc, "branch to misaligned address");
break;
}
}
+ // SSNOP is deprecated on MIPS32r6/MIPS64r6
+ // We still accept it but it is a normal nop.
+ if (hasMips32r6() && Inst.getOpcode() == Mips::SSNOP) {
+ std::string ISA = hasMips64r6() ? "MIPS64r6" : "MIPS32r6";
+ Warning(IDLoc, "ssnop is deprecated for " + ISA + " and is equivalent to a "
+ "nop instruction");
+ }
+
if (MCID.hasDelaySlot() && Options.isReorder()) {
// If this instruction has a delay slot and .set reorder is active,
// emit a NOP after it.
@@ -930,7 +1009,7 @@ bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc,
} // if load/store
if (needsExpansion(Inst))
- expandInstruction(Inst, IDLoc, Instructions);
+ return expandInstruction(Inst, IDLoc, Instructions);
else
Instructions.push_back(Inst);
@@ -943,17 +1022,27 @@ bool MipsAsmParser::needsExpansion(MCInst &Inst) {
case Mips::LoadImm32Reg:
case Mips::LoadAddr32Imm:
case Mips::LoadAddr32Reg:
+ case Mips::LoadImm64Reg:
return true;
default:
return false;
}
}
-void MipsAsmParser::expandInstruction(MCInst &Inst, SMLoc IDLoc,
+bool MipsAsmParser::expandInstruction(MCInst &Inst, SMLoc IDLoc,
SmallVectorImpl<MCInst> &Instructions) {
switch (Inst.getOpcode()) {
+ default:
+ assert(0 && "unimplemented expansion");
+ return true;
case Mips::LoadImm32Reg:
return expandLoadImm(Inst, IDLoc, Instructions);
+ case Mips::LoadImm64Reg:
+ if (!isGP64bit()) {
+ Error(IDLoc, "instruction requires a CPU feature not currently enabled");
+ return true;
+ }
+ return expandLoadImm(Inst, IDLoc, Instructions);
case Mips::LoadAddr32Imm:
return expandLoadAddressImm(Inst, IDLoc, Instructions);
case Mips::LoadAddr32Reg:
@@ -961,7 +1050,31 @@ void MipsAsmParser::expandInstruction(MCInst &Inst, SMLoc IDLoc,
}
}
-void MipsAsmParser::expandLoadImm(MCInst &Inst, SMLoc IDLoc,
+namespace {
+template <int Shift, bool PerformShift>
+void createShiftOr(int64_t Value, unsigned RegNo, SMLoc IDLoc,
+ SmallVectorImpl<MCInst> &Instructions) {
+ MCInst tmpInst;
+ if (PerformShift) {
+ tmpInst.setOpcode(Mips::DSLL);
+ tmpInst.addOperand(MCOperand::CreateReg(RegNo));
+ tmpInst.addOperand(MCOperand::CreateReg(RegNo));
+ tmpInst.addOperand(MCOperand::CreateImm(16));
+ tmpInst.setLoc(IDLoc);
+ Instructions.push_back(tmpInst);
+ tmpInst.clear();
+ }
+ tmpInst.setOpcode(Mips::ORi);
+ tmpInst.addOperand(MCOperand::CreateReg(RegNo));
+ tmpInst.addOperand(MCOperand::CreateReg(RegNo));
+ tmpInst.addOperand(
+ MCOperand::CreateImm(((Value & (0xffffLL << Shift)) >> Shift)));
+ tmpInst.setLoc(IDLoc);
+ Instructions.push_back(tmpInst);
+}
+}
+
+bool MipsAsmParser::expandLoadImm(MCInst &Inst, SMLoc IDLoc,
SmallVectorImpl<MCInst> &Instructions) {
MCInst tmpInst;
const MCOperand &ImmOp = Inst.getOperand(1);
@@ -969,8 +1082,10 @@ void MipsAsmParser::expandLoadImm(MCInst &Inst, SMLoc IDLoc,
const MCOperand &RegOp = Inst.getOperand(0);
assert(RegOp.isReg() && "expected register operand kind");
- int ImmValue = ImmOp.getImm();
+ int64_t ImmValue = ImmOp.getImm();
tmpInst.setLoc(IDLoc);
+ // FIXME: gas has a special case for values that are 000...1111, which
+ // becomes a li -1 and then a dsrl
if (0 <= ImmValue && ImmValue <= 65535) {
// For 0 <= j <= 65535.
// li d,j => ori d,$zero,j
@@ -987,25 +1102,76 @@ void MipsAsmParser::expandLoadImm(MCInst &Inst, SMLoc IDLoc,
tmpInst.addOperand(MCOperand::CreateReg(Mips::ZERO));
tmpInst.addOperand(MCOperand::CreateImm(ImmValue));
Instructions.push_back(tmpInst);
- } else {
- // For any other value of j that is representable as a 32-bit integer.
+ } else if ((ImmValue & 0xffffffff) == ImmValue) {
+ // For any value of j that is representable as a 32-bit integer, create
+ // a sequence of:
// li d,j => lui d,hi16(j)
// ori d,d,lo16(j)
tmpInst.setOpcode(Mips::LUi);
tmpInst.addOperand(MCOperand::CreateReg(RegOp.getReg()));
tmpInst.addOperand(MCOperand::CreateImm((ImmValue & 0xffff0000) >> 16));
Instructions.push_back(tmpInst);
- tmpInst.clear();
- tmpInst.setOpcode(Mips::ORi);
+ createShiftOr<0, false>(ImmValue, RegOp.getReg(), IDLoc, Instructions);
+ } else if ((ImmValue & (0xffffLL << 48)) == 0) {
+ if (!isGP64bit()) {
+ Error(IDLoc, "instruction requires a CPU feature not currently enabled");
+ return true;
+ }
+
+ // <------- lo32 ------>
+ // <------- hi32 ------>
+ // <- hi16 -> <- lo16 ->
+ // _________________________________
+ // | | | |
+  // | 16 bits  | 16 bits  | 16 bits  |
+ // |__________|__________|__________|
+ //
+ // For any value of j that is representable as a 48-bit integer, create
+ // a sequence of:
+ // li d,j => lui d,hi16(j)
+ // ori d,d,hi16(lo32(j))
+ // dsll d,d,16
+ // ori d,d,lo16(lo32(j))
+ tmpInst.setOpcode(Mips::LUi);
tmpInst.addOperand(MCOperand::CreateReg(RegOp.getReg()));
+ tmpInst.addOperand(
+ MCOperand::CreateImm((ImmValue & (0xffffLL << 32)) >> 32));
+ Instructions.push_back(tmpInst);
+ createShiftOr<16, false>(ImmValue, RegOp.getReg(), IDLoc, Instructions);
+ createShiftOr<0, true>(ImmValue, RegOp.getReg(), IDLoc, Instructions);
+ } else {
+ if (!isGP64bit()) {
+ Error(IDLoc, "instruction requires a CPU feature not currently enabled");
+ return true;
+ }
+
+ // <------- hi32 ------> <------- lo32 ------>
+ // <- hi16 -> <- lo16 ->
+ // ___________________________________________
+ // | | | | |
+  // | 16 bits  | 16 bits  | 16 bits  | 16 bits  |
+ // |__________|__________|__________|__________|
+ //
+ // For any value of j that isn't representable as a 48-bit integer.
+ // li d,j => lui d,hi16(j)
+ // ori d,d,lo16(hi32(j))
+ // dsll d,d,16
+ // ori d,d,hi16(lo32(j))
+ // dsll d,d,16
+ // ori d,d,lo16(lo32(j))
+ tmpInst.setOpcode(Mips::LUi);
tmpInst.addOperand(MCOperand::CreateReg(RegOp.getReg()));
- tmpInst.addOperand(MCOperand::CreateImm(ImmValue & 0xffff));
- tmpInst.setLoc(IDLoc);
+ tmpInst.addOperand(
+ MCOperand::CreateImm((ImmValue & (0xffffLL << 48)) >> 48));
Instructions.push_back(tmpInst);
+ createShiftOr<32, false>(ImmValue, RegOp.getReg(), IDLoc, Instructions);
+ createShiftOr<16, true>(ImmValue, RegOp.getReg(), IDLoc, Instructions);
+ createShiftOr<0, true>(ImmValue, RegOp.getReg(), IDLoc, Instructions);
}
+ return false;
}
-void
+bool
MipsAsmParser::expandLoadAddressReg(MCInst &Inst, SMLoc IDLoc,
SmallVectorImpl<MCInst> &Instructions) {
MCInst tmpInst;
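
The three branches of expandLoadImm above differ only in how many 16-bit chunks of the immediate they feed into the lui/ori/dsll sequence built by createShiftOr. As a small self-contained illustration (the constant is made up for the example, not taken from the patch), this is how a full 64-bit immediate splits into the chunks consumed by the lui plus createShiftOr<32,false>, <16,true> and <0,true> calls:

    #include <cstdint>
    #include <cstdio>

    int main() {
      const uint64_t Imm = 0x0123456789ABCDEFULL;  // hypothetical li operand
      const unsigned Hi   = (Imm >> 48) & 0xFFFF;  // lui  d, Hi
      const unsigned MidA = (Imm >> 32) & 0xFFFF;  // ori  d, d, MidA
      const unsigned MidB = (Imm >> 16) & 0xFFFF;  // dsll d, d, 16 ; ori d, d, MidB
      const unsigned Lo   =  Imm        & 0xFFFF;  // dsll d, d, 16 ; ori d, d, Lo
      std::printf("lui %#x, ori %#x, ori %#x, ori %#x\n", Hi, MidA, MidB, Lo);
      return 0;
    }

The 48-bit branch is the same sequence with one chunk fewer: lui of bits 32-47, then createShiftOr<16,false> and <0,true>.
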
@@ -1046,9 +1212,10 @@ MipsAsmParser::expandLoadAddressReg(MCInst &Inst, SMLoc IDLoc,
tmpInst.addOperand(MCOperand::CreateReg(SrcRegOp.getReg()));
Instructions.push_back(tmpInst);
}
+ return false;
}
-void
+bool
MipsAsmParser::expandLoadAddressImm(MCInst &Inst, SMLoc IDLoc,
SmallVectorImpl<MCInst> &Instructions) {
MCInst tmpInst;
@@ -1080,6 +1247,7 @@ MipsAsmParser::expandLoadAddressImm(MCInst &Inst, SMLoc IDLoc,
tmpInst.addOperand(MCOperand::CreateImm(ImmValue & 0xffff));
Instructions.push_back(tmpInst);
}
+ return false;
}
void MipsAsmParser::expandMemInst(MCInst &Inst, SMLoc IDLoc,
@@ -1090,8 +1258,6 @@ void MipsAsmParser::expandMemInst(MCInst &Inst, SMLoc IDLoc,
unsigned ImmOffset, HiOffset, LoOffset;
const MCExpr *ExprOffset;
unsigned TmpRegNum;
- unsigned AtRegNum = getReg(
- (isGP64()) ? Mips::GPR64RegClassID : Mips::GPR32RegClassID, getATReg());
// 1st operand is either the source or destination register.
assert(Inst.getOperand(0).isReg() && "expected register operand kind");
unsigned RegOpNum = Inst.getOperand(0).getReg();
@@ -1111,10 +1277,46 @@ void MipsAsmParser::expandMemInst(MCInst &Inst, SMLoc IDLoc,
ExprOffset = Inst.getOperand(2).getExpr();
// All instructions will have the same location.
TempInst.setLoc(IDLoc);
- // 1st instruction in expansion is LUi. For load instruction we can use
- // the dst register as a temporary if base and dst are different,
- // but for stores we must use $at.
- TmpRegNum = (isLoad && (BaseRegNum != RegOpNum)) ? RegOpNum : AtRegNum;
+ // These are some of the types of expansions we perform here:
+ // 1) lw $8, sym => lui $8, %hi(sym)
+ // lw $8, %lo(sym)($8)
+ // 2) lw $8, offset($9) => lui $8, %hi(offset)
+ // add $8, $8, $9
+ // lw $8, %lo(offset)($9)
+ // 3) lw $8, offset($8) => lui $at, %hi(offset)
+ // add $at, $at, $8
+ // lw $8, %lo(offset)($at)
+ // 4) sw $8, sym => lui $at, %hi(sym)
+ // sw $8, %lo(sym)($at)
+ // 5) sw $8, offset($8) => lui $at, %hi(offset)
+ // add $at, $at, $8
+ // sw $8, %lo(offset)($at)
+ // 6) ldc1 $f0, sym => lui $at, %hi(sym)
+ // ldc1 $f0, %lo(sym)($at)
+ //
+  // For load instructions we can use the destination register as a temporary
+  // if base and dst are different (examples 1 and 2) and if the base register
+  // is general purpose; otherwise we must use $at (example 6) and error if
+  // it's not available. For stores we must use $at (examples 4 and 5) because
+  // we must not clobber the source register while setting up the offset.
+ const MCInstrDesc &Desc = getInstDesc(Inst.getOpcode());
+ int16_t RegClassOp0 = Desc.OpInfo[0].RegClass;
+ unsigned RegClassIDOp0 =
+ getContext().getRegisterInfo()->getRegClass(RegClassOp0).getID();
+ bool IsGPR = (RegClassIDOp0 == Mips::GPR32RegClassID) ||
+ (RegClassIDOp0 == Mips::GPR64RegClassID);
+ if (isLoad && IsGPR && (BaseRegNum != RegOpNum))
+ TmpRegNum = RegOpNum;
+ else {
+ int AT = getATReg(IDLoc);
+ // At this point we need AT to perform the expansions and we exit if it is
+ // not available.
+ if (!AT)
+ return;
+ TmpRegNum = getReg(
+ (isGP64bit()) ? Mips::GPR64RegClassID : Mips::GPR32RegClassID, AT);
+ }
+
TempInst.setOpcode(Mips::LUi);
TempInst.addOperand(MCOperand::CreateReg(TmpRegNum));
if (isImmOpnd)
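
The block above encodes the rule from the comment: a load whose destination is a general-purpose register different from the base can reuse that destination as the scratch register, and every other case has to go through $at, erroring out when $at is unavailable. A simplified sketch of just that decision, with stand-in parameters rather than the real helpers:

    // Returns the register to build %hi(offset) in, or 0 when $at is needed
    // but unavailable (the caller then reports the error and bails out).
    unsigned chooseTmpReg(bool IsLoad, bool DstIsGPR, unsigned DstReg,
                          unsigned BaseReg, unsigned AtReg) {
      if (IsLoad && DstIsGPR && BaseReg != DstReg)
        return DstReg;  // examples 1 and 2: the destination doubles as scratch
      return AtReg;     // remaining cases: must use $at, which may be 0
    }
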
@@ -1164,10 +1366,24 @@ void MipsAsmParser::expandMemInst(MCInst &Inst, SMLoc IDLoc,
TempInst.clear();
}
-bool MipsAsmParser::MatchAndEmitInstruction(
- SMLoc IDLoc, unsigned &Opcode,
- SmallVectorImpl<MCParsedAsmOperand *> &Operands, MCStreamer &Out,
- unsigned &ErrorInfo, bool MatchingInlineAsm) {
+unsigned MipsAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
+ // As described by the Mips32r2 spec, the registers Rd and Rs for
+ // jalr.hb must be different.
+ unsigned Opcode = Inst.getOpcode();
+
+ if (Opcode == Mips::JALR_HB &&
+ (Inst.getOperand(0).getReg() == Inst.getOperand(1).getReg()))
+ return Match_RequiresDifferentSrcAndDst;
+
+ return Match_Success;
+}
+
+bool MipsAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
+ OperandVector &Operands,
+ MCStreamer &Out,
+ unsigned &ErrorInfo,
+ bool MatchingInlineAsm) {
+
MCInst Inst;
SmallVector<MCInst, 8> Instructions;
unsigned MatchResult =
@@ -1192,7 +1408,7 @@ bool MipsAsmParser::MatchAndEmitInstruction(
if (ErrorInfo >= Operands.size())
return Error(IDLoc, "too few operands for instruction");
- ErrorLoc = ((MipsOperand *)Operands[ErrorInfo])->getStartLoc();
+ ErrorLoc = ((MipsOperand &)*Operands[ErrorInfo]).getStartLoc();
if (ErrorLoc == SMLoc())
ErrorLoc = IDLoc;
}
@@ -1201,6 +1417,8 @@ bool MipsAsmParser::MatchAndEmitInstruction(
}
case Match_MnemonicFail:
return Error(IDLoc, "invalid instruction");
+ case Match_RequiresDifferentSrcAndDst:
+ return Error(IDLoc, "source and destination must be different");
}
return true;
}
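
checkTargetMatchPredicate, declared earlier in the class and defined just above, gives the target a veto after the generic matcher has chosen an encoding; here it rejects jalr.hb when rd and rs coincide so that MatchAndEmitInstruction can emit the dedicated diagnostic handled in this hunk. The shape of that hook as a simplified self-contained sketch, with hypothetical types rather than the real MC classes:

    #include <vector>

    enum MatchResult { Match_Success, Match_RequiresDifferentSrcAndDst };
    enum Opcode { JALR_HB, OtherOpcode };

    struct Inst {
      Opcode Op;
      std::vector<unsigned> Regs;  // register operands in encoding order
    };

    // Reject jalr.hb when destination and source registers are the same; the
    // caller maps the result onto "source and destination must be different".
    MatchResult checkTargetMatchPredicate(const Inst &I) {
      if (I.Op == JALR_HB && I.Regs.size() >= 2 && I.Regs[0] == I.Regs[1])
        return Match_RequiresDifferentSrcAndDst;
      return Match_Success;
    }
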
@@ -1254,7 +1472,7 @@ int MipsAsmParser::matchCPURegisterName(StringRef Name) {
.Case("t9", 25)
.Default(-1);
- if (isN32() || isN64()) {
+ if (isABI_N32() || isABI_N64()) {
// Although SGI documentation just cuts out t0-t3 for n32/n64,
// GNU pushes the values of t0-t3 to override the o32/o64 values for t4-t7
// We are supporting both cases, so for t0-t3 we'll just push them to t4-t7.
@@ -1354,10 +1572,11 @@ bool MipsAssemblerOptions::setATReg(unsigned Reg) {
return true;
}
-int MipsAsmParser::getATReg() {
+int MipsAsmParser::getATReg(SMLoc Loc) {
int AT = Options.getATRegNum();
if (AT == 0)
- TokError("Pseudo instruction requires $at, which is not available");
+ reportParseError(Loc,
+ "Pseudo instruction requires $at, which is not available");
return AT;
}
@@ -1366,7 +1585,7 @@ unsigned MipsAsmParser::getReg(int RC, int RegNo) {
}
unsigned MipsAsmParser::getGPR(int RegNo) {
- return getReg(isGP64() ? Mips::GPR64RegClassID : Mips::GPR32RegClassID,
+ return getReg(isGP64bit() ? Mips::GPR64RegClassID : Mips::GPR32RegClassID,
RegNo);
}
@@ -1378,9 +1597,7 @@ int MipsAsmParser::matchRegisterByNumber(unsigned RegNum, unsigned RegClass) {
return getReg(RegClass, RegNum);
}
-bool
-MipsAsmParser::ParseOperand(SmallVectorImpl<MCParsedAsmOperand *> &Operands,
- StringRef Mnemonic) {
+bool MipsAsmParser::ParseOperand(OperandVector &Operands, StringRef Mnemonic) {
DEBUG(dbgs() << "ParseOperand\n");
// Check if the current operand has a custom associated parser, if so, try to
@@ -1431,6 +1648,7 @@ MipsAsmParser::ParseOperand(SmallVectorImpl<MCParsedAsmOperand *> &Operands,
case AsmToken::Minus:
case AsmToken::Plus:
case AsmToken::Integer:
+ case AsmToken::Tilde:
case AsmToken::String: {
DEBUG(dbgs() << ".. generic integer\n");
OperandMatchResultTy ResTy = ParseImm(Operands);
@@ -1578,11 +1796,11 @@ bool MipsAsmParser::parseRelocOperand(const MCExpr *&Res) {
bool MipsAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
SMLoc &EndLoc) {
- SmallVector<MCParsedAsmOperand *, 1> Operands;
+ SmallVector<std::unique_ptr<MCParsedAsmOperand>, 1> Operands;
OperandMatchResultTy ResTy = ParseAnyRegister(Operands);
if (ResTy == MatchOperand_Success) {
assert(Operands.size() == 1);
- MipsOperand &Operand = *static_cast<MipsOperand *>(Operands.front());
+ MipsOperand &Operand = static_cast<MipsOperand &>(*Operands.front());
StartLoc = Operand.getStartLoc();
EndLoc = Operand.getEndLoc();
@@ -1592,11 +1810,9 @@ bool MipsAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
// register is a parse error.
if (Operand.isGPRAsmReg()) {
// Resolve to GPR32 or GPR64 appropriately.
- RegNo = isGP64() ? Operand.getGPR64Reg() : Operand.getGPR32Reg();
+ RegNo = isGP64bit() ? Operand.getGPR64Reg() : Operand.getGPR32Reg();
}
- delete &Operand;
-
return (RegNo == (unsigned)-1);
}
@@ -1632,8 +1848,8 @@ bool MipsAsmParser::parseMemOffset(const MCExpr *&Res, bool isParenExpr) {
return Result;
}
-MipsAsmParser::OperandMatchResultTy MipsAsmParser::parseMemOperand(
- SmallVectorImpl<MCParsedAsmOperand *> &Operands) {
+MipsAsmParser::OperandMatchResultTy
+MipsAsmParser::parseMemOperand(OperandVector &Operands) {
DEBUG(dbgs() << "parseMemOperand\n");
const MCExpr *IdVal = nullptr;
SMLoc S;
@@ -1653,8 +1869,8 @@ MipsAsmParser::OperandMatchResultTy MipsAsmParser::parseMemOperand(
const AsmToken &Tok = Parser.getTok(); // Get the next token.
if (Tok.isNot(AsmToken::LParen)) {
- MipsOperand *Mnemonic = static_cast<MipsOperand *>(Operands[0]);
- if (Mnemonic->getToken() == "la") {
+ MipsOperand &Mnemonic = static_cast<MipsOperand &>(*Operands[0]);
+ if (Mnemonic.getToken() == "la") {
SMLoc E =
SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
Operands.push_back(MipsOperand::CreateImm(IdVal, S, E, *this));
@@ -1666,9 +1882,10 @@ MipsAsmParser::OperandMatchResultTy MipsAsmParser::parseMemOperand(
// Zero register assumed, add a memory operand with ZERO as its base.
// "Base" will be managed by k_Memory.
- MipsOperand *Base = MipsOperand::CreateGPRReg(
- 0, getContext().getRegisterInfo(), S, E, *this);
- Operands.push_back(MipsOperand::CreateMem(Base, IdVal, S, E, *this));
+ auto Base = MipsOperand::CreateGPRReg(0, getContext().getRegisterInfo(),
+ S, E, *this);
+ Operands.push_back(
+ MipsOperand::CreateMem(std::move(Base), IdVal, S, E, *this));
return MatchOperand_Success;
}
Error(Parser.getTok().getLoc(), "'(' expected");
@@ -1695,7 +1912,8 @@ MipsAsmParser::OperandMatchResultTy MipsAsmParser::parseMemOperand(
IdVal = MCConstantExpr::Create(0, getContext());
// Replace the register operand with the memory operand.
- MipsOperand *op = static_cast<MipsOperand *>(Operands.back());
+ std::unique_ptr<MipsOperand> op(
+ static_cast<MipsOperand *>(Operands.back().release()));
// Remove the register from the operands.
// "op" will be managed by k_Memory.
Operands.pop_back();
@@ -1709,12 +1927,11 @@ MipsAsmParser::OperandMatchResultTy MipsAsmParser::parseMemOperand(
getContext());
}
- Operands.push_back(MipsOperand::CreateMem(op, IdVal, S, E, *this));
+ Operands.push_back(MipsOperand::CreateMem(std::move(op), IdVal, S, E, *this));
return MatchOperand_Success;
}
-bool MipsAsmParser::searchSymbolAlias(
- SmallVectorImpl<MCParsedAsmOperand *> &Operands) {
+bool MipsAsmParser::searchSymbolAlias(OperandVector &Operands) {
MCSymbol *Sym = getContext().LookupSymbol(Parser.getTok().getIdentifier());
if (Sym) {
@@ -1740,9 +1957,8 @@ bool MipsAsmParser::searchSymbolAlias(
} else if (Expr->getKind() == MCExpr::Constant) {
Parser.Lex();
const MCConstantExpr *Const = static_cast<const MCConstantExpr *>(Expr);
- MipsOperand *op =
- MipsOperand::CreateImm(Const, S, Parser.getTok().getLoc(), *this);
- Operands.push_back(op);
+ Operands.push_back(
+ MipsOperand::CreateImm(Const, S, Parser.getTok().getLoc(), *this));
return true;
}
}
@@ -1750,9 +1966,9 @@ bool MipsAsmParser::searchSymbolAlias(
}
MipsAsmParser::OperandMatchResultTy
-MipsAsmParser::MatchAnyRegisterNameWithoutDollar(
- SmallVectorImpl<MCParsedAsmOperand *> &Operands, StringRef Identifier,
- SMLoc S) {
+MipsAsmParser::MatchAnyRegisterNameWithoutDollar(OperandVector &Operands,
+ StringRef Identifier,
+ SMLoc S) {
int Index = matchCPURegisterName(Identifier);
if (Index != -1) {
Operands.push_back(MipsOperand::CreateGPRReg(
@@ -1799,8 +2015,7 @@ MipsAsmParser::MatchAnyRegisterNameWithoutDollar(
}
MipsAsmParser::OperandMatchResultTy
-MipsAsmParser::MatchAnyRegisterWithoutDollar(
- SmallVectorImpl<MCParsedAsmOperand *> &Operands, SMLoc S) {
+MipsAsmParser::MatchAnyRegisterWithoutDollar(OperandVector &Operands, SMLoc S) {
auto Token = Parser.getLexer().peekTok(false);
if (Token.is(AsmToken::Identifier)) {
@@ -1822,8 +2037,8 @@ MipsAsmParser::MatchAnyRegisterWithoutDollar(
return MatchOperand_NoMatch;
}
-MipsAsmParser::OperandMatchResultTy MipsAsmParser::ParseAnyRegister(
- SmallVectorImpl<MCParsedAsmOperand *> &Operands) {
+MipsAsmParser::OperandMatchResultTy
+MipsAsmParser::ParseAnyRegister(OperandVector &Operands) {
DEBUG(dbgs() << "ParseAnyRegister\n");
auto Token = Parser.getTok();
@@ -1850,7 +2065,7 @@ MipsAsmParser::OperandMatchResultTy MipsAsmParser::ParseAnyRegister(
}
MipsAsmParser::OperandMatchResultTy
-MipsAsmParser::ParseImm(SmallVectorImpl<MCParsedAsmOperand *> &Operands) {
+MipsAsmParser::ParseImm(OperandVector &Operands) {
switch (getLexer().getKind()) {
default:
return MatchOperand_NoMatch;
@@ -1858,6 +2073,7 @@ MipsAsmParser::ParseImm(SmallVectorImpl<MCParsedAsmOperand *> &Operands) {
case AsmToken::Minus:
case AsmToken::Plus:
case AsmToken::Integer:
+ case AsmToken::Tilde:
case AsmToken::String:
break;
}
@@ -1872,8 +2088,8 @@ MipsAsmParser::ParseImm(SmallVectorImpl<MCParsedAsmOperand *> &Operands) {
return MatchOperand_Success;
}
-MipsAsmParser::OperandMatchResultTy MipsAsmParser::ParseJumpTarget(
- SmallVectorImpl<MCParsedAsmOperand *> &Operands) {
+MipsAsmParser::OperandMatchResultTy
+MipsAsmParser::ParseJumpTarget(OperandVector &Operands) {
DEBUG(dbgs() << "ParseJumpTarget\n");
SMLoc S = getLexer().getLoc();
@@ -1899,7 +2115,7 @@ MipsAsmParser::OperandMatchResultTy MipsAsmParser::ParseJumpTarget(
}
MipsAsmParser::OperandMatchResultTy
-MipsAsmParser::parseInvNum(SmallVectorImpl<MCParsedAsmOperand *> &Operands) {
+MipsAsmParser::parseInvNum(OperandVector &Operands) {
const MCExpr *IdVal;
// If the first token is '$' we may have register operand.
if (Parser.getTok().is(AsmToken::Dollar))
@@ -1917,7 +2133,7 @@ MipsAsmParser::parseInvNum(SmallVectorImpl<MCParsedAsmOperand *> &Operands) {
}
MipsAsmParser::OperandMatchResultTy
-MipsAsmParser::ParseLSAImm(SmallVectorImpl<MCParsedAsmOperand *> &Operands) {
+MipsAsmParser::ParseLSAImm(OperandVector &Operands) {
switch (getLexer().getKind()) {
default:
return MatchOperand_NoMatch;
@@ -1996,8 +2212,7 @@ MCSymbolRefExpr::VariantKind MipsAsmParser::getVariantKind(StringRef Symbol) {
/// ::= '(', register, ')'
/// handle it before we iterate so we don't get tripped up by the lack of
/// a comma.
-bool MipsAsmParser::ParseParenSuffix(
- StringRef Name, SmallVectorImpl<MCParsedAsmOperand *> &Operands) {
+bool MipsAsmParser::ParseParenSuffix(StringRef Name, OperandVector &Operands) {
if (getLexer().is(AsmToken::LParen)) {
Operands.push_back(
MipsOperand::CreateToken("(", getLexer().getLoc(), *this));
@@ -2025,8 +2240,8 @@ bool MipsAsmParser::ParseParenSuffix(
/// ::= '[', integer, ']'
/// handle it before we iterate so we don't get tripped up by the lack of
/// a comma.
-bool MipsAsmParser::ParseBracketSuffix(
- StringRef Name, SmallVectorImpl<MCParsedAsmOperand *> &Operands) {
+bool MipsAsmParser::ParseBracketSuffix(StringRef Name,
+ OperandVector &Operands) {
if (getLexer().is(AsmToken::LBrac)) {
Operands.push_back(
MipsOperand::CreateToken("[", getLexer().getLoc(), *this));
@@ -2048,10 +2263,12 @@ bool MipsAsmParser::ParseBracketSuffix(
return false;
}
-bool MipsAsmParser::ParseInstruction(
- ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc,
- SmallVectorImpl<MCParsedAsmOperand *> &Operands) {
+bool MipsAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
+ SMLoc NameLoc, OperandVector &Operands) {
DEBUG(dbgs() << "ParseInstruction\n");
+  // We have reached the first instruction; the module directive is
+  // forbidden after this point.
+ getTargetStreamer().setCanHaveModuleDir(false);
// Check if we have valid mnemonic
if (!mnemonicIsValid(Name, 0)) {
Parser.eatToEndOfStatement();
@@ -2098,13 +2315,13 @@ bool MipsAsmParser::ParseInstruction(
return false;
}
-bool MipsAsmParser::reportParseError(StringRef ErrorMsg) {
+bool MipsAsmParser::reportParseError(Twine ErrorMsg) {
SMLoc Loc = getLexer().getLoc();
Parser.eatToEndOfStatement();
return Error(Loc, ErrorMsg);
}
-bool MipsAsmParser::reportParseError(SMLoc Loc, StringRef ErrorMsg) {
+bool MipsAsmParser::reportParseError(SMLoc Loc, Twine ErrorMsg) {
return Error(Loc, ErrorMsg);
}
@@ -2238,6 +2455,32 @@ bool MipsAsmParser::parseSetNoMips16Directive() {
return false;
}
+bool MipsAsmParser::parseSetFpDirective() {
+ MipsABIFlagsSection::FpABIKind FpAbiVal;
+ // Line can be: .set fp=32
+ // .set fp=xx
+ // .set fp=64
+ Parser.Lex(); // Eat fp token
+ AsmToken Tok = Parser.getTok();
+ if (Tok.isNot(AsmToken::Equal)) {
+ reportParseError("unexpected token in statement");
+ return false;
+ }
+ Parser.Lex(); // Eat '=' token.
+ Tok = Parser.getTok();
+
+ if (!parseFpABIValue(FpAbiVal, ".set"))
+ return false;
+
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ reportParseError("unexpected token in statement");
+ return false;
+ }
+ getTargetStreamer().emitDirectiveSetFp(FpAbiVal);
+ Parser.Lex(); // Consume the EndOfStatement.
+ return false;
+}
+
bool MipsAsmParser::parseSetAssignment() {
StringRef Name;
const MCExpr *Value;
@@ -2296,25 +2539,6 @@ bool MipsAsmParser::parseSetFeature(uint64_t Feature) {
return false;
}
-bool MipsAsmParser::parseRegister(unsigned &RegNum) {
- if (!getLexer().is(AsmToken::Dollar))
- return false;
-
- Parser.Lex();
-
- const AsmToken &Reg = Parser.getTok();
- if (Reg.is(AsmToken::Identifier)) {
- RegNum = matchCPURegisterName(Reg.getIdentifier());
- } else if (Reg.is(AsmToken::Integer)) {
- RegNum = Reg.getIntVal();
- } else {
- return false;
- }
-
- Parser.Lex();
- return true;
-}
-
bool MipsAsmParser::eatComma(StringRef ErrorStr) {
if (getLexer().isNot(AsmToken::Comma)) {
SMLoc Loc = getLexer().getLoc();
@@ -2332,21 +2556,20 @@ bool MipsAsmParser::parseDirectiveCPLoad(SMLoc Loc) {
// FIXME: Warn if cpload is used in Mips16 mode.
- SmallVector<MCParsedAsmOperand *, 1> Reg;
+ SmallVector<std::unique_ptr<MCParsedAsmOperand>, 1> Reg;
OperandMatchResultTy ResTy = ParseAnyRegister(Reg);
if (ResTy == MatchOperand_NoMatch || ResTy == MatchOperand_ParseFail) {
reportParseError("expected register containing function address");
return false;
}
- MipsOperand *RegOpnd = static_cast<MipsOperand *>(Reg[0]);
- if (!RegOpnd->isGPRAsmReg()) {
- reportParseError(RegOpnd->getStartLoc(), "invalid register");
+ MipsOperand &RegOpnd = static_cast<MipsOperand &>(*Reg[0]);
+ if (!RegOpnd.isGPRAsmReg()) {
+ reportParseError(RegOpnd.getStartLoc(), "invalid register");
return false;
}
- getTargetStreamer().emitDirectiveCpload(RegOpnd->getGPR32Reg());
- delete RegOpnd;
+ getTargetStreamer().emitDirectiveCpload(RegOpnd.getGPR32Reg());
return false;
}
@@ -2355,23 +2578,48 @@ bool MipsAsmParser::parseDirectiveCPSetup() {
unsigned Save;
bool SaveIsReg = true;
- if (!parseRegister(FuncReg))
- return reportParseError("expected register containing function address");
- FuncReg = getGPR(FuncReg);
+ SmallVector<std::unique_ptr<MCParsedAsmOperand>, 1> TmpReg;
+ OperandMatchResultTy ResTy = ParseAnyRegister(TmpReg);
+ if (ResTy == MatchOperand_NoMatch) {
+ reportParseError("expected register containing function address");
+ Parser.eatToEndOfStatement();
+ return false;
+ }
+
+ MipsOperand &FuncRegOpnd = static_cast<MipsOperand &>(*TmpReg[0]);
+ if (!FuncRegOpnd.isGPRAsmReg()) {
+ reportParseError(FuncRegOpnd.getStartLoc(), "invalid register");
+ Parser.eatToEndOfStatement();
+ return false;
+ }
+
+ FuncReg = FuncRegOpnd.getGPR32Reg();
+ TmpReg.clear();
if (!eatComma("expected comma parsing directive"))
return true;
- if (!parseRegister(Save)) {
+ ResTy = ParseAnyRegister(TmpReg);
+ if (ResTy == MatchOperand_NoMatch) {
const AsmToken &Tok = Parser.getTok();
if (Tok.is(AsmToken::Integer)) {
Save = Tok.getIntVal();
SaveIsReg = false;
Parser.Lex();
- } else
- return reportParseError("expected save register or stack offset");
- } else
- Save = getGPR(Save);
+ } else {
+ reportParseError("expected save register or stack offset");
+ Parser.eatToEndOfStatement();
+ return false;
+ }
+ } else {
+ MipsOperand &SaveOpnd = static_cast<MipsOperand &>(*TmpReg[0]);
+ if (!SaveOpnd.isGPRAsmReg()) {
+ reportParseError(SaveOpnd.getStartLoc(), "invalid register");
+ Parser.eatToEndOfStatement();
+ return false;
+ }
+ Save = SaveOpnd.getGPR32Reg();
+ }
if (!eatComma("expected comma parsing directive"))
return true;
@@ -2414,6 +2662,8 @@ bool MipsAsmParser::parseDirectiveSet() {
return parseSetNoAtDirective();
} else if (Tok.getString() == "at") {
return parseSetAtDirective();
+ } else if (Tok.getString() == "fp") {
+ return parseSetFpDirective();
} else if (Tok.getString() == "reorder") {
return parseSetReorderDirective();
} else if (Tok.getString() == "noreorder") {
@@ -2546,6 +2796,134 @@ bool MipsAsmParser::parseDirectiveOption() {
return false;
}
+/// parseDirectiveModule
+/// ::= .module oddspreg
+/// ::= .module nooddspreg
+/// ::= .module fp=value
+bool MipsAsmParser::parseDirectiveModule() {
+ MCAsmLexer &Lexer = getLexer();
+ SMLoc L = Lexer.getLoc();
+
+ if (!getTargetStreamer().getCanHaveModuleDir()) {
+    // TODO: Get a better message.
+ reportParseError(".module directive must appear before any code");
+ return false;
+ }
+
+ if (Lexer.is(AsmToken::Identifier)) {
+ StringRef Option = Parser.getTok().getString();
+ Parser.Lex();
+
+ if (Option == "oddspreg") {
+ getTargetStreamer().emitDirectiveModuleOddSPReg(true, isABI_O32());
+ clearFeatureBits(Mips::FeatureNoOddSPReg, "nooddspreg");
+
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ reportParseError("Expected end of statement");
+ return false;
+ }
+
+ return false;
+ } else if (Option == "nooddspreg") {
+ if (!isABI_O32()) {
+ Error(L, "'.module nooddspreg' requires the O32 ABI");
+ return false;
+ }
+
+ getTargetStreamer().emitDirectiveModuleOddSPReg(false, isABI_O32());
+ setFeatureBits(Mips::FeatureNoOddSPReg, "nooddspreg");
+
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ reportParseError("Expected end of statement");
+ return false;
+ }
+
+ return false;
+ } else if (Option == "fp") {
+ return parseDirectiveModuleFP();
+ }
+
+ return Error(L, "'" + Twine(Option) + "' is not a valid .module option.");
+ }
+
+ return false;
+}
+
+/// parseDirectiveModuleFP
+/// ::= =32
+/// ::= =xx
+/// ::= =64
+bool MipsAsmParser::parseDirectiveModuleFP() {
+ MCAsmLexer &Lexer = getLexer();
+
+ if (Lexer.isNot(AsmToken::Equal)) {
+ reportParseError("unexpected token in statement");
+ return false;
+ }
+ Parser.Lex(); // Eat '=' token.
+
+ MipsABIFlagsSection::FpABIKind FpABI;
+ if (!parseFpABIValue(FpABI, ".module"))
+ return false;
+
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ reportParseError("unexpected token in statement");
+ return false;
+ }
+
+ // Emit appropriate flags.
+ getTargetStreamer().emitDirectiveModuleFP(FpABI, isABI_O32());
+ Parser.Lex(); // Consume the EndOfStatement.
+ return false;
+}
+
+bool MipsAsmParser::parseFpABIValue(MipsABIFlagsSection::FpABIKind &FpABI,
+ StringRef Directive) {
+ MCAsmLexer &Lexer = getLexer();
+
+ if (Lexer.is(AsmToken::Identifier)) {
+ StringRef Value = Parser.getTok().getString();
+ Parser.Lex();
+
+ if (Value != "xx") {
+ reportParseError("unsupported value, expected 'xx', '32' or '64'");
+ return false;
+ }
+
+ if (!isABI_O32()) {
+ reportParseError("'" + Directive + " fp=xx' requires the O32 ABI");
+ return false;
+ }
+
+ FpABI = MipsABIFlagsSection::FpABIKind::XX;
+ return true;
+ }
+
+ if (Lexer.is(AsmToken::Integer)) {
+ unsigned Value = Parser.getTok().getIntVal();
+ Parser.Lex();
+
+ if (Value != 32 && Value != 64) {
+ reportParseError("unsupported value, expected 'xx', '32' or '64'");
+ return false;
+ }
+
+ if (Value == 32) {
+ if (!isABI_O32()) {
+ reportParseError("'" + Directive + " fp=32' requires the O32 ABI");
+ return false;
+ }
+
+ FpABI = MipsABIFlagsSection::FpABIKind::S32;
+ } else
+ FpABI = MipsABIFlagsSection::FpABIKind::S64;
+
+ return true;
+ }
+
+ return false;
+}
+
bool MipsAsmParser::ParseDirective(AsmToken DirectiveID) {
StringRef IDVal = DirectiveID.getString();
@@ -2624,6 +3002,9 @@ bool MipsAsmParser::ParseDirective(AsmToken DirectiveID) {
if (IDVal == ".cpsetup")
return parseDirectiveCPSetup();
+ if (IDVal == ".module")
+ return parseDirectiveModule();
+
return true;
}
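
Editor's note: the `.set fp=` and `.module fp=` handling above accepts 'xx', '32' and '64', and restricts 'xx' and '32' to the O32 ABI. A minimal standalone sketch of that classification, using illustrative names rather than the LLVM API, under the O32 gating shown in parseFpABIValue:

```cpp
#include <optional>
#include <string>

// Illustrative stand-in for MipsABIFlagsSection::FpABIKind.
enum class FpABIKind { ANY, XX, S32, S64 };

// Map a ".set fp=" / ".module fp=" value to an FP ABI kind; nullopt means the
// value is unsupported or not allowed outside the O32 ABI.
std::optional<FpABIKind> classifyFpValue(const std::string &Value, bool IsO32) {
  if (Value == "xx")
    return IsO32 ? std::optional<FpABIKind>(FpABIKind::XX) : std::nullopt;
  if (Value == "32")
    return IsO32 ? std::optional<FpABIKind>(FpABIKind::S32) : std::nullopt;
  if (Value == "64")
    return FpABIKind::S64;
  return std::nullopt; // anything else is rejected
}
```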
diff --git a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
index 95670aa..902b877 100644
--- a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
+++ b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
@@ -57,16 +57,24 @@ class MipsDisassembler : public MipsDisassemblerBase {
public:
/// Constructor - Initializes the disassembler.
///
- MipsDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx,
- bool bigEndian) :
- MipsDisassemblerBase(STI, Ctx, bigEndian) {
- IsMicroMips = STI.getFeatureBits() & Mips::FeatureMicroMips;
- }
+ MipsDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx, bool bigEndian)
+ : MipsDisassemblerBase(STI, Ctx, bigEndian) {
+ IsMicroMips = STI.getFeatureBits() & Mips::FeatureMicroMips;
+ }
- bool isMips32r6() const {
+ bool hasMips3() const { return STI.getFeatureBits() & Mips::FeatureMips3; }
+ bool hasMips32() const { return STI.getFeatureBits() & Mips::FeatureMips32; }
+ bool hasMips32r6() const {
return STI.getFeatureBits() & Mips::FeatureMips32r6;
}
+ bool isGP64() const { return STI.getFeatureBits() & Mips::FeatureGP64Bit; }
+
+ bool hasCOP3() const {
+ // Only present in MIPS-I and MIPS-II
+ return !hasMips32() && !hasMips3();
+ }
+
/// getInstruction - See MCDisassembler.
DecodeStatus getInstruction(MCInst &instr,
uint64_t &size,
@@ -149,6 +157,10 @@ static DecodeStatus DecodeFCCRegisterClass(MCInst &Inst,
uint64_t Address,
const void *Decoder);
+static DecodeStatus DecodeFGRCCRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder);
+
static DecodeStatus DecodeHWRegsRegisterClass(MCInst &Inst,
unsigned Insn,
uint64_t Address,
@@ -260,6 +272,11 @@ static DecodeStatus DecodeFMem(MCInst &Inst, unsigned Insn,
uint64_t Address,
const void *Decoder);
+static DecodeStatus DecodeSpecial3LlSc(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+
static DecodeStatus DecodeSimm16(MCInst &Inst,
unsigned Insn,
uint64_t Address,
@@ -285,6 +302,9 @@ static DecodeStatus DecodeExtSize(MCInst &Inst,
static DecodeStatus DecodeSimm19Lsl2(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeSimm18Lsl3(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+
/// INSVE_[BHWD] have an implicit operand that the generated decoder doesn't
/// handle.
template <typename InsnType>
@@ -316,6 +336,11 @@ static DecodeStatus
DecodeBgtzGroupBranch(MCInst &MI, InsnType insn, uint64_t Address,
const void *Decoder);
+template <typename InsnType>
+static DecodeStatus
+DecodeBlezGroupBranch(MCInst &MI, InsnType insn, uint64_t Address,
+ const void *Decoder);
+
namespace llvm {
extern Target TheMipselTarget, TheMipsTarget, TheMips64Target,
TheMips64elTarget;
@@ -511,6 +536,7 @@ static DecodeStatus DecodeBlezlGroupBranch(MCInst &MI, InsnType insn,
InsnType Rs = fieldFromInstruction(insn, 21, 5);
InsnType Rt = fieldFromInstruction(insn, 16, 5);
InsnType Imm = SignExtend64(fieldFromInstruction(insn, 0, 16), 16) << 2;
+ bool HasRs = false;
if (Rt == 0)
return MCDisassembler::Fail;
@@ -518,8 +544,14 @@ static DecodeStatus DecodeBlezlGroupBranch(MCInst &MI, InsnType insn,
MI.setOpcode(Mips::BLEZC);
else if (Rs == Rt)
MI.setOpcode(Mips::BGEZC);
- else
- return MCDisassembler::Fail; // FIXME: BGEC is not implemented yet.
+ else {
+ HasRs = true;
+ MI.setOpcode(Mips::BGEC);
+ }
+
+ if (HasRs)
+ MI.addOperand(MCOperand::CreateReg(getReg(Decoder, Mips::GPR32RegClassID,
+ Rs)));
MI.addOperand(MCOperand::CreateReg(getReg(Decoder, Mips::GPR32RegClassID,
Rt)));
@@ -544,6 +576,8 @@ static DecodeStatus DecodeBgtzlGroupBranch(MCInst &MI, InsnType insn,
// BLTZC if rs == rt && rt != 0
// BLTC if rs != rt && rs != 0 && rt != 0
+ bool HasRs = false;
+
InsnType Rs = fieldFromInstruction(insn, 21, 5);
InsnType Rt = fieldFromInstruction(insn, 16, 5);
InsnType Imm = SignExtend64(fieldFromInstruction(insn, 0, 16), 16) << 2;
@@ -554,8 +588,14 @@ static DecodeStatus DecodeBgtzlGroupBranch(MCInst &MI, InsnType insn,
MI.setOpcode(Mips::BGTZC);
else if (Rs == Rt)
MI.setOpcode(Mips::BLTZC);
- else
- return MCDisassembler::Fail; // FIXME: BLTC is not implemented yet.
+ else {
+ MI.setOpcode(Mips::BLTC);
+ HasRs = true;
+ }
+
+ if (HasRs)
+ MI.addOperand(MCOperand::CreateReg(getReg(Decoder, Mips::GPR32RegClassID,
+ Rs)));
MI.addOperand(MCOperand::CreateReg(getReg(Decoder, Mips::GPR32RegClassID,
Rt)));
@@ -595,8 +635,11 @@ static DecodeStatus DecodeBgtzGroupBranch(MCInst &MI, InsnType insn,
} else if (Rs == Rt) {
MI.setOpcode(Mips::BLTZALC);
HasRs = true;
- } else
- return MCDisassembler::Fail; // BLTUC not implemented yet
+ } else {
+ MI.setOpcode(Mips::BLTUC);
+ HasRs = true;
+ HasRt = true;
+ }
if (HasRs)
MI.addOperand(MCOperand::CreateReg(getReg(Decoder, Mips::GPR32RegClassID,
@@ -611,6 +654,48 @@ static DecodeStatus DecodeBgtzGroupBranch(MCInst &MI, InsnType insn,
return MCDisassembler::Success;
}
+template <typename InsnType>
+static DecodeStatus DecodeBlezGroupBranch(MCInst &MI, InsnType insn,
+ uint64_t Address,
+ const void *Decoder) {
+ // If we are called then we can assume that MIPS32r6/MIPS64r6 is enabled
+ // (otherwise we would have matched the BLEZL instruction from the earlier
+  // ISAs instead).
+ //
+ // We have:
+ // 0b000110 sssss ttttt iiiiiiiiiiiiiiii
+  //      Invalid if rt == 0
+ // BLEZALC if rs == 0 && rt != 0
+ // BGEZALC if rs == rt && rt != 0
+ // BGEUC if rs != rt && rs != 0 && rt != 0
+
+ InsnType Rs = fieldFromInstruction(insn, 21, 5);
+ InsnType Rt = fieldFromInstruction(insn, 16, 5);
+ InsnType Imm = SignExtend64(fieldFromInstruction(insn, 0, 16), 16) << 2;
+ bool HasRs = false;
+
+ if (Rt == 0)
+ return MCDisassembler::Fail;
+ else if (Rs == 0)
+ MI.setOpcode(Mips::BLEZALC);
+ else if (Rs == Rt)
+ MI.setOpcode(Mips::BGEZALC);
+ else {
+ HasRs = true;
+ MI.setOpcode(Mips::BGEUC);
+ }
+
+ if (HasRs)
+ MI.addOperand(MCOperand::CreateReg(getReg(Decoder, Mips::GPR32RegClassID,
+ Rs)));
+ MI.addOperand(MCOperand::CreateReg(getReg(Decoder, Mips::GPR32RegClassID,
+ Rt)));
+
+ MI.addOperand(MCOperand::CreateImm(Imm));
+
+ return MCDisassembler::Success;
+}
+
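
Editor's note: for reference, the rs/rt dispatch this decoder performs, as a self-contained sketch (the helper name and mnemonic strings are illustrative, not part of the patch):

```cpp
#include <cstdint>
#include <string>

// Select the mnemonic for the BLEZ-group encoding from the rs and rt fields;
// "invalid" corresponds to returning MCDisassembler::Fail above.
std::string classifyBlezGroup(uint32_t Rs, uint32_t Rt) {
  if (Rt == 0)
    return "invalid"; // every form requires rt != 0
  if (Rs == 0)
    return "blezalc"; // compare rt against zero, and link
  if (Rs == Rt)
    return "bgezalc"; // rs == rt selects the signed >= 0 form
  return "bgeuc";     // distinct, non-zero rs and rt: unsigned compare
}
```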
/// readInstruction - read four bytes from the MemoryObject
/// and return a 32-bit word arranged according to the given endianness
static DecodeStatus readInstruction32(const MemoryObject &region,
@@ -670,6 +755,7 @@ MipsDisassembler::getInstruction(MCInst &instr,
return MCDisassembler::Fail;
if (IsMicroMips) {
+ DEBUG(dbgs() << "Trying MicroMips32 table (32-bit opcodes):\n");
// Calling the auto-generated decoder function.
Result = decodeInstruction(DecoderTableMicroMips32, instr, Insn, Address,
this, STI);
@@ -680,7 +766,28 @@ MipsDisassembler::getInstruction(MCInst &instr,
return MCDisassembler::Fail;
}
- if (isMips32r6()) {
+ if (hasCOP3()) {
+ DEBUG(dbgs() << "Trying COP3_ table (32-bit opcodes):\n");
+ Result =
+ decodeInstruction(DecoderTableCOP3_32, instr, Insn, Address, this, STI);
+ if (Result != MCDisassembler::Fail) {
+ Size = 4;
+ return Result;
+ }
+ }
+
+ if (hasMips32r6() && isGP64()) {
+ DEBUG(dbgs() << "Trying Mips32r6_64r6 (GPR64) table (32-bit opcodes):\n");
+ Result = decodeInstruction(DecoderTableMips32r6_64r6_GP6432, instr, Insn,
+ Address, this, STI);
+ if (Result != MCDisassembler::Fail) {
+ Size = 4;
+ return Result;
+ }
+ }
+
+ if (hasMips32r6()) {
+ DEBUG(dbgs() << "Trying Mips32r6_64r6 table (32-bit opcodes):\n");
Result = decodeInstruction(DecoderTableMips32r6_64r632, instr, Insn,
Address, this, STI);
if (Result != MCDisassembler::Fail) {
@@ -689,6 +796,7 @@ MipsDisassembler::getInstruction(MCInst &instr,
}
}
+ DEBUG(dbgs() << "Trying Mips table (32-bit opcodes):\n");
// Calling the auto-generated decoder function.
Result = decodeInstruction(DecoderTableMips32, instr, Insn, Address,
this, STI);
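
Editor's note: getInstruction now consults the decoder tables in priority order: MicroMips, COP3, MIPS32r6/64r6 with GPR64, MIPS32r6/64r6, then the generic MIPS table. A hedged sketch of that fall-through pattern (TryTable and decodeWithFallback are illustrative names, not LLVM API):

```cpp
#include <cstdint>
#include <optional>

// Each TryTable stands in for a decodeInstruction() call against one of the
// generated tables; nullopt corresponds to MCDisassembler::Fail.
using TryTable = std::optional<unsigned> (*)(uint32_t Insn);

std::optional<unsigned> decodeWithFallback(const TryTable *Tables, unsigned N,
                                           uint32_t Insn) {
  for (unsigned I = 0; I != N; ++I)    // most specific table first
    if (auto Opcode = Tables[I](Insn)) // first table that matches wins
      return Opcode;
  return std::nullopt;                 // no table matched
}
```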
@@ -840,6 +948,17 @@ static DecodeStatus DecodeFCCRegisterClass(MCInst &Inst,
return MCDisassembler::Success;
}
+static DecodeStatus DecodeFGRCCRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ if (RegNo > 31)
+ return MCDisassembler::Fail;
+
+ unsigned Reg = getReg(Decoder, Mips::FGRCCRegClassID, RegNo);
+ Inst.addOperand(MCOperand::CreateReg(Reg));
+ return MCDisassembler::Success;
+}
+
static DecodeStatus DecodeMem(MCInst &Inst,
unsigned Insn,
uint64_t Address,
@@ -965,6 +1084,27 @@ static DecodeStatus DecodeFMem(MCInst &Inst,
return MCDisassembler::Success;
}
+static DecodeStatus DecodeSpecial3LlSc(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder) {
+ int64_t Offset = SignExtend64<9>((Insn >> 7) & 0x1ff);
+ unsigned Rt = fieldFromInstruction(Insn, 16, 5);
+ unsigned Base = fieldFromInstruction(Insn, 21, 5);
+
+ Rt = getReg(Decoder, Mips::GPR32RegClassID, Rt);
+ Base = getReg(Decoder, Mips::GPR32RegClassID, Base);
+
+  // SC_R6/SCD_R6 also define Rt (the success flag is written back into it),
+  // so it is added once more as the destination operand.
+  if (Inst.getOpcode() == Mips::SC_R6 || Inst.getOpcode() == Mips::SCD_R6) {
+    Inst.addOperand(MCOperand::CreateReg(Rt));
+  }
+
+ Inst.addOperand(MCOperand::CreateReg(Rt));
+ Inst.addOperand(MCOperand::CreateReg(Base));
+ Inst.addOperand(MCOperand::CreateImm(Offset));
+
+ return MCDisassembler::Success;
+}
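
Editor's note: the R6 SPECIAL3 LL/SC encoding decoded above packs rt at bit 16, the base register at bit 21 and a 9-bit signed offset at bit 7. A small field-extraction sketch with plain C++ stand-ins for fieldFromInstruction/SignExtend64:

```cpp
#include <cstdint>

struct LlScFields {
  unsigned Rt;    // register field at bit 16
  unsigned Base;  // base register field at bit 21
  int64_t Offset; // 9-bit signed offset at bit 7
};

LlScFields extractLlScFields(uint32_t Insn) {
  LlScFields F;
  F.Rt = (Insn >> 16) & 0x1F;
  F.Base = (Insn >> 21) & 0x1F;
  uint32_t Off9 = (Insn >> 7) & 0x1FF;
  // Sign-extend the 9-bit field without shifting negative values.
  F.Offset = (Off9 & 0x100) ? int64_t(Off9) - 512 : int64_t(Off9);
  return F;
}
```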
static DecodeStatus DecodeHWRegsRegisterClass(MCInst &Inst,
unsigned RegNo,
@@ -1197,3 +1337,9 @@ static DecodeStatus DecodeSimm19Lsl2(MCInst &Inst, unsigned Insn,
Inst.addOperand(MCOperand::CreateImm(SignExtend32<19>(Insn) << 2));
return MCDisassembler::Success;
}
+
+static DecodeStatus DecodeSimm18Lsl3(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ Inst.addOperand(MCOperand::CreateImm(SignExtend32<18>(Insn) << 3));
+ return MCDisassembler::Success;
+}
diff --git a/lib/Target/Mips/MCTargetDesc/Android.mk b/lib/Target/Mips/MCTargetDesc/Android.mk
index 7ee11a1..c8d18fc 100644
--- a/lib/Target/Mips/MCTargetDesc/Android.mk
+++ b/lib/Target/Mips/MCTargetDesc/Android.mk
@@ -7,6 +7,7 @@ mips_mc_desc_TBLGEN_TABLES := \
MipsGenSubtargetInfo.inc
mips_mc_desc_SRC_FILES := \
+ MipsABIFlagsSection.cpp \
MipsAsmBackend.cpp \
MipsELFObjectWriter.cpp \
MipsELFStreamer.cpp \
diff --git a/lib/Target/Mips/MCTargetDesc/CMakeLists.txt b/lib/Target/Mips/MCTargetDesc/CMakeLists.txt
index d3e2fd7..c14ee35 100644
--- a/lib/Target/Mips/MCTargetDesc/CMakeLists.txt
+++ b/lib/Target/Mips/MCTargetDesc/CMakeLists.txt
@@ -1,4 +1,5 @@
add_llvm_library(LLVMMipsDesc
+ MipsABIFlagsSection.cpp
MipsAsmBackend.cpp
MipsELFObjectWriter.cpp
MipsELFStreamer.cpp
diff --git a/lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.cpp b/lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.cpp
new file mode 100644
index 0000000..52d5dd3
--- /dev/null
+++ b/lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.cpp
@@ -0,0 +1,60 @@
+//===-- MipsABIFlagsSection.cpp - Mips ELF ABI Flags Section ---*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MipsABIFlagsSection.h"
+
+using namespace llvm;
+
+uint8_t MipsABIFlagsSection::getFpABIValue() {
+ switch (FpABI) {
+ case FpABIKind::ANY:
+ return Val_GNU_MIPS_ABI_FP_ANY;
+ case FpABIKind::XX:
+ return Val_GNU_MIPS_ABI_FP_XX;
+ case FpABIKind::S32:
+ return Val_GNU_MIPS_ABI_FP_DOUBLE;
+ case FpABIKind::S64:
+ if (Is32BitABI)
+ return OddSPReg ? Val_GNU_MIPS_ABI_FP_64 : Val_GNU_MIPS_ABI_FP_64A;
+ return Val_GNU_MIPS_ABI_FP_DOUBLE;
+ }
+
+ llvm_unreachable("unexpected fp abi value");
+}
+
+StringRef MipsABIFlagsSection::getFpABIString(FpABIKind Value) {
+ switch (Value) {
+ case FpABIKind::XX:
+ return "xx";
+ case FpABIKind::S32:
+ return "32";
+ case FpABIKind::S64:
+ return "64";
+ default:
+ llvm_unreachable("unsupported fp abi value");
+ }
+}
+
+namespace llvm {
+MCStreamer &operator<<(MCStreamer &OS, MipsABIFlagsSection &ABIFlagsSection) {
+  // Write out an Elf_Internal_ABIFlags_v0 struct.
+ OS.EmitIntValue(ABIFlagsSection.getVersionValue(), 2); // version
+ OS.EmitIntValue(ABIFlagsSection.getISALevelValue(), 1); // isa_level
+ OS.EmitIntValue(ABIFlagsSection.getISARevisionValue(), 1); // isa_rev
+ OS.EmitIntValue(ABIFlagsSection.getGPRSizeValue(), 1); // gpr_size
+ OS.EmitIntValue(ABIFlagsSection.getCPR1SizeValue(), 1); // cpr1_size
+ OS.EmitIntValue(ABIFlagsSection.getCPR2SizeValue(), 1); // cpr2_size
+ OS.EmitIntValue(ABIFlagsSection.getFpABIValue(), 1); // fp_abi
+ OS.EmitIntValue(ABIFlagsSection.getISAExtensionSetValue(), 4); // isa_ext
+ OS.EmitIntValue(ABIFlagsSection.getASESetValue(), 4); // ases
+ OS.EmitIntValue(ABIFlagsSection.getFlags1Value(), 4); // flags1
+ OS.EmitIntValue(ABIFlagsSection.getFlags2Value(), 4); // flags2
+ return OS;
+}
+}
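
Editor's note: the EmitIntValue sequence above serializes a 24-byte record. A hedged illustration of the resulting layout; the field names here are descriptive, not taken from an LLVM or BFD header:

```cpp
#include <cstdint>

struct AbiFlagsRecord {
  uint16_t version;   // version of the flags structure
  uint8_t isa_level;  // 1-5, 32, 64
  uint8_t isa_rev;    // 0 for MIPS V and below, 1-n otherwise
  uint8_t gpr_size;   // AFL_REG_* value
  uint8_t cpr1_size;  // AFL_REG_* value
  uint8_t cpr2_size;  // AFL_REG_* value
  uint8_t fp_abi;     // Val_GNU_MIPS_ABI_FP_* value
  uint32_t isa_ext;   // AFL_EXT_* value
  uint32_t ases;      // mask of AFL_ASE_* bits
  uint32_t flags1;    // AFL_FLAGS1_* bits (currently only ODDSPREG)
  uint32_t flags2;    // reserved, emitted as zero
};

static_assert(sizeof(AbiFlagsRecord) == 24, "record should be 24 bytes");
```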
diff --git a/lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.h b/lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.h
new file mode 100644
index 0000000..ab18c44
--- /dev/null
+++ b/lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.h
@@ -0,0 +1,237 @@
+//===-- MipsABIFlagsSection.h - Mips ELF ABI Flags Section -----*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPSABIFLAGSSECTION_H
+#define MIPSABIFLAGSSECTION_H
+
+#include "llvm/MC/MCStreamer.h"
+
+namespace llvm {
+
+class MCStreamer;
+
+struct MipsABIFlagsSection {
+ // Values for the xxx_size bytes of an ABI flags structure.
+ enum AFL_REG {
+ AFL_REG_NONE = 0x00, // No registers.
+ AFL_REG_32 = 0x01, // 32-bit registers.
+ AFL_REG_64 = 0x02, // 64-bit registers.
+ AFL_REG_128 = 0x03 // 128-bit registers.
+ };
+
+ // Masks for the ases word of an ABI flags structure.
+ enum AFL_ASE {
+ AFL_ASE_DSP = 0x00000001, // DSP ASE.
+ AFL_ASE_DSPR2 = 0x00000002, // DSP R2 ASE.
+ AFL_ASE_EVA = 0x00000004, // Enhanced VA Scheme.
+ AFL_ASE_MCU = 0x00000008, // MCU (MicroController) ASE.
+ AFL_ASE_MDMX = 0x00000010, // MDMX ASE.
+ AFL_ASE_MIPS3D = 0x00000020, // MIPS-3D ASE.
+ AFL_ASE_MT = 0x00000040, // MT ASE.
+ AFL_ASE_SMARTMIPS = 0x00000080, // SmartMIPS ASE.
+ AFL_ASE_VIRT = 0x00000100, // VZ ASE.
+ AFL_ASE_MSA = 0x00000200, // MSA ASE.
+ AFL_ASE_MIPS16 = 0x00000400, // MIPS16 ASE.
+ AFL_ASE_MICROMIPS = 0x00000800, // MICROMIPS ASE.
+ AFL_ASE_XPA = 0x00001000 // XPA ASE.
+ };
+
+ // Values for the isa_ext word of an ABI flags structure.
+ enum AFL_EXT {
+ AFL_EXT_XLR = 1, // RMI Xlr instruction.
+ AFL_EXT_OCTEON2 = 2, // Cavium Networks Octeon2.
+ AFL_EXT_OCTEONP = 3, // Cavium Networks OcteonP.
+ AFL_EXT_LOONGSON_3A = 4, // Loongson 3A.
+ AFL_EXT_OCTEON = 5, // Cavium Networks Octeon.
+ AFL_EXT_5900 = 6, // MIPS R5900 instruction.
+ AFL_EXT_4650 = 7, // MIPS R4650 instruction.
+ AFL_EXT_4010 = 8, // LSI R4010 instruction.
+ AFL_EXT_4100 = 9, // NEC VR4100 instruction.
+ AFL_EXT_3900 = 10, // Toshiba R3900 instruction.
+ AFL_EXT_10000 = 11, // MIPS R10000 instruction.
+ AFL_EXT_SB1 = 12, // Broadcom SB-1 instruction.
+ AFL_EXT_4111 = 13, // NEC VR4111/VR4181 instruction.
+ AFL_EXT_4120 = 14, // NEC VR4120 instruction.
+ AFL_EXT_5400 = 15, // NEC VR5400 instruction.
+ AFL_EXT_5500 = 16, // NEC VR5500 instruction.
+ AFL_EXT_LOONGSON_2E = 17, // ST Microelectronics Loongson 2E.
+ AFL_EXT_LOONGSON_2F = 18 // ST Microelectronics Loongson 2F.
+ };
+
+ // Values for the fp_abi word of an ABI flags structure.
+ enum Val_GNU_MIPS_ABI {
+ Val_GNU_MIPS_ABI_FP_ANY = 0,
+ Val_GNU_MIPS_ABI_FP_DOUBLE = 1,
+ Val_GNU_MIPS_ABI_FP_XX = 5,
+ Val_GNU_MIPS_ABI_FP_64 = 6,
+ Val_GNU_MIPS_ABI_FP_64A = 7
+ };
+
+ enum AFL_FLAGS1 {
+ AFL_FLAGS1_ODDSPREG = 1
+ };
+
+ // Internal representation of the values used in .module fp=value
+ enum class FpABIKind { ANY, XX, S32, S64 };
+
+ // Version of flags structure.
+ uint16_t Version;
+ // The level of the ISA: 1-5, 32, 64.
+ uint8_t ISALevel;
+ // The revision of ISA: 0 for MIPS V and below, 1-n otherwise.
+ uint8_t ISARevision;
+ // The size of general purpose registers.
+ AFL_REG GPRSize;
+ // The size of co-processor 1 registers.
+ AFL_REG CPR1Size;
+ // The size of co-processor 2 registers.
+ AFL_REG CPR2Size;
+ // Processor-specific extension.
+ uint32_t ISAExtensionSet;
+ // Mask of ASEs used.
+ uint32_t ASESet;
+
+ bool OddSPReg;
+
+ bool Is32BitABI;
+
+protected:
+ // The floating-point ABI.
+ FpABIKind FpABI;
+
+public:
+ MipsABIFlagsSection()
+ : Version(0), ISALevel(0), ISARevision(0), GPRSize(AFL_REG_NONE),
+ CPR1Size(AFL_REG_NONE), CPR2Size(AFL_REG_NONE), ISAExtensionSet(0),
+ ASESet(0), OddSPReg(false), Is32BitABI(false), FpABI(FpABIKind::ANY) {}
+
+ uint16_t getVersionValue() { return (uint16_t)Version; }
+ uint8_t getISALevelValue() { return (uint8_t)ISALevel; }
+ uint8_t getISARevisionValue() { return (uint8_t)ISARevision; }
+ uint8_t getGPRSizeValue() { return (uint8_t)GPRSize; }
+ uint8_t getCPR1SizeValue() { return (uint8_t)CPR1Size; }
+ uint8_t getCPR2SizeValue() { return (uint8_t)CPR2Size; }
+ uint8_t getFpABIValue();
+ uint32_t getISAExtensionSetValue() { return (uint32_t)ISAExtensionSet; }
+ uint32_t getASESetValue() { return (uint32_t)ASESet; }
+
+ uint32_t getFlags1Value() {
+ uint32_t Value = 0;
+
+ if (OddSPReg)
+ Value |= (uint32_t)AFL_FLAGS1_ODDSPREG;
+
+ return Value;
+ }
+
+ uint32_t getFlags2Value() { return 0; }
+
+ FpABIKind getFpABI() { return FpABI; }
+ void setFpABI(FpABIKind Value, bool IsABI32Bit) {
+ FpABI = Value;
+ Is32BitABI = IsABI32Bit;
+ }
+ StringRef getFpABIString(FpABIKind Value);
+
+ template <class PredicateLibrary>
+ void setISALevelAndRevisionFromPredicates(const PredicateLibrary &P) {
+ if (P.hasMips64()) {
+ ISALevel = 64;
+ if (P.hasMips64r6())
+ ISARevision = 6;
+ else if (P.hasMips64r2())
+ ISARevision = 2;
+ else
+ ISARevision = 1;
+ } else if (P.hasMips32()) {
+ ISALevel = 32;
+ if (P.hasMips32r6())
+ ISARevision = 6;
+ else if (P.hasMips32r2())
+ ISARevision = 2;
+ else
+ ISARevision = 1;
+ } else {
+ ISARevision = 0;
+ if (P.hasMips5())
+ ISALevel = 5;
+ else if (P.hasMips4())
+ ISALevel = 4;
+ else if (P.hasMips3())
+ ISALevel = 3;
+ else if (P.hasMips2())
+ ISALevel = 2;
+ else if (P.hasMips1())
+ ISALevel = 1;
+ else
+ llvm_unreachable("Unknown ISA level!");
+ }
+ }
+
+ template <class PredicateLibrary>
+ void setGPRSizeFromPredicates(const PredicateLibrary &P) {
+ GPRSize = P.isGP64bit() ? AFL_REG_64 : AFL_REG_32;
+ }
+
+ template <class PredicateLibrary>
+ void setCPR1SizeFromPredicates(const PredicateLibrary &P) {
+ if (P.mipsSEUsesSoftFloat())
+ CPR1Size = AFL_REG_NONE;
+ else if (P.hasMSA())
+ CPR1Size = AFL_REG_128;
+ else
+ CPR1Size = P.isFP64bit() ? AFL_REG_64 : AFL_REG_32;
+ }
+
+ template <class PredicateLibrary>
+ void setASESetFromPredicates(const PredicateLibrary &P) {
+ ASESet = 0;
+ if (P.hasDSP())
+ ASESet |= AFL_ASE_DSP;
+ if (P.hasDSPR2())
+ ASESet |= AFL_ASE_DSPR2;
+ if (P.hasMSA())
+ ASESet |= AFL_ASE_MSA;
+ if (P.inMicroMipsMode())
+ ASESet |= AFL_ASE_MICROMIPS;
+ if (P.inMips16Mode())
+ ASESet |= AFL_ASE_MIPS16;
+ }
+
+ template <class PredicateLibrary>
+ void setFpAbiFromPredicates(const PredicateLibrary &P) {
+ Is32BitABI = P.isABI_O32();
+
+ FpABI = FpABIKind::ANY;
+ if (P.isABI_N32() || P.isABI_N64())
+ FpABI = FpABIKind::S64;
+ else if (P.isABI_O32()) {
+ if (P.isFP64bit())
+ FpABI = FpABIKind::S64;
+ else if (P.isABI_FPXX())
+ FpABI = FpABIKind::XX;
+ else
+ FpABI = FpABIKind::S32;
+ }
+ }
+
+ template <class PredicateLibrary>
+ void setAllFromPredicates(const PredicateLibrary &P) {
+ setISALevelAndRevisionFromPredicates(P);
+ setGPRSizeFromPredicates(P);
+ setCPR1SizeFromPredicates(P);
+ setASESetFromPredicates(P);
+ setFpAbiFromPredicates(P);
+ }
+};
+
+MCStreamer &operator<<(MCStreamer &OS, MipsABIFlagsSection &ABIFlagsSection);
+}
+
+#endif
diff --git a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
index 5375a00..d8e6128 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
@@ -70,6 +70,13 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
if (!isIntN(16, Value) && Ctx)
Ctx->FatalError(Fixup.getLoc(), "out of range PC16 fixup");
break;
+ case Mips::fixup_MIPS_PC19_S2:
+ // Forcing a signed division because Value can be negative.
+ Value = (int64_t)Value / 4;
+ // We now check if Value can be encoded as a 19-bit signed immediate.
+ if (!isIntN(19, Value) && Ctx)
+ Ctx->FatalError(Fixup.getLoc(), "out of range PC19 fixup");
+ break;
case Mips::fixup_Mips_26:
// So far we are only using this type for jumps.
// The displacement is then divided by 4 to give us an 28 bit
@@ -104,6 +111,13 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
if (!isIntN(16, Value) && Ctx)
Ctx->FatalError(Fixup.getLoc(), "out of range PC16 fixup");
break;
+ case Mips::fixup_MIPS_PC18_S3:
+ // Forcing a signed division because Value can be negative.
+ Value = (int64_t)Value / 8;
+    // We now check if Value can be encoded as an 18-bit signed immediate.
+ if (!isIntN(18, Value) && Ctx)
+ Ctx->FatalError(Fixup.getLoc(), "out of range PC18 fixup");
+ break;
case Mips::fixup_MIPS_PC21_S2:
Value -= 4;
// Forcing a signed division because Value can be negative.
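
Editor's note: both new PC-relative fixups follow the same scale-then-range-check pattern: PC19_S2 divides by 4 and must fit in 19 signed bits, PC18_S3 divides by 8 and must fit in 18 signed bits. A self-contained sketch of that check, where fitsInSignedBits stands in for llvm::isIntN:

```cpp
#include <cstdint>
#include <stdexcept>

// True when V fits in an N-bit signed integer (stand-in for llvm::isIntN).
static bool fitsInSignedBits(unsigned N, int64_t V) {
  return V >= -(int64_t(1) << (N - 1)) && V < (int64_t(1) << (N - 1));
}

// Divide (signed) by 2^Shift and range-check against Bits signed bits, e.g.
// Shift = 2, Bits = 19 for PC19_S2 and Shift = 3, Bits = 18 for PC18_S3.
int64_t scalePCRelFixup(int64_t Value, unsigned Shift, unsigned Bits) {
  int64_t Scaled = Value / (int64_t(1) << Shift); // signed division, as above
  if (!fitsInSignedBits(Bits, Scaled))
    throw std::range_error("out of range PC-relative fixup");
  return Scaled;
}
```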
@@ -247,6 +261,8 @@ getFixupKindInfo(MCFixupKind Kind) const {
{ "fixup_Mips_GOT_LO16", 0, 16, 0 },
{ "fixup_Mips_CALL_HI16", 0, 16, 0 },
{ "fixup_Mips_CALL_LO16", 0, 16, 0 },
+ { "fixup_Mips_PC18_S3", 0, 18, MCFixupKindInfo::FKF_IsPCRel },
+ { "fixup_MIPS_PC19_S2", 0, 19, MCFixupKindInfo::FKF_IsPCRel },
{ "fixup_MIPS_PC21_S2", 0, 21, MCFixupKindInfo::FKF_IsPCRel },
{ "fixup_MIPS_PC26_S2", 0, 26, MCFixupKindInfo::FKF_IsPCRel },
{ "fixup_MIPS_PCHI16", 0, 16, MCFixupKindInfo::FKF_IsPCRel },
@@ -308,6 +324,8 @@ getFixupKindInfo(MCFixupKind Kind) const {
{ "fixup_Mips_GOT_LO16", 16, 16, 0 },
{ "fixup_Mips_CALL_HI16", 16, 16, 0 },
{ "fixup_Mips_CALL_LO16", 16, 16, 0 },
+ { "fixup_Mips_PC18_S3", 14, 18, MCFixupKindInfo::FKF_IsPCRel },
+ { "fixup_MIPS_PC19_S2", 13, 19, MCFixupKindInfo::FKF_IsPCRel },
{ "fixup_MIPS_PC21_S2", 11, 21, MCFixupKindInfo::FKF_IsPCRel },
{ "fixup_MIPS_PC26_S2", 6, 26, MCFixupKindInfo::FKF_IsPCRel },
{ "fixup_MIPS_PCHI16", 16, 16, MCFixupKindInfo::FKF_IsPCRel },
diff --git a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h
index bc695e6..d5c3dbc 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h
+++ b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h
@@ -65,7 +65,7 @@ public:
const MCRelaxableFragment *DF,
const MCAsmLayout &Layout) const override {
// FIXME.
- assert(0 && "RelaxInstruction() unimplemented");
+ llvm_unreachable("RelaxInstruction() unimplemented");
return false;
}
diff --git a/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp b/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp
index 74c12ff..49ac256 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp
@@ -193,6 +193,12 @@ unsigned MipsELFObjectWriter::GetRelocType(const MCValue &Target,
case Mips::fixup_MICROMIPS_TLS_TPREL_LO16:
Type = ELF::R_MICROMIPS_TLS_TPREL_LO16;
break;
+ case Mips::fixup_MIPS_PC19_S2:
+ Type = ELF::R_MIPS_PC19_S2;
+ break;
+ case Mips::fixup_MIPS_PC18_S3:
+ Type = ELF::R_MIPS_PC18_S3;
+ break;
case Mips::fixup_MIPS_PC21_S2:
Type = ELF::R_MIPS_PC21_S2;
break;
diff --git a/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h b/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h
index 3079004..05080f0 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h
+++ b/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h
@@ -128,6 +128,12 @@ namespace Mips {
// resulting in - R_MIPS_CALL_LO16
fixup_Mips_CALL_LO16,
+ // resulting in - R_MIPS_PC18_S3
+ fixup_MIPS_PC18_S3,
+
+ // resulting in - R_MIPS_PC19_S2
+ fixup_MIPS_PC19_S2,
+
// resulting in - R_MIPS_PC21_S2
fixup_MIPS_PC21_S2,
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp
index 6aa3c76..e415412 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp
@@ -38,7 +38,7 @@ MipsMCAsmInfo::MipsMCAsmInfo(StringRef TT) {
ZeroDirective = "\t.space\t";
GPRel32Directive = "\t.gpword\t";
GPRel64Directive = "\t.gpdword\t";
- DebugLabelSuffix = "=.";
+ UseAssignmentForEHBegin = true;
SupportsDebugInformation = true;
ExceptionsType = ExceptionHandling::DwarfCFI;
HasLEB128 = true;
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
index 85e0bf1..43fc521 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
@@ -621,11 +621,42 @@ unsigned
MipsMCCodeEmitter::getSimm19Lsl2Encoding(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
- assert(MI.getOperand(OpNo).isImm());
- // The immediate is encoded as 'immediate << 2'.
- unsigned Res = getMachineOpValue(MI, MI.getOperand(OpNo), Fixups, STI);
- assert((Res & 3) == 0);
- return Res >> 2;
+ const MCOperand &MO = MI.getOperand(OpNo);
+ if (MO.isImm()) {
+ // The immediate is encoded as 'immediate << 2'.
+ unsigned Res = getMachineOpValue(MI, MO, Fixups, STI);
+ assert((Res & 3) == 0);
+ return Res >> 2;
+ }
+
+ assert(MO.isExpr() &&
+ "getSimm19Lsl2Encoding expects only expressions or an immediate");
+
+ const MCExpr *Expr = MO.getExpr();
+ Fixups.push_back(MCFixup::Create(0, Expr,
+ MCFixupKind(Mips::fixup_MIPS_PC19_S2)));
+ return 0;
+}
+
+unsigned
+MipsMCCodeEmitter::getSimm18Lsl3Encoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ const MCOperand &MO = MI.getOperand(OpNo);
+ if (MO.isImm()) {
+ // The immediate is encoded as 'immediate << 3'.
+    unsigned Res = getMachineOpValue(MI, MO, Fixups, STI);
+ assert((Res & 7) == 0);
+ return Res >> 3;
+ }
+
+ assert(MO.isExpr() &&
+ "getSimm18Lsl2Encoding expects only expressions or an immediate");
+
+ const MCExpr *Expr = MO.getExpr();
+ Fixups.push_back(MCFixup::Create(0, Expr,
+ MCFixupKind(Mips::fixup_MIPS_PC18_S3)));
+ return 0;
}
#include "MipsGenMCCodeEmitter.inc"
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.h b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.h
index 3f7daab..304167f 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.h
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.h
@@ -141,6 +141,10 @@ public:
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
+ unsigned getSimm18Lsl3Encoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
unsigned getExprOpValue(const MCExpr *Expr, SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCExpr.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCExpr.cpp
index 21ccc3c..5bba3e5 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCExpr.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCExpr.cpp
@@ -11,6 +11,7 @@
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCObjectStreamer.h"
using namespace llvm;
@@ -83,33 +84,6 @@ MipsMCExpr::EvaluateAsRelocatableImpl(MCValue &Res,
return getSubExpr()->EvaluateAsRelocatable(Res, Layout);
}
-// FIXME: This basically copies MCObjectStreamer::AddValueSymbols. Perhaps
-// that method should be made public?
-static void AddValueSymbolsImpl(const MCExpr *Value, MCAssembler *Asm) {
- switch (Value->getKind()) {
- case MCExpr::Target:
- llvm_unreachable("Can't handle nested target expr!");
-
- case MCExpr::Constant:
- break;
-
- case MCExpr::Binary: {
- const MCBinaryExpr *BE = cast<MCBinaryExpr>(Value);
- AddValueSymbolsImpl(BE->getLHS(), Asm);
- AddValueSymbolsImpl(BE->getRHS(), Asm);
- break;
- }
-
- case MCExpr::SymbolRef:
- Asm->getOrCreateSymbolData(cast<MCSymbolRefExpr>(Value)->getSymbol());
- break;
-
- case MCExpr::Unary:
- AddValueSymbolsImpl(cast<MCUnaryExpr>(Value)->getSubExpr(), Asm);
- break;
- }
-}
-
-void MipsMCExpr::AddValueSymbols(MCAssembler *Asm) const {
- AddValueSymbolsImpl(getSubExpr(), Asm);
+void MipsMCExpr::visitUsedExpr(MCStreamer &Streamer) const {
+ Streamer.visitUsedExpr(*getSubExpr());
}
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCExpr.h b/lib/Target/Mips/MCTargetDesc/MipsMCExpr.h
index 8d7aacd..f193dc9 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCExpr.h
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCExpr.h
@@ -49,7 +49,7 @@ public:
void PrintImpl(raw_ostream &OS) const override;
bool EvaluateAsRelocatableImpl(MCValue &Res,
const MCAsmLayout *Layout) const override;
- void AddValueSymbols(MCAssembler *) const override;
+ void visitUsedExpr(MCStreamer &Streamer) const override;
const MCSection *FindAssociatedSection() const override {
return getSubExpr()->FindAssociatedSection();
}
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp
index 660e5a7..d2b929b 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp
@@ -133,6 +133,12 @@ createMCAsmStreamer(MCContext &Ctx, formatted_raw_ostream &OS,
return S;
}
+static MCStreamer *createMipsNullStreamer(MCContext &Ctx) {
+ MCStreamer *S = llvm::createNullStreamer(Ctx);
+ new MipsTargetStreamer(*S);
+ return S;
+}
+
extern "C" void LLVMInitializeMipsTargetMC() {
// Register the MC asm info.
RegisterMCAsmInfoFn X(TheMipsTarget, createMipsMCAsmInfo);
@@ -187,6 +193,12 @@ extern "C" void LLVMInitializeMipsTargetMC() {
TargetRegistry::RegisterAsmStreamer(TheMips64Target, createMCAsmStreamer);
TargetRegistry::RegisterAsmStreamer(TheMips64elTarget, createMCAsmStreamer);
+ TargetRegistry::RegisterNullStreamer(TheMipsTarget, createMipsNullStreamer);
+ TargetRegistry::RegisterNullStreamer(TheMipselTarget, createMipsNullStreamer);
+ TargetRegistry::RegisterNullStreamer(TheMips64Target, createMipsNullStreamer);
+ TargetRegistry::RegisterNullStreamer(TheMips64elTarget,
+ createMipsNullStreamer);
+
// Register the asm backend.
TargetRegistry::RegisterMCAsmBackend(TheMipsTarget,
createMipsAsmBackendEB32);
diff --git a/lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp b/lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp
index cd6be73..6cde8f9 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp
@@ -48,7 +48,13 @@ private:
bool PendingCall;
bool isIndirectJump(const MCInst &MI) {
- return MI.getOpcode() == Mips::JR || MI.getOpcode() == Mips::RET;
+ if (MI.getOpcode() == Mips::JALR) {
+ // MIPS32r6/MIPS64r6 doesn't have a JR instruction and uses JALR instead.
+ // JALR is an indirect branch if the link register is $0.
+ assert(MI.getOperand(0).isReg());
+ return MI.getOperand(0).getReg() == Mips::ZERO;
+ }
+ return MI.getOpcode() == Mips::JR;
}
bool isStackPointerFirstOperand(const MCInst &MI) {
@@ -56,7 +62,9 @@ private:
&& MI.getOperand(0).getReg() == Mips::SP);
}
- bool isCall(unsigned Opcode, bool *IsIndirectCall) {
+ bool isCall(const MCInst &MI, bool *IsIndirectCall) {
+ unsigned Opcode = MI.getOpcode();
+
*IsIndirectCall = false;
switch (Opcode) {
@@ -64,12 +72,19 @@ private:
return false;
case Mips::JAL:
+ case Mips::BAL:
case Mips::BAL_BR:
case Mips::BLTZAL:
case Mips::BGEZAL:
return true;
case Mips::JALR:
+ // JALR is only a call if the link register is not $0. Otherwise it's an
+ // indirect branch.
+ assert(MI.getOperand(0).isReg());
+ if (MI.getOperand(0).getReg() == Mips::ZERO)
+ return false;
+
*IsIndirectCall = true;
return true;
}
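
Editor's note: the JALR special case above reduces to one check on the link register. A minimal sketch of that classification (names are illustrative, with 0 standing for Mips::ZERO):

```cpp
enum class JalrKind { IndirectBranch, IndirectCall };

// With $zero as the link register, JALR discards the return address and acts
// as the r6 replacement for JR; otherwise it is an indirect call.
JalrKind classifyJalr(unsigned LinkReg) {
  return LinkReg == 0 ? JalrKind::IndirectBranch : JalrKind::IndirectCall;
}
```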
@@ -137,24 +152,23 @@ public:
&IsStore);
bool IsSPFirstOperand = isStackPointerFirstOperand(Inst);
if (IsMemAccess || IsSPFirstOperand) {
- if (PendingCall)
- report_fatal_error("Dangerous instruction in branch delay slot!");
-
bool MaskBefore = (IsMemAccess
&& baseRegNeedsLoadStoreMask(Inst.getOperand(AddrIdx)
.getReg()));
bool MaskAfter = IsSPFirstOperand && !IsStore;
- if (MaskBefore || MaskAfter)
+ if (MaskBefore || MaskAfter) {
+ if (PendingCall)
+ report_fatal_error("Dangerous instruction in branch delay slot!");
sandboxLoadStoreStackChange(Inst, AddrIdx, STI, MaskBefore, MaskAfter);
- else
- MipsELFStreamer::EmitInstruction(Inst, STI);
- return;
+ return;
+ }
+ // fallthrough
}
// Sandbox calls by aligning call and branch delay to the bundle end.
// For indirect calls, emit the mask before the call.
bool IsIndirectCall;
- if (isCall(Inst.getOpcode(), &IsIndirectCall)) {
+ if (isCall(Inst, &IsIndirectCall)) {
if (PendingCall)
report_fatal_error("Dangerous instruction in branch delay slot!");
@@ -203,6 +217,7 @@ bool isBasePlusOffsetMemoryAccess(unsigned Opcode, unsigned *AddrIdx,
case Mips::LWC1:
case Mips::LDC1:
case Mips::LL:
+ case Mips::LL_R6:
case Mips::LWL:
case Mips::LWR:
*AddrIdx = 1;
@@ -223,6 +238,7 @@ bool isBasePlusOffsetMemoryAccess(unsigned Opcode, unsigned *AddrIdx,
// Store instructions with base address register in position 2.
case Mips::SC:
+ case Mips::SC_R6:
*AddrIdx = 2;
if (IsStore)
*IsStore = true;
diff --git a/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp b/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp
index a8fa272..fbe375b 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp
@@ -27,10 +27,43 @@
using namespace llvm;
-// Pin vtable to this file.
-void MipsTargetStreamer::anchor() {}
-
-MipsTargetStreamer::MipsTargetStreamer(MCStreamer &S) : MCTargetStreamer(S) {}
+MipsTargetStreamer::MipsTargetStreamer(MCStreamer &S)
+ : MCTargetStreamer(S), canHaveModuleDirective(true) {}
+void MipsTargetStreamer::emitDirectiveSetMicroMips() {}
+void MipsTargetStreamer::emitDirectiveSetNoMicroMips() {}
+void MipsTargetStreamer::emitDirectiveSetMips16() {}
+void MipsTargetStreamer::emitDirectiveSetNoMips16() {}
+void MipsTargetStreamer::emitDirectiveSetReorder() {}
+void MipsTargetStreamer::emitDirectiveSetNoReorder() {}
+void MipsTargetStreamer::emitDirectiveSetMacro() {}
+void MipsTargetStreamer::emitDirectiveSetNoMacro() {}
+void MipsTargetStreamer::emitDirectiveSetAt() {}
+void MipsTargetStreamer::emitDirectiveSetNoAt() {}
+void MipsTargetStreamer::emitDirectiveEnd(StringRef Name) {}
+void MipsTargetStreamer::emitDirectiveEnt(const MCSymbol &Symbol) {}
+void MipsTargetStreamer::emitDirectiveAbiCalls() {}
+void MipsTargetStreamer::emitDirectiveNaN2008() {}
+void MipsTargetStreamer::emitDirectiveNaNLegacy() {}
+void MipsTargetStreamer::emitDirectiveOptionPic0() {}
+void MipsTargetStreamer::emitDirectiveOptionPic2() {}
+void MipsTargetStreamer::emitFrame(unsigned StackReg, unsigned StackSize,
+ unsigned ReturnReg) {}
+void MipsTargetStreamer::emitMask(unsigned CPUBitmask, int CPUTopSavedRegOff) {}
+void MipsTargetStreamer::emitFMask(unsigned FPUBitmask, int FPUTopSavedRegOff) {
+}
+void MipsTargetStreamer::emitDirectiveSetMips32R2() {}
+void MipsTargetStreamer::emitDirectiveSetMips64() {}
+void MipsTargetStreamer::emitDirectiveSetMips64R2() {}
+void MipsTargetStreamer::emitDirectiveSetDsp() {}
+void MipsTargetStreamer::emitDirectiveCpload(unsigned RegNo) {}
+void MipsTargetStreamer::emitDirectiveCpsetup(unsigned RegNo, int RegOrOffset,
+ const MCSymbol &Sym, bool IsReg) {
+}
+void MipsTargetStreamer::emitDirectiveModuleOddSPReg(bool Enabled,
+ bool IsO32ABI) {
+ if (!Enabled && !IsO32ABI)
+ report_fatal_error("+nooddspreg is only valid for O32");
+}
MipsTargetAsmStreamer::MipsTargetAsmStreamer(MCStreamer &S,
formatted_raw_ostream &OS)
@@ -38,42 +71,52 @@ MipsTargetAsmStreamer::MipsTargetAsmStreamer(MCStreamer &S,
void MipsTargetAsmStreamer::emitDirectiveSetMicroMips() {
OS << "\t.set\tmicromips\n";
+ setCanHaveModuleDir(false);
}
void MipsTargetAsmStreamer::emitDirectiveSetNoMicroMips() {
OS << "\t.set\tnomicromips\n";
+ setCanHaveModuleDir(false);
}
void MipsTargetAsmStreamer::emitDirectiveSetMips16() {
OS << "\t.set\tmips16\n";
+ setCanHaveModuleDir(false);
}
void MipsTargetAsmStreamer::emitDirectiveSetNoMips16() {
OS << "\t.set\tnomips16\n";
+ setCanHaveModuleDir(false);
}
void MipsTargetAsmStreamer::emitDirectiveSetReorder() {
OS << "\t.set\treorder\n";
+ setCanHaveModuleDir(false);
}
void MipsTargetAsmStreamer::emitDirectiveSetNoReorder() {
OS << "\t.set\tnoreorder\n";
+ setCanHaveModuleDir(false);
}
void MipsTargetAsmStreamer::emitDirectiveSetMacro() {
OS << "\t.set\tmacro\n";
+ setCanHaveModuleDir(false);
}
void MipsTargetAsmStreamer::emitDirectiveSetNoMacro() {
OS << "\t.set\tnomacro\n";
+ setCanHaveModuleDir(false);
}
void MipsTargetAsmStreamer::emitDirectiveSetAt() {
OS << "\t.set\tat\n";
+ setCanHaveModuleDir(false);
}
void MipsTargetAsmStreamer::emitDirectiveSetNoAt() {
OS << "\t.set\tnoat\n";
+ setCanHaveModuleDir(false);
}
void MipsTargetAsmStreamer::emitDirectiveEnd(StringRef Name) {
@@ -110,24 +153,28 @@ void MipsTargetAsmStreamer::emitFrame(unsigned StackReg, unsigned StackSize,
void MipsTargetAsmStreamer::emitDirectiveSetMips32R2() {
OS << "\t.set\tmips32r2\n";
+ setCanHaveModuleDir(false);
}
void MipsTargetAsmStreamer::emitDirectiveSetMips64() {
OS << "\t.set\tmips64\n";
+ setCanHaveModuleDir(false);
}
void MipsTargetAsmStreamer::emitDirectiveSetMips64R2() {
OS << "\t.set\tmips64r2\n";
+ setCanHaveModuleDir(false);
}
void MipsTargetAsmStreamer::emitDirectiveSetDsp() {
OS << "\t.set\tdsp\n";
+ setCanHaveModuleDir(false);
}
// Print a 32-bit hex number, always emitting all eight digits.
static void printHex32(unsigned Value, raw_ostream &OS) {
OS << "0x";
for (int i = 7; i >= 0; i--)
- OS.write_hex((Value & (0xF << (i*4))) >> (i*4));
+ OS.write_hex((Value & (0xF << (i * 4))) >> (i * 4));
}
void MipsTargetAsmStreamer::emitMask(unsigned CPUBitmask,
@@ -147,6 +194,7 @@ void MipsTargetAsmStreamer::emitFMask(unsigned FPUBitmask,
void MipsTargetAsmStreamer::emitDirectiveCpload(unsigned RegNo) {
OS << "\t.cpload\t$"
<< StringRef(MipsInstPrinter::getRegisterName(RegNo)).lower() << "\n";
+ setCanHaveModuleDir(false);
}
void MipsTargetAsmStreamer::emitDirectiveCpsetup(unsigned RegNo,
@@ -165,6 +213,34 @@ void MipsTargetAsmStreamer::emitDirectiveCpsetup(unsigned RegNo,
OS << ", ";
OS << Sym.getName() << "\n";
+ setCanHaveModuleDir(false);
+}
+
+void MipsTargetAsmStreamer::emitDirectiveModuleFP(
+ MipsABIFlagsSection::FpABIKind Value, bool Is32BitABI) {
+ MipsTargetStreamer::emitDirectiveModuleFP(Value, Is32BitABI);
+
+ OS << "\t.module\tfp=";
+ OS << ABIFlagsSection.getFpABIString(Value) << "\n";
+}
+
+void MipsTargetAsmStreamer::emitDirectiveSetFp(
+ MipsABIFlagsSection::FpABIKind Value) {
+ OS << "\t.set\tfp=";
+ OS << ABIFlagsSection.getFpABIString(Value) << "\n";
+}
+
+void MipsTargetAsmStreamer::emitMipsAbiFlags() {
+ // No action required for text output.
+}
+
+void MipsTargetAsmStreamer::emitDirectiveModuleOddSPReg(bool Enabled,
+ bool IsO32ABI) {
+ MipsTargetStreamer::emitDirectiveModuleOddSPReg(Enabled, IsO32ABI);
+
+ OS << "\t.module\t" << (Enabled ? "" : "no") << "oddspreg\n";
}
// This part is for ELF object output.
@@ -174,7 +250,7 @@ MipsTargetELFStreamer::MipsTargetELFStreamer(MCStreamer &S,
MCAssembler &MCA = getStreamer().getAssembler();
uint64_t Features = STI.getFeatureBits();
Triple T(STI.getTargetTriple());
- Pic = (MCA.getContext().getObjectFileInfo()->getRelocM() == Reloc::PIC_)
+ Pic = (MCA.getContext().getObjectFileInfo()->getRelocM() == Reloc::PIC_)
? true
: false;
@@ -182,16 +258,28 @@ MipsTargetELFStreamer::MipsTargetELFStreamer(MCStreamer &S,
unsigned EFlags = 0;
// Architecture
- if (Features & Mips::FeatureMips64r2)
+ if (Features & Mips::FeatureMips64r6)
+ EFlags |= ELF::EF_MIPS_ARCH_64R6;
+ else if (Features & Mips::FeatureMips64r2)
EFlags |= ELF::EF_MIPS_ARCH_64R2;
else if (Features & Mips::FeatureMips64)
EFlags |= ELF::EF_MIPS_ARCH_64;
+ else if (Features & Mips::FeatureMips5)
+ EFlags |= ELF::EF_MIPS_ARCH_5;
else if (Features & Mips::FeatureMips4)
EFlags |= ELF::EF_MIPS_ARCH_4;
+ else if (Features & Mips::FeatureMips3)
+ EFlags |= ELF::EF_MIPS_ARCH_3;
+ else if (Features & Mips::FeatureMips32r6)
+ EFlags |= ELF::EF_MIPS_ARCH_32R6;
else if (Features & Mips::FeatureMips32r2)
EFlags |= ELF::EF_MIPS_ARCH_32R2;
else if (Features & Mips::FeatureMips32)
EFlags |= ELF::EF_MIPS_ARCH_32;
+ else if (Features & Mips::FeatureMips2)
+ EFlags |= ELF::EF_MIPS_ARCH_2;
+ else
+ EFlags |= ELF::EF_MIPS_ARCH_1;
if (T.isArch64Bit()) {
if (Features & Mips::FeatureN32)
@@ -244,17 +332,17 @@ void MipsTargetELFStreamer::finish() {
ELF::SHF_ALLOC | ELF::SHF_MIPS_NOSTRIP, SectionKind::getMetadata());
OS.SwitchSection(Sec);
- OS.EmitIntValue(1, 1); // kind
+ OS.EmitIntValue(1, 1); // kind
OS.EmitIntValue(40, 1); // size
- OS.EmitIntValue(0, 2); // section
- OS.EmitIntValue(0, 4); // info
- OS.EmitIntValue(0, 4); // ri_gprmask
- OS.EmitIntValue(0, 4); // pad
- OS.EmitIntValue(0, 4); // ri_cpr[0]mask
- OS.EmitIntValue(0, 4); // ri_cpr[1]mask
- OS.EmitIntValue(0, 4); // ri_cpr[2]mask
- OS.EmitIntValue(0, 4); // ri_cpr[3]mask
- OS.EmitIntValue(0, 8); // ri_gp_value
+ OS.EmitIntValue(0, 2); // section
+ OS.EmitIntValue(0, 4); // info
+ OS.EmitIntValue(0, 4); // ri_gprmask
+ OS.EmitIntValue(0, 4); // pad
+ OS.EmitIntValue(0, 4); // ri_cpr[0]mask
+ OS.EmitIntValue(0, 4); // ri_cpr[1]mask
+ OS.EmitIntValue(0, 4); // ri_cpr[2]mask
+ OS.EmitIntValue(0, 4); // ri_cpr[3]mask
+ OS.EmitIntValue(0, 8); // ri_gp_value
} else {
const MCSectionELF *Sec =
Context.getELFSection(".reginfo", ELF::SHT_MIPS_REGINFO, ELF::SHF_ALLOC,
@@ -268,6 +356,7 @@ void MipsTargetELFStreamer::finish() {
OS.EmitIntValue(0, 4); // ri_cpr[3]mask
OS.EmitIntValue(0, 4); // ri_gp_value
}
+ emitMipsAbiFlags();
}
void MipsTargetELFStreamer::emitAssignment(MCSymbol *Symbol,
@@ -276,11 +365,11 @@ void MipsTargetELFStreamer::emitAssignment(MCSymbol *Symbol,
if (Value->getKind() != MCExpr::SymbolRef)
return;
const MCSymbol &RhsSym =
- static_cast<const MCSymbolRefExpr *>(Value)->getSymbol();
+ static_cast<const MCSymbolRefExpr *>(Value)->getSymbol();
MCSymbolData &Data = getStreamer().getOrCreateSymbolData(&RhsSym);
uint8_t Type = MCELF::GetType(Data);
- if ((Type != ELF::STT_FUNC)
- || !(MCELF::getOther(Data) & (ELF::STO_MIPS_MICROMIPS >> 2)))
+ if ((Type != ELF::STT_FUNC) ||
+ !(MCELF::getOther(Data) & (ELF::STO_MIPS_MICROMIPS >> 2)))
return;
MCSymbolData &SymbolData = getStreamer().getOrCreateSymbolData(Symbol);
@@ -305,6 +394,7 @@ void MipsTargetELFStreamer::emitDirectiveSetMicroMips() {
void MipsTargetELFStreamer::emitDirectiveSetNoMicroMips() {
MicroMipsEnabled = false;
+ setCanHaveModuleDir(false);
}
void MipsTargetELFStreamer::emitDirectiveSetMips16() {
@@ -312,14 +402,17 @@ void MipsTargetELFStreamer::emitDirectiveSetMips16() {
unsigned Flags = MCA.getELFHeaderEFlags();
Flags |= ELF::EF_MIPS_ARCH_ASE_M16;
MCA.setELFHeaderEFlags(Flags);
+ setCanHaveModuleDir(false);
}
void MipsTargetELFStreamer::emitDirectiveSetNoMips16() {
// FIXME: implement.
+ setCanHaveModuleDir(false);
}
void MipsTargetELFStreamer::emitDirectiveSetReorder() {
// FIXME: implement.
+ setCanHaveModuleDir(false);
}
void MipsTargetELFStreamer::emitDirectiveSetNoReorder() {
@@ -327,22 +420,27 @@ void MipsTargetELFStreamer::emitDirectiveSetNoReorder() {
unsigned Flags = MCA.getELFHeaderEFlags();
Flags |= ELF::EF_MIPS_NOREORDER;
MCA.setELFHeaderEFlags(Flags);
+ setCanHaveModuleDir(false);
}
void MipsTargetELFStreamer::emitDirectiveSetMacro() {
// FIXME: implement.
+ setCanHaveModuleDir(false);
}
void MipsTargetELFStreamer::emitDirectiveSetNoMacro() {
// FIXME: implement.
+ setCanHaveModuleDir(false);
}
void MipsTargetELFStreamer::emitDirectiveSetAt() {
// FIXME: implement.
+ setCanHaveModuleDir(false);
}
void MipsTargetELFStreamer::emitDirectiveSetNoAt() {
// FIXME: implement.
+ setCanHaveModuleDir(false);
}
void MipsTargetELFStreamer::emitDirectiveEnd(StringRef Name) {
@@ -411,19 +509,19 @@ void MipsTargetELFStreamer::emitFMask(unsigned FPUBitmask,
}
void MipsTargetELFStreamer::emitDirectiveSetMips32R2() {
- // No action required for ELF output.
+ setCanHaveModuleDir(false);
}
void MipsTargetELFStreamer::emitDirectiveSetMips64() {
- // No action required for ELF output.
+ setCanHaveModuleDir(false);
}
void MipsTargetELFStreamer::emitDirectiveSetMips64R2() {
- // No action required for ELF output.
+ setCanHaveModuleDir(false);
}
void MipsTargetELFStreamer::emitDirectiveSetDsp() {
- // No action required for ELF output.
+ setCanHaveModuleDir(false);
}
void MipsTargetELFStreamer::emitDirectiveCpload(unsigned RegNo) {
@@ -473,6 +571,8 @@ void MipsTargetELFStreamer::emitDirectiveCpload(unsigned RegNo) {
TmpInst.addOperand(MCOperand::CreateReg(Mips::GP));
TmpInst.addOperand(MCOperand::CreateReg(RegNo));
getStreamer().EmitInstruction(TmpInst, STI);
+
+ setCanHaveModuleDir(false);
}
void MipsTargetELFStreamer::emitDirectiveCpsetup(unsigned RegNo,
@@ -528,4 +628,27 @@ void MipsTargetELFStreamer::emitDirectiveCpsetup(unsigned RegNo,
Inst.addOperand(MCOperand::CreateReg(Mips::GP));
Inst.addOperand(MCOperand::CreateReg(RegNo));
getStreamer().EmitInstruction(Inst, STI);
+
+ setCanHaveModuleDir(false);
+}
+
+void MipsTargetELFStreamer::emitMipsAbiFlags() {
+ MCAssembler &MCA = getStreamer().getAssembler();
+ MCContext &Context = MCA.getContext();
+ MCStreamer &OS = getStreamer();
+ const MCSectionELF *Sec =
+ Context.getELFSection(".MIPS.abiflags", ELF::SHT_MIPS_ABIFLAGS,
+ ELF::SHF_ALLOC, SectionKind::getMetadata());
+ MCSectionData &ABIShndxSD = MCA.getOrCreateSectionData(*Sec);
+ ABIShndxSD.setAlignment(8);
+ OS.SwitchSection(Sec);
+
+ OS << ABIFlagsSection;
+}
+
+void MipsTargetELFStreamer::emitDirectiveModuleOddSPReg(bool Enabled,
+ bool IsO32ABI) {
+ MipsTargetStreamer::emitDirectiveModuleOddSPReg(Enabled, IsO32ABI);
+
+ ABIFlagsSection.OddSPReg = Enabled;
}
diff --git a/lib/Target/Mips/MicroMipsInstrFPU.td b/lib/Target/Mips/MicroMipsInstrFPU.td
index d95f9b0..b93017a 100644
--- a/lib/Target/Mips/MicroMipsInstrFPU.td
+++ b/lib/Target/Mips/MicroMipsInstrFPU.td
@@ -24,13 +24,13 @@ def LDC1_MM : MMRel, LW_FT<"ldc1", AFGR64Opnd, II_LDC1, load>, LW_FM_MM<0x2f>;
def SDC1_MM : MMRel, SW_FT<"sdc1", AFGR64Opnd, II_SDC1, store>,
LW_FM_MM<0x2e>;
def LWXC1_MM : MMRel, LWXC1_FT<"lwxc1", FGR32Opnd, II_LWXC1, load>,
- LWXC1_FM_MM<0x48>;
+ LWXC1_FM_MM<0x48>, INSN_MIPS4_32R2_NOT_32R6_64R6;
def SWXC1_MM : MMRel, SWXC1_FT<"swxc1", FGR32Opnd, II_SWXC1, store>,
- SWXC1_FM_MM<0x88>;
+ SWXC1_FM_MM<0x88>, INSN_MIPS4_32R2_NOT_32R6_64R6;
def LUXC1_MM : MMRel, LWXC1_FT<"luxc1", AFGR64Opnd, II_LUXC1>,
- LWXC1_FM_MM<0x148>, INSN_MIPS5_32R2;
+ LWXC1_FM_MM<0x148>, INSN_MIPS5_32R2_NOT_32R6_64R6;
def SUXC1_MM : MMRel, SWXC1_FT<"suxc1", AFGR64Opnd, II_SUXC1>,
- SWXC1_FM_MM<0x188>, INSN_MIPS5_32R2;
+ SWXC1_FM_MM<0x188>, INSN_MIPS5_32R2_NOT_32R6_64R6;
def FCMP_S32_MM : MMRel, CEQS_FT<"s", FGR32, II_C_CC_S, MipsFPCmp>,
CEQS_FM_MM<0>;
@@ -38,9 +38,9 @@ def FCMP_D32_MM : MMRel, CEQS_FT<"d", AFGR64, II_C_CC_D, MipsFPCmp>,
CEQS_FM_MM<1>;
def BC1F_MM : MMRel, BC1F_FT<"bc1f", brtarget_mm, IIBranch, MIPS_BRANCH_F>,
- BC1F_FM_MM<0x1c>;
+ BC1F_FM_MM<0x1c>, ISA_MIPS1_NOT_32R6_64R6;
def BC1T_MM : MMRel, BC1F_FT<"bc1t", brtarget_mm, IIBranch, MIPS_BRANCH_T>,
- BC1F_FM_MM<0x1d>;
+ BC1F_FM_MM<0x1d>, ISA_MIPS1_NOT_32R6_64R6;
def CEIL_W_S_MM : MMRel, ABSS_FT<"ceil.w.s", FGR32Opnd, FGR32Opnd, II_CEIL>,
ROUND_W_FM_MM<0, 0x6c>;
diff --git a/lib/Target/Mips/MicroMipsInstrInfo.td b/lib/Target/Mips/MicroMipsInstrInfo.td
index 9904bc6..87a3a3e 100644
--- a/lib/Target/Mips/MicroMipsInstrInfo.td
+++ b/lib/Target/Mips/MicroMipsInstrInfo.td
@@ -246,7 +246,6 @@ let DecoderNamespace = "MicroMips", Predicates = [InMicroMips] in {
}
def JR_MM : MMRel, IndirectBranch<"jr", GPR32Opnd>, JR_FM_MM<0x3c>;
def JALR_MM : JumpLinkReg<"jalr", GPR32Opnd>, JALR_FM_MM<0x03c>;
- def RET_MM : MMRel, RetBase<"ret", GPR32Opnd>, JR_FM_MM<0x3c>;
/// Branch Instructions
def BEQ_MM : MMRel, CBranch<"beq", brtarget_mm, seteq, GPR32Opnd>,
diff --git a/lib/Target/Mips/Mips.td b/lib/Target/Mips/Mips.td
index ea16331..dd3bc9b 100644
--- a/lib/Target/Mips/Mips.td
+++ b/lib/Target/Mips/Mips.td
@@ -61,6 +61,8 @@ def FeatureGP64Bit : SubtargetFeature<"gp64", "IsGP64bit", "true",
"General Purpose Registers are 64-bit wide.">;
def FeatureFP64Bit : SubtargetFeature<"fp64", "IsFP64bit", "true",
"Support 64-bit FP registers.">;
+def FeatureFPXX : SubtargetFeature<"fpxx", "IsFPXX", "true",
+ "Support for FPXX.">;
def FeatureNaN2008 : SubtargetFeature<"nan2008", "IsNaN2008bit", "true",
"IEEE 754-2008 NaN encoding.">;
def FeatureSingleFloat : SubtargetFeature<"single-float", "IsSingleFloat",
@@ -73,6 +75,9 @@ def FeatureN64 : SubtargetFeature<"n64", "MipsABI", "N64",
"Enable n64 ABI">;
def FeatureEABI : SubtargetFeature<"eabi", "MipsABI", "EABI",
"Enable eabi ABI">;
+def FeatureNoOddSPReg : SubtargetFeature<"nooddspreg", "UseOddSPReg", "false",
+ "Disable odd numbered single-precision "
+ "registers">;
def FeatureVFPU : SubtargetFeature<"vfpu", "HasVFPU",
"true", "Enable vector FPU instructions.">;
def FeatureMips1 : SubtargetFeature<"mips1", "MipsArchVersion", "Mips1",
diff --git a/lib/Target/Mips/Mips16FrameLowering.cpp b/lib/Target/Mips/Mips16FrameLowering.cpp
index c01d03a..93706c2 100644
--- a/lib/Target/Mips/Mips16FrameLowering.cpp
+++ b/lib/Target/Mips/Mips16FrameLowering.cpp
@@ -16,6 +16,7 @@
#include "Mips16InstrInfo.h"
#include "MipsInstrInfo.h"
#include "MipsRegisterInfo.h"
+#include "MipsSubtarget.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -28,6 +29,9 @@
using namespace llvm;
+Mips16FrameLowering::Mips16FrameLowering(const MipsSubtarget &STI)
+ : MipsFrameLowering(STI, STI.stackAlignment()) {}
+
void Mips16FrameLowering::emitPrologue(MachineFunction &MF) const {
MachineBasicBlock &MBB = MF.front();
MachineFrameInfo *MFI = MF.getFrameInfo();
diff --git a/lib/Target/Mips/Mips16FrameLowering.h b/lib/Target/Mips/Mips16FrameLowering.h
index 3f7829d..1fb7eda 100644
--- a/lib/Target/Mips/Mips16FrameLowering.h
+++ b/lib/Target/Mips/Mips16FrameLowering.h
@@ -19,8 +19,7 @@
namespace llvm {
class Mips16FrameLowering : public MipsFrameLowering {
public:
- explicit Mips16FrameLowering(const MipsSubtarget &STI)
- : MipsFrameLowering(STI, STI.stackAlignment()) {}
+ explicit Mips16FrameLowering(const MipsSubtarget &STI);
/// emitProlog/emitEpilog - These methods insert prolog and epilog code into
/// the function.
diff --git a/lib/Target/Mips/Mips16ISelDAGToDAG.cpp b/lib/Target/Mips/Mips16ISelDAGToDAG.cpp
index 4e86a27..6672aef 100644
--- a/lib/Target/Mips/Mips16ISelDAGToDAG.cpp
+++ b/lib/Target/Mips/Mips16ISelDAGToDAG.cpp
@@ -37,7 +37,7 @@ using namespace llvm;
#define DEBUG_TYPE "mips-isel"
bool Mips16DAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
- if (!Subtarget.inMips16Mode())
+ if (!Subtarget->inMips16Mode())
return false;
return MipsDAGToDAGISel::runOnMachineFunction(MF);
}
@@ -226,9 +226,9 @@ bool Mips16DAGToDAGISel::selectAddr16(
const LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(Parent);
if (LS) {
- if (LS->getMemoryVT() == MVT::f32 && Subtarget.hasMips4_32r2())
+ if (LS->getMemoryVT() == MVT::f32 && Subtarget->hasMips4_32r2())
return false;
- if (LS->getMemoryVT() == MVT::f64 && Subtarget.hasMips4_32r2())
+ if (LS->getMemoryVT() == MVT::f64 && Subtarget->hasMips4_32r2())
return false;
}
}
diff --git a/lib/Target/Mips/Mips16ISelLowering.cpp b/lib/Target/Mips/Mips16ISelLowering.cpp
index 9102450..81a05df 100644
--- a/lib/Target/Mips/Mips16ISelLowering.cpp
+++ b/lib/Target/Mips/Mips16ISelLowering.cpp
@@ -120,13 +120,6 @@ static const Mips16IntrinsicHelperType Mips16IntrinsicHelper[] = {
Mips16TargetLowering::Mips16TargetLowering(MipsTargetMachine &TM)
: MipsTargetLowering(TM) {
- //
- // set up as if mips32 and then revert so we can test the mechanism
- // for switching
- addRegisterClass(MVT::i32, &Mips::GPR32RegClass);
- addRegisterClass(MVT::f32, &Mips::FGR32RegClass);
- computeRegisterProperties();
- clearRegisterClasses();
// Set up the register classes
addRegisterClass(MVT::i32, &Mips::CPU16RegsRegClass);
diff --git a/lib/Target/Mips/Mips16ISelLowering.h b/lib/Target/Mips/Mips16ISelLowering.h
index df88333..2a5eec5 100644
--- a/lib/Target/Mips/Mips16ISelLowering.h
+++ b/lib/Target/Mips/Mips16ISelLowering.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef Mips16ISELLOWERING_H
-#define Mips16ISELLOWERING_H
+#ifndef MIPS16ISELLOWERING_H
+#define MIPS16ISELLOWERING_H
#include "MipsISelLowering.h"
diff --git a/lib/Target/Mips/Mips16InstrInfo.td b/lib/Target/Mips/Mips16InstrInfo.td
index 11166c4..5e4eebb 100644
--- a/lib/Target/Mips/Mips16InstrInfo.td
+++ b/lib/Target/Mips/Mips16InstrInfo.td
@@ -1370,9 +1370,11 @@ def : Mips16Pat<(MipsJmpLink (i32 texternalsym:$dst)),
(Jal16 texternalsym:$dst)>;
// Indirect branch
-def: Mips16Pat<
- (brind CPU16Regs:$rs),
- (JrcRx16 CPU16Regs:$rs)>;
+def: Mips16Pat<(brind CPU16Regs:$rs), (JrcRx16 CPU16Regs:$rs)> {
+ // Ensure that the addition of MIPS32r6/MIPS64r6 support does not change
+ // MIPS16's behaviour.
+ let AddedComplexity = 1;
+}
// Jump and Link (Call)
let isCall=1, hasDelaySlot=0 in
diff --git a/lib/Target/Mips/Mips32r6InstrFormats.td b/lib/Target/Mips/Mips32r6InstrFormats.td
index a3f9df5..e4ec96a 100644
--- a/lib/Target/Mips/Mips32r6InstrFormats.td
+++ b/lib/Target/Mips/Mips32r6InstrFormats.td
@@ -39,7 +39,10 @@ def OPGROUP_DAUI : OPGROUP<0b011101>;
def OPGROUP_PCREL : OPGROUP<0b111011>;
def OPGROUP_REGIMM : OPGROUP<0b000001>;
def OPGROUP_SPECIAL : OPGROUP<0b000000>;
+// The spec occasionally names this value LL, LLD, SC, or SCD.
def OPGROUP_SPECIAL3 : OPGROUP<0b011111>;
+// The spec names this constant LWC2, LDC2, SWC2, and SDC2 in different places.
+def OPGROUP_COP2LDST : OPGROUP<0b010010>;
class OPCODE2<bits<2> Val> {
bits<2> Value = Val;
@@ -48,6 +51,11 @@ def OPCODE2_ADDIUPC : OPCODE2<0b00>;
def OPCODE2_LWPC : OPCODE2<0b01>;
def OPCODE2_LWUPC : OPCODE2<0b10>;
+class OPCODE3<bits<3> Val> {
+ bits<3> Value = Val;
+}
+def OPCODE3_LDPC : OPCODE3<0b110>;
+
class OPCODE5<bits<5> Val> {
bits<5> Value = Val;
}
@@ -59,6 +67,13 @@ def OPCODE5_BC1EQZ : OPCODE5<0b01001>;
def OPCODE5_BC1NEZ : OPCODE5<0b01101>;
def OPCODE5_BC2EQZ : OPCODE5<0b01001>;
def OPCODE5_BC2NEZ : OPCODE5<0b01101>;
+def OPCODE5_BGEZAL : OPCODE5<0b10001>;
+// The next four constants are unnamed in the spec. These names are taken from
+// the OPGROUP names they are used with.
+def OPCODE5_LDC2 : OPCODE5<0b01110>;
+def OPCODE5_LWC2 : OPCODE5<0b01010>;
+def OPCODE5_SDC2 : OPCODE5<0b01111>;
+def OPCODE5_SWC2 : OPCODE5<0b01011>;
class OPCODE6<bits<6> Val> {
bits<6> Value = Val;
@@ -67,6 +82,22 @@ def OPCODE6_ALIGN : OPCODE6<0b100000>;
def OPCODE6_DALIGN : OPCODE6<0b100100>;
def OPCODE6_BITSWAP : OPCODE6<0b100000>;
def OPCODE6_DBITSWAP : OPCODE6<0b100100>;
+def OPCODE6_JALR : OPCODE6<0b001001>;
+def OPCODE6_CACHE : OPCODE6<0b100101>;
+def OPCODE6_PREF : OPCODE6<0b110101>;
+// The next four constants are unnamed in the spec. These names are taken from
+// the OPGROUP names they are used with.
+def OPCODE6_LL : OPCODE6<0b110110>;
+def OPCODE6_LLD : OPCODE6<0b110111>;
+def OPCODE6_SC : OPCODE6<0b100110>;
+def OPCODE6_SCD : OPCODE6<0b100111>;
+def OPCODE6_CLO : OPCODE6<0b010001>;
+def OPCODE6_CLZ : OPCODE6<0b010000>;
+def OPCODE6_DCLO : OPCODE6<0b010011>;
+def OPCODE6_DCLZ : OPCODE6<0b010010>;
+def OPCODE6_LSA : OPCODE6<0b000101>;
+def OPCODE6_DLSA : OPCODE6<0b010101>;
+def OPCODE6_SDBBP : OPCODE6<0b001110>;
class FIELD_FMT<bits<5> Val> {
bits<5> Value = Val;
@@ -77,22 +108,23 @@ def FIELD_FMT_D : FIELD_FMT<0b10001>;
class FIELD_CMP_COND<bits<5> Val> {
bits<5> Value = Val;
}
-def FIELD_CMP_COND_F : FIELD_CMP_COND<0b00000>;
+// Note: The CMP_COND_FMT names differ from the C_COND_FMT names.
+def FIELD_CMP_COND_AF : FIELD_CMP_COND<0b00000>;
def FIELD_CMP_COND_UN : FIELD_CMP_COND<0b00001>;
def FIELD_CMP_COND_EQ : FIELD_CMP_COND<0b00010>;
def FIELD_CMP_COND_UEQ : FIELD_CMP_COND<0b00011>;
-def FIELD_CMP_COND_OLT : FIELD_CMP_COND<0b00100>;
+def FIELD_CMP_COND_LT : FIELD_CMP_COND<0b00100>;
def FIELD_CMP_COND_ULT : FIELD_CMP_COND<0b00101>;
-def FIELD_CMP_COND_OLE : FIELD_CMP_COND<0b00110>;
+def FIELD_CMP_COND_LE : FIELD_CMP_COND<0b00110>;
def FIELD_CMP_COND_ULE : FIELD_CMP_COND<0b00111>;
-def FIELD_CMP_COND_SF : FIELD_CMP_COND<0b01000>;
-def FIELD_CMP_COND_NGLE : FIELD_CMP_COND<0b01001>;
+def FIELD_CMP_COND_SAF : FIELD_CMP_COND<0b01000>;
+def FIELD_CMP_COND_SUN : FIELD_CMP_COND<0b01001>;
def FIELD_CMP_COND_SEQ : FIELD_CMP_COND<0b01010>;
-def FIELD_CMP_COND_NGL : FIELD_CMP_COND<0b01011>;
-def FIELD_CMP_COND_LT : FIELD_CMP_COND<0b01100>;
-def FIELD_CMP_COND_NGE : FIELD_CMP_COND<0b01101>;
-def FIELD_CMP_COND_LE : FIELD_CMP_COND<0b01110>;
-def FIELD_CMP_COND_NGT : FIELD_CMP_COND<0b01111>;
+def FIELD_CMP_COND_SUEQ : FIELD_CMP_COND<0b01011>;
+def FIELD_CMP_COND_SLT : FIELD_CMP_COND<0b01100>;
+def FIELD_CMP_COND_SULT : FIELD_CMP_COND<0b01101>;
+def FIELD_CMP_COND_SLE : FIELD_CMP_COND<0b01110>;
+def FIELD_CMP_COND_SULE : FIELD_CMP_COND<0b01111>;
class FIELD_CMP_FORMAT<bits<5> Val> {
bits<5> Value = Val;
@@ -139,6 +171,17 @@ class DAUI_FM : AUI_FM {
let Inst{31-26} = OPGROUP_DAUI.Value;
}
+class BAL_FM : MipsR6Inst {
+ bits<16> offset;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = OPGROUP_REGIMM.Value;
+ let Inst{25-21} = 0b00000;
+ let Inst{20-16} = OPCODE5_BGEZAL.Value;
+ let Inst{15-0} = offset;
+}
+
class COP1_2R_FM<bits<6> funct, FIELD_FMT Format> : MipsR6Inst {
bits<5> fs;
bits<5> fd;
@@ -216,6 +259,18 @@ class PCREL19_FM<OPCODE2 Operation> : MipsR6Inst {
let Inst{18-0} = imm;
}
+class PCREL18_FM<OPCODE3 Operation> : MipsR6Inst {
+ bits<5> rs;
+ bits<18> imm;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = OPGROUP_PCREL.Value;
+ let Inst{25-21} = rs;
+ let Inst{20-18} = Operation.Value;
+ let Inst{17-0} = imm;
+}
+
class SPECIAL3_2R_FM<OPCODE6 Operation> : MipsR6Inst {
bits<5> rd;
bits<5> rt;
@@ -230,6 +285,36 @@ class SPECIAL3_2R_FM<OPCODE6 Operation> : MipsR6Inst {
let Inst{5-0} = Operation.Value;
}
+class SPECIAL3_MEM_FM<OPCODE6 Operation> : MipsR6Inst {
+ bits<21> addr;
+ bits<5> hint;
+ bits<5> base = addr{20-16};
+ bits<9> offset = addr{8-0};
+
+ bits<32> Inst;
+
+ let Inst{31-26} = OPGROUP_SPECIAL3.Value;
+ let Inst{25-21} = base;
+ let Inst{20-16} = hint;
+ let Inst{15-7} = offset;
+ let Inst{6} = 0;
+ let Inst{5-0} = Operation.Value;
+}
+
+class SPECIAL_2R_FM<OPCODE6 Operation> : MipsR6Inst {
+ bits<5> rd;
+ bits<5> rs;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = OPGROUP_SPECIAL.Value;
+ let Inst{25-21} = rs;
+ let Inst{20-16} = 0b00000;
+ let Inst{15-11} = rd;
+ let Inst{10-6} = 0b00001;
+ let Inst{5-0} = Operation.Value;
+}
+
class SPECIAL_3R_FM<bits<5> mulop, bits<6> funct> : MipsR6Inst {
bits<5> rd;
bits<5> rs;
@@ -245,6 +330,16 @@ class SPECIAL_3R_FM<bits<5> mulop, bits<6> funct> : MipsR6Inst {
let Inst{5-0} = funct;
}
+class SPECIAL_SDBBP_FM : MipsR6Inst {
+ bits<20> code_;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = OPGROUP_SPECIAL.Value;
+ let Inst{25-6} = code_;
+ let Inst{5-0} = OPCODE6_SDBBP.Value;
+}
+
// This class is ambiguous with other branches:
// BEQC/BNEC require that rs > rt
class CMP_BRANCH_2R_OFF16_FM<OPGROUP funct> : MipsR6Inst {
@@ -355,6 +450,40 @@ class SPECIAL3_DALIGN_FM<OPCODE6 Operation> : MipsR6Inst {
let Inst{5-0} = Operation.Value;
}
+class SPECIAL3_LL_SC_FM<OPCODE6 Operation> : MipsR6Inst {
+ bits<5> rt;
+ bits<21> addr;
+ bits<5> base = addr{20-16};
+ bits<9> offset = addr{8-0};
+
+ bits<32> Inst;
+
+ let Inst{31-26} = OPGROUP_SPECIAL3.Value;
+ let Inst{25-21} = base;
+ let Inst{20-16} = rt;
+ let Inst{15-7} = offset;
+ let Inst{5-0} = Operation.Value;
+
+ string DecoderMethod = "DecodeSpecial3LlSc";
+}
+
+class SPECIAL_LSA_FM<OPCODE6 Operation> : MipsR6Inst {
+ bits<5> rd;
+ bits<5> rs;
+ bits<5> rt;
+ bits<2> imm2;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = OPGROUP_SPECIAL.Value;
+ let Inst{25-21} = rs;
+ let Inst{20-16} = rt;
+ let Inst{15-11} = rd;
+ let Inst{10-8} = 0b000;
+ let Inst{7-6} = imm2;
+ let Inst{5-0} = Operation.Value;
+}
+
class REGIMM_FM<OPCODE5 Operation> : MipsR6Inst {
bits<5> rs;
bits<16> imm;
@@ -384,3 +513,31 @@ class COP1_CMP_CONDN_FM<FIELD_CMP_FORMAT Format,
let Inst{4-0} = Cond.Value;
}
+class JR_HB_R6_FM<OPCODE6 Operation> : MipsR6Inst {
+ bits<5> rs;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = OPGROUP_SPECIAL.Value;
+ let Inst{25-21} = rs;
+ let Inst{20-16} = 0;
+ let Inst{15-11} = 0;
+ let Inst{10} = 1;
+ let Inst{9-6} = 0;
+ let Inst{5-0} = Operation.Value;
+}
+
+class COP2LDST_FM<OPCODE5 Operation> : MipsR6Inst {
+ bits<5> rt;
+ bits<21> addr;
+ bits<5> base = addr{20-16};
+ bits<11> offset = addr{10-0};
+
+ bits<32> Inst;
+
+ let Inst{31-26} = OPGROUP_COP2LDST.Value;
+ let Inst{25-21} = Operation.Value;
+ let Inst{20-16} = rt;
+ let Inst{15-11} = base;
+ let Inst{10-0} = offset;
+}
diff --git a/lib/Target/Mips/Mips32r6InstrInfo.td b/lib/Target/Mips/Mips32r6InstrInfo.td
index ffaf965..d06e5ca 100644
--- a/lib/Target/Mips/Mips32r6InstrInfo.td
+++ b/lib/Target/Mips/Mips32r6InstrInfo.td
@@ -14,39 +14,8 @@
include "Mips32r6InstrFormats.td"
// Notes about removals/changes from MIPS32r6:
-// Unclear: ssnop
-// Reencoded: cache, pref
-// Reencoded: clo, clz
// Reencoded: jr -> jalr
// Reencoded: jr.hb -> jalr.hb
-// Reencoded: ldc2
-// Reencoded: ll, sc
-// Reencoded: lwc2
-// Reencoded: sdbbp
-// Reencoded: sdc2
-// Reencoded: swc2
-// Removed: bc1any2, bc1any4
-// Removed: bc2[ft]
-// Removed: bc2f, bc2t
-// Removed: bgezal
-// Removed: bltzal
-// Removed: c.cond.fmt, bc1[ft]
-// Removed: div, divu
-// Removed: jalx
-// Removed: ldxc1
-// Removed: luxc1
-// Removed: lwxc1
-// Removed: madd.[ds], nmadd.[ds], nmsub.[ds], sub.[ds]
-// Removed: mfhi, mflo, mthi, mtlo, madd, maddu, msub, msubu, mul
-// Removed: movf, movt
-// Removed: movf.fmt, movt.fmt, movn.fmt, movz.fmt
-// Removed: movn, movz
-// Removed: mult, multu
-// Removed: prefx
-// Removed: sdxc1
-// Removed: suxc1
-// Removed: swxc1
-// Rencoded: [ls][wd]c2
def brtarget21 : Operand<OtherVT> {
let EncoderMethod = "getBranchTarget21OpValue";
@@ -84,6 +53,7 @@ class ALUIPC_ENC : PCREL16_FM<OPCODE5_ALUIPC>;
class AUI_ENC : AUI_FM;
class AUIPC_ENC : PCREL16_FM<OPCODE5_AUIPC>;
+class BAL_ENC : BAL_FM;
class BALC_ENC : BRANCH_OFF26_FM<0b111010>;
class BC_ENC : BRANCH_OFF26_FM<0b110010>;
class BEQC_ENC : CMP_BRANCH_2R_OFF16_FM<OPGROUP_ADDI>,
@@ -97,11 +67,20 @@ class BNEZALC_ENC : CMP_BRANCH_1R_RT_OFF16_FM<OPGROUP_DADDI>,
class BLTZC_ENC : CMP_BRANCH_1R_BOTH_OFF16_FM<OPGROUP_BGTZL>,
DecodeDisambiguates<"BgtzlGroupBranch">;
+class BGEC_ENC : CMP_BRANCH_2R_OFF16_FM<OPGROUP_BLEZL>,
+ DecodeDisambiguatedBy<"BlezlGroupBranch">;
+class BGEUC_ENC : CMP_BRANCH_2R_OFF16_FM<OPGROUP_BLEZ>,
+ DecodeDisambiguatedBy<"BlezGroupBranch">;
class BGEZC_ENC : CMP_BRANCH_1R_BOTH_OFF16_FM<OPGROUP_BLEZL>,
DecodeDisambiguates<"BlezlGroupBranch">;
class BGTZALC_ENC : CMP_BRANCH_1R_RT_OFF16_FM<OPGROUP_BGTZ>,
DecodeDisambiguatedBy<"BgtzGroupBranch">;
+class BLTC_ENC : CMP_BRANCH_2R_OFF16_FM<OPGROUP_BGTZL>,
+ DecodeDisambiguatedBy<"BgtzlGroupBranch">;
+class BLTUC_ENC : CMP_BRANCH_2R_OFF16_FM<OPGROUP_BGTZ>,
+ DecodeDisambiguatedBy<"BgtzGroupBranch">;
+
class BLEZC_ENC : CMP_BRANCH_1R_RT_OFF16_FM<OPGROUP_BLEZL>,
DecodeDisambiguatedBy<"BlezlGroupBranch">;
class BLTZALC_ENC : CMP_BRANCH_1R_BOTH_OFF16_FM<OPGROUP_BGTZ>,
@@ -110,7 +89,8 @@ class BGTZC_ENC : CMP_BRANCH_1R_RT_OFF16_FM<OPGROUP_BGTZL>,
DecodeDisambiguatedBy<"BgtzlGroupBranch">;
class BEQZC_ENC : CMP_BRANCH_OFF21_FM<0b110110>;
-class BGEZALC_ENC : CMP_BRANCH_1R_BOTH_OFF16_FM<OPGROUP_BLEZ>;
+class BGEZALC_ENC : CMP_BRANCH_1R_BOTH_OFF16_FM<OPGROUP_BLEZ>,
+ DecodeDisambiguates<"BlezGroupBranch">;
class BNEZC_ENC : CMP_BRANCH_OFF21_FM<0b111110>;
class BC1EQZ_ENC : COP1_BCCZ_FM<OPCODE5_BC1EQZ>;
@@ -120,9 +100,10 @@ class BC2NEZ_ENC : COP2_BCCZ_FM<OPCODE5_BC2NEZ>;
class JIALC_ENC : JMP_IDX_COMPACT_FM<0b111110>;
class JIC_ENC : JMP_IDX_COMPACT_FM<0b110110>;
-
+class JR_HB_R6_ENC : JR_HB_R6_FM<OPCODE6_JALR>;
class BITSWAP_ENC : SPECIAL3_2R_FM<OPCODE6_BITSWAP>;
-class BLEZALC_ENC : CMP_BRANCH_1R_RT_OFF16_FM<OPGROUP_BLEZ>;
+class BLEZALC_ENC : CMP_BRANCH_1R_RT_OFF16_FM<OPGROUP_BLEZ>,
+ DecodeDisambiguatedBy<"BlezGroupBranch">;
class BNVC_ENC : CMP_BRANCH_2R_OFF16_FM<OPGROUP_DADDI>,
DecodeDisambiguatedBy<"DaddiGroupBranch">;
class BOVC_ENC : CMP_BRANCH_2R_OFF16_FM<OPGROUP_ADDI>,
@@ -170,12 +151,23 @@ class RINT_D_ENC : COP1_2R_FM<0b011010, FIELD_FMT_D>;
class CLASS_S_ENC : COP1_2R_FM<0b011011, FIELD_FMT_S>;
class CLASS_D_ENC : COP1_2R_FM<0b011011, FIELD_FMT_D>;
-class CMP_CONDN_DESC_BASE<string CondStr, string Typestr, RegisterOperand FGROpnd> {
- dag OutOperandList = (outs FGROpnd:$fd);
- dag InOperandList = (ins FGROpnd:$fs, FGROpnd:$ft);
- string AsmString = !strconcat("cmp.", CondStr, ".", Typestr, "\t$fd, $fs, $ft");
- list<dag> Pattern = [];
-}
+class CACHE_ENC : SPECIAL3_MEM_FM<OPCODE6_CACHE>;
+class PREF_ENC : SPECIAL3_MEM_FM<OPCODE6_PREF>;
+
+class LDC2_R6_ENC : COP2LDST_FM<OPCODE5_LDC2>;
+class LWC2_R6_ENC : COP2LDST_FM<OPCODE5_LWC2>;
+class SDC2_R6_ENC : COP2LDST_FM<OPCODE5_SDC2>;
+class SWC2_R6_ENC : COP2LDST_FM<OPCODE5_SWC2>;
+
+class LSA_R6_ENC : SPECIAL_LSA_FM<OPCODE6_LSA>;
+
+class LL_R6_ENC : SPECIAL3_LL_SC_FM<OPCODE6_LL>;
+class SC_R6_ENC : SPECIAL3_LL_SC_FM<OPCODE6_SC>;
+
+class CLO_R6_ENC : SPECIAL_2R_FM<OPCODE6_CLO>;
+class CLZ_R6_ENC : SPECIAL_2R_FM<OPCODE6_CLZ>;
+
+class SDBBP_R6_ENC : SPECIAL_SDBBP_FM;
//===----------------------------------------------------------------------===//
//
@@ -183,56 +175,65 @@ class CMP_CONDN_DESC_BASE<string CondStr, string Typestr, RegisterOperand FGROpn
//
//===----------------------------------------------------------------------===//
+class CMP_CONDN_DESC_BASE<string CondStr, string Typestr,
+ RegisterOperand FGROpnd,
+ SDPatternOperator Op = null_frag> {
+ dag OutOperandList = (outs FGRCCOpnd:$fd);
+ dag InOperandList = (ins FGROpnd:$fs, FGROpnd:$ft);
+ string AsmString = !strconcat("cmp.", CondStr, ".", Typestr, "\t$fd, $fs, $ft");
+ list<dag> Pattern = [(set FGRCCOpnd:$fd, (Op FGROpnd:$fs, FGROpnd:$ft))];
+}
+
multiclass CMP_CC_M <FIELD_CMP_FORMAT Format, string Typestr,
RegisterOperand FGROpnd>{
- def CMP_F_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_F>,
- CMP_CONDN_DESC_BASE<"f", Typestr, FGROpnd>,
+ def CMP_F_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_AF>,
+ CMP_CONDN_DESC_BASE<"af", Typestr, FGROpnd>,
ISA_MIPS32R6;
def CMP_UN_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_UN>,
- CMP_CONDN_DESC_BASE<"un", Typestr, FGROpnd>,
+ CMP_CONDN_DESC_BASE<"un", Typestr, FGROpnd, setuo>,
ISA_MIPS32R6;
def CMP_EQ_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_EQ>,
- CMP_CONDN_DESC_BASE<"eq", Typestr, FGROpnd>,
+ CMP_CONDN_DESC_BASE<"eq", Typestr, FGROpnd, setoeq>,
ISA_MIPS32R6;
def CMP_UEQ_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_UEQ>,
- CMP_CONDN_DESC_BASE<"ueq", Typestr, FGROpnd>,
- ISA_MIPS32R6;
- def CMP_OLT_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_OLT>,
- CMP_CONDN_DESC_BASE<"olt", Typestr, FGROpnd>,
+ CMP_CONDN_DESC_BASE<"ueq", Typestr, FGROpnd, setueq>,
ISA_MIPS32R6;
+ def CMP_LT_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_LT>,
+ CMP_CONDN_DESC_BASE<"lt", Typestr, FGROpnd, setolt>,
+ ISA_MIPS32R6;
def CMP_ULT_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_ULT>,
- CMP_CONDN_DESC_BASE<"ult", Typestr, FGROpnd>,
- ISA_MIPS32R6;
- def CMP_OLE_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_OLE>,
- CMP_CONDN_DESC_BASE<"ole", Typestr, FGROpnd>,
+ CMP_CONDN_DESC_BASE<"ult", Typestr, FGROpnd, setult>,
ISA_MIPS32R6;
+ def CMP_LE_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_LE>,
+ CMP_CONDN_DESC_BASE<"le", Typestr, FGROpnd, setole>,
+ ISA_MIPS32R6;
def CMP_ULE_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_ULE>,
- CMP_CONDN_DESC_BASE<"ule", Typestr, FGROpnd>,
+ CMP_CONDN_DESC_BASE<"ule", Typestr, FGROpnd, setule>,
+ ISA_MIPS32R6;
+ def CMP_SAF_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_SAF>,
+ CMP_CONDN_DESC_BASE<"saf", Typestr, FGROpnd>,
+ ISA_MIPS32R6;
+ def CMP_SUN_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_SUN>,
+ CMP_CONDN_DESC_BASE<"sun", Typestr, FGROpnd>,
ISA_MIPS32R6;
- def CMP_SF_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_SF>,
- CMP_CONDN_DESC_BASE<"sf", Typestr, FGROpnd>,
- ISA_MIPS32R6;
- def CMP_NGLE_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_NGLE>,
- CMP_CONDN_DESC_BASE<"ngle", Typestr, FGROpnd>,
- ISA_MIPS32R6;
def CMP_SEQ_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_SEQ>,
CMP_CONDN_DESC_BASE<"seq", Typestr, FGROpnd>,
ISA_MIPS32R6;
- def CMP_NGL_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_NGL>,
- CMP_CONDN_DESC_BASE<"ngl", Typestr, FGROpnd>,
- ISA_MIPS32R6;
- def CMP_LT_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_LT>,
- CMP_CONDN_DESC_BASE<"lt", Typestr, FGROpnd>,
- ISA_MIPS32R6;
- def CMP_NGE_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_NGE>,
- CMP_CONDN_DESC_BASE<"nge", Typestr, FGROpnd>,
+ def CMP_SUEQ_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_SUEQ>,
+ CMP_CONDN_DESC_BASE<"sueq", Typestr, FGROpnd>,
+ ISA_MIPS32R6;
+ def CMP_SLT_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_SLT>,
+ CMP_CONDN_DESC_BASE<"slt", Typestr, FGROpnd>,
ISA_MIPS32R6;
- def CMP_LE_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_LE>,
- CMP_CONDN_DESC_BASE<"le", Typestr, FGROpnd>,
- ISA_MIPS32R6;
- def CMP_NGT_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_NGT>,
- CMP_CONDN_DESC_BASE<"ngt", Typestr, FGROpnd>,
+ def CMP_SULT_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_SULT>,
+ CMP_CONDN_DESC_BASE<"sult", Typestr, FGROpnd>,
+ ISA_MIPS32R6;
+ def CMP_SLE_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_SLE>,
+ CMP_CONDN_DESC_BASE<"sle", Typestr, FGROpnd>,
ISA_MIPS32R6;
+ def CMP_SULE_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_SULE>,
+ CMP_CONDN_DESC_BASE<"sule", Typestr, FGROpnd>,
+ ISA_MIPS32R6;
}
//===----------------------------------------------------------------------===//
@@ -241,16 +242,17 @@ multiclass CMP_CC_M <FIELD_CMP_FORMAT Format, string Typestr,
//
//===----------------------------------------------------------------------===//
-class PCREL19_DESC_BASE<string instr_asm, RegisterOperand GPROpnd> {
+class PCREL_DESC_BASE<string instr_asm, RegisterOperand GPROpnd,
+ Operand ImmOpnd> {
dag OutOperandList = (outs GPROpnd:$rs);
- dag InOperandList = (ins simm19_lsl2:$imm);
+ dag InOperandList = (ins ImmOpnd:$imm);
string AsmString = !strconcat(instr_asm, "\t$rs, $imm");
list<dag> Pattern = [];
}
-class ADDIUPC_DESC : PCREL19_DESC_BASE<"addiupc", GPR32Opnd>;
-class LWPC_DESC: PCREL19_DESC_BASE<"lwpc", GPR32Opnd>;
-class LWUPC_DESC: PCREL19_DESC_BASE<"lwupc", GPR32Opnd>;
+class ADDIUPC_DESC : PCREL_DESC_BASE<"addiupc", GPR32Opnd, simm19_lsl2>;
+class LWPC_DESC: PCREL_DESC_BASE<"lwpc", GPR32Opnd, simm19_lsl2>;
+class LWUPC_DESC: PCREL_DESC_BASE<"lwupc", GPR32Opnd, simm19_lsl2>;
class ALIGN_DESC_BASE<string instr_asm, RegisterOperand GPROpnd,
Operand ImmOpnd> {
@@ -318,15 +320,26 @@ class CMP_CBR_RT_Z_DESC_BASE<string instr_asm, DAGOperand opnd,
list<Register> Defs = [AT];
}
+class BAL_DESC : BC_DESC_BASE<"bal", brtarget> {
+ bit isCall = 1;
+ bit hasDelaySlot = 1;
+ list<Register> Defs = [RA];
+}
+
class BALC_DESC : BC_DESC_BASE<"balc", brtarget26> {
bit isCall = 1;
list<Register> Defs = [RA];
}
class BC_DESC : BC_DESC_BASE<"bc", brtarget26>;
+class BGEC_DESC : CMP_BC_DESC_BASE<"bgec", brtarget, GPR32Opnd>;
+class BGEUC_DESC : CMP_BC_DESC_BASE<"bgeuc", brtarget, GPR32Opnd>;
class BEQC_DESC : CMP_BC_DESC_BASE<"beqc", brtarget, GPR32Opnd>;
class BNEC_DESC : CMP_BC_DESC_BASE<"bnec", brtarget, GPR32Opnd>;
+class BLTC_DESC : CMP_BC_DESC_BASE<"bltc", brtarget, GPR32Opnd>;
+class BLTUC_DESC : CMP_BC_DESC_BASE<"bltuc", brtarget, GPR32Opnd>;
+
class BLTZC_DESC : CMP_CBR_RT_Z_DESC_BASE<"bltzc", brtarget, GPR32Opnd>;
class BGEZC_DESC : CMP_CBR_RT_Z_DESC_BASE<"bgezc", brtarget, GPR32Opnd>;
@@ -380,6 +393,14 @@ class JIC_DESC : JMP_IDX_COMPACT_DESC_BASE<"jic", jmpoffset16, GPR32Opnd> {
list<Register> Defs = [AT];
}
+class JR_HB_R6_DESC : JR_HB_DESC_BASE<"jr.hb", GPR32Opnd> {
+ bit isBranch = 1;
+ bit isIndirectBranch = 1;
+ bit hasDelaySlot = 1;
+ bit isTerminator=1;
+ bit isBarrier=1;
+}
+
class BITSWAP_DESC_BASE<string instr_asm, RegisterOperand GPROpnd> {
dag OutOperandList = (outs GPROpnd:$rd);
dag InOperandList = (ins GPROpnd:$rt);
@@ -389,17 +410,22 @@ class BITSWAP_DESC_BASE<string instr_asm, RegisterOperand GPROpnd> {
class BITSWAP_DESC : BITSWAP_DESC_BASE<"bitswap", GPR32Opnd>;
-class DIVMOD_DESC_BASE<string instr_asm, RegisterOperand GPROpnd> {
+class DIVMOD_DESC_BASE<string instr_asm, RegisterOperand GPROpnd,
+ SDPatternOperator Op=null_frag> {
dag OutOperandList = (outs GPROpnd:$rd);
dag InOperandList = (ins GPROpnd:$rs, GPROpnd:$rt);
string AsmString = !strconcat(instr_asm, "\t$rd, $rs, $rt");
- list<dag> Pattern = [];
+ list<dag> Pattern = [(set GPROpnd:$rd, (Op GPROpnd:$rs, GPROpnd:$rt))];
+
+ // This instruction doesn't trap division by zero itself. We must insert
+ // teq instructions as well.
+ bit usesCustomInserter = 1;
}
-class DIV_DESC : DIVMOD_DESC_BASE<"div", GPR32Opnd>;
-class DIVU_DESC : DIVMOD_DESC_BASE<"divu", GPR32Opnd>;
-class MOD_DESC : DIVMOD_DESC_BASE<"mod", GPR32Opnd>;
-class MODU_DESC : DIVMOD_DESC_BASE<"modu", GPR32Opnd>;
+class DIV_DESC : DIVMOD_DESC_BASE<"div", GPR32Opnd, sdiv>;
+class DIVU_DESC : DIVMOD_DESC_BASE<"divu", GPR32Opnd, udiv>;
+class MOD_DESC : DIVMOD_DESC_BASE<"mod", GPR32Opnd, srem>;
+class MODU_DESC : DIVMOD_DESC_BASE<"modu", GPR32Opnd, urem>;
class BEQZALC_DESC : CMP_CBR_RT_Z_DESC_BASE<"beqzalc", brtarget, GPR32Opnd> {
list<Register> Defs = [RA];
@@ -424,28 +450,35 @@ class BLTZALC_DESC : CMP_CBR_RT_Z_DESC_BASE<"bltzalc", brtarget, GPR32Opnd> {
class BNEZALC_DESC : CMP_CBR_RT_Z_DESC_BASE<"bnezalc", brtarget, GPR32Opnd> {
list<Register> Defs = [RA];
}
-class MUL_R6_DESC_BASE<string instr_asm, RegisterOperand GPROpnd> {
+
+class MUL_R6_DESC_BASE<string instr_asm, RegisterOperand GPROpnd,
+ SDPatternOperator Op=null_frag> {
dag OutOperandList = (outs GPROpnd:$rd);
dag InOperandList = (ins GPROpnd:$rs, GPROpnd:$rt);
string AsmString = !strconcat(instr_asm, "\t$rd, $rs, $rt");
- list<dag> Pattern = [];
+ list<dag> Pattern = [(set GPROpnd:$rd, (Op GPROpnd:$rs, GPROpnd:$rt))];
}
-class MUH_DESC : MUL_R6_DESC_BASE<"muh", GPR32Opnd>;
-class MUHU_DESC : MUL_R6_DESC_BASE<"muhu", GPR32Opnd>;
-class MUL_R6_DESC : MUL_R6_DESC_BASE<"mul", GPR32Opnd>;
+class MUH_DESC : MUL_R6_DESC_BASE<"muh", GPR32Opnd, mulhs>;
+class MUHU_DESC : MUL_R6_DESC_BASE<"muhu", GPR32Opnd, mulhu>;
+class MUL_R6_DESC : MUL_R6_DESC_BASE<"mul", GPR32Opnd, mul>;
class MULU_DESC : MUL_R6_DESC_BASE<"mulu", GPR32Opnd>;
-class COP1_4R_DESC_BASE<string instr_asm, RegisterOperand FGROpnd> {
+class COP1_SEL_DESC_BASE<string instr_asm, RegisterOperand FGROpnd> {
dag OutOperandList = (outs FGROpnd:$fd);
- dag InOperandList = (ins FGROpnd:$fd_in, FGROpnd:$fs, FGROpnd:$ft);
+ dag InOperandList = (ins FGRCCOpnd:$fd_in, FGROpnd:$fs, FGROpnd:$ft);
string AsmString = !strconcat(instr_asm, "\t$fd, $fs, $ft");
- list<dag> Pattern = [];
+ list<dag> Pattern = [(set FGROpnd:$fd, (select FGRCCOpnd:$fd_in,
+ FGROpnd:$ft,
+ FGROpnd:$fs))];
string Constraints = "$fd_in = $fd";
}
-class SEL_D_DESC : COP1_4R_DESC_BASE<"sel.d", FGR64Opnd>;
-class SEL_S_DESC : COP1_4R_DESC_BASE<"sel.s", FGR32Opnd>;
+class SEL_D_DESC : COP1_SEL_DESC_BASE<"sel.d", FGR64Opnd> {
+ // We must insert a SUBREG_TO_REG around $fd_in
+ bit usesCustomInserter = 1;
+}
+class SEL_S_DESC : COP1_SEL_DESC_BASE<"sel.s", FGR32Opnd>;
class SELEQNE_Z_DESC_BASE<string instr_asm, RegisterOperand GPROpnd> {
dag OutOperandList = (outs GPROpnd:$rd);
@@ -457,6 +490,14 @@ class SELEQNE_Z_DESC_BASE<string instr_asm, RegisterOperand GPROpnd> {
class SELEQZ_DESC : SELEQNE_Z_DESC_BASE<"seleqz", GPR32Opnd>;
class SELNEZ_DESC : SELEQNE_Z_DESC_BASE<"selnez", GPR32Opnd>;
+class COP1_4R_DESC_BASE<string instr_asm, RegisterOperand FGROpnd> {
+ dag OutOperandList = (outs FGROpnd:$fd);
+ dag InOperandList = (ins FGROpnd:$fd_in, FGROpnd:$fs, FGROpnd:$ft);
+ string AsmString = !strconcat(instr_asm, "\t$fd, $fs, $ft");
+ list<dag> Pattern = [];
+ string Constraints = "$fd_in = $fd";
+}
+
class MADDF_S_DESC : COP1_4R_DESC_BASE<"maddf.s", FGR32Opnd>;
class MADDF_D_DESC : COP1_4R_DESC_BASE<"maddf.d", FGR64Opnd>;
class MSUBF_S_DESC : COP1_4R_DESC_BASE<"msubf.s", FGR32Opnd>;
@@ -503,6 +544,96 @@ class RINT_D_DESC : CLASS_RINT_DESC_BASE<"rint.d", FGR64Opnd>;
class CLASS_S_DESC : CLASS_RINT_DESC_BASE<"class.s", FGR32Opnd>;
class CLASS_D_DESC : CLASS_RINT_DESC_BASE<"class.d", FGR64Opnd>;
+class CACHE_HINT_DESC<string instr_asm, Operand MemOpnd,
+ RegisterOperand GPROpnd> {
+ dag OutOperandList = (outs);
+ dag InOperandList = (ins MemOpnd:$addr, uimm5:$hint);
+ string AsmString = !strconcat(instr_asm, "\t$hint, $addr");
+ list<dag> Pattern = [];
+}
+
+class CACHE_DESC : CACHE_HINT_DESC<"cache", mem_simm9, GPR32Opnd>;
+class PREF_DESC : CACHE_HINT_DESC<"pref", mem_simm9, GPR32Opnd>;
+
+class COP2LD_DESC_BASE<string instr_asm, RegisterOperand COPOpnd> {
+ dag OutOperandList = (outs COPOpnd:$rt);
+ dag InOperandList = (ins mem_simm11:$addr);
+ string AsmString = !strconcat(instr_asm, "\t$rt, $addr");
+ list<dag> Pattern = [];
+ bit mayLoad = 1;
+}
+
+class LDC2_R6_DESC : COP2LD_DESC_BASE<"ldc2", COP2Opnd>;
+class LWC2_R6_DESC : COP2LD_DESC_BASE<"lwc2", COP2Opnd>;
+
+class COP2ST_DESC_BASE<string instr_asm, RegisterOperand COPOpnd> {
+ dag OutOperandList = (outs);
+ dag InOperandList = (ins COPOpnd:$rt, mem_simm11:$addr);
+ string AsmString = !strconcat(instr_asm, "\t$rt, $addr");
+ list<dag> Pattern = [];
+ bit mayStore = 1;
+}
+
+class SDC2_R6_DESC : COP2ST_DESC_BASE<"sdc2", COP2Opnd>;
+class SWC2_R6_DESC : COP2ST_DESC_BASE<"swc2", COP2Opnd>;
+
+class LSA_R6_DESC_BASE<string instr_asm, RegisterOperand GPROpnd,
+ Operand ImmOpnd> {
+ dag OutOperandList = (outs GPROpnd:$rd);
+ dag InOperandList = (ins GPROpnd:$rs, GPROpnd:$rt, ImmOpnd:$imm2);
+ string AsmString = !strconcat(instr_asm, "\t$rd, $rs, $rt, $imm2");
+ list<dag> Pattern = [];
+}
+
+class LSA_R6_DESC : LSA_R6_DESC_BASE<"lsa", GPR32Opnd, uimm2>;
+
+class LL_R6_DESC_BASE<string instr_asm, RegisterOperand GPROpnd> {
+ dag OutOperandList = (outs GPROpnd:$rt);
+ dag InOperandList = (ins mem_simm9:$addr);
+ string AsmString = !strconcat(instr_asm, "\t$rt, $addr");
+ list<dag> Pattern = [];
+ bit mayLoad = 1;
+}
+
+class LL_R6_DESC : LL_R6_DESC_BASE<"ll", GPR32Opnd>;
+
+class SC_R6_DESC_BASE<string instr_asm, RegisterOperand GPROpnd> {
+ dag OutOperandList = (outs GPROpnd:$dst);
+ dag InOperandList = (ins GPROpnd:$rt, mem_simm9:$addr);
+ string AsmString = !strconcat(instr_asm, "\t$rt, $addr");
+ list<dag> Pattern = [];
+ bit mayStore = 1;
+ string Constraints = "$rt = $dst";
+}
+
+class SC_R6_DESC : SC_R6_DESC_BASE<"sc", GPR32Opnd>;
+
+class CLO_CLZ_R6_DESC_BASE<string instr_asm, RegisterOperand GPROpnd> {
+ dag OutOperandList = (outs GPROpnd:$rd);
+ dag InOperandList = (ins GPROpnd:$rs);
+ string AsmString = !strconcat(instr_asm, "\t$rd, $rs");
+}
+
+class CLO_R6_DESC_BASE<string instr_asm, RegisterOperand GPROpnd> :
+ CLO_CLZ_R6_DESC_BASE<instr_asm, GPROpnd> {
+ list<dag> Pattern = [(set GPROpnd:$rd, (ctlz (not GPROpnd:$rs)))];
+}
+
+class CLZ_R6_DESC_BASE<string instr_asm, RegisterOperand GPROpnd> :
+ CLO_CLZ_R6_DESC_BASE<instr_asm, GPROpnd> {
+ list<dag> Pattern = [(set GPROpnd:$rd, (ctlz GPROpnd:$rs))];
+}
+
+class CLO_R6_DESC : CLO_R6_DESC_BASE<"clo", GPR32Opnd>;
+class CLZ_R6_DESC : CLZ_R6_DESC_BASE<"clz", GPR32Opnd>;
+
+class SDBBP_R6_DESC {
+ dag OutOperandList = (outs);
+ dag InOperandList = (ins uimm20:$code_);
+ string AsmString = "sdbbp\t$code_";
+ list<dag> Pattern = [];
+}
+
//===----------------------------------------------------------------------===//
//
// Instruction Definitions
@@ -514,6 +645,7 @@ def ALIGN : ALIGN_ENC, ALIGN_DESC, ISA_MIPS32R6;
def ALUIPC : ALUIPC_ENC, ALUIPC_DESC, ISA_MIPS32R6;
def AUI : AUI_ENC, AUI_DESC, ISA_MIPS32R6;
def AUIPC : AUIPC_ENC, AUIPC_DESC, ISA_MIPS32R6;
+def BAL : BAL_ENC, BAL_DESC, ISA_MIPS32R6;
def BALC : BALC_ENC, BALC_DESC, ISA_MIPS32R6;
def BC1EQZ : BC1EQZ_ENC, BC1EQZ_DESC, ISA_MIPS32R6;
def BC1NEZ : BC1NEZ_ENC, BC1NEZ_DESC, ISA_MIPS32R6;
@@ -523,8 +655,8 @@ def BC : BC_ENC, BC_DESC, ISA_MIPS32R6;
def BEQC : BEQC_ENC, BEQC_DESC, ISA_MIPS32R6;
def BEQZALC : BEQZALC_ENC, BEQZALC_DESC, ISA_MIPS32R6;
def BEQZC : BEQZC_ENC, BEQZC_DESC, ISA_MIPS32R6;
-def BGEC; // Also aliased to blec with operands swapped
-def BGEUC; // Also aliased to bleuc with operands swapped
+def BGEC : BGEC_ENC, BGEC_DESC, ISA_MIPS32R6;
+def BGEUC : BGEUC_ENC, BGEUC_DESC, ISA_MIPS32R6;
def BGEZALC : BGEZALC_ENC, BGEZALC_DESC, ISA_MIPS32R6;
def BGEZC : BGEZC_ENC, BGEZC_DESC, ISA_MIPS32R6;
def BGTZALC : BGTZALC_ENC, BGTZALC_DESC, ISA_MIPS32R6;
@@ -532,8 +664,8 @@ def BGTZC : BGTZC_ENC, BGTZC_DESC, ISA_MIPS32R6;
def BITSWAP : BITSWAP_ENC, BITSWAP_DESC, ISA_MIPS32R6;
def BLEZALC : BLEZALC_ENC, BLEZALC_DESC, ISA_MIPS32R6;
def BLEZC : BLEZC_ENC, BLEZC_DESC, ISA_MIPS32R6;
-def BLTC; // Also aliased to bgtc with operands swapped
-def BLTUC; // Also aliased to bgtuc with operands swapped
+def BLTC : BLTC_ENC, BLTC_DESC, ISA_MIPS32R6;
+def BLTUC : BLTUC_ENC, BLTUC_DESC, ISA_MIPS32R6;
def BLTZALC : BLTZALC_ENC, BLTZALC_DESC, ISA_MIPS32R6;
def BLTZC : BLTZC_ENC, BLTZC_DESC, ISA_MIPS32R6;
def BNEC : BNEC_ENC, BNEC_DESC, ISA_MIPS32R6;
@@ -541,15 +673,22 @@ def BNEZALC : BNEZALC_ENC, BNEZALC_DESC, ISA_MIPS32R6;
def BNEZC : BNEZC_ENC, BNEZC_DESC, ISA_MIPS32R6;
def BNVC : BNVC_ENC, BNVC_DESC, ISA_MIPS32R6;
def BOVC : BOVC_ENC, BOVC_DESC, ISA_MIPS32R6;
+def CACHE_R6 : CACHE_ENC, CACHE_DESC, ISA_MIPS32R6;
def CLASS_D : CLASS_D_ENC, CLASS_D_DESC, ISA_MIPS32R6;
def CLASS_S : CLASS_S_ENC, CLASS_S_DESC, ISA_MIPS32R6;
+def CLO_R6 : CLO_R6_ENC, CLO_R6_DESC, ISA_MIPS32R6;
+def CLZ_R6 : CLZ_R6_ENC, CLZ_R6_DESC, ISA_MIPS32R6;
defm S : CMP_CC_M<FIELD_CMP_FORMAT_S, "s", FGR32Opnd>;
defm D : CMP_CC_M<FIELD_CMP_FORMAT_D, "d", FGR64Opnd>;
def DIV : DIV_ENC, DIV_DESC, ISA_MIPS32R6;
def DIVU : DIVU_ENC, DIVU_DESC, ISA_MIPS32R6;
def JIALC : JIALC_ENC, JIALC_DESC, ISA_MIPS32R6;
def JIC : JIC_ENC, JIC_DESC, ISA_MIPS32R6;
-// def LSA; // See MSA
+def JR_HB_R6 : JR_HB_R6_ENC, JR_HB_R6_DESC, ISA_MIPS32R6;
+def LDC2_R6 : LDC2_R6_ENC, LDC2_R6_DESC, ISA_MIPS32R6;
+def LL_R6 : LL_R6_ENC, LL_R6_DESC, ISA_MIPS32R6;
+def LSA_R6 : LSA_R6_ENC, LSA_R6_DESC, ISA_MIPS32R6;
+def LWC2_R6 : LWC2_R6_ENC, LWC2_R6_DESC, ISA_MIPS32R6;
def LWPC : LWPC_ENC, LWPC_DESC, ISA_MIPS32R6;
def LWUPC : LWUPC_ENC, LWUPC_DESC, ISA_MIPS32R6;
def MADDF_S : MADDF_S_ENC, MADDF_S_DESC, ISA_MIPS32R6;
@@ -571,13 +710,115 @@ def MUHU : MUHU_ENC, MUHU_DESC, ISA_MIPS32R6;
def MUL_R6 : MUL_R6_ENC, MUL_R6_DESC, ISA_MIPS32R6;
def MULU : MULU_ENC, MULU_DESC, ISA_MIPS32R6;
def NAL; // BAL with rd=0
+def PREF_R6 : PREF_ENC, PREF_DESC, ISA_MIPS32R6;
def RINT_D : RINT_D_ENC, RINT_D_DESC, ISA_MIPS32R6;
def RINT_S : RINT_S_ENC, RINT_S_DESC, ISA_MIPS32R6;
-def SELEQZ : SELEQZ_ENC, SELEQZ_DESC, ISA_MIPS32R6;
+def SC_R6 : SC_R6_ENC, SC_R6_DESC, ISA_MIPS32R6;
+def SDBBP_R6 : SDBBP_R6_ENC, SDBBP_R6_DESC, ISA_MIPS32R6;
+def SDC2_R6 : SDC2_R6_ENC, SDC2_R6_DESC, ISA_MIPS32R6;
+def SELEQZ : SELEQZ_ENC, SELEQZ_DESC, ISA_MIPS32R6, GPR_32;
def SELEQZ_D : SELEQZ_D_ENC, SELEQZ_D_DESC, ISA_MIPS32R6;
def SELEQZ_S : SELEQZ_S_ENC, SELEQZ_S_DESC, ISA_MIPS32R6;
-def SELNEZ : SELNEZ_ENC, SELNEZ_DESC, ISA_MIPS32R6;
+def SELNEZ : SELNEZ_ENC, SELNEZ_DESC, ISA_MIPS32R6, GPR_32;
def SELNEZ_D : SELNEZ_D_ENC, SELNEZ_D_DESC, ISA_MIPS32R6;
def SELNEZ_S : SELNEZ_S_ENC, SELNEZ_S_DESC, ISA_MIPS32R6;
def SEL_D : SEL_D_ENC, SEL_D_DESC, ISA_MIPS32R6;
def SEL_S : SEL_S_ENC, SEL_S_DESC, ISA_MIPS32R6;
+def SWC2_R6 : SWC2_R6_ENC, SWC2_R6_DESC, ISA_MIPS32R6;
+
+//===----------------------------------------------------------------------===//
+//
+// Instruction Aliases
+//
+//===----------------------------------------------------------------------===//
+
+def : MipsInstAlias<"sdbbp", (SDBBP_R6 0)>, ISA_MIPS32R6;
+def : MipsInstAlias<"jr $rs", (JALR ZERO, GPR32Opnd:$rs), 1>, ISA_MIPS32R6;
+
+//===----------------------------------------------------------------------===//
+//
+// Patterns and Pseudo Instructions
+//
+//===----------------------------------------------------------------------===//
+
+// f32 comparisons supported via another comparison
+def : MipsPat<(setone f32:$lhs, f32:$rhs),
+ (NOR (CMP_UEQ_S f32:$lhs, f32:$rhs), ZERO)>, ISA_MIPS32R6;
+def : MipsPat<(seto f32:$lhs, f32:$rhs),
+ (NOR (CMP_UN_S f32:$lhs, f32:$rhs), ZERO)>, ISA_MIPS32R6;
+def : MipsPat<(setune f32:$lhs, f32:$rhs),
+ (NOR (CMP_EQ_S f32:$lhs, f32:$rhs), ZERO)>, ISA_MIPS32R6;
+def : MipsPat<(seteq f32:$lhs, f32:$rhs), (CMP_EQ_S f32:$lhs, f32:$rhs)>,
+ ISA_MIPS32R6;
+def : MipsPat<(setgt f32:$lhs, f32:$rhs), (CMP_LE_S f32:$rhs, f32:$lhs)>,
+ ISA_MIPS32R6;
+def : MipsPat<(setge f32:$lhs, f32:$rhs), (CMP_LT_S f32:$rhs, f32:$lhs)>,
+ ISA_MIPS32R6;
+def : MipsPat<(setlt f32:$lhs, f32:$rhs), (CMP_LT_S f32:$lhs, f32:$rhs)>,
+ ISA_MIPS32R6;
+def : MipsPat<(setle f32:$lhs, f32:$rhs), (CMP_LE_S f32:$lhs, f32:$rhs)>,
+ ISA_MIPS32R6;
+def : MipsPat<(setne f32:$lhs, f32:$rhs),
+ (NOR (CMP_EQ_S f32:$lhs, f32:$rhs), ZERO)>, ISA_MIPS32R6;
+
+// f64 comparisons supported via another comparison
+def : MipsPat<(setone f64:$lhs, f64:$rhs),
+ (NOR (CMP_UEQ_D f64:$lhs, f64:$rhs), ZERO)>, ISA_MIPS32R6;
+def : MipsPat<(seto f64:$lhs, f64:$rhs),
+ (NOR (CMP_UN_D f64:$lhs, f64:$rhs), ZERO)>, ISA_MIPS32R6;
+def : MipsPat<(setune f64:$lhs, f64:$rhs),
+ (NOR (CMP_EQ_D f64:$lhs, f64:$rhs), ZERO)>, ISA_MIPS32R6;
+def : MipsPat<(seteq f64:$lhs, f64:$rhs), (CMP_EQ_D f64:$lhs, f64:$rhs)>,
+ ISA_MIPS32R6;
+def : MipsPat<(setgt f64:$lhs, f64:$rhs), (CMP_LE_D f64:$rhs, f64:$lhs)>,
+ ISA_MIPS32R6;
+def : MipsPat<(setge f64:$lhs, f64:$rhs), (CMP_LT_D f64:$rhs, f64:$lhs)>,
+ ISA_MIPS32R6;
+def : MipsPat<(setlt f64:$lhs, f64:$rhs), (CMP_LT_D f64:$lhs, f64:$rhs)>,
+ ISA_MIPS32R6;
+def : MipsPat<(setle f64:$lhs, f64:$rhs), (CMP_LE_D f64:$lhs, f64:$rhs)>,
+ ISA_MIPS32R6;
+def : MipsPat<(setne f64:$lhs, f64:$rhs),
+ (NOR (CMP_EQ_D f64:$lhs, f64:$rhs), ZERO)>, ISA_MIPS32R6;
+
+// i32 selects
+def : MipsPat<(select i32:$cond, i32:$t, i32:$f),
+ (OR (SELNEZ i32:$t, i32:$cond), (SELEQZ i32:$f, i32:$cond))>,
+ ISA_MIPS32R6;
+def : MipsPat<(select (i32 (seteq i32:$cond, immz)), i32:$t, i32:$f),
+ (OR (SELEQZ i32:$t, i32:$cond), (SELNEZ i32:$f, i32:$cond))>,
+ ISA_MIPS32R6;
+def : MipsPat<(select (i32 (setne i32:$cond, immz)), i32:$t, i32:$f),
+ (OR (SELNEZ i32:$t, i32:$cond), (SELEQZ i32:$f, i32:$cond))>,
+ ISA_MIPS32R6;
+def : MipsPat<(select (i32 (seteq i32:$cond, immZExt16:$imm)), i32:$t, i32:$f),
+ (OR (SELEQZ i32:$t, (XORi i32:$cond, immZExt16:$imm)),
+ (SELNEZ i32:$f, (XORi i32:$cond, immZExt16:$imm)))>,
+ ISA_MIPS32R6;
+def : MipsPat<(select (i32 (setne i32:$cond, immZExt16:$imm)), i32:$t, i32:$f),
+ (OR (SELNEZ i32:$f, (XORi i32:$cond, immZExt16:$imm)),
+ (SELEQZ i32:$t, (XORi i32:$cond, immZExt16:$imm)))>,
+ ISA_MIPS32R6;
+def : MipsPat<(select (i32 (setgt i32:$cond, immSExt16Plus1:$imm)), i32:$t,
+ i32:$f),
+ (OR (SELEQZ i32:$t, (SLTi i32:$cond, (Plus1 imm:$imm))),
+ (SELNEZ i32:$f, (SLTi i32:$cond, (Plus1 imm:$imm))))>,
+ ISA_MIPS32R6;
+def : MipsPat<(select (i32 (setugt i32:$cond, immSExt16Plus1:$imm)),
+ i32:$t, i32:$f),
+ (OR (SELEQZ i32:$t, (SLTiu i32:$cond, (Plus1 imm:$imm))),
+ (SELNEZ i32:$f, (SLTiu i32:$cond, (Plus1 imm:$imm))))>,
+ ISA_MIPS32R6;
+
+def : MipsPat<(select i32:$cond, i32:$t, immz),
+ (SELNEZ i32:$t, i32:$cond)>, ISA_MIPS32R6;
+def : MipsPat<(select (i32 (setne i32:$cond, immz)), i32:$t, immz),
+ (SELNEZ i32:$t, i32:$cond)>, ISA_MIPS32R6;
+def : MipsPat<(select (i32 (seteq i32:$cond, immz)), i32:$t, immz),
+ (SELEQZ i32:$t, i32:$cond)>, ISA_MIPS32R6;
+def : MipsPat<(select i32:$cond, immz, i32:$f),
+ (SELEQZ i32:$f, i32:$cond)>, ISA_MIPS32R6;
+def : MipsPat<(select (i32 (setne i32:$cond, immz)), immz, i32:$f),
+ (SELEQZ i32:$f, i32:$cond)>, ISA_MIPS32R6;
+def : MipsPat<(select (i32 (seteq i32:$cond, immz)), immz, i32:$f),
+ (SELNEZ i32:$f, i32:$cond)>, ISA_MIPS32R6;
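
The i32 select patterns above all lean on the same MIPS32r6 identity: SELNEZ returns its value operand when the condition register is non-zero (and zero otherwise), SELEQZ does the reverse, and OR-ing the two partial results reconstructs the full select. A minimal, self-contained C++ sketch of that identity; seleqz/selnez here are hypothetical stand-ins for the instructions, not LLVM APIs:

    #include <cassert>
    #include <cstdint>

    // Hypothetical models of the MIPS32r6 SELEQZ/SELNEZ instructions:
    // the value operand passes through only when the condition test holds.
    static uint32_t seleqz(uint32_t value, uint32_t cond) { return cond == 0 ? value : 0; }
    static uint32_t selnez(uint32_t value, uint32_t cond) { return cond != 0 ? value : 0; }

    int main() {
      const uint32_t conds[] = {0, 1, 42};
      const uint32_t t = 0x1234, f = 0x5678;
      for (uint32_t cond : conds) {
        // (select cond, t, f) lowered as (OR (SELNEZ t, cond), (SELEQZ f, cond)).
        uint32_t expected = cond ? t : f;
        uint32_t lowered = selnez(t, cond) | seleqz(f, cond);
        assert(lowered == expected);
      }
      return 0;
    }
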
diff --git a/lib/Target/Mips/Mips64InstrInfo.td b/lib/Target/Mips/Mips64InstrInfo.td
index 924b325..f0b6814 100644
--- a/lib/Target/Mips/Mips64InstrInfo.td
+++ b/lib/Target/Mips/Mips64InstrInfo.td
@@ -23,6 +23,8 @@ def uimm16_64 : Operand<i64> {
// Signed Operand
def simm10_64 : Operand<i64>;
+def imm64: Operand<i64>;
+
// Transformation Function - get Imm - 32.
def Subtract32 : SDNodeXForm<imm, [{
return getImm(N, (unsigned)N->getZExtValue() - 32);
@@ -36,6 +38,9 @@ def immZExt6 : ImmLeaf<i32, [{return Imm == (Imm & 0x3f);}]>;
def immSExt10_64 : PatLeaf<(i64 imm),
[{ return isInt<10>(N->getSExtValue()); }]>;
+def immZExt16_64 : PatLeaf<(i64 imm),
+ [{ return isInt<16>(N->getZExtValue()); }]>;
+
//===----------------------------------------------------------------------===//
// Instructions specific format
//===----------------------------------------------------------------------===//
@@ -62,7 +67,7 @@ let isPseudo = 1, isCodeGenOnly = 1 in {
let DecoderNamespace = "Mips64" in {
/// Arithmetic Instructions (ALU Immediate)
def DADDi : ArithLogicI<"daddi", simm16_64, GPR64Opnd>, ADDI_FM<0x18>,
- ISA_MIPS3;
+ ISA_MIPS3_NOT_32R6_64R6;
def DADDiu : ArithLogicI<"daddiu", simm16_64, GPR64Opnd, II_DADDIU,
immSExt16, add>,
ADDI_FM<0x19>, IsAsCheapAsAMove, ISA_MIPS3;
@@ -164,49 +169,58 @@ def SDR : StoreLeftRight<"sdr", MipsSDR, GPR64Opnd, II_SDR>, LW_FM<0x2d>,
ISA_MIPS3_NOT_32R6_64R6;
/// Load-linked, Store-conditional
-def LLD : LLBase<"lld", GPR64Opnd>, LW_FM<0x34>, ISA_MIPS3;
-def SCD : SCBase<"scd", GPR64Opnd>, LW_FM<0x3c>, ISA_MIPS3;
+def LLD : LLBase<"lld", GPR64Opnd>, LW_FM<0x34>, ISA_MIPS3_NOT_32R6_64R6;
+def SCD : SCBase<"scd", GPR64Opnd>, LW_FM<0x3c>, ISA_MIPS3_NOT_32R6_64R6;
/// Jump and Branch Instructions
let isCodeGenOnly = 1 in {
-def JR64 : IndirectBranch<"jr", GPR64Opnd>, MTLO_FM<8>;
-def BEQ64 : CBranch<"beq", brtarget, seteq, GPR64Opnd>, BEQ_FM<4>;
-def BNE64 : CBranch<"bne", brtarget, setne, GPR64Opnd>, BEQ_FM<5>;
-def BGEZ64 : CBranchZero<"bgez", brtarget, setge, GPR64Opnd>, BGEZ_FM<1, 1>;
-def BGTZ64 : CBranchZero<"bgtz", brtarget, setgt, GPR64Opnd>, BGEZ_FM<7, 0>;
-def BLEZ64 : CBranchZero<"blez", brtarget, setle, GPR64Opnd>, BGEZ_FM<6, 0>;
-def BLTZ64 : CBranchZero<"bltz", brtarget, setlt, GPR64Opnd>, BGEZ_FM<1, 0>;
-def JALR64 : JumpLinkReg<"jalr", GPR64Opnd>, JALR_FM;
-def JALR64Pseudo : JumpLinkRegPseudo<GPR64Opnd, JALR, RA, GPR32Opnd>;
-def TAILCALL64_R : TailCallReg<GPR64Opnd, JR, GPR32Opnd>;
+ def JR64 : IndirectBranch<"jr", GPR64Opnd>, MTLO_FM<8>;
+ def BEQ64 : CBranch<"beq", brtarget, seteq, GPR64Opnd>, BEQ_FM<4>;
+ def BNE64 : CBranch<"bne", brtarget, setne, GPR64Opnd>, BEQ_FM<5>;
+ def BGEZ64 : CBranchZero<"bgez", brtarget, setge, GPR64Opnd>, BGEZ_FM<1, 1>;
+ def BGTZ64 : CBranchZero<"bgtz", brtarget, setgt, GPR64Opnd>, BGEZ_FM<7, 0>;
+ def BLEZ64 : CBranchZero<"blez", brtarget, setle, GPR64Opnd>, BGEZ_FM<6, 0>;
+ def BLTZ64 : CBranchZero<"bltz", brtarget, setlt, GPR64Opnd>, BGEZ_FM<1, 0>;
+ def JALR64 : JumpLinkReg<"jalr", GPR64Opnd>, JALR_FM;
+ def JALR64Pseudo : JumpLinkRegPseudo<GPR64Opnd, JALR, RA, GPR32Opnd>;
+ def TAILCALL64_R : TailCallReg<GPR64Opnd, JR, GPR32Opnd>;
}
+def PseudoReturn64 : PseudoReturnBase<GPR64Opnd>;
+def PseudoIndirectBranch64 : PseudoIndirectBranchBase<GPR64Opnd>;
+
/// Multiply and Divide Instructions.
def DMULT : Mult<"dmult", II_DMULT, GPR64Opnd, [HI0_64, LO0_64]>,
- MULT_FM<0, 0x1c>, ISA_MIPS3;
+ MULT_FM<0, 0x1c>, ISA_MIPS3_NOT_32R6_64R6;
def DMULTu : Mult<"dmultu", II_DMULTU, GPR64Opnd, [HI0_64, LO0_64]>,
- MULT_FM<0, 0x1d>, ISA_MIPS3;
+ MULT_FM<0, 0x1d>, ISA_MIPS3_NOT_32R6_64R6;
def PseudoDMULT : MultDivPseudo<DMULT, ACC128, GPR64Opnd, MipsMult,
- II_DMULT>;
+ II_DMULT>, ISA_MIPS3_NOT_32R6_64R6;
def PseudoDMULTu : MultDivPseudo<DMULTu, ACC128, GPR64Opnd, MipsMultu,
- II_DMULTU>;
+ II_DMULTU>, ISA_MIPS3_NOT_32R6_64R6;
def DSDIV : Div<"ddiv", II_DDIV, GPR64Opnd, [HI0_64, LO0_64]>,
- MULT_FM<0, 0x1e>, ISA_MIPS3;
+ MULT_FM<0, 0x1e>, ISA_MIPS3_NOT_32R6_64R6;
def DUDIV : Div<"ddivu", II_DDIVU, GPR64Opnd, [HI0_64, LO0_64]>,
- MULT_FM<0, 0x1f>, ISA_MIPS3;
+ MULT_FM<0, 0x1f>, ISA_MIPS3_NOT_32R6_64R6;
def PseudoDSDIV : MultDivPseudo<DSDIV, ACC128, GPR64Opnd, MipsDivRem,
- II_DDIV, 0, 1, 1>;
+ II_DDIV, 0, 1, 1>, ISA_MIPS3_NOT_32R6_64R6;
def PseudoDUDIV : MultDivPseudo<DUDIV, ACC128, GPR64Opnd, MipsDivRemU,
- II_DDIVU, 0, 1, 1>;
+ II_DDIVU, 0, 1, 1>, ISA_MIPS3_NOT_32R6_64R6;
let isCodeGenOnly = 1 in {
-def MTHI64 : MoveToLOHI<"mthi", GPR64Opnd, [HI0_64]>, MTLO_FM<0x11>;
-def MTLO64 : MoveToLOHI<"mtlo", GPR64Opnd, [LO0_64]>, MTLO_FM<0x13>;
-def MFHI64 : MoveFromLOHI<"mfhi", GPR64Opnd, AC0_64>, MFLO_FM<0x10>;
-def MFLO64 : MoveFromLOHI<"mflo", GPR64Opnd, AC0_64>, MFLO_FM<0x12>;
-def PseudoMFHI64 : PseudoMFLOHI<GPR64, ACC128, MipsMFHI>;
-def PseudoMFLO64 : PseudoMFLOHI<GPR64, ACC128, MipsMFLO>;
-def PseudoMTLOHI64 : PseudoMTLOHI<ACC128, GPR64>;
+def MTHI64 : MoveToLOHI<"mthi", GPR64Opnd, [HI0_64]>, MTLO_FM<0x11>,
+ ISA_MIPS3_NOT_32R6_64R6;
+def MTLO64 : MoveToLOHI<"mtlo", GPR64Opnd, [LO0_64]>, MTLO_FM<0x13>,
+ ISA_MIPS3_NOT_32R6_64R6;
+def MFHI64 : MoveFromLOHI<"mfhi", GPR64Opnd, AC0_64>, MFLO_FM<0x10>,
+ ISA_MIPS3_NOT_32R6_64R6;
+def MFLO64 : MoveFromLOHI<"mflo", GPR64Opnd, AC0_64>, MFLO_FM<0x12>,
+ ISA_MIPS3_NOT_32R6_64R6;
+def PseudoMFHI64 : PseudoMFLOHI<GPR64, ACC128, MipsMFHI>,
+ ISA_MIPS3_NOT_32R6_64R6;
+def PseudoMFLO64 : PseudoMFLOHI<GPR64, ACC128, MipsMFLO>,
+ ISA_MIPS3_NOT_32R6_64R6;
+def PseudoMTLOHI64 : PseudoMTLOHI<ACC128, GPR64>, ISA_MIPS3_NOT_32R6_64R6;
/// Sign Ext In Register Instructions.
def SEB64 : SignExtInReg<"seb", i8, GPR64Opnd, II_SEB>, SEB_FM<0x10, 0x20>,
@@ -216,8 +230,8 @@ def SEH64 : SignExtInReg<"seh", i16, GPR64Opnd, II_SEH>, SEB_FM<0x18, 0x20>,
}
/// Count Leading
-def DCLZ : CountLeading0<"dclz", GPR64Opnd>, CLO_FM<0x24>, ISA_MIPS64;
-def DCLO : CountLeading1<"dclo", GPR64Opnd>, CLO_FM<0x25>, ISA_MIPS64;
+def DCLZ : CountLeading0<"dclz", GPR64Opnd>, CLO_FM<0x24>, ISA_MIPS64_NOT_64R6;
+def DCLO : CountLeading1<"dclo", GPR64Opnd>, CLO_FM<0x25>, ISA_MIPS64_NOT_64R6;
/// Double Word Swap Bytes/HalfWords
def DSBH : SubwordSwap<"dsbh", GPR64Opnd>, SEB_FM<2, 0x24>, ISA_MIPS64R2;
@@ -431,13 +445,13 @@ def : MipsInstAlias<"daddu $rs, $rt, $imm",
0>;
def : MipsInstAlias<"dadd $rs, $rt, $imm",
(DADDi GPR64Opnd:$rs, GPR64Opnd:$rt, simm16_64:$imm),
- 0>;
+ 0>, ISA_MIPS3_NOT_32R6_64R6;
def : MipsInstAlias<"daddu $rs, $imm",
(DADDiu GPR64Opnd:$rs, GPR64Opnd:$rs, simm16_64:$imm),
0>;
def : MipsInstAlias<"dadd $rs, $imm",
(DADDi GPR64Opnd:$rs, GPR64Opnd:$rs, simm16_64:$imm),
- 0>;
+ 0>, ISA_MIPS3_NOT_32R6_64R6;
def : MipsInstAlias<"add $rs, $imm",
(ADDi GPR32Opnd:$rs, GPR32Opnd:$rs, simm16:$imm),
0>;
@@ -450,10 +464,22 @@ def : MipsInstAlias<"dsll $rd, $rt, $rs",
def : MipsInstAlias<"dsubu $rt, $rs, $imm",
(DADDiu GPR64Opnd:$rt, GPR64Opnd:$rs,
InvertedImOperand64:$imm), 0>;
+def : MipsInstAlias<"dsubi $rs, $rt, $imm",
+ (DADDi GPR64Opnd:$rs, GPR64Opnd:$rt,
+ InvertedImOperand64:$imm),
+ 0>, ISA_MIPS3_NOT_32R6_64R6;
+def : MipsInstAlias<"dsubi $rs, $imm",
+ (DADDi GPR64Opnd:$rs, GPR64Opnd:$rs,
+ InvertedImOperand64:$imm),
+ 0>, ISA_MIPS3_NOT_32R6_64R6;
+def : MipsInstAlias<"dsub $rs, $rt, $imm",
+ (DADDi GPR64Opnd:$rs, GPR64Opnd:$rt,
+ InvertedImOperand64:$imm),
+ 0>, ISA_MIPS3_NOT_32R6_64R6;
def : MipsInstAlias<"dsub $rs, $imm",
(DADDi GPR64Opnd:$rs, GPR64Opnd:$rs,
InvertedImOperand64:$imm),
- 0>;
+ 0>, ISA_MIPS3_NOT_32R6_64R6;
def : MipsInstAlias<"dsubu $rs, $imm",
(DADDiu GPR64Opnd:$rs, GPR64Opnd:$rs,
InvertedImOperand64:$imm),
@@ -465,6 +491,11 @@ def : MipsInstAlias<"dsrl $rd, $rt, $rs",
(DSRLV GPR64Opnd:$rd, GPR64Opnd:$rt, GPR32Opnd:$rs), 0>,
ISA_MIPS3;
+class LoadImm64< string instr_asm, Operand Od, RegisterOperand RO> :
+ MipsAsmPseudoInst<(outs RO:$rt), (ins Od:$imm64),
+ !strconcat(instr_asm, "\t$rt, $imm64")> ;
+def LoadImm64Reg : LoadImm64<"dli", imm64, GPR64Opnd>;
+
/// Move between CPU and coprocessor registers
let DecoderNamespace = "Mips64", Predicates = [HasMips64] in {
def DMFC0 : MFC3OP<"dmfc0", GPR64Opnd>, MFC3OP_FM<0x10, 1>;
diff --git a/lib/Target/Mips/Mips64r6InstrInfo.td b/lib/Target/Mips/Mips64r6InstrInfo.td
index f971218..63cf60b 100644
--- a/lib/Target/Mips/Mips64r6InstrInfo.td
+++ b/lib/Target/Mips/Mips64r6InstrInfo.td
@@ -13,10 +13,6 @@
// Notes about removals/changes from MIPS32r6:
// Reencoded: dclo, dclz
-// Reencoded: lld, scd
-// Removed: daddi
-// Removed: ddiv, ddivu, dmult, dmultu
-// Removed: div, divu
//===----------------------------------------------------------------------===//
//
@@ -29,14 +25,20 @@ class DAUI_ENC : DAUI_FM;
class DAHI_ENC : REGIMM_FM<OPCODE5_DAHI>;
class DATI_ENC : REGIMM_FM<OPCODE5_DATI>;
class DBITSWAP_ENC : SPECIAL3_2R_FM<OPCODE6_DBITSWAP>;
+class DCLO_R6_ENC : SPECIAL_2R_FM<OPCODE6_DCLO>;
+class DCLZ_R6_ENC : SPECIAL_2R_FM<OPCODE6_DCLZ>;
class DDIV_ENC : SPECIAL_3R_FM<0b00010, 0b011110>;
class DDIVU_ENC : SPECIAL_3R_FM<0b00010, 0b011111>;
+class DLSA_R6_ENC : SPECIAL_LSA_FM<OPCODE6_DLSA>;
class DMOD_ENC : SPECIAL_3R_FM<0b00011, 0b011110>;
class DMODU_ENC : SPECIAL_3R_FM<0b00011, 0b011111>;
-class DMUH_ENC : SPECIAL_3R_FM<0b00011, 0b111000>;
-class DMUHU_ENC : SPECIAL_3R_FM<0b00011, 0b111001>;
-class DMUL_R6_ENC : SPECIAL_3R_FM<0b00010, 0b111000>;
-class DMULU_ENC : SPECIAL_3R_FM<0b00010, 0b111001>;
+class DMUH_ENC : SPECIAL_3R_FM<0b00011, 0b011100>;
+class DMUHU_ENC : SPECIAL_3R_FM<0b00011, 0b011101>;
+class DMUL_R6_ENC : SPECIAL_3R_FM<0b00010, 0b011100>;
+class DMULU_ENC : SPECIAL_3R_FM<0b00010, 0b011101>;
+class LDPC_ENC : PCREL18_FM<OPCODE3_LDPC>;
+class LLD_R6_ENC : SPECIAL3_LL_SC_FM<OPCODE6_LLD>;
+class SCD_R6_ENC : SPECIAL3_LL_SC_FM<OPCODE6_SCD>;
//===----------------------------------------------------------------------===//
//
@@ -56,14 +58,22 @@ class DAHI_DESC : AHI_ATI_DESC_BASE<"dahi", GPR64Opnd>;
class DATI_DESC : AHI_ATI_DESC_BASE<"dati", GPR64Opnd>;
class DAUI_DESC : AUI_DESC_BASE<"daui", GPR64Opnd>;
class DBITSWAP_DESC : BITSWAP_DESC_BASE<"dbitswap", GPR64Opnd>;
-class DDIV_DESC : DIVMOD_DESC_BASE<"ddiv", GPR64Opnd>;
-class DDIVU_DESC : DIVMOD_DESC_BASE<"ddivu", GPR64Opnd>;
-class DMOD_DESC : DIVMOD_DESC_BASE<"dmod", GPR64Opnd>;
-class DMODU_DESC : DIVMOD_DESC_BASE<"dmodu", GPR64Opnd>;
-class DMUH_DESC : MUL_R6_DESC_BASE<"dmuh", GPR64Opnd>;
-class DMUHU_DESC : MUL_R6_DESC_BASE<"dmuhu", GPR64Opnd>;
-class DMUL_R6_DESC : MUL_R6_DESC_BASE<"dmul", GPR64Opnd>;
+class DCLO_R6_DESC : CLO_R6_DESC_BASE<"dclo", GPR64Opnd>;
+class DCLZ_R6_DESC : CLZ_R6_DESC_BASE<"dclz", GPR64Opnd>;
+class DDIV_DESC : DIVMOD_DESC_BASE<"ddiv", GPR64Opnd, sdiv>;
+class DDIVU_DESC : DIVMOD_DESC_BASE<"ddivu", GPR64Opnd, udiv>;
+class DLSA_R6_DESC : LSA_R6_DESC_BASE<"dlsa", GPR64Opnd, uimm2>;
+class DMOD_DESC : DIVMOD_DESC_BASE<"dmod", GPR64Opnd, srem>;
+class DMODU_DESC : DIVMOD_DESC_BASE<"dmodu", GPR64Opnd, urem>;
+class DMUH_DESC : MUL_R6_DESC_BASE<"dmuh", GPR64Opnd, mulhs>;
+class DMUHU_DESC : MUL_R6_DESC_BASE<"dmuhu", GPR64Opnd, mulhu>;
+class DMUL_R6_DESC : MUL_R6_DESC_BASE<"dmul", GPR64Opnd, mul>;
class DMULU_DESC : MUL_R6_DESC_BASE<"dmulu", GPR64Opnd>;
+class LDPC_DESC : PCREL_DESC_BASE<"ldpc", GPR64Opnd, simm18_lsl3>;
+class LLD_R6_DESC : LL_R6_DESC_BASE<"lld", GPR64Opnd>;
+class SCD_R6_DESC : SC_R6_DESC_BASE<"scd", GPR64Opnd>;
+class SELEQZ64_DESC : SELEQNE_Z_DESC_BASE<"seleqz", GPR64Opnd>;
+class SELNEZ64_DESC : SELEQNE_Z_DESC_BASE<"selnez", GPR64Opnd>;
//===----------------------------------------------------------------------===//
//
@@ -76,13 +86,132 @@ def DALIGN : DALIGN_ENC, DALIGN_DESC, ISA_MIPS64R6;
def DATI : DATI_ENC, DATI_DESC, ISA_MIPS64R6;
def DAUI : DAUI_ENC, DAUI_DESC, ISA_MIPS64R6;
def DBITSWAP : DBITSWAP_ENC, DBITSWAP_DESC, ISA_MIPS64R6;
+def DCLO_R6 : DCLO_R6_ENC, DCLO_R6_DESC, ISA_MIPS64R6;
+def DCLZ_R6 : DCLZ_R6_ENC, DCLZ_R6_DESC, ISA_MIPS64R6;
def DDIV : DDIV_ENC, DDIV_DESC, ISA_MIPS64R6;
def DDIVU : DDIVU_ENC, DDIVU_DESC, ISA_MIPS64R6;
-// def DLSA; // See MSA
+def DLSA_R6 : DLSA_R6_ENC, DLSA_R6_DESC, ISA_MIPS64R6;
def DMOD : DMOD_ENC, DMOD_DESC, ISA_MIPS64R6;
def DMODU : DMODU_ENC, DMODU_DESC, ISA_MIPS64R6;
def DMUH: DMUH_ENC, DMUH_DESC, ISA_MIPS64R6;
def DMUHU: DMUHU_ENC, DMUHU_DESC, ISA_MIPS64R6;
def DMUL_R6: DMUL_R6_ENC, DMUL_R6_DESC, ISA_MIPS64R6;
def DMULU: DMULU_ENC, DMULU_DESC, ISA_MIPS64R6;
-def LDPC;
+def LDPC: LDPC_ENC, LDPC_DESC, ISA_MIPS64R6;
+def LLD_R6 : LLD_R6_ENC, LLD_R6_DESC, ISA_MIPS32R6;
+def SCD_R6 : SCD_R6_ENC, SCD_R6_DESC, ISA_MIPS32R6;
+let DecoderNamespace = "Mips32r6_64r6_GP64" in {
+ def SELEQZ64 : SELEQZ_ENC, SELEQZ64_DESC, ISA_MIPS32R6, GPR_64;
+ def SELNEZ64 : SELNEZ_ENC, SELNEZ64_DESC, ISA_MIPS32R6, GPR_64;
+}
+
+//===----------------------------------------------------------------------===//
+//
+// Instruction Aliases
+//
+//===----------------------------------------------------------------------===//
+
+def : MipsInstAlias<"jr $rs", (JALR64 ZERO_64, GPR64Opnd:$rs), 1>, ISA_MIPS64R6;
+
+//===----------------------------------------------------------------------===//
+//
+// Patterns and Pseudo Instructions
+//
+//===----------------------------------------------------------------------===//
+
+// i64 selects
+def : MipsPat<(select i64:$cond, i64:$t, i64:$f),
+ (OR64 (SELNEZ64 i64:$t, i64:$cond),
+ (SELEQZ64 i64:$f, i64:$cond))>,
+ ISA_MIPS64R6;
+def : MipsPat<(select (i32 (seteq i64:$cond, immz)), i64:$t, i64:$f),
+ (OR64 (SELEQZ64 i64:$t, i64:$cond),
+ (SELNEZ64 i64:$f, i64:$cond))>,
+ ISA_MIPS64R6;
+def : MipsPat<(select (i32 (setne i64:$cond, immz)), i64:$t, i64:$f),
+ (OR64 (SELNEZ64 i64:$t, i64:$cond),
+ (SELEQZ64 i64:$f, i64:$cond))>,
+ ISA_MIPS64R6;
+def : MipsPat<(select (i32 (seteq i64:$cond, immZExt16_64:$imm)), i64:$t, i64:$f),
+ (OR64 (SELEQZ64 i64:$t, (XORi64 i64:$cond, immZExt16_64:$imm)),
+ (SELNEZ64 i64:$f, (XORi64 i64:$cond, immZExt16_64:$imm)))>,
+ ISA_MIPS64R6;
+def : MipsPat<(select (i32 (setne i64:$cond, immZExt16_64:$imm)), i64:$t, i64:$f),
+ (OR64 (SELNEZ64 i64:$t, (XORi64 i64:$cond, immZExt16_64:$imm)),
+ (SELEQZ64 i64:$f, (XORi64 i64:$cond, immZExt16_64:$imm)))>,
+ ISA_MIPS64R6;
+def : MipsPat<
+ (select (i32 (setgt i64:$cond, immSExt16Plus1:$imm)), i64:$t, i64:$f),
+ (OR64 (SELEQZ64 i64:$t,
+ (SUBREG_TO_REG (i64 0), (SLTi64 i64:$cond, (Plus1 imm:$imm)),
+ sub_32)),
+ (SELNEZ64 i64:$f,
+ (SUBREG_TO_REG (i64 0), (SLTi64 i64:$cond, (Plus1 imm:$imm)),
+ sub_32)))>,
+ ISA_MIPS64R6;
+def : MipsPat<
+ (select (i32 (setugt i64:$cond, immSExt16Plus1:$imm)), i64:$t, i64:$f),
+ (OR64 (SELEQZ64 i64:$t,
+ (SUBREG_TO_REG (i64 0), (SLTiu64 i64:$cond, (Plus1 imm:$imm)),
+ sub_32)),
+ (SELNEZ64 i64:$f,
+ (SUBREG_TO_REG (i64 0), (SLTiu64 i64:$cond, (Plus1 imm:$imm)),
+ sub_32)))>,
+ ISA_MIPS64R6;
+
+def : MipsPat<(select (i32 (setne i64:$cond, immz)), i64:$t, immz),
+ (SELNEZ64 i64:$t, i64:$cond)>, ISA_MIPS64R6;
+def : MipsPat<(select (i32 (seteq i64:$cond, immz)), i64:$t, immz),
+ (SELEQZ64 i64:$t, i64:$cond)>, ISA_MIPS64R6;
+def : MipsPat<(select (i32 (setne i64:$cond, immz)), immz, i64:$f),
+ (SELEQZ64 i64:$f, i64:$cond)>, ISA_MIPS64R6;
+def : MipsPat<(select (i32 (seteq i64:$cond, immz)), immz, i64:$f),
+ (SELNEZ64 i64:$f, i64:$cond)>, ISA_MIPS64R6;
+
+// i64 selects from an i32 comparison
+// One complicating factor here is that bits 32-63 of an i32 are undefined.
+// FIXME: Ideally, setcc would always produce an i64 on MIPS64 targets.
+// This would allow us to remove the sign-extensions here.
+def : MipsPat<(select i32:$cond, i64:$t, i64:$f),
+ (OR64 (SELNEZ64 i64:$t, (SLL64_32 i32:$cond)),
+ (SELEQZ64 i64:$f, (SLL64_32 i32:$cond)))>,
+ ISA_MIPS64R6;
+def : MipsPat<(select (i32 (seteq i32:$cond, immz)), i64:$t, i64:$f),
+ (OR64 (SELEQZ64 i64:$t, (SLL64_32 i32:$cond)),
+ (SELNEZ64 i64:$f, (SLL64_32 i32:$cond)))>,
+ ISA_MIPS64R6;
+def : MipsPat<(select (i32 (setne i32:$cond, immz)), i64:$t, i64:$f),
+ (OR64 (SELNEZ64 i64:$t, (SLL64_32 i32:$cond)),
+ (SELEQZ64 i64:$f, (SLL64_32 i32:$cond)))>,
+ ISA_MIPS64R6;
+def : MipsPat<(select (i32 (seteq i32:$cond, immZExt16:$imm)), i64:$t, i64:$f),
+ (OR64 (SELEQZ64 i64:$t, (SLL64_32 (XORi i32:$cond,
+ immZExt16:$imm))),
+ (SELNEZ64 i64:$f, (SLL64_32 (XORi i32:$cond,
+ immZExt16:$imm))))>,
+ ISA_MIPS64R6;
+def : MipsPat<(select (i32 (setne i32:$cond, immZExt16:$imm)), i64:$t, i64:$f),
+ (OR64 (SELNEZ64 i64:$f, (SLL64_32 (XORi i32:$cond,
+ immZExt16:$imm))),
+ (SELEQZ64 i64:$t, (SLL64_32 (XORi i32:$cond,
+ immZExt16:$imm))))>,
+ ISA_MIPS64R6;
+
+def : MipsPat<(select i32:$cond, i64:$t, immz),
+ (SELNEZ64 i64:$t, (SLL64_32 i32:$cond))>,
+ ISA_MIPS64R6;
+def : MipsPat<(select (i32 (setne i32:$cond, immz)), i64:$t, immz),
+ (SELNEZ64 i64:$t, (SLL64_32 i32:$cond))>,
+ ISA_MIPS64R6;
+def : MipsPat<(select (i32 (seteq i32:$cond, immz)), i64:$t, immz),
+ (SELEQZ64 i64:$t, (SLL64_32 i32:$cond))>,
+ ISA_MIPS64R6;
+def : MipsPat<(select i32:$cond, immz, i64:$f),
+ (SELEQZ64 i64:$f, (SLL64_32 i32:$cond))>,
+ ISA_MIPS64R6;
+def : MipsPat<(select (i32 (setne i32:$cond, immz)), immz, i64:$f),
+ (SELEQZ64 i64:$f, (SLL64_32 i32:$cond))>,
+ ISA_MIPS64R6;
+def : MipsPat<(select (i32 (seteq i32:$cond, immz)), immz, i64:$f),
+ (SELNEZ64 i64:$f, (SLL64_32 i32:$cond))>,
+ ISA_MIPS64R6;
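
The "i64 selects from an i32 comparison" patterns above depend on SLL64_32 to make the condition usable by the 64-bit selects: bits 32-63 of an i32 value held in a 64-bit register are undefined, and (on my reading) SLL64_32 is the sll-by-zero move that sign-extends the low word, so SELEQZ64/SELNEZ64 see a well-defined zero/non-zero condition. A rough C++ model of why that extension matters, with sll64_32/seleqz64/selnez64 as illustrative helpers rather than LLVM APIs:

    #include <cassert>
    #include <cstdint>

    // sll-by-zero on MIPS64 sign-extends the low 32 bits into the full register
    // (assumed here to be what SLL64_32 emits).
    static uint64_t sll64_32(uint64_t reg) { return (uint64_t)(int64_t)(int32_t)reg; }
    static uint64_t seleqz64(uint64_t value, uint64_t cond) { return cond == 0 ? value : 0; }
    static uint64_t selnez64(uint64_t value, uint64_t cond) { return cond != 0 ? value : 0; }

    int main() {
      // An i32 condition of 0 sitting in a 64-bit register with garbage upper bits.
      const uint64_t cond_reg = 0xdeadbeef00000000ull;
      const uint64_t t = 1, f = 2;
      // Feeding the raw register to the selects picks the wrong value...
      assert((selnez64(t, cond_reg) | seleqz64(f, cond_reg)) != f);
      // ...but after the sign-extending move the select is correct.
      const uint64_t cond = sll64_32(cond_reg);
      assert((selnez64(t, cond) | seleqz64(f, cond)) == f);
      return 0;
    }
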
diff --git a/lib/Target/Mips/MipsAsmPrinter.cpp b/lib/Target/Mips/MipsAsmPrinter.cpp
index 6df90aa..1fb75a2 100644
--- a/lib/Target/Mips/MipsAsmPrinter.cpp
+++ b/lib/Target/Mips/MipsAsmPrinter.cpp
@@ -91,7 +91,46 @@ bool MipsAsmPrinter::lowerOperand(const MachineOperand &MO, MCOperand &MCOp) {
#include "MipsGenMCPseudoLowering.inc"
+// Lower PseudoReturn/PseudoIndirectBranch/PseudoIndirectBranch64 to JR, JR_MM,
+// JALR, or JALR64 as appropriate for the target
+void MipsAsmPrinter::emitPseudoIndirectBranch(MCStreamer &OutStreamer,
+ const MachineInstr *MI) {
+ bool HasLinkReg = false;
+ MCInst TmpInst0;
+
+ if (Subtarget->hasMips64r6()) {
+ // MIPS64r6 should use (JALR64 ZERO_64, $rs)
+ TmpInst0.setOpcode(Mips::JALR64);
+ HasLinkReg = true;
+ } else if (Subtarget->hasMips32r6()) {
+ // MIPS32r6 should use (JALR ZERO, $rs)
+ TmpInst0.setOpcode(Mips::JALR);
+ HasLinkReg = true;
+ } else if (Subtarget->inMicroMipsMode())
+ // microMIPS should use (JR_MM $rs)
+ TmpInst0.setOpcode(Mips::JR_MM);
+ else {
+ // Everything else should use (JR $rs)
+ TmpInst0.setOpcode(Mips::JR);
+ }
+
+ MCOperand MCOp;
+
+ if (HasLinkReg) {
+ unsigned ZeroReg = Subtarget->isGP64bit() ? Mips::ZERO_64 : Mips::ZERO;
+ TmpInst0.addOperand(MCOperand::CreateReg(ZeroReg));
+ }
+
+ lowerOperand(MI->getOperand(0), MCOp);
+ TmpInst0.addOperand(MCOp);
+
+ EmitToStreamer(OutStreamer, TmpInst0);
+}
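Note: a compact restatement of the decision above, with the subtarget flags reduced to plain booleans and only the mnemonic choice reproduced (the real code also picks ZERO vs. ZERO_64 for the link operand via isGP64bit); pickIndirectBranch is an illustrative name, not part of the patch.

#include <cassert>
#include <string>

// R6 targets use JALR/JALR64 with the zero register as the link register
// (plain JR is gone on r6); microMIPS uses JR_MM; everything else uses JR.
static std::string pickIndirectBranch(bool HasMips64r6, bool HasMips32r6,
                                      bool InMicroMips) {
  if (HasMips64r6)
    return "JALR64 $zero, $rs";
  if (HasMips32r6)
    return "JALR $zero, $rs";
  if (InMicroMips)
    return "JR_MM $rs";
  return "JR $rs";
}

int main() {
  assert(pickIndirectBranch(true, true, false) == "JALR64 $zero, $rs");
  assert(pickIndirectBranch(false, true, false) == "JALR $zero, $rs");
  assert(pickIndirectBranch(false, false, true) == "JR_MM $rs");
  assert(pickIndirectBranch(false, false, false) == "JR $rs");
  return 0;
}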
+
void MipsAsmPrinter::EmitInstruction(const MachineInstr *MI) {
+ MipsTargetStreamer &TS = getTargetStreamer();
+ TS.setCanHaveModuleDir(false);
+
if (MI->isDebugValue()) {
SmallString<128> Str;
raw_svector_ostream OS(Str);
@@ -141,6 +180,14 @@ void MipsAsmPrinter::EmitInstruction(const MachineInstr *MI) {
if (emitPseudoExpansionLowering(OutStreamer, &*I))
continue;
+ if (I->getOpcode() == Mips::PseudoReturn ||
+ I->getOpcode() == Mips::PseudoReturn64 ||
+ I->getOpcode() == Mips::PseudoIndirectBranch ||
+ I->getOpcode() == Mips::PseudoIndirectBranch64) {
+ emitPseudoIndirectBranch(OutStreamer, &*I);
+ continue;
+ }
+
// The inMips16Mode() test is not permanent.
// Some instructions are marked as pseudo right now which
// would make the test fail for the wrong reason but
@@ -657,6 +704,13 @@ void MipsAsmPrinter::EmitStartOfAsmFile(Module &M) {
OutContext.getELFSection(".gcc_compiled_long64", ELF::SHT_PROGBITS, 0,
SectionKind::getDataRel()));
}
+
+ getTargetStreamer().updateABIInfo(*Subtarget);
+ getTargetStreamer().emitDirectiveModuleFP();
+
+ if (Subtarget->isABI_O32())
+ getTargetStreamer().emitDirectiveModuleOddSPReg(Subtarget->useOddSPReg(),
+ Subtarget->isABI_O32());
}
void MipsAsmPrinter::EmitJal(MCSymbol *Symbol) {
@@ -852,7 +906,7 @@ void MipsAsmPrinter::EmitFPCallStub(
TS.emitDirectiveSetNoMicroMips();
//
// .ent __call_stub_fp_xxxx
- // .type __call_stub_fp_xxxx,@function
+ // .type __call_stub_fp_xxxx,@function
// __call_stub_fp_xxxx:
//
std::string x = "__call_stub_fp_" + std::string(Symbol);
diff --git a/lib/Target/Mips/MipsAsmPrinter.h b/lib/Target/Mips/MipsAsmPrinter.h
index e82b145..967aa0b 100644
--- a/lib/Target/Mips/MipsAsmPrinter.h
+++ b/lib/Target/Mips/MipsAsmPrinter.h
@@ -40,6 +40,12 @@ private:
bool emitPseudoExpansionLowering(MCStreamer &OutStreamer,
const MachineInstr *MI);
+ // Emit PseudoReturn, PseudoReturn64, PseudoIndirectBranch,
+ // and PseudoIndirectBranch64 as a JR, JR_MM, JALR, or JALR64 as appropriate
+ // for the target.
+ void emitPseudoIndirectBranch(MCStreamer &OutStreamer,
+ const MachineInstr *MI);
+
// lowerOperand - Convert a MachineOperand into the equivalent MCOperand.
bool lowerOperand(const MachineOperand &MO, MCOperand &MCOp);
diff --git a/lib/Target/Mips/MipsCallingConv.td b/lib/Target/Mips/MipsCallingConv.td
index c83d880..007213c 100644
--- a/lib/Target/Mips/MipsCallingConv.td
+++ b/lib/Target/Mips/MipsCallingConv.td
@@ -239,6 +239,11 @@ def RetCC_Mips : CallingConv<[
def CSR_SingleFloatOnly : CalleeSavedRegs<(add (sequence "F%u", 31, 20), RA, FP,
(sequence "S%u", 7, 0))>;
+def CSR_O32_FPXX : CalleeSavedRegs<(add (sequence "D%u", 15, 10), RA, FP,
+ (sequence "S%u", 7, 0))> {
+ let OtherPreserved = (add (decimate (sequence "F%u", 30, 20), 2));
+}
+
def CSR_O32 : CalleeSavedRegs<(add (sequence "D%u", 15, 10), RA, FP,
(sequence "S%u", 7, 0))>;
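Note: the OtherPreserved set in CSR_O32_FPXX is built with decimate over the descending sequence F30..F20 with a stride of 2, which should expand to the even-numbered single-precision registers F30, F28, ..., F20 (assuming decimate keeps every second element starting from the first). A throwaway expansion check:

#include <cassert>
#include <string>
#include <vector>

// Expand (decimate (sequence "F%u", 30, 20), 2): walk F30 down to F20 and
// keep every second entry, i.e. the even-numbered single-precision registers.
int main() {
  std::vector<std::string> Regs;
  for (int I = 30; I >= 20; I -= 2)
    Regs.push_back("F" + std::to_string(I));
  assert(Regs.size() == 6);
  assert(Regs.front() == "F30" && Regs.back() == "F20");
  return 0;
}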
diff --git a/lib/Target/Mips/MipsCodeEmitter.cpp b/lib/Target/Mips/MipsCodeEmitter.cpp
index 13fa546..151ef13 100644
--- a/lib/Target/Mips/MipsCodeEmitter.cpp
+++ b/lib/Target/Mips/MipsCodeEmitter.cpp
@@ -124,6 +124,7 @@ private:
unsigned getSizeInsEncoding(const MachineInstr &MI, unsigned OpNo) const;
unsigned getLSAImmEncoding(const MachineInstr &MI, unsigned OpNo) const;
unsigned getSimm19Lsl2Encoding(const MachineInstr &MI, unsigned OpNo) const;
+ unsigned getSimm18Lsl3Encoding(const MachineInstr &MI, unsigned OpNo) const;
/// Expand pseudo instructions with accumulator register operands.
void expandACCInstr(MachineBasicBlock::instr_iterator MI,
@@ -273,6 +274,12 @@ unsigned MipsCodeEmitter::getLSAImmEncoding(const MachineInstr &MI,
return 0;
}
+unsigned MipsCodeEmitter::getSimm18Lsl3Encoding(const MachineInstr &MI,
+ unsigned OpNo) const {
+ llvm_unreachable("Unimplemented function.");
+ return 0;
+}
+
unsigned MipsCodeEmitter::getSimm19Lsl2Encoding(const MachineInstr &MI,
unsigned OpNo) const {
llvm_unreachable("Unimplemented function.");
diff --git a/lib/Target/Mips/MipsCondMov.td b/lib/Target/Mips/MipsCondMov.td
index 7177f65..690f626 100644
--- a/lib/Target/Mips/MipsCondMov.td
+++ b/lib/Target/Mips/MipsCondMov.td
@@ -104,136 +104,162 @@ multiclass MovnPats<RegisterClass CRC, RegisterClass DRC, Instruction MOVNInst,
// Instantiation of instructions.
def MOVZ_I_I : MMRel, CMov_I_I_FT<"movz", GPR32Opnd, GPR32Opnd, II_MOVZ>,
- ADD_FM<0, 0xa>, INSN_MIPS4_32;
+ ADD_FM<0, 0xa>, INSN_MIPS4_32_NOT_32R6_64R6;
let isCodeGenOnly = 1 in {
def MOVZ_I_I64 : CMov_I_I_FT<"movz", GPR32Opnd, GPR64Opnd, II_MOVZ>,
- ADD_FM<0, 0xa>;
+ ADD_FM<0, 0xa>, INSN_MIPS4_32_NOT_32R6_64R6;
def MOVZ_I64_I : CMov_I_I_FT<"movz", GPR64Opnd, GPR32Opnd, II_MOVZ>,
- ADD_FM<0, 0xa>;
+ ADD_FM<0, 0xa>, INSN_MIPS4_32_NOT_32R6_64R6;
def MOVZ_I64_I64 : CMov_I_I_FT<"movz", GPR64Opnd, GPR64Opnd, II_MOVZ>,
- ADD_FM<0, 0xa>;
+ ADD_FM<0, 0xa>, INSN_MIPS4_32_NOT_32R6_64R6;
}
def MOVN_I_I : MMRel, CMov_I_I_FT<"movn", GPR32Opnd, GPR32Opnd, II_MOVN>,
- ADD_FM<0, 0xb>, INSN_MIPS4_32;
+ ADD_FM<0, 0xb>, INSN_MIPS4_32_NOT_32R6_64R6;
let isCodeGenOnly = 1 in {
def MOVN_I_I64 : CMov_I_I_FT<"movn", GPR32Opnd, GPR64Opnd, II_MOVN>,
- ADD_FM<0, 0xb>;
+ ADD_FM<0, 0xb>, INSN_MIPS4_32_NOT_32R6_64R6;
def MOVN_I64_I : CMov_I_I_FT<"movn", GPR64Opnd, GPR32Opnd, II_MOVN>,
- ADD_FM<0, 0xb>;
+ ADD_FM<0, 0xb>, INSN_MIPS4_32_NOT_32R6_64R6;
def MOVN_I64_I64 : CMov_I_I_FT<"movn", GPR64Opnd, GPR64Opnd, II_MOVN>,
- ADD_FM<0, 0xb>;
+ ADD_FM<0, 0xb>, INSN_MIPS4_32_NOT_32R6_64R6;
}
def MOVZ_I_S : MMRel, CMov_I_F_FT<"movz.s", GPR32Opnd, FGR32Opnd, II_MOVZ_S>,
- CMov_I_F_FM<18, 16>, INSN_MIPS4_32;
+ CMov_I_F_FM<18, 16>, INSN_MIPS4_32_NOT_32R6_64R6;
let isCodeGenOnly = 1 in
def MOVZ_I64_S : CMov_I_F_FT<"movz.s", GPR64Opnd, FGR32Opnd, II_MOVZ_S>,
- CMov_I_F_FM<18, 16>, AdditionalRequires<[HasMips64]>;
+ CMov_I_F_FM<18, 16>, INSN_MIPS4_32_NOT_32R6_64R6,
+ AdditionalRequires<[HasMips64]>;
def MOVN_I_S : MMRel, CMov_I_F_FT<"movn.s", GPR32Opnd, FGR32Opnd, II_MOVN_S>,
- CMov_I_F_FM<19, 16>, INSN_MIPS4_32;
+ CMov_I_F_FM<19, 16>, INSN_MIPS4_32_NOT_32R6_64R6;
let isCodeGenOnly = 1 in
def MOVN_I64_S : CMov_I_F_FT<"movn.s", GPR64Opnd, FGR32Opnd, II_MOVN_S>,
- CMov_I_F_FM<19, 16>, AdditionalRequires<[IsGP64bit]>;
+ CMov_I_F_FM<19, 16>, INSN_MIPS4_32_NOT_32R6_64R6,
+ AdditionalRequires<[IsGP64bit]>;
def MOVZ_I_D32 : MMRel, CMov_I_F_FT<"movz.d", GPR32Opnd, AFGR64Opnd,
II_MOVZ_D>, CMov_I_F_FM<18, 17>,
- INSN_MIPS4_32, FGR_32;
+ INSN_MIPS4_32_NOT_32R6_64R6, FGR_32;
def MOVN_I_D32 : MMRel, CMov_I_F_FT<"movn.d", GPR32Opnd, AFGR64Opnd,
II_MOVN_D>, CMov_I_F_FM<19, 17>,
- INSN_MIPS4_32, FGR_32;
+ INSN_MIPS4_32_NOT_32R6_64R6, FGR_32;
let DecoderNamespace = "Mips64" in {
def MOVZ_I_D64 : CMov_I_F_FT<"movz.d", GPR32Opnd, FGR64Opnd, II_MOVZ_D>,
- CMov_I_F_FM<18, 17>, INSN_MIPS4_32, FGR_64;
+ CMov_I_F_FM<18, 17>, INSN_MIPS4_32_NOT_32R6_64R6, FGR_64;
def MOVN_I_D64 : CMov_I_F_FT<"movn.d", GPR32Opnd, FGR64Opnd, II_MOVN_D>,
- CMov_I_F_FM<19, 17>, INSN_MIPS4_32, FGR_64;
+ CMov_I_F_FM<19, 17>, INSN_MIPS4_32_NOT_32R6_64R6, FGR_64;
let isCodeGenOnly = 1 in {
- def MOVZ_I64_D64 : CMov_I_F_FT<"movz.d", GPR64Opnd, FGR64Opnd,
- II_MOVZ_D>, CMov_I_F_FM<18, 17>, FGR_64;
- def MOVN_I64_D64 : CMov_I_F_FT<"movn.d", GPR64Opnd, FGR64Opnd,
- II_MOVN_D>, CMov_I_F_FM<19, 17>, FGR_64;
+ def MOVZ_I64_D64 : CMov_I_F_FT<"movz.d", GPR64Opnd, FGR64Opnd, II_MOVZ_D>,
+ CMov_I_F_FM<18, 17>, INSN_MIPS4_32_NOT_32R6_64R6, FGR_64;
+ def MOVN_I64_D64 : CMov_I_F_FT<"movn.d", GPR64Opnd, FGR64Opnd, II_MOVN_D>,
+ CMov_I_F_FM<19, 17>, INSN_MIPS4_32_NOT_32R6_64R6, FGR_64;
}
}
def MOVT_I : MMRel, CMov_F_I_FT<"movt", GPR32Opnd, II_MOVT, MipsCMovFP_T>,
- CMov_F_I_FM<1>, INSN_MIPS4_32;
+ CMov_F_I_FM<1>, INSN_MIPS4_32_NOT_32R6_64R6;
let isCodeGenOnly = 1 in
def MOVT_I64 : CMov_F_I_FT<"movt", GPR64Opnd, II_MOVT, MipsCMovFP_T>,
- CMov_F_I_FM<1>, AdditionalRequires<[IsGP64bit]>;
+ CMov_F_I_FM<1>, INSN_MIPS4_32_NOT_32R6_64R6,
+ AdditionalRequires<[IsGP64bit]>;
def MOVF_I : MMRel, CMov_F_I_FT<"movf", GPR32Opnd, II_MOVF, MipsCMovFP_F>,
- CMov_F_I_FM<0>, INSN_MIPS4_32;
+ CMov_F_I_FM<0>, INSN_MIPS4_32_NOT_32R6_64R6;
let isCodeGenOnly = 1 in
def MOVF_I64 : CMov_F_I_FT<"movf", GPR64Opnd, II_MOVF, MipsCMovFP_F>,
- CMov_F_I_FM<0>, AdditionalRequires<[IsGP64bit]>;
+ CMov_F_I_FM<0>, INSN_MIPS4_32_NOT_32R6_64R6,
+ AdditionalRequires<[IsGP64bit]>;
def MOVT_S : MMRel, CMov_F_F_FT<"movt.s", FGR32Opnd, II_MOVT_S, MipsCMovFP_T>,
- CMov_F_F_FM<16, 1>, INSN_MIPS4_32;
+ CMov_F_F_FM<16, 1>, INSN_MIPS4_32_NOT_32R6_64R6;
def MOVF_S : MMRel, CMov_F_F_FT<"movf.s", FGR32Opnd, II_MOVF_S, MipsCMovFP_F>,
- CMov_F_F_FM<16, 0>, INSN_MIPS4_32;
+ CMov_F_F_FM<16, 0>, INSN_MIPS4_32_NOT_32R6_64R6;
def MOVT_D32 : MMRel, CMov_F_F_FT<"movt.d", AFGR64Opnd, II_MOVT_D,
MipsCMovFP_T>, CMov_F_F_FM<17, 1>,
- INSN_MIPS4_32, FGR_32;
+ INSN_MIPS4_32_NOT_32R6_64R6, FGR_32;
def MOVF_D32 : MMRel, CMov_F_F_FT<"movf.d", AFGR64Opnd, II_MOVF_D,
MipsCMovFP_F>, CMov_F_F_FM<17, 0>,
- INSN_MIPS4_32, FGR_32;
+ INSN_MIPS4_32_NOT_32R6_64R6, FGR_32;
let DecoderNamespace = "Mips64" in {
def MOVT_D64 : CMov_F_F_FT<"movt.d", FGR64Opnd, II_MOVT_D, MipsCMovFP_T>,
- CMov_F_F_FM<17, 1>, INSN_MIPS4_32, FGR_64;
+ CMov_F_F_FM<17, 1>, INSN_MIPS4_32_NOT_32R6_64R6, FGR_64;
def MOVF_D64 : CMov_F_F_FT<"movf.d", FGR64Opnd, II_MOVF_D, MipsCMovFP_F>,
- CMov_F_F_FM<17, 0>, INSN_MIPS4_32, FGR_64;
+ CMov_F_F_FM<17, 0>, INSN_MIPS4_32_NOT_32R6_64R6, FGR_64;
}
// Instantiation of conditional move patterns.
-defm : MovzPats0<GPR32, GPR32, MOVZ_I_I, SLT, SLTu, SLTi, SLTiu>;
-defm : MovzPats1<GPR32, GPR32, MOVZ_I_I, XOR>;
-defm : MovzPats2<GPR32, GPR32, MOVZ_I_I, XORi>;
+defm : MovzPats0<GPR32, GPR32, MOVZ_I_I, SLT, SLTu, SLTi, SLTiu>,
+ INSN_MIPS4_32_NOT_32R6_64R6;
+defm : MovzPats1<GPR32, GPR32, MOVZ_I_I, XOR>, INSN_MIPS4_32_NOT_32R6_64R6;
+defm : MovzPats2<GPR32, GPR32, MOVZ_I_I, XORi>, INSN_MIPS4_32_NOT_32R6_64R6;
-defm : MovzPats0<GPR32, GPR64, MOVZ_I_I64, SLT, SLTu, SLTi, SLTiu>, GPR_64;
+defm : MovzPats0<GPR32, GPR64, MOVZ_I_I64, SLT, SLTu, SLTi, SLTiu>,
+ INSN_MIPS4_32_NOT_32R6_64R6, GPR_64;
defm : MovzPats0<GPR64, GPR32, MOVZ_I_I, SLT64, SLTu64, SLTi64, SLTiu64>,
- GPR_64;
+ INSN_MIPS4_32_NOT_32R6_64R6, GPR_64;
defm : MovzPats0<GPR64, GPR64, MOVZ_I_I64, SLT64, SLTu64, SLTi64, SLTiu64>,
+ INSN_MIPS4_32_NOT_32R6_64R6, GPR_64;
+defm : MovzPats1<GPR32, GPR64, MOVZ_I_I64, XOR>,
+ INSN_MIPS4_32_NOT_32R6_64R6, GPR_64;
+defm : MovzPats1<GPR64, GPR32, MOVZ_I64_I, XOR64>,
+ INSN_MIPS4_32_NOT_32R6_64R6, GPR_64;
+defm : MovzPats1<GPR64, GPR64, MOVZ_I64_I64, XOR64>,
+ INSN_MIPS4_32_NOT_32R6_64R6, GPR_64;
+defm : MovzPats2<GPR32, GPR64, MOVZ_I_I64, XORi>,
+ INSN_MIPS4_32_NOT_32R6_64R6, GPR_64;
+defm : MovzPats2<GPR64, GPR32, MOVZ_I64_I, XORi64>,
+ INSN_MIPS4_32_NOT_32R6_64R6, GPR_64;
+defm : MovzPats2<GPR64, GPR64, MOVZ_I64_I64, XORi64>,
+ INSN_MIPS4_32_NOT_32R6_64R6, GPR_64;
+
+defm : MovnPats<GPR32, GPR32, MOVN_I_I, XOR>, INSN_MIPS4_32_NOT_32R6_64R6;
+
+defm : MovnPats<GPR32, GPR64, MOVN_I_I64, XOR>, INSN_MIPS4_32_NOT_32R6_64R6,
+ GPR_64;
+defm : MovnPats<GPR64, GPR32, MOVN_I64_I, XOR64>, INSN_MIPS4_32_NOT_32R6_64R6,
+ GPR_64;
+defm : MovnPats<GPR64, GPR64, MOVN_I64_I64, XOR64>, INSN_MIPS4_32_NOT_32R6_64R6,
GPR_64;
-defm : MovzPats1<GPR32, GPR64, MOVZ_I_I64, XOR>, GPR_64;
-defm : MovzPats1<GPR64, GPR32, MOVZ_I64_I, XOR64>, GPR_64;
-defm : MovzPats1<GPR64, GPR64, MOVZ_I64_I64, XOR64>, GPR_64;
-defm : MovzPats2<GPR32, GPR64, MOVZ_I_I64, XORi>, GPR_64;
-defm : MovzPats2<GPR64, GPR32, MOVZ_I64_I, XORi64>, GPR_64;
-defm : MovzPats2<GPR64, GPR64, MOVZ_I64_I64, XORi64>, GPR_64;
-
-defm : MovnPats<GPR32, GPR32, MOVN_I_I, XOR>;
-
-defm : MovnPats<GPR32, GPR64, MOVN_I_I64, XOR>, GPR_64;
-defm : MovnPats<GPR64, GPR32, MOVN_I64_I, XOR64>, GPR_64;
-defm : MovnPats<GPR64, GPR64, MOVN_I64_I64, XOR64>, GPR_64;
-defm : MovzPats0<GPR32, FGR32, MOVZ_I_S, SLT, SLTu, SLTi, SLTiu>;
-defm : MovzPats1<GPR32, FGR32, MOVZ_I_S, XOR>;
-defm : MovnPats<GPR32, FGR32, MOVN_I_S, XOR>;
+defm : MovzPats0<GPR32, FGR32, MOVZ_I_S, SLT, SLTu, SLTi, SLTiu>,
+ INSN_MIPS4_32_NOT_32R6_64R6;
+defm : MovzPats1<GPR32, FGR32, MOVZ_I_S, XOR>, INSN_MIPS4_32_NOT_32R6_64R6;
+defm : MovnPats<GPR32, FGR32, MOVN_I_S, XOR>, INSN_MIPS4_32_NOT_32R6_64R6;
defm : MovzPats0<GPR64, FGR32, MOVZ_I_S, SLT64, SLTu64, SLTi64, SLTiu64>,
+ INSN_MIPS4_32_NOT_32R6_64R6, GPR_64;
+defm : MovzPats1<GPR64, FGR32, MOVZ_I64_S, XOR64>, INSN_MIPS4_32_NOT_32R6_64R6,
+ GPR_64;
+defm : MovnPats<GPR64, FGR32, MOVN_I64_S, XOR64>, INSN_MIPS4_32_NOT_32R6_64R6,
GPR_64;
-defm : MovzPats1<GPR64, FGR32, MOVZ_I64_S, XOR64>, GPR_64;
-defm : MovnPats<GPR64, FGR32, MOVN_I64_S, XOR64>, GPR_64;
-defm : MovzPats0<GPR32, AFGR64, MOVZ_I_D32, SLT, SLTu, SLTi, SLTiu>, FGR_32;
-defm : MovzPats1<GPR32, AFGR64, MOVZ_I_D32, XOR>, FGR_32;
-defm : MovnPats<GPR32, AFGR64, MOVN_I_D32, XOR>, FGR_32;
+defm : MovzPats0<GPR32, AFGR64, MOVZ_I_D32, SLT, SLTu, SLTi, SLTiu>,
+ INSN_MIPS4_32_NOT_32R6_64R6, FGR_32;
+defm : MovzPats1<GPR32, AFGR64, MOVZ_I_D32, XOR>, INSN_MIPS4_32_NOT_32R6_64R6,
+ FGR_32;
+defm : MovnPats<GPR32, AFGR64, MOVN_I_D32, XOR>, INSN_MIPS4_32_NOT_32R6_64R6,
+ FGR_32;
-defm : MovzPats0<GPR32, FGR64, MOVZ_I_D64, SLT, SLTu, SLTi, SLTiu>, FGR_64;
+defm : MovzPats0<GPR32, FGR64, MOVZ_I_D64, SLT, SLTu, SLTi, SLTiu>,
+ INSN_MIPS4_32_NOT_32R6_64R6, FGR_64;
defm : MovzPats0<GPR64, FGR64, MOVZ_I_D64, SLT64, SLTu64, SLTi64, SLTiu64>,
+ INSN_MIPS4_32_NOT_32R6_64R6, FGR_64;
+defm : MovzPats1<GPR32, FGR64, MOVZ_I_D64, XOR>, INSN_MIPS4_32_NOT_32R6_64R6,
+ FGR_64;
+defm : MovzPats1<GPR64, FGR64, MOVZ_I64_D64, XOR64>,
+ INSN_MIPS4_32_NOT_32R6_64R6, FGR_64;
+defm : MovnPats<GPR32, FGR64, MOVN_I_D64, XOR>, INSN_MIPS4_32_NOT_32R6_64R6,
+ FGR_64;
+defm : MovnPats<GPR64, FGR64, MOVN_I64_D64, XOR64>, INSN_MIPS4_32_NOT_32R6_64R6,
FGR_64;
-defm : MovzPats1<GPR32, FGR64, MOVZ_I_D64, XOR>, FGR_64;
-defm : MovzPats1<GPR64, FGR64, MOVZ_I64_D64, XOR64>, FGR_64;
-defm : MovnPats<GPR32, FGR64, MOVN_I_D64, XOR>, FGR_64;
-defm : MovnPats<GPR64, FGR64, MOVN_I64_D64, XOR64>, FGR_64;
diff --git a/lib/Target/Mips/MipsDSPInstrFormats.td b/lib/Target/Mips/MipsDSPInstrFormats.td
index cf09113..b5d52ce 100644
--- a/lib/Target/Mips/MipsDSPInstrFormats.td
+++ b/lib/Target/Mips/MipsDSPInstrFormats.td
@@ -7,9 +7,9 @@
//
//===----------------------------------------------------------------------===//
-def HasDSP : Predicate<"Subtarget.hasDSP()">,
+def HasDSP : Predicate<"Subtarget->hasDSP()">,
AssemblerPredicate<"FeatureDSP">;
-def HasDSPR2 : Predicate<"Subtarget.hasDSPR2()">,
+def HasDSPR2 : Predicate<"Subtarget->hasDSPR2()">,
AssemblerPredicate<"FeatureDSPR2">;
// Fields.
diff --git a/lib/Target/Mips/MipsDelaySlotFiller.cpp b/lib/Target/Mips/MipsDelaySlotFiller.cpp
index d6c7cac..bcfbc12 100644
--- a/lib/Target/Mips/MipsDelaySlotFiller.cpp
+++ b/lib/Target/Mips/MipsDelaySlotFiller.cpp
@@ -23,6 +23,7 @@
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Target/TargetInstrInfo.h"
@@ -177,6 +178,13 @@ namespace {
for (MachineFunction::iterator FI = F.begin(), FE = F.end();
FI != FE; ++FI)
Changed |= runOnMachineBasicBlock(*FI);
+
+ // This pass invalidates liveness information when it reorders
+ // instructions to fill delay slots. Without this, -verify-machineinstrs
+ // will fail.
+ if (Changed)
+ F.getRegInfo().invalidateLiveness();
+
return Changed;
}
diff --git a/lib/Target/Mips/MipsFastISel.cpp b/lib/Target/Mips/MipsFastISel.cpp
index 268a0ed..617801b 100644
--- a/lib/Target/Mips/MipsFastISel.cpp
+++ b/lib/Target/Mips/MipsFastISel.cpp
@@ -12,6 +12,7 @@
#include "MipsISelLowering.h"
#include "MipsMachineFunction.h"
#include "MipsSubtarget.h"
+#include "MipsTargetMachine.h"
using namespace llvm;
@@ -36,11 +37,11 @@ class MipsFastISel final : public FastISel {
/// Subtarget - Keep a pointer to the MipsSubtarget around so that we can
/// make the right decision when generating code for different targets.
- const MipsSubtarget *Subtarget;
Module &M;
const TargetMachine &TM;
const TargetInstrInfo &TII;
const TargetLowering &TLI;
+ const MipsSubtarget *Subtarget;
MipsFunctionInfo *MFI;
// Convenience variables to avoid some queries.
@@ -54,8 +55,8 @@ public:
: FastISel(funcInfo, libInfo),
M(const_cast<Module &>(*funcInfo.Fn->getParent())),
TM(funcInfo.MF->getTarget()), TII(*TM.getInstrInfo()),
- TLI(*TM.getTargetLowering()) {
- Subtarget = &TM.getSubtarget<MipsSubtarget>();
+ TLI(*TM.getTargetLowering()),
+ Subtarget(&TM.getSubtarget<MipsSubtarget>()) {
MFI = funcInfo.MF->getInfo<MipsFunctionInfo>();
Context = &funcInfo.Fn->getContext();
TargetSupported = ((Subtarget->getRelocationModel() == Reloc::PIC_) &&
@@ -68,8 +69,11 @@ public:
bool ComputeAddress(const Value *Obj, Address &Addr);
private:
+ bool EmitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
+ unsigned Alignment = 0);
bool EmitStore(MVT VT, unsigned SrcReg, Address &Addr,
unsigned Alignment = 0);
+ bool SelectLoad(const Instruction *I);
bool SelectRet(const Instruction *I);
bool SelectStore(const Instruction *I);
@@ -80,6 +84,36 @@ private:
unsigned MaterializeGV(const GlobalValue *GV, MVT VT);
unsigned MaterializeInt(const Constant *C, MVT VT);
unsigned Materialize32BitInt(int64_t Imm, const TargetRegisterClass *RC);
+
+ // For some reason, this default is not generated by TableGen,
+ // so we explicitly generate it here.
+ //
+ unsigned FastEmitInst_riir(uint64_t inst, const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill, uint64_t imm1,
+ uint64_t imm2, unsigned Op3, bool Op3IsKill) {
+ return 0;
+ }
+
+ MachineInstrBuilder EmitInst(unsigned Opc) {
+ return BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc));
+ }
+
+ MachineInstrBuilder EmitInst(unsigned Opc, unsigned DstReg) {
+ return BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc),
+ DstReg);
+ }
+
+ MachineInstrBuilder EmitInstStore(unsigned Opc, unsigned SrcReg,
+ unsigned MemReg, int64_t MemOffset) {
+ return EmitInst(Opc).addReg(SrcReg).addReg(MemReg).addImm(MemOffset);
+ }
+
+ MachineInstrBuilder EmitInstLoad(unsigned Opc, unsigned DstReg,
+ unsigned MemReg, int64_t MemOffset) {
+ return EmitInst(Opc, DstReg).addReg(MemReg).addImm(MemOffset);
+ }
+
+#include "MipsGenFastISel.inc"
};
bool MipsFastISel::isTypeLegal(Type *Ty, MVT &VT) {
@@ -100,6 +134,8 @@ bool MipsFastISel::isLoadTypeLegal(Type *Ty, MVT &VT) {
// We will extend this in a later patch:
// If this is a type that can be sign or zero-extended to a basic operation
// go ahead and accept it now.
+ if (VT == MVT::i8 || VT == MVT::i16)
+ return true;
return false;
}
@@ -116,6 +152,45 @@ bool MipsFastISel::ComputeAddress(const Value *Obj, Address &Addr) {
return Addr.Base.Reg != 0;
}
+bool MipsFastISel::EmitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
+ unsigned Alignment) {
+ //
+ // more cases will be handled here in following patches.
+ //
+ unsigned Opc;
+ switch (VT.SimpleTy) {
+ case MVT::i32: {
+ ResultReg = createResultReg(&Mips::GPR32RegClass);
+ Opc = Mips::LW;
+ break;
+ }
+ case MVT::i16: {
+ ResultReg = createResultReg(&Mips::GPR32RegClass);
+ Opc = Mips::LHu;
+ break;
+ }
+ case MVT::i8: {
+ ResultReg = createResultReg(&Mips::GPR32RegClass);
+ Opc = Mips::LBu;
+ break;
+ }
+ case MVT::f32: {
+ ResultReg = createResultReg(&Mips::FGR32RegClass);
+ Opc = Mips::LWC1;
+ break;
+ }
+ case MVT::f64: {
+ ResultReg = createResultReg(&Mips::AFGR64RegClass);
+ Opc = Mips::LDC1;
+ break;
+ }
+ default:
+ return false;
+ }
+ EmitInstLoad(Opc, ResultReg, Addr.Base.Reg, Addr.Offset);
+ return true;
+}
+
// Materialize a constant into a register, and return the register
// number (or zero if we failed to handle it).
unsigned MipsFastISel::TargetMaterializeConstant(const Constant *C) {
@@ -141,12 +216,49 @@ bool MipsFastISel::EmitStore(MVT VT, unsigned SrcReg, Address &Addr,
//
// more cases will be handled here in following patches.
//
- if (VT != MVT::i32)
+ unsigned Opc;
+ switch (VT.SimpleTy) {
+ case MVT::i8:
+ Opc = Mips::SB;
+ break;
+ case MVT::i16:
+ Opc = Mips::SH;
+ break;
+ case MVT::i32:
+ Opc = Mips::SW;
+ break;
+ case MVT::f32:
+ Opc = Mips::SWC1;
+ break;
+ case MVT::f64:
+ Opc = Mips::SDC1;
+ break;
+ default:
return false;
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Mips::SW))
- .addReg(SrcReg)
- .addReg(Addr.Base.Reg)
- .addImm(Addr.Offset);
+ }
+ EmitInstStore(Opc, SrcReg, Addr.Base.Reg, Addr.Offset);
+ return true;
+}
+
+bool MipsFastISel::SelectLoad(const Instruction *I) {
+ // Atomic loads need special handling.
+ if (cast<LoadInst>(I)->isAtomic())
+ return false;
+
+ // Verify we have a legal type before going any further.
+ MVT VT;
+ if (!isLoadTypeLegal(I->getType(), VT))
+ return false;
+
+ // See if we can handle this address.
+ Address Addr;
+ if (!ComputeAddress(I->getOperand(0), Addr))
+ return false;
+
+ unsigned ResultReg;
+ if (!EmitLoad(VT, ResultReg, Addr, cast<LoadInst>(I)->getAlignment()))
+ return false;
+ UpdateValueMap(I, ResultReg);
return true;
}
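Note: for reference, EmitLoad and EmitStore above boil down to one opcode per legal type. The sketch below only tabulates that mapping with invented helper names (loadOpcodeFor, storeOpcodeFor); register-class selection and the actual MachineInstr emission are left out.

#include <cassert>
#include <string>

// Per-type opcode choices made by EmitLoad/EmitStore: i8/i16/i32 use the GPR
// forms (unsigned extension for the narrow loads), f32/f64 use the CP1 forms.
static std::string loadOpcodeFor(const std::string &Ty) {
  if (Ty == "i8")  return "LBu";
  if (Ty == "i16") return "LHu";
  if (Ty == "i32") return "LW";
  if (Ty == "f32") return "LWC1";
  if (Ty == "f64") return "LDC1";
  return "<unsupported>";
}

static std::string storeOpcodeFor(const std::string &Ty) {
  if (Ty == "i8")  return "SB";
  if (Ty == "i16") return "SH";
  if (Ty == "i32") return "SW";
  if (Ty == "f32") return "SWC1";
  if (Ty == "f64") return "SDC1";
  return "<unsupported>";
}

int main() {
  assert(loadOpcodeFor("i16") == "LHu" && storeOpcodeFor("i16") == "SH");
  assert(loadOpcodeFor("f64") == "LDC1" && storeOpcodeFor("f64") == "SDC1");
  return 0;
}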
@@ -186,8 +298,7 @@ bool MipsFastISel::SelectRet(const Instruction *I) {
if (Ret->getNumOperands() > 0) {
return false;
}
- unsigned RetOpc = Mips::RetRA;
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(RetOpc));
+ EmitInst(Mips::RetRA);
return true;
}
@@ -197,6 +308,8 @@ bool MipsFastISel::TargetSelectInstruction(const Instruction *I) {
switch (I->getOpcode()) {
default:
break;
+ case Instruction::Load:
+ return SelectLoad(I);
case Instruction::Store:
return SelectStore(I);
case Instruction::Ret:
@@ -207,6 +320,22 @@ bool MipsFastISel::TargetSelectInstruction(const Instruction *I) {
}
unsigned MipsFastISel::MaterializeFP(const ConstantFP *CFP, MVT VT) {
+ int64_t Imm = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
+ if (VT == MVT::f32) {
+ const TargetRegisterClass *RC = &Mips::FGR32RegClass;
+ unsigned DestReg = createResultReg(RC);
+ unsigned TempReg = Materialize32BitInt(Imm, &Mips::GPR32RegClass);
+ EmitInst(Mips::MTC1, DestReg).addReg(TempReg);
+ return DestReg;
+ } else if (VT == MVT::f64) {
+ const TargetRegisterClass *RC = &Mips::AFGR64RegClass;
+ unsigned DestReg = createResultReg(RC);
+ unsigned TempReg1 = Materialize32BitInt(Imm >> 32, &Mips::GPR32RegClass);
+ unsigned TempReg2 =
+ Materialize32BitInt(Imm & 0xFFFFFFFF, &Mips::GPR32RegClass);
+ EmitInst(Mips::BuildPairF64, DestReg).addReg(TempReg2).addReg(TempReg1);
+ return DestReg;
+ }
return 0;
}
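Note: MaterializeFP splits an f64 constant into two 32-bit halves, materializes each half as an integer, and reassembles them with BuildPairF64 (MTC1 covers the f32 case). The snippet below shows just the bit-level split, assuming IEEE-754 doubles; it is not FastISel code.

#include <cassert>
#include <cstdint>
#include <cstring>

// The bit-level view behind MaterializeFP: the f64 image is split into high
// and low 32-bit immediates, each of which Materialize32BitInt can build in
// integer registers before BuildPairF64 reassembles the double.
int main() {
  double D = 1.0;
  uint64_t Imm = 0;
  std::memcpy(&Imm, &D, sizeof(Imm));        // bitcastToAPInt equivalent
  uint32_t Hi = uint32_t(Imm >> 32);         // 0x3FF00000 for 1.0
  uint32_t Lo = uint32_t(Imm & 0xFFFFFFFF);  // 0x00000000 for 1.0
  assert(Hi == 0x3FF00000 && Lo == 0);
  return 0;
}

For 1.0 the halves are 0x3FF00000 and 0x00000000, which the LUi/ORi path below can build in at most two instructions each.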
@@ -221,9 +350,8 @@ unsigned MipsFastISel::MaterializeGV(const GlobalValue *GV, MVT VT) {
// TLS not supported at this time.
if (IsThreadLocal)
return 0;
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Mips::LW), DestReg)
- .addReg(MFI->getGlobalBaseReg())
- .addGlobalAddress(GV, 0, MipsII::MO_GOT);
+ EmitInst(Mips::LW, DestReg).addReg(MFI->getGlobalBaseReg()).addGlobalAddress(
+ GV, 0, MipsII::MO_GOT);
return DestReg;
}
unsigned MipsFastISel::MaterializeInt(const Constant *C, MVT VT) {
@@ -245,15 +373,10 @@ unsigned MipsFastISel::Materialize32BitInt(int64_t Imm,
if (isInt<16>(Imm)) {
unsigned Opc = Mips::ADDiu;
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
- .addReg(Mips::ZERO)
- .addImm(Imm);
+ EmitInst(Opc, ResultReg).addReg(Mips::ZERO).addImm(Imm);
return ResultReg;
} else if (isUInt<16>(Imm)) {
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Mips::ORi),
- ResultReg)
- .addReg(Mips::ZERO)
- .addImm(Imm);
+ EmitInst(Mips::ORi, ResultReg).addReg(Mips::ZERO).addImm(Imm);
return ResultReg;
}
unsigned Lo = Imm & 0xFFFF;
@@ -261,16 +384,10 @@ unsigned MipsFastISel::Materialize32BitInt(int64_t Imm,
if (Lo) {
// Both Lo and Hi have nonzero bits.
unsigned TmpReg = createResultReg(RC);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Mips::LUi),
- TmpReg).addImm(Hi);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Mips::ORi),
- ResultReg)
- .addReg(TmpReg)
- .addImm(Lo);
-
+ EmitInst(Mips::LUi, TmpReg).addImm(Hi);
+ EmitInst(Mips::ORi, ResultReg).addReg(TmpReg).addImm(Lo);
} else {
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Mips::LUi),
- ResultReg).addImm(Hi);
+ EmitInst(Mips::LUi, ResultReg).addImm(Hi);
}
return ResultReg;
}
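Note: Materialize32BitInt above distinguishes three shapes of constant: signed 16-bit (one ADDiu from $zero), unsigned 16-bit (one ORi from $zero), and everything else (LUi of the upper half, then ORi of the lower half unless it is zero). A small model of the general LUi/ORi path; materializeViaLuiOri is an illustrative name only.

#include <cassert>
#include <cstdint>

// Rebuild a 32-bit constant the way the LUi/ORi path does: LUi places the
// upper 16 bits in the high half of the register, ORi merges the lower 16
// bits, and the ORi is dropped entirely when those bits are zero.
static uint32_t materializeViaLuiOri(uint32_t Imm, unsigned &NumInsts) {
  uint32_t Lo = Imm & 0xFFFF;
  uint32_t Hi = (Imm >> 16) & 0xFFFF;
  uint32_t Reg = Hi << 16;  // LUi reg, Hi
  NumInsts = 1;
  if (Lo) {
    Reg |= Lo;              // ORi reg, reg, Lo
    ++NumInsts;
  }
  return Reg;
}

int main() {
  unsigned N = 0;
  assert(materializeViaLuiOri(0x12345678, N) == 0x12345678 && N == 2);
  assert(materializeViaLuiOri(0xABCD0000, N) == 0xABCD0000 && N == 1);
  return 0;
}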
diff --git a/lib/Target/Mips/MipsFrameLowering.h b/lib/Target/Mips/MipsFrameLowering.h
index e10a3a5..8e9196c 100644
--- a/lib/Target/Mips/MipsFrameLowering.h
+++ b/lib/Target/Mips/MipsFrameLowering.h
@@ -15,7 +15,6 @@
#define MIPS_FRAMEINFO_H
#include "Mips.h"
-#include "MipsSubtarget.h"
#include "llvm/Target/TargetFrameLowering.h"
namespace llvm {
diff --git a/lib/Target/Mips/MipsISelDAGToDAG.cpp b/lib/Target/Mips/MipsISelDAGToDAG.cpp
index 90cff63..0bdabf3 100644
--- a/lib/Target/Mips/MipsISelDAGToDAG.cpp
+++ b/lib/Target/Mips/MipsISelDAGToDAG.cpp
@@ -47,6 +47,7 @@ using namespace llvm;
//===----------------------------------------------------------------------===//
bool MipsDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
+ Subtarget = &TM.getSubtarget<MipsSubtarget>();
bool Ret = SelectionDAGISel::runOnMachineFunction(MF);
processFunctionAfterISel(MF);
@@ -202,7 +203,7 @@ SDNode* MipsDAGToDAGISel::Select(SDNode *Node) {
#ifndef NDEBUG
case ISD::LOAD:
case ISD::STORE:
- assert((Subtarget.systemSupportsUnalignedAccess() ||
+ assert((Subtarget->systemSupportsUnalignedAccess() ||
cast<MemSDNode>(Node)->getMemoryVT().getSizeInBits() / 8 <=
cast<MemSDNode>(Node)->getAlignment()) &&
"Unexpected unaligned loads/stores.");
diff --git a/lib/Target/Mips/MipsISelDAGToDAG.h b/lib/Target/Mips/MipsISelDAGToDAG.h
index 13becb6..2a6c875 100644
--- a/lib/Target/Mips/MipsISelDAGToDAG.h
+++ b/lib/Target/Mips/MipsISelDAGToDAG.h
@@ -32,7 +32,7 @@ namespace llvm {
class MipsDAGToDAGISel : public SelectionDAGISel {
public:
explicit MipsDAGToDAGISel(MipsTargetMachine &TM)
- : SelectionDAGISel(TM), Subtarget(TM.getSubtarget<MipsSubtarget>()) {}
+ : SelectionDAGISel(TM), Subtarget(&TM.getSubtarget<MipsSubtarget>()) {}
// Pass Name
const char *getPassName() const override {
@@ -46,7 +46,7 @@ protected:
/// Keep a pointer to the MipsSubtarget around so that we can make the right
/// decision when generating code for different targets.
- const MipsSubtarget &Subtarget;
+ const MipsSubtarget *Subtarget;
private:
// Include the pieces autogenerated from the target description.
diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp
index bfe5ea1..b7af2d4 100644
--- a/lib/Target/Mips/MipsISelLowering.cpp
+++ b/lib/Target/Mips/MipsISelLowering.cpp
@@ -215,6 +215,11 @@ MipsTargetLowering::MipsTargetLowering(MipsTargetMachine &TM)
// setcc operations results (slt, sgt, ...).
setBooleanContents(ZeroOrOneBooleanContent);
setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
+ // The cmp.cond.fmt instruction in MIPS32r6/MIPS64r6 uses 0 and -1 like MSA
+ // does. Integer booleans still use 0 and 1.
+ if (Subtarget->hasMips32r6())
+ setBooleanContents(ZeroOrOneBooleanContent,
+ ZeroOrNegativeOneBooleanContent);
// Load extended operations for i1 types must be promoted
setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote);
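Note: the new setBooleanContents call above records two encodings: integer setcc results stay 0/1, while the R6 cmp.cond.fmt comparisons (like MSA) produce an all-ones mask for true. A tiny illustration of the difference, with the semantics assumed rather than taken from the instruction definitions:

#include <cassert>
#include <cstdint>

// 0/1 versus 0/-1 boolean encodings as described in the comment above.
int main() {
  bool Cond = true;
  uint32_t IntSetcc = Cond ? 1u : 0u;    // ZeroOrOneBooleanContent
  uint32_t FPCmpMask = Cond ? ~0u : 0u;  // ZeroOrNegativeOneBooleanContent
  assert(IntSetcc == 1 && FPCmpMask == 0xFFFFFFFFu);
  return 0;
}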
@@ -251,7 +256,7 @@ MipsTargetLowering::MipsTargetLowering(MipsTargetMachine &TM)
setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
- if (isGP64bit()) {
+ if (Subtarget->isGP64bit()) {
setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
setOperationAction(ISD::BlockAddress, MVT::i64, Custom);
setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
@@ -263,14 +268,14 @@ MipsTargetLowering::MipsTargetLowering(MipsTargetMachine &TM)
setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
}
- if (!isGP64bit()) {
+ if (!Subtarget->isGP64bit()) {
setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
}
setOperationAction(ISD::ADD, MVT::i32, Custom);
- if (isGP64bit())
+ if (Subtarget->isGP64bit())
setOperationAction(ISD::ADD, MVT::i64, Custom);
setOperationAction(ISD::SDIV, MVT::i32, Expand);
@@ -287,7 +292,8 @@ MipsTargetLowering::MipsTargetLowering(MipsTargetMachine &TM)
setOperationAction(ISD::BR_CC, MVT::f64, Expand);
setOperationAction(ISD::BR_CC, MVT::i32, Expand);
setOperationAction(ISD::BR_CC, MVT::i64, Expand);
- setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
+ setOperationAction(ISD::SELECT_CC, MVT::i32, Expand);
+ setOperationAction(ISD::SELECT_CC, MVT::i64, Expand);
setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);
setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
@@ -368,7 +374,7 @@ MipsTargetLowering::MipsTargetLowering(MipsTargetMachine &TM)
if (!Subtarget->hasMips64r2())
setOperationAction(ISD::BSWAP, MVT::i64, Expand);
- if (isGP64bit()) {
+ if (Subtarget->isGP64bit()) {
setLoadExtAction(ISD::SEXTLOAD, MVT::i32, Custom);
setLoadExtAction(ISD::ZEXTLOAD, MVT::i32, Custom);
setLoadExtAction(ISD::EXTLOAD, MVT::i32, Custom);
@@ -384,12 +390,13 @@ MipsTargetLowering::MipsTargetLowering(MipsTargetMachine &TM)
setTargetDAGCombine(ISD::OR);
setTargetDAGCombine(ISD::ADD);
- setMinFunctionAlignment(isGP64bit() ? 3 : 2);
+ setMinFunctionAlignment(Subtarget->isGP64bit() ? 3 : 2);
- setStackPointerRegisterToSaveRestore(isN64() ? Mips::SP_64 : Mips::SP);
+ setStackPointerRegisterToSaveRestore(Subtarget->isABI_N64() ? Mips::SP_64
+ : Mips::SP);
- setExceptionPointerRegister(isN64() ? Mips::A0_64 : Mips::A0);
- setExceptionSelectorRegister(isN64() ? Mips::A1_64 : Mips::A1);
+ setExceptionPointerRegister(Subtarget->isABI_N64() ? Mips::A0_64 : Mips::A0);
+ setExceptionSelectorRegister(Subtarget->isABI_N64() ? Mips::A1_64 : Mips::A1);
MaxStoresPerMemcpy = 16;
@@ -815,10 +822,10 @@ addLiveIn(MachineFunction &MF, unsigned PReg, const TargetRegisterClass *RC)
return VReg;
}
-static MachineBasicBlock *expandPseudoDIV(MachineInstr *MI,
- MachineBasicBlock &MBB,
- const TargetInstrInfo &TII,
- bool Is64Bit) {
+static MachineBasicBlock *insertDivByZeroTrap(MachineInstr *MI,
+ MachineBasicBlock &MBB,
+ const TargetInstrInfo &TII,
+ bool Is64Bit) {
if (NoZeroDivCheck)
return &MBB;
@@ -836,6 +843,10 @@ static MachineBasicBlock *expandPseudoDIV(MachineInstr *MI,
// Clear Divisor's kill flag.
Divisor.setIsKill(false);
+
+ // We would normally delete the original instruction here but in this case
+ // we only needed to inject an additional instruction rather than replace it.
+
return &MBB;
}
@@ -918,10 +929,22 @@ MipsTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
return emitAtomicCmpSwap(MI, BB, 8);
case Mips::PseudoSDIV:
case Mips::PseudoUDIV:
- return expandPseudoDIV(MI, *BB, *getTargetMachine().getInstrInfo(), false);
+ case Mips::DIV:
+ case Mips::DIVU:
+ case Mips::MOD:
+ case Mips::MODU:
+ return insertDivByZeroTrap(MI, *BB, *getTargetMachine().getInstrInfo(),
+ false);
case Mips::PseudoDSDIV:
case Mips::PseudoDUDIV:
- return expandPseudoDIV(MI, *BB, *getTargetMachine().getInstrInfo(), true);
+ case Mips::DDIV:
+ case Mips::DDIVU:
+ case Mips::DMOD:
+ case Mips::DMODU:
+ return insertDivByZeroTrap(MI, *BB, *getTargetMachine().getInstrInfo(),
+ true);
+ case Mips::SEL_D:
+ return emitSEL_D(MI, BB);
}
}
@@ -941,16 +964,20 @@ MipsTargetLowering::emitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
unsigned LL, SC, AND, NOR, ZERO, BEQ;
if (Size == 4) {
- LL = isMicroMips ? Mips::LL_MM : Mips::LL;
- SC = isMicroMips ? Mips::SC_MM : Mips::SC;
+ if (isMicroMips) {
+ LL = Mips::LL_MM;
+ SC = Mips::SC_MM;
+ } else {
+ LL = Subtarget->hasMips32r6() ? Mips::LL : Mips::LL_R6;
+ SC = Subtarget->hasMips32r6() ? Mips::SC : Mips::SC_R6;
+ }
AND = Mips::AND;
NOR = Mips::NOR;
ZERO = Mips::ZERO;
BEQ = Mips::BEQ;
- }
- else {
- LL = Mips::LLD;
- SC = Mips::SCD;
+ } else {
+ LL = Subtarget->hasMips64r6() ? Mips::LLD : Mips::LLD_R6;
+ SC = Subtarget->hasMips64r6() ? Mips::SCD : Mips::SCD_R6;
AND = Mips::AND64;
NOR = Mips::NOR64;
ZERO = Mips::ZERO_64;
@@ -1012,11 +1039,39 @@ MipsTargetLowering::emitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
return exitMBB;
}
-MachineBasicBlock *
-MipsTargetLowering::emitAtomicBinaryPartword(MachineInstr *MI,
- MachineBasicBlock *BB,
- unsigned Size, unsigned BinOpcode,
- bool Nand) const {
+MachineBasicBlock *MipsTargetLowering::emitSignExtendToI32InReg(
+ MachineInstr *MI, MachineBasicBlock *BB, unsigned Size, unsigned DstReg,
+ unsigned SrcReg) const {
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ DebugLoc DL = MI->getDebugLoc();
+
+ if (Subtarget->hasMips32r2() && Size == 1) {
+ BuildMI(BB, DL, TII->get(Mips::SEB), DstReg).addReg(SrcReg);
+ return BB;
+ }
+
+ if (Subtarget->hasMips32r2() && Size == 2) {
+ BuildMI(BB, DL, TII->get(Mips::SEH), DstReg).addReg(SrcReg);
+ return BB;
+ }
+
+ MachineFunction *MF = BB->getParent();
+ MachineRegisterInfo &RegInfo = MF->getRegInfo();
+ const TargetRegisterClass *RC = getRegClassFor(MVT::i32);
+ unsigned ScrReg = RegInfo.createVirtualRegister(RC);
+
+ assert(Size < 32);
+ int64_t ShiftImm = 32 - (Size * 8);
+
+ BuildMI(BB, DL, TII->get(Mips::SLL), ScrReg).addReg(SrcReg).addImm(ShiftImm);
+ BuildMI(BB, DL, TII->get(Mips::SRA), DstReg).addReg(ScrReg).addImm(ShiftImm);
+
+ return BB;
+}
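Note: when SEB/SEH are not available, emitSignExtendToI32InReg falls back to a shift pair: shift left by 32 - 8 * Size, then arithmetic shift right by the same amount (24 for bytes, 16 for halfwords). A minimal model of that arithmetic, assuming two's-complement arithmetic right shifts; signExtendInReg is an illustrative name.

#include <cassert>
#include <cstdint>

// The sll/sra fallback: shifting the low Size bytes up to the top of the
// register and arithmetically shifting them back replicates the sign bit
// through the upper bits.
static int32_t signExtendInReg(uint32_t Src, unsigned Size) {
  unsigned ShiftImm = 32 - Size * 8;            // 24 for i8, 16 for i16
  return int32_t(Src << ShiftImm) >> ShiftImm;  // sll then sra
}

int main() {
  assert(signExtendInReg(0xFFu, 1) == -1);
  assert(signExtendInReg(0x7Fu, 1) == 127);
  assert(signExtendInReg(0x8000u, 2) == -32768);
  return 0;
}

These are the same shift immediates the partword atomics hard-coded (24 for Size == 1, 16 for Size == 2) before the sll/sra pairs were folded into this helper.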
+
+MachineBasicBlock *MipsTargetLowering::emitAtomicBinaryPartword(
+ MachineInstr *MI, MachineBasicBlock *BB, unsigned Size, unsigned BinOpcode,
+ bool Nand) const {
assert((Size == 1 || Size == 2) &&
"Unsupported size for EmitAtomicBinaryPartial.");
@@ -1046,7 +1101,6 @@ MipsTargetLowering::emitAtomicBinaryPartword(MachineInstr *MI,
unsigned StoreVal = RegInfo.createVirtualRegister(RC);
unsigned MaskedOldVal1 = RegInfo.createVirtualRegister(RC);
unsigned SrlRes = RegInfo.createVirtualRegister(RC);
- unsigned SllRes = RegInfo.createVirtualRegister(RC);
unsigned Success = RegInfo.createVirtualRegister(RC);
// insert new blocks after the current block
@@ -1152,19 +1206,14 @@ MipsTargetLowering::emitAtomicBinaryPartword(MachineInstr *MI,
// sinkMBB:
// and maskedoldval1,oldval,mask
// srl srlres,maskedoldval1,shiftamt
- // sll sllres,srlres,24
- // sra dest,sllres,24
+ // sign_extend dest,srlres
BB = sinkMBB;
- int64_t ShiftImm = (Size == 1) ? 24 : 16;
BuildMI(BB, DL, TII->get(Mips::AND), MaskedOldVal1)
.addReg(OldVal).addReg(Mask);
BuildMI(BB, DL, TII->get(Mips::SRLV), SrlRes)
.addReg(MaskedOldVal1).addReg(ShiftAmt);
- BuildMI(BB, DL, TII->get(Mips::SLL), SllRes)
- .addReg(SrlRes).addImm(ShiftImm);
- BuildMI(BB, DL, TII->get(Mips::SRA), Dest)
- .addReg(SllRes).addImm(ShiftImm);
+ BB = emitSignExtendToI32InReg(MI, BB, Size, Dest, SrlRes);
MI->eraseFromParent(); // The instruction is gone now.
@@ -1285,7 +1334,6 @@ MipsTargetLowering::emitAtomicCmpSwapPartword(MachineInstr *MI,
unsigned MaskedOldVal1 = RegInfo.createVirtualRegister(RC);
unsigned StoreVal = RegInfo.createVirtualRegister(RC);
unsigned SrlRes = RegInfo.createVirtualRegister(RC);
- unsigned SllRes = RegInfo.createVirtualRegister(RC);
unsigned Success = RegInfo.createVirtualRegister(RC);
// insert new blocks after the current block
@@ -1382,23 +1430,44 @@ MipsTargetLowering::emitAtomicCmpSwapPartword(MachineInstr *MI,
// sinkMBB:
// srl srlres,maskedoldval0,shiftamt
- // sll sllres,srlres,24
- // sra dest,sllres,24
+ // sign_extend dest,srlres
BB = sinkMBB;
- int64_t ShiftImm = (Size == 1) ? 24 : 16;
BuildMI(BB, DL, TII->get(Mips::SRLV), SrlRes)
.addReg(MaskedOldVal0).addReg(ShiftAmt);
- BuildMI(BB, DL, TII->get(Mips::SLL), SllRes)
- .addReg(SrlRes).addImm(ShiftImm);
- BuildMI(BB, DL, TII->get(Mips::SRA), Dest)
- .addReg(SllRes).addImm(ShiftImm);
+ BB = emitSignExtendToI32InReg(MI, BB, Size, Dest, SrlRes);
MI->eraseFromParent(); // The instruction is gone now.
return exitMBB;
}
+MachineBasicBlock *MipsTargetLowering::emitSEL_D(MachineInstr *MI,
+ MachineBasicBlock *BB) const {
+ MachineFunction *MF = BB->getParent();
+ const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ MachineRegisterInfo &RegInfo = MF->getRegInfo();
+ DebugLoc DL = MI->getDebugLoc();
+ MachineBasicBlock::iterator II(MI);
+
+ unsigned Fc = MI->getOperand(1).getReg();
+ const auto &FGR64RegClass = TRI->getRegClass(Mips::FGR64RegClassID);
+
+ unsigned Fc2 = RegInfo.createVirtualRegister(FGR64RegClass);
+
+ BuildMI(*BB, II, DL, TII->get(Mips::SUBREG_TO_REG), Fc2)
+ .addImm(0)
+ .addReg(Fc)
+ .addImm(Mips::sub_lo);
+
+ // We don't erase the original instruction; we just replace the condition
+ // register with the 64-bit super-register.
+ MI->getOperand(1).setReg(Fc2);
+
+ return BB;
+}
+
//===----------------------------------------------------------------------===//
// Misc Lower Operation implementation
//===----------------------------------------------------------------------===//
@@ -1421,7 +1490,8 @@ SDValue MipsTargetLowering::lowerBR_JT(SDValue Op, SelectionDAG &DAG) const {
0);
Chain = Addr.getValue(1);
- if ((getTargetMachine().getRelocationModel() == Reloc::PIC_) || isN64()) {
+ if ((getTargetMachine().getRelocationModel() == Reloc::PIC_) ||
+ Subtarget->isABI_N64()) {
// For PIC, the sequence is:
// BRIND(load(Jumptable + index) + RelocBase)
// RelocBase can be JumpTable, GOT or some sort of global base.
@@ -1439,6 +1509,7 @@ SDValue MipsTargetLowering::lowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
SDValue Dest = Op.getOperand(2);
SDLoc DL(Op);
+ assert(!Subtarget->hasMips32r6() && !Subtarget->hasMips64r6());
SDValue CondRes = createFPCmp(DAG, Op.getOperand(1));
// Return if flag is not set by a floating point comparison.
@@ -1458,6 +1529,7 @@ SDValue MipsTargetLowering::lowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
SDValue MipsTargetLowering::
lowerSELECT(SDValue Op, SelectionDAG &DAG) const
{
+ assert(!Subtarget->hasMips32r6() && !Subtarget->hasMips64r6());
SDValue Cond = createFPCmp(DAG, Op.getOperand(0));
// Return if flag is not set by a floating point comparison.
@@ -1483,6 +1555,7 @@ lowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const
}
SDValue MipsTargetLowering::lowerSETCC(SDValue Op, SelectionDAG &DAG) const {
+ assert(!Subtarget->hasMips32r6() && !Subtarget->hasMips64r6());
SDValue Cond = createFPCmp(DAG, Op);
assert(Cond.getOpcode() == MipsISD::FPCmp &&
@@ -1502,7 +1575,8 @@ SDValue MipsTargetLowering::lowerGlobalAddress(SDValue Op,
GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
const GlobalValue *GV = N->getGlobal();
- if (getTargetMachine().getRelocationModel() != Reloc::PIC_ && !isN64()) {
+ if (getTargetMachine().getRelocationModel() != Reloc::PIC_ &&
+ !Subtarget->isABI_N64()) {
const MipsTargetObjectFile &TLOF =
(const MipsTargetObjectFile&)getObjFileLowering();
@@ -1521,15 +1595,18 @@ SDValue MipsTargetLowering::lowerGlobalAddress(SDValue Op,
}
if (GV->hasInternalLinkage() || (GV->hasLocalLinkage() && !isa<Function>(GV)))
- return getAddrLocal(N, Ty, DAG, isN32() || isN64());
+ return getAddrLocal(N, Ty, DAG,
+ Subtarget->isABI_N32() || Subtarget->isABI_N64());
if (LargeGOT)
return getAddrGlobalLargeGOT(N, Ty, DAG, MipsII::MO_GOT_HI16,
MipsII::MO_GOT_LO16, DAG.getEntryNode(),
MachinePointerInfo::getGOT());
- return getAddrGlobal(N, Ty, DAG, (isN32() || isN64()) ? MipsII::MO_GOT_DISP
- : MipsII::MO_GOT16,
+ return getAddrGlobal(N, Ty, DAG,
+ (Subtarget->isABI_N32() || Subtarget->isABI_N64())
+ ? MipsII::MO_GOT_DISP
+ : MipsII::MO_GOT16,
DAG.getEntryNode(), MachinePointerInfo::getGOT());
}
@@ -1538,10 +1615,12 @@ SDValue MipsTargetLowering::lowerBlockAddress(SDValue Op,
BlockAddressSDNode *N = cast<BlockAddressSDNode>(Op);
EVT Ty = Op.getValueType();
- if (getTargetMachine().getRelocationModel() != Reloc::PIC_ && !isN64())
+ if (getTargetMachine().getRelocationModel() != Reloc::PIC_ &&
+ !Subtarget->isABI_N64())
return getAddrNonPIC(N, Ty, DAG);
- return getAddrLocal(N, Ty, DAG, isN32() || isN64());
+ return getAddrLocal(N, Ty, DAG,
+ Subtarget->isABI_N32() || Subtarget->isABI_N64());
}
SDValue MipsTargetLowering::
@@ -1579,7 +1658,7 @@ lowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(DL).setChain(DAG.getEntryNode())
- .setCallee(CallingConv::C, PtrTy, TlsGetAddr, &Args, 0);
+ .setCallee(CallingConv::C, PtrTy, TlsGetAddr, std::move(Args), 0);
std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
SDValue Ret = CallResult.first;
@@ -1629,10 +1708,12 @@ lowerJumpTable(SDValue Op, SelectionDAG &DAG) const
JumpTableSDNode *N = cast<JumpTableSDNode>(Op);
EVT Ty = Op.getValueType();
- if (getTargetMachine().getRelocationModel() != Reloc::PIC_ && !isN64())
+ if (getTargetMachine().getRelocationModel() != Reloc::PIC_ &&
+ !Subtarget->isABI_N64())
return getAddrNonPIC(N, Ty, DAG);
- return getAddrLocal(N, Ty, DAG, isN32() || isN64());
+ return getAddrLocal(N, Ty, DAG,
+ Subtarget->isABI_N32() || Subtarget->isABI_N64());
}
SDValue MipsTargetLowering::
@@ -1650,10 +1731,12 @@ lowerConstantPool(SDValue Op, SelectionDAG &DAG) const
ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op);
EVT Ty = Op.getValueType();
- if (getTargetMachine().getRelocationModel() != Reloc::PIC_ && !isN64())
+ if (getTargetMachine().getRelocationModel() != Reloc::PIC_ &&
+ !Subtarget->isABI_N64())
return getAddrNonPIC(N, Ty, DAG);
- return getAddrLocal(N, Ty, DAG, isN32() || isN64());
+ return getAddrLocal(N, Ty, DAG,
+ Subtarget->isABI_N32() || Subtarget->isABI_N64());
}
SDValue MipsTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const {
@@ -1784,8 +1867,9 @@ lowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
MFI->setFrameAddressIsTaken(true);
EVT VT = Op.getValueType();
SDLoc DL(Op);
- SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL,
- isN64() ? Mips::FP_64 : Mips::FP, VT);
+ SDValue FrameAddr =
+ DAG.getCopyFromReg(DAG.getEntryNode(), DL,
+ Subtarget->isABI_N64() ? Mips::FP_64 : Mips::FP, VT);
return FrameAddr;
}
@@ -1801,7 +1885,7 @@ SDValue MipsTargetLowering::lowerRETURNADDR(SDValue Op,
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
MVT VT = Op.getSimpleValueType();
- unsigned RA = isN64() ? Mips::RA_64 : Mips::RA;
+ unsigned RA = Subtarget->isABI_N64() ? Mips::RA_64 : Mips::RA;
MFI->setReturnAddressIsTaken(true);
// Return RA, which contains the return address. Mark it an implicit live-in.
@@ -1823,12 +1907,12 @@ SDValue MipsTargetLowering::lowerEH_RETURN(SDValue Op, SelectionDAG &DAG)
SDValue Offset = Op.getOperand(1);
SDValue Handler = Op.getOperand(2);
SDLoc DL(Op);
- EVT Ty = isN64() ? MVT::i64 : MVT::i32;
+ EVT Ty = Subtarget->isABI_N64() ? MVT::i64 : MVT::i32;
// Store stack offset in V1, store jump target in V0. Glue CopyToReg and
// EH_RETURN nodes, so that instructions are emitted back-to-back.
- unsigned OffsetReg = isN64() ? Mips::V1_64 : Mips::V1;
- unsigned AddrReg = isN64() ? Mips::V0_64 : Mips::V0;
+ unsigned OffsetReg = Subtarget->isABI_N64() ? Mips::V1_64 : Mips::V1;
+ unsigned AddrReg = Subtarget->isABI_N64() ? Mips::V0_64 : Mips::V0;
Chain = DAG.getCopyToReg(Chain, DL, OffsetReg, Offset, SDValue());
Chain = DAG.getCopyToReg(Chain, DL, AddrReg, Handler, Chain.getValue(1));
return DAG.getNode(MipsISD::EH_RETURN, DL, MVT::Other, Chain,
@@ -2256,8 +2340,8 @@ getOpndList(SmallVectorImpl<SDValue> &Ops,
// in PIC mode) allow symbols to be resolved via lazy binding.
// The lazy binding stub requires GP to point to the GOT.
if (IsPICCall && !InternalLinkage) {
- unsigned GPReg = isN64() ? Mips::GP_64 : Mips::GP;
- EVT Ty = isN64() ? MVT::i64 : MVT::i32;
+ unsigned GPReg = Subtarget->isABI_N64() ? Mips::GP_64 : Mips::GP;
+ EVT Ty = Subtarget->isABI_N64() ? MVT::i64 : MVT::i32;
RegsToPass.push_back(std::make_pair(GPReg, getGlobalReg(CLI.DAG, Ty)));
}
@@ -2326,8 +2410,8 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
getTargetMachine(), ArgLocs, *DAG.getContext());
MipsCC::SpecialCallingConvType SpecialCallingConv =
getSpecialCallingConv(Callee);
- MipsCC MipsCCInfo(CallConv, isO32(), Subtarget->isFP64bit(), CCInfo,
- SpecialCallingConv);
+ MipsCC MipsCCInfo(CallConv, Subtarget->isABI_O32(), Subtarget->isFP64bit(),
+ CCInfo, SpecialCallingConv);
MipsCCInfo.analyzeCallOperands(Outs, IsVarArg,
Subtarget->mipsSEUsesSoftFloat(),
@@ -2360,7 +2444,8 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
Chain = DAG.getCALLSEQ_START(Chain, NextStackOffsetVal, DL);
SDValue StackPtr = DAG.getCopyFromReg(
- Chain, DL, isN64() ? Mips::SP_64 : Mips::SP, getPointerTy());
+ Chain, DL, Subtarget->isABI_N64() ? Mips::SP_64 : Mips::SP,
+ getPointerTy());
// With EABI is it possible to have 16 args on registers.
std::deque< std::pair<unsigned, SDValue> > RegsToPass;
@@ -2446,8 +2531,9 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
// direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
// node so that legalize doesn't hack it.
- bool IsPICCall = (isN64() || IsPIC); // true if calls are translated to
- // jalr $25
+ bool IsPICCall =
+ (Subtarget->isABI_N64() || IsPIC); // true if calls are translated to
+ // jalr $25
bool GlobalOrExternal = false, InternalLinkage = false;
SDValue CalleeLo;
EVT Ty = Callee.getValueType();
@@ -2458,7 +2544,8 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
InternalLinkage = Val->hasInternalLinkage();
if (InternalLinkage)
- Callee = getAddrLocal(G, Ty, DAG, isN32() || isN64());
+ Callee = getAddrLocal(G, Ty, DAG,
+ Subtarget->isABI_N32() || Subtarget->isABI_N64());
else if (LargeGOT)
Callee = getAddrGlobalLargeGOT(G, Ty, DAG, MipsII::MO_CALL_HI16,
MipsII::MO_CALL_LO16, Chain,
@@ -2474,7 +2561,7 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
const char *Sym = S->getSymbol();
- if (!isN64() && !IsPIC) // !N64 && static
+ if (!Subtarget->isABI_N64() && !IsPIC) // !N64 && static
Callee = DAG.getTargetExternalSymbol(Sym, getPointerTy(),
MipsII::MO_NO_FLAG);
else if (LargeGOT)
@@ -2525,7 +2612,8 @@ MipsTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
SmallVector<CCValAssign, 16> RVLocs;
CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(),
getTargetMachine(), RVLocs, *DAG.getContext());
- MipsCC MipsCCInfo(CallConv, isO32(), Subtarget->isFP64bit(), CCInfo);
+ MipsCC MipsCCInfo(CallConv, Subtarget->isABI_O32(), Subtarget->isFP64bit(),
+ CCInfo);
MipsCCInfo.analyzeCallResult(Ins, Subtarget->mipsSEUsesSoftFloat(),
CallNode, RetTy);
@@ -2572,7 +2660,8 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain,
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(),
getTargetMachine(), ArgLocs, *DAG.getContext());
- MipsCC MipsCCInfo(CallConv, isO32(), Subtarget->isFP64bit(), CCInfo);
+ MipsCC MipsCCInfo(CallConv, Subtarget->isABI_O32(), Subtarget->isFP64bit(),
+ CCInfo);
Function::const_arg_iterator FuncArg =
DAG.getMachineFunction().getFunction()->arg_begin();
bool UseSoftFloat = Subtarget->mipsSEUsesSoftFloat();
@@ -2634,7 +2723,8 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain,
(RegVT == MVT::i64 && ValVT == MVT::f64) ||
(RegVT == MVT::f64 && ValVT == MVT::i64))
ArgValue = DAG.getNode(ISD::BITCAST, DL, ValVT, ArgValue);
- else if (isO32() && RegVT == MVT::i32 && ValVT == MVT::f64) {
+ else if (Subtarget->isABI_O32() && RegVT == MVT::i32 &&
+ ValVT == MVT::f64) {
unsigned Reg2 = addLiveIn(DAG.getMachineFunction(),
getNextIntArgReg(ArgReg), RC);
SDValue ArgValue2 = DAG.getCopyFromReg(Chain, DL, Reg2, RegVT);
@@ -2672,7 +2762,7 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain,
unsigned Reg = MipsFI->getSRetReturnReg();
if (!Reg) {
Reg = MF.getRegInfo().createVirtualRegister(
- getRegClassFor(isN64() ? MVT::i64 : MVT::i32));
+ getRegClassFor(Subtarget->isABI_N64() ? MVT::i64 : MVT::i32));
MipsFI->setSRetReturnReg(Reg);
}
SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), DL, Reg, InVals[i]);
@@ -2723,7 +2813,8 @@ MipsTargetLowering::LowerReturn(SDValue Chain,
// CCState - Info about the registers and stack slot.
CCState CCInfo(CallConv, IsVarArg, MF, getTargetMachine(), RVLocs,
*DAG.getContext());
- MipsCC MipsCCInfo(CallConv, isO32(), Subtarget->isFP64bit(), CCInfo);
+ MipsCC MipsCCInfo(CallConv, Subtarget->isABI_O32(), Subtarget->isFP64bit(),
+ CCInfo);
// Analyze return values.
MipsCCInfo.analyzeReturn(Outs, Subtarget->mipsSEUsesSoftFloat(),
@@ -2759,7 +2850,7 @@ MipsTargetLowering::LowerReturn(SDValue Chain,
if (!Reg)
llvm_unreachable("sret virtual register not created in the entry block");
SDValue Val = DAG.getCopyFromReg(Chain, DL, Reg, getPointerTy());
- unsigned V0 = isN64() ? Mips::V0_64 : Mips::V0;
+ unsigned V0 = Subtarget->isABI_N64() ? Mips::V0_64 : Mips::V0;
Chain = DAG.getCopyToReg(Chain, DL, V0, Val, Flag);
Flag = Chain.getValue(1);
@@ -2980,9 +3071,9 @@ getRegForInlineAsmConstraint(const std::string &Constraint, MVT VT) const
return std::make_pair(0U, &Mips::CPU16RegsRegClass);
return std::make_pair(0U, &Mips::GPR32RegClass);
}
- if (VT == MVT::i64 && !isGP64bit())
+ if (VT == MVT::i64 && !Subtarget->isGP64bit())
return std::make_pair(0U, &Mips::GPR32RegClass);
- if (VT == MVT::i64 && isGP64bit())
+ if (VT == MVT::i64 && Subtarget->isGP64bit())
return std::make_pair(0U, &Mips::GPR64RegClass);
// This will generate an error message
return std::make_pair(0U, nullptr);
@@ -3169,7 +3260,7 @@ bool MipsTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
}
unsigned MipsTargetLowering::getJumpTableEncoding() const {
- if (isN64())
+ if (Subtarget->isABI_N64())
return MachineJumpTableInfo::EK_GPRel64BlockAddress;
return TargetLowering::getJumpTableEncoding();
diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h
index 4ac33bf..4701bc4 100644
--- a/lib/Target/Mips/MipsISelLowering.h
+++ b/lib/Target/Mips/MipsISelLowering.h
@@ -17,7 +17,6 @@
#include "MCTargetDesc/MipsBaseInfo.h"
#include "Mips.h"
-#include "MipsSubtarget.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/IR/Function.h"
@@ -210,6 +209,7 @@ namespace llvm {
// TargetLowering Implementation
//===--------------------------------------------------------------------===//
class MipsFunctionInfo;
+ class MipsSubtarget;
class MipsTargetLowering : public TargetLowering {
bool isMicroMips;
@@ -438,12 +438,6 @@ namespace llvm {
// Subtarget Info
const MipsSubtarget *Subtarget;
- bool hasMips64() const { return Subtarget->hasMips64(); }
- bool isGP64bit() const { return Subtarget->isGP64bit(); }
- bool isO32() const { return Subtarget->isABI_O32(); }
- bool isN32() const { return Subtarget->isABI_N32(); }
- bool isN64() const { return Subtarget->isABI_N64(); }
-
private:
// Create a TargetGlobalAddress node.
SDValue getTargetNode(GlobalAddressSDNode *N, EVT Ty, SelectionDAG &DAG,
@@ -598,6 +592,12 @@ namespace llvm {
unsigned getJumpTableEncoding() const override;
+ /// Emit a sign-extension using sll/sra, seb, or seh appropriately.
+ MachineBasicBlock *emitSignExtendToI32InReg(MachineInstr *MI,
+ MachineBasicBlock *BB,
+ unsigned Size, unsigned DstReg,
+ unsigned SrcRec) const;
+
MachineBasicBlock *emitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
unsigned Size, unsigned BinOpcode, bool Nand = false) const;
MachineBasicBlock *emitAtomicBinaryPartword(MachineInstr *MI,
@@ -607,6 +607,7 @@ namespace llvm {
MachineBasicBlock *BB, unsigned Size) const;
MachineBasicBlock *emitAtomicCmpSwapPartword(MachineInstr *MI,
MachineBasicBlock *BB, unsigned Size) const;
+ MachineBasicBlock *emitSEL_D(MachineInstr *MI, MachineBasicBlock *BB) const;
};
/// Create MipsTargetLowering objects.
diff --git a/lib/Target/Mips/MipsInstrFPU.td b/lib/Target/Mips/MipsInstrFPU.td
index 32cda3b..2260d53 100644
--- a/lib/Target/Mips/MipsInstrFPU.td
+++ b/lib/Target/Mips/MipsInstrFPU.td
@@ -57,13 +57,13 @@ let PrintMethod = "printFCCOperand", DecoderMethod = "DecodeCondCode" in
// Feature predicates.
//===----------------------------------------------------------------------===//
-def IsFP64bit : Predicate<"Subtarget.isFP64bit()">,
+def IsFP64bit : Predicate<"Subtarget->isFP64bit()">,
AssemblerPredicate<"FeatureFP64Bit">;
-def NotFP64bit : Predicate<"!Subtarget.isFP64bit()">,
+def NotFP64bit : Predicate<"!Subtarget->isFP64bit()">,
AssemblerPredicate<"!FeatureFP64Bit">;
-def IsSingleFloat : Predicate<"Subtarget.isSingleFloat()">,
+def IsSingleFloat : Predicate<"Subtarget->isSingleFloat()">,
AssemblerPredicate<"FeatureSingleFloat">;
-def IsNotSingleFloat : Predicate<"!Subtarget.isSingleFloat()">,
+def IsNotSingleFloat : Predicate<"!Subtarget->isSingleFloat()">,
AssemblerPredicate<"!FeatureSingleFloat">;
//===----------------------------------------------------------------------===//
@@ -153,6 +153,15 @@ class MTC1_FT<string opstr, RegisterOperand DstRC, RegisterOperand SrcRC,
InstSE<(outs DstRC:$fs), (ins SrcRC:$rt), !strconcat(opstr, "\t$rt, $fs"),
[(set DstRC:$fs, (OpNode SrcRC:$rt))], Itin, FrmFR, opstr>;
+class MTC1_64_FT<string opstr, RegisterOperand DstRC, RegisterOperand SrcRC,
+ InstrItinClass Itin> :
+ InstSE<(outs DstRC:$fs), (ins DstRC:$fs_in, SrcRC:$rt),
+ !strconcat(opstr, "\t$rt, $fs"), [], Itin, FrmFR, opstr> {
+ // $fs_in is part of a white lie to work around a widespread bug in the FPU
+ // implementation. See expandBuildPairF64 for details.
+ let Constraints = "$fs = $fs_in";
+}
+
class LW_FT<string opstr, RegisterOperand RC, InstrItinClass Itin,
SDPatternOperator OpNode= null_frag> :
InstSE<(outs RC:$rt), (ins mem:$addr), !strconcat(opstr, "\t$rt, $addr"),
@@ -249,11 +258,11 @@ multiclass C_COND_M<string TypeStr, RegisterOperand RC, bits<5> fmt,
def C_NGT_#NAME : C_COND_FT<"ngt", TypeStr, RC, itin>, C_COND_FM<fmt, 15>;
}
-defm S : C_COND_M<"s", FGR32Opnd, 16, II_C_CC_S>;
-defm D32 : C_COND_M<"d", AFGR64Opnd, 17, II_C_CC_D>,
+defm S : C_COND_M<"s", FGR32Opnd, 16, II_C_CC_S>, ISA_MIPS1_NOT_32R6_64R6;
+defm D32 : C_COND_M<"d", AFGR64Opnd, 17, II_C_CC_D>, ISA_MIPS1_NOT_32R6_64R6,
AdditionalRequires<[NotFP64bit]>;
let DecoderNamespace = "Mips64" in
-defm D64 : C_COND_M<"d", FGR64Opnd, 17, II_C_CC_D>,
+defm D64 : C_COND_M<"d", FGR64Opnd, 17, II_C_CC_D>, ISA_MIPS1_NOT_32R6_64R6,
AdditionalRequires<[IsFP64bit]>;
//===----------------------------------------------------------------------===//
@@ -355,8 +364,12 @@ def MTC1 : MMRel, MTC1_FT<"mtc1", FGR32Opnd, GPR32Opnd, II_MTC1,
bitconvert>, MFC1_FM<4>;
def MFHC1 : MMRel, MFC1_FT<"mfhc1", GPR32Opnd, FGRH32Opnd, II_MFHC1>,
MFC1_FM<3>, ISA_MIPS32R2;
-def MTHC1 : MMRel, MTC1_FT<"mthc1", FGRH32Opnd, GPR32Opnd, II_MTHC1>,
- MFC1_FM<7>, ISA_MIPS32R2;
+def MTHC1_D32 : MMRel, MTC1_64_FT<"mthc1", FGR64Opnd, GPR32Opnd, II_MTHC1>,
+ MFC1_FM<7>, ISA_MIPS32R2, AdditionalRequires<[NotFP64bit]>;
+def MTHC1_D64 : MTC1_64_FT<"mthc1", AFGR64Opnd, GPR32Opnd, II_MTHC1>,
+ MFC1_FM<7>, ISA_MIPS32R2, AdditionalRequires<[IsFP64bit]> {
+ let DecoderNamespace = "Mips64";
+}
def DMFC1 : MFC1_FT<"dmfc1", GPR64Opnd, FGR64Opnd, II_DMFC1,
bitconvert>, MFC1_FM<1>, ISA_MIPS3;
def DMTC1 : MTC1_FT<"dmtc1", FGR64Opnd, GPR64Opnd, II_DMTC1,
@@ -390,56 +403,64 @@ def SDC1 : MMRel, SW_FT<"sdc1", AFGR64Opnd, II_SDC1, store>, LW_FM<0x3d>,
// Cop2 Memory Instructions
// FIXME: These aren't really FPU instructions and as such don't belong in this
// file
-def LWC2 : LW_FT<"lwc2", COP2Opnd, NoItinerary, load>, LW_FM<0x32>;
-def SWC2 : SW_FT<"swc2", COP2Opnd, NoItinerary, store>, LW_FM<0x3a>;
-def LDC2 : LW_FT<"ldc2", COP2Opnd, NoItinerary, load>, LW_FM<0x36>, ISA_MIPS2;
-def SDC2 : SW_FT<"sdc2", COP2Opnd, NoItinerary, store>, LW_FM<0x3e>, ISA_MIPS2;
+def LWC2 : LW_FT<"lwc2", COP2Opnd, NoItinerary, load>, LW_FM<0x32>,
+ ISA_MIPS1_NOT_32R6_64R6;
+def SWC2 : SW_FT<"swc2", COP2Opnd, NoItinerary, store>, LW_FM<0x3a>,
+ ISA_MIPS1_NOT_32R6_64R6;
+def LDC2 : LW_FT<"ldc2", COP2Opnd, NoItinerary, load>, LW_FM<0x36>,
+ ISA_MIPS2_NOT_32R6_64R6;
+def SDC2 : SW_FT<"sdc2", COP2Opnd, NoItinerary, store>, LW_FM<0x3e>,
+ ISA_MIPS2_NOT_32R6_64R6;
// Cop3 Memory Instructions
// FIXME: These aren't really FPU instructions and as such don't belong in this
// file
-def LWC3 : LW_FT<"lwc3", COP3Opnd, NoItinerary, load>, LW_FM<0x33>;
-def SWC3 : SW_FT<"swc3", COP3Opnd, NoItinerary, store>, LW_FM<0x3b>;
-def LDC3 : LW_FT<"ldc3", COP3Opnd, NoItinerary, load>, LW_FM<0x37>, ISA_MIPS2;
-def SDC3 : SW_FT<"sdc3", COP3Opnd, NoItinerary, store>, LW_FM<0x3f>, ISA_MIPS2;
+let DecoderNamespace = "COP3_" in {
+ def LWC3 : LW_FT<"lwc3", COP3Opnd, NoItinerary, load>, LW_FM<0x33>;
+ def SWC3 : SW_FT<"swc3", COP3Opnd, NoItinerary, store>, LW_FM<0x3b>;
+ def LDC3 : LW_FT<"ldc3", COP3Opnd, NoItinerary, load>, LW_FM<0x37>,
+ ISA_MIPS2;
+ def SDC3 : SW_FT<"sdc3", COP3Opnd, NoItinerary, store>, LW_FM<0x3f>,
+ ISA_MIPS2;
+}
// Indexed loads and stores.
// Base register + offset register addressing mode (indicated by "x" in the
// instruction mnemonic) is disallowed under NaCl.
let AdditionalPredicates = [IsNotNaCl] in {
def LWXC1 : MMRel, LWXC1_FT<"lwxc1", FGR32Opnd, II_LWXC1, load>, LWXC1_FM<0>,
- INSN_MIPS4_32R2;
+ INSN_MIPS4_32R2_NOT_32R6_64R6;
def SWXC1 : MMRel, SWXC1_FT<"swxc1", FGR32Opnd, II_SWXC1, store>, SWXC1_FM<8>,
- INSN_MIPS4_32R2;
+ INSN_MIPS4_32R2_NOT_32R6_64R6;
}
let AdditionalPredicates = [NotInMicroMips, IsNotNaCl] in {
def LDXC1 : LWXC1_FT<"ldxc1", AFGR64Opnd, II_LDXC1, load>, LWXC1_FM<1>,
- INSN_MIPS4_32R2, FGR_32;
+ INSN_MIPS4_32R2_NOT_32R6_64R6, FGR_32;
def SDXC1 : SWXC1_FT<"sdxc1", AFGR64Opnd, II_SDXC1, store>, SWXC1_FM<9>,
- INSN_MIPS4_32R2, FGR_32;
+ INSN_MIPS4_32R2_NOT_32R6_64R6, FGR_32;
}
let DecoderNamespace="Mips64" in {
def LDXC164 : LWXC1_FT<"ldxc1", FGR64Opnd, II_LDXC1, load>, LWXC1_FM<1>,
- INSN_MIPS4_32R2, FGR_64;
+ INSN_MIPS4_32R2_NOT_32R6_64R6, FGR_64;
def SDXC164 : SWXC1_FT<"sdxc1", FGR64Opnd, II_SDXC1, store>, SWXC1_FM<9>,
- INSN_MIPS4_32R2, FGR_64;
+ INSN_MIPS4_32R2_NOT_32R6_64R6, FGR_64;
}
// Load/store doubleword indexed unaligned.
let AdditionalPredicates = [IsNotNaCl] in {
def LUXC1 : MMRel, LWXC1_FT<"luxc1", AFGR64Opnd, II_LUXC1>, LWXC1_FM<0x5>,
- INSN_MIPS5_32R2, FGR_32;
+ INSN_MIPS5_32R2_NOT_32R6_64R6, FGR_32;
def SUXC1 : MMRel, SWXC1_FT<"suxc1", AFGR64Opnd, II_SUXC1>, SWXC1_FM<0xd>,
- INSN_MIPS5_32R2, FGR_32;
+ INSN_MIPS5_32R2_NOT_32R6_64R6, FGR_32;
}
let DecoderNamespace="Mips64" in {
def LUXC164 : LWXC1_FT<"luxc1", FGR64Opnd, II_LUXC1>, LWXC1_FM<0x5>,
- INSN_MIPS5_32R2, FGR_64;
+ INSN_MIPS5_32R2_NOT_32R6_64R6, FGR_64;
def SUXC164 : SWXC1_FT<"suxc1", FGR64Opnd, II_SUXC1>, SWXC1_FM<0xd>,
- INSN_MIPS5_32R2, FGR_64;
+ INSN_MIPS5_32R2_NOT_32R6_64R6, FGR_64;
}
/// Floating-point Arithmetic
@@ -457,42 +478,42 @@ def FSUB_S : MMRel, ADDS_FT<"sub.s", FGR32Opnd, II_SUB_S, 0, fsub>,
defm FSUB : ADDS_M<"sub.d", II_SUB_D, 0, fsub>, ADDS_FM<0x01, 17>;
def MADD_S : MMRel, MADDS_FT<"madd.s", FGR32Opnd, II_MADD_S, fadd>,
- MADDS_FM<4, 0>, ISA_MIPS32R2;
+ MADDS_FM<4, 0>, ISA_MIPS32R2_NOT_32R6_64R6;
def MSUB_S : MMRel, MADDS_FT<"msub.s", FGR32Opnd, II_MSUB_S, fsub>,
- MADDS_FM<5, 0>, ISA_MIPS32R2;
+ MADDS_FM<5, 0>, ISA_MIPS32R2_NOT_32R6_64R6;
let AdditionalPredicates = [NoNaNsFPMath] in {
def NMADD_S : MMRel, NMADDS_FT<"nmadd.s", FGR32Opnd, II_NMADD_S, fadd>,
- MADDS_FM<6, 0>, ISA_MIPS32R2;
+ MADDS_FM<6, 0>, ISA_MIPS32R2_NOT_32R6_64R6;
def NMSUB_S : MMRel, NMADDS_FT<"nmsub.s", FGR32Opnd, II_NMSUB_S, fsub>,
- MADDS_FM<7, 0>, ISA_MIPS32R2;
+ MADDS_FM<7, 0>, ISA_MIPS32R2_NOT_32R6_64R6;
}
def MADD_D32 : MMRel, MADDS_FT<"madd.d", AFGR64Opnd, II_MADD_D, fadd>,
- MADDS_FM<4, 1>, ISA_MIPS32R2, FGR_32;
+ MADDS_FM<4, 1>, ISA_MIPS32R2_NOT_32R6_64R6, FGR_32;
def MSUB_D32 : MMRel, MADDS_FT<"msub.d", AFGR64Opnd, II_MSUB_D, fsub>,
- MADDS_FM<5, 1>, ISA_MIPS32R2, FGR_32;
+ MADDS_FM<5, 1>, ISA_MIPS32R2_NOT_32R6_64R6, FGR_32;
let AdditionalPredicates = [NoNaNsFPMath] in {
def NMADD_D32 : MMRel, NMADDS_FT<"nmadd.d", AFGR64Opnd, II_NMADD_D, fadd>,
- MADDS_FM<6, 1>, ISA_MIPS32R2, FGR_32;
+ MADDS_FM<6, 1>, ISA_MIPS32R2_NOT_32R6_64R6, FGR_32;
def NMSUB_D32 : MMRel, NMADDS_FT<"nmsub.d", AFGR64Opnd, II_NMSUB_D, fsub>,
- MADDS_FM<7, 1>, ISA_MIPS32R2, FGR_32;
+ MADDS_FM<7, 1>, ISA_MIPS32R2_NOT_32R6_64R6, FGR_32;
}
let isCodeGenOnly=1 in {
def MADD_D64 : MADDS_FT<"madd.d", FGR64Opnd, II_MADD_D, fadd>,
- MADDS_FM<4, 1>, ISA_MIPS32R2, FGR_64;
+ MADDS_FM<4, 1>, ISA_MIPS32R2_NOT_32R6_64R6, FGR_64;
def MSUB_D64 : MADDS_FT<"msub.d", FGR64Opnd, II_MSUB_D, fsub>,
- MADDS_FM<5, 1>, ISA_MIPS32R2, FGR_64;
+ MADDS_FM<5, 1>, ISA_MIPS32R2_NOT_32R6_64R6, FGR_64;
}
let AdditionalPredicates = [NoNaNsFPMath],
isCodeGenOnly=1 in {
def NMADD_D64 : NMADDS_FT<"nmadd.d", FGR64Opnd, II_NMADD_D, fadd>,
- MADDS_FM<6, 1>, ISA_MIPS32R2, FGR_64;
+ MADDS_FM<6, 1>, ISA_MIPS32R2_NOT_32R6_64R6, FGR_64;
def NMSUB_D64 : NMADDS_FT<"nmsub.d", FGR64Opnd, II_NMSUB_D, fsub>,
- MADDS_FM<7, 1>, ISA_MIPS32R2, FGR_64;
+ MADDS_FM<7, 1>, ISA_MIPS32R2_NOT_32R6_64R6, FGR_64;
}
//===----------------------------------------------------------------------===//
@@ -504,9 +525,9 @@ def MIPS_BRANCH_F : PatLeaf<(i32 0)>;
def MIPS_BRANCH_T : PatLeaf<(i32 1)>;
def BC1F : MMRel, BC1F_FT<"bc1f", brtarget, IIBranch, MIPS_BRANCH_F>,
- BC1F_FM<0, 0>;
+ BC1F_FM<0, 0>, ISA_MIPS1_NOT_32R6_64R6;
def BC1T : MMRel, BC1F_FT<"bc1t", brtarget, IIBranch, MIPS_BRANCH_T>,
- BC1F_FM<0, 1>;
+ BC1F_FM<0, 1>, ISA_MIPS1_NOT_32R6_64R6;
//===----------------------------------------------------------------------===//
// Floating Point Flag Conditions
@@ -531,12 +552,13 @@ def MIPS_FCOND_LE : PatLeaf<(i32 14)>;
def MIPS_FCOND_NGT : PatLeaf<(i32 15)>;
/// Floating Point Compare
-def FCMP_S32 : MMRel, CEQS_FT<"s", FGR32, II_C_CC_S, MipsFPCmp>, CEQS_FM<16>;
+def FCMP_S32 : MMRel, CEQS_FT<"s", FGR32, II_C_CC_S, MipsFPCmp>, CEQS_FM<16>,
+ ISA_MIPS1_NOT_32R6_64R6;
def FCMP_D32 : MMRel, CEQS_FT<"d", AFGR64, II_C_CC_D, MipsFPCmp>, CEQS_FM<17>,
- AdditionalRequires<[NotFP64bit]>;
+ ISA_MIPS1_NOT_32R6_64R6, AdditionalRequires<[NotFP64bit]>;
let DecoderNamespace = "Mips64" in
def FCMP_D64 : CEQS_FT<"d", FGR64, II_C_CC_D, MipsFPCmp>, CEQS_FM<17>,
- AdditionalRequires<[IsFP64bit]>;
+ ISA_MIPS1_NOT_32R6_64R6, AdditionalRequires<[IsFP64bit]>;
//===----------------------------------------------------------------------===//
// Floating Point Pseudo-Instructions
@@ -569,8 +591,10 @@ def ExtractElementF64_64 : ExtractElementF64Base<FGR64Opnd>,
//===----------------------------------------------------------------------===//
// InstAliases.
//===----------------------------------------------------------------------===//
-def : MipsInstAlias<"bc1t $offset", (BC1T FCC0, brtarget:$offset)>;
-def : MipsInstAlias<"bc1f $offset", (BC1F FCC0, brtarget:$offset)>;
+def : MipsInstAlias<"bc1t $offset", (BC1T FCC0, brtarget:$offset)>,
+ ISA_MIPS1_NOT_32R6_64R6;
+def : MipsInstAlias<"bc1f $offset", (BC1F FCC0, brtarget:$offset)>,
+ ISA_MIPS1_NOT_32R6_64R6;
//===----------------------------------------------------------------------===//
// Floating Point Patterns
diff --git a/lib/Target/Mips/MipsInstrFormats.td b/lib/Target/Mips/MipsInstrFormats.td
index 0377eab..6a01ae5 100644
--- a/lib/Target/Mips/MipsInstrFormats.td
+++ b/lib/Target/Mips/MipsInstrFormats.td
@@ -844,6 +844,44 @@ class BARRIER_FM<bits<5> op> : StdArch {
let Inst{5-0} = 0; // SLL
}
+class SDBBP_FM : StdArch {
+ bits<20> code_;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0b011100; // SPECIAL2
+ let Inst{25-6} = code_;
+ let Inst{5-0} = 0b111111; // SDBBP
+}
+
+class JR_HB_FM<bits<6> op> : StdArch {
+ bits<5> rs;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0; // SPECIAL
+ let Inst{25-21} = rs;
+ let Inst{20-11} = 0;
+ let Inst{10} = 1;
+ let Inst{9-6} = 0;
+ let Inst{5-0} = op;
+}
+
+class JALR_HB_FM<bits<6> op> : StdArch {
+ bits<5> rd;
+ bits<5> rs;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0; // SPECIAL
+ let Inst{25-21} = rs;
+ let Inst{20-16} = 0;
+ let Inst{15-11} = rd;
+ let Inst{10} = 1;
+ let Inst{9-6} = 0;
+ let Inst{5-0} = op;
+}
+
class COP0_TLB_FM<bits<6> op> : StdArch {
bits<32> Inst;
@@ -852,3 +890,17 @@ class COP0_TLB_FM<bits<6> op> : StdArch {
let Inst{24-6} = 0;
let Inst{5-0} = op; // Operation
}
+
+class CACHEOP_FM<bits<6> op> : StdArch {
+ bits<21> addr;
+ bits<5> hint;
+ bits<5> base = addr{20-16};
+ bits<16> offset = addr{15-0};
+
+ bits<32> Inst;
+
+ let Inst{31-26} = op;
+ let Inst{25-21} = base;
+ let Inst{20-16} = hint;
+ let Inst{15-0} = offset;
+}
diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td
index 0d3cb75..8e9472c 100644
--- a/lib/Target/Mips/MipsInstrInfo.td
+++ b/lib/Target/Mips/MipsInstrInfo.td
@@ -146,61 +146,61 @@ def MipsSDR : SDNode<"MipsISD::SDR", SDTStore,
//===----------------------------------------------------------------------===//
// Mips Instruction Predicate Definitions.
//===----------------------------------------------------------------------===//
-def HasMips2 : Predicate<"Subtarget.hasMips2()">,
+def HasMips2 : Predicate<"Subtarget->hasMips2()">,
AssemblerPredicate<"FeatureMips2">;
-def HasMips3_32 : Predicate<"Subtarget.hasMips3_32()">,
+def HasMips3_32 : Predicate<"Subtarget->hasMips3_32()">,
AssemblerPredicate<"FeatureMips3_32">;
-def HasMips3_32r2 : Predicate<"Subtarget.hasMips3_32r2()">,
+def HasMips3_32r2 : Predicate<"Subtarget->hasMips3_32r2()">,
AssemblerPredicate<"FeatureMips3_32r2">;
-def HasMips3 : Predicate<"Subtarget.hasMips3()">,
+def HasMips3 : Predicate<"Subtarget->hasMips3()">,
AssemblerPredicate<"FeatureMips3">;
-def HasMips4_32 : Predicate<"Subtarget.hasMips4_32()">,
+def HasMips4_32 : Predicate<"Subtarget->hasMips4_32()">,
AssemblerPredicate<"FeatureMips4_32">;
-def HasMips4_32r2 : Predicate<"Subtarget.hasMips4_32r2()">,
+def HasMips4_32r2 : Predicate<"Subtarget->hasMips4_32r2()">,
AssemblerPredicate<"FeatureMips4_32r2">;
-def HasMips5_32r2 : Predicate<"Subtarget.hasMips5_32r2()">,
+def HasMips5_32r2 : Predicate<"Subtarget->hasMips5_32r2()">,
AssemblerPredicate<"FeatureMips5_32r2">;
-def HasMips32 : Predicate<"Subtarget.hasMips32()">,
+def HasMips32 : Predicate<"Subtarget->hasMips32()">,
AssemblerPredicate<"FeatureMips32">;
-def HasMips32r2 : Predicate<"Subtarget.hasMips32r2()">,
+def HasMips32r2 : Predicate<"Subtarget->hasMips32r2()">,
AssemblerPredicate<"FeatureMips32r2">;
-def HasMips32r6 : Predicate<"Subtarget.hasMips32r6()">,
+def HasMips32r6 : Predicate<"Subtarget->hasMips32r6()">,
AssemblerPredicate<"FeatureMips32r6">;
-def NotMips32r6 : Predicate<"!Subtarget.hasMips32r6()">,
+def NotMips32r6 : Predicate<"!Subtarget->hasMips32r6()">,
AssemblerPredicate<"!FeatureMips32r6">;
-def IsGP64bit : Predicate<"Subtarget.isGP64bit()">,
+def IsGP64bit : Predicate<"Subtarget->isGP64bit()">,
AssemblerPredicate<"FeatureGP64Bit">;
-def IsGP32bit : Predicate<"!Subtarget.isGP64bit()">,
+def IsGP32bit : Predicate<"!Subtarget->isGP64bit()">,
AssemblerPredicate<"!FeatureGP64Bit">;
-def HasMips64 : Predicate<"Subtarget.hasMips64()">,
+def HasMips64 : Predicate<"Subtarget->hasMips64()">,
AssemblerPredicate<"FeatureMips64">;
-def HasMips64r2 : Predicate<"Subtarget.hasMips64r2()">,
+def HasMips64r2 : Predicate<"Subtarget->hasMips64r2()">,
AssemblerPredicate<"FeatureMips64r2">;
-def HasMips64r6 : Predicate<"Subtarget.hasMips64r6()">,
+def HasMips64r6 : Predicate<"Subtarget->hasMips64r6()">,
AssemblerPredicate<"FeatureMips64r6">;
-def NotMips64r6 : Predicate<"!Subtarget.hasMips64r6()">,
+def NotMips64r6 : Predicate<"!Subtarget->hasMips64r6()">,
AssemblerPredicate<"!FeatureMips64r6">;
-def IsN64 : Predicate<"Subtarget.isABI_N64()">,
+def IsN64 : Predicate<"Subtarget->isABI_N64()">,
AssemblerPredicate<"FeatureN64">;
-def InMips16Mode : Predicate<"Subtarget.inMips16Mode()">,
+def InMips16Mode : Predicate<"Subtarget->inMips16Mode()">,
AssemblerPredicate<"FeatureMips16">;
-def HasCnMips : Predicate<"Subtarget.hasCnMips()">,
+def HasCnMips : Predicate<"Subtarget->hasCnMips()">,
AssemblerPredicate<"FeatureCnMips">;
def RelocStatic : Predicate<"TM.getRelocationModel() == Reloc::Static">,
AssemblerPredicate<"FeatureMips32">;
def RelocPIC : Predicate<"TM.getRelocationModel() == Reloc::PIC_">,
AssemblerPredicate<"FeatureMips32">;
def NoNaNsFPMath : Predicate<"TM.Options.NoNaNsFPMath">;
-def HasStdEnc : Predicate<"Subtarget.hasStandardEncoding()">,
+def HasStdEnc : Predicate<"Subtarget->hasStandardEncoding()">,
AssemblerPredicate<"!FeatureMips16">;
-def NotDSP : Predicate<"!Subtarget.hasDSP()">;
-def InMicroMips : Predicate<"Subtarget.inMicroMipsMode()">,
+def NotDSP : Predicate<"!Subtarget->hasDSP()">;
+def InMicroMips : Predicate<"Subtarget->inMicroMipsMode()">,
AssemblerPredicate<"FeatureMicroMips">;
-def NotInMicroMips : Predicate<"!Subtarget.inMicroMipsMode()">,
+def NotInMicroMips : Predicate<"!Subtarget->inMicroMipsMode()">,
AssemblerPredicate<"!FeatureMicroMips">;
-def IsLE : Predicate<"Subtarget.isLittle()">;
-def IsBE : Predicate<"!Subtarget.isLittle()">;
-def IsNotNaCl : Predicate<"!Subtarget.isTargetNaCl()">;
+def IsLE : Predicate<"Subtarget->isLittle()">;
+def IsBE : Predicate<"!Subtarget->isLittle()">;
+def IsNotNaCl : Predicate<"!Subtarget->isTargetNaCl()">;
//===----------------------------------------------------------------------===//
// Mips GPR size adjectives.
@@ -232,8 +232,17 @@ class ISA_MIPS3_NOT_32R6_64R6 {
list<Predicate> InsnPredicates = [HasMips3, NotMips32r6, NotMips64r6];
}
class ISA_MIPS32 { list<Predicate> InsnPredicates = [HasMips32]; }
+class ISA_MIPS32_NOT_32R6_64R6 {
+ list<Predicate> InsnPredicates = [HasMips32, NotMips32r6, NotMips64r6];
+}
class ISA_MIPS32R2 { list<Predicate> InsnPredicates = [HasMips32r2]; }
+class ISA_MIPS32R2_NOT_32R6_64R6 {
+ list<Predicate> InsnPredicates = [HasMips32r2, NotMips32r6, NotMips64r6];
+}
class ISA_MIPS64 { list<Predicate> InsnPredicates = [HasMips64]; }
+class ISA_MIPS64_NOT_64R6 {
+ list<Predicate> InsnPredicates = [HasMips64, NotMips64r6];
+}
class ISA_MIPS64R2 { list<Predicate> InsnPredicates = [HasMips64r2]; }
class ISA_MIPS32R6 { list<Predicate> InsnPredicates = [HasMips32r6]; }
class ISA_MIPS64R6 { list<Predicate> InsnPredicates = [HasMips64r6]; }
@@ -241,17 +250,32 @@ class ISA_MIPS64R6 { list<Predicate> InsnPredicates = [HasMips64r6]; }
// The portions of MIPS-III that were also added to MIPS32
class INSN_MIPS3_32 { list<Predicate> InsnPredicates = [HasMips3_32]; }
+// The portions of MIPS-III that were also added to MIPS32 but were removed in
+// MIPS32r6 and MIPS64r6.
+class INSN_MIPS3_32_NOT_32R6_64R6 {
+ list<Predicate> InsnPredicates = [HasMips3_32, NotMips32r6, NotMips64r6];
+}
+
// The portions of MIPS-III that were also added to MIPS32r2
class INSN_MIPS3_32R2 { list<Predicate> InsnPredicates = [HasMips3_32r2]; }
-// The portions of MIPS-IV that were also added to MIPS32
-class INSN_MIPS4_32 { list<Predicate> InsnPredicates = [HasMips4_32]; }
+// The portions of MIPS-IV that were also added to MIPS32 but were removed in
+// MIPS32r6 and MIPS64r6.
+class INSN_MIPS4_32_NOT_32R6_64R6 {
+ list<Predicate> InsnPredicates = [HasMips4_32, NotMips32r6, NotMips64r6];
+}
-// The portions of MIPS-IV that were also added to MIPS32R2
-class INSN_MIPS4_32R2 { list<Predicate> InsnPredicates = [HasMips4_32r2]; }
+// The portions of MIPS-IV that were also added to MIPS32r2 but were removed in
+// MIPS32r6 and MIPS64r6.
+class INSN_MIPS4_32R2_NOT_32R6_64R6 {
+ list<Predicate> InsnPredicates = [HasMips4_32r2, NotMips32r6, NotMips64r6];
+}
-// The portions of MIPS-V that were also added to MIPS32R2
-class INSN_MIPS5_32R2 { list<Predicate> InsnPredicates = [HasMips5_32r2]; }
+// The portions of MIPS-V that were also added to MIPS32r2 but were removed in
+// MIPS32r6 and MIPS64r6.
+class INSN_MIPS5_32R2_NOT_32R6_64R6 {
+ list<Predicate> InsnPredicates = [HasMips5_32r2, NotMips32r6, NotMips64r6];
+}
//===----------------------------------------------------------------------===//
@@ -328,7 +352,9 @@ def calltarget : Operand<iPTR> {
let ParserMatchClass = MipsJumpTargetAsmOperand;
}
+def simm9 : Operand<i32>;
def simm10 : Operand<i32>;
+def simm11 : Operand<i32>;
def simm16 : Operand<i32> {
let DecoderMethod= "DecodeSimm16";
@@ -337,6 +363,13 @@ def simm16 : Operand<i32> {
def simm19_lsl2 : Operand<i32> {
let EncoderMethod = "getSimm19Lsl2Encoding";
let DecoderMethod = "DecodeSimm19Lsl2";
+ let ParserMatchClass = MipsJumpTargetAsmOperand;
+}
+
+def simm18_lsl3 : Operand<i32> {
+ let EncoderMethod = "getSimm18Lsl3Encoding";
+ let DecoderMethod = "DecodeSimm18Lsl3";
+ let ParserMatchClass = MipsJumpTargetAsmOperand;
}
def simm20 : Operand<i32> {
@@ -386,6 +419,15 @@ def MipsMemAsmOperand : AsmOperandClass {
let ParserMethod = "parseMemOperand";
}
+def MipsMemSimm11AsmOperand : AsmOperandClass {
+ let Name = "MemOffsetSimm11";
+ let SuperClasses = [MipsMemAsmOperand];
+ let RenderMethod = "addMemOperands";
+ let ParserMethod = "parseMemOperand";
+ let PredicateMethod = "isMemWithSimmOffset<11>";
+ //let DiagnosticType = "Simm11";
+}
+
def MipsInvertedImmoperand : AsmOperandClass {
let Name = "InvNum";
let RenderMethod = "addImmOperands";
@@ -417,6 +459,17 @@ def mem_msa : mem_generic {
let EncoderMethod = "getMSAMemEncoding";
}
+def mem_simm9 : mem_generic {
+ let MIOperandInfo = (ops ptr_rc, simm9);
+ let EncoderMethod = "getMemEncoding";
+}
+
+def mem_simm11 : mem_generic {
+ let MIOperandInfo = (ops ptr_rc, simm11);
+ let EncoderMethod = "getMemEncoding";
+ let ParserMatchClass = MipsMemSimm11AsmOperand;
+}
+
def mem_ea : Operand<iPTR> {
let PrintMethod = "printMemOperandEA";
let MIOperandInfo = (ops ptr_rc, simm16);
@@ -690,20 +743,11 @@ class JumpFR<string opstr, RegisterOperand RO,
FrmR, opstr>;
// Indirect branch
-class IndirectBranch<string opstr, RegisterOperand RO> :
- JumpFR<opstr, RO, brind> {
+class IndirectBranch<string opstr, RegisterOperand RO> : JumpFR<opstr, RO> {
let isBranch = 1;
let isIndirectBranch = 1;
}
-// Return instruction
-class RetBase<string opstr, RegisterOperand RO>: JumpFR<opstr, RO> {
- let isReturn = 1;
- let isCodeGenOnly = 1;
- let hasCtrlDep = 1;
- let hasExtraSrcRegAllocReq = 1;
-}
-
// Jump and Link (Call)
let isCall=1, hasDelaySlot=1, Defs = [RA] in {
class JumpLink<string opstr, DAGOperand opnd> :
@@ -1042,7 +1086,7 @@ def SUBu : MMRel, ArithLogicR<"subu", GPR32Opnd, 0, II_SUBU, sub>,
ADD_FM<0, 0x23>;
let Defs = [HI0, LO0] in
def MUL : MMRel, ArithLogicR<"mul", GPR32Opnd, 1, II_MUL, mul>,
- ADD_FM<0x1c, 2>, ISA_MIPS32;
+ ADD_FM<0x1c, 2>, ISA_MIPS32_NOT_32R6_64R6;
def ADD : MMRel, ArithLogicR<"add", GPR32Opnd>, ADD_FM<0, 0x20>;
def SUB : MMRel, ArithLogicR<"sub", GPR32Opnd>, ADD_FM<0, 0x22>;
def SLT : MMRel, SetCC_R<"slt", setlt, GPR32Opnd>, ADD_FM<0, 0x2a>;
@@ -1103,7 +1147,7 @@ def SWR : StoreLeftRight<"swr", MipsSWR, GPR32Opnd, II_SWR>, LW_FM<0x2e>,
ISA_MIPS1_NOT_32R6_64R6;
}
-def SYNC : MMRel, SYNC_FT<"sync">, SYNC_FM;
+def SYNC : MMRel, SYNC_FT<"sync">, SYNC_FM, ISA_MIPS32;
def TEQ : MMRel, TEQ_FT<"teq", GPR32Opnd>, TEQ_FM<0x34>;
def TGE : MMRel, TEQ_FT<"tge", GPR32Opnd>, TEQ_FM<0x30>;
def TGEU : MMRel, TEQ_FT<"tgeu", GPR32Opnd>, TEQ_FM<0x31>;
@@ -1127,6 +1171,7 @@ def TNEI : MMRel, TEQI_FT<"tnei", GPR32Opnd>, TEQI_FM<0xe>,
def BREAK : MMRel, BRK_FT<"break">, BRK_FM<0xd>;
def SYSCALL : MMRel, SYS_FT<"syscall">, SYS_FM<0xc>;
def TRAP : TrapBase<BREAK>;
+def SDBBP : SYS_FT<"sdbbp">, SDBBP_FM, ISA_MIPS32_NOT_32R6_64R6;
def ERET : MMRel, ER_FT<"eret">, ER_FM<0x18>, INSN_MIPS3_32;
def DERET : MMRel, ER_FT<"deret">, ER_FM<0x1f>, ISA_MIPS32;
@@ -1139,8 +1184,8 @@ let EncodingPredicates = []<Predicate>, // FIXME: Lack of HasStdEnc is probably
def WAIT : WAIT_FT<"wait">, WAIT_FM;
/// Load-linked, Store-conditional
-def LL : LLBase<"ll", GPR32Opnd>, LW_FM<0x30>, ISA_MIPS2;
-def SC : SCBase<"sc", GPR32Opnd>, LW_FM<0x38>, ISA_MIPS2;
+def LL : LLBase<"ll", GPR32Opnd>, LW_FM<0x30>, ISA_MIPS2_NOT_32R6_64R6;
+def SC : SCBase<"sc", GPR32Opnd>, LW_FM<0x38>, ISA_MIPS2_NOT_32R6_64R6;
}
/// Jump and Branch Instructions
@@ -1161,17 +1206,49 @@ def B : UncondBranch<BEQ>;
def JAL : MMRel, JumpLink<"jal", calltarget>, FJ<3>;
let AdditionalPredicates = [NotInMicroMips] in {
-def JALR : JumpLinkReg<"jalr", GPR32Opnd>, JALR_FM;
-def JALRPseudo : JumpLinkRegPseudo<GPR32Opnd, JALR, RA>;
+ def JALR : JumpLinkReg<"jalr", GPR32Opnd>, JALR_FM;
+ def JALRPseudo : JumpLinkRegPseudo<GPR32Opnd, JALR, RA>;
}
-def JALX : JumpLink<"jalx", calltarget>, FJ<0x1D>;
-def BGEZAL : MMRel, BGEZAL_FT<"bgezal", brtarget, GPR32Opnd>, BGEZAL_FM<0x11>;
-def BLTZAL : MMRel, BGEZAL_FT<"bltzal", brtarget, GPR32Opnd>, BGEZAL_FM<0x10>;
+
+// FIXME: JALX really requires either MIPS16 or microMIPS in addition to MIPS32.
+def JALX : JumpLink<"jalx", calltarget>, FJ<0x1D>, ISA_MIPS32_NOT_32R6_64R6;
+def BGEZAL : MMRel, BGEZAL_FT<"bgezal", brtarget, GPR32Opnd>, BGEZAL_FM<0x11>,
+ ISA_MIPS1_NOT_32R6_64R6;
+def BLTZAL : MMRel, BGEZAL_FT<"bltzal", brtarget, GPR32Opnd>, BGEZAL_FM<0x10>,
+ ISA_MIPS1_NOT_32R6_64R6;
def BAL_BR : BAL_BR_Pseudo<BGEZAL>;
def TAILCALL : TailCall<J>;
def TAILCALL_R : TailCallReg<GPR32Opnd, JR>;
-def RET : MMRel, RetBase<"ret", GPR32Opnd>, MTLO_FM<8>;
+// Indirect branches are matched as PseudoIndirectBranch/PseudoIndirectBranch64
+// then are expanded to JR, JR64, JALR, or JALR64 depending on the ISA.
+class PseudoIndirectBranchBase<RegisterOperand RO> :
+ MipsPseudo<(outs), (ins RO:$rs), [(brind RO:$rs)], IIBranch> {
+ let isTerminator=1;
+ let isBarrier=1;
+ let hasDelaySlot = 1;
+ let isBranch = 1;
+ let isIndirectBranch = 1;
+}
+
+def PseudoIndirectBranch : PseudoIndirectBranchBase<GPR32Opnd>;
+
+// Return instructions are matched as a RetRA instruction, then are expanded
+// into PseudoReturn/PseudoReturn64 after register allocation. Finally,
+// MipsAsmPrinter expands this into JR, JR64, JALR, or JALR64 depending on the
+// ISA.
+class PseudoReturnBase<RegisterOperand RO> : MipsPseudo<(outs), (ins RO:$rs),
+ [], IIBranch> {
+ let isTerminator = 1;
+ let isBarrier = 1;
+ let hasDelaySlot = 1;
+ let isReturn = 1;
+ let isCodeGenOnly = 1;
+ let hasCtrlDep = 1;
+ let hasExtraSrcRegAllocReq = 1;
+}
+
+def PseudoReturn : PseudoReturnBase<GPR32Opnd>;
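+
+// For example, on pre-MIPS32r6 O32 a return is matched as RetRA, expanded to
+// (PseudoReturn RA) by MipsSEInstrInfo::expandRetRA, and finally emitted by
+// MipsAsmPrinter as "jr $ra".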
// Exception handling related node and instructions.
// The conversion sequence is:
@@ -1196,20 +1273,24 @@ let Uses = [V0, V1], isTerminator = 1, isReturn = 1, isBarrier = 1 in {
/// Multiply and Divide Instructions.
def MULT : MMRel, Mult<"mult", II_MULT, GPR32Opnd, [HI0, LO0]>,
- MULT_FM<0, 0x18>;
+ MULT_FM<0, 0x18>, ISA_MIPS1_NOT_32R6_64R6;
def MULTu : MMRel, Mult<"multu", II_MULTU, GPR32Opnd, [HI0, LO0]>,
- MULT_FM<0, 0x19>;
+ MULT_FM<0, 0x19>, ISA_MIPS1_NOT_32R6_64R6;
def SDIV : MMRel, Div<"div", II_DIV, GPR32Opnd, [HI0, LO0]>,
- MULT_FM<0, 0x1a>;
+ MULT_FM<0, 0x1a>, ISA_MIPS1_NOT_32R6_64R6;
def UDIV : MMRel, Div<"divu", II_DIVU, GPR32Opnd, [HI0, LO0]>,
- MULT_FM<0, 0x1b>;
+ MULT_FM<0, 0x1b>, ISA_MIPS1_NOT_32R6_64R6;
-def MTHI : MMRel, MoveToLOHI<"mthi", GPR32Opnd, [HI0]>, MTLO_FM<0x11>;
-def MTLO : MMRel, MoveToLOHI<"mtlo", GPR32Opnd, [LO0]>, MTLO_FM<0x13>;
+def MTHI : MMRel, MoveToLOHI<"mthi", GPR32Opnd, [HI0]>, MTLO_FM<0x11>,
+ ISA_MIPS1_NOT_32R6_64R6;
+def MTLO : MMRel, MoveToLOHI<"mtlo", GPR32Opnd, [LO0]>, MTLO_FM<0x13>,
+ ISA_MIPS1_NOT_32R6_64R6;
let EncodingPredicates = []<Predicate>, // FIXME: Lack of HasStdEnc is probably a bug
AdditionalPredicates = [NotInMicroMips] in {
-def MFHI : MMRel, MoveFromLOHI<"mfhi", GPR32Opnd, AC0>, MFLO_FM<0x10>;
-def MFLO : MMRel, MoveFromLOHI<"mflo", GPR32Opnd, AC0>, MFLO_FM<0x12>;
+def MFHI : MMRel, MoveFromLOHI<"mfhi", GPR32Opnd, AC0>, MFLO_FM<0x10>,
+ ISA_MIPS1_NOT_32R6_64R6;
+def MFLO : MMRel, MoveFromLOHI<"mflo", GPR32Opnd, AC0>, MFLO_FM<0x12>,
+ ISA_MIPS1_NOT_32R6_64R6;
}
/// Sign Ext In Register Instructions.
@@ -1219,8 +1300,10 @@ def SEH : MMRel, SignExtInReg<"seh", i16, GPR32Opnd, II_SEH>,
SEB_FM<0x18, 0x20>, ISA_MIPS32R2;
/// Count Leading
-def CLZ : MMRel, CountLeading0<"clz", GPR32Opnd>, CLO_FM<0x20>, ISA_MIPS32;
-def CLO : MMRel, CountLeading1<"clo", GPR32Opnd>, CLO_FM<0x21>, ISA_MIPS32;
+def CLZ : MMRel, CountLeading0<"clz", GPR32Opnd>, CLO_FM<0x20>,
+ ISA_MIPS32_NOT_32R6_64R6;
+def CLO : MMRel, CountLeading1<"clo", GPR32Opnd>, CLO_FM<0x21>,
+ ISA_MIPS32_NOT_32R6_64R6;
/// Word Swap Bytes Within Halfwords
def WSBH : MMRel, SubwordSwap<"wsbh", GPR32Opnd>, SEB_FM<2, 0x20>, ISA_MIPS32R2;
@@ -1235,27 +1318,37 @@ def NOP : PseudoSE<(outs), (ins), []>, PseudoInstExpansion<(SLL ZERO, ZERO, 0)>;
def LEA_ADDiu : MMRel, EffectiveAddress<"addiu", GPR32Opnd>, LW_FM<9>;
// MADD*/MSUB*
-def MADD : MMRel, MArithR<"madd", II_MADD, 1>, MULT_FM<0x1c, 0>, ISA_MIPS32;
-def MADDU : MMRel, MArithR<"maddu", II_MADDU, 1>, MULT_FM<0x1c, 1>, ISA_MIPS32;
-def MSUB : MMRel, MArithR<"msub", II_MSUB>, MULT_FM<0x1c, 4>, ISA_MIPS32;
-def MSUBU : MMRel, MArithR<"msubu", II_MSUBU>, MULT_FM<0x1c, 5>, ISA_MIPS32;
+def MADD : MMRel, MArithR<"madd", II_MADD, 1>, MULT_FM<0x1c, 0>,
+ ISA_MIPS32_NOT_32R6_64R6;
+def MADDU : MMRel, MArithR<"maddu", II_MADDU, 1>, MULT_FM<0x1c, 1>,
+ ISA_MIPS32_NOT_32R6_64R6;
+def MSUB : MMRel, MArithR<"msub", II_MSUB>, MULT_FM<0x1c, 4>,
+ ISA_MIPS32_NOT_32R6_64R6;
+def MSUBU : MMRel, MArithR<"msubu", II_MSUBU>, MULT_FM<0x1c, 5>,
+ ISA_MIPS32_NOT_32R6_64R6;
let AdditionalPredicates = [NotDSP] in {
-def PseudoMULT : MultDivPseudo<MULT, ACC64, GPR32Opnd, MipsMult, II_MULT>;
-def PseudoMULTu : MultDivPseudo<MULTu, ACC64, GPR32Opnd, MipsMultu, II_MULTU>;
-def PseudoMFHI : PseudoMFLOHI<GPR32, ACC64, MipsMFHI>;
-def PseudoMFLO : PseudoMFLOHI<GPR32, ACC64, MipsMFLO>;
-def PseudoMTLOHI : PseudoMTLOHI<ACC64, GPR32>;
-def PseudoMADD : MAddSubPseudo<MADD, MipsMAdd, II_MADD>;
-def PseudoMADDU : MAddSubPseudo<MADDU, MipsMAddu, II_MADDU>;
-def PseudoMSUB : MAddSubPseudo<MSUB, MipsMSub, II_MSUB>;
-def PseudoMSUBU : MAddSubPseudo<MSUBU, MipsMSubu, II_MSUBU>;
+def PseudoMULT : MultDivPseudo<MULT, ACC64, GPR32Opnd, MipsMult, II_MULT>,
+ ISA_MIPS1_NOT_32R6_64R6;
+def PseudoMULTu : MultDivPseudo<MULTu, ACC64, GPR32Opnd, MipsMultu, II_MULTU>,
+ ISA_MIPS1_NOT_32R6_64R6;
+def PseudoMFHI : PseudoMFLOHI<GPR32, ACC64, MipsMFHI>, ISA_MIPS1_NOT_32R6_64R6;
+def PseudoMFLO : PseudoMFLOHI<GPR32, ACC64, MipsMFLO>, ISA_MIPS1_NOT_32R6_64R6;
+def PseudoMTLOHI : PseudoMTLOHI<ACC64, GPR32>, ISA_MIPS1_NOT_32R6_64R6;
+def PseudoMADD : MAddSubPseudo<MADD, MipsMAdd, II_MADD>,
+ ISA_MIPS32_NOT_32R6_64R6;
+def PseudoMADDU : MAddSubPseudo<MADDU, MipsMAddu, II_MADDU>,
+ ISA_MIPS32_NOT_32R6_64R6;
+def PseudoMSUB : MAddSubPseudo<MSUB, MipsMSub, II_MSUB>,
+ ISA_MIPS32_NOT_32R6_64R6;
+def PseudoMSUBU : MAddSubPseudo<MSUBU, MipsMSubu, II_MSUBU>,
+ ISA_MIPS32_NOT_32R6_64R6;
}
def PseudoSDIV : MultDivPseudo<SDIV, ACC64, GPR32Opnd, MipsDivRem, II_DIV,
- 0, 1, 1>;
+ 0, 1, 1>, ISA_MIPS1_NOT_32R6_64R6;
def PseudoUDIV : MultDivPseudo<UDIV, ACC64, GPR32Opnd, MipsDivRemU, II_DIVU,
- 0, 1, 1>;
+ 0, 1, 1>, ISA_MIPS1_NOT_32R6_64R6;
def RDHWR : ReadHardware<GPR32Opnd, HWRegsOpnd>, RDHWR_FM;
@@ -1274,6 +1367,46 @@ def SSNOP : Barrier<"ssnop">, BARRIER_FM<1>;
def EHB : Barrier<"ehb">, BARRIER_FM<3>;
def PAUSE : Barrier<"pause">, BARRIER_FM<5>, ISA_MIPS32R2;
+// JR_HB and JALR_HB are defined here using the new style naming
+// scheme because some of this code is shared with Mips32r6InstrInfo.td
+// and because of that it doesn't follow the naming convention of the
+// rest of the file. To avoid a mixture of old vs new style, the new
+// style was chosen.
+class JR_HB_DESC_BASE<string instr_asm, RegisterOperand GPROpnd> {
+ dag OutOperandList = (outs);
+ dag InOperandList = (ins GPROpnd:$rs);
+ string AsmString = !strconcat(instr_asm, "\t$rs");
+ list<dag> Pattern = [];
+}
+
+class JALR_HB_DESC_BASE<string instr_asm, RegisterOperand GPROpnd> {
+ dag OutOperandList = (outs GPROpnd:$rd);
+ dag InOperandList = (ins GPROpnd:$rs);
+ string AsmString = !strconcat(instr_asm, "\t$rd, $rs");
+ list<dag> Pattern = [];
+}
+
+class JR_HB_DESC : InstSE<(outs), (ins), "", [], NoItinerary, FrmJ>,
+ JR_HB_DESC_BASE<"jr.hb", GPR32Opnd> {
+ let isBranch=1;
+ let isIndirectBranch=1;
+ let hasDelaySlot=1;
+ let isTerminator=1;
+ let isBarrier=1;
+}
+
+class JALR_HB_DESC : InstSE<(outs), (ins), "", [], NoItinerary, FrmJ>,
+ JALR_HB_DESC_BASE<"jalr.hb", GPR32Opnd> {
+ let isIndirectBranch=1;
+ let hasDelaySlot=1;
+}
+
+class JR_HB_ENC : JR_HB_FM<8>;
+class JALR_HB_ENC : JALR_HB_FM<9>;
+
+def JR_HB : JR_HB_DESC, JR_HB_ENC, ISA_MIPS32_NOT_32R6_64R6;
+def JALR_HB : JALR_HB_DESC, JALR_HB_ENC, ISA_MIPS32;
+
class TLB<string asmstr> : InstSE<(outs), (ins), asmstr, [], NoItinerary,
FrmOther>;
def TLBP : TLB<"tlbp">, COP0_TLB_FM<0x08>;
@@ -1281,6 +1414,15 @@ def TLBR : TLB<"tlbr">, COP0_TLB_FM<0x01>;
def TLBWI : TLB<"tlbwi">, COP0_TLB_FM<0x02>;
def TLBWR : TLB<"tlbwr">, COP0_TLB_FM<0x06>;
+class CacheOp<string instr_asm, Operand MemOpnd, RegisterOperand GPROpnd> :
+ InstSE<(outs), (ins MemOpnd:$addr, uimm5:$hint),
+ !strconcat(instr_asm, "\t$hint, $addr"), [], NoItinerary, FrmOther>;
+
+def CACHE : CacheOp<"cache", mem, GPR32Opnd>, CACHEOP_FM<0b101111>,
+ INSN_MIPS3_32_NOT_32R6_64R6;
+def PREF : CacheOp<"pref", mem, GPR32Opnd>, CACHEOP_FM<0b110011>,
+ INSN_MIPS3_32_NOT_32R6_64R6;
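+// For example, "cache 5, 8($4)" is encoded via CACHEOP_FM<0b101111> above:
+// op = 0b101111, base = 4 (i.e. $4), hint = 5, offset = 8.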
+
//===----------------------------------------------------------------------===//
// Instruction aliases
//===----------------------------------------------------------------------===//
@@ -1289,19 +1431,23 @@ def : MipsInstAlias<"move $dst, $src",
GPR_32 {
let AdditionalPredicates = [NotInMicroMips];
}
-def : MipsInstAlias<"bal $offset", (BGEZAL ZERO, brtarget:$offset), 0>;
+def : MipsInstAlias<"bal $offset", (BGEZAL ZERO, brtarget:$offset), 0>,
+ ISA_MIPS1_NOT_32R6_64R6;
def : MipsInstAlias<"addu $rs, $rt, $imm",
(ADDiu GPR32Opnd:$rs, GPR32Opnd:$rt, simm16:$imm), 0>;
def : MipsInstAlias<"add $rs, $rt, $imm",
(ADDi GPR32Opnd:$rs, GPR32Opnd:$rt, simm16:$imm), 0>;
def : MipsInstAlias<"and $rs, $rt, $imm",
(ANDi GPR32Opnd:$rs, GPR32Opnd:$rt, simm16:$imm), 0>;
+def : MipsInstAlias<"and $rs, $imm",
+ (ANDi GPR32Opnd:$rs, GPR32Opnd:$rs, simm16:$imm), 0>;
def : MipsInstAlias<"j $rs", (JR GPR32Opnd:$rs), 0>;
let Predicates = [NotInMicroMips] in {
def : MipsInstAlias<"jalr $rs", (JALR RA, GPR32Opnd:$rs), 0>;
}
def : MipsInstAlias<"jal $rs", (JALR RA, GPR32Opnd:$rs), 0>;
def : MipsInstAlias<"jal $rd,$rs", (JALR GPR32Opnd:$rd, GPR32Opnd:$rs), 0>;
+def : MipsInstAlias<"jalr.hb $rs", (JALR_HB RA, GPR32Opnd:$rs), 1>, ISA_MIPS32;
def : MipsInstAlias<"not $rt, $rs",
(NOR GPR32Opnd:$rt, GPR32Opnd:$rs, ZERO), 0>;
def : MipsInstAlias<"neg $rt, $rs",
@@ -1318,6 +1464,8 @@ def : MipsInstAlias<"xor $rs, $rt, $imm",
(XORi GPR32Opnd:$rs, GPR32Opnd:$rt, uimm16:$imm), 0>;
def : MipsInstAlias<"or $rs, $rt, $imm",
(ORi GPR32Opnd:$rs, GPR32Opnd:$rt, uimm16:$imm), 0>;
+def : MipsInstAlias<"or $rs, $imm",
+ (ORi GPR32Opnd:$rs, GPR32Opnd:$rs, uimm16:$imm), 0>;
def : MipsInstAlias<"nop", (SLL ZERO, ZERO, 0), 1>;
def : MipsInstAlias<"mfc0 $rt, $rd", (MFC0 GPR32Opnd:$rt, GPR32Opnd:$rd, 0), 0>;
def : MipsInstAlias<"mtc0 $rt, $rd", (MTC0 GPR32Opnd:$rt, GPR32Opnd:$rd, 0), 0>;
@@ -1360,6 +1508,9 @@ def : MipsInstAlias<"sra $rd, $rt, $rs",
(SRAV GPR32Opnd:$rd, GPR32Opnd:$rt, GPR32Opnd:$rs), 0>;
def : MipsInstAlias<"srl $rd, $rt, $rs",
(SRLV GPR32Opnd:$rd, GPR32Opnd:$rt, GPR32Opnd:$rs), 0>;
+def : MipsInstAlias<"sdbbp", (SDBBP 0)>, ISA_MIPS32_NOT_32R6_64R6;
+def : MipsInstAlias<"sync",
+ (SYNC 0), 1>, ISA_MIPS2;
//===----------------------------------------------------------------------===//
// Assembler Pseudo Instructions
//===----------------------------------------------------------------------===//
@@ -1412,6 +1563,10 @@ let AdditionalPredicates = [NotDSP] in {
(ADDiu GPR32:$src, imm:$imm)>;
}
+// SYNC
+def : MipsPat<(MipsSync (i32 immz)),
+ (SYNC 0)>, ISA_MIPS2;
+
// Call
def : MipsPat<(MipsJmpLink (i32 tglobaladdr:$dst)),
(JAL tglobaladdr:$dst)>;
diff --git a/lib/Target/Mips/MipsLongBranch.cpp b/lib/Target/Mips/MipsLongBranch.cpp
index acfe76e..c6838a3 100644
--- a/lib/Target/Mips/MipsLongBranch.cpp
+++ b/lib/Target/Mips/MipsLongBranch.cpp
@@ -15,6 +15,7 @@
#include "Mips.h"
#include "MCTargetDesc/MipsBaseInfo.h"
+#include "MCTargetDesc/MipsMCNaCl.h"
#include "MipsTargetMachine.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
@@ -64,7 +65,8 @@ namespace {
: MachineFunctionPass(ID), TM(tm),
IsPIC(TM.getRelocationModel() == Reloc::PIC_),
ABI(TM.getSubtarget<MipsSubtarget>().getTargetABI()),
- LongBranchSeqSize(!IsPIC ? 2 : (ABI == MipsSubtarget::N64 ? 10 : 9)) {}
+ LongBranchSeqSize(!IsPIC ? 2 : (ABI == MipsSubtarget::N64 ? 10 :
+ (!TM.getSubtarget<MipsSubtarget>().isTargetNaCl() ? 9 : 10))) {}
const char *getPassName() const override {
return "Mips Long Branch";
@@ -264,6 +266,13 @@ void MipsLongBranch::expandToLongBranch(MBBInfo &I) {
LongBrMBB->addSuccessor(BalTgtMBB);
BalTgtMBB->addSuccessor(TgtMBB);
+ // We must select between the MIPS32r6/MIPS64r6 BAL (which is a normal
+ // instruction) and the pre-MIPS32r6/MIPS64r6 definition (which is a
+ // pseudo-instruction wrapping BGEZAL).
+
+ const MipsSubtarget &Subtarget = TM.getSubtarget<MipsSubtarget>();
+ unsigned BalOp = Subtarget.hasMips32r6() ? Mips::BAL : Mips::BAL_BR;
+
if (ABI != MipsSubtarget::N64) {
// $longbr:
// addiu $sp, $sp, -8
@@ -305,9 +314,11 @@ void MipsLongBranch::expandToLongBranch(MBBInfo &I) {
BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::LONG_BRANCH_LUi), Mips::AT)
.addMBB(TgtMBB).addMBB(BalTgtMBB);
MIBundleBuilder(*LongBrMBB, Pos)
- .append(BuildMI(*MF, DL, TII->get(Mips::BAL_BR)).addMBB(BalTgtMBB))
- .append(BuildMI(*MF, DL, TII->get(Mips::LONG_BRANCH_ADDiu), Mips::AT)
- .addReg(Mips::AT).addMBB(TgtMBB).addMBB(BalTgtMBB));
+ .append(BuildMI(*MF, DL, TII->get(BalOp)).addMBB(BalTgtMBB))
+ .append(BuildMI(*MF, DL, TII->get(Mips::LONG_BRANCH_ADDiu), Mips::AT)
+ .addReg(Mips::AT)
+ .addMBB(TgtMBB)
+ .addMBB(BalTgtMBB));
Pos = BalTgtMBB->begin();
@@ -316,10 +327,23 @@ void MipsLongBranch::expandToLongBranch(MBBInfo &I) {
BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::LW), Mips::RA)
.addReg(Mips::SP).addImm(0);
- MIBundleBuilder(*BalTgtMBB, Pos)
- .append(BuildMI(*MF, DL, TII->get(Mips::JR)).addReg(Mips::AT))
- .append(BuildMI(*MF, DL, TII->get(Mips::ADDiu), Mips::SP)
- .addReg(Mips::SP).addImm(8));
+ if (!TM.getSubtarget<MipsSubtarget>().isTargetNaCl()) {
+ MIBundleBuilder(*BalTgtMBB, Pos)
+ .append(BuildMI(*MF, DL, TII->get(Mips::JR)).addReg(Mips::AT))
+ .append(BuildMI(*MF, DL, TII->get(Mips::ADDiu), Mips::SP)
+ .addReg(Mips::SP).addImm(8));
+ } else {
+ // In NaCl, modifying the sp is not allowed in a branch delay slot.
+ BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::ADDiu), Mips::SP)
+ .addReg(Mips::SP).addImm(8);
+
+ MIBundleBuilder(*BalTgtMBB, Pos)
+ .append(BuildMI(*MF, DL, TII->get(Mips::JR)).addReg(Mips::AT))
+ .append(BuildMI(*MF, DL, TII->get(Mips::NOP)));
+
+ // Bundle-align the target of indirect branch JR.
+ TgtMBB->setAlignment(MIPS_NACL_BUNDLE_ALIGN);
+ }
} else {
// $longbr:
// daddiu $sp, $sp, -16
@@ -364,11 +388,12 @@ void MipsLongBranch::expandToLongBranch(MBBInfo &I) {
.addReg(Mips::AT_64).addImm(16);
MIBundleBuilder(*LongBrMBB, Pos)
- .append(BuildMI(*MF, DL, TII->get(Mips::BAL_BR)).addMBB(BalTgtMBB))
- .append(BuildMI(*MF, DL, TII->get(Mips::LONG_BRANCH_DADDiu),
- Mips::AT_64).addReg(Mips::AT_64)
- .addMBB(TgtMBB, MipsII::MO_ABS_LO)
- .addMBB(BalTgtMBB));
+ .append(BuildMI(*MF, DL, TII->get(BalOp)).addMBB(BalTgtMBB))
+ .append(
+ BuildMI(*MF, DL, TII->get(Mips::LONG_BRANCH_DADDiu), Mips::AT_64)
+ .addReg(Mips::AT_64)
+ .addMBB(TgtMBB, MipsII::MO_ABS_LO)
+ .addMBB(BalTgtMBB));
Pos = BalTgtMBB->begin();
@@ -450,9 +475,18 @@ bool MipsLongBranch::runOnMachineFunction(MachineFunction &F) {
continue;
int ShVal = TM.getSubtarget<MipsSubtarget>().inMicroMipsMode() ? 2 : 4;
+ int64_t Offset = computeOffset(I->Br) / ShVal;
+
+ if (TM.getSubtarget<MipsSubtarget>().isTargetNaCl()) {
+ // The offset calculation does not include sandboxing instructions
+ // that will be added later in the MC layer. Since at this point we
+ // don't know the exact amount of code that "sandboxing" will add, we
+ // conservatively estimate that code will not grow more than 100%.
+ Offset *= 2;
+ }
// Check if offset fits into 16-bit immediate field of branches.
- if (!ForceLongBranch && isInt<16>(computeOffset(I->Br) / ShVal))
+ if (!ForceLongBranch && isInt<16>(Offset))
continue;
I->HasLongBranch = true;
diff --git a/lib/Target/Mips/MipsMSAInstrFormats.td b/lib/Target/Mips/MipsMSAInstrFormats.td
index 6bd0366..bff2d0f 100644
--- a/lib/Target/Mips/MipsMSAInstrFormats.td
+++ b/lib/Target/Mips/MipsMSAInstrFormats.td
@@ -7,7 +7,7 @@
//
//===----------------------------------------------------------------------===//
-def HasMSA : Predicate<"Subtarget.hasMSA()">,
+def HasMSA : Predicate<"Subtarget->hasMSA()">,
AssemblerPredicate<"FeatureMSA">;
class MSAInst : MipsInst<(outs), (ins), "", [], NoItinerary, FrmOther> {
diff --git a/lib/Target/Mips/MipsMachineFunction.h b/lib/Target/Mips/MipsMachineFunction.h
index e9101cc..8c16f82 100644
--- a/lib/Target/Mips/MipsMachineFunction.h
+++ b/lib/Target/Mips/MipsMachineFunction.h
@@ -15,7 +15,6 @@
#define MIPS_MACHINE_FUNCTION_INFO_H
#include "Mips16HardFloatInfo.h"
-#include "MipsSubtarget.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
diff --git a/lib/Target/Mips/MipsRegisterInfo.cpp b/lib/Target/Mips/MipsRegisterInfo.cpp
index 83d25ab..084449b 100644
--- a/lib/Target/Mips/MipsRegisterInfo.cpp
+++ b/lib/Target/Mips/MipsRegisterInfo.cpp
@@ -93,6 +93,9 @@ MipsRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
if (Subtarget.isFP64bit())
return CSR_O32_FP64_SaveList;
+ if (Subtarget.isFPXX())
+ return CSR_O32_FPXX_SaveList;
+
return CSR_O32_SaveList;
}
@@ -110,6 +113,9 @@ MipsRegisterInfo::getCallPreservedMask(CallingConv::ID) const {
if (Subtarget.isFP64bit())
return CSR_O32_FP64_RegMask;
+ if (Subtarget.isFPXX())
+ return CSR_O32_FPXX_RegMask;
+
return CSR_O32_RegMask;
}
@@ -201,6 +207,11 @@ getReservedRegs(const MachineFunction &MF) const {
Reserved.set(Mips::GP_64);
}
+ if (Subtarget.isABI_O32() && !Subtarget.useOddSPReg()) {
+ for (const auto &Reg : Mips::OddSPRegClass)
+ Reserved.set(Reg);
+ }
+
return Reserved;
}
diff --git a/lib/Target/Mips/MipsRegisterInfo.td b/lib/Target/Mips/MipsRegisterInfo.td
index 875a596..6323da3 100644
--- a/lib/Target/Mips/MipsRegisterInfo.td
+++ b/lib/Target/Mips/MipsRegisterInfo.td
@@ -340,6 +340,12 @@ def AFGR64 : RegisterClass<"Mips", [f64], 64, (add
def FGR64 : RegisterClass<"Mips", [f64], 64, (sequence "D%u_64", 0, 31)>;
+// Used to reserve odd registers when given -mattr=+nooddspreg
+def OddSP : RegisterClass<"Mips", [f32], 32,
+ (add (decimate (sequence "F%u", 1, 31), 2),
+ (decimate (sequence "F_HI%u", 1, 31), 2))>,
+ Unallocatable;
+
// FP control registers.
def CCR : RegisterClass<"Mips", [i32], 32, (sequence "FCR%u", 0, 31)>,
Unallocatable;
@@ -348,6 +354,10 @@ def CCR : RegisterClass<"Mips", [i32], 32, (sequence "FCR%u", 0, 31)>,
def FCC : RegisterClass<"Mips", [i32], 32, (sequence "FCC%u", 0, 7)>,
Unallocatable;
+// MIPS32r6/MIPS64r6 store FPU condition codes in normal FGR registers.
+// This class allows us to represent this in codegen patterns.
+def FGRCC : RegisterClass<"Mips", [i32], 32, (sequence "F%u", 0, 31)>;
+
def MSA128B: RegisterClass<"Mips", [v16i8], 128,
(sequence "W%u", 0, 31)>;
def MSA128H: RegisterClass<"Mips", [v8i16, v8f16], 128,
@@ -512,6 +522,12 @@ def FGR32Opnd : RegisterOperand<FGR32> {
let ParserMatchClass = FGR32AsmOperand;
}
+def FGRCCOpnd : RegisterOperand<FGRCC> {
+ // The assembler doesn't use register classes so we can re-use
+ // FGR32AsmOperand.
+ let ParserMatchClass = FGR32AsmOperand;
+}
+
def FGRH32Opnd : RegisterOperand<FGRH32> {
let ParserMatchClass = FGRH32AsmOperand;
}
diff --git a/lib/Target/Mips/MipsSEFrameLowering.cpp b/lib/Target/Mips/MipsSEFrameLowering.cpp
index 6ad5821..6573070 100644
--- a/lib/Target/Mips/MipsSEFrameLowering.cpp
+++ b/lib/Target/Mips/MipsSEFrameLowering.cpp
@@ -16,6 +16,7 @@
#include "MipsAnalyzeImmediate.h"
#include "MipsMachineFunction.h"
#include "MipsSEInstrInfo.h"
+#include "MipsSubtarget.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -257,6 +258,9 @@ bool ExpandPseudo::expandCopyACC(MachineBasicBlock &MBB, Iter I,
return true;
}
+MipsSEFrameLowering::MipsSEFrameLowering(const MipsSubtarget &STI)
+ : MipsFrameLowering(STI, STI.stackAlignment()) {}
+
unsigned MipsSEFrameLowering::ehDataReg(unsigned I) const {
static const unsigned EhDataReg[] = {
Mips::A0, Mips::A1, Mips::A2, Mips::A3
diff --git a/lib/Target/Mips/MipsSEFrameLowering.h b/lib/Target/Mips/MipsSEFrameLowering.h
index 5d2801f..e832848 100644
--- a/lib/Target/Mips/MipsSEFrameLowering.h
+++ b/lib/Target/Mips/MipsSEFrameLowering.h
@@ -20,8 +20,7 @@ namespace llvm {
class MipsSEFrameLowering : public MipsFrameLowering {
public:
- explicit MipsSEFrameLowering(const MipsSubtarget &STI)
- : MipsFrameLowering(STI, STI.stackAlignment()) {}
+ explicit MipsSEFrameLowering(const MipsSubtarget &STI);
/// emitProlog/emitEpilog - These methods insert prolog and epilog code into
/// the function.
diff --git a/lib/Target/Mips/MipsSEISelDAGToDAG.cpp b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
index d5385be..6f35947 100644
--- a/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
+++ b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
@@ -37,7 +37,7 @@ using namespace llvm;
#define DEBUG_TYPE "mips-isel"
bool MipsSEDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
- if (Subtarget.inMips16Mode())
+ if (Subtarget->inMips16Mode())
return false;
return MipsDAGToDAGISel::runOnMachineFunction(MF);
}
@@ -134,7 +134,7 @@ void MipsSEDAGToDAGISel::initGlobalBaseReg(MachineFunction &MF) {
unsigned V0, V1, GlobalBaseReg = MipsFI->getGlobalBaseReg();
const TargetRegisterClass *RC;
- if (Subtarget.isABI_N64())
+ if (Subtarget->isABI_N64())
RC = (const TargetRegisterClass*)&Mips::GPR64RegClass;
else
RC = (const TargetRegisterClass*)&Mips::GPR32RegClass;
@@ -142,7 +142,7 @@ void MipsSEDAGToDAGISel::initGlobalBaseReg(MachineFunction &MF) {
V0 = RegInfo.createVirtualRegister(RC);
V1 = RegInfo.createVirtualRegister(RC);
- if (Subtarget.isABI_N64()) {
+ if (Subtarget->isABI_N64()) {
MF.getRegInfo().addLiveIn(Mips::T9_64);
MBB.addLiveIn(Mips::T9_64);
@@ -174,7 +174,7 @@ void MipsSEDAGToDAGISel::initGlobalBaseReg(MachineFunction &MF) {
MF.getRegInfo().addLiveIn(Mips::T9);
MBB.addLiveIn(Mips::T9);
- if (Subtarget.isABI_N32()) {
+ if (Subtarget->isABI_N32()) {
// lui $v0, %hi(%neg(%gp_rel(fname)))
// addu $v1, $v0, $t9
// addiu $globalbasereg, $v1, %lo(%neg(%gp_rel(fname)))
@@ -187,7 +187,7 @@ void MipsSEDAGToDAGISel::initGlobalBaseReg(MachineFunction &MF) {
return;
}
- assert(Subtarget.isABI_O32());
+ assert(Subtarget->isABI_O32());
// For O32 ABI, the following instruction sequence is emitted to initialize
// the global base register:
@@ -408,7 +408,7 @@ bool MipsSEDAGToDAGISel::selectIntAddrMSA(SDValue Addr, SDValue &Base,
// * MSA is enabled
// * N is a ISD::BUILD_VECTOR representing a constant splat
bool MipsSEDAGToDAGISel::selectVSplat(SDNode *N, APInt &Imm) const {
- if (!Subtarget.hasMSA())
+ if (!Subtarget->hasMSA())
return false;
BuildVectorSDNode *Node = dyn_cast<BuildVectorSDNode>(N);
@@ -422,7 +422,7 @@ bool MipsSEDAGToDAGISel::selectVSplat(SDNode *N, APInt &Imm) const {
if (!Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
HasAnyUndefs, 8,
- !Subtarget.isLittle()))
+ !Subtarget->isLittle()))
return false;
Imm = SplatValue;
@@ -648,7 +648,7 @@ std::pair<bool, SDNode*> MipsSEDAGToDAGISel::selectNode(SDNode *Node) {
}
case ISD::ADDE: {
- if (Subtarget.hasDSP()) // Select DSP instructions, ADDSC and ADDWC.
+ if (Subtarget->hasDSP()) // Select DSP instructions, ADDSC and ADDWC.
break;
SDValue InFlag = Node->getOperand(2);
Result = selectAddESubE(Mips::ADDu, InFlag, InFlag.getValue(0), DL, Node);
@@ -658,11 +658,11 @@ std::pair<bool, SDNode*> MipsSEDAGToDAGISel::selectNode(SDNode *Node) {
case ISD::ConstantFP: {
ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(Node);
if (Node->getValueType(0) == MVT::f64 && CN->isExactlyValue(+0.0)) {
- if (Subtarget.isGP64bit()) {
+ if (Subtarget->isGP64bit()) {
SDValue Zero = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL,
Mips::ZERO_64, MVT::i64);
Result = CurDAG->getMachineNode(Mips::DMTC1, DL, MVT::f64, Zero);
- } else if (Subtarget.isFP64bit()) {
+ } else if (Subtarget->isFP64bit()) {
SDValue Zero = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL,
Mips::ZERO, MVT::i32);
Result = CurDAG->getMachineNode(Mips::BuildPairF64_64, DL, MVT::f64,
@@ -813,12 +813,12 @@ std::pair<bool, SDNode*> MipsSEDAGToDAGISel::selectNode(SDNode *Node) {
EVT ResVecTy = BVN->getValueType(0);
EVT ViaVecTy;
- if (!Subtarget.hasMSA() || !BVN->getValueType(0).is128BitVector())
+ if (!Subtarget->hasMSA() || !BVN->getValueType(0).is128BitVector())
return std::make_pair(false, nullptr);
if (!BVN->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
HasAnyUndefs, 8,
- !Subtarget.isLittle()))
+ !Subtarget->isLittle()))
return std::make_pair(false, nullptr);
switch (SplatBitSize) {
diff --git a/lib/Target/Mips/MipsSEISelLowering.cpp b/lib/Target/Mips/MipsSEISelLowering.cpp
index 969d730..be4ca86 100644
--- a/lib/Target/Mips/MipsSEISelLowering.cpp
+++ b/lib/Target/Mips/MipsSEISelLowering.cpp
@@ -39,7 +39,7 @@ MipsSETargetLowering::MipsSETargetLowering(MipsTargetMachine &TM)
// Set up the register classes
addRegisterClass(MVT::i32, &Mips::GPR32RegClass);
- if (isGP64bit())
+ if (Subtarget->isGP64bit())
addRegisterClass(MVT::i64, &Mips::GPR64RegClass);
if (Subtarget->hasDSP() || Subtarget->hasMSA()) {
@@ -120,10 +120,10 @@ MipsSETargetLowering::MipsSETargetLowering(MipsTargetMachine &TM)
if (Subtarget->hasCnMips())
setOperationAction(ISD::MUL, MVT::i64, Legal);
- else if (isGP64bit())
+ else if (Subtarget->isGP64bit())
setOperationAction(ISD::MUL, MVT::i64, Custom);
- if (isGP64bit()) {
+ if (Subtarget->isGP64bit()) {
setOperationAction(ISD::MULHS, MVT::i64, Custom);
setOperationAction(ISD::MULHU, MVT::i64, Custom);
}
@@ -152,6 +152,76 @@ MipsSETargetLowering::MipsSETargetLowering(MipsTargetMachine &TM)
setOperationAction(ISD::STORE, MVT::f64, Custom);
}
+ if (Subtarget->hasMips32r6()) {
+ // MIPS32r6 replaces the accumulator-based multiplies with a three register
+ // instruction
+ setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
+ setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
+ setOperationAction(ISD::MUL, MVT::i32, Legal);
+ setOperationAction(ISD::MULHS, MVT::i32, Legal);
+ setOperationAction(ISD::MULHU, MVT::i32, Legal);
+
+ // MIPS32r6 replaces the accumulator-based division/remainder with separate
+ // three register division and remainder instructions.
+ setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
+ setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
+ setOperationAction(ISD::SDIV, MVT::i32, Legal);
+ setOperationAction(ISD::UDIV, MVT::i32, Legal);
+ setOperationAction(ISD::SREM, MVT::i32, Legal);
+ setOperationAction(ISD::UREM, MVT::i32, Legal);
+
+ // MIPS32r6 replaces conditional moves with an equivalent that removes the
+ // need for three GPR read ports.
+ setOperationAction(ISD::SETCC, MVT::i32, Legal);
+ setOperationAction(ISD::SELECT, MVT::i32, Legal);
+ setOperationAction(ISD::SELECT_CC, MVT::i32, Expand);
+
+ setOperationAction(ISD::SETCC, MVT::f32, Legal);
+ setOperationAction(ISD::SELECT, MVT::f32, Legal);
+ setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
+
+ assert(Subtarget->isFP64bit() && "FR=1 is required for MIPS32r6");
+ setOperationAction(ISD::SETCC, MVT::f64, Legal);
+ setOperationAction(ISD::SELECT, MVT::f64, Legal);
+ setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
+
+ setOperationAction(ISD::BRCOND, MVT::Other, Legal);
+
+ // Floating point > and >= are supported via < and <=
+ setCondCodeAction(ISD::SETOGE, MVT::f32, Expand);
+ setCondCodeAction(ISD::SETOGT, MVT::f32, Expand);
+ setCondCodeAction(ISD::SETUGE, MVT::f32, Expand);
+ setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
+
+ setCondCodeAction(ISD::SETOGE, MVT::f64, Expand);
+ setCondCodeAction(ISD::SETOGT, MVT::f64, Expand);
+ setCondCodeAction(ISD::SETUGE, MVT::f64, Expand);
+ setCondCodeAction(ISD::SETUGT, MVT::f64, Expand);
+ }
+
+ if (Subtarget->hasMips64r6()) {
+ // MIPS64r6 replaces the accumulator-based multiplies with a three register
+ // instruction
+ setOperationAction(ISD::MUL, MVT::i64, Legal);
+ setOperationAction(ISD::MULHS, MVT::i64, Legal);
+ setOperationAction(ISD::MULHU, MVT::i64, Legal);
+
+ // MIPS64r6 replaces the accumulator-based division/remainder with separate
+ // three register division and remainder instructions.
+ setOperationAction(ISD::SDIVREM, MVT::i64, Expand);
+ setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
+ setOperationAction(ISD::SDIV, MVT::i64, Legal);
+ setOperationAction(ISD::UDIV, MVT::i64, Legal);
+ setOperationAction(ISD::SREM, MVT::i64, Legal);
+ setOperationAction(ISD::UREM, MVT::i64, Legal);
+
+ // MIPS64r6 replaces conditional moves with an equivalent that removes the
+ // need for three GPR read ports.
+ setOperationAction(ISD::SETCC, MVT::i64, Legal);
+ setOperationAction(ISD::SELECT, MVT::i64, Legal);
+ setOperationAction(ISD::SELECT_CC, MVT::i64, Expand);
+ }
+
computeRegisterProperties();
}
@@ -160,6 +230,14 @@ llvm::createMipsSETargetLowering(MipsTargetMachine &TM) {
return new MipsSETargetLowering(TM);
}
+const TargetRegisterClass *
+MipsSETargetLowering::getRepRegClassFor(MVT VT) const {
+ if (VT == MVT::Untyped)
+ return Subtarget->hasDSP() ? &Mips::ACC64DSPRegClass : &Mips::ACC64RegClass;
+
+ return TargetLowering::getRepRegClassFor(VT);
+}
+
// Enable MSA support for the given integer type and Register class.
void MipsSETargetLowering::
addMSAIntType(MVT::SimpleValueType Ty, const TargetRegisterClass *RC) {
@@ -449,8 +527,8 @@ static SDValue performADDECombine(SDNode *N, SelectionDAG &DAG,
if (DCI.isBeforeLegalize())
return SDValue();
- if (Subtarget->hasMips32() && N->getValueType(0) == MVT::i32 &&
- selectMADD(N, &DAG))
+ if (Subtarget->hasMips32() && !Subtarget->hasMips32r6() &&
+ N->getValueType(0) == MVT::i32 && selectMADD(N, &DAG))
return SDValue(N, 0);
return SDValue();
@@ -1178,6 +1256,9 @@ SDValue MipsSETargetLowering::lowerSTORE(SDValue Op, SelectionDAG &DAG) const {
SDValue MipsSETargetLowering::lowerMulDiv(SDValue Op, unsigned NewOpc,
bool HasLo, bool HasHi,
SelectionDAG &DAG) const {
+ // MIPS32r6/MIPS64r6 removed accumulator based multiplies.
+ assert(!Subtarget->hasMips32r6());
+
EVT Ty = Op.getOperand(0).getValueType();
SDLoc DL(Op);
SDValue Mult = DAG.getNode(NewOpc, DL, MVT::Untyped,
@@ -1651,7 +1732,7 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
case Intrinsic::mips_copy_s_w:
return lowerMSACopyIntr(Op, DAG, MipsISD::VEXTRACT_SEXT_ELT);
case Intrinsic::mips_copy_s_d:
- if (hasMips64())
+ if (Subtarget->hasMips64())
// Lower directly into VEXTRACT_SEXT_ELT since i64 is legal on Mips64.
return lowerMSACopyIntr(Op, DAG, MipsISD::VEXTRACT_SEXT_ELT);
else {
@@ -1666,7 +1747,7 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
case Intrinsic::mips_copy_u_w:
return lowerMSACopyIntr(Op, DAG, MipsISD::VEXTRACT_ZEXT_ELT);
case Intrinsic::mips_copy_u_d:
- if (hasMips64())
+ if (Subtarget->hasMips64())
// Lower directly into VEXTRACT_ZEXT_ELT since i64 is legal on Mips64.
return lowerMSACopyIntr(Op, DAG, MipsISD::VEXTRACT_ZEXT_ELT);
else {
@@ -2943,8 +3024,8 @@ MipsSETargetLowering::emitINSERT_DF_VIDX(MachineInstr *MI,
unsigned SrcValReg = MI->getOperand(3).getReg();
const TargetRegisterClass *VecRC = nullptr;
- const TargetRegisterClass *GPRRC = isGP64bit() ? &Mips::GPR64RegClass
- : &Mips::GPR32RegClass;
+ const TargetRegisterClass *GPRRC =
+ Subtarget->isGP64bit() ? &Mips::GPR64RegClass : &Mips::GPR32RegClass;
unsigned EltLog2Size;
unsigned InsertOp = 0;
unsigned InsveOp = 0;
diff --git a/lib/Target/Mips/MipsSEISelLowering.h b/lib/Target/Mips/MipsSEISelLowering.h
index 03a20ef..13ef6fc 100644
--- a/lib/Target/Mips/MipsSEISelLowering.h
+++ b/lib/Target/Mips/MipsSEISelLowering.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef MipsSEISELLOWERING_H
-#define MipsSEISELLOWERING_H
+#ifndef MIPSSEISELLOWERING_H
+#define MIPSSEISELLOWERING_H
#include "MipsISelLowering.h"
#include "MipsRegisterInfo.h"
@@ -46,13 +46,7 @@ namespace llvm {
return false;
}
- const TargetRegisterClass *getRepRegClassFor(MVT VT) const override {
- if (VT == MVT::Untyped)
- return Subtarget->hasDSP() ? &Mips::ACC64DSPRegClass :
- &Mips::ACC64RegClass;
-
- return TargetLowering::getRepRegClassFor(VT);
- }
+ const TargetRegisterClass *getRepRegClassFor(MVT VT) const override;
private:
bool isEligibleForTailCallOptimization(const MipsCC &MipsCCInfo,
diff --git a/lib/Target/Mips/MipsSEInstrInfo.cpp b/lib/Target/Mips/MipsSEInstrInfo.cpp
index f6f364f..32da749 100644
--- a/lib/Target/Mips/MipsSEInstrInfo.cpp
+++ b/lib/Target/Mips/MipsSEInstrInfo.cpp
@@ -272,7 +272,7 @@ bool MipsSEInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
default:
return false;
case Mips::RetRA:
- expandRetRA(MBB, MI, Mips::RET);
+ expandRetRA(MBB, MI);
break;
case Mips::PseudoMFHI:
Opc = isMicroMips ? Mips::MFHI16_MM : Mips::MFHI;
@@ -428,9 +428,14 @@ unsigned MipsSEInstrInfo::getAnalyzableBrOpc(unsigned Opc) const {
}
void MipsSEInstrInfo::expandRetRA(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I,
- unsigned Opc) const {
- BuildMI(MBB, I, I->getDebugLoc(), get(Opc)).addReg(Mips::RA);
+ MachineBasicBlock::iterator I) const {
+ const auto &Subtarget = TM.getSubtarget<MipsSubtarget>();
+
+ if (Subtarget.isGP64bit())
+ BuildMI(MBB, I, I->getDebugLoc(), get(Mips::PseudoReturn64))
+ .addReg(Mips::RA_64);
+ else
+ BuildMI(MBB, I, I->getDebugLoc(), get(Mips::PseudoReturn)).addReg(Mips::RA);
}
std::pair<bool, bool>
@@ -542,20 +547,31 @@ void MipsSEInstrInfo::expandBuildPairF64(MachineBasicBlock &MBB,
const MCInstrDesc& Mtc1Tdd = get(Mips::MTC1);
DebugLoc dl = I->getDebugLoc();
const TargetRegisterInfo &TRI = getRegisterInfo();
+ bool HasMTHC1 = TM.getSubtarget<MipsSubtarget>().hasMips32r2() ||
+ TM.getSubtarget<MipsSubtarget>().hasMips32r6();
- // For FP32 mode:
- // mtc1 Lo, $fp
- // mtc1 Hi, $fp + 1
- // For FP64 mode:
+ // When mthc1 is available, use:
// mtc1 Lo, $fp
// mthc1 Hi, $fp
+ //
+ // Otherwise, for FP64:
+ // spill + reload via ldc1
+ // This has not been implemented since FP64 on MIPS32 and earlier is not
+ // supported.
+ //
+ // Otherwise, for FP32:
+ // mtc1 Lo, $fp
+ // mtc1 Hi, $fp + 1
BuildMI(MBB, I, dl, Mtc1Tdd, TRI.getSubReg(DstReg, Mips::sub_lo))
.addReg(LoReg);
- if (FP64) {
- // FIXME: The .addReg(DstReg, RegState::Implicit) is a white lie used to
- // temporarily work around a widespread bug in the -mfp64 support.
+ if (HasMTHC1 || FP64) {
+ assert(TM.getSubtarget<MipsSubtarget>().hasMips32r2() &&
+ "MTHC1 requires MIPS32r2");
+
+ // FIXME: The .addReg(DstReg) is a white lie used to temporarily work
+ // around a widespread bug in the -mfp64 support.
// The problem is that none of the 32-bit fpu ops mention the fact
// that they clobber the upper 32-bits of the 64-bit FPR. Fixing that
// requires a major overhaul of the FPU implementation which can't
@@ -565,9 +581,9 @@ void MipsSEInstrInfo::expandBuildPairF64(MachineBasicBlock &MBB,
// We therefore pretend that it reads the bottom 32-bits to
// artificially create a dependency and prevent the scheduler
// changing the behaviour of the code.
- BuildMI(MBB, I, dl, get(Mips::MTHC1), TRI.getSubReg(DstReg, Mips::sub_hi))
- .addReg(HiReg)
- .addReg(DstReg, RegState::Implicit);
+ BuildMI(MBB, I, dl, get(FP64 ? Mips::MTHC1_D64 : Mips::MTHC1_D32), DstReg)
+ .addReg(DstReg)
+ .addReg(HiReg);
} else
BuildMI(MBB, I, dl, Mtc1Tdd, TRI.getSubReg(DstReg, Mips::sub_hi))
.addReg(HiReg);
@@ -580,17 +596,16 @@ void MipsSEInstrInfo::expandEhReturn(MachineBasicBlock &MBB,
// indirect jump to TargetReg
const MipsSubtarget &STI = TM.getSubtarget<MipsSubtarget>();
unsigned ADDU = STI.isABI_N64() ? Mips::DADDu : Mips::ADDu;
- unsigned JR = STI.isABI_N64() ? Mips::JR64 : Mips::JR;
- unsigned SP = STI.isABI_N64() ? Mips::SP_64 : Mips::SP;
- unsigned RA = STI.isABI_N64() ? Mips::RA_64 : Mips::RA;
- unsigned T9 = STI.isABI_N64() ? Mips::T9_64 : Mips::T9;
- unsigned ZERO = STI.isABI_N64() ? Mips::ZERO_64 : Mips::ZERO;
+ unsigned SP = STI.isGP64bit() ? Mips::SP_64 : Mips::SP;
+ unsigned RA = STI.isGP64bit() ? Mips::RA_64 : Mips::RA;
+ unsigned T9 = STI.isGP64bit() ? Mips::T9_64 : Mips::T9;
+ unsigned ZERO = STI.isGP64bit() ? Mips::ZERO_64 : Mips::ZERO;
unsigned OffsetReg = I->getOperand(0).getReg();
unsigned TargetReg = I->getOperand(1).getReg();
// addu $ra, $v0, $zero
// addu $sp, $sp, $v1
- // jr $ra
+ // jr $ra (via RetRA)
if (TM.getRelocationModel() == Reloc::PIC_)
BuildMI(MBB, I, I->getDebugLoc(), TM.getInstrInfo()->get(ADDU), T9)
.addReg(TargetReg).addReg(ZERO);
@@ -598,7 +613,7 @@ void MipsSEInstrInfo::expandEhReturn(MachineBasicBlock &MBB,
.addReg(TargetReg).addReg(ZERO);
BuildMI(MBB, I, I->getDebugLoc(), TM.getInstrInfo()->get(ADDU), SP)
.addReg(SP).addReg(OffsetReg);
- BuildMI(MBB, I, I->getDebugLoc(), TM.getInstrInfo()->get(JR)).addReg(RA);
+ expandRetRA(MBB, I);
}
const MipsInstrInfo *llvm::createMipsSEInstrInfo(MipsTargetMachine &TM) {
diff --git a/lib/Target/Mips/MipsSEInstrInfo.h b/lib/Target/Mips/MipsSEInstrInfo.h
index aa68552..9ac94ce 100644
--- a/lib/Target/Mips/MipsSEInstrInfo.h
+++ b/lib/Target/Mips/MipsSEInstrInfo.h
@@ -81,8 +81,7 @@ public:
private:
unsigned getAnalyzableBrOpc(unsigned Opc) const override;
- void expandRetRA(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
- unsigned Opc) const;
+ void expandRetRA(MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const;
std::pair<bool, bool> compareOpndSize(unsigned Opc,
const MachineFunction &MF) const;
diff --git a/lib/Target/Mips/MipsSelectionDAGInfo.cpp b/lib/Target/Mips/MipsSelectionDAGInfo.cpp
index 0d4398e..edd8f67 100644
--- a/lib/Target/Mips/MipsSelectionDAGInfo.cpp
+++ b/lib/Target/Mips/MipsSelectionDAGInfo.cpp
@@ -16,9 +16,8 @@ using namespace llvm;
#define DEBUG_TYPE "mips-selectiondag-info"
-MipsSelectionDAGInfo::MipsSelectionDAGInfo(const MipsTargetMachine &TM)
- : TargetSelectionDAGInfo(TM) {
-}
+MipsSelectionDAGInfo::MipsSelectionDAGInfo(const DataLayout &DL)
+ : TargetSelectionDAGInfo(&DL) {}
MipsSelectionDAGInfo::~MipsSelectionDAGInfo() {
}
diff --git a/lib/Target/Mips/MipsSelectionDAGInfo.h b/lib/Target/Mips/MipsSelectionDAGInfo.h
index 6cafb55..2b3d527 100644
--- a/lib/Target/Mips/MipsSelectionDAGInfo.h
+++ b/lib/Target/Mips/MipsSelectionDAGInfo.h
@@ -22,7 +22,7 @@ class MipsTargetMachine;
class MipsSelectionDAGInfo : public TargetSelectionDAGInfo {
public:
- explicit MipsSelectionDAGInfo(const MipsTargetMachine &TM);
+ explicit MipsSelectionDAGInfo(const DataLayout &DL);
~MipsSelectionDAGInfo();
};
diff --git a/lib/Target/Mips/MipsSubtarget.cpp b/lib/Target/Mips/MipsSubtarget.cpp
index 74ec064..693daa3 100644
--- a/lib/Target/Mips/MipsSubtarget.cpp
+++ b/lib/Target/Mips/MipsSubtarget.cpp
@@ -60,11 +60,9 @@ Mips16ConstantIslands(
/// Select the Mips CPU for the given triple and cpu name.
/// FIXME: Merge with the copy in MipsMCTargetDesc.cpp
-static inline StringRef selectMipsCPU(StringRef TT, StringRef CPU) {
+static StringRef selectMipsCPU(Triple TT, StringRef CPU) {
if (CPU.empty() || CPU == "generic") {
- Triple TheTriple(TT);
- if (TheTriple.getArch() == Triple::mips ||
- TheTriple.getArch() == Triple::mipsel)
+ if (TT.getArch() == Triple::mips || TT.getArch() == Triple::mipsel)
CPU = "mips32";
else
CPU = "mips64";
@@ -74,39 +72,56 @@ static inline StringRef selectMipsCPU(StringRef TT, StringRef CPU) {
void MipsSubtarget::anchor() { }
+static std::string computeDataLayout(const MipsSubtarget &ST) {
+ std::string Ret = "";
+
+ // There are both little and big endian mips.
+ if (ST.isLittle())
+ Ret += "e";
+ else
+ Ret += "E";
+
+ Ret += "-m:m";
+
+ // Pointers are 32 bit on some ABIs.
+ if (!ST.isABI_N64())
+ Ret += "-p:32:32";
+
+ // 8 and 16 bit integers only need to have natural alignment, but try to
+ // align them to 32 bits. 64 bit integers have natural alignment.
+ Ret += "-i8:8:32-i16:16:32-i64:64";
+
+ // 32 bit registers are always available and the stack is at least 64 bit
+ // aligned. On N64 64 bit registers are also available and the stack is
+ // 128 bit aligned.
+ if (ST.isABI_N64() || ST.isABI_N32())
+ Ret += "-n32:64-S128";
+ else
+ Ret += "-n32-S64";
+
+ return Ret;
+}
+
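For illustration, computeDataLayout produces strings such as the following
(derived directly from the rules above):

  little-endian O32:  e-m:m-p:32:32-i8:8:32-i16:16:32-i64:64-n32-S64
  big-endian N64:     E-m:m-i8:8:32-i16:16:32-i64:64-n32:64-S128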
MipsSubtarget::MipsSubtarget(const std::string &TT, const std::string &CPU,
const std::string &FS, bool little,
Reloc::Model _RM, MipsTargetMachine *_TM)
: MipsGenSubtargetInfo(TT, CPU, FS), MipsArchVersion(Mips32),
MipsABI(UnknownABI), IsLittle(little), IsSingleFloat(false),
- IsFP64bit(false), IsNaN2008bit(false), IsGP64bit(false), HasVFPU(false),
- HasCnMips(false), IsLinux(true), HasMips3_32(false), HasMips3_32r2(false),
- HasMips4_32(false), HasMips4_32r2(false), HasMips5_32r2(false),
- InMips16Mode(false), InMips16HardFloat(Mips16HardFloat),
- InMicroMipsMode(false), HasDSP(false), HasDSPR2(false),
- AllowMixed16_32(Mixed16_32 | Mips_Os16), Os16(Mips_Os16), HasMSA(false),
- RM(_RM), OverrideMode(NoOverride), TM(_TM), TargetTriple(TT) {
- std::string CPUName = CPU;
- CPUName = selectMipsCPU(TT, CPUName);
-
- // Parse features string.
- ParseSubtargetFeatures(CPUName, FS);
-
- if (InMips16Mode && !TM->Options.UseSoftFloat) {
- // Hard float for mips16 means essentially to compile as soft float
- // but to use a runtime library for soft float that is written with
- // native mips32 floating point instructions (those runtime routines
- // run in mips32 hard float mode).
- TM->Options.UseSoftFloat = true;
- TM->Options.FloatABIType = FloatABI::Soft;
- InMips16HardFloat = true;
- }
+ IsFPXX(false), IsFP64bit(false), UseOddSPReg(true), IsNaN2008bit(false),
+ IsGP64bit(false), HasVFPU(false), HasCnMips(false), IsLinux(true),
+ HasMips3_32(false), HasMips3_32r2(false), HasMips4_32(false),
+ HasMips4_32r2(false), HasMips5_32r2(false), InMips16Mode(false),
+ InMips16HardFloat(Mips16HardFloat), InMicroMipsMode(false), HasDSP(false),
+ HasDSPR2(false), AllowMixed16_32(Mixed16_32 | Mips_Os16), Os16(Mips_Os16),
+ HasMSA(false), RM(_RM), OverrideMode(NoOverride), TM(_TM),
+ TargetTriple(TT),
+ DL(computeDataLayout(initializeSubtargetDependencies(CPU, FS, TM))),
+ TSInfo(DL), JITInfo(), InstrInfo(MipsInstrInfo::create(*TM)),
+ FrameLowering(MipsFrameLowering::create(*TM, *this)),
+ TLInfo(MipsTargetLowering::create(*TM)) {
PreviousInMips16Mode = InMips16Mode;
- // Initialize scheduling itinerary for the specified CPU.
- InstrItins = getInstrItineraryForCPU(CPUName);
-
// Don't even attempt to generate code for MIPS-I, MIPS-II, MIPS-III, and
// MIPS-V. They have not been tested and currently exist for the integrated
// assembler only.
@@ -137,6 +152,11 @@ MipsSubtarget::MipsSubtarget(const std::string &TT, const std::string &CPU,
"See -mattr=+fp64.",
false);
+ if (!isABI_O32() && !useOddSPReg())
+ report_fatal_error("-mattr=+nooddspreg is not currently permitted for a "
+ "the O32 ABI.",
+ false);
+
if (hasMips32r6()) {
StringRef ISA = hasMips64r6() ? "MIPS64r6" : "MIPS32r6";
@@ -167,6 +187,29 @@ MipsSubtarget::enablePostRAScheduler(CodeGenOpt::Level OptLevel,
return OptLevel >= CodeGenOpt::Aggressive;
}
+MipsSubtarget &
+MipsSubtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS,
+ const TargetMachine *TM) {
+ std::string CPUName = selectMipsCPU(TargetTriple, CPU);
+
+ // Parse features string.
+ ParseSubtargetFeatures(CPUName, FS);
+ // Initialize scheduling itinerary for the specified CPU.
+ InstrItins = getInstrItineraryForCPU(CPUName);
+
+ if (InMips16Mode && !TM->Options.UseSoftFloat) {
+ // Hard float for mips16 means essentially to compile as soft float
+ // but to use a runtime library for soft float that is written with
+ // native mips32 floating point instructions (those runtime routines
+ // run in mips32 hard float mode).
+ TM->Options.UseSoftFloat = true;
+ TM->Options.FloatABIType = FloatABI::Soft;
+ InMips16HardFloat = true;
+ }
+
+ return *this;
+}
+
// FIXME: This logic for resetting the subtarget along with
// the helper classes can probably be simplified but there are a lot of
// cases so we will defer rewriting this to later.
@@ -186,14 +229,14 @@ void MipsSubtarget::resetSubtarget(MachineFunction *MF) {
return;
OverrideMode = Mips16Override;
PreviousInMips16Mode = true;
- TM->setHelperClassesMips16();
+ setHelperClassesMips16();
return;
} else if (ChangeToNoMips16) {
if (!PreviousInMips16Mode)
return;
OverrideMode = NoMips16Override;
PreviousInMips16Mode = false;
- TM->setHelperClassesMipsSE();
+ setHelperClassesMipsSE();
return;
} else {
if (OverrideMode == NoOverride)
@@ -201,16 +244,52 @@ void MipsSubtarget::resetSubtarget(MachineFunction *MF) {
OverrideMode = NoOverride;
DEBUG(dbgs() << "back to default" << "\n");
if (inMips16Mode() && !PreviousInMips16Mode) {
- TM->setHelperClassesMips16();
+ setHelperClassesMips16();
PreviousInMips16Mode = true;
} else if (!inMips16Mode() && PreviousInMips16Mode) {
- TM->setHelperClassesMipsSE();
+ setHelperClassesMipsSE();
PreviousInMips16Mode = false;
}
return;
}
}
+void MipsSubtarget::setHelperClassesMips16() {
+ InstrInfoSE.swap(InstrInfo);
+ FrameLoweringSE.swap(FrameLowering);
+ TLInfoSE.swap(TLInfo);
+ if (!InstrInfo16) {
+ InstrInfo.reset(MipsInstrInfo::create(*TM));
+ FrameLowering.reset(MipsFrameLowering::create(*TM, *this));
+ TLInfo.reset(MipsTargetLowering::create(*TM));
+ } else {
+ InstrInfo16.swap(InstrInfo);
+ FrameLowering16.swap(FrameLowering);
+ TLInfo16.swap(TLInfo);
+ }
+ assert(TLInfo && "null target lowering 16");
+ assert(InstrInfo && "null instr info 16");
+ assert(FrameLowering && "null frame lowering 16");
+}
+
+void MipsSubtarget::setHelperClassesMipsSE() {
+ InstrInfo16.swap(InstrInfo);
+ FrameLowering16.swap(FrameLowering);
+ TLInfo16.swap(TLInfo);
+ if (!InstrInfoSE) {
+ InstrInfo.reset(MipsInstrInfo::create(*TM));
+ FrameLowering.reset(MipsFrameLowering::create(*TM, *this));
+ TLInfo.reset(MipsTargetLowering::create(*TM));
+ } else {
+ InstrInfoSE.swap(InstrInfo);
+ FrameLoweringSE.swap(FrameLowering);
+ TLInfoSE.swap(TLInfo);
+ }
+ assert(TLInfo && "null target lowering in SE");
+ assert(InstrInfo && "null instr info SE");
+ assert(FrameLowering && "null frame lowering SE");
+}
+
bool MipsSubtarget::mipsSEUsesSoftFloat() const {
return TM->Options.UseSoftFloat && !InMips16HardFloat;
}
diff --git a/lib/Target/Mips/MipsSubtarget.h b/lib/Target/Mips/MipsSubtarget.h
index 373f481..a3dcf03 100644
--- a/lib/Target/Mips/MipsSubtarget.h
+++ b/lib/Target/Mips/MipsSubtarget.h
@@ -14,6 +14,12 @@
#ifndef MIPSSUBTARGET_H
#define MIPSSUBTARGET_H
+#include "MipsFrameLowering.h"
+#include "MipsISelLowering.h"
+#include "MipsInstrInfo.h"
+#include "MipsJITInfo.h"
+#include "MipsSelectionDAGInfo.h"
+#include "llvm/IR/DataLayout.h"
#include "llvm/MC/MCInstrItineraries.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetSubtargetInfo.h"
@@ -56,9 +62,16 @@ protected:
// floating point registers instead of only using even ones.
bool IsSingleFloat;
+ // IsFPXX - MIPS O32 modeless ABI.
+ bool IsFPXX;
+
// IsFP64bit - The target processor has 64-bit floating point registers.
bool IsFP64bit;
+ /// Are odd single-precision registers permitted?
+ /// This corresponds to -modd-spreg and -mno-odd-spreg
+ bool UseOddSPReg;
+
// IsNan2008 - IEEE 754-2008 NaN encoding.
bool IsNaN2008bit;
@@ -132,6 +145,20 @@ protected:
MipsTargetMachine *TM;
Triple TargetTriple;
+
+ const DataLayout DL; // Calculates type size & alignment
+ const MipsSelectionDAGInfo TSInfo;
+ MipsJITInfo JITInfo;
+ std::unique_ptr<const MipsInstrInfo> InstrInfo;
+ std::unique_ptr<const MipsFrameLowering> FrameLowering;
+ std::unique_ptr<const MipsTargetLowering> TLInfo;
+ std::unique_ptr<const MipsInstrInfo> InstrInfo16;
+ std::unique_ptr<const MipsFrameLowering> FrameLowering16;
+ std::unique_ptr<const MipsTargetLowering> TLInfo16;
+ std::unique_ptr<const MipsInstrInfo> InstrInfoSE;
+ std::unique_ptr<const MipsFrameLowering> FrameLoweringSE;
+ std::unique_ptr<const MipsTargetLowering> TLInfoSE;
+
public:
bool enablePostRAScheduler(CodeGenOpt::Level OptLevel,
AntiDepBreakMode& Mode,
@@ -142,6 +169,7 @@ public:
bool isABI_N64() const { return MipsABI == N64; }
bool isABI_N32() const { return MipsABI == N32; }
bool isABI_O32() const { return MipsABI == O32; }
+ bool isABI_FPXX() const { return false; } // TODO: add check for FPXX
unsigned getTargetABI() const { return MipsABI; }
/// This constructor initializes the data members to match that
@@ -154,23 +182,36 @@ public:
/// subtarget options. Definition of function is auto generated by tblgen.
void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
+ bool hasMips1() const { return MipsArchVersion >= Mips1; }
bool hasMips2() const { return MipsArchVersion >= Mips2; }
bool hasMips3() const { return MipsArchVersion >= Mips3; }
+ bool hasMips4() const { return MipsArchVersion >= Mips4; }
+ bool hasMips5() const { return MipsArchVersion >= Mips5; }
bool hasMips4_32() const { return HasMips4_32; }
bool hasMips4_32r2() const { return HasMips4_32r2; }
- bool hasMips32() const { return MipsArchVersion >= Mips32; }
- bool hasMips32r2() const { return MipsArchVersion == Mips32r2 ||
- MipsArchVersion == Mips64r2; }
- bool hasMips32r6() const { return MipsArchVersion == Mips32r6 ||
- MipsArchVersion == Mips64r6; }
+ bool hasMips32() const {
+ return MipsArchVersion >= Mips32 && MipsArchVersion != Mips3 &&
+ MipsArchVersion != Mips4 && MipsArchVersion != Mips5;
+ }
+ bool hasMips32r2() const {
+ return MipsArchVersion == Mips32r2 || MipsArchVersion == Mips32r6 ||
+ MipsArchVersion == Mips64r2 || MipsArchVersion == Mips64r6;
+ }
+ bool hasMips32r6() const {
+ return MipsArchVersion == Mips32r6 || MipsArchVersion == Mips64r6;
+ }
bool hasMips64() const { return MipsArchVersion >= Mips64; }
- bool hasMips64r2() const { return MipsArchVersion == Mips64r2; }
+ bool hasMips64r2() const {
+ return MipsArchVersion == Mips64r2 || MipsArchVersion == Mips64r6;
+ }
bool hasMips64r6() const { return MipsArchVersion == Mips64r6; }
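  // For example, with the predicates above a Mips64r2 subtarget satisfies
  // hasMips32r2() and hasMips64r2(), but not hasMips32r6() or hasMips64r6().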
bool hasCnMips() const { return HasCnMips; }
bool isLittle() const { return IsLittle; }
+ bool isFPXX() const { return IsFPXX; }
bool isFP64bit() const { return IsFP64bit; }
+ bool useOddSPReg() const { return UseOddSPReg; }
bool isNaN2008() const { return IsNaN2008bit; }
bool isNotFP64bit() const { return !IsFP64bit; }
bool isGP64bit() const { return IsGP64bit; }
@@ -234,12 +275,31 @@ public:
/// \brief Reset the subtarget for the Mips target.
void resetSubtarget(MachineFunction *MF);
+ MipsSubtarget &initializeSubtargetDependencies(StringRef CPU, StringRef FS,
+ const TargetMachine *TM);
+
/// Does the system support unaligned memory access.
///
/// MIPS32r6/MIPS64r6 require full unaligned access support but do not
/// specify which component of the system provides it. Hardware, software, and
/// hybrid implementations are all valid.
bool systemSupportsUnalignedAccess() const { return hasMips32r6(); }
+
+ // Set helper classes
+ void setHelperClassesMips16();
+ void setHelperClassesMipsSE();
+
+ MipsJITInfo *getJITInfo() { return &JITInfo; }
+ const MipsSelectionDAGInfo *getSelectionDAGInfo() const { return &TSInfo; }
+ const DataLayout *getDataLayout() const { return &DL; }
+ const MipsInstrInfo *getInstrInfo() const { return InstrInfo.get(); }
+ const TargetFrameLowering *getFrameLowering() const {
+ return FrameLowering.get();
+ }
+ const MipsRegisterInfo *getRegisterInfo() const {
+ return &InstrInfo->getRegisterInfo();
+ }
+ const MipsTargetLowering *getTargetLowering() const { return TLInfo.get(); }
};
} // End llvm namespace
diff --git a/lib/Target/Mips/MipsTargetMachine.cpp b/lib/Target/Mips/MipsTargetMachine.cpp
index 984c58e..425dbf1 100644
--- a/lib/Target/Mips/MipsTargetMachine.cpp
+++ b/lib/Target/Mips/MipsTargetMachine.cpp
@@ -45,93 +45,21 @@ extern "C" void LLVMInitializeMipsTarget() {
RegisterTargetMachine<MipselTargetMachine> B(TheMips64elTarget);
}
-static std::string computeDataLayout(const MipsSubtarget &ST) {
- std::string Ret = "";
-
- // There are both little and big endian mips.
- if (ST.isLittle())
- Ret += "e";
- else
- Ret += "E";
-
- Ret += "-m:m";
-
- // Pointers are 32 bit on some ABIs.
- if (!ST.isABI_N64())
- Ret += "-p:32:32";
-
- // 8 and 16 bit integers only need no have natural alignment, but try to
- // align them to 32 bits. 64 bit integers have natural alignment.
- Ret += "-i8:8:32-i16:16:32-i64:64";
-
- // 32 bit registers are always available and the stack is at least 64 bit
- // aligned. On N64 64 bit registers are also available and the stack is
- // 128 bit aligned.
- if (ST.isABI_N64() || ST.isABI_N32())
- Ret += "-n32:64-S128";
- else
- Ret += "-n32-S64";
-
- return Ret;
-}
-
// On function prologue, the stack is created by decrementing
// its pointer. Once decremented, all references are done with positive
// offset from the stack/frame pointer, using StackGrowsUp enables
// an easier handling.
// Using CodeModel::Large enables different CALL behavior.
-MipsTargetMachine::
-MipsTargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS, const TargetOptions &Options,
- Reloc::Model RM, CodeModel::Model CM,
- CodeGenOpt::Level OL,
- bool isLittle)
- : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
- Subtarget(TT, CPU, FS, isLittle, RM, this),
- DL(computeDataLayout(Subtarget)),
- InstrInfo(MipsInstrInfo::create(*this)),
- FrameLowering(MipsFrameLowering::create(*this, Subtarget)),
- TLInfo(MipsTargetLowering::create(*this)), TSInfo(*this),
- InstrItins(Subtarget.getInstrItineraryData()), JITInfo() {
+MipsTargetMachine::MipsTargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL, bool isLittle)
+ : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
+ Subtarget(TT, CPU, FS, isLittle, RM, this) {
initAsmInfo();
}
-
-void MipsTargetMachine::setHelperClassesMips16() {
- InstrInfoSE.swap(InstrInfo);
- FrameLoweringSE.swap(FrameLowering);
- TLInfoSE.swap(TLInfo);
- if (!InstrInfo16) {
- InstrInfo.reset(MipsInstrInfo::create(*this));
- FrameLowering.reset(MipsFrameLowering::create(*this, Subtarget));
- TLInfo.reset(MipsTargetLowering::create(*this));
- } else {
- InstrInfo16.swap(InstrInfo);
- FrameLowering16.swap(FrameLowering);
- TLInfo16.swap(TLInfo);
- }
- assert(TLInfo && "null target lowering 16");
- assert(InstrInfo && "null instr info 16");
- assert(FrameLowering && "null frame lowering 16");
-}
-
-void MipsTargetMachine::setHelperClassesMipsSE() {
- InstrInfo16.swap(InstrInfo);
- FrameLowering16.swap(FrameLowering);
- TLInfo16.swap(TLInfo);
- if (!InstrInfoSE) {
- InstrInfo.reset(MipsInstrInfo::create(*this));
- FrameLowering.reset(MipsFrameLowering::create(*this, Subtarget));
- TLInfo.reset(MipsTargetLowering::create(*this));
- } else {
- InstrInfoSE.swap(InstrInfo);
- FrameLoweringSE.swap(FrameLowering);
- TLInfoSE.swap(TLInfo);
- }
- assert(TLInfo && "null target lowering in SE");
- assert(InstrInfo && "null instr info SE");
- assert(FrameLowering && "null frame lowering SE");
-}
void MipsebTargetMachine::anchor() { }
MipsebTargetMachine::
diff --git a/lib/Target/Mips/MipsTargetMachine.h b/lib/Target/Mips/MipsTargetMachine.h
index a5aa39b..a0e7d43 100644
--- a/lib/Target/Mips/MipsTargetMachine.h
+++ b/lib/Target/Mips/MipsTargetMachine.h
@@ -14,15 +14,9 @@
#ifndef MIPSTARGETMACHINE_H
#define MIPSTARGETMACHINE_H
-#include "MipsFrameLowering.h"
-#include "MipsISelLowering.h"
-#include "MipsInstrInfo.h"
-#include "MipsJITInfo.h"
-#include "MipsSelectionDAGInfo.h"
#include "MipsSubtarget.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
-#include "llvm/IR/DataLayout.h"
#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetMachine.h"
@@ -32,68 +26,47 @@ class MipsRegisterInfo;
class MipsTargetMachine : public LLVMTargetMachine {
MipsSubtarget Subtarget;
- const DataLayout DL; // Calculates type size & alignment
- std::unique_ptr<const MipsInstrInfo> InstrInfo;
- std::unique_ptr<const MipsFrameLowering> FrameLowering;
- std::unique_ptr<const MipsTargetLowering> TLInfo;
- std::unique_ptr<const MipsInstrInfo> InstrInfo16;
- std::unique_ptr<const MipsFrameLowering> FrameLowering16;
- std::unique_ptr<const MipsTargetLowering> TLInfo16;
- std::unique_ptr<const MipsInstrInfo> InstrInfoSE;
- std::unique_ptr<const MipsFrameLowering> FrameLoweringSE;
- std::unique_ptr<const MipsTargetLowering> TLInfoSE;
- MipsSelectionDAGInfo TSInfo;
- const InstrItineraryData &InstrItins;
- MipsJITInfo JITInfo;
public:
- MipsTargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS, const TargetOptions &Options,
- Reloc::Model RM, CodeModel::Model CM,
- CodeGenOpt::Level OL,
- bool isLittle);
+ MipsTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS,
+ const TargetOptions &Options, Reloc::Model RM,
+ CodeModel::Model CM, CodeGenOpt::Level OL, bool isLittle);
virtual ~MipsTargetMachine() {}
void addAnalysisPasses(PassManagerBase &PM) override;
- const MipsInstrInfo *getInstrInfo() const override
- { return InstrInfo.get(); }
- const TargetFrameLowering *getFrameLowering() const override
- { return FrameLowering.get(); }
- const MipsSubtarget *getSubtargetImpl() const override
- { return &Subtarget; }
- const DataLayout *getDataLayout() const override
- { return &DL;}
-
+ const MipsInstrInfo *getInstrInfo() const override {
+ return getSubtargetImpl()->getInstrInfo();
+ }
+ const TargetFrameLowering *getFrameLowering() const override {
+ return getSubtargetImpl()->getFrameLowering();
+ }
+ const MipsSubtarget *getSubtargetImpl() const override { return &Subtarget; }
const InstrItineraryData *getInstrItineraryData() const override {
- return Subtarget.inMips16Mode() ? nullptr : &InstrItins;
+ return Subtarget.inMips16Mode()
+ ? nullptr
+ : &getSubtargetImpl()->getInstrItineraryData();
+ }
+ MipsJITInfo *getJITInfo() override {
+ return Subtarget.getJITInfo();
}
-
- MipsJITInfo *getJITInfo() override { return &JITInfo; }
-
const MipsRegisterInfo *getRegisterInfo() const override {
- return &InstrInfo->getRegisterInfo();
+ return getSubtargetImpl()->getRegisterInfo();
}
-
const MipsTargetLowering *getTargetLowering() const override {
- return TLInfo.get();
+ return getSubtargetImpl()->getTargetLowering();
+ }
+ const DataLayout *getDataLayout() const override {
+ return getSubtargetImpl()->getDataLayout();
}
-
const MipsSelectionDAGInfo* getSelectionDAGInfo() const override {
- return &TSInfo;
+ return getSubtargetImpl()->getSelectionDAGInfo();
}
// Pass Pipeline Configuration
TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
bool addCodeEmitter(PassManagerBase &PM, JITCodeEmitter &JCE) override;
-
- // Set helper classes
- void setHelperClassesMips16();
-
- void setHelperClassesMipsSE();
-
-
};
/// MipsebTargetMachine - Mips32/64 big endian target machine.
diff --git a/lib/Target/Mips/MipsTargetStreamer.h b/lib/Target/Mips/MipsTargetStreamer.h
index 4ad37ac..99f7d4c 100644
--- a/lib/Target/Mips/MipsTargetStreamer.h
+++ b/lib/Target/Mips/MipsTargetStreamer.h
@@ -12,46 +12,83 @@
#include "llvm/MC/MCELFStreamer.h"
#include "llvm/MC/MCStreamer.h"
+#include "MCTargetDesc/MipsABIFlagsSection.h"
namespace llvm {
-class MipsTargetStreamer : public MCTargetStreamer {
- virtual void anchor();
+struct MipsABIFlagsSection;
+
+class MipsTargetStreamer : public MCTargetStreamer {
public:
MipsTargetStreamer(MCStreamer &S);
- virtual void emitDirectiveSetMicroMips() = 0;
- virtual void emitDirectiveSetNoMicroMips() = 0;
- virtual void emitDirectiveSetMips16() = 0;
- virtual void emitDirectiveSetNoMips16() = 0;
-
- virtual void emitDirectiveSetReorder() = 0;
- virtual void emitDirectiveSetNoReorder() = 0;
- virtual void emitDirectiveSetMacro() = 0;
- virtual void emitDirectiveSetNoMacro() = 0;
- virtual void emitDirectiveSetAt() = 0;
- virtual void emitDirectiveSetNoAt() = 0;
- virtual void emitDirectiveEnd(StringRef Name) = 0;
-
- virtual void emitDirectiveEnt(const MCSymbol &Symbol) = 0;
- virtual void emitDirectiveAbiCalls() = 0;
- virtual void emitDirectiveNaN2008() = 0;
- virtual void emitDirectiveNaNLegacy() = 0;
- virtual void emitDirectiveOptionPic0() = 0;
- virtual void emitDirectiveOptionPic2() = 0;
+ virtual void emitDirectiveSetMicroMips();
+ virtual void emitDirectiveSetNoMicroMips();
+ virtual void emitDirectiveSetMips16();
+ virtual void emitDirectiveSetNoMips16();
+
+ virtual void emitDirectiveSetReorder();
+ virtual void emitDirectiveSetNoReorder();
+ virtual void emitDirectiveSetMacro();
+ virtual void emitDirectiveSetNoMacro();
+ virtual void emitDirectiveSetAt();
+ virtual void emitDirectiveSetNoAt();
+ virtual void emitDirectiveEnd(StringRef Name);
+
+ virtual void emitDirectiveEnt(const MCSymbol &Symbol);
+ virtual void emitDirectiveAbiCalls();
+ virtual void emitDirectiveNaN2008();
+ virtual void emitDirectiveNaNLegacy();
+ virtual void emitDirectiveOptionPic0();
+ virtual void emitDirectiveOptionPic2();
virtual void emitFrame(unsigned StackReg, unsigned StackSize,
- unsigned ReturnReg) = 0;
- virtual void emitMask(unsigned CPUBitmask, int CPUTopSavedRegOff) = 0;
- virtual void emitFMask(unsigned FPUBitmask, int FPUTopSavedRegOff) = 0;
+ unsigned ReturnReg);
+ virtual void emitMask(unsigned CPUBitmask, int CPUTopSavedRegOff);
+ virtual void emitFMask(unsigned FPUBitmask, int FPUTopSavedRegOff);
- virtual void emitDirectiveSetMips32R2() = 0;
- virtual void emitDirectiveSetMips64() = 0;
- virtual void emitDirectiveSetMips64R2() = 0;
- virtual void emitDirectiveSetDsp() = 0;
+ virtual void emitDirectiveSetMips32R2();
+ virtual void emitDirectiveSetMips64();
+ virtual void emitDirectiveSetMips64R2();
+ virtual void emitDirectiveSetDsp();
// PIC support
- virtual void emitDirectiveCpload(unsigned RegNo) = 0;
+ virtual void emitDirectiveCpload(unsigned RegNo);
virtual void emitDirectiveCpsetup(unsigned RegNo, int RegOrOffset,
- const MCSymbol &Sym, bool IsReg) = 0;
+ const MCSymbol &Sym, bool IsReg);
+
+ /// Emit a '.module fp=value' directive using the given values.
+ /// Updates the .MIPS.abiflags section
+ virtual void emitDirectiveModuleFP(MipsABIFlagsSection::FpABIKind Value,
+ bool Is32BitABI) {
+ ABIFlagsSection.setFpABI(Value, Is32BitABI);
+ }
+
+ /// Emit a '.module fp=value' directive using the current values of the
+ /// .MIPS.abiflags section.
+ void emitDirectiveModuleFP() {
+ emitDirectiveModuleFP(ABIFlagsSection.getFpABI(),
+ ABIFlagsSection.Is32BitABI);
+ }
+
+ virtual void emitDirectiveModuleOddSPReg(bool Enabled, bool IsO32ABI);
+ virtual void emitDirectiveSetFp(MipsABIFlagsSection::FpABIKind Value){};
+ virtual void emitMipsAbiFlags(){};
+ void setCanHaveModuleDir(bool Can) { canHaveModuleDirective = Can; }
+ bool getCanHaveModuleDir() { return canHaveModuleDirective; }
+
+ // This method enables template classes to set internal abi flags
+ // structure values.
+ template <class PredicateLibrary>
+ void updateABIInfo(const PredicateLibrary &P) {
+ ABIFlagsSection.setAllFromPredicates(P);
+ }
+
+ MipsABIFlagsSection &getABIFlagsSection() { return ABIFlagsSection; }
+
+protected:
+ MipsABIFlagsSection ABIFlagsSection;
+
+private:
+ bool canHaveModuleDirective;
};
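In textual assembly output these hooks map onto GAS-style '.module' directives;
the ASCII streamer is expected to print lines such as

  .module fp=64
  .module oddspreg

(the exact operand follows the MipsABIFlagsSection::FpABIKind value and the
odd-spreg setting; the lines above are an illustration, not an exhaustive list).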
// This part is for ascii assembly output
@@ -93,6 +130,13 @@ public:
virtual void emitDirectiveCpload(unsigned RegNo);
void emitDirectiveCpsetup(unsigned RegNo, int RegOrOffset,
const MCSymbol &Sym, bool IsReg) override;
+
+ // ABI Flags
+ void emitDirectiveModuleFP(MipsABIFlagsSection::FpABIKind Value,
+ bool Is32BitABI) override;
+ void emitDirectiveModuleOddSPReg(bool Enabled, bool IsO32ABI) override;
+ void emitDirectiveSetFp(MipsABIFlagsSection::FpABIKind Value) override;
+ void emitMipsAbiFlags() override;
};
// This part is for ELF object output
@@ -144,6 +188,10 @@ public:
void emitDirectiveCpsetup(unsigned RegNo, int RegOrOffset,
const MCSymbol &Sym, bool IsReg) override;
+ // ABI Flags
+ void emitDirectiveModuleOddSPReg(bool Enabled, bool IsO32ABI) override;
+ void emitMipsAbiFlags() override;
+
protected:
bool isO32() const { return STI.getFeatureBits() & Mips::FeatureO32; }
bool isN32() const { return STI.getFeatureBits() & Mips::FeatureN32; }
diff --git a/lib/Target/NVPTX/NVPTX.td b/lib/Target/NVPTX/NVPTX.td
index d78b4e8..93fabf6 100644
--- a/lib/Target/NVPTX/NVPTX.td
+++ b/lib/Target/NVPTX/NVPTX.td
@@ -34,12 +34,18 @@ def SM30 : SubtargetFeature<"sm_30", "SmVersion", "30",
"Target SM 3.0">;
def SM35 : SubtargetFeature<"sm_35", "SmVersion", "35",
"Target SM 3.5">;
+def SM50 : SubtargetFeature<"sm_50", "SmVersion", "50",
+ "Target SM 5.0">;
// PTX Versions
def PTX30 : SubtargetFeature<"ptx30", "PTXVersion", "30",
"Use PTX version 3.0">;
def PTX31 : SubtargetFeature<"ptx31", "PTXVersion", "31",
"Use PTX version 3.1">;
+def PTX32 : SubtargetFeature<"ptx32", "PTXVersion", "32",
+ "Use PTX version 3.2">;
+def PTX40 : SubtargetFeature<"ptx40", "PTXVersion", "40",
+ "Use PTX version 4.0">;
//===----------------------------------------------------------------------===//
// NVPTX supported processors.
@@ -52,6 +58,7 @@ def : Proc<"sm_20", [SM20]>;
def : Proc<"sm_21", [SM21]>;
def : Proc<"sm_30", [SM30]>;
def : Proc<"sm_35", [SM35]>;
+def : Proc<"sm_50", [SM50]>;
def NVPTXInstrInfo : InstrInfo {
diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
index 4ec575f..decf02a 100644
--- a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
+++ b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
@@ -734,23 +734,7 @@ void NVPTXAsmPrinter::printReturnValStr(const Function *F, raw_ostream &O) {
<< " func_retval0";
} else {
if ((Ty->getTypeID() == Type::StructTyID) || isa<VectorType>(Ty)) {
- SmallVector<EVT, 16> vtparts;
- ComputeValueVTs(*TLI, Ty, vtparts);
- unsigned totalsz = 0;
- for (unsigned i = 0, e = vtparts.size(); i != e; ++i) {
- unsigned elems = 1;
- EVT elemtype = vtparts[i];
- if (vtparts[i].isVector()) {
- elems = vtparts[i].getVectorNumElements();
- elemtype = vtparts[i].getVectorElementType();
- }
- for (unsigned j = 0, je = elems; j != je; ++j) {
- unsigned sz = elemtype.getSizeInBits();
- if (elemtype.isInteger() && (sz < 8))
- sz = 8;
- totalsz += sz / 8;
- }
- }
+ unsigned totalsz = TD->getTypeAllocSize(Ty);
unsigned retAlignment = 0;
if (!llvm::getAlign(*F, 0, retAlignment))
retAlignment = TD->getABITypeAlignment(Ty);
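Note that switching to getTypeAllocSize also changes the size reported for
padded aggregates: for a struct such as { i32, i8 } the old element-by-element
sum gives 5 bytes, while getTypeAllocSize includes tail padding and yields 8
bytes under the default data layout.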
@@ -1321,6 +1305,10 @@ bool NVPTXAsmPrinter::doFinalization(Module &M) {
// external global variable with init -> .visible
// external without init -> .extern
// appending -> not allowed, assert.
+// for any linkage other than
+// internal, private, linker_private,
+// linker_private_weak, linker_private_weak_def_auto,
+// we emit -> .weak.
void NVPTXAsmPrinter::emitLinkageDirective(const GlobalValue *V,
raw_ostream &O) {
@@ -1346,6 +1334,9 @@ void NVPTXAsmPrinter::emitLinkageDirective(const GlobalValue *V,
msg.append(V->getName().str());
msg.append("has unsupported appending linkage type");
llvm_unreachable(msg.c_str());
+ } else if (!V->hasInternalLinkage() &&
+ !V->hasPrivateLinkage()) {
+ O << ".weak ";
}
}
}
@@ -1356,10 +1347,15 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar,
// Skip meta data
if (GVar->hasSection()) {
- if (GVar->getSection() == "llvm.metadata")
+ if (GVar->getSection() == StringRef("llvm.metadata"))
return;
}
+ // Skip LLVM intrinsic global variables
+ if (GVar->getName().startswith("llvm.") ||
+ GVar->getName().startswith("nvvm."))
+ return;
+
const DataLayout *TD = TM.getDataLayout();
// GlobalVariables are always constant pointers themselves.
@@ -1371,6 +1367,10 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar,
O << ".visible ";
else
O << ".extern ";
+ } else if (GVar->hasLinkOnceLinkage() || GVar->hasWeakLinkage() ||
+ GVar->hasAvailableExternallyLinkage() ||
+ GVar->hasCommonLinkage()) {
+ O << ".weak ";
}
if (llvm::isTexture(*GVar)) {
@@ -1438,7 +1438,7 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar,
O << "linear";
break;
case 2:
- assert(0 && "Anisotropic filtering is not supported");
+ llvm_unreachable("Anisotropic filtering is not supported");
default:
O << "nearest";
break;
@@ -1480,6 +1480,11 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar,
O << ".";
emitPTXAddressSpace(PTy->getAddressSpace(), O);
+
+ if (isManaged(*GVar)) {
+ O << " .attribute(.managed)";
+ }
+
if (GVar->getAlignment() == 0)
O << " .align " << (int) TD->getPrefTypeAlignment(ETy);
else
@@ -1497,13 +1502,24 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar,
// PTX allows variable initialization only for constant and global state
// spaces.
- if (((PTy->getAddressSpace() == llvm::ADDRESS_SPACE_GLOBAL) ||
- (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST)) &&
- GVar->hasInitializer()) {
- const Constant *Initializer = GVar->getInitializer();
- if (!Initializer->isNullValue()) {
- O << " = ";
- printScalarConstant(Initializer, O);
+ if (GVar->hasInitializer()) {
+ if ((PTy->getAddressSpace() == llvm::ADDRESS_SPACE_GLOBAL) ||
+ (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST)) {
+ const Constant *Initializer = GVar->getInitializer();
+ // 'undef' is treated as if no value were specified.
+ if (!Initializer->isNullValue() && !isa<UndefValue>(Initializer)) {
+ O << " = ";
+ printScalarConstant(Initializer, O);
+ }
+ } else {
+ // The frontend adds a zero-initializer to variables that don't have an
+ // initial value, so skip the warning in this case.
+ if (!GVar->getInitializer()->isNullValue()) {
+ std::string warnMsg = "initial value of '" + GVar->getName().str() +
+ "' is not allowed in addrspace(" +
+ llvm::utostr_32(PTy->getAddressSpace()) + ")";
+ report_fatal_error(warnMsg.c_str());
+ }
}
}
} else {
@@ -1562,7 +1578,7 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar,
}
break;
default:
- assert(0 && "type not supported yet");
+ llvm_unreachable("type not supported yet");
}
}
@@ -1682,7 +1698,7 @@ void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable *GVar,
O << "]";
break;
default:
- assert(0 && "type not supported yet");
+ llvm_unreachable("type not supported yet");
}
return;
}
diff --git a/lib/Target/NVPTX/NVPTXFrameLowering.cpp b/lib/Target/NVPTX/NVPTXFrameLowering.cpp
index 9030584..8b088412 100644
--- a/lib/Target/NVPTX/NVPTXFrameLowering.cpp
+++ b/lib/Target/NVPTX/NVPTXFrameLowering.cpp
@@ -26,6 +26,10 @@
using namespace llvm;
+NVPTXFrameLowering::NVPTXFrameLowering(NVPTXSubtarget &STI)
+ : TargetFrameLowering(TargetFrameLowering::StackGrowsUp, 8, 0),
+ is64bit(STI.is64Bit()) {}
+
bool NVPTXFrameLowering::hasFP(const MachineFunction &MF) const { return true; }
void NVPTXFrameLowering::emitPrologue(MachineFunction &MF) const {
@@ -43,17 +47,21 @@ void NVPTXFrameLowering::emitPrologue(MachineFunction &MF) const {
// cvta.local %SP, %SPL;
if (is64bit) {
unsigned LocalReg = MRI.createVirtualRegister(&NVPTX::Int64RegsRegClass);
- MachineInstr *MI = BuildMI(
- MBB, MBBI, dl, tm.getInstrInfo()->get(NVPTX::cvta_local_yes_64),
- NVPTX::VRFrame).addReg(LocalReg);
- BuildMI(MBB, MI, dl, tm.getInstrInfo()->get(NVPTX::MOV_DEPOT_ADDR_64),
+ MachineInstr *MI =
+ BuildMI(MBB, MBBI, dl,
+ MF.getTarget().getInstrInfo()->get(NVPTX::cvta_local_yes_64),
+ NVPTX::VRFrame).addReg(LocalReg);
+ BuildMI(MBB, MI, dl,
+ MF.getTarget().getInstrInfo()->get(NVPTX::MOV_DEPOT_ADDR_64),
LocalReg).addImm(MF.getFunctionNumber());
} else {
unsigned LocalReg = MRI.createVirtualRegister(&NVPTX::Int32RegsRegClass);
- MachineInstr *MI = BuildMI(
- MBB, MBBI, dl, tm.getInstrInfo()->get(NVPTX::cvta_local_yes),
- NVPTX::VRFrame).addReg(LocalReg);
- BuildMI(MBB, MI, dl, tm.getInstrInfo()->get(NVPTX::MOV_DEPOT_ADDR),
+ MachineInstr *MI =
+ BuildMI(MBB, MBBI, dl,
+ MF.getTarget().getInstrInfo()->get(NVPTX::cvta_local_yes),
+ NVPTX::VRFrame).addReg(LocalReg);
+ BuildMI(MBB, MI, dl,
+ MF.getTarget().getInstrInfo()->get(NVPTX::MOV_DEPOT_ADDR),
LocalReg).addImm(MF.getFunctionNumber());
}
}
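In the 64-bit case the prologue emitted here ends up printing roughly the
following PTX (a sketch; the depot symbol name is illustrative):

  mov.u64         %SPL, __local_depot0;
  cvta.local.u64  %SP, %SPL;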
diff --git a/lib/Target/NVPTX/NVPTXFrameLowering.h b/lib/Target/NVPTX/NVPTXFrameLowering.h
index 2ae6d72..56fb673 100644
--- a/lib/Target/NVPTX/NVPTXFrameLowering.h
+++ b/lib/Target/NVPTX/NVPTXFrameLowering.h
@@ -17,16 +17,12 @@
#include "llvm/Target/TargetFrameLowering.h"
namespace llvm {
-class NVPTXTargetMachine;
-
+class NVPTXSubtarget;
class NVPTXFrameLowering : public TargetFrameLowering {
- NVPTXTargetMachine &tm;
bool is64bit;
public:
- explicit NVPTXFrameLowering(NVPTXTargetMachine &_tm, bool _is64bit)
- : TargetFrameLowering(TargetFrameLowering::StackGrowsUp, 8, 0), tm(_tm),
- is64bit(_is64bit) {}
+ explicit NVPTXFrameLowering(NVPTXSubtarget &STI);
bool hasFP(const MachineFunction &MF) const override;
void emitPrologue(MachineFunction &MF) const override;
diff --git a/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp b/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp
index 023dd5e..faa9fdb 100644
--- a/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp
+++ b/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp
@@ -84,7 +84,7 @@ bool GenericToNVVM::runOnModule(Module &M) {
GlobalVariable *GV = I++;
if (GV->getType()->getAddressSpace() == llvm::ADDRESS_SPACE_GENERIC &&
!llvm::isTexture(*GV) && !llvm::isSurface(*GV) &&
- !GV->getName().startswith("llvm.")) {
+ !llvm::isSampler(*GV) && !GV->getName().startswith("llvm.")) {
GlobalVariable *NewGV = new GlobalVariable(
M, GV->getType()->getElementType(), GV->isConstant(),
GV->getLinkage(),
diff --git a/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
index cd30880..0dfbf10 100644
--- a/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
+++ b/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
@@ -24,11 +24,14 @@ using namespace llvm;
#define DEBUG_TYPE "nvptx-isel"
-static cl::opt<int>
-FMAContractLevel("nvptx-fma-level", cl::ZeroOrMore, cl::Hidden,
- cl::desc("NVPTX Specific: FMA contraction (0: don't do it"
- " 1: do it 2: do it aggressively"),
- cl::init(2));
+unsigned FMAContractLevel = 0;
+
+static cl::opt<unsigned, true>
+FMAContractLevelOpt("nvptx-fma-level", cl::ZeroOrMore, cl::Hidden,
+ cl::desc("NVPTX Specific: FMA contraction (0: don't do it"
+ " 1: do it 2: do it aggressively"),
+ cl::location(FMAContractLevel),
+ cl::init(2));
static cl::opt<int> UsePrecDivF32(
"nvptx-prec-divf32", cl::ZeroOrMore, cl::Hidden,
@@ -138,7 +141,7 @@ SDNode *NVPTXDAGToDAGISel::Select(SDNode *N) {
case NVPTXISD::LDGV4:
case NVPTXISD::LDUV2:
case NVPTXISD::LDUV4:
- ResNode = SelectLDGLDUVector(N);
+ ResNode = SelectLDGLDU(N);
break;
case NVPTXISD::StoreV2:
case NVPTXISD::StoreV4:
@@ -164,6 +167,9 @@ SDNode *NVPTXDAGToDAGISel::Select(SDNode *N) {
case ISD::INTRINSIC_WO_CHAIN:
ResNode = SelectIntrinsicNoChain(N);
break;
+ case ISD::INTRINSIC_W_CHAIN:
+ ResNode = SelectIntrinsicChain(N);
+ break;
case NVPTXISD::Tex1DFloatI32:
case NVPTXISD::Tex1DFloatFloat:
case NVPTXISD::Tex1DFloatFloatLevel:
@@ -253,6 +259,12 @@ SDNode *NVPTXDAGToDAGISel::Select(SDNode *N) {
case NVPTXISD::Suld3DV4I32Trap:
ResNode = SelectSurfaceIntrinsic(N);
break;
+ case ISD::AND:
+ case ISD::SRA:
+ case ISD::SRL:
+ // Try to select BFE
+ ResNode = SelectBFE(N);
+ break;
case ISD::ADDRSPACECAST:
ResNode = SelectAddrSpaceCast(N);
break;
@@ -264,6 +276,21 @@ SDNode *NVPTXDAGToDAGISel::Select(SDNode *N) {
return SelectCode(N);
}
+SDNode *NVPTXDAGToDAGISel::SelectIntrinsicChain(SDNode *N) {
+ unsigned IID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
+ switch (IID) {
+ default:
+ return NULL;
+ case Intrinsic::nvvm_ldg_global_f:
+ case Intrinsic::nvvm_ldg_global_i:
+ case Intrinsic::nvvm_ldg_global_p:
+ case Intrinsic::nvvm_ldu_global_f:
+ case Intrinsic::nvvm_ldu_global_i:
+ case Intrinsic::nvvm_ldu_global_p:
+ return SelectLDGLDU(N);
+ }
+}
+
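In PTX terms, the LDG opcodes selected by SelectLDGLDU correspond to
non-coherent global loads (ld.global.nc.*) and the LDU opcodes to ldu.global.*
loads; the intrinsic forms above simply reuse the same selection logic as the
custom vector nodes.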
static unsigned int getCodeAddrSpace(MemSDNode *N,
const NVPTXSubtarget &Subtarget) {
const Value *Src = N->getMemOperand()->getValue();
@@ -981,22 +1008,101 @@ SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) {
return LD;
}
-SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) {
+SDNode *NVPTXDAGToDAGISel::SelectLDGLDU(SDNode *N) {
SDValue Chain = N->getOperand(0);
- SDValue Op1 = N->getOperand(1);
+ SDValue Op1;
+ MemSDNode *Mem;
+ bool IsLDG = true;
+
+ // If this is an LDG intrinsic, the address is the third operand. If it is an
+ // LDG/LDU SD node (from custom vector handling), then it is the second operand.
+ if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
+ Op1 = N->getOperand(2);
+ Mem = cast<MemIntrinsicSDNode>(N);
+ unsigned IID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
+ switch (IID) {
+ default:
+ return NULL;
+ case Intrinsic::nvvm_ldg_global_f:
+ case Intrinsic::nvvm_ldg_global_i:
+ case Intrinsic::nvvm_ldg_global_p:
+ IsLDG = true;
+ break;
+ case Intrinsic::nvvm_ldu_global_f:
+ case Intrinsic::nvvm_ldu_global_i:
+ case Intrinsic::nvvm_ldu_global_p:
+ IsLDG = false;
+ break;
+ }
+ } else {
+ Op1 = N->getOperand(1);
+ Mem = cast<MemSDNode>(N);
+ }
+
unsigned Opcode;
SDLoc DL(N);
SDNode *LD;
- MemSDNode *Mem = cast<MemSDNode>(N);
SDValue Base, Offset, Addr;
- EVT EltVT = Mem->getMemoryVT().getVectorElementType();
+ EVT EltVT = Mem->getMemoryVT();
+ if (EltVT.isVector()) {
+ EltVT = EltVT.getVectorElementType();
+ }
if (SelectDirectAddr(Op1, Addr)) {
switch (N->getOpcode()) {
default:
return nullptr;
+ case ISD::INTRINSIC_W_CHAIN:
+ if (IsLDG) {
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default:
+ return nullptr;
+ case MVT::i8:
+ Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8avar;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16avar;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32avar;
+ break;
+ case MVT::i64:
+ Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64avar;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32avar;
+ break;
+ case MVT::f64:
+ Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64avar;
+ break;
+ }
+ } else {
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default:
+ return nullptr;
+ case MVT::i8:
+ Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8avar;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16avar;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32avar;
+ break;
+ case MVT::i64:
+ Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64avar;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32avar;
+ break;
+ case MVT::f64:
+ Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64avar;
+ break;
+ }
+ }
+ break;
case NVPTXISD::LDGV2:
switch (EltVT.getSimpleVT().SimpleTy) {
default:
@@ -1092,6 +1198,55 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) {
switch (N->getOpcode()) {
default:
return nullptr;
+ case ISD::INTRINSIC_W_CHAIN:
+ if (IsLDG) {
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default:
+ return nullptr;
+ case MVT::i8:
+ Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8ari64;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16ari64;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32ari64;
+ break;
+ case MVT::i64:
+ Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64ari64;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32ari64;
+ break;
+ case MVT::f64:
+ Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64ari64;
+ break;
+ }
+ } else {
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default:
+ return nullptr;
+ case MVT::i8:
+ Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8ari64;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16ari64;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32ari64;
+ break;
+ case MVT::i64:
+ Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64ari64;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32ari64;
+ break;
+ case MVT::f64:
+ Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64ari64;
+ break;
+ }
+ }
+ break;
case NVPTXISD::LDGV2:
switch (EltVT.getSimpleVT().SimpleTy) {
default:
@@ -1181,6 +1336,55 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) {
switch (N->getOpcode()) {
default:
return nullptr;
+ case ISD::INTRINSIC_W_CHAIN:
+ if (IsLDG) {
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default:
+ return nullptr;
+ case MVT::i8:
+ Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8ari;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16ari;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32ari;
+ break;
+ case MVT::i64:
+ Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64ari;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32ari;
+ break;
+ case MVT::f64:
+ Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64ari;
+ break;
+ }
+ } else {
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default:
+ return nullptr;
+ case MVT::i8:
+ Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8ari;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16ari;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32ari;
+ break;
+ case MVT::i64:
+ Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64ari;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32ari;
+ break;
+ case MVT::f64:
+ Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64ari;
+ break;
+ }
+ }
+ break;
case NVPTXISD::LDGV2:
switch (EltVT.getSimpleVT().SimpleTy) {
default:
@@ -1276,6 +1480,55 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) {
switch (N->getOpcode()) {
default:
return nullptr;
+ case ISD::INTRINSIC_W_CHAIN:
+ if (IsLDG) {
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default:
+ return nullptr;
+ case MVT::i8:
+ Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8areg64;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16areg64;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32areg64;
+ break;
+ case MVT::i64:
+ Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64areg64;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32areg64;
+ break;
+ case MVT::f64:
+ Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64areg64;
+ break;
+ }
+ } else {
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default:
+ return nullptr;
+ case MVT::i8:
+ Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8areg64;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16areg64;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32areg64;
+ break;
+ case MVT::i64:
+ Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64areg64;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32areg64;
+ break;
+ case MVT::f64:
+ Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64areg64;
+ break;
+ }
+ }
+ break;
case NVPTXISD::LDGV2:
switch (EltVT.getSimpleVT().SimpleTy) {
default:
@@ -1365,6 +1618,55 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) {
switch (N->getOpcode()) {
default:
return nullptr;
+ case ISD::INTRINSIC_W_CHAIN:
+ if (IsLDG) {
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default:
+ return nullptr;
+ case MVT::i8:
+ Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8areg;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16areg;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32areg;
+ break;
+ case MVT::i64:
+ Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64areg;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32areg;
+ break;
+ case MVT::f64:
+ Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64areg;
+ break;
+ }
+ } else {
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default:
+ return nullptr;
+ case MVT::i8:
+ Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8areg;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16areg;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32areg;
+ break;
+ case MVT::i64:
+ Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64areg;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32areg;
+ break;
+ case MVT::f64:
+ Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64areg;
+ break;
+ }
+ }
+ break;
case NVPTXISD::LDGV2:
switch (EltVT.getSimpleVT().SimpleTy) {
default:
@@ -1457,7 +1759,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) {
}
MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
- MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
+ MemRefs0[0] = Mem->getMemOperand();
cast<MachineSDNode>(LD)->setMemRefs(MemRefs0, MemRefs0 + 1);
return LD;
@@ -2959,6 +3261,214 @@ SDNode *NVPTXDAGToDAGISel::SelectSurfaceIntrinsic(SDNode *N) {
return Ret;
}
+/// SelectBFE - Look for instruction sequences that can be made more efficient
+/// by using the 'bfe' (bit-field extract) PTX instruction
+SDNode *NVPTXDAGToDAGISel::SelectBFE(SDNode *N) {
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+ SDValue Len;
+ SDValue Start;
+ SDValue Val;
+ bool IsSigned = false;
+
+ if (N->getOpcode() == ISD::AND) {
+ // Canonicalize the operands
+ // We want 'and %val, %mask'
+ if (isa<ConstantSDNode>(LHS) && !isa<ConstantSDNode>(RHS)) {
+ std::swap(LHS, RHS);
+ }
+
+ ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(RHS);
+ if (!Mask) {
+ // We need a constant mask on the RHS of the AND
+ return NULL;
+ }
+
+ // Extract the mask bits
+ uint64_t MaskVal = Mask->getZExtValue();
+ if (!isMask_64(MaskVal)) {
+ // We *could* handle shifted masks here, but doing so would require an
+ // 'and' operation to fix up the low-order bits, so we would trade
+ // shr+and for bfe+and, which has the same throughput.
+ return NULL;
+ }
+
+ // How many bits are in our mask?
+ uint64_t NumBits = CountTrailingOnes_64(MaskVal);
+ Len = CurDAG->getTargetConstant(NumBits, MVT::i32);
+
+ if (LHS.getOpcode() == ISD::SRL || LHS.getOpcode() == ISD::SRA) {
+ // We have a 'srl/and' pair, extract the effective start bit and length
+ Val = LHS.getNode()->getOperand(0);
+ Start = LHS.getNode()->getOperand(1);
+ ConstantSDNode *StartConst = dyn_cast<ConstantSDNode>(Start);
+ if (StartConst) {
+ uint64_t StartVal = StartConst->getZExtValue();
+ // How many "good" bits do we have left? "good" is defined here as bits
+ // that exist in the original value, not shifted in.
+ uint64_t GoodBits = Start.getValueType().getSizeInBits() - StartVal;
+ if (NumBits > GoodBits) {
+ // Do not handle the case where bits have been shifted in. In theory
+ // we could handle this, but the cost is likely higher than just
+ // emitting the srl/and pair.
+ return NULL;
+ }
+ Start = CurDAG->getTargetConstant(StartVal, MVT::i32);
+ } else {
+ // Do not handle the case where the shift amount (can be zero if no srl
+ // was found) is not constant. We could handle this case, but it would
+ // require run-time logic that would be more expensive than just
+ // emitting the srl/and pair.
+ return NULL;
+ }
+ } else {
+ // Do not handle the case where the LHS of the and is not a shift. While
+ // it would be trivial to handle this case, it would just transform
+ // 'and' -> 'bfe', but 'and' has higher throughput.
+ return NULL;
+ }
+ } else if (N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) {
+ if (LHS->getOpcode() == ISD::AND) {
+ ConstantSDNode *ShiftCnst = dyn_cast<ConstantSDNode>(RHS);
+ if (!ShiftCnst) {
+ // Shift amount must be constant
+ return NULL;
+ }
+
+ uint64_t ShiftAmt = ShiftCnst->getZExtValue();
+
+ SDValue AndLHS = LHS->getOperand(0);
+ SDValue AndRHS = LHS->getOperand(1);
+
+ // Canonicalize the AND to have the mask on the RHS
+ if (isa<ConstantSDNode>(AndLHS)) {
+ std::swap(AndLHS, AndRHS);
+ }
+
+ ConstantSDNode *MaskCnst = dyn_cast<ConstantSDNode>(AndRHS);
+ if (!MaskCnst) {
+ // Mask must be constant
+ return NULL;
+ }
+
+ uint64_t MaskVal = MaskCnst->getZExtValue();
+ uint64_t NumZeros;
+ uint64_t NumBits;
+ if (isMask_64(MaskVal)) {
+ NumZeros = 0;
+ // The number of bits in the result bitfield will be the number of
+ // trailing ones (the AND) minus the number of bits we shift off
+ NumBits = CountTrailingOnes_64(MaskVal) - ShiftAmt;
+ } else if (isShiftedMask_64(MaskVal)) {
+ NumZeros = countTrailingZeros(MaskVal);
+ unsigned NumOnes = CountTrailingOnes_64(MaskVal >> NumZeros);
+ // The number of bits in the result bitfield will be the number of
+ // trailing zeros plus the number of set bits in the mask minus the
+ // number of bits we shift off
+ NumBits = NumZeros + NumOnes - ShiftAmt;
+ } else {
+ // This is not a mask we can handle
+ return NULL;
+ }
+
+ if (ShiftAmt < NumZeros) {
+ // Handling this case would require extra logic that would make this
+ // transformation non-profitable
+ return NULL;
+ }
+
+ Val = AndLHS;
+ Start = CurDAG->getTargetConstant(ShiftAmt, MVT::i32);
+ Len = CurDAG->getTargetConstant(NumBits, MVT::i32);
+ } else if (LHS->getOpcode() == ISD::SHL) {
+ // Here, we have a pattern like:
+ //
+ // (sra (shl val, NN), MM)
+ // or
+ // (srl (shl val, NN), MM)
+ //
+ // If MM >= NN, we can efficiently optimize this with bfe
+ Val = LHS->getOperand(0);
+
+ SDValue ShlRHS = LHS->getOperand(1);
+ ConstantSDNode *ShlCnst = dyn_cast<ConstantSDNode>(ShlRHS);
+ if (!ShlCnst) {
+ // Shift amount must be constant
+ return NULL;
+ }
+ uint64_t InnerShiftAmt = ShlCnst->getZExtValue();
+
+ SDValue ShrRHS = RHS;
+ ConstantSDNode *ShrCnst = dyn_cast<ConstantSDNode>(ShrRHS);
+ if (!ShrCnst) {
+ // Shift amount must be constant
+ return NULL;
+ }
+ uint64_t OuterShiftAmt = ShrCnst->getZExtValue();
+
+ // To avoid extra codegen and be profitable, we need Outer >= Inner
+ if (OuterShiftAmt < InnerShiftAmt) {
+ return NULL;
+ }
+
+ // If the outer shift is more than the type size, we have no bitfield to
+ // extract (since we also check that the inner shift is <= the outer shift,
+ // this also implies that the inner shift is < the type size).
+ if (OuterShiftAmt >= Val.getValueType().getSizeInBits()) {
+ return NULL;
+ }
+
+ Start =
+ CurDAG->getTargetConstant(OuterShiftAmt - InnerShiftAmt, MVT::i32);
+ Len =
+ CurDAG->getTargetConstant(Val.getValueType().getSizeInBits() -
+ OuterShiftAmt, MVT::i32);
+
+ if (N->getOpcode() == ISD::SRA) {
+ // If we have an arithmetic right shift, we need to use the signed bfe
+ // variant
+ IsSigned = true;
+ }
+ } else {
+ // No can do...
+ return NULL;
+ }
+ } else {
+ // No can do...
+ return NULL;
+ }
+
+
+ unsigned Opc;
+ // For the BFE operations we form here from "and" and "srl", always use the
+ // unsigned variants.
+ if (Val.getValueType() == MVT::i32) {
+ if (IsSigned) {
+ Opc = NVPTX::BFE_S32rii;
+ } else {
+ Opc = NVPTX::BFE_U32rii;
+ }
+ } else if (Val.getValueType() == MVT::i64) {
+ if (IsSigned) {
+ Opc = NVPTX::BFE_S64rii;
+ } else {
+ Opc = NVPTX::BFE_U64rii;
+ }
+ } else {
+ // We cannot handle this type
+ return NULL;
+ }
+
+ SDValue Ops[] = {
+ Val, Start, Len
+ };
+
+ SDNode *Ret =
+ CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
+
+ return Ret;
+}
+
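To make the shifted-mask arithmetic in SelectBFE concrete, here is a small
standalone sketch (plain C++20 using <bit>, not LLVM code) that reproduces the
start/length computation for the (srl (and %val, mask), shamt) pattern:

  #include <bit>
  #include <cstdint>
  #include <cstdio>

  int main() {
    // Pattern: (srl (and %val, 0x0FF0), 4) on a 32-bit value.
    uint64_t MaskVal  = 0x0FF0; // shifted mask: 8 ones starting at bit 4
    uint64_t ShiftAmt = 4;      // the srl amount

    unsigned NumZeros = std::countr_zero(MaskVal);            // 4
    unsigned NumOnes  = std::countr_one(MaskVal >> NumZeros); // 8
    uint64_t NumBits  = NumZeros + NumOnes - ShiftAmt;        // 8 bits to extract

    // Profitable only when ShiftAmt >= NumZeros (here 4 >= 4), matching the
    // check in SelectBFE; Start is the shift amount, Len is NumBits.
    std::printf("bfe.u32 %%r0, %%val, %u, %u;\n",
                (unsigned)ShiftAmt, (unsigned)NumBits);
    return 0;
  }

The emitted bfe extracts bits [4, 12) of %val, which is exactly what the
original srl+and pair computes.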
// SelectDirectAddr - Match a direct address for DAG.
// A direct address could be a globaladdress or externalsymbol.
bool NVPTXDAGToDAGISel::SelectDirectAddr(SDValue N, SDValue &Address) {
diff --git a/lib/Target/NVPTX/NVPTXISelDAGToDAG.h b/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
index 11f92e7..c44ccb2 100644
--- a/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
+++ b/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
@@ -59,10 +59,11 @@ private:
SDNode *Select(SDNode *N) override;
SDNode *SelectIntrinsicNoChain(SDNode *N);
+ SDNode *SelectIntrinsicChain(SDNode *N);
SDNode *SelectTexSurfHandle(SDNode *N);
SDNode *SelectLoad(SDNode *N);
SDNode *SelectLoadVector(SDNode *N);
- SDNode *SelectLDGLDUVector(SDNode *N);
+ SDNode *SelectLDGLDU(SDNode *N);
SDNode *SelectStore(SDNode *N);
SDNode *SelectStoreVector(SDNode *N);
SDNode *SelectLoadParam(SDNode *N);
@@ -71,6 +72,7 @@ private:
SDNode *SelectAddrSpaceCast(SDNode *N);
SDNode *SelectTextureIntrinsic(SDNode *N);
SDNode *SelectSurfaceIntrinsic(SDNode *N);
+ SDNode *SelectBFE(SDNode *N);
inline SDValue getI32Imm(unsigned Imm) {
return CurDAG->getTargetConstant(Imm, MVT::i32);
diff --git a/lib/Target/NVPTX/NVPTXISelLowering.cpp b/lib/Target/NVPTX/NVPTXISelLowering.cpp
index b0943be..cb452ff 100644
--- a/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -33,6 +33,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <sstream>
@@ -111,6 +112,7 @@ NVPTXTargetLowering::NVPTXTargetLowering(NVPTXTargetMachine &TM)
MaxStoresPerMemmove = (unsigned) 0xFFFFFFFF;
setBooleanContents(ZeroOrNegativeOneBooleanContent);
+ setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
// Jump is Expensive. Don't create extra control flow for 'and', 'or'
// condition branches.
@@ -130,7 +132,13 @@ NVPTXTargetLowering::NVPTXTargetLowering(NVPTXTargetMachine &TM)
addRegisterClass(MVT::f64, &NVPTX::Float64RegsRegClass);
// Operations not directly supported by NVPTX.
- setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
+ setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
+ setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
+ setOperationAction(ISD::SELECT_CC, MVT::i1, Expand);
+ setOperationAction(ISD::SELECT_CC, MVT::i8, Expand);
+ setOperationAction(ISD::SELECT_CC, MVT::i16, Expand);
+ setOperationAction(ISD::SELECT_CC, MVT::i32, Expand);
+ setOperationAction(ISD::SELECT_CC, MVT::i64, Expand);
setOperationAction(ISD::BR_CC, MVT::f32, Expand);
setOperationAction(ISD::BR_CC, MVT::f64, Expand);
setOperationAction(ISD::BR_CC, MVT::i1, Expand);
@@ -146,6 +154,13 @@ NVPTXTargetLowering::NVPTXTargetLowering(NVPTXTargetMachine &TM)
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8 , Legal);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
+ setOperationAction(ISD::SHL_PARTS, MVT::i32 , Custom);
+ setOperationAction(ISD::SRA_PARTS, MVT::i32 , Custom);
+ setOperationAction(ISD::SRL_PARTS, MVT::i32 , Custom);
+ setOperationAction(ISD::SHL_PARTS, MVT::i64 , Custom);
+ setOperationAction(ISD::SRA_PARTS, MVT::i64 , Custom);
+ setOperationAction(ISD::SRL_PARTS, MVT::i64 , Custom);
+
if (nvptxSubtarget.hasROT64()) {
setOperationAction(ISD::ROTL, MVT::i64, Legal);
setOperationAction(ISD::ROTR, MVT::i64, Legal);
@@ -237,6 +252,13 @@ NVPTXTargetLowering::NVPTXTargetLowering(NVPTXTargetMachine &TM)
setOperationAction(ISD::CTPOP, MVT::i32, Legal);
setOperationAction(ISD::CTPOP, MVT::i64, Legal);
+ // We have some custom DAG combine patterns for these nodes
+ setTargetDAGCombine(ISD::ADD);
+ setTargetDAGCombine(ISD::AND);
+ setTargetDAGCombine(ISD::FADD);
+ setTargetDAGCombine(ISD::MUL);
+ setTargetDAGCombine(ISD::SHL);
+
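(Judging from the new ISD node names introduced above, these combines
presumably cover folding a sign/zero-extended 32x32->64 multiply into
NVPTXISD::MUL_WIDE_SIGNED / MUL_WIDE_UNSIGNED and a multiply-add chain into
NVPTXISD::IMAD; the exact patterns are implemented in the target's
PerformDAGCombine hook.)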
// Now deduce the information based on the above mentioned
// actions
computeRegisterProperties();
@@ -328,6 +350,16 @@ const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const {
return "NVPTXISD::StoreV2";
case NVPTXISD::StoreV4:
return "NVPTXISD::StoreV4";
+ case NVPTXISD::FUN_SHFL_CLAMP:
+ return "NVPTXISD::FUN_SHFL_CLAMP";
+ case NVPTXISD::FUN_SHFR_CLAMP:
+ return "NVPTXISD::FUN_SHFR_CLAMP";
+ case NVPTXISD::IMAD:
+ return "NVPTXISD::IMAD";
+ case NVPTXISD::MUL_WIDE_SIGNED:
+ return "NVPTXISD::MUL_WIDE_SIGNED";
+ case NVPTXISD::MUL_WIDE_UNSIGNED:
+ return "NVPTXISD::MUL_WIDE_UNSIGNED";
case NVPTXISD::Tex1DFloatI32: return "NVPTXISD::Tex1DFloatI32";
case NVPTXISD::Tex1DFloatFloat: return "NVPTXISD::Tex1DFloatFloat";
case NVPTXISD::Tex1DFloatFloatLevel:
@@ -441,8 +473,12 @@ const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const {
}
}
-bool NVPTXTargetLowering::shouldSplitVectorType(EVT VT) const {
- return VT.getScalarType() == MVT::i1;
+TargetLoweringBase::LegalizeTypeAction
+NVPTXTargetLowering::getPreferredVectorAction(EVT VT) const {
+ if (VT.getVectorNumElements() != 1 && VT.getScalarType() == MVT::i1)
+ return TypeSplitVector;
+
+ return TargetLoweringBase::getPreferredVectorAction(VT);
}
SDValue
@@ -487,26 +523,12 @@ NVPTXTargetLowering::getPrototype(Type *retTy, const ArgListTy &Args,
} else if (isa<PointerType>(retTy)) {
O << ".param .b" << getPointerTy().getSizeInBits() << " _";
} else {
- if ((retTy->getTypeID() == Type::StructTyID) || isa<VectorType>(retTy)) {
- SmallVector<EVT, 16> vtparts;
- ComputeValueVTs(*this, retTy, vtparts);
- unsigned totalsz = 0;
- for (unsigned i = 0, e = vtparts.size(); i != e; ++i) {
- unsigned elems = 1;
- EVT elemtype = vtparts[i];
- if (vtparts[i].isVector()) {
- elems = vtparts[i].getVectorNumElements();
- elemtype = vtparts[i].getVectorElementType();
- }
- // TODO: no need to loop
- for (unsigned j = 0, je = elems; j != je; ++j) {
- unsigned sz = elemtype.getSizeInBits();
- if (elemtype.isInteger() && (sz < 8))
- sz = 8;
- totalsz += sz / 8;
- }
- }
- O << ".param .align " << retAlignment << " .b8 _[" << totalsz << "]";
+ if((retTy->getTypeID() == Type::StructTyID) ||
+ isa<VectorType>(retTy)) {
+ O << ".param .align "
+ << retAlignment
+ << " .b8 _["
+ << getDataLayout()->getTypeAllocSize(retTy) << "]";
} else {
assert(false && "Unknown return type");
}
@@ -675,7 +697,8 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
if (Ty->isAggregateType()) {
// aggregate
SmallVector<EVT, 16> vtparts;
- ComputeValueVTs(*this, Ty, vtparts);
+ SmallVector<uint64_t, 16> Offsets;
+ ComputePTXValueVTs(*this, Ty, vtparts, &Offsets, 0);
unsigned align = getArgumentAlignment(Callee, CS, Ty, paramCount + 1);
// declare .param .align <align> .b8 .param<n>[<size>];
@@ -687,34 +710,26 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
Chain = DAG.getNode(NVPTXISD::DeclareParam, dl, DeclareParamVTs,
DeclareParamOps);
InFlag = Chain.getValue(1);
- unsigned curOffset = 0;
for (unsigned j = 0, je = vtparts.size(); j != je; ++j) {
- unsigned elems = 1;
EVT elemtype = vtparts[j];
- if (vtparts[j].isVector()) {
- elems = vtparts[j].getVectorNumElements();
- elemtype = vtparts[j].getVectorElementType();
- }
- for (unsigned k = 0, ke = elems; k != ke; ++k) {
- unsigned sz = elemtype.getSizeInBits();
- if (elemtype.isInteger() && (sz < 8))
- sz = 8;
- SDValue StVal = OutVals[OIdx];
- if (elemtype.getSizeInBits() < 16) {
- StVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i16, StVal);
- }
- SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
- SDValue CopyParamOps[] = { Chain,
- DAG.getConstant(paramCount, MVT::i32),
- DAG.getConstant(curOffset, MVT::i32),
- StVal, InFlag };
- Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreParam, dl,
- CopyParamVTs, CopyParamOps,
- elemtype, MachinePointerInfo());
- InFlag = Chain.getValue(1);
- curOffset += sz / 8;
- ++OIdx;
+ unsigned ArgAlign = GreatestCommonDivisor64(align, Offsets[j]);
+ if (elemtype.isInteger() && (sz < 8))
+ sz = 8;
+ SDValue StVal = OutVals[OIdx];
+ if (elemtype.getSizeInBits() < 16) {
+ StVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i16, StVal);
}
+ SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
+ SDValue CopyParamOps[] = { Chain,
+ DAG.getConstant(paramCount, MVT::i32),
+ DAG.getConstant(Offsets[j], MVT::i32),
+ StVal, InFlag };
+ Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreParam, dl,
+ CopyParamVTs, CopyParamOps,
+ elemtype, MachinePointerInfo(),
+ ArgAlign);
+ InFlag = Chain.getValue(1);
+ ++OIdx;
}
if (vtparts.size() > 0)
--OIdx;
@@ -899,13 +914,15 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
}
// struct or vector
SmallVector<EVT, 16> vtparts;
+ SmallVector<uint64_t, 16> Offsets;
const PointerType *PTy = dyn_cast<PointerType>(Args[i].Ty);
assert(PTy && "Type of a byval parameter should be pointer");
- ComputeValueVTs(*this, PTy->getElementType(), vtparts);
+ ComputePTXValueVTs(*this, PTy->getElementType(), vtparts, &Offsets, 0);
// declare .param .align <align> .b8 .param<n>[<size>];
unsigned sz = Outs[OIdx].Flags.getByValSize();
SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
+ unsigned ArgAlign = Outs[OIdx].Flags.getByValAlign();
// The ByValAlign in the Outs[OIdx].Flags is always set at this point,
// so we don't need to worry about natural alignment or not.
// See TargetLowering::LowerCallTo().
@@ -917,38 +934,28 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
Chain = DAG.getNode(NVPTXISD::DeclareParam, dl, DeclareParamVTs,
DeclareParamOps);
InFlag = Chain.getValue(1);
- unsigned curOffset = 0;
for (unsigned j = 0, je = vtparts.size(); j != je; ++j) {
- unsigned elems = 1;
EVT elemtype = vtparts[j];
- if (vtparts[j].isVector()) {
- elems = vtparts[j].getVectorNumElements();
- elemtype = vtparts[j].getVectorElementType();
+ int curOffset = Offsets[j];
+ unsigned PartAlign = GreatestCommonDivisor64(ArgAlign, curOffset);
+ SDValue srcAddr =
+ DAG.getNode(ISD::ADD, dl, getPointerTy(), OutVals[OIdx],
+ DAG.getConstant(curOffset, getPointerTy()));
+ SDValue theVal = DAG.getLoad(elemtype, dl, tempChain, srcAddr,
+ MachinePointerInfo(), false, false, false,
+ PartAlign);
+ if (elemtype.getSizeInBits() < 16) {
+ theVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i16, theVal);
}
- for (unsigned k = 0, ke = elems; k != ke; ++k) {
- unsigned sz = elemtype.getSizeInBits();
- if (elemtype.isInteger() && (sz < 8))
- sz = 8;
- SDValue srcAddr =
- DAG.getNode(ISD::ADD, dl, getPointerTy(), OutVals[OIdx],
- DAG.getConstant(curOffset, getPointerTy()));
- SDValue theVal = DAG.getLoad(elemtype, dl, tempChain, srcAddr,
- MachinePointerInfo(), false, false, false,
- 0);
- if (elemtype.getSizeInBits() < 16) {
- theVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i16, theVal);
- }
- SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
- SDValue CopyParamOps[] = { Chain, DAG.getConstant(paramCount, MVT::i32),
- DAG.getConstant(curOffset, MVT::i32), theVal,
- InFlag };
- Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreParam, dl, CopyParamVTs,
- CopyParamOps, elemtype,
- MachinePointerInfo());
+ SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
+ SDValue CopyParamOps[] = { Chain, DAG.getConstant(paramCount, MVT::i32),
+ DAG.getConstant(curOffset, MVT::i32), theVal,
+ InFlag };
+ Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreParam, dl, CopyParamVTs,
+ CopyParamOps, elemtype,
+ MachinePointerInfo());
- InFlag = Chain.getValue(1);
- curOffset += sz / 8;
- }
+ InFlag = Chain.getValue(1);
}
++paramCount;
}
@@ -1057,7 +1064,6 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// Generate loads from param memory/moves from registers for result
if (Ins.size() > 0) {
- unsigned resoffset = 0;
if (retTy && retTy->isVectorTy()) {
EVT ObjectVT = getValueType(retTy);
unsigned NumElts = ObjectVT.getVectorNumElements();
@@ -1066,14 +1072,15 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
ObjectVT) == NumElts &&
"Vector was not scalarized");
unsigned sz = EltVT.getSizeInBits();
- bool needTruncate = sz < 16 ? true : false;
+ bool needTruncate = sz < 8 ? true : false;
if (NumElts == 1) {
// Just a simple load
SmallVector<EVT, 4> LoadRetVTs;
- if (needTruncate) {
- // If loading i1 result, generate
- // load i16
+ if (EltVT == MVT::i1 || EltVT == MVT::i8) {
+ // If loading i1/i8 result, generate
+ // load.b8 i16
+ // if i1
// trunc i16 to i1
LoadRetVTs.push_back(MVT::i16);
} else
@@ -1097,9 +1104,10 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
} else if (NumElts == 2) {
// LoadV2
SmallVector<EVT, 4> LoadRetVTs;
- if (needTruncate) {
- // If loading i1 result, generate
- // load i16
+ if (EltVT == MVT::i1 || EltVT == MVT::i8) {
+ // If loading i1/i8 result, generate
+ // load.b8 i16
+ // if i1
// trunc i16 to i1
LoadRetVTs.push_back(MVT::i16);
LoadRetVTs.push_back(MVT::i16);
@@ -1142,9 +1150,10 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
EVT VecVT = EVT::getVectorVT(F->getContext(), EltVT, VecSize);
for (unsigned i = 0; i < NumElts; i += VecSize) {
SmallVector<EVT, 8> LoadRetVTs;
- if (needTruncate) {
- // If loading i1 result, generate
- // load i16
+ if (EltVT == MVT::i1 || EltVT == MVT::i8) {
+ // If loading i1/i8 result, generate
+ // load.b8 i16
+ // if i1
// trunc i16 to i1
for (unsigned j = 0; j < VecSize; ++j)
LoadRetVTs.push_back(MVT::i16);
@@ -1183,10 +1192,13 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
}
} else {
SmallVector<EVT, 16> VTs;
- ComputePTXValueVTs(*this, retTy, VTs);
+ SmallVector<uint64_t, 16> Offsets;
+ ComputePTXValueVTs(*this, retTy, VTs, &Offsets, 0);
assert(VTs.size() == Ins.size() && "Bad value decomposition");
+ unsigned RetAlign = getArgumentAlignment(Callee, CS, retTy, 0);
for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
unsigned sz = VTs[i].getSizeInBits();
+ unsigned AlignI = GreatestCommonDivisor64(RetAlign, Offsets[i]);
bool needTruncate = sz < 8 ? true : false;
if (VTs[i].isInteger() && (sz < 8))
sz = 8;
@@ -1212,19 +1224,18 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
SmallVector<SDValue, 4> LoadRetOps;
LoadRetOps.push_back(Chain);
LoadRetOps.push_back(DAG.getConstant(1, MVT::i32));
- LoadRetOps.push_back(DAG.getConstant(resoffset, MVT::i32));
+ LoadRetOps.push_back(DAG.getConstant(Offsets[i], MVT::i32));
LoadRetOps.push_back(InFlag);
SDValue retval = DAG.getMemIntrinsicNode(
NVPTXISD::LoadParam, dl,
DAG.getVTList(LoadRetVTs), LoadRetOps,
- TheLoadType, MachinePointerInfo());
+ TheLoadType, MachinePointerInfo(), AlignI);
Chain = retval.getValue(1);
InFlag = retval.getValue(2);
SDValue Ret0 = retval.getValue(0);
if (needTruncate)
Ret0 = DAG.getNode(ISD::TRUNCATE, dl, Ins[i].VT, Ret0);
InVals.push_back(Ret0);
- resoffset += sz / 8;
}
}
}
@@ -1262,6 +1273,127 @@ NVPTXTargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const {
return DAG.getNode(ISD::BUILD_VECTOR, dl, Node->getValueType(0), Ops);
}
+/// LowerShiftRightParts - Lower SRL_PARTS, SRA_PARTS, which
+/// 1) returns two i32 values and takes a 2 x i32 value to shift plus a shift
+/// amount, or
+/// 2) returns two i64 values and takes a 2 x i64 value to shift plus a shift
+/// amount.
+SDValue NVPTXTargetLowering::LowerShiftRightParts(SDValue Op,
+ SelectionDAG &DAG) const {
+ assert(Op.getNumOperands() == 3 && "Not a double-shift!");
+ assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS);
+
+ EVT VT = Op.getValueType();
+ unsigned VTBits = VT.getSizeInBits();
+ SDLoc dl(Op);
+ SDValue ShOpLo = Op.getOperand(0);
+ SDValue ShOpHi = Op.getOperand(1);
+ SDValue ShAmt = Op.getOperand(2);
+ unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL;
+
+ if (VTBits == 32 && nvptxSubtarget.getSmVersion() >= 35) {
+
+ // For 32bit and sm35, we can use the funnel shift 'shf' instruction.
+ // {dHi, dLo} = {aHi, aLo} >> Amt
+ // dHi = aHi >> Amt
+ // dLo = shf.r.clamp aLo, aHi, Amt
+
+ SDValue Hi = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt);
+ SDValue Lo = DAG.getNode(NVPTXISD::FUN_SHFR_CLAMP, dl, VT, ShOpLo, ShOpHi,
+ ShAmt);
+
+ SDValue Ops[2] = { Lo, Hi };
+ return DAG.getMergeValues(Ops, dl);
+ }
+ else {
+
+ // {dHi, dLo} = {aHi, aLo} >> Amt
+ // - if (Amt>=size) then
+ // dLo = aHi >> (Amt-size)
+ // dHi = aHi >> Amt (this is either all 0 or all 1)
+ // else
+ // dLo = (aLo >>logic Amt) | (aHi << (size-Amt))
+ // dHi = aHi >> Amt
+
+ SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
+ DAG.getConstant(VTBits, MVT::i32), ShAmt);
+ SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt);
+ SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
+ DAG.getConstant(VTBits, MVT::i32));
+ SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt);
+ SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
+ SDValue TrueVal = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt);
+
+ SDValue Cmp = DAG.getSetCC(dl, MVT::i1, ShAmt,
+ DAG.getConstant(VTBits, MVT::i32), ISD::SETGE);
+ SDValue Hi = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt);
+ SDValue Lo = DAG.getNode(ISD::SELECT, dl, VT, Cmp, TrueVal, FalseVal);
+
+ SDValue Ops[2] = { Lo, Hi };
+ return DAG.getMergeValues(Ops, dl);
+ }
+}
+
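For reference, a minimal standalone sketch of the generic (non-funnel-shift) path above, assuming a 64-bit value split into 32-bit {Hi, Lo} halves, a logical (SRL-style) shift, and 0 <= Amt < 64; the names are illustrative only:

    #include <cstdint>

    // Models the else-branch of LowerShiftRightParts for SRL_PARTS. The DAG
    // version selects between the two cases with SETGE + SELECT and relies on
    // hardware shift semantics for out-of-range amounts; here we guard them.
    void ShiftRightParts64(uint32_t Lo, uint32_t Hi, unsigned Amt,
                           uint32_t &OutLo, uint32_t &OutHi) {
      const unsigned Size = 32;
      if (Amt == 0) {              // guard: C++ shifts by >= width are undefined
        OutLo = Lo;
        OutHi = Hi;
      } else if (Amt >= Size) {    // dLo = aHi >> (Amt-size)
        OutLo = Hi >> (Amt - Size);
        OutHi = 0;                 // srl: the high word becomes all zeros
      } else {                     // dLo = (aLo >> Amt) | (aHi << (size-Amt))
        OutLo = (Lo >> Amt) | (Hi << (Size - Amt));
        OutHi = Hi >> Amt;         // dHi = aHi >> Amt
      }
    }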
+/// LowerShiftLeftParts - Lower SHL_PARTS, which
+/// 1) returns two i32 values and takes a 2 x i32 value to shift plus a shift
+/// amount, or
+/// 2) returns two i64 values and takes a 2 x i64 value to shift plus a shift
+/// amount.
+SDValue NVPTXTargetLowering::LowerShiftLeftParts(SDValue Op,
+ SelectionDAG &DAG) const {
+ assert(Op.getNumOperands() == 3 && "Not a double-shift!");
+ assert(Op.getOpcode() == ISD::SHL_PARTS);
+
+ EVT VT = Op.getValueType();
+ unsigned VTBits = VT.getSizeInBits();
+ SDLoc dl(Op);
+ SDValue ShOpLo = Op.getOperand(0);
+ SDValue ShOpHi = Op.getOperand(1);
+ SDValue ShAmt = Op.getOperand(2);
+
+ if (VTBits == 32 && nvptxSubtarget.getSmVersion() >= 35) {
+
+ // For 32bit and sm35, we can use the funnel shift 'shf' instruction.
+ // {dHi, dLo} = {aHi, aLo} << Amt
+ // dHi = shf.l.clamp aLo, aHi, Amt
+ // dLo = aLo << Amt
+
+ SDValue Hi = DAG.getNode(NVPTXISD::FUN_SHFL_CLAMP, dl, VT, ShOpLo, ShOpHi,
+ ShAmt);
+ SDValue Lo = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
+
+ SDValue Ops[2] = { Lo, Hi };
+ return DAG.getMergeValues(Ops, dl);
+ }
+ else {
+
+ // {dHi, dLo} = {aHi, aLo} << Amt
+ // - if (Amt>=size) then
+ // dLo = aLo << Amt (all 0)
+ // dHi = aLo << (Amt-size)
+ // else
+ // dLo = aLo << Amt
+ // dHi = (aHi << Amt) | (aLo >> (size-Amt))
+
+ SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
+ DAG.getConstant(VTBits, MVT::i32), ShAmt);
+ SDValue Tmp1 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt);
+ SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
+ DAG.getConstant(VTBits, MVT::i32));
+ SDValue Tmp2 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt);
+ SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
+ SDValue TrueVal = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt);
+
+ SDValue Cmp = DAG.getSetCC(dl, MVT::i1, ShAmt,
+ DAG.getConstant(VTBits, MVT::i32), ISD::SETGE);
+ SDValue Lo = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
+ SDValue Hi = DAG.getNode(ISD::SELECT, dl, VT, Cmp, TrueVal, FalseVal);
+
+ SDValue Ops[2] = { Lo, Hi };
+ return DAG.getMergeValues(Ops, dl);
+ }
+}
+
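As a rough model of the sm_35 path, the PTX funnel shift in clamp mode concatenates the two words, shifts by the amount clamped to 32, and keeps one word of the result; a sketch under that assumption:

    #include <cstdint>

    // Illustrative model of shf.l.clamp.b32 d, lo, hi, amt: shift the 64-bit
    // value {hi:lo} left by min(amt, 32) and return the upper 32 bits. This is
    // the value used for dHi in the SHL_PARTS lowering above.
    uint32_t FunnelShiftLeftClamp(uint32_t Lo, uint32_t Hi, unsigned Amt) {
      unsigned N = Amt < 32 ? Amt : 32;         // clamp mode saturates at 32
      uint64_t Pair = ((uint64_t)Hi << 32) | Lo;
      return (uint32_t)((Pair << N) >> 32);
    }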
SDValue
NVPTXTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
switch (Op.getOpcode()) {
@@ -1282,6 +1414,11 @@ NVPTXTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
return LowerSTORE(Op, DAG);
case ISD::LOAD:
return LowerLOAD(Op, DAG);
+ case ISD::SHL_PARTS:
+ return LowerShiftLeftParts(Op, DAG);
+ case ISD::SRA_PARTS:
+ case ISD::SRL_PARTS:
+ return LowerShiftRightParts(Op, DAG);
default:
llvm_unreachable("Custom lowering not defined for operation");
}
@@ -1495,7 +1632,7 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
const Function *F = MF.getFunction();
const AttributeSet &PAL = F->getAttributes();
- const TargetLowering *TLI = nvTM->getTargetLowering();
+ const TargetLowering *TLI = DAG.getTarget().getTargetLowering();
SDValue Root = DAG.getRoot();
std::vector<SDValue> OutChains;
@@ -1549,8 +1686,7 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
assert(vtparts.size() > 0 && "empty aggregate type not expected");
for (unsigned parti = 0, parte = vtparts.size(); parti != parte;
++parti) {
- EVT partVT = vtparts[parti];
- InVals.push_back(DAG.getNode(ISD::UNDEF, dl, partVT));
+ InVals.push_back(DAG.getNode(ISD::UNDEF, dl, Ins[InsIdx].VT));
++InsIdx;
}
if (vtparts.size() > 0)
@@ -1866,7 +2002,7 @@ NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
unsigned Offset = 0;
EVT VecVT =
- EVT::getVectorVT(F->getContext(), OutVals[0].getValueType(), VecSize);
+ EVT::getVectorVT(F->getContext(), EltVT, VecSize);
unsigned PerStoreOffset =
TD->getTypeAllocSize(VecVT.getTypeForEVT(F->getContext()));
@@ -1925,12 +2061,10 @@ NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
}
} else {
SmallVector<EVT, 16> ValVTs;
- // const_cast is necessary since we are still using an LLVM version from
- // before the type system re-write.
- ComputePTXValueVTs(*this, RetTy, ValVTs);
+ SmallVector<uint64_t, 16> Offsets;
+ ComputePTXValueVTs(*this, RetTy, ValVTs, &Offsets, 0);
assert(ValVTs.size() == OutVals.size() && "Bad return value decomposition");
- unsigned SizeSoFar = 0;
for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
SDValue theVal = OutVals[i];
EVT TheValType = theVal.getValueType();
@@ -1954,16 +2088,14 @@ NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
else if (TmpVal.getValueType().getSizeInBits() < 16)
TmpVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i16, TmpVal);
- SDValue Ops[] = { Chain, DAG.getConstant(SizeSoFar, MVT::i32), TmpVal };
+ SDValue Ops[] = {
+ Chain,
+ DAG.getConstant(Offsets[i], MVT::i32),
+ TmpVal };
Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreRetval, dl,
DAG.getVTList(MVT::Other), Ops,
TheStoreType,
MachinePointerInfo());
- if(TheValType.isVector())
- SizeSoFar +=
- TheStoreType.getVectorElementType().getStoreSizeInBits() / 8;
- else
- SizeSoFar += TheStoreType.getStoreSizeInBits()/8;
}
}
}
@@ -2220,22 +2352,62 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic(
case Intrinsic::nvvm_ldu_global_i:
case Intrinsic::nvvm_ldu_global_f:
- case Intrinsic::nvvm_ldu_global_p:
+ case Intrinsic::nvvm_ldu_global_p: {
Info.opc = ISD::INTRINSIC_W_CHAIN;
if (Intrinsic == Intrinsic::nvvm_ldu_global_i)
Info.memVT = getValueType(I.getType());
- else if (Intrinsic == Intrinsic::nvvm_ldu_global_p)
+ else if(Intrinsic == Intrinsic::nvvm_ldu_global_p)
+ Info.memVT = getPointerTy();
+ else
Info.memVT = getValueType(I.getType());
+ Info.ptrVal = I.getArgOperand(0);
+ Info.offset = 0;
+ Info.vol = 0;
+ Info.readMem = true;
+ Info.writeMem = false;
+
+ // alignment is available as metadata.
+ // Grab it and set the alignment.
+ assert(I.hasMetadataOtherThanDebugLoc() && "Must have alignment metadata");
+ MDNode *AlignMD = I.getMetadata("align");
+ assert(AlignMD && "Must have a non-null MDNode");
+ assert(AlignMD->getNumOperands() == 1 && "Must have a single operand");
+ Value *Align = AlignMD->getOperand(0);
+ int64_t Alignment = cast<ConstantInt>(Align)->getZExtValue();
+ Info.align = Alignment;
+
+ return true;
+ }
+ case Intrinsic::nvvm_ldg_global_i:
+ case Intrinsic::nvvm_ldg_global_f:
+ case Intrinsic::nvvm_ldg_global_p: {
+
+ Info.opc = ISD::INTRINSIC_W_CHAIN;
+ if (Intrinsic == Intrinsic::nvvm_ldg_global_i)
+ Info.memVT = getValueType(I.getType());
+ else if(Intrinsic == Intrinsic::nvvm_ldg_global_p)
+ Info.memVT = getPointerTy();
else
- Info.memVT = MVT::f32;
+ Info.memVT = getValueType(I.getType());
Info.ptrVal = I.getArgOperand(0);
Info.offset = 0;
Info.vol = 0;
Info.readMem = true;
Info.writeMem = false;
- Info.align = 0;
+
+ // alignment is available as metadata.
+ // Grab it and set the alignment.
+ assert(I.hasMetadataOtherThanDebugLoc() && "Must have alignment metadata");
+ MDNode *AlignMD = I.getMetadata("align");
+ assert(AlignMD && "Must have a non-null MDNode");
+ assert(AlignMD->getNumOperands() == 1 && "Must have a single operand");
+ Value *Align = AlignMD->getOperand(0);
+ int64_t Alignment = cast<ConstantInt>(Align)->getZExtValue();
+ Info.align = Alignment;
+
return true;
+ }
case Intrinsic::nvvm_tex_1d_v4f32_i32:
case Intrinsic::nvvm_tex_1d_v4f32_f32:
@@ -2427,6 +2599,7 @@ NVPTXTargetLowering::getConstraintType(const std::string &Constraint) const {
switch (Constraint[0]) {
default:
break;
+ case 'b':
case 'r':
case 'h':
case 'c':
@@ -2446,6 +2619,8 @@ NVPTXTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
MVT VT) const {
if (Constraint.size() == 1) {
switch (Constraint[0]) {
+ case 'b':
+ return std::make_pair(0U, &NVPTX::Int1RegsRegClass);
case 'c':
return std::make_pair(0U, &NVPTX::Int16RegsRegClass);
case 'h':
@@ -2469,6 +2644,406 @@ unsigned NVPTXTargetLowering::getFunctionAlignment(const Function *) const {
return 4;
}
+//===----------------------------------------------------------------------===//
+// NVPTX DAG Combining
+//===----------------------------------------------------------------------===//
+
+extern unsigned FMAContractLevel;
+
+/// PerformADDCombineWithOperands - Try DAG combinations for an ADD with
+/// operands N0 and N1. This is a helper for PerformADDCombine that is
+/// called with the default operands, and if that fails, with commuted
+/// operands.
+static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const NVPTXSubtarget &Subtarget,
+ CodeGenOpt::Level OptLevel) {
+ SelectionDAG &DAG = DCI.DAG;
+ // Skip non-integer, non-scalar case
+ EVT VT=N0.getValueType();
+ if (VT.isVector())
+ return SDValue();
+
+ // fold (add (mul a, b), c) -> (mad a, b, c)
+ //
+ if (N0.getOpcode() == ISD::MUL) {
+ assert (VT.isInteger());
+ // For integer:
+ // Since integer multiply-add costs the same as integer multiply
+ // but is more costly than integer add, do the fusion only when
+ // the mul is only used in the add.
+ if (OptLevel==CodeGenOpt::None || VT != MVT::i32 ||
+ !N0.getNode()->hasOneUse())
+ return SDValue();
+
+ // Do the folding
+ return DAG.getNode(NVPTXISD::IMAD, SDLoc(N), VT,
+ N0.getOperand(0), N0.getOperand(1), N1);
+ }
+ else if (N0.getOpcode() == ISD::FMUL) {
+ if (VT == MVT::f32 || VT == MVT::f64) {
+ if (FMAContractLevel == 0)
+ return SDValue();
+
+ // For floating point:
+ // Do the fusion only when the mul has fewer than 5 uses and all
+ // of them are adds.
+ // The heuristic is that if a use is not an add, then that use
+ // cannot be fused into an fma, so the mul is still needed anyway.
+ // If there are more than 4 uses, even if they are all adds, fusing
+ // them will increase register pressure.
+ //
+ int numUses = 0;
+ int nonAddCount = 0;
+ for (SDNode::use_iterator UI = N0.getNode()->use_begin(),
+ UE = N0.getNode()->use_end();
+ UI != UE; ++UI) {
+ numUses++;
+ SDNode *User = *UI;
+ if (User->getOpcode() != ISD::FADD)
+ ++nonAddCount;
+ }
+ if (numUses >= 5)
+ return SDValue();
+ if (nonAddCount) {
+ int orderNo = N->getIROrder();
+ int orderNo2 = N0.getNode()->getIROrder();
+ // Simple heuristic here for considering potential register
+ // pressure: the difference is used to measure the distance
+ // between def and use; the longer the distance, the more likely
+ // it is to cause register pressure.
+ if (orderNo - orderNo2 < 500)
+ return SDValue();
+
+ // Now, check if at least one of the FMUL's operands is live beyond
+ // the node N, which guarantees that the FMA will not increase
+ // register pressure at node N.
+ bool opIsLive = false;
+ const SDNode *left = N0.getOperand(0).getNode();
+ const SDNode *right = N0.getOperand(1).getNode();
+
+ if (dyn_cast<ConstantSDNode>(left) || dyn_cast<ConstantSDNode>(right))
+ opIsLive = true;
+
+ if (!opIsLive)
+ for (SDNode::use_iterator UI = left->use_begin(), UE = left->use_end(); UI != UE; ++UI) {
+ SDNode *User = *UI;
+ int orderNo3 = User->getIROrder();
+ if (orderNo3 > orderNo) {
+ opIsLive = true;
+ break;
+ }
+ }
+
+ if (!opIsLive)
+ for (SDNode::use_iterator UI = right->use_begin(), UE = right->use_end(); UI != UE; ++UI) {
+ SDNode *User = *UI;
+ int orderNo3 = User->getIROrder();
+ if (orderNo3 > orderNo) {
+ opIsLive = true;
+ break;
+ }
+ }
+
+ if (!opIsLive)
+ return SDValue();
+ }
+
+ return DAG.getNode(ISD::FMA, SDLoc(N), VT,
+ N0.getOperand(0), N0.getOperand(1), N1);
+ }
+ }
+
+ return SDValue();
+}
+
+/// PerformADDCombine - Target-specific dag combine xforms for ISD::ADD.
+///
+static SDValue PerformADDCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const NVPTXSubtarget &Subtarget,
+ CodeGenOpt::Level OptLevel) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+
+ // First try with the default operand order.
+ SDValue Result = PerformADDCombineWithOperands(N, N0, N1, DCI, Subtarget,
+ OptLevel);
+ if (Result.getNode())
+ return Result;
+
+ // If that didn't work, try again with the operands commuted.
+ return PerformADDCombineWithOperands(N, N1, N0, DCI, Subtarget, OptLevel);
+}
+
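A small illustration of the integer pattern the add combine targets (assuming an i32 multiply with a single use, compiled above -O0):

    #include <cstdint>

    // add (mul a, b), c is rewritten to the IMAD node and selected as
    // mad.lo.s32, so the multiply and the add share one instruction.
    int32_t MulThenAdd(int32_t A, int32_t B, int32_t C) {
      return A * B + C;
    }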
+static SDValue PerformANDCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ // The type legalizer turns a vector load of i8 values into a zextload to i16
+ // registers, optionally ANY_EXTENDs it (if target type is integer),
+ // and ANDs off the high 8 bits. Since we turn this load into a
+ // target-specific DAG node, the DAG combiner fails to eliminate these AND
+ // nodes. Do that here.
+ SDValue Val = N->getOperand(0);
+ SDValue Mask = N->getOperand(1);
+
+ if (isa<ConstantSDNode>(Val)) {
+ std::swap(Val, Mask);
+ }
+
+ SDValue AExt;
+ // Generally, we will see zextload -> IMOV16rr -> ANY_EXTEND -> and
+ if (Val.getOpcode() == ISD::ANY_EXTEND) {
+ AExt = Val;
+ Val = Val->getOperand(0);
+ }
+
+ if (Val->isMachineOpcode() && Val->getMachineOpcode() == NVPTX::IMOV16rr) {
+ Val = Val->getOperand(0);
+ }
+
+ if (Val->getOpcode() == NVPTXISD::LoadV2 ||
+ Val->getOpcode() == NVPTXISD::LoadV4) {
+ ConstantSDNode *MaskCnst = dyn_cast<ConstantSDNode>(Mask);
+ if (!MaskCnst) {
+ // Not an AND with a constant
+ return SDValue();
+ }
+
+ uint64_t MaskVal = MaskCnst->getZExtValue();
+ if (MaskVal != 0xff) {
+ // Not an AND that chops off top 8 bits
+ return SDValue();
+ }
+
+ MemSDNode *Mem = dyn_cast<MemSDNode>(Val);
+ if (!Mem) {
+ // Not a MemSDNode?!?
+ return SDValue();
+ }
+
+ EVT MemVT = Mem->getMemoryVT();
+ if (MemVT != MVT::v2i8 && MemVT != MVT::v4i8) {
+ // We only handle the i8 case
+ return SDValue();
+ }
+
+ unsigned ExtType =
+ cast<ConstantSDNode>(Val->getOperand(Val->getNumOperands()-1))->
+ getZExtValue();
+ if (ExtType == ISD::SEXTLOAD) {
+ // If for some reason the load is a sextload, the and is needed to zero
+ // out the high 8 bits
+ return SDValue();
+ }
+
+ bool AddTo = false;
+ if (AExt.getNode() != 0) {
+ // Re-insert the ext as a zext.
+ Val = DCI.DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N),
+ AExt.getValueType(), Val);
+ AddTo = true;
+ }
+
+ // If we get here, the AND is unnecessary. Just replace it with the load
+ DCI.CombineTo(N, Val, AddTo);
+ }
+
+ return SDValue();
+}
+
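The redundancy this AND combine removes can be seen at the C level; a sketch, with the mask made explicit:

    #include <cstdint>

    // After a zero-extending 8-bit load the upper bits are already zero, so
    // the 'and x, 0xff' the type legalizer leaves behind is a no-op. The
    // combine folds the AND away and keeps only the LoadV2/LoadV4 result.
    uint32_t LoadByteZeroExtended(const uint8_t *P) {
      uint32_t V = *P;      // zextload i8 -> i32
      return V & 0xff;      // changes nothing; this is the AND being eliminated
    }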
+enum OperandSignedness {
+ Signed = 0,
+ Unsigned,
+ Unknown
+};
+
+/// IsMulWideOperandDemotable - Checks if the provided DAG node is an operand
+/// that can be demoted to \p OptSize bits without loss of information. The
+/// signedness of the operand, if determinable, is placed in \p S.
+static bool IsMulWideOperandDemotable(SDValue Op,
+ unsigned OptSize,
+ OperandSignedness &S) {
+ S = Unknown;
+
+ if (Op.getOpcode() == ISD::SIGN_EXTEND ||
+ Op.getOpcode() == ISD::SIGN_EXTEND_INREG) {
+ EVT OrigVT = Op.getOperand(0).getValueType();
+ if (OrigVT.getSizeInBits() == OptSize) {
+ S = Signed;
+ return true;
+ }
+ } else if (Op.getOpcode() == ISD::ZERO_EXTEND) {
+ EVT OrigVT = Op.getOperand(0).getValueType();
+ if (OrigVT.getSizeInBits() == OptSize) {
+ S = Unsigned;
+ return true;
+ }
+ }
+
+ return false;
+}
+
+/// AreMulWideOperandsDemotable - Checks if the given LHS and RHS operands can
+/// be demoted to \p OptSize bits without loss of information. If the operands
+/// contain a constant, it should appear as the RHS operand. The signedness of
+/// the operands is placed in \p IsSigned.
+static bool AreMulWideOperandsDemotable(SDValue LHS, SDValue RHS,
+ unsigned OptSize,
+ bool &IsSigned) {
+
+ OperandSignedness LHSSign;
+
+ // The LHS operand must be a demotable op
+ if (!IsMulWideOperandDemotable(LHS, OptSize, LHSSign))
+ return false;
+
+ // We should have been able to determine the signedness from the LHS
+ if (LHSSign == Unknown)
+ return false;
+
+ IsSigned = (LHSSign == Signed);
+
+ // The RHS can be a demotable op or a constant
+ if (ConstantSDNode *CI = dyn_cast<ConstantSDNode>(RHS)) {
+ APInt Val = CI->getAPIntValue();
+ if (LHSSign == Unsigned) {
+ if (Val.isIntN(OptSize)) {
+ return true;
+ }
+ return false;
+ } else {
+ if (Val.isSignedIntN(OptSize)) {
+ return true;
+ }
+ return false;
+ }
+ } else {
+ OperandSignedness RHSSign;
+ if (!IsMulWideOperandDemotable(RHS, OptSize, RHSSign))
+ return false;
+
+ if (LHSSign != RHSSign)
+ return false;
+
+ return true;
+ }
+}
+
+/// TryMULWIDECombine - Attempt to replace a multiply of M bits with a multiply
+/// of M/2 bits that produces an M-bit result (i.e. mul.wide). This transform
+/// works on both multiply DAG nodes and SHL DAG nodes with a constant shift
+/// amount.
+static SDValue TryMULWIDECombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ EVT MulType = N->getValueType(0);
+ if (MulType != MVT::i32 && MulType != MVT::i64) {
+ return SDValue();
+ }
+
+ unsigned OptSize = MulType.getSizeInBits() >> 1;
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+
+ // Canonicalize the multiply so the constant (if any) is on the right
+ if (N->getOpcode() == ISD::MUL) {
+ if (isa<ConstantSDNode>(LHS)) {
+ std::swap(LHS, RHS);
+ }
+ }
+
+ // If we have a SHL, determine the actual multiply amount
+ if (N->getOpcode() == ISD::SHL) {
+ ConstantSDNode *ShlRHS = dyn_cast<ConstantSDNode>(RHS);
+ if (!ShlRHS) {
+ return SDValue();
+ }
+
+ APInt ShiftAmt = ShlRHS->getAPIntValue();
+ unsigned BitWidth = MulType.getSizeInBits();
+ if (ShiftAmt.sge(0) && ShiftAmt.slt(BitWidth)) {
+ APInt MulVal = APInt(BitWidth, 1) << ShiftAmt;
+ RHS = DCI.DAG.getConstant(MulVal, MulType);
+ } else {
+ return SDValue();
+ }
+ }
+
+ bool Signed;
+ // Verify that our operands are demotable
+ if (!AreMulWideOperandsDemotable(LHS, RHS, OptSize, Signed)) {
+ return SDValue();
+ }
+
+ EVT DemotedVT;
+ if (MulType == MVT::i32) {
+ DemotedVT = MVT::i16;
+ } else {
+ DemotedVT = MVT::i32;
+ }
+
+ // Truncate the operands to the correct size. Note that these are just for
+ // type consistency and will (likely) be eliminated in later phases.
+ SDValue TruncLHS =
+ DCI.DAG.getNode(ISD::TRUNCATE, SDLoc(N), DemotedVT, LHS);
+ SDValue TruncRHS =
+ DCI.DAG.getNode(ISD::TRUNCATE, SDLoc(N), DemotedVT, RHS);
+
+ unsigned Opc;
+ if (Signed) {
+ Opc = NVPTXISD::MUL_WIDE_SIGNED;
+ } else {
+ Opc = NVPTXISD::MUL_WIDE_UNSIGNED;
+ }
+
+ return DCI.DAG.getNode(Opc, SDLoc(N), MulType, TruncLHS, TruncRHS);
+}
+
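A short illustration of why the demotion is sound (the helper name is made up for the example):

    #include <cstdint>

    // If both 32-bit operands are sign extensions of 16-bit values, the full
    // 32-bit product is exactly the 16 x 16 -> 32 widening multiply, which is
    // what NVPTXISD::MUL_WIDE_SIGNED / mul.wide.s16 computes. The same holds
    // for zero extensions with mul.wide.u16, and for i64 with 32-bit halves.
    int32_t MulWideS16(int16_t A, int16_t B) {
      return (int32_t)A * (int32_t)B;
    }
    // So (sext i16 a to i32) * (sext i16 b to i32) == MulWideS16(a, b).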
+/// PerformMULCombine - Runs PTX-specific DAG combine patterns on MUL nodes.
+static SDValue PerformMULCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI,
+ CodeGenOpt::Level OptLevel) {
+ if (OptLevel > 0) {
+ // Try mul.wide combining at OptLevel > 0
+ SDValue Ret = TryMULWIDECombine(N, DCI);
+ if (Ret.getNode())
+ return Ret;
+ }
+
+ return SDValue();
+}
+
+/// PerformSHLCombine - Runs PTX-specific DAG combine patterns on SHL nodes.
+static SDValue PerformSHLCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI,
+ CodeGenOpt::Level OptLevel) {
+ if (OptLevel > 0) {
+ // Try mul.wide combining at OptLevel > 0
+ SDValue Ret = TryMULWIDECombine(N, DCI);
+ if (Ret.getNode())
+ return Ret;
+ }
+
+ return SDValue();
+}
+
+SDValue NVPTXTargetLowering::PerformDAGCombine(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ // FIXME: Get this from the DAG somehow
+ CodeGenOpt::Level OptLevel = CodeGenOpt::Aggressive;
+ switch (N->getOpcode()) {
+ default: break;
+ case ISD::ADD:
+ case ISD::FADD:
+ return PerformADDCombine(N, DCI, nvptxSubtarget, OptLevel);
+ case ISD::MUL:
+ return PerformMULCombine(N, DCI, OptLevel);
+ case ISD::SHL:
+ return PerformSHLCombine(N, DCI, OptLevel);
+ case ISD::AND:
+ return PerformANDCombine(N, DCI);
+ }
+ return SDValue();
+}
+
/// ReplaceVectorLoad - Convert vector loads into multi-output scalar loads.
static void ReplaceLoadVector(SDNode *N, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &Results) {
diff --git a/lib/Target/NVPTX/NVPTXISelLowering.h b/lib/Target/NVPTX/NVPTXISelLowering.h
index 7bad8a2..7b4026d 100644
--- a/lib/Target/NVPTX/NVPTXISelLowering.h
+++ b/lib/Target/NVPTX/NVPTXISelLowering.h
@@ -16,7 +16,6 @@
#define NVPTXISELLOWERING_H
#include "NVPTX.h"
-#include "NVPTXSubtarget.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/Target/TargetLowering.h"
@@ -50,6 +49,11 @@ enum NodeType {
CallSeqBegin,
CallSeqEnd,
CallPrototype,
+ FUN_SHFL_CLAMP,
+ FUN_SHFR_CLAMP,
+ MUL_WIDE_SIGNED,
+ MUL_WIDE_UNSIGNED,
+ IMAD,
Dummy,
LoadV2 = ISD::FIRST_TARGET_MEMORY_OPCODE,
@@ -167,6 +171,8 @@ enum NodeType {
};
}
+class NVPTXSubtarget;
+
//===--------------------------------------------------------------------===//
// TargetLowering Implementation
//===--------------------------------------------------------------------===//
@@ -196,9 +202,9 @@ public:
/// getFunctionAlignment - Return the Log2 alignment of this function.
unsigned getFunctionAlignment(const Function *F) const;
- EVT getSetCCResultType(LLVMContext &, EVT VT) const override {
+ EVT getSetCCResultType(LLVMContext &Ctx, EVT VT) const override {
if (VT.isVector())
- return MVT::getVectorVT(MVT::i1, VT.getVectorNumElements());
+ return EVT::getVectorVT(Ctx, MVT::i1, VT.getVectorNumElements());
return MVT::i1;
}
@@ -236,7 +242,8 @@ public:
// PTX always uses 32-bit shift amounts
MVT getScalarShiftAmountTy(EVT LHSTy) const override { return MVT::i32; }
- bool shouldSplitVectorType(EVT VT) const override;
+ TargetLoweringBase::LegalizeTypeAction
+ getPreferredVectorAction(EVT VT) const override;
private:
const NVPTXSubtarget &nvptxSubtarget; // cache the subtarget here
@@ -255,8 +262,12 @@ private:
SDValue LowerSTOREi1(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const;
+
void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG) const override;
+ SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
unsigned getArgumentAlignment(SDValue Callee, const ImmutableCallSite *CS,
Type *Ty, unsigned Idx) const;
diff --git a/lib/Target/NVPTX/NVPTXImageOptimizer.cpp b/lib/Target/NVPTX/NVPTXImageOptimizer.cpp
index 397f4bc..a98fb37 100644
--- a/lib/Target/NVPTX/NVPTXImageOptimizer.cpp
+++ b/lib/Target/NVPTX/NVPTXImageOptimizer.cpp
@@ -146,7 +146,7 @@ bool NVPTXImageOptimizer::replaceIsTypePTexture(Instruction &I) {
void NVPTXImageOptimizer::replaceWith(Instruction *From, ConstantInt *To) {
// We implement "poor man's DCE" here to make sure any code that is no longer
// live is actually unreachable and can be trivially eliminated by the
- // unreachable block elimiation pass.
+ // unreachable block elimination pass.
for (CallInst::use_iterator UI = From->use_begin(), UE = From->use_end();
UI != UE; ++UI) {
if (BranchInst *BI = dyn_cast<BranchInst>(*UI)) {
diff --git a/lib/Target/NVPTX/NVPTXInstrInfo.cpp b/lib/Target/NVPTX/NVPTXInstrInfo.cpp
index cdc8088..b5b4fbe 100644
--- a/lib/Target/NVPTX/NVPTXInstrInfo.cpp
+++ b/lib/Target/NVPTX/NVPTXInstrInfo.cpp
@@ -29,8 +29,8 @@ using namespace llvm;
void NVPTXInstrInfo::anchor() {}
// FIXME: Add the subtarget support on this constructor.
-NVPTXInstrInfo::NVPTXInstrInfo(NVPTXTargetMachine &tm)
- : NVPTXGenInstrInfo(), TM(tm), RegInfo(*TM.getSubtargetImpl()) {}
+NVPTXInstrInfo::NVPTXInstrInfo(NVPTXSubtarget &STI)
+ : NVPTXGenInstrInfo(), RegInfo(STI) {}
void NVPTXInstrInfo::copyPhysReg(
MachineBasicBlock &MBB, MachineBasicBlock::iterator I, DebugLoc DL,
diff --git a/lib/Target/NVPTX/NVPTXInstrInfo.h b/lib/Target/NVPTX/NVPTXInstrInfo.h
index 88a9e45..2ac2974 100644
--- a/lib/Target/NVPTX/NVPTXInstrInfo.h
+++ b/lib/Target/NVPTX/NVPTXInstrInfo.h
@@ -24,11 +24,10 @@
namespace llvm {
class NVPTXInstrInfo : public NVPTXGenInstrInfo {
- NVPTXTargetMachine &TM;
const NVPTXRegisterInfo RegInfo;
virtual void anchor();
public:
- explicit NVPTXInstrInfo(NVPTXTargetMachine &TM);
+ explicit NVPTXInstrInfo(NVPTXSubtarget &STI);
const NVPTXRegisterInfo &getRegisterInfo() const { return RegInfo; }
diff --git a/lib/Target/NVPTX/NVPTXInstrInfo.td b/lib/Target/NVPTX/NVPTXInstrInfo.td
index fbcd0e4..d2c0373 100644
--- a/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ b/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -158,9 +158,12 @@ def do_SQRTF32_APPROX : Predicate<"!usePrecSqrtF32()">;
def do_SQRTF32_RN : Predicate<"usePrecSqrtF32()">;
def hasHWROT32 : Predicate<"Subtarget.hasHWROT32()">;
+def noHWROT32 : Predicate<"!Subtarget.hasHWROT32()">;
def true : Predicate<"1">;
+def hasPTX31 : Predicate<"Subtarget.getPTXVersion() >= 31">;
+
//===----------------------------------------------------------------------===//
// Some Common Instruction Class Templates
@@ -461,33 +464,45 @@ def SHL2MUL16 : SDNodeXForm<imm, [{
return CurDAG->getTargetConstant(temp.shl(v), MVT::i16);
}]>;
-def MULWIDES64 : NVPTXInst<(outs Int64Regs:$dst),
- (ins Int32Regs:$a, Int32Regs:$b),
+def MULWIDES64
+ : NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
+ "mul.wide.s32 \t$dst, $a, $b;", []>;
+def MULWIDES64Imm
+ : NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
"mul.wide.s32 \t$dst, $a, $b;", []>;
-def MULWIDES64Imm : NVPTXInst<(outs Int64Regs:$dst),
- (ins Int32Regs:$a, i64imm:$b),
+def MULWIDES64Imm64
+ : NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$a, i64imm:$b),
"mul.wide.s32 \t$dst, $a, $b;", []>;
-def MULWIDEU64 : NVPTXInst<(outs Int64Regs:$dst),
- (ins Int32Regs:$a, Int32Regs:$b),
+def MULWIDEU64
+ : NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
+ "mul.wide.u32 \t$dst, $a, $b;", []>;
+def MULWIDEU64Imm
+ : NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
"mul.wide.u32 \t$dst, $a, $b;", []>;
-def MULWIDEU64Imm : NVPTXInst<(outs Int64Regs:$dst),
- (ins Int32Regs:$a, i64imm:$b),
+def MULWIDEU64Imm64
+ : NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$a, i64imm:$b),
"mul.wide.u32 \t$dst, $a, $b;", []>;
-def MULWIDES32 : NVPTXInst<(outs Int32Regs:$dst),
- (ins Int16Regs:$a, Int16Regs:$b),
+def MULWIDES32
+ : NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b),
"mul.wide.s16 \t$dst, $a, $b;", []>;
-def MULWIDES32Imm : NVPTXInst<(outs Int32Regs:$dst),
- (ins Int16Regs:$a, i32imm:$b),
+def MULWIDES32Imm
+ : NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, i16imm:$b),
+ "mul.wide.s16 \t$dst, $a, $b;", []>;
+def MULWIDES32Imm32
+ : NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, i32imm:$b),
"mul.wide.s16 \t$dst, $a, $b;", []>;
-def MULWIDEU32 : NVPTXInst<(outs Int32Regs:$dst),
- (ins Int16Regs:$a, Int16Regs:$b),
- "mul.wide.u16 \t$dst, $a, $b;", []>;
-def MULWIDEU32Imm : NVPTXInst<(outs Int32Regs:$dst),
- (ins Int16Regs:$a, i32imm:$b),
+def MULWIDEU32
+ : NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b),
+ "mul.wide.u16 \t$dst, $a, $b;", []>;
+def MULWIDEU32Imm
+ : NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, i16imm:$b),
"mul.wide.u16 \t$dst, $a, $b;", []>;
+def MULWIDEU32Imm32
+ : NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, i32imm:$b),
+ "mul.wide.u16 \t$dst, $a, $b;", []>;
def : Pat<(shl (sext Int32Regs:$a), (i32 Int5Const:$b)),
(MULWIDES64Imm Int32Regs:$a, (SHL2MUL32 node:$b))>,
@@ -507,25 +522,63 @@ def : Pat<(mul (sext Int32Regs:$a), (sext Int32Regs:$b)),
(MULWIDES64 Int32Regs:$a, Int32Regs:$b)>,
Requires<[doMulWide]>;
def : Pat<(mul (sext Int32Regs:$a), (i64 SInt32Const:$b)),
- (MULWIDES64Imm Int32Regs:$a, (i64 SInt32Const:$b))>,
+ (MULWIDES64Imm64 Int32Regs:$a, (i64 SInt32Const:$b))>,
Requires<[doMulWide]>;
def : Pat<(mul (zext Int32Regs:$a), (zext Int32Regs:$b)),
- (MULWIDEU64 Int32Regs:$a, Int32Regs:$b)>, Requires<[doMulWide]>;
+ (MULWIDEU64 Int32Regs:$a, Int32Regs:$b)>,
+ Requires<[doMulWide]>;
def : Pat<(mul (zext Int32Regs:$a), (i64 UInt32Const:$b)),
- (MULWIDEU64Imm Int32Regs:$a, (i64 UInt32Const:$b))>,
+ (MULWIDEU64Imm64 Int32Regs:$a, (i64 UInt32Const:$b))>,
Requires<[doMulWide]>;
def : Pat<(mul (sext Int16Regs:$a), (sext Int16Regs:$b)),
- (MULWIDES32 Int16Regs:$a, Int16Regs:$b)>, Requires<[doMulWide]>;
+ (MULWIDES32 Int16Regs:$a, Int16Regs:$b)>,
+ Requires<[doMulWide]>;
def : Pat<(mul (sext Int16Regs:$a), (i32 SInt16Const:$b)),
- (MULWIDES32Imm Int16Regs:$a, (i32 SInt16Const:$b))>,
+ (MULWIDES32Imm32 Int16Regs:$a, (i32 SInt16Const:$b))>,
Requires<[doMulWide]>;
def : Pat<(mul (zext Int16Regs:$a), (zext Int16Regs:$b)),
- (MULWIDEU32 Int16Regs:$a, Int16Regs:$b)>, Requires<[doMulWide]>;
+ (MULWIDEU32 Int16Regs:$a, Int16Regs:$b)>,
+ Requires<[doMulWide]>;
def : Pat<(mul (zext Int16Regs:$a), (i32 UInt16Const:$b)),
- (MULWIDEU32Imm Int16Regs:$a, (i32 UInt16Const:$b))>,
+ (MULWIDEU32Imm32 Int16Regs:$a, (i32 UInt16Const:$b))>,
+ Requires<[doMulWide]>;
+
+
+def SDTMulWide
+ : SDTypeProfile<1, 2, [SDTCisSameAs<1, 2>]>;
+def mul_wide_signed
+ : SDNode<"NVPTXISD::MUL_WIDE_SIGNED", SDTMulWide>;
+def mul_wide_unsigned
+ : SDNode<"NVPTXISD::MUL_WIDE_UNSIGNED", SDTMulWide>;
+
+def : Pat<(i32 (mul_wide_signed Int16Regs:$a, Int16Regs:$b)),
+ (MULWIDES32 Int16Regs:$a, Int16Regs:$b)>,
+ Requires<[doMulWide]>;
+def : Pat<(i32 (mul_wide_signed Int16Regs:$a, imm:$b)),
+ (MULWIDES32Imm Int16Regs:$a, imm:$b)>,
+ Requires<[doMulWide]>;
+def : Pat<(i32 (mul_wide_unsigned Int16Regs:$a, Int16Regs:$b)),
+ (MULWIDEU32 Int16Regs:$a, Int16Regs:$b)>,
+ Requires<[doMulWide]>;
+def : Pat<(i32 (mul_wide_unsigned Int16Regs:$a, imm:$b)),
+ (MULWIDEU32Imm Int16Regs:$a, imm:$b)>,
+ Requires<[doMulWide]>;
+
+
+def : Pat<(i64 (mul_wide_signed Int32Regs:$a, Int32Regs:$b)),
+ (MULWIDES64 Int32Regs:$a, Int32Regs:$b)>,
+ Requires<[doMulWide]>;
+def : Pat<(i64 (mul_wide_signed Int32Regs:$a, imm:$b)),
+ (MULWIDES64Imm Int32Regs:$a, imm:$b)>,
+ Requires<[doMulWide]>;
+def : Pat<(i64 (mul_wide_unsigned Int32Regs:$a, Int32Regs:$b)),
+ (MULWIDEU64 Int32Regs:$a, Int32Regs:$b)>,
+ Requires<[doMulWide]>;
+def : Pat<(i64 (mul_wide_unsigned Int32Regs:$a, imm:$b)),
+ (MULWIDEU64Imm Int32Regs:$a, imm:$b)>,
Requires<[doMulWide]>;
defm MULT : I3<"mul.lo.s", mul>;
@@ -541,69 +594,75 @@ defm SREM : I3<"rem.s", srem>;
defm UREM : I3<"rem.u", urem>;
// The ri version will not be selected as DAGCombiner::visitUREM will lower it.
+def SDTIMAD
+ : SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisInt<0>,
+ SDTCisInt<2>, SDTCisSameAs<0, 2>,
+ SDTCisSameAs<0, 3>]>;
+def imad
+ : SDNode<"NVPTXISD::IMAD", SDTIMAD>;
+
def MAD16rrr : NVPTXInst<(outs Int16Regs:$dst),
(ins Int16Regs:$a, Int16Regs:$b, Int16Regs:$c),
"mad.lo.s16 \t$dst, $a, $b, $c;",
- [(set Int16Regs:$dst, (add
- (mul Int16Regs:$a, Int16Regs:$b), Int16Regs:$c))]>;
+ [(set Int16Regs:$dst,
+ (imad Int16Regs:$a, Int16Regs:$b, Int16Regs:$c))]>;
def MAD16rri : NVPTXInst<(outs Int16Regs:$dst),
(ins Int16Regs:$a, Int16Regs:$b, i16imm:$c),
"mad.lo.s16 \t$dst, $a, $b, $c;",
- [(set Int16Regs:$dst, (add
- (mul Int16Regs:$a, Int16Regs:$b), imm:$c))]>;
+ [(set Int16Regs:$dst,
+ (imad Int16Regs:$a, Int16Regs:$b, imm:$c))]>;
def MAD16rir : NVPTXInst<(outs Int16Regs:$dst),
(ins Int16Regs:$a, i16imm:$b, Int16Regs:$c),
"mad.lo.s16 \t$dst, $a, $b, $c;",
- [(set Int16Regs:$dst, (add
- (mul Int16Regs:$a, imm:$b), Int16Regs:$c))]>;
+ [(set Int16Regs:$dst,
+ (imad Int16Regs:$a, imm:$b, Int16Regs:$c))]>;
def MAD16rii : NVPTXInst<(outs Int16Regs:$dst),
(ins Int16Regs:$a, i16imm:$b, i16imm:$c),
"mad.lo.s16 \t$dst, $a, $b, $c;",
- [(set Int16Regs:$dst, (add (mul Int16Regs:$a, imm:$b),
- imm:$c))]>;
+ [(set Int16Regs:$dst,
+ (imad Int16Regs:$a, imm:$b, imm:$c))]>;
def MAD32rrr : NVPTXInst<(outs Int32Regs:$dst),
(ins Int32Regs:$a, Int32Regs:$b, Int32Regs:$c),
"mad.lo.s32 \t$dst, $a, $b, $c;",
- [(set Int32Regs:$dst, (add
- (mul Int32Regs:$a, Int32Regs:$b), Int32Regs:$c))]>;
+ [(set Int32Regs:$dst,
+ (imad Int32Regs:$a, Int32Regs:$b, Int32Regs:$c))]>;
def MAD32rri : NVPTXInst<(outs Int32Regs:$dst),
(ins Int32Regs:$a, Int32Regs:$b, i32imm:$c),
"mad.lo.s32 \t$dst, $a, $b, $c;",
- [(set Int32Regs:$dst, (add
- (mul Int32Regs:$a, Int32Regs:$b), imm:$c))]>;
+ [(set Int32Regs:$dst,
+ (imad Int32Regs:$a, Int32Regs:$b, imm:$c))]>;
def MAD32rir : NVPTXInst<(outs Int32Regs:$dst),
(ins Int32Regs:$a, i32imm:$b, Int32Regs:$c),
"mad.lo.s32 \t$dst, $a, $b, $c;",
- [(set Int32Regs:$dst, (add
- (mul Int32Regs:$a, imm:$b), Int32Regs:$c))]>;
+ [(set Int32Regs:$dst,
+ (imad Int32Regs:$a, imm:$b, Int32Regs:$c))]>;
def MAD32rii : NVPTXInst<(outs Int32Regs:$dst),
(ins Int32Regs:$a, i32imm:$b, i32imm:$c),
"mad.lo.s32 \t$dst, $a, $b, $c;",
- [(set Int32Regs:$dst, (add
- (mul Int32Regs:$a, imm:$b), imm:$c))]>;
+ [(set Int32Regs:$dst,
+ (imad Int32Regs:$a, imm:$b, imm:$c))]>;
def MAD64rrr : NVPTXInst<(outs Int64Regs:$dst),
(ins Int64Regs:$a, Int64Regs:$b, Int64Regs:$c),
"mad.lo.s64 \t$dst, $a, $b, $c;",
- [(set Int64Regs:$dst, (add
- (mul Int64Regs:$a, Int64Regs:$b), Int64Regs:$c))]>;
+ [(set Int64Regs:$dst,
+ (imad Int64Regs:$a, Int64Regs:$b, Int64Regs:$c))]>;
def MAD64rri : NVPTXInst<(outs Int64Regs:$dst),
(ins Int64Regs:$a, Int64Regs:$b, i64imm:$c),
"mad.lo.s64 \t$dst, $a, $b, $c;",
- [(set Int64Regs:$dst, (add
- (mul Int64Regs:$a, Int64Regs:$b), imm:$c))]>;
+ [(set Int64Regs:$dst,
+ (imad Int64Regs:$a, Int64Regs:$b, imm:$c))]>;
def MAD64rir : NVPTXInst<(outs Int64Regs:$dst),
(ins Int64Regs:$a, i64imm:$b, Int64Regs:$c),
"mad.lo.s64 \t$dst, $a, $b, $c;",
- [(set Int64Regs:$dst, (add
- (mul Int64Regs:$a, imm:$b), Int64Regs:$c))]>;
+ [(set Int64Regs:$dst,
+ (imad Int64Regs:$a, imm:$b, Int64Regs:$c))]>;
def MAD64rii : NVPTXInst<(outs Int64Regs:$dst),
(ins Int64Regs:$a, i64imm:$b, i64imm:$c),
"mad.lo.s64 \t$dst, $a, $b, $c;",
- [(set Int64Regs:$dst, (add
- (mul Int64Regs:$a, imm:$b), imm:$c))]>;
-
+ [(set Int64Regs:$dst,
+ (imad Int64Regs:$a, imm:$b, imm:$c))]>;
def INEG16 : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$src),
"neg.s16 \t$dst, $src;",
@@ -809,36 +868,26 @@ multiclass FPCONTRACT32<string OpcStr, Predicate Pred> {
def rrr : NVPTXInst<(outs Float32Regs:$dst),
(ins Float32Regs:$a, Float32Regs:$b, Float32Regs:$c),
!strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
- [(set Float32Regs:$dst, (fadd
- (fmul Float32Regs:$a, Float32Regs:$b),
- Float32Regs:$c))]>, Requires<[Pred]>;
- // This is to WAR a weird bug in Tablegen that does not automatically
- // generate the following permutated rule rrr2 from the above rrr.
- // So we explicitly add it here. This happens to FMA32 only.
- // See the comments at FMAD32 and FMA32 for more information.
- def rrr2 : NVPTXInst<(outs Float32Regs:$dst),
- (ins Float32Regs:$a, Float32Regs:$b, Float32Regs:$c),
- !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
- [(set Float32Regs:$dst, (fadd Float32Regs:$c,
- (fmul Float32Regs:$a, Float32Regs:$b)))]>,
+ [(set Float32Regs:$dst,
+ (fma Float32Regs:$a, Float32Regs:$b, Float32Regs:$c))]>,
Requires<[Pred]>;
def rri : NVPTXInst<(outs Float32Regs:$dst),
(ins Float32Regs:$a, Float32Regs:$b, f32imm:$c),
!strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
- [(set Float32Regs:$dst, (fadd
- (fmul Float32Regs:$a, Float32Regs:$b), fpimm:$c))]>,
+ [(set Float32Regs:$dst,
+ (fma Float32Regs:$a, Float32Regs:$b, fpimm:$c))]>,
Requires<[Pred]>;
def rir : NVPTXInst<(outs Float32Regs:$dst),
(ins Float32Regs:$a, f32imm:$b, Float32Regs:$c),
!strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
- [(set Float32Regs:$dst, (fadd
- (fmul Float32Regs:$a, fpimm:$b), Float32Regs:$c))]>,
+ [(set Float32Regs:$dst,
+ (fma Float32Regs:$a, fpimm:$b, Float32Regs:$c))]>,
Requires<[Pred]>;
def rii : NVPTXInst<(outs Float32Regs:$dst),
(ins Float32Regs:$a, f32imm:$b, f32imm:$c),
!strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
- [(set Float32Regs:$dst, (fadd
- (fmul Float32Regs:$a, fpimm:$b), fpimm:$c))]>,
+ [(set Float32Regs:$dst,
+ (fma Float32Regs:$a, fpimm:$b, fpimm:$c))]>,
Requires<[Pred]>;
}
@@ -846,73 +895,32 @@ multiclass FPCONTRACT64<string OpcStr, Predicate Pred> {
def rrr : NVPTXInst<(outs Float64Regs:$dst),
(ins Float64Regs:$a, Float64Regs:$b, Float64Regs:$c),
!strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
- [(set Float64Regs:$dst, (fadd
- (fmul Float64Regs:$a, Float64Regs:$b),
- Float64Regs:$c))]>, Requires<[Pred]>;
+ [(set Float64Regs:$dst,
+ (fma Float64Regs:$a, Float64Regs:$b, Float64Regs:$c))]>,
+ Requires<[Pred]>;
def rri : NVPTXInst<(outs Float64Regs:$dst),
(ins Float64Regs:$a, Float64Regs:$b, f64imm:$c),
!strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
- [(set Float64Regs:$dst, (fadd (fmul Float64Regs:$a,
- Float64Regs:$b), fpimm:$c))]>, Requires<[Pred]>;
+ [(set Float64Regs:$dst,
+ (fma Float64Regs:$a, Float64Regs:$b, fpimm:$c))]>,
+ Requires<[Pred]>;
def rir : NVPTXInst<(outs Float64Regs:$dst),
(ins Float64Regs:$a, f64imm:$b, Float64Regs:$c),
!strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
- [(set Float64Regs:$dst, (fadd
- (fmul Float64Regs:$a, fpimm:$b), Float64Regs:$c))]>,
+ [(set Float64Regs:$dst,
+ (fma Float64Regs:$a, fpimm:$b, Float64Regs:$c))]>,
Requires<[Pred]>;
def rii : NVPTXInst<(outs Float64Regs:$dst),
(ins Float64Regs:$a, f64imm:$b, f64imm:$c),
!strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
- [(set Float64Regs:$dst, (fadd
- (fmul Float64Regs:$a, fpimm:$b), fpimm:$c))]>,
+ [(set Float64Regs:$dst,
+ (fma Float64Regs:$a, fpimm:$b, fpimm:$c))]>,
Requires<[Pred]>;
}
-// Due to a unknown reason (most likely a bug in tablegen), tablegen does not
-// automatically generate the rrr2 rule from
-// the rrr rule (see FPCONTRACT32) for FMA32, though it does for FMAD32.
-// If we reverse the order of the following two lines, then rrr2 rule will be
-// generated for FMA32, but not for rrr.
-// Therefore, we manually write the rrr2 rule in FPCONTRACT32.
-defm FMA32_ftz : FPCONTRACT32<"fma.rn.ftz.f32", doFMAF32_ftz>;
-defm FMA32 : FPCONTRACT32<"fma.rn.f32", doFMAF32>;
-defm FMA64 : FPCONTRACT64<"fma.rn.f64", doFMAF64>;
-
-// b*c-a => fmad(b, c, -a)
-multiclass FPCONTRACT32_SUB_PAT_MAD<NVPTXInst Inst, Predicate Pred> {
- def : Pat<(fsub (fmul Float32Regs:$b, Float32Regs:$c), Float32Regs:$a),
- (Inst Float32Regs:$b, Float32Regs:$c, (FNEGf32 Float32Regs:$a))>,
- Requires<[Pred]>;
-}
-
-// a-b*c => fmad(-b,c, a)
-// - legal because a-b*c <=> a+(-b*c) <=> a+(-b)*c
-// b*c-a => fmad(b, c, -a)
-// - legal because b*c-a <=> b*c+(-a)
-multiclass FPCONTRACT32_SUB_PAT<NVPTXInst Inst, Predicate Pred> {
- def : Pat<(fsub Float32Regs:$a, (fmul Float32Regs:$b, Float32Regs:$c)),
- (Inst (FNEGf32 Float32Regs:$b), Float32Regs:$c, Float32Regs:$a)>,
- Requires<[Pred]>;
- def : Pat<(fsub (fmul Float32Regs:$b, Float32Regs:$c), Float32Regs:$a),
- (Inst Float32Regs:$b, Float32Regs:$c, (FNEGf32 Float32Regs:$a))>,
- Requires<[Pred]>;
-}
-
-// a-b*c => fmad(-b,c, a)
-// b*c-a => fmad(b, c, -a)
-multiclass FPCONTRACT64_SUB_PAT<NVPTXInst Inst, Predicate Pred> {
- def : Pat<(fsub Float64Regs:$a, (fmul Float64Regs:$b, Float64Regs:$c)),
- (Inst (FNEGf64 Float64Regs:$b), Float64Regs:$c, Float64Regs:$a)>,
- Requires<[Pred]>;
-
- def : Pat<(fsub (fmul Float64Regs:$b, Float64Regs:$c), Float64Regs:$a),
- (Inst Float64Regs:$b, Float64Regs:$c, (FNEGf64 Float64Regs:$a))>,
- Requires<[Pred]>;
-}
-
-defm FMAF32ext_ftz : FPCONTRACT32_SUB_PAT<FMA32_ftzrrr, doFMAF32AGG_ftz>;
-defm FMAF32ext : FPCONTRACT32_SUB_PAT<FMA32rrr, doFMAF32AGG>;
-defm FMAF64ext : FPCONTRACT64_SUB_PAT<FMA64rrr, doFMAF64AGG>;
+defm FMA32_ftz : FPCONTRACT32<"fma.rn.ftz.f32", doF32FTZ>;
+defm FMA32 : FPCONTRACT32<"fma.rn.f32", doNoF32FTZ>;
+defm FMA64 : FPCONTRACT64<"fma.rn.f64", doNoF32FTZ>;
def SINF: NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$src),
"sin.approx.f32 \t$dst, $src;",
@@ -1083,6 +1091,43 @@ multiclass RSHIFT_FORMAT<string OpcStr, SDNode OpNode> {
defm SRA : RSHIFT_FORMAT<"shr.s", sra>;
defm SRL : RSHIFT_FORMAT<"shr.u", srl>;
+//
+// Rotate: use the ptx shf instruction if available.
+//
+
+// 32 bit r2 = rotl r1, n
+// =>
+// r2 = shf.l r1, r1, n
+def ROTL32imm_hw : NVPTXInst<(outs Int32Regs:$dst),
+ (ins Int32Regs:$src, i32imm:$amt),
+ "shf.l.wrap.b32 \t$dst, $src, $src, $amt;",
+ [(set Int32Regs:$dst, (rotl Int32Regs:$src, (i32 imm:$amt)))]>,
+ Requires<[hasHWROT32]> ;
+
+def ROTL32reg_hw : NVPTXInst<(outs Int32Regs:$dst),
+ (ins Int32Regs:$src, Int32Regs:$amt),
+ "shf.l.wrap.b32 \t$dst, $src, $src, $amt;",
+ [(set Int32Regs:$dst, (rotl Int32Regs:$src, Int32Regs:$amt))]>,
+ Requires<[hasHWROT32]>;
+
+// 32 bit r2 = rotr r1, n
+// =>
+// r2 = shf.r r1, r1, n
+def ROTR32imm_hw : NVPTXInst<(outs Int32Regs:$dst),
+ (ins Int32Regs:$src, i32imm:$amt),
+ "shf.r.wrap.b32 \t$dst, $src, $src, $amt;",
+ [(set Int32Regs:$dst, (rotr Int32Regs:$src, (i32 imm:$amt)))]>,
+ Requires<[hasHWROT32]>;
+
+def ROTR32reg_hw : NVPTXInst<(outs Int32Regs:$dst),
+ (ins Int32Regs:$src, Int32Regs:$amt),
+ "shf.r.wrap.b32 \t$dst, $src, $src, $amt;",
+ [(set Int32Regs:$dst, (rotr Int32Regs:$src, Int32Regs:$amt))]>,
+ Requires<[hasHWROT32]>;
+
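The shf.*.wrap form implements a plain rotate when the same register is supplied as both halves; a minimal C++ equivalent, assuming wrap mode reduces the amount modulo 32:

    #include <cstdint>

    // shf.l.wrap.b32 dst, src, src, amt  ==  rotate-left of src by amt % 32.
    uint32_t RotateLeft32(uint32_t X, unsigned Amt) {
      Amt &= 31;                   // wrap mode uses the amount modulo 32
      if (Amt == 0)
        return X;                  // avoid the undefined 32-bit C++ shift
      return (X << Amt) | (X >> (32 - Amt));
    }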
+//
+// Rotate: if the ptx shf instruction is not available, then use shift+add
+//
// 32bit
def ROT32imm_sw : NVPTXInst<(outs Int32Regs:$dst),
(ins Int32Regs:$src, i32imm:$amt1, i32imm:$amt2),
@@ -1100,9 +1145,11 @@ def SUB_FRM_32 : SDNodeXForm<imm, [{
}]>;
def : Pat<(rotl Int32Regs:$src, (i32 imm:$amt)),
- (ROT32imm_sw Int32Regs:$src, imm:$amt, (SUB_FRM_32 node:$amt))>;
+ (ROT32imm_sw Int32Regs:$src, imm:$amt, (SUB_FRM_32 node:$amt))>,
+ Requires<[noHWROT32]>;
def : Pat<(rotr Int32Regs:$src, (i32 imm:$amt)),
- (ROT32imm_sw Int32Regs:$src, (SUB_FRM_32 node:$amt), imm:$amt)>;
+ (ROT32imm_sw Int32Regs:$src, (SUB_FRM_32 node:$amt), imm:$amt)>,
+ Requires<[noHWROT32]>;
def ROTL32reg_sw : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src,
Int32Regs:$amt),
@@ -1115,7 +1162,8 @@ def ROTL32reg_sw : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src,
!strconcat("shr.b32 \t%rhs, $src, %amt2;\n\t",
!strconcat("add.u32 \t$dst, %lhs, %rhs;\n\t",
!strconcat("}}", ""))))))))),
- [(set Int32Regs:$dst, (rotl Int32Regs:$src, Int32Regs:$amt))]>;
+ [(set Int32Regs:$dst, (rotl Int32Regs:$src, Int32Regs:$amt))]>,
+ Requires<[noHWROT32]>;
def ROTR32reg_sw : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src,
Int32Regs:$amt),
@@ -1128,7 +1176,8 @@ def ROTR32reg_sw : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src,
!strconcat("shl.b32 \t%rhs, $src, %amt2;\n\t",
!strconcat("add.u32 \t$dst, %lhs, %rhs;\n\t",
!strconcat("}}", ""))))))))),
- [(set Int32Regs:$dst, (rotr Int32Regs:$src, Int32Regs:$amt))]>;
+ [(set Int32Regs:$dst, (rotr Int32Regs:$src, Int32Regs:$amt))]>,
+ Requires<[noHWROT32]>;
// 64bit
def ROT64imm_sw : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src,
@@ -1177,6 +1226,29 @@ def ROTR64reg_sw : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src,
!strconcat("}}", ""))))))))),
[(set Int64Regs:$dst, (rotr Int64Regs:$src, Int32Regs:$amt))]>;
+// BFE - bit-field extract
+
+multiclass BFE<string TyStr, RegisterClass RC> {
+ // BFE supports both 32-bit and 64-bit values, but the start and length
+ // operands are always 32-bit
+ def rrr
+ : NVPTXInst<(outs RC:$d),
+ (ins RC:$a, Int32Regs:$b, Int32Regs:$c),
+ !strconcat("bfe.", TyStr, " \t$d, $a, $b, $c;"), []>;
+ def rri
+ : NVPTXInst<(outs RC:$d),
+ (ins RC:$a, Int32Regs:$b, i32imm:$c),
+ !strconcat("bfe.", TyStr, " \t$d, $a, $b, $c;"), []>;
+ def rii
+ : NVPTXInst<(outs RC:$d),
+ (ins RC:$a, i32imm:$b, i32imm:$c),
+ !strconcat("bfe.", TyStr, " \t$d, $a, $b, $c;"), []>;
+}
+
+defm BFE_S32 : BFE<"s32", Int32Regs>;
+defm BFE_U32 : BFE<"u32", Int32Regs>;
+defm BFE_S64 : BFE<"s64", Int64Regs>;
+defm BFE_U64 : BFE<"u64", Int64Regs>;
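These BFE instructions are defined with empty patterns and matched later by SelectBFE; as a rough model of the unsigned 32-bit form, assuming the field lies entirely within the word (0 <= Pos, Len and Pos + Len <= 32):

    #include <cstdint>

    // bfe.u32 d, a, b, c: extract c bits of a starting at bit b, zero-extended.
    uint32_t BitFieldExtractU32(uint32_t A, unsigned Pos, unsigned Len) {
      if (Len == 0)
        return 0;
      uint32_t Mask = (Len >= 32) ? 0xffffffffu : ((1u << Len) - 1u);
      return (A >> Pos) & Mask;
    }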
//-----------------------------------
// General Comparison
@@ -1292,6 +1364,32 @@ def : Pat<(i1 (select Int1Regs:$p, Int1Regs:$a, Int1Regs:$b)),
(ORb1rr (ANDb1rr Int1Regs:$p, Int1Regs:$a),
(ANDb1rr (NOT1 Int1Regs:$p), Int1Regs:$b))>;
+//
+// Funnel shift in clamp mode
+//
+// - SDNodes are created so they can be used in the DAG code,
+// e.g. NVPTXISelLowering (LowerShiftLeftParts and LowerShiftRightParts)
+//
+def SDTIntShiftDOp: SDTypeProfile<1, 3,
+ [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>,
+ SDTCisInt<0>, SDTCisInt<3>]>;
+def FUN_SHFL_CLAMP : SDNode<"NVPTXISD::FUN_SHFL_CLAMP", SDTIntShiftDOp, []>;
+def FUN_SHFR_CLAMP : SDNode<"NVPTXISD::FUN_SHFR_CLAMP", SDTIntShiftDOp, []>;
+
+def FUNSHFLCLAMP : NVPTXInst<(outs Int32Regs:$dst),
+ (ins Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt),
+ "shf.l.clamp.b32 \t$dst, $lo, $hi, $amt;",
+ [(set Int32Regs:$dst,
+ (FUN_SHFL_CLAMP Int32Regs:$lo,
+ Int32Regs:$hi, Int32Regs:$amt))]>;
+
+def FUNSHFRCLAMP : NVPTXInst<(outs Int32Regs:$dst),
+ (ins Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt),
+ "shf.r.clamp.b32 \t$dst, $lo, $hi, $amt;",
+ [(set Int32Regs:$dst,
+ (FUN_SHFR_CLAMP Int32Regs:$lo,
+ Int32Regs:$hi, Int32Regs:$amt))]>;
+
//-----------------------------------
// Data Movement (Load / Store, Move)
//-----------------------------------
diff --git a/lib/Target/NVPTX/NVPTXIntrinsics.td b/lib/Target/NVPTX/NVPTXIntrinsics.td
index 5e228fc..0ad3dfa 100644
--- a/lib/Target/NVPTX/NVPTXIntrinsics.td
+++ b/lib/Target/NVPTX/NVPTXIntrinsics.td
@@ -1057,12 +1057,24 @@ def atomic_load_max_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
(atomic_load_max_32 node:$a, node:$b)>;
def atomic_load_max_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
(atomic_load_max_32 node:$a, node:$b)>;
+def atomic_load_max_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
+  (atomic_load_max_64 node:$a, node:$b)>;
+def atomic_load_max_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
+ (atomic_load_max_64 node:$a, node:$b)>;
+def atomic_load_max_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
+ (atomic_load_max_64 node:$a, node:$b)>;
def atomic_load_umax_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
(atomic_load_umax_32 node:$a, node:$b)>;
def atomic_load_umax_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
(atomic_load_umax_32 node:$a, node:$b)>;
def atomic_load_umax_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
(atomic_load_umax_32 node:$a, node:$b)>;
+def atomic_load_umax_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
+ (atomic_load_umax_64 node:$a, node:$b)>;
+def atomic_load_umax_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
+ (atomic_load_umax_64 node:$a, node:$b)>;
+def atomic_load_umax_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
+ (atomic_load_umax_64 node:$a, node:$b)>;
defm INT_PTX_ATOM_LOAD_MAX_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".s32",
".max", atomic_load_max_32_g, i32imm, imm, hasAtomRedG32>;
@@ -1072,6 +1084,14 @@ defm INT_PTX_ATOM_LOAD_MAX_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".s32", ".max",
atomic_load_max_32_gen, i32imm, imm, hasAtomRedGen32>;
defm INT_PTX_ATOM_LOAD_MAX_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global",
".s32", ".max", atomic_load_max_32_gen, i32imm, imm, useAtomRedG32forGen32>;
+defm INT_PTX_ATOM_LOAD_MAX_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".s64",
+ ".max", atomic_load_max_64_g, i64imm, imm, hasAtomRedG64>;
+defm INT_PTX_ATOM_LOAD_MAX_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".s64",
+ ".max", atomic_load_max_64_s, i64imm, imm, hasAtomRedS64>;
+defm INT_PTX_ATOM_LOAD_MAX_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".s64", ".max",
+ atomic_load_max_64_gen, i64imm, imm, hasAtomRedGen64>;
+defm INT_PTX_ATOM_LOAD_MAX_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global",
+ ".s64", ".max", atomic_load_max_64_gen, i64imm, imm, useAtomRedG64forGen64>;
defm INT_PTX_ATOM_LOAD_UMAX_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32",
".max", atomic_load_umax_32_g, i32imm, imm, hasAtomRedG32>;
defm INT_PTX_ATOM_LOAD_UMAX_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32",
@@ -1080,6 +1100,14 @@ defm INT_PTX_ATOM_LOAD_UMAX_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".max",
atomic_load_umax_32_gen, i32imm, imm, hasAtomRedGen32>;
defm INT_PTX_ATOM_LOAD_UMAX_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global",
".u32", ".max", atomic_load_umax_32_gen, i32imm, imm, useAtomRedG32forGen32>;
+defm INT_PTX_ATOM_LOAD_UMAX_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".u64",
+ ".max", atomic_load_umax_64_g, i64imm, imm, hasAtomRedG64>;
+defm INT_PTX_ATOM_LOAD_UMAX_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".u64",
+ ".max", atomic_load_umax_64_s, i64imm, imm, hasAtomRedS64>;
+defm INT_PTX_ATOM_LOAD_UMAX_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".u64", ".max",
+ atomic_load_umax_64_gen, i64imm, imm, hasAtomRedGen64>;
+defm INT_PTX_ATOM_LOAD_UMAX_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global",
+ ".u64", ".max", atomic_load_umax_64_gen, i64imm, imm, useAtomRedG64forGen64>;
// atom_min
@@ -1089,12 +1117,24 @@ def atomic_load_min_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
(atomic_load_min_32 node:$a, node:$b)>;
def atomic_load_min_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
(atomic_load_min_32 node:$a, node:$b)>;
+def atomic_load_min_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
+ (atomic_load_min_64 node:$a, node:$b)>;
+def atomic_load_min_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
+ (atomic_load_min_64 node:$a, node:$b)>;
+def atomic_load_min_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
+ (atomic_load_min_64 node:$a, node:$b)>;
def atomic_load_umin_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
(atomic_load_umin_32 node:$a, node:$b)>;
def atomic_load_umin_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
(atomic_load_umin_32 node:$a, node:$b)>;
def atomic_load_umin_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
(atomic_load_umin_32 node:$a, node:$b)>;
+def atomic_load_umin_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
+ (atomic_load_umin_64 node:$a, node:$b)>;
+def atomic_load_umin_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
+ (atomic_load_umin_64 node:$a, node:$b)>;
+def atomic_load_umin_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
+ (atomic_load_umin_64 node:$a, node:$b)>;
defm INT_PTX_ATOM_LOAD_MIN_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".s32",
".min", atomic_load_min_32_g, i32imm, imm, hasAtomRedG32>;
@@ -1104,6 +1144,14 @@ defm INT_PTX_ATOM_LOAD_MIN_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".s32", ".min",
atomic_load_min_32_gen, i32imm, imm, hasAtomRedGen32>;
defm INT_PTX_ATOM_LOAD_MIN_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global",
".s32", ".min", atomic_load_min_32_gen, i32imm, imm, useAtomRedG32forGen32>;
+defm INT_PTX_ATOM_LOAD_MIN_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".s64",
+ ".min", atomic_load_min_64_g, i64imm, imm, hasAtomRedG64>;
+defm INT_PTX_ATOM_LOAD_MIN_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".s64",
+ ".min", atomic_load_min_64_s, i64imm, imm, hasAtomRedS64>;
+defm INT_PTX_ATOM_LOAD_MIN_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".s64", ".min",
+ atomic_load_min_64_gen, i64imm, imm, hasAtomRedGen64>;
+defm INT_PTX_ATOM_LOAD_MIN_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global",
+ ".s64", ".min", atomic_load_min_64_gen, i64imm, imm, useAtomRedG64forGen64>;
defm INT_PTX_ATOM_LOAD_UMIN_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32",
".min", atomic_load_umin_32_g, i32imm, imm, hasAtomRedG32>;
defm INT_PTX_ATOM_LOAD_UMIN_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32",
@@ -1112,6 +1160,14 @@ defm INT_PTX_ATOM_LOAD_UMIN_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".min",
atomic_load_umin_32_gen, i32imm, imm, hasAtomRedGen32>;
defm INT_PTX_ATOM_LOAD_UMIN_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global",
".u32", ".min", atomic_load_umin_32_gen, i32imm, imm, useAtomRedG32forGen32>;
+defm INT_PTX_ATOM_LOAD_UMIN_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".u64",
+ ".min", atomic_load_umin_64_g, i64imm, imm, hasAtomRedG64>;
+defm INT_PTX_ATOM_LOAD_UMIN_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".u64",
+ ".min", atomic_load_umin_64_s, i64imm, imm, hasAtomRedS64>;
+defm INT_PTX_ATOM_LOAD_UMIN_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".u64", ".min",
+ atomic_load_umin_64_gen, i64imm, imm, hasAtomRedGen64>;
+defm INT_PTX_ATOM_LOAD_UMIN_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global",
+ ".u64", ".min", atomic_load_umin_64_gen, i64imm, imm, useAtomRedG64forGen64>;
// atom_inc atom_dec
@@ -1153,6 +1209,12 @@ def atomic_load_and_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
(atomic_load_and_32 node:$a, node:$b)>;
def atomic_load_and_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
(atomic_load_and_32 node:$a, node:$b)>;
+def atomic_load_and_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
+ (atomic_load_and_64 node:$a, node:$b)>;
+def atomic_load_and_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
+ (atomic_load_and_64 node:$a, node:$b)>;
+def atomic_load_and_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
+ (atomic_load_and_64 node:$a, node:$b)>;
defm INT_PTX_ATOM_AND_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".and",
atomic_load_and_32_g, i32imm, imm, hasAtomRedG32>;
@@ -1162,6 +1224,14 @@ defm INT_PTX_ATOM_AND_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".b32", ".and",
atomic_load_and_32_gen, i32imm, imm, hasAtomRedGen32>;
defm INT_PTX_ATOM_AND_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".b32",
".and", atomic_load_and_32_gen, i32imm, imm, useAtomRedG32forGen32>;
+defm INT_PTX_ATOM_AND_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".and",
+ atomic_load_and_64_g, i64imm, imm, hasAtomRedG64>;
+defm INT_PTX_ATOM_AND_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".and",
+ atomic_load_and_64_s, i64imm, imm, hasAtomRedS64>;
+defm INT_PTX_ATOM_AND_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".b64", ".and",
+ atomic_load_and_64_gen, i64imm, imm, hasAtomRedGen64>;
+defm INT_PTX_ATOM_AND_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".b64",
+ ".and", atomic_load_and_64_gen, i64imm, imm, useAtomRedG64forGen64>;
// atom_or
@@ -1171,6 +1241,12 @@ def atomic_load_or_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
(atomic_load_or_32 node:$a, node:$b)>;
def atomic_load_or_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
(atomic_load_or_32 node:$a, node:$b)>;
+def atomic_load_or_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
+ (atomic_load_or_64 node:$a, node:$b)>;
+def atomic_load_or_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
+ (atomic_load_or_64 node:$a, node:$b)>;
+def atomic_load_or_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
+ (atomic_load_or_64 node:$a, node:$b)>;
defm INT_PTX_ATOM_OR_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".or",
atomic_load_or_32_g, i32imm, imm, hasAtomRedG32>;
@@ -1180,6 +1256,14 @@ defm INT_PTX_ATOM_OR_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".b32",
".or", atomic_load_or_32_gen, i32imm, imm, useAtomRedG32forGen32>;
defm INT_PTX_ATOM_OR_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".or",
atomic_load_or_32_s, i32imm, imm, hasAtomRedS32>;
+defm INT_PTX_ATOM_OR_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".or",
+ atomic_load_or_64_g, i64imm, imm, hasAtomRedG64>;
+defm INT_PTX_ATOM_OR_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".b64", ".or",
+ atomic_load_or_64_gen, i64imm, imm, hasAtomRedGen64>;
+defm INT_PTX_ATOM_OR_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".b64",
+ ".or", atomic_load_or_64_gen, i64imm, imm, useAtomRedG64forGen64>;
+defm INT_PTX_ATOM_OR_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".or",
+ atomic_load_or_64_s, i64imm, imm, hasAtomRedS64>;
// atom_xor
@@ -1189,6 +1273,12 @@ def atomic_load_xor_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
(atomic_load_xor_32 node:$a, node:$b)>;
def atomic_load_xor_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
(atomic_load_xor_32 node:$a, node:$b)>;
+def atomic_load_xor_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
+ (atomic_load_xor_64 node:$a, node:$b)>;
+def atomic_load_xor_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
+ (atomic_load_xor_64 node:$a, node:$b)>;
+def atomic_load_xor_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
+ (atomic_load_xor_64 node:$a, node:$b)>;
defm INT_PTX_ATOM_XOR_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".xor",
atomic_load_xor_32_g, i32imm, imm, hasAtomRedG32>;
@@ -1198,6 +1288,14 @@ defm INT_PTX_ATOM_XOR_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".b32", ".xor",
atomic_load_xor_32_gen, i32imm, imm, hasAtomRedGen32>;
defm INT_PTX_ATOM_XOR_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".b32",
".xor", atomic_load_xor_32_gen, i32imm, imm, useAtomRedG32forGen32>;
+defm INT_PTX_ATOM_XOR_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".xor",
+ atomic_load_xor_64_g, i64imm, imm, hasAtomRedG64>;
+defm INT_PTX_ATOM_XOR_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".xor",
+ atomic_load_xor_64_s, i64imm, imm, hasAtomRedS64>;
+defm INT_PTX_ATOM_XOR_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".b64", ".xor",
+ atomic_load_xor_64_gen, i64imm, imm, hasAtomRedGen64>;
+defm INT_PTX_ATOM_XOR_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".b64",
+ ".xor", atomic_load_xor_64_gen, i64imm, imm, useAtomRedG64forGen64>;
// atom_cas
@@ -1276,67 +1374,33 @@ def INT_PTX_SREG_WARPSIZE : F_SREG<"mov.u32 \t$dst, WARP_SZ;", Int32Regs,
// Support for ldu on sm_20 or later
//-----------------------------------
-def ldu_i8 : PatFrag<(ops node:$ptr), (int_nvvm_ldu_global_i node:$ptr), [{
- MemIntrinsicSDNode *M = cast<MemIntrinsicSDNode>(N);
- return M->getMemoryVT() == MVT::i8;
-}]>;
-
// Scalar
-// @TODO: Revisit this, Changed imemAny to imem
-multiclass LDU_G<string TyStr, NVPTXRegClass regclass, Intrinsic IntOp> {
+multiclass LDU_G<string TyStr, NVPTXRegClass regclass> {
def areg: NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src),
!strconcat("ldu.global.", TyStr),
- [(set regclass:$result, (IntOp Int32Regs:$src))]>, Requires<[hasLDU]>;
+ []>, Requires<[hasLDU]>;
def areg64: NVPTXInst<(outs regclass:$result), (ins Int64Regs:$src),
!strconcat("ldu.global.", TyStr),
- [(set regclass:$result, (IntOp Int64Regs:$src))]>, Requires<[hasLDU]>;
- def avar: NVPTXInst<(outs regclass:$result), (ins imem:$src),
+ []>, Requires<[hasLDU]>;
+ def avar: NVPTXInst<(outs regclass:$result), (ins imemAny:$src),
!strconcat("ldu.global.", TyStr),
- [(set regclass:$result, (IntOp (Wrapper tglobaladdr:$src)))]>,
- Requires<[hasLDU]>;
+ []>, Requires<[hasLDU]>;
def ari : NVPTXInst<(outs regclass:$result), (ins MEMri:$src),
!strconcat("ldu.global.", TyStr),
- [(set regclass:$result, (IntOp ADDRri:$src))]>, Requires<[hasLDU]>;
+ []>, Requires<[hasLDU]>;
def ari64 : NVPTXInst<(outs regclass:$result), (ins MEMri64:$src),
!strconcat("ldu.global.", TyStr),
- [(set regclass:$result, (IntOp ADDRri64:$src))]>, Requires<[hasLDU]>;
+ []>, Requires<[hasLDU]>;
}
-multiclass LDU_G_NOINTRIN<string TyStr, NVPTXRegClass regclass, PatFrag IntOp> {
- def areg: NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src),
- !strconcat("ldu.global.", TyStr),
- [(set regclass:$result, (IntOp Int32Regs:$src))]>, Requires<[hasLDU]>;
- def areg64: NVPTXInst<(outs regclass:$result), (ins Int64Regs:$src),
- !strconcat("ldu.global.", TyStr),
- [(set regclass:$result, (IntOp Int64Regs:$src))]>, Requires<[hasLDU]>;
- def avar: NVPTXInst<(outs regclass:$result), (ins imem:$src),
- !strconcat("ldu.global.", TyStr),
- [(set regclass:$result, (IntOp (Wrapper tglobaladdr:$src)))]>,
- Requires<[hasLDU]>;
- def ari : NVPTXInst<(outs regclass:$result), (ins MEMri:$src),
- !strconcat("ldu.global.", TyStr),
- [(set regclass:$result, (IntOp ADDRri:$src))]>, Requires<[hasLDU]>;
- def ari64 : NVPTXInst<(outs regclass:$result), (ins MEMri64:$src),
- !strconcat("ldu.global.", TyStr),
- [(set regclass:$result, (IntOp ADDRri64:$src))]>, Requires<[hasLDU]>;
-}
-
-defm INT_PTX_LDU_GLOBAL_i8 : LDU_G_NOINTRIN<"u8 \t$result, [$src];", Int16Regs,
- ldu_i8>;
-defm INT_PTX_LDU_GLOBAL_i16 : LDU_G<"u16 \t$result, [$src];", Int16Regs,
-int_nvvm_ldu_global_i>;
-defm INT_PTX_LDU_GLOBAL_i32 : LDU_G<"u32 \t$result, [$src];", Int32Regs,
-int_nvvm_ldu_global_i>;
-defm INT_PTX_LDU_GLOBAL_i64 : LDU_G<"u64 \t$result, [$src];", Int64Regs,
-int_nvvm_ldu_global_i>;
-defm INT_PTX_LDU_GLOBAL_f32 : LDU_G<"f32 \t$result, [$src];", Float32Regs,
-int_nvvm_ldu_global_f>;
-defm INT_PTX_LDU_GLOBAL_f64 : LDU_G<"f64 \t$result, [$src];", Float64Regs,
-int_nvvm_ldu_global_f>;
-defm INT_PTX_LDU_GLOBAL_p32 : LDU_G<"u32 \t$result, [$src];", Int32Regs,
-int_nvvm_ldu_global_p>;
-defm INT_PTX_LDU_GLOBAL_p64 : LDU_G<"u64 \t$result, [$src];", Int64Regs,
-int_nvvm_ldu_global_p>;
+defm INT_PTX_LDU_GLOBAL_i8 : LDU_G<"u8 \t$result, [$src];", Int16Regs>;
+defm INT_PTX_LDU_GLOBAL_i16 : LDU_G<"u16 \t$result, [$src];", Int16Regs>;
+defm INT_PTX_LDU_GLOBAL_i32 : LDU_G<"u32 \t$result, [$src];", Int32Regs>;
+defm INT_PTX_LDU_GLOBAL_i64 : LDU_G<"u64 \t$result, [$src];", Int64Regs>;
+defm INT_PTX_LDU_GLOBAL_f32 : LDU_G<"f32 \t$result, [$src];", Float32Regs>;
+defm INT_PTX_LDU_GLOBAL_f64 : LDU_G<"f64 \t$result, [$src];", Float64Regs>;
+defm INT_PTX_LDU_GLOBAL_p32 : LDU_G<"u32 \t$result, [$src];", Int32Regs>;
+defm INT_PTX_LDU_GLOBAL_p64 : LDU_G<"u64 \t$result, [$src];", Int64Regs>;
// vector
@@ -1406,65 +1470,40 @@ defm INT_PTX_LDU_G_v4f32_ELE
// Support for ldg on sm_35 or later
//-----------------------------------
-def ldg_i8 : PatFrag<(ops node:$ptr), (int_nvvm_ldg_global_i node:$ptr), [{
- MemIntrinsicSDNode *M = cast<MemIntrinsicSDNode>(N);
- return M->getMemoryVT() == MVT::i8;
-}]>;
-
-multiclass LDG_G<string TyStr, NVPTXRegClass regclass, Intrinsic IntOp> {
+multiclass LDG_G<string TyStr, NVPTXRegClass regclass> {
def areg: NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src),
!strconcat("ld.global.nc.", TyStr),
- [(set regclass:$result, (IntOp Int32Regs:$src))]>, Requires<[hasLDG]>;
+ []>, Requires<[hasLDG]>;
def areg64: NVPTXInst<(outs regclass:$result), (ins Int64Regs:$src),
!strconcat("ld.global.nc.", TyStr),
- [(set regclass:$result, (IntOp Int64Regs:$src))]>, Requires<[hasLDG]>;
- def avar: NVPTXInst<(outs regclass:$result), (ins imem:$src),
+ []>, Requires<[hasLDG]>;
+ def avar: NVPTXInst<(outs regclass:$result), (ins imemAny:$src),
!strconcat("ld.global.nc.", TyStr),
- [(set regclass:$result, (IntOp (Wrapper tglobaladdr:$src)))]>,
- Requires<[hasLDG]>;
+ []>, Requires<[hasLDG]>;
def ari : NVPTXInst<(outs regclass:$result), (ins MEMri:$src),
!strconcat("ld.global.nc.", TyStr),
- [(set regclass:$result, (IntOp ADDRri:$src))]>, Requires<[hasLDG]>;
+ []>, Requires<[hasLDG]>;
def ari64 : NVPTXInst<(outs regclass:$result), (ins MEMri64:$src),
!strconcat("ld.global.nc.", TyStr),
- [(set regclass:$result, (IntOp ADDRri64:$src))]>, Requires<[hasLDG]>;
-}
-
-multiclass LDG_G_NOINTRIN<string TyStr, NVPTXRegClass regclass, PatFrag IntOp> {
- def areg: NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src),
- !strconcat("ld.global.nc.", TyStr),
- [(set regclass:$result, (IntOp Int32Regs:$src))]>, Requires<[hasLDG]>;
- def areg64: NVPTXInst<(outs regclass:$result), (ins Int64Regs:$src),
- !strconcat("ld.global.nc.", TyStr),
- [(set regclass:$result, (IntOp Int64Regs:$src))]>, Requires<[hasLDG]>;
- def avar: NVPTXInst<(outs regclass:$result), (ins imem:$src),
- !strconcat("ld.global.nc.", TyStr),
- [(set regclass:$result, (IntOp (Wrapper tglobaladdr:$src)))]>,
- Requires<[hasLDG]>;
- def ari : NVPTXInst<(outs regclass:$result), (ins MEMri:$src),
- !strconcat("ld.global.nc.", TyStr),
- [(set regclass:$result, (IntOp ADDRri:$src))]>, Requires<[hasLDG]>;
- def ari64 : NVPTXInst<(outs regclass:$result), (ins MEMri64:$src),
- !strconcat("ld.global.nc.", TyStr),
- [(set regclass:$result, (IntOp ADDRri64:$src))]>, Requires<[hasLDG]>;
+ []>, Requires<[hasLDG]>;
}
defm INT_PTX_LDG_GLOBAL_i8
- : LDG_G_NOINTRIN<"u8 \t$result, [$src];", Int16Regs, ldg_i8>;
+ : LDG_G<"u8 \t$result, [$src];", Int16Regs>;
defm INT_PTX_LDG_GLOBAL_i16
- : LDG_G<"u16 \t$result, [$src];", Int16Regs, int_nvvm_ldg_global_i>;
+ : LDG_G<"u16 \t$result, [$src];", Int16Regs>;
defm INT_PTX_LDG_GLOBAL_i32
- : LDG_G<"u32 \t$result, [$src];", Int32Regs, int_nvvm_ldg_global_i>;
+ : LDG_G<"u32 \t$result, [$src];", Int32Regs>;
defm INT_PTX_LDG_GLOBAL_i64
- : LDG_G<"u64 \t$result, [$src];", Int64Regs, int_nvvm_ldg_global_i>;
+ : LDG_G<"u64 \t$result, [$src];", Int64Regs>;
defm INT_PTX_LDG_GLOBAL_f32
- : LDG_G<"f32 \t$result, [$src];", Float32Regs, int_nvvm_ldg_global_f>;
+ : LDG_G<"f32 \t$result, [$src];", Float32Regs>;
defm INT_PTX_LDG_GLOBAL_f64
- : LDG_G<"f64 \t$result, [$src];", Float64Regs, int_nvvm_ldg_global_f>;
+ : LDG_G<"f64 \t$result, [$src];", Float64Regs>;
defm INT_PTX_LDG_GLOBAL_p32
- : LDG_G<"u32 \t$result, [$src];", Int32Regs, int_nvvm_ldg_global_p>;
+ : LDG_G<"u32 \t$result, [$src];", Int32Regs>;
defm INT_PTX_LDG_GLOBAL_p64
- : LDG_G<"u64 \t$result, [$src];", Int64Regs, int_nvvm_ldg_global_p>;
+ : LDG_G<"u64 \t$result, [$src];", Int64Regs>;
// vector
@@ -1689,6 +1728,207 @@ def INT_NVVM_COMPILER_ERROR_64 : NVPTXInst<(outs), (ins Int64Regs:$a),
[(int_nvvm_compiler_error Int64Regs:$a)]>;
+// isspacep
+
+def ISSPACEP_CONST_32
+ : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a),
+ "isspacep.const \t$d, $a;",
+ [(set Int1Regs:$d, (int_nvvm_isspacep_const Int32Regs:$a))]>,
+ Requires<[hasPTX31]>;
+def ISSPACEP_CONST_64
+ : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
+ "isspacep.const \t$d, $a;",
+ [(set Int1Regs:$d, (int_nvvm_isspacep_const Int64Regs:$a))]>,
+ Requires<[hasPTX31]>;
+def ISSPACEP_GLOBAL_32
+ : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a),
+ "isspacep.global \t$d, $a;",
+ [(set Int1Regs:$d, (int_nvvm_isspacep_global Int32Regs:$a))]>;
+def ISSPACEP_GLOBAL_64
+ : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
+ "isspacep.global \t$d, $a;",
+ [(set Int1Regs:$d, (int_nvvm_isspacep_global Int64Regs:$a))]>;
+def ISSPACEP_LOCAL_32
+ : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a),
+ "isspacep.local \t$d, $a;",
+ [(set Int1Regs:$d, (int_nvvm_isspacep_local Int32Regs:$a))]>;
+def ISSPACEP_LOCAL_64
+ : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
+ "isspacep.local \t$d, $a;",
+ [(set Int1Regs:$d, (int_nvvm_isspacep_local Int64Regs:$a))]>;
+def ISSPACEP_SHARED_32
+ : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a),
+ "isspacep.shared \t$d, $a;",
+ [(set Int1Regs:$d, (int_nvvm_isspacep_shared Int32Regs:$a))]>;
+def ISSPACEP_SHARED_64
+ : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
+ "isspacep.shared \t$d, $a;",
+ [(set Int1Regs:$d, (int_nvvm_isspacep_shared Int64Regs:$a))]>;
+
+
+// Special register reads
+def MOV_SPECIAL : NVPTXInst<(outs Int32Regs:$d),
+ (ins SpecialRegs:$r),
+ "mov.b32\t$d, $r;", []>;
+
+def : Pat<(int_nvvm_read_ptx_sreg_envreg0), (MOV_SPECIAL ENVREG0)>;
+def : Pat<(int_nvvm_read_ptx_sreg_envreg1), (MOV_SPECIAL ENVREG1)>;
+def : Pat<(int_nvvm_read_ptx_sreg_envreg2), (MOV_SPECIAL ENVREG2)>;
+def : Pat<(int_nvvm_read_ptx_sreg_envreg3), (MOV_SPECIAL ENVREG3)>;
+def : Pat<(int_nvvm_read_ptx_sreg_envreg4), (MOV_SPECIAL ENVREG4)>;
+def : Pat<(int_nvvm_read_ptx_sreg_envreg5), (MOV_SPECIAL ENVREG5)>;
+def : Pat<(int_nvvm_read_ptx_sreg_envreg6), (MOV_SPECIAL ENVREG6)>;
+def : Pat<(int_nvvm_read_ptx_sreg_envreg7), (MOV_SPECIAL ENVREG7)>;
+def : Pat<(int_nvvm_read_ptx_sreg_envreg8), (MOV_SPECIAL ENVREG8)>;
+def : Pat<(int_nvvm_read_ptx_sreg_envreg9), (MOV_SPECIAL ENVREG9)>;
+def : Pat<(int_nvvm_read_ptx_sreg_envreg10), (MOV_SPECIAL ENVREG10)>;
+def : Pat<(int_nvvm_read_ptx_sreg_envreg11), (MOV_SPECIAL ENVREG11)>;
+def : Pat<(int_nvvm_read_ptx_sreg_envreg12), (MOV_SPECIAL ENVREG12)>;
+def : Pat<(int_nvvm_read_ptx_sreg_envreg13), (MOV_SPECIAL ENVREG13)>;
+def : Pat<(int_nvvm_read_ptx_sreg_envreg14), (MOV_SPECIAL ENVREG14)>;
+def : Pat<(int_nvvm_read_ptx_sreg_envreg15), (MOV_SPECIAL ENVREG15)>;
+def : Pat<(int_nvvm_read_ptx_sreg_envreg16), (MOV_SPECIAL ENVREG16)>;
+def : Pat<(int_nvvm_read_ptx_sreg_envreg17), (MOV_SPECIAL ENVREG17)>;
+def : Pat<(int_nvvm_read_ptx_sreg_envreg18), (MOV_SPECIAL ENVREG18)>;
+def : Pat<(int_nvvm_read_ptx_sreg_envreg19), (MOV_SPECIAL ENVREG19)>;
+def : Pat<(int_nvvm_read_ptx_sreg_envreg20), (MOV_SPECIAL ENVREG20)>;
+def : Pat<(int_nvvm_read_ptx_sreg_envreg21), (MOV_SPECIAL ENVREG21)>;
+def : Pat<(int_nvvm_read_ptx_sreg_envreg22), (MOV_SPECIAL ENVREG22)>;
+def : Pat<(int_nvvm_read_ptx_sreg_envreg23), (MOV_SPECIAL ENVREG23)>;
+def : Pat<(int_nvvm_read_ptx_sreg_envreg24), (MOV_SPECIAL ENVREG24)>;
+def : Pat<(int_nvvm_read_ptx_sreg_envreg25), (MOV_SPECIAL ENVREG25)>;
+def : Pat<(int_nvvm_read_ptx_sreg_envreg26), (MOV_SPECIAL ENVREG26)>;
+def : Pat<(int_nvvm_read_ptx_sreg_envreg27), (MOV_SPECIAL ENVREG27)>;
+def : Pat<(int_nvvm_read_ptx_sreg_envreg28), (MOV_SPECIAL ENVREG28)>;
+def : Pat<(int_nvvm_read_ptx_sreg_envreg29), (MOV_SPECIAL ENVREG29)>;
+def : Pat<(int_nvvm_read_ptx_sreg_envreg30), (MOV_SPECIAL ENVREG30)>;
+def : Pat<(int_nvvm_read_ptx_sreg_envreg31), (MOV_SPECIAL ENVREG31)>;
+
+
+// rotate builtin support
+
+def ROTATE_B32_HW_IMM
+ : NVPTXInst<(outs Int32Regs:$dst),
+ (ins Int32Regs:$src, i32imm:$amt),
+ "shf.l.wrap.b32 \t$dst, $src, $src, $amt;",
+ [(set Int32Regs:$dst,
+ (int_nvvm_rotate_b32 Int32Regs:$src, (i32 imm:$amt)))]>,
+ Requires<[hasHWROT32]> ;
+
+def ROTATE_B32_HW_REG
+ : NVPTXInst<(outs Int32Regs:$dst),
+ (ins Int32Regs:$src, Int32Regs:$amt),
+ "shf.l.wrap.b32 \t$dst, $src, $src, $amt;",
+ [(set Int32Regs:$dst,
+ (int_nvvm_rotate_b32 Int32Regs:$src, Int32Regs:$amt))]>,
+ Requires<[hasHWROT32]> ;
+
+def : Pat<(int_nvvm_rotate_b32 Int32Regs:$src, (i32 imm:$amt)),
+ (ROT32imm_sw Int32Regs:$src, imm:$amt, (SUB_FRM_32 node:$amt))>,
+ Requires<[noHWROT32]> ;
+
+def : Pat<(int_nvvm_rotate_b32 Int32Regs:$src, Int32Regs:$amt),
+ (ROTL32reg_sw Int32Regs:$src, Int32Regs:$amt)>,
+ Requires<[noHWROT32]> ;
+
+def GET_LO_INT64
+ : NVPTXInst<(outs Int32Regs:$dst), (ins Int64Regs:$src),
+ !strconcat("{{\n\t",
+ !strconcat(".reg .b32 %dummy;\n\t",
+ !strconcat("mov.b64 \t{$dst,%dummy}, $src;\n\t",
+ !strconcat("}}", "")))),
+ []> ;
+
+def GET_HI_INT64
+ : NVPTXInst<(outs Int32Regs:$dst), (ins Int64Regs:$src),
+ !strconcat("{{\n\t",
+ !strconcat(".reg .b32 %dummy;\n\t",
+ !strconcat("mov.b64 \t{%dummy,$dst}, $src;\n\t",
+ !strconcat("}}", "")))),
+ []> ;
+
+def PACK_TWO_INT32
+ : NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$lo, Int32Regs:$hi),
+ "mov.b64 \t$dst, {{$lo, $hi}};", []> ;
+
+def : Pat<(int_nvvm_swap_lo_hi_b64 Int64Regs:$src),
+ (PACK_TWO_INT32 (GET_HI_INT64 Int64Regs:$src),
+ (GET_LO_INT64 Int64Regs:$src))> ;
+
+// funnel shift, requires >= sm_32
+def SHF_L_WRAP_B32_IMM
+ : NVPTXInst<(outs Int32Regs:$dst),
+ (ins Int32Regs:$lo, Int32Regs:$hi, i32imm:$amt),
+ "shf.l.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>,
+ Requires<[hasHWROT32]>;
+
+def SHF_L_WRAP_B32_REG
+ : NVPTXInst<(outs Int32Regs:$dst),
+ (ins Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt),
+ "shf.l.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>,
+ Requires<[hasHWROT32]>;
+
+def SHF_R_WRAP_B32_IMM
+ : NVPTXInst<(outs Int32Regs:$dst),
+ (ins Int32Regs:$lo, Int32Regs:$hi, i32imm:$amt),
+ "shf.r.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>,
+ Requires<[hasHWROT32]>;
+
+def SHF_R_WRAP_B32_REG
+ : NVPTXInst<(outs Int32Regs:$dst),
+ (ins Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt),
+ "shf.r.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>,
+ Requires<[hasHWROT32]>;
+
+// HW version of rotate 64
+def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, (i32 imm:$amt)),
+ (PACK_TWO_INT32
+ (SHF_L_WRAP_B32_IMM (GET_HI_INT64 Int64Regs:$src),
+ (GET_LO_INT64 Int64Regs:$src), imm:$amt),
+ (SHF_L_WRAP_B32_IMM (GET_LO_INT64 Int64Regs:$src),
+ (GET_HI_INT64 Int64Regs:$src), imm:$amt))>,
+ Requires<[hasHWROT32]>;
+
+def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, Int32Regs:$amt),
+ (PACK_TWO_INT32
+ (SHF_L_WRAP_B32_REG (GET_HI_INT64 Int64Regs:$src),
+ (GET_LO_INT64 Int64Regs:$src), Int32Regs:$amt),
+ (SHF_L_WRAP_B32_REG (GET_LO_INT64 Int64Regs:$src),
+ (GET_HI_INT64 Int64Regs:$src), Int32Regs:$amt))>,
+ Requires<[hasHWROT32]>;
+
+
+def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, (i32 imm:$amt)),
+ (PACK_TWO_INT32
+ (SHF_R_WRAP_B32_IMM (GET_LO_INT64 Int64Regs:$src),
+ (GET_HI_INT64 Int64Regs:$src), imm:$amt),
+ (SHF_R_WRAP_B32_IMM (GET_HI_INT64 Int64Regs:$src),
+ (GET_LO_INT64 Int64Regs:$src), imm:$amt))>,
+ Requires<[hasHWROT32]>;
+
+def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, Int32Regs:$amt),
+ (PACK_TWO_INT32
+ (SHF_R_WRAP_B32_REG (GET_LO_INT64 Int64Regs:$src),
+ (GET_HI_INT64 Int64Regs:$src), Int32Regs:$amt),
+ (SHF_R_WRAP_B32_REG (GET_HI_INT64 Int64Regs:$src),
+ (GET_LO_INT64 Int64Regs:$src), Int32Regs:$amt))>,
+ Requires<[hasHWROT32]>;
+
+// SW version of rotate 64
+def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, (i32 imm:$amt)),
+ (ROT64imm_sw Int64Regs:$src, imm:$amt, (SUB_FRM_32 node:$amt))>,
+ Requires<[noHWROT32]>;
+def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, Int32Regs:$amt),
+ (ROTL64reg_sw Int64Regs:$src, Int32Regs:$amt)>,
+ Requires<[noHWROT32]>;
+def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, (i32 imm:$amt)),
+ (ROT64imm_sw Int64Regs:$src, (SUB_FRM_64 node:$amt), imm:$amt)>,
+ Requires<[noHWROT32]>;
+def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, Int32Regs:$amt),
+ (ROTR64reg_sw Int64Regs:$src, Int32Regs:$amt)>,
+ Requires<[noHWROT32]>;
+
+
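
The hardware rotate-64 patterns above build a 64-bit rotate out of two 32-bit wrap-mode funnel shifts applied to the swapped halves of the source. The standalone C++ check below verifies that construction against a plain 64-bit rotate for amounts below 32 (the only case exercised here); all names are illustrative and the wrap-mode masking of the amount follows the PTX ISA description.

#include <cassert>
#include <cstdint>

// shf.l.wrap.b32: shift {hi:lo} left by (amt & 31) and return the upper
// 32 bits of the result.
static uint32_t shf_l_wrap(uint32_t lo, uint32_t hi, uint32_t amt) {
  uint32_t n = amt & 31;
  uint64_t v = ((uint64_t)hi << 32) | lo;
  return (uint32_t)((v << n) >> 32);
}

// Reference 64-bit rotate-left.
static uint64_t rotl64(uint64_t x, uint32_t amt) {
  amt &= 63;
  return amt ? (x << amt) | (x >> (64 - amt)) : x;
}

int main() {
  const uint64_t src = 0x0123456789ABCDEFull;
  for (uint32_t amt = 0; amt < 32; ++amt) {  // amounts >= 32 are not examined here
    uint32_t lo = (uint32_t)src, hi = (uint32_t)(src >> 32);
    // Mirrors the pattern: new high word = shf.l.wrap(lo, hi, amt),
    //                      new low word  = shf.l.wrap(hi, lo, amt).
    uint32_t new_hi = shf_l_wrap(lo, hi, amt);
    uint32_t new_lo = shf_l_wrap(hi, lo, amt);
    uint64_t packed = ((uint64_t)new_hi << 32) | new_lo;
    assert(packed == rotl64(src, amt));
  }
  return 0;
}
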
//-----------------------------------
// Texture Intrinsics
//-----------------------------------
diff --git a/lib/Target/NVPTX/NVPTXMCExpr.h b/lib/Target/NVPTX/NVPTXMCExpr.h
index 0ee018c..5547649 100644
--- a/lib/Target/NVPTX/NVPTXMCExpr.h
+++ b/lib/Target/NVPTX/NVPTXMCExpr.h
@@ -66,7 +66,7 @@ public:
const MCAsmLayout *Layout) const override {
return false;
}
- void AddValueSymbols(MCAssembler *) const override {};
+ void visitUsedExpr(MCStreamer &Streamer) const override {};
const MCSection *FindAssociatedSection() const override {
return nullptr;
}
diff --git a/lib/Target/NVPTX/NVPTXRegisterInfo.td b/lib/Target/NVPTX/NVPTXRegisterInfo.td
index 7a38a66..3482248 100644
--- a/lib/Target/NVPTX/NVPTXRegisterInfo.td
+++ b/lib/Target/NVPTX/NVPTXRegisterInfo.td
@@ -46,6 +46,10 @@ foreach i = 0-4 in {
def da#i : NVPTXReg<"%da"#i>;
}
+foreach i = 0-31 in {
+ def ENVREG#i : NVPTXReg<"%envreg"#i>;
+}
+
//===----------------------------------------------------------------------===//
// Register classes
//===----------------------------------------------------------------------===//
@@ -61,4 +65,5 @@ def Float32ArgRegs : NVPTXRegClass<[f32], 32, (add (sequence "fa%u", 0, 4))>;
def Float64ArgRegs : NVPTXRegClass<[f64], 64, (add (sequence "da%u", 0, 4))>;
// Read NVPTXRegisterInfo.cpp to see how VRFrame and VRDepot are used.
-def SpecialRegs : NVPTXRegClass<[i32], 32, (add VRFrame, VRDepot)>;
+def SpecialRegs : NVPTXRegClass<[i32], 32, (add VRFrame, VRDepot,
+ (sequence "ENVREG%u", 0, 31))>;
diff --git a/lib/Target/NVPTX/NVPTXSubtarget.cpp b/lib/Target/NVPTX/NVPTXSubtarget.cpp
index 8c7df52..d5cded2 100644
--- a/lib/Target/NVPTX/NVPTXSubtarget.cpp
+++ b/lib/Target/NVPTX/NVPTXSubtarget.cpp
@@ -25,10 +25,41 @@ using namespace llvm;
// Pin the vtable to this file.
void NVPTXSubtarget::anchor() {}
+static std::string computeDataLayout(bool is64Bit) {
+ std::string Ret = "e";
+
+ if (!is64Bit)
+ Ret += "-p:32:32";
+
+ Ret += "-i64:64-v16:16-v32:32-n16:32:64";
+
+ return Ret;
+}
+
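
computeDataLayout simply moves from the target machine into the subtarget. A trivial standalone mirror of it, just to make the two possible layout strings explicit (nvptxDataLayout is a local illustrative name, not LLVM's):

#include <cassert>
#include <string>

// Same logic as computeDataLayout above.
static std::string nvptxDataLayout(bool is64Bit) {
  std::string Ret = "e";
  if (!is64Bit)
    Ret += "-p:32:32";
  Ret += "-i64:64-v16:16-v32:32-n16:32:64";
  return Ret;
}

int main() {
  assert(nvptxDataLayout(false) == "e-p:32:32-i64:64-v16:16-v32:32-n16:32:64");
  assert(nvptxDataLayout(true) == "e-i64:64-v16:16-v32:32-n16:32:64");
  return 0;
}
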
+NVPTXSubtarget &NVPTXSubtarget::initializeSubtargetDependencies(StringRef CPU,
+ StringRef FS) {
+ // Provide the default CPU if we don't have one.
+ if (CPU.empty() && FS.size())
+ llvm_unreachable("we are not using FeatureStr");
+ TargetName = CPU.empty() ? "sm_20" : CPU;
+
+ ParseSubtargetFeatures(TargetName, FS);
+
+ // Set default to PTX 3.2 (CUDA 5.5)
+ if (PTXVersion == 0) {
+ PTXVersion = 32;
+ }
+
+ return *this;
+}
+
NVPTXSubtarget::NVPTXSubtarget(const std::string &TT, const std::string &CPU,
- const std::string &FS, bool is64Bit)
+ const std::string &FS, const TargetMachine &TM,
+ bool is64Bit)
: NVPTXGenSubtargetInfo(TT, CPU, FS), Is64Bit(is64Bit), PTXVersion(0),
- SmVersion(20) {
+ SmVersion(20), DL(computeDataLayout(is64Bit)),
+ InstrInfo(initializeSubtargetDependencies(CPU, FS)),
+ TLInfo((NVPTXTargetMachine &)TM), TSInfo(&DL), FrameLowering(*this) {
Triple T(TT);
@@ -36,26 +67,4 @@ NVPTXSubtarget::NVPTXSubtarget(const std::string &TT, const std::string &CPU,
drvInterface = NVPTX::NVCL;
else
drvInterface = NVPTX::CUDA;
-
- // Provide the default CPU if none
- std::string defCPU = "sm_20";
-
- ParseSubtargetFeatures((CPU.empty() ? defCPU : CPU), FS);
-
- // Get the TargetName from the FS if available
- if (FS.empty() && CPU.empty())
- TargetName = defCPU;
- else if (!CPU.empty())
- TargetName = CPU;
- else
- llvm_unreachable("we are not using FeatureStr");
-
- // We default to PTX 3.1, but we cannot just default to it in the initializer
- // since the attribute parser checks if the given option is >= the default.
- // So if we set ptx31 as the default, the ptx30 attribute would never match.
- // Instead, we use 0 as the default and manually set 31 if the default is
- // used.
- if (PTXVersion == 0) {
- PTXVersion = 31;
- }
}
diff --git a/lib/Target/NVPTX/NVPTXSubtarget.h b/lib/Target/NVPTX/NVPTXSubtarget.h
index 581e5ed..3ed5747 100644
--- a/lib/Target/NVPTX/NVPTXSubtarget.h
+++ b/lib/Target/NVPTX/NVPTXSubtarget.h
@@ -15,6 +15,12 @@
#define NVPTXSUBTARGET_H
#include "NVPTX.h"
+#include "NVPTXFrameLowering.h"
+#include "NVPTXISelLowering.h"
+#include "NVPTXInstrInfo.h"
+#include "NVPTXRegisterInfo.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/Target/TargetSelectionDAGInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include <string>
@@ -35,12 +41,30 @@ class NVPTXSubtarget : public NVPTXGenSubtargetInfo {
// SM version x.y is represented as 10*x+y, e.g. 3.1 == 31
unsigned int SmVersion;
+ const DataLayout DL; // Calculates type size & alignment
+ NVPTXInstrInfo InstrInfo;
+ NVPTXTargetLowering TLInfo;
+ TargetSelectionDAGInfo TSInfo;
+
+  // NVPTX does not have any call stack frame, but needs an NVPTX-specific
+  // FrameLowering class because TargetFrameLowering is abstract.
+ NVPTXFrameLowering FrameLowering;
+
public:
/// This constructor initializes the data members to match that
/// of the specified module.
///
NVPTXSubtarget(const std::string &TT, const std::string &CPU,
- const std::string &FS, bool is64Bit);
+ const std::string &FS, const TargetMachine &TM, bool is64Bit);
+
+ const TargetFrameLowering *getFrameLowering() const { return &FrameLowering; }
+ const NVPTXInstrInfo *getInstrInfo() const { return &InstrInfo; }
+ const DataLayout *getDataLayout() const { return &DL; }
+ const NVPTXRegisterInfo *getRegisterInfo() const {
+ return &InstrInfo.getRegisterInfo();
+ }
+ const NVPTXTargetLowering *getTargetLowering() const { return &TLInfo; }
+ const TargetSelectionDAGInfo *getSelectionDAGInfo() const { return &TSInfo; }
bool hasBrkPt() const { return SmVersion >= 11; }
bool hasAtomRedG32() const { return SmVersion >= 11; }
@@ -57,10 +81,12 @@ public:
bool hasFMAF32() const { return SmVersion >= 20; }
bool hasFMAF64() const { return SmVersion >= 13; }
bool hasLDG() const { return SmVersion >= 32; }
- bool hasLDU() const { return SmVersion >= 20; }
+ bool hasLDU() const { return ((SmVersion >= 20) && (SmVersion < 30)); }
bool hasGenericLdSt() const { return SmVersion >= 20; }
- inline bool hasHWROT32() const { return false; }
- inline bool hasSWROT32() const { return true; }
+ inline bool hasHWROT32() const { return SmVersion >= 32; }
+ inline bool hasSWROT32() const {
+ return ((SmVersion >= 20) && (SmVersion < 32));
+ }
inline bool hasROT32() const { return hasHWROT32() || hasSWROT32(); }
inline bool hasROT64() const { return SmVersion >= 20; }
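
The predicate changes line up with the new instructions: hardware 32-bit rotate (and ldg) needs sm_32 or later, the software rotate expansion covers sm_20 up to but not including sm_32, and ldu is now limited to sm_2x. A compact restatement as free functions over the encoded SM version (10*major + minor), mirroring the code above; the standalone names are illustrative:

#include <cassert>

static bool hasHWROT32(unsigned SM) { return SM >= 32; }
static bool hasSWROT32(unsigned SM) { return SM >= 20 && SM < 32; }
static bool hasLDU(unsigned SM)     { return SM >= 20 && SM < 30; }
static bool hasLDG(unsigned SM)     { return SM >= 32; }

int main() {
  assert(!hasHWROT32(20) && hasSWROT32(20) && hasLDU(20) && !hasLDG(20));  // sm_20
  assert(!hasHWROT32(30) && hasSWROT32(30) && !hasLDU(30) && !hasLDG(30)); // sm_30
  assert(hasHWROT32(35) && !hasSWROT32(35) && !hasLDU(35) && hasLDG(35));  // sm_35
  return 0;
}
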
@@ -76,6 +102,7 @@ public:
unsigned getPTXVersion() const { return PTXVersion; }
+ NVPTXSubtarget &initializeSubtargetDependencies(StringRef CPU, StringRef FS);
void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
};
diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/lib/Target/NVPTX/NVPTXTargetMachine.cpp
index 26a4f84..069a1b9 100644
--- a/lib/Target/NVPTX/NVPTXTargetMachine.cpp
+++ b/lib/Target/NVPTX/NVPTXTargetMachine.cpp
@@ -66,26 +66,13 @@ extern "C" void LLVMInitializeNVPTXTarget() {
*PassRegistry::getPassRegistry());
}
-static std::string computeDataLayout(const NVPTXSubtarget &ST) {
- std::string Ret = "e";
-
- if (!ST.is64Bit())
- Ret += "-p:32:32";
-
- Ret += "-i64:64-v16:16-v32:32-n16:32:64";
-
- return Ret;
-}
-
-NVPTXTargetMachine::NVPTXTargetMachine(
- const Target &T, StringRef TT, StringRef CPU, StringRef FS,
- const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM,
- CodeGenOpt::Level OL, bool is64bit)
+NVPTXTargetMachine::NVPTXTargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL, bool is64bit)
: LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
- Subtarget(TT, CPU, FS, is64bit), DL(computeDataLayout(Subtarget)),
- InstrInfo(*this), TLInfo(*this), TSInfo(*this),
- FrameLowering(
- *this, is64bit) /*FrameInfo(TargetFrameInfo::StackGrowsUp, 8, 0)*/ {
+ Subtarget(TT, CPU, FS, *this, is64bit) {
initAsmInfo();
}
@@ -119,6 +106,7 @@ public:
bool addInstSelector() override;
bool addPreRegAlloc() override;
bool addPostRegAlloc() override;
+ void addMachineSSAOptimization() override;
FunctionPass *createTargetRegisterAllocator(bool) override;
void addFastRegAlloc(FunctionPass *RegAllocPass) override;
@@ -220,3 +208,43 @@ void NVPTXPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) {
printAndVerify("After StackSlotColoring");
}
+
+void NVPTXPassConfig::addMachineSSAOptimization() {
+ // Pre-ra tail duplication.
+ if (addPass(&EarlyTailDuplicateID))
+ printAndVerify("After Pre-RegAlloc TailDuplicate");
+
+ // Optimize PHIs before DCE: removing dead PHI cycles may make more
+ // instructions dead.
+ addPass(&OptimizePHIsID);
+
+ // This pass merges large allocas. StackSlotColoring is a different pass
+ // which merges spill slots.
+ addPass(&StackColoringID);
+
+ // If the target requests it, assign local variables to stack slots relative
+ // to one another and simplify frame index references where possible.
+ addPass(&LocalStackSlotAllocationID);
+
+ // With optimization, dead code should already be eliminated. However
+ // there is one known exception: lowered code for arguments that are only
+ // used by tail calls, where the tail calls reuse the incoming stack
+ // arguments directly (see t11 in test/CodeGen/X86/sibcall.ll).
+ addPass(&DeadMachineInstructionElimID);
+ printAndVerify("After codegen DCE pass");
+
+ // Allow targets to insert passes that improve instruction level parallelism,
+ // like if-conversion. Such passes will typically need dominator trees and
+ // loop info, just like LICM and CSE below.
+ if (addILPOpts())
+ printAndVerify("After ILP optimizations");
+
+ addPass(&MachineLICMID);
+ addPass(&MachineCSEID);
+
+ addPass(&MachineSinkingID);
+ printAndVerify("After Machine LICM, CSE and Sinking passes");
+
+ addPass(&PeepholeOptimizerID);
+ printAndVerify("After codegen peephole optimization pass");
+}
diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.h b/lib/Target/NVPTX/NVPTXTargetMachine.h
index 2db7c18..a7a1c8f 100644
--- a/lib/Target/NVPTX/NVPTXTargetMachine.h
+++ b/lib/Target/NVPTX/NVPTXTargetMachine.h
@@ -14,13 +14,8 @@
#ifndef NVPTX_TARGETMACHINE_H
#define NVPTX_TARGETMACHINE_H
-#include "ManagedStringPool.h"
-#include "NVPTXFrameLowering.h"
-#include "NVPTXISelLowering.h"
-#include "NVPTXInstrInfo.h"
-#include "NVPTXRegisterInfo.h"
#include "NVPTXSubtarget.h"
-#include "llvm/IR/DataLayout.h"
+#include "ManagedStringPool.h"
#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetSelectionDAGInfo.h"
@@ -31,50 +26,37 @@ namespace llvm {
///
class NVPTXTargetMachine : public LLVMTargetMachine {
NVPTXSubtarget Subtarget;
- const DataLayout DL; // Calculates type size & alignment
- NVPTXInstrInfo InstrInfo;
- NVPTXTargetLowering TLInfo;
- TargetSelectionDAGInfo TSInfo;
-
- // NVPTX does not have any call stack frame, but need a NVPTX specific
- // FrameLowering class because TargetFrameLowering is abstract.
- NVPTXFrameLowering FrameLowering;
// Hold Strings that can be free'd all together with NVPTXTargetMachine
ManagedStringPool ManagedStrPool;
- //bool addCommonCodeGenPasses(PassManagerBase &, CodeGenOpt::Level,
- // bool DisableVerify, MCContext *&OutCtx);
-
public:
NVPTXTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS,
const TargetOptions &Options, Reloc::Model RM,
CodeModel::Model CM, CodeGenOpt::Level OP, bool is64bit);
const TargetFrameLowering *getFrameLowering() const override {
- return &FrameLowering;
+ return getSubtargetImpl()->getFrameLowering();
+ }
+ const NVPTXInstrInfo *getInstrInfo() const override {
+ return getSubtargetImpl()->getInstrInfo();
+ }
+ const DataLayout *getDataLayout() const override {
+ return getSubtargetImpl()->getDataLayout();
}
- const NVPTXInstrInfo *getInstrInfo() const override { return &InstrInfo; }
- const DataLayout *getDataLayout() const override { return &DL; }
const NVPTXSubtarget *getSubtargetImpl() const override { return &Subtarget; }
-
const NVPTXRegisterInfo *getRegisterInfo() const override {
- return &(InstrInfo.getRegisterInfo());
+ return getSubtargetImpl()->getRegisterInfo();
}
- NVPTXTargetLowering *getTargetLowering() const override {
- return const_cast<NVPTXTargetLowering *>(&TLInfo);
+ const NVPTXTargetLowering *getTargetLowering() const override {
+ return getSubtargetImpl()->getTargetLowering();
}
const TargetSelectionDAGInfo *getSelectionDAGInfo() const override {
- return &TSInfo;
+ return getSubtargetImpl()->getSelectionDAGInfo();
}
- //virtual bool addInstSelector(PassManagerBase &PM,
- // CodeGenOpt::Level OptLevel);
-
- //virtual bool addPreRegAlloc(PassManagerBase &, CodeGenOpt::Level);
-
ManagedStringPool *getManagedStrPool() const {
return const_cast<ManagedStringPool *>(&ManagedStrPool);
}
diff --git a/lib/Target/NVPTX/NVVMReflect.cpp b/lib/Target/NVPTX/NVVMReflect.cpp
index cb8bd72..a8d6b95 100644
--- a/lib/Target/NVPTX/NVVMReflect.cpp
+++ b/lib/Target/NVPTX/NVVMReflect.cpp
@@ -22,6 +22,7 @@
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/Pass.h"
@@ -47,17 +48,16 @@ class NVVMReflect : public ModulePass {
private:
StringMap<int> VarMap;
typedef DenseMap<std::string, int>::iterator VarMapIter;
- Function *ReflectFunction;
public:
static char ID;
- NVVMReflect() : ModulePass(ID), ReflectFunction(nullptr) {
+ NVVMReflect() : ModulePass(ID) {
initializeNVVMReflectPass(*PassRegistry::getPassRegistry());
VarMap.clear();
}
NVVMReflect(const StringMap<int> &Mapping)
- : ModulePass(ID), ReflectFunction(nullptr) {
+ : ModulePass(ID) {
initializeNVVMReflectPass(*PassRegistry::getPassRegistry());
for (StringMap<int>::const_iterator I = Mapping.begin(), E = Mapping.end();
I != E; ++I) {
@@ -70,6 +70,8 @@ public:
}
bool runOnModule(Module &) override;
+private:
+ bool handleFunction(Function *ReflectFunction);
void setVarMap();
};
}
@@ -120,19 +122,7 @@ void NVVMReflect::setVarMap() {
}
}
-bool NVVMReflect::runOnModule(Module &M) {
- if (!NVVMReflectEnabled)
- return false;
-
- setVarMap();
-
- ReflectFunction = M.getFunction(NVVM_REFLECT_FUNCTION);
-
- // If reflect function is not used, then there will be
- // no entry in the module.
- if (!ReflectFunction)
- return false;
-
+bool NVVMReflect::handleFunction(Function *ReflectFunction) {
// Validate _reflect function
assert(ReflectFunction->isDeclaration() &&
"_reflect function should not have a body");
@@ -155,13 +145,15 @@ bool NVVMReflect::runOnModule(Module &M) {
"Only one operand expect for _reflect function");
// In cuda, we will have an extra constant-to-generic conversion of
// the string.
- const Value *conv = Reflect->getArgOperand(0);
- assert(isa<CallInst>(conv) && "Expected a const-to-gen conversion");
- const CallInst *ConvCall = cast<CallInst>(conv);
- const Value *str = ConvCall->getArgOperand(0);
- assert(isa<ConstantExpr>(str) &&
+ const Value *Str = Reflect->getArgOperand(0);
+ if (isa<CallInst>(Str)) {
+ // CUDA path
+ const CallInst *ConvCall = cast<CallInst>(Str);
+ Str = ConvCall->getArgOperand(0);
+ }
+ assert(isa<ConstantExpr>(Str) &&
"Format of _reflect function not recognized");
- const ConstantExpr *GEP = cast<ConstantExpr>(str);
+ const ConstantExpr *GEP = cast<ConstantExpr>(Str);
const Value *Sym = GEP->getOperand(0);
assert(isa<Constant>(Sym) && "Format of _reflect function not recognized");
@@ -195,3 +187,36 @@ bool NVVMReflect::runOnModule(Module &M) {
ToRemove[i]->eraseFromParent();
return true;
}
+
+bool NVVMReflect::runOnModule(Module &M) {
+ if (!NVVMReflectEnabled)
+ return false;
+
+ setVarMap();
+
+ bool Res = false;
+ std::string Name;
+ Type *Tys[1];
+ Type *I8Ty = Type::getInt8Ty(M.getContext());
+ Function *ReflectFunction;
+
+ // Check for standard overloaded versions of llvm.nvvm.reflect
+
+ for (unsigned i = 0; i != 5; ++i) {
+ Tys[0] = PointerType::get(I8Ty, i);
+ Name = Intrinsic::getName(Intrinsic::nvvm_reflect, Tys);
+ ReflectFunction = M.getFunction(Name);
+    if (ReflectFunction != 0) {
+ Res |= handleFunction(ReflectFunction);
+ }
+ }
+
+ ReflectFunction = M.getFunction(NVVM_REFLECT_FUNCTION);
+ // If reflect function is not used, then there will be
+ // no entry in the module.
+ if (ReflectFunction != 0)
+ Res |= handleFunction(ReflectFunction);
+
+ return Res;
+}
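
The loop above probes the module for overloaded llvm.nvvm.reflect declarations, one per pointer address space, before falling back to the NVVM_REFLECT_FUNCTION name. For orientation, the names Intrinsic::getName is expected to return for an i8 pointer in address space AS follow the usual "p<AS>i8" mangling; that spelling is an assumption for illustration, and the real code always asks Intrinsic::getName:

#include <cassert>
#include <string>

// Illustrative reconstruction of the candidate names the loop looks up;
// the actual mangling comes from Intrinsic::getName.
static std::string reflectName(unsigned AS) {
  return "llvm.nvvm.reflect.p" + std::to_string(AS) + "i8";
}

int main() {
  assert(reflectName(0) == "llvm.nvvm.reflect.p0i8");
  assert(reflectName(4) == "llvm.nvvm.reflect.p4i8");
  return 0;
}
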
diff --git a/lib/Target/NVPTX/cl_common_defines.h b/lib/Target/NVPTX/cl_common_defines.h
index 45cc0b8..02c5a94 100644
--- a/lib/Target/NVPTX/cl_common_defines.h
+++ b/lib/Target/NVPTX/cl_common_defines.h
@@ -1,5 +1,5 @@
-#ifndef __CL_COMMON_DEFINES_H__
-#define __CL_COMMON_DEFINES_H__
+#ifndef CL_COMMON_DEFINES_H
+#define CL_COMMON_DEFINES_H
// This file includes defines that are common to both kernel code and
// the NVPTX back-end.
@@ -119,4 +119,4 @@ typedef enum clk_sampler_type {
#define CLK_LOCAL_MEM_FENCE (1 << 0)
#define CLK_GLOBAL_MEM_FENCE (1 << 1)
-#endif // __CL_COMMON_DEFINES_H__
+#endif // CL_COMMON_DEFINES_H
diff --git a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
index 3ac037d..2f562ca 100644
--- a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
+++ b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
@@ -238,7 +238,7 @@ class PPCAsmParser : public MCTargetAsmParser {
bool ParseExpression(const MCExpr *&EVal);
bool ParseDarwinExpression(const MCExpr *&EVal);
- bool ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+ bool ParseOperand(OperandVector &Operands);
bool ParseDirectiveWord(unsigned Size, SMLoc L);
bool ParseDirectiveTC(unsigned Size, SMLoc L);
@@ -246,12 +246,11 @@ class PPCAsmParser : public MCTargetAsmParser {
bool ParseDarwinDirectiveMachine(SMLoc L);
bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
- SmallVectorImpl<MCParsedAsmOperand*> &Operands,
- MCStreamer &Out, unsigned &ErrorInfo,
+ OperandVector &Operands, MCStreamer &Out,
+ unsigned &ErrorInfo,
bool MatchingInlineAsm) override;
- void ProcessInstruction(MCInst &Inst,
- const SmallVectorImpl<MCParsedAsmOperand*> &Ops);
+ void ProcessInstruction(MCInst &Inst, const OperandVector &Ops);
/// @name Auto-generated Match Functions
/// {
@@ -276,13 +275,12 @@ public:
setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
}
- bool ParseInstruction(ParseInstructionInfo &Info,
- StringRef Name, SMLoc NameLoc,
- SmallVectorImpl<MCParsedAsmOperand*> &Operands) override;
+ bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
+ SMLoc NameLoc, OperandVector &Operands) override;
bool ParseDirective(AsmToken DirectiveID) override;
- unsigned validateTargetOperandClass(MCParsedAsmOperand *Op,
+ unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
unsigned Kind) override;
const MCExpr *applyModifierToExpr(const MCExpr *E,
@@ -548,8 +546,9 @@ public:
void print(raw_ostream &OS) const override;
- static PPCOperand *CreateToken(StringRef Str, SMLoc S, bool IsPPC64) {
- PPCOperand *Op = new PPCOperand(Token);
+ static std::unique_ptr<PPCOperand> CreateToken(StringRef Str, SMLoc S,
+ bool IsPPC64) {
+ auto Op = make_unique<PPCOperand>(Token);
Op->Tok.Data = Str.data();
Op->Tok.Length = Str.size();
Op->StartLoc = S;
@@ -558,22 +557,27 @@ public:
return Op;
}
- static PPCOperand *CreateTokenWithStringCopy(StringRef Str, SMLoc S,
- bool IsPPC64) {
+ static std::unique_ptr<PPCOperand>
+ CreateTokenWithStringCopy(StringRef Str, SMLoc S, bool IsPPC64) {
// Allocate extra memory for the string and copy it.
+ // FIXME: This is incorrect, Operands are owned by unique_ptr with a default
+ // deleter which will destroy them by simply using "delete", not correctly
+ // calling operator delete on this extra memory after calling the dtor
+ // explicitly.
void *Mem = ::operator new(sizeof(PPCOperand) + Str.size());
- PPCOperand *Op = new (Mem) PPCOperand(Token);
- Op->Tok.Data = (const char *)(Op + 1);
+ std::unique_ptr<PPCOperand> Op(new (Mem) PPCOperand(Token));
+ Op->Tok.Data = (const char *)(Op.get() + 1);
Op->Tok.Length = Str.size();
- std::memcpy((char *)(Op + 1), Str.data(), Str.size());
+ std::memcpy((void *)Op->Tok.Data, Str.data(), Str.size());
Op->StartLoc = S;
Op->EndLoc = S;
Op->IsPPC64 = IsPPC64;
return Op;
}
- static PPCOperand *CreateImm(int64_t Val, SMLoc S, SMLoc E, bool IsPPC64) {
- PPCOperand *Op = new PPCOperand(Immediate);
+ static std::unique_ptr<PPCOperand> CreateImm(int64_t Val, SMLoc S, SMLoc E,
+ bool IsPPC64) {
+ auto Op = make_unique<PPCOperand>(Immediate);
Op->Imm.Val = Val;
Op->StartLoc = S;
Op->EndLoc = E;
@@ -581,9 +585,9 @@ public:
return Op;
}
- static PPCOperand *CreateExpr(const MCExpr *Val,
- SMLoc S, SMLoc E, bool IsPPC64) {
- PPCOperand *Op = new PPCOperand(Expression);
+ static std::unique_ptr<PPCOperand> CreateExpr(const MCExpr *Val, SMLoc S,
+ SMLoc E, bool IsPPC64) {
+ auto Op = make_unique<PPCOperand>(Expression);
Op->Expr.Val = Val;
Op->Expr.CRVal = EvaluateCRExpr(Val);
Op->StartLoc = S;
@@ -592,9 +596,9 @@ public:
return Op;
}
- static PPCOperand *CreateTLSReg(const MCSymbolRefExpr *Sym,
- SMLoc S, SMLoc E, bool IsPPC64) {
- PPCOperand *Op = new PPCOperand(TLSRegister);
+ static std::unique_ptr<PPCOperand>
+ CreateTLSReg(const MCSymbolRefExpr *Sym, SMLoc S, SMLoc E, bool IsPPC64) {
+ auto Op = make_unique<PPCOperand>(TLSRegister);
Op->TLSReg.Sym = Sym;
Op->StartLoc = S;
Op->EndLoc = E;
@@ -602,8 +606,8 @@ public:
return Op;
}
- static PPCOperand *CreateFromMCExpr(const MCExpr *Val,
- SMLoc S, SMLoc E, bool IsPPC64) {
+ static std::unique_ptr<PPCOperand>
+ CreateFromMCExpr(const MCExpr *Val, SMLoc S, SMLoc E, bool IsPPC64) {
if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Val))
return CreateImm(CE->getValue(), S, E, IsPPC64);
@@ -634,10 +638,8 @@ void PPCOperand::print(raw_ostream &OS) const {
}
}
-
-void PPCAsmParser::
-ProcessInstruction(MCInst &Inst,
- const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+void PPCAsmParser::ProcessInstruction(MCInst &Inst,
+ const OperandVector &Operands) {
int Opcode = Inst.getOpcode();
switch (Opcode) {
case PPC::LAx: {
@@ -917,11 +919,10 @@ ProcessInstruction(MCInst &Inst,
}
}
-bool PPCAsmParser::
-MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
- SmallVectorImpl<MCParsedAsmOperand*> &Operands,
- MCStreamer &Out, unsigned &ErrorInfo,
- bool MatchingInlineAsm) {
+bool PPCAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
+ OperandVector &Operands,
+ MCStreamer &Out, unsigned &ErrorInfo,
+ bool MatchingInlineAsm) {
MCInst Inst;
switch (MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm)) {
@@ -942,7 +943,7 @@ MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
if (ErrorInfo >= Operands.size())
return Error(IDLoc, "too few operands for instruction");
- ErrorLoc = ((PPCOperand*)Operands[ErrorInfo])->getStartLoc();
+ ErrorLoc = ((PPCOperand &)*Operands[ErrorInfo]).getStartLoc();
if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc;
}
@@ -1216,12 +1217,10 @@ ParseDarwinExpression(const MCExpr *&EVal) {
/// ParseOperand
/// This handles registers in the form 'NN', '%rNN' for ELF platforms and
/// rNN for MachO.
-bool PPCAsmParser::
-ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+bool PPCAsmParser::ParseOperand(OperandVector &Operands) {
SMLoc S = Parser.getTok().getLoc();
SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
const MCExpr *EVal;
- PPCOperand *Op;
// Attempt to parse the next token as an immediate
switch (getLexer().getKind()) {
@@ -1233,8 +1232,7 @@ ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
int64_t IntVal;
if (!MatchRegisterName(Parser.getTok(), RegNo, IntVal)) {
Parser.Lex(); // Eat the identifier token.
- Op = PPCOperand::CreateImm(IntVal, S, E, isPPC64());
- Operands.push_back(Op);
+ Operands.push_back(PPCOperand::CreateImm(IntVal, S, E, isPPC64()));
return false;
}
return Error(S, "invalid register name");
@@ -1249,8 +1247,7 @@ ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
int64_t IntVal;
if (!MatchRegisterName(Parser.getTok(), RegNo, IntVal)) {
Parser.Lex(); // Eat the identifier token.
- Op = PPCOperand::CreateImm(IntVal, S, E, isPPC64());
- Operands.push_back(Op);
+ Operands.push_back(PPCOperand::CreateImm(IntVal, S, E, isPPC64()));
return false;
}
}
@@ -1272,8 +1269,7 @@ ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
}
// Push the parsed operand into the list of operands
- Op = PPCOperand::CreateFromMCExpr(EVal, S, E, isPPC64());
- Operands.push_back(Op);
+ Operands.push_back(PPCOperand::CreateFromMCExpr(EVal, S, E, isPPC64()));
// Check whether this is a TLS call expression
bool TLSCall = false;
@@ -1292,8 +1288,7 @@ ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
E = Parser.getTok().getLoc();
Parser.Lex(); // Eat the ')'.
- Op = PPCOperand::CreateFromMCExpr(TLSSym, S, E, isPPC64());
- Operands.push_back(Op);
+ Operands.push_back(PPCOperand::CreateFromMCExpr(TLSSym, S, E, isPPC64()));
}
// Otherwise, check for D-form memory operands
@@ -1340,17 +1335,15 @@ ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
E = Parser.getTok().getLoc();
Parser.Lex(); // Eat the ')'.
- Op = PPCOperand::CreateImm(IntVal, S, E, isPPC64());
- Operands.push_back(Op);
+ Operands.push_back(PPCOperand::CreateImm(IntVal, S, E, isPPC64()));
}
return false;
}
/// Parse an instruction mnemonic followed by its operands.
-bool PPCAsmParser::
-ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc,
- SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+bool PPCAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
+ SMLoc NameLoc, OperandVector &Operands) {
// The first operand is the token for the instruction name.
// If the next character is a '+' or '-', we need to add it to the
// instruction name, to match what TableGen is doing.
@@ -1554,7 +1547,7 @@ extern "C" void LLVMInitializePowerPCAsmParser() {
// Define this matcher function after the auto-generated include so we
// have the match class enum definitions.
-unsigned PPCAsmParser::validateTargetOperandClass(MCParsedAsmOperand *AsmOp,
+unsigned PPCAsmParser::validateTargetOperandClass(MCParsedAsmOperand &AsmOp,
unsigned Kind) {
// If the kind is a token for a literal immediate, check if our asm
// operand matches. This is for InstAliases which have a fixed-value
@@ -1568,8 +1561,8 @@ unsigned PPCAsmParser::validateTargetOperandClass(MCParsedAsmOperand *AsmOp,
default: return Match_InvalidOperand;
}
- PPCOperand *Op = static_cast<PPCOperand*>(AsmOp);
- if (Op->isImm() && Op->getImm() == ImmVal)
+ PPCOperand &Op = static_cast<PPCOperand &>(AsmOp);
+ if (Op.isImm() && Op.getImm() == ImmVal)
return Match_Success;
return Match_InvalidOperand;
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
index a4983ad..435a93f 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
@@ -102,17 +102,45 @@ public:
// Output the constant in big/little endian byte order.
unsigned Size = Desc.getSize();
- if (IsLittleEndian) {
- for (unsigned i = 0; i != Size; ++i) {
- OS << (char)Bits;
- Bits >>= 8;
+ switch (Size) {
+ case 4:
+ if (IsLittleEndian) {
+ OS << (char)(Bits);
+ OS << (char)(Bits >> 8);
+ OS << (char)(Bits >> 16);
+ OS << (char)(Bits >> 24);
+ } else {
+ OS << (char)(Bits >> 24);
+ OS << (char)(Bits >> 16);
+ OS << (char)(Bits >> 8);
+ OS << (char)(Bits);
}
- } else {
- int ShiftValue = (Size * 8) - 8;
- for (unsigned i = 0; i != Size; ++i) {
- OS << (char)(Bits >> ShiftValue);
- Bits <<= 8;
+ break;
+ case 8:
+ // If we emit a pair of instructions, the first one is
+ // always in the top 32 bits, even on little-endian.
+ if (IsLittleEndian) {
+ OS << (char)(Bits >> 32);
+ OS << (char)(Bits >> 40);
+ OS << (char)(Bits >> 48);
+ OS << (char)(Bits >> 56);
+ OS << (char)(Bits);
+ OS << (char)(Bits >> 8);
+ OS << (char)(Bits >> 16);
+ OS << (char)(Bits >> 24);
+ } else {
+ OS << (char)(Bits >> 56);
+ OS << (char)(Bits >> 48);
+ OS << (char)(Bits >> 40);
+ OS << (char)(Bits >> 32);
+ OS << (char)(Bits >> 24);
+ OS << (char)(Bits >> 16);
+ OS << (char)(Bits >> 8);
+ OS << (char)(Bits);
}
+ break;
+ default:
+ llvm_unreachable ("Invalid instruction size");
}
++MCNumEmitted; // Keep track of the # of mi's emitted.
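
The rewritten loop makes the two encoding sizes explicit because an 8-byte encoding is really a pair of 4-byte instructions: on little-endian targets each word is emitted least-significant byte first, but the word holding the top 32 bits still comes first in the stream. A buffer-based C++ sketch of the resulting byte order (emitBytes is an illustrative stand-in for the streaming code above):

#include <cassert>
#include <cstdint>
#include <vector>

// Emit a 4- or 8-byte encoding in the same byte order as the code above.
static std::vector<uint8_t> emitBytes(uint64_t Bits, unsigned Size,
                                      bool IsLittleEndian) {
  std::vector<uint8_t> Out;
  auto word = [&](uint32_t W) {
    if (IsLittleEndian)
      for (int i = 0; i < 4; ++i)
        Out.push_back(uint8_t(W >> (8 * i)));
    else
      for (int i = 3; i >= 0; --i)
        Out.push_back(uint8_t(W >> (8 * i)));
  };
  if (Size == 4) {
    word(uint32_t(Bits));
  } else {                          // Size == 8: instruction pair
    word(uint32_t(Bits >> 32));     // first instruction is the top 32 bits
    word(uint32_t(Bits));
  }
  return Out;
}

int main() {
  auto LE = emitBytes(0x1122334455667788ull, 8, true);
  auto BE = emitBytes(0x1122334455667788ull, 8, false);
  assert((LE == std::vector<uint8_t>{0x44, 0x33, 0x22, 0x11, 0x88, 0x77, 0x66, 0x55}));
  assert((BE == std::vector<uint8_t>{0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88}));
  return 0;
}
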
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp
index 10d068d..3ac0aca 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp
@@ -11,6 +11,7 @@
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCObjectStreamer.h"
using namespace llvm;
@@ -127,33 +128,6 @@ PPCMCExpr::EvaluateAsRelocatableImpl(MCValue &Res,
return true;
}
-// FIXME: This basically copies MCObjectStreamer::AddValueSymbols. Perhaps
-// that method should be made public?
-static void AddValueSymbols_(const MCExpr *Value, MCAssembler *Asm) {
- switch (Value->getKind()) {
- case MCExpr::Target:
- llvm_unreachable("Can't handle nested target expr!");
-
- case MCExpr::Constant:
- break;
-
- case MCExpr::Binary: {
- const MCBinaryExpr *BE = cast<MCBinaryExpr>(Value);
- AddValueSymbols_(BE->getLHS(), Asm);
- AddValueSymbols_(BE->getRHS(), Asm);
- break;
- }
-
- case MCExpr::SymbolRef:
- Asm->getOrCreateSymbolData(cast<MCSymbolRefExpr>(Value)->getSymbol());
- break;
-
- case MCExpr::Unary:
- AddValueSymbols_(cast<MCUnaryExpr>(Value)->getSubExpr(), Asm);
- break;
- }
-}
-
-void PPCMCExpr::AddValueSymbols(MCAssembler *Asm) const {
- AddValueSymbols_(getSubExpr(), Asm);
+void PPCMCExpr::visitUsedExpr(MCStreamer &Streamer) const {
+ Streamer.visitUsedExpr(*getSubExpr());
}
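
The deleted AddValueSymbols_ walk is essentially what the streamer now does generically when the target expression delegates to it. Roughly (a sketch of the expected behaviour based on the removed code, not the actual MC implementation), the streamer recurses over the expression tree and registers every referenced symbol:

// Sketch of the recursion the target expression now delegates to.
static void visitExprTree(MCStreamer &Streamer, const MCExpr &Expr) {
  switch (Expr.getKind()) {
  case MCExpr::Target:
    cast<MCTargetExpr>(Expr).visitUsedExpr(Streamer); // e.g. PPCMCExpr above
    break;
  case MCExpr::Constant:
    break;
  case MCExpr::Binary: {
    const MCBinaryExpr &BE = cast<MCBinaryExpr>(Expr);
    visitExprTree(Streamer, *BE.getLHS());
    visitExprTree(Streamer, *BE.getRHS());
    break;
  }
  case MCExpr::SymbolRef:
    // Object streamers create symbol data here, replacing the explicit
    // getOrCreateSymbolData() call made by the removed helper.
    Streamer.visitUsedSymbol(cast<MCSymbolRefExpr>(Expr).getSymbol());
    break;
  case MCExpr::Unary:
    visitExprTree(Streamer, *cast<MCUnaryExpr>(Expr).getSubExpr());
    break;
  }
}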
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h b/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h
index 3421b91..bca4085 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h
@@ -79,7 +79,7 @@ public:
void PrintImpl(raw_ostream &OS) const override;
bool EvaluateAsRelocatableImpl(MCValue &Res,
const MCAsmLayout *Layout) const override;
- void AddValueSymbols(MCAssembler *) const override;
+ void visitUsedExpr(MCStreamer &Streamer) const override;
const MCSection *FindAssociatedSection() const override {
return getSubExpr()->FindAssociatedSection();
}
diff --git a/lib/Target/PowerPC/PPC.td b/lib/Target/PowerPC/PPC.td
index bd58539..a9842b2 100644
--- a/lib/Target/PowerPC/PPC.td
+++ b/lib/Target/PowerPC/PPC.td
@@ -46,6 +46,7 @@ def DirectivePwr5x: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR5X", ""
def DirectivePwr6: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR6", "">;
def DirectivePwr6x: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR6X", "">;
def DirectivePwr7: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR7", "">;
+def DirectivePwr8: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR8", "">;
def Feature64Bit : SubtargetFeature<"64bit","Has64BitSupport", "true",
"Enable 64-bit instructions">;
@@ -285,6 +286,15 @@ def : ProcessorModel<"pwr7", P7Model,
FeaturePOPCNTD, FeatureLDBRX,
Feature64Bit /*, Feature64BitRegs */,
DeprecatedMFTB, DeprecatedDST]>;
+def : ProcessorModel<"pwr8", P7Model /* FIXME: Update to P8Model when available */,
+ [DirectivePwr8, FeatureAltivec,
+ FeatureMFOCRF, FeatureFCPSGN, FeatureFSqrt, FeatureFRE,
+ FeatureFRES, FeatureFRSQRTE, FeatureFRSQRTES,
+ FeatureRecipPrec, FeatureSTFIWX, FeatureLFIWAX,
+ FeatureFPRND, FeatureFPCVT, FeatureISEL,
+ FeaturePOPCNTD, FeatureLDBRX,
+ Feature64Bit /*, Feature64BitRegs */,
+ DeprecatedMFTB, DeprecatedDST]>;
def : Processor<"ppc", G3Itineraries, [Directive32]>;
def : ProcessorModel<"ppc64", G5Model,
[Directive64, FeatureAltivec,
diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp
index e89fb2d..fd044d9 100644
--- a/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -365,8 +365,8 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
// Transform %Xd = ADDIStocHA %X2, <ga:@sym>
LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, Subtarget.isDarwin());
- // Change the opcode to ADDIS8. If the global address is external,
- // has common linkage, is a function address, or is a jump table
+ // Change the opcode to ADDIS8. If the global address is external, has
+ // common linkage, is a non-local function address, or is a jump table
// address, then generate a TOC entry and reference that. Otherwise
// reference the symbol directly.
TmpInst.setOpcode(PPC::ADDIS8);
@@ -375,7 +375,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
"Invalid operand for ADDIStocHA!");
MCSymbol *MOSymbol = nullptr;
bool IsExternal = false;
- bool IsFunction = false;
+ bool IsNonLocalFunction = false;
bool IsCommon = false;
bool IsAvailExt = false;
@@ -384,15 +384,16 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
MOSymbol = getSymbol(GV);
IsExternal = GV->isDeclaration();
IsCommon = GV->hasCommonLinkage();
- IsFunction = GV->getType()->getElementType()->isFunctionTy();
+ IsNonLocalFunction = GV->getType()->getElementType()->isFunctionTy() &&
+ (GV->isDeclaration() || GV->isWeakForLinker());
IsAvailExt = GV->hasAvailableExternallyLinkage();
} else if (MO.isCPI())
MOSymbol = GetCPISymbol(MO.getIndex());
else if (MO.isJTI())
MOSymbol = GetJTISymbol(MO.getIndex());
- if (IsExternal || IsFunction || IsCommon || IsAvailExt || MO.isJTI() ||
- TM.getCodeModel() == CodeModel::Large)
+ if (IsExternal || IsNonLocalFunction || IsCommon || IsAvailExt ||
+ MO.isJTI() || TM.getCodeModel() == CodeModel::Large)
MOSymbol = lookUpOrCreateTOCEntry(MOSymbol);
const MCExpr *Exp =
@@ -425,7 +426,8 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
else if (MO.isGlobal()) {
const GlobalValue *GValue = MO.getGlobal();
MOSymbol = getSymbol(GValue);
- if (GValue->isDeclaration() || GValue->hasCommonLinkage() ||
+ if (GValue->getType()->getElementType()->isFunctionTy() ||
+ GValue->isDeclaration() || GValue->hasCommonLinkage() ||
GValue->hasAvailableExternallyLinkage() ||
TM.getCodeModel() == CodeModel::Large)
MOSymbol = lookUpOrCreateTOCEntry(MOSymbol);
@@ -450,17 +452,19 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
assert((MO.isGlobal() || MO.isCPI()) && "Invalid operand for ADDItocL");
MCSymbol *MOSymbol = nullptr;
bool IsExternal = false;
- bool IsFunction = false;
+ bool IsNonLocalFunction = false;
if (MO.isGlobal()) {
const GlobalValue *GV = MO.getGlobal();
MOSymbol = getSymbol(GV);
IsExternal = GV->isDeclaration();
- IsFunction = GV->getType()->getElementType()->isFunctionTy();
+ IsNonLocalFunction = GV->getType()->getElementType()->isFunctionTy() &&
+ (GV->isDeclaration() || GV->isWeakForLinker());
} else if (MO.isCPI())
MOSymbol = GetCPISymbol(MO.getIndex());
- if (IsFunction || IsExternal || TM.getCodeModel() == CodeModel::Large)
+ if (IsNonLocalFunction || IsExternal ||
+ TM.getCodeModel() == CodeModel::Large)
MOSymbol = lookUpOrCreateTOCEntry(MOSymbol);
const MCExpr *Exp =
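
The new predicate shared by the ADDIStocHA and ADDItocL paths can be read as one helper. A sketch (needsTOCIndirection is an illustrative name, not part of the patch; jump tables and constant pools are handled separately in the hunks above):

// Sketch: when a global-value operand needs a TOC entry rather than a
// direct reference.
static bool needsTOCIndirection(const GlobalValue *GV, const TargetMachine &TM) {
  bool IsNonLocalFunction = GV->getType()->getElementType()->isFunctionTy() &&
                            (GV->isDeclaration() || GV->isWeakForLinker());
  return GV->isDeclaration() || GV->hasCommonLinkage() ||
         GV->hasAvailableExternallyLinkage() || IsNonLocalFunction ||
         TM.getCodeModel() == CodeModel::Large;
}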
diff --git a/lib/Target/PowerPC/PPCFastISel.cpp b/lib/Target/PowerPC/PPCFastISel.cpp
index ed3cb4d..92a0ec1 100644
--- a/lib/Target/PowerPC/PPCFastISel.cpp
+++ b/lib/Target/PowerPC/PPCFastISel.cpp
@@ -1030,6 +1030,10 @@ bool PPCFastISel::SelectFPToI(const Instruction *I, bool IsSigned) {
if (DstVT != MVT::i32 && DstVT != MVT::i64)
return false;
+ // If we don't have FCTIDUZ and we need it, punt to SelectionDAG.
+ if (DstVT == MVT::i64 && !IsSigned && !PPCSubTarget->hasFPCVT())
+ return false;
+
Value *Src = I->getOperand(0);
Type *SrcTy = Src->getType();
if (!isTypeLegal(SrcTy, SrcVT))
@@ -1197,6 +1201,11 @@ bool PPCFastISel::processCallArgs(SmallVectorImpl<Value*> &Args,
bool IsVarArg) {
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, TM, ArgLocs, *Context);
+
+ // Reserve space for the linkage area on the stack.
+ unsigned LinkageSize = PPCFrameLowering::getLinkageSize(true, false);
+ CCInfo.AllocateStack(LinkageSize, 8);
+
CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, CC_PPC64_ELF_FIS);
// Bail out if we can't handle any of the arguments.
@@ -1218,6 +1227,13 @@ bool PPCFastISel::processCallArgs(SmallVectorImpl<Value*> &Args,
// Get a count of how many bytes are to be pushed onto the stack.
NumBytes = CCInfo.getNextStackOffset();
+ // The prolog code of the callee may store up to 8 GPR argument registers to
+  // the stack, allowing va_start to index over them in memory if it is varargs.
+ // Because we cannot tell if this is needed on the caller side, we have to
+ // conservatively assume that it is needed. As such, make sure we have at
+ // least enough stack space for the caller to store the 8 GPRs.
+ NumBytes = std::max(NumBytes, LinkageSize + 64);
+
// Issue CALLSEQ_START.
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TII.getCallFrameSetupOpcode()))
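
A worked sketch of the frame-size computation added here, under the assumption that getLinkageSize(true, false) is the 48-byte 64-bit ELF linkage area: since the caller must always leave room for the callee to spill 8 GPR argument registers, the outgoing frame is never smaller than 48 + 64 = 112 bytes.

// Sketch: outgoing call-frame size for PPC64 ELF fast-isel, as above.
static unsigned computeCallFrameBytes(unsigned NextStackOffset) {
  const unsigned LinkageSize = 48;       // assumed: getLinkageSize(true, false)
  const unsigned ParamSaveArea = 8 * 8;  // space for 8 GPR argument registers
  return std::max(NextStackOffset, LinkageSize + ParamSaveArea); // >= 112
}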
@@ -1858,16 +1874,9 @@ unsigned PPCFastISel::PPCMaterializeGV(const GlobalValue *GV, MVT VT) {
// FIXME: Jump tables are not yet required because fast-isel doesn't
// handle switches; if that changes, we need them as well. For now,
// what follows assumes everything's a generic (or TLS) global address.
- const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV);
- if (!GVar) {
- // If GV is an alias, use the aliasee for determining thread-locality.
- if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
- GVar = dyn_cast_or_null<GlobalVariable>(GA->getAliasee());
- }
// FIXME: We don't yet handle the complexity of TLS.
- bool IsTLS = GVar && GVar->isThreadLocal();
- if (IsTLS)
+ if (GV->isThreadLocal())
return 0;
// For small code model, generate a simple TOC load.
@@ -1877,8 +1886,8 @@ unsigned PPCFastISel::PPCMaterializeGV(const GlobalValue *GV, MVT VT) {
.addGlobalAddress(GV)
.addReg(PPC::X2);
else {
- // If the address is an externally defined symbol, a symbol with
- // common or externally available linkage, a function address, or a
+ // If the address is an externally defined symbol, a symbol with common
+ // or externally available linkage, a non-local function address, or a
// jump table address (not yet needed), or if we are generating code
// for large code model, we generate:
// LDtocL(GV, ADDIStocHA(%X2, GV))
@@ -1889,12 +1898,13 @@ unsigned PPCFastISel::PPCMaterializeGV(const GlobalValue *GV, MVT VT) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ADDIStocHA),
HighPartReg).addReg(PPC::X2).addGlobalAddress(GV);
- // !GVar implies a function address. An external variable is one
- // without an initializer.
// If/when switches are implemented, jump tables should be handled
// on the "if" path here.
- if (CModel == CodeModel::Large || !GVar || !GVar->hasInitializer() ||
- GVar->hasCommonLinkage() || GVar->hasAvailableExternallyLinkage())
+ if (CModel == CodeModel::Large ||
+ (GV->getType()->getElementType()->isFunctionTy() &&
+ (GV->isDeclaration() || GV->isWeakForLinker())) ||
+ GV->isDeclaration() || GV->hasCommonLinkage() ||
+ GV->hasAvailableExternallyLinkage())
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::LDtocL),
DestReg).addGlobalAddress(GV).addReg(HighPartReg);
else
diff --git a/lib/Target/PowerPC/PPCFrameLowering.cpp b/lib/Target/PowerPC/PPCFrameLowering.cpp
index e294156..65e9cf2 100644
--- a/lib/Target/PowerPC/PPCFrameLowering.cpp
+++ b/lib/Target/PowerPC/PPCFrameLowering.cpp
@@ -15,6 +15,7 @@
#include "PPCInstrBuilder.h"
#include "PPCInstrInfo.h"
#include "PPCMachineFunctionInfo.h"
+#include "PPCSubtarget.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -35,6 +36,167 @@ static const uint16_t VRRegNo[] = {
PPC::V24, PPC::V25, PPC::V26, PPC::V27, PPC::V28, PPC::V29, PPC::V30, PPC::V31
};
+PPCFrameLowering::PPCFrameLowering(const PPCSubtarget &STI)
+ : TargetFrameLowering(TargetFrameLowering::StackGrowsDown,
+ (STI.hasQPX() || STI.isBGQ()) ? 32 : 16, 0),
+ Subtarget(STI) {}
+
+// With the SVR4 ABI, callee-saved registers have fixed offsets on the stack.
+const PPCFrameLowering::SpillSlot *PPCFrameLowering::getCalleeSavedSpillSlots(
+ unsigned &NumEntries) const {
+ if (Subtarget.isDarwinABI()) {
+ NumEntries = 1;
+ if (Subtarget.isPPC64()) {
+ static const SpillSlot darwin64Offsets = {PPC::X31, -8};
+ return &darwin64Offsets;
+ } else {
+ static const SpillSlot darwinOffsets = {PPC::R31, -4};
+ return &darwinOffsets;
+ }
+ }
+
+ // Early exit if not using the SVR4 ABI.
+ if (!Subtarget.isSVR4ABI()) {
+ NumEntries = 0;
+ return nullptr;
+ }
+
+ // Note that the offsets here overlap, but this is fixed up in
+ // processFunctionBeforeFrameFinalized.
+
+ static const SpillSlot Offsets[] = {
+ // Floating-point register save area offsets.
+ {PPC::F31, -8},
+ {PPC::F30, -16},
+ {PPC::F29, -24},
+ {PPC::F28, -32},
+ {PPC::F27, -40},
+ {PPC::F26, -48},
+ {PPC::F25, -56},
+ {PPC::F24, -64},
+ {PPC::F23, -72},
+ {PPC::F22, -80},
+ {PPC::F21, -88},
+ {PPC::F20, -96},
+ {PPC::F19, -104},
+ {PPC::F18, -112},
+ {PPC::F17, -120},
+ {PPC::F16, -128},
+ {PPC::F15, -136},
+ {PPC::F14, -144},
+
+ // General register save area offsets.
+ {PPC::R31, -4},
+ {PPC::R30, -8},
+ {PPC::R29, -12},
+ {PPC::R28, -16},
+ {PPC::R27, -20},
+ {PPC::R26, -24},
+ {PPC::R25, -28},
+ {PPC::R24, -32},
+ {PPC::R23, -36},
+ {PPC::R22, -40},
+ {PPC::R21, -44},
+ {PPC::R20, -48},
+ {PPC::R19, -52},
+ {PPC::R18, -56},
+ {PPC::R17, -60},
+ {PPC::R16, -64},
+ {PPC::R15, -68},
+ {PPC::R14, -72},
+
+ // CR save area offset. We map each of the nonvolatile CR fields
+ // to the slot for CR2, which is the first of the nonvolatile CR
+ // fields to be assigned, so that we only allocate one save slot.
+ // See PPCRegisterInfo::hasReservedSpillSlot() for more information.
+ {PPC::CR2, -4},
+
+ // VRSAVE save area offset.
+ {PPC::VRSAVE, -4},
+
+ // Vector register save area
+ {PPC::V31, -16},
+ {PPC::V30, -32},
+ {PPC::V29, -48},
+ {PPC::V28, -64},
+ {PPC::V27, -80},
+ {PPC::V26, -96},
+ {PPC::V25, -112},
+ {PPC::V24, -128},
+ {PPC::V23, -144},
+ {PPC::V22, -160},
+ {PPC::V21, -176},
+ {PPC::V20, -192}};
+
+ static const SpillSlot Offsets64[] = {
+ // Floating-point register save area offsets.
+ {PPC::F31, -8},
+ {PPC::F30, -16},
+ {PPC::F29, -24},
+ {PPC::F28, -32},
+ {PPC::F27, -40},
+ {PPC::F26, -48},
+ {PPC::F25, -56},
+ {PPC::F24, -64},
+ {PPC::F23, -72},
+ {PPC::F22, -80},
+ {PPC::F21, -88},
+ {PPC::F20, -96},
+ {PPC::F19, -104},
+ {PPC::F18, -112},
+ {PPC::F17, -120},
+ {PPC::F16, -128},
+ {PPC::F15, -136},
+ {PPC::F14, -144},
+
+ // General register save area offsets.
+ {PPC::X31, -8},
+ {PPC::X30, -16},
+ {PPC::X29, -24},
+ {PPC::X28, -32},
+ {PPC::X27, -40},
+ {PPC::X26, -48},
+ {PPC::X25, -56},
+ {PPC::X24, -64},
+ {PPC::X23, -72},
+ {PPC::X22, -80},
+ {PPC::X21, -88},
+ {PPC::X20, -96},
+ {PPC::X19, -104},
+ {PPC::X18, -112},
+ {PPC::X17, -120},
+ {PPC::X16, -128},
+ {PPC::X15, -136},
+ {PPC::X14, -144},
+
+ // VRSAVE save area offset.
+ {PPC::VRSAVE, -4},
+
+ // Vector register save area
+ {PPC::V31, -16},
+ {PPC::V30, -32},
+ {PPC::V29, -48},
+ {PPC::V28, -64},
+ {PPC::V27, -80},
+ {PPC::V26, -96},
+ {PPC::V25, -112},
+ {PPC::V24, -128},
+ {PPC::V23, -144},
+ {PPC::V22, -160},
+ {PPC::V21, -176},
+ {PPC::V20, -192}};
+
+ if (Subtarget.isPPC64()) {
+ NumEntries = array_lengthof(Offsets64);
+
+ return Offsets64;
+ } else {
+ NumEntries = array_lengthof(Offsets);
+
+ return Offsets;
+ }
+}
+
/// RemoveVRSaveCode - We have found that this function does not need any code
/// to manipulate the VRSAVE register, even though it uses vector registers.
/// This can happen when the only registers used are known to be live in or out
@@ -236,9 +398,9 @@ unsigned PPCFrameLowering::determineFrameLayout(MachineFunction &MF,
// Get the maximum call frame size of all the calls.
unsigned maxCallFrameSize = MFI->getMaxCallFrameSize();
- // Maximum call frame needs to be at least big enough for linkage and 8 args.
- unsigned minCallFrameSize = getMinCallFrameSize(Subtarget.isPPC64(),
- Subtarget.isDarwinABI());
+  // Maximum call frame needs to be at least big enough for the linkage area.
+ unsigned minCallFrameSize = getLinkageSize(Subtarget.isPPC64(),
+ Subtarget.isDarwinABI());
maxCallFrameSize = std::max(maxCallFrameSize, minCallFrameSize);
// If we have dynamic alloca then maxCallFrameSize needs to be aligned so
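
determineFrameLayout now guarantees only the linkage area, because the 8-GPR parameter save area is accounted for by the call-lowering paths that need it (see the fast-isel hunk above). For reference, a sketch of the per-ABI linkage-area sizes this relies on (the values are stated here as an assumption consistent with the `return 8;` visible in the header hunk below, not introduced by this patch):

// Sketch of the linkage-area sizes assumed throughout these hunks.
static unsigned linkageSizeSketch(bool isPPC64, bool isDarwinABI) {
  if (isDarwinABI || isPPC64)
    return 6 * (isPPC64 ? 8 : 4);  // six linkage words/doublewords
  return 8;                        // 32-bit SVR4: back chain + LR only
}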
diff --git a/lib/Target/PowerPC/PPCFrameLowering.h b/lib/Target/PowerPC/PPCFrameLowering.h
index 94e9b67..7a226f7 100644
--- a/lib/Target/PowerPC/PPCFrameLowering.h
+++ b/lib/Target/PowerPC/PPCFrameLowering.h
@@ -14,23 +14,18 @@
#define POWERPC_FRAMEINFO_H
#include "PPC.h"
-#include "PPCSubtarget.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetMachine.h"
namespace llvm {
- class PPCSubtarget;
+class PPCSubtarget;
class PPCFrameLowering: public TargetFrameLowering {
const PPCSubtarget &Subtarget;
public:
- PPCFrameLowering(const PPCSubtarget &sti)
- : TargetFrameLowering(TargetFrameLowering::StackGrowsDown,
- (sti.hasQPX() || sti.isBGQ()) ? 32 : 16, 0),
- Subtarget(sti) {
- }
+ PPCFrameLowering(const PPCSubtarget &STI);
unsigned determineFrameLayout(MachineFunction &MF,
bool UpdateMF = true,
@@ -79,6 +74,12 @@ public:
return isPPC64 ? 16 : 4;
}
+ /// getTOCSaveOffset - Return the previous frame offset to save the
+ /// TOC register -- 64-bit SVR4 ABI only.
+ static unsigned getTOCSaveOffset(void) {
+ return 40;
+ }
+
/// getFramePointerSaveOffset - Return the previous frame offset to save the
/// frame pointer.
static unsigned getFramePointerSaveOffset(bool isPPC64, bool isDarwinABI) {
@@ -114,190 +115,9 @@ public:
return 8;
}
- /// getMinCallArgumentsSize - Return the size of the minium PowerPC ABI
- /// argument area.
- static unsigned getMinCallArgumentsSize(bool isPPC64, bool isDarwinABI) {
- // For the Darwin ABI / 64-bit SVR4 ABI:
- // The prolog code of the callee may store up to 8 GPR argument registers to
- // the stack, allowing va_start to index over them in memory if its varargs.
- // Because we cannot tell if this is needed on the caller side, we have to
- // conservatively assume that it is needed. As such, make sure we have at
- // least enough stack space for the caller to store the 8 GPRs.
- if (isDarwinABI || isPPC64)
- return 8 * (isPPC64 ? 8 : 4);
-
- // 32-bit SVR4 ABI:
- // There is no default stack allocated for the 8 first GPR arguments.
- return 0;
- }
-
- /// getMinCallFrameSize - Return the minimum size a call frame can be using
- /// the PowerPC ABI.
- static unsigned getMinCallFrameSize(bool isPPC64, bool isDarwinABI) {
- // The call frame needs to be at least big enough for linkage and 8 args.
- return getLinkageSize(isPPC64, isDarwinABI) +
- getMinCallArgumentsSize(isPPC64, isDarwinABI);
- }
-
- // With the SVR4 ABI, callee-saved registers have fixed offsets on the stack.
const SpillSlot *
- getCalleeSavedSpillSlots(unsigned &NumEntries) const override {
- if (Subtarget.isDarwinABI()) {
- NumEntries = 1;
- if (Subtarget.isPPC64()) {
- static const SpillSlot darwin64Offsets = {PPC::X31, -8};
- return &darwin64Offsets;
- } else {
- static const SpillSlot darwinOffsets = {PPC::R31, -4};
- return &darwinOffsets;
- }
- }
-
- // Early exit if not using the SVR4 ABI.
- if (!Subtarget.isSVR4ABI()) {
- NumEntries = 0;
- return nullptr;
- }
-
- // Note that the offsets here overlap, but this is fixed up in
- // processFunctionBeforeFrameFinalized.
-
- static const SpillSlot Offsets[] = {
- // Floating-point register save area offsets.
- {PPC::F31, -8},
- {PPC::F30, -16},
- {PPC::F29, -24},
- {PPC::F28, -32},
- {PPC::F27, -40},
- {PPC::F26, -48},
- {PPC::F25, -56},
- {PPC::F24, -64},
- {PPC::F23, -72},
- {PPC::F22, -80},
- {PPC::F21, -88},
- {PPC::F20, -96},
- {PPC::F19, -104},
- {PPC::F18, -112},
- {PPC::F17, -120},
- {PPC::F16, -128},
- {PPC::F15, -136},
- {PPC::F14, -144},
-
- // General register save area offsets.
- {PPC::R31, -4},
- {PPC::R30, -8},
- {PPC::R29, -12},
- {PPC::R28, -16},
- {PPC::R27, -20},
- {PPC::R26, -24},
- {PPC::R25, -28},
- {PPC::R24, -32},
- {PPC::R23, -36},
- {PPC::R22, -40},
- {PPC::R21, -44},
- {PPC::R20, -48},
- {PPC::R19, -52},
- {PPC::R18, -56},
- {PPC::R17, -60},
- {PPC::R16, -64},
- {PPC::R15, -68},
- {PPC::R14, -72},
-
- // CR save area offset. We map each of the nonvolatile CR fields
- // to the slot for CR2, which is the first of the nonvolatile CR
- // fields to be assigned, so that we only allocate one save slot.
- // See PPCRegisterInfo::hasReservedSpillSlot() for more information.
- {PPC::CR2, -4},
-
- // VRSAVE save area offset.
- {PPC::VRSAVE, -4},
-
- // Vector register save area
- {PPC::V31, -16},
- {PPC::V30, -32},
- {PPC::V29, -48},
- {PPC::V28, -64},
- {PPC::V27, -80},
- {PPC::V26, -96},
- {PPC::V25, -112},
- {PPC::V24, -128},
- {PPC::V23, -144},
- {PPC::V22, -160},
- {PPC::V21, -176},
- {PPC::V20, -192}
- };
-
- static const SpillSlot Offsets64[] = {
- // Floating-point register save area offsets.
- {PPC::F31, -8},
- {PPC::F30, -16},
- {PPC::F29, -24},
- {PPC::F28, -32},
- {PPC::F27, -40},
- {PPC::F26, -48},
- {PPC::F25, -56},
- {PPC::F24, -64},
- {PPC::F23, -72},
- {PPC::F22, -80},
- {PPC::F21, -88},
- {PPC::F20, -96},
- {PPC::F19, -104},
- {PPC::F18, -112},
- {PPC::F17, -120},
- {PPC::F16, -128},
- {PPC::F15, -136},
- {PPC::F14, -144},
-
- // General register save area offsets.
- {PPC::X31, -8},
- {PPC::X30, -16},
- {PPC::X29, -24},
- {PPC::X28, -32},
- {PPC::X27, -40},
- {PPC::X26, -48},
- {PPC::X25, -56},
- {PPC::X24, -64},
- {PPC::X23, -72},
- {PPC::X22, -80},
- {PPC::X21, -88},
- {PPC::X20, -96},
- {PPC::X19, -104},
- {PPC::X18, -112},
- {PPC::X17, -120},
- {PPC::X16, -128},
- {PPC::X15, -136},
- {PPC::X14, -144},
-
- // VRSAVE save area offset.
- {PPC::VRSAVE, -4},
-
- // Vector register save area
- {PPC::V31, -16},
- {PPC::V30, -32},
- {PPC::V29, -48},
- {PPC::V28, -64},
- {PPC::V27, -80},
- {PPC::V26, -96},
- {PPC::V25, -112},
- {PPC::V24, -128},
- {PPC::V23, -144},
- {PPC::V22, -160},
- {PPC::V21, -176},
- {PPC::V20, -192}
- };
-
- if (Subtarget.isPPC64()) {
- NumEntries = array_lengthof(Offsets64);
-
- return Offsets64;
- } else {
- NumEntries = array_lengthof(Offsets);
-
- return Offsets;
- }
- }
+ getCalleeSavedSpillSlots(unsigned &NumEntries) const override;
};
-
} // End llvm namespace
#endif
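
getTOCSaveOffset()'s constant 40 is the TOC slot of the 64-bit SVR4 (ELFv1) linkage area. A sketch of the layout this assumes, relative to the stack pointer at the call (the enum is illustrative, not part of the patch):

// Sketch: the 64-bit SVR4 linkage-area slots that make
// getTOCSaveOffset() == 40 and getLinkageSize(true, false) == 48.
enum PPC64LinkageSlots : unsigned {
  BackChainOffset = 0,   // caller's back chain pointer
  CRSaveOffset    = 8,   // condition register save word
  LRSaveOffset    = 16,  // link register save doubleword
  CompilerDword   = 24,  // reserved for the compiler
  LinkerDword     = 32,  // reserved for the linker
  TOCSaveSlot     = 40,  // r2 (TOC) save slot -- matches getTOCSaveOffset()
  ParamSaveStart  = 48   // start of the parameter save area
};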
diff --git a/lib/Target/PowerPC/PPCHazardRecognizers.cpp b/lib/Target/PowerPC/PPCHazardRecognizers.cpp
index 7ca706b..d9b242c 100644
--- a/lib/Target/PowerPC/PPCHazardRecognizers.cpp
+++ b/lib/Target/PowerPC/PPCHazardRecognizers.cpp
@@ -162,7 +162,8 @@ unsigned PPCDispatchGroupSBHazardRecognizer::PreEmitNoops(SUnit *SU) {
unsigned Directive =
DAG->TM.getSubtarget<PPCSubtarget>().getDarwinDirective();
// If we're using a special group-terminating nop, then we need only one.
- if (Directive == PPC::DIR_PWR6 || Directive == PPC::DIR_PWR7)
+ if (Directive == PPC::DIR_PWR6 || Directive == PPC::DIR_PWR7 ||
+      Directive == PPC::DIR_PWR8)
return 1;
return 5 - CurSlots;
@@ -223,7 +224,7 @@ void PPCDispatchGroupSBHazardRecognizer::EmitNoop() {
// If the group has now filled all of its slots, or if we're using a special
// group-terminating nop, the group is complete.
if (Directive == PPC::DIR_PWR6 || Directive == PPC::DIR_PWR7 ||
- CurSlots == 6) {
+ Directive == PPC::DIR_PWR8 || CurSlots == 6) {
CurGroup.clear();
CurSlots = CurBranches = 0;
} else {
@@ -258,8 +259,8 @@ void PPCDispatchGroupSBHazardRecognizer::EmitNoop() {
// 3. Handling of the esoteric cases in "Resource-based Instruction Grouping".
//
-PPCHazardRecognizer970::PPCHazardRecognizer970(const TargetMachine &TM)
- : TM(TM) {
+PPCHazardRecognizer970::PPCHazardRecognizer970(const ScheduleDAG &DAG)
+ : DAG(DAG) {
EndDispatchGroup();
}
@@ -278,7 +279,7 @@ PPCHazardRecognizer970::GetInstrType(unsigned Opcode,
bool &isFirst, bool &isSingle,
bool &isCracked,
bool &isLoad, bool &isStore) {
- const MCInstrDesc &MCID = TM.getInstrInfo()->get(Opcode);
+ const MCInstrDesc &MCID = DAG.TII->get(Opcode);
isLoad = MCID.mayLoad();
isStore = MCID.mayStore();
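
A compressed sketch of the noop policy after this change, with Directive and CurSlots as in the recognizer above: POWER6/7/8 support a group-terminating nop, so a single noop closes the dispatch group; otherwise the group is padded out to its five slots.

// Sketch only: mirrors PreEmitNoops()'s return value above.
static unsigned noopsToEmit(unsigned Directive, unsigned CurSlots) {
  if (Directive == PPC::DIR_PWR6 || Directive == PPC::DIR_PWR7 ||
      Directive == PPC::DIR_PWR8)
    return 1;           // special group-terminating nop
  return 5 - CurSlots;  // pad the remaining dispatch slots
}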
diff --git a/lib/Target/PowerPC/PPCHazardRecognizers.h b/lib/Target/PowerPC/PPCHazardRecognizers.h
index cf4332c..23f76c1 100644
--- a/lib/Target/PowerPC/PPCHazardRecognizers.h
+++ b/lib/Target/PowerPC/PPCHazardRecognizers.h
@@ -54,7 +54,7 @@ public:
/// setting the CTR register then branching through it within a dispatch group),
/// or storing then loading from the same address within a dispatch group.
class PPCHazardRecognizer970 : public ScheduleHazardRecognizer {
- const TargetMachine &TM;
+ const ScheduleDAG &DAG;
unsigned NumIssued; // Number of insts issued, including advanced cycles.
@@ -75,7 +75,7 @@ class PPCHazardRecognizer970 : public ScheduleHazardRecognizer {
unsigned NumStores;
public:
- PPCHazardRecognizer970(const TargetMachine &TM);
+ PPCHazardRecognizer970(const ScheduleDAG &DAG);
virtual HazardType getHazardType(SUnit *SU, int Stalls) override;
virtual void EmitInstruction(SUnit *SU) override;
virtual void AdvanceCycle() override;
diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index 251e8b6..4881b3f 100644
--- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -1454,10 +1454,10 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
if (CModel != CodeModel::Medium && CModel != CodeModel::Large)
break;
- // The first source operand is a TargetGlobalAddress or a
- // TargetJumpTable. If it is an externally defined symbol, a symbol
- // with common linkage, a function address, or a jump table address,
- // or if we are generating code for large code model, we generate:
+ // The first source operand is a TargetGlobalAddress or a TargetJumpTable.
+ // If it is an externally defined symbol, a symbol with common linkage,
+ // a non-local function address, or a jump table address, or if we are
+ // generating code for large code model, we generate:
// LDtocL(<ga:@sym>, ADDIStocHA(%X2, <ga:@sym>))
// Otherwise we generate:
// ADDItocL(ADDIStocHA(%X2, <ga:@sym>), <ga:@sym>)
@@ -1472,8 +1472,10 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(GA)) {
const GlobalValue *GValue = G->getGlobal();
- if (GValue->isDeclaration() || GValue->hasCommonLinkage() ||
- GValue->hasAvailableExternallyLinkage())
+ if ((GValue->getType()->getElementType()->isFunctionTy() &&
+ (GValue->isDeclaration() || GValue->isWeakForLinker())) ||
+ GValue->isDeclaration() || GValue->hasCommonLinkage() ||
+ GValue->hasAvailableExternallyLinkage())
return CurDAG->getMachineNode(PPC::LDtocL, dl, MVT::i64, GA,
SDValue(Tmp, 0));
}
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index cf4c9e6..bc057bf 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -19,6 +19,7 @@
#include "PPCTargetObjectFile.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/Triple.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -50,20 +51,18 @@ cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden);
// FIXME: Remove this once the bug has been fixed!
extern cl::opt<bool> ANDIGlueBug;
-static TargetLoweringObjectFile *CreateTLOF(const PPCTargetMachine &TM) {
- if (TM.getSubtargetImpl()->isDarwin())
+static TargetLoweringObjectFile *createTLOF(const Triple &TT) {
+  // If it isn't a Mach-O file, then it's going to be a Linux ELF
+  // object file.
+ if (TT.isOSDarwin())
return new TargetLoweringObjectFileMachO();
- if (TM.getSubtargetImpl()->isSVR4ABI())
- return new PPC64LinuxTargetObjectFile();
-
- return new TargetLoweringObjectFileELF();
+ return new PPC64LinuxTargetObjectFile();
}
PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
- : TargetLowering(TM, CreateTLOF(TM)), PPCSubTarget(*TM.getSubtargetImpl()) {
- const PPCSubtarget *Subtarget = &TM.getSubtarget<PPCSubtarget>();
-
+ : TargetLowering(TM, createTLOF(Triple(TM.getTargetTriple()))),
+ Subtarget(*TM.getSubtargetImpl()) {
setPow2DivIsCheap();
// Use _setjmp/_longjmp instead of setjmp/longjmp.
@@ -72,7 +71,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
// On PPC32/64, arguments smaller than 4/8 bytes are extended, so all
// arguments are at least 4/8 bytes aligned.
- bool isPPC64 = Subtarget->isPPC64();
+ bool isPPC64 = Subtarget.isPPC64();
setMinStackArgumentAlignment(isPPC64 ? 8:4);
// Set up the register classes.
@@ -98,10 +97,10 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setIndexedStoreAction(ISD::PRE_INC, MVT::i32, Legal);
setIndexedStoreAction(ISD::PRE_INC, MVT::i64, Legal);
- if (Subtarget->useCRBits()) {
+ if (Subtarget.useCRBits()) {
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
- if (isPPC64 || Subtarget->hasFPCVT()) {
+ if (isPPC64 || Subtarget.hasFPCVT()) {
setOperationAction(ISD::SINT_TO_FP, MVT::i1, Promote);
AddPromotedToType (ISD::SINT_TO_FP, MVT::i1,
isPPC64 ? MVT::i64 : MVT::i32);
@@ -176,17 +175,17 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
// If we're enabling GP optimizations, use hardware square root
- if (!Subtarget->hasFSQRT() &&
+ if (!Subtarget.hasFSQRT() &&
!(TM.Options.UnsafeFPMath &&
- Subtarget->hasFRSQRTE() && Subtarget->hasFRE()))
+ Subtarget.hasFRSQRTE() && Subtarget.hasFRE()))
setOperationAction(ISD::FSQRT, MVT::f64, Expand);
- if (!Subtarget->hasFSQRT() &&
+ if (!Subtarget.hasFSQRT() &&
!(TM.Options.UnsafeFPMath &&
- Subtarget->hasFRSQRTES() && Subtarget->hasFRES()))
+ Subtarget.hasFRSQRTES() && Subtarget.hasFRES()))
setOperationAction(ISD::FSQRT, MVT::f32, Expand);
- if (Subtarget->hasFCPSGN()) {
+ if (Subtarget.hasFCPSGN()) {
setOperationAction(ISD::FCOPYSIGN, MVT::f64, Legal);
setOperationAction(ISD::FCOPYSIGN, MVT::f32, Legal);
} else {
@@ -194,7 +193,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
}
- if (Subtarget->hasFPRND()) {
+ if (Subtarget.hasFPRND()) {
setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
setOperationAction(ISD::FCEIL, MVT::f64, Legal);
setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
@@ -216,7 +215,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand);
setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Expand);
- if (Subtarget->hasPOPCNTD()) {
+ if (Subtarget.hasPOPCNTD()) {
setOperationAction(ISD::CTPOP, MVT::i32 , Legal);
setOperationAction(ISD::CTPOP, MVT::i64 , Legal);
} else {
@@ -228,7 +227,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setOperationAction(ISD::ROTR, MVT::i32 , Expand);
setOperationAction(ISD::ROTR, MVT::i64 , Expand);
- if (!Subtarget->useCRBits()) {
+ if (!Subtarget.useCRBits()) {
// PowerPC does not have Select
setOperationAction(ISD::SELECT, MVT::i32, Expand);
setOperationAction(ISD::SELECT, MVT::i64, Expand);
@@ -241,11 +240,11 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
// PowerPC wants to optimize integer setcc a bit
- if (!Subtarget->useCRBits())
+ if (!Subtarget.useCRBits())
setOperationAction(ISD::SETCC, MVT::i32, Custom);
// PowerPC does not have BRCOND which requires SetCC
- if (!Subtarget->useCRBits())
+ if (!Subtarget.useCRBits())
setOperationAction(ISD::BRCOND, MVT::Other, Expand);
setOperationAction(ISD::BR_JT, MVT::Other, Expand);
@@ -297,7 +296,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
// VASTART needs to be custom lowered to use the VarArgsFrameIndex
setOperationAction(ISD::VASTART , MVT::Other, Custom);
- if (Subtarget->isSVR4ABI()) {
+ if (Subtarget.isSVR4ABI()) {
if (isPPC64) {
// VAARG always uses double-word chunks, so promote anything smaller.
setOperationAction(ISD::VAARG, MVT::i1, Promote);
@@ -317,7 +316,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
} else
setOperationAction(ISD::VAARG, MVT::Other, Expand);
- if (Subtarget->isSVR4ABI() && !isPPC64)
+ if (Subtarget.isSVR4ABI() && !isPPC64)
// VACOPY is custom lowered with the 32-bit SVR4 ABI.
setOperationAction(ISD::VACOPY , MVT::Other, Custom);
else
@@ -350,7 +349,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
setCondCodeAction(ISD::SETONE, MVT::f64, Expand);
- if (Subtarget->has64BitSupport()) {
+ if (Subtarget.has64BitSupport()) {
// They also have instructions for converting between i64 and fp.
setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
@@ -360,7 +359,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
// We cannot do this with Promote because i64 is not a legal type.
setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
- if (PPCSubTarget.hasLFIWAX() || Subtarget->isPPC64())
+ if (Subtarget.hasLFIWAX() || Subtarget.isPPC64())
setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
} else {
// PowerPC does not have FP_TO_UINT on 32-bit implementations.
@@ -368,8 +367,8 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
}
// With the instructions enabled under FPCVT, we can do everything.
- if (PPCSubTarget.hasFPCVT()) {
- if (Subtarget->has64BitSupport()) {
+ if (Subtarget.hasFPCVT()) {
+ if (Subtarget.has64BitSupport()) {
setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
@@ -382,7 +381,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
}
- if (Subtarget->use64BitRegs()) {
+ if (Subtarget.use64BitRegs()) {
// 64-bit PowerPC implementations can support i64 types directly
addRegisterClass(MVT::i64, &PPC::G8RCRegClass);
// BUILD_PAIR can't be handled natively, and should be expanded to shl/or
@@ -398,7 +397,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
}
- if (Subtarget->hasAltivec()) {
+ if (Subtarget.hasAltivec()) {
// First set operation action for all vector types to expand. Then we
// will selectively turn on ones that can be effectively codegen'd.
for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
@@ -488,7 +487,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setOperationAction(ISD::XOR , MVT::v4i32, Legal);
setOperationAction(ISD::LOAD , MVT::v4i32, Legal);
setOperationAction(ISD::SELECT, MVT::v4i32,
- Subtarget->useCRBits() ? Legal : Expand);
+ Subtarget.useCRBits() ? Legal : Expand);
setOperationAction(ISD::STORE , MVT::v4i32, Legal);
setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
@@ -507,7 +506,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setOperationAction(ISD::MUL, MVT::v4f32, Legal);
setOperationAction(ISD::FMA, MVT::v4f32, Legal);
- if (TM.Options.UnsafeFPMath || Subtarget->hasVSX()) {
+ if (TM.Options.UnsafeFPMath || Subtarget.hasVSX()) {
setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
}
@@ -535,7 +534,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setCondCodeAction(ISD::SETO, MVT::v4f32, Expand);
setCondCodeAction(ISD::SETONE, MVT::v4f32, Expand);
- if (Subtarget->hasVSX()) {
+ if (Subtarget.hasVSX()) {
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal);
@@ -613,7 +612,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
}
}
- if (Subtarget->has64BitSupport()) {
+ if (Subtarget.has64BitSupport()) {
setOperationAction(ISD::PREFETCH, MVT::Other, Legal);
setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Legal);
}
@@ -642,7 +641,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setTargetDAGCombine(ISD::LOAD);
setTargetDAGCombine(ISD::STORE);
setTargetDAGCombine(ISD::BR_CC);
- if (Subtarget->useCRBits())
+ if (Subtarget.useCRBits())
setTargetDAGCombine(ISD::BRCOND);
setTargetDAGCombine(ISD::BSWAP);
setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
@@ -651,7 +650,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setTargetDAGCombine(ISD::ZERO_EXTEND);
setTargetDAGCombine(ISD::ANY_EXTEND);
- if (Subtarget->useCRBits()) {
+ if (Subtarget.useCRBits()) {
setTargetDAGCombine(ISD::TRUNCATE);
setTargetDAGCombine(ISD::SETCC);
setTargetDAGCombine(ISD::SELECT_CC);
@@ -664,7 +663,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
}
// Darwin long double math library functions have $LDBL128 appended.
- if (Subtarget->isDarwin()) {
+ if (Subtarget.isDarwin()) {
setLibcallName(RTLIB::COS_PPCF128, "cosl$LDBL128");
setLibcallName(RTLIB::POW_PPCF128, "powl$LDBL128");
setLibcallName(RTLIB::REM_PPCF128, "fmodl$LDBL128");
@@ -679,21 +678,21 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
// With 32 condition bits, we don't need to sink (and duplicate) compares
// aggressively in CodeGenPrep.
- if (Subtarget->useCRBits())
+ if (Subtarget.useCRBits())
setHasMultipleConditionRegisters();
setMinFunctionAlignment(2);
- if (PPCSubTarget.isDarwin())
+ if (Subtarget.isDarwin())
setPrefFunctionAlignment(4);
- if (isPPC64 && Subtarget->isJITCodeModel())
+ if (isPPC64 && Subtarget.isJITCodeModel())
// Temporary workaround for the inability of PPC64 JIT to handle jump
// tables.
setSupportJumpTables(false);
setInsertFencesForAtomic(true);
- if (Subtarget->enableMachineScheduler())
+ if (Subtarget.enableMachineScheduler())
setSchedulingPreference(Sched::Source);
else
setSchedulingPreference(Sched::Hybrid);
@@ -702,8 +701,8 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
// The Freescale cores does better with aggressive inlining of memcpy and
// friends. Gcc uses same threshold of 128 bytes (= 32 word stores).
- if (Subtarget->getDarwinDirective() == PPC::DIR_E500mc ||
- Subtarget->getDarwinDirective() == PPC::DIR_E5500) {
+ if (Subtarget.getDarwinDirective() == PPC::DIR_E500mc ||
+ Subtarget.getDarwinDirective() == PPC::DIR_E5500) {
MaxStoresPerMemset = 32;
MaxStoresPerMemsetOptSize = 16;
MaxStoresPerMemcpy = 32;
@@ -747,14 +746,14 @@ static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign,
/// function arguments in the caller parameter area.
unsigned PPCTargetLowering::getByValTypeAlignment(Type *Ty) const {
// Darwin passes everything on 4 byte boundary.
- if (PPCSubTarget.isDarwin())
+ if (Subtarget.isDarwin())
return 4;
// 16byte and wider vectors are passed on 16byte boundary.
// The rest is 8 on PPC64 and 4 on PPC32 boundary.
- unsigned Align = PPCSubTarget.isPPC64() ? 8 : 4;
- if (PPCSubTarget.hasAltivec() || PPCSubTarget.hasQPX())
- getMaxByValAlign(Ty, Align, PPCSubTarget.hasQPX() ? 32 : 16);
+ unsigned Align = Subtarget.isPPC64() ? 8 : 4;
+ if (Subtarget.hasAltivec() || Subtarget.hasQPX())
+ getMaxByValAlign(Ty, Align, Subtarget.hasQPX() ? 32 : 16);
return Align;
}
@@ -774,7 +773,6 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::Hi: return "PPCISD::Hi";
case PPCISD::Lo: return "PPCISD::Lo";
case PPCISD::TOC_ENTRY: return "PPCISD::TOC_ENTRY";
- case PPCISD::TOC_RESTORE: return "PPCISD::TOC_RESTORE";
case PPCISD::LOAD: return "PPCISD::LOAD";
case PPCISD::LOAD_TOC: return "PPCISD::LOAD_TOC";
case PPCISD::DYNALLOC: return "PPCISD::DYNALLOC";
@@ -826,7 +824,7 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
EVT PPCTargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
if (!VT.isVector())
- return PPCSubTarget.useCRBits() ? MVT::i1 : MVT::i32;
+ return Subtarget.useCRBits() ? MVT::i1 : MVT::i32;
return VT.changeVectorElementTypeToInteger();
}
@@ -855,15 +853,17 @@ static bool isConstantOrUndef(int Op, int Val) {
/// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUHUM instruction.
-bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary) {
+bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary,
+ SelectionDAG &DAG) {
+ unsigned j = DAG.getTarget().getDataLayout()->isLittleEndian() ? 0 : 1;
if (!isUnary) {
for (unsigned i = 0; i != 16; ++i)
- if (!isConstantOrUndef(N->getMaskElt(i), i*2+1))
+ if (!isConstantOrUndef(N->getMaskElt(i), i*2+j))
return false;
} else {
for (unsigned i = 0; i != 8; ++i)
- if (!isConstantOrUndef(N->getMaskElt(i), i*2+1) ||
- !isConstantOrUndef(N->getMaskElt(i+8), i*2+1))
+ if (!isConstantOrUndef(N->getMaskElt(i), i*2+j) ||
+ !isConstantOrUndef(N->getMaskElt(i+8), i*2+j))
return false;
}
return true;
@@ -871,18 +871,27 @@ bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary) {
/// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUWUM instruction.
-bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary) {
+bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary,
+ SelectionDAG &DAG) {
+ unsigned j, k;
+ if (DAG.getTarget().getDataLayout()->isLittleEndian()) {
+ j = 0;
+ k = 1;
+ } else {
+ j = 2;
+ k = 3;
+ }
if (!isUnary) {
for (unsigned i = 0; i != 16; i += 2)
- if (!isConstantOrUndef(N->getMaskElt(i ), i*2+2) ||
- !isConstantOrUndef(N->getMaskElt(i+1), i*2+3))
+ if (!isConstantOrUndef(N->getMaskElt(i ), i*2+j) ||
+ !isConstantOrUndef(N->getMaskElt(i+1), i*2+k))
return false;
} else {
for (unsigned i = 0; i != 8; i += 2)
- if (!isConstantOrUndef(N->getMaskElt(i ), i*2+2) ||
- !isConstantOrUndef(N->getMaskElt(i+1), i*2+3) ||
- !isConstantOrUndef(N->getMaskElt(i+8), i*2+2) ||
- !isConstantOrUndef(N->getMaskElt(i+9), i*2+3))
+ if (!isConstantOrUndef(N->getMaskElt(i ), i*2+j) ||
+ !isConstantOrUndef(N->getMaskElt(i+1), i*2+k) ||
+ !isConstantOrUndef(N->getMaskElt(i+8), i*2+j) ||
+ !isConstantOrUndef(N->getMaskElt(i+9), i*2+k))
return false;
}
return true;
@@ -909,27 +918,39 @@ static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize,
}
/// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
-/// a VRGL* instruction with the specified unit size (1,2 or 4 bytes).
+/// a VMRGL* instruction with the specified unit size (1,2 or 4 bytes).
bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
- bool isUnary) {
- if (!isUnary)
- return isVMerge(N, UnitSize, 8, 24);
- return isVMerge(N, UnitSize, 8, 8);
+ bool isUnary, SelectionDAG &DAG) {
+ if (DAG.getTarget().getDataLayout()->isLittleEndian()) {
+ if (!isUnary)
+ return isVMerge(N, UnitSize, 0, 16);
+ return isVMerge(N, UnitSize, 0, 0);
+ } else {
+ if (!isUnary)
+ return isVMerge(N, UnitSize, 8, 24);
+ return isVMerge(N, UnitSize, 8, 8);
+ }
}
/// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
-/// a VRGH* instruction with the specified unit size (1,2 or 4 bytes).
+/// a VMRGH* instruction with the specified unit size (1,2 or 4 bytes).
bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
- bool isUnary) {
- if (!isUnary)
- return isVMerge(N, UnitSize, 0, 16);
- return isVMerge(N, UnitSize, 0, 0);
+ bool isUnary, SelectionDAG &DAG) {
+ if (DAG.getTarget().getDataLayout()->isLittleEndian()) {
+ if (!isUnary)
+ return isVMerge(N, UnitSize, 8, 24);
+ return isVMerge(N, UnitSize, 8, 8);
+ } else {
+ if (!isUnary)
+ return isVMerge(N, UnitSize, 0, 16);
+ return isVMerge(N, UnitSize, 0, 0);
+ }
}
/// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
/// amount, otherwise return -1.
-int PPC::isVSLDOIShuffleMask(SDNode *N, bool isUnary) {
+int PPC::isVSLDOIShuffleMask(SDNode *N, bool isUnary, SelectionDAG &DAG) {
if (N->getValueType(0) != MVT::v16i8)
return -1;
@@ -946,18 +967,38 @@ int PPC::isVSLDOIShuffleMask(SDNode *N, bool isUnary) {
// numbered from this value.
unsigned ShiftAmt = SVOp->getMaskElt(i);
if (ShiftAmt < i) return -1;
- ShiftAmt -= i;
- if (!isUnary) {
- // Check the rest of the elements to see if they are consecutive.
- for (++i; i != 16; ++i)
- if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
- return -1;
- } else {
- // Check the rest of the elements to see if they are consecutive.
- for (++i; i != 16; ++i)
- if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15))
- return -1;
+ if (DAG.getTarget().getDataLayout()->isLittleEndian()) {
+
+ ShiftAmt += i;
+
+ if (!isUnary) {
+ // Check the rest of the elements to see if they are consecutive.
+ for (++i; i != 16; ++i)
+ if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt - i))
+ return -1;
+ } else {
+ // Check the rest of the elements to see if they are consecutive.
+ for (++i; i != 16; ++i)
+ if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt - i) & 15))
+ return -1;
+ }
+
+ } else { // Big Endian
+
+ ShiftAmt -= i;
+
+ if (!isUnary) {
+ // Check the rest of the elements to see if they are consecutive.
+ for (++i; i != 16; ++i)
+ if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
+ return -1;
+ } else {
+ // Check the rest of the elements to see if they are consecutive.
+ for (++i; i != 16; ++i)
+ if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15))
+ return -1;
+ }
}
return ShiftAmt;
}
@@ -1010,10 +1051,14 @@ bool PPC::isAllNegativeZeroVector(SDNode *N) {
/// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the
/// specified isSplatShuffleMask VECTOR_SHUFFLE mask.
-unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize) {
+unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize,
+ SelectionDAG &DAG) {
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
assert(isSplatShuffleMask(SVOp, EltSize));
- return SVOp->getMaskElt(0) / EltSize;
+ if (DAG.getTarget().getDataLayout()->isLittleEndian())
+ return (16 / EltSize) - 1 - (SVOp->getMaskElt(0) / EltSize);
+ else
+ return SVOp->getMaskElt(0) / EltSize;
}
/// get_VSPLTI_elt - If this is a build_vector of constants which can be formed
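
A small worked example of the little-endian adjustment in getVSPLTImmediate: for a word splat (EltSize == 4) whose first mask element is 5, the big-endian immediate is 5 / 4 == 1, while on little-endian the lanes are numbered from the other end, giving (16 / 4) - 1 - 1 == 2. A sketch of the formula (spltImmediate is an illustrative helper):

// Sketch of the splat-immediate mirroring done above.
static unsigned spltImmediate(unsigned FirstMaskElt, unsigned EltSize,
                              bool IsLittleEndian) {
  unsigned BEImm = FirstMaskElt / EltSize;
  return IsLittleEndian ? (16 / EltSize) - 1 - BEImm : BEImm;
}
// spltImmediate(5, 4, false) == 1;  spltImmediate(5, 4, true) == 2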
@@ -1299,7 +1344,7 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
short Imm;
if (isIntS16Immediate(CN, Imm) && (!Aligned || (Imm & 3) == 0)) {
Disp = DAG.getTargetConstant(Imm, CN->getValueType(0));
- Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
+ Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
CN->getValueType(0));
return true;
}
@@ -1350,7 +1395,7 @@ bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base,
}
// Otherwise, do it the hard way, using R0 as the base register.
- Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
+ Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
N.getValueType());
Index = N;
return true;
@@ -1497,7 +1542,7 @@ SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
// 64-bit SVR4 ABI code is always position-independent.
// The actual address of the GlobalValue is stored in the TOC.
- if (PPCSubTarget.isSVR4ABI() && PPCSubTarget.isPPC64()) {
+ if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0);
return DAG.getNode(PPCISD::TOC_ENTRY, SDLoc(CP), MVT::i64, GA,
DAG.getRegister(PPC::X2, MVT::i64));
@@ -1518,7 +1563,7 @@ SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
// 64-bit SVR4 ABI code is always position-independent.
// The actual address of the GlobalValue is stored in the TOC.
- if (PPCSubTarget.isSVR4ABI() && PPCSubTarget.isPPC64()) {
+ if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
return DAG.getNode(PPCISD::TOC_ENTRY, SDLoc(JT), MVT::i64, GA,
DAG.getRegister(PPC::X2, MVT::i64));
@@ -1555,7 +1600,7 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
SDLoc dl(GA);
const GlobalValue *GV = GA->getGlobal();
EVT PtrVT = getPointerTy();
- bool is64bit = PPCSubTarget.isPPC64();
+ bool is64bit = Subtarget.isPPC64();
TLSModel::Model Model = getTargetMachine().getTLSModel(GV);
@@ -1646,7 +1691,7 @@ SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
// 64-bit SVR4 ABI code is always position-independent.
// The actual address of the GlobalValue is stored in the TOC.
- if (PPCSubTarget.isSVR4ABI() && PPCSubTarget.isPPC64()) {
+ if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset());
return DAG.getNode(PPCISD::TOC_ENTRY, DL, MVT::i64, GA,
DAG.getRegister(PPC::X2, MVT::i64));
@@ -1891,7 +1936,8 @@ SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(dl).setChain(Chain)
.setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()),
- DAG.getExternalSymbol("__trampoline_setup", PtrVT), &Args, 0);
+ DAG.getExternalSymbol("__trampoline_setup", PtrVT),
+ std::move(Args), 0);
std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
return CallResult.second;
@@ -2086,6 +2132,43 @@ static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags,
return ArgSize;
}
+/// CalculateStackSlotAlignment - Calculates the alignment of this argument
+/// on the stack.
+static unsigned CalculateStackSlotAlignment(EVT ArgVT, ISD::ArgFlagsTy Flags,
+ unsigned PtrByteSize) {
+ unsigned Align = PtrByteSize;
+
+ // Altivec parameters are padded to a 16 byte boundary.
+ if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
+ ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
+ ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64)
+ Align = 16;
+
+ // ByVal parameters are aligned as requested.
+ if (Flags.isByVal()) {
+ unsigned BVAlign = Flags.getByValAlign();
+ if (BVAlign > PtrByteSize) {
+ if (BVAlign % PtrByteSize != 0)
+ llvm_unreachable(
+ "ByVal alignment is not a multiple of the pointer size");
+
+ Align = BVAlign;
+ }
+ }
+
+ return Align;
+}
+
+/// EnsureStackAlignment - Round stack frame size up from NumBytes to
+/// ensure minimum alignment required for target.
+static unsigned EnsureStackAlignment(const TargetMachine &Target,
+ unsigned NumBytes) {
+ unsigned TargetAlign = Target.getFrameLowering()->getStackAlignment();
+ unsigned AlignMask = TargetAlign - 1;
+ NumBytes = (NumBytes + AlignMask) & ~AlignMask;
+ return NumBytes;
+}
+
SDValue
PPCTargetLowering::LowerFormalArguments(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
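
A quick usage sketch of EnsureStackAlignment: the helper simply rounds the reserved area up with an alignment mask (the 16-byte alignment is an assumption about the target's stack alignment, not fixed by the patch).

// Sketch: rounding a reserved-area size up to the target stack alignment.
static unsigned roundUpToStackAlign(unsigned NumBytes, unsigned TargetAlign) {
  unsigned AlignMask = TargetAlign - 1;
  return (NumBytes + AlignMask) & ~AlignMask;  // e.g. 116 -> 128 for 16
}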
@@ -2094,8 +2177,8 @@ PPCTargetLowering::LowerFormalArguments(SDValue Chain,
SDLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals)
const {
- if (PPCSubTarget.isSVR4ABI()) {
- if (PPCSubTarget.isPPC64())
+ if (Subtarget.isSVR4ABI()) {
+ if (Subtarget.isPPC64())
return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins,
dl, DAG, InVals);
else
@@ -2161,7 +2244,8 @@ PPCTargetLowering::LowerFormalArguments_32SVR4(
getTargetMachine(), ArgLocs, *DAG.getContext());
// Reserve space for the linkage area on the stack.
- CCInfo.AllocateStack(PPCFrameLowering::getLinkageSize(false, false), PtrByteSize);
+ unsigned LinkageSize = PPCFrameLowering::getLinkageSize(false, false);
+ CCInfo.AllocateStack(LinkageSize, PtrByteSize);
CCInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4);
@@ -2184,7 +2268,7 @@ PPCTargetLowering::LowerFormalArguments_32SVR4(
RC = &PPC::F4RCRegClass;
break;
case MVT::f64:
- if (PPCSubTarget.hasVSX())
+ if (Subtarget.hasVSX())
RC = &PPC::VSFRCRegClass;
else
RC = &PPC::F8RCRegClass;
@@ -2240,23 +2324,14 @@ PPCTargetLowering::LowerFormalArguments_32SVR4(
// Area that is at least reserved in the caller of this function.
unsigned MinReservedArea = CCByValInfo.getNextStackOffset();
+ MinReservedArea = std::max(MinReservedArea, LinkageSize);
// Set the size that is at least reserved in caller of this function. Tail
// call optimized function's reserved stack space needs to be aligned so that
// taking the difference between two stack areas will result in an aligned
// stack.
- PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
-
- MinReservedArea =
- std::max(MinReservedArea,
- PPCFrameLowering::getMinCallFrameSize(false, false));
-
- unsigned TargetAlign = DAG.getMachineFunction().getTarget().getFrameLowering()->
- getStackAlignment();
- unsigned AlignMask = TargetAlign-1;
- MinReservedArea = (MinReservedArea + AlignMask) & ~AlignMask;
-
- FI->setMinReservedArea(MinReservedArea);
+ MinReservedArea = EnsureStackAlignment(MF.getTarget(), MinReservedArea);
+ FuncInfo->setMinReservedArea(MinReservedArea);
SmallVector<SDValue, 8> MemOps;
@@ -2352,32 +2427,6 @@ PPCTargetLowering::extendArgForPPC64(ISD::ArgFlagsTy Flags, EVT ObjectVT,
return DAG.getNode(ISD::TRUNCATE, dl, ObjectVT, ArgVal);
}
-// Set the size that is at least reserved in caller of this function. Tail
-// call optimized functions' reserved stack space needs to be aligned so that
-// taking the difference between two stack areas will result in an aligned
-// stack.
-void
-PPCTargetLowering::setMinReservedArea(MachineFunction &MF, SelectionDAG &DAG,
- unsigned nAltivecParamsAtEnd,
- unsigned MinReservedArea,
- bool isPPC64) const {
- PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
- // Add the Altivec parameters at the end, if needed.
- if (nAltivecParamsAtEnd) {
- MinReservedArea = ((MinReservedArea+15)/16)*16;
- MinReservedArea += 16*nAltivecParamsAtEnd;
- }
- MinReservedArea =
- std::max(MinReservedArea,
- PPCFrameLowering::getMinCallFrameSize(isPPC64, true));
- unsigned TargetAlign
- = DAG.getMachineFunction().getTarget().getFrameLowering()->
- getStackAlignment();
- unsigned AlignMask = TargetAlign-1;
- MinReservedArea = (MinReservedArea + AlignMask) & ~AlignMask;
- FI->setMinReservedArea(MinReservedArea);
-}
-
SDValue
PPCTargetLowering::LowerFormalArguments_64SVR4(
SDValue Chain,
@@ -2388,6 +2437,7 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
SmallVectorImpl<SDValue> &InVals) const {
// TODO: add description of PPC stack frame format, or at least some docs.
//
+ bool isLittleEndian = Subtarget.isLittleEndian();
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
@@ -2398,9 +2448,8 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
(CallConv == CallingConv::Fast));
unsigned PtrByteSize = 8;
- unsigned ArgOffset = PPCFrameLowering::getLinkageSize(true, true);
- // Area that is at least reserved in caller of this function.
- unsigned MinReservedArea = ArgOffset;
+ unsigned LinkageSize = PPCFrameLowering::getLinkageSize(true, false);
+ unsigned ArgOffset = LinkageSize;
static const MCPhysReg GPR[] = {
PPC::X3, PPC::X4, PPC::X5, PPC::X6,
@@ -2422,14 +2471,13 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
const unsigned Num_FPR_Regs = 13;
const unsigned Num_VR_Regs = array_lengthof(VR);
- unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
+ unsigned GPR_idx, FPR_idx = 0, VR_idx = 0;
// Add DAG nodes to load the arguments or copy them out of registers. On
// entry to a function on PPC, the arguments start after the linkage area,
// although the first ones are often in registers.
SmallVector<SDValue, 8> MemOps;
- unsigned nAltivecParamsAtEnd = 0;
Function::const_arg_iterator FuncArg = MF.getFunction()->arg_begin();
unsigned CurArgIdx = 0;
for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
@@ -2442,24 +2490,15 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
std::advance(FuncArg, Ins[ArgNo].OrigArgIndex - CurArgIdx);
CurArgIdx = Ins[ArgNo].OrigArgIndex;
+    // Respect alignment of argument on the stack.
+ unsigned Align =
+ CalculateStackSlotAlignment(ObjectVT, Flags, PtrByteSize);
+ ArgOffset = ((ArgOffset + Align - 1) / Align) * Align;
unsigned CurArgOffset = ArgOffset;
- // Varargs or 64 bit Altivec parameters are padded to a 16 byte boundary.
- if (ObjectVT==MVT::v4f32 || ObjectVT==MVT::v4i32 ||
- ObjectVT==MVT::v8i16 || ObjectVT==MVT::v16i8 ||
- ObjectVT==MVT::v2f64 || ObjectVT==MVT::v2i64) {
- if (isVarArg) {
- MinReservedArea = ((MinReservedArea+15)/16)*16;
- MinReservedArea += CalculateStackSlotSize(ObjectVT,
- Flags,
- PtrByteSize);
- } else
- nAltivecParamsAtEnd++;
- } else
- // Calculate min reserved area.
- MinReservedArea += CalculateStackSlotSize(Ins[ArgNo].VT,
- Flags,
- PtrByteSize);
+    // Compute GPR index associated with argument offset.
+ GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
+ GPR_idx = std::min(GPR_idx, Num_GPR_Regs);
// FIXME the codegen can be much improved in some cases.
// We do not have to keep everything in memory.
@@ -2481,14 +2520,8 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
continue;
}
- unsigned BVAlign = Flags.getByValAlign();
- if (BVAlign > 8) {
- ArgOffset = ((ArgOffset+BVAlign-1)/BVAlign)*BVAlign;
- CurArgOffset = ArgOffset;
- }
-
// All aggregates smaller than 8 bytes must be passed right-justified.
- if (ObjSize < PtrByteSize)
+ if (ObjSize < PtrByteSize && !isLittleEndian)
CurArgOffset = CurArgOffset + (PtrByteSize - ObjSize);
// The value of the object is its address.
int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset, true);
@@ -2522,7 +2555,6 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
}
MemOps.push_back(Store);
- ++GPR_idx;
}
// Whether we copied from a register or not, advance the offset
// into the parameter save area by a full doubleword.
@@ -2567,8 +2599,6 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
// PPC64 passes i8, i16, and i32 values in i64 registers. Promote
// value to MVT::i64 and then truncate to the correct register size.
ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
-
- ++GPR_idx;
} else {
needsLoad = true;
ArgSize = PtrByteSize;
@@ -2578,18 +2608,13 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
case MVT::f32:
case MVT::f64:
- // Every 8 bytes of argument space consumes one of the GPRs available for
- // argument passing.
- if (GPR_idx != Num_GPR_Regs) {
- ++GPR_idx;
- }
if (FPR_idx != Num_FPR_Regs) {
unsigned VReg;
if (ObjectVT == MVT::f32)
VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F4RCRegClass);
else
- VReg = MF.addLiveIn(FPR[FPR_idx], PPCSubTarget.hasVSX() ?
+ VReg = MF.addLiveIn(FPR[FPR_idx], Subtarget.hasVSX() ?
&PPC::VSFRCRegClass :
&PPC::F8RCRegClass);
@@ -2608,39 +2633,25 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
case MVT::v16i8:
case MVT::v2f64:
case MVT::v2i64:
- // Note that vector arguments in registers don't reserve stack space,
- // except in varargs functions.
if (VR_idx != Num_VR_Regs) {
unsigned VReg = (ObjectVT == MVT::v2f64 || ObjectVT == MVT::v2i64) ?
MF.addLiveIn(VSRH[VR_idx], &PPC::VSHRCRegClass) :
MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
- if (isVarArg) {
- while ((ArgOffset % 16) != 0) {
- ArgOffset += PtrByteSize;
- if (GPR_idx != Num_GPR_Regs)
- GPR_idx++;
- }
- ArgOffset += 16;
- GPR_idx = std::min(GPR_idx+4, Num_GPR_Regs); // FIXME correct for ppc64?
- }
++VR_idx;
} else {
- // Vectors are aligned.
- ArgOffset = ((ArgOffset+15)/16)*16;
- CurArgOffset = ArgOffset;
- ArgOffset += 16;
needsLoad = true;
}
+ ArgOffset += 16;
break;
}
// We need to load the argument to a virtual register if we determined
// above that we ran out of physical registers of the appropriate type.
if (needsLoad) {
- int FI = MFI->CreateFixedObject(ObjSize,
- CurArgOffset + (ArgSize - ObjSize),
- isImmutable);
+ if (ObjSize < ArgSize && !isLittleEndian)
+ CurArgOffset += ArgSize - ObjSize;
+ int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset, isImmutable);
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo(),
false, false, false, 0);
@@ -2649,11 +2660,16 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
InVals.push_back(ArgVal);
}
+ // Area that is at least reserved in the caller of this function.
+ unsigned MinReservedArea;
+ MinReservedArea = std::max(ArgOffset, LinkageSize + 8 * PtrByteSize);
+
// Set the size that is at least reserved in caller of this function. Tail
// call optimized functions' reserved stack space needs to be aligned so that
// taking the difference between two stack areas will result in an aligned
// stack.
- setMinReservedArea(MF, DAG, nAltivecParamsAtEnd, MinReservedArea, true);
+ MinReservedArea = EnsureStackAlignment(MF.getTarget(), MinReservedArea);
+ FuncInfo->setMinReservedArea(MinReservedArea);
// If the function takes variable number of arguments, make a frame index for
// the start of the first vararg value... for expansion of llvm.va_start.
@@ -2667,7 +2683,8 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
// If this function is vararg, store any remaining integer argument regs
// to their spots on the stack so that they may be loaded by deferencing the
// result of va_next.
- for (; GPR_idx != Num_GPR_Regs; ++GPR_idx) {
+ for (GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
+ GPR_idx < Num_GPR_Regs; ++GPR_idx) {
unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
@@ -2706,7 +2723,8 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
(CallConv == CallingConv::Fast));
unsigned PtrByteSize = isPPC64 ? 8 : 4;
- unsigned ArgOffset = PPCFrameLowering::getLinkageSize(isPPC64, true);
+ unsigned LinkageSize = PPCFrameLowering::getLinkageSize(isPPC64, true);
+ unsigned ArgOffset = LinkageSize;
// Area that is at least reserved in caller of this function.
unsigned MinReservedArea = ArgOffset;
@@ -2997,11 +3015,21 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
InVals.push_back(ArgVal);
}
+ // Allow for Altivec parameters at the end, if needed.
+ if (nAltivecParamsAtEnd) {
+ MinReservedArea = ((MinReservedArea+15)/16)*16;
+ MinReservedArea += 16*nAltivecParamsAtEnd;
+ }
+
+ // Area that is at least reserved in the caller of this function.
+ MinReservedArea = std::max(MinReservedArea, LinkageSize + 8 * PtrByteSize);
+
// Set the size that is at least reserved in caller of this function. Tail
// call optimized functions' reserved stack space needs to be aligned so that
// taking the difference between two stack areas will result in an aligned
// stack.
- setMinReservedArea(MF, DAG, nAltivecParamsAtEnd, MinReservedArea, isPPC64);
+ MinReservedArea = EnsureStackAlignment(MF.getTarget(), MinReservedArea);
+ FuncInfo->setMinReservedArea(MinReservedArea);
// If the function takes variable number of arguments, make a frame index for
// the start of the first vararg value... for expansion of llvm.va_start.
@@ -3040,75 +3068,6 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
return Chain;
}
-/// CalculateParameterAndLinkageAreaSize - Get the size of the parameter plus
-/// linkage area for the Darwin ABI, or the 64-bit SVR4 ABI.
-static unsigned
-CalculateParameterAndLinkageAreaSize(SelectionDAG &DAG,
- bool isPPC64,
- bool isVarArg,
- unsigned CC,
- const SmallVectorImpl<ISD::OutputArg>
- &Outs,
- const SmallVectorImpl<SDValue> &OutVals,
- unsigned &nAltivecParamsAtEnd) {
- // Count how many bytes are to be pushed on the stack, including the linkage
- // area, and parameter passing area. We start with 24/48 bytes, which is
- // prereserved space for [SP][CR][LR][3 x unused].
- unsigned NumBytes = PPCFrameLowering::getLinkageSize(isPPC64, true);
- unsigned NumOps = Outs.size();
- unsigned PtrByteSize = isPPC64 ? 8 : 4;
-
- // Add up all the space actually used.
- // In 32-bit non-varargs calls, Altivec parameters all go at the end; usually
- // they all go in registers, but we must reserve stack space for them for
- // possible use by the caller. In varargs or 64-bit calls, parameters are
- // assigned stack space in order, with padding so Altivec parameters are
- // 16-byte aligned.
- nAltivecParamsAtEnd = 0;
- for (unsigned i = 0; i != NumOps; ++i) {
- ISD::ArgFlagsTy Flags = Outs[i].Flags;
- EVT ArgVT = Outs[i].VT;
- // Varargs Altivec parameters are padded to a 16 byte boundary.
- if (ArgVT==MVT::v4f32 || ArgVT==MVT::v4i32 ||
- ArgVT==MVT::v8i16 || ArgVT==MVT::v16i8 ||
- ArgVT==MVT::v2f64 || ArgVT==MVT::v2i64) {
- if (!isVarArg && !isPPC64) {
- // Non-varargs Altivec parameters go after all the non-Altivec
- // parameters; handle those later so we know how much padding we need.
- nAltivecParamsAtEnd++;
- continue;
- }
- // Varargs and 64-bit Altivec parameters are padded to 16 byte boundary.
- NumBytes = ((NumBytes+15)/16)*16;
- }
- NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
- }
-
- // Allow for Altivec parameters at the end, if needed.
- if (nAltivecParamsAtEnd) {
- NumBytes = ((NumBytes+15)/16)*16;
- NumBytes += 16*nAltivecParamsAtEnd;
- }
-
- // The prolog code of the callee may store up to 8 GPR argument registers to
- // the stack, allowing va_start to index over them in memory if its varargs.
- // Because we cannot tell if this is needed on the caller side, we have to
- // conservatively assume that it is needed. As such, make sure we have at
- // least enough stack space for the caller to store the 8 GPRs.
- NumBytes = std::max(NumBytes,
- PPCFrameLowering::getMinCallFrameSize(isPPC64, true));
-
- // Tail call needs the stack to be aligned.
- if (CC == CallingConv::Fast && DAG.getTarget().Options.GuaranteedTailCallOpt){
- unsigned TargetAlign = DAG.getMachineFunction().getTarget().
- getFrameLowering()->getStackAlignment();
- unsigned AlignMask = TargetAlign-1;
- NumBytes = (NumBytes + AlignMask) & ~AlignMask;
- }
-
- return NumBytes;
-}
-
/// CalculateTailCallSPDiff - Get the amount the stack pointer has to be
/// adjusted to accommodate the arguments for the tailcall.
static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall,
@@ -3280,7 +3239,7 @@ SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(SelectionDAG & DAG,
SDLoc dl) const {
if (SPDiff) {
// Load the LR and FP stack slot for later adjusting.
- EVT VT = PPCSubTarget.isPPC64() ? MVT::i64 : MVT::i32;
+ EVT VT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
LROpOut = getReturnAddrFrameIndex(DAG);
LROpOut = DAG.getLoad(VT, dl, Chain, LROpOut, MachinePointerInfo(),
false, false, false, 0);
@@ -3373,10 +3332,10 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
SDValue &Chain, SDLoc dl, int SPDiff, bool isTailCall,
SmallVectorImpl<std::pair<unsigned, SDValue> > &RegsToPass,
SmallVectorImpl<SDValue> &Ops, std::vector<EVT> &NodeTys,
- const PPCSubtarget &PPCSubTarget) {
+ const PPCSubtarget &Subtarget) {
- bool isPPC64 = PPCSubTarget.isPPC64();
- bool isSVR4ABI = PPCSubTarget.isSVR4ABI();
+ bool isPPC64 = Subtarget.isPPC64();
+ bool isSVR4ABI = Subtarget.isSVR4ABI();
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
NodeTys.push_back(MVT::Other); // Returns a chain
@@ -3385,11 +3344,12 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
unsigned CallOpc = PPCISD::CALL;
bool needIndirectCall = true;
- if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG)) {
- // If this is an absolute destination address, use the munged value.
- Callee = SDValue(Dest, 0);
- needIndirectCall = false;
- }
+ if (!isSVR4ABI || !isPPC64)
+ if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG)) {
+ // If this is an absolute destination address, use the munged value.
+ Callee = SDValue(Dest, 0);
+ needIndirectCall = false;
+ }
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
// XXX Work around for http://llvm.org/bugs/show_bug.cgi?id=5201
@@ -3398,8 +3358,8 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
if (!DAG.getTarget().getSubtarget<PPCSubtarget>().isJITCodeModel()) {
unsigned OpFlags = 0;
if (DAG.getTarget().getRelocationModel() != Reloc::Static &&
- (PPCSubTarget.getTargetTriple().isMacOSX() &&
- PPCSubTarget.getTargetTriple().isMacOSXVersionLT(10, 5)) &&
+ (Subtarget.getTargetTriple().isMacOSX() &&
+ Subtarget.getTargetTriple().isMacOSXVersionLT(10, 5)) &&
(G->getGlobal()->isDeclaration() ||
G->getGlobal()->isWeakForLinker())) {
// PC-relative references to external symbols should go through $stub,
@@ -3422,8 +3382,8 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
unsigned char OpFlags = 0;
if (DAG.getTarget().getRelocationModel() != Reloc::Static &&
- (PPCSubTarget.getTargetTriple().isMacOSX() &&
- PPCSubTarget.getTargetTriple().isMacOSXVersionLT(10, 5))) {
+ (Subtarget.getTargetTriple().isMacOSX() &&
+ Subtarget.getTargetTriple().isMacOSXVersionLT(10, 5))) {
// PC-relative references to external symbols should go through $stub,
// unless we're building with the leopard linker or later, which
// automatically synthesizes these stubs.
@@ -3497,8 +3457,10 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
// additional register being allocated and an unnecessary move instruction
// being generated.
VTs = DAG.getVTList(MVT::Other, MVT::Glue);
+ SDValue TOCOff = DAG.getIntPtrConstant(8);
+ SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, Callee, TOCOff);
SDValue LoadTOCPtr = DAG.getNode(PPCISD::LOAD_TOC, dl, VTs, Chain,
- Callee, InFlag);
+ AddTOC, InFlag);
Chain = LoadTOCPtr.getValue(0);
InFlag = LoadTOCPtr.getValue(1);
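The Callee + 8 computed above assumes the 64-bit SVR4 (ELFv1) convention that an indirect callee is the address of a function descriptor rather than of code, with the callee's TOC value stored in the descriptor's second doubleword. A sketch of that layout (struct and field names are illustrative):

#include <cstddef>
#include <cstdint>

// 64-bit SVR4 function descriptor as laid out in the .opd section.
struct FunctionDescriptor {
  uint64_t EntryPoint;   // offset 0:  address of the function's code
  uint64_t TOCBase;      // offset 8:  r2 value the callee expects
  uint64_t Environment;  // offset 16: environment pointer (unused by C)
};
static_assert(offsetof(FunctionDescriptor, TOCBase) == 8,
              "LOAD_TOC above reads the descriptor's second doubleword");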
@@ -3613,10 +3575,10 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, SDLoc dl,
SmallVector<SDValue, 8> Ops;
unsigned CallOpc = PrepareCall(DAG, Callee, InFlag, Chain, dl, SPDiff,
isTailCall, RegsToPass, Ops, NodeTys,
- PPCSubTarget);
+ Subtarget);
// Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls
- if (isVarArg && PPCSubTarget.isSVR4ABI() && !PPCSubTarget.isPPC64())
+ if (isVarArg && Subtarget.isSVR4ABI() && !Subtarget.isPPC64())
Ops.push_back(DAG.getRegister(PPC::CR1EQ, MVT::i32));
// When performing tail call optimization the callee pops its arguments off
@@ -3657,7 +3619,7 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, SDLoc dl,
// same TOC), the NOP will remain unchanged.
bool needsTOCRestore = false;
- if (!isTailCall && PPCSubTarget.isSVR4ABI()&& PPCSubTarget.isPPC64()) {
+ if (!isTailCall && Subtarget.isSVR4ABI()&& Subtarget.isPPC64()) {
if (CallOpc == PPCISD::BCTRL) {
// This is a call through a function pointer.
// Restore the caller TOC from the save area into R2.
@@ -3682,7 +3644,12 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, SDLoc dl,
if (needsTOCRestore) {
SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
- Chain = DAG.getNode(PPCISD::TOC_RESTORE, dl, VTs, Chain, InFlag);
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ SDValue StackPtr = DAG.getRegister(PPC::X1, PtrVT);
+ unsigned TOCSaveOffset = PPCFrameLowering::getTOCSaveOffset();
+ SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset);
+ SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, StackPtr, TOCOff);
+ Chain = DAG.getNode(PPCISD::LOAD_TOC, dl, VTs, Chain, AddTOC, InFlag);
InFlag = Chain.getValue(1);
}
@@ -3718,8 +3685,8 @@ PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
report_fatal_error("failed to perform tail call elimination on a call "
"site marked musttail");
- if (PPCSubTarget.isSVR4ABI()) {
- if (PPCSubTarget.isPPC64())
+ if (Subtarget.isSVR4ABI()) {
+ if (Subtarget.isPPC64())
return LowerCall_64SVR4(Chain, Callee, CallConv, isVarArg,
isTailCall, Outs, OutVals, Ins,
dl, DAG, InVals);
@@ -3981,6 +3948,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
SDLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const {
+ bool isLittleEndian = Subtarget.isLittleEndian();
unsigned NumOps = Outs.size();
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
@@ -3997,16 +3965,37 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
CallConv == CallingConv::Fast)
MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
- unsigned nAltivecParamsAtEnd = 0;
-
// Count how many bytes are to be pushed on the stack, including the linkage
// area, and parameter passing area. We start with at least 48 bytes, which
// is reserved space for [SP][CR][LR][3 x unused].
- // NOTE: For PPC64, nAltivecParamsAtEnd always remains zero as a result
- // of this call.
- unsigned NumBytes =
- CalculateParameterAndLinkageAreaSize(DAG, true, isVarArg, CallConv,
- Outs, OutVals, nAltivecParamsAtEnd);
+ unsigned LinkageSize = PPCFrameLowering::getLinkageSize(true, false);
+ unsigned NumBytes = LinkageSize;
+
+ // Add up all the space actually used.
+ for (unsigned i = 0; i != NumOps; ++i) {
+ ISD::ArgFlagsTy Flags = Outs[i].Flags;
+ EVT ArgVT = Outs[i].VT;
+
+ /* Respect alignment of argument on the stack. */
+ unsigned Align = CalculateStackSlotAlignment(ArgVT, Flags, PtrByteSize);
+ NumBytes = ((NumBytes + Align - 1) / Align) * Align;
+
+ NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
+ }
+
+ unsigned NumBytesActuallyUsed = NumBytes;
+
+ // The prolog code of the callee may store up to 8 GPR argument registers to
+  // the stack, allowing va_start to index over them in memory if it is varargs.
+ // Because we cannot tell if this is needed on the caller side, we have to
+ // conservatively assume that it is needed. As such, make sure we have at
+ // least enough stack space for the caller to store the 8 GPRs.
+ NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);
+
+ // Tail call needs the stack to be aligned.
+ if (getTargetMachine().Options.GuaranteedTailCallOpt &&
+ CallConv == CallingConv::Fast)
+ NumBytes = EnsureStackAlignment(MF.getTarget(), NumBytes);
// Calculate by how many bytes the stack has to be adjusted in case of tail
// call optimization.
@@ -4038,8 +4027,8 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
// memory. Also, if this is a vararg function, floating point operations
// must be stored to our stack, and loaded into integer regs as well, if
// any integer regs are available for argument passing.
- unsigned ArgOffset = PPCFrameLowering::getLinkageSize(true, true);
- unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
+ unsigned ArgOffset = LinkageSize;
+ unsigned GPR_idx, FPR_idx = 0, VR_idx = 0;
static const MCPhysReg GPR[] = {
PPC::X3, PPC::X4, PPC::X5, PPC::X6,
@@ -4068,6 +4057,15 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
SDValue Arg = OutVals[i];
ISD::ArgFlagsTy Flags = Outs[i].Flags;
+ /* Respect alignment of argument on the stack. */
+ unsigned Align =
+ CalculateStackSlotAlignment(Outs[i].VT, Flags, PtrByteSize);
+ ArgOffset = ((ArgOffset + Align - 1) / Align) * Align;
+
+ /* Compute GPR index associated with argument offset. */
+ GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
+ GPR_idx = std::min(GPR_idx, NumGPRs);
+
// PtrOff will be used to store the current argument to the stack if a
// register cannot be found for it.
SDValue PtrOff;
@@ -4099,15 +4097,6 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
if (Size == 0)
continue;
- unsigned BVAlign = Flags.getByValAlign();
- if (BVAlign > 8) {
- if (BVAlign % PtrByteSize != 0)
- llvm_unreachable(
- "ByVal alignment is not a multiple of the pointer size");
-
- ArgOffset = ((ArgOffset+BVAlign-1)/BVAlign)*BVAlign;
- }
-
// All aggregates smaller than 8 bytes must be passed right-justified.
if (Size==1 || Size==2 || Size==4) {
EVT VT = (Size==1) ? MVT::i8 : ((Size==2) ? MVT::i16 : MVT::i32);
@@ -4116,7 +4105,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
MachinePointerInfo(), VT,
false, false, 0);
MemOpChains.push_back(Load.getValue(1));
- RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
+ RegsToPass.push_back(std::make_pair(GPR[GPR_idx], Load));
ArgOffset += PtrByteSize;
continue;
@@ -4124,9 +4113,12 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
}
if (GPR_idx == NumGPRs && Size < 8) {
- SDValue Const = DAG.getConstant(PtrByteSize - Size,
- PtrOff.getValueType());
- SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
+ SDValue AddPtr = PtrOff;
+ if (!isLittleEndian) {
+ SDValue Const = DAG.getConstant(PtrByteSize - Size,
+ PtrOff.getValueType());
+ AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
+ }
Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
CallSeqStart,
Flags, DAG, dl);
@@ -4161,8 +4153,11 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
// small aggregates, particularly for packed ones.
// FIXME: It would be preferable to use the slot in the
// parameter save area instead of a new local variable.
- SDValue Const = DAG.getConstant(8 - Size, PtrOff.getValueType());
- SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
+ SDValue AddPtr = PtrOff;
+ if (!isLittleEndian) {
+ SDValue Const = DAG.getConstant(8 - Size, PtrOff.getValueType());
+ AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
+ }
Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
CallSeqStart,
Flags, DAG, dl);
@@ -4172,7 +4167,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
MachinePointerInfo(),
false, false, false, 0);
MemOpChains.push_back(Load.getValue(1));
- RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
+ RegsToPass.push_back(std::make_pair(GPR[GPR_idx], Load));
// Done with this argument.
ArgOffset += PtrByteSize;
@@ -4205,7 +4200,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
case MVT::i32:
case MVT::i64:
if (GPR_idx != NumGPRs) {
- RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
+ RegsToPass.push_back(std::make_pair(GPR[GPR_idx], Arg));
} else {
LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
true, isTailCall, false, MemOpChains,
@@ -4223,7 +4218,8 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
// must be passed right-justified in the stack doubleword, and
// in the GPR, if one is available.
SDValue StoreOff;
- if (Arg.getSimpleValueType().SimpleTy == MVT::f32) {
+ if (Arg.getSimpleValueType().SimpleTy == MVT::f32 &&
+ !isLittleEndian) {
SDValue ConstFour = DAG.getConstant(4, PtrOff.getValueType());
StoreOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
} else
@@ -4239,15 +4235,13 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
MachinePointerInfo(), false, false,
false, 0);
MemOpChains.push_back(Load.getValue(1));
- RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
+ RegsToPass.push_back(std::make_pair(GPR[GPR_idx], Load));
}
- } else if (GPR_idx != NumGPRs)
- // If we have any FPRs remaining, we may also have GPRs remaining.
- ++GPR_idx;
+ }
} else {
// Single-precision floating-point values are mapped to the
// second (rightmost) word of the stack doubleword.
- if (Arg.getValueType() == MVT::f32) {
+ if (Arg.getValueType() == MVT::f32 && !isLittleEndian) {
SDValue ConstFour = DAG.getConstant(4, PtrOff.getValueType());
PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
}
@@ -4264,21 +4258,13 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
case MVT::v16i8:
case MVT::v2f64:
case MVT::v2i64:
+ // For a varargs call, named arguments go into VRs or on the stack as
+ // usual; unnamed arguments always go to the stack or the corresponding
+ // GPRs when within range. For now, we always put the value in both
+ // locations (or even all three).
if (isVarArg) {
- // These go aligned on the stack, or in the corresponding R registers
- // when within range. The Darwin PPC ABI doc claims they also go in
- // V registers; in fact gcc does this only for arguments that are
- // prototyped, not for those that match the ... We do it for all
- // arguments, seems to work.
- while (ArgOffset % 16 !=0) {
- ArgOffset += PtrByteSize;
- if (GPR_idx != NumGPRs)
- GPR_idx++;
- }
// We could elide this store in the case where the object fits
// entirely in R registers. Maybe later.
- PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
- DAG.getConstant(ArgOffset, PtrVT));
SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff,
MachinePointerInfo(), false, false, 0);
MemOpChains.push_back(Store);
@@ -4309,10 +4295,8 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
break;
}
- // Non-varargs Altivec params generally go in registers, but have
- // stack space allocated at the end.
+ // Non-varargs Altivec params go into VRs or on the stack.
if (VR_idx != NumVRs) {
- // Doesn't have GPR space allocated.
unsigned VReg = (Arg.getSimpleValueType() == MVT::v2f64 ||
Arg.getSimpleValueType() == MVT::v2i64) ?
VSRH[VR_idx] : VR[VR_idx];
@@ -4323,12 +4307,15 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
true, isTailCall, true, MemOpChains,
TailCallArguments, dl);
- ArgOffset += 16;
}
+ ArgOffset += 16;
break;
}
}
+ assert(NumBytesActuallyUsed == ArgOffset);
+ (void)NumBytesActuallyUsed;
+
if (!MemOpChains.empty())
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
@@ -4337,19 +4324,15 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
// pointers in the 64-bit SVR4 ABI.
if (!isTailCall &&
!dyn_cast<GlobalAddressSDNode>(Callee) &&
- !dyn_cast<ExternalSymbolSDNode>(Callee) &&
- !isBLACompatibleAddress(Callee, DAG)) {
+ !dyn_cast<ExternalSymbolSDNode>(Callee)) {
// Load r2 into a virtual register and store it to the TOC save area.
SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64);
// TOC save area offset.
- SDValue PtrOff = DAG.getIntPtrConstant(40);
+ unsigned TOCSaveOffset = PPCFrameLowering::getTOCSaveOffset();
+ SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset);
SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr, MachinePointerInfo(),
false, false, 0);
- // R12 must contain the address of an indirect callee. This does not
- // mean the MTCTR instruction must use R12; it's easier to model this
- // as an extra parameter, so do that.
- RegsToPass.push_back(std::make_pair((unsigned)PPC::X12, Callee));
}
// Build a sequence of copy-to-reg nodes chained together with token chain
@@ -4397,15 +4380,55 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
CallConv == CallingConv::Fast)
MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
- unsigned nAltivecParamsAtEnd = 0;
-
// Count how many bytes are to be pushed on the stack, including the linkage
// area, and parameter passing area. We start with 24/48 bytes, which is
// prereserved space for [SP][CR][LR][3 x unused].
- unsigned NumBytes =
- CalculateParameterAndLinkageAreaSize(DAG, isPPC64, isVarArg, CallConv,
- Outs, OutVals,
- nAltivecParamsAtEnd);
+ unsigned LinkageSize = PPCFrameLowering::getLinkageSize(isPPC64, true);
+ unsigned NumBytes = LinkageSize;
+
+ // Add up all the space actually used.
+ // In 32-bit non-varargs calls, Altivec parameters all go at the end; usually
+ // they all go in registers, but we must reserve stack space for them for
+ // possible use by the caller. In varargs or 64-bit calls, parameters are
+ // assigned stack space in order, with padding so Altivec parameters are
+ // 16-byte aligned.
+ unsigned nAltivecParamsAtEnd = 0;
+ for (unsigned i = 0; i != NumOps; ++i) {
+ ISD::ArgFlagsTy Flags = Outs[i].Flags;
+ EVT ArgVT = Outs[i].VT;
+ // Varargs Altivec parameters are padded to a 16 byte boundary.
+ if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
+ ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
+ ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64) {
+ if (!isVarArg && !isPPC64) {
+ // Non-varargs Altivec parameters go after all the non-Altivec
+ // parameters; handle those later so we know how much padding we need.
+ nAltivecParamsAtEnd++;
+ continue;
+ }
+ // Varargs and 64-bit Altivec parameters are padded to 16 byte boundary.
+ NumBytes = ((NumBytes+15)/16)*16;
+ }
+ NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
+ }
+
+ // Allow for Altivec parameters at the end, if needed.
+ if (nAltivecParamsAtEnd) {
+ NumBytes = ((NumBytes+15)/16)*16;
+ NumBytes += 16*nAltivecParamsAtEnd;
+ }
+
+ // The prolog code of the callee may store up to 8 GPR argument registers to
+  // the stack, allowing va_start to index over them in memory if it is varargs.
+ // Because we cannot tell if this is needed on the caller side, we have to
+ // conservatively assume that it is needed. As such, make sure we have at
+ // least enough stack space for the caller to store the 8 GPRs.
+ NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);
+
+ // Tail call needs the stack to be aligned.
+ if (getTargetMachine().Options.GuaranteedTailCallOpt &&
+ CallConv == CallingConv::Fast)
+ NumBytes = EnsureStackAlignment(MF.getTarget(), NumBytes);
// Calculate by how many bytes the stack has to be adjusted in case of tail
// call optimization.
@@ -4441,7 +4464,7 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
// memory. Also, if this is a vararg function, floating point operations
// must be stored to our stack, and loaded into integer regs as well, if
// any integer regs are available for argument passing.
- unsigned ArgOffset = PPCFrameLowering::getLinkageSize(isPPC64, true);
+ unsigned ArgOffset = LinkageSize;
unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
static const MCPhysReg GPR_32[] = { // 32-bit registers.
@@ -4818,8 +4841,8 @@ SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG,
SDValue
PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG & DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
- bool isPPC64 = PPCSubTarget.isPPC64();
- bool isDarwinABI = PPCSubTarget.isDarwinABI();
+ bool isPPC64 = Subtarget.isPPC64();
+ bool isDarwinABI = Subtarget.isDarwinABI();
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
// Get current frame pointer save index. The users of this index will be
@@ -4842,8 +4865,8 @@ PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG & DAG) const {
SDValue
PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
- bool isPPC64 = PPCSubTarget.isPPC64();
- bool isDarwinABI = PPCSubTarget.isDarwinABI();
+ bool isPPC64 = Subtarget.isPPC64();
+ bool isDarwinABI = Subtarget.isDarwinABI();
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
// Get current frame pointer save index. The users of this index will be
@@ -5063,12 +5086,12 @@ SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
case MVT::i32:
Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIWZ :
- (PPCSubTarget.hasFPCVT() ? PPCISD::FCTIWUZ :
+ (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ :
PPCISD::FCTIDZ),
dl, MVT::f64, Src);
break;
case MVT::i64:
- assert((Op.getOpcode() == ISD::FP_TO_SINT || PPCSubTarget.hasFPCVT()) &&
+ assert((Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT()) &&
"i64 FP_TO_UINT is supported only with FPCVT");
Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
PPCISD::FCTIDUZ,
@@ -5077,8 +5100,8 @@ SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
}
// Convert the FP value to an int value through memory.
- bool i32Stack = Op.getValueType() == MVT::i32 && PPCSubTarget.hasSTFIWX() &&
- (Op.getOpcode() == ISD::FP_TO_SINT || PPCSubTarget.hasFPCVT());
+ bool i32Stack = Op.getValueType() == MVT::i32 && Subtarget.hasSTFIWX() &&
+ (Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT());
SDValue FIPtr = DAG.CreateStackTemporary(i32Stack ? MVT::i32 : MVT::f64);
int FI = cast<FrameIndexSDNode>(FIPtr)->getIndex();
MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(FI);
@@ -5120,17 +5143,17 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
DAG.getConstantFP(1.0, Op.getValueType()),
DAG.getConstantFP(0.0, Op.getValueType()));
- assert((Op.getOpcode() == ISD::SINT_TO_FP || PPCSubTarget.hasFPCVT()) &&
+ assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) &&
"UINT_TO_FP is supported only with FPCVT");
// If we have FCFIDS, then use it when converting to single-precision.
// Otherwise, convert to double-precision and then round.
- unsigned FCFOp = (PPCSubTarget.hasFPCVT() && Op.getValueType() == MVT::f32) ?
+ unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32) ?
(Op.getOpcode() == ISD::UINT_TO_FP ?
PPCISD::FCFIDUS : PPCISD::FCFIDS) :
(Op.getOpcode() == ISD::UINT_TO_FP ?
PPCISD::FCFIDU : PPCISD::FCFID);
- MVT FCFTy = (PPCSubTarget.hasFPCVT() && Op.getValueType() == MVT::f32) ?
+ MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32) ?
MVT::f32 : MVT::f64;
if (Op.getOperand(0).getValueType() == MVT::i64) {
@@ -5146,7 +5169,7 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
// However, if -enable-unsafe-fp-math is in effect, accept double
// rounding to avoid the extra overhead.
if (Op.getValueType() == MVT::f32 &&
- !PPCSubTarget.hasFPCVT() &&
+ !Subtarget.hasFPCVT() &&
!DAG.getTarget().Options.UnsafeFPMath) {
// Twiddle input to make sure the low 11 bits are zero. (If this
@@ -5184,7 +5207,7 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
SDValue Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, SINT);
SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Bits);
- if (Op.getValueType() == MVT::f32 && !PPCSubTarget.hasFPCVT())
+ if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT())
FP = DAG.getNode(ISD::FP_ROUND, dl,
MVT::f32, FP, DAG.getIntPtrConstant(0));
return FP;
@@ -5201,7 +5224,7 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
SDValue Ld;
- if (PPCSubTarget.hasLFIWAX() || PPCSubTarget.hasFPCVT()) {
+ if (Subtarget.hasLFIWAX() || Subtarget.hasFPCVT()) {
int FrameIdx = FrameInfo->CreateStackObject(4, 4, false);
SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
@@ -5220,7 +5243,7 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
dl, DAG.getVTList(MVT::f64, MVT::Other),
Ops, MVT::i32, MMO);
} else {
- assert(PPCSubTarget.isPPC64() &&
+ assert(Subtarget.isPPC64() &&
"i32->FP without LFIWAX supported only on PPC64");
int FrameIdx = FrameInfo->CreateStackObject(8, 8, false);
@@ -5242,7 +5265,7 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
// FCFID it and return it.
SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Ld);
- if (Op.getValueType() == MVT::f32 && !PPCSubTarget.hasFPCVT())
+ if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT())
FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP, DAG.getIntPtrConstant(0));
return FP;
}
@@ -5557,6 +5580,22 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
}
+ // The remaining cases assume either big endian element order or
+ // a splat-size that equates to the element size of the vector
+ // to be built. An example that doesn't work for little endian is
+ // {0, -1, 0, -1, 0, -1, 0, -1} which has a splat size of 32 bits
+ // and a vector element size of 16 bits. The code below will
+ // produce the vector in big endian element order, which for little
+ // endian is {-1, 0, -1, 0, -1, 0, -1, 0}.
+
+ // For now, just avoid these optimizations in that case.
+ // FIXME: Develop correct optimizations for LE with mismatched
+ // splat and element sizes.
+
+ if (Subtarget.isLittleEndian() &&
+ SplatSize != Op.getValueType().getVectorElementType().getSizeInBits())
+ return SDValue();
+
// Check to see if this is a wide variety of vsplti*, binop self cases.
static const signed char SplatCsts[] = {
-1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7,
@@ -5725,6 +5764,7 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
SDValue V2 = Op.getOperand(1);
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
EVT VT = Op.getValueType();
+ bool isLittleEndian = Subtarget.isLittleEndian();
// Cases that are handled by instructions that take permute immediates
// (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be
@@ -5733,15 +5773,15 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
if (PPC::isSplatShuffleMask(SVOp, 1) ||
PPC::isSplatShuffleMask(SVOp, 2) ||
PPC::isSplatShuffleMask(SVOp, 4) ||
- PPC::isVPKUWUMShuffleMask(SVOp, true) ||
- PPC::isVPKUHUMShuffleMask(SVOp, true) ||
- PPC::isVSLDOIShuffleMask(SVOp, true) != -1 ||
- PPC::isVMRGLShuffleMask(SVOp, 1, true) ||
- PPC::isVMRGLShuffleMask(SVOp, 2, true) ||
- PPC::isVMRGLShuffleMask(SVOp, 4, true) ||
- PPC::isVMRGHShuffleMask(SVOp, 1, true) ||
- PPC::isVMRGHShuffleMask(SVOp, 2, true) ||
- PPC::isVMRGHShuffleMask(SVOp, 4, true)) {
+ PPC::isVPKUWUMShuffleMask(SVOp, true, DAG) ||
+ PPC::isVPKUHUMShuffleMask(SVOp, true, DAG) ||
+ PPC::isVSLDOIShuffleMask(SVOp, true, DAG) != -1 ||
+ PPC::isVMRGLShuffleMask(SVOp, 1, true, DAG) ||
+ PPC::isVMRGLShuffleMask(SVOp, 2, true, DAG) ||
+ PPC::isVMRGLShuffleMask(SVOp, 4, true, DAG) ||
+ PPC::isVMRGHShuffleMask(SVOp, 1, true, DAG) ||
+ PPC::isVMRGHShuffleMask(SVOp, 2, true, DAG) ||
+ PPC::isVMRGHShuffleMask(SVOp, 4, true, DAG)) {
return Op;
}
}
@@ -5749,15 +5789,15 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
// Altivec has a variety of "shuffle immediates" that take two vector inputs
// and produce a fixed permutation. If any of these match, do not lower to
// VPERM.
- if (PPC::isVPKUWUMShuffleMask(SVOp, false) ||
- PPC::isVPKUHUMShuffleMask(SVOp, false) ||
- PPC::isVSLDOIShuffleMask(SVOp, false) != -1 ||
- PPC::isVMRGLShuffleMask(SVOp, 1, false) ||
- PPC::isVMRGLShuffleMask(SVOp, 2, false) ||
- PPC::isVMRGLShuffleMask(SVOp, 4, false) ||
- PPC::isVMRGHShuffleMask(SVOp, 1, false) ||
- PPC::isVMRGHShuffleMask(SVOp, 2, false) ||
- PPC::isVMRGHShuffleMask(SVOp, 4, false))
+ if (PPC::isVPKUWUMShuffleMask(SVOp, false, DAG) ||
+ PPC::isVPKUHUMShuffleMask(SVOp, false, DAG) ||
+ PPC::isVSLDOIShuffleMask(SVOp, false, DAG) != -1 ||
+ PPC::isVMRGLShuffleMask(SVOp, 1, false, DAG) ||
+ PPC::isVMRGLShuffleMask(SVOp, 2, false, DAG) ||
+ PPC::isVMRGLShuffleMask(SVOp, 4, false, DAG) ||
+ PPC::isVMRGHShuffleMask(SVOp, 1, false, DAG) ||
+ PPC::isVMRGHShuffleMask(SVOp, 2, false, DAG) ||
+ PPC::isVMRGHShuffleMask(SVOp, 4, false, DAG))
return Op;
// Check to see if this is a shuffle of 4-byte values. If so, we can use our
@@ -5791,7 +5831,9 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
// If this shuffle can be expressed as a shuffle of 4-byte elements, use the
// perfect shuffle vector to determine if it is cost effective to do this as
// discrete instructions, or whether we should use a vperm.
- if (isFourElementShuffle) {
+ // For now, we skip this for little endian until such time as we have a
+ // little-endian perfect shuffle table.
+ if (isFourElementShuffle && !isLittleEndian) {
// Compute the index in the perfect shuffle table.
unsigned PFTableIndex =
PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
@@ -5820,6 +5862,11 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
// The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except
// that it is in input element units, not in bytes. Convert now.
+
+ // For little endian, the order of the input vectors is reversed, and
+ // the permutation mask is complemented with respect to 31. This is
+ // necessary to produce proper semantics with the big-endian-biased vperm
+ // instruction.
EVT EltVT = V1.getValueType().getVectorElementType();
unsigned BytesPerElement = EltVT.getSizeInBits()/8;
@@ -5828,13 +5875,22 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
unsigned SrcElt = PermMask[i] < 0 ? 0 : PermMask[i];
for (unsigned j = 0; j != BytesPerElement; ++j)
- ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
- MVT::i32));
+ if (isLittleEndian)
+ ResultMask.push_back(DAG.getConstant(31 - (SrcElt*BytesPerElement+j),
+ MVT::i32));
+ else
+ ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
+ MVT::i32));
}
SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i8,
ResultMask);
- return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(), V1, V2, VPermMask);
+ if (isLittleEndian)
+ return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(),
+ V2, V1, VPermMask);
+ else
+ return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(),
+ V1, V2, VPermMask);
}
/// getAltivecCompareInfo - Given an intrinsic, return false if it is not an
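The little-endian branch added above is the "reverse the inputs and complement the mask with respect to 31" rule the new comment describes. A standalone sketch of just the mask rewrite (names are illustrative):

#include <array>
#include <cstdint>

// Rewrite a vperm control mask of byte indices 0..31 for the case where
// the two source operands are handed to vperm in the opposite order.
static std::array<uint8_t, 16>
ComplementPermMask(const std::array<uint8_t, 16> &Mask) {
  std::array<uint8_t, 16> Out{};
  for (unsigned i = 0; i != 16; ++i)
    Out[i] = 31 - Mask[i];
  return Out;
}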
@@ -6027,6 +6083,7 @@ SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
LHS, RHS, Zero, DAG, dl);
} else if (Op.getValueType() == MVT::v16i8) {
SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
+ bool isLittleEndian = Subtarget.isLittleEndian();
// Multiply the even 8-bit parts, producing 16-bit sums.
SDValue EvenParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuleub,
@@ -6038,13 +6095,24 @@ SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
LHS, RHS, DAG, dl, MVT::v8i16);
OddParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OddParts);
- // Merge the results together.
+ // Merge the results together. Because vmuleub and vmuloub are
+ // instructions with a big-endian bias, we must reverse the
+ // element numbering and reverse the meaning of "odd" and "even"
+ // when generating little endian code.
int Ops[16];
for (unsigned i = 0; i != 8; ++i) {
- Ops[i*2 ] = 2*i+1;
- Ops[i*2+1] = 2*i+1+16;
+ if (isLittleEndian) {
+ Ops[i*2 ] = 2*i;
+ Ops[i*2+1] = 2*i+16;
+ } else {
+ Ops[i*2 ] = 2*i+1;
+ Ops[i*2+1] = 2*i+1+16;
+ }
}
- return DAG.getVectorShuffle(MVT::v16i8, dl, EvenParts, OddParts, Ops);
+ if (isLittleEndian)
+ return DAG.getVectorShuffle(MVT::v16i8, dl, OddParts, EvenParts, Ops);
+ else
+ return DAG.getVectorShuffle(MVT::v16i8, dl, EvenParts, OddParts, Ops);
} else {
llvm_unreachable("Unknown mul to lower!");
}
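Whatever even/odd split vmuleub and vmuloub impose, the shuffle above only has to pick out the low byte of each 16-bit partial product, so the whole sequence must agree with the plain scalar semantics of a v16i8 multiply. A scalar reference for that result:

#include <cstdint>

// Scalar reference for the v16i8 multiply being lowered above: each lane
// keeps only the low 8 bits of its 16-bit product.
static void MulV16i8(const uint8_t A[16], const uint8_t B[16],
                     uint8_t Out[16]) {
  for (unsigned i = 0; i != 16; ++i)
    Out[i] = static_cast<uint8_t>(static_cast<uint16_t>(A[i]) * B[i]);
}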
@@ -6064,17 +6132,17 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG);
case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG);
case ISD::VASTART:
- return LowerVASTART(Op, DAG, PPCSubTarget);
+ return LowerVASTART(Op, DAG, Subtarget);
case ISD::VAARG:
- return LowerVAARG(Op, DAG, PPCSubTarget);
+ return LowerVAARG(Op, DAG, Subtarget);
case ISD::VACOPY:
- return LowerVACOPY(Op, DAG, PPCSubTarget);
+ return LowerVACOPY(Op, DAG, Subtarget);
- case ISD::STACKRESTORE: return LowerSTACKRESTORE(Op, DAG, PPCSubTarget);
+ case ISD::STACKRESTORE: return LowerSTACKRESTORE(Op, DAG, Subtarget);
case ISD::DYNAMIC_STACKALLOC:
- return LowerDYNAMIC_STACKALLOC(Op, DAG, PPCSubTarget);
+ return LowerDYNAMIC_STACKALLOC(Op, DAG, Subtarget);
case ISD::EH_SJLJ_SETJMP: return lowerEH_SJLJ_SETJMP(Op, DAG);
case ISD::EH_SJLJ_LONGJMP: return lowerEH_SJLJ_LONGJMP(Op, DAG);
@@ -6144,7 +6212,7 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
EVT VT = N->getValueType(0);
if (VT == MVT::i64) {
- SDValue NewNode = LowerVAARG(SDValue(N, 1), DAG, PPCSubTarget);
+ SDValue NewNode = LowerVAARG(SDValue(N, 1), DAG, Subtarget);
Results.push_back(NewNode);
Results.push_back(NewNode.getValue(1));
@@ -6255,7 +6323,7 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr *MI,
// lwarx/stwcx are 32 bits. With the 32-bit atomics we can use address
// registers without caring whether they're 32 or 64, but here we're
// doing actual arithmetic on the addresses.
- bool is64bit = PPCSubTarget.isPPC64();
+ bool is64bit = Subtarget.isPPC64();
unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
const BasicBlock *LLVM_BB = BB->getBasicBlock();
@@ -6450,7 +6518,7 @@ PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr *MI,
unsigned LabelReg = MRI.createVirtualRegister(PtrRC);
unsigned BufReg = MI->getOperand(1).getReg();
- if (PPCSubTarget.isPPC64() && PPCSubTarget.isSVR4ABI()) {
+ if (Subtarget.isPPC64() && Subtarget.isSVR4ABI()) {
MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::STD))
.addReg(PPC::X2)
.addImm(TOCOffset)
@@ -6463,12 +6531,12 @@ PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr *MI,
unsigned BaseReg;
if (MF->getFunction()->getAttributes().hasAttribute(
AttributeSet::FunctionIndex, Attribute::Naked))
- BaseReg = PPCSubTarget.isPPC64() ? PPC::X1 : PPC::R1;
+ BaseReg = Subtarget.isPPC64() ? PPC::X1 : PPC::R1;
else
- BaseReg = PPCSubTarget.isPPC64() ? PPC::BP8 : PPC::BP;
+ BaseReg = Subtarget.isPPC64() ? PPC::BP8 : PPC::BP;
MIB = BuildMI(*thisMBB, MI, DL,
- TII->get(PPCSubTarget.isPPC64() ? PPC::STD : PPC::STW))
+ TII->get(Subtarget.isPPC64() ? PPC::STD : PPC::STW))
.addReg(BaseReg)
.addImm(BPOffset)
.addReg(BufReg);
@@ -6492,10 +6560,10 @@ PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr *MI,
// mainMBB:
// mainDstReg = 0
MIB = BuildMI(mainMBB, DL,
- TII->get(PPCSubTarget.isPPC64() ? PPC::MFLR8 : PPC::MFLR), LabelReg);
+ TII->get(Subtarget.isPPC64() ? PPC::MFLR8 : PPC::MFLR), LabelReg);
// Store IP
- if (PPCSubTarget.isPPC64()) {
+ if (Subtarget.isPPC64()) {
MIB = BuildMI(mainMBB, DL, TII->get(PPC::STD))
.addReg(LabelReg)
.addImm(LabelOffset)
@@ -6607,7 +6675,7 @@ PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr *MI,
MIB.setMemRefs(MMOBegin, MMOEnd);
// Reload TOC
- if (PVT == MVT::i64 && PPCSubTarget.isSVR4ABI()) {
+ if (PVT == MVT::i64 && Subtarget.isSVR4ABI()) {
MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), PPC::X2)
.addImm(TOCOffset)
.addReg(BufReg);
@@ -6645,7 +6713,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
MachineFunction *F = BB->getParent();
- if (PPCSubTarget.hasISEL() && (MI->getOpcode() == PPC::SELECT_CC_I4 ||
+ if (Subtarget.hasISEL() && (MI->getOpcode() == PPC::SELECT_CC_I4 ||
MI->getOpcode() == PPC::SELECT_CC_I8 ||
MI->getOpcode() == PPC::SELECT_I4 ||
MI->getOpcode() == PPC::SELECT_I8)) {
@@ -6765,13 +6833,13 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
BB = EmitAtomicBinary(MI, BB, true, PPC::XOR8);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I8)
- BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::ANDC);
+ BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::NAND);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I16)
- BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ANDC);
+ BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::NAND);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I32)
- BB = EmitAtomicBinary(MI, BB, false, PPC::ANDC);
+ BB = EmitAtomicBinary(MI, BB, false, PPC::NAND);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I64)
- BB = EmitAtomicBinary(MI, BB, true, PPC::ANDC8);
+ BB = EmitAtomicBinary(MI, BB, true, PPC::NAND8);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I8)
BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::SUBF);
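The opcode swap above matters because atomicrmw nand is defined as storing ~(old & operand), which is what the PowerPC nand instruction computes, whereas andc computes old & ~operand. A short comparison of the two bitwise operations (purely illustrative):

#include <cstdint>

static uint32_t Nand(uint32_t X, uint32_t V) { return ~(X & V); }  // PPC nand
static uint32_t Andc(uint32_t X, uint32_t V) { return X & ~V; }    // PPC andc

// Example: Nand(0xC, 0xA) == ~0x8u (all bits set except bit 3), while
// Andc(0xC, 0xA) == 0x4 -- only the former matches atomicrmw nand.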
@@ -6862,7 +6930,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
// We must use 64-bit registers for addresses when targeting 64-bit,
// since we're actually doing arithmetic on them. Other registers
// can be 32-bit.
- bool is64bit = PPCSubTarget.isPPC64();
+ bool is64bit = Subtarget.isPPC64();
bool is8bit = MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I8;
unsigned dest = MI->getOperand(0).getReg();
@@ -7070,10 +7138,10 @@ SDValue PPCTargetLowering::DAGCombineFastRecip(SDValue Op,
EVT VT = Op.getValueType();
- if ((VT == MVT::f32 && PPCSubTarget.hasFRES()) ||
- (VT == MVT::f64 && PPCSubTarget.hasFRE()) ||
- (VT == MVT::v4f32 && PPCSubTarget.hasAltivec()) ||
- (VT == MVT::v2f64 && PPCSubTarget.hasVSX())) {
+ if ((VT == MVT::f32 && Subtarget.hasFRES()) ||
+ (VT == MVT::f64 && Subtarget.hasFRE()) ||
+ (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
+ (VT == MVT::v2f64 && Subtarget.hasVSX())) {
// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
// For the reciprocal, we need to find the zero of the function:
@@ -7086,7 +7154,7 @@ SDValue PPCTargetLowering::DAGCombineFastRecip(SDValue Op,
// correct after every iteration. The minimum architected relative
// accuracy is 2^-5. When hasRecipPrec(), this is 2^-14. IEEE float has
// 23 digits and double has 52 digits.
- int Iterations = PPCSubTarget.hasRecipPrec() ? 1 : 3;
+ int Iterations = Subtarget.hasRecipPrec() ? 1 : 3;
if (VT.getScalarType() == MVT::f64)
++Iterations;
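The Newton iteration the comment refers to, for the zero of F(X) = 1/X - B, simplifies to X_{i+1} = X_i * (2 - B * X_i), and the relative error roughly squares on each step, which is why one step suffices from a 2^-14 estimate and three from a 2^-5 estimate. A scalar illustration:

// One Newton-Raphson refinement step for the reciprocal of B.
static double RefineRecip(double B, double X) {
  return X * (2.0 - B * X);
}

// Example: for B = 3.0 starting from X = 0.3 (10% relative error), the
// error after successive steps is about 1e-2, 1e-4, 1e-8.
static double ApproxRecip(double B, double X, int Iterations) {
  for (int i = 0; i < Iterations; ++i)
    X = RefineRecip(B, X);
  return X;
}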
@@ -7133,10 +7201,10 @@ SDValue PPCTargetLowering::DAGCombineFastRecipFSQRT(SDValue Op,
EVT VT = Op.getValueType();
- if ((VT == MVT::f32 && PPCSubTarget.hasFRSQRTES()) ||
- (VT == MVT::f64 && PPCSubTarget.hasFRSQRTE()) ||
- (VT == MVT::v4f32 && PPCSubTarget.hasAltivec()) ||
- (VT == MVT::v2f64 && PPCSubTarget.hasVSX())) {
+ if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) ||
+ (VT == MVT::f64 && Subtarget.hasFRSQRTE()) ||
+ (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
+ (VT == MVT::v2f64 && Subtarget.hasVSX())) {
// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
// For the reciprocal sqrt, we need to find the zero of the function:
@@ -7149,7 +7217,7 @@ SDValue PPCTargetLowering::DAGCombineFastRecipFSQRT(SDValue Op,
// correct after every iteration. The minimum architected relative
// accuracy is 2^-5. When hasRecipPrec(), this is 2^-14. IEEE float has
// 23 digits and double has 52 digits.
- int Iterations = PPCSubTarget.hasRecipPrec() ? 1 : 3;
+ int Iterations = Subtarget.hasRecipPrec() ? 1 : 3;
if (VT.getScalarType() == MVT::f64)
++Iterations;
@@ -7266,10 +7334,9 @@ static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) {
if (!Visited.count(ChainLD->getChain().getNode()))
Queue.push_back(ChainLD->getChain().getNode());
} else if (ChainNext->getOpcode() == ISD::TokenFactor) {
- for (SDNode::op_iterator O = ChainNext->op_begin(),
- OE = ChainNext->op_end(); O != OE; ++O)
- if (!Visited.count(O->getNode()))
- Queue.push_back(O->getNode());
+ for (const SDUse &O : ChainNext->ops())
+ if (!Visited.count(O.getNode()))
+ Queue.push_back(O.getNode());
} else
LoadRoots.insert(ChainNext);
}
@@ -7312,7 +7379,7 @@ SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N,
SelectionDAG &DAG = DCI.DAG;
SDLoc dl(N);
- assert(PPCSubTarget.useCRBits() &&
+ assert(Subtarget.useCRBits() &&
"Expecting to be tracking CR bits");
// If we're tracking CR bits, we need to be careful that we don't have:
// trunc(binary-ops(zext(x), zext(y)))
@@ -7610,9 +7677,9 @@ SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N,
return SDValue();
if (!((N->getOperand(0).getValueType() == MVT::i1 &&
- PPCSubTarget.useCRBits()) ||
+ Subtarget.useCRBits()) ||
(N->getOperand(0).getValueType() == MVT::i32 &&
- PPCSubTarget.isPPC64())))
+ Subtarget.isPPC64())))
return SDValue();
if (N->getOperand(0).getOpcode() != ISD::AND &&
@@ -7930,8 +7997,8 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
DCI.AddToWorklist(RV.getNode());
RV = DAGCombineFastRecip(RV, DCI);
if (RV.getNode()) {
- // Unfortunately, RV is now NaN if the input was exactly 0. Select out
- // this case and force the answer to 0.
+ // Unfortunately, RV is now NaN if the input was exactly 0. Select out
+ // this case and force the answer to 0.
EVT VT = RV.getValueType();
@@ -8051,6 +8118,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
// This is a type-legal unaligned Altivec load.
SDValue Chain = LD->getChain();
SDValue Ptr = LD->getBasePtr();
+ bool isLittleEndian = Subtarget.isLittleEndian();
// This implements the loading of unaligned vectors as described in
// the venerable Apple Velocity Engine overview. Specifically:
@@ -8058,25 +8126,28 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
// https://developer.apple.com/hardwaredrivers/ve/code_optimization.html
//
// The general idea is to expand a sequence of one or more unaligned
- // loads into a alignment-based permutation-control instruction (lvsl),
- // a series of regular vector loads (which always truncate their
- // input address to an aligned address), and a series of permutations.
- // The results of these permutations are the requested loaded values.
- // The trick is that the last "extra" load is not taken from the address
- // you might suspect (sizeof(vector) bytes after the last requested
- // load), but rather sizeof(vector) - 1 bytes after the last
- // requested vector. The point of this is to avoid a page fault if the
- // base address happened to be aligned. This works because if the base
- // address is aligned, then adding less than a full vector length will
- // cause the last vector in the sequence to be (re)loaded. Otherwise,
- // the next vector will be fetched as you might suspect was necessary.
+ // loads into an alignment-based permutation-control instruction (lvsl
+ // or lvsr), a series of regular vector loads (which always truncate
+ // their input address to an aligned address), and a series of
+ // permutations. The results of these permutations are the requested
+ // loaded values. The trick is that the last "extra" load is not taken
+ // from the address you might suspect (sizeof(vector) bytes after the
+ // last requested load), but rather sizeof(vector) - 1 bytes after the
+ // last requested vector. The point of this is to avoid a page fault if
+ // the base address happened to be aligned. This works because if the
+ // base address is aligned, then adding less than a full vector length
+ // will cause the last vector in the sequence to be (re)loaded.
+ // Otherwise, the next vector will be fetched as you might suspect was
+ // necessary.
// We might be able to reuse the permutation generation from
// a different base address offset from this one by an aligned amount.
// The INTRINSIC_WO_CHAIN DAG combine will attempt to perform this
// optimization later.
- SDValue PermCntl = BuildIntrinsicOp(Intrinsic::ppc_altivec_lvsl, Ptr,
- DAG, dl, MVT::v16i8);
+ Intrinsic::ID Intr = (isLittleEndian ?
+ Intrinsic::ppc_altivec_lvsr :
+ Intrinsic::ppc_altivec_lvsl);
+ SDValue PermCntl = BuildIntrinsicOp(Intr, Ptr, DAG, dl, MVT::v16i8);
// Refine the alignment of the original load (a "new" load created here
// which was identical to the first except for the alignment would be
@@ -8125,8 +8196,18 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
if (ExtraLoad.getValueType() != MVT::v4i32)
ExtraLoad = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, ExtraLoad);
- SDValue Perm = BuildIntrinsicOp(Intrinsic::ppc_altivec_vperm,
- BaseLoad, ExtraLoad, PermCntl, DAG, dl);
+ // Because vperm has a big-endian bias, we must reverse the order
+ // of the input vectors and complement the permute control vector
+ // when generating little endian code. We have already handled the
+ // latter by using lvsr instead of lvsl, so just reverse BaseLoad
+ // and ExtraLoad here.
+ SDValue Perm;
+ if (isLittleEndian)
+ Perm = BuildIntrinsicOp(Intrinsic::ppc_altivec_vperm,
+ ExtraLoad, BaseLoad, PermCntl, DAG, dl);
+ else
+ Perm = BuildIntrinsicOp(Intrinsic::ppc_altivec_vperm,
+ BaseLoad, ExtraLoad, PermCntl, DAG, dl);
if (VT != MVT::v4i32)
Perm = DAG.getNode(ISD::BITCAST, dl, VT, Perm);
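Expressed over plain bytes, the recipe in the rewritten comment above reduces to two aligned 16-byte loads plus a byte select driven by the low address bits; the sketch below models the big-endian lvsl/vperm form (the little-endian form differs only in using lvsr and swapping the vperm inputs, as the code above does). Pointer handling here is illustrative only:

#include <cstdint>

// Scalar model of the aligned-load-plus-permute trick: the two loads touch
// only the aligned blocks that overlap [P, P + 16), so an aligned base
// address never strays into the following page.
static void UnalignedVectorLoadBE(const uint8_t *P, uint8_t Out[16]) {
  const uint8_t *Lo = reinterpret_cast<const uint8_t *>(
      reinterpret_cast<uintptr_t>(P) & ~uintptr_t(15));
  const uint8_t *Hi = reinterpret_cast<const uint8_t *>(
      reinterpret_cast<uintptr_t>(P + 15) & ~uintptr_t(15));
  unsigned Shift = reinterpret_cast<uintptr_t>(P) & 15;  // what lvsl encodes
  for (unsigned i = 0; i != 16; ++i)
    Out[i] = (Shift + i < 16) ? Lo[Shift + i] : Hi[Shift + i - 16];
}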
@@ -8151,12 +8232,11 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
++UI;
SmallVector<SDValue, 8> Ops;
- for (SDNode::op_iterator O = User->op_begin(),
- OE = User->op_end(); O != OE; ++O) {
- if (*O == Use)
+ for (const SDUse &O : User->ops()) {
+ if (O == Use)
Ops.push_back(To);
else
- Ops.push_back(*O);
+ Ops.push_back(O);
}
DAG.UpdateNodeOperands(User, Ops);
@@ -8166,9 +8246,12 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
}
}
break;
- case ISD::INTRINSIC_WO_CHAIN:
- if (cast<ConstantSDNode>(N->getOperand(0))->getZExtValue() ==
- Intrinsic::ppc_altivec_lvsl &&
+ case ISD::INTRINSIC_WO_CHAIN: {
+ bool isLittleEndian = Subtarget.isLittleEndian();
+ Intrinsic::ID Intr = (isLittleEndian ?
+ Intrinsic::ppc_altivec_lvsr :
+ Intrinsic::ppc_altivec_lvsl);
+ if (cast<ConstantSDNode>(N->getOperand(0))->getZExtValue() == Intr &&
N->getOperand(1)->getOpcode() == ISD::ADD) {
SDValue Add = N->getOperand(1);
@@ -8180,8 +8263,8 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
UE = BasePtr->use_end(); UI != UE; ++UI) {
if (UI->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
cast<ConstantSDNode>(UI->getOperand(0))->getZExtValue() ==
- Intrinsic::ppc_altivec_lvsl) {
- // We've found another LVSL, and this address if an aligned
+ Intr) {
+ // We've found another LVSL/LVSR, and this address is an aligned
// multiple of that one. The results will be the same, so use the
// one we've just found instead.
@@ -8190,6 +8273,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
}
}
}
+ }
break;
case ISD::BSWAP:
@@ -8537,11 +8621,11 @@ PPCTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
// GCC RS6000 Constraint Letters
switch (Constraint[0]) {
case 'b': // R1-R31
- if (VT == MVT::i64 && PPCSubTarget.isPPC64())
+ if (VT == MVT::i64 && Subtarget.isPPC64())
return std::make_pair(0U, &PPC::G8RC_NOX0RegClass);
return std::make_pair(0U, &PPC::GPRC_NOR0RegClass);
case 'r': // R0-R31
- if (VT == MVT::i64 && PPCSubTarget.isPPC64())
+ if (VT == MVT::i64 && Subtarget.isPPC64())
return std::make_pair(0U, &PPC::G8RCRegClass);
return std::make_pair(0U, &PPC::GPRCRegClass);
case 'f':
@@ -8573,7 +8657,7 @@ PPCTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
// register.
// FIXME: If TargetLowering::getRegForInlineAsmConstraint could somehow use
// the AsmName field from *RegisterInfo.td, then this would not be necessary.
- if (R.first && VT == MVT::i64 && PPCSubTarget.isPPC64() &&
+ if (R.first && VT == MVT::i64 && Subtarget.isPPC64() &&
PPC::GPRCRegClass.contains(R.first)) {
const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
return std::make_pair(TRI->getMatchingSuperReg(R.first,
@@ -8707,8 +8791,8 @@ SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op,
// the stack.
PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
FuncInfo->setLRStoreRequired();
- bool isPPC64 = PPCSubTarget.isPPC64();
- bool isDarwinABI = PPCSubTarget.isDarwinABI();
+ bool isPPC64 = Subtarget.isPPC64();
+ bool isDarwinABI = Subtarget.isDarwinABI();
if (Depth > 0) {
SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
@@ -8762,8 +8846,8 @@ SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
// this table could be generated automatically from RegInfo.
unsigned PPCTargetLowering::getRegisterByName(const char* RegName,
EVT VT) const {
- bool isPPC64 = PPCSubTarget.isPPC64();
- bool isDarwinABI = PPCSubTarget.isDarwinABI();
+ bool isPPC64 = Subtarget.isPPC64();
+ bool isDarwinABI = Subtarget.isDarwinABI();
if ((isPPC64 && VT != MVT::i64 && VT != MVT::i32) ||
(!isPPC64 && VT != MVT::i32))
@@ -8804,7 +8888,7 @@ EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size,
bool IsMemset, bool ZeroMemset,
bool MemcpyStrSrc,
MachineFunction &MF) const {
- if (this->PPCSubTarget.isPPC64()) {
+ if (Subtarget.isPPC64()) {
return MVT::i64;
} else {
return MVT::i32;
@@ -8863,7 +8947,7 @@ bool PPCTargetLowering::allowsUnalignedMemoryAccesses(EVT VT,
return false;
if (VT.getSimpleVT().isVector()) {
- if (PPCSubTarget.hasVSX()) {
+ if (Subtarget.hasVSX()) {
if (VT != MVT::v2f64 && VT != MVT::v2i64)
return false;
} else {
@@ -8907,7 +8991,7 @@ PPCTargetLowering::shouldExpandBuildVectorWithShuffles(
}
Sched::Preference PPCTargetLowering::getSchedulingPreference(SDNode *N) const {
- if (DisableILPPref || PPCSubTarget.enableMachineScheduler())
+ if (DisableILPPref || Subtarget.enableMachineScheduler())
return TargetLowering::getSchedulingPreference(N);
return Sched::ILP;
diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h
index 080ef5d..df05aa5 100644
--- a/lib/Target/PowerPC/PPCISelLowering.h
+++ b/lib/Target/PowerPC/PPCISelLowering.h
@@ -18,7 +18,6 @@
#include "PPC.h"
#include "PPCInstrInfo.h"
#include "PPCRegisterInfo.h"
-#include "PPCSubtarget.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/Target/TargetLowering.h"
@@ -71,19 +70,14 @@ namespace llvm {
TOC_ENTRY,
- /// The following three target-specific nodes are used for calls through
+ /// The following two target-specific nodes are used for calls through
/// function pointers in the 64-bit SVR4 ABI.
- /// Restore the TOC from the TOC save area of the current stack frame.
- /// This is basically a hard coded load instruction which additionally
- /// takes/produces a flag.
- TOC_RESTORE,
-
/// Like a regular LOAD but additionally taking/producing a flag.
LOAD,
- /// LOAD into r2 (also taking/producing a flag). Like TOC_RESTORE, this is
- /// a hard coded load instruction.
+ /// Like LOAD (taking/producing a flag), but using r2 as the hard-coded
+ /// destination.
LOAD_TOC,
/// OPRC, CHAIN = DYNALLOC(CHAIN, NEGSIZE, FRAME_INDEX)
@@ -303,25 +297,27 @@ namespace llvm {
namespace PPC {
/// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUHUM instruction.
- bool isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary);
+ bool isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary,
+ SelectionDAG &DAG);
/// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUWUM instruction.
- bool isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary);
+ bool isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary,
+ SelectionDAG &DAG);
/// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
/// a VRGL* instruction with the specified unit size (1,2 or 4 bytes).
bool isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
- bool isUnary);
+ bool isUnary, SelectionDAG &DAG);
/// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
/// a VRGH* instruction with the specified unit size (1,2 or 4 bytes).
bool isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
- bool isUnary);
+ bool isUnary, SelectionDAG &DAG);
/// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
/// amount, otherwise return -1.
- int isVSLDOIShuffleMask(SDNode *N, bool isUnary);
+ int isVSLDOIShuffleMask(SDNode *N, bool isUnary, SelectionDAG &DAG);
/// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a splat of a single element that is suitable for input to
@@ -334,7 +330,7 @@ namespace llvm {
/// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the
/// specified isSplatShuffleMask VECTOR_SHUFFLE mask.
- unsigned getVSPLTImmediate(SDNode *N, unsigned EltSize);
+ unsigned getVSPLTImmediate(SDNode *N, unsigned EltSize, SelectionDAG &DAG);
/// get_VSPLTI_elt - If this is a build_vector of constants which can be
/// formed by using a vspltis[bhw] instruction of the specified element
@@ -343,8 +339,9 @@ namespace llvm {
SDValue get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG);
}
+ class PPCSubtarget;
class PPCTargetLowering : public TargetLowering {
- const PPCSubtarget &PPCSubTarget;
+ const PPCSubtarget &Subtarget;
public:
explicit PPCTargetLowering(PPCTargetMachine &TM);
@@ -613,11 +610,6 @@ namespace llvm {
extendArgForPPC64(ISD::ArgFlagsTy Flags, EVT ObjectVT, SelectionDAG &DAG,
SDValue ArgVal, SDLoc dl) const;
- void
- setMinReservedArea(MachineFunction &MF, SelectionDAG &DAG,
- unsigned nAltivecParamsAtEnd,
- unsigned MinReservedArea, bool isPPC64) const;
-
SDValue
LowerFormalArguments_Darwin(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td
index b71c09e..9318f70 100644
--- a/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -802,17 +802,11 @@ def LDtocCPT: Pseudo<(outs g8rc:$rD), (ins tocentry:$disp, g8rc:$reg),
[(set i64:$rD,
(PPCtoc_entry tconstpool:$disp, i64:$reg))]>, isPPC64;
-let hasSideEffects = 1, isCodeGenOnly = 1 in {
-let RST = 2, DS = 2 in
-def LDinto_toc: DSForm_1a<58, 0, (outs), (ins g8rc:$reg),
- "ld 2, 8($reg)", IIC_LdStLD,
- [(PPCload_toc i64:$reg)]>, isPPC64;
-
-let RST = 2, DS = 10, RA = 1 in
-def LDtoc_restore : DSForm_1a<58, 0, (outs), (ins),
- "ld 2, 40(1)", IIC_LdStLD,
- [(PPCtoc_restore)]>, isPPC64;
-}
+let hasSideEffects = 1, isCodeGenOnly = 1, RST = 2 in
+def LDinto_toc: DSForm_1<58, 0, (outs), (ins memrix:$src),
+ "ld 2, $src", IIC_LdStLD,
+ [(PPCload_toc ixaddr:$src)]>, isPPC64;
+
def LDX : XForm_1<31, 21, (outs g8rc:$rD), (ins memrr:$src),
"ldx $rD, $src", IIC_LdStLD,
[(set i64:$rD, (load xaddr:$src))]>, isPPC64;
diff --git a/lib/Target/PowerPC/PPCInstrAltivec.td b/lib/Target/PowerPC/PPCInstrAltivec.td
index f3c2eab..dce46d8 100644
--- a/lib/Target/PowerPC/PPCInstrAltivec.td
+++ b/lib/Target/PowerPC/PPCInstrAltivec.td
@@ -22,111 +22,127 @@ def vnot_ppc : PatFrag<(ops node:$in),
def vpkuhum_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
- return PPC::isVPKUHUMShuffleMask(cast<ShuffleVectorSDNode>(N), false);
+ return PPC::isVPKUHUMShuffleMask(cast<ShuffleVectorSDNode>(N), false,
+ *CurDAG);
}]>;
def vpkuwum_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
- return PPC::isVPKUWUMShuffleMask(cast<ShuffleVectorSDNode>(N), false);
+ return PPC::isVPKUWUMShuffleMask(cast<ShuffleVectorSDNode>(N), false,
+ *CurDAG);
}]>;
def vpkuhum_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
- return PPC::isVPKUHUMShuffleMask(cast<ShuffleVectorSDNode>(N), true);
+ return PPC::isVPKUHUMShuffleMask(cast<ShuffleVectorSDNode>(N), true,
+ *CurDAG);
}]>;
def vpkuwum_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
- return PPC::isVPKUWUMShuffleMask(cast<ShuffleVectorSDNode>(N), true);
+ return PPC::isVPKUWUMShuffleMask(cast<ShuffleVectorSDNode>(N), true,
+ *CurDAG);
}]>;
def vmrglb_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle (v16i8 node:$lhs), node:$rhs), [{
- return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 1, false);
+ return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 1, false,
+ *CurDAG);
}]>;
def vmrglh_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle (v16i8 node:$lhs), node:$rhs), [{
- return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 2, false);
+ return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 2, false,
+ *CurDAG);
}]>;
def vmrglw_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle (v16i8 node:$lhs), node:$rhs), [{
- return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 4, false);
+ return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 4, false,
+ *CurDAG);
}]>;
def vmrghb_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle (v16i8 node:$lhs), node:$rhs), [{
- return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 1, false);
+ return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 1, false,
+ *CurDAG);
}]>;
def vmrghh_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle (v16i8 node:$lhs), node:$rhs), [{
- return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 2, false);
+ return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 2, false,
+ *CurDAG);
}]>;
def vmrghw_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle (v16i8 node:$lhs), node:$rhs), [{
- return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 4, false);
+ return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 4, false,
+ *CurDAG);
}]>;
def vmrglb_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle (v16i8 node:$lhs), node:$rhs), [{
- return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 1, true);
+ return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 1, true,
+ *CurDAG);
}]>;
def vmrglh_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
- return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 2, true);
+ return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 2, true,
+ *CurDAG);
}]>;
def vmrglw_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
- return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 4, true);
+ return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 4, true,
+ *CurDAG);
}]>;
def vmrghb_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
- return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 1, true);
+ return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 1, true,
+ *CurDAG);
}]>;
def vmrghh_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
- return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 2, true);
+ return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 2, true,
+ *CurDAG);
}]>;
def vmrghw_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
- return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 4, true);
+ return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 4, true,
+ *CurDAG);
}]>;
def VSLDOI_get_imm : SDNodeXForm<vector_shuffle, [{
- return getI32Imm(PPC::isVSLDOIShuffleMask(N, false));
+ return getI32Imm(PPC::isVSLDOIShuffleMask(N, false, *CurDAG));
}]>;
def vsldoi_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
- return PPC::isVSLDOIShuffleMask(N, false) != -1;
+ return PPC::isVSLDOIShuffleMask(N, false, *CurDAG) != -1;
}], VSLDOI_get_imm>;
/// VSLDOI_unary* - These are used to match vsldoi(X,X), which is turned into
/// vector_shuffle(X,undef,mask) by the dag combiner.
def VSLDOI_unary_get_imm : SDNodeXForm<vector_shuffle, [{
- return getI32Imm(PPC::isVSLDOIShuffleMask(N, true));
+ return getI32Imm(PPC::isVSLDOIShuffleMask(N, true, *CurDAG));
}]>;
def vsldoi_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
- return PPC::isVSLDOIShuffleMask(N, true) != -1;
+ return PPC::isVSLDOIShuffleMask(N, true, *CurDAG) != -1;
}], VSLDOI_unary_get_imm>;
// VSPLT*_get_imm xform function: convert vector_shuffle mask to VSPLT* imm.
def VSPLTB_get_imm : SDNodeXForm<vector_shuffle, [{
- return getI32Imm(PPC::getVSPLTImmediate(N, 1));
+ return getI32Imm(PPC::getVSPLTImmediate(N, 1, *CurDAG));
}]>;
def vspltb_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
return PPC::isSplatShuffleMask(cast<ShuffleVectorSDNode>(N), 1);
}], VSPLTB_get_imm>;
def VSPLTH_get_imm : SDNodeXForm<vector_shuffle, [{
- return getI32Imm(PPC::getVSPLTImmediate(N, 2));
+ return getI32Imm(PPC::getVSPLTImmediate(N, 2, *CurDAG));
}]>;
def vsplth_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
return PPC::isSplatShuffleMask(cast<ShuffleVectorSDNode>(N), 2);
}], VSPLTH_get_imm>;
def VSPLTW_get_imm : SDNodeXForm<vector_shuffle, [{
- return getI32Imm(PPC::getVSPLTImmediate(N, 4));
+ return getI32Imm(PPC::getVSPLTImmediate(N, 4, *CurDAG));
}]>;
def vspltw_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
diff --git a/lib/Target/PowerPC/PPCInstrFormats.td b/lib/Target/PowerPC/PPCInstrFormats.td
index 7fed2c6..1e4396c 100644
--- a/lib/Target/PowerPC/PPCInstrFormats.td
+++ b/lib/Target/PowerPC/PPCInstrFormats.td
@@ -360,20 +360,6 @@ class DSForm_1<bits<6> opcode, bits<2> xo, dag OOL, dag IOL, string asmstr,
let Inst{30-31} = xo;
}
-class DSForm_1a<bits<6> opcode, bits<2> xo, dag OOL, dag IOL, string asmstr,
- InstrItinClass itin, list<dag> pattern>
- : I<opcode, OOL, IOL, asmstr, itin> {
- bits<5> RST;
- bits<14> DS;
- bits<5> RA;
-
- let Pattern = pattern;
-
- let Inst{6-10} = RST;
- let Inst{11-15} = RA;
- let Inst{16-29} = DS;
- let Inst{30-31} = xo;
-}
// 1.7.6 X-Form
class XForm_base_r3xo<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp
index fd72384..9bac91d 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -27,6 +27,7 @@
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/CodeGen/SlotIndexes.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/Support/CommandLine.h"
@@ -60,23 +61,25 @@ cl::Hidden);
// Pin the vtable to this file.
void PPCInstrInfo::anchor() {}
-PPCInstrInfo::PPCInstrInfo(PPCTargetMachine &tm)
- : PPCGenInstrInfo(PPC::ADJCALLSTACKDOWN, PPC::ADJCALLSTACKUP),
- TM(tm), RI(*TM.getSubtargetImpl()) {}
+PPCInstrInfo::PPCInstrInfo(PPCSubtarget &STI)
+ : PPCGenInstrInfo(PPC::ADJCALLSTACKDOWN, PPC::ADJCALLSTACKUP),
+ Subtarget(STI), RI(STI) {}
/// CreateTargetHazardRecognizer - Return the hazard recognizer to use for
/// this target when scheduling the DAG.
-ScheduleHazardRecognizer *PPCInstrInfo::CreateTargetHazardRecognizer(
- const TargetMachine *TM,
- const ScheduleDAG *DAG) const {
- unsigned Directive = TM->getSubtarget<PPCSubtarget>().getDarwinDirective();
+ScheduleHazardRecognizer *
+PPCInstrInfo::CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI,
+ const ScheduleDAG *DAG) const {
+ unsigned Directive =
+ static_cast<const PPCSubtarget *>(STI)->getDarwinDirective();
if (Directive == PPC::DIR_440 || Directive == PPC::DIR_A2 ||
Directive == PPC::DIR_E500mc || Directive == PPC::DIR_E5500) {
- const InstrItineraryData *II = TM->getInstrItineraryData();
+ const InstrItineraryData *II =
+ &static_cast<const PPCSubtarget *>(STI)->getInstrItineraryData();
return new ScoreboardHazardRecognizer(II, DAG);
}
- return TargetInstrInfo::CreateTargetHazardRecognizer(TM, DAG);
+ return TargetInstrInfo::CreateTargetHazardRecognizer(STI, DAG);
}
/// CreateTargetPostRAHazardRecognizer - Return the postRA hazard recognizer
@@ -84,17 +87,18 @@ ScheduleHazardRecognizer *PPCInstrInfo::CreateTargetHazardRecognizer(
ScheduleHazardRecognizer *PPCInstrInfo::CreateTargetPostRAHazardRecognizer(
const InstrItineraryData *II,
const ScheduleDAG *DAG) const {
- unsigned Directive = TM.getSubtarget<PPCSubtarget>().getDarwinDirective();
+ unsigned Directive =
+ DAG->TM.getSubtarget<PPCSubtarget>().getDarwinDirective();
- if (Directive == PPC::DIR_PWR7)
+ if (Directive == PPC::DIR_PWR7 || Directive == PPC::DIR_PWR8)
return new PPCDispatchGroupSBHazardRecognizer(II, DAG);
// Most subtargets use a PPC970 recognizer.
if (Directive != PPC::DIR_440 && Directive != PPC::DIR_A2 &&
Directive != PPC::DIR_E500mc && Directive != PPC::DIR_E5500) {
- assert(TM.getInstrInfo() && "No InstrInfo?");
+ assert(DAG->TII && "No InstrInfo?");
- return new PPCHazardRecognizer970(TM);
+ return new PPCHazardRecognizer970(*DAG);
}
return new ScoreboardHazardRecognizer(II, DAG);
@@ -129,7 +133,7 @@ int PPCInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
// On some cores, there is an additional delay between writing to a condition
// register, and using it from a branch.
- unsigned Directive = TM.getSubtarget<PPCSubtarget>().getDarwinDirective();
+ unsigned Directive = Subtarget.getDarwinDirective();
switch (Directive) {
default: break;
case PPC::DIR_7400:
@@ -142,6 +146,7 @@ int PPCInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
case PPC::DIR_PWR6:
case PPC::DIR_PWR6X:
case PPC::DIR_PWR7:
+ case PPC::DIR_PWR8:
Latency += 2;
break;
}
@@ -313,12 +318,13 @@ void PPCInstrInfo::insertNoop(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI) const {
// This function is used for scheduling, and the nop wanted here is the type
// that terminates dispatch groups on the POWER cores.
- unsigned Directive = TM.getSubtarget<PPCSubtarget>().getDarwinDirective();
+ unsigned Directive = Subtarget.getDarwinDirective();
unsigned Opcode;
switch (Directive) {
default: Opcode = PPC::NOP; break;
case PPC::DIR_PWR6: Opcode = PPC::NOP_GT_PWR6; break;
case PPC::DIR_PWR7: Opcode = PPC::NOP_GT_PWR7; break;
+ case PPC::DIR_PWR8: Opcode = PPC::NOP_GT_PWR7; break; /* FIXME: Update when P8 InstrScheduling model is ready */
}
DebugLoc DL;
@@ -332,7 +338,7 @@ bool PPCInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB,
MachineBasicBlock *&FBB,
SmallVectorImpl<MachineOperand> &Cond,
bool AllowModify) const {
- bool isPPC64 = TM.getSubtargetImpl()->isPPC64();
+ bool isPPC64 = Subtarget.isPPC64();
// If the block has no terminators, it just falls into the block after it.
MachineBasicBlock::iterator I = MBB.end();
@@ -538,7 +544,7 @@ PPCInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
assert((Cond.size() == 2 || Cond.size() == 0) &&
"PPC branch conditions have two components!");
- bool isPPC64 = TM.getSubtargetImpl()->isPPC64();
+ bool isPPC64 = Subtarget.isPPC64();
// One-way branch.
if (!FBB) {
@@ -579,7 +585,7 @@ bool PPCInstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
const SmallVectorImpl<MachineOperand> &Cond,
unsigned TrueReg, unsigned FalseReg,
int &CondCycles, int &TrueCycles, int &FalseCycles) const {
- if (!TM.getSubtargetImpl()->hasISEL())
+ if (!Subtarget.hasISEL())
return false;
if (Cond.size() != 2)
@@ -623,7 +629,7 @@ void PPCInstrInfo::insertSelect(MachineBasicBlock &MBB,
assert(Cond.size() == 2 &&
"PPC branch conditions have two components!");
- assert(TM.getSubtargetImpl()->hasISEL() &&
+ assert(Subtarget.hasISEL() &&
"Cannot insert select on target without ISEL support");
// Get the register classes.
@@ -826,7 +832,7 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF,
FrameIdx));
NonRI = true;
} else if (PPC::VRSAVERCRegClass.hasSubClassEq(RC)) {
- assert(TM.getSubtargetImpl()->isDarwin() &&
+ assert(Subtarget.isDarwin() &&
"VRSAVE only needs spill/restore on Darwin");
NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::SPILL_VRSAVE))
.addReg(SrcReg,
@@ -921,7 +927,7 @@ PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL,
FrameIdx));
NonRI = true;
} else if (PPC::VRSAVERCRegClass.hasSubClassEq(RC)) {
- assert(TM.getSubtargetImpl()->isDarwin() &&
+ assert(Subtarget.isDarwin() &&
"VRSAVE only needs spill/restore on Darwin");
NewMIs.push_back(addFrameReference(BuildMI(MF, DL,
get(PPC::RESTORE_VRSAVE),
@@ -1035,7 +1041,7 @@ bool PPCInstrInfo::FoldImmediate(MachineInstr *UseMI, MachineInstr *DefMI,
unsigned ZeroReg;
if (UseInfo->isLookupPtrRegClass()) {
- bool isPPC64 = TM.getSubtargetImpl()->isPPC64();
+ bool isPPC64 = Subtarget.isPPC64();
ZeroReg = isPPC64 ? PPC::ZERO8 : PPC::ZERO;
} else {
ZeroReg = UseInfo->RegClass == PPC::G8RC_NOX0RegClassID ?
@@ -1102,7 +1108,7 @@ bool PPCInstrInfo::PredicateInstruction(
unsigned OpC = MI->getOpcode();
if (OpC == PPC::BLR) {
if (Pred[1].getReg() == PPC::CTR8 || Pred[1].getReg() == PPC::CTR) {
- bool isPPC64 = TM.getSubtargetImpl()->isPPC64();
+ bool isPPC64 = Subtarget.isPPC64();
MI->setDesc(get(Pred[0].getImm() ?
(isPPC64 ? PPC::BDNZLR8 : PPC::BDNZLR) :
(isPPC64 ? PPC::BDZLR8 : PPC::BDZLR)));
@@ -1124,7 +1130,7 @@ bool PPCInstrInfo::PredicateInstruction(
return true;
} else if (OpC == PPC::B) {
if (Pred[1].getReg() == PPC::CTR8 || Pred[1].getReg() == PPC::CTR) {
- bool isPPC64 = TM.getSubtargetImpl()->isPPC64();
+ bool isPPC64 = Subtarget.isPPC64();
MI->setDesc(get(Pred[0].getImm() ?
(isPPC64 ? PPC::BDNZ8 : PPC::BDNZ) :
(isPPC64 ? PPC::BDZ8 : PPC::BDZ)));
@@ -1162,7 +1168,7 @@ bool PPCInstrInfo::PredicateInstruction(
llvm_unreachable("Cannot predicate bctr[l] on the ctr register");
bool setLR = OpC == PPC::BCTRL || OpC == PPC::BCTRL8;
- bool isPPC64 = TM.getSubtargetImpl()->isPPC64();
+ bool isPPC64 = Subtarget.isPPC64();
if (Pred[0].getImm() == PPC::PRED_BIT_SET) {
MI->setDesc(get(isPPC64 ? (setLR ? PPC::BCCTRL8 : PPC::BCCTR8) :
@@ -1323,7 +1329,7 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr *CmpInstr,
// for equality checks (as those don't depend on the sign). On PPC64,
// we are restricted to equality for unsigned 64-bit comparisons and for
// signed 32-bit comparisons the applicability is more restricted.
- bool isPPC64 = TM.getSubtargetImpl()->isPPC64();
+ bool isPPC64 = Subtarget.isPPC64();
bool is32BitSignedCompare = OpC == PPC::CMPWI || OpC == PPC::CMPW;
bool is32BitUnsignedCompare = OpC == PPC::CMPLWI || OpC == PPC::CMPLW;
bool is64BitUnsignedCompare = OpC == PPC::CMPLDI || OpC == PPC::CMPLD;
diff --git a/lib/Target/PowerPC/PPCInstrInfo.h b/lib/Target/PowerPC/PPCInstrInfo.h
index d9db3e1..83f14c6 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.h
+++ b/lib/Target/PowerPC/PPCInstrInfo.h
@@ -65,7 +65,7 @@ enum PPC970_Unit {
class PPCInstrInfo : public PPCGenInstrInfo {
- PPCTargetMachine &TM;
+ PPCSubtarget &Subtarget;
const PPCRegisterInfo RI;
bool StoreRegToStackSlot(MachineFunction &MF,
@@ -80,7 +80,7 @@ class PPCInstrInfo : public PPCGenInstrInfo {
bool &NonRI, bool &SpillsVRS) const;
virtual void anchor();
public:
- explicit PPCInstrInfo(PPCTargetMachine &TM);
+ explicit PPCInstrInfo(PPCSubtarget &STI);
/// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
/// such, whenever a client has an instance of instruction info, it should
@@ -89,7 +89,7 @@ public:
const PPCRegisterInfo &getRegisterInfo() const { return RI; }
ScheduleHazardRecognizer *
- CreateTargetHazardRecognizer(const TargetMachine *TM,
+ CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI,
const ScheduleDAG *DAG) const override;
ScheduleHazardRecognizer *
CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td
index e421f8e..c2e3382 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/lib/Target/PowerPC/PPCInstrInfo.td
@@ -141,9 +141,6 @@ def PPCload : SDNode<"PPCISD::LOAD", SDTypeProfile<1, 1, []>,
def PPCload_toc : SDNode<"PPCISD::LOAD_TOC", SDTypeProfile<0, 1, []>,
[SDNPHasChain, SDNPSideEffect,
SDNPInGlue, SDNPOutGlue]>;
-def PPCtoc_restore : SDNode<"PPCISD::TOC_RESTORE", SDTypeProfile<0, 0, []>,
- [SDNPHasChain, SDNPSideEffect,
- SDNPInGlue, SDNPOutGlue]>;
def PPCmtctr : SDNode<"PPCISD::MTCTR", SDT_PPCCall,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
def PPCbctrl : SDNode<"PPCISD::BCTRL", SDTNone,
diff --git a/lib/Target/PowerPC/PPCJITInfo.cpp b/lib/Target/PowerPC/PPCJITInfo.cpp
index 7bbc71b..e5f113a 100644
--- a/lib/Target/PowerPC/PPCJITInfo.cpp
+++ b/lib/Target/PowerPC/PPCJITInfo.cpp
@@ -13,7 +13,7 @@
#include "PPCJITInfo.h"
#include "PPCRelocations.h"
-#include "PPCTargetMachine.h"
+#include "PPCSubtarget.h"
#include "llvm/IR/Function.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
@@ -25,6 +25,11 @@ using namespace llvm;
static TargetJITInfo::JITCompilerFn JITCompilerFunction;
+PPCJITInfo::PPCJITInfo(PPCSubtarget &STI)
+ : Subtarget(STI), is64Bit(STI.isPPC64()) {
+ useGOT = 0;
+}
+
#define BUILD_ADDIS(RD,RS,IMM16) \
((15 << 26) | ((RD) << 21) | ((RS) << 16) | ((IMM16) & 65535))
#define BUILD_ORI(RD,RS,UIMM16) \
@@ -393,7 +398,7 @@ void *PPCJITInfo::emitFunctionStub(const Function* F, void *Fn,
JCE.emitWordBE(0xf821ffb1); // stdu r1,-80(r1)
JCE.emitWordBE(0x7d6802a6); // mflr r11
JCE.emitWordBE(0xf9610060); // std r11, 96(r1)
- } else if (TM.getSubtargetImpl()->isDarwinABI()){
+ } else if (Subtarget.isDarwinABI()){
JCE.emitWordBE(0x9421ffe0); // stwu r1,-32(r1)
JCE.emitWordBE(0x7d6802a6); // mflr r11
JCE.emitWordBE(0x91610028); // stw r11, 40(r1)
diff --git a/lib/Target/PowerPC/PPCJITInfo.h b/lib/Target/PowerPC/PPCJITInfo.h
index 0693e3e..b6b37ff 100644
--- a/lib/Target/PowerPC/PPCJITInfo.h
+++ b/lib/Target/PowerPC/PPCJITInfo.h
@@ -18,32 +18,29 @@
#include "llvm/Target/TargetJITInfo.h"
namespace llvm {
- class PPCTargetMachine;
+class PPCSubtarget;
+class PPCJITInfo : public TargetJITInfo {
+protected:
+ PPCSubtarget &Subtarget;
+ bool is64Bit;
- class PPCJITInfo : public TargetJITInfo {
- protected:
- PPCTargetMachine &TM;
- bool is64Bit;
- public:
- PPCJITInfo(PPCTargetMachine &tm, bool tmIs64Bit) : TM(tm) {
- useGOT = 0;
- is64Bit = tmIs64Bit;
- }
+public:
+ PPCJITInfo(PPCSubtarget &STI);
- StubLayout getStubLayout() override;
- void *emitFunctionStub(const Function* F, void *Fn,
- JITCodeEmitter &JCE) override;
- LazyResolverFn getLazyResolverFunction(JITCompilerFn) override;
- void relocate(void *Function, MachineRelocation *MR,
- unsigned NumRelocs, unsigned char* GOTBase) override;
+ StubLayout getStubLayout() override;
+ void *emitFunctionStub(const Function *F, void *Fn,
+ JITCodeEmitter &JCE) override;
+ LazyResolverFn getLazyResolverFunction(JITCompilerFn) override;
+ void relocate(void *Function, MachineRelocation *MR, unsigned NumRelocs,
+ unsigned char *GOTBase) override;
- /// replaceMachineCodeForFunction - Make it so that calling the function
- /// whose machine code is at OLD turns into a call to NEW, perhaps by
- /// overwriting OLD with a branch to NEW. This is used for self-modifying
- /// code.
- ///
- void replaceMachineCodeForFunction(void *Old, void *New) override;
- };
+ /// replaceMachineCodeForFunction - Make it so that calling the function
+ /// whose machine code is at OLD turns into a call to NEW, perhaps by
+ /// overwriting OLD with a branch to NEW. This is used for self-modifying
+ /// code.
+ ///
+ void replaceMachineCodeForFunction(void *Old, void *New) override;
+};
}
#endif
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp
index e333b51..eca774e 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -973,6 +973,14 @@ void PPCRegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
unsigned OffsetOperandNo = getOffsetONFromFION(MI, FIOperandNum);
Offset += MI.getOperand(OffsetOperandNo).getImm();
MI.getOperand(OffsetOperandNo).ChangeToImmediate(Offset);
+
+ MachineBasicBlock &MBB = *MI.getParent();
+ MachineFunction &MF = *MBB.getParent();
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ const MCInstrDesc &MCID = MI.getDesc();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ MRI.constrainRegClass(BaseReg,
+ TII.getRegClass(MCID, FIOperandNum, this, MF));
}
bool PPCRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI,
diff --git a/lib/Target/PowerPC/PPCSelectionDAGInfo.cpp b/lib/Target/PowerPC/PPCSelectionDAGInfo.cpp
index f742f72..dc16742 100644
--- a/lib/Target/PowerPC/PPCSelectionDAGInfo.cpp
+++ b/lib/Target/PowerPC/PPCSelectionDAGInfo.cpp
@@ -16,9 +16,7 @@ using namespace llvm;
#define DEBUG_TYPE "powerpc-selectiondag-info"
-PPCSelectionDAGInfo::PPCSelectionDAGInfo(const PPCTargetMachine &TM)
- : TargetSelectionDAGInfo(TM) {
-}
+PPCSelectionDAGInfo::PPCSelectionDAGInfo(const DataLayout *DL)
+ : TargetSelectionDAGInfo(DL) {}
-PPCSelectionDAGInfo::~PPCSelectionDAGInfo() {
-}
+PPCSelectionDAGInfo::~PPCSelectionDAGInfo() {}
diff --git a/lib/Target/PowerPC/PPCSelectionDAGInfo.h b/lib/Target/PowerPC/PPCSelectionDAGInfo.h
index 341b69c..b2e7f3b 100644
--- a/lib/Target/PowerPC/PPCSelectionDAGInfo.h
+++ b/lib/Target/PowerPC/PPCSelectionDAGInfo.h
@@ -22,7 +22,7 @@ class PPCTargetMachine;
class PPCSelectionDAGInfo : public TargetSelectionDAGInfo {
public:
- explicit PPCSelectionDAGInfo(const PPCTargetMachine &TM);
+ explicit PPCSelectionDAGInfo(const DataLayout *DL);
~PPCSelectionDAGInfo();
};
diff --git a/lib/Target/PowerPC/PPCSubtarget.cpp b/lib/Target/PowerPC/PPCSubtarget.cpp
index ea9daee..2e1b74a 100644
--- a/lib/Target/PowerPC/PPCSubtarget.cpp
+++ b/lib/Target/PowerPC/PPCSubtarget.cpp
@@ -32,15 +32,57 @@ using namespace llvm;
#define GET_SUBTARGETINFO_CTOR
#include "PPCGenSubtargetInfo.inc"
-PPCSubtarget::PPCSubtarget(const std::string &TT, const std::string &CPU,
- const std::string &FS, bool is64Bit,
- CodeGenOpt::Level OptLevel)
- : PPCGenSubtargetInfo(TT, CPU, FS), IsPPC64(is64Bit), TargetTriple(TT),
- OptLevel(OptLevel) {
+/// Return the datalayout string of a subtarget.
+static std::string getDataLayoutString(const PPCSubtarget &ST) {
+ const Triple &T = ST.getTargetTriple();
+
+ std::string Ret;
+
+ // Most PPC* platforms are big endian, PPC64LE is little endian.
+ if (ST.isLittleEndian())
+ Ret = "e";
+ else
+ Ret = "E";
+
+ Ret += DataLayout::getManglingComponent(T);
+
+ // PPC32 has 32 bit pointers. The PS3 (OS Lv2) is a PPC64 machine with 32 bit
+ // pointers.
+ if (!ST.isPPC64() || T.getOS() == Triple::Lv2)
+ Ret += "-p:32:32";
+
+ // Note, the alignment values for f64 and i64 on ppc64 in Darwin
+ // documentation are wrong; these are correct (i.e. "what gcc does").
+ if (ST.isPPC64() || ST.isSVR4ABI())
+ Ret += "-i64:64";
+ else
+ Ret += "-f64:32:64";
+
+ // PPC64 has 32 and 64 bit registers, PPC32 has only 32 bit ones.
+ if (ST.isPPC64())
+ Ret += "-n32:64";
+ else
+ Ret += "-n32";
+
+ return Ret;
+}
+
+PPCSubtarget &PPCSubtarget::initializeSubtargetDependencies(StringRef CPU,
+ StringRef FS) {
initializeEnvironment();
resetSubtargetFeatures(CPU, FS);
+ return *this;
}
+PPCSubtarget::PPCSubtarget(const std::string &TT, const std::string &CPU,
+ const std::string &FS, PPCTargetMachine &TM,
+ bool is64Bit, CodeGenOpt::Level OptLevel)
+ : PPCGenSubtargetInfo(TT, CPU, FS), IsPPC64(is64Bit), TargetTriple(TT),
+ OptLevel(OptLevel),
+ FrameLowering(initializeSubtargetDependencies(CPU, FS)),
+ DL(getDataLayoutString(*this)), InstrInfo(*this), JITInfo(*this),
+ TLInfo(TM), TSInfo(&DL) {}
+
/// SetJITMode - This is called to inform the subtarget info that we are
/// producing code for the JIT.
void PPCSubtarget::SetJITMode() {
@@ -156,6 +198,11 @@ void PPCSubtarget::resetSubtargetFeatures(StringRef CPU, StringRef FS) {
// Determine endianness.
IsLittleEndian = (TargetTriple.getArch() == Triple::ppc64le);
+
+ // FIXME: For now, we disable VSX in little-endian mode until endian
+ // issues in those instructions can be addressed.
+ if (IsLittleEndian)
+ HasVSX = false;
}
/// hasLazyResolverStub - Return true if accesses to the specified global have
@@ -200,6 +247,7 @@ static bool needsAggressiveScheduling(unsigned Directive) {
case PPC::DIR_E500mc:
case PPC::DIR_E5500:
case PPC::DIR_PWR7:
+ case PPC::DIR_PWR8:
return true;
}
}
diff --git a/lib/Target/PowerPC/PPCSubtarget.h b/lib/Target/PowerPC/PPCSubtarget.h
index ee43fd5..2a16699 100644
--- a/lib/Target/PowerPC/PPCSubtarget.h
+++ b/lib/Target/PowerPC/PPCSubtarget.h
@@ -14,7 +14,13 @@
#ifndef POWERPCSUBTARGET_H
#define POWERPCSUBTARGET_H
+#include "PPCFrameLowering.h"
+#include "PPCInstrInfo.h"
+#include "PPCISelLowering.h"
+#include "PPCJITInfo.h"
+#include "PPCSelectionDAGInfo.h"
#include "llvm/ADT/Triple.h"
+#include "llvm/IR/DataLayout.h"
#include "llvm/MC/MCInstrItineraries.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include <string>
@@ -50,6 +56,7 @@ namespace PPC {
DIR_PWR6,
DIR_PWR6X,
DIR_PWR7,
+ DIR_PWR8,
DIR_64
};
}
@@ -102,12 +109,19 @@ protected:
/// OptLevel - What default optimization level we're emitting code for.
CodeGenOpt::Level OptLevel;
+ PPCFrameLowering FrameLowering;
+ const DataLayout DL;
+ PPCInstrInfo InstrInfo;
+ PPCJITInfo JITInfo;
+ PPCTargetLowering TLInfo;
+ PPCSelectionDAGInfo TSInfo;
+
public:
/// This constructor initializes the data members to match that
/// of the specified triple.
///
PPCSubtarget(const std::string &TT, const std::string &CPU,
- const std::string &FS, bool is64Bit,
+ const std::string &FS, PPCTargetMachine &TM, bool is64Bit,
CodeGenOpt::Level OptLevel);
/// ParseSubtargetFeatures - Parses features string setting specified
@@ -127,10 +141,21 @@ public:
///
unsigned getDarwinDirective() const { return DarwinDirective; }
- /// getInstrItins - Return the instruction itineraies based on subtarget
+ /// getInstrItins - Return the instruction itineraries based on subtarget
/// selection.
const InstrItineraryData &getInstrItineraryData() const { return InstrItins; }
+ const PPCFrameLowering *getFrameLowering() const { return &FrameLowering; }
+ const DataLayout *getDataLayout() const { return &DL; }
+ const PPCInstrInfo *getInstrInfo() const { return &InstrInfo; }
+ PPCJITInfo *getJITInfo() { return &JITInfo; }
+ const PPCTargetLowering *getTargetLowering() const { return &TLInfo; }
+ const PPCSelectionDAGInfo *getSelectionDAGInfo() const { return &TSInfo; }
+
+ /// initializeSubtargetDependencies - Initializes using a CPU and feature string
+ /// so that we can use initializer lists for subtarget initialization.
+ PPCSubtarget &initializeSubtargetDependencies(StringRef CPU, StringRef FS);
+
/// \brief Reset the features for the PowerPC target.
void resetSubtargetFeatures(const MachineFunction *MF) override;
private:
diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp
index 2323add..9563b90 100644
--- a/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -37,53 +37,12 @@ extern "C" void LLVMInitializePowerPCTarget() {
RegisterTargetMachine<PPC64TargetMachine> C(ThePPC64LETarget);
}
-/// Return the datalayout string of a subtarget.
-static std::string getDataLayoutString(const PPCSubtarget &ST) {
- const Triple &T = ST.getTargetTriple();
-
- std::string Ret;
-
- // Most PPC* platforms are big endian, PPC64LE is little endian.
- if (ST.isLittleEndian())
- Ret = "e";
- else
- Ret = "E";
-
- Ret += DataLayout::getManglingComponent(T);
-
- // PPC32 has 32 bit pointers. The PS3 (OS Lv2) is a PPC64 machine with 32 bit
- // pointers.
- if (!ST.isPPC64() || T.getOS() == Triple::Lv2)
- Ret += "-p:32:32";
-
- // Note, the alignment values for f64 and i64 on ppc64 in Darwin
- // documentation are wrong; these are correct (i.e. "what gcc does").
- if (ST.isPPC64() || ST.isSVR4ABI())
- Ret += "-i64:64";
- else
- Ret += "-f64:32:64";
-
- // PPC64 has 32 and 64 bit registers, PPC32 has only 32 bit ones.
- if (ST.isPPC64())
- Ret += "-n32:64";
- else
- Ret += "-n32";
-
- return Ret;
-}
-
-PPCTargetMachine::PPCTargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
- const TargetOptions &Options,
+PPCTargetMachine::PPCTargetMachine(const Target &T, StringRef TT, StringRef CPU,
+ StringRef FS, const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
- CodeGenOpt::Level OL,
- bool is64Bit)
- : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
- Subtarget(TT, CPU, FS, is64Bit, OL),
- DL(getDataLayoutString(Subtarget)), InstrInfo(*this),
- FrameLowering(Subtarget), JITInfo(*this, is64Bit),
- TLInfo(*this), TSInfo(*this),
- InstrItins(Subtarget.getInstrItineraryData()) {
+ CodeGenOpt::Level OL, bool is64Bit)
+ : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
+ Subtarget(TT, CPU, FS, *this, is64Bit, OL) {
initAsmInfo();
}
diff --git a/lib/Target/PowerPC/PPCTargetMachine.h b/lib/Target/PowerPC/PPCTargetMachine.h
index 9e92494..4c7029c 100644
--- a/lib/Target/PowerPC/PPCTargetMachine.h
+++ b/lib/Target/PowerPC/PPCTargetMachine.h
@@ -14,11 +14,7 @@
#ifndef PPC_TARGETMACHINE_H
#define PPC_TARGETMACHINE_H
-#include "PPCFrameLowering.h"
-#include "PPCISelLowering.h"
#include "PPCInstrInfo.h"
-#include "PPCJITInfo.h"
-#include "PPCSelectionDAGInfo.h"
#include "PPCSubtarget.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/Target/TargetMachine.h"
@@ -29,13 +25,6 @@ namespace llvm {
///
class PPCTargetMachine : public LLVMTargetMachine {
PPCSubtarget Subtarget;
- const DataLayout DL; // Calculates type size & alignment
- PPCInstrInfo InstrInfo;
- PPCFrameLowering FrameLowering;
- PPCJITInfo JITInfo;
- PPCTargetLowering TLInfo;
- PPCSelectionDAGInfo TSInfo;
- InstrItineraryData InstrItins;
public:
PPCTargetMachine(const Target &T, StringRef TT,
@@ -43,25 +32,29 @@ public:
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL, bool is64Bit);
- const PPCInstrInfo *getInstrInfo() const override { return &InstrInfo; }
- const PPCFrameLowering *getFrameLowering() const override {
- return &FrameLowering;
+ const PPCInstrInfo *getInstrInfo() const override {
+ return getSubtargetImpl()->getInstrInfo();
}
- PPCJITInfo *getJITInfo() override { return &JITInfo; }
+ const PPCFrameLowering *getFrameLowering() const override {
+ return getSubtargetImpl()->getFrameLowering();
+ }
+ PPCJITInfo *getJITInfo() override { return Subtarget.getJITInfo(); }
const PPCTargetLowering *getTargetLowering() const override {
- return &TLInfo;
+ return getSubtargetImpl()->getTargetLowering();
}
const PPCSelectionDAGInfo* getSelectionDAGInfo() const override {
- return &TSInfo;
+ return getSubtargetImpl()->getSelectionDAGInfo();
}
- const PPCRegisterInfo *getRegisterInfo() const override {
- return &InstrInfo.getRegisterInfo();
+ const PPCRegisterInfo *getRegisterInfo() const override {
+ return &getInstrInfo()->getRegisterInfo();
}
- const DataLayout *getDataLayout() const override { return &DL; }
+ const DataLayout *getDataLayout() const override {
+ return getSubtargetImpl()->getDataLayout();
+ }
const PPCSubtarget *getSubtargetImpl() const override { return &Subtarget; }
const InstrItineraryData *getInstrItineraryData() const override {
- return &InstrItins;
+ return &getSubtargetImpl()->getInstrItineraryData();
}
// Pass Pipeline Configuration
diff --git a/lib/Target/R600/AMDGPU.h b/lib/Target/R600/AMDGPU.h
index 949fdfb..713fc4b 100644
--- a/lib/Target/R600/AMDGPU.h
+++ b/lib/Target/R600/AMDGPU.h
@@ -17,6 +17,7 @@
namespace llvm {
class AMDGPUInstrPrinter;
+class AMDGPUSubtarget;
class AMDGPUTargetMachine;
class FunctionPass;
class MCAsmInfo;
@@ -40,6 +41,7 @@ FunctionPass *createSIAnnotateControlFlowPass();
FunctionPass *createSILowerI1CopiesPass();
FunctionPass *createSILowerControlFlowPass(TargetMachine &tm);
FunctionPass *createSIFixSGPRCopiesPass(TargetMachine &tm);
+FunctionPass *createSIFixSGPRLiveRangesPass();
FunctionPass *createSICodeEmitterPass(formatted_raw_ostream &OS);
FunctionPass *createSIInsertWaits(TargetMachine &tm);
@@ -47,14 +49,18 @@ void initializeSILowerI1CopiesPass(PassRegistry &);
extern char &SILowerI1CopiesID;
// Passes common to R600 and SI
+FunctionPass *createAMDGPUPromoteAlloca(const AMDGPUSubtarget &ST);
Pass *createAMDGPUStructurizeCFGPass();
-FunctionPass *createAMDGPUConvertToISAPass(TargetMachine &tm);
FunctionPass *createAMDGPUISelDag(TargetMachine &tm);
/// \brief Creates an AMDGPU-specific Target Transformation Info pass.
ImmutablePass *
createAMDGPUTargetTransformInfoPass(const AMDGPUTargetMachine *TM);
+void initializeSIFixSGPRLiveRangesPass(PassRegistry&);
+extern char &SIFixSGPRLiveRangesID;
+
+
extern Target TheAMDGPUTarget;
} // End namespace llvm
diff --git a/lib/Target/R600/AMDGPU.td b/lib/Target/R600/AMDGPU.td
index 2edc115..6ff9ab7 100644
--- a/lib/Target/R600/AMDGPU.td
+++ b/lib/Target/R600/AMDGPU.td
@@ -7,8 +7,7 @@
//
//==-----------------------------------------------------------------------===//
-// Include AMDIL TD files
-include "AMDILBase.td"
+include "llvm/Target/Target.td"
//===----------------------------------------------------------------------===//
// Subtarget Features
@@ -33,30 +32,25 @@ def FeatureIfCvt : SubtargetFeature <"disable-ifcvt",
"false",
"Disable the if conversion pass">;
-def FeatureFP64 : SubtargetFeature<"fp64",
+def FeatureFP64 : SubtargetFeature<"fp64",
"FP64",
"true",
- "Enable 64bit double precision operations">;
+ "Enable double precision operations">;
def Feature64BitPtr : SubtargetFeature<"64BitPtr",
"Is64bit",
"true",
- "Specify if 64bit addressing should be used.">;
-
-def Feature32on64BitPtr : SubtargetFeature<"64on32BitPtr",
- "Is32on64bit",
- "false",
- "Specify if 64bit sized pointers with 32bit addressing should be used.">;
+ "Specify if 64-bit addressing should be used">;
def FeatureR600ALUInst : SubtargetFeature<"R600ALUInst",
"R600ALUInst",
"false",
- "Older version of ALU instructions encoding.">;
+ "Older version of ALU instructions encoding">;
def FeatureVertexCache : SubtargetFeature<"HasVertexCache",
"HasVertexCache",
"true",
- "Specify use of dedicated vertex cache.">;
+ "Specify use of dedicated vertex cache">;
def FeatureCaymanISA : SubtargetFeature<"caymanISA",
"CaymanISA",
@@ -87,28 +81,40 @@ def FeatureWavefrontSize16 : SubtargetFeatureWavefrontSize<16>;
def FeatureWavefrontSize32 : SubtargetFeatureWavefrontSize<32>;
def FeatureWavefrontSize64 : SubtargetFeatureWavefrontSize<64>;
+class SubtargetFeatureLocalMemorySize <int Value> : SubtargetFeature<
+ "localmemorysize"#Value,
+ "LocalMemorySize",
+ !cast<string>(Value),
+ "The size of local memory in bytes">;
+
class SubtargetFeatureGeneration <string Value,
list<SubtargetFeature> Implies> :
SubtargetFeature <Value, "Gen", "AMDGPUSubtarget::"#Value,
Value#" GPU generation", Implies>;
+def FeatureLocalMemorySize0 : SubtargetFeatureLocalMemorySize<0>;
+def FeatureLocalMemorySize32768 : SubtargetFeatureLocalMemorySize<32768>;
+def FeatureLocalMemorySize65536 : SubtargetFeatureLocalMemorySize<65536>;
+
def FeatureR600 : SubtargetFeatureGeneration<"R600",
- [FeatureR600ALUInst, FeatureFetchLimit8]>;
+ [FeatureR600ALUInst, FeatureFetchLimit8, FeatureLocalMemorySize0]>;
def FeatureR700 : SubtargetFeatureGeneration<"R700",
- [FeatureFetchLimit16]>;
+ [FeatureFetchLimit16, FeatureLocalMemorySize0]>;
def FeatureEvergreen : SubtargetFeatureGeneration<"EVERGREEN",
- [FeatureFetchLimit16]>;
+ [FeatureFetchLimit16, FeatureLocalMemorySize32768]>;
def FeatureNorthernIslands : SubtargetFeatureGeneration<"NORTHERN_ISLANDS",
- [FeatureFetchLimit16, FeatureWavefrontSize64]>;
+ [FeatureFetchLimit16, FeatureWavefrontSize64,
+ FeatureLocalMemorySize32768]
+>;
def FeatureSouthernIslands : SubtargetFeatureGeneration<"SOUTHERN_ISLANDS",
- [Feature64BitPtr, FeatureFP64]>;
+ [Feature64BitPtr, FeatureFP64, FeatureLocalMemorySize32768]>;
def FeatureSeaIslands : SubtargetFeatureGeneration<"SEA_ISLANDS",
- [Feature64BitPtr, FeatureFP64]>;
+ [Feature64BitPtr, FeatureFP64, FeatureLocalMemorySize65536]>;
//===----------------------------------------------------------------------===//
def AMDGPUInstrInfo : InstrInfo {
@@ -120,6 +126,10 @@ def AMDGPU : Target {
let InstructionSet = AMDGPUInstrInfo;
}
+// Dummy Instruction itineraries for pseudo instructions
+def ALU_NULL : FuncUnit;
+def NullALU : InstrItinClass;
+
//===----------------------------------------------------------------------===//
// Predicate helper class
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/R600/AMDGPUAsmPrinter.cpp b/lib/Target/R600/AMDGPUAsmPrinter.cpp
index 170f479..a6e217b 100644
--- a/lib/Target/R600/AMDGPUAsmPrinter.cpp
+++ b/lib/Target/R600/AMDGPUAsmPrinter.cpp
@@ -19,6 +19,7 @@
#include "AMDGPUAsmPrinter.h"
#include "AMDGPU.h"
+#include "AMDGPUSubtarget.h"
#include "R600Defines.h"
#include "R600MachineFunctionInfo.h"
#include "R600RegisterInfo.h"
@@ -35,6 +36,24 @@
using namespace llvm;
+// TODO: This should get the default rounding mode from the kernel. We just set
+// the default here, but this could change if the OpenCL rounding mode pragmas
+// are used.
+//
+// The denormal mode here should match what is reported by the OpenCL runtime
+// for the CL_FP_DENORM bit from CL_DEVICE_{HALF|SINGLE|DOUBLE}_FP_CONFIG, but
+// can also be overridden to flush with the -cl-denorms-are-zero compiler flag.
+//
+// AMD OpenCL only sets flush none and reports CL_FP_DENORM for double
+// precision, and leaves single precision to flush all and does not report
+// CL_FP_DENORM for CL_DEVICE_SINGLE_FP_CONFIG. Mesa's OpenCL currently reports
+// CL_FP_DENORM for both.
+static uint32_t getFPMode(MachineFunction &) {
+ return FP_ROUND_MODE_SP(FP_ROUND_ROUND_TO_NEAREST) |
+ FP_ROUND_MODE_DP(FP_ROUND_ROUND_TO_NEAREST) |
+ FP_DENORM_MODE_SP(FP_DENORM_FLUSH_NONE) |
+ FP_DENORM_MODE_DP(FP_DENORM_FLUSH_NONE);
+}
static AsmPrinter *createAMDGPUAsmPrinterPass(TargetMachine &tm,
MCStreamer &Streamer) {
@@ -92,6 +111,10 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
false);
OutStreamer.emitRawComment(" NumVgprs: " + Twine(KernelInfo.NumVGPR),
false);
+ OutStreamer.emitRawComment(" FloatMode: " + Twine(KernelInfo.FloatMode),
+ false);
+ OutStreamer.emitRawComment(" IeeeMode: " + Twine(KernelInfo.IEEEMode),
+ false);
} else {
R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
OutStreamer.emitRawComment(
@@ -279,16 +302,27 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
if (VCCUsed)
MaxSGPR += 2;
- ProgInfo.CodeLen = CodeSize;
- ProgInfo.NumSGPR = MaxSGPR;
ProgInfo.NumVGPR = MaxVGPR;
+ ProgInfo.NumSGPR = MaxSGPR;
+
+ // Set the value to initialize FP_ROUND and FP_DENORM parts of the mode
+ // register.
+ ProgInfo.FloatMode = getFPMode(MF);
+
+ // XXX: Not quite sure what this does, but sc seems to unset this.
+ ProgInfo.IEEEMode = 0;
+
+ // Do not clamp NAN to 0.
+ ProgInfo.DX10Clamp = 0;
+
+ ProgInfo.CodeLen = CodeSize;
}
void AMDGPUAsmPrinter::EmitProgramInfoSI(MachineFunction &MF,
const SIProgramInfo &KernelInfo) {
const AMDGPUSubtarget &STM = TM.getSubtarget<AMDGPUSubtarget>();
-
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+
unsigned RsrcReg;
switch (MFI->ShaderType) {
default: // Fall through
@@ -298,25 +332,41 @@ void AMDGPUAsmPrinter::EmitProgramInfoSI(MachineFunction &MF,
case ShaderType::VERTEX: RsrcReg = R_00B128_SPI_SHADER_PGM_RSRC1_VS; break;
}
- OutStreamer.EmitIntValue(RsrcReg, 4);
- OutStreamer.EmitIntValue(S_00B028_VGPRS(KernelInfo.NumVGPR / 4) |
- S_00B028_SGPRS(KernelInfo.NumSGPR / 8), 4);
-
unsigned LDSAlignShift;
if (STM.getGeneration() < AMDGPUSubtarget::SEA_ISLANDS) {
- // LDS is allocated in 64 dword blocks
+ // LDS is allocated in 64 dword blocks.
LDSAlignShift = 8;
} else {
- // LDS is allocated in 128 dword blocks
+ // LDS is allocated in 128 dword blocks.
LDSAlignShift = 9;
}
+
unsigned LDSBlocks =
- RoundUpToAlignment(MFI->LDSSize, 1 << LDSAlignShift) >> LDSAlignShift;
+ RoundUpToAlignment(MFI->LDSSize, 1 << LDSAlignShift) >> LDSAlignShift;
if (MFI->ShaderType == ShaderType::COMPUTE) {
+ OutStreamer.EmitIntValue(R_00B848_COMPUTE_PGM_RSRC1, 4);
+
+ const uint32_t ComputePGMRSrc1 =
+ S_00B848_VGPRS(KernelInfo.NumVGPR / 4) |
+ S_00B848_SGPRS(KernelInfo.NumSGPR / 8) |
+ S_00B848_PRIORITY(KernelInfo.Priority) |
+ S_00B848_FLOAT_MODE(KernelInfo.FloatMode) |
+ S_00B848_PRIV(KernelInfo.Priv) |
+ S_00B848_DX10_CLAMP(KernelInfo.DX10Clamp) |
+ S_00B848_IEEE_MODE(KernelInfo.DebugMode) |
+ S_00B848_IEEE_MODE(KernelInfo.IEEEMode);
+
+ OutStreamer.EmitIntValue(ComputePGMRSrc1, 4);
+
OutStreamer.EmitIntValue(R_00B84C_COMPUTE_PGM_RSRC2, 4);
OutStreamer.EmitIntValue(S_00B84C_LDS_SIZE(LDSBlocks), 4);
+ } else {
+ OutStreamer.EmitIntValue(RsrcReg, 4);
+ OutStreamer.EmitIntValue(S_00B028_VGPRS(KernelInfo.NumVGPR / 4) |
+ S_00B028_SGPRS(KernelInfo.NumSGPR / 8), 4);
}
+
if (MFI->ShaderType == ShaderType::PIXEL) {
OutStreamer.EmitIntValue(R_00B02C_SPI_SHADER_PGM_RSRC2_PS, 4);
OutStreamer.EmitIntValue(S_00B02C_EXTRA_LDS_SIZE(LDSBlocks), 4);
diff --git a/lib/Target/R600/AMDGPUAsmPrinter.h b/lib/Target/R600/AMDGPUAsmPrinter.h
index 71adc9a..c1acb6e 100644
--- a/lib/Target/R600/AMDGPUAsmPrinter.h
+++ b/lib/Target/R600/AMDGPUAsmPrinter.h
@@ -25,13 +25,28 @@ class AMDGPUAsmPrinter : public AsmPrinter {
private:
struct SIProgramInfo {
SIProgramInfo() :
- CodeLen(0),
+ NumVGPR(0),
NumSGPR(0),
- NumVGPR(0) {}
+ Priority(0),
+ FloatMode(0),
+ Priv(0),
+ DX10Clamp(0),
+ DebugMode(0),
+ IEEEMode(0),
+ CodeLen(0) {}
+ // Fields set in PGM_RSRC1 pm4 packet.
+ uint32_t NumVGPR;
+ uint32_t NumSGPR;
+ uint32_t Priority;
+ uint32_t FloatMode;
+ uint32_t Priv;
+ uint32_t DX10Clamp;
+ uint32_t DebugMode;
+ uint32_t IEEEMode;
+
+ // Bonus information for debugging.
uint64_t CodeLen;
- unsigned NumSGPR;
- unsigned NumVGPR;
};
void getSIProgramInfo(SIProgramInfo &Out, MachineFunction &MF) const;
diff --git a/lib/Target/R600/AMDGPUConvertToISA.cpp b/lib/Target/R600/AMDGPUConvertToISA.cpp
deleted file mode 100644
index 91aeee2..0000000
--- a/lib/Target/R600/AMDGPUConvertToISA.cpp
+++ /dev/null
@@ -1,62 +0,0 @@
-//===-- AMDGPUConvertToISA.cpp - Lower AMDIL to HW ISA --------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-/// \file
-/// \brief This pass lowers AMDIL machine instructions to the appropriate
-/// hardware instructions.
-//
-//===----------------------------------------------------------------------===//
-
-#include "AMDGPU.h"
-#include "AMDGPUInstrInfo.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-
-using namespace llvm;
-
-namespace {
-
-class AMDGPUConvertToISAPass : public MachineFunctionPass {
-
-private:
- static char ID;
- TargetMachine &TM;
-
-public:
- AMDGPUConvertToISAPass(TargetMachine &tm) :
- MachineFunctionPass(ID), TM(tm) { }
-
- bool runOnMachineFunction(MachineFunction &MF) override;
-
- const char *getPassName() const override {return "AMDGPU Convert to ISA";}
-
-};
-
-} // End anonymous namespace
-
-char AMDGPUConvertToISAPass::ID = 0;
-
-FunctionPass *llvm::createAMDGPUConvertToISAPass(TargetMachine &tm) {
- return new AMDGPUConvertToISAPass(tm);
-}
-
-bool AMDGPUConvertToISAPass::runOnMachineFunction(MachineFunction &MF) {
- const AMDGPUInstrInfo * TII =
- static_cast<const AMDGPUInstrInfo*>(TM.getInstrInfo());
-
- for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
- BB != BB_E; ++BB) {
- MachineBasicBlock &MBB = *BB;
- for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
- I != E; ++I) {
- MachineInstr &MI = *I;
- TII->convertToISA(MI, MF, MBB.findDebugLoc(I));
- }
- }
- return false;
-}
diff --git a/lib/Target/R600/AMDGPUFrameLowering.cpp b/lib/Target/R600/AMDGPUFrameLowering.cpp
index e7e90d3..9e8302e 100644
--- a/lib/Target/R600/AMDGPUFrameLowering.cpp
+++ b/lib/Target/R600/AMDGPUFrameLowering.cpp
@@ -83,7 +83,7 @@ int AMDGPUFrameLowering::getFrameIndexOffset(const MachineFunction &MF,
for (int i = MFI->getObjectIndexBegin(); i < UpperBound; ++i) {
OffsetBytes = RoundUpToAlignment(OffsetBytes, MFI->getObjectAlignment(i));
OffsetBytes += MFI->getObjectSize(i);
- // Each regiter holds 4 bytes, so we must always align the offset to at
+ // Each register holds 4 bytes, so we must always align the offset to at
// least 4 bytes, so that 2 frame objects won't share the same register.
OffsetBytes = RoundUpToAlignment(OffsetBytes, 4);
}
diff --git a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
index f1f0bfa..b4d79e5 100644
--- a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
+++ b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
@@ -14,6 +14,7 @@
#include "AMDGPUInstrInfo.h"
#include "AMDGPUISelLowering.h" // For AMDGPUISD
#include "AMDGPURegisterInfo.h"
+#include "AMDGPUSubtarget.h"
#include "R600InstrInfo.h"
#include "SIISelLowering.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
@@ -83,6 +84,11 @@ private:
SDValue& Offset);
bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset);
+ bool SelectMUBUFAddr64(SDValue Addr, SDValue &Ptr, SDValue &Offset,
+ SDValue &ImmOffset) const;
+
+ SDNode *SelectADD_SUB_I64(SDNode *N);
+ SDNode *SelectDIV_SCALE(SDNode *N);
// Include the pieces autogenerated from the target description.
#include "AMDGPUGenDAGISel.inc"
@@ -211,51 +217,16 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
// We are selecting i64 ADD here instead of custom lower it during
// DAG legalization, so we can fold some i64 ADDs used for address
// calculation into the LOAD and STORE instructions.
- case ISD::ADD: {
+ case ISD::ADD:
+ case ISD::SUB: {
if (N->getValueType(0) != MVT::i64 ||
ST.getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
break;
- SDLoc DL(N);
- SDValue LHS = N->getOperand(0);
- SDValue RHS = N->getOperand(1);
-
- SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, MVT::i32);
- SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, MVT::i32);
-
- SDNode *Lo0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
- DL, MVT::i32, LHS, Sub0);
- SDNode *Hi0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
- DL, MVT::i32, LHS, Sub1);
-
- SDNode *Lo1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
- DL, MVT::i32, RHS, Sub0);
- SDNode *Hi1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
- DL, MVT::i32, RHS, Sub1);
-
- SDVTList VTList = CurDAG->getVTList(MVT::i32, MVT::Glue);
-
- SmallVector<SDValue, 8> AddLoArgs;
- AddLoArgs.push_back(SDValue(Lo0, 0));
- AddLoArgs.push_back(SDValue(Lo1, 0));
-
- SDNode *AddLo = CurDAG->getMachineNode(
- isCFDepth0() ? AMDGPU::S_ADD_I32 : AMDGPU::V_ADD_I32_e32,
- DL, VTList, AddLoArgs);
- SDValue Carry = SDValue(AddLo, 1);
- SDNode *AddHi = CurDAG->getMachineNode(
- isCFDepth0() ? AMDGPU::S_ADDC_U32 : AMDGPU::V_ADDC_U32_e32,
- DL, MVT::i32, SDValue(Hi0, 0), SDValue(Hi1, 0), Carry);
-
- SDValue Args[5] = {
- CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, MVT::i32),
- SDValue(AddLo,0),
- Sub0,
- SDValue(AddHi,0),
- Sub1,
- };
- return CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, MVT::i64, Args);
+ return SelectADD_SUB_I64(N);
}
+ case ISD::SCALAR_TO_VECTOR:
+ case AMDGPUISD::BUILD_VERTICAL_VECTOR:
case ISD::BUILD_VECTOR: {
unsigned RegClassID;
const AMDGPURegisterInfo *TRI =
@@ -264,7 +235,8 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
static_cast<const SIRegisterInfo*>(TM.getRegisterInfo());
EVT VT = N->getValueType(0);
unsigned NumVectorElts = VT.getVectorNumElements();
- assert(VT.getVectorElementType().bitsEq(MVT::i32));
+ EVT EltVT = VT.getVectorElementType();
+ assert(EltVT.bitsEq(MVT::i32));
if (ST.getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
bool UseVReg = true;
for (SDNode::use_iterator U = N->use_begin(), E = SDNode::use_end();
@@ -305,7 +277,12 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
// can't be bundled by our scheduler.
switch(NumVectorElts) {
case 2: RegClassID = AMDGPU::R600_Reg64RegClassID; break;
- case 4: RegClassID = AMDGPU::R600_Reg128RegClassID; break;
+ case 4:
+ if (Opc == AMDGPUISD::BUILD_VERTICAL_VECTOR)
+ RegClassID = AMDGPU::R600_Reg128VerticalRegClassID;
+ else
+ RegClassID = AMDGPU::R600_Reg128RegClassID;
+ break;
default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR");
}
}
@@ -313,8 +290,7 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
SDValue RegClass = CurDAG->getTargetConstant(RegClassID, MVT::i32);
if (NumVectorElts == 1) {
- return CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS,
- VT.getVectorElementType(),
+ return CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS, EltVT,
N->getOperand(0), RegClass);
}
@@ -323,11 +299,12 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
// 16 = Max Num Vector Elements
// 2 = 2 REG_SEQUENCE operands per element (value, subreg index)
// 1 = Vector Register Class
- SmallVector<SDValue, 16 * 2 + 1> RegSeqArgs(N->getNumOperands() * 2 + 1);
+ SmallVector<SDValue, 16 * 2 + 1> RegSeqArgs(NumVectorElts * 2 + 1);
RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, MVT::i32);
bool IsRegSeq = true;
- for (unsigned i = 0; i < N->getNumOperands(); i++) {
+ unsigned NOps = N->getNumOperands();
+ for (unsigned i = 0; i < NOps; i++) {
// XXX: Why is this here?
if (dyn_cast<RegisterSDNode>(N->getOperand(i))) {
IsRegSeq = false;
@@ -337,6 +314,20 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
RegSeqArgs[1 + (2 * i) + 1] =
CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), MVT::i32);
}
+
+ if (NOps != NumVectorElts) {
+ // Fill in the missing undef elements if this was a scalar_to_vector.
+ assert(Opc == ISD::SCALAR_TO_VECTOR && NOps < NumVectorElts);
+
+ MachineSDNode *ImpDef = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
+ SDLoc(N), EltVT);
+ for (unsigned i = NOps; i < NumVectorElts; ++i) {
+ RegSeqArgs[1 + (2 * i)] = SDValue(ImpDef, 0);
+ RegSeqArgs[1 + (2 * i) + 1] =
+ CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), MVT::i32);
+ }
+ }
+
if (!IsRegSeq)
break;
return CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(),
@@ -466,6 +457,9 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
PackedOffsetWidth);
}
+ case AMDGPUISD::DIV_SCALE: {
+ return SelectDIV_SCALE(N);
+ }
}
return SelectCode(N);
}
@@ -659,6 +653,129 @@ bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
return true;
}
+SDNode *AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) {
+ SDLoc DL(N);
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+
+ bool IsAdd = (N->getOpcode() == ISD::ADD);
+
+ SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, MVT::i32);
+ SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, MVT::i32);
+
+ SDNode *Lo0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
+ DL, MVT::i32, LHS, Sub0);
+ SDNode *Hi0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
+ DL, MVT::i32, LHS, Sub1);
+
+ SDNode *Lo1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
+ DL, MVT::i32, RHS, Sub0);
+ SDNode *Hi1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
+ DL, MVT::i32, RHS, Sub1);
+
+ SDVTList VTList = CurDAG->getVTList(MVT::i32, MVT::Glue);
+ SDValue AddLoArgs[] = { SDValue(Lo0, 0), SDValue(Lo1, 0) };
+
+
+ unsigned Opc = IsAdd ? AMDGPU::S_ADD_I32 : AMDGPU::S_SUB_I32;
+ unsigned CarryOpc = IsAdd ? AMDGPU::S_ADDC_U32 : AMDGPU::S_SUBB_U32;
+
+ if (!isCFDepth0()) {
+ Opc = IsAdd ? AMDGPU::V_ADD_I32_e32 : AMDGPU::V_SUB_I32_e32;
+ CarryOpc = IsAdd ? AMDGPU::V_ADDC_U32_e32 : AMDGPU::V_SUBB_U32_e32;
+ }
+
+ SDNode *AddLo = CurDAG->getMachineNode(Opc, DL, VTList, AddLoArgs);
+ SDValue Carry(AddLo, 1);
+ SDNode *AddHi
+ = CurDAG->getMachineNode(CarryOpc, DL, MVT::i32,
+ SDValue(Hi0, 0), SDValue(Hi1, 0), Carry);
+
+ SDValue Args[5] = {
+ CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, MVT::i32),
+ SDValue(AddLo,0),
+ Sub0,
+ SDValue(AddHi,0),
+ Sub1,
+ };
+ return CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, MVT::i64, Args);
+}
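
For reference, a minimal host-side sketch (plain C++, not the selector code itself) of the split performed above: add the low 32-bit halves first, then feed the resulting carry into the high-half add, which is what the S_ADD_I32 / S_ADDC_U32 pair (or the VALU _e32 forms outside control-flow depth 0) provides.

    #include <cstdint>

    // Sketch only: a 64-bit add built from two 32-bit adds plus a carry, the
    // way the selector pairs the sub0/sub1 halves and reassembles them with
    // REG_SEQUENCE.
    uint64_t add64ViaHalves(uint64_t A, uint64_t B) {
      uint32_t Lo0 = (uint32_t)A, Hi0 = (uint32_t)(A >> 32);
      uint32_t Lo1 = (uint32_t)B, Hi1 = (uint32_t)(B >> 32);
      uint32_t Lo = Lo0 + Lo1;            // low-half add, carry out
      uint32_t Carry = Lo < Lo0;          // unsigned overflow of the low add
      uint32_t Hi = Hi0 + Hi1 + Carry;    // high-half add with carry in
      return ((uint64_t)Hi << 32) | Lo;   // reassemble the 64-bit result
    }
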
+
+SDNode *AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) {
+ SDLoc SL(N);
+ EVT VT = N->getValueType(0);
+
+ assert(VT == MVT::f32 || VT == MVT::f64);
+
+ unsigned Opc
+ = (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64 : AMDGPU::V_DIV_SCALE_F32;
+
+ const SDValue Zero = CurDAG->getTargetConstant(0, MVT::i32);
+
+ SDValue Ops[] = {
+ N->getOperand(0),
+ N->getOperand(1),
+ N->getOperand(2),
+ Zero,
+ Zero,
+ Zero,
+ Zero
+ };
+
+ return CurDAG->SelectNodeTo(N, Opc, VT, MVT::i1, Ops);
+}
+
+static SDValue wrapAddr64Rsrc(SelectionDAG *DAG, SDLoc DL, SDValue Ptr) {
+ return SDValue(DAG->getMachineNode(AMDGPU::SI_ADDR64_RSRC, DL, MVT::v4i32,
+ Ptr), 0);
+}
+
+bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &Ptr,
+ SDValue &Offset,
+ SDValue &ImmOffset) const {
+ SDLoc DL(Addr);
+
+ if (CurDAG->isBaseWithConstantOffset(Addr)) {
+ SDValue N0 = Addr.getOperand(0);
+ SDValue N1 = Addr.getOperand(1);
+ ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
+
+ if (isUInt<12>(C1->getZExtValue())) {
+
+ if (N0.getOpcode() == ISD::ADD) {
+ // (add (add N2, N3), C1)
+ SDValue N2 = N0.getOperand(0);
+ SDValue N3 = N0.getOperand(1);
+ Ptr = wrapAddr64Rsrc(CurDAG, DL, N2);
+ Offset = N3;
+ ImmOffset = CurDAG->getTargetConstant(C1->getZExtValue(), MVT::i16);
+ return true;
+ }
+
+ // (add N0, C1)
+ Ptr = wrapAddr64Rsrc(CurDAG, DL, CurDAG->getTargetConstant(0, MVT::i64));
+ Offset = N0;
+ ImmOffset = CurDAG->getTargetConstant(C1->getZExtValue(), MVT::i16);
+ return true;
+ }
+ }
+ if (Addr.getOpcode() == ISD::ADD) {
+ // (add N0, N1)
+ SDValue N0 = Addr.getOperand(0);
+ SDValue N1 = Addr.getOperand(1);
+ Ptr = wrapAddr64Rsrc(CurDAG, DL, N0);
+ Offset = N1;
+ ImmOffset = CurDAG->getTargetConstant(0, MVT::i16);
+ return true;
+ }
+
+ // default case
+ Ptr = wrapAddr64Rsrc(CurDAG, DL, CurDAG->getConstant(0, MVT::i64));
+ Offset = Addr;
+ ImmOffset = CurDAG->getTargetConstant(0, MVT::i16);
+ return true;
+}
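
A rough model of the immediate-offset decision above (illustrative names, not the real helpers): a constant byte offset lands in the 12-bit immediate field only when isUInt<12> holds; otherwise it has to travel in the 64-bit register offset and the immediate stays zero.

    #include <cstdint>

    // Sketch only.
    struct MubufOffsets { uint64_t RegOffset; uint16_t ImmOffset; };

    MubufOffsets selectMubufOffset(uint64_t ConstOff) {
      if (ConstOff < (1u << 12))             // isUInt<12>(ConstOff)
        return { 0, (uint16_t)ConstOff };    // fits the immediate field
      return { ConstOff, 0 };                // too wide: materialize in a register
    }
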
+
void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
const AMDGPUTargetLowering& Lowering =
*static_cast<const AMDGPUTargetLowering*>(getTargetLowering());
diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp
index 6c443ea..0ada7a3 100644
--- a/lib/Target/R600/AMDGPUISelLowering.cpp
+++ b/lib/Target/R600/AMDGPUISelLowering.cpp
@@ -16,9 +16,9 @@
#include "AMDGPUISelLowering.h"
#include "AMDGPU.h"
#include "AMDGPUFrameLowering.h"
+#include "AMDGPUIntrinsicInfo.h"
#include "AMDGPURegisterInfo.h"
#include "AMDGPUSubtarget.h"
-#include "AMDILIntrinsicInfo.h"
#include "R600MachineFunctionInfo.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/Analysis/ValueTracking.h"
@@ -84,13 +84,37 @@ static bool allocateStack(unsigned ValNo, MVT ValVT, MVT LocVT,
#include "AMDGPUGenCallingConv.inc"
+// Find an equivalently sized type with larger elements to use when loading /
+// storing a vector.
+EVT AMDGPUTargetLowering::getEquivalentMemType(LLVMContext &Ctx, EVT VT) {
+ unsigned StoreSize = VT.getStoreSizeInBits();
+ if (StoreSize <= 32)
+ return EVT::getIntegerVT(Ctx, StoreSize);
+
+ assert(StoreSize % 32 == 0 && "Store size not a multiple of 32");
+ return EVT::getVectorVT(Ctx, MVT::i32, StoreSize / 32);
+}
+
+// Type of the register a vector of this type will be loaded into; never
+// narrower than i32.
+EVT AMDGPUTargetLowering::getEquivalentLoadRegType(LLVMContext &Ctx, EVT VT) {
+ unsigned StoreSize = VT.getStoreSizeInBits();
+ if (StoreSize <= 32)
+ return EVT::getIntegerVT(Ctx, 32);
+
+ return EVT::getVectorVT(Ctx, MVT::i32, StoreSize / 32);
+}
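
A small sketch of what the two helpers above compute, writing the types as strings instead of EVTs; this is only an illustration of the stated rules, not the LLVM API.

    #include <cassert>
    #include <string>

    // Memory type: sizes of 32 bits or less stay an integer of that width;
    // anything larger is split into 32-bit lanes (e.g. 128 bits -> v4i32).
    std::string equivalentMemType(unsigned StoreSizeInBits) {
      if (StoreSizeInBits <= 32)
        return "i" + std::to_string(StoreSizeInBits);
      assert(StoreSizeInBits % 32 == 0 && "store size not a multiple of 32");
      return "v" + std::to_string(StoreSizeInBits / 32) + "i32";
    }

    // Register type for the loaded value: never narrower than i32.
    std::string equivalentLoadRegType(unsigned StoreSizeInBits) {
      return StoreSizeInBits <= 32 ? "i32" : equivalentMemType(StoreSizeInBits);
    }
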
+
AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
TargetLowering(TM, new TargetLoweringObjectFileELF()) {
Subtarget = &TM.getSubtarget<AMDGPUSubtarget>();
- // Initialize target lowering borrowed from AMDIL
- InitAMDILLowering();
+ setOperationAction(ISD::Constant, MVT::i32, Legal);
+ setOperationAction(ISD::Constant, MVT::i64, Legal);
+ setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
+ setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
+
+ setOperationAction(ISD::BR_JT, MVT::Other, Expand);
+ setOperationAction(ISD::BRIND, MVT::Other, Expand);
// We need to custom lower some of the intrinsics
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
@@ -107,9 +131,6 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
setOperationAction(ISD::FROUND, MVT::f32, Legal);
setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
- // The hardware supports ROTR, but not ROTL
- setOperationAction(ISD::ROTL, MVT::i32, Expand);
-
// Lower floating point store/load to integer store/load to reduce the number
// of patterns in tablegen.
setOperationAction(ISD::STORE, MVT::f32, Promote);
@@ -118,6 +139,9 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
setOperationAction(ISD::STORE, MVT::v2f32, Promote);
AddPromotedToType(ISD::STORE, MVT::v2f32, MVT::v2i32);
+ setOperationAction(ISD::STORE, MVT::i64, Promote);
+ AddPromotedToType(ISD::STORE, MVT::i64, MVT::v2i32);
+
setOperationAction(ISD::STORE, MVT::v4f32, Promote);
AddPromotedToType(ISD::STORE, MVT::v4f32, MVT::v4i32);
@@ -161,6 +185,9 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
setOperationAction(ISD::LOAD, MVT::v2f32, Promote);
AddPromotedToType(ISD::LOAD, MVT::v2f32, MVT::v2i32);
+ setOperationAction(ISD::LOAD, MVT::i64, Promote);
+ AddPromotedToType(ISD::LOAD, MVT::i64, MVT::v2i32);
+
setOperationAction(ISD::LOAD, MVT::v4f32, Promote);
AddPromotedToType(ISD::LOAD, MVT::v4f32, MVT::v4i32);
@@ -202,29 +229,63 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
setOperationAction(ISD::BR_CC, MVT::i1, Expand);
- setOperationAction(ISD::SELECT_CC, MVT::i64, Expand);
+ if (Subtarget->getGeneration() < AMDGPUSubtarget::SEA_ISLANDS) {
+ setOperationAction(ISD::FCEIL, MVT::f64, Custom);
+ setOperationAction(ISD::FTRUNC, MVT::f64, Custom);
+ setOperationAction(ISD::FRINT, MVT::f64, Custom);
+ setOperationAction(ISD::FFLOOR, MVT::f64, Custom);
+ }
- setOperationAction(ISD::FNEG, MVT::v2f32, Expand);
- setOperationAction(ISD::FNEG, MVT::v4f32, Expand);
+ if (!Subtarget->hasBFI()) {
+ // fcopysign can be done in a single instruction with BFI.
+ setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
+ }
- setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
+ const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
+ for (MVT VT : ScalarIntVTs) {
+ setOperationAction(ISD::SREM, VT, Expand);
+ setOperationAction(ISD::SDIV, VT, Expand);
- setOperationAction(ISD::MUL, MVT::i64, Expand);
- setOperationAction(ISD::SUB, MVT::i64, Expand);
+ // The GPU does not have a divrem operation for signed or unsigned values.
+ setOperationAction(ISD::SDIVREM, VT, Custom);
+ setOperationAction(ISD::UDIVREM, VT, Custom);
+
+ // The GPU does not have [S|U]MUL_LOHI as a single instruction.
+ setOperationAction(ISD::SMUL_LOHI, VT, Expand);
+ setOperationAction(ISD::UMUL_LOHI, VT, Expand);
+ setOperationAction(ISD::BSWAP, VT, Expand);
+ setOperationAction(ISD::CTTZ, VT, Expand);
+ setOperationAction(ISD::CTLZ, VT, Expand);
+ }
+
+ if (!Subtarget->hasBCNT(32))
+ setOperationAction(ISD::CTPOP, MVT::i32, Expand);
+
+ if (!Subtarget->hasBCNT(64))
+ setOperationAction(ISD::CTPOP, MVT::i64, Expand);
+
+ // The hardware supports 32-bit ROTR, but not ROTL.
+ setOperationAction(ISD::ROTL, MVT::i32, Expand);
+ setOperationAction(ISD::ROTL, MVT::i64, Expand);
+ setOperationAction(ISD::ROTR, MVT::i64, Expand);
+
+ setOperationAction(ISD::FP_TO_SINT, MVT::i64, Expand);
+ setOperationAction(ISD::MUL, MVT::i64, Expand);
+ setOperationAction(ISD::MULHU, MVT::i64, Expand);
+ setOperationAction(ISD::MULHS, MVT::i64, Expand);
setOperationAction(ISD::UDIV, MVT::i32, Expand);
- setOperationAction(ISD::UDIVREM, MVT::i32, Custom);
- setOperationAction(ISD::UDIVREM, MVT::i64, Custom);
setOperationAction(ISD::UREM, MVT::i32, Expand);
- setOperationAction(ISD::VSELECT, MVT::v2f32, Expand);
- setOperationAction(ISD::VSELECT, MVT::v4f32, Expand);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
+ setOperationAction(ISD::SELECT_CC, MVT::i64, Expand);
- static const MVT::SimpleValueType IntTypes[] = {
+ static const MVT::SimpleValueType VectorIntTypes[] = {
MVT::v2i32, MVT::v4i32
};
- for (MVT VT : IntTypes) {
- //Expand the following operations for the current type by default
+ for (MVT VT : VectorIntTypes) {
+ // Expand the following operations for the current type by default.
setOperationAction(ISD::ADD, VT, Expand);
setOperationAction(ISD::AND, VT, Expand);
setOperationAction(ISD::FP_TO_SINT, VT, Expand);
@@ -232,40 +293,93 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
setOperationAction(ISD::MUL, VT, Expand);
setOperationAction(ISD::OR, VT, Expand);
setOperationAction(ISD::SHL, VT, Expand);
- setOperationAction(ISD::SINT_TO_FP, VT, Expand);
- setOperationAction(ISD::SRL, VT, Expand);
setOperationAction(ISD::SRA, VT, Expand);
+ setOperationAction(ISD::SRL, VT, Expand);
+ setOperationAction(ISD::ROTL, VT, Expand);
+ setOperationAction(ISD::ROTR, VT, Expand);
setOperationAction(ISD::SUB, VT, Expand);
- setOperationAction(ISD::UDIV, VT, Expand);
+ setOperationAction(ISD::SINT_TO_FP, VT, Expand);
setOperationAction(ISD::UINT_TO_FP, VT, Expand);
+ // TODO: Implement custom UREM / SREM routines.
+ setOperationAction(ISD::SDIV, VT, Expand);
+ setOperationAction(ISD::UDIV, VT, Expand);
+ setOperationAction(ISD::SREM, VT, Expand);
setOperationAction(ISD::UREM, VT, Expand);
+ setOperationAction(ISD::SMUL_LOHI, VT, Expand);
+ setOperationAction(ISD::UMUL_LOHI, VT, Expand);
+ setOperationAction(ISD::SDIVREM, VT, Custom);
+ setOperationAction(ISD::UDIVREM, VT, Custom);
+ setOperationAction(ISD::ADDC, VT, Expand);
+ setOperationAction(ISD::SUBC, VT, Expand);
+ setOperationAction(ISD::ADDE, VT, Expand);
+ setOperationAction(ISD::SUBE, VT, Expand);
setOperationAction(ISD::SELECT, VT, Expand);
setOperationAction(ISD::VSELECT, VT, Expand);
+ setOperationAction(ISD::SELECT_CC, VT, Expand);
setOperationAction(ISD::XOR, VT, Expand);
+ setOperationAction(ISD::BSWAP, VT, Expand);
+ setOperationAction(ISD::CTPOP, VT, Expand);
+ setOperationAction(ISD::CTTZ, VT, Expand);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand);
+ setOperationAction(ISD::CTLZ, VT, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand);
+ setOperationAction(ISD::VECTOR_SHUFFLE, VT, Expand);
}
- static const MVT::SimpleValueType FloatTypes[] = {
+ static const MVT::SimpleValueType FloatVectorTypes[] = {
MVT::v2f32, MVT::v4f32
};
- for (MVT VT : FloatTypes) {
+ for (MVT VT : FloatVectorTypes) {
setOperationAction(ISD::FABS, VT, Expand);
setOperationAction(ISD::FADD, VT, Expand);
+ setOperationAction(ISD::FCEIL, VT, Expand);
setOperationAction(ISD::FCOS, VT, Expand);
setOperationAction(ISD::FDIV, VT, Expand);
+ setOperationAction(ISD::FEXP2, VT, Expand);
+ setOperationAction(ISD::FLOG2, VT, Expand);
setOperationAction(ISD::FPOW, VT, Expand);
setOperationAction(ISD::FFLOOR, VT, Expand);
setOperationAction(ISD::FTRUNC, VT, Expand);
setOperationAction(ISD::FMUL, VT, Expand);
+ setOperationAction(ISD::FMA, VT, Expand);
setOperationAction(ISD::FRINT, VT, Expand);
+ setOperationAction(ISD::FNEARBYINT, VT, Expand);
setOperationAction(ISD::FSQRT, VT, Expand);
setOperationAction(ISD::FSIN, VT, Expand);
setOperationAction(ISD::FSUB, VT, Expand);
+ setOperationAction(ISD::FNEG, VT, Expand);
setOperationAction(ISD::SELECT, VT, Expand);
+ setOperationAction(ISD::VSELECT, VT, Expand);
+ setOperationAction(ISD::SELECT_CC, VT, Expand);
+ setOperationAction(ISD::FCOPYSIGN, VT, Expand);
+ setOperationAction(ISD::VECTOR_SHUFFLE, VT, Expand);
}
+ setOperationAction(ISD::FNEARBYINT, MVT::f32, Custom);
+ setOperationAction(ISD::FNEARBYINT, MVT::f64, Custom);
+
setTargetDAGCombine(ISD::MUL);
setTargetDAGCombine(ISD::SELECT_CC);
+
+ setSchedulingPreference(Sched::RegPressure);
+ setJumpIsExpensive(true);
+
+ setSelectIsExpensive(false);
+ PredictableSelectIsExpensive = false;
+
+ // There are no integer divide instructions, and these expand to a pretty
+ // large sequence of instructions.
+ setIntDivIsCheap(false);
+ setPow2DivIsCheap(false);
+
+ // TODO: Investigate this when 64-bit divides are implemented.
+ addBypassSlowDiv(64, 32);
+
+ // FIXME: Need to really handle these.
+ MaxStoresPerMemcpy = 4096;
+ MaxStoresPerMemmove = 4096;
+ MaxStoresPerMemset = 4096;
}
//===----------------------------------------------------------------------===//
@@ -276,6 +390,23 @@ MVT AMDGPUTargetLowering::getVectorIdxTy() const {
return MVT::i32;
}
+bool AMDGPUTargetLowering::isSelectSupported(SelectSupportKind SelType) const {
+ return true;
+}
+
+// The backend supports 32 and 64 bit floating point immediates.
+// FIXME: Why are we reporting vectors of FP immediates as legal?
+bool AMDGPUTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
+ EVT ScalarVT = VT.getScalarType();
+ return (ScalarVT == MVT::f32 || ScalarVT == MVT::f64);
+}
+
+// We don't want to shrink f64 / f32 constants.
+bool AMDGPUTargetLowering::ShouldShrinkFPConstant(EVT VT) const {
+ EVT ScalarVT = VT.getScalarType();
+ return (ScalarVT != MVT::f32 && ScalarVT != MVT::f64);
+}
+
bool AMDGPUTargetLowering::isLoadBitCastBeneficial(EVT LoadTy,
EVT CastTy) const {
if (LoadTy.getSizeInBits() != CastTy.getSizeInBits())
@@ -330,6 +461,10 @@ bool AMDGPUTargetLowering::isZExtFree(EVT Src, EVT Dest) const {
return Src == MVT::i32 && Dest == MVT::i64;
}
+bool AMDGPUTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
+ return isZExtFree(Val.getValueType(), VT2);
+}
+
bool AMDGPUTargetLowering::isNarrowingProfitable(EVT SrcVT, EVT DestVT) const {
// There aren't really 64-bit registers, but pairs of 32-bit ones and only a
// limited number of native 64-bit operations. Shrinking an operation to fit
@@ -383,25 +518,28 @@ SDValue AMDGPUTargetLowering::LowerCall(CallLoweringInfo &CLI,
return SDValue();
}
-SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
- const {
+SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op,
+ SelectionDAG &DAG) const {
switch (Op.getOpcode()) {
default:
Op.getNode()->dump();
llvm_unreachable("Custom lowering code for this"
"instruction is not implemented yet!");
break;
- // AMDIL DAG lowering
- case ISD::SDIV: return LowerSDIV(Op, DAG);
- case ISD::SREM: return LowerSREM(Op, DAG);
case ISD::SIGN_EXTEND_INREG: return LowerSIGN_EXTEND_INREG(Op, DAG);
- case ISD::BRCOND: return LowerBRCOND(Op, DAG);
- // AMDGPU DAG lowering
case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG);
case ISD::EXTRACT_SUBVECTOR: return LowerEXTRACT_SUBVECTOR(Op, DAG);
case ISD::FrameIndex: return LowerFrameIndex(Op, DAG);
case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
+ case ISD::SDIV: return LowerSDIV(Op, DAG);
+ case ISD::SREM: return LowerSREM(Op, DAG);
case ISD::UDIVREM: return LowerUDIVREM(Op, DAG);
+ case ISD::SDIVREM: return LowerSDIVREM(Op, DAG);
+ case ISD::FCEIL: return LowerFCEIL(Op, DAG);
+ case ISD::FTRUNC: return LowerFTRUNC(Op, DAG);
+ case ISD::FRINT: return LowerFRINT(Op, DAG);
+ case ISD::FNEARBYINT: return LowerFNEARBYINT(Op, DAG);
+ case ISD::FFLOOR: return LowerFFLOOR(Op, DAG);
case ISD::UINT_TO_FP: return LowerUINT_TO_FP(Op, DAG);
}
return Op;
@@ -419,95 +557,23 @@ void AMDGPUTargetLowering::ReplaceNodeResults(SDNode *N,
// ReplaceNodeResults to sext_in_reg to an illegal type, so we'll just do
// nothing here and let the illegal result integer be handled normally.
return;
- case ISD::UDIV: {
- SDValue Op = SDValue(N, 0);
- SDLoc DL(Op);
- EVT VT = Op.getValueType();
- SDValue UDIVREM = DAG.getNode(ISD::UDIVREM, DL, DAG.getVTList(VT, VT),
- N->getOperand(0), N->getOperand(1));
- Results.push_back(UDIVREM);
- break;
- }
- case ISD::UREM: {
- SDValue Op = SDValue(N, 0);
- SDLoc DL(Op);
- EVT VT = Op.getValueType();
- SDValue UDIVREM = DAG.getNode(ISD::UDIVREM, DL, DAG.getVTList(VT, VT),
- N->getOperand(0), N->getOperand(1));
- Results.push_back(UDIVREM.getValue(1));
- break;
- }
- case ISD::UDIVREM: {
- SDValue Op = SDValue(N, 0);
- SDLoc DL(Op);
- EVT VT = Op.getValueType();
- EVT HalfVT = VT.getHalfSizedIntegerVT(*DAG.getContext());
-
- SDValue one = DAG.getConstant(1, HalfVT);
- SDValue zero = DAG.getConstant(0, HalfVT);
-
- //HiLo split
- SDValue LHS = N->getOperand(0);
- SDValue LHS_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, LHS, zero);
- SDValue LHS_Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, LHS, one);
-
- SDValue RHS = N->getOperand(1);
- SDValue RHS_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, RHS, zero);
- SDValue RHS_Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, RHS, one);
-
- // Get Speculative values
- SDValue DIV_Part = DAG.getNode(ISD::UDIV, DL, HalfVT, LHS_Hi, RHS_Lo);
- SDValue REM_Part = DAG.getNode(ISD::UREM, DL, HalfVT, LHS_Hi, RHS_Lo);
-
- SDValue REM_Hi = zero;
- SDValue REM_Lo = DAG.getSelectCC(DL, RHS_Hi, zero, REM_Part, LHS_Hi, ISD::SETEQ);
-
- SDValue DIV_Hi = DAG.getSelectCC(DL, RHS_Hi, zero, DIV_Part, zero, ISD::SETEQ);
- SDValue DIV_Lo = zero;
-
- const unsigned halfBitWidth = HalfVT.getSizeInBits();
-
- for (unsigned i = 0; i < halfBitWidth; ++i) {
- SDValue POS = DAG.getConstant(halfBitWidth - i - 1, HalfVT);
- // Get Value of high bit
- SDValue HBit;
- if (halfBitWidth == 32 && Subtarget->hasBFE()) {
- HBit = DAG.getNode(AMDGPUISD::BFE_U32, DL, HalfVT, LHS_Lo, POS, one);
- } else {
- HBit = DAG.getNode(ISD::SRL, DL, HalfVT, LHS_Lo, POS);
- HBit = DAG.getNode(ISD::AND, DL, HalfVT, HBit, one);
- }
-
- SDValue Carry = DAG.getNode(ISD::SRL, DL, HalfVT, REM_Lo,
- DAG.getConstant(halfBitWidth - 1, HalfVT));
- REM_Hi = DAG.getNode(ISD::SHL, DL, HalfVT, REM_Hi, one);
- REM_Hi = DAG.getNode(ISD::OR, DL, HalfVT, REM_Hi, Carry);
-
- REM_Lo = DAG.getNode(ISD::SHL, DL, HalfVT, REM_Lo, one);
- REM_Lo = DAG.getNode(ISD::OR, DL, HalfVT, REM_Lo, HBit);
-
-
- SDValue REM = DAG.getNode(ISD::BUILD_PAIR, DL, VT, REM_Lo, REM_Hi);
-
- SDValue BIT = DAG.getConstant(1 << (halfBitWidth - i - 1), HalfVT);
- SDValue realBIT = DAG.getSelectCC(DL, REM, RHS, BIT, zero, ISD::SETGE);
-
- DIV_Lo = DAG.getNode(ISD::OR, DL, HalfVT, DIV_Lo, realBIT);
-
- // Update REM
-
- SDValue REM_sub = DAG.getNode(ISD::SUB, DL, VT, REM, RHS);
-
- REM = DAG.getSelectCC(DL, REM, RHS, REM_sub, REM, ISD::SETGE);
- REM_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, REM, zero);
- REM_Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, REM, one);
- }
+ case ISD::LOAD: {
+ SDNode *Node = LowerLOAD(SDValue(N, 0), DAG).getNode();
+ if (!Node)
+ return;
- SDValue REM = DAG.getNode(ISD::BUILD_PAIR, DL, VT, REM_Lo, REM_Hi);
- SDValue DIV = DAG.getNode(ISD::BUILD_PAIR, DL, VT, DIV_Lo, DIV_Hi);
- Results.push_back(DIV);
- Results.push_back(REM);
- break;
+ Results.push_back(SDValue(Node, 0));
+ Results.push_back(SDValue(Node, 1));
+ // XXX: LLVM does not seem to replace the chain value inside
+ // CustomWidenLowerNode, so replace it explicitly here.
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N,1), SDValue(Node, 1));
+ return;
+ }
+ case ISD::STORE: {
+ SDValue Lowered = LowerSTORE(SDValue(N, 0), DAG);
+ if (Lowered.getNode())
+ Results.push_back(Lowered);
+ return;
}
default:
return;
@@ -531,12 +597,14 @@ SDValue AMDGPUTargetLowering::LowerConstantInitializer(const Constant* Init,
SelectionDAG &DAG) const {
const DataLayout *TD = getTargetMachine().getDataLayout();
SDLoc DL(InitPtr);
+ Type *InitTy = Init->getType();
+
if (const ConstantInt *CI = dyn_cast<ConstantInt>(Init)) {
- EVT VT = EVT::getEVT(CI->getType());
- PointerType *PtrTy = PointerType::get(CI->getType(), 0);
- return DAG.getStore(Chain, DL, DAG.getConstant(*CI, VT), InitPtr,
- MachinePointerInfo(UndefValue::get(PtrTy)), false, false,
- TD->getPrefTypeAlignment(CI->getType()));
+ EVT VT = EVT::getEVT(InitTy);
+ PointerType *PtrTy = PointerType::get(InitTy, AMDGPUAS::PRIVATE_ADDRESS);
+ return DAG.getStore(Chain, DL, DAG.getConstant(*CI, VT), InitPtr,
+ MachinePointerInfo(UndefValue::get(PtrTy)), false, false,
+ TD->getPrefTypeAlignment(InitTy));
}
if (const ConstantFP *CFP = dyn_cast<ConstantFP>(Init)) {
@@ -547,7 +615,6 @@ SDValue AMDGPUTargetLowering::LowerConstantInitializer(const Constant* Init,
TD->getPrefTypeAlignment(CFP->getType()));
}
- Type *InitTy = Init->getType();
if (StructType *ST = dyn_cast<StructType>(InitTy)) {
const StructLayout *SL = TD->getStructLayout(ST);
@@ -589,6 +656,14 @@ SDValue AMDGPUTargetLowering::LowerConstantInitializer(const Constant* Init,
return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
}
+ if (isa<UndefValue>(Init)) {
+ EVT VT = EVT::getEVT(InitTy);
+ PointerType *PtrTy = PointerType::get(InitTy, AMDGPUAS::PRIVATE_ADDRESS);
+ return DAG.getStore(Chain, DL, DAG.getUNDEF(VT), InitPtr,
+ MachinePointerInfo(UndefValue::get(PtrTy)), false, false,
+ TD->getPrefTypeAlignment(InitTy));
+ }
+
Init->dump();
llvm_unreachable("Unhandled constant initializer");
}
@@ -628,11 +703,19 @@ SDValue AMDGPUTargetLowering::LowerGlobalAddress(AMDGPUMachineFunction* MFI,
unsigned Size = TD->getTypeAllocSize(EltType);
unsigned Alignment = TD->getPrefTypeAlignment(EltType);
+ MVT PrivPtrVT = getPointerTy(AMDGPUAS::PRIVATE_ADDRESS);
+ MVT ConstPtrVT = getPointerTy(AMDGPUAS::CONSTANT_ADDRESS);
+
+ int FI = FrameInfo->CreateStackObject(Size, Alignment, false);
+ SDValue InitPtr = DAG.getFrameIndex(FI, PrivPtrVT);
+
const GlobalVariable *Var = cast<GlobalVariable>(GV);
+ if (!Var->hasInitializer()) {
+ // This has no use, but bugpoint will hit it.
+ return DAG.getZExtOrTrunc(InitPtr, SDLoc(Op), ConstPtrVT);
+ }
+
const Constant *Init = Var->getInitializer();
- int FI = FrameInfo->CreateStackObject(Size, Alignment, false);
- SDValue InitPtr = DAG.getFrameIndex(FI,
- getPointerTy(AMDGPUAS::PRIVATE_ADDRESS));
SmallVector<SDNode*, 8> WorkList;
for (SDNode::use_iterator I = DAG.getEntryNode()->use_begin(),
@@ -651,8 +734,7 @@ SDValue AMDGPUTargetLowering::LowerGlobalAddress(AMDGPUMachineFunction* MFI,
}
DAG.UpdateNodeOperands(*I, Ops);
}
- return DAG.getZExtOrTrunc(InitPtr, SDLoc(Op),
- getPointerTy(AMDGPUAS::CONSTANT_ADDRESS));
+ return DAG.getZExtOrTrunc(InitPtr, SDLoc(Op), ConstPtrVT);
}
}
}
@@ -688,8 +770,7 @@ SDValue AMDGPUTargetLowering::LowerFrameIndex(SDValue Op,
const AMDGPUFrameLowering *TFL =
static_cast<const AMDGPUFrameLowering*>(getTargetMachine().getFrameLowering());
- FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Op);
- assert(FIN);
+ FrameIndexSDNode *FIN = cast<FrameIndexSDNode>(Op);
unsigned FrameIndex = FIN->getIndex();
unsigned Offset = TFL->getFrameIndexOffset(MF, FrameIndex);
@@ -705,26 +786,66 @@ SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
switch (IntrinsicID) {
default: return Op;
- case AMDGPUIntrinsic::AMDIL_abs:
+ case AMDGPUIntrinsic::AMDGPU_abs:
+ case AMDGPUIntrinsic::AMDIL_abs: // Legacy name.
return LowerIntrinsicIABS(Op, DAG);
- case AMDGPUIntrinsic::AMDIL_exp:
- return DAG.getNode(ISD::FEXP2, DL, VT, Op.getOperand(1));
case AMDGPUIntrinsic::AMDGPU_lrp:
return LowerIntrinsicLRP(Op, DAG);
- case AMDGPUIntrinsic::AMDIL_fraction:
+ case AMDGPUIntrinsic::AMDGPU_fract:
+ case AMDGPUIntrinsic::AMDIL_fraction: // Legacy name.
return DAG.getNode(AMDGPUISD::FRACT, DL, VT, Op.getOperand(1));
- case AMDGPUIntrinsic::AMDIL_max:
- return DAG.getNode(AMDGPUISD::FMAX, DL, VT, Op.getOperand(1),
- Op.getOperand(2));
+
+ case AMDGPUIntrinsic::AMDGPU_clamp:
+ case AMDGPUIntrinsic::AMDIL_clamp: // Legacy name.
+ return DAG.getNode(AMDGPUISD::CLAMP, DL, VT,
+ Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
+
+ case Intrinsic::AMDGPU_div_scale: {
+ // The third operand is required to be a constant.
+ const ConstantSDNode *Param = dyn_cast<ConstantSDNode>(Op.getOperand(3));
+ if (!Param)
+ return DAG.getUNDEF(VT);
+
+ // Translate to the operands expected by the machine instruction. The first
+ // operand must match either the numerator or the denominator, selected by
+ // the constant.
+ SDValue Numerator = Op.getOperand(1);
+ SDValue Denominator = Op.getOperand(2);
+ SDValue Src0 = Param->isAllOnesValue() ? Numerator : Denominator;
+
+ return DAG.getNode(AMDGPUISD::DIV_SCALE, DL, VT,
+ Src0, Denominator, Numerator);
+ }
+
+ case Intrinsic::AMDGPU_div_fmas:
+ return DAG.getNode(AMDGPUISD::DIV_FMAS, DL, VT,
+ Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
+
+ case Intrinsic::AMDGPU_div_fixup:
+ return DAG.getNode(AMDGPUISD::DIV_FIXUP, DL, VT,
+ Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
+
+ case Intrinsic::AMDGPU_trig_preop:
+ return DAG.getNode(AMDGPUISD::TRIG_PREOP, DL, VT,
+ Op.getOperand(1), Op.getOperand(2));
+
+ case Intrinsic::AMDGPU_rcp:
+ return DAG.getNode(AMDGPUISD::RCP, DL, VT, Op.getOperand(1));
+
+ case Intrinsic::AMDGPU_rsq:
+ return DAG.getNode(AMDGPUISD::RSQ, DL, VT, Op.getOperand(1));
+
+ case AMDGPUIntrinsic::AMDGPU_legacy_rsq:
+ return DAG.getNode(AMDGPUISD::RSQ_LEGACY, DL, VT, Op.getOperand(1));
+
+ case Intrinsic::AMDGPU_rsq_clamped:
+ return DAG.getNode(AMDGPUISD::RSQ_CLAMPED, DL, VT, Op.getOperand(1));
+
case AMDGPUIntrinsic::AMDGPU_imax:
return DAG.getNode(AMDGPUISD::SMAX, DL, VT, Op.getOperand(1),
Op.getOperand(2));
case AMDGPUIntrinsic::AMDGPU_umax:
return DAG.getNode(AMDGPUISD::UMAX, DL, VT, Op.getOperand(1),
Op.getOperand(2));
- case AMDGPUIntrinsic::AMDIL_min:
- return DAG.getNode(AMDGPUISD::FMIN, DL, VT, Op.getOperand(1),
- Op.getOperand(2));
case AMDGPUIntrinsic::AMDGPU_imin:
return DAG.getNode(AMDGPUISD::SMIN, DL, VT, Op.getOperand(1),
Op.getOperand(2));
@@ -748,6 +869,18 @@ SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
return DAG.getNode(AMDGPUISD::MAD_I24, DL, VT,
Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
+ case AMDGPUIntrinsic::AMDGPU_cvt_f32_ubyte0:
+ return DAG.getNode(AMDGPUISD::CVT_F32_UBYTE0, DL, VT, Op.getOperand(1));
+
+ case AMDGPUIntrinsic::AMDGPU_cvt_f32_ubyte1:
+ return DAG.getNode(AMDGPUISD::CVT_F32_UBYTE1, DL, VT, Op.getOperand(1));
+
+ case AMDGPUIntrinsic::AMDGPU_cvt_f32_ubyte2:
+ return DAG.getNode(AMDGPUISD::CVT_F32_UBYTE2, DL, VT, Op.getOperand(1));
+
+ case AMDGPUIntrinsic::AMDGPU_cvt_f32_ubyte3:
+ return DAG.getNode(AMDGPUISD::CVT_F32_UBYTE3, DL, VT, Op.getOperand(1));
+
case AMDGPUIntrinsic::AMDGPU_bfe_i32:
return DAG.getNode(AMDGPUISD::BFE_I32, DL, VT,
Op.getOperand(1),
@@ -771,8 +904,16 @@ SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
Op.getOperand(1),
Op.getOperand(2));
- case AMDGPUIntrinsic::AMDIL_round_nearest:
+ case AMDGPUIntrinsic::AMDGPU_brev:
+ return DAG.getNode(AMDGPUISD::BREV, DL, VT, Op.getOperand(1));
+
+ case AMDGPUIntrinsic::AMDIL_exp: // Legacy name.
+ return DAG.getNode(ISD::FEXP2, DL, VT, Op.getOperand(1));
+
+ case AMDGPUIntrinsic::AMDIL_round_nearest: // Legacy name.
return DAG.getNode(ISD::FRINT, DL, VT, Op.getOperand(1));
+ case AMDGPUIntrinsic::AMDGPU_trunc: // Legacy name.
+ return DAG.getNode(ISD::FTRUNC, DL, VT, Op.getOperand(1));
}
}
@@ -863,27 +1004,41 @@ SDValue AMDGPUTargetLowering::SplitVectorLoad(const SDValue &Op,
SelectionDAG &DAG) const {
LoadSDNode *Load = dyn_cast<LoadSDNode>(Op);
EVT MemEltVT = Load->getMemoryVT().getVectorElementType();
+ EVT LoadVT = Op.getValueType();
EVT EltVT = Op.getValueType().getVectorElementType();
EVT PtrVT = Load->getBasePtr().getValueType();
+
unsigned NumElts = Load->getMemoryVT().getVectorNumElements();
SmallVector<SDValue, 8> Loads;
+ SmallVector<SDValue, 8> Chains;
+
SDLoc SL(Op);
for (unsigned i = 0, e = NumElts; i != e; ++i) {
SDValue Ptr = DAG.getNode(ISD::ADD, SL, PtrVT, Load->getBasePtr(),
DAG.getConstant(i * (MemEltVT.getSizeInBits() / 8), PtrVT));
- Loads.push_back(DAG.getExtLoad(Load->getExtensionType(), SL, EltVT,
- Load->getChain(), Ptr,
- MachinePointerInfo(Load->getMemOperand()->getValue()),
- MemEltVT, Load->isVolatile(), Load->isNonTemporal(),
- Load->getAlignment()));
+
+ SDValue NewLoad
+ = DAG.getExtLoad(Load->getExtensionType(), SL, EltVT,
+ Load->getChain(), Ptr,
+ MachinePointerInfo(Load->getMemOperand()->getValue()),
+ MemEltVT, Load->isVolatile(), Load->isNonTemporal(),
+ Load->getAlignment());
+ Loads.push_back(NewLoad.getValue(0));
+ Chains.push_back(NewLoad.getValue(1));
}
- return DAG.getNode(ISD::BUILD_VECTOR, SL, Op.getValueType(), Loads);
+
+ SDValue Ops[] = {
+ DAG.getNode(ISD::BUILD_VECTOR, SL, LoadVT, Loads),
+ DAG.getNode(ISD::TokenFactor, SL, MVT::Other, Chains)
+ };
+
+ return DAG.getMergeValues(Ops, SL);
}
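
Conceptually, SplitVectorLoad turns one vector load into one extending load per element at increasing byte offsets and token-factors the chains back together. A host-side sketch of just the data movement (the chain bookkeeping has no analogue here), assuming a v4i32 load:

    #include <array>
    #include <cstdint>
    #include <cstring>

    // Sketch only: four element loads at offsets of i * sizeof(element).
    std::array<uint32_t, 4> splitVectorLoad(const uint8_t *Base) {
      std::array<uint32_t, 4> Elts;
      for (unsigned i = 0; i != 4; ++i)
        std::memcpy(&Elts[i], Base + i * sizeof(uint32_t), sizeof(uint32_t));
      return Elts;                        // BUILD_VECTOR of the element results
    }
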
SDValue AMDGPUTargetLowering::MergeVectorStore(const SDValue &Op,
SelectionDAG &DAG) const {
- StoreSDNode *Store = dyn_cast<StoreSDNode>(Op);
+ StoreSDNode *Store = cast<StoreSDNode>(Op);
EVT MemVT = Store->getMemoryVT();
unsigned MemBits = MemVT.getSizeInBits();
@@ -981,7 +1136,13 @@ SDValue AMDGPUTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
Load->getBasePtr(),
MemVT,
Load->getMemOperand());
- return DAG.getNode(ISD::getExtForLoadExtType(ExtType), DL, VT, ExtLoad32);
+
+ SDValue Ops[] = {
+ DAG.getNode(ISD::getExtForLoadExtType(ExtType), DL, VT, ExtLoad32),
+ ExtLoad32.getValue(1)
+ };
+
+ return DAG.getMergeValues(Ops, DL);
}
if (ExtType == ISD::NON_EXTLOAD && VT.getSizeInBits() < 32) {
@@ -995,7 +1156,13 @@ SDValue AMDGPUTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
SDValue NewLD = DAG.getExtLoad(ISD::EXTLOAD, DL, MVT::i32, Chain,
BasePtr, MVT::i8, MMO);
- return DAG.getNode(ISD::TRUNCATE, DL, VT, NewLD);
+
+ SDValue Ops[] = {
+ DAG.getNode(ISD::TRUNCATE, DL, VT, NewLD),
+ NewLD.getValue(1)
+ };
+
+ return DAG.getMergeValues(Ops, DL);
}
// Lower loads of constant address space global variables.
@@ -1003,11 +1170,12 @@ SDValue AMDGPUTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
isa<GlobalVariable>(
GetUnderlyingObject(Load->getMemOperand()->getValue()))) {
+
SDValue Ptr = DAG.getZExtOrTrunc(Load->getBasePtr(), DL,
getPointerTy(AMDGPUAS::PRIVATE_ADDRESS));
Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr,
DAG.getConstant(2, MVT::i32));
- return DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, Op.getValueType(),
+ return DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, Op->getVTList(),
Load->getChain(), Ptr,
DAG.getTargetConstant(0, MVT::i32), Op.getOperand(2));
}
@@ -1034,10 +1202,21 @@ SDValue AMDGPUTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
EVT MemEltVT = MemVT.getScalarType();
if (ExtType == ISD::SEXTLOAD) {
SDValue MemEltVTNode = DAG.getValueType(MemEltVT);
- return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i32, Ret, MemEltVTNode);
+
+ SDValue Ops[] = {
+ DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i32, Ret, MemEltVTNode),
+ Load->getChain()
+ };
+
+ return DAG.getMergeValues(Ops, DL);
}
- return DAG.getZeroExtendInReg(Ret, DL, MemEltVT);
+ SDValue Ops[] = {
+ DAG.getZeroExtendInReg(Ret, DL, MemEltVT),
+ Load->getChain()
+ };
+
+ return DAG.getMergeValues(Ops, DL);
}
SDValue AMDGPUTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
@@ -1097,6 +1276,251 @@ SDValue AMDGPUTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
return SDValue();
}
+SDValue AMDGPUTargetLowering::LowerSDIV24(SDValue Op, SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ EVT OVT = Op.getValueType();
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ MVT INTTY;
+ MVT FLTTY;
+ if (!OVT.isVector()) {
+ INTTY = MVT::i32;
+ FLTTY = MVT::f32;
+ } else if (OVT.getVectorNumElements() == 2) {
+ INTTY = MVT::v2i32;
+ FLTTY = MVT::v2f32;
+ } else if (OVT.getVectorNumElements() == 4) {
+ INTTY = MVT::v4i32;
+ FLTTY = MVT::v4f32;
+ }
+ unsigned bitsize = OVT.getScalarType().getSizeInBits();
+ // char|short jq = ia ^ ib;
+ SDValue jq = DAG.getNode(ISD::XOR, DL, OVT, LHS, RHS);
+
+ // jq = jq >> (bitsize - 2)
+ jq = DAG.getNode(ISD::SRA, DL, OVT, jq, DAG.getConstant(bitsize - 2, OVT));
+
+ // jq = jq | 0x1
+ jq = DAG.getNode(ISD::OR, DL, OVT, jq, DAG.getConstant(1, OVT));
+
+ // jq = (int)jq
+ jq = DAG.getSExtOrTrunc(jq, DL, INTTY);
+
+ // int ia = (int)LHS;
+ SDValue ia = DAG.getSExtOrTrunc(LHS, DL, INTTY);
+
+ // int ib = (int)RHS;
+ SDValue ib = DAG.getSExtOrTrunc(RHS, DL, INTTY);
+
+ // float fa = (float)ia;
+ SDValue fa = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ia);
+
+ // float fb = (float)ib;
+ SDValue fb = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ib);
+
+ // float fq = native_divide(fa, fb);
+ SDValue fq = DAG.getNode(ISD::FMUL, DL, FLTTY,
+ fa, DAG.getNode(AMDGPUISD::RCP, DL, FLTTY, fb));
+
+ // fq = trunc(fq);
+ fq = DAG.getNode(ISD::FTRUNC, DL, FLTTY, fq);
+
+ // float fqneg = -fq;
+ SDValue fqneg = DAG.getNode(ISD::FNEG, DL, FLTTY, fq);
+
+ // float fr = mad(fqneg, fb, fa);
+ SDValue fr = DAG.getNode(ISD::FADD, DL, FLTTY,
+ DAG.getNode(ISD::FMUL, DL, FLTTY, fqneg, fb), fa);
+
+ // int iq = (int)fq;
+ SDValue iq = DAG.getNode(ISD::FP_TO_SINT, DL, INTTY, fq);
+
+ // fr = fabs(fr);
+ fr = DAG.getNode(ISD::FABS, DL, FLTTY, fr);
+
+ // fb = fabs(fb);
+ fb = DAG.getNode(ISD::FABS, DL, FLTTY, fb);
+
+ // int cv = fr >= fb;
+ SDValue cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE);
+ // jq = (cv ? jq : 0);
+ jq = DAG.getNode(ISD::SELECT, DL, OVT, cv, jq,
+ DAG.getConstant(0, OVT));
+ // dst = iq + jq;
+ iq = DAG.getSExtOrTrunc(iq, DL, OVT);
+ iq = DAG.getNode(ISD::ADD, DL, OVT, iq, jq);
+ return iq;
+}
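
A host-side sketch of the algorithm the comments above spell out, assuming i16 operands and exact float arithmetic; on the GPU the reciprocal comes from an approximate RCP, which is why the precomputed +/-1 correction (jq) is added whenever the remainder's magnitude shows the truncated quotient fell one short.

    #include <cmath>
    #include <cstdint>

    // Sketch only; mirrors LowerSDIV24 for i16 inputs.
    int16_t sdiv24(int16_t A, int16_t B) {
      int Jq = ((A ^ B) >> 14) | 1;              // +1 if the signs match, -1 otherwise
      float Fa = (float)A, Fb = (float)B;
      float Fq = std::trunc(Fa * (1.0f / Fb));   // RCP + FMUL + FTRUNC on the GPU
      float Fr = std::fabs(-Fq * Fb + Fa);       // |remainder| via a mad
      int Iq = (int)Fq;
      if (Fr >= std::fabs(Fb))                   // quotient magnitude one too small
        Iq += Jq;
      return (int16_t)Iq;
    }
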
+
+SDValue AMDGPUTargetLowering::LowerSDIV32(SDValue Op, SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ EVT OVT = Op.getValueType();
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ // LowerSDIV32 generates code equivalent to the following IL.
+ // mov r0, LHS
+ // mov r1, RHS
+ // ilt r10, r0, 0
+ // ilt r11, r1, 0
+ // iadd r0, r0, r10
+ // iadd r1, r1, r11
+ // ixor r0, r0, r10
+ // ixor r1, r1, r11
+ // udiv r0, r0, r1
+ // ixor r10, r10, r11
+ // iadd r0, r0, r10
+ // ixor DST, r0, r10
+
+ // mov r0, LHS
+ SDValue r0 = LHS;
+
+ // mov r1, RHS
+ SDValue r1 = RHS;
+
+ // ilt r10, r0, 0
+ SDValue r10 = DAG.getSelectCC(DL,
+ r0, DAG.getConstant(0, OVT),
+ DAG.getConstant(-1, OVT),
+ DAG.getConstant(0, OVT),
+ ISD::SETLT);
+
+ // ilt r11, r1, 0
+ SDValue r11 = DAG.getSelectCC(DL,
+ r1, DAG.getConstant(0, OVT),
+ DAG.getConstant(-1, OVT),
+ DAG.getConstant(0, OVT),
+ ISD::SETLT);
+
+ // iadd r0, r0, r10
+ r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
+
+ // iadd r1, r1, r11
+ r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);
+
+ // ixor r0, r0, r10
+ r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
+
+ // ixor r1, r1, r11
+ r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);
+
+ // udiv r0, r0, r1
+ r0 = DAG.getNode(ISD::UDIV, DL, OVT, r0, r1);
+
+ // ixor r10, r10, r11
+ r10 = DAG.getNode(ISD::XOR, DL, OVT, r10, r11);
+
+ // iadd r0, r0, r10
+ r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
+
+ // ixor DST, r0, r10
+ SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
+ return DST;
+}
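
The IL listing above is the usual sign-removal trick: conditionally negate both inputs with an add/xor pair driven by their sign masks, divide unsigned, then re-apply the combined sign the same way. A compact host-side sketch (unsigned arithmetic avoids signed-overflow pitfalls; an arithmetic right shift is assumed):

    #include <cstdint>

    // Sketch of LowerSDIV32's sign handling around an unsigned divide.
    int32_t sdivViaUdiv(int32_t A, int32_t B) {
      uint32_t Sa = (uint32_t)(A >> 31);       // all-ones when A < 0 (ilt r10, r0, 0)
      uint32_t Sb = (uint32_t)(B >> 31);       // all-ones when B < 0 (ilt r11, r1, 0)
      uint32_t Ua = ((uint32_t)A + Sa) ^ Sa;   // |A|: add-then-xor negation
      uint32_t Ub = ((uint32_t)B + Sb) ^ Sb;   // |B|
      uint32_t Uq = Ua / Ub;                   // the udiv the GPU actually runs
      uint32_t Sq = Sa ^ Sb;                   // quotient negative iff signs differ
      return (int32_t)((Uq + Sq) ^ Sq);        // restore the sign the same way
    }
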
+
+SDValue AMDGPUTargetLowering::LowerSDIV64(SDValue Op, SelectionDAG &DAG) const {
+ return SDValue(Op.getNode(), 0);
+}
+
+SDValue AMDGPUTargetLowering::LowerSDIV(SDValue Op, SelectionDAG &DAG) const {
+ EVT OVT = Op.getValueType().getScalarType();
+
+ if (OVT == MVT::i64)
+ return LowerSDIV64(Op, DAG);
+
+ if (OVT == MVT::i32)
+ return LowerSDIV32(Op, DAG);
+
+ if (OVT == MVT::i16 || OVT == MVT::i8) {
+ // FIXME: We should be checking for the masked bits. This isn't reached
+ // because i8 and i16 are not legal types.
+ return LowerSDIV24(Op, DAG);
+ }
+
+ return SDValue(Op.getNode(), 0);
+}
+
+SDValue AMDGPUTargetLowering::LowerSREM32(SDValue Op, SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ EVT OVT = Op.getValueType();
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ // LowerSREM32 generates code equivalent to the following IL.
+ // mov r0, LHS
+ // mov r1, RHS
+ // ilt r10, r0, 0
+ // ilt r11, r1, 0
+ // iadd r0, r0, r10
+ // iadd r1, r1, r11
+ // ixor r0, r0, r10
+ // ixor r1, r1, r11
+ // udiv r20, r0, r1
+ // umul r20, r20, r1
+ // sub r0, r0, r20
+ // iadd r0, r0, r10
+ // ixor DST, r0, r10
+
+ // mov r0, LHS
+ SDValue r0 = LHS;
+
+ // mov r1, RHS
+ SDValue r1 = RHS;
+
+ // ilt r10, r0, 0
+ SDValue r10 = DAG.getSetCC(DL, OVT, r0, DAG.getConstant(0, OVT), ISD::SETLT);
+
+ // ilt r11, r1, 0
+ SDValue r11 = DAG.getSetCC(DL, OVT, r1, DAG.getConstant(0, OVT), ISD::SETLT);
+
+ // iadd r0, r0, r10
+ r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
+
+ // iadd r1, r1, r11
+ r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);
+
+ // ixor r0, r0, r10
+ r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
+
+ // ixor r1, r1, r11
+ r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);
+
+ // udiv r20, r0, r1
+ SDValue r20 = DAG.getNode(ISD::UDIV, DL, OVT, r0, r1);
+
+ // umul r20, r20, r1
+ r20 = DAG.getNode(AMDGPUISD::UMUL, DL, OVT, r20, r1);
+
+ // sub r0, r0, r20
+ r0 = DAG.getNode(ISD::SUB, DL, OVT, r0, r20);
+
+ // iadd r0, r0, r10
+ r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
+
+ // ixor DST, r0, r10
+ SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
+ return DST;
+}
+
+SDValue AMDGPUTargetLowering::LowerSREM64(SDValue Op, SelectionDAG &DAG) const {
+ return SDValue(Op.getNode(), 0);
+}
+
+SDValue AMDGPUTargetLowering::LowerSREM(SDValue Op, SelectionDAG &DAG) const {
+ EVT OVT = Op.getValueType();
+
+ if (OVT.getScalarType() == MVT::i64)
+ return LowerSREM64(Op, DAG);
+
+ if (OVT.getScalarType() == MVT::i32)
+ return LowerSREM32(Op, DAG);
+
+ return SDValue(Op.getNode(), 0);
+}
+
SDValue AMDGPUTargetLowering::LowerUDIVREM(SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
@@ -1201,6 +1625,177 @@ SDValue AMDGPUTargetLowering::LowerUDIVREM(SDValue Op,
return DAG.getMergeValues(Ops, DL);
}
+SDValue AMDGPUTargetLowering::LowerSDIVREM(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ EVT VT = Op.getValueType();
+
+ SDValue Zero = DAG.getConstant(0, VT);
+ SDValue NegOne = DAG.getConstant(-1, VT);
+
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+
+ SDValue LHSign = DAG.getSelectCC(DL, LHS, Zero, NegOne, Zero, ISD::SETLT);
+ SDValue RHSign = DAG.getSelectCC(DL, RHS, Zero, NegOne, Zero, ISD::SETLT);
+ SDValue DSign = DAG.getNode(ISD::XOR, DL, VT, LHSign, RHSign);
+ SDValue RSign = LHSign; // Remainder sign is the same as LHS
+
+ LHS = DAG.getNode(ISD::ADD, DL, VT, LHS, LHSign);
+ RHS = DAG.getNode(ISD::ADD, DL, VT, RHS, RHSign);
+
+ LHS = DAG.getNode(ISD::XOR, DL, VT, LHS, LHSign);
+ RHS = DAG.getNode(ISD::XOR, DL, VT, RHS, RHSign);
+
+ SDValue Div = DAG.getNode(ISD::UDIVREM, DL, DAG.getVTList(VT, VT), LHS, RHS);
+ SDValue Rem = Div.getValue(1);
+
+ Div = DAG.getNode(ISD::XOR, DL, VT, Div, DSign);
+ Rem = DAG.getNode(ISD::XOR, DL, VT, Rem, RSign);
+
+ Div = DAG.getNode(ISD::SUB, DL, VT, Div, DSign);
+ Rem = DAG.getNode(ISD::SUB, DL, VT, Rem, RSign);
+
+ SDValue Res[2] = {
+ Div,
+ Rem
+ };
+ return DAG.getMergeValues(Res, DL);
+}
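
LowerSDIVREM applies the same negation trick once around a single UDIVREM; the detail worth noting is the sign rule: the quotient takes the XOR of the operand signs while the remainder takes the sign of the dividend, so a == q * b + r holds for every sign combination. A brief sketch:

    #include <cstdint>
    #include <utility>

    // Sketch only (xor-then-subtract restores the signs, as in the DAG code).
    std::pair<int32_t, int32_t> sdivrem32(int32_t A, int32_t B) {
      uint32_t Sa = (uint32_t)(A >> 31), Sb = (uint32_t)(B >> 31);
      uint32_t Ua = ((uint32_t)A + Sa) ^ Sa, Ub = ((uint32_t)B + Sb) ^ Sb;
      uint32_t Q = Ua / Ub, R = Ua % Ub;       // one UDIVREM provides both results
      uint32_t Qs = Sa ^ Sb;                   // quotient sign
      return { (int32_t)((Q ^ Qs) - Qs),       // Div = (Div ^ DSign) - DSign
               (int32_t)((R ^ Sa) - Sa) };     // Rem = (Rem ^ RSign) - RSign
    }
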
+
+SDValue AMDGPUTargetLowering::LowerFCEIL(SDValue Op, SelectionDAG &DAG) const {
+ SDLoc SL(Op);
+ SDValue Src = Op.getOperand(0);
+
+ // result = trunc(src)
+ // if (src > 0.0 && src != result)
+ // result += 1.0
+
+ SDValue Trunc = DAG.getNode(ISD::FTRUNC, SL, MVT::f64, Src);
+
+ const SDValue Zero = DAG.getConstantFP(0.0, MVT::f64);
+ const SDValue One = DAG.getConstantFP(1.0, MVT::f64);
+
+ EVT SetCCVT = getSetCCResultType(*DAG.getContext(), MVT::f64);
+
+ SDValue Gt0 = DAG.getSetCC(SL, SetCCVT, Src, Zero, ISD::SETOGT);
+ SDValue NeTrunc = DAG.getSetCC(SL, SetCCVT, Src, Trunc, ISD::SETONE);
+ SDValue And = DAG.getNode(ISD::AND, SL, SetCCVT, Gt0, NeTrunc);
+
+ SDValue Add = DAG.getNode(ISD::SELECT, SL, MVT::f64, And, One, Zero);
+ return DAG.getNode(ISD::FADD, SL, MVT::f64, Trunc, Add);
+}
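
A one-line host equivalent of the expansion above; LowerFFLOOR later in this patch is the mirror image, testing src < 0.0 and adding -1.0 instead.

    #include <cmath>

    // Sketch of the FCEIL expansion: trunc, then bump by 1.0 only for positive,
    // non-integral inputs.
    double ceilF64(double X) {
      double T = std::trunc(X);
      return (X > 0.0 && X != T) ? T + 1.0 : T;
    }
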
+
+SDValue AMDGPUTargetLowering::LowerFTRUNC(SDValue Op, SelectionDAG &DAG) const {
+ SDLoc SL(Op);
+ SDValue Src = Op.getOperand(0);
+
+ assert(Op.getValueType() == MVT::f64);
+
+ const SDValue Zero = DAG.getConstant(0, MVT::i32);
+ const SDValue One = DAG.getConstant(1, MVT::i32);
+
+ SDValue VecSrc = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, Src);
+
+ // Extract the upper half, since this is where we will find the sign and
+ // exponent.
+ SDValue Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, VecSrc, One);
+
+ const unsigned FractBits = 52;
+ const unsigned ExpBits = 11;
+
+ // Extract the exponent. The field is an unsigned 11-bit value, so use the
+ // zero-extending BFE.
+ SDValue ExpPart = DAG.getNode(AMDGPUISD::BFE_U32, SL, MVT::i32,
+ Hi,
+ DAG.getConstant(FractBits - 32, MVT::i32),
+ DAG.getConstant(ExpBits, MVT::i32));
+ SDValue Exp = DAG.getNode(ISD::SUB, SL, MVT::i32, ExpPart,
+ DAG.getConstant(1023, MVT::i32));
+
+ // Extract the sign bit.
+ const SDValue SignBitMask = DAG.getConstant(UINT32_C(1) << 31, MVT::i32);
+ SDValue SignBit = DAG.getNode(ISD::AND, SL, MVT::i32, Hi, SignBitMask);
+
+ // Extend back to 64 bits.
+ SDValue SignBit64 = DAG.getNode(ISD::BUILD_VECTOR, SL, MVT::v2i32,
+ Zero, SignBit);
+ SignBit64 = DAG.getNode(ISD::BITCAST, SL, MVT::i64, SignBit64);
+
+ SDValue BcInt = DAG.getNode(ISD::BITCAST, SL, MVT::i64, Src);
+ const SDValue FractMask
+ = DAG.getConstant((UINT64_C(1) << FractBits) - 1, MVT::i64);
+
+ SDValue Shr = DAG.getNode(ISD::SRA, SL, MVT::i64, FractMask, Exp);
+ SDValue Not = DAG.getNOT(SL, Shr, MVT::i64);
+ SDValue Tmp0 = DAG.getNode(ISD::AND, SL, MVT::i64, BcInt, Not);
+
+ EVT SetCCVT = getSetCCResultType(*DAG.getContext(), MVT::i32);
+
+ const SDValue FiftyOne = DAG.getConstant(FractBits - 1, MVT::i32);
+
+ SDValue ExpLt0 = DAG.getSetCC(SL, SetCCVT, Exp, Zero, ISD::SETLT);
+ SDValue ExpGt51 = DAG.getSetCC(SL, SetCCVT, Exp, FiftyOne, ISD::SETGT);
+
+ SDValue Tmp1 = DAG.getNode(ISD::SELECT, SL, MVT::i64, ExpLt0, SignBit64, Tmp0);
+ SDValue Tmp2 = DAG.getNode(ISD::SELECT, SL, MVT::i64, ExpGt51, BcInt, Tmp1);
+
+ return DAG.getNode(ISD::BITCAST, SL, MVT::f64, Tmp2);
+}
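
The bit-level trunc above can be cross-checked against a host sketch that does the same arithmetic on the raw IEEE-754 encoding: an unbiased exponent below zero keeps only the sign, one above 51 means the value is already integral, and anything in between clears the fractional mantissa bits.

    #include <cstdint>
    #include <cstring>

    // Host-side sketch of the f64 FTRUNC expansion (bit manipulation only).
    double truncF64Bits(double X) {
      uint64_t Bits;
      std::memcpy(&Bits, &X, sizeof(Bits));
      int Exp = (int)((Bits >> 52) & 0x7ff) - 1023;    // unbiased exponent
      if (Exp < 0) {
        Bits &= UINT64_C(1) << 63;                     // |X| < 1: keep only the sign
      } else if (Exp <= 51) {
        uint64_t FractMask = (UINT64_C(1) << 52) - 1;
        Bits &= ~(FractMask >> Exp);                   // clear the fractional bits
      }                                                // Exp > 51: already integral
      double R;
      std::memcpy(&R, &Bits, sizeof(R));
      return R;
    }
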
+
+SDValue AMDGPUTargetLowering::LowerFRINT(SDValue Op, SelectionDAG &DAG) const {
+ SDLoc SL(Op);
+ SDValue Src = Op.getOperand(0);
+
+ assert(Op.getValueType() == MVT::f64);
+
+ APFloat C1Val(APFloat::IEEEdouble, "0x1.0p+52");
+ SDValue C1 = DAG.getConstantFP(C1Val, MVT::f64);
+ SDValue CopySign = DAG.getNode(ISD::FCOPYSIGN, SL, MVT::f64, C1, Src);
+
+ SDValue Tmp1 = DAG.getNode(ISD::FADD, SL, MVT::f64, Src, CopySign);
+ SDValue Tmp2 = DAG.getNode(ISD::FSUB, SL, MVT::f64, Tmp1, CopySign);
+
+ SDValue Fabs = DAG.getNode(ISD::FABS, SL, MVT::f64, Src);
+
+ APFloat C2Val(APFloat::IEEEdouble, "0x1.fffffffffffffp+51");
+ SDValue C2 = DAG.getConstantFP(C2Val, MVT::f64);
+
+ EVT SetCCVT = getSetCCResultType(*DAG.getContext(), MVT::f64);
+ SDValue Cond = DAG.getSetCC(SL, SetCCVT, Fabs, C2, ISD::SETOGT);
+
+ return DAG.getSelect(SL, MVT::f64, Cond, Src, Tmp2);
+}
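
The two constants above implement the classic round-to-nearest trick: adding and then subtracting 2^52, carrying the input's sign, forces the FPU to round at integer granularity, while inputs whose magnitude already exceeds 2^52 - 0.5 are returned unchanged because they are necessarily integral. A host sketch assuming the default round-to-nearest-even mode:

    #include <cmath>

    // Sketch of the f64 FRINT expansion.
    double rintF64(double X) {
      const double C1 = 4503599627370496.0;        // 2^52 (0x1.0p+52)
      const double C2 = 4503599627370495.5;        // 0x1.fffffffffffffp+51
      double S = std::copysign(C1, X);
      double Rounded = (X + S) - S;                // rounds at integer precision
      return std::fabs(X) > C2 ? X : Rounded;      // large values are already integral
    }
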
+
+SDValue AMDGPUTargetLowering::LowerFNEARBYINT(SDValue Op, SelectionDAG &DAG) const {
+ // FNEARBYINT and FRINT are the same, except in their handling of FP
+ // exceptions. Those aren't really meaningful for us, and OpenCL only has
+ // rint, so just treat them as equivalent.
+ return DAG.getNode(ISD::FRINT, SDLoc(Op), Op.getValueType(), Op.getOperand(0));
+}
+
+SDValue AMDGPUTargetLowering::LowerFFLOOR(SDValue Op, SelectionDAG &DAG) const {
+ SDLoc SL(Op);
+ SDValue Src = Op.getOperand(0);
+
+ // result = trunc(src);
+ // if (src < 0.0 && src != result)
+ // result += -1.0.
+
+ SDValue Trunc = DAG.getNode(ISD::FTRUNC, SL, MVT::f64, Src);
+
+ const SDValue Zero = DAG.getConstantFP(0.0, MVT::f64);
+ const SDValue NegOne = DAG.getConstantFP(-1.0, MVT::f64);
+
+ EVT SetCCVT = getSetCCResultType(*DAG.getContext(), MVT::f64);
+
+ SDValue Lt0 = DAG.getSetCC(SL, SetCCVT, Src, Zero, ISD::SETOLT);
+ SDValue NeTrunc = DAG.getSetCC(SL, SetCCVT, Src, Trunc, ISD::SETONE);
+ SDValue And = DAG.getNode(ISD::AND, SL, SetCCVT, Lt0, NeTrunc);
+
+ SDValue Add = DAG.getNode(ISD::SELECT, SL, MVT::f64, And, NegOne, Zero);
+ return DAG.getNode(ISD::FADD, SL, MVT::f64, Trunc, Add);
+}
+
SDValue AMDGPUTargetLowering::LowerUINT_TO_FP(SDValue Op,
SelectionDAG &DAG) const {
SDValue S0 = Op.getOperand(0);
@@ -1218,7 +1813,6 @@ SDValue AMDGPUTargetLowering::LowerUINT_TO_FP(SDValue Op,
FloatHi = DAG.getNode(ISD::FMUL, DL, MVT::f32, FloatHi,
DAG.getConstantFP(4294967296.0f, MVT::f32)); // 2^32
return DAG.getNode(ISD::FADD, DL, MVT::f32, FloatLo, FloatHi);
-
}
SDValue AMDGPUTargetLowering::ExpandSIGN_EXTEND_INREG(SDValue Op,
@@ -1303,6 +1897,37 @@ static SDValue constantFoldBFE(SelectionDAG &DAG, IntTy Src0,
return DAG.getConstant(Src0 >> Offset, MVT::i32);
}
+SDValue AMDGPUTargetLowering::performMulCombine(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ EVT VT = N->getValueType(0);
+
+ if (VT.isVector() || VT.getSizeInBits() > 32)
+ return SDValue();
+
+ SelectionDAG &DAG = DCI.DAG;
+ SDLoc DL(N);
+
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue Mul;
+
+ if (Subtarget->hasMulU24() && isU24(N0, DAG) && isU24(N1, DAG)) {
+ N0 = DAG.getZExtOrTrunc(N0, DL, MVT::i32);
+ N1 = DAG.getZExtOrTrunc(N1, DL, MVT::i32);
+ Mul = DAG.getNode(AMDGPUISD::MUL_U24, DL, MVT::i32, N0, N1);
+ } else if (Subtarget->hasMulI24() && isI24(N0, DAG) && isI24(N1, DAG)) {
+ N0 = DAG.getSExtOrTrunc(N0, DL, MVT::i32);
+ N1 = DAG.getSExtOrTrunc(N1, DL, MVT::i32);
+ Mul = DAG.getNode(AMDGPUISD::MUL_I24, DL, MVT::i32, N0, N1);
+ } else {
+ return SDValue();
+ }
+
+ // We need to use sext even for MUL_U24, because MUL_U24 is used
+ // for signed multiply of 8 and 16-bit types.
+ return DAG.getSExtOrTrunc(Mul, DL, VT);
+}
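
The combine only fires when both operands provably fit in 24 bits, because MUL_U24 / MUL_I24 read just the low 24 bits of each source; under that condition the low 32 bits of the 24x24 product match the low 32 bits of a full 32-bit multiply, which is all a result type of at most 32 bits needs. A value-level sketch of the operand checks (the real isU24 / isI24 run known-bits and sign-bit analysis on SDValues):

    #include <cstdint>

    // Sketch only.
    bool fitsU24(uint32_t V) { return (V >> 24) == 0; }                   // upper 8 bits zero
    bool fitsI24(int32_t V)  { return V >= -(1 << 23) && V < (1 << 23); } // signed 24-bit range
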
+
SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
@@ -1310,34 +1935,8 @@ SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N,
switch(N->getOpcode()) {
default: break;
- case ISD::MUL: {
- EVT VT = N->getValueType(0);
- SDValue N0 = N->getOperand(0);
- SDValue N1 = N->getOperand(1);
- SDValue Mul;
-
- // FIXME: Add support for 24-bit multiply with 64-bit output on SI.
- if (VT.isVector() || VT.getSizeInBits() > 32)
- break;
-
- if (Subtarget->hasMulU24() && isU24(N0, DAG) && isU24(N1, DAG)) {
- N0 = DAG.getZExtOrTrunc(N0, DL, MVT::i32);
- N1 = DAG.getZExtOrTrunc(N1, DL, MVT::i32);
- Mul = DAG.getNode(AMDGPUISD::MUL_U24, DL, MVT::i32, N0, N1);
- } else if (Subtarget->hasMulI24() && isI24(N0, DAG) && isI24(N1, DAG)) {
- N0 = DAG.getSExtOrTrunc(N0, DL, MVT::i32);
- N1 = DAG.getSExtOrTrunc(N1, DL, MVT::i32);
- Mul = DAG.getNode(AMDGPUISD::MUL_I24, DL, MVT::i32, N0, N1);
- } else {
- break;
- }
-
- // We need to use sext even for MUL_U24, because MUL_U24 is used
- // for signed multiply of 8 and 16-bit types.
- SDValue Reg = DAG.getSExtOrTrunc(Mul, DL, VT);
-
- return Reg;
- }
+ case ISD::MUL:
+ return performMulCombine(N, DCI);
case AMDGPUISD::MUL_I24:
case AMDGPUISD::MUL_U24: {
SDValue N0 = N->getOperand(0);
@@ -1511,29 +2110,38 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
// AMDIL DAG nodes
NODE_NAME_CASE(CALL);
NODE_NAME_CASE(UMUL);
- NODE_NAME_CASE(DIV_INF);
NODE_NAME_CASE(RET_FLAG);
NODE_NAME_CASE(BRANCH_COND);
// AMDGPU DAG nodes
NODE_NAME_CASE(DWORDADDR)
NODE_NAME_CASE(FRACT)
+ NODE_NAME_CASE(CLAMP)
NODE_NAME_CASE(FMAX)
NODE_NAME_CASE(SMAX)
NODE_NAME_CASE(UMAX)
NODE_NAME_CASE(FMIN)
NODE_NAME_CASE(SMIN)
NODE_NAME_CASE(UMIN)
+ NODE_NAME_CASE(URECIP)
+ NODE_NAME_CASE(DIV_SCALE)
+ NODE_NAME_CASE(DIV_FMAS)
+ NODE_NAME_CASE(DIV_FIXUP)
+ NODE_NAME_CASE(TRIG_PREOP)
+ NODE_NAME_CASE(RCP)
+ NODE_NAME_CASE(RSQ)
+ NODE_NAME_CASE(RSQ_LEGACY)
+ NODE_NAME_CASE(RSQ_CLAMPED)
+ NODE_NAME_CASE(DOT4)
NODE_NAME_CASE(BFE_U32)
NODE_NAME_CASE(BFE_I32)
NODE_NAME_CASE(BFI)
NODE_NAME_CASE(BFM)
+ NODE_NAME_CASE(BREV)
NODE_NAME_CASE(MUL_U24)
NODE_NAME_CASE(MUL_I24)
NODE_NAME_CASE(MAD_U24)
NODE_NAME_CASE(MAD_I24)
- NODE_NAME_CASE(URECIP)
- NODE_NAME_CASE(DOT4)
NODE_NAME_CASE(EXPORT)
NODE_NAME_CASE(CONST_ADDRESS)
NODE_NAME_CASE(REGISTER_LOAD)
@@ -1544,6 +2152,11 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(SAMPLEB)
NODE_NAME_CASE(SAMPLED)
NODE_NAME_CASE(SAMPLEL)
+ NODE_NAME_CASE(CVT_F32_UBYTE0)
+ NODE_NAME_CASE(CVT_F32_UBYTE1)
+ NODE_NAME_CASE(CVT_F32_UBYTE2)
+ NODE_NAME_CASE(CVT_F32_UBYTE3)
+ NODE_NAME_CASE(BUILD_VERTICAL_VECTOR)
NODE_NAME_CASE(STORE_MSKOR)
NODE_NAME_CASE(TBUFFER_STORE_FORMAT)
}
diff --git a/lib/Target/R600/AMDGPUISelLowering.h b/lib/Target/R600/AMDGPUISelLowering.h
index d5d821d..98a92ad 100644
--- a/lib/Target/R600/AMDGPUISelLowering.h
+++ b/lib/Target/R600/AMDGPUISelLowering.h
@@ -42,10 +42,33 @@ private:
SDValue MergeVectorStore(const SDValue &Op, SelectionDAG &DAG) const;
/// \brief Split a vector store into multiple scalar stores.
/// \returns The resulting chain.
+
+ SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSDIV24(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSDIV32(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSDIV64(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSREM(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSREM32(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSREM64(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerUDIVREM(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFCEIL(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFTRUNC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFRINT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFNEARBYINT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFFLOOR(SDValue Op, SelectionDAG &DAG) const;
+
SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
+ SDValue ExpandSIGN_EXTEND_INREG(SDValue Op,
+ unsigned BitsDiff,
+ SelectionDAG &DAG) const;
+ SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue performMulCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+
protected:
+ static EVT getEquivalentMemType(LLVMContext &Context, EVT VT);
+ static EVT getEquivalentLoadRegType(LLVMContext &Context, EVT VT);
/// \brief Helper function that adds Reg to the LiveIn list of the DAG's
/// MachineFunction.
@@ -61,6 +84,7 @@ protected:
SDValue SplitVectorStore(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSDIVREM(SDValue Op, SelectionDAG &DAG) const;
bool isHWTrueValue(SDValue Op) const;
bool isHWFalseValue(SDValue Op) const;
@@ -87,10 +111,16 @@ public:
bool isZExtFree(Type *Src, Type *Dest) const override;
bool isZExtFree(EVT Src, EVT Dest) const override;
+ bool isZExtFree(SDValue Val, EVT VT2) const override;
bool isNarrowingProfitable(EVT VT1, EVT VT2) const override;
MVT getVectorIdxTy() const override;
+ bool isSelectSupported(SelectSupportKind) const override;
+
+ bool isFPImmLegal(const APFloat &Imm, EVT VT) const override;
+ bool ShouldShrinkFPConstant(EVT VT) const override;
+
bool isLoadBitCastBeneficial(EVT, EVT) const override;
SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv,
bool isVarArg,
@@ -101,6 +131,7 @@ public:
SmallVectorImpl<SDValue> &InVals) const override;
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
+ SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
void ReplaceNodeResults(SDNode * N,
SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG) const override;
@@ -128,38 +159,6 @@ public:
SDValue Op,
const SelectionDAG &DAG,
unsigned Depth = 0) const override;
-
-// Functions defined in AMDILISelLowering.cpp
-public:
- bool getTgtMemIntrinsic(IntrinsicInfo &Info,
- const CallInst &I, unsigned Intrinsic) const override;
-
- /// We want to mark f32/f64 floating point values as legal.
- bool isFPImmLegal(const APFloat &Imm, EVT VT) const override;
-
- /// We don't want to shrink f64/f32 constants.
- bool ShouldShrinkFPConstant(EVT VT) const override;
-
- SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
-
-private:
- void InitAMDILLowering();
- SDValue LowerSREM(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerSREM8(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerSREM16(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerSREM32(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerSREM64(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerSDIV24(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerSDIV32(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerSDIV64(SDValue Op, SelectionDAG &DAG) const;
-
- SDValue ExpandSIGN_EXTEND_INREG(SDValue Op,
- unsigned BitsDiff,
- SelectionDAG &DAG) const;
- SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const;
- EVT genIntType(uint32_t size = 32, uint32_t numEle = 1) const;
- SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
};
namespace AMDGPUISD {
@@ -169,12 +168,15 @@ enum {
FIRST_NUMBER = ISD::BUILTIN_OP_END,
CALL, // Function call based on a single integer
UMUL, // 32bit unsigned multiplication
- DIV_INF, // Divide with infinity returned on zero divisor
RET_FLAG,
BRANCH_COND,
// End AMDIL ISD Opcodes
DWORDADDR,
FRACT,
+ CLAMP,
+
+ // SIN_HW, COS_HW - f32 for SI, 1 ULP max error, valid from -100 pi to 100 pi.
+ // Denormals handled on some parts.
COS_HW,
SIN_HW,
FMAX,
@@ -184,11 +186,23 @@ enum {
SMIN,
UMIN,
URECIP,
+ DIV_SCALE,
+ DIV_FMAS,
+ DIV_FIXUP,
+ TRIG_PREOP, // 1 ULP max error for f64
+
+ // RCP, RSQ - For f32, 1 ULP max error, no denormal handling.
+ // For f64, max error 2^29 ULP, handles denormals.
+ RCP,
+ RSQ,
+ RSQ_LEGACY,
+ RSQ_CLAMPED,
DOT4,
BFE_U32, // Extract range of bits with zero extension to 32-bits.
BFE_I32, // Extract range of bits with sign extension to 32-bits.
BFI, // (src0 & src1) | (~src0 & src2)
BFM, // Insert a range of bits into a 32-bit word.
+ BREV, // Reverse bits.
MUL_U24,
MUL_I24,
MAD_U24,
@@ -203,6 +217,21 @@ enum {
SAMPLEB,
SAMPLED,
SAMPLEL,
+
+ // These cvt_f32_ubyte* nodes need to remain consecutive and in order.
+ CVT_F32_UBYTE0,
+ CVT_F32_UBYTE1,
+ CVT_F32_UBYTE2,
+ CVT_F32_UBYTE3,
+ /// This node is for VLIW targets and it is used to represent a vector
+ /// that is stored in consecutive registers with the same channel.
+ /// For example:
+ /// |X |Y|Z|W|
+ /// T0|v.x| | | |
+ /// T1|v.y| | | |
+ /// T2|v.z| | | |
+ /// T3|v.w| | | |
+ BUILD_VERTICAL_VECTOR,
FIRST_MEM_OPCODE_NUMBER = ISD::FIRST_TARGET_MEMORY_OPCODE,
STORE_MSKOR,
LOAD_CONSTANT,
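
For reference, the assumed semantics of the four CVT_F32_UBYTEn nodes (matching the hardware cvt_f32_ubyte0..3 family): select byte n of a 32-bit word and convert it to float; keeping the opcodes consecutive presumably lets code compute CVT_F32_UBYTE0 + n directly.

    #include <cstdint>

    // Assumed semantics of CVT_F32_UBYTEn, n = 0..3.
    float cvtF32Ubyte(uint32_t Word, unsigned N) {
      return (float)((Word >> (8 * N)) & 0xffu);
    }
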
diff --git a/lib/Target/R600/AMDGPUInstrInfo.cpp b/lib/Target/R600/AMDGPUInstrInfo.cpp
index 1c3361a..fef5b8c 100644
--- a/lib/Target/R600/AMDGPUInstrInfo.cpp
+++ b/lib/Target/R600/AMDGPUInstrInfo.cpp
@@ -30,8 +30,8 @@ using namespace llvm;
// Pin the vtable to this file.
void AMDGPUInstrInfo::anchor() {}
-AMDGPUInstrInfo::AMDGPUInstrInfo(TargetMachine &tm)
- : AMDGPUGenInstrInfo(-1,-1), RI(tm), TM(tm) { }
+AMDGPUInstrInfo::AMDGPUInstrInfo(const AMDGPUSubtarget &st)
+ : AMDGPUGenInstrInfo(-1,-1), RI(st), ST(st) { }
const AMDGPURegisterInfo &AMDGPUInstrInfo::getRegisterInfo() const {
return RI;
@@ -320,33 +320,11 @@ int AMDGPUInstrInfo::getIndirectIndexEnd(const MachineFunction &MF) const {
return -1;
}
- Offset = TM.getFrameLowering()->getFrameIndexOffset(MF, -1);
+ Offset = MF.getTarget().getFrameLowering()->getFrameIndexOffset(MF, -1);
return getIndirectIndexBegin(MF) + Offset;
}
-
-void AMDGPUInstrInfo::convertToISA(MachineInstr & MI, MachineFunction &MF,
- DebugLoc DL) const {
- MachineRegisterInfo &MRI = MF.getRegInfo();
- const AMDGPURegisterInfo & RI = getRegisterInfo();
-
- for (unsigned i = 0; i < MI.getNumOperands(); i++) {
- MachineOperand &MO = MI.getOperand(i);
- // Convert dst regclass to one that is supported by the ISA
- if (MO.isReg() && MO.isDef()) {
- if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
- const TargetRegisterClass * oldRegClass = MRI.getRegClass(MO.getReg());
- const TargetRegisterClass * newRegClass = RI.getISARegClass(oldRegClass);
-
- assert(newRegClass);
-
- MRI.setRegClass(MO.getReg(), newRegClass);
- }
- }
- }
-}
-
int AMDGPUInstrInfo::getMaskedMIMGOp(uint16_t Opcode, unsigned Channels) const {
switch (Channels) {
default: return Opcode;
diff --git a/lib/Target/R600/AMDGPUInstrInfo.h b/lib/Target/R600/AMDGPUInstrInfo.h
index 74baf6b..95dc8c1 100644
--- a/lib/Target/R600/AMDGPUInstrInfo.h
+++ b/lib/Target/R600/AMDGPUInstrInfo.h
@@ -33,7 +33,7 @@
namespace llvm {
-class AMDGPUTargetMachine;
+class AMDGPUSubtarget;
class MachineFunction;
class MachineInstr;
class MachineInstrBuilder;
@@ -45,9 +45,9 @@ private:
MachineBasicBlock &MBB) const;
virtual void anchor();
protected:
- TargetMachine &TM;
+ const AMDGPUSubtarget &ST;
public:
- explicit AMDGPUInstrInfo(TargetMachine &tm);
+ explicit AMDGPUInstrInfo(const AMDGPUSubtarget &st);
virtual const AMDGPURegisterInfo &getRegisterInfo() const = 0;
@@ -137,14 +137,6 @@ public:
bool isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const override;
// Helper functions that check the opcode for status information
- bool isLoadInst(llvm::MachineInstr *MI) const;
- bool isExtLoadInst(llvm::MachineInstr *MI) const;
- bool isSWSExtLoadInst(llvm::MachineInstr *MI) const;
- bool isSExtLoadInst(llvm::MachineInstr *MI) const;
- bool isZExtLoadInst(llvm::MachineInstr *MI) const;
- bool isAExtLoadInst(llvm::MachineInstr *MI) const;
- bool isStoreInst(llvm::MachineInstr *MI) const;
- bool isTruncStoreInst(llvm::MachineInstr *MI) const;
bool isRegisterStore(const MachineInstr &MI) const;
bool isRegisterLoad(const MachineInstr &MI) const;
@@ -185,11 +177,6 @@ public:
unsigned ValueReg, unsigned Address,
unsigned OffsetReg) const = 0;
-
- /// \brief Convert the AMDIL MachineInstr to a supported ISA
- /// MachineInstr
- void convertToISA(MachineInstr & MI, MachineFunction &MF, DebugLoc DL) const;
-
/// \brief Build a MOV instruction.
virtual MachineInstr *buildMovInstr(MachineBasicBlock *MBB,
MachineBasicBlock::iterator I,
diff --git a/lib/Target/R600/AMDGPUInstrInfo.td b/lib/Target/R600/AMDGPUInstrInfo.td
index f96dbb4..934d59d 100644
--- a/lib/Target/R600/AMDGPUInstrInfo.td
+++ b/lib/Target/R600/AMDGPUInstrInfo.td
@@ -19,6 +19,14 @@ def AMDGPUDTIntTernaryOp : SDTypeProfile<1, 3, [
SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisInt<0>, SDTCisInt<3>
]>;
+def AMDGPUTrigPreOp : SDTypeProfile<1, 2,
+ [SDTCisSameAs<0, 1>, SDTCisFP<0>, SDTCisInt<2>]
+>;
+
+def AMDGPUDivScaleOp : SDTypeProfile<2, 3,
+ [SDTCisFP<0>, SDTCisInt<1>, SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>, SDTCisSameAs<0, 4>]
+>;
+
//===----------------------------------------------------------------------===//
// AMDGPU DAG Nodes
//
@@ -29,11 +37,25 @@ def AMDGPUdwordaddr : SDNode<"AMDGPUISD::DWORDADDR", SDTIntUnaryOp>;
// out = a - floor(a)
def AMDGPUfract : SDNode<"AMDGPUISD::FRACT", SDTFPUnaryOp>;
+// out = 1.0 / a
+def AMDGPUrcp : SDNode<"AMDGPUISD::RCP", SDTFPUnaryOp>;
+
+// out = 1.0 / sqrt(a)
+def AMDGPUrsq : SDNode<"AMDGPUISD::RSQ", SDTFPUnaryOp>;
+
+// out = 1.0 / sqrt(a)
+def AMDGPUrsq_legacy : SDNode<"AMDGPUISD::RSQ_LEGACY", SDTFPUnaryOp>;
+
+// out = 1.0 / sqrt(a), result clamped to +/- max_float.
+def AMDGPUrsq_clamped : SDNode<"AMDGPUISD::RSQ_CLAMPED", SDTFPUnaryOp>;
+
// out = max(a, b) a and b are floats
def AMDGPUfmax : SDNode<"AMDGPUISD::FMAX", SDTFPBinOp,
[SDNPCommutative, SDNPAssociative]
>;
+def AMDGPUclamp : SDNode<"AMDGPUISD::CLAMP", SDTFPTernaryOp, []>;
+
// out = max(a, b) a and b are signed ints
def AMDGPUsmax : SDNode<"AMDGPUISD::SMAX", SDTIntBinOp,
[SDNPCommutative, SDNPAssociative]
@@ -59,12 +81,38 @@ def AMDGPUumin : SDNode<"AMDGPUISD::UMIN", SDTIntBinOp,
[SDNPCommutative, SDNPAssociative]
>;
+
+def AMDGPUcvt_f32_ubyte0 : SDNode<"AMDGPUISD::CVT_F32_UBYTE0",
+ SDTIntToFPOp, []>;
+def AMDGPUcvt_f32_ubyte1 : SDNode<"AMDGPUISD::CVT_F32_UBYTE1",
+ SDTIntToFPOp, []>;
+def AMDGPUcvt_f32_ubyte2 : SDNode<"AMDGPUISD::CVT_F32_UBYTE2",
+ SDTIntToFPOp, []>;
+def AMDGPUcvt_f32_ubyte3 : SDNode<"AMDGPUISD::CVT_F32_UBYTE3",
+ SDTIntToFPOp, []>;
+
+
// urecip - This operation is a helper for integer division; it returns the
// result of 1 / a as a fractional unsigned integer.
// out = (2^32 / a) + e
// e is rounding error
def AMDGPUurecip : SDNode<"AMDGPUISD::URECIP", SDTIntUnaryOp>;
+// Special case divide preop and flags.
+def AMDGPUdiv_scale : SDNode<"AMDGPUISD::DIV_SCALE", AMDGPUDivScaleOp>;
+
+// Special case divide FMA with scale and flags (src0 = Quotient,
+// src1 = Denominator, src2 = Numerator).
+def AMDGPUdiv_fmas : SDNode<"AMDGPUISD::DIV_FMAS", SDTFPTernaryOp>;
+
+// Single or double precision division fixup.
+// Special case divide fixup and flags (src0 = Quotient, src1 =
+// Denominator, src2 = Numerator).
+def AMDGPUdiv_fixup : SDNode<"AMDGPUISD::DIV_FIXUP", SDTFPTernaryOp>;
+
+// Look Up 2.0 / pi src0 with segment select src1[4:0]
+def AMDGPUtrig_preop : SDNode<"AMDGPUISD::TRIG_PREOP", AMDGPUTrigPreOp>;
+
def AMDGPUregister_load : SDNode<"AMDGPUISD::REGISTER_LOAD",
SDTypeProfile<1, 2, [SDTCisPtrTy<1>, SDTCisInt<2>]>,
[SDNPHasChain, SDNPMayLoad]>;
@@ -92,6 +140,8 @@ def AMDGPUbfe_i32 : SDNode<"AMDGPUISD::BFE_I32", AMDGPUDTIntTernaryOp>;
def AMDGPUbfi : SDNode<"AMDGPUISD::BFI", AMDGPUDTIntTernaryOp>;
def AMDGPUbfm : SDNode<"AMDGPUISD::BFM", SDTIntBinOp>;
+def AMDGPUbrev : SDNode<"AMDGPUISD::BREV", SDTIntUnaryOp>;
+
// Signed and unsigned 24-bit multiply. The highest 8 bits are ignored when
// performing the multiply. The result is a 32-bit value.
def AMDGPUmul_u24 : SDNode<"AMDGPUISD::MUL_U24", SDTIntBinOp,
@@ -107,3 +157,22 @@ def AMDGPUmad_u24 : SDNode<"AMDGPUISD::MAD_U24", AMDGPUDTIntTernaryOp,
def AMDGPUmad_i24 : SDNode<"AMDGPUISD::MAD_I24", AMDGPUDTIntTernaryOp,
[]
>;
+
+//===----------------------------------------------------------------------===//
+// Flow Control Profile Types
+//===----------------------------------------------------------------------===//
+// Branch instruction where second and third are basic blocks
+def SDTIL_BRCond : SDTypeProfile<0, 2, [
+ SDTCisVT<0, OtherVT>
+ ]>;
+
+//===----------------------------------------------------------------------===//
+// Flow Control DAG Nodes
+//===----------------------------------------------------------------------===//
+def IL_brcond : SDNode<"AMDGPUISD::BRANCH_COND", SDTIL_BRCond, [SDNPHasChain]>;
+
+//===----------------------------------------------------------------------===//
+// Call/Return DAG Nodes
+//===----------------------------------------------------------------------===//
+def IL_retflag : SDNode<"AMDGPUISD::RET_FLAG", SDTNone,
+ [SDNPHasChain, SDNPOptInGlue]>;
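As a companion to the 24-bit multiply comment above, a C++ sketch of the intended semantics, assuming the 32-bit result is the low half of the full 48-bit product (function names are illustrative, not LLVM's):

#include <cstdint>

// MUL_U24: only the low 24 bits of each operand participate; 32-bit result.
static uint32_t mulU24(uint32_t A, uint32_t B) {
  return (A & 0xFFFFFFu) * (B & 0xFFFFFFu);
}

// MUL_I24: the low 24 bits are sign-extended from bit 23 before multiplying.
static uint32_t mulI24(uint32_t A, uint32_t B) {
  int32_t SA = static_cast<int32_t>(A << 8) >> 8;
  int32_t SB = static_cast<int32_t>(B << 8) >> 8;
  return static_cast<uint32_t>(static_cast<int64_t>(SA) * SB);
}

// MAD_U24 / MAD_I24 additionally add a third 32-bit operand to the product.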
diff --git a/lib/Target/R600/AMDGPUInstructions.td b/lib/Target/R600/AMDGPUInstructions.td
index 80bdf5b..b86b781 100644
--- a/lib/Target/R600/AMDGPUInstructions.td
+++ b/lib/Target/R600/AMDGPUInstructions.td
@@ -49,6 +49,11 @@ def u8imm : Operand<i8> {
let PrintMethod = "printU8ImmOperand";
}
+//===--------------------------------------------------------------------===//
+// Custom Operands
+//===--------------------------------------------------------------------===//
+def brtarget : Operand<OtherVT>;
+
//===----------------------------------------------------------------------===//
// PatLeafs for floating-point comparisons
//===----------------------------------------------------------------------===//
@@ -127,6 +132,21 @@ def COND_NULL : PatLeaf <
// Load/Store Pattern Fragments
//===----------------------------------------------------------------------===//
+def global_store : PatFrag<(ops node:$val, node:$ptr),
+ (store node:$val, node:$ptr), [{
+ return isGlobalStore(dyn_cast<StoreSDNode>(N));
+}]>;
+
+// Global address space loads
+def global_load : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+ return isGlobalLoad(dyn_cast<LoadSDNode>(N));
+}]>;
+
+// Constant address space loads
+def constant_load : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+ return isConstantLoad(dyn_cast<LoadSDNode>(N), -1);
+}]>;
+
def az_extload : PatFrag<(ops node:$ptr), (unindexedload node:$ptr), [{
LoadSDNode *L = cast<LoadSDNode>(N);
return L->getExtensionType() == ISD::ZEXTLOAD ||
@@ -232,26 +252,55 @@ def local_load : PatFrag<(ops node:$ptr), (load node:$ptr), [{
return isLocalLoad(dyn_cast<LoadSDNode>(N));
}]>;
-def atomic_load_add_local : PatFrag<(ops node:$ptr, node:$value),
- (atomic_load_add node:$ptr, node:$value), [{
- return dyn_cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
-}]>;
-def atomic_load_sub_local : PatFrag<(ops node:$ptr, node:$value),
- (atomic_load_sub node:$ptr, node:$value), [{
- return dyn_cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
+class local_binary_atomic_op<SDNode atomic_op> :
+ PatFrag<(ops node:$ptr, node:$value),
+ (atomic_op node:$ptr, node:$value), [{
+ return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
}]>;
+
+def atomic_swap_local : local_binary_atomic_op<atomic_swap>;
+def atomic_load_add_local : local_binary_atomic_op<atomic_load_add>;
+def atomic_load_sub_local : local_binary_atomic_op<atomic_load_sub>;
+def atomic_load_and_local : local_binary_atomic_op<atomic_load_and>;
+def atomic_load_or_local : local_binary_atomic_op<atomic_load_or>;
+def atomic_load_xor_local : local_binary_atomic_op<atomic_load_xor>;
+def atomic_load_nand_local : local_binary_atomic_op<atomic_load_nand>;
+def atomic_load_min_local : local_binary_atomic_op<atomic_load_min>;
+def atomic_load_max_local : local_binary_atomic_op<atomic_load_max>;
+def atomic_load_umin_local : local_binary_atomic_op<atomic_load_umin>;
+def atomic_load_umax_local : local_binary_atomic_op<atomic_load_umax>;
+
def mskor_global : PatFrag<(ops node:$val, node:$ptr),
(AMDGPUstore_mskor node:$val, node:$ptr), [{
return dyn_cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
}]>;
+def atomic_cmp_swap_32_local :
+ PatFrag<(ops node:$ptr, node:$cmp, node:$swap),
+ (atomic_cmp_swap node:$ptr, node:$cmp, node:$swap), [{
+ AtomicSDNode *AN = cast<AtomicSDNode>(N);
+ return AN->getMemoryVT() == MVT::i32 &&
+ AN->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
+}]>;
+
+def atomic_cmp_swap_64_local :
+ PatFrag<(ops node:$ptr, node:$cmp, node:$swap),
+ (atomic_cmp_swap node:$ptr, node:$cmp, node:$swap), [{
+ AtomicSDNode *AN = cast<AtomicSDNode>(N);
+ return AN->getMemoryVT() == MVT::i64 &&
+ AN->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
+}]>;
+
+
class Constants {
int TWO_PI = 0x40c90fdb;
int PI = 0x40490fdb;
int TWO_PI_INV = 0x3e22f983;
int FP_UINT_MAX_PLUS_1 = 0x4f800000; // 1 << 32 in floating point encoding
+int FP32_NEG_ONE = 0xbf800000;
+int FP32_ONE = 0x3f800000;
}
def CONST : Constants;
@@ -273,7 +322,7 @@ class CLAMP <RegisterClass rc> : AMDGPUShaderInst <
(outs rc:$dst),
(ins rc:$src0),
"CLAMP $dst, $src0",
- [(set f32:$dst, (int_AMDIL_clamp f32:$src0, (f32 FP_ZERO), (f32 FP_ONE)))]
+ [(set f32:$dst, (AMDGPUclamp f32:$src0, (f32 FP_ZERO), (f32 FP_ONE)))]
>;
class FABS <RegisterClass rc> : AMDGPUShaderInst <
@@ -363,7 +412,7 @@ class DwordAddrPat<ValueType vt, RegisterClass rc> : Pat <
// BFI_INT patterns
-multiclass BFIPatterns <Instruction BFI_INT> {
+multiclass BFIPatterns <Instruction BFI_INT, Instruction LoadImm32> {
// Definition from ISA doc:
// (y & x) | (z & ~x)
@@ -379,6 +428,19 @@ multiclass BFIPatterns <Instruction BFI_INT> {
(BFI_INT $x, $y, $z)
>;
+ def : Pat <
+ (fcopysign f32:$src0, f32:$src1),
+ (BFI_INT (LoadImm32 0x7fffffff), $src0, $src1)
+ >;
+
+ def : Pat <
+ (f64 (fcopysign f64:$src0, f64:$src1)),
+ (INSERT_SUBREG (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
+ (i32 (EXTRACT_SUBREG $src0, sub0)), sub0),
+ (BFI_INT (LoadImm32 0x7fffffff),
+ (i32 (EXTRACT_SUBREG $src0, sub1)),
+ (i32 (EXTRACT_SUBREG $src1, sub1))), sub1)
+ >;
}
// SHA-256 Ma patterns
@@ -457,6 +519,23 @@ multiclass Expand24UBitOps<Instruction MulInst, Instruction AddInst> {
>;
}
+class RcpPat<Instruction RcpInst, ValueType vt> : Pat <
+ (fdiv FP_ONE, vt:$src),
+ (RcpInst $src)
+>;
+
+multiclass RsqPat<Instruction RsqInst, ValueType vt> {
+ def : Pat <
+ (fdiv FP_ONE, (fsqrt vt:$src)),
+ (RsqInst $src)
+ >;
+
+ def : Pat <
+ (AMDGPUrcp (fsqrt vt:$src)),
+ (RsqInst $src)
+ >;
+}
+
include "R600Instructions.td"
include "R700Instructions.td"
include "EvergreenInstructions.td"
diff --git a/lib/Target/R600/AMDILIntrinsicInfo.cpp b/lib/Target/R600/AMDGPUIntrinsicInfo.cpp
index fab4a3b..58916a9 100644
--- a/lib/Target/R600/AMDILIntrinsicInfo.cpp
+++ b/lib/Target/R600/AMDGPUIntrinsicInfo.cpp
@@ -1,4 +1,4 @@
-//===- AMDILIntrinsicInfo.cpp - AMDGPU Intrinsic Information ------*- C++ -*-===//
+//===- AMDGPUIntrinsicInfo.cpp - AMDGPU Intrinsic Information ---*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -12,7 +12,7 @@
//
//===-----------------------------------------------------------------------===//
-#include "AMDILIntrinsicInfo.h"
+#include "AMDGPUIntrinsicInfo.h"
#include "AMDGPUSubtarget.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Intrinsics.h"
@@ -24,14 +24,12 @@ using namespace llvm;
#include "AMDGPUGenIntrinsics.inc"
#undef GET_LLVM_INTRINSIC_FOR_GCC_BUILTIN
-AMDGPUIntrinsicInfo::AMDGPUIntrinsicInfo(TargetMachine *tm)
- : TargetIntrinsicInfo() {
-}
+AMDGPUIntrinsicInfo::AMDGPUIntrinsicInfo(TargetMachine *tm)
+ : TargetIntrinsicInfo() {}
-std::string
-AMDGPUIntrinsicInfo::getName(unsigned int IntrID, Type **Tys,
- unsigned int numTys) const {
- static const char* const names[] = {
+std::string AMDGPUIntrinsicInfo::getName(unsigned IntrID, Type **Tys,
+ unsigned numTys) const {
+ static const char *const names[] = {
#define GET_INTRINSIC_NAME_TABLE
#include "AMDGPUGenIntrinsics.inc"
#undef GET_INTRINSIC_NAME_TABLE
@@ -40,23 +38,23 @@ AMDGPUIntrinsicInfo::getName(unsigned int IntrID, Type **Tys,
if (IntrID < Intrinsic::num_intrinsics) {
return nullptr;
}
- assert(IntrID < AMDGPUIntrinsic::num_AMDGPU_intrinsics
- && "Invalid intrinsic ID");
+ assert(IntrID < AMDGPUIntrinsic::num_AMDGPU_intrinsics &&
+ "Invalid intrinsic ID");
std::string Result(names[IntrID - Intrinsic::num_intrinsics]);
return Result;
}
-unsigned int
-AMDGPUIntrinsicInfo::lookupName(const char *Name, unsigned int Len) const {
+unsigned AMDGPUIntrinsicInfo::lookupName(const char *Name,
+ unsigned Len) const {
if (!StringRef(Name, Len).startswith("llvm."))
return 0; // All intrinsics start with 'llvm.'
#define GET_FUNCTION_RECOGNIZER
#include "AMDGPUGenIntrinsics.inc"
#undef GET_FUNCTION_RECOGNIZER
- AMDGPUIntrinsic::ID IntrinsicID
- = (AMDGPUIntrinsic::ID)Intrinsic::not_intrinsic;
+ AMDGPUIntrinsic::ID IntrinsicID =
+ (AMDGPUIntrinsic::ID)Intrinsic::not_intrinsic;
IntrinsicID = getIntrinsicForGCCBuiltin("AMDGPU", Name);
if (IntrinsicID != (AMDGPUIntrinsic::ID)Intrinsic::not_intrinsic) {
@@ -65,17 +63,15 @@ AMDGPUIntrinsicInfo::lookupName(const char *Name, unsigned int Len) const {
return 0;
}
-bool
-AMDGPUIntrinsicInfo::isOverloaded(unsigned id) const {
- // Overload Table
+bool AMDGPUIntrinsicInfo::isOverloaded(unsigned id) const {
+// Overload Table
#define GET_INTRINSIC_OVERLOAD_TABLE
#include "AMDGPUGenIntrinsics.inc"
#undef GET_INTRINSIC_OVERLOAD_TABLE
}
-Function*
-AMDGPUIntrinsicInfo::getDeclaration(Module *M, unsigned IntrID,
- Type **Tys,
- unsigned numTys) const {
+Function *AMDGPUIntrinsicInfo::getDeclaration(Module *M, unsigned IntrID,
+ Type **Tys,
+ unsigned numTys) const {
llvm_unreachable("Not implemented");
}
diff --git a/lib/Target/R600/AMDILIntrinsicInfo.h b/lib/Target/R600/AMDGPUIntrinsicInfo.h
index 924275a..5be68a2 100644
--- a/lib/Target/R600/AMDILIntrinsicInfo.h
+++ b/lib/Target/R600/AMDGPUIntrinsicInfo.h
@@ -1,4 +1,4 @@
-//===- AMDILIntrinsicInfo.h - AMDGPU Intrinsic Information ------*- C++ -*-===//
+//===- AMDGPUIntrinsicInfo.h - AMDGPU Intrinsic Information ------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -11,8 +11,8 @@
/// \brief Interface for the AMDGPU Implementation of the Intrinsic Info class.
//
//===-----------------------------------------------------------------------===//
-#ifndef AMDIL_INTRINSICS_H
-#define AMDIL_INTRINSICS_H
+#ifndef AMDGPU_INTRINSICINFO_H
+#define AMDGPU_INTRINSICINFO_H
#include "llvm/IR/Intrinsics.h"
#include "llvm/Target/TargetIntrinsicInfo.h"
@@ -34,16 +34,15 @@ enum ID {
class AMDGPUIntrinsicInfo : public TargetIntrinsicInfo {
public:
AMDGPUIntrinsicInfo(TargetMachine *tm);
- std::string getName(unsigned int IntrId, Type **Tys = nullptr,
- unsigned int numTys = 0) const override;
- unsigned int lookupName(const char *Name, unsigned int Len) const override;
- bool isOverloaded(unsigned int IID) const override;
- Function *getDeclaration(Module *M, unsigned int ID,
+ std::string getName(unsigned IntrId, Type **Tys = nullptr,
+ unsigned numTys = 0) const override;
+ unsigned lookupName(const char *Name, unsigned Len) const override;
+ bool isOverloaded(unsigned IID) const override;
+ Function *getDeclaration(Module *M, unsigned ID,
Type **Tys = nullptr,
- unsigned int numTys = 0) const override;
+ unsigned numTys = 0) const override;
};
} // end namespace llvm
-#endif // AMDIL_INTRINSICS_H
-
+#endif // AMDGPU_INTRINSICINFO_H
diff --git a/lib/Target/R600/AMDGPUIntrinsics.td b/lib/Target/R600/AMDGPUIntrinsics.td
index 9ad5e72..d934676 100644
--- a/lib/Target/R600/AMDGPUIntrinsics.td
+++ b/lib/Target/R600/AMDGPUIntrinsics.td
@@ -18,18 +18,26 @@ let TargetPrefix = "AMDGPU", isTarget = 1 in {
def int_AMDGPU_reserve_reg : Intrinsic<[], [llvm_i32_ty], [IntrNoMem]>;
def int_AMDGPU_store_output : Intrinsic<[], [llvm_float_ty, llvm_i32_ty], []>;
def int_AMDGPU_swizzle : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty], [IntrNoMem]>;
-
+ def int_AMDGPU_abs : Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>;
def int_AMDGPU_arl : Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
def int_AMDGPU_cndlt : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
def int_AMDGPU_div : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
+ def int_AMDGPU_fract : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;
+ def int_AMDGPU_clamp : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
+
+ // This is named backwards (instead of rsq_legacy) so we don't have
+ // to define it with the public builtins intrinsics. This is a
+ // workaround for how intrinsic names are parsed. If the name is
+ // llvm.AMDGPU.rsq.legacy, the parser assumes that you meant
+  // llvm.AMDGPU.rsq.{f32 | f64} and incorrectly mangles the name.
+ def int_AMDGPU_legacy_rsq : Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+
def int_AMDGPU_dp4 : Intrinsic<[llvm_float_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
def int_AMDGPU_kill : Intrinsic<[], [llvm_float_ty], []>;
def int_AMDGPU_kilp : Intrinsic<[], [], []>;
def int_AMDGPU_lrp : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
def int_AMDGPU_mul : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
def int_AMDGPU_pow : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
- def int_AMDGPU_rcp : Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
- def int_AMDGPU_rsq : Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
def int_AMDGPU_seq : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
def int_AMDGPU_sgt : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
def int_AMDGPU_sge : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
@@ -53,12 +61,27 @@ let TargetPrefix = "AMDGPU", isTarget = 1 in {
def int_AMDGPU_imul24 : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
def int_AMDGPU_imad24 : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
def int_AMDGPU_umad24 : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_AMDGPU_cvt_f32_ubyte0 : Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
+ def int_AMDGPU_cvt_f32_ubyte1 : Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
+ def int_AMDGPU_cvt_f32_ubyte2 : Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
+ def int_AMDGPU_cvt_f32_ubyte3 : Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
def int_AMDGPU_cube : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
def int_AMDGPU_bfi : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
def int_AMDGPU_bfe_i32 : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
def int_AMDGPU_bfe_u32 : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
def int_AMDGPU_bfm : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_AMDGPU_brev : Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>;
def int_AMDGPU_barrier_local : Intrinsic<[], [], []>;
+ def int_AMDGPU_barrier_global : Intrinsic<[], [], []>;
+}
+
+// Legacy names for compatibility.
+let TargetPrefix = "AMDIL", isTarget = 1 in {
+ def int_AMDIL_abs : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [IntrNoMem]>;
+ def int_AMDIL_fraction : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;
+ def int_AMDIL_clamp : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
+ def int_AMDIL_exp : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;
+ def int_AMDIL_round_nearest : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;
}
let TargetPrefix = "TGSI", isTarget = 1 in {
diff --git a/lib/Target/R600/AMDGPUMCInstLower.cpp b/lib/Target/R600/AMDGPUMCInstLower.cpp
index b759495..ac82e88 100644
--- a/lib/Target/R600/AMDGPUMCInstLower.cpp
+++ b/lib/Target/R600/AMDGPUMCInstLower.cpp
@@ -15,6 +15,7 @@
#include "AMDGPUMCInstLower.h"
#include "AMDGPUAsmPrinter.h"
+#include "AMDGPUTargetMachine.h"
#include "InstPrinter/AMDGPUInstPrinter.h"
#include "R600InstrInfo.h"
#include "SIInstrInfo.h"
diff --git a/lib/Target/R600/AMDGPUMCInstLower.h b/lib/Target/R600/AMDGPUMCInstLower.h
index 2b7f1e3..58fe34d 100644
--- a/lib/Target/R600/AMDGPUMCInstLower.h
+++ b/lib/Target/R600/AMDGPUMCInstLower.h
@@ -14,9 +14,9 @@
namespace llvm {
class AMDGPUSubtarget;
-class MCInst;
-class MCContext;
class MachineInstr;
+class MCContext;
+class MCInst;
class AMDGPUMCInstLower {
diff --git a/lib/Target/R600/AMDGPUPromoteAlloca.cpp b/lib/Target/R600/AMDGPUPromoteAlloca.cpp
new file mode 100644
index 0000000..218750d
--- /dev/null
+++ b/lib/Target/R600/AMDGPUPromoteAlloca.cpp
@@ -0,0 +1,387 @@
+//===-- AMDGPUPromoteAlloca.cpp - Promote Allocas -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass eliminates allocas either by converting them into vectors or by
+// migrating them to the local address space.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "AMDGPUSubtarget.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstVisitor.h"
+#include "llvm/Support/Debug.h"
+
+#define DEBUG_TYPE "amdgpu-promote-alloca"
+
+using namespace llvm;
+
+namespace {
+
+class AMDGPUPromoteAlloca : public FunctionPass,
+ public InstVisitor<AMDGPUPromoteAlloca> {
+
+ static char ID;
+ Module *Mod;
+ const AMDGPUSubtarget &ST;
+ int LocalMemAvailable;
+
+public:
+ AMDGPUPromoteAlloca(const AMDGPUSubtarget &st) : FunctionPass(ID), ST(st),
+ LocalMemAvailable(0) { }
+ virtual bool doInitialization(Module &M);
+ virtual bool runOnFunction(Function &F);
+ virtual const char *getPassName() const {
+ return "AMDGPU Promote Alloca";
+ }
+ void visitAlloca(AllocaInst &I);
+};
+
+} // End anonymous namespace
+
+char AMDGPUPromoteAlloca::ID = 0;
+
+bool AMDGPUPromoteAlloca::doInitialization(Module &M) {
+ Mod = &M;
+ return false;
+}
+
+bool AMDGPUPromoteAlloca::runOnFunction(Function &F) {
+
+ const FunctionType *FTy = F.getFunctionType();
+
+ LocalMemAvailable = ST.getLocalMemorySize();
+
+
+ // If the function has any arguments in the local address space, then it's
+ // possible these arguments require the entire local memory space, so
+ // we cannot use local memory in the pass.
+ for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i) {
+ const Type *ParamTy = FTy->getParamType(i);
+ if (ParamTy->isPointerTy() &&
+ ParamTy->getPointerAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
+ LocalMemAvailable = 0;
+ DEBUG(dbgs() << "Function has local memory argument. Promoting to "
+ "local memory disabled.\n");
+ break;
+ }
+ }
+
+ if (LocalMemAvailable > 0) {
+ // Check how much local memory is being used by global objects
+ for (Module::global_iterator I = Mod->global_begin(),
+ E = Mod->global_end(); I != E; ++I) {
+ GlobalVariable *GV = I;
+ PointerType *GVTy = GV->getType();
+ if (GVTy->getAddressSpace() != AMDGPUAS::LOCAL_ADDRESS)
+ continue;
+ for (Value::use_iterator U = GV->use_begin(),
+ UE = GV->use_end(); U != UE; ++U) {
+ Instruction *Use = dyn_cast<Instruction>(*U);
+ if (!Use)
+ continue;
+ if (Use->getParent()->getParent() == &F)
+ LocalMemAvailable -=
+ Mod->getDataLayout()->getTypeAllocSize(GVTy->getElementType());
+ }
+ }
+ }
+
+ LocalMemAvailable = std::max(0, LocalMemAvailable);
+  DEBUG(dbgs() << LocalMemAvailable << " bytes free in local memory.\n");
+
+ visit(F);
+
+ return false;
+}
+
+static VectorType *arrayTypeToVecType(const Type *ArrayTy) {
+ return VectorType::get(ArrayTy->getArrayElementType(),
+ ArrayTy->getArrayNumElements());
+}
+
+static Value* calculateVectorIndex(Value *Ptr,
+ std::map<GetElementPtrInst*, Value*> GEPIdx) {
+ if (isa<AllocaInst>(Ptr))
+ return Constant::getNullValue(Type::getInt32Ty(Ptr->getContext()));
+
+ GetElementPtrInst *GEP = cast<GetElementPtrInst>(Ptr);
+
+ return GEPIdx[GEP];
+}
+
+static Value* GEPToVectorIndex(GetElementPtrInst *GEP) {
+ // FIXME we only support simple cases
+ if (GEP->getNumOperands() != 3)
+ return NULL;
+
+ ConstantInt *I0 = dyn_cast<ConstantInt>(GEP->getOperand(1));
+ if (!I0 || !I0->isZero())
+ return NULL;
+
+ return GEP->getOperand(2);
+}
+
+// Returns true for the instruction kinds that the rewrite below knows how to
+// turn into vector operations.
+//
+// TODO: Check isTriviallyVectorizable for calls and handle other
+// instructions.
+static bool canVectorizeInst(Instruction *Inst) {
+ switch (Inst->getOpcode()) {
+ case Instruction::Load:
+ case Instruction::Store:
+ case Instruction::BitCast:
+ case Instruction::AddrSpaceCast:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static bool tryPromoteAllocaToVector(AllocaInst *Alloca) {
+ Type *AllocaTy = Alloca->getAllocatedType();
+
+ DEBUG(dbgs() << "Alloca Candidate for vectorization \n");
+
+ // FIXME: There is no reason why we can't support larger arrays, we
+ // are just being conservative for now.
+ if (!AllocaTy->isArrayTy() ||
+ AllocaTy->getArrayElementType()->isVectorTy() ||
+ AllocaTy->getArrayNumElements() > 4) {
+
+    DEBUG(dbgs() << "  Cannot convert type to vector\n");
+ return false;
+ }
+
+ std::map<GetElementPtrInst*, Value*> GEPVectorIdx;
+ std::vector<Value*> WorkList;
+ for (User *AllocaUser : Alloca->users()) {
+ GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(AllocaUser);
+ if (!GEP) {
+ if (!canVectorizeInst(cast<Instruction>(AllocaUser)))
+ return false;
+
+ WorkList.push_back(AllocaUser);
+ continue;
+ }
+
+ Value *Index = GEPToVectorIndex(GEP);
+
+ // If we can't compute a vector index from this GEP, then we can't
+ // promote this alloca to vector.
+ if (!Index) {
+ DEBUG(dbgs() << " Cannot compute vector index for GEP " << *GEP << '\n');
+ return false;
+ }
+
+ GEPVectorIdx[GEP] = Index;
+ for (User *GEPUser : AllocaUser->users()) {
+ if (!canVectorizeInst(cast<Instruction>(GEPUser)))
+ return false;
+
+ WorkList.push_back(GEPUser);
+ }
+ }
+
+ VectorType *VectorTy = arrayTypeToVecType(AllocaTy);
+
+ DEBUG(dbgs() << " Converting alloca to vector "
+ << *AllocaTy << " -> " << *VectorTy << '\n');
+
+ for (std::vector<Value*>::iterator I = WorkList.begin(),
+ E = WorkList.end(); I != E; ++I) {
+ Instruction *Inst = cast<Instruction>(*I);
+ IRBuilder<> Builder(Inst);
+ switch (Inst->getOpcode()) {
+ case Instruction::Load: {
+ Value *Ptr = Inst->getOperand(0);
+ Value *Index = calculateVectorIndex(Ptr, GEPVectorIdx);
+ Value *BitCast = Builder.CreateBitCast(Alloca, VectorTy->getPointerTo(0));
+ Value *VecValue = Builder.CreateLoad(BitCast);
+ Value *ExtractElement = Builder.CreateExtractElement(VecValue, Index);
+ Inst->replaceAllUsesWith(ExtractElement);
+ Inst->eraseFromParent();
+ break;
+ }
+ case Instruction::Store: {
+ Value *Ptr = Inst->getOperand(1);
+ Value *Index = calculateVectorIndex(Ptr, GEPVectorIdx);
+ Value *BitCast = Builder.CreateBitCast(Alloca, VectorTy->getPointerTo(0));
+ Value *VecValue = Builder.CreateLoad(BitCast);
+ Value *NewVecValue = Builder.CreateInsertElement(VecValue,
+ Inst->getOperand(0),
+ Index);
+ Builder.CreateStore(NewVecValue, BitCast);
+ Inst->eraseFromParent();
+ break;
+ }
+ case Instruction::BitCast:
+ case Instruction::AddrSpaceCast:
+ break;
+
+ default:
+ Inst->dump();
+ llvm_unreachable("Inconsistency in instructions promotable to vector");
+ }
+ }
+ return true;
+}
+
+static void collectUsesWithPtrTypes(Value *Val, std::vector<Value*> &WorkList) {
+ for (User *User : Val->users()) {
+ if(std::find(WorkList.begin(), WorkList.end(), User) != WorkList.end())
+ continue;
+ if (isa<CallInst>(User)) {
+ WorkList.push_back(User);
+ continue;
+ }
+ if (!User->getType()->isPointerTy())
+ continue;
+ WorkList.push_back(User);
+ collectUsesWithPtrTypes(User, WorkList);
+ }
+}
+
+void AMDGPUPromoteAlloca::visitAlloca(AllocaInst &I) {
+ IRBuilder<> Builder(&I);
+
+ // First try to replace the alloca with a vector
+ Type *AllocaTy = I.getAllocatedType();
+
+ DEBUG(dbgs() << "Trying to promote " << I << '\n');
+
+ if (tryPromoteAllocaToVector(&I))
+ return;
+
+ DEBUG(dbgs() << " alloca is not a candidate for vectorization.\n");
+
+  // FIXME: This is the maximum work group size. We should try to get the
+  // value from the reqd_work_group_size function attribute if it is
+ // available.
+ unsigned WorkGroupSize = 256;
+ int AllocaSize = WorkGroupSize *
+ Mod->getDataLayout()->getTypeAllocSize(AllocaTy);
+
+ if (AllocaSize > LocalMemAvailable) {
+ DEBUG(dbgs() << " Not enough local memory to promote alloca.\n");
+ return;
+ }
+
+ DEBUG(dbgs() << "Promoting alloca to local memory\n");
+ LocalMemAvailable -= AllocaSize;
+
+ GlobalVariable *GV = new GlobalVariable(
+ *Mod, ArrayType::get(I.getAllocatedType(), 256), false,
+ GlobalValue::ExternalLinkage, 0, I.getName(), 0,
+ GlobalVariable::NotThreadLocal, AMDGPUAS::LOCAL_ADDRESS);
+
+ FunctionType *FTy = FunctionType::get(
+ Type::getInt32Ty(Mod->getContext()), false);
+ AttributeSet AttrSet;
+ AttrSet.addAttribute(Mod->getContext(), 0, Attribute::ReadNone);
+
+ Value *ReadLocalSizeY = Mod->getOrInsertFunction(
+ "llvm.r600.read.local.size.y", FTy, AttrSet);
+ Value *ReadLocalSizeZ = Mod->getOrInsertFunction(
+ "llvm.r600.read.local.size.z", FTy, AttrSet);
+ Value *ReadTIDIGX = Mod->getOrInsertFunction(
+ "llvm.r600.read.tidig.x", FTy, AttrSet);
+ Value *ReadTIDIGY = Mod->getOrInsertFunction(
+ "llvm.r600.read.tidig.y", FTy, AttrSet);
+ Value *ReadTIDIGZ = Mod->getOrInsertFunction(
+ "llvm.r600.read.tidig.z", FTy, AttrSet);
+
+
+ Value *TCntY = Builder.CreateCall(ReadLocalSizeY);
+ Value *TCntZ = Builder.CreateCall(ReadLocalSizeZ);
+ Value *TIdX = Builder.CreateCall(ReadTIDIGX);
+ Value *TIdY = Builder.CreateCall(ReadTIDIGY);
+ Value *TIdZ = Builder.CreateCall(ReadTIDIGZ);
+
+ Value *Tmp0 = Builder.CreateMul(TCntY, TCntZ);
+ Tmp0 = Builder.CreateMul(Tmp0, TIdX);
+ Value *Tmp1 = Builder.CreateMul(TIdY, TCntZ);
+ Value *TID = Builder.CreateAdd(Tmp0, Tmp1);
+ TID = Builder.CreateAdd(TID, TIdZ);
+
+ std::vector<Value*> Indices;
+ Indices.push_back(Constant::getNullValue(Type::getInt32Ty(Mod->getContext())));
+ Indices.push_back(TID);
+
+ Value *Offset = Builder.CreateGEP(GV, Indices);
+ I.mutateType(Offset->getType());
+ I.replaceAllUsesWith(Offset);
+ I.eraseFromParent();
+
+ std::vector<Value*> WorkList;
+
+ collectUsesWithPtrTypes(Offset, WorkList);
+
+ for (std::vector<Value*>::iterator i = WorkList.begin(),
+ e = WorkList.end(); i != e; ++i) {
+ Value *V = *i;
+ CallInst *Call = dyn_cast<CallInst>(V);
+ if (!Call) {
+ Type *EltTy = V->getType()->getPointerElementType();
+ PointerType *NewTy = PointerType::get(EltTy, AMDGPUAS::LOCAL_ADDRESS);
+ V->mutateType(NewTy);
+ continue;
+ }
+
+ IntrinsicInst *Intr = dyn_cast<IntrinsicInst>(Call);
+ if (!Intr) {
+ std::vector<Type*> ArgTypes;
+ for (unsigned ArgIdx = 0, ArgEnd = Call->getNumArgOperands();
+ ArgIdx != ArgEnd; ++ArgIdx) {
+ ArgTypes.push_back(Call->getArgOperand(ArgIdx)->getType());
+ }
+ Function *F = Call->getCalledFunction();
+ FunctionType *NewType = FunctionType::get(Call->getType(), ArgTypes,
+ F->isVarArg());
+ Constant *C = Mod->getOrInsertFunction(StringRef(F->getName().str() + ".local"), NewType,
+ F->getAttributes());
+ Function *NewF = cast<Function>(C);
+ Call->setCalledFunction(NewF);
+ continue;
+ }
+
+ Builder.SetInsertPoint(Intr);
+ switch (Intr->getIntrinsicID()) {
+ case Intrinsic::lifetime_start:
+ case Intrinsic::lifetime_end:
+ // These intrinsics are for address space 0 only
+ Intr->eraseFromParent();
+ continue;
+ case Intrinsic::memcpy: {
+ MemCpyInst *MemCpy = cast<MemCpyInst>(Intr);
+ Builder.CreateMemCpy(MemCpy->getRawDest(), MemCpy->getRawSource(),
+ MemCpy->getLength(), MemCpy->getAlignment(),
+ MemCpy->isVolatile());
+ Intr->eraseFromParent();
+ continue;
+ }
+ case Intrinsic::memset: {
+ MemSetInst *MemSet = cast<MemSetInst>(Intr);
+ Builder.CreateMemSet(MemSet->getRawDest(), MemSet->getValue(),
+ MemSet->getLength(), MemSet->getAlignment(),
+ MemSet->isVolatile());
+ Intr->eraseFromParent();
+ continue;
+ }
+ default:
+ Intr->dump();
+ llvm_unreachable("Don't know how to promote alloca intrinsic use.");
+ }
+ }
+}
+
+FunctionPass *llvm::createAMDGPUPromoteAlloca(const AMDGPUSubtarget &ST) {
+ return new AMDGPUPromoteAlloca(ST);
+}
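When the pass above cannot vectorize an alloca, it moves it into local (LDS) memory, giving every work item its own copy inside one shared array and indexing that array with a flattened thread id built from the llvm.r600.read.local.size.* and llvm.r600.read.tidig.* calls it inserts. A rough C++ analogue of that addressing, under the pass's hard-coded 256-work-item assumption (names below are illustrative):

#include <cstdint>

// TID = tidig.x * (local_size.y * local_size.z) + tidig.y * local_size.z + tidig.z
static uint32_t flatThreadId(uint32_t TIdX, uint32_t TIdY, uint32_t TIdZ,
                             uint32_t TCntY, uint32_t TCntZ) {
  return TIdX * (TCntY * TCntZ) + TIdY * TCntZ + TIdZ;
}

// Conceptually, an alloca of type T becomes an LDS array "T Copies[256]" and
// every former use of the alloca pointer is rewritten to
// &Copies[flatThreadId(...)], after which pointer-typed users are retyped to
// the LOCAL address space and calls are redirected to a ".local" clone.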
diff --git a/lib/Target/R600/AMDGPURegisterInfo.cpp b/lib/Target/R600/AMDGPURegisterInfo.cpp
index 19927fa..3433280 100644
--- a/lib/Target/R600/AMDGPURegisterInfo.cpp
+++ b/lib/Target/R600/AMDGPURegisterInfo.cpp
@@ -17,9 +17,9 @@
using namespace llvm;
-AMDGPURegisterInfo::AMDGPURegisterInfo(TargetMachine &tm)
+AMDGPURegisterInfo::AMDGPURegisterInfo(const AMDGPUSubtarget &st)
: AMDGPUGenRegisterInfo(0),
- TM(tm)
+ ST(st)
{ }
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/R600/AMDGPURegisterInfo.h b/lib/Target/R600/AMDGPURegisterInfo.h
index a7cba0d..4731595 100644
--- a/lib/Target/R600/AMDGPURegisterInfo.h
+++ b/lib/Target/R600/AMDGPURegisterInfo.h
@@ -25,27 +25,19 @@
namespace llvm {
-class AMDGPUTargetMachine;
+class AMDGPUSubtarget;
class TargetInstrInfo;
struct AMDGPURegisterInfo : public AMDGPUGenRegisterInfo {
- TargetMachine &TM;
static const MCPhysReg CalleeSavedReg;
+ const AMDGPUSubtarget &ST;
- AMDGPURegisterInfo(TargetMachine &tm);
+ AMDGPURegisterInfo(const AMDGPUSubtarget &st);
BitVector getReservedRegs(const MachineFunction &MF) const override {
assert(!"Unimplemented"); return BitVector();
}
- /// \param RC is an AMDIL reg class.
- ///
- /// \returns The ISA reg class that is equivalent to \p RC.
- virtual const TargetRegisterClass * getISARegClass(
- const TargetRegisterClass * RC) const {
- assert(!"Unimplemented"); return nullptr;
- }
-
virtual const TargetRegisterClass* getCFGStructurizerRegClass(MVT VT) const {
assert(!"Unimplemented"); return nullptr;
}
diff --git a/lib/Target/R600/AMDGPUSubtarget.cpp b/lib/Target/R600/AMDGPUSubtarget.cpp
index f3b9932..b83c290 100644
--- a/lib/Target/R600/AMDGPUSubtarget.cpp
+++ b/lib/Target/R600/AMDGPUSubtarget.cpp
@@ -13,6 +13,8 @@
//===----------------------------------------------------------------------===//
#include "AMDGPUSubtarget.h"
+#include "R600InstrInfo.h"
+#include "SIInstrInfo.h"
using namespace llvm;
@@ -23,90 +25,42 @@ using namespace llvm;
#define GET_SUBTARGETINFO_CTOR
#include "AMDGPUGenSubtargetInfo.inc"
-AMDGPUSubtarget::AMDGPUSubtarget(StringRef TT, StringRef CPU, StringRef FS) :
- AMDGPUGenSubtargetInfo(TT, CPU, FS), DumpCode(false) {
- InstrItins = getInstrItineraryForCPU(CPU);
-
- // Default card
- StringRef GPU = CPU;
- Is64bit = false;
- HasVertexCache = false;
- TexVTXClauseSize = 0;
- Gen = AMDGPUSubtarget::R600;
- FP64 = false;
- CaymanISA = false;
- EnableIRStructurizer = true;
- EnableIfCvt = true;
- WavefrontSize = 0;
- CFALUBug = false;
+AMDGPUSubtarget::AMDGPUSubtarget(StringRef TT, StringRef GPU, StringRef FS) :
+ AMDGPUGenSubtargetInfo(TT, GPU, FS),
+ DevName(GPU),
+ Is64bit(false),
+ DumpCode(false),
+ R600ALUInst(false),
+ HasVertexCache(false),
+ TexVTXClauseSize(0),
+ Gen(AMDGPUSubtarget::R600),
+ FP64(false),
+ CaymanISA(false),
+ EnableIRStructurizer(true),
+ EnableIfCvt(true),
+ WavefrontSize(0),
+ CFALUBug(false),
+ LocalMemorySize(0),
+ InstrItins(getInstrItineraryForCPU(GPU)) {
ParseSubtargetFeatures(GPU, FS);
- DevName = GPU;
-}
-bool
-AMDGPUSubtarget::is64bit() const {
- return Is64bit;
-}
-bool
-AMDGPUSubtarget::hasVertexCache() const {
- return HasVertexCache;
-}
-short
-AMDGPUSubtarget::getTexVTXClauseSize() const {
- return TexVTXClauseSize;
-}
-enum AMDGPUSubtarget::Generation
-AMDGPUSubtarget::getGeneration() const {
- return Gen;
-}
-bool
-AMDGPUSubtarget::hasHWFP64() const {
- return FP64;
-}
-bool
-AMDGPUSubtarget::hasCaymanISA() const {
- return CaymanISA;
-}
-bool
-AMDGPUSubtarget::IsIRStructurizerEnabled() const {
- return EnableIRStructurizer;
-}
-bool
-AMDGPUSubtarget::isIfCvtEnabled() const {
- return EnableIfCvt;
-}
-unsigned
-AMDGPUSubtarget::getWavefrontSize() const {
- return WavefrontSize;
+ if (getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
+ InstrInfo.reset(new R600InstrInfo(*this));
+ } else {
+ InstrInfo.reset(new SIInstrInfo(*this));
+ }
}
-unsigned
-AMDGPUSubtarget::getStackEntrySize() const {
+
+unsigned AMDGPUSubtarget::getStackEntrySize() const {
assert(getGeneration() <= NORTHERN_ISLANDS);
switch(getWavefrontSize()) {
case 16:
return 8;
case 32:
- if (hasCaymanISA())
- return 4;
- else
- return 8;
+ return hasCaymanISA() ? 4 : 8;
case 64:
return 4;
default:
llvm_unreachable("Illegal wavefront size.");
}
}
-bool
-AMDGPUSubtarget::hasCFAluBug() const {
- assert(getGeneration() <= NORTHERN_ISLANDS);
- return CFALUBug;
-}
-bool
-AMDGPUSubtarget::isTargetELF() const {
- return false;
-}
-
-std::string
-AMDGPUSubtarget::getDeviceName() const {
- return DevName;
-}
diff --git a/lib/Target/R600/AMDGPUSubtarget.h b/lib/Target/R600/AMDGPUSubtarget.h
index 1b041d6..0c388b3 100644
--- a/lib/Target/R600/AMDGPUSubtarget.h
+++ b/lib/Target/R600/AMDGPUSubtarget.h
@@ -15,6 +15,7 @@
#ifndef AMDGPUSUBTARGET_H
#define AMDGPUSUBTARGET_H
#include "AMDGPU.h"
+#include "AMDGPUInstrInfo.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Target/TargetSubtargetInfo.h"
@@ -27,6 +28,9 @@
namespace llvm {
class AMDGPUSubtarget : public AMDGPUGenSubtargetInfo {
+
+ std::unique_ptr<AMDGPUInstrInfo> InstrInfo;
+
public:
enum Generation {
R600 = 0,
@@ -40,42 +44,78 @@ public:
private:
std::string DevName;
bool Is64bit;
- bool Is32on64bit;
bool DumpCode;
bool R600ALUInst;
bool HasVertexCache;
short TexVTXClauseSize;
- enum Generation Gen;
+ Generation Gen;
bool FP64;
bool CaymanISA;
bool EnableIRStructurizer;
bool EnableIfCvt;
unsigned WavefrontSize;
bool CFALUBug;
+ int LocalMemorySize;
InstrItineraryData InstrItins;
public:
AMDGPUSubtarget(StringRef TT, StringRef CPU, StringRef FS);
- const InstrItineraryData &getInstrItineraryData() const { return InstrItins; }
+ const AMDGPUInstrInfo *getInstrInfo() const {
+ return InstrInfo.get();
+ }
+
+ const InstrItineraryData &getInstrItineraryData() const {
+ return InstrItins;
+ }
+
void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
- bool is64bit() const;
- bool hasVertexCache() const;
- short getTexVTXClauseSize() const;
- enum Generation getGeneration() const;
- bool hasHWFP64() const;
- bool hasCaymanISA() const;
+ bool is64bit() const {
+ return Is64bit;
+ }
+
+ bool hasVertexCache() const {
+ return HasVertexCache;
+ }
+
+ short getTexVTXClauseSize() const {
+ return TexVTXClauseSize;
+ }
+
+ Generation getGeneration() const {
+ return Gen;
+ }
+
+ bool hasHWFP64() const {
+ return FP64;
+ }
+
+ bool hasCaymanISA() const {
+ return CaymanISA;
+ }
bool hasBFE() const {
return (getGeneration() >= EVERGREEN);
}
+ bool hasBFI() const {
+ return (getGeneration() >= EVERGREEN);
+ }
+
bool hasBFM() const {
return hasBFE();
}
+ bool hasBCNT(unsigned Size) const {
+ if (Size == 32)
+ return (getGeneration() >= EVERGREEN);
+
+ assert(Size == 64);
+ return (getGeneration() >= SOUTHERN_ISLANDS);
+ }
+
bool hasMulU24() const {
return (getGeneration() >= EVERGREEN);
}
@@ -85,22 +125,48 @@ public:
hasCaymanISA());
}
- bool IsIRStructurizerEnabled() const;
- bool isIfCvtEnabled() const;
- unsigned getWavefrontSize() const;
+ bool IsIRStructurizerEnabled() const {
+ return EnableIRStructurizer;
+ }
+
+ bool isIfCvtEnabled() const {
+ return EnableIfCvt;
+ }
+
+ unsigned getWavefrontSize() const {
+ return WavefrontSize;
+ }
+
unsigned getStackEntrySize() const;
- bool hasCFAluBug() const;
+
+ bool hasCFAluBug() const {
+ assert(getGeneration() <= NORTHERN_ISLANDS);
+ return CFALUBug;
+ }
+
+ int getLocalMemorySize() const {
+ return LocalMemorySize;
+ }
bool enableMachineScheduler() const override {
return getGeneration() <= NORTHERN_ISLANDS;
}
// Helper functions to simplify if statements
- bool isTargetELF() const;
- std::string getDeviceName() const;
- bool dumpCode() const { return DumpCode; }
- bool r600ALUEncoding() const { return R600ALUInst; }
+ bool isTargetELF() const {
+ return false;
+ }
+ StringRef getDeviceName() const {
+ return DevName;
+ }
+
+ bool dumpCode() const {
+ return DumpCode;
+ }
+ bool r600ALUEncoding() const {
+ return R600ALUInst;
+ }
};
} // End namespace llvm
diff --git a/lib/Target/R600/AMDGPUTargetMachine.cpp b/lib/Target/R600/AMDGPUTargetMachine.cpp
index 174fdca..8aab944 100644
--- a/lib/Target/R600/AMDGPUTargetMachine.cpp
+++ b/lib/Target/R600/AMDGPUTargetMachine.cpp
@@ -80,10 +80,8 @@ AMDGPUTargetMachine::AMDGPUTargetMachine(const Target &T, StringRef TT,
InstrItins(&Subtarget.getInstrItineraryData()) {
// TLInfo uses InstrInfo so it must be initialized after.
if (Subtarget.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
- InstrInfo.reset(new R600InstrInfo(*this));
TLInfo.reset(new R600TargetLowering(*this));
} else {
- InstrInfo.reset(new SIInstrInfo(*this));
TLInfo.reset(new SITargetLowering(*this));
}
setRequiresStructuredCFG(true);
@@ -111,6 +109,7 @@ public:
return nullptr;
}
+ virtual void addCodeGenPrepare();
bool addPreISel() override;
bool addInstSelector() override;
bool addPreRegAlloc() override;
@@ -136,6 +135,13 @@ void AMDGPUTargetMachine::addAnalysisPasses(PassManagerBase &PM) {
PM.add(createAMDGPUTargetTransformInfoPass(this));
}
+void AMDGPUPassConfig::addCodeGenPrepare() {
+ const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
+ addPass(createAMDGPUPromoteAlloca(ST));
+ addPass(createSROAPass());
+ TargetPassConfig::addCodeGenPrepare();
+}
+
bool
AMDGPUPassConfig::addPreISel() {
const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
@@ -159,7 +165,6 @@ bool AMDGPUPassConfig::addInstSelector() {
}
bool AMDGPUPassConfig::addPreRegAlloc() {
- addPass(createAMDGPUConvertToISAPass(*TM));
const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
@@ -169,6 +174,8 @@ bool AMDGPUPassConfig::addPreRegAlloc() {
// SIFixSGPRCopies can generate a lot of duplicate instructions,
// so we need to run MachineCSE afterwards.
addPass(&MachineCSEID);
+ initializeSIFixSGPRLiveRangesPass(*PassRegistry::getPassRegistry());
+ insertPass(&RegisterCoalescerID, &SIFixSGPRLiveRangesID);
}
return false;
}
diff --git a/lib/Target/R600/AMDGPUTargetMachine.h b/lib/Target/R600/AMDGPUTargetMachine.h
index 1287e13..3bb15be 100644
--- a/lib/Target/R600/AMDGPUTargetMachine.h
+++ b/lib/Target/R600/AMDGPUTargetMachine.h
@@ -17,8 +17,8 @@
#include "AMDGPUFrameLowering.h"
#include "AMDGPUInstrInfo.h"
+#include "AMDGPUIntrinsicInfo.h"
#include "AMDGPUSubtarget.h"
-#include "AMDILIntrinsicInfo.h"
#include "R600ISelLowering.h"
#include "llvm/IR/DataLayout.h"
@@ -30,7 +30,6 @@ class AMDGPUTargetMachine : public LLVMTargetMachine {
const DataLayout Layout;
AMDGPUFrameLowering FrameLowering;
AMDGPUIntrinsicInfo IntrinsicInfo;
- std::unique_ptr<AMDGPUInstrInfo> InstrInfo;
std::unique_ptr<AMDGPUTargetLowering> TLInfo;
const InstrItineraryData *InstrItins;
@@ -46,13 +45,13 @@ public:
return &IntrinsicInfo;
}
const AMDGPUInstrInfo *getInstrInfo() const override {
- return InstrInfo.get();
+ return getSubtargetImpl()->getInstrInfo();
}
const AMDGPUSubtarget *getSubtargetImpl() const override {
return &Subtarget;
}
const AMDGPURegisterInfo *getRegisterInfo() const override {
- return &InstrInfo->getRegisterInfo();
+ return &getInstrInfo()->getRegisterInfo();
}
AMDGPUTargetLowering *getTargetLowering() const override {
return TLInfo.get();
diff --git a/lib/Target/R600/AMDILBase.td b/lib/Target/R600/AMDILBase.td
deleted file mode 100644
index 5dcd478..0000000
--- a/lib/Target/R600/AMDILBase.td
+++ /dev/null
@@ -1,25 +0,0 @@
-//===- AMDIL.td - AMDIL Target Machine -------------*- tablegen -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-// Target-independent interfaces which we are implementing
-//===----------------------------------------------------------------------===//
-
-include "llvm/Target/Target.td"
-
-// Dummy Instruction itineraries for pseudo instructions
-def ALU_NULL : FuncUnit;
-def NullALU : InstrItinClass;
-
-//===----------------------------------------------------------------------===//
-// Register File, Calling Conv, Instruction Descriptions
-//===----------------------------------------------------------------------===//
-
-
-include "AMDILRegisterInfo.td"
-include "AMDILInstrInfo.td"
-
diff --git a/lib/Target/R600/AMDILISelLowering.cpp b/lib/Target/R600/AMDILISelLowering.cpp
deleted file mode 100644
index 7cea803..0000000
--- a/lib/Target/R600/AMDILISelLowering.cpp
+++ /dev/null
@@ -1,560 +0,0 @@
-//===-- AMDILISelLowering.cpp - AMDIL DAG Lowering Implementation ---------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//==-----------------------------------------------------------------------===//
-//
-/// \file
-/// \brief TargetLowering functions borrowed from AMDIL.
-//
-//===----------------------------------------------------------------------===//
-
-#include "AMDGPUISelLowering.h"
-#include "AMDGPURegisterInfo.h"
-#include "AMDGPUSubtarget.h"
-#include "AMDILIntrinsicInfo.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/PseudoSourceValue.h"
-#include "llvm/CodeGen/SelectionDAG.h"
-#include "llvm/CodeGen/SelectionDAGNodes.h"
-#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
-#include "llvm/IR/CallingConv.h"
-#include "llvm/IR/DerivedTypes.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/Intrinsics.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetOptions.h"
-
-using namespace llvm;
-//===----------------------------------------------------------------------===//
-// TargetLowering Implementation Help Functions End
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// TargetLowering Class Implementation Begins
-//===----------------------------------------------------------------------===//
-void AMDGPUTargetLowering::InitAMDILLowering() {
- static const MVT::SimpleValueType types[] = {
- MVT::i8,
- MVT::i16,
- MVT::i32,
- MVT::f32,
- MVT::f64,
- MVT::i64,
- MVT::v2i8,
- MVT::v4i8,
- MVT::v2i16,
- MVT::v4i16,
- MVT::v4f32,
- MVT::v4i32,
- MVT::v2f32,
- MVT::v2i32,
- MVT::v2f64,
- MVT::v2i64
- };
-
- static const MVT::SimpleValueType IntTypes[] = {
- MVT::i8,
- MVT::i16,
- MVT::i32,
- MVT::i64
- };
-
- static const MVT::SimpleValueType FloatTypes[] = {
- MVT::f32,
- MVT::f64
- };
-
- static const MVT::SimpleValueType VectorTypes[] = {
- MVT::v2i8,
- MVT::v4i8,
- MVT::v2i16,
- MVT::v4i16,
- MVT::v4f32,
- MVT::v4i32,
- MVT::v2f32,
- MVT::v2i32,
- MVT::v2f64,
- MVT::v2i64
- };
-
- const AMDGPUSubtarget &STM = getTargetMachine().getSubtarget<AMDGPUSubtarget>();
- // These are the current register classes that are
- // supported
-
- for (MVT VT : types) {
- setOperationAction(ISD::SUBE, VT, Expand);
- setOperationAction(ISD::SUBC, VT, Expand);
- setOperationAction(ISD::ADDE, VT, Expand);
- setOperationAction(ISD::ADDC, VT, Expand);
- setOperationAction(ISD::BRCOND, VT, Custom);
- setOperationAction(ISD::BR_JT, VT, Expand);
- setOperationAction(ISD::BRIND, VT, Expand);
- // TODO: Implement custom UREM/SREM routines
- setOperationAction(ISD::SREM, VT, Expand);
- setOperationAction(ISD::SMUL_LOHI, VT, Expand);
- setOperationAction(ISD::UMUL_LOHI, VT, Expand);
- if (VT != MVT::i64 && VT != MVT::v2i64) {
- setOperationAction(ISD::SDIV, VT, Custom);
- }
- }
- for (MVT VT : FloatTypes) {
- // IL does not have these operations for floating point types
- setOperationAction(ISD::FP_ROUND_INREG, VT, Expand);
- setOperationAction(ISD::SETOLT, VT, Expand);
- setOperationAction(ISD::SETOGE, VT, Expand);
- setOperationAction(ISD::SETOGT, VT, Expand);
- setOperationAction(ISD::SETOLE, VT, Expand);
- setOperationAction(ISD::SETULT, VT, Expand);
- setOperationAction(ISD::SETUGE, VT, Expand);
- setOperationAction(ISD::SETUGT, VT, Expand);
- setOperationAction(ISD::SETULE, VT, Expand);
- }
-
- for (MVT VT : IntTypes) {
- // GPU also does not have divrem function for signed or unsigned
- setOperationAction(ISD::SDIVREM, VT, Expand);
-
- // GPU does not have [S|U]MUL_LOHI functions as a single instruction
- setOperationAction(ISD::SMUL_LOHI, VT, Expand);
- setOperationAction(ISD::UMUL_LOHI, VT, Expand);
-
- setOperationAction(ISD::BSWAP, VT, Expand);
-
- // GPU doesn't have any counting operators
- setOperationAction(ISD::CTPOP, VT, Expand);
- setOperationAction(ISD::CTTZ, VT, Expand);
- setOperationAction(ISD::CTLZ, VT, Expand);
- }
-
- for (MVT VT : VectorTypes) {
- setOperationAction(ISD::VECTOR_SHUFFLE, VT, Expand);
- setOperationAction(ISD::SDIVREM, VT, Expand);
- setOperationAction(ISD::SMUL_LOHI, VT, Expand);
- // setOperationAction(ISD::VSETCC, VT, Expand);
- setOperationAction(ISD::SELECT_CC, VT, Expand);
-
- }
- setOperationAction(ISD::MULHU, MVT::i64, Expand);
- setOperationAction(ISD::MULHU, MVT::v2i64, Expand);
- setOperationAction(ISD::MULHS, MVT::i64, Expand);
- setOperationAction(ISD::MULHS, MVT::v2i64, Expand);
- setOperationAction(ISD::ADD, MVT::v2i64, Expand);
- setOperationAction(ISD::SREM, MVT::v2i64, Expand);
- setOperationAction(ISD::Constant , MVT::i64 , Legal);
- setOperationAction(ISD::SDIV, MVT::v2i64, Expand);
- setOperationAction(ISD::TRUNCATE, MVT::v2i64, Expand);
- setOperationAction(ISD::SIGN_EXTEND, MVT::v2i64, Expand);
- setOperationAction(ISD::ZERO_EXTEND, MVT::v2i64, Expand);
- setOperationAction(ISD::ANY_EXTEND, MVT::v2i64, Expand);
- if (STM.hasHWFP64()) {
- // we support loading/storing v2f64 but not operations on the type
- setOperationAction(ISD::FADD, MVT::v2f64, Expand);
- setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
- setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
- setOperationAction(ISD::FP_ROUND_INREG, MVT::v2f64, Expand);
- setOperationAction(ISD::FP_EXTEND, MVT::v2f64, Expand);
- setOperationAction(ISD::ConstantFP , MVT::f64 , Legal);
- // We want to expand vector conversions into their scalar
- // counterparts.
- setOperationAction(ISD::TRUNCATE, MVT::v2f64, Expand);
- setOperationAction(ISD::SIGN_EXTEND, MVT::v2f64, Expand);
- setOperationAction(ISD::ZERO_EXTEND, MVT::v2f64, Expand);
- setOperationAction(ISD::ANY_EXTEND, MVT::v2f64, Expand);
- setOperationAction(ISD::FABS, MVT::f64, Expand);
- setOperationAction(ISD::FABS, MVT::v2f64, Expand);
- }
- // TODO: Fix the UDIV24 algorithm so it works for these
- // types correctly. This needs vector comparisons
- // for this to work correctly.
- setOperationAction(ISD::UDIV, MVT::v2i8, Expand);
- setOperationAction(ISD::UDIV, MVT::v4i8, Expand);
- setOperationAction(ISD::UDIV, MVT::v2i16, Expand);
- setOperationAction(ISD::UDIV, MVT::v4i16, Expand);
- setOperationAction(ISD::SUBC, MVT::Other, Expand);
- setOperationAction(ISD::ADDE, MVT::Other, Expand);
- setOperationAction(ISD::ADDC, MVT::Other, Expand);
- setOperationAction(ISD::BRCOND, MVT::Other, Custom);
- setOperationAction(ISD::BR_JT, MVT::Other, Expand);
- setOperationAction(ISD::BRIND, MVT::Other, Expand);
-
-
- // Use the default implementation.
- setOperationAction(ISD::ConstantFP , MVT::f32 , Legal);
- setOperationAction(ISD::Constant , MVT::i32 , Legal);
-
- setSchedulingPreference(Sched::RegPressure);
- setPow2DivIsCheap(false);
- setSelectIsExpensive(true);
- setJumpIsExpensive(true);
-
- MaxStoresPerMemcpy = 4096;
- MaxStoresPerMemmove = 4096;
- MaxStoresPerMemset = 4096;
-
-}
-
-bool
-AMDGPUTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
- const CallInst &I, unsigned Intrinsic) const {
- return false;
-}
-
-// The backend supports 32 and 64 bit floating point immediates
-bool
-AMDGPUTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
- if (VT.getScalarType().getSimpleVT().SimpleTy == MVT::f32
- || VT.getScalarType().getSimpleVT().SimpleTy == MVT::f64) {
- return true;
- } else {
- return false;
- }
-}
-
-bool
-AMDGPUTargetLowering::ShouldShrinkFPConstant(EVT VT) const {
- if (VT.getScalarType().getSimpleVT().SimpleTy == MVT::f32
- || VT.getScalarType().getSimpleVT().SimpleTy == MVT::f64) {
- return false;
- } else {
- return true;
- }
-}
-
-
-// isMaskedValueZeroForTargetNode - Return true if 'Op & Mask' is known to
-// be zero. Op is expected to be a target specific node. Used by DAG
-// combiner.
-
-//===----------------------------------------------------------------------===//
-// Other Lowering Hooks
-//===----------------------------------------------------------------------===//
-
-SDValue
-AMDGPUTargetLowering::LowerSDIV(SDValue Op, SelectionDAG &DAG) const {
- EVT OVT = Op.getValueType();
- SDValue DST;
- if (OVT.getScalarType() == MVT::i64) {
- DST = LowerSDIV64(Op, DAG);
- } else if (OVT.getScalarType() == MVT::i32) {
- DST = LowerSDIV32(Op, DAG);
- } else if (OVT.getScalarType() == MVT::i16
- || OVT.getScalarType() == MVT::i8) {
- DST = LowerSDIV24(Op, DAG);
- } else {
- DST = SDValue(Op.getNode(), 0);
- }
- return DST;
-}
-
-SDValue
-AMDGPUTargetLowering::LowerSREM(SDValue Op, SelectionDAG &DAG) const {
- EVT OVT = Op.getValueType();
- SDValue DST;
- if (OVT.getScalarType() == MVT::i64) {
- DST = LowerSREM64(Op, DAG);
- } else if (OVT.getScalarType() == MVT::i32) {
- DST = LowerSREM32(Op, DAG);
- } else if (OVT.getScalarType() == MVT::i16) {
- DST = LowerSREM16(Op, DAG);
- } else if (OVT.getScalarType() == MVT::i8) {
- DST = LowerSREM8(Op, DAG);
- } else {
- DST = SDValue(Op.getNode(), 0);
- }
- return DST;
-}
-
-EVT
-AMDGPUTargetLowering::genIntType(uint32_t size, uint32_t numEle) const {
- int iSize = (size * numEle);
- int vEle = (iSize >> ((size == 64) ? 6 : 5));
- if (!vEle) {
- vEle = 1;
- }
- if (size == 64) {
- if (vEle == 1) {
- return EVT(MVT::i64);
- } else {
- return EVT(MVT::getVectorVT(MVT::i64, vEle));
- }
- } else {
- if (vEle == 1) {
- return EVT(MVT::i32);
- } else {
- return EVT(MVT::getVectorVT(MVT::i32, vEle));
- }
- }
-}
-
-SDValue
-AMDGPUTargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
- SDValue Chain = Op.getOperand(0);
- SDValue Cond = Op.getOperand(1);
- SDValue Jump = Op.getOperand(2);
- SDValue Result;
- Result = DAG.getNode(
- AMDGPUISD::BRANCH_COND,
- SDLoc(Op),
- Op.getValueType(),
- Chain, Jump, Cond);
- return Result;
-}
-
-SDValue
-AMDGPUTargetLowering::LowerSDIV24(SDValue Op, SelectionDAG &DAG) const {
- SDLoc DL(Op);
- EVT OVT = Op.getValueType();
- SDValue LHS = Op.getOperand(0);
- SDValue RHS = Op.getOperand(1);
- MVT INTTY;
- MVT FLTTY;
- if (!OVT.isVector()) {
- INTTY = MVT::i32;
- FLTTY = MVT::f32;
- } else if (OVT.getVectorNumElements() == 2) {
- INTTY = MVT::v2i32;
- FLTTY = MVT::v2f32;
- } else if (OVT.getVectorNumElements() == 4) {
- INTTY = MVT::v4i32;
- FLTTY = MVT::v4f32;
- }
- unsigned bitsize = OVT.getScalarType().getSizeInBits();
- // char|short jq = ia ^ ib;
- SDValue jq = DAG.getNode(ISD::XOR, DL, OVT, LHS, RHS);
-
- // jq = jq >> (bitsize - 2)
- jq = DAG.getNode(ISD::SRA, DL, OVT, jq, DAG.getConstant(bitsize - 2, OVT));
-
- // jq = jq | 0x1
- jq = DAG.getNode(ISD::OR, DL, OVT, jq, DAG.getConstant(1, OVT));
-
- // jq = (int)jq
- jq = DAG.getSExtOrTrunc(jq, DL, INTTY);
-
- // int ia = (int)LHS;
- SDValue ia = DAG.getSExtOrTrunc(LHS, DL, INTTY);
-
-  // int ib = (int)RHS;
- SDValue ib = DAG.getSExtOrTrunc(RHS, DL, INTTY);
-
- // float fa = (float)ia;
- SDValue fa = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ia);
-
- // float fb = (float)ib;
- SDValue fb = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ib);
-
- // float fq = native_divide(fa, fb);
- SDValue fq = DAG.getNode(AMDGPUISD::DIV_INF, DL, FLTTY, fa, fb);
-
- // fq = trunc(fq);
- fq = DAG.getNode(ISD::FTRUNC, DL, FLTTY, fq);
-
- // float fqneg = -fq;
- SDValue fqneg = DAG.getNode(ISD::FNEG, DL, FLTTY, fq);
-
- // float fr = mad(fqneg, fb, fa);
- SDValue fr = DAG.getNode(ISD::FADD, DL, FLTTY,
- DAG.getNode(ISD::MUL, DL, FLTTY, fqneg, fb), fa);
-
- // int iq = (int)fq;
- SDValue iq = DAG.getNode(ISD::FP_TO_SINT, DL, INTTY, fq);
-
- // fr = fabs(fr);
- fr = DAG.getNode(ISD::FABS, DL, FLTTY, fr);
-
- // fb = fabs(fb);
- fb = DAG.getNode(ISD::FABS, DL, FLTTY, fb);
-
- // int cv = fr >= fb;
- SDValue cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE);
- // jq = (cv ? jq : 0);
- jq = DAG.getNode(ISD::SELECT, DL, OVT, cv, jq,
- DAG.getConstant(0, OVT));
- // dst = iq + jq;
- iq = DAG.getSExtOrTrunc(iq, DL, OVT);
- iq = DAG.getNode(ISD::ADD, DL, OVT, iq, jq);
- return iq;
-}
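
The deleted LowerSDIV24 path does small signed division in floating point: i8 and i16 operands are exactly representable as floats, so the truncated float quotient is at most one off, and the final select adds a +/-1 correction when |fr| >= |fb|, guarding against the native_divide (DIV_INF) result being slightly low. A minimal scalar sketch in plain C++, assuming an IEEE divide in place of the hardware divide (hypothetical helper, not backend code):

    #include <cassert>
    #include <cmath>
    #include <cstdint>

    // Mirrors the deleted LowerSDIV24: divide in float, truncate, and add
    // back sign(a ^ b) when the remainder estimate reaches |b|. With a
    // correctly rounded '/' the fix-up is effectively idle, but it is kept
    // to match the structure of the lowered code.
    int32_t sdiv24(int32_t a, int32_t b) {
      int32_t jq = ((a ^ b) < 0) ? -1 : 1;   // sign of the exact quotient
      float fa = static_cast<float>(a);
      float fb = static_cast<float>(b);
      float fq = std::trunc(fa / fb);        // truncated float quotient
      float fr = -fq * fb + fa;              // remainder estimate (fqneg * fb + fa)
      int32_t iq = static_cast<int32_t>(fq);
      return (std::fabs(fr) >= std::fabs(fb)) ? iq + jq : iq;
    }

    int main() {
      for (int a = -100; a <= 100; ++a)
        for (int b = -100; b <= 100; ++b)
          if (b != 0)
            assert(sdiv24(a, b) == a / b);
    }
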
-
-SDValue
-AMDGPUTargetLowering::LowerSDIV32(SDValue Op, SelectionDAG &DAG) const {
- SDLoc DL(Op);
- EVT OVT = Op.getValueType();
- SDValue LHS = Op.getOperand(0);
- SDValue RHS = Op.getOperand(1);
- // The LowerSDIV32 function generates code equivalent to the following IL.
- // mov r0, LHS
- // mov r1, RHS
- // ilt r10, r0, 0
- // ilt r11, r1, 0
- // iadd r0, r0, r10
- // iadd r1, r1, r11
- // ixor r0, r0, r10
- // ixor r1, r1, r11
- // udiv r0, r0, r1
- // ixor r10, r10, r11
- // iadd r0, r0, r10
- // ixor DST, r0, r10
-
- // mov r0, LHS
- SDValue r0 = LHS;
-
- // mov r1, RHS
- SDValue r1 = RHS;
-
- // ilt r10, r0, 0
- SDValue r10 = DAG.getSelectCC(DL,
- r0, DAG.getConstant(0, OVT),
- DAG.getConstant(-1, MVT::i32),
- DAG.getConstant(0, MVT::i32),
- ISD::SETLT);
-
- // ilt r11, r1, 0
- SDValue r11 = DAG.getSelectCC(DL,
- r1, DAG.getConstant(0, OVT),
- DAG.getConstant(-1, MVT::i32),
- DAG.getConstant(0, MVT::i32),
- ISD::SETLT);
-
- // iadd r0, r0, r10
- r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
-
- // iadd r1, r1, r11
- r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);
-
- // ixor r0, r0, r10
- r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
-
- // ixor r1, r1, r11
- r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);
-
- // udiv r0, r0, r1
- r0 = DAG.getNode(ISD::UDIV, DL, OVT, r0, r1);
-
- // ixor r10, r10, r11
- r10 = DAG.getNode(ISD::XOR, DL, OVT, r10, r11);
-
- // iadd r0, r0, r10
- r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
-
- // ixor DST, r0, r10
- SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
- return DST;
-}
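
The IL listing above is the classic conditional-negate trick: (x + mask) ^ mask gives |x| when mask is the all-ones sign mask, the unsigned divide does the real work, and the same trick re-applies the quotient's sign at the end. A plain C++ sketch of the identical data flow (hypothetical standalone helper):

    #include <cassert>
    #include <cstdint>

    // Signed 32-bit division via an unsigned divide, mirroring the deleted
    // LowerSDIV32: strip the signs with (x + mask) ^ mask, divide unsigned,
    // then re-apply the sign of the quotient with the same trick.
    int32_t sdiv32(int32_t a, int32_t b) {
      uint32_t sa = a < 0 ? ~0u : 0u;            // the "ilt r10, r0, 0" select
      uint32_t sb = b < 0 ? ~0u : 0u;
      uint32_t ua = ((uint32_t)a + sa) ^ sa;     // |a|
      uint32_t ub = ((uint32_t)b + sb) ^ sb;     // |b|
      uint32_t uq = ua / ub;                     // plain UDIV
      uint32_t s = sa ^ sb;                      // all ones iff the signs differ
      return (int32_t)((uq + s) ^ s);            // conditionally negate
    }

    int main() {
      assert(sdiv32(-7, 2) == -3);
      assert(sdiv32(7, -2) == -3);
      assert(sdiv32(-7, -2) == 3);
      assert(sdiv32(INT32_MIN, 1) == INT32_MIN);
    }
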
-
-SDValue
-AMDGPUTargetLowering::LowerSDIV64(SDValue Op, SelectionDAG &DAG) const {
- return SDValue(Op.getNode(), 0);
-}
-
-SDValue
-AMDGPUTargetLowering::LowerSREM8(SDValue Op, SelectionDAG &DAG) const {
- SDLoc DL(Op);
- EVT OVT = Op.getValueType();
- MVT INTTY = MVT::i32;
- if (OVT == MVT::v2i8) {
- INTTY = MVT::v2i32;
- } else if (OVT == MVT::v4i8) {
- INTTY = MVT::v4i32;
- }
- SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY);
- SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY);
- LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS);
- LHS = DAG.getSExtOrTrunc(LHS, DL, OVT);
- return LHS;
-}
-
-SDValue
-AMDGPUTargetLowering::LowerSREM16(SDValue Op, SelectionDAG &DAG) const {
- SDLoc DL(Op);
- EVT OVT = Op.getValueType();
- MVT INTTY = MVT::i32;
- if (OVT == MVT::v2i16) {
- INTTY = MVT::v2i32;
- } else if (OVT == MVT::v4i16) {
- INTTY = MVT::v4i32;
- }
- SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY);
- SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY);
- LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS);
- LHS = DAG.getSExtOrTrunc(LHS, DL, OVT);
- return LHS;
-}
-
-SDValue
-AMDGPUTargetLowering::LowerSREM32(SDValue Op, SelectionDAG &DAG) const {
- SDLoc DL(Op);
- EVT OVT = Op.getValueType();
- SDValue LHS = Op.getOperand(0);
- SDValue RHS = Op.getOperand(1);
- // The LowerSREM32 function generates code equivalent to the following IL.
- // mov r0, LHS
- // mov r1, RHS
- // ilt r10, r0, 0
- // ilt r11, r1, 0
- // iadd r0, r0, r10
- // iadd r1, r1, r11
- // ixor r0, r0, r10
- // ixor r1, r1, r11
- // udiv r20, r0, r1
- // umul r20, r20, r1
- // sub r0, r0, r20
- // iadd r0, r0, r10
- // ixor DST, r0, r10
-
- // mov r0, LHS
- SDValue r0 = LHS;
-
- // mov r1, RHS
- SDValue r1 = RHS;
-
- // ilt r10, r0, 0
- SDValue r10 = DAG.getSetCC(DL, OVT, r0, DAG.getConstant(0, OVT), ISD::SETLT);
-
- // ilt r11, r1, 0
- SDValue r11 = DAG.getSetCC(DL, OVT, r1, DAG.getConstant(0, OVT), ISD::SETLT);
-
- // iadd r0, r0, r10
- r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
-
- // iadd r1, r1, r11
- r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);
-
- // ixor r0, r0, r10
- r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
-
- // ixor r1, r1, r11
- r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);
-
- // udiv r20, r0, r1
- SDValue r20 = DAG.getNode(ISD::UREM, DL, OVT, r0, r1);
-
- // umul r20, r20, r1
- r20 = DAG.getNode(AMDGPUISD::UMUL, DL, OVT, r20, r1);
-
- // sub r0, r0, r20
- r0 = DAG.getNode(ISD::SUB, DL, OVT, r0, r20);
-
- // iadd r0, r0, r10
- r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
-
- // ixor DST, r0, r10
- SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
- return DST;
-}
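
The remainder variant differs from the division case in one detail: the final correction uses r10 alone (the sign of the dividend) rather than r10 ^ r11, because a signed remainder takes the sign of the dividend. A short sketch of the standard form of that transformation, again as a hypothetical standalone helper rather than the backend code itself:

    #include <cassert>
    #include <cstdint>

    // Signed remainder via unsigned ops, the counterpart of the sdiv32
    // sketch: the result is negated only when the dividend was negative.
    int32_t srem32(int32_t a, int32_t b) {
      uint32_t sa = a < 0 ? ~0u : 0u;
      uint32_t sb = b < 0 ? ~0u : 0u;
      uint32_t ua = ((uint32_t)a + sa) ^ sa;     // |a|
      uint32_t ub = ((uint32_t)b + sb) ^ sb;     // |b|
      uint32_t ur = ua % ub;                     // |a| mod |b|
      return (int32_t)((ur + sa) ^ sa);          // negate iff a was negative
    }

    int main() {
      assert(srem32(-7, 2) == -1);
      assert(srem32(7, -2) == 1);
      assert(srem32(-7, -2) == -1);
    }
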
-
-SDValue
-AMDGPUTargetLowering::LowerSREM64(SDValue Op, SelectionDAG &DAG) const {
- return SDValue(Op.getNode(), 0);
-}
diff --git a/lib/Target/R600/AMDILInstrInfo.td b/lib/Target/R600/AMDILInstrInfo.td
deleted file mode 100644
index 0f0c88d..0000000
--- a/lib/Target/R600/AMDILInstrInfo.td
+++ /dev/null
@@ -1,150 +0,0 @@
-//===------------ AMDILInstrInfo.td - AMDIL Target ------*-tablegen-*------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//==-----------------------------------------------------------------------===//
-//
-// This file describes the AMDIL instructions in TableGen format.
-//
-//===----------------------------------------------------------------------===//
-//===--------------------------------------------------------------------===//
-// Custom Operands
-//===--------------------------------------------------------------------===//
-def brtarget : Operand<OtherVT>;
-
-//===--------------------------------------------------------------------===//
-// Custom Selection DAG Type Profiles
-//===--------------------------------------------------------------------===//
-//===----------------------------------------------------------------------===//
-// Generic Profile Types
-//===----------------------------------------------------------------------===//
-
-def SDTIL_GenBinaryOp : SDTypeProfile<1, 2, [
- SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>
- ]>;
-def SDTIL_GenTernaryOp : SDTypeProfile<1, 3, [
- SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>, SDTCisSameAs<2, 3>
- ]>;
-def SDTIL_GenVecBuild : SDTypeProfile<1, 1, [
- SDTCisEltOfVec<1, 0>
- ]>;
-
-//===----------------------------------------------------------------------===//
-// Flow Control Profile Types
-//===----------------------------------------------------------------------===//
-// Branch instruction where second and third are basic blocks
-def SDTIL_BRCond : SDTypeProfile<0, 2, [
- SDTCisVT<0, OtherVT>
- ]>;
-
-//===--------------------------------------------------------------------===//
-// Custom Selection DAG Nodes
-//===--------------------------------------------------------------------===//
-//===----------------------------------------------------------------------===//
-// Flow Control DAG Nodes
-//===----------------------------------------------------------------------===//
-def IL_brcond : SDNode<"AMDGPUISD::BRANCH_COND", SDTIL_BRCond, [SDNPHasChain]>;
-
-//===----------------------------------------------------------------------===//
-// Call/Return DAG Nodes
-//===----------------------------------------------------------------------===//
-def IL_retflag : SDNode<"AMDGPUISD::RET_FLAG", SDTNone,
- [SDNPHasChain, SDNPOptInGlue]>;
-
-//===--------------------------------------------------------------------===//
-// Instructions
-//===--------------------------------------------------------------------===//
-// Floating point math functions
-def IL_div_inf : SDNode<"AMDGPUISD::DIV_INF", SDTIL_GenBinaryOp>;
-
-//===----------------------------------------------------------------------===//
-// Integer functions
-//===----------------------------------------------------------------------===//
-def IL_umul : SDNode<"AMDGPUISD::UMUL" , SDTIntBinOp,
- [SDNPCommutative, SDNPAssociative]>;
-
-//===--------------------------------------------------------------------===//
-// Custom Pattern DAG Nodes
-//===--------------------------------------------------------------------===//
-def global_store : PatFrag<(ops node:$val, node:$ptr),
- (store node:$val, node:$ptr), [{
- return isGlobalStore(dyn_cast<StoreSDNode>(N));
-}]>;
-
-//===----------------------------------------------------------------------===//
-// Load pattern fragments
-//===----------------------------------------------------------------------===//
-// Global address space loads
-def global_load : PatFrag<(ops node:$ptr), (load node:$ptr), [{
- return isGlobalLoad(dyn_cast<LoadSDNode>(N));
-}]>;
-// Constant address space loads
-def constant_load : PatFrag<(ops node:$ptr), (load node:$ptr), [{
- return isConstantLoad(dyn_cast<LoadSDNode>(N), -1);
-}]>;
-
-//===----------------------------------------------------------------------===//
-// Complex addressing mode patterns
-//===----------------------------------------------------------------------===//
-def ADDR : ComplexPattern<i32, 2, "SelectADDR", [], []>;
-def ADDRF : ComplexPattern<i32, 2, "SelectADDR", [frameindex], []>;
-def ADDR64 : ComplexPattern<i64, 2, "SelectADDR64", [], []>;
-def ADDR64F : ComplexPattern<i64, 2, "SelectADDR64", [frameindex], []>;
-
-//===----------------------------------------------------------------------===//
-// Instruction format classes
-//===----------------------------------------------------------------------===//
-class ILFormat<dag outs, dag ins, string asmstr, list<dag> pattern>
-: Instruction {
-
- let Namespace = "AMDGPU";
- dag OutOperandList = outs;
- dag InOperandList = ins;
- let Pattern = pattern;
- let AsmString = !strconcat(asmstr, "\n");
- let isPseudo = 1;
- let Itinerary = NullALU;
- bit hasIEEEFlag = 0;
- bit hasZeroOpFlag = 0;
- let mayLoad = 0;
- let mayStore = 0;
- let hasSideEffects = 0;
-}
-
-//===--------------------------------------------------------------------===//
-// Multiclass Instruction formats
-//===--------------------------------------------------------------------===//
-// Multiclass that handles branch instructions
-multiclass BranchConditional<SDNode Op, RegisterClass rci, RegisterClass rcf> {
- def _i32 : ILFormat<(outs),
- (ins brtarget:$target, rci:$src0),
- "; i32 Pseudo branch instruction",
- [(Op bb:$target, (i32 rci:$src0))]>;
- def _f32 : ILFormat<(outs),
- (ins brtarget:$target, rcf:$src0),
- "; f32 Pseudo branch instruction",
- [(Op bb:$target, (f32 rcf:$src0))]>;
-}
-
-// Only scalar types should generate flow control
-multiclass BranchInstr<string name> {
- def _i32 : ILFormat<(outs), (ins GPRI32:$src),
- !strconcat(name, " $src"), []>;
- def _f32 : ILFormat<(outs), (ins GPRF32:$src),
- !strconcat(name, " $src"), []>;
-}
-// Only scalar types should generate flow control
-multiclass BranchInstr2<string name> {
- def _i32 : ILFormat<(outs), (ins GPRI32:$src0, GPRI32:$src1),
- !strconcat(name, " $src0, $src1"), []>;
- def _f32 : ILFormat<(outs), (ins GPRF32:$src0, GPRF32:$src1),
- !strconcat(name, " $src0, $src1"), []>;
-}
-
-//===--------------------------------------------------------------------===//
-// Intrinsics support
-//===--------------------------------------------------------------------===//
-include "AMDILIntrinsics.td"
diff --git a/lib/Target/R600/AMDILIntrinsics.td b/lib/Target/R600/AMDILIntrinsics.td
deleted file mode 100644
index 4a3e02e..0000000
--- a/lib/Target/R600/AMDILIntrinsics.td
+++ /dev/null
@@ -1,224 +0,0 @@
-//===- AMDILIntrinsics.td - Defines AMDIL Intrinscs -*- tablegen -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//==-----------------------------------------------------------------------===//
-//
-// This file defines all of the amdil-specific intrinsics
-//
-//===---------------------------------------------------------------===//
-//===--------------------------------------------------------------------===//
-// Intrinsic classes
-// Generic versions of the above classes but for Target specific intrinsics
-// instead of SDNode patterns.
-//===--------------------------------------------------------------------===//
-let TargetPrefix = "AMDIL", isTarget = 1 in {
- class VoidIntLong :
- Intrinsic<[llvm_i64_ty], [], []>;
- class VoidIntInt :
- Intrinsic<[llvm_i32_ty], [], []>;
- class VoidIntBool :
- Intrinsic<[llvm_i32_ty], [], []>;
- class UnaryIntInt :
- Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [IntrNoMem]>;
- class UnaryIntFloat :
- Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;
- class ConvertIntFTOI :
- Intrinsic<[llvm_anyint_ty], [llvm_anyfloat_ty], [IntrNoMem]>;
- class ConvertIntITOF :
- Intrinsic<[llvm_anyfloat_ty], [llvm_anyint_ty], [IntrNoMem]>;
- class UnaryIntNoRetInt :
- Intrinsic<[], [llvm_anyint_ty], []>;
- class UnaryIntNoRetFloat :
- Intrinsic<[], [llvm_anyfloat_ty], []>;
- class BinaryIntInt :
- Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
- class BinaryIntFloat :
- Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
- class BinaryIntNoRetInt :
- Intrinsic<[], [llvm_anyint_ty, LLVMMatchType<0>], []>;
- class BinaryIntNoRetFloat :
- Intrinsic<[], [llvm_anyfloat_ty, LLVMMatchType<0>], []>;
- class TernaryIntInt :
- Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>,
- LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
- class TernaryIntFloat :
- Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>,
- LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
- class QuaternaryIntInt :
- Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>,
- LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
- class UnaryAtomicInt :
- Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty], [IntrReadWriteArgMem]>;
- class BinaryAtomicInt :
- Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], [IntrReadWriteArgMem]>;
- class TernaryAtomicInt :
- Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty]>;
- class UnaryAtomicIntNoRet :
- Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty], [IntrReadWriteArgMem]>;
- class BinaryAtomicIntNoRet :
- Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], [IntrReadWriteArgMem]>;
- class TernaryAtomicIntNoRet :
- Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrReadWriteArgMem]>;
-}
-
-let TargetPrefix = "AMDIL", isTarget = 1 in {
- def int_AMDIL_abs : GCCBuiltin<"__amdil_abs">, UnaryIntInt;
-
- def int_AMDIL_bit_reverse_u32 : GCCBuiltin<"__amdil_ubit_reverse">,
- UnaryIntInt;
- def int_AMDIL_bit_count_i32 : GCCBuiltin<"__amdil_count_bits">,
- UnaryIntInt;
- def int_AMDIL_bit_find_first_lo : GCCBuiltin<"__amdil_ffb_lo">,
- UnaryIntInt;
- def int_AMDIL_bit_find_first_hi : GCCBuiltin<"__amdil_ffb_hi">,
- UnaryIntInt;
- def int_AMDIL_bit_find_first_sgn : GCCBuiltin<"__amdil_ffb_signed">,
- UnaryIntInt;
- def int_AMDIL_media_bitalign : GCCBuiltin<"__amdil_bitalign">,
- TernaryIntInt;
- def int_AMDIL_media_bytealign : GCCBuiltin<"__amdil_bytealign">,
- TernaryIntInt;
- def int_AMDIL_bit_insert_u32 : GCCBuiltin<"__amdil_ubit_insert">,
- QuaternaryIntInt;
- def int_AMDIL_bfi : GCCBuiltin<"__amdil_bfi">,
- TernaryIntInt;
- def int_AMDIL_bfm : GCCBuiltin<"__amdil_bfm">,
- BinaryIntInt;
- def int_AMDIL_mulhi_i32 : GCCBuiltin<"__amdil_imul_high">,
- BinaryIntInt;
- def int_AMDIL_mulhi_u32 : GCCBuiltin<"__amdil_umul_high">,
- BinaryIntInt;
- def int_AMDIL_mulhi24_i32 : GCCBuiltin<"__amdil_imul24_high">,
- BinaryIntInt;
- def int_AMDIL_mulhi24_u32 : GCCBuiltin<"__amdil_umul24_high">,
- BinaryIntInt;
- def int_AMDIL_carry_i32 : GCCBuiltin<"__amdil_carry">,
- BinaryIntInt;
- def int_AMDIL_borrow_i32 : GCCBuiltin<"__amdil_borrow">,
- BinaryIntInt;
- def int_AMDIL_min_i32 : GCCBuiltin<"__amdil_imin">,
- BinaryIntInt;
- def int_AMDIL_min_u32 : GCCBuiltin<"__amdil_umin">,
- BinaryIntInt;
- def int_AMDIL_min : GCCBuiltin<"__amdil_min">,
- BinaryIntFloat;
- def int_AMDIL_max_i32 : GCCBuiltin<"__amdil_imax">,
- BinaryIntInt;
- def int_AMDIL_max_u32 : GCCBuiltin<"__amdil_umax">,
- BinaryIntInt;
- def int_AMDIL_max : GCCBuiltin<"__amdil_max">,
- BinaryIntFloat;
- def int_AMDIL_media_lerp_u4 : GCCBuiltin<"__amdil_u4lerp">,
- TernaryIntInt;
- def int_AMDIL_media_sad : GCCBuiltin<"__amdil_sad">,
- TernaryIntInt;
- def int_AMDIL_media_sad_hi : GCCBuiltin<"__amdil_sadhi">,
- TernaryIntInt;
- def int_AMDIL_fraction : GCCBuiltin<"__amdil_fraction">,
- UnaryIntFloat;
- def int_AMDIL_clamp : GCCBuiltin<"__amdil_clamp">,
- TernaryIntFloat;
- def int_AMDIL_pireduce : GCCBuiltin<"__amdil_pireduce">,
- UnaryIntFloat;
- def int_AMDIL_round_nearest : GCCBuiltin<"__amdil_round_nearest">,
- UnaryIntFloat;
- def int_AMDIL_round_neginf : GCCBuiltin<"__amdil_round_neginf">,
- UnaryIntFloat;
- def int_AMDIL_round_zero : GCCBuiltin<"__amdil_round_zero">,
- UnaryIntFloat;
- def int_AMDIL_acos : GCCBuiltin<"__amdil_acos">,
- UnaryIntFloat;
- def int_AMDIL_atan : GCCBuiltin<"__amdil_atan">,
- UnaryIntFloat;
- def int_AMDIL_asin : GCCBuiltin<"__amdil_asin">,
- UnaryIntFloat;
- def int_AMDIL_cos : GCCBuiltin<"__amdil_cos">,
- UnaryIntFloat;
- def int_AMDIL_cos_vec : GCCBuiltin<"__amdil_cos_vec">,
- UnaryIntFloat;
- def int_AMDIL_tan : GCCBuiltin<"__amdil_tan">,
- UnaryIntFloat;
- def int_AMDIL_sin : GCCBuiltin<"__amdil_sin">,
- UnaryIntFloat;
- def int_AMDIL_sin_vec : GCCBuiltin<"__amdil_sin_vec">,
- UnaryIntFloat;
- def int_AMDIL_pow : GCCBuiltin<"__amdil_pow">, BinaryIntFloat;
- def int_AMDIL_div : GCCBuiltin<"__amdil_div">, BinaryIntFloat;
- def int_AMDIL_udiv : GCCBuiltin<"__amdil_udiv">, BinaryIntInt;
- def int_AMDIL_sqrt: GCCBuiltin<"__amdil_sqrt">,
- UnaryIntFloat;
- def int_AMDIL_sqrt_vec: GCCBuiltin<"__amdil_sqrt_vec">,
- UnaryIntFloat;
- def int_AMDIL_exp : GCCBuiltin<"__amdil_exp">,
- UnaryIntFloat;
- def int_AMDIL_exp_vec : GCCBuiltin<"__amdil_exp_vec">,
- UnaryIntFloat;
- def int_AMDIL_exn : GCCBuiltin<"__amdil_exn">,
- UnaryIntFloat;
- def int_AMDIL_log_vec : GCCBuiltin<"__amdil_log_vec">,
- UnaryIntFloat;
- def int_AMDIL_ln : GCCBuiltin<"__amdil_ln">,
- UnaryIntFloat;
- def int_AMDIL_sign: GCCBuiltin<"__amdil_sign">,
- UnaryIntFloat;
- def int_AMDIL_fma: GCCBuiltin<"__amdil_fma">,
- TernaryIntFloat;
- def int_AMDIL_rsq : GCCBuiltin<"__amdil_rsq">,
- UnaryIntFloat;
- def int_AMDIL_rsq_vec : GCCBuiltin<"__amdil_rsq_vec">,
- UnaryIntFloat;
- def int_AMDIL_length : GCCBuiltin<"__amdil_length">,
- UnaryIntFloat;
- def int_AMDIL_lerp : GCCBuiltin<"__amdil_lerp">,
- TernaryIntFloat;
- def int_AMDIL_media_sad4 : GCCBuiltin<"__amdil_sad4">,
- Intrinsic<[llvm_i32_ty], [llvm_v4i32_ty,
- llvm_v4i32_ty, llvm_i32_ty], []>;
-
- def int_AMDIL_frexp_f64 : GCCBuiltin<"__amdil_frexp">,
- Intrinsic<[llvm_v2i64_ty], [llvm_double_ty], []>;
- def int_AMDIL_ldexp : GCCBuiltin<"__amdil_ldexp">,
- Intrinsic<[llvm_anyfloat_ty], [llvm_anyfloat_ty, llvm_anyint_ty], []>;
- def int_AMDIL_drcp : GCCBuiltin<"__amdil_rcp">,
- Intrinsic<[llvm_double_ty], [llvm_double_ty], []>;
- def int_AMDIL_convert_f16_f32 : GCCBuiltin<"__amdil_half_to_float">,
- ConvertIntITOF;
- def int_AMDIL_convert_f32_f16 : GCCBuiltin<"__amdil_float_to_half">,
- ConvertIntFTOI;
- def int_AMDIL_convert_f32_i32_rpi : GCCBuiltin<"__amdil_float_to_int_rpi">,
- ConvertIntFTOI;
- def int_AMDIL_convert_f32_i32_flr : GCCBuiltin<"__amdil_float_to_int_flr">,
- ConvertIntFTOI;
- def int_AMDIL_convert_f32_f16_near : GCCBuiltin<"__amdil_float_to_half_near">,
- ConvertIntFTOI;
- def int_AMDIL_convert_f32_f16_neg_inf : GCCBuiltin<"__amdil_float_to_half_neg_inf">,
- ConvertIntFTOI;
- def int_AMDIL_convert_f32_f16_plus_inf : GCCBuiltin<"__amdil_float_to_half_plus_inf">,
- ConvertIntFTOI;
- def int_AMDIL_media_convert_f2v4u8 : GCCBuiltin<"__amdil_f_2_u4">,
- Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty], []>;
- def int_AMDIL_media_unpack_byte_0 : GCCBuiltin<"__amdil_unpack_0">,
- ConvertIntITOF;
- def int_AMDIL_media_unpack_byte_1 : GCCBuiltin<"__amdil_unpack_1">,
- ConvertIntITOF;
- def int_AMDIL_media_unpack_byte_2 : GCCBuiltin<"__amdil_unpack_2">,
- ConvertIntITOF;
- def int_AMDIL_media_unpack_byte_3 : GCCBuiltin<"__amdil_unpack_3">,
- ConvertIntITOF;
- def int_AMDIL_dp2_add : GCCBuiltin<"__amdil_dp2_add">,
- Intrinsic<[llvm_float_ty], [llvm_v2f32_ty,
- llvm_v2f32_ty, llvm_float_ty], []>;
- def int_AMDIL_dp2 : GCCBuiltin<"__amdil_dp2">,
- Intrinsic<[llvm_float_ty], [llvm_v2f32_ty,
- llvm_v2f32_ty], []>;
- def int_AMDIL_dp3 : GCCBuiltin<"__amdil_dp3">,
- Intrinsic<[llvm_float_ty], [llvm_v4f32_ty,
- llvm_v4f32_ty], []>;
- def int_AMDIL_dp4 : GCCBuiltin<"__amdil_dp4">,
- Intrinsic<[llvm_float_ty], [llvm_v4f32_ty,
- llvm_v4f32_ty], []>;
-}
diff --git a/lib/Target/R600/AMDILRegisterInfo.td b/lib/Target/R600/AMDILRegisterInfo.td
deleted file mode 100644
index b9d0334..0000000
--- a/lib/Target/R600/AMDILRegisterInfo.td
+++ /dev/null
@@ -1,107 +0,0 @@
-//===- AMDILRegisterInfo.td - AMDIL Register defs ----------*- tablegen -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//==-----------------------------------------------------------------------===//
-//
-// Declarations that describe the AMDIL register file
-//
-//===----------------------------------------------------------------------===//
-
-class AMDILReg<bits<16> num, string n> : Register<n> {
- field bits<16> Value;
- let Value = num;
- let Namespace = "AMDGPU";
-}
-
-// We will start with 8 registers for each class before expanding to more
-// Since the swizzle is added based on the register class, we can leave it
-// off here and just specify different registers for different register classes
-def R1 : AMDILReg<1, "r1">, DwarfRegNum<[1]>;
-def R2 : AMDILReg<2, "r2">, DwarfRegNum<[2]>;
-def R3 : AMDILReg<3, "r3">, DwarfRegNum<[3]>;
-def R4 : AMDILReg<4, "r4">, DwarfRegNum<[4]>;
-def R5 : AMDILReg<5, "r5">, DwarfRegNum<[5]>;
-def R6 : AMDILReg<6, "r6">, DwarfRegNum<[6]>;
-def R7 : AMDILReg<7, "r7">, DwarfRegNum<[7]>;
-def R8 : AMDILReg<8, "r8">, DwarfRegNum<[8]>;
-def R9 : AMDILReg<9, "r9">, DwarfRegNum<[9]>;
-def R10 : AMDILReg<10, "r10">, DwarfRegNum<[10]>;
-def R11 : AMDILReg<11, "r11">, DwarfRegNum<[11]>;
-def R12 : AMDILReg<12, "r12">, DwarfRegNum<[12]>;
-def R13 : AMDILReg<13, "r13">, DwarfRegNum<[13]>;
-def R14 : AMDILReg<14, "r14">, DwarfRegNum<[14]>;
-def R15 : AMDILReg<15, "r15">, DwarfRegNum<[15]>;
-def R16 : AMDILReg<16, "r16">, DwarfRegNum<[16]>;
-def R17 : AMDILReg<17, "r17">, DwarfRegNum<[17]>;
-def R18 : AMDILReg<18, "r18">, DwarfRegNum<[18]>;
-def R19 : AMDILReg<19, "r19">, DwarfRegNum<[19]>;
-def R20 : AMDILReg<20, "r20">, DwarfRegNum<[20]>;
-
-// All registers between 1000 and 1024 are reserved and cannot be used
-// unless commented in this section
-// r1021-r1025 are used to dynamically calculate the local/group/thread/region/region_local ID's
-// r1020 is used to hold the frame index for local arrays
-// r1019 is used to hold the dynamic stack allocation pointer
-// r1018 is used as a temporary register for handwritten code
-// r1017 is used as a temporary register for handwritten code
-// r1016 is used as a temporary register for load/store code
-// r1015 is used as a temporary register for data segment offset
-// r1014 is used as a temporary register for store code
-// r1013 is used as the section data pointer register
-// r1012-r1010 and r1001-r1008 are used for temporary I/O registers
-// r1009 is used as the frame pointer register
-// r999 is used as the mem register.
-// r998 is used as the return address register.
-//def R1025 : AMDILReg<1025, "r1025">, DwarfRegNum<[1025]>;
-//def R1024 : AMDILReg<1024, "r1024">, DwarfRegNum<[1024]>;
-//def R1023 : AMDILReg<1023, "r1023">, DwarfRegNum<[1023]>;
-//def R1022 : AMDILReg<1022, "r1022">, DwarfRegNum<[1022]>;
-//def R1021 : AMDILReg<1021, "r1021">, DwarfRegNum<[1021]>;
-//def R1020 : AMDILReg<1020, "r1020">, DwarfRegNum<[1020]>;
-def SP : AMDILReg<1019, "r1019">, DwarfRegNum<[1019]>;
-def T1 : AMDILReg<1018, "r1018">, DwarfRegNum<[1018]>;
-def T2 : AMDILReg<1017, "r1017">, DwarfRegNum<[1017]>;
-def T3 : AMDILReg<1016, "r1016">, DwarfRegNum<[1016]>;
-def T4 : AMDILReg<1015, "r1015">, DwarfRegNum<[1015]>;
-def T5 : AMDILReg<1014, "r1014">, DwarfRegNum<[1014]>;
-def SDP : AMDILReg<1013, "r1013">, DwarfRegNum<[1013]>;
-def R1012: AMDILReg<1012, "r1012">, DwarfRegNum<[1012]>;
-def R1011: AMDILReg<1011, "r1011">, DwarfRegNum<[1011]>;
-def R1010: AMDILReg<1010, "r1010">, DwarfRegNum<[1010]>;
-def DFP : AMDILReg<1009, "r1009">, DwarfRegNum<[1009]>;
-def R1008: AMDILReg<1008, "r1008">, DwarfRegNum<[1008]>;
-def R1007: AMDILReg<1007, "r1007">, DwarfRegNum<[1007]>;
-def R1006: AMDILReg<1006, "r1006">, DwarfRegNum<[1006]>;
-def R1005: AMDILReg<1005, "r1005">, DwarfRegNum<[1005]>;
-def R1004: AMDILReg<1004, "r1004">, DwarfRegNum<[1004]>;
-def R1003: AMDILReg<1003, "r1003">, DwarfRegNum<[1003]>;
-def R1002: AMDILReg<1002, "r1002">, DwarfRegNum<[1002]>;
-def R1001: AMDILReg<1001, "r1001">, DwarfRegNum<[1001]>;
-def MEM : AMDILReg<999, "mem">, DwarfRegNum<[999]>;
-def RA : AMDILReg<998, "r998">, DwarfRegNum<[998]>;
-def FP : AMDILReg<997, "r997">, DwarfRegNum<[997]>;
-def GPRI16 : RegisterClass<"AMDGPU", [i16], 16,
- (add (sequence "R%u", 1, 20), RA, SP, T1, T2, T3, T4, T5, SDP, R1010, R1011, R1001, R1002, R1003, R1004, R1005, R1006, R1007, R1008, MEM, R1012)> {
- let AltOrders = [(add (sequence "R%u", 1, 20))];
- let AltOrderSelect = [{
- return 1;
- }];
- }
-def GPRI32 : RegisterClass<"AMDGPU", [i32], 32,
- (add (sequence "R%u", 1, 20), RA, SP, T1, T2, T3, T4, T5, SDP, R1010, R1011, R1001, R1002, R1003, R1004, R1005, R1006, R1007, R1008, MEM, R1012)> {
- let AltOrders = [(add (sequence "R%u", 1, 20))];
- let AltOrderSelect = [{
- return 1;
- }];
- }
-def GPRF32 : RegisterClass<"AMDGPU", [f32], 32,
- (add (sequence "R%u", 1, 20), RA, SP, T1, T2, T3, T4, T5, SDP, R1010, R1011, R1001, R1002, R1003, R1004, R1005, R1006, R1007, R1008, MEM, R1012)> {
- let AltOrders = [(add (sequence "R%u", 1, 20))];
- let AltOrderSelect = [{
- return 1;
- }];
- }
diff --git a/lib/Target/R600/CMakeLists.txt b/lib/Target/R600/CMakeLists.txt
index 3c6fa5a..4d16082 100644
--- a/lib/Target/R600/CMakeLists.txt
+++ b/lib/Target/R600/CMakeLists.txt
@@ -13,10 +13,9 @@ add_public_tablegen_target(AMDGPUCommonTableGen)
add_llvm_target(R600CodeGen
AMDILCFGStructurizer.cpp
- AMDILIntrinsicInfo.cpp
- AMDILISelLowering.cpp
AMDGPUAsmPrinter.cpp
AMDGPUFrameLowering.cpp
+ AMDGPUIntrinsicInfo.cpp
AMDGPUISelDAGToDAG.cpp
AMDGPUMCInstLower.cpp
AMDGPUMachineFunction.cpp
@@ -24,8 +23,8 @@ add_llvm_target(R600CodeGen
AMDGPUTargetMachine.cpp
AMDGPUTargetTransformInfo.cpp
AMDGPUISelLowering.cpp
- AMDGPUConvertToISA.cpp
AMDGPUInstrInfo.cpp
+ AMDGPUPromoteAlloca.cpp
AMDGPURegisterInfo.cpp
R600ClauseMergePass.cpp
R600ControlFlowFinalizer.cpp
@@ -41,6 +40,7 @@ add_llvm_target(R600CodeGen
R600TextureIntrinsicsReplacer.cpp
SIAnnotateControlFlow.cpp
SIFixSGPRCopies.cpp
+ SIFixSGPRLiveRanges.cpp
SIInsertWaits.cpp
SIInstrInfo.cpp
SIISelLowering.cpp
diff --git a/lib/Target/R600/EvergreenInstructions.td b/lib/Target/R600/EvergreenInstructions.td
index 2065441..dcb7e98 100644
--- a/lib/Target/R600/EvergreenInstructions.td
+++ b/lib/Target/R600/EvergreenInstructions.td
@@ -295,7 +295,7 @@ def : Pat<(i32 (sext_inreg i32:$src, i8)),
def : Pat<(i32 (sext_inreg i32:$src, i16)),
(BFE_INT_eg i32:$src, (i32 ZERO), (MOV_IMM_I32 16))>;
-defm : BFIPatterns <BFI_INT_eg>;
+defm : BFIPatterns <BFI_INT_eg, MOV_IMM_I32>;
def BFM_INT_eg : R600_2OP <0xA0, "BFM_INT",
[(set i32:$dst, (AMDGPUbfm i32:$src0, i32:$src1))],
@@ -326,6 +326,8 @@ def MUL_UINT24_eg : R600_2OP <0xB5, "MUL_UINT24",
def DOT4_eg : DOT4_Common<0xBE>;
defm CUBE_eg : CUBE_Common<0xC0>;
+def BCNT_INT : R600_1OP_Helper <0xAA, "BCNT_INT", ctpop, VecALU>;
+
let hasSideEffects = 1 in {
def MOVA_INT_eg : R600_1OP <0xCC, "MOVA_INT", [], VecALU>;
}
@@ -346,7 +348,7 @@ def FLT_TO_UINT_eg : FLT_TO_UINT_Common<0x9A> {
def UINT_TO_FLT_eg : UINT_TO_FLT_Common<0x9C>;
def GROUP_BARRIER : InstR600 <
- (outs), (ins), " GROUP_BARRIER", [(int_AMDGPU_barrier_local)], AnyALU>,
+ (outs), (ins), " GROUP_BARRIER", [(int_AMDGPU_barrier_local), (int_AMDGPU_barrier_global)], AnyALU>,
R600ALU_Word0,
R600ALU_Word1_OP2 <0x54> {
@@ -375,6 +377,11 @@ def GROUP_BARRIER : InstR600 <
let ALUInst = 1;
}
+def : Pat <
+ (int_AMDGPU_barrier_global),
+ (GROUP_BARRIER)
+>;
+
//===----------------------------------------------------------------------===//
// LDS Instructions
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp
index 11ae091..0927040 100644
--- a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp
+++ b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp
@@ -99,9 +99,9 @@ void AMDGPUInstPrinter::printRegOperand(unsigned reg, raw_ostream &O) {
return;
}
- // The low 8 bits encoding value is the register index, for both VGPRs and
- // SGPRs.
- unsigned RegIdx = MRI.getEncodingValue(reg) & ((1 << 8) - 1);
+ // The low 8 bits of the encoding value is the register index, for both VGPRs
+ // and SGPRs.
+ unsigned RegIdx = MRI.getEncodingValue(reg) & ((1 << 8) - 1);
if (NumRegs == 1) {
O << Type << RegIdx;
return;
@@ -216,13 +216,8 @@ void AMDGPUInstPrinter::printClamp(const MCInst *MI, unsigned OpNo,
void AMDGPUInstPrinter::printLiteral(const MCInst *MI, unsigned OpNo,
raw_ostream &O) {
- union Literal {
- float f;
- int32_t i;
- } L;
-
- L.i = MI->getOperand(OpNo).getImm();
- O << L.i << "(" << L.f << ")";
+ int32_t Imm = MI->getOperand(OpNo).getImm();
+ O << Imm << '(' << BitsToFloat(Imm) << ')';
}
void AMDGPUInstPrinter::printLast(const MCInst *MI, unsigned OpNo,
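
The printLiteral change swaps the old union type pun for LLVM's BitsToFloat helper, so the immediate is still printed both as an integer and as the float whose bits it encodes. A self-contained equivalent of what that call does, using memcpy for the bit reinterpretation (hypothetical helper name, not the LLVM implementation):

    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    // Reinterpret a 32-bit immediate as an IEEE float without union type
    // punning; memcpy is the portable way to do the bit copy.
    float bitsToFloat(int32_t bits) {
      float f;
      std::memcpy(&f, &bits, sizeof(f));
      return f;
    }

    int main() {
      int32_t imm = 0x3f800000;                        // bit pattern of 1.0f
      std::printf("%d(%f)\n", imm, bitsToFloat(imm));  // 1065353216(1.000000)
    }
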
diff --git a/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp b/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp
index 5e7cefe..dc1344f 100644
--- a/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp
+++ b/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp
@@ -172,17 +172,13 @@ uint64_t R600MCCodeEmitter::getMachineOpValue(const MCInst &MI,
SmallVectorImpl<MCFixup> &Fixup,
const MCSubtargetInfo &STI) const {
if (MO.isReg()) {
- if (HAS_NATIVE_OPERANDS(MCII.get(MI.getOpcode()).TSFlags)) {
+ if (HAS_NATIVE_OPERANDS(MCII.get(MI.getOpcode()).TSFlags))
return MRI.getEncodingValue(MO.getReg());
- } else {
- return getHWReg(MO.getReg());
- }
- } else if (MO.isImm()) {
- return MO.getImm();
- } else {
- assert(0);
- return 0;
+ return getHWReg(MO.getReg());
}
+
+ assert(MO.isImm());
+ return MO.getImm();
}
#include "AMDGPUGenMCCodeEmitter.inc"
diff --git a/lib/Target/R600/R600ControlFlowFinalizer.cpp b/lib/Target/R600/R600ControlFlowFinalizer.cpp
index d255e96..d98a6db 100644
--- a/lib/Target/R600/R600ControlFlowFinalizer.cpp
+++ b/lib/Target/R600/R600ControlFlowFinalizer.cpp
@@ -14,6 +14,7 @@
#include "llvm/Support/Debug.h"
#include "AMDGPU.h"
+#include "AMDGPUSubtarget.h"
#include "R600Defines.h"
#include "R600InstrInfo.h"
#include "R600MachineFunctionInfo.h"
diff --git a/lib/Target/R600/R600ISelLowering.cpp b/lib/Target/R600/R600ISelLowering.cpp
index d6c6830..7f3560a 100644
--- a/lib/Target/R600/R600ISelLowering.cpp
+++ b/lib/Target/R600/R600ISelLowering.cpp
@@ -13,6 +13,9 @@
//===----------------------------------------------------------------------===//
#include "R600ISelLowering.h"
+#include "AMDGPUFrameLowering.h"
+#include "AMDGPUIntrinsicInfo.h"
+#include "AMDGPUSubtarget.h"
#include "R600Defines.h"
#include "R600InstrInfo.h"
#include "R600MachineFunctionInfo.h"
@@ -65,6 +68,7 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
setOperationAction(ISD::BR_CC, MVT::i32, Expand);
setOperationAction(ISD::BR_CC, MVT::f32, Expand);
+ setOperationAction(ISD::BRCOND, MVT::Other, Custom);
setOperationAction(ISD::FSUB, MVT::f32, Expand);
@@ -133,19 +137,47 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
setOperationAction(ISD::FrameIndex, MVT::i32, Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i32, Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f32, Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
+
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i32, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f32, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
+
setTargetDAGCombine(ISD::FP_ROUND);
setTargetDAGCombine(ISD::FP_TO_SINT);
setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
setTargetDAGCombine(ISD::SELECT_CC);
setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
+ setOperationAction(ISD::SUB, MVT::i64, Expand);
+
// These should be replaced by UDVIREM, but it does not happen automatically
// during Type Legalization
setOperationAction(ISD::UDIV, MVT::i64, Custom);
setOperationAction(ISD::UREM, MVT::i64, Custom);
+ setOperationAction(ISD::SDIV, MVT::i64, Custom);
+ setOperationAction(ISD::SREM, MVT::i64, Custom);
+
+ // We don't have 64-bit shifts. Thus we need either SHX i64 or SHX_PARTS i32
+ // to be Legal/Custom in order to avoid library calls.
+ setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
+ setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
+ setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
+ const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
+ for (MVT VT : ScalarIntVTs) {
+ setOperationAction(ISD::ADDC, VT, Expand);
+ setOperationAction(ISD::SUBC, VT, Expand);
+ setOperationAction(ISD::ADDE, VT, Expand);
+ setOperationAction(ISD::SUBE, VT, Expand);
+ }
+
setBooleanContents(ZeroOrNegativeOneBooleanContent);
setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
setSchedulingPreference(Sched::Source);
@@ -537,11 +569,24 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
switch (Op.getOpcode()) {
default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
+ case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
+ case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
+ case ISD::SHL_PARTS: return LowerSHLParts(Op, DAG);
+ case ISD::SRA_PARTS:
+ case ISD::SRL_PARTS: return LowerSRXParts(Op, DAG);
case ISD::FCOS:
case ISD::FSIN: return LowerTrig(Op, DAG);
case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
case ISD::STORE: return LowerSTORE(Op, DAG);
- case ISD::LOAD: return LowerLOAD(Op, DAG);
+ case ISD::LOAD: {
+ SDValue Result = LowerLOAD(Op, DAG);
+ assert((!Result.getNode() ||
+ Result.getNode()->getNumValues() == 2) &&
+ "Load should return a value and a chain");
+ return Result;
+ }
+
+ case ISD::BRCOND: return LowerBRCOND(Op, DAG);
case ISD::GlobalAddress: return LowerGlobalAddress(MFI, Op, DAG);
case ISD::INTRINSIC_VOID: {
SDValue Chain = Op.getOperand(0);
@@ -776,6 +821,9 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
case Intrinsic::r600_read_tidig_z:
return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
AMDGPU::T0_Z, VT);
+ case Intrinsic::AMDGPU_rsq:
+ // XXX - I'm assuming SI's RSQ_LEGACY matches R600's behavior.
+ return DAG.getNode(AMDGPUISD::RSQ_LEGACY, DL, VT, Op.getOperand(1));
}
// break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
break;
@@ -793,20 +841,172 @@ void R600TargetLowering::ReplaceNodeResults(SDNode *N,
return;
case ISD::FP_TO_UINT: Results.push_back(LowerFPTOUINT(N->getOperand(0), DAG));
return;
- case ISD::LOAD: {
- SDNode *Node = LowerLOAD(SDValue(N, 0), DAG).getNode();
- Results.push_back(SDValue(Node, 0));
- Results.push_back(SDValue(Node, 1));
- // XXX: LLVM seems not to replace Chain Value inside CustomWidenLowerNode
- // function
- DAG.ReplaceAllUsesOfValueWith(SDValue(N,1), SDValue(Node, 1));
- return;
+ case ISD::UDIV: {
+ SDValue Op = SDValue(N, 0);
+ SDLoc DL(Op);
+ EVT VT = Op.getValueType();
+ SDValue UDIVREM = DAG.getNode(ISD::UDIVREM, DL, DAG.getVTList(VT, VT),
+ N->getOperand(0), N->getOperand(1));
+ Results.push_back(UDIVREM);
+ break;
}
- case ISD::STORE:
- SDNode *Node = LowerSTORE(SDValue(N, 0), DAG).getNode();
- Results.push_back(SDValue(Node, 0));
- return;
+ case ISD::UREM: {
+ SDValue Op = SDValue(N, 0);
+ SDLoc DL(Op);
+ EVT VT = Op.getValueType();
+ SDValue UDIVREM = DAG.getNode(ISD::UDIVREM, DL, DAG.getVTList(VT, VT),
+ N->getOperand(0), N->getOperand(1));
+ Results.push_back(UDIVREM.getValue(1));
+ break;
+ }
+ case ISD::SDIV: {
+ SDValue Op = SDValue(N, 0);
+ SDLoc DL(Op);
+ EVT VT = Op.getValueType();
+ SDValue SDIVREM = DAG.getNode(ISD::SDIVREM, DL, DAG.getVTList(VT, VT),
+ N->getOperand(0), N->getOperand(1));
+ Results.push_back(SDIVREM);
+ break;
+ }
+ case ISD::SREM: {
+ SDValue Op = SDValue(N, 0);
+ SDLoc DL(Op);
+ EVT VT = Op.getValueType();
+ SDValue SDIVREM = DAG.getNode(ISD::SDIVREM, DL, DAG.getVTList(VT, VT),
+ N->getOperand(0), N->getOperand(1));
+ Results.push_back(SDIVREM.getValue(1));
+ break;
+ }
+ case ISD::SDIVREM: {
+ SDValue Op = SDValue(N, 1);
+ SDValue RES = LowerSDIVREM(Op, DAG);
+ Results.push_back(RES);
+ Results.push_back(RES.getValue(1));
+ break;
+ }
+ case ISD::UDIVREM: {
+ SDValue Op = SDValue(N, 0);
+ SDLoc DL(Op);
+ EVT VT = Op.getValueType();
+ EVT HalfVT = VT.getHalfSizedIntegerVT(*DAG.getContext());
+
+ SDValue one = DAG.getConstant(1, HalfVT);
+ SDValue zero = DAG.getConstant(0, HalfVT);
+
+ //HiLo split
+ SDValue LHS = N->getOperand(0);
+ SDValue LHS_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, LHS, zero);
+ SDValue LHS_Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, LHS, one);
+
+ SDValue RHS = N->getOperand(1);
+ SDValue RHS_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, RHS, zero);
+ SDValue RHS_Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, RHS, one);
+
+ // Get Speculative values
+ SDValue DIV_Part = DAG.getNode(ISD::UDIV, DL, HalfVT, LHS_Hi, RHS_Lo);
+ SDValue REM_Part = DAG.getNode(ISD::UREM, DL, HalfVT, LHS_Hi, RHS_Lo);
+
+ SDValue REM_Hi = zero;
+ SDValue REM_Lo = DAG.getSelectCC(DL, RHS_Hi, zero, REM_Part, LHS_Hi, ISD::SETEQ);
+
+ SDValue DIV_Hi = DAG.getSelectCC(DL, RHS_Hi, zero, DIV_Part, zero, ISD::SETEQ);
+ SDValue DIV_Lo = zero;
+
+ const unsigned halfBitWidth = HalfVT.getSizeInBits();
+
+ for (unsigned i = 0; i < halfBitWidth; ++i) {
+ SDValue POS = DAG.getConstant(halfBitWidth - i - 1, HalfVT);
+ // Get Value of high bit
+ SDValue HBit;
+ if (halfBitWidth == 32 && Subtarget->hasBFE()) {
+ HBit = DAG.getNode(AMDGPUISD::BFE_U32, DL, HalfVT, LHS_Lo, POS, one);
+ } else {
+ HBit = DAG.getNode(ISD::SRL, DL, HalfVT, LHS_Lo, POS);
+ HBit = DAG.getNode(ISD::AND, DL, HalfVT, HBit, one);
+ }
+
+ SDValue Carry = DAG.getNode(ISD::SRL, DL, HalfVT, REM_Lo,
+ DAG.getConstant(halfBitWidth - 1, HalfVT));
+ REM_Hi = DAG.getNode(ISD::SHL, DL, HalfVT, REM_Hi, one);
+ REM_Hi = DAG.getNode(ISD::OR, DL, HalfVT, REM_Hi, Carry);
+
+ REM_Lo = DAG.getNode(ISD::SHL, DL, HalfVT, REM_Lo, one);
+ REM_Lo = DAG.getNode(ISD::OR, DL, HalfVT, REM_Lo, HBit);
+
+
+ SDValue REM = DAG.getNode(ISD::BUILD_PAIR, DL, VT, REM_Lo, REM_Hi);
+
+ SDValue BIT = DAG.getConstant(1 << (halfBitWidth - i - 1), HalfVT);
+ SDValue realBIT = DAG.getSelectCC(DL, REM, RHS, BIT, zero, ISD::SETGE);
+
+ DIV_Lo = DAG.getNode(ISD::OR, DL, HalfVT, DIV_Lo, realBIT);
+
+ // Update REM
+
+ SDValue REM_sub = DAG.getNode(ISD::SUB, DL, VT, REM, RHS);
+
+ REM = DAG.getSelectCC(DL, REM, RHS, REM_sub, REM, ISD::SETGE);
+ REM_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, REM, zero);
+ REM_Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, REM, one);
+ }
+
+ SDValue REM = DAG.getNode(ISD::BUILD_PAIR, DL, VT, REM_Lo, REM_Hi);
+ SDValue DIV = DAG.getNode(ISD::BUILD_PAIR, DL, VT, DIV_Lo, DIV_Hi);
+ Results.push_back(DIV);
+ Results.push_back(REM);
+ break;
}
+ }
+}
+
+SDValue R600TargetLowering::vectorToVerticalVector(SelectionDAG &DAG,
+ SDValue Vector) const {
+
+ SDLoc DL(Vector);
+ EVT VecVT = Vector.getValueType();
+ EVT EltVT = VecVT.getVectorElementType();
+ SmallVector<SDValue, 8> Args;
+
+ for (unsigned i = 0, e = VecVT.getVectorNumElements();
+ i != e; ++i) {
+ Args.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT,
+ Vector, DAG.getConstant(i, getVectorIdxTy())));
+ }
+
+ return DAG.getNode(AMDGPUISD::BUILD_VERTICAL_VECTOR, DL, VecVT, Args);
+}
+
+SDValue R600TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
+ SelectionDAG &DAG) const {
+
+ SDLoc DL(Op);
+ SDValue Vector = Op.getOperand(0);
+ SDValue Index = Op.getOperand(1);
+
+ if (isa<ConstantSDNode>(Index) ||
+ Vector.getOpcode() == AMDGPUISD::BUILD_VERTICAL_VECTOR)
+ return Op;
+
+ Vector = vectorToVerticalVector(DAG, Vector);
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, Op.getValueType(),
+ Vector, Index);
+}
+
+SDValue R600TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ SDValue Vector = Op.getOperand(0);
+ SDValue Value = Op.getOperand(1);
+ SDValue Index = Op.getOperand(2);
+
+ if (isa<ConstantSDNode>(Index) ||
+ Vector.getOpcode() == AMDGPUISD::BUILD_VERTICAL_VECTOR)
+ return Op;
+
+ Vector = vectorToVerticalVector(DAG, Vector);
+ SDValue Insert = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, Op.getValueType(),
+ Vector, Value, Index);
+ return vectorToVerticalVector(DAG, Insert);
}
SDValue R600TargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const {
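
The new ISD::UDIVREM expansion in ReplaceNodeResults is restoring long division: each iteration shifts one more dividend bit into the remainder and sets the matching quotient bit whenever the remainder reaches the divisor (the SETGE selects). A plain 64-bit C++ sketch of the core loop, without the speculative high-half divide or the BFE_U32 bit extraction that the DAG version adds:

    #include <cassert>
    #include <cstdint>

    // Restoring long division: one dividend bit per step. Requires rhs != 0.
    void udivrem64(uint64_t lhs, uint64_t rhs, uint64_t &div, uint64_t &rem) {
      div = 0;
      rem = 0;
      for (int i = 63; i >= 0; --i) {
        rem = (rem << 1) | ((lhs >> i) & 1);   // shift in the next dividend bit
        if (rem >= rhs) {                      // the SETGE select in the loop
          rem -= rhs;
          div |= 1ull << i;                    // set the matching quotient bit
        }
      }
    }

    int main() {
      uint64_t q, r;
      udivrem64(1000000000000ull, 7ull, q, r);
      assert(q == 1000000000000ull / 7 && r == 1000000000000ull % 7);
    }
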
@@ -840,6 +1040,80 @@ SDValue R600TargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const {
DAG.getConstantFP(3.14159265359, MVT::f32));
}
+SDValue R600TargetLowering::LowerSHLParts(SDValue Op, SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ EVT VT = Op.getValueType();
+
+ SDValue Lo = Op.getOperand(0);
+ SDValue Hi = Op.getOperand(1);
+ SDValue Shift = Op.getOperand(2);
+ SDValue Zero = DAG.getConstant(0, VT);
+ SDValue One = DAG.getConstant(1, VT);
+
+ SDValue Width = DAG.getConstant(VT.getSizeInBits(), VT);
+ SDValue Width1 = DAG.getConstant(VT.getSizeInBits() - 1, VT);
+ SDValue BigShift = DAG.getNode(ISD::SUB, DL, VT, Shift, Width);
+ SDValue CompShift = DAG.getNode(ISD::SUB, DL, VT, Width1, Shift);
+
+ // The dance around Width1 is necessary for the shift == 0 special case.
+ // Without it, CompShift could be 32, producing an undefined result in
+ // Overflow. So we do the shift in two steps; the alternative would be to
+ // add a conditional to filter out the special case.
+
+ SDValue Overflow = DAG.getNode(ISD::SRL, DL, VT, Lo, CompShift);
+ Overflow = DAG.getNode(ISD::SRL, DL, VT, Overflow, One);
+
+ SDValue HiSmall = DAG.getNode(ISD::SHL, DL, VT, Hi, Shift);
+ HiSmall = DAG.getNode(ISD::OR, DL, VT, HiSmall, Overflow);
+ SDValue LoSmall = DAG.getNode(ISD::SHL, DL, VT, Lo, Shift);
+
+ SDValue HiBig = DAG.getNode(ISD::SHL, DL, VT, Lo, BigShift);
+ SDValue LoBig = Zero;
+
+ Hi = DAG.getSelectCC(DL, Shift, Width, HiSmall, HiBig, ISD::SETULT);
+ Lo = DAG.getSelectCC(DL, Shift, Width, LoSmall, LoBig, ISD::SETULT);
+
+ return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT,VT), Lo, Hi);
+}
+
+SDValue R600TargetLowering::LowerSRXParts(SDValue Op, SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ EVT VT = Op.getValueType();
+
+ SDValue Lo = Op.getOperand(0);
+ SDValue Hi = Op.getOperand(1);
+ SDValue Shift = Op.getOperand(2);
+ SDValue Zero = DAG.getConstant(0, VT);
+ SDValue One = DAG.getConstant(1, VT);
+
+ const bool SRA = Op.getOpcode() == ISD::SRA_PARTS;
+
+ SDValue Width = DAG.getConstant(VT.getSizeInBits(), VT);
+ SDValue Width1 = DAG.getConstant(VT.getSizeInBits() - 1, VT);
+ SDValue BigShift = DAG.getNode(ISD::SUB, DL, VT, Shift, Width);
+ SDValue CompShift = DAG.getNode(ISD::SUB, DL, VT, Width1, Shift);
+
+ // The dance around Width1 is necessary for the shift == 0 special case.
+ // Without it, CompShift could be 32, producing an undefined result in
+ // Overflow. So we do the shift in two steps; the alternative would be to
+ // add a conditional to filter out the special case.
+
+ SDValue Overflow = DAG.getNode(ISD::SHL, DL, VT, Hi, CompShift);
+ Overflow = DAG.getNode(ISD::SHL, DL, VT, Overflow, One);
+
+ SDValue HiSmall = DAG.getNode(SRA ? ISD::SRA : ISD::SRL, DL, VT, Hi, Shift);
+ SDValue LoSmall = DAG.getNode(ISD::SRL, DL, VT, Lo, Shift);
+ LoSmall = DAG.getNode(ISD::OR, DL, VT, LoSmall, Overflow);
+
+ SDValue LoBig = DAG.getNode(SRA ? ISD::SRA : ISD::SRL, DL, VT, Hi, BigShift);
+ SDValue HiBig = SRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, Width1) : Zero;
+
+ Hi = DAG.getSelectCC(DL, Shift, Width, HiSmall, HiBig, ISD::SETULT);
+ Lo = DAG.getSelectCC(DL, Shift, Width, LoSmall, LoBig, ISD::SETULT);
+
+ return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT,VT), Lo, Hi);
+}
+
SDValue R600TargetLowering::LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const {
return DAG.getNode(
ISD::SETCC,
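
LowerSHLParts and LowerSRXParts build a 64-bit shift from 32-bit halves. Because SelectionDAG has no control flow, both the shift < 32 and shift >= 32 results are computed and a SETULT select picks one; the Width1 comment refers to splitting the complementary shift into (31 - shift) plus one more step, so shift == 0 never asks for a shift by 32. A C++ sketch of the left-shift case, written with an explicit branch instead of the selects so it stays free of out-of-range shifts:

    #include <cassert>
    #include <cstdint>

    // 64-bit shift left assembled from 32-bit operations, mirroring the
    // data flow of LowerSHLParts. Requires shift < 64.
    uint64_t shl64_parts(uint32_t lo, uint32_t hi, uint32_t shift) {
      assert(shift < 64);
      if (shift < 32) {
        // The (31 - shift) + 1 split is the "Width1 dance": it keeps the
        // complementary right shift in range even when shift == 0.
        uint32_t overflow = (lo >> (31 - shift)) >> 1;  // low bits that spill into hi
        uint32_t hiOut = (hi << shift) | overflow;
        uint32_t loOut = lo << shift;
        return ((uint64_t)hiOut << 32) | loOut;
      }
      // shift >= 32: the low word moves entirely into the high word.
      uint32_t hiOut = lo << (shift - 32);
      return (uint64_t)hiOut << 32;
    }

    int main() {
      assert(shl64_parts(0x89abcdefu, 0x01234567u, 0)  == 0x0123456789abcdefull);
      assert(shl64_parts(0x89abcdefu, 0x01234567u, 8)  == 0x23456789abcdef00ull);
      assert(shl64_parts(0x89abcdefu, 0x01234567u, 40) == 0xabcdef0000000000ull);
    }
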
@@ -1369,6 +1643,15 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
return DAG.getMergeValues(Ops, DL);
}
+SDValue R600TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
+ SDValue Chain = Op.getOperand(0);
+ SDValue Cond = Op.getOperand(1);
+ SDValue Jump = Op.getOperand(2);
+
+ return DAG.getNode(AMDGPUISD::BRANCH_COND, SDLoc(Op), Op.getValueType(),
+ Chain, Jump, Cond);
+}
+
/// XXX Only kernel functions are supported, so we can assume for now that
/// every function is a kernel function, but in the future we should use
/// separate calling conventions for kernel and non-kernel functions.
@@ -1902,9 +2185,8 @@ SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node,
SDValue FakeOp;
std::vector<SDValue> Ops;
- for(SDNode::op_iterator I = Node->op_begin(), E = Node->op_end();
- I != E; ++I)
- Ops.push_back(*I);
+ for (const SDUse &I : Node->ops())
+ Ops.push_back(I);
if (Opcode == AMDGPU::DOT_4) {
int OperandIdx[] = {
diff --git a/lib/Target/R600/R600ISelLowering.h b/lib/Target/R600/R600ISelLowering.h
index a8a464f..d22c8c9 100644
--- a/lib/Target/R600/R600ISelLowering.h
+++ b/lib/Target/R600/R600ISelLowering.h
@@ -51,15 +51,18 @@ private:
void lowerImplicitParameter(MachineInstr *MI, MachineBasicBlock &BB,
MachineRegisterInfo & MRI, unsigned dword_offset) const;
SDValue OptimizeSwizzle(SDValue BuildVector, SDValue Swz[], SelectionDAG &DAG) const;
+ SDValue vectorToVerticalVector(SelectionDAG &DAG, SDValue Vector) const;
- /// \brief Lower ROTL opcode to BITALIGN
- SDValue LowerROTL(SDValue Op, SelectionDAG &DAG) const;
-
+ SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerTrig(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSHLParts(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSRXParts(SDValue Op, SelectionDAG &DAG) const;
SDValue stackPtrToRegIndex(SDValue Ptr, unsigned StackWidth,
SelectionDAG &DAG) const;
diff --git a/lib/Target/R600/R600InstrInfo.cpp b/lib/Target/R600/R600InstrInfo.cpp
index b0d9ae3..3972e2f 100644
--- a/lib/Target/R600/R600InstrInfo.cpp
+++ b/lib/Target/R600/R600InstrInfo.cpp
@@ -28,10 +28,9 @@ using namespace llvm;
#define GET_INSTRINFO_CTOR_DTOR
#include "AMDGPUGenDFAPacketizer.inc"
-R600InstrInfo::R600InstrInfo(AMDGPUTargetMachine &tm)
- : AMDGPUInstrInfo(tm),
- RI(tm),
- ST(tm.getSubtarget<AMDGPUSubtarget>())
+R600InstrInfo::R600InstrInfo(const AMDGPUSubtarget &st)
+ : AMDGPUInstrInfo(st),
+ RI(st)
{ }
const R600RegisterInfo &R600InstrInfo::getRegisterInfo() const {
@@ -52,11 +51,15 @@ R600InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
unsigned DestReg, unsigned SrcReg,
bool KillSrc) const {
unsigned VectorComponents = 0;
- if (AMDGPU::R600_Reg128RegClass.contains(DestReg) &&
- AMDGPU::R600_Reg128RegClass.contains(SrcReg)) {
+ if ((AMDGPU::R600_Reg128RegClass.contains(DestReg) ||
+ AMDGPU::R600_Reg128VerticalRegClass.contains(DestReg)) &&
+ (AMDGPU::R600_Reg128RegClass.contains(SrcReg) ||
+ AMDGPU::R600_Reg128VerticalRegClass.contains(SrcReg))) {
VectorComponents = 4;
- } else if(AMDGPU::R600_Reg64RegClass.contains(DestReg) &&
- AMDGPU::R600_Reg64RegClass.contains(SrcReg)) {
+ } else if((AMDGPU::R600_Reg64RegClass.contains(DestReg) ||
+ AMDGPU::R600_Reg64VerticalRegClass.contains(DestReg)) &&
+ (AMDGPU::R600_Reg64RegClass.contains(SrcReg) ||
+ AMDGPU::R600_Reg64VerticalRegClass.contains(SrcReg))) {
VectorComponents = 2;
}
@@ -768,16 +771,6 @@ R600InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
return true;
}
-int R600InstrInfo::getBranchInstr(const MachineOperand &op) const {
- const MachineInstr *MI = op.getParent();
-
- switch (MI->getDesc().OpInfo->RegClass) {
- default: // FIXME: fallthrough??
- case AMDGPU::GPRI32RegClassID: return AMDGPU::BRANCH_COND_i32;
- case AMDGPU::GPRF32RegClassID: return AMDGPU::BRANCH_COND_f32;
- };
-}
-
static
MachineBasicBlock::iterator FindLastAluClause(MachineBasicBlock &MBB) {
for (MachineBasicBlock::reverse_iterator It = MBB.rbegin(), E = MBB.rend();
@@ -1064,10 +1057,34 @@ unsigned int R600InstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
return 2;
}
+bool R600InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
+
+ switch(MI->getOpcode()) {
+ default: return AMDGPUInstrInfo::expandPostRAPseudo(MI);
+ case AMDGPU::R600_EXTRACT_ELT_V2:
+ case AMDGPU::R600_EXTRACT_ELT_V4:
+ buildIndirectRead(MI->getParent(), MI, MI->getOperand(0).getReg(),
+ RI.getHWRegIndex(MI->getOperand(1).getReg()), // Address
+ MI->getOperand(2).getReg(),
+ RI.getHWRegChan(MI->getOperand(1).getReg()));
+ break;
+ case AMDGPU::R600_INSERT_ELT_V2:
+ case AMDGPU::R600_INSERT_ELT_V4:
+ buildIndirectWrite(MI->getParent(), MI, MI->getOperand(2).getReg(), // Value
+ RI.getHWRegIndex(MI->getOperand(1).getReg()), // Address
+ MI->getOperand(3).getReg(), // Offset
+ RI.getHWRegChan(MI->getOperand(1).getReg())); // Channel
+ break;
+ }
+ MI->eraseFromParent();
+ return true;
+}
+
void R600InstrInfo::reserveIndirectRegisters(BitVector &Reserved,
const MachineFunction &MF) const {
const AMDGPUFrameLowering *TFL =
- static_cast<const AMDGPUFrameLowering*>(TM.getFrameLowering());
+ static_cast<const AMDGPUFrameLowering*>(
+ MF.getTarget().getFrameLowering());
unsigned StackWidth = TFL->getStackWidth(MF);
int End = getIndirectIndexEnd(MF);
@@ -1100,7 +1117,22 @@ MachineInstrBuilder R600InstrInfo::buildIndirectWrite(MachineBasicBlock *MBB,
MachineBasicBlock::iterator I,
unsigned ValueReg, unsigned Address,
unsigned OffsetReg) const {
- unsigned AddrReg = AMDGPU::R600_AddrRegClass.getRegister(Address);
+ return buildIndirectWrite(MBB, I, ValueReg, Address, OffsetReg, 0);
+}
+
+MachineInstrBuilder R600InstrInfo::buildIndirectWrite(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator I,
+ unsigned ValueReg, unsigned Address,
+ unsigned OffsetReg,
+ unsigned AddrChan) const {
+ unsigned AddrReg;
+ switch (AddrChan) {
+ default: llvm_unreachable("Invalid Channel");
+ case 0: AddrReg = AMDGPU::R600_AddrRegClass.getRegister(Address); break;
+ case 1: AddrReg = AMDGPU::R600_Addr_YRegClass.getRegister(Address); break;
+ case 2: AddrReg = AMDGPU::R600_Addr_ZRegClass.getRegister(Address); break;
+ case 3: AddrReg = AMDGPU::R600_Addr_WRegClass.getRegister(Address); break;
+ }
MachineInstr *MOVA = buildDefaultInstruction(*MBB, I, AMDGPU::MOVA_INT_eg,
AMDGPU::AR_X, OffsetReg);
setImmOperand(MOVA, AMDGPU::OpName::write, 0);
@@ -1117,7 +1149,22 @@ MachineInstrBuilder R600InstrInfo::buildIndirectRead(MachineBasicBlock *MBB,
MachineBasicBlock::iterator I,
unsigned ValueReg, unsigned Address,
unsigned OffsetReg) const {
- unsigned AddrReg = AMDGPU::R600_AddrRegClass.getRegister(Address);
+ return buildIndirectRead(MBB, I, ValueReg, Address, OffsetReg, 0);
+}
+
+MachineInstrBuilder R600InstrInfo::buildIndirectRead(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator I,
+ unsigned ValueReg, unsigned Address,
+ unsigned OffsetReg,
+ unsigned AddrChan) const {
+ unsigned AddrReg;
+ switch (AddrChan) {
+ default: llvm_unreachable("Invalid Channel");
+ case 0: AddrReg = AMDGPU::R600_AddrRegClass.getRegister(Address); break;
+ case 1: AddrReg = AMDGPU::R600_Addr_YRegClass.getRegister(Address); break;
+ case 2: AddrReg = AMDGPU::R600_Addr_ZRegClass.getRegister(Address); break;
+ case 3: AddrReg = AMDGPU::R600_Addr_WRegClass.getRegister(Address); break;
+ }
MachineInstr *MOVA = buildDefaultInstruction(*MBB, I, AMDGPU::MOVA_INT_eg,
AMDGPU::AR_X,
OffsetReg);
@@ -1220,7 +1267,6 @@ MachineInstr *R600InstrInfo::buildSlotOfVectorInstruction(
const {
assert (MI->getOpcode() == AMDGPU::DOT_4 && "Not Implemented");
unsigned Opcode;
- const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
if (ST.getGeneration() <= AMDGPUSubtarget::R700)
Opcode = AMDGPU::DOT4_r600;
else
diff --git a/lib/Target/R600/R600InstrInfo.h b/lib/Target/R600/R600InstrInfo.h
index b5304a0..45a57d3 100644
--- a/lib/Target/R600/R600InstrInfo.h
+++ b/lib/Target/R600/R600InstrInfo.h
@@ -32,12 +32,22 @@ namespace llvm {
class R600InstrInfo : public AMDGPUInstrInfo {
private:
const R600RegisterInfo RI;
- const AMDGPUSubtarget &ST;
- int getBranchInstr(const MachineOperand &op) const;
std::vector<std::pair<int, unsigned> >
ExtractSrcs(MachineInstr *MI, const DenseMap<unsigned, unsigned> &PV, unsigned &ConstCount) const;
+
+ MachineInstrBuilder buildIndirectRead(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator I,
+ unsigned ValueReg, unsigned Address,
+ unsigned OffsetReg,
+ unsigned AddrChan) const;
+
+ MachineInstrBuilder buildIndirectWrite(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator I,
+ unsigned ValueReg, unsigned Address,
+ unsigned OffsetReg,
+ unsigned AddrChan) const;
public:
enum BankSwizzle {
ALU_VEC_012_SCL_210 = 0,
@@ -48,7 +58,7 @@ namespace llvm {
ALU_VEC_210
};
- explicit R600InstrInfo(AMDGPUTargetMachine &tm);
+ explicit R600InstrInfo(const AMDGPUSubtarget &st);
const R600RegisterInfo &getRegisterInfo() const override;
void copyPhysReg(MachineBasicBlock &MBB,
@@ -197,6 +207,8 @@ namespace llvm {
int getInstrLatency(const InstrItineraryData *ItinData,
SDNode *Node) const override { return 1;}
+ virtual bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const;
+
/// \brief Reserve the registers that may be accessed using indirect addressing.
void reserveIndirectRegisters(BitVector &Reserved,
const MachineFunction &MF) const;
diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td
index 590fde2..73fa345 100644
--- a/lib/Target/R600/R600Instructions.td
+++ b/lib/Target/R600/R600Instructions.td
@@ -125,7 +125,7 @@ class R600_1OP <bits<11> inst, string opName, list<dag> pattern,
class R600_1OP_Helper <bits<11> inst, string opName, SDPatternOperator node,
InstrItinClass itin = AnyALU> :
R600_1OP <inst, opName,
- [(set R600_Reg32:$dst, (node R600_Reg32:$src0))]
+ [(set R600_Reg32:$dst, (node R600_Reg32:$src0))], itin
>;
// If you add or change the operands for R600_2OP instructions, you must
@@ -161,10 +161,10 @@ class R600_2OP <bits<11> inst, string opName, list<dag> pattern,
}
class R600_2OP_Helper <bits<11> inst, string opName, SDPatternOperator node,
- InstrItinClass itim = AnyALU> :
+ InstrItinClass itin = AnyALU> :
R600_2OP <inst, opName,
[(set R600_Reg32:$dst, (node R600_Reg32:$src0,
- R600_Reg32:$src1))]
+ R600_Reg32:$src1))], itin
>;
// If you add or change the operands for R600_3OP instructions, you must
@@ -721,14 +721,11 @@ def SETNE_DX10 : R600_2OP <
>;
def FRACT : R600_1OP_Helper <0x10, "FRACT", AMDGPUfract>;
-def TRUNC : R600_1OP_Helper <0x11, "TRUNC", int_AMDGPU_trunc>;
+def TRUNC : R600_1OP_Helper <0x11, "TRUNC", ftrunc>;
def CEIL : R600_1OP_Helper <0x12, "CEIL", fceil>;
def RNDNE : R600_1OP_Helper <0x13, "RNDNE", frint>;
def FLOOR : R600_1OP_Helper <0x14, "FLOOR", ffloor>;
-// Add also ftrunc intrinsic pattern
-def : Pat<(ftrunc f32:$src0), (TRUNC $src0)>;
-
def MOV : R600_1OP <0x19, "MOV", []>;
let isPseudo = 1, isCodeGenOnly = 1, usesCustomInserter = 1 in {
@@ -1082,18 +1079,21 @@ class RECIP_UINT_Common <bits<11> inst> : R600_1OP_Helper <
let Itinerary = TransALU;
}
+// Clamped to maximum.
class RECIPSQRT_CLAMPED_Common <bits<11> inst> : R600_1OP_Helper <
- inst, "RECIPSQRT_CLAMPED", int_AMDGPU_rsq
+ inst, "RECIPSQRT_CLAMPED", AMDGPUrsq_clamped
> {
let Itinerary = TransALU;
}
-class RECIPSQRT_IEEE_Common <bits<11> inst> : R600_1OP <
- inst, "RECIPSQRT_IEEE", []
+class RECIPSQRT_IEEE_Common <bits<11> inst> : R600_1OP_Helper <
+ inst, "RECIPSQRT_IEEE", AMDGPUrsq_legacy
> {
let Itinerary = TransALU;
}
+// TODO: There is also RECIPSQRT_FF which clamps to zero.
+
class SIN_Common <bits<11> inst> : R600_1OP <
inst, "SIN", [(set f32:$dst, (SIN_HW f32:$src0))]>{
let Trig = 1;
@@ -1266,13 +1266,6 @@ defm R600_ : RegisterLoadStore <R600_Reg32, FRAMEri, ADDRIndirect>;
//===----------------------------------------------------------------------===//
-// Branch Instructions
-//===----------------------------------------------------------------------===//
-
-def IF_PREDICATE_SET : ILFormat<(outs), (ins GPRI32:$src),
- "IF_PREDICATE_SET $src", []>;
-
-//===----------------------------------------------------------------------===//
// Pseudo instructions
//===----------------------------------------------------------------------===//
@@ -1345,15 +1338,6 @@ def TXD_SHADOW: InstR600 <
} // End isPseudo = 1
} // End usesCustomInserter = 1
-//===---------------------------------------------------------------------===//
-// Return instruction
-//===---------------------------------------------------------------------===//
-let isTerminator = 1, isReturn = 1, hasCtrlDep = 1,
- usesCustomInserter = 1 in {
- def RETURN : ILFormat<(outs), (ins variable_ops),
- "RETURN", [(IL_retflag)]>;
-}
-
//===----------------------------------------------------------------------===//
// Constant Buffer Addressing Support
@@ -1480,11 +1464,52 @@ let Inst{63-32} = Word1;
let VTXInst = 1;
}
+//===---------------------------------------------------------------------===//
+// Flow and Program control Instructions
+//===---------------------------------------------------------------------===//
+class ILFormat<dag outs, dag ins, string asmstr, list<dag> pattern>
+: Instruction {
+
+ let Namespace = "AMDGPU";
+ dag OutOperandList = outs;
+ dag InOperandList = ins;
+ let Pattern = pattern;
+ let AsmString = !strconcat(asmstr, "\n");
+ let isPseudo = 1;
+ let Itinerary = NullALU;
+ bit hasIEEEFlag = 0;
+ bit hasZeroOpFlag = 0;
+ let mayLoad = 0;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+}
+
+multiclass BranchConditional<SDNode Op, RegisterClass rci, RegisterClass rcf> {
+ def _i32 : ILFormat<(outs),
+ (ins brtarget:$target, rci:$src0),
+ "; i32 Pseudo branch instruction",
+ [(Op bb:$target, (i32 rci:$src0))]>;
+ def _f32 : ILFormat<(outs),
+ (ins brtarget:$target, rcf:$src0),
+ "; f32 Pseudo branch instruction",
+ [(Op bb:$target, (f32 rcf:$src0))]>;
+}
+
+// Only scalar types should generate flow control
+multiclass BranchInstr<string name> {
+ def _i32 : ILFormat<(outs), (ins R600_Reg32:$src),
+ !strconcat(name, " $src"), []>;
+ def _f32 : ILFormat<(outs), (ins R600_Reg32:$src),
+ !strconcat(name, " $src"), []>;
+}
+// Only scalar types should generate flow control
+multiclass BranchInstr2<string name> {
+ def _i32 : ILFormat<(outs), (ins R600_Reg32:$src0, R600_Reg32:$src1),
+ !strconcat(name, " $src0, $src1"), []>;
+ def _f32 : ILFormat<(outs), (ins R600_Reg32:$src0, R600_Reg32:$src1),
+ !strconcat(name, " $src0, $src1"), []>;
+}
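For illustration only, a sketch of how the multiclasses above are meant to be instantiated; the defm names and the IL_brcond SDNode used here are assumptions for the example, not definitions added by this patch:

// Hypothetical instantiations (assumed names; not part of the diff).
// BranchConditional expands to an _i32 and an _f32 pseudo that match the
// branch SDNode against a block target and a scalar condition register.
defm IF_LOGICALNZ : BranchConditional<IL_brcond, R600_Reg32, R600_Reg32>;

// BranchInstr/BranchInstr2 expand to _i32/_f32 pseudos with one or two
// condition operands, respectively; only the asm string differs.
defm BREAK_LOGICALZ : BranchInstr<"BREAK_LOGICALZ">;
defm BREAKC : BranchInstr2<"BREAKC">;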
-
-//===--------------------------------------------------------------------===//
-// Instructions support
-//===--------------------------------------------------------------------===//
//===---------------------------------------------------------------------===//
// Custom Inserter for Branches and returns, this eventually will be a
// separate pass
@@ -1497,13 +1522,22 @@ let isTerminator = 1, usesCustomInserter = 1, isBranch = 1, isBarrier = 1 in {
}
//===---------------------------------------------------------------------===//
-// Flow and Program control Instructions
+// Return instruction
//===---------------------------------------------------------------------===//
+let isTerminator = 1, isReturn = 1, hasCtrlDep = 1,
+ usesCustomInserter = 1 in {
+ def RETURN : ILFormat<(outs), (ins variable_ops),
+ "RETURN", [(IL_retflag)]>;
+}
+
+//===----------------------------------------------------------------------===//
+// Branch Instructions
+//===----------------------------------------------------------------------===//
+
+def IF_PREDICATE_SET : ILFormat<(outs), (ins R600_Reg32:$src),
+ "IF_PREDICATE_SET $src", []>;
+
let isTerminator=1 in {
- def SWITCH : ILFormat< (outs), (ins GPRI32:$src),
- !strconcat("SWITCH", " $src"), []>;
- def CASE : ILFormat< (outs), (ins GPRI32:$src),
- !strconcat("CASE", " $src"), []>;
def BREAK : ILFormat< (outs), (ins),
"BREAK", []>;
def CONTINUE : ILFormat< (outs), (ins),
@@ -1548,6 +1582,60 @@ let isTerminator=1 in {
}
//===----------------------------------------------------------------------===//
+// Indirect addressing pseudo instructions
+//===----------------------------------------------------------------------===//
+
+let isPseudo = 1 in {
+
+class ExtractVertical <RegisterClass vec_rc> : InstR600 <
+ (outs R600_Reg32:$dst),
+ (ins vec_rc:$vec, R600_Reg32:$index), "",
+ [],
+ AnyALU
+>;
+
+let Constraints = "$dst = $vec" in {
+
+class InsertVertical <RegisterClass vec_rc> : InstR600 <
+ (outs vec_rc:$dst),
+ (ins vec_rc:$vec, R600_Reg32:$value, R600_Reg32:$index), "",
+ [],
+ AnyALU
+>;
+
+} // End Constraints = "$dst = $vec"
+
+} // End isPseudo = 1
+
+def R600_EXTRACT_ELT_V2 : ExtractVertical <R600_Reg64Vertical>;
+def R600_EXTRACT_ELT_V4 : ExtractVertical <R600_Reg128Vertical>;
+
+def R600_INSERT_ELT_V2 : InsertVertical <R600_Reg64Vertical>;
+def R600_INSERT_ELT_V4 : InsertVertical <R600_Reg128Vertical>;
+
+class ExtractVerticalPat <Instruction inst, ValueType vec_ty,
+ ValueType scalar_ty> : Pat <
+ (scalar_ty (extractelt vec_ty:$vec, i32:$index)),
+ (inst $vec, $index)
+>;
+
+def : ExtractVerticalPat <R600_EXTRACT_ELT_V2, v2i32, i32>;
+def : ExtractVerticalPat <R600_EXTRACT_ELT_V2, v2f32, f32>;
+def : ExtractVerticalPat <R600_EXTRACT_ELT_V4, v4i32, i32>;
+def : ExtractVerticalPat <R600_EXTRACT_ELT_V4, v4f32, f32>;
+
+class InsertVerticalPat <Instruction inst, ValueType vec_ty,
+ ValueType scalar_ty> : Pat <
+ (vec_ty (insertelt vec_ty:$vec, scalar_ty:$value, i32:$index)),
+ (inst $vec, $value, $index)
+>;
+
+def : InsertVerticalPat <R600_INSERT_ELT_V2, v2i32, i32>;
+def : InsertVerticalPat <R600_INSERT_ELT_V2, v2f32, f32>;
+def : InsertVerticalPat <R600_INSERT_ELT_V4, v4i32, i32>;
+def : InsertVerticalPat <R600_INSERT_ELT_V4, v4f32, f32>;
+
+//===----------------------------------------------------------------------===//
// ISel Patterns
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/R600/R600MachineScheduler.cpp b/lib/Target/R600/R600MachineScheduler.cpp
index d1655d1..7ea654c 100644
--- a/lib/Target/R600/R600MachineScheduler.cpp
+++ b/lib/Target/R600/R600MachineScheduler.cpp
@@ -13,6 +13,7 @@
//===----------------------------------------------------------------------===//
#include "R600MachineScheduler.h"
+#include "AMDGPUSubtarget.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Pass.h"
diff --git a/lib/Target/R600/R600Packetizer.cpp b/lib/Target/R600/R600Packetizer.cpp
index c2f6c03..74cf309 100644
--- a/lib/Target/R600/R600Packetizer.cpp
+++ b/lib/Target/R600/R600Packetizer.cpp
@@ -16,6 +16,7 @@
#include "llvm/Support/Debug.h"
#include "AMDGPU.h"
+#include "AMDGPUSubtarget.h"
#include "R600InstrInfo.h"
#include "llvm/CodeGen/DFAPacketizer.h"
#include "llvm/CodeGen/MachineDominators.h"
diff --git a/lib/Target/R600/R600RegisterInfo.cpp b/lib/Target/R600/R600RegisterInfo.cpp
index f3bb88b..dc95675 100644
--- a/lib/Target/R600/R600RegisterInfo.cpp
+++ b/lib/Target/R600/R600RegisterInfo.cpp
@@ -20,15 +20,14 @@
using namespace llvm;
-R600RegisterInfo::R600RegisterInfo(AMDGPUTargetMachine &tm)
-: AMDGPURegisterInfo(tm),
- TM(tm)
+R600RegisterInfo::R600RegisterInfo(const AMDGPUSubtarget &st)
+: AMDGPURegisterInfo(st)
{ RCW.RegWeight = 0; RCW.WeightLimit = 0;}
BitVector R600RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
BitVector Reserved(getNumRegs());
- const R600InstrInfo *TII = static_cast<const R600InstrInfo*>(TM.getInstrInfo());
+ const R600InstrInfo *TII = static_cast<const R600InstrInfo*>(ST.getInstrInfo());
Reserved.set(AMDGPU::ZERO);
Reserved.set(AMDGPU::HALF);
@@ -55,16 +54,6 @@ BitVector R600RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
return Reserved;
}
-const TargetRegisterClass *
-R600RegisterInfo::getISARegClass(const TargetRegisterClass * rc) const {
- switch (rc->getID()) {
- case AMDGPU::GPRF32RegClassID:
- case AMDGPU::GPRI32RegClassID:
- return &AMDGPU::R600_Reg32RegClass;
- default: return rc;
- }
-}
-
unsigned R600RegisterInfo::getHWRegChan(unsigned reg) const {
return this->getEncodingValue(reg) >> HW_CHAN_SHIFT;
}
diff --git a/lib/Target/R600/R600RegisterInfo.h b/lib/Target/R600/R600RegisterInfo.h
index 52e1a4b..247808b 100644
--- a/lib/Target/R600/R600RegisterInfo.h
+++ b/lib/Target/R600/R600RegisterInfo.h
@@ -16,26 +16,18 @@
#define R600REGISTERINFO_H_
#include "AMDGPURegisterInfo.h"
-#include "AMDGPUTargetMachine.h"
namespace llvm {
-class R600TargetMachine;
+class AMDGPUSubtarget;
struct R600RegisterInfo : public AMDGPURegisterInfo {
- AMDGPUTargetMachine &TM;
RegClassWeight RCW;
- R600RegisterInfo(AMDGPUTargetMachine &tm);
+ R600RegisterInfo(const AMDGPUSubtarget &st);
BitVector getReservedRegs(const MachineFunction &MF) const override;
- /// \param RC is an AMDIL reg class.
- ///
- /// \returns the R600 reg class that is equivalent to \p RC.
- const TargetRegisterClass *getISARegClass(
- const TargetRegisterClass *RC) const override;
-
/// \brief get the HW encoding for a register's channel.
unsigned getHWRegChan(unsigned reg) const;
diff --git a/lib/Target/R600/R600RegisterInfo.td b/lib/Target/R600/R600RegisterInfo.td
index 68bcd20..cc667d9 100644
--- a/lib/Target/R600/R600RegisterInfo.td
+++ b/lib/Target/R600/R600RegisterInfo.td
@@ -18,18 +18,28 @@ class R600RegWithChan <string name, bits<9> sel, string chan> :
class R600Reg_128<string n, list<Register> subregs, bits<16> encoding> :
RegisterWithSubRegs<n, subregs> {
+ field bits<2> chan_encoding = 0;
let Namespace = "AMDGPU";
let SubRegIndices = [sub0, sub1, sub2, sub3];
- let HWEncoding = encoding;
+ let HWEncoding{8-0} = encoding{8-0};
+ let HWEncoding{10-9} = chan_encoding;
}
class R600Reg_64<string n, list<Register> subregs, bits<16> encoding> :
RegisterWithSubRegs<n, subregs> {
+ field bits<2> chan_encoding = 0;
let Namespace = "AMDGPU";
let SubRegIndices = [sub0, sub1];
let HWEncoding = encoding;
+ let HWEncoding{8-0} = encoding{8-0};
+ let HWEncoding{10-9} = chan_encoding;
}
+class R600Reg_64Vertical<int lo, int hi, string chan> : R600Reg_64 <
+ "V"#lo#hi#"_"#chan,
+ [!cast<Register>("T"#lo#"_"#chan), !cast<Register>("T"#hi#"_"#chan)],
+ lo
+>;
foreach Index = 0-127 in {
foreach Chan = [ "X", "Y", "Z", "W" ] in {
@@ -54,6 +64,24 @@ foreach Index = 0-127 in {
Index>;
}
+foreach Chan = [ "X", "Y", "Z", "W"] in {
+
+ let chan_encoding = !if(!eq(Chan, "X"), 0,
+ !if(!eq(Chan, "Y"), 1,
+ !if(!eq(Chan, "Z"), 2,
+ !if(!eq(Chan, "W"), 3, 0)))) in {
+ def V0123_#Chan : R600Reg_128 <"V0123_"#Chan,
+ [!cast<Register>("T0_"#Chan),
+ !cast<Register>("T1_"#Chan),
+ !cast<Register>("T2_"#Chan),
+ !cast<Register>("T3_"#Chan)],
+ 0>;
+ def V01_#Chan : R600Reg_64Vertical<0, 1, Chan>;
+ def V23_#Chan : R600Reg_64Vertical<2, 3, Chan>;
+ }
+}
+
+
// KCACHE_BANK0
foreach Index = 159-128 in {
foreach Chan = [ "X", "Y", "Z", "W" ] in {
@@ -130,8 +158,14 @@ def ALU_PARAM : R600Reg<"Param", 0>;
let isAllocatable = 0 in {
-// XXX: Only use the X channel, until we support wider stack widths
-def R600_Addr : RegisterClass <"AMDGPU", [i32], 127, (add (sequence "Addr%u_X", 0, 127))>;
+def R600_Addr : RegisterClass <"AMDGPU", [i32], 32, (add (sequence "Addr%u_X", 0, 127))>;
+
+// We only use Addr_[YZW] for vertical vectors.
+// FIXME: If we add more vertical vector registers, we will need to add more
+// registers to these classes.
+def R600_Addr_Y : RegisterClass <"AMDGPU", [i32], 32, (add Addr0_Y)>;
+def R600_Addr_Z : RegisterClass <"AMDGPU", [i32], 32, (add Addr0_Z)>;
+def R600_Addr_W : RegisterClass <"AMDGPU", [i32], 32, (add Addr0_W)>;
def R600_LDS_SRC_REG : RegisterClass<"AMDGPU", [i32], 32,
(add OQA, OQB, OQAP, OQBP, LDS_DIRECT_A, LDS_DIRECT_B)>;
@@ -206,5 +240,13 @@ def R600_Reg128 : RegisterClass<"AMDGPU", [v4f32, v4i32], 128,
let CopyCost = -1;
}
+def R600_Reg128Vertical : RegisterClass<"AMDGPU", [v4f32, v4i32], 128,
+ (add V0123_W, V0123_Z, V0123_Y, V0123_X)
+>;
+
def R600_Reg64 : RegisterClass<"AMDGPU", [v2f32, v2i32], 64,
(add (sequence "T%u_XY", 0, 63))>;
+
+def R600_Reg64Vertical : RegisterClass<"AMDGPU", [v2f32, v2i32], 64,
+ (add V01_X, V01_Y, V01_Z, V01_W,
+ V23_X, V23_Y, V23_Z, V23_W)>;
diff --git a/lib/Target/R600/SIAnnotateControlFlow.cpp b/lib/Target/R600/SIAnnotateControlFlow.cpp
index d6e4451..91eb60b 100644
--- a/lib/Target/R600/SIAnnotateControlFlow.cpp
+++ b/lib/Target/R600/SIAnnotateControlFlow.cpp
@@ -65,7 +65,6 @@ class SIAnnotateControlFlow : public FunctionPass {
DominatorTree *DT;
StackVector Stack;
- SSAUpdater PhiInserter;
bool isTopOfStack(BasicBlock *BB);
@@ -81,7 +80,7 @@ class SIAnnotateControlFlow : public FunctionPass {
void insertElse(BranchInst *Term);
- void handleLoopCondition(Value *Cond);
+ Value *handleLoopCondition(Value *Cond, PHINode *Broken);
void handleLoop(BranchInst *Term);
@@ -177,7 +176,7 @@ bool SIAnnotateControlFlow::isElse(PHINode *Phi) {
} else {
if (Phi->getIncomingValue(i) != BoolFalse)
return false;
-
+
}
}
return true;
@@ -204,20 +203,26 @@ void SIAnnotateControlFlow::insertElse(BranchInst *Term) {
}
/// \brief Recursively handle the condition leading to a loop
-void SIAnnotateControlFlow::handleLoopCondition(Value *Cond) {
+Value *SIAnnotateControlFlow::handleLoopCondition(Value *Cond, PHINode *Broken) {
if (PHINode *Phi = dyn_cast<PHINode>(Cond)) {
+ BasicBlock *Parent = Phi->getParent();
+ PHINode *NewPhi = PHINode::Create(Int64, 0, "", &Parent->front());
+ Value *Ret = NewPhi;
// Handle all non-constant incoming values first
for (unsigned i = 0, e = Phi->getNumIncomingValues(); i != e; ++i) {
Value *Incoming = Phi->getIncomingValue(i);
- if (isa<ConstantInt>(Incoming))
+ BasicBlock *From = Phi->getIncomingBlock(i);
+ if (isa<ConstantInt>(Incoming)) {
+ NewPhi->addIncoming(Broken, From);
continue;
+ }
Phi->setIncomingValue(i, BoolFalse);
- handleLoopCondition(Incoming);
+ Value *PhiArg = handleLoopCondition(Incoming, Broken);
+ NewPhi->addIncoming(PhiArg, From);
}
- BasicBlock *Parent = Phi->getParent();
BasicBlock *IDom = DT->getNode(Parent)->getIDom()->getBlock();
for (unsigned i = 0, e = Phi->getNumIncomingValues(); i != e; ++i) {
@@ -230,33 +235,28 @@ void SIAnnotateControlFlow::handleLoopCondition(Value *Cond) {
if (From == IDom) {
CallInst *OldEnd = dyn_cast<CallInst>(Parent->getFirstInsertionPt());
if (OldEnd && OldEnd->getCalledFunction() == EndCf) {
- Value *Args[] = {
- OldEnd->getArgOperand(0),
- PhiInserter.GetValueAtEndOfBlock(Parent)
- };
- Value *Ret = CallInst::Create(ElseBreak, Args, "", OldEnd);
- PhiInserter.AddAvailableValue(Parent, Ret);
+ Value *Args[] = { OldEnd->getArgOperand(0), NewPhi };
+ Ret = CallInst::Create(ElseBreak, Args, "", OldEnd);
continue;
}
}
-
TerminatorInst *Insert = From->getTerminator();
- Value *Arg = PhiInserter.GetValueAtEndOfBlock(From);
- Value *Ret = CallInst::Create(Break, Arg, "", Insert);
- PhiInserter.AddAvailableValue(From, Ret);
+ Value *PhiArg = CallInst::Create(Break, Broken, "", Insert);
+ NewPhi->setIncomingValue(i, PhiArg);
}
eraseIfUnused(Phi);
+ return Ret;
} else if (Instruction *Inst = dyn_cast<Instruction>(Cond)) {
BasicBlock *Parent = Inst->getParent();
TerminatorInst *Insert = Parent->getTerminator();
- Value *Args[] = { Cond, PhiInserter.GetValueAtEndOfBlock(Parent) };
- Value *Ret = CallInst::Create(IfBreak, Args, "", Insert);
- PhiInserter.AddAvailableValue(Parent, Ret);
+ Value *Args[] = { Cond, Broken };
+ return CallInst::Create(IfBreak, Args, "", Insert);
} else {
llvm_unreachable("Unhandled loop condition!");
}
+ return 0;
}
/// \brief Handle a back edge (loop)
@@ -264,15 +264,11 @@ void SIAnnotateControlFlow::handleLoop(BranchInst *Term) {
BasicBlock *Target = Term->getSuccessor(1);
PHINode *Broken = PHINode::Create(Int64, 0, "", &Target->front());
- PhiInserter.Initialize(Int64, "");
- PhiInserter.AddAvailableValue(Target, Broken);
-
Value *Cond = Term->getCondition();
Term->setCondition(BoolTrue);
- handleLoopCondition(Cond);
+ Value *Arg = handleLoopCondition(Cond, Broken);
BasicBlock *BB = Term->getParent();
- Value *Arg = PhiInserter.GetValueAtEndOfBlock(BB);
for (pred_iterator PI = pred_begin(Target), PE = pred_end(Target);
PI != PE; ++PI) {
diff --git a/lib/Target/R600/SIDefines.h b/lib/Target/R600/SIDefines.h
index 2cbce28..4d31a11 100644
--- a/lib/Target/R600/SIDefines.h
+++ b/lib/Target/R600/SIDefines.h
@@ -35,4 +35,54 @@ enum {
#define S_00B84C_LDS_SIZE(x) (((x) & 0x1FF) << 15)
#define R_0286CC_SPI_PS_INPUT_ENA 0x0286CC
+
+#define R_00B848_COMPUTE_PGM_RSRC1 0x00B848
+#define S_00B848_VGPRS(x) (((x) & 0x3F) << 0)
+#define G_00B848_VGPRS(x) (((x) >> 0) & 0x3F)
+#define C_00B848_VGPRS 0xFFFFFFC0
+#define S_00B848_SGPRS(x) (((x) & 0x0F) << 6)
+#define G_00B848_SGPRS(x) (((x) >> 6) & 0x0F)
+#define C_00B848_SGPRS 0xFFFFFC3F
+#define S_00B848_PRIORITY(x) (((x) & 0x03) << 10)
+#define G_00B848_PRIORITY(x) (((x) >> 10) & 0x03)
+#define C_00B848_PRIORITY 0xFFFFF3FF
+#define S_00B848_FLOAT_MODE(x) (((x) & 0xFF) << 12)
+#define G_00B848_FLOAT_MODE(x) (((x) >> 12) & 0xFF)
+#define C_00B848_FLOAT_MODE 0xFFF00FFF
+#define S_00B848_PRIV(x) (((x) & 0x1) << 20)
+#define G_00B848_PRIV(x) (((x) >> 20) & 0x1)
+#define C_00B848_PRIV 0xFFEFFFFF
+#define S_00B848_DX10_CLAMP(x) (((x) & 0x1) << 21)
+#define G_00B848_DX10_CLAMP(x) (((x) >> 21) & 0x1)
+#define C_00B848_DX10_CLAMP 0xFFDFFFFF
+#define S_00B848_DEBUG_MODE(x) (((x) & 0x1) << 22)
+#define G_00B848_DEBUG_MODE(x) (((x) >> 22) & 0x1)
+#define C_00B848_DEBUG_MODE 0xFFBFFFFF
+#define S_00B848_IEEE_MODE(x) (((x) & 0x1) << 23)
+#define G_00B848_IEEE_MODE(x) (((x) >> 23) & 0x1)
+#define C_00B848_IEEE_MODE 0xFF7FFFFF
+
+
+// Helpers for setting FLOAT_MODE
+#define FP_ROUND_ROUND_TO_NEAREST 0
+#define FP_ROUND_ROUND_TO_INF 1
+#define FP_ROUND_ROUND_TO_NEGINF 2
+#define FP_ROUND_ROUND_TO_ZERO 3
+
+// Bits 3:0 control rounding mode. 1:0 control single precision, 3:2 double
+// precision.
+#define FP_ROUND_MODE_SP(x) ((x) & 0x3)
+#define FP_ROUND_MODE_DP(x) (((x) & 0x3) << 2)
+
+#define FP_DENORM_FLUSH_IN_FLUSH_OUT 0
+#define FP_DENORM_FLUSH_OUT 1
+#define FP_DENORM_FLUSH_IN 2
+#define FP_DENORM_FLUSH_NONE 3
+
+
+// Bits 7:4 control denormal handling. 5:4 control single precision, 7:6 double
+// precision.
+#define FP_DENORM_MODE_SP(x) (((x) & 0x3) << 4)
+#define FP_DENORM_MODE_DP(x) (((x) & 0x3) << 6)
+
#endif // SIDEFINES_H_
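For illustration only, a minimal sketch of how the new SIDefines.h helpers compose; the function name and the already-encoded VGPR/SGPR field values are assumptions for the example, not part of the patch:

// Hypothetical usage sketch (assumes SIDefines.h is included); how the raw
// register counts are encoded into their fields is taken as given here.
static unsigned buildComputePgmRsrc1(unsigned VGprField, unsigned SGprField) {
  // Round-to-nearest for single precision, round-to-zero for double;
  // flush SP denormals, keep DP denormals (values chosen arbitrarily).
  unsigned FloatMode = FP_ROUND_MODE_SP(FP_ROUND_ROUND_TO_NEAREST) |
                       FP_ROUND_MODE_DP(FP_ROUND_ROUND_TO_ZERO) |
                       FP_DENORM_MODE_SP(FP_DENORM_FLUSH_IN_FLUSH_OUT) |
                       FP_DENORM_MODE_DP(FP_DENORM_FLUSH_NONE);

  // Pack the fields of COMPUTE_PGM_RSRC1 with the S_00B848_* setters.
  return S_00B848_VGPRS(VGprField) |
         S_00B848_SGPRS(SGprField) |
         S_00B848_FLOAT_MODE(FloatMode);
}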
diff --git a/lib/Target/R600/SIFixSGPRLiveRanges.cpp b/lib/Target/R600/SIFixSGPRLiveRanges.cpp
new file mode 100644
index 0000000..7d116ee
--- /dev/null
+++ b/lib/Target/R600/SIFixSGPRLiveRanges.cpp
@@ -0,0 +1,110 @@
+//===-- SIFixSGPRLiveRanges.cpp - Fix SGPR live ranges ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// SALU instructions ignore control flow, so we need to modify the live ranges
+/// of the registers they define.
+///
+/// The strategy is to view the entire program as if it were a single basic
+/// block and calculate the intervals accordingly. We implement this
+/// by walking the list of segments for each LiveRange and setting the
+/// end of each segment equal to the start of the segment that immediately
+/// follows it.
+
+#include "AMDGPU.h"
+#include "SIRegisterInfo.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetMachine.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "si-fix-sgpr-live-ranges"
+
+namespace {
+
+class SIFixSGPRLiveRanges : public MachineFunctionPass {
+public:
+ static char ID;
+
+public:
+ SIFixSGPRLiveRanges() : MachineFunctionPass(ID) {
+ initializeSIFixSGPRLiveRangesPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual bool runOnMachineFunction(MachineFunction &MF) override;
+
+ virtual const char *getPassName() const override {
+ return "SI Fix SGPR live ranges";
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<LiveIntervals>();
+ AU.addPreserved<LiveIntervals>();
+ AU.addPreserved<SlotIndexes>();
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+};
+
+} // End anonymous namespace.
+
+INITIALIZE_PASS_BEGIN(SIFixSGPRLiveRanges, DEBUG_TYPE,
+ "SI Fix SGPR Live Ranges", false, false)
+INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
+INITIALIZE_PASS_END(SIFixSGPRLiveRanges, DEBUG_TYPE,
+ "SI Fix SGPR Live Ranges", false, false)
+
+char SIFixSGPRLiveRanges::ID = 0;
+
+char &llvm::SIFixSGPRLiveRangesID = SIFixSGPRLiveRanges::ID;
+
+FunctionPass *llvm::createSIFixSGPRLiveRangesPass() {
+ return new SIFixSGPRLiveRanges();
+}
+
+bool SIFixSGPRLiveRanges::runOnMachineFunction(MachineFunction &MF) {
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ const SIRegisterInfo *TRI = static_cast<const SIRegisterInfo *>(
+ MF.getTarget().getRegisterInfo());
+ LiveIntervals *LIS = &getAnalysis<LiveIntervals>();
+
+ for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
+ BI != BE; ++BI) {
+
+ MachineBasicBlock &MBB = *BI;
+ for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
+ I != E; ++I) {
+ MachineInstr &MI = *I;
+ MachineOperand *ExecUse = MI.findRegisterUseOperand(AMDGPU::EXEC);
+ if (ExecUse)
+ continue;
+
+ for (const MachineOperand &Def : MI.operands()) {
+ if (!Def.isReg() || !Def.isDef() || !TargetRegisterInfo::isVirtualRegister(Def.getReg()))
+ continue;
+
+ const TargetRegisterClass *RC = MRI.getRegClass(Def.getReg());
+
+ if (!TRI->isSGPRClass(RC))
+ continue;
+ LiveInterval &LI = LIS->getInterval(Def.getReg());
+ for (unsigned i = 0, e = LI.size() - 1; i != e; ++i) {
+ LiveRange::Segment &Seg = LI.segments[i];
+ LiveRange::Segment &Next = LI.segments[i + 1];
+ Seg.end = Next.start;
+ }
+ }
+ }
+ }
+
+ return false;
+}
diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp
index c9e247c..b13c3b8 100644
--- a/lib/Target/R600/SIISelLowering.cpp
+++ b/lib/Target/R600/SIISelLowering.cpp
@@ -14,8 +14,8 @@
#include "SIISelLowering.h"
#include "AMDGPU.h"
+#include "AMDGPUIntrinsicInfo.h"
#include "AMDGPUSubtarget.h"
-#include "AMDILIntrinsicInfo.h"
#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"
#include "SIRegisterInfo.h"
@@ -24,6 +24,7 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/IR/Function.h"
+#include "llvm/ADT/SmallString.h"
using namespace llvm;
@@ -76,6 +77,8 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
setOperationAction(ISD::ADD, MVT::i32, Legal);
setOperationAction(ISD::ADDC, MVT::i32, Legal);
setOperationAction(ISD::ADDE, MVT::i32, Legal);
+ setOperationAction(ISD::SUBC, MVT::i32, Legal);
+ setOperationAction(ISD::SUBE, MVT::i32, Legal);
// We need to custom lower vector stores from local memory
setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
@@ -88,14 +91,12 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
// We need to custom lower loads/stores from private memory
setOperationAction(ISD::LOAD, MVT::i32, Custom);
- setOperationAction(ISD::LOAD, MVT::i64, Custom);
setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
setOperationAction(ISD::LOAD, MVT::v8i32, Custom);
setOperationAction(ISD::STORE, MVT::i1, Custom);
setOperationAction(ISD::STORE, MVT::i32, Custom);
- setOperationAction(ISD::STORE, MVT::i64, Custom);
setOperationAction(ISD::STORE, MVT::v2i32, Custom);
setOperationAction(ISD::STORE, MVT::v4i32, Custom);
@@ -105,18 +106,14 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
setOperationAction(ISD::SELECT, MVT::f64, Promote);
AddPromotedToType(ISD::SELECT, MVT::f64, MVT::i64);
- setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
- setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
-
- setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
+ setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
+ setOperationAction(ISD::SELECT_CC, MVT::i32, Expand);
+ setOperationAction(ISD::SELECT_CC, MVT::i64, Expand);
+ setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
setOperationAction(ISD::SETCC, MVT::v2i1, Expand);
setOperationAction(ISD::SETCC, MVT::v4i1, Expand);
- setOperationAction(ISD::ANY_EXTEND, MVT::i64, Custom);
- setOperationAction(ISD::SIGN_EXTEND, MVT::i64, Custom);
- setOperationAction(ISD::ZERO_EXTEND, MVT::i64, Custom);
-
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Legal);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i1, Custom);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i1, Custom);
@@ -139,6 +136,7 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v4f32, Custom);
setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
+ setOperationAction(ISD::BRCOND, MVT::Other, Custom);
setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Custom);
@@ -215,9 +213,16 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
setOperationAction(ISD::FRINT, MVT::f64, Legal);
}
+ // FIXME: These should be removed and handled the same way as f32 fneg. Source
+ // modifiers also work for the double instructions.
+ setOperationAction(ISD::FNEG, MVT::f64, Expand);
+ setOperationAction(ISD::FABS, MVT::f64, Expand);
+
setTargetDAGCombine(ISD::SELECT_CC);
setTargetDAGCombine(ISD::SETCC);
+ setTargetDAGCombine(ISD::UINT_TO_FP);
+
setSchedulingPreference(Sched::RegPressure);
}
@@ -265,8 +270,12 @@ bool SITargetLowering::allowsUnalignedMemoryAccesses(EVT VT,
return VT.bitsGT(MVT::i32);
}
-bool SITargetLowering::shouldSplitVectorType(EVT VT) const {
- return VT.getScalarType().bitsLE(MVT::i16);
+TargetLoweringBase::LegalizeTypeAction
+SITargetLowering::getPreferredVectorAction(EVT VT) const {
+ if (VT.getVectorNumElements() != 1 && VT.getScalarType().bitsLE(MVT::i16))
+ return TypeSplitVector;
+
+ return TargetLoweringBase::getPreferredVectorAction(VT);
}
bool SITargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
@@ -482,19 +491,20 @@ MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter(
MI->eraseFromParent();
break;
}
- case AMDGPU::V_SUB_F64:
- BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::V_ADD_F64),
- MI->getOperand(0).getReg())
- .addReg(MI->getOperand(1).getReg())
- .addReg(MI->getOperand(2).getReg())
- .addImm(0) /* src2 */
- .addImm(0) /* ABS */
- .addImm(0) /* CLAMP */
- .addImm(0) /* OMOD */
- .addImm(2); /* NEG */
+ case AMDGPU::V_SUB_F64: {
+ unsigned DestReg = MI->getOperand(0).getReg();
+ BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::V_ADD_F64), DestReg)
+ .addImm(0) // SRC0 modifiers
+ .addReg(MI->getOperand(1).getReg())
+ .addImm(1) // SRC1 modifiers
+ .addReg(MI->getOperand(2).getReg())
+ .addImm(0) // SRC2 modifiers
+ .addImm(0) // src2
+ .addImm(0) // CLAMP
+ .addImm(0); // OMOD
MI->eraseFromParent();
break;
-
+ }
case AMDGPU::SI_RegisterStorePseudo: {
MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
unsigned Reg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
@@ -595,27 +605,31 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::BRCOND: return LowerBRCOND(Op, DAG);
case ISD::LOAD: {
LoadSDNode *Load = dyn_cast<LoadSDNode>(Op);
+ EVT VT = Op.getValueType();
+
+ // These loads are legal.
+ if (Load->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS &&
+ VT.isVector() && VT.getVectorNumElements() == 2 &&
+ VT.getVectorElementType() == MVT::i32)
+ return SDValue();
+
if (Op.getValueType().isVector() &&
(Load->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ||
Load->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS ||
(Load->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS &&
Op.getValueType().getVectorNumElements() > 4))) {
- SDValue MergedValues[2] = {
- SplitVectorLoad(Op, DAG),
- Load->getChain()
- };
- return DAG.getMergeValues(MergedValues, SDLoc(Op));
+ return SplitVectorLoad(Op, DAG);
} else {
- return LowerLOAD(Op, DAG);
+ SDValue Result = LowerLOAD(Op, DAG);
+ assert((!Result.getNode() ||
+ Result.getNode()->getNumValues() == 2) &&
+ "Load should return a value and a chain");
+ return Result;
}
}
case ISD::SELECT: return LowerSELECT(Op, DAG);
- case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
- case ISD::SIGN_EXTEND: return LowerSIGN_EXTEND(Op, DAG);
case ISD::STORE: return LowerSTORE(Op, DAG);
- case ISD::ANY_EXTEND: // Fall-through
- case ISD::ZERO_EXTEND: return LowerZERO_EXTEND(Op, DAG);
case ISD::GlobalAddress: return LowerGlobalAddress(MFI, Op, DAG);
case ISD::INTRINSIC_WO_CHAIN: {
unsigned IntrinsicID =
@@ -827,13 +841,9 @@ SDValue SITargetLowering::LowerBRCOND(SDValue BRCOND,
SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
SDLoc DL(Op);
LoadSDNode *Load = cast<LoadSDNode>(Op);
- SDValue Ret = AMDGPUTargetLowering::LowerLOAD(Op, DAG);
- SDValue MergedValues[2];
- MergedValues[1] = Load->getChain();
- if (Ret.getNode()) {
- MergedValues[0] = Ret;
- return DAG.getMergeValues(MergedValues, DL);
- }
+ SDValue Lowered = AMDGPUTargetLowering::LowerLOAD(Op, DAG);
+ if (Lowered.getNode())
+ return Lowered;
if (Load->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
return SDValue();
@@ -846,25 +856,38 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
SDValue Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, Load->getBasePtr(),
DAG.getConstant(2, MVT::i32));
- Ret = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, MVT::i32,
- Load->getChain(), Ptr,
- DAG.getTargetConstant(0, MVT::i32),
- Op.getOperand(2));
+
+ // FIXME: REGISTER_LOAD should probably have a chain result.
+ SDValue Chain = Load->getChain();
+ SDValue LoLoad = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, MVT::i32,
+ Chain, Ptr,
+ DAG.getTargetConstant(0, MVT::i32),
+ Op.getOperand(2));
+
+ SDValue Ret = LoLoad.getValue(0);
if (MemVT.getSizeInBits() == 64) {
+ // TODO: This needs a test to make sure the right thing is happening with
+ // the chain. That is hard without general function support.
+
SDValue IncPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
DAG.getConstant(1, MVT::i32));
- SDValue LoadUpper = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, MVT::i32,
- Load->getChain(), IncPtr,
- DAG.getTargetConstant(0, MVT::i32),
- Op.getOperand(2));
+ SDValue HiLoad = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, MVT::i32,
+ Chain, IncPtr,
+ DAG.getTargetConstant(0, MVT::i32),
+ Op.getOperand(2));
- Ret = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Ret, LoadUpper);
+ Ret = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, LoLoad, HiLoad);
+ // Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
+ // LoLoad.getValue(1), HiLoad.getValue(1));
}
- MergedValues[0] = Ret;
- return DAG.getMergeValues(MergedValues, DL);
+ SDValue Ops[] = {
+ Ret,
+ Chain
+ };
+ return DAG.getMergeValues(Ops, DL);
}
SDValue SITargetLowering::LowerSampleIntrinsic(unsigned Opcode,
@@ -903,39 +926,17 @@ SDValue SITargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
return DAG.getNode(ISD::BITCAST, DL, MVT::i64, Res);
}
-SDValue SITargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
- SDValue LHS = Op.getOperand(0);
- SDValue RHS = Op.getOperand(1);
- SDValue True = Op.getOperand(2);
- SDValue False = Op.getOperand(3);
- SDValue CC = Op.getOperand(4);
- EVT VT = Op.getValueType();
- SDLoc DL(Op);
-
- SDValue Cond = DAG.getNode(ISD::SETCC, DL, MVT::i1, LHS, RHS, CC);
- return DAG.getNode(ISD::SELECT, DL, VT, Cond, True, False);
-}
-
-SDValue SITargetLowering::LowerSIGN_EXTEND(SDValue Op,
- SelectionDAG &DAG) const {
- EVT VT = Op.getValueType();
- SDLoc DL(Op);
-
- if (VT != MVT::i64) {
- return SDValue();
- }
-
- SDValue Hi = DAG.getNode(ISD::SRA, DL, MVT::i32, Op.getOperand(0),
- DAG.getConstant(31, MVT::i32));
-
- return DAG.getNode(ISD::BUILD_PAIR, DL, VT, Op.getOperand(0), Hi);
-}
-
SDValue SITargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
SDLoc DL(Op);
StoreSDNode *Store = cast<StoreSDNode>(Op);
EVT VT = Store->getMemoryVT();
+ // These stores are legal.
+ if (Store->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS &&
+ VT.isVector() && VT.getVectorNumElements() == 2 &&
+ VT.getVectorElementType() == MVT::i32)
+ return SDValue();
+
SDValue Ret = AMDGPUTargetLowering::LowerSTORE(Op, DAG);
if (Ret.getNode())
return Ret;
@@ -1011,27 +1012,99 @@ SDValue SITargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
return Chain;
}
+//===----------------------------------------------------------------------===//
+// Custom DAG optimizations
+//===----------------------------------------------------------------------===//
+
+SDValue SITargetLowering::performUCharToFloatCombine(SDNode *N,
+ DAGCombinerInfo &DCI) {
+ EVT VT = N->getValueType(0);
+ EVT ScalarVT = VT.getScalarType();
+ if (ScalarVT != MVT::f32)
+ return SDValue();
-SDValue SITargetLowering::LowerZERO_EXTEND(SDValue Op,
- SelectionDAG &DAG) const {
- EVT VT = Op.getValueType();
- SDLoc DL(Op);
+ SelectionDAG &DAG = DCI.DAG;
+ SDLoc DL(N);
- if (VT != MVT::i64) {
+ SDValue Src = N->getOperand(0);
+ EVT SrcVT = Src.getValueType();
+
+ // TODO: We could try to match extracting the higher bytes, which would be
+ // easier if i8 vectors weren't promoted to i32 vectors, particularly after
+ // types are legalized. v4i8 -> v4f32 is probably the only case to worry
+ // about in practice.
+ if (DCI.isAfterLegalizeVectorOps() && SrcVT == MVT::i32) {
+ if (DAG.MaskedValueIsZero(Src, APInt::getHighBitsSet(32, 24))) {
+ SDValue Cvt = DAG.getNode(AMDGPUISD::CVT_F32_UBYTE0, DL, VT, Src);
+ DCI.AddToWorklist(Cvt.getNode());
+ return Cvt;
+ }
+ }
+
+ // We are primarily trying to catch operations on illegal vector types
+ // before they are expanded.
+ // For scalars, we can use the more flexible method of checking masked bits
+ // after legalization.
+ if (!DCI.isBeforeLegalize() ||
+ !SrcVT.isVector() ||
+ SrcVT.getVectorElementType() != MVT::i8) {
return SDValue();
}
- SDValue Src = Op.getOperand(0);
- if (Src.getValueType() != MVT::i32)
- Src = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, Src);
+ assert(DCI.isBeforeLegalize() && "Unexpected legal type");
- SDValue Zero = DAG.getConstant(0, MVT::i32);
- return DAG.getNode(ISD::BUILD_PAIR, DL, VT, Src, Zero);
-}
+ // Weird sized vectors are a pain to handle, but we know 3 is really the same
+ // size as 4.
+ unsigned NElts = SrcVT.getVectorNumElements();
+ if (!SrcVT.isSimple() && NElts != 3)
+ return SDValue();
-//===----------------------------------------------------------------------===//
-// Custom DAG optimizations
-//===----------------------------------------------------------------------===//
+ // Handle v4i8 -> v4f32 extload. Replace the v4i8 with a legal i32 load to
+ // prevent a mess from expanding to v4i32 and repacking.
+ if (ISD::isNormalLoad(Src.getNode()) && Src.hasOneUse()) {
+ EVT LoadVT = getEquivalentMemType(*DAG.getContext(), SrcVT);
+ EVT RegVT = getEquivalentLoadRegType(*DAG.getContext(), SrcVT);
+ EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f32, NElts);
+
+ LoadSDNode *Load = cast<LoadSDNode>(Src);
+ SDValue NewLoad = DAG.getExtLoad(ISD::ZEXTLOAD, DL, RegVT,
+ Load->getChain(),
+ Load->getBasePtr(),
+ LoadVT,
+ Load->getMemOperand());
+
+ // Make sure successors of the original load stay after it by updating
+ // them to use the new Chain.
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), NewLoad.getValue(1));
+
+ SmallVector<SDValue, 4> Elts;
+ if (RegVT.isVector())
+ DAG.ExtractVectorElements(NewLoad, Elts);
+ else
+ Elts.push_back(NewLoad);
+
+ SmallVector<SDValue, 4> Ops;
+
+ unsigned EltIdx = 0;
+ for (SDValue Elt : Elts) {
+ unsigned ComponentsInElt = std::min(4u, NElts - 4 * EltIdx);
+ for (unsigned I = 0; I < ComponentsInElt; ++I) {
+ unsigned Opc = AMDGPUISD::CVT_F32_UBYTE0 + I;
+ SDValue Cvt = DAG.getNode(Opc, DL, MVT::f32, Elt);
+ DCI.AddToWorklist(Cvt.getNode());
+ Ops.push_back(Cvt);
+ }
+
+ ++EltIdx;
+ }
+
+ assert(Ops.size() == NElts);
+
+ return DAG.getNode(ISD::BUILD_VECTOR, DL, FloatVT, Ops);
+ }
+
+ return SDValue();
+}
SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
@@ -1074,6 +1147,31 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
}
break;
}
+
+ case AMDGPUISD::CVT_F32_UBYTE0:
+ case AMDGPUISD::CVT_F32_UBYTE1:
+ case AMDGPUISD::CVT_F32_UBYTE2:
+ case AMDGPUISD::CVT_F32_UBYTE3: {
+ unsigned Offset = N->getOpcode() - AMDGPUISD::CVT_F32_UBYTE0;
+
+ SDValue Src = N->getOperand(0);
+ APInt Demanded = APInt::getBitsSet(32, 8 * Offset, 8 * Offset + 8);
+
+ APInt KnownZero, KnownOne;
+ TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
+ !DCI.isBeforeLegalizeOps());
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (TLO.ShrinkDemandedConstant(Src, Demanded) ||
+ TLI.SimplifyDemandedBits(Src, Demanded, KnownZero, KnownOne, TLO)) {
+ DCI.CommitTargetLoweringOpt(TLO);
+ }
+
+ break;
+ }
+
+ case ISD::UINT_TO_FP: {
+ return performUCharToFloatCombine(N, DCI);
+ }
}
return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
@@ -1297,7 +1395,7 @@ SDNode *SITargetLowering::foldOperands(MachineSDNode *Node,
int32_t Immediate = Desc->getSize() == 4 ? 0 : -1;
bool HaveVSrc = false, HaveSSrc = false;
- // First figure out what we alread have in this instruction
+ // First figure out what we already have in this instruction.
for (unsigned i = 0, e = Node->getNumOperands(), Op = NumDefs;
i != e && Op < NumOps; ++i, ++Op) {
@@ -1316,7 +1414,7 @@ SDNode *SITargetLowering::foldOperands(MachineSDNode *Node,
}
}
- // If we neither have VSrc nor SSrc it makes no sense to continue
+ // If we neither have VSrc nor SSrc, it makes no sense to continue.
if (!HaveVSrc && !HaveSSrc)
return Node;
@@ -1332,17 +1430,17 @@ SDNode *SITargetLowering::foldOperands(MachineSDNode *Node,
const SDValue &Operand = Node->getOperand(i);
Ops.push_back(Operand);
- // Already folded immediate ?
+ // Already folded immediate?
if (isa<ConstantSDNode>(Operand.getNode()) ||
isa<ConstantFPSDNode>(Operand.getNode()))
continue;
- // Is this a VSrc or SSrc operand ?
+ // Is this a VSrc or SSrc operand?
unsigned RegClass = Desc->OpInfo[Op].RegClass;
if (isVSrc(RegClass) || isSSrc(RegClass)) {
// Try to fold the immediates
if (!foldImm(Ops[i], Immediate, ScalarSlotUsed)) {
- // Folding didn't worked, make sure we don't hit the SReg limit
+ // Folding didn't work, make sure we don't hit the SReg limit.
ensureSRegLimit(DAG, Ops[i], RegClass, ScalarSlotUsed);
}
continue;
@@ -1371,7 +1469,6 @@ SDNode *SITargetLowering::foldOperands(MachineSDNode *Node,
continue;
if (DescE64) {
-
// Test if it makes sense to switch to e64 encoding
unsigned OtherRegClass = DescE64->OpInfo[Op].RegClass;
if (!isVSrc(OtherRegClass) && !isSSrc(OtherRegClass))
@@ -1402,7 +1499,7 @@ SDNode *SITargetLowering::foldOperands(MachineSDNode *Node,
if (!DescE64)
continue;
Desc = DescE64;
- DescE64 = 0;
+ DescE64 = nullptr;
}
else if (Operand.getMachineOpcode() == AMDGPU::FABS_SI) {
Ops.pop_back();
@@ -1412,7 +1509,7 @@ SDNode *SITargetLowering::foldOperands(MachineSDNode *Node,
if (!DescE64)
continue;
Desc = DescE64;
- DescE64 = 0;
+ DescE64 = nullptr;
}
}
@@ -1535,7 +1632,7 @@ void SITargetLowering::adjustWritemask(MachineSDNode *&Node,
}
}
-/// \brief Fold the instructions after slecting them
+/// \brief Fold the instructions after selecting them.
SDNode *SITargetLowering::PostISelFolding(MachineSDNode *Node,
SelectionDAG &DAG) const {
const SIInstrInfo *TII =
diff --git a/lib/Target/R600/SIISelLowering.h b/lib/Target/R600/SIISelLowering.h
index c6eaa81..e25323a 100644
--- a/lib/Target/R600/SIISelLowering.h
+++ b/lib/Target/R600/SIISelLowering.h
@@ -27,10 +27,7 @@ class SITargetLowering : public AMDGPUTargetLowering {
SelectionDAG &DAG) const;
SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerSIGN_EXTEND(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerZERO_EXTEND(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
bool foldImm(SDValue &Operand, int32_t &Immediate,
@@ -46,11 +43,16 @@ class SITargetLowering : public AMDGPUTargetLowering {
void adjustWritemask(MachineSDNode *&N, SelectionDAG &DAG) const;
MachineSDNode *AdjustRegClass(MachineSDNode *N, SelectionDAG &DAG) const;
+ static SDValue performUCharToFloatCombine(SDNode *N,
+ DAGCombinerInfo &DCI);
+
public:
SITargetLowering(TargetMachine &tm);
bool allowsUnalignedMemoryAccesses(EVT VT, unsigned AS,
bool *IsFast) const override;
- bool shouldSplitVectorType(EVT VT) const override;
+
+ TargetLoweringBase::LegalizeTypeAction
+ getPreferredVectorAction(EVT VT) const override;
bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
Type *Ty) const override;
diff --git a/lib/Target/R600/SIInsertWaits.cpp b/lib/Target/R600/SIInsertWaits.cpp
index a17fed7..1733326 100644
--- a/lib/Target/R600/SIInsertWaits.cpp
+++ b/lib/Target/R600/SIInsertWaits.cpp
@@ -341,6 +341,8 @@ Counters SIInsertWaits::handleOperands(MachineInstr &MI) {
return Result;
}
+// FIXME: Insert waits listed in Table 4.2 "Required User-Inserted Wait States"
+// around other non-memory instructions.
bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) {
bool Changes = false;
diff --git a/lib/Target/R600/SIInstrFormats.td b/lib/Target/R600/SIInstrFormats.td
index 168eff2..7cae9fc 100644
--- a/lib/Target/R600/SIInstrFormats.td
+++ b/lib/Target/R600/SIInstrFormats.td
@@ -51,6 +51,16 @@ class Enc64 <dag outs, dag ins, string asm, list<dag> pattern> :
let Size = 8;
}
+class VOP3Common <dag outs, dag ins, string asm, list<dag> pattern> :
+ Enc64 <outs, ins, asm, pattern> {
+
+ let mayLoad = 0;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+ let UseNamedOperandTable = 1;
+ let VOP3 = 1;
+}
+
//===----------------------------------------------------------------------===//
// Scalar operations
//===----------------------------------------------------------------------===//
@@ -207,7 +217,7 @@ class VOP2 <bits<6> op, dag outs, dag ins, string asm, list<dag> pattern> :
}
class VOP3 <bits<9> op, dag outs, dag ins, string asm, list<dag> pattern> :
- Enc64 <outs, ins, asm, pattern> {
+ VOP3Common <outs, ins, asm, pattern> {
bits<8> dst;
bits<2> src0_modifiers;
@@ -233,16 +243,11 @@ class VOP3 <bits<9> op, dag outs, dag ins, string asm, list<dag> pattern> :
let Inst{61} = src0_modifiers{0};
let Inst{62} = src1_modifiers{0};
let Inst{63} = src2_modifiers{0};
-
- let mayLoad = 0;
- let mayStore = 0;
- let hasSideEffects = 0;
- let UseNamedOperandTable = 1;
- let VOP3 = 1;
+
}
class VOP3b <bits<9> op, dag outs, dag ins, string asm, list<dag> pattern> :
- Enc64 <outs, ins, asm, pattern> {
+ VOP3Common <outs, ins, asm, pattern> {
bits<8> dst;
bits<2> src0_modifiers;
@@ -266,11 +271,6 @@ class VOP3b <bits<9> op, dag outs, dag ins, string asm, list<dag> pattern> :
let Inst{62} = src1_modifiers{0};
let Inst{63} = src2_modifiers{0};
- let mayLoad = 0;
- let mayStore = 0;
- let hasSideEffects = 0;
- let UseNamedOperandTable = 1;
- let VOP3 = 1;
}
class VOPC <bits<8> op, dag ins, string asm, list<dag> pattern> :
diff --git a/lib/Target/R600/SIInstrInfo.cpp b/lib/Target/R600/SIInstrInfo.cpp
index 4a9e346..455c890 100644
--- a/lib/Target/R600/SIInstrInfo.cpp
+++ b/lib/Target/R600/SIInstrInfo.cpp
@@ -19,13 +19,14 @@
#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/Function.h"
#include "llvm/MC/MCInstrDesc.h"
using namespace llvm;
-SIInstrInfo::SIInstrInfo(AMDGPUTargetMachine &tm)
- : AMDGPUInstrInfo(tm),
- RI(tm) { }
+SIInstrInfo::SIInstrInfo(const AMDGPUSubtarget &st)
+ : AMDGPUInstrInfo(st),
+ RI(st) { }
//===----------------------------------------------------------------------===//
// TargetInstrInfo callbacks
@@ -187,18 +188,25 @@ void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
int FrameIndex,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
- SIMachineFunctionInfo *MFI = MBB.getParent()->getInfo<SIMachineFunctionInfo>();
+ MachineFunction *MF = MBB.getParent();
+ SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
+ MachineRegisterInfo &MRI = MF->getRegInfo();
DebugLoc DL = MBB.findDebugLoc(MI);
unsigned KillFlag = isKill ? RegState::Kill : 0;
- MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
- if (TRI->getCommonSubClass(RC, &AMDGPU::SGPR_32RegClass)) {
- unsigned Lane = MFI->SpillTracker.reserveLanes(MRI, MBB.getParent());
+ if (RI.hasVGPRs(RC)) {
+ LLVMContext &Ctx = MF->getFunction()->getContext();
+ Ctx.emitError("SIInstrInfo::storeRegToStackSlot - Can't spill VGPR!");
+ BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), AMDGPU::VGPR0)
+ .addReg(SrcReg);
+ } else if (TRI->getCommonSubClass(RC, &AMDGPU::SGPR_32RegClass)) {
+ unsigned Lane = MFI->SpillTracker.reserveLanes(MRI, MF);
+ unsigned TgtReg = MFI->SpillTracker.LaneVGPR;
- BuildMI(MBB, MI, DL, get(AMDGPU::V_WRITELANE_B32), MFI->SpillTracker.LaneVGPR)
+ BuildMI(MBB, MI, DL, get(AMDGPU::V_WRITELANE_B32), TgtReg)
.addReg(SrcReg, KillFlag)
.addImm(Lane);
- MFI->SpillTracker.addSpilledReg(FrameIndex, MFI->SpillTracker.LaneVGPR, Lane);
+ MFI->SpillTracker.addSpilledReg(FrameIndex, TgtReg, Lane);
} else if (RI.isSGPRClass(RC)) {
// We are only allowed to create one new instruction when spilling
// registers, so we need to use pseudo instruction for vector
@@ -207,8 +215,7 @@ void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
// Reserve a spot in the spill tracker for each sub-register of
// the vector register.
unsigned NumSubRegs = RC->getSize() / 4;
- unsigned FirstLane = MFI->SpillTracker.reserveLanes(MRI, MBB.getParent(),
- NumSubRegs);
+ unsigned FirstLane = MFI->SpillTracker.reserveLanes(MRI, MF, NumSubRegs);
MFI->SpillTracker.addSpilledReg(FrameIndex, MFI->SpillTracker.LaneVGPR,
FirstLane);
@@ -234,19 +241,19 @@ void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
unsigned DestReg, int FrameIndex,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
- SIMachineFunctionInfo *MFI = MBB.getParent()->getInfo<SIMachineFunctionInfo>();
+ MachineFunction *MF = MBB.getParent();
+ SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
DebugLoc DL = MBB.findDebugLoc(MI);
- if (TRI->getCommonSubClass(RC, &AMDGPU::SReg_32RegClass)) {
- SIMachineFunctionInfo::SpilledReg Spill =
- MFI->SpillTracker.getSpilledReg(FrameIndex);
- assert(Spill.VGPR);
- BuildMI(MBB, MI, DL, get(AMDGPU::V_READLANE_B32), DestReg)
- .addReg(Spill.VGPR)
- .addImm(Spill.Lane);
- insertNOPs(MI, 3);
+
+ if (RI.hasVGPRs(RC)) {
+ LLVMContext &Ctx = MF->getFunction()->getContext();
+ Ctx.emitError("SIInstrInfo::loadRegToStackSlot - Can't retrieve spilled VGPR!");
+ BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DestReg)
+ .addImm(0);
} else if (RI.isSGPRClass(RC)){
unsigned Opcode;
switch(RC->getSize() * 8) {
+ case 32: Opcode = AMDGPU::SI_SPILL_S32_RESTORE; break;
case 64: Opcode = AMDGPU::SI_SPILL_S64_RESTORE; break;
case 128: Opcode = AMDGPU::SI_SPILL_S128_RESTORE; break;
case 256: Opcode = AMDGPU::SI_SPILL_S256_RESTORE; break;
@@ -260,7 +267,6 @@ void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
BuildMI(MBB, MI, DL, get(Opcode), DestReg)
.addReg(Spill.VGPR)
.addImm(FrameIndex);
- insertNOPs(MI, 3);
} else {
llvm_unreachable("VGPR spilling not supported");
}
@@ -281,6 +287,8 @@ static unsigned getNumSubRegsForSpillOp(unsigned Op) {
case AMDGPU::SI_SPILL_S64_SAVE:
case AMDGPU::SI_SPILL_S64_RESTORE:
return 2;
+ case AMDGPU::SI_SPILL_S32_RESTORE:
+ return 1;
default: llvm_unreachable("Invalid spill opcode");
}
}
@@ -334,7 +342,8 @@ bool SIInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
case AMDGPU::SI_SPILL_S512_RESTORE:
case AMDGPU::SI_SPILL_S256_RESTORE:
case AMDGPU::SI_SPILL_S128_RESTORE:
- case AMDGPU::SI_SPILL_S64_RESTORE: {
+ case AMDGPU::SI_SPILL_S64_RESTORE:
+ case AMDGPU::SI_SPILL_S32_RESTORE: {
unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode());
for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
@@ -348,6 +357,7 @@ bool SIInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
.addReg(MI->getOperand(1).getReg())
.addImm(Spill.Lane + i);
}
+ insertNOPs(MI, 3);
MI->eraseFromParent();
break;
}
@@ -514,6 +524,23 @@ bool SIInstrInfo::isLiteralConstant(const MachineOperand &MO) const {
return (MO.isImm() || MO.isFPImm()) && !isInlineConstant(MO);
}
+static bool compareMachineOp(const MachineOperand &Op0,
+ const MachineOperand &Op1) {
+ if (Op0.getType() != Op1.getType())
+ return false;
+
+ switch (Op0.getType()) {
+ case MachineOperand::MO_Register:
+ return Op0.getReg() == Op1.getReg();
+ case MachineOperand::MO_Immediate:
+ return Op0.getImm() == Op1.getImm();
+ case MachineOperand::MO_FPImmediate:
+ return Op0.getFPImm() == Op1.getFPImm();
+ default:
+ llvm_unreachable("Didn't expect to be comparing these operand types");
+ }
+}
+
bool SIInstrInfo::verifyInstruction(const MachineInstr *MI,
StringRef &ErrInfo) const {
uint16_t Opcode = MI->getOpcode();
@@ -532,7 +559,14 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr *MI,
// Make sure the register classes are correct
for (unsigned i = 0, e = Desc.getNumOperands(); i != e; ++i) {
switch (Desc.OpInfo[i].OperandType) {
- case MCOI::OPERAND_REGISTER:
+ case MCOI::OPERAND_REGISTER: {
+ int RegClass = Desc.OpInfo[i].RegClass;
+ if (!RI.regClassCanUseImmediate(RegClass) &&
+ (MI->getOperand(i).isImm() || MI->getOperand(i).isFPImm())) {
+ ErrInfo = "Expected register, but got immediate";
+ return false;
+ }
+ }
break;
case MCOI::OPERAND_IMMEDIATE:
if (!MI->getOperand(i).isImm() && !MI->getOperand(i).isFPImm()) {
@@ -620,6 +654,24 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr *MI,
return false;
}
}
+
+ // Verify misc. restrictions on specific instructions.
+ if (Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F32 ||
+ Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F64) {
+ MI->dump();
+
+ const MachineOperand &Src0 = MI->getOperand(2);
+ const MachineOperand &Src1 = MI->getOperand(3);
+ const MachineOperand &Src2 = MI->getOperand(4);
+ if (Src0.isReg() && Src1.isReg() && Src2.isReg()) {
+ if (!compareMachineOp(Src0, Src1) &&
+ !compareMachineOp(Src0, Src2)) {
+ ErrInfo = "v_div_scale_{f32|f64} require src0 = src1 or src2";
+ return false;
+ }
+ }
+ }
+
return true;
}
@@ -654,7 +706,9 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) {
case AMDGPU::S_SEXT_I32_I16: return AMDGPU::V_BFE_I32;
case AMDGPU::S_BFE_U32: return AMDGPU::V_BFE_U32;
case AMDGPU::S_BFE_I32: return AMDGPU::V_BFE_I32;
+ case AMDGPU::S_BREV_B32: return AMDGPU::V_BFREV_B32_e32;
case AMDGPU::S_NOT_B32: return AMDGPU::V_NOT_B32_e32;
+ case AMDGPU::S_NOT_B64: return AMDGPU::V_NOT_B32_e32;
case AMDGPU::S_CMP_EQ_I32: return AMDGPU::V_CMP_EQ_I32_e32;
case AMDGPU::S_CMP_LG_I32: return AMDGPU::V_CMP_NE_I32_e32;
case AMDGPU::S_CMP_GT_I32: return AMDGPU::V_CMP_GT_I32_e32;
@@ -667,6 +721,9 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) {
case AMDGPU::S_LOAD_DWORDX2_SGPR: return AMDGPU::BUFFER_LOAD_DWORDX2_ADDR64;
case AMDGPU::S_LOAD_DWORDX4_IMM:
case AMDGPU::S_LOAD_DWORDX4_SGPR: return AMDGPU::BUFFER_LOAD_DWORDX4_ADDR64;
+ case AMDGPU::S_BCNT1_I32_B32: return AMDGPU::V_BCNT_U32_B32_e32;
+ case AMDGPU::S_FF1_I32_B32: return AMDGPU::V_FFBL_B32_e32;
+ case AMDGPU::S_FLBIT_I32_B32: return AMDGPU::V_FFBH_U32_e32;
}
}
@@ -731,8 +788,8 @@ unsigned SIInstrInfo::buildExtractSubReg(MachineBasicBlock::iterator MI,
unsigned SubReg = MRI.createVirtualRegister(SubRC);
// Just in case the super register is itself a sub-register, copy it to a new
- // value so we don't need to wory about merging its subreg index with the
- // SubIdx passed to this function. The register coalescer should be able to
+ // value so we don't need to worry about merging its subreg index with the
+ // SubIdx passed to this function. The register coalescer should be able to
// eliminate this extra copy.
BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), get(TargetOpcode::COPY),
NewSuperReg)
@@ -1157,22 +1214,27 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {
continue;
}
case AMDGPU::S_AND_B64:
- splitScalar64BitOp(Worklist, Inst, AMDGPU::S_AND_B32);
+ splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_AND_B32);
Inst->eraseFromParent();
continue;
case AMDGPU::S_OR_B64:
- splitScalar64BitOp(Worklist, Inst, AMDGPU::S_OR_B32);
+ splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_OR_B32);
Inst->eraseFromParent();
continue;
case AMDGPU::S_XOR_B64:
- splitScalar64BitOp(Worklist, Inst, AMDGPU::S_XOR_B32);
+ splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_XOR_B32);
Inst->eraseFromParent();
continue;
case AMDGPU::S_NOT_B64:
- splitScalar64BitOp(Worklist, Inst, AMDGPU::S_NOT_B32);
+ splitScalar64BitUnaryOp(Worklist, Inst, AMDGPU::S_NOT_B32);
+ Inst->eraseFromParent();
+ continue;
+
+ case AMDGPU::S_BCNT1_I32_B64:
+ splitScalar64BitBCNT(Worklist, Inst);
Inst->eraseFromParent();
continue;
@@ -1217,6 +1279,10 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {
// 3 to not hit an assertion later in MCInstLower.
Inst->addOperand(MachineOperand::CreateImm(0));
Inst->addOperand(MachineOperand::CreateImm(0));
+ } else if (Opcode == AMDGPU::S_BCNT1_I32_B32) {
+ // The VALU version adds the second operand to the result, so insert an
+ // extra 0 operand.
+ Inst->addOperand(MachineOperand::CreateImm(0));
}
addDescImplicitUseDef(NewDesc, Inst);
@@ -1297,9 +1363,62 @@ const TargetRegisterClass *SIInstrInfo::getIndirectAddrRegClass() const {
return &AMDGPU::VReg_32RegClass;
}
-void SIInstrInfo::splitScalar64BitOp(SmallVectorImpl<MachineInstr *> &Worklist,
- MachineInstr *Inst,
- unsigned Opcode) const {
+void SIInstrInfo::splitScalar64BitUnaryOp(
+ SmallVectorImpl<MachineInstr *> &Worklist,
+ MachineInstr *Inst,
+ unsigned Opcode) const {
+ MachineBasicBlock &MBB = *Inst->getParent();
+ MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+
+ MachineOperand &Dest = Inst->getOperand(0);
+ MachineOperand &Src0 = Inst->getOperand(1);
+ DebugLoc DL = Inst->getDebugLoc();
+
+ MachineBasicBlock::iterator MII = Inst;
+
+ const MCInstrDesc &InstDesc = get(Opcode);
+ const TargetRegisterClass *Src0RC = Src0.isReg() ?
+ MRI.getRegClass(Src0.getReg()) :
+ &AMDGPU::SGPR_32RegClass;
+
+ const TargetRegisterClass *Src0SubRC = RI.getSubRegClass(Src0RC, AMDGPU::sub0);
+
+ MachineOperand SrcReg0Sub0 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC,
+ AMDGPU::sub0, Src0SubRC);
+
+ const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg());
+ const TargetRegisterClass *DestSubRC = RI.getSubRegClass(DestRC, AMDGPU::sub0);
+
+ unsigned DestSub0 = MRI.createVirtualRegister(DestRC);
+ MachineInstr *LoHalf = BuildMI(MBB, MII, DL, InstDesc, DestSub0)
+ .addOperand(SrcReg0Sub0);
+
+ MachineOperand SrcReg0Sub1 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC,
+ AMDGPU::sub1, Src0SubRC);
+
+ unsigned DestSub1 = MRI.createVirtualRegister(DestSubRC);
+ MachineInstr *HiHalf = BuildMI(MBB, MII, DL, InstDesc, DestSub1)
+ .addOperand(SrcReg0Sub1);
+
+ unsigned FullDestReg = MRI.createVirtualRegister(DestRC);
+ BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), FullDestReg)
+ .addReg(DestSub0)
+ .addImm(AMDGPU::sub0)
+ .addReg(DestSub1)
+ .addImm(AMDGPU::sub1);
+
+ MRI.replaceRegWith(Dest.getReg(), FullDestReg);
+
+ // Try to legalize the operands in case we need to swap the order to keep it
+ // valid.
+ Worklist.push_back(LoHalf);
+ Worklist.push_back(HiHalf);
+}
+
+void SIInstrInfo::splitScalar64BitBinaryOp(
+ SmallVectorImpl<MachineInstr *> &Worklist,
+ MachineInstr *Inst,
+ unsigned Opcode) const {
MachineBasicBlock &MBB = *Inst->getParent();
MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
@@ -1360,6 +1479,46 @@ void SIInstrInfo::splitScalar64BitOp(SmallVectorImpl<MachineInstr *> &Worklist,
Worklist.push_back(HiHalf);
}
+void SIInstrInfo::splitScalar64BitBCNT(SmallVectorImpl<MachineInstr *> &Worklist,
+ MachineInstr *Inst) const {
+ MachineBasicBlock &MBB = *Inst->getParent();
+ MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+
+ MachineBasicBlock::iterator MII = Inst;
+ DebugLoc DL = Inst->getDebugLoc();
+
+ MachineOperand &Dest = Inst->getOperand(0);
+ MachineOperand &Src = Inst->getOperand(1);
+
+ const MCInstrDesc &InstDesc = get(AMDGPU::V_BCNT_U32_B32_e32);
+ const TargetRegisterClass *SrcRC = Src.isReg() ?
+ MRI.getRegClass(Src.getReg()) :
+ &AMDGPU::SGPR_32RegClass;
+
+ unsigned MidReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+
+ const TargetRegisterClass *SrcSubRC = RI.getSubRegClass(SrcRC, AMDGPU::sub0);
+
+ MachineOperand SrcRegSub0 = buildExtractSubRegOrImm(MII, MRI, Src, SrcRC,
+ AMDGPU::sub0, SrcSubRC);
+ MachineOperand SrcRegSub1 = buildExtractSubRegOrImm(MII, MRI, Src, SrcRC,
+ AMDGPU::sub1, SrcSubRC);
+
+ MachineInstr *First = BuildMI(MBB, MII, DL, InstDesc, MidReg)
+ .addOperand(SrcRegSub0)
+ .addImm(0);
+
+ MachineInstr *Second = BuildMI(MBB, MII, DL, InstDesc, ResultReg)
+ .addOperand(SrcRegSub1)
+ .addReg(MidReg);
+
+ MRI.replaceRegWith(Dest.getReg(), ResultReg);
+
+ Worklist.push_back(First);
+ Worklist.push_back(Second);
+}
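
Because V_BCNT_U32_B32 adds its second operand to the count (which is also why the S_BCNT1_I32_B32 conversion earlier in this file inserts an extra 0 operand), the 64-bit count can be formed by chaining two 32-bit counts, exactly as the two BuildMI calls above do. A standalone C++ model, not taken from the backend:

    #include <cstdint>

    // Model of V_BCNT_U32_B32: population count of Src0 accumulated into Src1.
    static uint32_t v_bcnt_u32_b32(uint32_t Src0, uint32_t Src1) {
      for (; Src0; Src0 &= Src0 - 1)
        ++Src1;
      return Src1;
    }

    // 64-bit popcount from two chained counts: the first seeds the
    // accumulator with 0, the second adds the high-half count on top.
    // The total never exceeds 64, so one 32-bit result register suffices.
    static uint32_t bcnt_64(uint64_t Src) {
      uint32_t Mid = v_bcnt_u32_b32(static_cast<uint32_t>(Src), 0);
      return v_bcnt_u32_b32(static_cast<uint32_t>(Src >> 32), Mid);
    }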
+
void SIInstrInfo::addDescImplicitUseDef(const MCInstrDesc &NewDesc,
MachineInstr *Inst) const {
  // Add the implicit and explicit register definitions.
diff --git a/lib/Target/R600/SIInstrInfo.h b/lib/Target/R600/SIInstrInfo.h
index 7b31a81..4c204d8 100644
--- a/lib/Target/R600/SIInstrInfo.h
+++ b/lib/Target/R600/SIInstrInfo.h
@@ -44,13 +44,19 @@ private:
const TargetRegisterClass *RC,
const MachineOperand &Op) const;
- void splitScalar64BitOp(SmallVectorImpl<MachineInstr *> & Worklist,
- MachineInstr *Inst, unsigned Opcode) const;
+ void splitScalar64BitUnaryOp(SmallVectorImpl<MachineInstr *> &Worklist,
+ MachineInstr *Inst, unsigned Opcode) const;
+
+ void splitScalar64BitBinaryOp(SmallVectorImpl<MachineInstr *> &Worklist,
+ MachineInstr *Inst, unsigned Opcode) const;
+
+ void splitScalar64BitBCNT(SmallVectorImpl<MachineInstr *> &Worklist,
+ MachineInstr *Inst) const;
void addDescImplicitUseDef(const MCInstrDesc &Desc, MachineInstr *MI) const;
public:
- explicit SIInstrInfo(AMDGPUTargetMachine &tm);
+ explicit SIInstrInfo(const AMDGPUSubtarget &st);
const SIRegisterInfo &getRegisterInfo() const override {
return RI;
diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td
index 2242e6d..774c9d1 100644
--- a/lib/Target/R600/SIInstrInfo.td
+++ b/lib/Target/R600/SIInstrInfo.td
@@ -147,6 +147,12 @@ def FRAMEri32 : Operand<iPTR> {
}
//===----------------------------------------------------------------------===//
+// Complex patterns
+//===----------------------------------------------------------------------===//
+
+def MUBUFAddr64 : ComplexPattern<i64, 3, "SelectMUBUFAddr64">;
+
+//===----------------------------------------------------------------------===//
// SI assembler operands
//===----------------------------------------------------------------------===//
@@ -187,6 +193,12 @@ class SOP1_64 <bits<8> op, string opName, list<dag> pattern> : SOP1 <
opName#" $dst, $src0", pattern
>;
+// 64-bit input, 32-bit output.
+class SOP1_32_64 <bits<8> op, string opName, list<dag> pattern> : SOP1 <
+ op, (outs SReg_32:$dst), (ins SSrc_64:$src0),
+ opName#" $dst, $src0", pattern
+>;
+
class SOP2_32 <bits<7> op, string opName, list<dag> pattern> : SOP2 <
op, (outs SReg_32:$dst), (ins SSrc_32:$src0, SSrc_32:$src1),
opName#" $dst, $src0, $src1", pattern
@@ -260,7 +272,7 @@ class SIMCInstr <string pseudo, int subtarget> {
multiclass VOP3_m <bits<9> op, dag outs, dag ins, string asm, list<dag> pattern,
string opName> {
- def "" : InstSI <outs, ins, "", pattern>, VOP <opName>,
+ def "" : VOP3Common <outs, ins, "", pattern>, VOP <opName>,
SIMCInstr<OpName, SISubtarget.NONE> {
let isPseudo = 1;
}
@@ -357,12 +369,13 @@ multiclass VOP2b_32 <bits<6> op, string opName, list<dag> pattern,
}
multiclass VOPC_Helper <bits<8> op, RegisterClass vrc, RegisterClass arc,
- string opName, ValueType vt, PatLeaf cond> {
-
+ string opName, ValueType vt, PatLeaf cond, bit defExec = 0> {
def _e32 : VOPC <
op, (ins arc:$src0, vrc:$src1),
opName#"_e32 $dst, $src0, $src1", []
- >, VOP <opName>;
+ >, VOP <opName> {
+ let Defs = !if(defExec, [VCC, EXEC], [VCC]);
+ }
def _e64 : VOP3 <
{0, op{7}, op{6}, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}},
@@ -375,6 +388,7 @@ multiclass VOPC_Helper <bits<8> op, RegisterClass vrc, RegisterClass arc,
[(set SReg_64:$dst, (i1 (setcc (vt arc:$src0), arc:$src1, cond)))]
)
>, VOP <opName> {
+ let Defs = !if(defExec, [EXEC], []);
let src2 = SIOperand.ZERO;
let src2_modifiers = 0;
}
@@ -388,6 +402,14 @@ multiclass VOPC_64 <bits<8> op, string opName,
ValueType vt = untyped, PatLeaf cond = COND_NULL>
: VOPC_Helper <op, VReg_64, VSrc_64, opName, vt, cond>;
+multiclass VOPCX_32 <bits<8> op, string opName,
+ ValueType vt = untyped, PatLeaf cond = COND_NULL>
+ : VOPC_Helper <op, VReg_32, VSrc_32, opName, vt, cond, 1>;
+
+multiclass VOPCX_64 <bits<8> op, string opName,
+ ValueType vt = untyped, PatLeaf cond = COND_NULL>
+ : VOPC_Helper <op, VReg_64, VSrc_64, opName, vt, cond, 1>;
+
multiclass VOP3_32 <bits<9> op, string opName, list<dag> pattern> : VOP3_m <
op, (outs VReg_32:$dst),
(ins InputMods: $src0_modifiers, VSrc_32:$src0, InputMods:$src1_modifiers,
@@ -396,7 +418,7 @@ multiclass VOP3_32 <bits<9> op, string opName, list<dag> pattern> : VOP3_m <
opName#" $dst, $src0_modifiers, $src1, $src2, $clamp, $omod", pattern, opName
>;
-class VOP3_64_Shift <bits <9> op, string opName, list<dag> pattern> : VOP3 <
+class VOP3_64_32 <bits <9> op, string opName, list<dag> pattern> : VOP3 <
op, (outs VReg_64:$dst),
(ins VSrc_64:$src0, VSrc_32:$src1),
opName#" $dst, $src0, $src1", pattern
@@ -410,11 +432,29 @@ class VOP3_64_Shift <bits <9> op, string opName, list<dag> pattern> : VOP3 <
class VOP3_64 <bits<9> op, string opName, list<dag> pattern> : VOP3 <
op, (outs VReg_64:$dst),
- (ins VSrc_64:$src0, VSrc_64:$src1, VSrc_64:$src2,
+ (ins InputMods:$src0_modifiers, VSrc_64:$src0,
+ InputMods:$src1_modifiers, VSrc_64:$src1,
+ InputMods:$src2_modifiers, VSrc_64:$src2,
+ InstFlag:$clamp, InstFlag:$omod),
+ opName#" $dst, $src0_modifiers, $src1_modifiers, $src2_modifiers, $clamp, $omod", pattern
+>, VOP <opName>;
+
+
+class VOP3b_Helper <bits<9> op, RegisterClass vrc, RegisterClass arc,
+ string opName, list<dag> pattern> : VOP3 <
+ op, (outs vrc:$dst0, SReg_64:$dst1),
+ (ins arc:$src0, arc:$src1, arc:$src2,
InstFlag:$abs, InstFlag:$clamp, InstFlag:$omod, InstFlag:$neg),
- opName#" $dst, $src0, $src1, $src2, $abs, $clamp, $omod, $neg", pattern
+ opName#" $dst0, $dst1, $src0, $src1, $src2, $abs, $clamp, $omod, $neg", pattern
>, VOP <opName>;
+
+class VOP3b_64 <bits<9> op, string opName, list<dag> pattern> :
+ VOP3b_Helper <op, VReg_64, VSrc_64, opName, pattern>;
+
+class VOP3b_32 <bits<9> op, string opName, list<dag> pattern> :
+ VOP3b_Helper <op, VReg_32, VSrc_32, opName, pattern>;
+
//===----------------------------------------------------------------------===//
// Vector I/O classes
//===----------------------------------------------------------------------===//
@@ -475,10 +515,11 @@ class DS_Store2_Helper <bits<8> op, string asm, RegisterClass regClass> : DS_1A
let vdst = 0;
}
+// 1 address, 1 data.
class DS_1A1D_RET <bits<8> op, string asm, RegisterClass rc> : DS_1A <
op,
(outs rc:$vdst),
- (ins i1imm:$gds, VReg_32:$addr, VReg_32:$data0, u16imm:$offset),
+ (ins i1imm:$gds, VReg_32:$addr, rc:$data0, u16imm:$offset),
asm#" $vdst, $addr, $data0, $offset, [M0]",
[]> {
@@ -487,6 +528,41 @@ class DS_1A1D_RET <bits<8> op, string asm, RegisterClass rc> : DS_1A <
let mayLoad = 1;
}
+// 1 address, 2 data.
+class DS_1A2D_RET <bits<8> op, string asm, RegisterClass rc> : DS_1A <
+ op,
+ (outs rc:$vdst),
+ (ins i1imm:$gds, VReg_32:$addr, rc:$data0, rc:$data1, u16imm:$offset),
+ asm#" $vdst, $addr, $data0, $data1, $offset, [M0]",
+ []> {
+ let mayStore = 1;
+ let mayLoad = 1;
+}
+
+// 1 address, 2 data.
+class DS_1A2D_NORET <bits<8> op, string asm, RegisterClass rc> : DS_1A <
+ op,
+ (outs),
+ (ins i1imm:$gds, VReg_32:$addr, rc:$data0, rc:$data1, u16imm:$offset),
+ asm#" $addr, $data0, $data1, $offset, [M0]",
+ []> {
+ let mayStore = 1;
+ let mayLoad = 1;
+}
+
+// 1 address, 1 data.
+class DS_1A1D_NORET <bits<8> op, string asm, RegisterClass rc> : DS_1A <
+ op,
+ (outs),
+ (ins i1imm:$gds, VReg_32:$addr, rc:$data0, u16imm:$offset),
+ asm#" $addr, $data0, $offset, [M0]",
+ []> {
+
+ let data1 = 0;
+ let mayStore = 1;
+ let mayLoad = 1;
+}
+
class MTBUF_Store_Helper <bits<3> op, string asm, RegisterClass regClass> : MTBUF <
op,
(outs),
@@ -500,7 +576,9 @@ class MTBUF_Store_Helper <bits<3> op, string asm, RegisterClass regClass> : MTBU
let mayLoad = 0;
}
-multiclass MUBUF_Load_Helper <bits<7> op, string asm, RegisterClass regClass> {
+multiclass MUBUF_Load_Helper <bits<7> op, string asm, RegisterClass regClass,
+ ValueType load_vt = i32,
+ SDPatternOperator ld = null_frag> {
let lds = 0, mayLoad = 1 in {
@@ -542,16 +620,19 @@ multiclass MUBUF_Load_Helper <bits<7> op, string asm, RegisterClass regClass> {
let offen = 0, idxen = 0, addr64 = 1, glc = 0, slc = 0, tfe = 0, soffset = 128 /* ZERO */ in {
def _ADDR64 : MUBUF <op, (outs regClass:$vdata),
(ins SReg_128:$srsrc, VReg_64:$vaddr, u16imm:$offset),
- asm#" $vdata, $srsrc + $vaddr + $offset", []>;
+ asm#" $vdata, $srsrc + $vaddr + $offset",
+ [(set load_vt:$vdata, (ld (MUBUFAddr64 v4i32:$srsrc,
+ i64:$vaddr, u16imm:$offset)))]>;
}
}
}
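
The MUBUFAddr64 complex pattern introduced earlier decomposes a global address into the three operands the _ADDR64 form needs ($srsrc, $vaddr, $offset). As a rough sketch of the addressing being modelled, with made-up names and no claim about how SelectMUBUFAddr64 itself is implemented:

    #include <cstdint>
    #include <cstring>

    // Rough model of ADDR64 addressing as used by the patterns above: the
    // effective address is a base carried in the 128-bit resource
    // descriptor plus the 64-bit VGPR address plus the 16-bit immediate
    // offset (soffset is tied to the constant zero in these forms).
    static uint32_t buffer_load_dword(const uint8_t *RsrcBase, uint64_t VAddr,
                                      uint16_t Offset) {
      uint32_t Data;
      std::memcpy(&Data, RsrcBase + VAddr + Offset, sizeof(Data));
      return Data;
    }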
-class MUBUF_Store_Helper <bits<7> op, string name, RegisterClass vdataClass> :
+class MUBUF_Store_Helper <bits<7> op, string name, RegisterClass vdataClass,
+ ValueType store_vt, SDPatternOperator st> :
MUBUF <op, (outs), (ins vdataClass:$vdata, SReg_128:$srsrc, VReg_64:$vaddr,
u16imm:$offset),
name#" $vdata, $srsrc + $vaddr + $offset",
- []> {
+ [(st store_vt:$vdata, (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, u16imm:$offset))]> {
let mayLoad = 0;
let mayStore = 1;
@@ -658,6 +739,53 @@ multiclass MIMG_Sampler <bits<7> op, string asm> {
defm _V4 : MIMG_Sampler_Src_Helper<op, asm, VReg_128, 4>;
}
+class MIMG_Gather_Helper <bits<7> op, string asm,
+ RegisterClass dst_rc,
+ RegisterClass src_rc> : MIMG <
+ op,
+ (outs dst_rc:$vdata),
+ (ins i32imm:$dmask, i1imm:$unorm, i1imm:$glc, i1imm:$da, i1imm:$r128,
+ i1imm:$tfe, i1imm:$lwe, i1imm:$slc, src_rc:$vaddr,
+ SReg_256:$srsrc, SReg_128:$ssamp),
+ asm#" $vdata, $dmask, $unorm, $glc, $da, $r128,"
+ #" $tfe, $lwe, $slc, $vaddr, $srsrc, $ssamp",
+ []> {
+ let mayLoad = 1;
+ let mayStore = 0;
+
+ // DMASK was repurposed for GATHER4. 4 components are always
+ // returned and DMASK works like a swizzle - it selects
+ // the component to fetch. The only useful DMASK values are
+ // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
+ // (red,red,red,red) etc.) The ISA document doesn't mention
+ // this.
+ // Therefore, disable all code which updates DMASK by setting these two:
+ let MIMG = 0;
+ let hasPostISelHook = 0;
+}
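
One way to picture the swizzle behaviour described in the comment, as a standalone C++ sketch rather than anything in the backend (the texel layout here is assumed):

    #include <array>
    #include <cstdint>

    // GATHER4 always returns four values; a one-hot DMASK picks which
    // component of each sampled texel they come from (1 = red, 2 = green,
    // 4 = blue, 8 = alpha), so DMASK = 1 yields (red, red, red, red).
    static std::array<float, 4>
    gather4(const std::array<std::array<float, 4>, 4> &Texels, uint32_t DMask) {
      unsigned Comp = DMask == 1 ? 0 : DMask == 2 ? 1 : DMask == 4 ? 2 : 3;
      return {Texels[0][Comp], Texels[1][Comp], Texels[2][Comp], Texels[3][Comp]};
    }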
+
+multiclass MIMG_Gather_Src_Helper <bits<7> op, string asm,
+ RegisterClass dst_rc,
+ int channels> {
+ def _V1 : MIMG_Gather_Helper <op, asm, dst_rc, VReg_32>,
+ MIMG_Mask<asm#"_V1", channels>;
+ def _V2 : MIMG_Gather_Helper <op, asm, dst_rc, VReg_64>,
+ MIMG_Mask<asm#"_V2", channels>;
+ def _V4 : MIMG_Gather_Helper <op, asm, dst_rc, VReg_128>,
+ MIMG_Mask<asm#"_V4", channels>;
+ def _V8 : MIMG_Gather_Helper <op, asm, dst_rc, VReg_256>,
+ MIMG_Mask<asm#"_V8", channels>;
+ def _V16 : MIMG_Gather_Helper <op, asm, dst_rc, VReg_512>,
+ MIMG_Mask<asm#"_V16", channels>;
+}
+
+multiclass MIMG_Gather <bits<7> op, string asm> {
+ defm _V1 : MIMG_Gather_Src_Helper<op, asm, VReg_32, 1>;
+ defm _V2 : MIMG_Gather_Src_Helper<op, asm, VReg_64, 2>;
+ defm _V3 : MIMG_Gather_Src_Helper<op, asm, VReg_96, 3>;
+ defm _V4 : MIMG_Gather_Src_Helper<op, asm, VReg_128, 4>;
+}
+
//===----------------------------------------------------------------------===//
// Vector instruction mappings
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td
index 500fa78..b3b44e2 100644
--- a/lib/Target/R600/SIInstructions.td
+++ b/lib/Target/R600/SIInstructions.td
@@ -96,22 +96,35 @@ def S_NOT_B32 : SOP1_32 <0x00000007, "S_NOT_B32",
[(set i32:$dst, (not i32:$src0))]
>;
-def S_NOT_B64 : SOP1_64 <0x00000008, "S_NOT_B64", []>;
+def S_NOT_B64 : SOP1_64 <0x00000008, "S_NOT_B64",
+ [(set i64:$dst, (not i64:$src0))]
+>;
def S_WQM_B32 : SOP1_32 <0x00000009, "S_WQM_B32", []>;
def S_WQM_B64 : SOP1_64 <0x0000000a, "S_WQM_B64", []>;
-def S_BREV_B32 : SOP1_32 <0x0000000b, "S_BREV_B32", []>;
+def S_BREV_B32 : SOP1_32 <0x0000000b, "S_BREV_B32",
+ [(set i32:$dst, (AMDGPUbrev i32:$src0))]
+>;
def S_BREV_B64 : SOP1_64 <0x0000000c, "S_BREV_B64", []>;
} // End neverHasSideEffects = 1
////def S_BCNT0_I32_B32 : SOP1_BCNT0 <0x0000000d, "S_BCNT0_I32_B32", []>;
////def S_BCNT0_I32_B64 : SOP1_BCNT0 <0x0000000e, "S_BCNT0_I32_B64", []>;
-////def S_BCNT1_I32_B32 : SOP1_BCNT1 <0x0000000f, "S_BCNT1_I32_B32", []>;
-////def S_BCNT1_I32_B64 : SOP1_BCNT1 <0x00000010, "S_BCNT1_I32_B64", []>;
-////def S_FF0_I32_B32 : SOP1_FF0 <0x00000011, "S_FF0_I32_B32", []>;
+def S_BCNT1_I32_B32 : SOP1_32 <0x0000000f, "S_BCNT1_I32_B32",
+ [(set i32:$dst, (ctpop i32:$src0))]
+>;
+def S_BCNT1_I32_B64 : SOP1_32_64 <0x00000010, "S_BCNT1_I32_B64", []>;
+
+////def S_FF0_I32_B32 : SOP1_32 <0x00000011, "S_FF0_I32_B32", []>;
////def S_FF0_I32_B64 : SOP1_FF0 <0x00000012, "S_FF0_I32_B64", []>;
-////def S_FF1_I32_B32 : SOP1_FF1 <0x00000013, "S_FF1_I32_B32", []>;
+def S_FF1_I32_B32 : SOP1_32 <0x00000013, "S_FF1_I32_B32",
+ [(set i32:$dst, (cttz_zero_undef i32:$src0))]
+>;
////def S_FF1_I32_B64 : SOP1_FF1 <0x00000014, "S_FF1_I32_B64", []>;
-//def S_FLBIT_I32_B32 : SOP1_32 <0x00000015, "S_FLBIT_I32_B32", []>;
+
+def S_FLBIT_I32_B32 : SOP1_32 <0x00000015, "S_FLBIT_I32_B32",
+ [(set i32:$dst, (ctlz_zero_undef i32:$src0))]
+>;
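
The cttz_zero_undef / ctlz_zero_undef nodes used by these two patterns leave the zero-input case unspecified, which is also the contract of the GCC/Clang builtins, so a tiny model (illustrative only) is:

    // Bit position of the lowest set bit (S_FF1_I32_B32) and the number of
    // leading zeros (S_FLBIT_I32_B32); both undefined for X == 0, matching
    // the *_zero_undef selection nodes.
    static unsigned ff1_i32_b32(unsigned X)   { return __builtin_ctz(X); }
    static unsigned flbit_i32_b32(unsigned X) { return __builtin_clz(X); }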
+
//def S_FLBIT_I32_B64 : SOP1_32 <0x00000016, "S_FLBIT_I32_B64", []>;
def S_FLBIT_I32 : SOP1_32 <0x00000017, "S_FLBIT_I32", []>;
//def S_FLBIT_I32_I64 : SOP1_32 <0x00000018, "S_FLBIT_I32_I64", []>;
@@ -320,7 +333,7 @@ def S_CMPK_EQ_I32 : SOPK <
>;
*/
-let isCompare = 1 in {
+let isCompare = 1, Defs = [SCC] in {
def S_CMPK_LG_I32 : SOPK_32 <0x00000004, "S_CMPK_LG_I32", []>;
def S_CMPK_GT_I32 : SOPK_32 <0x00000005, "S_CMPK_GT_I32", []>;
def S_CMPK_GE_I32 : SOPK_32 <0x00000006, "S_CMPK_GE_I32", []>;
@@ -332,7 +345,7 @@ def S_CMPK_GT_U32 : SOPK_32 <0x0000000b, "S_CMPK_GT_U32", []>;
def S_CMPK_GE_U32 : SOPK_32 <0x0000000c, "S_CMPK_GE_U32", []>;
def S_CMPK_LT_U32 : SOPK_32 <0x0000000d, "S_CMPK_LT_U32", []>;
def S_CMPK_LE_U32 : SOPK_32 <0x0000000e, "S_CMPK_LE_U32", []>;
-} // End isCompare = 1
+} // End isCompare = 1, Defs = [SCC]
let Defs = [SCC], isCommutable = 1 in {
def S_ADDK_I32 : SOPK_32 <0x0000000f, "S_ADDK_I32", []>;
@@ -467,26 +480,26 @@ defm V_CMP_NEQ_F32 : VOPC_32 <0x0000000d, "V_CMP_NEQ_F32", f32, COND_UNE>;
defm V_CMP_NLT_F32 : VOPC_32 <0x0000000e, "V_CMP_NLT_F32">;
defm V_CMP_TRU_F32 : VOPC_32 <0x0000000f, "V_CMP_TRU_F32">;
-let hasSideEffects = 1, Defs = [EXEC] in {
+let hasSideEffects = 1 in {
-defm V_CMPX_F_F32 : VOPC_32 <0x00000010, "V_CMPX_F_F32">;
-defm V_CMPX_LT_F32 : VOPC_32 <0x00000011, "V_CMPX_LT_F32">;
-defm V_CMPX_EQ_F32 : VOPC_32 <0x00000012, "V_CMPX_EQ_F32">;
-defm V_CMPX_LE_F32 : VOPC_32 <0x00000013, "V_CMPX_LE_F32">;
-defm V_CMPX_GT_F32 : VOPC_32 <0x00000014, "V_CMPX_GT_F32">;
-defm V_CMPX_LG_F32 : VOPC_32 <0x00000015, "V_CMPX_LG_F32">;
-defm V_CMPX_GE_F32 : VOPC_32 <0x00000016, "V_CMPX_GE_F32">;
-defm V_CMPX_O_F32 : VOPC_32 <0x00000017, "V_CMPX_O_F32">;
-defm V_CMPX_U_F32 : VOPC_32 <0x00000018, "V_CMPX_U_F32">;
-defm V_CMPX_NGE_F32 : VOPC_32 <0x00000019, "V_CMPX_NGE_F32">;
-defm V_CMPX_NLG_F32 : VOPC_32 <0x0000001a, "V_CMPX_NLG_F32">;
-defm V_CMPX_NGT_F32 : VOPC_32 <0x0000001b, "V_CMPX_NGT_F32">;
-defm V_CMPX_NLE_F32 : VOPC_32 <0x0000001c, "V_CMPX_NLE_F32">;
-defm V_CMPX_NEQ_F32 : VOPC_32 <0x0000001d, "V_CMPX_NEQ_F32">;
-defm V_CMPX_NLT_F32 : VOPC_32 <0x0000001e, "V_CMPX_NLT_F32">;
-defm V_CMPX_TRU_F32 : VOPC_32 <0x0000001f, "V_CMPX_TRU_F32">;
+defm V_CMPX_F_F32 : VOPCX_32 <0x00000010, "V_CMPX_F_F32">;
+defm V_CMPX_LT_F32 : VOPCX_32 <0x00000011, "V_CMPX_LT_F32">;
+defm V_CMPX_EQ_F32 : VOPCX_32 <0x00000012, "V_CMPX_EQ_F32">;
+defm V_CMPX_LE_F32 : VOPCX_32 <0x00000013, "V_CMPX_LE_F32">;
+defm V_CMPX_GT_F32 : VOPCX_32 <0x00000014, "V_CMPX_GT_F32">;
+defm V_CMPX_LG_F32 : VOPCX_32 <0x00000015, "V_CMPX_LG_F32">;
+defm V_CMPX_GE_F32 : VOPCX_32 <0x00000016, "V_CMPX_GE_F32">;
+defm V_CMPX_O_F32 : VOPCX_32 <0x00000017, "V_CMPX_O_F32">;
+defm V_CMPX_U_F32 : VOPCX_32 <0x00000018, "V_CMPX_U_F32">;
+defm V_CMPX_NGE_F32 : VOPCX_32 <0x00000019, "V_CMPX_NGE_F32">;
+defm V_CMPX_NLG_F32 : VOPCX_32 <0x0000001a, "V_CMPX_NLG_F32">;
+defm V_CMPX_NGT_F32 : VOPCX_32 <0x0000001b, "V_CMPX_NGT_F32">;
+defm V_CMPX_NLE_F32 : VOPCX_32 <0x0000001c, "V_CMPX_NLE_F32">;
+defm V_CMPX_NEQ_F32 : VOPCX_32 <0x0000001d, "V_CMPX_NEQ_F32">;
+defm V_CMPX_NLT_F32 : VOPCX_32 <0x0000001e, "V_CMPX_NLT_F32">;
+defm V_CMPX_TRU_F32 : VOPCX_32 <0x0000001f, "V_CMPX_TRU_F32">;
-} // End hasSideEffects = 1, Defs = [EXEC]
+} // End hasSideEffects = 1
defm V_CMP_F_F64 : VOPC_64 <0x00000020, "V_CMP_F_F64">;
defm V_CMP_LT_F64 : VOPC_64 <0x00000021, "V_CMP_LT_F64", f64, COND_OLT>;
@@ -505,26 +518,26 @@ defm V_CMP_NEQ_F64 : VOPC_64 <0x0000002d, "V_CMP_NEQ_F64", f64, COND_UNE>;
defm V_CMP_NLT_F64 : VOPC_64 <0x0000002e, "V_CMP_NLT_F64">;
defm V_CMP_TRU_F64 : VOPC_64 <0x0000002f, "V_CMP_TRU_F64">;
-let hasSideEffects = 1, Defs = [EXEC] in {
+let hasSideEffects = 1 in {
-defm V_CMPX_F_F64 : VOPC_64 <0x00000030, "V_CMPX_F_F64">;
-defm V_CMPX_LT_F64 : VOPC_64 <0x00000031, "V_CMPX_LT_F64">;
-defm V_CMPX_EQ_F64 : VOPC_64 <0x00000032, "V_CMPX_EQ_F64">;
-defm V_CMPX_LE_F64 : VOPC_64 <0x00000033, "V_CMPX_LE_F64">;
-defm V_CMPX_GT_F64 : VOPC_64 <0x00000034, "V_CMPX_GT_F64">;
-defm V_CMPX_LG_F64 : VOPC_64 <0x00000035, "V_CMPX_LG_F64">;
-defm V_CMPX_GE_F64 : VOPC_64 <0x00000036, "V_CMPX_GE_F64">;
-defm V_CMPX_O_F64 : VOPC_64 <0x00000037, "V_CMPX_O_F64">;
-defm V_CMPX_U_F64 : VOPC_64 <0x00000038, "V_CMPX_U_F64">;
-defm V_CMPX_NGE_F64 : VOPC_64 <0x00000039, "V_CMPX_NGE_F64">;
-defm V_CMPX_NLG_F64 : VOPC_64 <0x0000003a, "V_CMPX_NLG_F64">;
-defm V_CMPX_NGT_F64 : VOPC_64 <0x0000003b, "V_CMPX_NGT_F64">;
-defm V_CMPX_NLE_F64 : VOPC_64 <0x0000003c, "V_CMPX_NLE_F64">;
-defm V_CMPX_NEQ_F64 : VOPC_64 <0x0000003d, "V_CMPX_NEQ_F64">;
-defm V_CMPX_NLT_F64 : VOPC_64 <0x0000003e, "V_CMPX_NLT_F64">;
-defm V_CMPX_TRU_F64 : VOPC_64 <0x0000003f, "V_CMPX_TRU_F64">;
+defm V_CMPX_F_F64 : VOPCX_64 <0x00000030, "V_CMPX_F_F64">;
+defm V_CMPX_LT_F64 : VOPCX_64 <0x00000031, "V_CMPX_LT_F64">;
+defm V_CMPX_EQ_F64 : VOPCX_64 <0x00000032, "V_CMPX_EQ_F64">;
+defm V_CMPX_LE_F64 : VOPCX_64 <0x00000033, "V_CMPX_LE_F64">;
+defm V_CMPX_GT_F64 : VOPCX_64 <0x00000034, "V_CMPX_GT_F64">;
+defm V_CMPX_LG_F64 : VOPCX_64 <0x00000035, "V_CMPX_LG_F64">;
+defm V_CMPX_GE_F64 : VOPCX_64 <0x00000036, "V_CMPX_GE_F64">;
+defm V_CMPX_O_F64 : VOPCX_64 <0x00000037, "V_CMPX_O_F64">;
+defm V_CMPX_U_F64 : VOPCX_64 <0x00000038, "V_CMPX_U_F64">;
+defm V_CMPX_NGE_F64 : VOPCX_64 <0x00000039, "V_CMPX_NGE_F64">;
+defm V_CMPX_NLG_F64 : VOPCX_64 <0x0000003a, "V_CMPX_NLG_F64">;
+defm V_CMPX_NGT_F64 : VOPCX_64 <0x0000003b, "V_CMPX_NGT_F64">;
+defm V_CMPX_NLE_F64 : VOPCX_64 <0x0000003c, "V_CMPX_NLE_F64">;
+defm V_CMPX_NEQ_F64 : VOPCX_64 <0x0000003d, "V_CMPX_NEQ_F64">;
+defm V_CMPX_NLT_F64 : VOPCX_64 <0x0000003e, "V_CMPX_NLT_F64">;
+defm V_CMPX_TRU_F64 : VOPCX_64 <0x0000003f, "V_CMPX_TRU_F64">;
-} // End hasSideEffects = 1, Defs = [EXEC]
+} // End hasSideEffects = 1
defm V_CMPS_F_F32 : VOPC_32 <0x00000040, "V_CMPS_F_F32">;
defm V_CMPS_LT_F32 : VOPC_32 <0x00000041, "V_CMPS_LT_F32">;
@@ -543,26 +556,26 @@ defm V_CMPS_NEQ_F32 : VOPC_32 <0x0000004d, "V_CMPS_NEQ_F32">;
defm V_CMPS_NLT_F32 : VOPC_32 <0x0000004e, "V_CMPS_NLT_F32">;
defm V_CMPS_TRU_F32 : VOPC_32 <0x0000004f, "V_CMPS_TRU_F32">;
-let hasSideEffects = 1, Defs = [EXEC] in {
+let hasSideEffects = 1 in {
-defm V_CMPSX_F_F32 : VOPC_32 <0x00000050, "V_CMPSX_F_F32">;
-defm V_CMPSX_LT_F32 : VOPC_32 <0x00000051, "V_CMPSX_LT_F32">;
-defm V_CMPSX_EQ_F32 : VOPC_32 <0x00000052, "V_CMPSX_EQ_F32">;
-defm V_CMPSX_LE_F32 : VOPC_32 <0x00000053, "V_CMPSX_LE_F32">;
-defm V_CMPSX_GT_F32 : VOPC_32 <0x00000054, "V_CMPSX_GT_F32">;
-defm V_CMPSX_LG_F32 : VOPC_32 <0x00000055, "V_CMPSX_LG_F32">;
-defm V_CMPSX_GE_F32 : VOPC_32 <0x00000056, "V_CMPSX_GE_F32">;
-defm V_CMPSX_O_F32 : VOPC_32 <0x00000057, "V_CMPSX_O_F32">;
-defm V_CMPSX_U_F32 : VOPC_32 <0x00000058, "V_CMPSX_U_F32">;
-defm V_CMPSX_NGE_F32 : VOPC_32 <0x00000059, "V_CMPSX_NGE_F32">;
-defm V_CMPSX_NLG_F32 : VOPC_32 <0x0000005a, "V_CMPSX_NLG_F32">;
-defm V_CMPSX_NGT_F32 : VOPC_32 <0x0000005b, "V_CMPSX_NGT_F32">;
-defm V_CMPSX_NLE_F32 : VOPC_32 <0x0000005c, "V_CMPSX_NLE_F32">;
-defm V_CMPSX_NEQ_F32 : VOPC_32 <0x0000005d, "V_CMPSX_NEQ_F32">;
-defm V_CMPSX_NLT_F32 : VOPC_32 <0x0000005e, "V_CMPSX_NLT_F32">;
-defm V_CMPSX_TRU_F32 : VOPC_32 <0x0000005f, "V_CMPSX_TRU_F32">;
+defm V_CMPSX_F_F32 : VOPCX_32 <0x00000050, "V_CMPSX_F_F32">;
+defm V_CMPSX_LT_F32 : VOPCX_32 <0x00000051, "V_CMPSX_LT_F32">;
+defm V_CMPSX_EQ_F32 : VOPCX_32 <0x00000052, "V_CMPSX_EQ_F32">;
+defm V_CMPSX_LE_F32 : VOPCX_32 <0x00000053, "V_CMPSX_LE_F32">;
+defm V_CMPSX_GT_F32 : VOPCX_32 <0x00000054, "V_CMPSX_GT_F32">;
+defm V_CMPSX_LG_F32 : VOPCX_32 <0x00000055, "V_CMPSX_LG_F32">;
+defm V_CMPSX_GE_F32 : VOPCX_32 <0x00000056, "V_CMPSX_GE_F32">;
+defm V_CMPSX_O_F32 : VOPCX_32 <0x00000057, "V_CMPSX_O_F32">;
+defm V_CMPSX_U_F32 : VOPCX_32 <0x00000058, "V_CMPSX_U_F32">;
+defm V_CMPSX_NGE_F32 : VOPCX_32 <0x00000059, "V_CMPSX_NGE_F32">;
+defm V_CMPSX_NLG_F32 : VOPCX_32 <0x0000005a, "V_CMPSX_NLG_F32">;
+defm V_CMPSX_NGT_F32 : VOPCX_32 <0x0000005b, "V_CMPSX_NGT_F32">;
+defm V_CMPSX_NLE_F32 : VOPCX_32 <0x0000005c, "V_CMPSX_NLE_F32">;
+defm V_CMPSX_NEQ_F32 : VOPCX_32 <0x0000005d, "V_CMPSX_NEQ_F32">;
+defm V_CMPSX_NLT_F32 : VOPCX_32 <0x0000005e, "V_CMPSX_NLT_F32">;
+defm V_CMPSX_TRU_F32 : VOPCX_32 <0x0000005f, "V_CMPSX_TRU_F32">;
-} // End hasSideEffects = 1, Defs = [EXEC]
+} // End hasSideEffects = 1
defm V_CMPS_F_F64 : VOPC_64 <0x00000060, "V_CMPS_F_F64">;
defm V_CMPS_LT_F64 : VOPC_64 <0x00000061, "V_CMPS_LT_F64">;
@@ -611,18 +624,18 @@ defm V_CMP_NE_I32 : VOPC_32 <0x00000085, "V_CMP_NE_I32", i32, COND_NE>;
defm V_CMP_GE_I32 : VOPC_32 <0x00000086, "V_CMP_GE_I32", i32, COND_SGE>;
defm V_CMP_T_I32 : VOPC_32 <0x00000087, "V_CMP_T_I32">;
-let hasSideEffects = 1, Defs = [EXEC] in {
+let hasSideEffects = 1 in {
-defm V_CMPX_F_I32 : VOPC_32 <0x00000090, "V_CMPX_F_I32">;
-defm V_CMPX_LT_I32 : VOPC_32 <0x00000091, "V_CMPX_LT_I32">;
-defm V_CMPX_EQ_I32 : VOPC_32 <0x00000092, "V_CMPX_EQ_I32">;
-defm V_CMPX_LE_I32 : VOPC_32 <0x00000093, "V_CMPX_LE_I32">;
-defm V_CMPX_GT_I32 : VOPC_32 <0x00000094, "V_CMPX_GT_I32">;
-defm V_CMPX_NE_I32 : VOPC_32 <0x00000095, "V_CMPX_NE_I32">;
-defm V_CMPX_GE_I32 : VOPC_32 <0x00000096, "V_CMPX_GE_I32">;
-defm V_CMPX_T_I32 : VOPC_32 <0x00000097, "V_CMPX_T_I32">;
+defm V_CMPX_F_I32 : VOPCX_32 <0x00000090, "V_CMPX_F_I32">;
+defm V_CMPX_LT_I32 : VOPCX_32 <0x00000091, "V_CMPX_LT_I32">;
+defm V_CMPX_EQ_I32 : VOPCX_32 <0x00000092, "V_CMPX_EQ_I32">;
+defm V_CMPX_LE_I32 : VOPCX_32 <0x00000093, "V_CMPX_LE_I32">;
+defm V_CMPX_GT_I32 : VOPCX_32 <0x00000094, "V_CMPX_GT_I32">;
+defm V_CMPX_NE_I32 : VOPCX_32 <0x00000095, "V_CMPX_NE_I32">;
+defm V_CMPX_GE_I32 : VOPCX_32 <0x00000096, "V_CMPX_GE_I32">;
+defm V_CMPX_T_I32 : VOPCX_32 <0x00000097, "V_CMPX_T_I32">;
-} // End hasSideEffects = 1, Defs = [EXEC]
+} // End hasSideEffects = 1
defm V_CMP_F_I64 : VOPC_64 <0x000000a0, "V_CMP_F_I64">;
defm V_CMP_LT_I64 : VOPC_64 <0x000000a1, "V_CMP_LT_I64", i64, COND_SLT>;
@@ -633,18 +646,18 @@ defm V_CMP_NE_I64 : VOPC_64 <0x000000a5, "V_CMP_NE_I64", i64, COND_NE>;
defm V_CMP_GE_I64 : VOPC_64 <0x000000a6, "V_CMP_GE_I64", i64, COND_SGE>;
defm V_CMP_T_I64 : VOPC_64 <0x000000a7, "V_CMP_T_I64">;
-let hasSideEffects = 1, Defs = [EXEC] in {
+let hasSideEffects = 1 in {
-defm V_CMPX_F_I64 : VOPC_64 <0x000000b0, "V_CMPX_F_I64">;
-defm V_CMPX_LT_I64 : VOPC_64 <0x000000b1, "V_CMPX_LT_I64">;
-defm V_CMPX_EQ_I64 : VOPC_64 <0x000000b2, "V_CMPX_EQ_I64">;
-defm V_CMPX_LE_I64 : VOPC_64 <0x000000b3, "V_CMPX_LE_I64">;
-defm V_CMPX_GT_I64 : VOPC_64 <0x000000b4, "V_CMPX_GT_I64">;
-defm V_CMPX_NE_I64 : VOPC_64 <0x000000b5, "V_CMPX_NE_I64">;
-defm V_CMPX_GE_I64 : VOPC_64 <0x000000b6, "V_CMPX_GE_I64">;
-defm V_CMPX_T_I64 : VOPC_64 <0x000000b7, "V_CMPX_T_I64">;
+defm V_CMPX_F_I64 : VOPCX_64 <0x000000b0, "V_CMPX_F_I64">;
+defm V_CMPX_LT_I64 : VOPCX_64 <0x000000b1, "V_CMPX_LT_I64">;
+defm V_CMPX_EQ_I64 : VOPCX_64 <0x000000b2, "V_CMPX_EQ_I64">;
+defm V_CMPX_LE_I64 : VOPCX_64 <0x000000b3, "V_CMPX_LE_I64">;
+defm V_CMPX_GT_I64 : VOPCX_64 <0x000000b4, "V_CMPX_GT_I64">;
+defm V_CMPX_NE_I64 : VOPCX_64 <0x000000b5, "V_CMPX_NE_I64">;
+defm V_CMPX_GE_I64 : VOPCX_64 <0x000000b6, "V_CMPX_GE_I64">;
+defm V_CMPX_T_I64 : VOPCX_64 <0x000000b7, "V_CMPX_T_I64">;
-} // End hasSideEffects = 1, Defs = [EXEC]
+} // End hasSideEffects = 1
defm V_CMP_F_U32 : VOPC_32 <0x000000c0, "V_CMP_F_U32">;
defm V_CMP_LT_U32 : VOPC_32 <0x000000c1, "V_CMP_LT_U32", i32, COND_ULT>;
@@ -655,18 +668,18 @@ defm V_CMP_NE_U32 : VOPC_32 <0x000000c5, "V_CMP_NE_U32", i32, COND_NE>;
defm V_CMP_GE_U32 : VOPC_32 <0x000000c6, "V_CMP_GE_U32", i32, COND_UGE>;
defm V_CMP_T_U32 : VOPC_32 <0x000000c7, "V_CMP_T_U32">;
-let hasSideEffects = 1, Defs = [EXEC] in {
+let hasSideEffects = 1 in {
-defm V_CMPX_F_U32 : VOPC_32 <0x000000d0, "V_CMPX_F_U32">;
-defm V_CMPX_LT_U32 : VOPC_32 <0x000000d1, "V_CMPX_LT_U32">;
-defm V_CMPX_EQ_U32 : VOPC_32 <0x000000d2, "V_CMPX_EQ_U32">;
-defm V_CMPX_LE_U32 : VOPC_32 <0x000000d3, "V_CMPX_LE_U32">;
-defm V_CMPX_GT_U32 : VOPC_32 <0x000000d4, "V_CMPX_GT_U32">;
-defm V_CMPX_NE_U32 : VOPC_32 <0x000000d5, "V_CMPX_NE_U32">;
-defm V_CMPX_GE_U32 : VOPC_32 <0x000000d6, "V_CMPX_GE_U32">;
-defm V_CMPX_T_U32 : VOPC_32 <0x000000d7, "V_CMPX_T_U32">;
+defm V_CMPX_F_U32 : VOPCX_32 <0x000000d0, "V_CMPX_F_U32">;
+defm V_CMPX_LT_U32 : VOPCX_32 <0x000000d1, "V_CMPX_LT_U32">;
+defm V_CMPX_EQ_U32 : VOPCX_32 <0x000000d2, "V_CMPX_EQ_U32">;
+defm V_CMPX_LE_U32 : VOPCX_32 <0x000000d3, "V_CMPX_LE_U32">;
+defm V_CMPX_GT_U32 : VOPCX_32 <0x000000d4, "V_CMPX_GT_U32">;
+defm V_CMPX_NE_U32 : VOPCX_32 <0x000000d5, "V_CMPX_NE_U32">;
+defm V_CMPX_GE_U32 : VOPCX_32 <0x000000d6, "V_CMPX_GE_U32">;
+defm V_CMPX_T_U32 : VOPCX_32 <0x000000d7, "V_CMPX_T_U32">;
-} // End hasSideEffects = 1, Defs = [EXEC]
+} // End hasSideEffects = 1
defm V_CMP_F_U64 : VOPC_64 <0x000000e0, "V_CMP_F_U64">;
defm V_CMP_LT_U64 : VOPC_64 <0x000000e1, "V_CMP_LT_U64", i64, COND_ULT>;
@@ -677,30 +690,30 @@ defm V_CMP_NE_U64 : VOPC_64 <0x000000e5, "V_CMP_NE_U64", i64, COND_NE>;
defm V_CMP_GE_U64 : VOPC_64 <0x000000e6, "V_CMP_GE_U64", i64, COND_UGE>;
defm V_CMP_T_U64 : VOPC_64 <0x000000e7, "V_CMP_T_U64">;
-let hasSideEffects = 1, Defs = [EXEC] in {
+let hasSideEffects = 1 in {
-defm V_CMPX_F_U64 : VOPC_64 <0x000000f0, "V_CMPX_F_U64">;
-defm V_CMPX_LT_U64 : VOPC_64 <0x000000f1, "V_CMPX_LT_U64">;
-defm V_CMPX_EQ_U64 : VOPC_64 <0x000000f2, "V_CMPX_EQ_U64">;
-defm V_CMPX_LE_U64 : VOPC_64 <0x000000f3, "V_CMPX_LE_U64">;
-defm V_CMPX_GT_U64 : VOPC_64 <0x000000f4, "V_CMPX_GT_U64">;
-defm V_CMPX_NE_U64 : VOPC_64 <0x000000f5, "V_CMPX_NE_U64">;
-defm V_CMPX_GE_U64 : VOPC_64 <0x000000f6, "V_CMPX_GE_U64">;
-defm V_CMPX_T_U64 : VOPC_64 <0x000000f7, "V_CMPX_T_U64">;
+defm V_CMPX_F_U64 : VOPCX_64 <0x000000f0, "V_CMPX_F_U64">;
+defm V_CMPX_LT_U64 : VOPCX_64 <0x000000f1, "V_CMPX_LT_U64">;
+defm V_CMPX_EQ_U64 : VOPCX_64 <0x000000f2, "V_CMPX_EQ_U64">;
+defm V_CMPX_LE_U64 : VOPCX_64 <0x000000f3, "V_CMPX_LE_U64">;
+defm V_CMPX_GT_U64 : VOPCX_64 <0x000000f4, "V_CMPX_GT_U64">;
+defm V_CMPX_NE_U64 : VOPCX_64 <0x000000f5, "V_CMPX_NE_U64">;
+defm V_CMPX_GE_U64 : VOPCX_64 <0x000000f6, "V_CMPX_GE_U64">;
+defm V_CMPX_T_U64 : VOPCX_64 <0x000000f7, "V_CMPX_T_U64">;
-} // End hasSideEffects = 1, Defs = [EXEC]
+} // End hasSideEffects = 1
defm V_CMP_CLASS_F32 : VOPC_32 <0x00000088, "V_CMP_CLASS_F32">;
-let hasSideEffects = 1, Defs = [EXEC] in {
-defm V_CMPX_CLASS_F32 : VOPC_32 <0x00000098, "V_CMPX_CLASS_F32">;
-} // End hasSideEffects = 1, Defs = [EXEC]
+let hasSideEffects = 1 in {
+defm V_CMPX_CLASS_F32 : VOPCX_32 <0x00000098, "V_CMPX_CLASS_F32">;
+} // End hasSideEffects = 1
defm V_CMP_CLASS_F64 : VOPC_64 <0x000000a8, "V_CMP_CLASS_F64">;
-let hasSideEffects = 1, Defs = [EXEC] in {
-defm V_CMPX_CLASS_F64 : VOPC_64 <0x000000b8, "V_CMPX_CLASS_F64">;
-} // End hasSideEffects = 1, Defs = [EXEC]
+let hasSideEffects = 1 in {
+defm V_CMPX_CLASS_F64 : VOPCX_64 <0x000000b8, "V_CMPX_CLASS_F64">;
+} // End hasSideEffects = 1
} // End isCompare = 1
@@ -708,8 +721,97 @@ defm V_CMPX_CLASS_F64 : VOPC_64 <0x000000b8, "V_CMPX_CLASS_F64">;
// DS Instructions
//===----------------------------------------------------------------------===//
-def DS_ADD_U32_RTN : DS_1A1D_RET <0x20, "DS_ADD_U32_RTN", VReg_32>;
-def DS_SUB_U32_RTN : DS_1A1D_RET <0x21, "DS_SUB_U32_RTN", VReg_32>;
+
+def DS_ADD_U32 : DS_1A1D_NORET <0x0, "DS_ADD_U32", VReg_32>;
+def DS_SUB_U32 : DS_1A1D_NORET <0x1, "DS_SUB_U32", VReg_32>;
+def DS_RSUB_U32 : DS_1A1D_NORET <0x2, "DS_RSUB_U32", VReg_32>;
+def DS_INC_U32 : DS_1A1D_NORET <0x3, "DS_INC_U32", VReg_32>;
+def DS_DEC_U32 : DS_1A1D_NORET <0x4, "DS_DEC_U32", VReg_32>;
+def DS_MIN_I32 : DS_1A1D_NORET <0x5, "DS_MIN_I32", VReg_32>;
+def DS_MAX_I32 : DS_1A1D_NORET <0x6, "DS_MAX_I32", VReg_32>;
+def DS_MIN_U32 : DS_1A1D_NORET <0x7, "DS_MIN_U32", VReg_32>;
+def DS_MAX_U32 : DS_1A1D_NORET <0x8, "DS_MAX_U32", VReg_32>;
+def DS_AND_B32 : DS_1A1D_NORET <0x9, "DS_AND_B32", VReg_32>;
+def DS_OR_B32 : DS_1A1D_NORET <0xa, "DS_OR_B32", VReg_32>;
+def DS_XOR_B32 : DS_1A1D_NORET <0xb, "DS_XOR_B32", VReg_32>;
+def DS_MSKOR_B32 : DS_1A1D_NORET <0xc, "DS_MSKOR_B32", VReg_32>;
+def DS_CMPST_B32 : DS_1A2D_NORET <0x10, "DS_CMPST_B32", VReg_32>;
+def DS_CMPST_F32 : DS_1A2D_NORET <0x11, "DS_CMPST_F32", VReg_32>;
+def DS_MIN_F32 : DS_1A1D_NORET <0x12, "DS_MIN_F32", VReg_32>;
+def DS_MAX_F32 : DS_1A1D_NORET <0x13, "DS_MAX_F32", VReg_32>;
+
+def DS_ADD_RTN_U32 : DS_1A1D_RET <0x20, "DS_ADD_RTN_U32", VReg_32>;
+def DS_SUB_RTN_U32 : DS_1A1D_RET <0x21, "DS_SUB_RTN_U32", VReg_32>;
+def DS_RSUB_RTN_U32 : DS_1A1D_RET <0x22, "DS_RSUB_RTN_U32", VReg_32>;
+def DS_INC_RTN_U32 : DS_1A1D_RET <0x23, "DS_INC_RTN_U32", VReg_32>;
+def DS_DEC_RTN_U32 : DS_1A1D_RET <0x24, "DS_DEC_RTN_U32", VReg_32>;
+def DS_MIN_RTN_I32 : DS_1A1D_RET <0x25, "DS_MIN_RTN_I32", VReg_32>;
+def DS_MAX_RTN_I32 : DS_1A1D_RET <0x26, "DS_MAX_RTN_I32", VReg_32>;
+def DS_MIN_RTN_U32 : DS_1A1D_RET <0x27, "DS_MIN_RTN_U32", VReg_32>;
+def DS_MAX_RTN_U32 : DS_1A1D_RET <0x28, "DS_MAX_RTN_U32", VReg_32>;
+def DS_AND_RTN_B32 : DS_1A1D_RET <0x29, "DS_AND_RTN_B32", VReg_32>;
+def DS_OR_RTN_B32 : DS_1A1D_RET <0x2a, "DS_OR_RTN_B32", VReg_32>;
+def DS_XOR_RTN_B32 : DS_1A1D_RET <0x2b, "DS_XOR_RTN_B32", VReg_32>;
+def DS_MSKOR_RTN_B32 : DS_1A1D_RET <0x2c, "DS_MSKOR_RTN_B32", VReg_32>;
+def DS_WRXCHG_RTN_B32 : DS_1A1D_RET <0x2d, "DS_WRXCHG_RTN_B32", VReg_32>;
+//def DS_WRXCHG2_RTN_B32 : DS_2A0D_RET <0x2e, "DS_WRXCHG2_RTN_B32", VReg_32>;
+//def DS_WRXCHG2ST64_RTN_B32 : DS_2A0D_RET <0x2f, "DS_WRXCHG2_RTN_B32", VReg_32>;
+def DS_CMPST_RTN_B32 : DS_1A2D_RET <0x30, "DS_CMPST_RTN_B32", VReg_32>;
+def DS_CMPST_RTN_F32 : DS_1A2D_RET <0x31, "DS_CMPST_RTN_F32", VReg_32>;
+def DS_MIN_RTN_F32 : DS_1A1D_RET <0x32, "DS_MIN_RTN_F32", VReg_32>;
+def DS_MAX_RTN_F32 : DS_1A1D_RET <0x33, "DS_MAX_RTN_F32", VReg_32>;
+
+let SubtargetPredicate = isCI in {
+def DS_WRAP_RTN_F32 : DS_1A1D_RET <0x34, "DS_WRAP_RTN_F32", VReg_32>;
+} // End isCI
+
+
+def DS_ADD_U64 : DS_1A1D_NORET <0x40, "DS_ADD_U64", VReg_32>;
+def DS_SUB_U64 : DS_1A1D_NORET <0x41, "DS_SUB_U64", VReg_32>;
+def DS_RSUB_U64 : DS_1A1D_NORET <0x42, "DS_RSUB_U64", VReg_32>;
+def DS_INC_U64 : DS_1A1D_NORET <0x43, "DS_INC_U64", VReg_32>;
+def DS_DEC_U64 : DS_1A1D_NORET <0x44, "DS_DEC_U64", VReg_32>;
+def DS_MIN_I64 : DS_1A1D_NORET <0x45, "DS_MIN_I64", VReg_64>;
+def DS_MAX_I64 : DS_1A1D_NORET <0x46, "DS_MAX_I64", VReg_64>;
+def DS_MIN_U64 : DS_1A1D_NORET <0x47, "DS_MIN_U64", VReg_64>;
+def DS_MAX_U64 : DS_1A1D_NORET <0x48, "DS_MAX_U64", VReg_64>;
+def DS_AND_B64 : DS_1A1D_NORET <0x49, "DS_AND_B64", VReg_64>;
+def DS_OR_B64 : DS_1A1D_NORET <0x4a, "DS_OR_B64", VReg_64>;
+def DS_XOR_B64 : DS_1A1D_NORET <0x4b, "DS_XOR_B64", VReg_64>;
+def DS_MSKOR_B64 : DS_1A1D_NORET <0x4c, "DS_MSKOR_B64", VReg_64>;
+def DS_CMPST_B64 : DS_1A2D_NORET <0x50, "DS_CMPST_B64", VReg_64>;
+def DS_CMPST_F64 : DS_1A2D_NORET <0x51, "DS_CMPST_F64", VReg_64>;
+def DS_MIN_F64 : DS_1A1D_NORET <0x52, "DS_MIN_F64", VReg_64>;
+def DS_MAX_F64 : DS_1A1D_NORET <0x53, "DS_MAX_F64", VReg_64>;
+
+def DS_ADD_RTN_U64 : DS_1A1D_RET <0x60, "DS_ADD_RTN_U64", VReg_64>;
+def DS_SUB_RTN_U64 : DS_1A1D_RET <0x61, "DS_SUB_RTN_U64", VReg_64>;
+def DS_RSUB_RTN_U64 : DS_1A1D_RET <0x62, "DS_RSUB_RTN_U64", VReg_64>;
+def DS_INC_RTN_U64 : DS_1A1D_RET <0x63, "DS_INC_RTN_U64", VReg_64>;
+def DS_DEC_RTN_U64 : DS_1A1D_RET <0x64, "DS_DEC_RTN_U64", VReg_64>;
+def DS_MIN_RTN_I64 : DS_1A1D_RET <0x65, "DS_MIN_RTN_I64", VReg_64>;
+def DS_MAX_RTN_I64 : DS_1A1D_RET <0x66, "DS_MAX_RTN_I64", VReg_64>;
+def DS_MIN_RTN_U64 : DS_1A1D_RET <0x67, "DS_MIN_RTN_U64", VReg_64>;
+def DS_MAX_RTN_U64 : DS_1A1D_RET <0x68, "DS_MAX_RTN_U64", VReg_64>;
+def DS_AND_RTN_B64 : DS_1A1D_RET <0x69, "DS_AND_RTN_B64", VReg_64>;
+def DS_OR_RTN_B64 : DS_1A1D_RET <0x6a, "DS_OR_RTN_B64", VReg_64>;
+def DS_XOR_RTN_B64 : DS_1A1D_RET <0x6b, "DS_XOR_RTN_B64", VReg_64>;
+def DS_MSKOR_RTN_B64 : DS_1A1D_RET <0x6c, "DS_MSKOR_RTN_B64", VReg_64>;
+def DS_WRXCHG_RTN_B64 : DS_1A1D_RET <0x6d, "DS_WRXCHG_RTN_B64", VReg_64>;
+//def DS_WRXCHG2_RTN_B64 : DS_2A0D_RET <0x6e, "DS_WRXCHG2_RTN_B64", VReg_64>;
+//def DS_WRXCHG2ST64_RTN_B64 : DS_2A0D_RET <0x6f, "DS_WRXCHG2_RTN_B64", VReg_64>;
+def DS_CMPST_RTN_B64 : DS_1A2D_RET <0x70, "DS_CMPST_RTN_B64", VReg_64>;
+def DS_CMPST_RTN_F64 : DS_1A2D_RET <0x71, "DS_CMPST_RTN_F64", VReg_64>;
+def DS_MIN_RTN_F64 : DS_1A1D_RET <0x72, "DS_MIN_F64", VReg_64>;
+def DS_MAX_RTN_F64 : DS_1A1D_RET <0x73, "DS_MAX_F64", VReg_64>;
+
+//let SubtargetPredicate = isCI in {
+// DS_CONDXCHG32_RTN_B64
+// DS_CONDXCHG32_RTN_B128
+//} // End isCI
+
+// TODO: _SRC2_* forms
+
def DS_WRITE_B32 : DS_Store_Helper <0x0000000d, "DS_WRITE_B32", VReg_32>;
def DS_WRITE_B8 : DS_Store_Helper <0x00000001e, "DS_WRITE_B8", VReg_32>;
def DS_WRITE_B16 : DS_Store_Helper <0x00000001f, "DS_WRITE_B16", VReg_32>;
@@ -744,32 +846,46 @@ defm BUFFER_LOAD_FORMAT_XYZW : MUBUF_Load_Helper <0x00000003, "BUFFER_LOAD_FORMA
//def BUFFER_STORE_FORMAT_XY : MUBUF_ <0x00000005, "BUFFER_STORE_FORMAT_XY", []>;
//def BUFFER_STORE_FORMAT_XYZ : MUBUF_ <0x00000006, "BUFFER_STORE_FORMAT_XYZ", []>;
//def BUFFER_STORE_FORMAT_XYZW : MUBUF_ <0x00000007, "BUFFER_STORE_FORMAT_XYZW", []>;
-defm BUFFER_LOAD_UBYTE : MUBUF_Load_Helper <0x00000008, "BUFFER_LOAD_UBYTE", VReg_32>;
-defm BUFFER_LOAD_SBYTE : MUBUF_Load_Helper <0x00000009, "BUFFER_LOAD_SBYTE", VReg_32>;
-defm BUFFER_LOAD_USHORT : MUBUF_Load_Helper <0x0000000a, "BUFFER_LOAD_USHORT", VReg_32>;
-defm BUFFER_LOAD_SSHORT : MUBUF_Load_Helper <0x0000000b, "BUFFER_LOAD_SSHORT", VReg_32>;
-defm BUFFER_LOAD_DWORD : MUBUF_Load_Helper <0x0000000c, "BUFFER_LOAD_DWORD", VReg_32>;
-defm BUFFER_LOAD_DWORDX2 : MUBUF_Load_Helper <0x0000000d, "BUFFER_LOAD_DWORDX2", VReg_64>;
-defm BUFFER_LOAD_DWORDX4 : MUBUF_Load_Helper <0x0000000e, "BUFFER_LOAD_DWORDX4", VReg_128>;
+defm BUFFER_LOAD_UBYTE : MUBUF_Load_Helper <
+ 0x00000008, "BUFFER_LOAD_UBYTE", VReg_32, i32, az_extloadi8_global
+>;
+defm BUFFER_LOAD_SBYTE : MUBUF_Load_Helper <
+ 0x00000009, "BUFFER_LOAD_SBYTE", VReg_32, i32, sextloadi8_global
+>;
+defm BUFFER_LOAD_USHORT : MUBUF_Load_Helper <
+ 0x0000000a, "BUFFER_LOAD_USHORT", VReg_32, i32, az_extloadi16_global
+>;
+defm BUFFER_LOAD_SSHORT : MUBUF_Load_Helper <
+ 0x0000000b, "BUFFER_LOAD_SSHORT", VReg_32, i32, sextloadi16_global
+>;
+defm BUFFER_LOAD_DWORD : MUBUF_Load_Helper <
+ 0x0000000c, "BUFFER_LOAD_DWORD", VReg_32, i32, global_load
+>;
+defm BUFFER_LOAD_DWORDX2 : MUBUF_Load_Helper <
+ 0x0000000d, "BUFFER_LOAD_DWORDX2", VReg_64, v2i32, global_load
+>;
+defm BUFFER_LOAD_DWORDX4 : MUBUF_Load_Helper <
+ 0x0000000e, "BUFFER_LOAD_DWORDX4", VReg_128, v4i32, global_load
+>;
def BUFFER_STORE_BYTE : MUBUF_Store_Helper <
- 0x00000018, "BUFFER_STORE_BYTE", VReg_32
+ 0x00000018, "BUFFER_STORE_BYTE", VReg_32, i32, truncstorei8_global
>;
def BUFFER_STORE_SHORT : MUBUF_Store_Helper <
- 0x0000001a, "BUFFER_STORE_SHORT", VReg_32
+ 0x0000001a, "BUFFER_STORE_SHORT", VReg_32, i32, truncstorei16_global
>;
def BUFFER_STORE_DWORD : MUBUF_Store_Helper <
- 0x0000001c, "BUFFER_STORE_DWORD", VReg_32
+ 0x0000001c, "BUFFER_STORE_DWORD", VReg_32, i32, global_store
>;
def BUFFER_STORE_DWORDX2 : MUBUF_Store_Helper <
- 0x0000001d, "BUFFER_STORE_DWORDX2", VReg_64
+ 0x0000001d, "BUFFER_STORE_DWORDX2", VReg_64, v2i32, global_store
>;
def BUFFER_STORE_DWORDX4 : MUBUF_Store_Helper <
- 0x0000001e, "BUFFER_STORE_DWORDX4", VReg_128
+ 0x0000001e, "BUFFER_STORE_DWORDX4", VReg_128, v4i32, global_store
>;
//def BUFFER_ATOMIC_SWAP : MUBUF_ <0x00000030, "BUFFER_ATOMIC_SWAP", []>;
//def BUFFER_ATOMIC_CMPSWAP : MUBUF_ <0x00000031, "BUFFER_ATOMIC_CMPSWAP", []>;
@@ -885,31 +1001,31 @@ defm IMAGE_SAMPLE_C_B : MIMG_Sampler <0x0000002d, "IMAGE_SAMPLE_C_B">;
//def IMAGE_SAMPLE_C_B_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_B_O", 0x0000003d>;
//def IMAGE_SAMPLE_C_B_CL_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_B_CL_O", 0x0000003e>;
//def IMAGE_SAMPLE_C_LZ_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_LZ_O", 0x0000003f>;
-//def IMAGE_GATHER4 : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4", 0x00000040>;
-//def IMAGE_GATHER4_CL : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_CL", 0x00000041>;
-//def IMAGE_GATHER4_L : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_L", 0x00000044>;
-//def IMAGE_GATHER4_B : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_B", 0x00000045>;
-//def IMAGE_GATHER4_B_CL : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_B_CL", 0x00000046>;
-//def IMAGE_GATHER4_LZ : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_LZ", 0x00000047>;
-//def IMAGE_GATHER4_C : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C", 0x00000048>;
-//def IMAGE_GATHER4_C_CL : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_CL", 0x00000049>;
-//def IMAGE_GATHER4_C_L : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_L", 0x0000004c>;
-//def IMAGE_GATHER4_C_B : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_B", 0x0000004d>;
-//def IMAGE_GATHER4_C_B_CL : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_B_CL", 0x0000004e>;
-//def IMAGE_GATHER4_C_LZ : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_LZ", 0x0000004f>;
-//def IMAGE_GATHER4_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_O", 0x00000050>;
-//def IMAGE_GATHER4_CL_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_CL_O", 0x00000051>;
-//def IMAGE_GATHER4_L_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_L_O", 0x00000054>;
-//def IMAGE_GATHER4_B_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_B_O", 0x00000055>;
-//def IMAGE_GATHER4_B_CL_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_B_CL_O", 0x00000056>;
-//def IMAGE_GATHER4_LZ_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_LZ_O", 0x00000057>;
-//def IMAGE_GATHER4_C_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_O", 0x00000058>;
-//def IMAGE_GATHER4_C_CL_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_CL_O", 0x00000059>;
-//def IMAGE_GATHER4_C_L_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_L_O", 0x0000005c>;
-//def IMAGE_GATHER4_C_B_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_B_O", 0x0000005d>;
-//def IMAGE_GATHER4_C_B_CL_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_B_CL_O", 0x0000005e>;
-//def IMAGE_GATHER4_C_LZ_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_LZ_O", 0x0000005f>;
-//def IMAGE_GET_LOD : MIMG_NoPattern_ <"IMAGE_GET_LOD", 0x00000060>;
+defm IMAGE_GATHER4 : MIMG_Gather <0x00000040, "IMAGE_GATHER4">;
+defm IMAGE_GATHER4_CL : MIMG_Gather <0x00000041, "IMAGE_GATHER4_CL">;
+defm IMAGE_GATHER4_L : MIMG_Gather <0x00000044, "IMAGE_GATHER4_L">;
+defm IMAGE_GATHER4_B : MIMG_Gather <0x00000045, "IMAGE_GATHER4_B">;
+defm IMAGE_GATHER4_B_CL : MIMG_Gather <0x00000046, "IMAGE_GATHER4_B_CL">;
+defm IMAGE_GATHER4_LZ : MIMG_Gather <0x00000047, "IMAGE_GATHER4_LZ">;
+defm IMAGE_GATHER4_C : MIMG_Gather <0x00000048, "IMAGE_GATHER4_C">;
+defm IMAGE_GATHER4_C_CL : MIMG_Gather <0x00000049, "IMAGE_GATHER4_C_CL">;
+defm IMAGE_GATHER4_C_L : MIMG_Gather <0x0000004c, "IMAGE_GATHER4_C_L">;
+defm IMAGE_GATHER4_C_B : MIMG_Gather <0x0000004d, "IMAGE_GATHER4_C_B">;
+defm IMAGE_GATHER4_C_B_CL : MIMG_Gather <0x0000004e, "IMAGE_GATHER4_C_B_CL">;
+defm IMAGE_GATHER4_C_LZ : MIMG_Gather <0x0000004f, "IMAGE_GATHER4_C_LZ">;
+defm IMAGE_GATHER4_O : MIMG_Gather <0x00000050, "IMAGE_GATHER4_O">;
+defm IMAGE_GATHER4_CL_O : MIMG_Gather <0x00000051, "IMAGE_GATHER4_CL_O">;
+defm IMAGE_GATHER4_L_O : MIMG_Gather <0x00000054, "IMAGE_GATHER4_L_O">;
+defm IMAGE_GATHER4_B_O : MIMG_Gather <0x00000055, "IMAGE_GATHER4_B_O">;
+defm IMAGE_GATHER4_B_CL_O : MIMG_Gather <0x00000056, "IMAGE_GATHER4_B_CL_O">;
+defm IMAGE_GATHER4_LZ_O : MIMG_Gather <0x00000057, "IMAGE_GATHER4_LZ_O">;
+defm IMAGE_GATHER4_C_O : MIMG_Gather <0x00000058, "IMAGE_GATHER4_C_O">;
+defm IMAGE_GATHER4_C_CL_O : MIMG_Gather <0x00000059, "IMAGE_GATHER4_C_CL_O">;
+defm IMAGE_GATHER4_C_L_O : MIMG_Gather <0x0000005c, "IMAGE_GATHER4_C_L_O">;
+defm IMAGE_GATHER4_C_B_O : MIMG_Gather <0x0000005d, "IMAGE_GATHER4_C_B_O">;
+defm IMAGE_GATHER4_C_B_CL_O : MIMG_Gather <0x0000005e, "IMAGE_GATHER4_C_B_CL_O">;
+defm IMAGE_GATHER4_C_LZ_O : MIMG_Gather <0x0000005f, "IMAGE_GATHER4_C_LZ_O">;
+defm IMAGE_GET_LOD : MIMG_Sampler <0x00000060, "IMAGE_GET_LOD">;
//def IMAGE_SAMPLE_CD : MIMG_NoPattern_ <"IMAGE_SAMPLE_CD", 0x00000068>;
//def IMAGE_SAMPLE_CD_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_CD_CL", 0x00000069>;
//def IMAGE_SAMPLE_C_CD : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_CD", 0x0000006a>;
@@ -962,8 +1078,12 @@ defm V_CVT_I32_F32 : VOP1_32 <0x00000008, "V_CVT_I32_F32",
[(set i32:$dst, (fp_to_sint f32:$src0))]
>;
defm V_MOV_FED_B32 : VOP1_32 <0x00000009, "V_MOV_FED_B32", []>;
-////def V_CVT_F16_F32 : VOP1_F16 <0x0000000a, "V_CVT_F16_F32", []>;
-//defm V_CVT_F32_F16 : VOP1_32 <0x0000000b, "V_CVT_F32_F16", []>;
+defm V_CVT_F16_F32 : VOP1_32 <0x0000000a, "V_CVT_F16_F32",
+ [(set i32:$dst, (f32_to_f16 f32:$src0))]
+>;
+defm V_CVT_F32_F16 : VOP1_32 <0x0000000b, "V_CVT_F32_F16",
+ [(set f32:$dst, (f16_to_f32 i32:$src0))]
+>;
//defm V_CVT_RPI_I32_F32 : VOP1_32 <0x0000000c, "V_CVT_RPI_I32_F32", []>;
//defm V_CVT_FLR_I32_F32 : VOP1_32 <0x0000000d, "V_CVT_FLR_I32_F32", []>;
//defm V_CVT_OFF_F32_I4 : VOP1_32 <0x0000000e, "V_CVT_OFF_F32_I4", []>;
@@ -973,10 +1093,18 @@ defm V_CVT_F32_F64 : VOP1_32_64 <0x0000000f, "V_CVT_F32_F64",
defm V_CVT_F64_F32 : VOP1_64_32 <0x00000010, "V_CVT_F64_F32",
[(set f64:$dst, (fextend f32:$src0))]
>;
-//defm V_CVT_F32_UBYTE0 : VOP1_32 <0x00000011, "V_CVT_F32_UBYTE0", []>;
-//defm V_CVT_F32_UBYTE1 : VOP1_32 <0x00000012, "V_CVT_F32_UBYTE1", []>;
-//defm V_CVT_F32_UBYTE2 : VOP1_32 <0x00000013, "V_CVT_F32_UBYTE2", []>;
-//defm V_CVT_F32_UBYTE3 : VOP1_32 <0x00000014, "V_CVT_F32_UBYTE3", []>;
+defm V_CVT_F32_UBYTE0 : VOP1_32 <0x00000011, "V_CVT_F32_UBYTE0",
+ [(set f32:$dst, (AMDGPUcvt_f32_ubyte0 i32:$src0))]
+>;
+defm V_CVT_F32_UBYTE1 : VOP1_32 <0x00000012, "V_CVT_F32_UBYTE1",
+ [(set f32:$dst, (AMDGPUcvt_f32_ubyte1 i32:$src0))]
+>;
+defm V_CVT_F32_UBYTE2 : VOP1_32 <0x00000013, "V_CVT_F32_UBYTE2",
+ [(set f32:$dst, (AMDGPUcvt_f32_ubyte2 i32:$src0))]
+>;
+defm V_CVT_F32_UBYTE3 : VOP1_32 <0x00000014, "V_CVT_F32_UBYTE3",
+ [(set f32:$dst, (AMDGPUcvt_f32_ubyte3 i32:$src0))]
+>;
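
For reference, the arithmetic these four patterns select for is an unsigned byte extract followed by an integer-to-float convert; a minimal sketch:

    #include <cstdint>

    // V_CVT_F32_UBYTEn converts byte n of the 32-bit source to float.
    static float cvt_f32_ubyte(uint32_t Src, unsigned N) {
      return static_cast<float>((Src >> (8 * N)) & 0xffu);
    }
    // cvt_f32_ubyte(0x40302010, 1) == 32.0f, since byte 1 is 0x20.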
defm V_CVT_U32_F64 : VOP1_32_64 <0x00000015, "V_CVT_U32_F64",
[(set i32:$dst, (fp_to_uint f64:$src0))]
>;
@@ -988,7 +1116,7 @@ defm V_FRACT_F32 : VOP1_32 <0x00000020, "V_FRACT_F32",
[(set f32:$dst, (AMDGPUfract f32:$src0))]
>;
defm V_TRUNC_F32 : VOP1_32 <0x00000021, "V_TRUNC_F32",
- [(set f32:$dst, (int_AMDGPU_trunc f32:$src0))]
+ [(set f32:$dst, (ftrunc f32:$src0))]
>;
defm V_CEIL_F32 : VOP1_32 <0x00000022, "V_CEIL_F32",
[(set f32:$dst, (fceil f32:$src0))]
@@ -1006,24 +1134,33 @@ defm V_LOG_CLAMP_F32 : VOP1_32 <0x00000026, "V_LOG_CLAMP_F32", []>;
defm V_LOG_F32 : VOP1_32 <0x00000027, "V_LOG_F32",
[(set f32:$dst, (flog2 f32:$src0))]
>;
+
defm V_RCP_CLAMP_F32 : VOP1_32 <0x00000028, "V_RCP_CLAMP_F32", []>;
defm V_RCP_LEGACY_F32 : VOP1_32 <0x00000029, "V_RCP_LEGACY_F32", []>;
defm V_RCP_F32 : VOP1_32 <0x0000002a, "V_RCP_F32",
- [(set f32:$dst, (fdiv FP_ONE, f32:$src0))]
+ [(set f32:$dst, (AMDGPUrcp f32:$src0))]
>;
defm V_RCP_IFLAG_F32 : VOP1_32 <0x0000002b, "V_RCP_IFLAG_F32", []>;
-defm V_RSQ_CLAMP_F32 : VOP1_32 <0x0000002c, "V_RSQ_CLAMP_F32", []>;
+defm V_RSQ_CLAMP_F32 : VOP1_32 <0x0000002c, "V_RSQ_CLAMP_F32",
+ [(set f32:$dst, (AMDGPUrsq_clamped f32:$src0))]
+>;
defm V_RSQ_LEGACY_F32 : VOP1_32 <
0x0000002d, "V_RSQ_LEGACY_F32",
- [(set f32:$dst, (int_AMDGPU_rsq f32:$src0))]
+ [(set f32:$dst, (AMDGPUrsq_legacy f32:$src0))]
+>;
+defm V_RSQ_F32 : VOP1_32 <0x0000002e, "V_RSQ_F32",
+ [(set f32:$dst, (AMDGPUrsq f32:$src0))]
>;
-defm V_RSQ_F32 : VOP1_32 <0x0000002e, "V_RSQ_F32", []>;
defm V_RCP_F64 : VOP1_64 <0x0000002f, "V_RCP_F64",
- [(set f64:$dst, (fdiv FP_ONE, f64:$src0))]
+ [(set f64:$dst, (AMDGPUrcp f64:$src0))]
>;
defm V_RCP_CLAMP_F64 : VOP1_64 <0x00000030, "V_RCP_CLAMP_F64", []>;
-defm V_RSQ_F64 : VOP1_64 <0x00000031, "V_RSQ_F64", []>;
-defm V_RSQ_CLAMP_F64 : VOP1_64 <0x00000032, "V_RSQ_CLAMP_F64", []>;
+defm V_RSQ_F64 : VOP1_64 <0x00000031, "V_RSQ_F64",
+ [(set f64:$dst, (AMDGPUrsq f64:$src0))]
+>;
+defm V_RSQ_CLAMP_F64 : VOP1_64 <0x00000032, "V_RSQ_CLAMP_F64",
+ [(set f64:$dst, (AMDGPUrsq_clamped f64:$src0))]
+>;
defm V_SQRT_F32 : VOP1_32 <0x00000033, "V_SQRT_F32",
[(set f32:$dst, (fsqrt f32:$src0))]
>;
@@ -1211,7 +1348,7 @@ defm V_BFM_B32 : VOP2_32 <0x0000001e, "V_BFM_B32",
defm V_MAC_F32 : VOP2_32 <0x0000001f, "V_MAC_F32", []>;
defm V_MADMK_F32 : VOP2_32 <0x00000020, "V_MADMK_F32", []>;
defm V_MADAK_F32 : VOP2_32 <0x00000021, "V_MADAK_F32", []>;
-//defm V_BCNT_U32_B32 : VOP2_32 <0x00000022, "V_BCNT_U32_B32", []>;
+defm V_BCNT_U32_B32 : VOP2_32 <0x00000022, "V_BCNT_U32_B32", []>;
defm V_MBCNT_LO_U32_B32 : VOP2_32 <0x00000023, "V_MBCNT_LO_U32_B32", []>;
defm V_MBCNT_HI_U32_B32 : VOP2_32 <0x00000024, "V_MBCNT_HI_U32_B32", []>;
@@ -1303,16 +1440,20 @@ defm V_MULLIT_F32 : VOP3_32 <0x00000150, "V_MULLIT_F32", []>;
//def V_SAD_U16 : VOP3_U16 <0x0000015c, "V_SAD_U16", []>;
defm V_SAD_U32 : VOP3_32 <0x0000015d, "V_SAD_U32", []>;
////def V_CVT_PK_U8_F32 : VOP3_U8 <0x0000015e, "V_CVT_PK_U8_F32", []>;
-defm V_DIV_FIXUP_F32 : VOP3_32 <0x0000015f, "V_DIV_FIXUP_F32", []>;
-def V_DIV_FIXUP_F64 : VOP3_64 <0x00000160, "V_DIV_FIXUP_F64", []>;
+defm V_DIV_FIXUP_F32 : VOP3_32 <0x0000015f, "V_DIV_FIXUP_F32",
+ [(set f32:$dst, (AMDGPUdiv_fixup f32:$src0, f32:$src1, f32:$src2))]
+>;
+def V_DIV_FIXUP_F64 : VOP3_64 <0x00000160, "V_DIV_FIXUP_F64",
+ [(set f64:$dst, (AMDGPUdiv_fixup f64:$src0, f64:$src1, f64:$src2))]
+>;
-def V_LSHL_B64 : VOP3_64_Shift <0x00000161, "V_LSHL_B64",
+def V_LSHL_B64 : VOP3_64_32 <0x00000161, "V_LSHL_B64",
[(set i64:$dst, (shl i64:$src0, i32:$src1))]
>;
-def V_LSHR_B64 : VOP3_64_Shift <0x00000162, "V_LSHR_B64",
+def V_LSHR_B64 : VOP3_64_32 <0x00000162, "V_LSHR_B64",
[(set i64:$dst, (srl i64:$src0, i32:$src1))]
>;
-def V_ASHR_I64 : VOP3_64_Shift <0x00000163, "V_ASHR_I64",
+def V_ASHR_I64 : VOP3_64_32 <0x00000163, "V_ASHR_I64",
[(set i64:$dst, (sra i64:$src0, i32:$src1))]
>;
@@ -1336,14 +1477,23 @@ defm V_MUL_HI_I32 : VOP3_32 <0x0000016c, "V_MUL_HI_I32", []>;
} // isCommutable = 1
-defm V_DIV_SCALE_F32 : VOP3_32 <0x0000016d, "V_DIV_SCALE_F32", []>;
-def V_DIV_SCALE_F64 : VOP3_64 <0x0000016e, "V_DIV_SCALE_F64", []>;
-defm V_DIV_FMAS_F32 : VOP3_32 <0x0000016f, "V_DIV_FMAS_F32", []>;
-def V_DIV_FMAS_F64 : VOP3_64 <0x00000170, "V_DIV_FMAS_F64", []>;
+def V_DIV_SCALE_F32 : VOP3b_32 <0x0000016d, "V_DIV_SCALE_F32", []>;
+
+// Double precision division pre-scale.
+def V_DIV_SCALE_F64 : VOP3b_64 <0x0000016e, "V_DIV_SCALE_F64", []>;
+
+defm V_DIV_FMAS_F32 : VOP3_32 <0x0000016f, "V_DIV_FMAS_F32",
+ [(set f32:$dst, (AMDGPUdiv_fmas f32:$src0, f32:$src1, f32:$src2))]
+>;
+def V_DIV_FMAS_F64 : VOP3_64 <0x00000170, "V_DIV_FMAS_F64",
+ [(set f64:$dst, (AMDGPUdiv_fmas f64:$src0, f64:$src1, f64:$src2))]
+>;
//def V_MSAD_U8 : VOP3_U8 <0x00000171, "V_MSAD_U8", []>;
//def V_QSAD_U8 : VOP3_U8 <0x00000172, "V_QSAD_U8", []>;
//def V_MQSAD_U8 : VOP3_U8 <0x00000173, "V_MQSAD_U8", []>;
-def V_TRIG_PREOP_F64 : VOP3_64 <0x00000174, "V_TRIG_PREOP_F64", []>;
+def V_TRIG_PREOP_F64 : VOP3_64_32 <0x00000174, "V_TRIG_PREOP_F64",
+ [(set f64:$dst, (AMDGPUtrig_preop f64:$src0, i32:$src1))]
+>;
//===----------------------------------------------------------------------===//
// Pseudo Instructions
@@ -1500,7 +1650,7 @@ let usesCustomInserter = 1 in {
// constant that can be used with the ADDR64 MUBUF instructions.
def SI_ADDR64_RSRC : InstSI <
(outs SReg_128:$srsrc),
- (ins SReg_64:$ptr),
+ (ins SSrc_64:$ptr),
"", []
>;
@@ -1508,7 +1658,7 @@ def V_SUB_F64 : InstSI <
(outs VReg_64:$dst),
(ins VReg_64:$src0, VReg_64:$src1),
"V_SUB_F64 $dst, $src0, $src1",
- []
+ [(set f64:$dst, (fsub f64:$src0, f64:$src1))]
>;
} // end usesCustomInserter
@@ -1529,6 +1679,7 @@ multiclass SI_SPILL_SGPR <RegisterClass sgpr_class> {
}
+defm SI_SPILL_S32 : SI_SPILL_SGPR <SReg_32>;
defm SI_SPILL_S64 : SI_SPILL_SGPR <SReg_64>;
defm SI_SPILL_S128 : SI_SPILL_SGPR <SReg_128>;
defm SI_SPILL_S256 : SI_SPILL_SGPR <SReg_256>;
@@ -1552,7 +1703,7 @@ def : Pat <
/* int_SI_vs_load_input */
def : Pat<
- (SIload_input v4i32:$tlst, IMM12bit:$attr_offset, i32:$buf_idx_vgpr),
+ (SIload_input v4i32:$tlst, imm:$attr_offset, i32:$buf_idx_vgpr),
(BUFFER_LOAD_FORMAT_XYZW_IDXEN $tlst, $buf_idx_vgpr, imm:$attr_offset, 0, 0, 0, 0)
>;
@@ -1564,11 +1715,6 @@ def : Pat <
$src0, $src1, $src2, $src3)
>;
-def : Pat <
- (f64 (fsub f64:$src0, f64:$src1)),
- (V_SUB_F64 $src0, $src1)
->;
-
//===----------------------------------------------------------------------===//
// SMRD Patterns
//===----------------------------------------------------------------------===//
@@ -1596,7 +1742,6 @@ multiclass SMRD_Pattern <SMRD Instr_IMM, SMRD Instr_SGPR, ValueType vt> {
defm : SMRD_Pattern <S_LOAD_DWORD_IMM, S_LOAD_DWORD_SGPR, f32>;
defm : SMRD_Pattern <S_LOAD_DWORD_IMM, S_LOAD_DWORD_SGPR, i32>;
-defm : SMRD_Pattern <S_LOAD_DWORDX2_IMM, S_LOAD_DWORDX2_SGPR, i64>;
defm : SMRD_Pattern <S_LOAD_DWORDX2_IMM, S_LOAD_DWORDX2_SGPR, v2i32>;
defm : SMRD_Pattern <S_LOAD_DWORDX4_IMM, S_LOAD_DWORDX4_SGPR, v4i32>;
defm : SMRD_Pattern <S_LOAD_DWORDX8_IMM, S_LOAD_DWORDX8_SGPR, v32i8>;
@@ -1615,6 +1760,24 @@ def : Pat <
(S_BUFFER_LOAD_DWORD_SGPR $sbase, (S_MOV_B32 imm:$offset))
>;
+} // Predicates = [isSI] in {
+
+//===----------------------------------------------------------------------===//
+// SOP1 Patterns
+//===----------------------------------------------------------------------===//
+
+let Predicates = [isSI, isCFDepth0] in {
+
+def : Pat <
+ (i64 (ctpop i64:$src)),
+ (INSERT_SUBREG (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
+ (S_BCNT1_I32_B64 $src), sub0),
+ (S_MOV_B32 0), sub1)
+>;
+
+} // Predicates = [isSI, isCFDepth0]
+
+let Predicates = [isSI] in {
//===----------------------------------------------------------------------===//
// SOP2 Patterns
//===----------------------------------------------------------------------===//
@@ -1625,18 +1788,39 @@ def : Pat <
>;
//===----------------------------------------------------------------------===//
-// VOP2 Patterns
+// SOPP Patterns
//===----------------------------------------------------------------------===//
def : Pat <
- (or i64:$src0, i64:$src1),
+ (int_AMDGPU_barrier_global),
+ (S_BARRIER)
+>;
+
+//===----------------------------------------------------------------------===//
+// VOP1 Patterns
+//===----------------------------------------------------------------------===//
+
+def : RcpPat<V_RCP_F32_e32, f32>;
+def : RcpPat<V_RCP_F64_e32, f64>;
+defm : RsqPat<V_RSQ_F32_e32, f32>;
+defm : RsqPat<V_RSQ_F64_e32, f64>;
+
+//===----------------------------------------------------------------------===//
+// VOP2 Patterns
+//===----------------------------------------------------------------------===//
+
+class BinOp64Pat <SDNode node, Instruction inst> : Pat <
+ (node i64:$src0, i64:$src1),
(INSERT_SUBREG (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
- (V_OR_B32_e32 (EXTRACT_SUBREG i64:$src0, sub0),
+ (inst (EXTRACT_SUBREG i64:$src0, sub0),
(EXTRACT_SUBREG i64:$src1, sub0)), sub0),
- (V_OR_B32_e32 (EXTRACT_SUBREG i64:$src0, sub1),
+ (inst (EXTRACT_SUBREG i64:$src0, sub1),
(EXTRACT_SUBREG i64:$src1, sub1)), sub1)
>;
+def : BinOp64Pat <or, V_OR_B32_e32>;
+def : BinOp64Pat <xor, V_XOR_B32_e32>;
+
class SextInReg <ValueType vt, int ShiftAmt> : Pat <
(sext_inreg i32:$src0, vt),
(V_ASHRREV_I32_e32 ShiftAmt, (V_LSHLREV_B32_e32 ShiftAmt, $src0))
@@ -1645,10 +1829,82 @@ class SextInReg <ValueType vt, int ShiftAmt> : Pat <
def : SextInReg <i8, 24>;
def : SextInReg <i16, 16>;
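
The shl/ashr pair in SextInReg is the usual sign-extend-in-register idiom; assuming two's-complement arithmetic it reads in C++ as:

    #include <cstdint>

    // sext_inreg via shift-up / arithmetic-shift-down: moving the narrow
    // value to the top of the register and shifting it back replicates its
    // sign bit. ShiftAmt is 24 for i8 and 16 for i16, as in the defs above.
    static int32_t sext_in_reg(uint32_t Src, unsigned ShiftAmt) {
      return static_cast<int32_t>(Src << ShiftAmt) >> ShiftAmt;
    }
    // sext_in_reg(0x80, 24) == -128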
+def : Pat <
+ (i32 (add (i32 (ctpop i32:$popcnt)), i32:$val)),
+ (V_BCNT_U32_B32_e32 $popcnt, $val)
+>;
+
+def : Pat <
+ (i32 (ctpop i32:$popcnt)),
+ (V_BCNT_U32_B32_e64 $popcnt, 0, 0, 0)
+>;
+
+def : Pat <
+ (i64 (ctpop i64:$src)),
+ (INSERT_SUBREG
+ (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
+ (V_BCNT_U32_B32_e32 (EXTRACT_SUBREG $src, sub1),
+ (V_BCNT_U32_B32_e64 (EXTRACT_SUBREG $src, sub0), 0, 0, 0)),
+ sub0),
+ (V_MOV_B32_e32 0), sub1)
+>;
+
/********** ======================= **********/
/********** Image sampling patterns **********/
/********** ======================= **********/
+class SampleRawPattern<SDPatternOperator name, MIMG opcode, ValueType vt> : Pat <
+ (name vt:$addr, v32i8:$rsrc, v16i8:$sampler, i32:$dmask, i32:$unorm,
+ i32:$r128, i32:$da, i32:$glc, i32:$slc, i32:$tfe, i32:$lwe),
+ (opcode (as_i32imm $dmask), (as_i1imm $unorm), (as_i1imm $glc), (as_i1imm $da),
+ (as_i1imm $r128), (as_i1imm $tfe), (as_i1imm $lwe), (as_i1imm $slc),
+ $addr, $rsrc, $sampler)
+>;
+
+// Only the variants which make sense are defined.
+def : SampleRawPattern<int_SI_gather4, IMAGE_GATHER4_V4_V2, v2i32>;
+def : SampleRawPattern<int_SI_gather4, IMAGE_GATHER4_V4_V4, v4i32>;
+def : SampleRawPattern<int_SI_gather4_cl, IMAGE_GATHER4_CL_V4_V4, v4i32>;
+def : SampleRawPattern<int_SI_gather4_l, IMAGE_GATHER4_L_V4_V4, v4i32>;
+def : SampleRawPattern<int_SI_gather4_b, IMAGE_GATHER4_B_V4_V4, v4i32>;
+def : SampleRawPattern<int_SI_gather4_b_cl, IMAGE_GATHER4_B_CL_V4_V4, v4i32>;
+def : SampleRawPattern<int_SI_gather4_b_cl, IMAGE_GATHER4_B_CL_V4_V8, v8i32>;
+def : SampleRawPattern<int_SI_gather4_lz, IMAGE_GATHER4_LZ_V4_V2, v2i32>;
+def : SampleRawPattern<int_SI_gather4_lz, IMAGE_GATHER4_LZ_V4_V4, v4i32>;
+
+def : SampleRawPattern<int_SI_gather4_c, IMAGE_GATHER4_C_V4_V4, v4i32>;
+def : SampleRawPattern<int_SI_gather4_c_cl, IMAGE_GATHER4_C_CL_V4_V4, v4i32>;
+def : SampleRawPattern<int_SI_gather4_c_cl, IMAGE_GATHER4_C_CL_V4_V8, v8i32>;
+def : SampleRawPattern<int_SI_gather4_c_l, IMAGE_GATHER4_C_L_V4_V4, v4i32>;
+def : SampleRawPattern<int_SI_gather4_c_l, IMAGE_GATHER4_C_L_V4_V8, v8i32>;
+def : SampleRawPattern<int_SI_gather4_c_b, IMAGE_GATHER4_C_B_V4_V4, v4i32>;
+def : SampleRawPattern<int_SI_gather4_c_b, IMAGE_GATHER4_C_B_V4_V8, v8i32>;
+def : SampleRawPattern<int_SI_gather4_c_b_cl, IMAGE_GATHER4_C_B_CL_V4_V8, v8i32>;
+def : SampleRawPattern<int_SI_gather4_c_lz, IMAGE_GATHER4_C_LZ_V4_V4, v4i32>;
+
+def : SampleRawPattern<int_SI_gather4_o, IMAGE_GATHER4_O_V4_V4, v4i32>;
+def : SampleRawPattern<int_SI_gather4_cl_o, IMAGE_GATHER4_CL_O_V4_V4, v4i32>;
+def : SampleRawPattern<int_SI_gather4_cl_o, IMAGE_GATHER4_CL_O_V4_V8, v8i32>;
+def : SampleRawPattern<int_SI_gather4_l_o, IMAGE_GATHER4_L_O_V4_V4, v4i32>;
+def : SampleRawPattern<int_SI_gather4_l_o, IMAGE_GATHER4_L_O_V4_V8, v8i32>;
+def : SampleRawPattern<int_SI_gather4_b_o, IMAGE_GATHER4_B_O_V4_V4, v4i32>;
+def : SampleRawPattern<int_SI_gather4_b_o, IMAGE_GATHER4_B_O_V4_V8, v8i32>;
+def : SampleRawPattern<int_SI_gather4_b_cl_o, IMAGE_GATHER4_B_CL_O_V4_V8, v8i32>;
+def : SampleRawPattern<int_SI_gather4_lz_o, IMAGE_GATHER4_LZ_O_V4_V4, v4i32>;
+
+def : SampleRawPattern<int_SI_gather4_c_o, IMAGE_GATHER4_C_O_V4_V4, v4i32>;
+def : SampleRawPattern<int_SI_gather4_c_o, IMAGE_GATHER4_C_O_V4_V8, v8i32>;
+def : SampleRawPattern<int_SI_gather4_c_cl_o, IMAGE_GATHER4_C_CL_O_V4_V8, v8i32>;
+def : SampleRawPattern<int_SI_gather4_c_l_o, IMAGE_GATHER4_C_L_O_V4_V8, v8i32>;
+def : SampleRawPattern<int_SI_gather4_c_b_o, IMAGE_GATHER4_C_B_O_V4_V8, v8i32>;
+def : SampleRawPattern<int_SI_gather4_c_b_cl_o, IMAGE_GATHER4_C_B_CL_O_V4_V8, v8i32>;
+def : SampleRawPattern<int_SI_gather4_c_lz_o, IMAGE_GATHER4_C_LZ_O_V4_V4, v4i32>;
+def : SampleRawPattern<int_SI_gather4_c_lz_o, IMAGE_GATHER4_C_LZ_O_V4_V8, v8i32>;
+
+def : SampleRawPattern<int_SI_getlod, IMAGE_GET_LOD_V4_V1, i32>;
+def : SampleRawPattern<int_SI_getlod, IMAGE_GET_LOD_V4_V2, v2i32>;
+def : SampleRawPattern<int_SI_getlod, IMAGE_GET_LOD_V4_V4, v4i32>;
+
/* SIsample for simple 1D texture lookup */
def : Pat <
(SIsample i32:$addr, v32i8:$rsrc, v4i32:$sampler, imm),
@@ -1864,7 +2120,10 @@ def : BitConvert <v2f32, v2i32, VReg_64>;
def : BitConvert <v2i32, v2f32, VReg_64>;
def : BitConvert <v2i32, i64, VReg_64>;
def : BitConvert <i64, v2i32, VReg_64>;
-
+def : BitConvert <v2f32, i64, VReg_64>;
+def : BitConvert <i64, v2f32, VReg_64>;
+def : BitConvert <v2i32, f64, VReg_64>;
+def : BitConvert <f64, v2i32, VReg_64>;
def : BitConvert <v4f32, v4i32, VReg_128>;
def : BitConvert <v4i32, v4f32, VReg_128>;
@@ -1894,7 +2153,7 @@ def FCLAMP_SI : AMDGPUShaderInst <
}
def : Pat <
- (int_AMDIL_clamp f32:$src, (f32 FP_ZERO), (f32 FP_ONE)),
+ (AMDGPUclamp f32:$src, (f32 FP_ZERO), (f32 FP_ONE)),
(FCLAMP_SI f32:$src)
>;
@@ -2106,7 +2365,7 @@ def : Pat <
(V_MUL_HI_I32 $src0, $src1, (i32 0))
>;
-defm : BFIPatterns <V_BFI_B32>;
+defm : BFIPatterns <V_BFI_B32, S_MOV_B32>;
def : ROTRPattern <V_ALIGNBIT_B32>;
/********** ======================= **********/
@@ -2130,7 +2389,7 @@ defm : DSReadPat <DS_READ_U8, i32, az_extloadi8_local>;
defm : DSReadPat <DS_READ_I16, i32, sextloadi16_local>;
defm : DSReadPat <DS_READ_U16, i32, az_extloadi16_local>;
defm : DSReadPat <DS_READ_B32, i32, local_load>;
-defm : DSReadPat <DS_READ_B64, i64, local_load>;
+defm : DSReadPat <DS_READ_B64, v2i32, local_load>;
multiclass DSWritePat <DS inst, ValueType vt, PatFrag frag> {
def : Pat <
@@ -2139,48 +2398,109 @@ multiclass DSWritePat <DS inst, ValueType vt, PatFrag frag> {
>;
def : Pat <
- (frag vt:$src1, i32:$src0),
- (inst 0, $src0, $src1, 0)
+ (frag vt:$val, i32:$ptr),
+ (inst 0, $ptr, $val, 0)
>;
}
defm : DSWritePat <DS_WRITE_B8, i32, truncstorei8_local>;
defm : DSWritePat <DS_WRITE_B16, i32, truncstorei16_local>;
defm : DSWritePat <DS_WRITE_B32, i32, local_store>;
-defm : DSWritePat <DS_WRITE_B64, i64, local_store>;
+defm : DSWritePat <DS_WRITE_B64, v2i32, local_store>;
-def : Pat <(atomic_load_add_local i32:$ptr, i32:$val),
- (DS_ADD_U32_RTN 0, $ptr, $val, 0)>;
-
-def : Pat <(atomic_load_sub_local i32:$ptr, i32:$val),
- (DS_SUB_U32_RTN 0, $ptr, $val, 0)>;
+multiclass DSAtomicRetPat<DS inst, ValueType vt, PatFrag frag> {
+ def : Pat <
+ (frag (add i32:$ptr, (i32 IMM16bit:$offset)), vt:$value),
+ (inst (i1 0), $ptr, $value, (as_i16imm $offset))
+ >;
-//===----------------------------------------------------------------------===//
-// MUBUF Patterns
-//===----------------------------------------------------------------------===//
+ def : Pat <
+ (frag i32:$ptr, vt:$val),
+ (inst 0, $ptr, $val, 0)
+ >;
+}
-multiclass MUBUFLoad_Pattern <MUBUF Instr_ADDR64, ValueType vt,
- PatFrag global_ld, PatFrag constant_ld> {
+// Special case of DSAtomicRetPat for add / sub 1 -> inc / dec
+//
+// We need to use something for the data0, so we set a register to
+// -1. For the non-rtn variants, the manual says it does
+// DS[A] = (DS[A] >= D0) ? 0 : DS[A] + 1, and setting D0 to uint_max
+// will always do the increment so I'm assuming it's the same.
+//
+// We also load this -1 with s_mov_b32 / s_mov_b64 even though this
+// needs to be a VGPR. The SGPR copy pass will fix this, and it's
+// easier since there is no v_mov_b64.
+multiclass DSAtomicIncRetPat<DS inst, ValueType vt,
+ Instruction LoadImm, PatFrag frag> {
def : Pat <
- (vt (global_ld (mubuf_vaddr_offset i64:$ptr, i64:$offset, IMM12bit:$imm_offset))),
- (Instr_ADDR64 (SI_ADDR64_RSRC $ptr), $offset, (as_i16imm $imm_offset))
+ (frag (add i32:$ptr, (i32 IMM16bit:$offset)), (vt 1)),
+ (inst (i1 0), $ptr, (LoadImm (vt -1)), (as_i16imm $offset))
>;
def : Pat <
- (vt (global_ld (add i64:$ptr, (i64 IMM12bit:$offset)))),
- (Instr_ADDR64 (SI_ADDR64_RSRC (i64 0)), $ptr, (as_i16imm $offset))
+ (frag i32:$ptr, (vt 1)),
+ (inst 0, $ptr, (LoadImm (vt -1)), 0)
>;
+}
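
The comment above quotes the manual's formula for the non-returning increment, DS[A] = (DS[A] >= D0) ? 0 : DS[A] + 1. A standalone C++ sketch (editorial, not part of the patch) shows why loading D0 with the all-ones value makes that formula behave like an ordinary unsigned add of 1:

#include <cassert>
#include <cstdint>

// Mirrors the formula quoted in the comment; mem is DS[A], d0 is the D0 operand.
static uint32_t ds_inc(uint32_t mem, uint32_t d0) {
  return (mem >= d0) ? 0 : mem + 1;
}

int main() {
  const uint32_t D0 = UINT32_MAX; // the -1 loaded with s_mov_b32 / s_mov_b64
  assert(ds_inc(0, D0) == 1);
  assert(ds_inc(41, D0) == 42);
  // Only the all-ones value wraps to 0, exactly like unsigned overflow of +1.
  assert(ds_inc(UINT32_MAX, D0) == 0);
  return 0;
}
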
+multiclass DSAtomicCmpXChg <DS inst, ValueType vt, PatFrag frag> {
def : Pat <
- (vt (global_ld i64:$ptr)),
- (Instr_ADDR64 (SI_ADDR64_RSRC (i64 0)), $ptr, 0)
+ (frag (add i32:$ptr, (i32 IMM16bit:$offset)), vt:$cmp, vt:$swap),
+ (inst (i1 0), $ptr, $cmp, $swap, (as_i16imm $offset))
>;
def : Pat <
- (vt (global_ld (add i64:$ptr, i64:$offset))),
- (Instr_ADDR64 (SI_ADDR64_RSRC $ptr), $offset, 0)
+ (frag i32:$ptr, vt:$cmp, vt:$swap),
+ (inst 0, $ptr, $cmp, $swap, 0)
>;
+}
+
+
+// 32-bit atomics.
+defm : DSAtomicIncRetPat<DS_INC_RTN_U32, i32,
+ S_MOV_B32, atomic_load_add_local>;
+defm : DSAtomicIncRetPat<DS_DEC_RTN_U32, i32,
+ S_MOV_B32, atomic_load_sub_local>;
+
+defm : DSAtomicRetPat<DS_WRXCHG_RTN_B32, i32, atomic_swap_local>;
+defm : DSAtomicRetPat<DS_ADD_RTN_U32, i32, atomic_load_add_local>;
+defm : DSAtomicRetPat<DS_SUB_RTN_U32, i32, atomic_load_sub_local>;
+defm : DSAtomicRetPat<DS_AND_RTN_B32, i32, atomic_load_and_local>;
+defm : DSAtomicRetPat<DS_OR_RTN_B32, i32, atomic_load_or_local>;
+defm : DSAtomicRetPat<DS_XOR_RTN_B32, i32, atomic_load_xor_local>;
+defm : DSAtomicRetPat<DS_MIN_RTN_I32, i32, atomic_load_min_local>;
+defm : DSAtomicRetPat<DS_MAX_RTN_I32, i32, atomic_load_max_local>;
+defm : DSAtomicRetPat<DS_MIN_RTN_U32, i32, atomic_load_umin_local>;
+defm : DSAtomicRetPat<DS_MAX_RTN_U32, i32, atomic_load_umax_local>;
+
+defm : DSAtomicCmpXChg<DS_CMPST_RTN_B32, i32, atomic_cmp_swap_32_local>;
+
+// 64-bit atomics.
+defm : DSAtomicIncRetPat<DS_INC_RTN_U64, i64,
+ S_MOV_B64, atomic_load_add_local>;
+defm : DSAtomicIncRetPat<DS_DEC_RTN_U64, i64,
+ S_MOV_B64, atomic_load_sub_local>;
+
+defm : DSAtomicRetPat<DS_WRXCHG_RTN_B64, i64, atomic_swap_local>;
+defm : DSAtomicRetPat<DS_ADD_RTN_U64, i64, atomic_load_add_local>;
+defm : DSAtomicRetPat<DS_SUB_RTN_U64, i64, atomic_load_sub_local>;
+defm : DSAtomicRetPat<DS_AND_RTN_B64, i64, atomic_load_and_local>;
+defm : DSAtomicRetPat<DS_OR_RTN_B64, i64, atomic_load_or_local>;
+defm : DSAtomicRetPat<DS_XOR_RTN_B64, i64, atomic_load_xor_local>;
+defm : DSAtomicRetPat<DS_MIN_RTN_I64, i64, atomic_load_min_local>;
+defm : DSAtomicRetPat<DS_MAX_RTN_I64, i64, atomic_load_max_local>;
+defm : DSAtomicRetPat<DS_MIN_RTN_U64, i64, atomic_load_umin_local>;
+defm : DSAtomicRetPat<DS_MAX_RTN_U64, i64, atomic_load_umax_local>;
+
+defm : DSAtomicCmpXChg<DS_CMPST_RTN_B64, i64, atomic_cmp_swap_64_local>;
+
+//===----------------------------------------------------------------------===//
+// MUBUF Patterns
+//===----------------------------------------------------------------------===//
+
+multiclass MUBUFLoad_Pattern <MUBUF Instr_ADDR64, ValueType vt,
+ PatFrag constant_ld> {
def : Pat <
(vt (constant_ld (add i64:$ptr, i64:$offset))),
(Instr_ADDR64 (SI_ADDR64_RSRC $ptr), $offset, 0)
@@ -2188,53 +2508,19 @@ multiclass MUBUFLoad_Pattern <MUBUF Instr_ADDR64, ValueType vt,
}
defm : MUBUFLoad_Pattern <BUFFER_LOAD_SBYTE_ADDR64, i32,
- sextloadi8_global, sextloadi8_constant>;
+ sextloadi8_constant>;
defm : MUBUFLoad_Pattern <BUFFER_LOAD_UBYTE_ADDR64, i32,
- az_extloadi8_global, az_extloadi8_constant>;
+ az_extloadi8_constant>;
defm : MUBUFLoad_Pattern <BUFFER_LOAD_SSHORT_ADDR64, i32,
- sextloadi16_global, sextloadi16_constant>;
+ sextloadi16_constant>;
defm : MUBUFLoad_Pattern <BUFFER_LOAD_USHORT_ADDR64, i32,
- az_extloadi16_global, az_extloadi16_constant>;
+ az_extloadi16_constant>;
defm : MUBUFLoad_Pattern <BUFFER_LOAD_DWORD_ADDR64, i32,
- global_load, constant_load>;
-defm : MUBUFLoad_Pattern <BUFFER_LOAD_DWORDX2_ADDR64, i64,
- global_load, constant_load>;
-defm : MUBUFLoad_Pattern <BUFFER_LOAD_DWORDX2_ADDR64, i64,
- az_extloadi32_global, az_extloadi32_constant>;
+ constant_load>;
defm : MUBUFLoad_Pattern <BUFFER_LOAD_DWORDX2_ADDR64, v2i32,
- global_load, constant_load>;
+ constant_load>;
defm : MUBUFLoad_Pattern <BUFFER_LOAD_DWORDX4_ADDR64, v4i32,
- global_load, constant_load>;
-
-multiclass MUBUFStore_Pattern <MUBUF Instr, ValueType vt, PatFrag st> {
-
- def : Pat <
- (st vt:$value, (mubuf_vaddr_offset i64:$ptr, i64:$offset, IMM12bit:$imm_offset)),
- (Instr $value, (SI_ADDR64_RSRC $ptr), $offset, (as_i16imm $imm_offset))
- >;
-
- def : Pat <
- (st vt:$value, (add i64:$ptr, IMM12bit:$offset)),
- (Instr $value, (SI_ADDR64_RSRC (i64 0)), $ptr, (as_i16imm $offset))
- >;
-
- def : Pat <
- (st vt:$value, i64:$ptr),
- (Instr $value, (SI_ADDR64_RSRC (i64 0)), $ptr, 0)
- >;
-
- def : Pat <
- (st vt:$value, (add i64:$ptr, i64:$offset)),
- (Instr $value, (SI_ADDR64_RSRC $ptr), $offset, 0)
- >;
-}
-
-defm : MUBUFStore_Pattern <BUFFER_STORE_BYTE, i32, truncstorei8_global>;
-defm : MUBUFStore_Pattern <BUFFER_STORE_SHORT, i32, truncstorei16_global>;
-defm : MUBUFStore_Pattern <BUFFER_STORE_DWORD, i32, global_store>;
-defm : MUBUFStore_Pattern <BUFFER_STORE_DWORDX2, i64, global_store>;
-defm : MUBUFStore_Pattern <BUFFER_STORE_DWORDX2, v2i32, global_store>;
-defm : MUBUFStore_Pattern <BUFFER_STORE_DWORDX4, v4i32, global_store>;
+ constant_load>;
// BUFFER_LOAD_DWORD*, addr64=0
multiclass MUBUF_Load_Dword <ValueType vt, MUBUF offset, MUBUF offen, MUBUF idxen,
@@ -2301,7 +2587,7 @@ def : MTBUF_StoreResource <v2i32, 2, TBUFFER_STORE_FORMAT_XY>;
def : MTBUF_StoreResource <v4i32, 3, TBUFFER_STORE_FORMAT_XYZ>;
def : MTBUF_StoreResource <v4i32, 4, TBUFFER_STORE_FORMAT_XYZW>;
-let Predicates = [isCI] in {
+let SubtargetPredicate = isCI in {
// Sea Islands new arithmetic instructions
let neverHasSideEffects = 1 in {
@@ -2348,7 +2634,7 @@ def V_MAD_I64_I32 : VOP3_64 <0x00000177, "V_MAD_I64_I32", []>;
// BUFFER_LOAD_DWORDX3
// BUFFER_STORE_DWORDX3
-} // End Predicates = [isCI]
+} // End isCI
/********** ====================== **********/
@@ -2360,13 +2646,13 @@ multiclass SI_INDIRECT_Pattern <ValueType vt, ValueType eltvt, SI_INDIRECT_DST I
// 1. Extract with offset
def : Pat<
(vector_extract vt:$vec, (add i32:$idx, imm:$off)),
- (f32 (SI_INDIRECT_SRC (IMPLICIT_DEF), $vec, $idx, imm:$off))
+ (eltvt (SI_INDIRECT_SRC (IMPLICIT_DEF), $vec, $idx, imm:$off))
>;
// 2. Extract without offset
def : Pat<
(vector_extract vt:$vec, i32:$idx),
- (f32 (SI_INDIRECT_SRC (IMPLICIT_DEF), $vec, $idx, 0))
+ (eltvt (SI_INDIRECT_SRC (IMPLICIT_DEF), $vec, $idx, 0))
>;
// 3. Insert with offset
@@ -2392,20 +2678,6 @@ defm : SI_INDIRECT_Pattern <v4i32, i32, SI_INDIRECT_DST_V4>;
defm : SI_INDIRECT_Pattern <v8i32, i32, SI_INDIRECT_DST_V8>;
defm : SI_INDIRECT_Pattern <v16i32, i32, SI_INDIRECT_DST_V16>;
-/********** =============== **********/
-/********** Conditions **********/
-/********** =============== **********/
-
-def : Pat<
- (i1 (setcc f32:$src0, f32:$src1, SETO)),
- (V_CMP_O_F32_e64 $src0, $src1)
->;
-
-def : Pat<
- (i1 (setcc f32:$src0, f32:$src1, SETUO)),
- (V_CMP_U_F32_e64 $src0, $src1)
->;
-
//===----------------------------------------------------------------------===//
// Conversion Patterns
//===----------------------------------------------------------------------===//
@@ -2439,6 +2711,62 @@ def : Pat <
(S_MOV_B32 -1), sub1)
>;
+class ZExt_i64_i32_Pat <SDNode ext> : Pat <
+ (i64 (ext i32:$src)),
+ (INSERT_SUBREG (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $src, sub0),
+ (S_MOV_B32 0), sub1)
+>;
+
+class ZExt_i64_i1_Pat <SDNode ext> : Pat <
+ (i64 (ext i1:$src)),
+ (INSERT_SUBREG
+ (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
+ (V_CNDMASK_B32_e64 (i32 0), (i32 1), $src), sub0),
+ (S_MOV_B32 0), sub1)
+>;
+
+
+def : ZExt_i64_i32_Pat<zext>;
+def : ZExt_i64_i32_Pat<anyext>;
+def : ZExt_i64_i1_Pat<zext>;
+def : ZExt_i64_i1_Pat<anyext>;
+
+def : Pat <
+ (i64 (sext i32:$src)),
+ (INSERT_SUBREG
+ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $src, sub0),
+ (S_ASHR_I32 $src, 31), sub1)
+>;
+
+def : Pat <
+ (i64 (sext i1:$src)),
+ (INSERT_SUBREG
+ (INSERT_SUBREG
+ (i64 (IMPLICIT_DEF)),
+ (V_CNDMASK_B32_e64 0, -1, $src), sub0),
+ (V_CNDMASK_B32_e64 0, -1, $src), sub1)
+>;
+
+def : Pat <
+ (f32 (sint_to_fp i1:$src)),
+ (V_CNDMASK_B32_e64 (i32 0), CONST.FP32_NEG_ONE, $src)
+>;
+
+def : Pat <
+ (f32 (uint_to_fp i1:$src)),
+ (V_CNDMASK_B32_e64 (i32 0), CONST.FP32_ONE, $src)
+>;
+
+def : Pat <
+ (f64 (sint_to_fp i1:$src)),
+ (V_CVT_F64_I32_e32 (V_CNDMASK_B32_e64 (i32 0), (i32 -1), $src))
+>;
+
+def : Pat <
+ (f64 (uint_to_fp i1:$src)),
+ (V_CVT_F64_U32_e32 (V_CNDMASK_B32_e64 (i32 0), (i32 1), $src))
+>;
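
The i1 extension and int-to-fp patterns above all reduce to selecting between two constants with V_CNDMASK: a signed i1 is -1 when true, an unsigned one is 1. A small standalone C++ sketch (editorial, not part of the patch) of the arithmetic they implement:

#include <cassert>
#include <cstdint>

int main() {
  bool b = true;
  int64_t sext = b ? -1 : 0;              // sext i1 -> i64: true is all ones
  int64_t zext = b ? 1 : 0;               // zext/anyext i1 -> i64: true is 1
  double sfp = static_cast<double>(sext); // sint_to_fp i1: -1.0
  double ufp = static_cast<double>(zext); // uint_to_fp i1:  1.0
  assert(sext == -1 && zext == 1);
  assert(sfp == -1.0 && ufp == 1.0);
  return 0;
}
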
+
//===----------------------------------------------------------------------===//
// Miscellaneous Patterns
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/R600/SIIntrinsics.td b/lib/Target/R600/SIIntrinsics.td
index 00e32c0..df690a4 100644
--- a/lib/Target/R600/SIIntrinsics.td
+++ b/lib/Target/R600/SIIntrinsics.td
@@ -56,11 +56,61 @@ let TargetPrefix = "SI", isTarget = 1 in {
class Sample : Intrinsic <[llvm_v4f32_ty], [llvm_anyvector_ty, llvm_v32i8_ty, llvm_anyint_ty, llvm_i32_ty], [IntrNoMem]>;
+ // Fully-flexible SAMPLE instruction.
+ class SampleRaw : Intrinsic <
+ [llvm_v4f32_ty], // vdata(VGPR)
+ [llvm_anyint_ty, // vaddr(VGPR)
+ llvm_v32i8_ty, // rsrc(SGPR)
+ llvm_v16i8_ty, // sampler(SGPR)
+ llvm_i32_ty, // dmask(imm)
+ llvm_i32_ty, // unorm(imm)
+ llvm_i32_ty, // r128(imm)
+ llvm_i32_ty, // da(imm)
+ llvm_i32_ty, // glc(imm)
+ llvm_i32_ty, // slc(imm)
+ llvm_i32_ty, // tfe(imm)
+ llvm_i32_ty], // lwe(imm)
+ [IntrNoMem]>;
+
def int_SI_sample : Sample;
def int_SI_sampleb : Sample;
def int_SI_sampled : Sample;
def int_SI_samplel : Sample;
+ // Basic gather4
+ def int_SI_gather4 : SampleRaw;
+ def int_SI_gather4_cl : SampleRaw;
+ def int_SI_gather4_l : SampleRaw;
+ def int_SI_gather4_b : SampleRaw;
+ def int_SI_gather4_b_cl : SampleRaw;
+ def int_SI_gather4_lz : SampleRaw;
+
+ // Gather4 with comparison
+ def int_SI_gather4_c : SampleRaw;
+ def int_SI_gather4_c_cl : SampleRaw;
+ def int_SI_gather4_c_l : SampleRaw;
+ def int_SI_gather4_c_b : SampleRaw;
+ def int_SI_gather4_c_b_cl : SampleRaw;
+ def int_SI_gather4_c_lz : SampleRaw;
+
+ // Gather4 with offsets
+ def int_SI_gather4_o : SampleRaw;
+ def int_SI_gather4_cl_o : SampleRaw;
+ def int_SI_gather4_l_o : SampleRaw;
+ def int_SI_gather4_b_o : SampleRaw;
+ def int_SI_gather4_b_cl_o : SampleRaw;
+ def int_SI_gather4_lz_o : SampleRaw;
+
+ // Gather4 with comparison and offsets
+ def int_SI_gather4_c_o : SampleRaw;
+ def int_SI_gather4_c_cl_o : SampleRaw;
+ def int_SI_gather4_c_l_o : SampleRaw;
+ def int_SI_gather4_c_b_o : SampleRaw;
+ def int_SI_gather4_c_b_cl_o : SampleRaw;
+ def int_SI_gather4_c_lz_o : SampleRaw;
+
+ def int_SI_getlod : SampleRaw;
+
def int_SI_imageload : Intrinsic <[llvm_v4i32_ty], [llvm_anyvector_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>;
def int_SI_resinfo : Intrinsic <[llvm_v4i32_ty], [llvm_i32_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>;
diff --git a/lib/Target/R600/SILowerControlFlow.cpp b/lib/Target/R600/SILowerControlFlow.cpp
index 6601f2a..9f5ff29 100644
--- a/lib/Target/R600/SILowerControlFlow.cpp
+++ b/lib/Target/R600/SILowerControlFlow.cpp
@@ -86,6 +86,7 @@ private:
void Kill(MachineInstr &MI);
void Branch(MachineInstr &MI);
+ void InitM0ForLDS(MachineBasicBlock::iterator MI);
void LoadM0(MachineInstr &MI, MachineInstr *MovRel);
void IndirectSrc(MachineInstr &MI);
void IndirectDst(MachineInstr &MI);
@@ -320,6 +321,14 @@ void SILowerControlFlowPass::Kill(MachineInstr &MI) {
MI.eraseFromParent();
}
+/// The m0 register stores the maximum allowable address for LDS reads and
+/// writes. Its value must be at least the size in bytes of LDS allocated by
+/// the shader. For simplicity, we set it to the maximum possible value.
+void SILowerControlFlowPass::InitM0ForLDS(MachineBasicBlock::iterator MI) {
+ BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), TII->get(AMDGPU::S_MOV_B32),
+ AMDGPU::M0).addImm(0xffffffff);
+}
+
void SILowerControlFlowPass::LoadM0(MachineInstr &MI, MachineInstr *MovRel) {
MachineBasicBlock &MBB = *MI.getParent();
@@ -333,52 +342,57 @@ void SILowerControlFlowPass::LoadM0(MachineInstr &MI, MachineInstr *MovRel) {
BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0)
.addReg(Idx);
MBB.insert(I, MovRel);
- MI.eraseFromParent();
- return;
- }
+ } else {
- assert(AMDGPU::SReg_64RegClass.contains(Save));
- assert(AMDGPU::VReg_32RegClass.contains(Idx));
+ assert(AMDGPU::SReg_64RegClass.contains(Save));
+ assert(AMDGPU::VReg_32RegClass.contains(Idx));
- // Save the EXEC mask
- BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B64), Save)
- .addReg(AMDGPU::EXEC);
+ // Save the EXEC mask
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B64), Save)
+ .addReg(AMDGPU::EXEC);
- // Read the next variant into VCC (lower 32 bits) <- also loop target
- BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32),
- AMDGPU::VCC_LO)
- .addReg(Idx);
+ // Read the next variant into VCC (lower 32 bits) <- also loop target
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32),
+ AMDGPU::VCC_LO)
+ .addReg(Idx);
- // Move index from VCC into M0
- BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0)
- .addReg(AMDGPU::VCC_LO);
+ // Move index from VCC into M0
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0)
+ .addReg(AMDGPU::VCC_LO);
- // Compare the just read M0 value to all possible Idx values
- BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_CMP_EQ_U32_e32), AMDGPU::VCC)
- .addReg(AMDGPU::M0)
- .addReg(Idx);
+ // Compare the just read M0 value to all possible Idx values
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_CMP_EQ_U32_e32), AMDGPU::VCC)
+ .addReg(AMDGPU::M0)
+ .addReg(Idx);
- // Update EXEC, save the original EXEC value to VCC
- BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_AND_SAVEEXEC_B64), AMDGPU::VCC)
- .addReg(AMDGPU::VCC);
+ // Update EXEC, save the original EXEC value to VCC
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_AND_SAVEEXEC_B64), AMDGPU::VCC)
+ .addReg(AMDGPU::VCC);
- // Do the actual move
- MBB.insert(I, MovRel);
+ // Do the actual move
+ MBB.insert(I, MovRel);
- // Update EXEC, switch all done bits to 0 and all todo bits to 1
- BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_XOR_B64), AMDGPU::EXEC)
- .addReg(AMDGPU::EXEC)
- .addReg(AMDGPU::VCC);
+ // Update EXEC, switch all done bits to 0 and all todo bits to 1
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_XOR_B64), AMDGPU::EXEC)
+ .addReg(AMDGPU::EXEC)
+ .addReg(AMDGPU::VCC);
- // Loop back to V_READFIRSTLANE_B32 if there are still variants to cover
- BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_CBRANCH_EXECNZ))
- .addImm(-7)
- .addReg(AMDGPU::EXEC);
+ // Loop back to V_READFIRSTLANE_B32 if there are still variants to cover
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_CBRANCH_EXECNZ))
+ .addImm(-7)
+ .addReg(AMDGPU::EXEC);
- // Restore EXEC
- BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B64), AMDGPU::EXEC)
- .addReg(Save);
+ // Restore EXEC
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B64), AMDGPU::EXEC)
+ .addReg(Save);
+ }
+ // FIXME: Are there any values other than the LDS address clamp that need to
+ // be stored in the m0 register and may be live for more than a few
+ // instructions? If so, we should save the m0 register at the beginning
+ // of this function and restore it here.
+ // FIXME: Add support for LDS direct loads.
+ InitM0ForLDS(&MI);
MI.eraseFromParent();
}
@@ -523,8 +537,7 @@ bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {
MachineBasicBlock &MBB = MF.front();
// Initialize M0 to a value that won't cause LDS access to be discarded
// due to offset clamping
- BuildMI(MBB, MBB.getFirstNonPHI(), DebugLoc(), TII->get(AMDGPU::S_MOV_B32),
- AMDGPU::M0).addImm(0xffffffff);
+ InitM0ForLDS(MBB.getFirstNonPHI());
}
if (NeedWQM && MFI->ShaderType == ShaderType::PIXEL) {
diff --git a/lib/Target/R600/SIMachineFunctionInfo.cpp b/lib/Target/R600/SIMachineFunctionInfo.cpp
index af60995..e2df950 100644
--- a/lib/Target/R600/SIMachineFunctionInfo.cpp
+++ b/lib/Target/R600/SIMachineFunctionInfo.cpp
@@ -62,8 +62,10 @@ static unsigned createLaneVGPR(MachineRegisterInfo &MRI, MachineFunction *MF) {
return VGPR;
}
}
- MF->getFunction()->getContext().emitError(
- "Could not found S_ENGPGM instrtuction.");
+
+ LLVMContext &Ctx = MF->getFunction()->getContext();
+ Ctx.emitError("Could not find S_ENDPGM instruction.");
+
return VGPR;
}
diff --git a/lib/Target/R600/SIRegisterInfo.cpp b/lib/Target/R600/SIRegisterInfo.cpp
index c72d549..d0b677a 100644
--- a/lib/Target/R600/SIRegisterInfo.cpp
+++ b/lib/Target/R600/SIRegisterInfo.cpp
@@ -14,21 +14,20 @@
#include "SIRegisterInfo.h"
-#include "AMDGPUTargetMachine.h"
+#include "AMDGPUSubtarget.h"
#include "SIInstrInfo.h"
using namespace llvm;
-SIRegisterInfo::SIRegisterInfo(AMDGPUTargetMachine &tm)
-: AMDGPURegisterInfo(tm),
- TM(tm)
+SIRegisterInfo::SIRegisterInfo(const AMDGPUSubtarget &st)
+: AMDGPURegisterInfo(st)
{ }
BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
BitVector Reserved(getNumRegs());
Reserved.set(AMDGPU::EXEC);
Reserved.set(AMDGPU::INDIRECT_BASE_ADDR);
- const SIInstrInfo *TII = static_cast<const SIInstrInfo*>(TM.getInstrInfo());
+ const SIInstrInfo *TII = static_cast<const SIInstrInfo*>(ST.getInstrInfo());
TII->reserveIndirectRegisters(Reserved, MF);
return Reserved;
}
@@ -38,15 +37,6 @@ unsigned SIRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
return RC->getNumRegs();
}
-const TargetRegisterClass *
-SIRegisterInfo::getISARegClass(const TargetRegisterClass * rc) const {
- switch (rc->getID()) {
- case AMDGPU::GPRF32RegClassID:
- return &AMDGPU::VReg_32RegClass;
- default: return rc;
- }
-}
-
const TargetRegisterClass * SIRegisterInfo::getCFGStructurizerRegClass(
MVT VT) const {
switch(VT.SimpleTy) {
@@ -135,3 +125,19 @@ unsigned SIRegisterInfo::getPhysRegSubReg(unsigned Reg,
unsigned Index = getHWRegIndex(Reg);
return SubRC->getRegister(Index + Channel);
}
+
+bool SIRegisterInfo::regClassCanUseImmediate(int RCID) const {
+ switch (RCID) {
+ default: return false;
+ case AMDGPU::SSrc_32RegClassID:
+ case AMDGPU::SSrc_64RegClassID:
+ case AMDGPU::VSrc_32RegClassID:
+ case AMDGPU::VSrc_64RegClassID:
+ return true;
+ }
+}
+
+bool SIRegisterInfo::regClassCanUseImmediate(
+ const TargetRegisterClass *RC) const {
+ return regClassCanUseImmediate(RC->getID());
+}
diff --git a/lib/Target/R600/SIRegisterInfo.h b/lib/Target/R600/SIRegisterInfo.h
index 36b4fcd..c9305fb 100644
--- a/lib/Target/R600/SIRegisterInfo.h
+++ b/lib/Target/R600/SIRegisterInfo.h
@@ -20,24 +20,15 @@
namespace llvm {
-class AMDGPUTargetMachine;
-
struct SIRegisterInfo : public AMDGPURegisterInfo {
- AMDGPUTargetMachine &TM;
- SIRegisterInfo(AMDGPUTargetMachine &tm);
+ SIRegisterInfo(const AMDGPUSubtarget &st);
BitVector getReservedRegs(const MachineFunction &MF) const override;
unsigned getRegPressureLimit(const TargetRegisterClass *RC,
MachineFunction &MF) const override;
- /// \param RC is an AMDIL reg class.
- ///
- /// \returns the SI register class that is equivalent to \p RC.
- const TargetRegisterClass *
- getISARegClass(const TargetRegisterClass *RC) const override;
-
/// \brief get the register class of the specified type to use in the
/// CFGStructurizer
const TargetRegisterClass * getCFGStructurizerRegClass(MVT VT) const override;
@@ -69,6 +60,14 @@ struct SIRegisterInfo : public AMDGPURegisterInfo {
/// \returns The sub-register of Reg that is in Channel.
unsigned getPhysRegSubReg(unsigned Reg, const TargetRegisterClass *SubRC,
unsigned Channel) const;
+
+ /// \returns True if operands defined with this register class can accept
+ /// inline immediates.
+ bool regClassCanUseImmediate(int RCID) const;
+
+ /// \returns True if operands defined with this register class can accept
+ /// inline immediates.
+ bool regClassCanUseImmediate(const TargetRegisterClass *RC) const;
};
} // End namespace llvm
diff --git a/lib/Target/R600/SIRegisterInfo.td b/lib/Target/R600/SIRegisterInfo.td
index f1f01de..8974b63 100644
--- a/lib/Target/R600/SIRegisterInfo.td
+++ b/lib/Target/R600/SIRegisterInfo.td
@@ -168,7 +168,7 @@ def SReg_64 : RegisterClass<"AMDGPU", [v2i32, i64, i1], 64,
(add SGPR_64Regs, VCCReg, EXECReg)
>;
-def SReg_128 : RegisterClass<"AMDGPU", [v4i32], 128, (add SGPR_128)>;
+def SReg_128 : RegisterClass<"AMDGPU", [v4i32, v16i8], 128, (add SGPR_128)>;
def SReg_256 : RegisterClass<"AMDGPU", [v32i8, v8i32, v8f32], 256, (add SGPR_256)>;
diff --git a/lib/Target/R600/SITypeRewriter.cpp b/lib/Target/R600/SITypeRewriter.cpp
index a0b6907..367963a 100644
--- a/lib/Target/R600/SITypeRewriter.cpp
+++ b/lib/Target/R600/SITypeRewriter.cpp
@@ -119,8 +119,7 @@ void SITypeRewriter::visitCallInst(CallInst &I) {
Type::getInt32Ty(I.getContext())){
Type *ElementTy = Arg->getType()->getVectorElementType();
std::string TypeName = "i32";
- InsertElementInst *Def = dyn_cast<InsertElementInst>(Arg);
- assert(Def);
+ InsertElementInst *Def = cast<InsertElementInst>(Arg);
Args.push_back(Def->getOperand(1));
Types.push_back(ElementTy);
std::string VecTypeName = "v1" + TypeName;
diff --git a/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp b/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp
index da88820..9df0054 100644
--- a/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp
+++ b/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp
@@ -47,31 +47,27 @@ class SparcAsmParser : public MCTargetAsmParser {
// public interface of the MCTargetAsmParser.
bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
- SmallVectorImpl<MCParsedAsmOperand*> &Operands,
- MCStreamer &Out, unsigned &ErrorInfo,
+ OperandVector &Operands, MCStreamer &Out,
+ unsigned &ErrorInfo,
bool MatchingInlineAsm) override;
bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
- SMLoc NameLoc,
- SmallVectorImpl<MCParsedAsmOperand*> &Operands) override;
+ SMLoc NameLoc, OperandVector &Operands) override;
bool ParseDirective(AsmToken DirectiveID) override;
- unsigned validateTargetOperandClass(MCParsedAsmOperand *Op,
+ unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
unsigned Kind) override;
// Custom parse functions for Sparc specific operands.
- OperandMatchResultTy
- parseMEMOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+ OperandMatchResultTy parseMEMOperand(OperandVector &Operands);
- OperandMatchResultTy
- parseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
- StringRef Name);
+ OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Name);
OperandMatchResultTy
- parseSparcAsmOperand(SparcOperand *&Operand, bool isCall = false);
+ parseSparcAsmOperand(std::unique_ptr<SparcOperand> &Operand,
+ bool isCall = false);
- OperandMatchResultTy
- parseBranchModifiers(SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+ OperandMatchResultTy parseBranchModifiers(OperandVector &Operands);
// returns true if Tok is matched to a register and returns register in RegNo.
bool matchRegisterName(const AsmToken &Tok, unsigned &RegNo,
@@ -153,8 +149,6 @@ private:
SMLoc StartLoc, EndLoc;
- SparcOperand(KindTy K) : MCParsedAsmOperand(), Kind(K) {}
-
struct Token {
const char *Data;
unsigned Length;
@@ -182,6 +176,8 @@ private:
struct MemOp Mem;
};
public:
+ SparcOperand(KindTy K) : MCParsedAsmOperand(), Kind(K) {}
+
bool isToken() const override { return Kind == k_Token; }
bool isReg() const override { return Kind == k_Register; }
bool isImm() const override { return Kind == k_Immediate; }
@@ -291,8 +287,8 @@ public:
addExpr(Inst, Expr);
}
- static SparcOperand *CreateToken(StringRef Str, SMLoc S) {
- SparcOperand *Op = new SparcOperand(k_Token);
+ static std::unique_ptr<SparcOperand> CreateToken(StringRef Str, SMLoc S) {
+ auto Op = make_unique<SparcOperand>(k_Token);
Op->Tok.Data = Str.data();
Op->Tok.Length = Str.size();
Op->StartLoc = S;
@@ -300,10 +296,9 @@ public:
return Op;
}
- static SparcOperand *CreateReg(unsigned RegNum,
- unsigned Kind,
- SMLoc S, SMLoc E) {
- SparcOperand *Op = new SparcOperand(k_Register);
+ static std::unique_ptr<SparcOperand> CreateReg(unsigned RegNum, unsigned Kind,
+ SMLoc S, SMLoc E) {
+ auto Op = make_unique<SparcOperand>(k_Register);
Op->Reg.RegNum = RegNum;
Op->Reg.Kind = (SparcOperand::RegisterKind)Kind;
Op->StartLoc = S;
@@ -311,49 +306,51 @@ public:
return Op;
}
- static SparcOperand *CreateImm(const MCExpr *Val, SMLoc S, SMLoc E) {
- SparcOperand *Op = new SparcOperand(k_Immediate);
+ static std::unique_ptr<SparcOperand> CreateImm(const MCExpr *Val, SMLoc S,
+ SMLoc E) {
+ auto Op = make_unique<SparcOperand>(k_Immediate);
Op->Imm.Val = Val;
Op->StartLoc = S;
Op->EndLoc = E;
return Op;
}
- static SparcOperand *MorphToDoubleReg(SparcOperand *Op) {
- unsigned Reg = Op->getReg();
- assert(Op->Reg.Kind == rk_FloatReg);
+ static bool MorphToDoubleReg(SparcOperand &Op) {
+ unsigned Reg = Op.getReg();
+ assert(Op.Reg.Kind == rk_FloatReg);
unsigned regIdx = Reg - Sparc::F0;
if (regIdx % 2 || regIdx > 31)
- return nullptr;
- Op->Reg.RegNum = DoubleRegs[regIdx / 2];
- Op->Reg.Kind = rk_DoubleReg;
- return Op;
+ return false;
+ Op.Reg.RegNum = DoubleRegs[regIdx / 2];
+ Op.Reg.Kind = rk_DoubleReg;
+ return true;
}
- static SparcOperand *MorphToQuadReg(SparcOperand *Op) {
- unsigned Reg = Op->getReg();
+ static bool MorphToQuadReg(SparcOperand &Op) {
+ unsigned Reg = Op.getReg();
unsigned regIdx = 0;
- switch (Op->Reg.Kind) {
- default: assert(0 && "Unexpected register kind!");
+ switch (Op.Reg.Kind) {
+ default: llvm_unreachable("Unexpected register kind!");
case rk_FloatReg:
regIdx = Reg - Sparc::F0;
if (regIdx % 4 || regIdx > 31)
- return nullptr;
+ return false;
Reg = QuadFPRegs[regIdx / 4];
break;
case rk_DoubleReg:
regIdx = Reg - Sparc::D0;
if (regIdx % 2 || regIdx > 31)
- return nullptr;
+ return false;
Reg = QuadFPRegs[regIdx / 2];
break;
}
- Op->Reg.RegNum = Reg;
- Op->Reg.Kind = rk_QuadReg;
- return Op;
+ Op.Reg.RegNum = Reg;
+ Op.Reg.Kind = rk_QuadReg;
+ return true;
}
- static SparcOperand *MorphToMEMrr(unsigned Base, SparcOperand *Op) {
+ static std::unique_ptr<SparcOperand>
+ MorphToMEMrr(unsigned Base, std::unique_ptr<SparcOperand> Op) {
unsigned offsetReg = Op->getReg();
Op->Kind = k_MemoryReg;
Op->Mem.Base = Base;
@@ -362,10 +359,9 @@ public:
return Op;
}
- static SparcOperand *CreateMEMri(unsigned Base,
- const MCExpr *Off,
- SMLoc S, SMLoc E) {
- SparcOperand *Op = new SparcOperand(k_MemoryImm);
+ static std::unique_ptr<SparcOperand>
+ CreateMEMri(unsigned Base, const MCExpr *Off, SMLoc S, SMLoc E) {
+ auto Op = make_unique<SparcOperand>(k_MemoryImm);
Op->Mem.Base = Base;
Op->Mem.OffsetReg = 0;
Op->Mem.Off = Off;
@@ -374,7 +370,8 @@ public:
return Op;
}
- static SparcOperand *MorphToMEMri(unsigned Base, SparcOperand *Op) {
+ static std::unique_ptr<SparcOperand>
+ MorphToMEMri(unsigned Base, std::unique_ptr<SparcOperand> Op) {
const MCExpr *Imm = Op->getImm();
Op->Kind = k_MemoryImm;
Op->Mem.Base = Base;
@@ -386,11 +383,11 @@ public:
} // end namespace
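
The factory changes above (and the matching SystemZ parser changes further down) all follow one ownership migration: raw `new` results are replaced by std::unique_ptr returned from the Create*/Morph* helpers and moved into an owning OperandVector. A standalone C++ sketch of the pattern (editorial; ParsedOperand and OperandList are illustrative stand-ins, not the LLVM classes):

#include <iostream>
#include <memory>
#include <string>
#include <utility>
#include <vector>

struct ParsedOperand {
  std::string Token;
  // The factory hands back an owning pointer instead of a raw `new` result.
  static std::unique_ptr<ParsedOperand> createToken(std::string Str) {
    auto Op = std::make_unique<ParsedOperand>();
    Op->Token = std::move(Str);
    return Op;
  }
};

// Analogue of OperandVector: the container owns its operands.
using OperandList = std::vector<std::unique_ptr<ParsedOperand>>;

int main() {
  OperandList Operands;
  // The factory result is moved straight into the list, so no manual delete
  // is needed on any error path.
  Operands.push_back(ParsedOperand::createToken("call"));
  std::cout << Operands.front()->Token << "\n";
  return 0;
}
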
-bool SparcAsmParser::
-MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
- SmallVectorImpl<MCParsedAsmOperand*> &Operands,
- MCStreamer &Out, unsigned &ErrorInfo,
- bool MatchingInlineAsm) {
+bool SparcAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
+ OperandVector &Operands,
+ MCStreamer &Out,
+ unsigned &ErrorInfo,
+ bool MatchingInlineAsm) {
MCInst Inst;
SmallVector<MCInst, 8> Instructions;
unsigned MatchResult = MatchInstructionImpl(Operands, Inst, ErrorInfo,
@@ -415,7 +412,7 @@ MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
if (ErrorInfo >= Operands.size())
return Error(IDLoc, "too few operands for instruction");
- ErrorLoc = ((SparcOperand*) Operands[ErrorInfo])->getStartLoc();
+ ErrorLoc = ((SparcOperand &)*Operands[ErrorInfo]).getStartLoc();
if (ErrorLoc == SMLoc())
ErrorLoc = IDLoc;
}
@@ -450,11 +447,9 @@ ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc)
static void applyMnemonicAliases(StringRef &Mnemonic, unsigned Features,
unsigned VariantID);
-bool SparcAsmParser::
-ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
- SMLoc NameLoc,
- SmallVectorImpl<MCParsedAsmOperand*> &Operands)
-{
+bool SparcAsmParser::ParseInstruction(ParseInstructionInfo &Info,
+ StringRef Name, SMLoc NameLoc,
+ OperandVector &Operands) {
// First operand in MCInst is instruction mnemonic.
Operands.push_back(SparcOperand::CreateToken(Name, NameLoc));
@@ -548,9 +543,8 @@ bool SparcAsmParser:: parseDirectiveWord(unsigned Size, SMLoc L) {
return false;
}
-SparcAsmParser::OperandMatchResultTy SparcAsmParser::
-parseMEMOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands)
-{
+SparcAsmParser::OperandMatchResultTy
+SparcAsmParser::parseMEMOperand(OperandVector &Operands) {
SMLoc S, E;
unsigned BaseReg = 0;
@@ -575,23 +569,20 @@ parseMEMOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands)
break;
}
- SparcOperand *Offset = nullptr;
+ std::unique_ptr<SparcOperand> Offset;
OperandMatchResultTy ResTy = parseSparcAsmOperand(Offset);
if (ResTy != MatchOperand_Success || !Offset)
return MatchOperand_NoMatch;
- Offset = (Offset->isImm()
- ? SparcOperand::MorphToMEMri(BaseReg, Offset)
- : SparcOperand::MorphToMEMrr(BaseReg, Offset));
+ Operands.push_back(
+ Offset->isImm() ? SparcOperand::MorphToMEMri(BaseReg, std::move(Offset))
+ : SparcOperand::MorphToMEMrr(BaseReg, std::move(Offset)));
- Operands.push_back(Offset);
return MatchOperand_Success;
}
-SparcAsmParser::OperandMatchResultTy SparcAsmParser::
-parseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
- StringRef Mnemonic)
-{
+SparcAsmParser::OperandMatchResultTy
+SparcAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
@@ -637,21 +628,21 @@ parseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
return MatchOperand_Success;
}
- SparcOperand *Op = nullptr;
+ std::unique_ptr<SparcOperand> Op;
ResTy = parseSparcAsmOperand(Op, (Mnemonic == "call"));
if (ResTy != MatchOperand_Success || !Op)
return MatchOperand_ParseFail;
// Push the parsed operand into the list of operands
- Operands.push_back(Op);
+ Operands.push_back(std::move(Op));
return MatchOperand_Success;
}
SparcAsmParser::OperandMatchResultTy
-SparcAsmParser::parseSparcAsmOperand(SparcOperand *&Op, bool isCall)
-{
+SparcAsmParser::parseSparcAsmOperand(std::unique_ptr<SparcOperand> &Op,
+ bool isCall) {
SMLoc S = Parser.getTok().getLoc();
SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
@@ -718,8 +709,8 @@ SparcAsmParser::parseSparcAsmOperand(SparcOperand *&Op, bool isCall)
return (Op) ? MatchOperand_Success : MatchOperand_ParseFail;
}
-SparcAsmParser::OperandMatchResultTy SparcAsmParser::
-parseBranchModifiers(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+SparcAsmParser::OperandMatchResultTy
+SparcAsmParser::parseBranchModifiers(OperandVector &Operands) {
// parse (,a|,pn|,pt)+
@@ -928,18 +919,14 @@ extern "C" void LLVMInitializeSparcAsmParser() {
#define GET_MATCHER_IMPLEMENTATION
#include "SparcGenAsmMatcher.inc"
-
-
-unsigned SparcAsmParser::
-validateTargetOperandClass(MCParsedAsmOperand *GOp,
- unsigned Kind)
-{
- SparcOperand *Op = (SparcOperand*)GOp;
- if (Op->isFloatOrDoubleReg()) {
+unsigned SparcAsmParser::validateTargetOperandClass(MCParsedAsmOperand &GOp,
+ unsigned Kind) {
+ SparcOperand &Op = (SparcOperand &)GOp;
+ if (Op.isFloatOrDoubleReg()) {
switch (Kind) {
default: break;
case MCK_DFPRegs:
- if (!Op->isFloatReg() || SparcOperand::MorphToDoubleReg(Op))
+ if (!Op.isFloatReg() || SparcOperand::MorphToDoubleReg(Op))
return MCTargetAsmParser::Match_Success;
break;
case MCK_QFPRegs:
diff --git a/lib/Target/Sparc/InstPrinter/SparcInstPrinter.cpp b/lib/Target/Sparc/InstPrinter/SparcInstPrinter.cpp
index 261fb38..5975a51 100644
--- a/lib/Target/Sparc/InstPrinter/SparcInstPrinter.cpp
+++ b/lib/Target/Sparc/InstPrinter/SparcInstPrinter.cpp
@@ -173,6 +173,6 @@ void SparcInstPrinter::printCCOperand(const MCInst *MI, int opNum,
bool SparcInstPrinter::printGetPCX(const MCInst *MI, unsigned opNum,
raw_ostream &O)
{
- assert(0 && "FIXME: Implement SparcInstPrinter::printGetPCX.");
+ llvm_unreachable("FIXME: Implement SparcInstPrinter::printGetPCX.");
return true;
}
diff --git a/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp b/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp
index 7d517b6..dcd81e3 100644
--- a/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp
+++ b/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp
@@ -196,12 +196,12 @@ namespace {
const MCRelaxableFragment *DF,
const MCAsmLayout &Layout) const override {
// FIXME.
- assert(0 && "fixupNeedsRelaxation() unimplemented");
+ llvm_unreachable("fixupNeedsRelaxation() unimplemented");
return false;
}
void relaxInstruction(const MCInst &Inst, MCInst &Res) const override {
// FIXME.
- assert(0 && "relaxInstruction() unimplemented");
+ llvm_unreachable("relaxInstruction() unimplemented");
}
bool writeNopData(uint64_t Count, MCObjectWriter *OW) const override {
diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCCodeEmitter.cpp b/lib/Target/Sparc/MCTargetDesc/SparcMCCodeEmitter.cpp
index b19ad7b..eea9626 100644
--- a/lib/Target/Sparc/MCTargetDesc/SparcMCCodeEmitter.cpp
+++ b/lib/Target/Sparc/MCTargetDesc/SparcMCCodeEmitter.cpp
@@ -133,7 +133,7 @@ getMachineOpValue(const MCInst &MI, const MCOperand &MO,
if (Expr->EvaluateAsAbsolute(Res))
return Res;
- assert(0 && "Unhandled expression!");
+ llvm_unreachable("Unhandled expression!");
return 0;
}
diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.cpp b/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.cpp
index ae57fdc..7f01ab0 100644
--- a/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.cpp
+++ b/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.cpp
@@ -16,6 +16,7 @@
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCELF.h"
+#include "llvm/MC/MCObjectStreamer.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Object/ELF.h"
@@ -124,7 +125,7 @@ SparcMCExpr::VariantKind SparcMCExpr::parseVariantKind(StringRef name)
Sparc::Fixups SparcMCExpr::getFixupKind(SparcMCExpr::VariantKind Kind) {
switch (Kind) {
- default: assert(0 && "Unhandled SparcMCExpr::VariantKind");
+ default: llvm_unreachable("Unhandled SparcMCExpr::VariantKind");
case VK_Sparc_LO: return Sparc::fixup_sparc_lo10;
case VK_Sparc_HI: return Sparc::fixup_sparc_hi22;
case VK_Sparc_H44: return Sparc::fixup_sparc_h44;
@@ -219,35 +220,6 @@ void SparcMCExpr::fixELFSymbolsInTLSFixups(MCAssembler &Asm) const {
fixELFSymbolsInTLSFixupsImpl(getSubExpr(), Asm);
}
-// FIXME: This basically copies MCObjectStreamer::AddValueSymbols. Perhaps
-// that method should be made public?
-// FIXME: really do above: now that at least three other backends are using it.
-static void AddValueSymbolsImpl(const MCExpr *Value, MCAssembler *Asm) {
- switch (Value->getKind()) {
- case MCExpr::Target:
- llvm_unreachable("Can't handle nested target expr!");
- break;
-
- case MCExpr::Constant:
- break;
-
- case MCExpr::Binary: {
- const MCBinaryExpr *BE = cast<MCBinaryExpr>(Value);
- AddValueSymbolsImpl(BE->getLHS(), Asm);
- AddValueSymbolsImpl(BE->getRHS(), Asm);
- break;
- }
-
- case MCExpr::SymbolRef:
- Asm->getOrCreateSymbolData(cast<MCSymbolRefExpr>(Value)->getSymbol());
- break;
-
- case MCExpr::Unary:
- AddValueSymbolsImpl(cast<MCUnaryExpr>(Value)->getSubExpr(), Asm);
- break;
- }
-}
-
-void SparcMCExpr::AddValueSymbols(MCAssembler *Asm) const {
- AddValueSymbolsImpl(getSubExpr(), Asm);
+void SparcMCExpr::visitUsedExpr(MCStreamer &Streamer) const {
+ Streamer.visitUsedExpr(*getSubExpr());
}
diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.h b/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.h
index 78dd945..f0d0ef3 100644
--- a/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.h
+++ b/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.h
@@ -88,7 +88,7 @@ public:
void PrintImpl(raw_ostream &OS) const override;
bool EvaluateAsRelocatableImpl(MCValue &Res,
const MCAsmLayout *Layout) const override;
- void AddValueSymbols(MCAssembler *) const override;
+ void visitUsedExpr(MCStreamer &Streamer) const override;
const MCSection *FindAssociatedSection() const override {
return getSubExpr()->FindAssociatedSection();
}
diff --git a/lib/Target/Sparc/SparcFrameLowering.cpp b/lib/Target/Sparc/SparcFrameLowering.cpp
index a37da94..3cdfda3 100644
--- a/lib/Target/Sparc/SparcFrameLowering.cpp
+++ b/lib/Target/Sparc/SparcFrameLowering.cpp
@@ -14,6 +14,7 @@
#include "SparcFrameLowering.h"
#include "SparcInstrInfo.h"
#include "SparcMachineFunctionInfo.h"
+#include "SparcSubtarget.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -32,6 +33,9 @@ DisableLeafProc("disable-sparc-leaf-proc",
cl::desc("Disable Sparc leaf procedure optimization."),
cl::Hidden);
+SparcFrameLowering::SparcFrameLowering(const SparcSubtarget &ST)
+ : TargetFrameLowering(TargetFrameLowering::StackGrowsDown,
+ ST.is64Bit() ? 16 : 8, 0, ST.is64Bit() ? 16 : 8) {}
void SparcFrameLowering::emitSPAdjustment(MachineFunction &MF,
MachineBasicBlock &MBB,
@@ -99,7 +103,9 @@ void SparcFrameLowering::emitPrologue(MachineFunction &MF) const {
SAVEri = SP::ADDri;
SAVErr = SP::ADDrr;
}
- NumBytes = - SubTarget.getAdjustedFrameSize(NumBytes);
+ NumBytes =
+ -MF.getTarget().getSubtarget<SparcSubtarget>().getAdjustedFrameSize(
+ NumBytes);
emitSPAdjustment(MF, MBB, MBBI, NumBytes, SAVErr, SAVEri);
MachineModuleInfo &MMI = MF.getMMI();
@@ -162,7 +168,8 @@ void SparcFrameLowering::emitEpilogue(MachineFunction &MF,
if (NumBytes == 0)
return;
- NumBytes = SubTarget.getAdjustedFrameSize(NumBytes);
+ NumBytes = MF.getTarget().getSubtarget<SparcSubtarget>().getAdjustedFrameSize(
+ NumBytes);
emitSPAdjustment(MF, MBB, MBBI, NumBytes, SP::ADDrr, SP::ADDri);
}
diff --git a/lib/Target/Sparc/SparcFrameLowering.h b/lib/Target/Sparc/SparcFrameLowering.h
index bda7b7c..a7d1b89 100644
--- a/lib/Target/Sparc/SparcFrameLowering.h
+++ b/lib/Target/Sparc/SparcFrameLowering.h
@@ -15,19 +15,14 @@
#define SPARC_FRAMEINFO_H
#include "Sparc.h"
-#include "SparcSubtarget.h"
#include "llvm/Target/TargetFrameLowering.h"
namespace llvm {
- class SparcSubtarget;
+class SparcSubtarget;
class SparcFrameLowering : public TargetFrameLowering {
- const SparcSubtarget &SubTarget;
public:
- explicit SparcFrameLowering(const SparcSubtarget &ST)
- : TargetFrameLowering(TargetFrameLowering::StackGrowsDown,
- ST.is64Bit() ? 16 : 8, 0, ST.is64Bit() ? 16 : 8),
- SubTarget(ST) {}
+ explicit SparcFrameLowering(const SparcSubtarget &ST);
/// emitProlog/emitEpilog - These methods insert prolog and epilog code into
/// the function.
diff --git a/lib/Target/Sparc/SparcISelLowering.cpp b/lib/Target/Sparc/SparcISelLowering.cpp
index ef61466..990f52a 100644
--- a/lib/Target/Sparc/SparcISelLowering.cpp
+++ b/lib/Target/Sparc/SparcISelLowering.cpp
@@ -2030,7 +2030,7 @@ SparcTargetLowering::LowerF128Op(SDValue Op, SelectionDAG &DAG,
}
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(SDLoc(Op)).setChain(Chain)
- .setCallee(CallingConv::C, RetTyABI, Callee, &Args, 0);
+ .setCallee(CallingConv::C, RetTyABI, Callee, std::move(Args), 0);
std::pair<SDValue, SDValue> CallInfo = LowerCallTo(CLI);
@@ -2086,7 +2086,7 @@ SparcTargetLowering::LowerF128Compare(SDValue LHS, SDValue RHS,
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(DL).setChain(Chain)
- .setCallee(CallingConv::C, RetTy, Callee, &Args, 0);
+ .setCallee(CallingConv::C, RetTy, Callee, std::move(Args), 0);
std::pair<SDValue, SDValue> CallInfo = LowerCallTo(CLI);
diff --git a/lib/Target/Sparc/SparcJITInfo.cpp b/lib/Target/Sparc/SparcJITInfo.cpp
index c775e9e..d0eec98 100644
--- a/lib/Target/Sparc/SparcJITInfo.cpp
+++ b/lib/Target/Sparc/SparcJITInfo.cpp
@@ -213,7 +213,8 @@ extern "C" void *SparcCompilationCallbackC(intptr_t StubAddr) {
void SparcJITInfo::replaceMachineCodeForFunction(void *Old, void *New) {
- assert(0 && "FIXME: Implement SparcJITInfo::replaceMachineCodeForFunction");
+ llvm_unreachable("FIXME: Implement SparcJITInfo::"
+ "replaceMachineCodeForFunction");
}
diff --git a/lib/Target/Sparc/SparcSelectionDAGInfo.cpp b/lib/Target/Sparc/SparcSelectionDAGInfo.cpp
index eb36d29..a308fc5 100644
--- a/lib/Target/Sparc/SparcSelectionDAGInfo.cpp
+++ b/lib/Target/Sparc/SparcSelectionDAGInfo.cpp
@@ -11,13 +11,13 @@
//
//===----------------------------------------------------------------------===//
-#include "SparcTargetMachine.h"
+#include "SparcSelectionDAGInfo.h"
using namespace llvm;
#define DEBUG_TYPE "sparc-selectiondag-info"
-SparcSelectionDAGInfo::SparcSelectionDAGInfo(const SparcTargetMachine &TM)
- : TargetSelectionDAGInfo(TM) {
+SparcSelectionDAGInfo::SparcSelectionDAGInfo(const DataLayout &DL)
+ : TargetSelectionDAGInfo(&DL) {
}
SparcSelectionDAGInfo::~SparcSelectionDAGInfo() {
diff --git a/lib/Target/Sparc/SparcSelectionDAGInfo.h b/lib/Target/Sparc/SparcSelectionDAGInfo.h
index dcd4203..2346f41 100644
--- a/lib/Target/Sparc/SparcSelectionDAGInfo.h
+++ b/lib/Target/Sparc/SparcSelectionDAGInfo.h
@@ -22,7 +22,7 @@ class SparcTargetMachine;
class SparcSelectionDAGInfo : public TargetSelectionDAGInfo {
public:
- explicit SparcSelectionDAGInfo(const SparcTargetMachine &TM);
+ explicit SparcSelectionDAGInfo(const DataLayout &DL);
~SparcSelectionDAGInfo();
};
diff --git a/lib/Target/Sparc/SparcSubtarget.cpp b/lib/Target/Sparc/SparcSubtarget.cpp
index e38fb02..eea0c8c 100644
--- a/lib/Target/Sparc/SparcSubtarget.cpp
+++ b/lib/Target/Sparc/SparcSubtarget.cpp
@@ -26,20 +26,44 @@ using namespace llvm;
void SparcSubtarget::anchor() { }
-SparcSubtarget::SparcSubtarget(const std::string &TT, const std::string &CPU,
- const std::string &FS, bool is64Bit) :
- SparcGenSubtargetInfo(TT, CPU, FS),
- IsV9(false),
- V8DeprecatedInsts(false),
- IsVIS(false),
- Is64Bit(is64Bit),
- HasHardQuad(false),
- UsePopc(false) {
+static std::string computeDataLayout(const SparcSubtarget &ST) {
+ // Sparc is big endian.
+ std::string Ret = "E-m:e";
+
+ // Some ABIs have 32bit pointers.
+ if (!ST.is64Bit())
+ Ret += "-p:32:32";
+
+ // Alignments for 64 bit integers.
+ Ret += "-i64:64";
+
+ // On SparcV9 128 floats are aligned to 128 bits, on others only to 64.
+ // On SparcV9 registers can hold 64 or 32 bits, on others only 32.
+ if (ST.is64Bit())
+ Ret += "-n32:64";
+ else
+ Ret += "-f128:64-n32";
+
+ if (ST.is64Bit())
+ Ret += "-S128";
+ else
+ Ret += "-S64";
+
+ return Ret;
+}
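
For reference, the function above produces exactly two strings, depending on is64Bit. A standalone sketch (editorial, not part of the patch) that reproduces the string assembly and prints both results:

#include <iostream>
#include <string>

static std::string sparcDataLayout(bool is64Bit) {
  std::string Ret = "E-m:e";   // big endian
  if (!is64Bit)
    Ret += "-p:32:32";         // 32-bit pointers
  Ret += "-i64:64";            // 64-bit integer alignment
  Ret += is64Bit ? "-n32:64" : "-f128:64-n32";
  Ret += is64Bit ? "-S128" : "-S64";
  return Ret;
}

int main() {
  std::cout << sparcDataLayout(false) << "\n"; // E-m:e-p:32:32-i64:64-f128:64-n32-S64
  std::cout << sparcDataLayout(true)  << "\n"; // E-m:e-i64:64-n32:64-S128
  return 0;
}
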
+
+SparcSubtarget &SparcSubtarget::initializeSubtargetDependencies(StringRef CPU,
+ StringRef FS) {
+ IsV9 = false;
+ V8DeprecatedInsts = false;
+ IsVIS = false;
+ HasHardQuad = false;
+ UsePopc = false;
// Determine default and user specified characteristics
std::string CPUName = CPU;
if (CPUName.empty())
- CPUName = (is64Bit) ? "v9" : "v8";
+ CPUName = (Is64Bit) ? "v9" : "v8";
// Parse features string.
ParseSubtargetFeatures(CPUName, FS);
@@ -47,8 +71,16 @@ SparcSubtarget::SparcSubtarget(const std::string &TT, const std::string &CPU,
// Popc is a v9-only instruction.
if (!IsV9)
UsePopc = false;
+
+ return *this;
}
+SparcSubtarget::SparcSubtarget(const std::string &TT, const std::string &CPU,
+ const std::string &FS, TargetMachine &TM,
+ bool is64Bit)
+ : SparcGenSubtargetInfo(TT, CPU, FS), Is64Bit(is64Bit),
+ DL(computeDataLayout(initializeSubtargetDependencies(CPU, FS))),
+ InstrInfo(*this), TLInfo(TM), TSInfo(DL), FrameLowering(*this) {}
int SparcSubtarget::getAdjustedFrameSize(int frameSize) const {
diff --git a/lib/Target/Sparc/SparcSubtarget.h b/lib/Target/Sparc/SparcSubtarget.h
index 4025622..a335778 100644
--- a/lib/Target/Sparc/SparcSubtarget.h
+++ b/lib/Target/Sparc/SparcSubtarget.h
@@ -14,6 +14,13 @@
#ifndef SPARC_SUBTARGET_H
#define SPARC_SUBTARGET_H
+#include "SparcFrameLowering.h"
+#include "SparcInstrInfo.h"
+#include "SparcISelLowering.h"
+#include "SparcJITInfo.h"
+#include "SparcSelectionDAGInfo.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include <string>
@@ -31,10 +38,26 @@ class SparcSubtarget : public SparcGenSubtargetInfo {
bool Is64Bit;
bool HasHardQuad;
bool UsePopc;
+ const DataLayout DL; // Calculates type size & alignment
+ SparcInstrInfo InstrInfo;
+ SparcTargetLowering TLInfo;
+ SparcSelectionDAGInfo TSInfo;
+ SparcFrameLowering FrameLowering;
+ SparcJITInfo JITInfo;
public:
SparcSubtarget(const std::string &TT, const std::string &CPU,
- const std::string &FS, bool is64bit);
+ const std::string &FS, TargetMachine &TM, bool is64bit);
+
+ const SparcInstrInfo *getInstrInfo() const { return &InstrInfo; }
+ const TargetFrameLowering *getFrameLowering() const { return &FrameLowering; }
+ const SparcRegisterInfo *getRegisterInfo() const {
+ return &InstrInfo.getRegisterInfo();
+ }
+ const SparcTargetLowering *getTargetLowering() const { return &TLInfo; }
+ const SparcSelectionDAGInfo *getSelectionDAGInfo() const { return &TSInfo; }
+ SparcJITInfo *getJITInfo() { return &JITInfo; }
+ const DataLayout *getDataLayout() const { return &DL; }
bool isV9() const { return IsV9; }
bool isVIS() const { return IsVIS; }
@@ -47,6 +70,7 @@ public:
/// ParseSubtargetFeatures - Parses features string setting specified
/// subtarget options. Definition of function is auto generated by tblgen.
void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
+ SparcSubtarget &initializeSubtargetDependencies(StringRef CPU, StringRef FS);
bool is64Bit() const { return Is64Bit; }
diff --git a/lib/Target/Sparc/SparcTargetMachine.cpp b/lib/Target/Sparc/SparcTargetMachine.cpp
index 2469d93..0130fac 100644
--- a/lib/Target/Sparc/SparcTargetMachine.cpp
+++ b/lib/Target/Sparc/SparcTargetMachine.cpp
@@ -23,32 +23,6 @@ extern "C" void LLVMInitializeSparcTarget() {
RegisterTargetMachine<SparcV9TargetMachine> Y(TheSparcV9Target);
}
-static std::string computeDataLayout(const SparcSubtarget &ST) {
- // Sparc is big endian.
- std::string Ret = "E-m:e";
-
- // Some ABIs have 32bit pointers.
- if (!ST.is64Bit())
- Ret += "-p:32:32";
-
- // Alignments for 64 bit integers.
- Ret += "-i64:64";
-
- // On SparcV9 128 floats are aligned to 128 bits, on others only to 64.
- // On SparcV9 registers can hold 64 or 32 bits, on others only 32.
- if (ST.is64Bit())
- Ret += "-n32:64";
- else
- Ret += "-f128:64-n32";
-
- if (ST.is64Bit())
- Ret += "-S128";
- else
- Ret += "-S64";
-
- return Ret;
-}
-
/// SparcTargetMachine ctor - Create an ILP32 architecture model
///
SparcTargetMachine::SparcTargetMachine(const Target &T, StringRef TT,
@@ -58,11 +32,7 @@ SparcTargetMachine::SparcTargetMachine(const Target &T, StringRef TT,
CodeGenOpt::Level OL,
bool is64bit)
: LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
- Subtarget(TT, CPU, FS, is64bit),
- DL(computeDataLayout(Subtarget)),
- InstrInfo(Subtarget),
- TLInfo(*this), TSInfo(*this),
- FrameLowering(Subtarget) {
+ Subtarget(TT, CPU, FS, *this, is64bit) {
initAsmInfo();
}
diff --git a/lib/Target/Sparc/SparcTargetMachine.h b/lib/Target/Sparc/SparcTargetMachine.h
index 7d04338..03b5137 100644
--- a/lib/Target/Sparc/SparcTargetMachine.h
+++ b/lib/Target/Sparc/SparcTargetMachine.h
@@ -14,50 +14,40 @@
#ifndef SPARCTARGETMACHINE_H
#define SPARCTARGETMACHINE_H
-#include "SparcFrameLowering.h"
-#include "SparcISelLowering.h"
#include "SparcInstrInfo.h"
-#include "SparcJITInfo.h"
-#include "SparcSelectionDAGInfo.h"
#include "SparcSubtarget.h"
-#include "llvm/IR/DataLayout.h"
-#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetMachine.h"
namespace llvm {
class SparcTargetMachine : public LLVMTargetMachine {
SparcSubtarget Subtarget;
- const DataLayout DL; // Calculates type size & alignment
- SparcInstrInfo InstrInfo;
- SparcTargetLowering TLInfo;
- SparcSelectionDAGInfo TSInfo;
- SparcFrameLowering FrameLowering;
- SparcJITInfo JITInfo;
public:
SparcTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS, const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL, bool is64bit);
- const SparcInstrInfo *getInstrInfo() const override { return &InstrInfo; }
- const TargetFrameLowering *getFrameLowering() const override {
- return &FrameLowering;
+ const SparcInstrInfo *getInstrInfo() const override {
+ return getSubtargetImpl()->getInstrInfo();
+ }
+ const TargetFrameLowering *getFrameLowering() const override {
+ return getSubtargetImpl()->getFrameLowering();
}
const SparcSubtarget *getSubtargetImpl() const override { return &Subtarget; }
const SparcRegisterInfo *getRegisterInfo() const override {
- return &InstrInfo.getRegisterInfo();
+ return getSubtargetImpl()->getRegisterInfo();
}
- const SparcTargetLowering* getTargetLowering() const override {
- return &TLInfo;
+ const SparcTargetLowering *getTargetLowering() const override {
+ return getSubtargetImpl()->getTargetLowering();
}
- const SparcSelectionDAGInfo* getSelectionDAGInfo() const override {
- return &TSInfo;
+ const SparcSelectionDAGInfo *getSelectionDAGInfo() const override {
+ return getSubtargetImpl()->getSelectionDAGInfo();
}
- SparcJITInfo *getJITInfo() override {
- return &JITInfo;
+ SparcJITInfo *getJITInfo() override { return Subtarget.getJITInfo(); }
+ const DataLayout *getDataLayout() const override {
+ return getSubtargetImpl()->getDataLayout();
}
- const DataLayout *getDataLayout() const override { return &DL; }
// Pass Pipeline Configuration
TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
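
The net effect of the SparcTargetMachine changes is the delegation shape this commit applies across targets: the target machine stops owning DataLayout, InstrInfo, FrameLowering and friends and forwards every getter to the subtarget that now constructs them. A minimal standalone C++ sketch of that shape (editorial; class and member names here are illustrative, not the LLVM ones):

#include <iostream>

struct FrameLowering {
  int StackAlignment;
};

class Subtarget {
  FrameLowering FL{16}; // the subtarget now owns the per-target objects
public:
  const FrameLowering *getFrameLowering() const { return &FL; }
};

class Machine {
  Subtarget ST;
public:
  const Subtarget *getSubtargetImpl() const { return &ST; }
  // The machine-level getter simply forwards to the subtarget.
  const FrameLowering *getFrameLowering() const {
    return getSubtargetImpl()->getFrameLowering();
  }
};

int main() {
  Machine M;
  std::cout << M.getFrameLowering()->StackAlignment << "\n"; // 16
  return 0;
}
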
diff --git a/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp b/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp
index 71de64f..758be41 100644
--- a/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp
+++ b/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp
@@ -104,10 +104,6 @@ private:
MemOp Mem;
};
- SystemZOperand(OperandKind kind, SMLoc startLoc, SMLoc endLoc)
- : Kind(kind), StartLoc(startLoc), EndLoc(endLoc)
- {}
-
void addExpr(MCInst &Inst, const MCExpr *Expr) const {
// Add as immediates when possible. Null MCExpr = 0.
if (!Expr)
@@ -119,40 +115,44 @@ private:
}
public:
+ SystemZOperand(OperandKind kind, SMLoc startLoc, SMLoc endLoc)
+ : Kind(kind), StartLoc(startLoc), EndLoc(endLoc) {}
+
// Create particular kinds of operand.
- static SystemZOperand *createInvalid(SMLoc StartLoc, SMLoc EndLoc) {
- return new SystemZOperand(KindInvalid, StartLoc, EndLoc);
+ static std::unique_ptr<SystemZOperand> createInvalid(SMLoc StartLoc,
+ SMLoc EndLoc) {
+ return make_unique<SystemZOperand>(KindInvalid, StartLoc, EndLoc);
}
- static SystemZOperand *createToken(StringRef Str, SMLoc Loc) {
- SystemZOperand *Op = new SystemZOperand(KindToken, Loc, Loc);
+ static std::unique_ptr<SystemZOperand> createToken(StringRef Str, SMLoc Loc) {
+ auto Op = make_unique<SystemZOperand>(KindToken, Loc, Loc);
Op->Token.Data = Str.data();
Op->Token.Length = Str.size();
return Op;
}
- static SystemZOperand *createReg(RegisterKind Kind, unsigned Num,
- SMLoc StartLoc, SMLoc EndLoc) {
- SystemZOperand *Op = new SystemZOperand(KindReg, StartLoc, EndLoc);
+ static std::unique_ptr<SystemZOperand>
+ createReg(RegisterKind Kind, unsigned Num, SMLoc StartLoc, SMLoc EndLoc) {
+ auto Op = make_unique<SystemZOperand>(KindReg, StartLoc, EndLoc);
Op->Reg.Kind = Kind;
Op->Reg.Num = Num;
return Op;
}
- static SystemZOperand *createAccessReg(unsigned Num, SMLoc StartLoc,
- SMLoc EndLoc) {
- SystemZOperand *Op = new SystemZOperand(KindAccessReg, StartLoc, EndLoc);
+ static std::unique_ptr<SystemZOperand>
+ createAccessReg(unsigned Num, SMLoc StartLoc, SMLoc EndLoc) {
+ auto Op = make_unique<SystemZOperand>(KindAccessReg, StartLoc, EndLoc);
Op->AccessReg = Num;
return Op;
}
- static SystemZOperand *createImm(const MCExpr *Expr, SMLoc StartLoc,
- SMLoc EndLoc) {
- SystemZOperand *Op = new SystemZOperand(KindImm, StartLoc, EndLoc);
+ static std::unique_ptr<SystemZOperand>
+ createImm(const MCExpr *Expr, SMLoc StartLoc, SMLoc EndLoc) {
+ auto Op = make_unique<SystemZOperand>(KindImm, StartLoc, EndLoc);
Op->Imm = Expr;
return Op;
}
- static SystemZOperand *createMem(RegisterKind RegKind, unsigned Base,
- const MCExpr *Disp, unsigned Index,
- const MCExpr *Length, SMLoc StartLoc,
- SMLoc EndLoc) {
- SystemZOperand *Op = new SystemZOperand(KindMem, StartLoc, EndLoc);
+ static std::unique_ptr<SystemZOperand>
+ createMem(RegisterKind RegKind, unsigned Base, const MCExpr *Disp,
+ unsigned Index, const MCExpr *Length, SMLoc StartLoc,
+ SMLoc EndLoc) {
+ auto Op = make_unique<SystemZOperand>(KindMem, StartLoc, EndLoc);
Op->Mem.RegKind = RegKind;
Op->Mem.Base = Base;
Op->Mem.Index = Index;
@@ -313,21 +313,19 @@ private:
bool parseRegister(Register &Reg, RegisterGroup Group, const unsigned *Regs,
bool IsAddress = false);
- OperandMatchResultTy
- parseRegister(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
- RegisterGroup Group, const unsigned *Regs, RegisterKind Kind);
+ OperandMatchResultTy parseRegister(OperandVector &Operands,
+ RegisterGroup Group, const unsigned *Regs,
+ RegisterKind Kind);
bool parseAddress(unsigned &Base, const MCExpr *&Disp,
unsigned &Index, const MCExpr *&Length,
const unsigned *Regs, RegisterKind RegKind);
- OperandMatchResultTy
- parseAddress(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
- const unsigned *Regs, RegisterKind RegKind,
- MemoryKind MemKind);
+ OperandMatchResultTy parseAddress(OperandVector &Operands,
+ const unsigned *Regs, RegisterKind RegKind,
+ MemoryKind MemKind);
- bool parseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
- StringRef Mnemonic);
+ bool parseOperand(OperandVector &Operands, StringRef Mnemonic);
public:
SystemZAsmParser(MCSubtargetInfo &sti, MCAsmParser &parser,
@@ -343,87 +341,66 @@ public:
// Override MCTargetAsmParser.
bool ParseDirective(AsmToken DirectiveID) override;
bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
- bool ParseInstruction(ParseInstructionInfo &Info,
- StringRef Name, SMLoc NameLoc,
- SmallVectorImpl<MCParsedAsmOperand*> &Operands)
- override;
+ bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
+ SMLoc NameLoc, OperandVector &Operands) override;
bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
- SmallVectorImpl<MCParsedAsmOperand*> &Operands,
- MCStreamer &Out, unsigned &ErrorInfo,
+ OperandVector &Operands, MCStreamer &Out,
+ unsigned &ErrorInfo,
bool MatchingInlineAsm) override;
// Used by the TableGen code to parse particular operand types.
- OperandMatchResultTy
- parseGR32(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ OperandMatchResultTy parseGR32(OperandVector &Operands) {
return parseRegister(Operands, RegGR, SystemZMC::GR32Regs, GR32Reg);
}
- OperandMatchResultTy
- parseGRH32(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ OperandMatchResultTy parseGRH32(OperandVector &Operands) {
return parseRegister(Operands, RegGR, SystemZMC::GRH32Regs, GRH32Reg);
}
- OperandMatchResultTy
- parseGRX32(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ OperandMatchResultTy parseGRX32(OperandVector &Operands) {
llvm_unreachable("GRX32 should only be used for pseudo instructions");
}
- OperandMatchResultTy
- parseGR64(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ OperandMatchResultTy parseGR64(OperandVector &Operands) {
return parseRegister(Operands, RegGR, SystemZMC::GR64Regs, GR64Reg);
}
- OperandMatchResultTy
- parseGR128(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ OperandMatchResultTy parseGR128(OperandVector &Operands) {
return parseRegister(Operands, RegGR, SystemZMC::GR128Regs, GR128Reg);
}
- OperandMatchResultTy
- parseADDR32(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ OperandMatchResultTy parseADDR32(OperandVector &Operands) {
return parseRegister(Operands, RegGR, SystemZMC::GR32Regs, ADDR32Reg);
}
- OperandMatchResultTy
- parseADDR64(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ OperandMatchResultTy parseADDR64(OperandVector &Operands) {
return parseRegister(Operands, RegGR, SystemZMC::GR64Regs, ADDR64Reg);
}
- OperandMatchResultTy
- parseADDR128(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ OperandMatchResultTy parseADDR128(OperandVector &Operands) {
llvm_unreachable("Shouldn't be used as an operand");
}
- OperandMatchResultTy
- parseFP32(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ OperandMatchResultTy parseFP32(OperandVector &Operands) {
return parseRegister(Operands, RegFP, SystemZMC::FP32Regs, FP32Reg);
}
- OperandMatchResultTy
- parseFP64(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ OperandMatchResultTy parseFP64(OperandVector &Operands) {
return parseRegister(Operands, RegFP, SystemZMC::FP64Regs, FP64Reg);
}
- OperandMatchResultTy
- parseFP128(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ OperandMatchResultTy parseFP128(OperandVector &Operands) {
return parseRegister(Operands, RegFP, SystemZMC::FP128Regs, FP128Reg);
}
- OperandMatchResultTy
- parseBDAddr32(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ OperandMatchResultTy parseBDAddr32(OperandVector &Operands) {
return parseAddress(Operands, SystemZMC::GR32Regs, ADDR32Reg, BDMem);
}
- OperandMatchResultTy
- parseBDAddr64(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ OperandMatchResultTy parseBDAddr64(OperandVector &Operands) {
return parseAddress(Operands, SystemZMC::GR64Regs, ADDR64Reg, BDMem);
}
- OperandMatchResultTy
- parseBDXAddr64(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ OperandMatchResultTy parseBDXAddr64(OperandVector &Operands) {
return parseAddress(Operands, SystemZMC::GR64Regs, ADDR64Reg, BDXMem);
}
- OperandMatchResultTy
- parseBDLAddr64(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ OperandMatchResultTy parseBDLAddr64(OperandVector &Operands) {
return parseAddress(Operands, SystemZMC::GR64Regs, ADDR64Reg, BDLMem);
}
- OperandMatchResultTy
- parseAccessReg(SmallVectorImpl<MCParsedAsmOperand*> &Operands);
- OperandMatchResultTy
- parsePCRel(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
- int64_t MinVal, int64_t MaxVal);
- OperandMatchResultTy
- parsePCRel16(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ OperandMatchResultTy parseAccessReg(OperandVector &Operands);
+ OperandMatchResultTy parsePCRel(OperandVector &Operands, int64_t MinVal,
+ int64_t MaxVal);
+ OperandMatchResultTy parsePCRel16(OperandVector &Operands) {
return parsePCRel(Operands, -(1LL << 16), (1LL << 16) - 1);
}
- OperandMatchResultTy
- parsePCRel32(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ OperandMatchResultTy parsePCRel32(OperandVector &Operands) {
return parsePCRel(Operands, -(1LL << 32), (1LL << 32) - 1);
}
};
@@ -497,9 +474,8 @@ bool SystemZAsmParser::parseRegister(Register &Reg, RegisterGroup Group,
// Parse a register and add it to Operands. The other arguments are as above.
SystemZAsmParser::OperandMatchResultTy
-SystemZAsmParser::parseRegister(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
- RegisterGroup Group, const unsigned *Regs,
- RegisterKind Kind) {
+SystemZAsmParser::parseRegister(OperandVector &Operands, RegisterGroup Group,
+ const unsigned *Regs, RegisterKind Kind) {
if (Parser.getTok().isNot(AsmToken::Percent))
return MatchOperand_NoMatch;
@@ -566,9 +542,8 @@ bool SystemZAsmParser::parseAddress(unsigned &Base, const MCExpr *&Disp,
// Parse a memory operand and add it to Operands. The other arguments
// are as above.
SystemZAsmParser::OperandMatchResultTy
-SystemZAsmParser::parseAddress(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
- const unsigned *Regs, RegisterKind RegKind,
- MemoryKind MemKind) {
+SystemZAsmParser::parseAddress(OperandVector &Operands, const unsigned *Regs,
+ RegisterKind RegKind, MemoryKind MemKind) {
SMLoc StartLoc = Parser.getTok().getLoc();
unsigned Base, Index;
const MCExpr *Disp;
@@ -622,9 +597,9 @@ bool SystemZAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
return false;
}
-bool SystemZAsmParser::
-ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc,
- SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+bool SystemZAsmParser::ParseInstruction(ParseInstructionInfo &Info,
+ StringRef Name, SMLoc NameLoc,
+ OperandVector &Operands) {
Operands.push_back(SystemZOperand::createToken(Name, NameLoc));
// Read the remaining operands.
@@ -655,9 +630,8 @@ ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc,
return false;
}
-bool SystemZAsmParser::
-parseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
- StringRef Mnemonic) {
+bool SystemZAsmParser::parseOperand(OperandVector &Operands,
+ StringRef Mnemonic) {
// Check if the current operand has a custom associated parser, if so, try to
// custom parse the operand, or fallback to the general approach.
OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
@@ -700,11 +674,11 @@ parseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
return false;
}
-bool SystemZAsmParser::
-MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
- SmallVectorImpl<MCParsedAsmOperand*> &Operands,
- MCStreamer &Out, unsigned &ErrorInfo,
- bool MatchingInlineAsm) {
+bool SystemZAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
+ OperandVector &Operands,
+ MCStreamer &Out,
+ unsigned &ErrorInfo,
+ bool MatchingInlineAsm) {
MCInst Inst;
unsigned MatchResult;
@@ -739,7 +713,7 @@ MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
if (ErrorInfo >= Operands.size())
return Error(IDLoc, "too few operands for instruction");
- ErrorLoc = ((SystemZOperand*)Operands[ErrorInfo])->getStartLoc();
+ ErrorLoc = ((SystemZOperand &)*Operands[ErrorInfo]).getStartLoc();
if (ErrorLoc == SMLoc())
ErrorLoc = IDLoc;
}
@@ -753,8 +727,8 @@ MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
llvm_unreachable("Unexpected match type");
}
-SystemZAsmParser::OperandMatchResultTy SystemZAsmParser::
-parseAccessReg(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+SystemZAsmParser::OperandMatchResultTy
+SystemZAsmParser::parseAccessReg(OperandVector &Operands) {
if (Parser.getTok().isNot(AsmToken::Percent))
return MatchOperand_NoMatch;
@@ -768,9 +742,9 @@ parseAccessReg(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
return MatchOperand_Success;
}
-SystemZAsmParser::OperandMatchResultTy SystemZAsmParser::
-parsePCRel(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
- int64_t MinVal, int64_t MaxVal) {
+SystemZAsmParser::OperandMatchResultTy
+SystemZAsmParser::parsePCRel(OperandVector &Operands, int64_t MinVal,
+ int64_t MaxVal) {
MCContext &Ctx = getContext();
MCStreamer &Out = getStreamer();
const MCExpr *Expr;
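
The hunks above migrate the SystemZ asm parser from raw SystemZOperand* pointers to unique_ptr factories and the new OperandVector typedef. A rough standalone sketch of that ownership pattern follows (not LLVM code; it uses std::make_unique, whereas the patch relies on LLVM's own make_unique because the tree still builds as C++11):

#include <memory>
#include <string>
#include <vector>

struct Operand {
  std::string Token;
  explicit Operand(std::string T) : Token(std::move(T)) {}
  // Factory returns by unique_ptr, so ownership is explicit at the call site.
  static std::unique_ptr<Operand> createToken(std::string T) {
    return std::make_unique<Operand>(std::move(T));
  }
};

// Stand-in for OperandVector: a vector of owning pointers. Pushing a
// unique_ptr transfers ownership; nothing is deleted by hand.
using OperandVector = std::vector<std::unique_ptr<Operand>>;

int main() {
  OperandVector Operands;
  Operands.push_back(Operand::createToken("lgr"));
  // Accessing an element mirrors the cast in MatchAndEmitInstruction:
  // dereference the unique_ptr and work through a reference.
  Operand &Op = *Operands[0];
  return Op.Token.empty();
}
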
diff --git a/lib/Target/SystemZ/SystemZCallingConv.td b/lib/Target/SystemZ/SystemZCallingConv.td
index c4f641e..fb0d1d8 100644
--- a/lib/Target/SystemZ/SystemZCallingConv.td
+++ b/lib/Target/SystemZ/SystemZCallingConv.td
@@ -13,7 +13,7 @@ class CCIfExtend<CCAction A>
: CCIf<"ArgFlags.isSExt() || ArgFlags.isZExt()", A>;
//===----------------------------------------------------------------------===//
-// SVR4 return value calling convention
+// z/Linux return value calling convention
//===----------------------------------------------------------------------===//
def RetCC_SystemZ : CallingConv<[
// Promote i32 to i64 if it has an explicit extension type.
@@ -39,7 +39,7 @@ def RetCC_SystemZ : CallingConv<[
]>;
//===----------------------------------------------------------------------===//
-// SVR4 argument calling conventions
+// z/Linux argument calling conventions
//===----------------------------------------------------------------------===//
def CC_SystemZ : CallingConv<[
// Promote i32 to i64 if it has an explicit extension type.
@@ -63,3 +63,9 @@ def CC_SystemZ : CallingConv<[
// Other arguments are passed in 8-byte-aligned 8-byte stack slots.
CCIfType<[i32, i64, f32, f64], CCAssignToStack<8, 8>>
]>;
+
+//===----------------------------------------------------------------------===//
+// z/Linux callee-saved registers
+//===----------------------------------------------------------------------===//
+def CSR_SystemZ : CalleeSavedRegs<(add (sequence "R%dD", 6, 15),
+ (sequence "F%dD", 8, 15))>;
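
The new CSR_SystemZ record lets TableGen emit the CSR_SystemZ_SaveList array and CSR_SystemZ_RegMask bit mask that the register-info changes further down consume. A minimal standalone sketch of what those generated artifacts amount to (register numbers are invented for illustration, not the SystemZ encoding):

#include <cstdio>

// A zero-terminated list of callee-saved registers; 0 stands for "no register".
static const unsigned SaveList[] = {6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0};

// One bit per register; bit N set means register N survives a call.
static unsigned buildRegMask() {
  unsigned Mask = 0;
  for (unsigned I = 0; SaveList[I] != 0; ++I)
    Mask |= 1u << SaveList[I];
  return Mask;
}

int main() {
  std::printf("call-preserved mask: 0x%08x\n", buildRegMask());
  return 0;
}
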
diff --git a/lib/Target/SystemZ/SystemZFrameLowering.cpp b/lib/Target/SystemZ/SystemZFrameLowering.cpp
index 65f3caf..055dbe9 100644
--- a/lib/Target/SystemZ/SystemZFrameLowering.cpp
+++ b/lib/Target/SystemZ/SystemZFrameLowering.cpp
@@ -10,8 +10,9 @@
#include "SystemZFrameLowering.h"
#include "SystemZCallingConv.h"
#include "SystemZInstrBuilder.h"
+#include "SystemZInstrInfo.h"
#include "SystemZMachineFunctionInfo.h"
-#include "SystemZTargetMachine.h"
+#include "SystemZRegisterInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
@@ -44,11 +45,9 @@ static const TargetFrameLowering::SpillSlot SpillOffsetTable[] = {
};
} // end anonymous namespace
-SystemZFrameLowering::SystemZFrameLowering(const SystemZTargetMachine &tm,
- const SystemZSubtarget &sti)
- : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 8,
- -SystemZMC::CallFrameSize, 8),
- TM(tm), STI(sti) {
+SystemZFrameLowering::SystemZFrameLowering()
+ : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 8,
+ -SystemZMC::CallFrameSize, 8) {
// Create a mapping from register number to save slot offset.
RegSpillOffsets.grow(SystemZ::NUM_TARGET_REGS);
for (unsigned I = 0, E = array_lengthof(SpillOffsetTable); I != E; ++I)
@@ -108,9 +107,8 @@ processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
// instruction, or an implicit one that comes between the explicit start
// and end registers.
static void addSavedGPR(MachineBasicBlock &MBB, MachineInstrBuilder &MIB,
- const SystemZTargetMachine &TM,
unsigned GPR64, bool IsImplicit) {
- const SystemZRegisterInfo *RI = TM.getRegisterInfo();
+ const TargetRegisterInfo *RI = MBB.getParent()->getTarget().getRegisterInfo();
unsigned GPR32 = RI->getSubReg(GPR64, SystemZ::subreg_l32);
bool IsLive = MBB.isLiveIn(GPR64) || MBB.isLiveIn(GPR32);
if (!IsLive || !IsImplicit) {
@@ -176,8 +174,8 @@ spillCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(SystemZ::STMG));
// Add the explicit register operands.
- addSavedGPR(MBB, MIB, TM, LowGPR, false);
- addSavedGPR(MBB, MIB, TM, HighGPR, false);
+ addSavedGPR(MBB, MIB, LowGPR, false);
+ addSavedGPR(MBB, MIB, HighGPR, false);
// Add the address.
MIB.addReg(SystemZ::R15D).addImm(StartOffset);
@@ -187,13 +185,13 @@ spillCalleeSavedRegisters(MachineBasicBlock &MBB,
for (unsigned I = 0, E = CSI.size(); I != E; ++I) {
unsigned Reg = CSI[I].getReg();
if (SystemZ::GR64BitRegClass.contains(Reg))
- addSavedGPR(MBB, MIB, TM, Reg, true);
+ addSavedGPR(MBB, MIB, Reg, true);
}
// ...likewise GPR varargs.
if (IsVarArg)
for (unsigned I = ZFI->getVarArgsFirstGPR(); I < SystemZ::NumArgGPRs; ++I)
- addSavedGPR(MBB, MIB, TM, SystemZ::ArgGPRs[I], true);
+ addSavedGPR(MBB, MIB, SystemZ::ArgGPRs[I], true);
}
// Save FPRs in the normal TargetInstrInfo way.
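
The constructor above still builds RegSpillOffsets, a dense map from register number to save-slot offset. A standalone sketch of that idea, using a hypothetical spill table in place of the ABI-defined SpillOffsetTable:

#include <cstdint>
#include <vector>

struct SpillSlot { unsigned Reg; std::int64_t Offset; };

// Hypothetical (register, offset) pairs; the real values come from the ELF ABI.
static const SpillSlot SpillOffsetTable[] = {
  {6, 48}, {7, 56}, {8, 64}, {9, 72},
};

int main() {
  const unsigned NumRegs = 16;                    // stand-in for NUM_TARGET_REGS
  std::vector<std::int64_t> RegSpillOffsets(NumRegs, -1);
  for (const SpillSlot &S : SpillOffsetTable)
    RegSpillOffsets[S.Reg] = S.Offset;            // register number -> frame offset
  return RegSpillOffsets[6] == 48 ? 0 : 1;        // O(1) lookup in the prologue code
}
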
diff --git a/lib/Target/SystemZ/SystemZFrameLowering.h b/lib/Target/SystemZ/SystemZFrameLowering.h
index 70e25fb..4d5fe6d 100644
--- a/lib/Target/SystemZ/SystemZFrameLowering.h
+++ b/lib/Target/SystemZ/SystemZFrameLowering.h
@@ -10,7 +10,6 @@
#ifndef SYSTEMZFRAMELOWERING_H
#define SYSTEMZFRAMELOWERING_H
-#include "SystemZSubtarget.h"
#include "llvm/ADT/IndexedMap.h"
#include "llvm/Target/TargetFrameLowering.h"
@@ -21,13 +20,8 @@ class SystemZSubtarget;
class SystemZFrameLowering : public TargetFrameLowering {
IndexedMap<unsigned> RegSpillOffsets;
-protected:
- const SystemZTargetMachine &TM;
- const SystemZSubtarget &STI;
-
public:
- SystemZFrameLowering(const SystemZTargetMachine &tm,
- const SystemZSubtarget &sti);
+ SystemZFrameLowering();
// Override TargetFrameLowering.
bool isFPCloseToIncomingSP() const override { return false; }
diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp
index 6fe1fb9..00c65f5 100644
--- a/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -80,9 +80,9 @@ static MachineOperand earlyUseOperand(MachineOperand Op) {
return Op;
}
-SystemZTargetLowering::SystemZTargetLowering(SystemZTargetMachine &tm)
- : TargetLowering(tm, new TargetLoweringObjectFileELF()),
- Subtarget(*tm.getSubtargetImpl()), TM(tm) {
+SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &tm)
+ : TargetLowering(tm, new TargetLoweringObjectFileELF()),
+ Subtarget(tm.getSubtarget<SystemZSubtarget>()) {
MVT PtrVT = getPointerTy();
// Set up the register classes.
@@ -673,11 +673,13 @@ LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
MachineRegisterInfo &MRI = MF.getRegInfo();
SystemZMachineFunctionInfo *FuncInfo =
MF.getInfo<SystemZMachineFunctionInfo>();
- auto *TFL = static_cast<const SystemZFrameLowering *>(TM.getFrameLowering());
+ auto *TFL = static_cast<const SystemZFrameLowering *>(
+ DAG.getTarget().getFrameLowering());
// Assign locations to all of the incoming arguments.
SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CallConv, IsVarArg, MF, TM, ArgLocs, *DAG.getContext());
+ CCState CCInfo(CallConv, IsVarArg, MF, DAG.getTarget(), ArgLocs,
+ *DAG.getContext());
CCInfo.AnalyzeFormalArguments(Ins, CC_SystemZ);
unsigned NumFixedGPRs = 0;
@@ -815,7 +817,8 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
// Analyze the operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ArgLocs;
- CCState ArgCCInfo(CallConv, IsVarArg, MF, TM, ArgLocs, *DAG.getContext());
+ CCState ArgCCInfo(CallConv, IsVarArg, MF, DAG.getTarget(), ArgLocs,
+ *DAG.getContext());
ArgCCInfo.AnalyzeCallOperands(Outs, CC_SystemZ);
// We don't support GuaranteedTailCallOpt, only automatically-detected
@@ -911,6 +914,12 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
Ops.push_back(DAG.getRegister(RegsToPass[I].first,
RegsToPass[I].second.getValueType()));
+ // Add a register mask operand representing the call-preserved registers.
+ const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
+ const uint32_t *Mask = TRI->getCallPreservedMask(CallConv);
+ assert(Mask && "Missing call preserved mask for calling convention");
+ Ops.push_back(DAG.getRegisterMask(Mask));
+
// Glue the call to the argument copies, if any.
if (Glue.getNode())
Ops.push_back(Glue);
@@ -931,7 +940,8 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
// Assign locations to each value returned by this call.
SmallVector<CCValAssign, 16> RetLocs;
- CCState RetCCInfo(CallConv, IsVarArg, MF, TM, RetLocs, *DAG.getContext());
+ CCState RetCCInfo(CallConv, IsVarArg, MF, DAG.getTarget(), RetLocs,
+ *DAG.getContext());
RetCCInfo.AnalyzeCallResult(Ins, RetCC_SystemZ);
// Copy all of the result registers out of their specified physreg.
@@ -962,7 +972,8 @@ SystemZTargetLowering::LowerReturn(SDValue Chain,
// Assign locations to each returned value.
SmallVector<CCValAssign, 16> RetLocs;
- CCState RetCCInfo(CallConv, IsVarArg, MF, TM, RetLocs, *DAG.getContext());
+ CCState RetCCInfo(CallConv, IsVarArg, MF, DAG.getTarget(), RetLocs,
+ *DAG.getContext());
RetCCInfo.AnalyzeReturn(Outs, RetCC_SystemZ);
// Quick exit for void returns
@@ -1786,8 +1797,8 @@ SDValue SystemZTargetLowering::lowerGlobalAddress(GlobalAddressSDNode *Node,
const GlobalValue *GV = Node->getGlobal();
int64_t Offset = Node->getOffset();
EVT PtrVT = getPointerTy();
- Reloc::Model RM = TM.getRelocationModel();
- CodeModel::Model CM = TM.getCodeModel();
+ Reloc::Model RM = DAG.getTarget().getRelocationModel();
+ CodeModel::Model CM = DAG.getTarget().getCodeModel();
SDValue Result;
if (Subtarget.isPC32DBLSymbol(GV, RM, CM)) {
@@ -1824,7 +1835,7 @@ SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node,
SDLoc DL(Node);
const GlobalValue *GV = Node->getGlobal();
EVT PtrVT = getPointerTy();
- TLSModel::Model model = TM.getTLSModel(GV);
+ TLSModel::Model model = DAG.getTarget().getTLSModel(GV);
if (model != TLSModel::LocalExec)
llvm_unreachable("only local-exec TLS mode supported");
@@ -2287,9 +2298,9 @@ SDValue SystemZTargetLowering::lowerATOMIC_LOAD_SUB(SDValue Op,
// Use an addition if the operand is constant and either LAA(G) is
// available or the negative value is in the range of A(G)FHI.
int64_t Value = (-Op2->getAPIntValue()).getSExtValue();
- if (isInt<32>(Value) || TM.getSubtargetImpl()->hasInterlockedAccess1())
+ if (isInt<32>(Value) || Subtarget.hasInterlockedAccess1())
NegSrc2 = DAG.getConstant(Value, MemVT);
- } else if (TM.getSubtargetImpl()->hasInterlockedAccess1())
+ } else if (Subtarget.hasInterlockedAccess1())
// Use LAA(G) if available.
NegSrc2 = DAG.getNode(ISD::SUB, DL, MemVT, DAG.getConstant(0, MemVT),
Src2);
@@ -2602,7 +2613,8 @@ static unsigned forceReg(MachineInstr *MI, MachineOperand &Base,
MachineBasicBlock *
SystemZTargetLowering::emitSelect(MachineInstr *MI,
MachineBasicBlock *MBB) const {
- const SystemZInstrInfo *TII = TM.getInstrInfo();
+ const SystemZInstrInfo *TII = static_cast<const SystemZInstrInfo *>(
+ MBB->getParent()->getTarget().getInstrInfo());
unsigned DestReg = MI->getOperand(0).getReg();
unsigned TrueReg = MI->getOperand(1).getReg();
@@ -2650,7 +2662,8 @@ SystemZTargetLowering::emitCondStore(MachineInstr *MI,
MachineBasicBlock *MBB,
unsigned StoreOpcode, unsigned STOCOpcode,
bool Invert) const {
- const SystemZInstrInfo *TII = TM.getInstrInfo();
+ const SystemZInstrInfo *TII = static_cast<const SystemZInstrInfo *>(
+ MBB->getParent()->getTarget().getInstrInfo());
unsigned SrcReg = MI->getOperand(0).getReg();
MachineOperand Base = MI->getOperand(1);
@@ -2665,7 +2678,7 @@ SystemZTargetLowering::emitCondStore(MachineInstr *MI,
// Use STOCOpcode if possible. We could use different store patterns in
// order to avoid matching the index register, but the performance trade-offs
// might be more complicated in that case.
- if (STOCOpcode && !IndexReg && TM.getSubtargetImpl()->hasLoadStoreOnCond()) {
+ if (STOCOpcode && !IndexReg && Subtarget.hasLoadStoreOnCond()) {
if (Invert)
CCMask ^= CCValid;
BuildMI(*MBB, MI, DL, TII->get(STOCOpcode))
@@ -2717,8 +2730,9 @@ SystemZTargetLowering::emitAtomicLoadBinary(MachineInstr *MI,
unsigned BinOpcode,
unsigned BitSize,
bool Invert) const {
- const SystemZInstrInfo *TII = TM.getInstrInfo();
MachineFunction &MF = *MBB->getParent();
+ const SystemZInstrInfo *TII =
+ static_cast<const SystemZInstrInfo *>(MF.getTarget().getInstrInfo());
MachineRegisterInfo &MRI = MF.getRegInfo();
bool IsSubWord = (BitSize < 32);
@@ -2840,8 +2854,9 @@ SystemZTargetLowering::emitAtomicLoadMinMax(MachineInstr *MI,
unsigned CompareOpcode,
unsigned KeepOldMask,
unsigned BitSize) const {
- const SystemZInstrInfo *TII = TM.getInstrInfo();
MachineFunction &MF = *MBB->getParent();
+ const SystemZInstrInfo *TII =
+ static_cast<const SystemZInstrInfo *>(MF.getTarget().getInstrInfo());
MachineRegisterInfo &MRI = MF.getRegInfo();
bool IsSubWord = (BitSize < 32);
@@ -2951,8 +2966,9 @@ SystemZTargetLowering::emitAtomicLoadMinMax(MachineInstr *MI,
MachineBasicBlock *
SystemZTargetLowering::emitAtomicCmpSwapW(MachineInstr *MI,
MachineBasicBlock *MBB) const {
- const SystemZInstrInfo *TII = TM.getInstrInfo();
MachineFunction &MF = *MBB->getParent();
+ const SystemZInstrInfo *TII =
+ static_cast<const SystemZInstrInfo *>(MF.getTarget().getInstrInfo());
MachineRegisterInfo &MRI = MF.getRegInfo();
// Extract the operands. Base can be a register or a frame index.
@@ -3067,8 +3083,9 @@ MachineBasicBlock *
SystemZTargetLowering::emitExt128(MachineInstr *MI,
MachineBasicBlock *MBB,
bool ClearEven, unsigned SubReg) const {
- const SystemZInstrInfo *TII = TM.getInstrInfo();
MachineFunction &MF = *MBB->getParent();
+ const SystemZInstrInfo *TII =
+ static_cast<const SystemZInstrInfo *>(MF.getTarget().getInstrInfo());
MachineRegisterInfo &MRI = MF.getRegInfo();
DebugLoc DL = MI->getDebugLoc();
@@ -3098,8 +3115,9 @@ MachineBasicBlock *
SystemZTargetLowering::emitMemMemWrapper(MachineInstr *MI,
MachineBasicBlock *MBB,
unsigned Opcode) const {
- const SystemZInstrInfo *TII = TM.getInstrInfo();
MachineFunction &MF = *MBB->getParent();
+ const SystemZInstrInfo *TII =
+ static_cast<const SystemZInstrInfo *>(MF.getTarget().getInstrInfo());
MachineRegisterInfo &MRI = MF.getRegInfo();
DebugLoc DL = MI->getDebugLoc();
@@ -3267,8 +3285,9 @@ MachineBasicBlock *
SystemZTargetLowering::emitStringWrapper(MachineInstr *MI,
MachineBasicBlock *MBB,
unsigned Opcode) const {
- const SystemZInstrInfo *TII = TM.getInstrInfo();
MachineFunction &MF = *MBB->getParent();
+ const SystemZInstrInfo *TII =
+ static_cast<const SystemZInstrInfo *>(MF.getTarget().getInstrInfo());
MachineRegisterInfo &MRI = MF.getRegInfo();
DebugLoc DL = MI->getDebugLoc();
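
Every emit* helper in this file now follows the same pattern: the cached TM member is gone, and the instruction info is fetched from the machine function's target at the point of use. A minimal standalone sketch of that refactor (plain stand-in types, not the LLVM classes):

#include <cassert>

struct InstrInfo { int Opcode(int N) const { return N; } };

struct Target {
  InstrInfo II;
  const InstrInfo *getInstrInfo() const { return &II; }
};

struct Function {
  const Target *T;
  const Target &getTarget() const { return *T; }
};

// Before: the lowering object stored a Target reference and used TM.getInstrInfo().
// After: only the per-call context (here, the Function) is consulted.
static int emitSomething(const Function &MF) {
  const InstrInfo *TII = MF.getTarget().getInstrInfo();
  return TII->Opcode(42);
}

int main() {
  Target T;
  Function F{&T};
  assert(emitSomething(F) == 42);
  return 0;
}
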
diff --git a/lib/Target/SystemZ/SystemZISelLowering.h b/lib/Target/SystemZ/SystemZISelLowering.h
index bceb25e..e21b050 100644
--- a/lib/Target/SystemZ/SystemZISelLowering.h
+++ b/lib/Target/SystemZ/SystemZISelLowering.h
@@ -198,7 +198,7 @@ class SystemZTargetMachine;
class SystemZTargetLowering : public TargetLowering {
public:
- explicit SystemZTargetLowering(SystemZTargetMachine &TM);
+ explicit SystemZTargetLowering(const TargetMachine &TM);
// Override TargetLowering.
MVT getScalarShiftAmountTy(EVT LHSTy) const override {
@@ -249,7 +249,6 @@ public:
private:
const SystemZSubtarget &Subtarget;
- const SystemZTargetMachine &TM;
// Implement LowerOperation for individual opcodes.
SDValue lowerSETCC(SDValue Op, SelectionDAG &DAG) const;
diff --git a/lib/Target/SystemZ/SystemZInstrFP.td b/lib/Target/SystemZ/SystemZInstrFP.td
index a1e782c..e8841e1 100644
--- a/lib/Target/SystemZ/SystemZInstrFP.td
+++ b/lib/Target/SystemZ/SystemZInstrFP.td
@@ -133,6 +133,13 @@ def LEDBR : UnaryRRE<"ledb", 0xB344, fround, FP32, FP64>;
def LEXBR : UnaryRRE<"lexb", 0xB346, null_frag, FP128, FP128>;
def LDXBR : UnaryRRE<"ldxb", 0xB345, null_frag, FP128, FP128>;
+def LEDBRA : UnaryRRF4<"ledbra", 0xB344, FP32, FP64>,
+ Requires<[FeatureFPExtension]>;
+def LEXBRA : UnaryRRF4<"lexbra", 0xB346, FP128, FP128>,
+ Requires<[FeatureFPExtension]>;
+def LDXBRA : UnaryRRF4<"ldxbra", 0xB345, FP128, FP128>,
+ Requires<[FeatureFPExtension]>;
+
def : Pat<(f32 (fround FP128:$src)),
(EXTRACT_SUBREG (LEXBR FP128:$src), subreg_hh32)>;
def : Pat<(f64 (fround FP128:$src)),
diff --git a/lib/Target/SystemZ/SystemZInstrFormats.td b/lib/Target/SystemZ/SystemZInstrFormats.td
index add675a..9f59a1c 100644
--- a/lib/Target/SystemZ/SystemZInstrFormats.td
+++ b/lib/Target/SystemZ/SystemZInstrFormats.td
@@ -511,34 +511,24 @@ class InstSS<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern>
// to store. Other stored registers are added as implicit uses.
//
// Unary:
-// One register output operand and one input operand. The input
-// operand may be a register, immediate or memory.
+// One register output operand and one input operand.
//
// Binary:
-// One register output operand and two input operands. The first
-// input operand is always a register and the second may be a register,
-// immediate or memory.
-//
-// Shift:
-// One register output operand and two input operands. The first
-// input operand is a register and the second has the same form as
-// an address (although it isn't actually used to address memory).
+// One register output operand and two input operands.
//
// Compare:
-// Two input operands. The first operand is always a register,
-// the second may be a register, immediate or memory.
+// Two input operands and an implicit CC output operand.
//
// Ternary:
-// One register output operand and three register input operands.
+// One register output operand and three input operands.
//
// LoadAndOp:
-// One output operand and two input operands. The first input operand
-// is a register and the second is an address.
+// One output operand and two input operands, one of which is an address.
+// The instruction both reads from and writes to the address.
//
// CmpSwap:
-// One output operand and three input operands. The first two
-// operands are registers and the third is an address. The instruction
-// both reads from and writes to the address.
+// One output operand and three input operands, one of which is an address.
+// The instruction both reads from and writes to the address.
//
// RotateSelect:
// One output operand and five input operands. The first two operands
@@ -691,7 +681,7 @@ class CondStoreRSY<string mnemonic, bits<16> opcode,
class AsmCondStoreRSY<string mnemonic, bits<16> opcode,
RegisterOperand cls, bits<5> bytes,
AddressingMode mode = bdaddr20only>
- : InstRSY<opcode, (outs), (ins cls:$R1, mode:$BD2, uimm8zx4:$R3),
+ : InstRSY<opcode, (outs), (ins cls:$R1, mode:$BD2, imm32zx4:$R3),
mnemonic#"\t$R1, $BD2, $R3", []>,
Requires<[FeatureLoadStoreOnCond]> {
let mayStore = 1;
@@ -730,7 +720,7 @@ class UnaryRRE<string mnemonic, bits<16> opcode, SDPatternOperator operator,
class UnaryRRF<string mnemonic, bits<16> opcode, RegisterOperand cls1,
RegisterOperand cls2>
- : InstRRF<opcode, (outs cls1:$R1), (ins uimm8zx4:$R3, cls2:$R2),
+ : InstRRF<opcode, (outs cls1:$R1), (ins imm32zx4:$R3, cls2:$R2),
mnemonic#"r\t$R1, $R3, $R2", []> {
let OpKey = mnemonic ## cls1;
let OpType = "reg";
@@ -739,7 +729,7 @@ class UnaryRRF<string mnemonic, bits<16> opcode, RegisterOperand cls1,
class UnaryRRF4<string mnemonic, bits<16> opcode, RegisterOperand cls1,
RegisterOperand cls2>
- : InstRRF<opcode, (outs cls1:$R1), (ins uimm8zx4:$R3, cls2:$R2, uimm8zx4:$R4),
+ : InstRRF<opcode, (outs cls1:$R1), (ins imm32zx4:$R3, cls2:$R2, imm32zx4:$R4),
mnemonic#"\t$R1, $R3, $R2, $R4", []>;
// These instructions are generated by if conversion. The old value of R1
@@ -757,7 +747,7 @@ class CondUnaryRRF<string mnemonic, bits<16> opcode, RegisterOperand cls1,
// mask is the third operand rather than being part of the mnemonic.
class AsmCondUnaryRRF<string mnemonic, bits<16> opcode, RegisterOperand cls1,
RegisterOperand cls2>
- : InstRRF<opcode, (outs cls1:$R1), (ins cls1:$R1src, cls2:$R2, uimm8zx4:$R3),
+ : InstRRF<opcode, (outs cls1:$R1), (ins cls1:$R1src, cls2:$R2, imm32zx4:$R3),
mnemonic#"r\t$R1, $R2, $R3", []>,
Requires<[FeatureLoadStoreOnCond]> {
let Constraints = "$R1 = $R1src";
@@ -823,7 +813,7 @@ class CondUnaryRSY<string mnemonic, bits<16> opcode,
class AsmCondUnaryRSY<string mnemonic, bits<16> opcode,
RegisterOperand cls, bits<5> bytes,
AddressingMode mode = bdaddr20only>
- : InstRSY<opcode, (outs cls:$R1), (ins cls:$R1src, mode:$BD2, uimm8zx4:$R3),
+ : InstRSY<opcode, (outs cls:$R1), (ins cls:$R1src, mode:$BD2, imm32zx4:$R3),
mnemonic#"\t$R1, $BD2, $R3", []>,
Requires<[FeatureLoadStoreOnCond]> {
let mayLoad = 1;
@@ -993,6 +983,33 @@ class BinaryRIL<string mnemonic, bits<12> opcode, SDPatternOperator operator,
let DisableEncoding = "$R1src";
}
+class BinaryRS<string mnemonic, bits<8> opcode, SDPatternOperator operator,
+ RegisterOperand cls>
+ : InstRS<opcode, (outs cls:$R1), (ins cls:$R1src, shift12only:$BD2),
+ mnemonic#"\t$R1, $BD2",
+ [(set cls:$R1, (operator cls:$R1src, shift12only:$BD2))]> {
+ let R3 = 0;
+ let Constraints = "$R1 = $R1src";
+ let DisableEncoding = "$R1src";
+}
+
+class BinaryRSY<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+ RegisterOperand cls>
+ : InstRSY<opcode, (outs cls:$R1), (ins cls:$R3, shift20only:$BD2),
+ mnemonic#"\t$R1, $R3, $BD2",
+ [(set cls:$R1, (operator cls:$R3, shift20only:$BD2))]>;
+
+multiclass BinaryRSAndK<string mnemonic, bits<8> opcode1, bits<16> opcode2,
+ SDPatternOperator operator, RegisterOperand cls> {
+ let NumOpsKey = mnemonic in {
+ let NumOpsValue = "3" in
+ def K : BinaryRSY<mnemonic##"k", opcode2, null_frag, cls>,
+ Requires<[FeatureDistinctOps]>;
+ let NumOpsValue = "2", isConvertibleToThreeAddress = 1 in
+ def "" : BinaryRS<mnemonic, opcode1, operator, cls>;
+ }
+}
+
class BinaryRX<string mnemonic, bits<8> opcode, SDPatternOperator operator,
RegisterOperand cls, SDPatternOperator load, bits<5> bytes,
AddressingMode mode = bdxaddr12only>
@@ -1077,33 +1094,6 @@ multiclass BinarySIPair<string mnemonic, bits<8> siOpcode,
}
}
-class ShiftRS<string mnemonic, bits<8> opcode, SDPatternOperator operator,
- RegisterOperand cls>
- : InstRS<opcode, (outs cls:$R1), (ins cls:$R1src, shift12only:$BD2),
- mnemonic#"\t$R1, $BD2",
- [(set cls:$R1, (operator cls:$R1src, shift12only:$BD2))]> {
- let R3 = 0;
- let Constraints = "$R1 = $R1src";
- let DisableEncoding = "$R1src";
-}
-
-class ShiftRSY<string mnemonic, bits<16> opcode, SDPatternOperator operator,
- RegisterOperand cls>
- : InstRSY<opcode, (outs cls:$R1), (ins cls:$R3, shift20only:$BD2),
- mnemonic#"\t$R1, $R3, $BD2",
- [(set cls:$R1, (operator cls:$R3, shift20only:$BD2))]>;
-
-multiclass ShiftRSAndK<string mnemonic, bits<8> opcode1, bits<16> opcode2,
- SDPatternOperator operator, RegisterOperand cls> {
- let NumOpsKey = mnemonic in {
- let NumOpsValue = "3" in
- def K : ShiftRSY<mnemonic##"k", opcode2, null_frag, cls>,
- Requires<[FeatureDistinctOps]>;
- let NumOpsValue = "2", isConvertibleToThreeAddress = 1 in
- def "" : ShiftRS<mnemonic, opcode1, operator, cls>;
- }
-}
-
class CompareRR<string mnemonic, bits<8> opcode, SDPatternOperator operator,
RegisterOperand cls1, RegisterOperand cls2>
: InstRR<opcode, (outs), (ins cls1:$R1, cls2:$R2),
@@ -1315,22 +1305,23 @@ multiclass CmpSwapRSPair<string mnemonic, bits<8> rsOpcode, bits<16> rsyOpcode,
class RotateSelectRIEf<string mnemonic, bits<16> opcode, RegisterOperand cls1,
RegisterOperand cls2>
: InstRIEf<opcode, (outs cls1:$R1),
- (ins cls1:$R1src, cls2:$R2, uimm8:$I3, uimm8:$I4, uimm8zx6:$I5),
+ (ins cls1:$R1src, cls2:$R2, imm32zx8:$I3, imm32zx8:$I4,
+ imm32zx6:$I5),
mnemonic#"\t$R1, $R2, $I3, $I4, $I5", []> {
let Constraints = "$R1 = $R1src";
let DisableEncoding = "$R1src";
}
class PrefetchRXY<string mnemonic, bits<16> opcode, SDPatternOperator operator>
- : InstRXY<opcode, (outs), (ins uimm8zx4:$R1, bdxaddr20only:$XBD2),
+ : InstRXY<opcode, (outs), (ins imm32zx4:$R1, bdxaddr20only:$XBD2),
mnemonic##"\t$R1, $XBD2",
- [(operator uimm8zx4:$R1, bdxaddr20only:$XBD2)]>;
+ [(operator imm32zx4:$R1, bdxaddr20only:$XBD2)]>;
class PrefetchRILPC<string mnemonic, bits<12> opcode,
SDPatternOperator operator>
- : InstRIL<opcode, (outs), (ins uimm8zx4:$R1, pcrel32:$I2),
+ : InstRIL<opcode, (outs), (ins imm32zx4:$R1, pcrel32:$I2),
mnemonic##"\t$R1, $I2",
- [(operator uimm8zx4:$R1, pcrel32:$I2)]> {
+ [(operator imm32zx4:$R1, pcrel32:$I2)]> {
// We want PC-relative addresses to be tried ahead of BD and BDX addresses.
// However, BDXs have two extra operands and are therefore 6 units more
// complex.
@@ -1450,7 +1441,8 @@ class StoreRXYPseudo<SDPatternOperator operator, RegisterOperand cls,
// of registers.
class RotateSelectRIEfPseudo<RegisterOperand cls1, RegisterOperand cls2>
: Pseudo<(outs cls1:$R1),
- (ins cls1:$R1src, cls2:$R2, uimm8:$I3, uimm8:$I4, uimm8zx6:$I5),
+ (ins cls1:$R1src, cls2:$R2, imm32zx8:$I3, imm32zx8:$I4,
+ imm32zx6:$I5),
[]> {
let Constraints = "$R1 = $R1src";
let DisableEncoding = "$R1src";
@@ -1460,9 +1452,9 @@ class RotateSelectRIEfPseudo<RegisterOperand cls1, RegisterOperand cls2>
// the value of the PSW's 2-bit condition code field.
class SelectWrapper<RegisterOperand cls>
: Pseudo<(outs cls:$dst),
- (ins cls:$src1, cls:$src2, uimm8zx4:$valid, uimm8zx4:$cc),
+ (ins cls:$src1, cls:$src2, imm32zx4:$valid, imm32zx4:$cc),
[(set cls:$dst, (z_select_ccmask cls:$src1, cls:$src2,
- uimm8zx4:$valid, uimm8zx4:$cc))]> {
+ imm32zx4:$valid, imm32zx4:$cc))]> {
let usesCustomInserter = 1;
// Although the instructions used by these nodes do not in themselves
// change CC, the insertion requires new blocks, and CC cannot be live
@@ -1476,14 +1468,14 @@ multiclass CondStores<RegisterOperand cls, SDPatternOperator store,
SDPatternOperator load, AddressingMode mode> {
let Defs = [CC], Uses = [CC], usesCustomInserter = 1 in {
def "" : Pseudo<(outs),
- (ins cls:$new, mode:$addr, uimm8zx4:$valid, uimm8zx4:$cc),
+ (ins cls:$new, mode:$addr, imm32zx4:$valid, imm32zx4:$cc),
[(store (z_select_ccmask cls:$new, (load mode:$addr),
- uimm8zx4:$valid, uimm8zx4:$cc),
+ imm32zx4:$valid, imm32zx4:$cc),
mode:$addr)]>;
def Inv : Pseudo<(outs),
- (ins cls:$new, mode:$addr, uimm8zx4:$valid, uimm8zx4:$cc),
+ (ins cls:$new, mode:$addr, imm32zx4:$valid, imm32zx4:$cc),
[(store (z_select_ccmask (load mode:$addr), cls:$new,
- uimm8zx4:$valid, uimm8zx4:$cc),
+ imm32zx4:$valid, imm32zx4:$cc),
mode:$addr)]>;
}
}
@@ -1611,6 +1603,7 @@ class CompareAliasRI<SDPatternOperator operator, RegisterOperand cls,
// An alias of a RotateSelectRIEf, but with different register sizes.
class RotateSelectAliasRIEf<RegisterOperand cls1, RegisterOperand cls2>
: Alias<6, (outs cls1:$R1),
- (ins cls1:$R1src, cls2:$R2, uimm8:$I3, uimm8:$I4, uimm8zx6:$I5), []> {
+ (ins cls1:$R1src, cls2:$R2, imm32zx8:$I3, imm32zx8:$I4,
+ imm32zx6:$I5), []> {
let Constraints = "$R1 = $R1src";
}
diff --git a/lib/Target/SystemZ/SystemZInstrInfo.cpp b/lib/Target/SystemZ/SystemZInstrInfo.cpp
index 6a18b2d..f58ab47 100644
--- a/lib/Target/SystemZ/SystemZInstrInfo.cpp
+++ b/lib/Target/SystemZ/SystemZInstrInfo.cpp
@@ -40,9 +40,9 @@ static bool isHighReg(unsigned int Reg) {
// Pin the vtable to this file.
void SystemZInstrInfo::anchor() {}
-SystemZInstrInfo::SystemZInstrInfo(SystemZTargetMachine &tm)
+SystemZInstrInfo::SystemZInstrInfo(SystemZSubtarget &sti)
: SystemZGenInstrInfo(SystemZ::ADJCALLSTACKDOWN, SystemZ::ADJCALLSTACKUP),
- RI(tm), TM(tm) {
+ RI(), STI(sti) {
}
// MI is a 128-bit load or store. Split it into two 64-bit loads or stores,
@@ -488,7 +488,7 @@ SystemZInstrInfo::optimizeCompareInstr(MachineInstr *Compare,
bool IsLogical = (Compare->getDesc().TSFlags & SystemZII::IsLogical) != 0;
if (Value == 0 &&
!IsLogical &&
- removeIPMBasedCompare(Compare, SrcReg, MRI, TM.getRegisterInfo()))
+ removeIPMBasedCompare(Compare, SrcReg, MRI, &RI))
return true;
return false;
}
@@ -505,7 +505,7 @@ static unsigned getConditionalMove(unsigned Opcode) {
bool SystemZInstrInfo::isPredicable(MachineInstr *MI) const {
unsigned Opcode = MI->getOpcode();
- if (TM.getSubtargetImpl()->hasLoadStoreOnCond() &&
+ if (STI.hasLoadStoreOnCond() &&
getConditionalMove(Opcode))
return true;
return false;
@@ -537,7 +537,7 @@ PredicateInstruction(MachineInstr *MI,
unsigned CCMask = Pred[1].getImm();
assert(CCMask > 0 && CCMask < 15 && "Invalid predicate");
unsigned Opcode = MI->getOpcode();
- if (TM.getSubtargetImpl()->hasLoadStoreOnCond()) {
+ if (STI.hasLoadStoreOnCond()) {
if (unsigned CondOpcode = getConditionalMove(Opcode)) {
MI->setDesc(get(CondOpcode));
MachineInstrBuilder(*MI->getParent()->getParent(), MI)
@@ -685,7 +685,7 @@ SystemZInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
// We prefer to keep the two-operand form where possible both
// because it tends to be shorter and because some instructions
// have memory forms that can be used during spilling.
- if (TM.getSubtargetImpl()->hasDistinctOps()) {
+ if (STI.hasDistinctOps()) {
MachineOperand &Dest = MI->getOperand(0);
MachineOperand &Src = MI->getOperand(1);
unsigned DestReg = Dest.getReg();
diff --git a/lib/Target/SystemZ/SystemZInstrInfo.h b/lib/Target/SystemZ/SystemZInstrInfo.h
index 09aee5d..83009cb 100644
--- a/lib/Target/SystemZ/SystemZInstrInfo.h
+++ b/lib/Target/SystemZ/SystemZInstrInfo.h
@@ -110,9 +110,10 @@ struct Branch {
};
} // end namespace SystemZII
+class SystemZSubtarget;
class SystemZInstrInfo : public SystemZGenInstrInfo {
const SystemZRegisterInfo RI;
- SystemZTargetMachine &TM;
+ SystemZSubtarget &STI;
void splitMove(MachineBasicBlock::iterator MI, unsigned NewOpcode) const;
void splitAdjDynAlloc(MachineBasicBlock::iterator MI) const;
@@ -130,7 +131,7 @@ class SystemZInstrInfo : public SystemZGenInstrInfo {
virtual void anchor();
public:
- explicit SystemZInstrInfo(SystemZTargetMachine &TM);
+ explicit SystemZInstrInfo(SystemZSubtarget &STI);
// Override TargetInstrInfo.
unsigned isLoadFromStackSlot(const MachineInstr *MI,
diff --git a/lib/Target/SystemZ/SystemZInstrInfo.td b/lib/Target/SystemZ/SystemZInstrInfo.td
index e70df92..f4951ad 100644
--- a/lib/Target/SystemZ/SystemZInstrInfo.td
+++ b/lib/Target/SystemZ/SystemZInstrInfo.td
@@ -63,11 +63,11 @@ let isBranch = 1, isTerminator = 1, Uses = [CC] in {
def BRCL : InstRIL<0xC04, (outs), (ins cond4:$valid, cond4:$R1,
brtarget32:$I2), "jg$R1\t$I2", []>;
}
- def AsmBRC : InstRI<0xA74, (outs), (ins uimm8zx4:$R1, brtarget16:$I2),
+ def AsmBRC : InstRI<0xA74, (outs), (ins imm32zx4:$R1, brtarget16:$I2),
"brc\t$R1, $I2", []>;
- def AsmBRCL : InstRIL<0xC04, (outs), (ins uimm8zx4:$R1, brtarget32:$I2),
+ def AsmBRCL : InstRIL<0xC04, (outs), (ins imm32zx4:$R1, brtarget32:$I2),
"brcl\t$R1, $I2", []>;
- def AsmBCR : InstRR<0x07, (outs), (ins uimm8zx4:$R1, GR64:$R2),
+ def AsmBCR : InstRR<0x07, (outs), (ins imm32zx4:$R1, GR64:$R2),
"bcr\t$R1, $R2", []>;
}
@@ -109,7 +109,7 @@ multiclass CompareBranches<Operand ccmask, string pos1, string pos2> {
}
let isCodeGenOnly = 1 in
defm C : CompareBranches<cond4, "$M3", "">;
-defm AsmC : CompareBranches<uimm8zx4, "", "$M3, ">;
+defm AsmC : CompareBranches<imm32zx4, "", "$M3, ">;
// Define AsmParser mnemonics for each general condition-code mask
// (integer or floating-point)
@@ -233,9 +233,7 @@ defm CondStore64 : CondStores<GR64, nonvolatile_store,
// Call instructions
//===----------------------------------------------------------------------===//
-// The definitions here are for the call-clobbered registers.
-let isCall = 1, Defs = [R0D, R1D, R2D, R3D, R4D, R5D, R14D,
- F0D, F1D, F2D, F3D, F4D, F5D, F6D, F7D, CC] in {
+let isCall = 1, Defs = [R14D, CC] in {
def CallBRASL : Alias<6, (outs), (ins pcrel32:$I2, variable_ops),
[(z_call pcrel32:$I2)]>;
def CallBASR : Alias<2, (outs), (ins ADDR64:$R2, variable_ops),
@@ -855,7 +853,7 @@ let Defs = [CC] in {
}
// AND to memory
- defm NI : BinarySIPair<"ni", 0x94, 0xEB54, null_frag, uimm8>;
+ defm NI : BinarySIPair<"ni", 0x94, 0xEB54, null_frag, imm32zx8>;
// Block AND.
let mayLoad = 1, mayStore = 1 in
@@ -912,7 +910,7 @@ let Defs = [CC] in {
}
// OR to memory
- defm OI : BinarySIPair<"oi", 0x96, 0xEB56, null_frag, uimm8>;
+ defm OI : BinarySIPair<"oi", 0x96, 0xEB56, null_frag, imm32zx8>;
// Block OR.
let mayLoad = 1, mayStore = 1 in
@@ -952,7 +950,7 @@ let Defs = [CC] in {
}
// XOR to memory
- defm XI : BinarySIPair<"xi", 0x97, 0xEB57, null_frag, uimm8>;
+ defm XI : BinarySIPair<"xi", 0x97, 0xEB57, null_frag, imm32zx8>;
// Block XOR.
let mayLoad = 1, mayStore = 1 in
@@ -1015,26 +1013,26 @@ def DLG : BinaryRXY<"dlg", 0xE387, z_udivrem64, GR128, load, 8>;
// Shift left.
let neverHasSideEffects = 1 in {
- defm SLL : ShiftRSAndK<"sll", 0x89, 0xEBDF, shl, GR32>;
- def SLLG : ShiftRSY<"sllg", 0xEB0D, shl, GR64>;
+ defm SLL : BinaryRSAndK<"sll", 0x89, 0xEBDF, shl, GR32>;
+ def SLLG : BinaryRSY<"sllg", 0xEB0D, shl, GR64>;
}
// Logical shift right.
let neverHasSideEffects = 1 in {
- defm SRL : ShiftRSAndK<"srl", 0x88, 0xEBDE, srl, GR32>;
- def SRLG : ShiftRSY<"srlg", 0xEB0C, srl, GR64>;
+ defm SRL : BinaryRSAndK<"srl", 0x88, 0xEBDE, srl, GR32>;
+ def SRLG : BinaryRSY<"srlg", 0xEB0C, srl, GR64>;
}
// Arithmetic shift right.
let Defs = [CC], CCValues = 0xE, CompareZeroCCMask = 0xE in {
- defm SRA : ShiftRSAndK<"sra", 0x8A, 0xEBDC, sra, GR32>;
- def SRAG : ShiftRSY<"srag", 0xEB0A, sra, GR64>;
+ defm SRA : BinaryRSAndK<"sra", 0x8A, 0xEBDC, sra, GR32>;
+ def SRAG : BinaryRSY<"srag", 0xEB0A, sra, GR64>;
}
// Rotate left.
let neverHasSideEffects = 1 in {
- def RLL : ShiftRSY<"rll", 0xEB1D, rotl, GR32>;
- def RLLG : ShiftRSY<"rllg", 0xEB1C, rotl, GR64>;
+ def RLL : BinaryRSY<"rll", 0xEB1D, rotl, GR32>;
+ def RLLG : BinaryRSY<"rllg", 0xEB1C, rotl, GR64>;
}
// Rotate second operand left and insert selected bits into first operand.
@@ -1403,15 +1401,15 @@ def : Pat<(sub GR64:$src1, (azextloadi32 bdxaddr20only:$addr)),
// Optimize sign-extended 1/0 selects to -1/0 selects. This is important
// for vector legalization.
-def : Pat<(sra (shl (i32 (z_select_ccmask 1, 0, uimm8zx4:$valid, uimm8zx4:$cc)),
+def : Pat<(sra (shl (i32 (z_select_ccmask 1, 0, imm32zx4:$valid, imm32zx4:$cc)),
(i32 31)),
(i32 31)),
- (Select32 (LHI -1), (LHI 0), uimm8zx4:$valid, uimm8zx4:$cc)>;
-def : Pat<(sra (shl (i64 (anyext (i32 (z_select_ccmask 1, 0, uimm8zx4:$valid,
- uimm8zx4:$cc)))),
+ (Select32 (LHI -1), (LHI 0), imm32zx4:$valid, imm32zx4:$cc)>;
+def : Pat<(sra (shl (i64 (anyext (i32 (z_select_ccmask 1, 0, imm32zx4:$valid,
+ imm32zx4:$cc)))),
(i32 63)),
(i32 63)),
- (Select64 (LGHI -1), (LGHI 0), uimm8zx4:$valid, uimm8zx4:$cc)>;
+ (Select64 (LGHI -1), (LGHI 0), imm32zx4:$valid, imm32zx4:$cc)>;
// Peepholes for turning scalar operations into block operations.
defm : BlockLoadStore<anyextloadi8, i32, MVCSequence, NCSequence, OCSequence,
diff --git a/lib/Target/SystemZ/SystemZOperands.td b/lib/Target/SystemZ/SystemZOperands.td
index 3ad146c..7be81dc 100644
--- a/lib/Target/SystemZ/SystemZOperands.td
+++ b/lib/Target/SystemZ/SystemZOperands.td
@@ -202,21 +202,6 @@ def S32Imm : ImmediateAsmOperand<"S32Imm">;
def U32Imm : ImmediateAsmOperand<"U32Imm">;
//===----------------------------------------------------------------------===//
-// 8-bit immediates
-//===----------------------------------------------------------------------===//
-
-def uimm8zx4 : Immediate<i8, [{
- return isUInt<4>(N->getZExtValue());
-}], NOOP_SDNodeXForm, "U4Imm">;
-
-def uimm8zx6 : Immediate<i8, [{
- return isUInt<6>(N->getZExtValue());
-}], NOOP_SDNodeXForm, "U6Imm">;
-
-def simm8 : Immediate<i8, [{}], SIMM8, "S8Imm">;
-def uimm8 : Immediate<i8, [{}], UIMM8, "U8Imm">;
-
-//===----------------------------------------------------------------------===//
// i32 immediates
//===----------------------------------------------------------------------===//
@@ -241,6 +226,14 @@ def imm32lh16c : Immediate<i32, [{
}], LH16, "U16Imm">;
// Short immediates
+def imm32zx4 : Immediate<i32, [{
+ return isUInt<4>(N->getZExtValue());
+}], NOOP_SDNodeXForm, "U4Imm">;
+
+def imm32zx6 : Immediate<i32, [{
+ return isUInt<6>(N->getZExtValue());
+}], NOOP_SDNodeXForm, "U6Imm">;
+
def imm32sx8 : Immediate<i32, [{
return isInt<8>(N->getSExtValue());
}], SIMM8, "S8Imm">;
@@ -470,13 +463,13 @@ def AccessReg : AsmOperandClass {
let Name = "AccessReg";
let ParserMethod = "parseAccessReg";
}
-def access_reg : Immediate<i8, [{ return N->getZExtValue() < 16; }],
+def access_reg : Immediate<i32, [{ return N->getZExtValue() < 16; }],
NOOP_SDNodeXForm, "AccessReg"> {
let ParserMatchClass = AccessReg;
}
// A 4-bit condition-code mask.
-def cond4 : PatLeaf<(i8 imm), [{ return (N->getZExtValue() < 16); }]>,
- Operand<i8> {
+def cond4 : PatLeaf<(i32 imm), [{ return (N->getZExtValue() < 16); }]>,
+ Operand<i32> {
let PrintMethod = "printCond4Operand";
}
diff --git a/lib/Target/SystemZ/SystemZOperators.td b/lib/Target/SystemZ/SystemZOperators.td
index a391961..c70e662 100644
--- a/lib/Target/SystemZ/SystemZOperators.td
+++ b/lib/Target/SystemZ/SystemZOperators.td
@@ -19,14 +19,14 @@ def SDT_ZICmp : SDTypeProfile<0, 3,
[SDTCisSameAs<0, 1>,
SDTCisVT<2, i32>]>;
def SDT_ZBRCCMask : SDTypeProfile<0, 3,
- [SDTCisVT<0, i8>,
- SDTCisVT<1, i8>,
+ [SDTCisVT<0, i32>,
+ SDTCisVT<1, i32>,
SDTCisVT<2, OtherVT>]>;
def SDT_ZSelectCCMask : SDTypeProfile<1, 4,
[SDTCisSameAs<0, 1>,
SDTCisSameAs<1, 2>,
- SDTCisVT<3, i8>,
- SDTCisVT<4, i8>]>;
+ SDTCisVT<3, i32>,
+ SDTCisVT<4, i32>]>;
def SDT_ZWrapPtr : SDTypeProfile<1, 1,
[SDTCisSameAs<0, 1>,
SDTCisPtrTy<0>]>;
@@ -37,7 +37,7 @@ def SDT_ZWrapOffset : SDTypeProfile<1, 2,
def SDT_ZAdjDynAlloc : SDTypeProfile<1, 0, [SDTCisVT<0, i64>]>;
def SDT_ZExtractAccess : SDTypeProfile<1, 1,
[SDTCisVT<0, i32>,
- SDTCisVT<1, i8>]>;
+ SDTCisVT<1, i32>]>;
def SDT_ZGR128Binary32 : SDTypeProfile<1, 2,
[SDTCisVT<0, untyped>,
SDTCisVT<1, untyped>,
@@ -77,7 +77,7 @@ def SDT_ZString : SDTypeProfile<1, 3,
SDTCisVT<3, i32>]>;
def SDT_ZI32Intrinsic : SDTypeProfile<1, 0, [SDTCisVT<0, i32>]>;
def SDT_ZPrefetch : SDTypeProfile<0, 2,
- [SDTCisVT<0, i8>,
+ [SDTCisVT<0, i32>,
SDTCisPtrTy<1>]>;
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/SystemZ/SystemZPatterns.td b/lib/Target/SystemZ/SystemZPatterns.td
index c0f94ec..e307f8a 100644
--- a/lib/Target/SystemZ/SystemZPatterns.td
+++ b/lib/Target/SystemZ/SystemZPatterns.td
@@ -101,15 +101,15 @@ multiclass CondStores64<Instruction insn, Instruction insninv,
SDPatternOperator store, SDPatternOperator load,
AddressingMode mode> {
def : Pat<(store (z_select_ccmask GR64:$new, (load mode:$addr),
- uimm8zx4:$valid, uimm8zx4:$cc),
+ imm32zx4:$valid, imm32zx4:$cc),
mode:$addr),
(insn (EXTRACT_SUBREG GR64:$new, subreg_l32), mode:$addr,
- uimm8zx4:$valid, uimm8zx4:$cc)>;
+ imm32zx4:$valid, imm32zx4:$cc)>;
def : Pat<(store (z_select_ccmask (load mode:$addr), GR64:$new,
- uimm8zx4:$valid, uimm8zx4:$cc),
+ imm32zx4:$valid, imm32zx4:$cc),
mode:$addr),
(insninv (EXTRACT_SUBREG GR64:$new, subreg_l32), mode:$addr,
- uimm8zx4:$valid, uimm8zx4:$cc)>;
+ imm32zx4:$valid, imm32zx4:$cc)>;
}
// Try to use MVC instruction INSN for a load of type LOAD followed by a store
diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.cpp b/lib/Target/SystemZ/SystemZRegisterInfo.cpp
index a04d703..f03bcc4 100644
--- a/lib/Target/SystemZ/SystemZRegisterInfo.cpp
+++ b/lib/Target/SystemZ/SystemZRegisterInfo.cpp
@@ -7,31 +7,29 @@
//
//===----------------------------------------------------------------------===//
+#include "SystemZInstrInfo.h"
#include "SystemZRegisterInfo.h"
-#include "SystemZTargetMachine.h"
+#include "SystemZSubtarget.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetFrameLowering.h"
using namespace llvm;
#define GET_REGINFO_TARGET_DESC
#include "SystemZGenRegisterInfo.inc"
-SystemZRegisterInfo::SystemZRegisterInfo(SystemZTargetMachine &tm)
- : SystemZGenRegisterInfo(SystemZ::R14D), TM(tm) {}
+SystemZRegisterInfo::SystemZRegisterInfo()
+ : SystemZGenRegisterInfo(SystemZ::R14D) {}
-const MCPhysReg*
+const MCPhysReg *
SystemZRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
- static const MCPhysReg CalleeSavedRegs[] = {
- SystemZ::R6D, SystemZ::R7D, SystemZ::R8D, SystemZ::R9D,
- SystemZ::R10D, SystemZ::R11D, SystemZ::R12D, SystemZ::R13D,
- SystemZ::R14D, SystemZ::R15D,
- SystemZ::F8D, SystemZ::F9D, SystemZ::F10D, SystemZ::F11D,
- SystemZ::F12D, SystemZ::F13D, SystemZ::F14D, SystemZ::F15D,
- 0
- };
-
- return CalleeSavedRegs;
+ return CSR_SystemZ_SaveList;
+}
+
+const uint32_t *
+SystemZRegisterInfo::getCallPreservedMask(CallingConv::ID CC) const {
+ return CSR_SystemZ_RegMask;
}
BitVector
@@ -63,7 +61,8 @@ SystemZRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
MachineBasicBlock &MBB = *MI->getParent();
MachineFunction &MF = *MBB.getParent();
- auto *TII = static_cast<const SystemZInstrInfo*>(TM.getInstrInfo());
+ auto *TII =
+ static_cast<const SystemZInstrInfo *>(MF.getTarget().getInstrInfo());
const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
DebugLoc DL = MI->getDebugLoc();
diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.h b/lib/Target/SystemZ/SystemZRegisterInfo.h
index e236f71..9bffa46 100644
--- a/lib/Target/SystemZ/SystemZRegisterInfo.h
+++ b/lib/Target/SystemZ/SystemZRegisterInfo.h
@@ -29,15 +29,9 @@ inline unsigned odd128(bool Is32bit) {
}
} // end namespace SystemZ
-class SystemZSubtarget;
-class SystemZInstrInfo;
-
struct SystemZRegisterInfo : public SystemZGenRegisterInfo {
-private:
- SystemZTargetMachine &TM;
-
public:
- SystemZRegisterInfo(SystemZTargetMachine &tm);
+ SystemZRegisterInfo();
// Override TargetRegisterInfo.h.
bool requiresRegisterScavenging(const MachineFunction &MF) const override {
@@ -51,6 +45,7 @@ public:
}
const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF = nullptr) const
override;
+ const uint32_t *getCallPreservedMask(CallingConv::ID CC) const override;
BitVector getReservedRegs(const MachineFunction &MF) const override;
void eliminateFrameIndex(MachineBasicBlock::iterator MI,
int SPAdj, unsigned FIOperandNum,
diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.td b/lib/Target/SystemZ/SystemZRegisterInfo.td
index 93d7c83..47ac20d 100644
--- a/lib/Target/SystemZ/SystemZRegisterInfo.td
+++ b/lib/Target/SystemZ/SystemZRegisterInfo.td
@@ -119,6 +119,29 @@ defm ADDR128 : SystemZRegClass<"ADDR128", untyped, 128, (sub GR128Bit, R0Q)>;
// Floating-point registers
//===----------------------------------------------------------------------===//
+// Maps FPR register numbers to their DWARF encoding.
+class DwarfMapping<int id> { int Id = id; }
+
+def F0Dwarf : DwarfMapping<16>;
+def F2Dwarf : DwarfMapping<17>;
+def F4Dwarf : DwarfMapping<18>;
+def F6Dwarf : DwarfMapping<19>;
+
+def F1Dwarf : DwarfMapping<20>;
+def F3Dwarf : DwarfMapping<21>;
+def F5Dwarf : DwarfMapping<22>;
+def F7Dwarf : DwarfMapping<23>;
+
+def F8Dwarf : DwarfMapping<24>;
+def F10Dwarf : DwarfMapping<25>;
+def F12Dwarf : DwarfMapping<26>;
+def F14Dwarf : DwarfMapping<27>;
+
+def F9Dwarf : DwarfMapping<28>;
+def F11Dwarf : DwarfMapping<29>;
+def F13Dwarf : DwarfMapping<30>;
+def F15Dwarf : DwarfMapping<31>;
+
// Lower 32 bits of one of the 16 64-bit floating-point registers
class FPR32<bits<16> num, string n> : SystemZReg<n> {
let HWEncoding = num;
@@ -142,7 +165,7 @@ class FPR128<bits<16> num, string n, FPR64 low, FPR64 high>
foreach I = 0-15 in {
def F#I#S : FPR32<I, "f"#I>;
def F#I#D : FPR64<I, "f"#I, !cast<FPR32>("F"#I#"S")>,
- DwarfRegNum<[!add(I, 16)]>;
+ DwarfRegNum<[!cast<DwarfMapping>("F"#I#"Dwarf").Id]>;
}
foreach I = [0, 1, 4, 5, 8, 9, 12, 13] in {
diff --git a/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp b/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp
index 97abee3..a3cba64 100644
--- a/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp
+++ b/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp
@@ -18,10 +18,8 @@ using namespace llvm;
#define DEBUG_TYPE "systemz-selectiondag-info"
-SystemZSelectionDAGInfo::
-SystemZSelectionDAGInfo(const SystemZTargetMachine &TM)
- : TargetSelectionDAGInfo(TM) {
-}
+SystemZSelectionDAGInfo::SystemZSelectionDAGInfo(const DataLayout &DL)
+ : TargetSelectionDAGInfo(&DL) {}
SystemZSelectionDAGInfo::~SystemZSelectionDAGInfo() {
}
diff --git a/lib/Target/SystemZ/SystemZSelectionDAGInfo.h b/lib/Target/SystemZ/SystemZSelectionDAGInfo.h
index 79e7fab..e9de146 100644
--- a/lib/Target/SystemZ/SystemZSelectionDAGInfo.h
+++ b/lib/Target/SystemZ/SystemZSelectionDAGInfo.h
@@ -22,7 +22,7 @@ class SystemZTargetMachine;
class SystemZSelectionDAGInfo : public TargetSelectionDAGInfo {
public:
- explicit SystemZSelectionDAGInfo(const SystemZTargetMachine &TM);
+ explicit SystemZSelectionDAGInfo(const DataLayout &DL);
~SystemZSelectionDAGInfo();
SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
diff --git a/lib/Target/SystemZ/SystemZSubtarget.cpp b/lib/Target/SystemZ/SystemZSubtarget.cpp
index a011157..e160bc8 100644
--- a/lib/Target/SystemZ/SystemZSubtarget.cpp
+++ b/lib/Target/SystemZ/SystemZSubtarget.cpp
@@ -20,16 +20,11 @@ using namespace llvm;
#define GET_SUBTARGETINFO_CTOR
#include "SystemZGenSubtargetInfo.inc"
-// Pin the vtabel to this file.
+// Pin the vtable to this file.
void SystemZSubtarget::anchor() {}
-SystemZSubtarget::SystemZSubtarget(const std::string &TT,
- const std::string &CPU,
- const std::string &FS)
- : SystemZGenSubtargetInfo(TT, CPU, FS), HasDistinctOps(false),
- HasLoadStoreOnCond(false), HasHighWord(false), HasFPExtension(false),
- HasFastSerialization(false), HasInterlockedAccess1(false),
- TargetTriple(TT) {
+SystemZSubtarget &
+SystemZSubtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS) {
std::string CPUName = CPU;
if (CPUName.empty())
CPUName = "generic";
@@ -37,11 +32,26 @@ SystemZSubtarget::SystemZSubtarget(const std::string &TT,
if (CPUName == "generic")
CPUName = sys::getHostCPUName();
#endif
-
// Parse features string.
ParseSubtargetFeatures(CPUName, FS);
+ return *this;
}
+SystemZSubtarget::SystemZSubtarget(const std::string &TT,
+ const std::string &CPU,
+ const std::string &FS,
+ const TargetMachine &TM)
+ : SystemZGenSubtargetInfo(TT, CPU, FS), HasDistinctOps(false),
+ HasLoadStoreOnCond(false), HasHighWord(false), HasFPExtension(false),
+ HasFastSerialization(false), HasInterlockedAccess1(false),
+ TargetTriple(TT),
+ // Make sure that global data has at least 16 bits of alignment by
+ // default, so that we can refer to it using LARL. We don't have any
+ // special requirements for stack variables though.
+ DL("E-m:e-i1:8:16-i8:8:16-i64:64-f128:64-a:8:16-n32:64"),
+ InstrInfo(initializeSubtargetDependencies(CPU, FS)), TLInfo(TM),
+ TSInfo(DL), FrameLowering() {}
+
// Return true if GV binds locally under reloc model RM.
static bool bindsLocally(const GlobalValue *GV, Reloc::Model RM) {
// For non-PIC, all symbols bind locally.
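
The subtarget now owns the codegen helper objects, so the feature string must be parsed before InstrInfo and friends are constructed; initializeSubtargetDependencies returns *this precisely so it can run inside the member initializer list. A standalone sketch of that idiom, with an invented feature name:

#include <cassert>
#include <string>

struct InstrInfo {
  bool DistinctOps;
  explicit InstrInfo(const struct Subtarget &ST);
};

struct Subtarget {
  bool HasDistinctOps = false;
  InstrInfo II;

  // Parse features on *this and hand *this back for use in an initializer.
  Subtarget &initializeDependencies(const std::string &FS) {
    HasDistinctOps = (FS.find("+distinct-ops") != std::string::npos);
    return *this;
  }

  explicit Subtarget(const std::string &FS)
      // Features are parsed before II is built, even though both happen
      // "inside" the constructor.
      : II(initializeDependencies(FS)) {}
};

InstrInfo::InstrInfo(const Subtarget &ST) : DistinctOps(ST.HasDistinctOps) {}

int main() {
  Subtarget ST("+distinct-ops");
  assert(ST.II.DistinctOps);
  return 0;
}
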
diff --git a/lib/Target/SystemZ/SystemZSubtarget.h b/lib/Target/SystemZ/SystemZSubtarget.h
index ffca2d8..4e8c710 100644
--- a/lib/Target/SystemZ/SystemZSubtarget.h
+++ b/lib/Target/SystemZ/SystemZSubtarget.h
@@ -14,6 +14,12 @@
#ifndef SYSTEMZSUBTARGET_H
#define SYSTEMZSUBTARGET_H
+#include "SystemZFrameLowering.h"
+#include "SystemZISelLowering.h"
+#include "SystemZInstrInfo.h"
+#include "SystemZRegisterInfo.h"
+#include "SystemZSelectionDAGInfo.h"
+#include "llvm/IR/DataLayout.h"
#include "llvm/ADT/Triple.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include <string>
@@ -37,10 +43,26 @@ protected:
private:
Triple TargetTriple;
-
+ const DataLayout DL;
+ SystemZInstrInfo InstrInfo;
+ SystemZTargetLowering TLInfo;
+ SystemZSelectionDAGInfo TSInfo;
+ SystemZFrameLowering FrameLowering;
+
+ SystemZSubtarget &initializeSubtargetDependencies(StringRef CPU,
+ StringRef FS);
public:
SystemZSubtarget(const std::string &TT, const std::string &CPU,
- const std::string &FS);
+ const std::string &FS, const TargetMachine &TM);
+
+ const TargetFrameLowering *getFrameLowering() const { return &FrameLowering; }
+ const SystemZInstrInfo *getInstrInfo() const { return &InstrInfo; }
+ const DataLayout *getDataLayout() const { return &DL; }
+ const SystemZRegisterInfo *getRegisterInfo() const {
+ return &InstrInfo.getRegisterInfo();
+ }
+ const SystemZTargetLowering *getTargetLowering() const { return &TLInfo; }
+ const TargetSelectionDAGInfo *getSelectionDAGInfo() const { return &TSInfo; }
// This is important for reducing register pressure in vector code.
bool useAA() const override { return true; }
diff --git a/lib/Target/SystemZ/SystemZTargetMachine.cpp b/lib/Target/SystemZ/SystemZTargetMachine.cpp
index 4c9ce29..0122e99 100644
--- a/lib/Target/SystemZ/SystemZTargetMachine.cpp
+++ b/lib/Target/SystemZ/SystemZTargetMachine.cpp
@@ -22,17 +22,10 @@ extern "C" void LLVMInitializeSystemZTarget() {
SystemZTargetMachine::SystemZTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
const TargetOptions &Options,
- Reloc::Model RM,
- CodeModel::Model CM,
+ Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL)
- : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
- Subtarget(TT, CPU, FS),
- // Make sure that global data has at least 16 bits of alignment by default,
- // so that we can refer to it using LARL. We don't have any special
- // requirements for stack variables though.
- DL("E-m:e-i1:8:16-i8:8:16-i64:64-f128:64-a:8:16-n32:64"),
- InstrInfo(*this), TLInfo(*this), TSInfo(*this),
- FrameLowering(*this, Subtarget) {
+ : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
+ Subtarget(TT, CPU, FS, *this) {
initAsmInfo();
}
@@ -65,7 +58,8 @@ bool SystemZPassConfig::addInstSelector() {
}
bool SystemZPassConfig::addPreSched2() {
- if (getSystemZTargetMachine().getSubtargetImpl()->hasLoadStoreOnCond())
+ if (getOptLevel() != CodeGenOpt::None &&
+ getSystemZTargetMachine().getSubtargetImpl()->hasLoadStoreOnCond())
addPass(&IfConverterID);
return true;
}
diff --git a/lib/Target/SystemZ/SystemZTargetMachine.h b/lib/Target/SystemZ/SystemZTargetMachine.h
index 1db717b..ded07e9 100644
--- a/lib/Target/SystemZ/SystemZTargetMachine.h
+++ b/lib/Target/SystemZ/SystemZTargetMachine.h
@@ -15,25 +15,15 @@
#ifndef SYSTEMZTARGETMACHINE_H
#define SYSTEMZTARGETMACHINE_H
-#include "SystemZFrameLowering.h"
-#include "SystemZISelLowering.h"
-#include "SystemZInstrInfo.h"
-#include "SystemZRegisterInfo.h"
-#include "SystemZSelectionDAGInfo.h"
#include "SystemZSubtarget.h"
-#include "llvm/IR/DataLayout.h"
-#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetMachine.h"
namespace llvm {
+class TargetFrameLowering;
+
class SystemZTargetMachine : public LLVMTargetMachine {
SystemZSubtarget Subtarget;
- const DataLayout DL;
- SystemZInstrInfo InstrInfo;
- SystemZTargetLowering TLInfo;
- SystemZSelectionDAGInfo TSInfo;
- SystemZFrameLowering FrameLowering;
public:
SystemZTargetMachine(const Target &T, StringRef TT, StringRef CPU,
@@ -43,25 +33,25 @@ public:
// Override TargetMachine.
const TargetFrameLowering *getFrameLowering() const override {
- return &FrameLowering;
+ return getSubtargetImpl()->getFrameLowering();
}
const SystemZInstrInfo *getInstrInfo() const override {
- return &InstrInfo;
+ return getSubtargetImpl()->getInstrInfo();
}
const SystemZSubtarget *getSubtargetImpl() const override {
return &Subtarget;
}
const DataLayout *getDataLayout() const override {
- return &DL;
+ return getSubtargetImpl()->getDataLayout();
}
const SystemZRegisterInfo *getRegisterInfo() const override {
- return &InstrInfo.getRegisterInfo();
+ return getSubtargetImpl()->getRegisterInfo();
}
const SystemZTargetLowering *getTargetLowering() const override {
- return &TLInfo;
+ return getSubtargetImpl()->getTargetLowering();
}
const TargetSelectionDAGInfo *getSelectionDAGInfo() const override {
- return &TSInfo;
+ return getSubtargetImpl()->getSelectionDAGInfo();
}
// Override LLVMTargetMachine
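[Editor's note] With the subtarget owning the per-target objects, the target machine keeps only its Subtarget member and forwards every accessor to it, as the header diff above shows. A small standalone sketch of that forwarding shape, assuming simplified stand-in types (FrameLowering, Subtarget, TargetMachineLike), not the LLVM classes:

    #include <iostream>

    struct FrameLowering { int StackAlignment = 8; };

    struct Subtarget {
      FrameLowering FL;
      const FrameLowering *getFrameLowering() const { return &FL; }
    };

    struct TargetMachineLike {
      Subtarget ST;
      const Subtarget *getSubtargetImpl() const { return &ST; }
      // The machine no longer owns a second copy; it forwards to the subtarget.
      const FrameLowering *getFrameLowering() const {
        return getSubtargetImpl()->getFrameLowering();
      }
    };

    int main() {
      TargetMachineLike TM;
      std::cout << TM.getFrameLowering()->StackAlignment << "\n";  // prints 8
    }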
diff --git a/lib/Target/TargetMachine.cpp b/lib/Target/TargetMachine.cpp
index 8365f64..95c8cb6 100644
--- a/lib/Target/TargetMachine.cpp
+++ b/lib/Target/TargetMachine.cpp
@@ -88,8 +88,8 @@ CodeModel::Model TargetMachine::getCodeModel() const {
}
/// Get the IR-specified TLS model for Var.
-static TLSModel::Model getSelectedTLSModel(const GlobalVariable *Var) {
- switch (Var->getThreadLocalMode()) {
+static TLSModel::Model getSelectedTLSModel(const GlobalValue *GV) {
+ switch (GV->getThreadLocalMode()) {
case GlobalVariable::NotThreadLocal:
llvm_unreachable("getSelectedTLSModel for non-TLS variable");
break;
@@ -127,13 +127,10 @@ TLSModel::Model TargetMachine::getTLSModel(const GlobalValue *GV) const {
Model = TLSModel::InitialExec;
}
- const GlobalVariable *Var = dyn_cast<GlobalVariable>(GV);
- if (Var) {
- // If the user specified a more specific model, use that.
- TLSModel::Model SelectedModel = getSelectedTLSModel(Var);
- if (SelectedModel > Model)
- return SelectedModel;
- }
+ // If the user specified a more specific model, use that.
+ TLSModel::Model SelectedModel = getSelectedTLSModel(GV);
+ if (SelectedModel > Model)
+ return SelectedModel;
return Model;
}
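[Editor's note] The rewritten getTLSModel() queries the IR-specified model on any GlobalValue (not just GlobalVariable) and keeps whichever model is more specific, which with TLSModel's ordering is simply the larger enum value. A hedged sketch of that selection rule; the enumerators mirror llvm::TLSModel, but pickTLSModel is an illustrative name, not an LLVM API:

    #include <algorithm>
    #include <iostream>

    enum Model { GeneralDynamic, LocalDynamic, InitialExec, LocalExec };

    // The linkage/visibility-derived model and the model written on the IR are
    // both computed; the numerically larger (more specific) one wins.
    Model pickTLSModel(Model FromCodegen, Model FromIR) {
      return std::max(FromCodegen, FromIR);
    }

    int main() {
      // Codegen decided InitialExec, but the IR asked for LocalExec.
      std::cout << pickTLSModel(InitialExec, LocalExec) << "\n";  // prints 3
    }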
diff --git a/lib/Target/TargetSubtargetInfo.cpp b/lib/Target/TargetSubtargetInfo.cpp
index 3ca13da..87b6b66 100644
--- a/lib/Target/TargetSubtargetInfo.cpp
+++ b/lib/Target/TargetSubtargetInfo.cpp
@@ -39,10 +39,23 @@ bool TargetSubtargetInfo::useMachineScheduler() const {
return enableMachineScheduler();
}
+bool TargetSubtargetInfo::enableAtomicExpandLoadLinked() const {
+ return true;
+}
+
bool TargetSubtargetInfo::enableMachineScheduler() const {
return false;
}
+bool TargetSubtargetInfo::enableRALocalReassignment(
+ CodeGenOpt::Level OptLevel) const {
+ return true;
+}
+
+bool TargetSubtargetInfo::enablePostMachineScheduler() const {
+ return false;
+}
+
bool TargetSubtargetInfo::enablePostRAScheduler(
CodeGenOpt::Level OptLevel,
AntiDepBreakMode& Mode,
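[Editor's note] The new TargetSubtargetInfo hooks follow the usual pattern: a conservative default in the base class that an individual subtarget overrides to opt in. A toy sketch of the pattern; TargetSubtargetInfoLike and MyCPUSubtarget are invented names, not LLVM types:

    #include <iostream>

    struct TargetSubtargetInfoLike {
      virtual ~TargetSubtargetInfoLike() = default;
      // Conservative default: the post-RA machine scheduler stays off.
      virtual bool enablePostMachineScheduler() const { return false; }
    };

    struct MyCPUSubtarget : TargetSubtargetInfoLike {
      bool enablePostMachineScheduler() const override { return true; }
    };

    int main() {
      MyCPUSubtarget ST;
      const TargetSubtargetInfoLike &Base = ST;
      std::cout << Base.enablePostMachineScheduler() << "\n";  // prints 1
    }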
diff --git a/lib/Target/X86/Android.mk b/lib/Target/X86/Android.mk
index 0d0a9ca..e2c4be7 100644
--- a/lib/Target/X86/Android.mk
+++ b/lib/Target/X86/Android.mk
@@ -12,6 +12,7 @@ x86_codegen_TBLGEN_TABLES := \
x86_codegen_SRC_FILES := \
X86AsmPrinter.cpp \
+ X86AtomicExpandPass.cpp \
X86CodeEmitter.cpp \
X86FastISel.cpp \
X86FixupLEAs.cpp \
diff --git a/lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp b/lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp
index f3e6b3f..a365f62 100644
--- a/lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp
+++ b/lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp
@@ -20,6 +20,7 @@
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCTargetAsmParser.h"
#include "llvm/MC/MCTargetOptions.h"
#include "llvm/Support/CommandLine.h"
@@ -36,8 +37,8 @@ bool IsStackReg(unsigned Reg) {
}
std::string FuncName(unsigned AccessSize, bool IsWrite) {
- return std::string("__sanitizer_sanitize_") + (IsWrite ? "store" : "load") +
- (utostr(AccessSize));
+ return std::string("__asan_report_") + (IsWrite ? "store" : "load") +
+ utostr(AccessSize);
}
class X86AddressSanitizer : public X86AsmInstrumentation {
@@ -47,47 +48,55 @@ public:
// X86AsmInstrumentation implementation:
virtual void InstrumentInstruction(
- const MCInst &Inst, SmallVectorImpl<MCParsedAsmOperand *> &Operands,
- MCContext &Ctx, const MCInstrInfo &MII, MCStreamer &Out) override {
+ const MCInst &Inst, OperandVector &Operands, MCContext &Ctx,
+ const MCInstrInfo &MII, MCStreamer &Out) override {
InstrumentMOV(Inst, Operands, Ctx, MII, Out);
}
// Should be implemented differently in x86_32 and x86_64 subclasses.
- virtual void InstrumentMemOperandImpl(X86Operand *Op, unsigned AccessSize,
- bool IsWrite, MCContext &Ctx,
- MCStreamer &Out) = 0;
+ virtual void InstrumentMemOperandSmallImpl(
+ X86Operand &Op, unsigned AccessSize, bool IsWrite, MCContext &Ctx,
+ MCStreamer &Out) = 0;
+ virtual void InstrumentMemOperandLargeImpl(
+ X86Operand &Op, unsigned AccessSize, bool IsWrite, MCContext &Ctx,
+ MCStreamer &Out) = 0;
- void InstrumentMemOperand(MCParsedAsmOperand *Op, unsigned AccessSize,
+ void InstrumentMemOperand(MCParsedAsmOperand &Op, unsigned AccessSize,
bool IsWrite, MCContext &Ctx, MCStreamer &Out);
- void InstrumentMOV(const MCInst &Inst,
- SmallVectorImpl<MCParsedAsmOperand *> &Operands,
+ void InstrumentMOV(const MCInst &Inst, OperandVector &Operands,
MCContext &Ctx, const MCInstrInfo &MII, MCStreamer &Out);
void EmitInstruction(MCStreamer &Out, const MCInst &Inst) {
Out.EmitInstruction(Inst, STI);
}
+ void EmitLabel(MCStreamer &Out, MCSymbol *Label) { Out.EmitLabel(Label); }
+
protected:
const MCSubtargetInfo &STI;
};
void X86AddressSanitizer::InstrumentMemOperand(
- MCParsedAsmOperand *Op, unsigned AccessSize, bool IsWrite, MCContext &Ctx,
+ MCParsedAsmOperand &Op, unsigned AccessSize, bool IsWrite, MCContext &Ctx,
MCStreamer &Out) {
- assert(Op && Op->isMem() && "Op should be a memory operand.");
+ assert(Op.isMem() && "Op should be a memory operand.");
assert((AccessSize & (AccessSize - 1)) == 0 && AccessSize <= 16 &&
"AccessSize should be a power of two, less or equal than 16.");
- X86Operand *MemOp = static_cast<X86Operand *>(Op);
+ X86Operand &MemOp = static_cast<X86Operand &>(Op);
// FIXME: get rid of this limitation.
- if (IsStackReg(MemOp->getMemBaseReg()) || IsStackReg(MemOp->getMemIndexReg()))
+ if (IsStackReg(MemOp.getMemBaseReg()) || IsStackReg(MemOp.getMemIndexReg()))
return;
- InstrumentMemOperandImpl(MemOp, AccessSize, IsWrite, Ctx, Out);
+ // FIXME: take into account load/store alignment.
+ if (AccessSize < 8)
+ InstrumentMemOperandSmallImpl(MemOp, AccessSize, IsWrite, Ctx, Out);
+ else
+ InstrumentMemOperandLargeImpl(MemOp, AccessSize, IsWrite, Ctx, Out);
}
void X86AddressSanitizer::InstrumentMOV(
- const MCInst &Inst, SmallVectorImpl<MCParsedAsmOperand *> &Operands,
- MCContext &Ctx, const MCInstrInfo &MII, MCStreamer &Out) {
+ const MCInst &Inst, OperandVector &Operands, MCContext &Ctx,
+ const MCInstrInfo &MII, MCStreamer &Out) {
// Access size in bytes.
unsigned AccessSize = 0;
@@ -124,107 +133,351 @@ void X86AddressSanitizer::InstrumentMOV(
const bool IsWrite = MII.get(Inst.getOpcode()).mayStore();
for (unsigned Ix = 0; Ix < Operands.size(); ++Ix) {
- MCParsedAsmOperand *Op = Operands[Ix];
- if (Op && Op->isMem())
+ assert(Operands[Ix]);
+ MCParsedAsmOperand &Op = *Operands[Ix];
+ if (Op.isMem())
InstrumentMemOperand(Op, AccessSize, IsWrite, Ctx, Out);
}
}
class X86AddressSanitizer32 : public X86AddressSanitizer {
public:
+ static const long kShadowOffset = 0x20000000;
+
X86AddressSanitizer32(const MCSubtargetInfo &STI)
: X86AddressSanitizer(STI) {}
virtual ~X86AddressSanitizer32() {}
- virtual void InstrumentMemOperandImpl(X86Operand *Op, unsigned AccessSize,
- bool IsWrite, MCContext &Ctx,
- MCStreamer &Out) override;
+ virtual void InstrumentMemOperandSmallImpl(
+ X86Operand &Op, unsigned AccessSize, bool IsWrite, MCContext &Ctx,
+ MCStreamer &Out) override;
+ virtual void InstrumentMemOperandLargeImpl(
+ X86Operand &Op, unsigned AccessSize, bool IsWrite, MCContext &Ctx,
+ MCStreamer &Out) override;
+
+ private:
+ void EmitCallAsanReport(MCContext &Ctx, MCStreamer &Out, unsigned AccessSize,
+ bool IsWrite, unsigned AddressReg) {
+ EmitInstruction(Out, MCInstBuilder(X86::CLD));
+ EmitInstruction(Out, MCInstBuilder(X86::MMX_EMMS));
+
+ EmitInstruction(Out, MCInstBuilder(X86::AND64ri8).addReg(X86::ESP)
+ .addReg(X86::ESP).addImm(-16));
+ EmitInstruction(Out, MCInstBuilder(X86::PUSH32r).addReg(AddressReg));
+
+
+ const std::string& Fn = FuncName(AccessSize, IsWrite);
+ MCSymbol *FnSym = Ctx.GetOrCreateSymbol(StringRef(Fn));
+ const MCSymbolRefExpr *FnExpr =
+ MCSymbolRefExpr::Create(FnSym, MCSymbolRefExpr::VK_PLT, Ctx);
+ EmitInstruction(Out, MCInstBuilder(X86::CALLpcrel32).addExpr(FnExpr));
+ }
};
-void X86AddressSanitizer32::InstrumentMemOperandImpl(
- X86Operand *Op, unsigned AccessSize, bool IsWrite, MCContext &Ctx,
+void X86AddressSanitizer32::InstrumentMemOperandSmallImpl(
+ X86Operand &Op, unsigned AccessSize, bool IsWrite, MCContext &Ctx,
MCStreamer &Out) {
- // FIXME: emit .cfi directives for correct stack unwinding.
EmitInstruction(Out, MCInstBuilder(X86::PUSH32r).addReg(X86::EAX));
+ EmitInstruction(Out, MCInstBuilder(X86::PUSH32r).addReg(X86::ECX));
+ EmitInstruction(Out, MCInstBuilder(X86::PUSH32r).addReg(X86::EDX));
+ EmitInstruction(Out, MCInstBuilder(X86::PUSHF32));
+
{
MCInst Inst;
Inst.setOpcode(X86::LEA32r);
Inst.addOperand(MCOperand::CreateReg(X86::EAX));
+ Op.addMemOperands(Inst, 5);
+ EmitInstruction(Out, Inst);
+ }
+
+ EmitInstruction(
+ Out, MCInstBuilder(X86::MOV32rr).addReg(X86::ECX).addReg(X86::EAX));
+ EmitInstruction(Out, MCInstBuilder(X86::SHR32ri).addReg(X86::ECX)
+ .addReg(X86::ECX).addImm(3));
+
+ {
+ MCInst Inst;
+ Inst.setOpcode(X86::MOV8rm);
+ Inst.addOperand(MCOperand::CreateReg(X86::CL));
+ const MCExpr *Disp = MCConstantExpr::Create(kShadowOffset, Ctx);
+ std::unique_ptr<X86Operand> Op(
+ X86Operand::CreateMem(0, Disp, X86::ECX, 0, 1, SMLoc(), SMLoc()));
+ Op->addMemOperands(Inst, 5);
+ EmitInstruction(Out, Inst);
+ }
+
+ EmitInstruction(Out,
+ MCInstBuilder(X86::TEST8rr).addReg(X86::CL).addReg(X86::CL));
+ MCSymbol *DoneSym = Ctx.CreateTempSymbol();
+ const MCExpr *DoneExpr = MCSymbolRefExpr::Create(DoneSym, Ctx);
+ EmitInstruction(Out, MCInstBuilder(X86::JE_4).addExpr(DoneExpr));
+
+ EmitInstruction(
+ Out, MCInstBuilder(X86::MOV32rr).addReg(X86::EDX).addReg(X86::EAX));
+ EmitInstruction(Out, MCInstBuilder(X86::AND32ri).addReg(X86::EDX)
+ .addReg(X86::EDX).addImm(7));
+
+ switch (AccessSize) {
+ case 1:
+ break;
+ case 2: {
+ MCInst Inst;
+ Inst.setOpcode(X86::LEA32r);
+ Inst.addOperand(MCOperand::CreateReg(X86::EDX));
+
+ const MCExpr *Disp = MCConstantExpr::Create(1, Ctx);
+ std::unique_ptr<X86Operand> Op(
+ X86Operand::CreateMem(0, Disp, X86::EDX, 0, 1, SMLoc(), SMLoc()));
Op->addMemOperands(Inst, 5);
EmitInstruction(Out, Inst);
+ break;
}
+ case 4:
+ EmitInstruction(Out, MCInstBuilder(X86::ADD32ri8).addReg(X86::EDX)
+ .addReg(X86::EDX).addImm(3));
+ break;
+ default:
+ assert(false && "Incorrect access size");
+ break;
+ }
+
+ EmitInstruction(
+ Out, MCInstBuilder(X86::MOVSX32rr8).addReg(X86::ECX).addReg(X86::CL));
+ EmitInstruction(
+ Out, MCInstBuilder(X86::CMP32rr).addReg(X86::EDX).addReg(X86::ECX));
+ EmitInstruction(Out, MCInstBuilder(X86::JL_4).addExpr(DoneExpr));
+
+ EmitCallAsanReport(Ctx, Out, AccessSize, IsWrite, X86::EAX);
+ EmitLabel(Out, DoneSym);
+
+ EmitInstruction(Out, MCInstBuilder(X86::POPF32));
+ EmitInstruction(Out, MCInstBuilder(X86::POP32r).addReg(X86::EDX));
+ EmitInstruction(Out, MCInstBuilder(X86::POP32r).addReg(X86::ECX));
+ EmitInstruction(Out, MCInstBuilder(X86::POP32r).addReg(X86::EAX));
+}
+
+void X86AddressSanitizer32::InstrumentMemOperandLargeImpl(
+ X86Operand &Op, unsigned AccessSize, bool IsWrite, MCContext &Ctx,
+ MCStreamer &Out) {
EmitInstruction(Out, MCInstBuilder(X86::PUSH32r).addReg(X86::EAX));
+ EmitInstruction(Out, MCInstBuilder(X86::PUSH32r).addReg(X86::ECX));
+ EmitInstruction(Out, MCInstBuilder(X86::PUSHF32));
+
{
- const std::string Func = FuncName(AccessSize, IsWrite);
- const MCSymbol *FuncSym = Ctx.GetOrCreateSymbol(StringRef(Func));
- const MCSymbolRefExpr *FuncExpr =
- MCSymbolRefExpr::Create(FuncSym, MCSymbolRefExpr::VK_PLT, Ctx);
- EmitInstruction(Out, MCInstBuilder(X86::CALLpcrel32).addExpr(FuncExpr));
+ MCInst Inst;
+ Inst.setOpcode(X86::LEA32r);
+ Inst.addOperand(MCOperand::CreateReg(X86::EAX));
+ Op.addMemOperands(Inst, 5);
+ EmitInstruction(Out, Inst);
}
- EmitInstruction(Out, MCInstBuilder(X86::POP32r).addReg(X86::EAX));
+ EmitInstruction(
+ Out, MCInstBuilder(X86::MOV32rr).addReg(X86::ECX).addReg(X86::EAX));
+ EmitInstruction(Out, MCInstBuilder(X86::SHR32ri).addReg(X86::ECX)
+ .addReg(X86::ECX).addImm(3));
+ {
+ MCInst Inst;
+ switch (AccessSize) {
+ case 8:
+ Inst.setOpcode(X86::CMP8mi);
+ break;
+ case 16:
+ Inst.setOpcode(X86::CMP16mi);
+ break;
+ default:
+ assert(false && "Incorrect access size");
+ break;
+ }
+ const MCExpr *Disp = MCConstantExpr::Create(kShadowOffset, Ctx);
+ std::unique_ptr<X86Operand> Op(
+ X86Operand::CreateMem(0, Disp, X86::ECX, 0, 1, SMLoc(), SMLoc()));
+ Op->addMemOperands(Inst, 5);
+ Inst.addOperand(MCOperand::CreateImm(0));
+ EmitInstruction(Out, Inst);
+ }
+ MCSymbol *DoneSym = Ctx.CreateTempSymbol();
+ const MCExpr *DoneExpr = MCSymbolRefExpr::Create(DoneSym, Ctx);
+ EmitInstruction(Out, MCInstBuilder(X86::JE_4).addExpr(DoneExpr));
+
+ EmitCallAsanReport(Ctx, Out, AccessSize, IsWrite, X86::EAX);
+ EmitLabel(Out, DoneSym);
+
+ EmitInstruction(Out, MCInstBuilder(X86::POPF32));
+ EmitInstruction(Out, MCInstBuilder(X86::POP32r).addReg(X86::ECX));
EmitInstruction(Out, MCInstBuilder(X86::POP32r).addReg(X86::EAX));
}
class X86AddressSanitizer64 : public X86AddressSanitizer {
public:
+ static const long kShadowOffset = 0x7fff8000;
+
X86AddressSanitizer64(const MCSubtargetInfo &STI)
: X86AddressSanitizer(STI) {}
virtual ~X86AddressSanitizer64() {}
- virtual void InstrumentMemOperandImpl(X86Operand *Op, unsigned AccessSize,
- bool IsWrite, MCContext &Ctx,
- MCStreamer &Out) override;
-};
+ virtual void InstrumentMemOperandSmallImpl(
+ X86Operand &Op, unsigned AccessSize, bool IsWrite, MCContext &Ctx,
+ MCStreamer &Out) override;
+ virtual void InstrumentMemOperandLargeImpl(
+ X86Operand &Op, unsigned AccessSize, bool IsWrite, MCContext &Ctx,
+ MCStreamer &Out) override;
-void X86AddressSanitizer64::InstrumentMemOperandImpl(X86Operand *Op,
- unsigned AccessSize,
- bool IsWrite,
- MCContext &Ctx,
- MCStreamer &Out) {
- // FIXME: emit .cfi directives for correct stack unwinding.
-
- // Set %rsp below current red zone (128 bytes wide) using LEA instruction to
- // preserve flags.
- {
+private:
+ void EmitAdjustRSP(MCContext &Ctx, MCStreamer &Out, long Offset) {
MCInst Inst;
Inst.setOpcode(X86::LEA64r);
Inst.addOperand(MCOperand::CreateReg(X86::RSP));
- const MCExpr *Disp = MCConstantExpr::Create(-128, Ctx);
+ const MCExpr *Disp = MCConstantExpr::Create(Offset, Ctx);
std::unique_ptr<X86Operand> Op(
X86Operand::CreateMem(0, Disp, X86::RSP, 0, 1, SMLoc(), SMLoc()));
Op->addMemOperands(Inst, 5);
EmitInstruction(Out, Inst);
}
+
+ void EmitCallAsanReport(MCContext &Ctx, MCStreamer &Out, unsigned AccessSize,
+ bool IsWrite) {
+ EmitInstruction(Out, MCInstBuilder(X86::CLD));
+ EmitInstruction(Out, MCInstBuilder(X86::MMX_EMMS));
+
+ EmitInstruction(Out, MCInstBuilder(X86::AND64ri8).addReg(X86::RSP)
+ .addReg(X86::RSP).addImm(-16));
+
+ const std::string& Fn = FuncName(AccessSize, IsWrite);
+ MCSymbol *FnSym = Ctx.GetOrCreateSymbol(StringRef(Fn));
+ const MCSymbolRefExpr *FnExpr =
+ MCSymbolRefExpr::Create(FnSym, MCSymbolRefExpr::VK_PLT, Ctx);
+ EmitInstruction(Out, MCInstBuilder(X86::CALL64pcrel32).addExpr(FnExpr));
+ }
+};
+
+void X86AddressSanitizer64::InstrumentMemOperandSmallImpl(
+ X86Operand &Op, unsigned AccessSize, bool IsWrite, MCContext &Ctx,
+ MCStreamer &Out) {
+ EmitAdjustRSP(Ctx, Out, -128);
+ EmitInstruction(Out, MCInstBuilder(X86::PUSH64r).addReg(X86::RAX));
+ EmitInstruction(Out, MCInstBuilder(X86::PUSH64r).addReg(X86::RCX));
EmitInstruction(Out, MCInstBuilder(X86::PUSH64r).addReg(X86::RDI));
+ EmitInstruction(Out, MCInstBuilder(X86::PUSHF64));
{
MCInst Inst;
Inst.setOpcode(X86::LEA64r);
Inst.addOperand(MCOperand::CreateReg(X86::RDI));
- Op->addMemOperands(Inst, 5);
+ Op.addMemOperands(Inst, 5);
EmitInstruction(Out, Inst);
}
+ EmitInstruction(
+ Out, MCInstBuilder(X86::MOV64rr).addReg(X86::RAX).addReg(X86::RDI));
+ EmitInstruction(Out, MCInstBuilder(X86::SHR64ri).addReg(X86::RAX)
+ .addReg(X86::RAX).addImm(3));
{
- const std::string Func = FuncName(AccessSize, IsWrite);
- const MCSymbol *FuncSym = Ctx.GetOrCreateSymbol(StringRef(Func));
- const MCSymbolRefExpr *FuncExpr =
- MCSymbolRefExpr::Create(FuncSym, MCSymbolRefExpr::VK_PLT, Ctx);
- EmitInstruction(Out, MCInstBuilder(X86::CALL64pcrel32).addExpr(FuncExpr));
+ MCInst Inst;
+ Inst.setOpcode(X86::MOV8rm);
+ Inst.addOperand(MCOperand::CreateReg(X86::AL));
+ const MCExpr *Disp = MCConstantExpr::Create(kShadowOffset, Ctx);
+ std::unique_ptr<X86Operand> Op(
+ X86Operand::CreateMem(0, Disp, X86::RAX, 0, 1, SMLoc(), SMLoc()));
+ Op->addMemOperands(Inst, 5);
+ EmitInstruction(Out, Inst);
+ }
+
+ EmitInstruction(Out,
+ MCInstBuilder(X86::TEST8rr).addReg(X86::AL).addReg(X86::AL));
+ MCSymbol *DoneSym = Ctx.CreateTempSymbol();
+ const MCExpr *DoneExpr = MCSymbolRefExpr::Create(DoneSym, Ctx);
+ EmitInstruction(Out, MCInstBuilder(X86::JE_4).addExpr(DoneExpr));
+
+ EmitInstruction(
+ Out, MCInstBuilder(X86::MOV32rr).addReg(X86::ECX).addReg(X86::EDI));
+ EmitInstruction(Out, MCInstBuilder(X86::AND32ri).addReg(X86::ECX)
+ .addReg(X86::ECX).addImm(7));
+
+ switch (AccessSize) {
+ case 1:
+ break;
+ case 2: {
+ MCInst Inst;
+ Inst.setOpcode(X86::LEA32r);
+ Inst.addOperand(MCOperand::CreateReg(X86::ECX));
+
+ const MCExpr *Disp = MCConstantExpr::Create(1, Ctx);
+ std::unique_ptr<X86Operand> Op(
+ X86Operand::CreateMem(0, Disp, X86::ECX, 0, 1, SMLoc(), SMLoc()));
+ Op->addMemOperands(Inst, 5);
+ EmitInstruction(Out, Inst);
+ break;
}
+ case 4:
+ EmitInstruction(Out, MCInstBuilder(X86::ADD32ri8).addReg(X86::ECX)
+ .addReg(X86::ECX).addImm(3));
+ break;
+ default:
+ assert(false && "Incorrect access size");
+ break;
+ }
+
+ EmitInstruction(
+ Out, MCInstBuilder(X86::MOVSX32rr8).addReg(X86::EAX).addReg(X86::AL));
+ EmitInstruction(
+ Out, MCInstBuilder(X86::CMP32rr).addReg(X86::ECX).addReg(X86::EAX));
+ EmitInstruction(Out, MCInstBuilder(X86::JL_4).addExpr(DoneExpr));
+
+ EmitCallAsanReport(Ctx, Out, AccessSize, IsWrite);
+ EmitLabel(Out, DoneSym);
+
+ EmitInstruction(Out, MCInstBuilder(X86::POPF64));
EmitInstruction(Out, MCInstBuilder(X86::POP64r).addReg(X86::RDI));
+ EmitInstruction(Out, MCInstBuilder(X86::POP64r).addReg(X86::RCX));
+ EmitInstruction(Out, MCInstBuilder(X86::POP64r).addReg(X86::RAX));
+ EmitAdjustRSP(Ctx, Out, 128);
+}
+
+void X86AddressSanitizer64::InstrumentMemOperandLargeImpl(
+ X86Operand &Op, unsigned AccessSize, bool IsWrite, MCContext &Ctx,
+ MCStreamer &Out) {
+ EmitAdjustRSP(Ctx, Out, -128);
+ EmitInstruction(Out, MCInstBuilder(X86::PUSH64r).addReg(X86::RAX));
+ EmitInstruction(Out, MCInstBuilder(X86::PUSHF64));
- // Restore old %rsp value.
{
MCInst Inst;
Inst.setOpcode(X86::LEA64r);
- Inst.addOperand(MCOperand::CreateReg(X86::RSP));
-
- const MCExpr *Disp = MCConstantExpr::Create(128, Ctx);
+ Inst.addOperand(MCOperand::CreateReg(X86::RAX));
+ Op.addMemOperands(Inst, 5);
+ EmitInstruction(Out, Inst);
+ }
+ EmitInstruction(Out, MCInstBuilder(X86::SHR64ri).addReg(X86::RAX)
+ .addReg(X86::RAX).addImm(3));
+ {
+ MCInst Inst;
+ switch (AccessSize) {
+ case 8:
+ Inst.setOpcode(X86::CMP8mi);
+ break;
+ case 16:
+ Inst.setOpcode(X86::CMP16mi);
+ break;
+ default:
+ assert(false && "Incorrect access size");
+ break;
+ }
+ const MCExpr *Disp = MCConstantExpr::Create(kShadowOffset, Ctx);
std::unique_ptr<X86Operand> Op(
- X86Operand::CreateMem(0, Disp, X86::RSP, 0, 1, SMLoc(), SMLoc()));
+ X86Operand::CreateMem(0, Disp, X86::RAX, 0, 1, SMLoc(), SMLoc()));
Op->addMemOperands(Inst, 5);
+ Inst.addOperand(MCOperand::CreateImm(0));
EmitInstruction(Out, Inst);
}
+
+ MCSymbol *DoneSym = Ctx.CreateTempSymbol();
+ const MCExpr *DoneExpr = MCSymbolRefExpr::Create(DoneSym, Ctx);
+ EmitInstruction(Out, MCInstBuilder(X86::JE_4).addExpr(DoneExpr));
+
+ EmitCallAsanReport(Ctx, Out, AccessSize, IsWrite);
+ EmitLabel(Out, DoneSym);
+
+ EmitInstruction(Out, MCInstBuilder(X86::POPF64));
+ EmitInstruction(Out, MCInstBuilder(X86::POP64r).addReg(X86::RAX));
+ EmitAdjustRSP(Ctx, Out, 128);
}
} // End anonymous namespace
@@ -233,8 +486,8 @@ X86AsmInstrumentation::X86AsmInstrumentation() {}
X86AsmInstrumentation::~X86AsmInstrumentation() {}
void X86AsmInstrumentation::InstrumentInstruction(
- const MCInst &Inst, SmallVectorImpl<MCParsedAsmOperand *> &Operands,
- MCContext &Ctx, const MCInstrInfo &MII, MCStreamer &Out) {}
+ const MCInst &Inst, OperandVector &Operands, MCContext &Ctx,
+ const MCInstrInfo &MII, MCStreamer &Out) {}
X86AsmInstrumentation *
CreateX86AsmInstrumentation(const MCTargetOptions &MCOptions,
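[Editor's note] The instruction sequences emitted above implement the usual AddressSanitizer shadow check: the shadow byte lives at (Addr >> 3) + kShadowOffset; a sub-8-byte access is bad when the shadow byte is non-zero and the access's last byte offset inside the 8-byte granule reaches it, while the 8- and 16-byte paths simply require a zero shadow. A standalone sketch of that predicate in plain C++ (the real code emits it as machine instructions; accessIsPoisoned is an illustrative helper, and the shadow byte is passed in rather than loaded from shadow memory):

    #include <cstdint>
    #include <iostream>

    // ShadowByte is what the emitted MOV8rm loads from (Addr >> 3) + kShadowOffset.
    bool accessIsPoisoned(uintptr_t Addr, unsigned AccessSize, int8_t ShadowByte) {
      if (AccessSize < 8) {                        // "small" path above
        if (ShadowByte == 0)
          return false;                            // whole 8-byte granule valid
        unsigned LastByte = (Addr & 7) + AccessSize - 1;
        return (int)LastByte >= ShadowByte;        // the CMP32rr / JL_4 pair above
      }
      return ShadowByte != 0;                      // "large" path: CMP8mi / CMP16mi
    }

    int main() {
      // 4-byte access starting at granule offset 5 when only 6 bytes are valid.
      std::cout << accessIsPoisoned(0x1005, 4, 6) << "\n";   // prints 1 (poisoned)
    }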
diff --git a/lib/Target/X86/AsmParser/X86AsmInstrumentation.h b/lib/Target/X86/AsmParser/X86AsmInstrumentation.h
index 0369b14..1bc3c09 100644
--- a/lib/Target/X86/AsmParser/X86AsmInstrumentation.h
+++ b/lib/Target/X86/AsmParser/X86AsmInstrumentation.h
@@ -12,6 +12,8 @@
#include "llvm/ADT/SmallVector.h"
+#include <memory>
+
namespace llvm {
class MCContext;
@@ -35,10 +37,9 @@ public:
// Instruments Inst. Should be called just before the original
// instruction is sent to Out.
virtual void InstrumentInstruction(
- const MCInst &Inst, SmallVectorImpl<MCParsedAsmOperand *> &Operands,
- MCContext &Ctx,
- const MCInstrInfo &MII,
- MCStreamer &Out);
+ const MCInst &Inst,
+ SmallVectorImpl<std::unique_ptr<MCParsedAsmOperand>> &Operands,
+ MCContext &Ctx, const MCInstrInfo &MII, MCStreamer &Out);
protected:
friend X86AsmInstrumentation *
diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp
index d3e695e..f0765ed 100644
--- a/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -235,6 +235,7 @@ private:
IES_RSHIFT,
IES_PLUS,
IES_MINUS,
+ IES_NOT,
IES_MULTIPLY,
IES_DIVIDE,
IES_LBRAC,
@@ -372,6 +373,7 @@ private:
State = IES_ERROR;
break;
case IES_PLUS:
+ case IES_NOT:
case IES_MULTIPLY:
case IES_DIVIDE:
case IES_LPAREN:
@@ -401,6 +403,19 @@ private:
}
PrevState = CurrState;
}
+ void onNot() {
+ IntelExprState CurrState = State;
+ switch (State) {
+ default:
+ State = IES_ERROR;
+ break;
+ case IES_PLUS:
+ case IES_NOT:
+ State = IES_NOT;
+ break;
+ }
+ PrevState = CurrState;
+ }
void onRegister(unsigned Reg) {
IntelExprState CurrState = State;
switch (State) {
@@ -438,6 +453,7 @@ private:
break;
case IES_PLUS:
case IES_MINUS:
+ case IES_NOT:
State = IES_INTEGER;
Sym = SymRef;
SymName = SymRefName;
@@ -453,6 +469,7 @@ private:
break;
case IES_PLUS:
case IES_MINUS:
+ case IES_NOT:
case IES_OR:
case IES_AND:
case IES_LSHIFT:
@@ -476,11 +493,22 @@ private:
PrevState == IES_OR || PrevState == IES_AND ||
PrevState == IES_LSHIFT || PrevState == IES_RSHIFT ||
PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE ||
- PrevState == IES_LPAREN || PrevState == IES_LBRAC) &&
+ PrevState == IES_LPAREN || PrevState == IES_LBRAC ||
+ PrevState == IES_NOT) &&
CurrState == IES_MINUS) {
// Unary minus. No need to pop the minus operand because it was never
// pushed.
IC.pushOperand(IC_IMM, -TmpInt); // Push -Imm.
+ } else if ((PrevState == IES_PLUS || PrevState == IES_MINUS ||
+ PrevState == IES_OR || PrevState == IES_AND ||
+ PrevState == IES_LSHIFT || PrevState == IES_RSHIFT ||
+ PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE ||
+ PrevState == IES_LPAREN || PrevState == IES_LBRAC ||
+ PrevState == IES_NOT) &&
+ CurrState == IES_NOT) {
+ // Unary not. No need to pop the not operand because it was never
+ // pushed.
+ IC.pushOperand(IC_IMM, ~TmpInt); // Push ~Imm.
} else {
IC.pushOperand(IC_IMM, TmpInt);
}
@@ -561,6 +589,7 @@ private:
break;
case IES_PLUS:
case IES_MINUS:
+ case IES_NOT:
case IES_OR:
case IES_AND:
case IES_LSHIFT:
@@ -568,13 +597,14 @@ private:
case IES_MULTIPLY:
case IES_DIVIDE:
case IES_LPAREN:
- // FIXME: We don't handle this type of unary minus, yet.
+ // FIXME: We don't handle this type of unary minus or not, yet.
if ((PrevState == IES_PLUS || PrevState == IES_MINUS ||
PrevState == IES_OR || PrevState == IES_AND ||
PrevState == IES_LSHIFT || PrevState == IES_RSHIFT ||
PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE ||
- PrevState == IES_LPAREN || PrevState == IES_LBRAC) &&
- CurrState == IES_MINUS) {
+ PrevState == IES_LPAREN || PrevState == IES_LBRAC ||
+ PrevState == IES_NOT) &&
+ (CurrState == IES_MINUS || CurrState == IES_NOT)) {
State = IES_ERROR;
break;
}
@@ -618,52 +648,52 @@ private:
return Error(L, Msg, Ranges, MatchingInlineAsm);
}
- X86Operand *ErrorOperand(SMLoc Loc, StringRef Msg) {
+ std::nullptr_t ErrorOperand(SMLoc Loc, StringRef Msg) {
Error(Loc, Msg);
return nullptr;
}
- X86Operand *DefaultMemSIOperand(SMLoc Loc);
- X86Operand *DefaultMemDIOperand(SMLoc Loc);
- X86Operand *ParseOperand();
- X86Operand *ParseATTOperand();
- X86Operand *ParseIntelOperand();
- X86Operand *ParseIntelOffsetOfOperator();
+ std::unique_ptr<X86Operand> DefaultMemSIOperand(SMLoc Loc);
+ std::unique_ptr<X86Operand> DefaultMemDIOperand(SMLoc Loc);
+ std::unique_ptr<X86Operand> ParseOperand();
+ std::unique_ptr<X86Operand> ParseATTOperand();
+ std::unique_ptr<X86Operand> ParseIntelOperand();
+ std::unique_ptr<X86Operand> ParseIntelOffsetOfOperator();
bool ParseIntelDotOperator(const MCExpr *Disp, const MCExpr *&NewDisp);
- X86Operand *ParseIntelOperator(unsigned OpKind);
- X86Operand *ParseIntelSegmentOverride(unsigned SegReg, SMLoc Start, unsigned Size);
- X86Operand *ParseIntelMemOperand(int64_t ImmDisp, SMLoc StartLoc,
- unsigned Size);
+ std::unique_ptr<X86Operand> ParseIntelOperator(unsigned OpKind);
+ std::unique_ptr<X86Operand>
+ ParseIntelSegmentOverride(unsigned SegReg, SMLoc Start, unsigned Size);
+ std::unique_ptr<X86Operand>
+ ParseIntelMemOperand(int64_t ImmDisp, SMLoc StartLoc, unsigned Size);
bool ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End);
- X86Operand *ParseIntelBracExpression(unsigned SegReg, SMLoc Start,
- int64_t ImmDisp, unsigned Size);
+ std::unique_ptr<X86Operand> ParseIntelBracExpression(unsigned SegReg,
+ SMLoc Start,
+ int64_t ImmDisp,
+ unsigned Size);
bool ParseIntelIdentifier(const MCExpr *&Val, StringRef &Identifier,
InlineAsmIdentifierInfo &Info,
bool IsUnevaluatedOperand, SMLoc &End);
- X86Operand *ParseMemOperand(unsigned SegReg, SMLoc StartLoc);
+ std::unique_ptr<X86Operand> ParseMemOperand(unsigned SegReg, SMLoc StartLoc);
- X86Operand *CreateMemForInlineAsm(unsigned SegReg, const MCExpr *Disp,
- unsigned BaseReg, unsigned IndexReg,
- unsigned Scale, SMLoc Start, SMLoc End,
- unsigned Size, StringRef Identifier,
- InlineAsmIdentifierInfo &Info);
+ std::unique_ptr<X86Operand>
+ CreateMemForInlineAsm(unsigned SegReg, const MCExpr *Disp, unsigned BaseReg,
+ unsigned IndexReg, unsigned Scale, SMLoc Start,
+ SMLoc End, unsigned Size, StringRef Identifier,
+ InlineAsmIdentifierInfo &Info);
bool ParseDirectiveWord(unsigned Size, SMLoc L);
bool ParseDirectiveCode(StringRef IDVal, SMLoc L);
- bool processInstruction(MCInst &Inst,
- const SmallVectorImpl<MCParsedAsmOperand*> &Ops);
+ bool processInstruction(MCInst &Inst, const OperandVector &Ops);
/// Wrapper around MCStreamer::EmitInstruction(). Possibly adds
/// instrumentation around Inst.
- void EmitInstruction(MCInst &Inst,
- SmallVectorImpl<MCParsedAsmOperand *> &Operands,
- MCStreamer &Out);
+ void EmitInstruction(MCInst &Inst, OperandVector &Operands, MCStreamer &Out);
bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
- SmallVectorImpl<MCParsedAsmOperand*> &Operands,
- MCStreamer &Out, unsigned &ErrorInfo,
+ OperandVector &Operands, MCStreamer &Out,
+ unsigned &ErrorInfo,
bool MatchingInlineAsm) override;
/// doSrcDstMatch - Returns true if operands are matching in their
@@ -674,8 +704,8 @@ private:
/// Parses AVX512 specific operand primitives: masked registers ({%k<NUM>}, {z})
/// and memory broadcasting ({1to<NUM>}) primitives, updating Operands vector if required.
/// \return \c true if no parsing errors occurred, \c false otherwise.
- bool HandleAVX512Operand(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
- const MCParsedAsmOperand &Op);
+ bool HandleAVX512Operand(OperandVector &Operands,
+ const MCParsedAsmOperand &Op);
bool is64BitMode() const {
// FIXME: Can tablegen auto-generate this?
@@ -725,9 +755,8 @@ public:
bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
- bool
- ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc,
- SmallVectorImpl<MCParsedAsmOperand*> &Operands) override;
+ bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
+ SMLoc NameLoc, OperandVector &Operands) override;
bool ParseDirective(AsmToken DirectiveID) override;
};
@@ -908,7 +937,7 @@ bool X86AsmParser::ParseRegister(unsigned &RegNo,
return false;
}
-X86Operand *X86AsmParser::DefaultMemSIOperand(SMLoc Loc) {
+std::unique_ptr<X86Operand> X86AsmParser::DefaultMemSIOperand(SMLoc Loc) {
unsigned basereg =
is64BitMode() ? X86::RSI : (is32BitMode() ? X86::ESI : X86::SI);
const MCExpr *Disp = MCConstantExpr::Create(0, getContext());
@@ -916,7 +945,7 @@ X86Operand *X86AsmParser::DefaultMemSIOperand(SMLoc Loc) {
/*IndexReg=*/0, /*Scale=*/1, Loc, Loc, 0);
}
-X86Operand *X86AsmParser::DefaultMemDIOperand(SMLoc Loc) {
+std::unique_ptr<X86Operand> X86AsmParser::DefaultMemDIOperand(SMLoc Loc) {
unsigned basereg =
is64BitMode() ? X86::RDI : (is32BitMode() ? X86::EDI : X86::DI);
const MCExpr *Disp = MCConstantExpr::Create(0, getContext());
@@ -924,7 +953,7 @@ X86Operand *X86AsmParser::DefaultMemDIOperand(SMLoc Loc) {
/*IndexReg=*/0, /*Scale=*/1, Loc, Loc, 0);
}
-X86Operand *X86AsmParser::ParseOperand() {
+std::unique_ptr<X86Operand> X86AsmParser::ParseOperand() {
if (isParsingIntelSyntax())
return ParseIntelOperand();
return ParseATTOperand();
@@ -946,12 +975,10 @@ static unsigned getIntelMemOperandSize(StringRef OpStr) {
return Size;
}
-X86Operand *
-X86AsmParser::CreateMemForInlineAsm(unsigned SegReg, const MCExpr *Disp,
- unsigned BaseReg, unsigned IndexReg,
- unsigned Scale, SMLoc Start, SMLoc End,
- unsigned Size, StringRef Identifier,
- InlineAsmIdentifierInfo &Info){
+std::unique_ptr<X86Operand> X86AsmParser::CreateMemForInlineAsm(
+ unsigned SegReg, const MCExpr *Disp, unsigned BaseReg, unsigned IndexReg,
+ unsigned Scale, SMLoc Start, SMLoc End, unsigned Size, StringRef Identifier,
+ InlineAsmIdentifierInfo &Info) {
// If this is not a VarDecl then assume it is a FuncDecl or some other label
// reference. We need an 'r' constraint here, so we need to create register
// operand to ensure proper matching. Just pick a GPR based on the size of
@@ -1064,7 +1091,8 @@ bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
if (SM.getStopOnLBrac() && getLexer().getKind() == AsmToken::LBrac)
break;
- switch (getLexer().getKind()) {
+ AsmToken::TokenKind TK = getLexer().getKind();
+ switch (TK) {
default: {
if (SM.isValidEndState()) {
Done = true;
@@ -1076,13 +1104,14 @@ bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
Done = true;
break;
}
+ case AsmToken::String:
case AsmToken::Identifier: {
// This could be a register or a symbolic displacement.
unsigned TmpReg;
const MCExpr *Val;
SMLoc IdentLoc = Tok.getLoc();
StringRef Identifier = Tok.getString();
- if(!ParseRegister(TmpReg, IdentLoc, End)) {
+ if (TK != AsmToken::String && !ParseRegister(TmpReg, IdentLoc, End)) {
SM.onRegister(TmpReg);
UpdateLocLex = false;
break;
@@ -1142,6 +1171,7 @@ bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
}
case AsmToken::Plus: SM.onPlus(); break;
case AsmToken::Minus: SM.onMinus(); break;
+ case AsmToken::Tilde: SM.onNot(); break;
case AsmToken::Star: SM.onStar(); break;
case AsmToken::Slash: SM.onDivide(); break;
case AsmToken::Pipe: SM.onOr(); break;
@@ -1164,9 +1194,9 @@ bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
return false;
}
-X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg, SMLoc Start,
- int64_t ImmDisp,
- unsigned Size) {
+std::unique_ptr<X86Operand>
+X86AsmParser::ParseIntelBracExpression(unsigned SegReg, SMLoc Start,
+ int64_t ImmDisp, unsigned Size) {
const AsmToken &Tok = Parser.getTok();
SMLoc BracLoc = Tok.getLoc(), End = Tok.getEndLoc();
if (getLexer().isNot(AsmToken::LBrac))
@@ -1270,9 +1300,9 @@ bool X86AsmParser::ParseIntelIdentifier(const MCExpr *&Val,
}
/// \brief Parse intel style segment override.
-X86Operand *X86AsmParser::ParseIntelSegmentOverride(unsigned SegReg,
- SMLoc Start,
- unsigned Size) {
+std::unique_ptr<X86Operand>
+X86AsmParser::ParseIntelSegmentOverride(unsigned SegReg, SMLoc Start,
+ unsigned Size) {
assert(SegReg != 0 && "Tried to parse a segment override without a segment!");
const AsmToken &Tok = Parser.getTok(); // Eat colon.
if (Tok.isNot(AsmToken::Colon))
@@ -1321,8 +1351,9 @@ X86Operand *X86AsmParser::ParseIntelSegmentOverride(unsigned SegReg,
}
/// ParseIntelMemOperand - Parse intel style memory operand.
-X86Operand *X86AsmParser::ParseIntelMemOperand(int64_t ImmDisp, SMLoc Start,
- unsigned Size) {
+std::unique_ptr<X86Operand> X86AsmParser::ParseIntelMemOperand(int64_t ImmDisp,
+ SMLoc Start,
+ unsigned Size) {
const AsmToken &Tok = Parser.getTok();
SMLoc End;
@@ -1425,7 +1456,7 @@ bool X86AsmParser::ParseIntelDotOperator(const MCExpr *Disp,
/// Parse the 'offset' operator. This operator is used to specify the
/// location rather then the content of a variable.
-X86Operand *X86AsmParser::ParseIntelOffsetOfOperator() {
+std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOffsetOfOperator() {
const AsmToken &Tok = Parser.getTok();
SMLoc OffsetOfLoc = Tok.getLoc();
Parser.Lex(); // Eat offset.
@@ -1462,7 +1493,7 @@ enum IntelOperatorKind {
/// variable. A variable's size is the product of its LENGTH and TYPE. The
/// TYPE operator returns the size of a C or C++ type or variable. If the
/// variable is an array, TYPE returns the size of a single element.
-X86Operand *X86AsmParser::ParseIntelOperator(unsigned OpKind) {
+std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOperator(unsigned OpKind) {
const AsmToken &Tok = Parser.getTok();
SMLoc TypeLoc = Tok.getLoc();
Parser.Lex(); // Eat operator.
@@ -1495,7 +1526,7 @@ X86Operand *X86AsmParser::ParseIntelOperator(unsigned OpKind) {
return X86Operand::CreateImm(Imm, Start, End);
}
-X86Operand *X86AsmParser::ParseIntelOperand() {
+std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOperand() {
const AsmToken &Tok = Parser.getTok();
SMLoc Start, End;
@@ -1523,7 +1554,7 @@ X86Operand *X86AsmParser::ParseIntelOperand() {
// Immediate.
if (getLexer().is(AsmToken::Integer) || getLexer().is(AsmToken::Minus) ||
- getLexer().is(AsmToken::LParen)) {
+ getLexer().is(AsmToken::Tilde) || getLexer().is(AsmToken::LParen)) {
AsmToken StartTok = Tok;
IntelExprStateMachine SM(/*Imm=*/0, /*StopOnLBrac=*/true,
/*AddImmPrefix=*/false);
@@ -1577,7 +1608,7 @@ X86Operand *X86AsmParser::ParseIntelOperand() {
return ParseIntelMemOperand(/*Disp=*/0, Start, Size);
}
-X86Operand *X86AsmParser::ParseATTOperand() {
+std::unique_ptr<X86Operand> X86AsmParser::ParseATTOperand() {
switch (getLexer().getKind()) {
default:
// Parse a memory operand with no segment register.
@@ -1613,9 +1644,8 @@ X86Operand *X86AsmParser::ParseATTOperand() {
}
}
-bool
-X86AsmParser::HandleAVX512Operand(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
- const MCParsedAsmOperand &Op) {
+bool X86AsmParser::HandleAVX512Operand(OperandVector &Operands,
+ const MCParsedAsmOperand &Op) {
if(STI.getFeatureBits() & X86::FeatureAVX512) {
if (getLexer().is(AsmToken::LCurly)) {
// Eat "{" and mark the current place.
@@ -1653,8 +1683,8 @@ X86AsmParser::HandleAVX512Operand(SmallVectorImpl<MCParsedAsmOperand*> &Operands
} else {
// Parse mask register {%k1}
Operands.push_back(X86Operand::CreateToken("{", consumedToken));
- if (X86Operand *Op = ParseOperand()) {
- Operands.push_back(Op);
+ if (std::unique_ptr<X86Operand> Op = ParseOperand()) {
+ Operands.push_back(std::move(Op));
if (!getLexer().is(AsmToken::RCurly))
return !ErrorAndEatStatement(getLexer().getLoc(),
"Expected } at this point");
@@ -1682,7 +1712,8 @@ X86AsmParser::HandleAVX512Operand(SmallVectorImpl<MCParsedAsmOperand*> &Operands
/// ParseMemOperand: segment: disp(basereg, indexreg, scale). The '%ds:' prefix
/// has already been parsed if present.
-X86Operand *X86AsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) {
+std::unique_ptr<X86Operand> X86AsmParser::ParseMemOperand(unsigned SegReg,
+ SMLoc MemStart) {
// We have to disambiguate a parenthesized expression "(4+5)" from the start
// of a memory operand with a missing displacement "(%ebx)" or "(,%eax)". The
@@ -1845,9 +1876,8 @@ X86Operand *X86AsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) {
MemStart, MemEnd);
}
-bool X86AsmParser::
-ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc,
- SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
+ SMLoc NameLoc, OperandVector &Operands) {
InstInfo = &Info;
StringRef PatchedName = Name;
@@ -1940,9 +1970,9 @@ ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc,
// Read the operands.
while(1) {
- if (X86Operand *Op = ParseOperand()) {
- Operands.push_back(Op);
- if (!HandleAVX512Operand(Operands, *Op))
+ if (std::unique_ptr<X86Operand> Op = ParseOperand()) {
+ Operands.push_back(std::move(Op));
+ if (!HandleAVX512Operand(Operands, *Operands.back()))
return true;
} else {
Parser.eatToEndOfStatement();
@@ -1973,27 +2003,25 @@ ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc,
// documented form in various unofficial manuals, so a lot of code uses it.
if ((Name == "outb" || Name == "outw" || Name == "outl" || Name == "out") &&
Operands.size() == 3) {
- X86Operand &Op = *(X86Operand*)Operands.back();
+ X86Operand &Op = (X86Operand &)*Operands.back();
if (Op.isMem() && Op.Mem.SegReg == 0 &&
isa<MCConstantExpr>(Op.Mem.Disp) &&
cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
Op.Mem.BaseReg == MatchRegisterName("dx") && Op.Mem.IndexReg == 0) {
SMLoc Loc = Op.getEndLoc();
Operands.back() = X86Operand::CreateReg(Op.Mem.BaseReg, Loc, Loc);
- delete &Op;
}
}
// Same hack for "in[bwl]? (%dx), %al" -> "inb %dx, %al".
if ((Name == "inb" || Name == "inw" || Name == "inl" || Name == "in") &&
Operands.size() == 3) {
- X86Operand &Op = *(X86Operand*)Operands.begin()[1];
+ X86Operand &Op = (X86Operand &)*Operands[1];
if (Op.isMem() && Op.Mem.SegReg == 0 &&
isa<MCConstantExpr>(Op.Mem.Disp) &&
cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
Op.Mem.BaseReg == MatchRegisterName("dx") && Op.Mem.IndexReg == 0) {
SMLoc Loc = Op.getEndLoc();
- Operands.begin()[1] = X86Operand::CreateReg(Op.Mem.BaseReg, Loc, Loc);
- delete &Op;
+ Operands[1] = X86Operand::CreateReg(Op.Mem.BaseReg, Loc, Loc);
}
}
@@ -2060,8 +2088,8 @@ ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc,
Operands.push_back(DefaultMemSIOperand(NameLoc));
}
} else if (Operands.size() == 3) {
- X86Operand &Op = *(X86Operand*)Operands.begin()[1];
- X86Operand &Op2 = *(X86Operand*)Operands.begin()[2];
+ X86Operand &Op = (X86Operand &)*Operands[1];
+ X86Operand &Op2 = (X86Operand &)*Operands[2];
if (!doSrcDstMatch(Op, Op2))
return Error(Op.getStartLoc(),
"mismatching source and destination index registers");
@@ -2076,10 +2104,8 @@ ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc,
(Name == "smov" || Name == "smovb" || Name == "smovw" ||
Name == "smovl" || Name == "smovd" || Name == "smovq"))) {
if (Operands.size() == 1) {
- if (Name == "movsd") {
- delete Operands.back();
+ if (Name == "movsd")
Operands.back() = X86Operand::CreateToken("movsl", NameLoc);
- }
if (isParsingIntelSyntax()) {
Operands.push_back(DefaultMemDIOperand(NameLoc));
Operands.push_back(DefaultMemSIOperand(NameLoc));
@@ -2088,8 +2114,8 @@ ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc,
Operands.push_back(DefaultMemDIOperand(NameLoc));
}
} else if (Operands.size() == 3) {
- X86Operand &Op = *(X86Operand*)Operands.begin()[1];
- X86Operand &Op2 = *(X86Operand*)Operands.begin()[2];
+ X86Operand &Op = (X86Operand &)*Operands[1];
+ X86Operand &Op2 = (X86Operand &)*Operands[2];
if (!doSrcDstMatch(Op, Op2))
return Error(Op.getStartLoc(),
"mismatching source and destination index registers");
@@ -2105,31 +2131,26 @@ ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc,
Operands.size() == 3) {
if (isParsingIntelSyntax()) {
// Intel syntax
- X86Operand *Op1 = static_cast<X86Operand*>(Operands[2]);
- if (Op1->isImm() && isa<MCConstantExpr>(Op1->getImm()) &&
- cast<MCConstantExpr>(Op1->getImm())->getValue() == 1) {
- delete Operands[2];
+ X86Operand &Op1 = static_cast<X86Operand &>(*Operands[2]);
+ if (Op1.isImm() && isa<MCConstantExpr>(Op1.getImm()) &&
+ cast<MCConstantExpr>(Op1.getImm())->getValue() == 1)
Operands.pop_back();
- }
} else {
- X86Operand *Op1 = static_cast<X86Operand*>(Operands[1]);
- if (Op1->isImm() && isa<MCConstantExpr>(Op1->getImm()) &&
- cast<MCConstantExpr>(Op1->getImm())->getValue() == 1) {
- delete Operands[1];
+ X86Operand &Op1 = static_cast<X86Operand &>(*Operands[1]);
+ if (Op1.isImm() && isa<MCConstantExpr>(Op1.getImm()) &&
+ cast<MCConstantExpr>(Op1.getImm())->getValue() == 1)
Operands.erase(Operands.begin() + 1);
- }
}
}
// Transforms "int $3" into "int3" as a size optimization. We can't write an
// instalias with an immediate operand yet.
if (Name == "int" && Operands.size() == 2) {
- X86Operand *Op1 = static_cast<X86Operand*>(Operands[1]);
- if (Op1->isImm() && isa<MCConstantExpr>(Op1->getImm()) &&
- cast<MCConstantExpr>(Op1->getImm())->getValue() == 3) {
- delete Operands[1];
+ X86Operand &Op1 = static_cast<X86Operand &>(*Operands[1]);
+ if (Op1.isImm() && isa<MCConstantExpr>(Op1.getImm()) &&
+ cast<MCConstantExpr>(Op1.getImm())->getValue() == 3) {
Operands.erase(Operands.begin() + 1);
- static_cast<X86Operand*>(Operands[0])->setTokenValue("int3");
+ static_cast<X86Operand &>(*Operands[0]).setTokenValue("int3");
}
}
@@ -2175,9 +2196,7 @@ static bool convert64i32to64ri8(MCInst &Inst, unsigned Opcode,
return convertToSExti8(Inst, Opcode, X86::RAX, isCmp);
}
-bool X86AsmParser::
-processInstruction(MCInst &Inst,
- const SmallVectorImpl<MCParsedAsmOperand*> &Ops) {
+bool X86AsmParser::processInstruction(MCInst &Inst, const OperandVector &Ops) {
switch (Inst.getOpcode()) {
default: return false;
case X86::AND16i16: return convert16i16to16ri8(Inst, X86::AND16ri8);
@@ -2258,51 +2277,47 @@ processInstruction(MCInst &Inst,
static const char *getSubtargetFeatureName(unsigned Val);
-void X86AsmParser::EmitInstruction(
- MCInst &Inst, SmallVectorImpl<MCParsedAsmOperand *> &Operands,
- MCStreamer &Out) {
+void X86AsmParser::EmitInstruction(MCInst &Inst, OperandVector &Operands,
+ MCStreamer &Out) {
Instrumentation->InstrumentInstruction(Inst, Operands, getContext(), MII,
Out);
Out.EmitInstruction(Inst, STI);
}
-bool X86AsmParser::
-MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
- SmallVectorImpl<MCParsedAsmOperand*> &Operands,
- MCStreamer &Out, unsigned &ErrorInfo,
- bool MatchingInlineAsm) {
+bool X86AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
+ OperandVector &Operands,
+ MCStreamer &Out, unsigned &ErrorInfo,
+ bool MatchingInlineAsm) {
assert(!Operands.empty() && "Unexpect empty operand list!");
- X86Operand *Op = static_cast<X86Operand*>(Operands[0]);
- assert(Op->isToken() && "Leading operand should always be a mnemonic!");
+ X86Operand &Op = static_cast<X86Operand &>(*Operands[0]);
+ assert(Op.isToken() && "Leading operand should always be a mnemonic!");
ArrayRef<SMRange> EmptyRanges = None;
// First, handle aliases that expand to multiple instructions.
// FIXME: This should be replaced with a real .td file alias mechanism.
// Also, MatchInstructionImpl should actually *do* the EmitInstruction
// call.
- if (Op->getToken() == "fstsw" || Op->getToken() == "fstcw" ||
- Op->getToken() == "fstsww" || Op->getToken() == "fstcww" ||
- Op->getToken() == "finit" || Op->getToken() == "fsave" ||
- Op->getToken() == "fstenv" || Op->getToken() == "fclex") {
+ if (Op.getToken() == "fstsw" || Op.getToken() == "fstcw" ||
+ Op.getToken() == "fstsww" || Op.getToken() == "fstcww" ||
+ Op.getToken() == "finit" || Op.getToken() == "fsave" ||
+ Op.getToken() == "fstenv" || Op.getToken() == "fclex") {
MCInst Inst;
Inst.setOpcode(X86::WAIT);
Inst.setLoc(IDLoc);
if (!MatchingInlineAsm)
EmitInstruction(Inst, Operands, Out);
- const char *Repl =
- StringSwitch<const char*>(Op->getToken())
- .Case("finit", "fninit")
- .Case("fsave", "fnsave")
- .Case("fstcw", "fnstcw")
- .Case("fstcww", "fnstcw")
- .Case("fstenv", "fnstenv")
- .Case("fstsw", "fnstsw")
- .Case("fstsww", "fnstsw")
- .Case("fclex", "fnclex")
- .Default(nullptr);
+ const char *Repl = StringSwitch<const char *>(Op.getToken())
+ .Case("finit", "fninit")
+ .Case("fsave", "fnsave")
+ .Case("fstcw", "fnstcw")
+ .Case("fstcww", "fnstcw")
+ .Case("fstenv", "fnstenv")
+ .Case("fstsw", "fnstsw")
+ .Case("fstsww", "fnstsw")
+ .Case("fclex", "fnclex")
+ .Default(nullptr);
assert(Repl && "Unknown wait-prefixed instruction");
- delete Operands[0];
Operands[0] = X86Operand::CreateToken(Repl, IDLoc);
}
@@ -2355,11 +2370,11 @@ MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
// following hack.
// Change the operand to point to a temporary token.
- StringRef Base = Op->getToken();
+ StringRef Base = Op.getToken();
SmallString<16> Tmp;
Tmp += Base;
Tmp += ' ';
- Op->setTokenValue(Tmp.str());
+ Op.setTokenValue(Tmp.str());
// If this instruction starts with an 'f', then it is a floating point stack
// instruction. These come in up to three forms for 32-bit, 64-bit, and
@@ -2400,7 +2415,7 @@ MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
ErrorInfoMissingFeature = ErrorInfoIgnore;
// Restore the old token.
- Op->setTokenValue(Base);
+ Op.setTokenValue(Base);
// If exactly one matched, then we treat that as a successful match (and the
// instruction will already have been filled in correctly, since the failing
@@ -2450,8 +2465,8 @@ MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
if ((Match1 == Match_MnemonicFail) && (Match2 == Match_MnemonicFail) &&
(Match3 == Match_MnemonicFail) && (Match4 == Match_MnemonicFail)) {
if (!WasOriginallyInvalidOperand) {
- ArrayRef<SMRange> Ranges = MatchingInlineAsm ? EmptyRanges :
- Op->getLocRange();
+ ArrayRef<SMRange> Ranges =
+ MatchingInlineAsm ? EmptyRanges : Op.getLocRange();
return Error(IDLoc, "invalid instruction mnemonic '" + Base + "'",
Ranges, MatchingInlineAsm);
}
@@ -2462,10 +2477,10 @@ MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
return Error(IDLoc, "too few operands for instruction",
EmptyRanges, MatchingInlineAsm);
- X86Operand *Operand = (X86Operand*)Operands[ErrorInfo];
- if (Operand->getStartLoc().isValid()) {
- SMRange OperandRange = Operand->getLocRange();
- return Error(Operand->getStartLoc(), "invalid operand for instruction",
+ X86Operand &Operand = (X86Operand &)*Operands[ErrorInfo];
+ if (Operand.getStartLoc().isValid()) {
+ SMRange OperandRange = Operand.getLocRange();
+ return Error(Operand.getStartLoc(), "invalid operand for instruction",
OperandRange, MatchingInlineAsm);
}
}
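[Editor's note] Besides the unique_ptr conversion, the parser changes above teach the Intel-syntax expression state machine a unary '~' (IES_NOT), which is folded straight into the immediate exactly like unary minus. The sketch below loosely models that folding for a bare immediate; it is not the IntelExprStateMachine itself, and parseImm is an invented helper:

    #include <cstdint>
    #include <iostream>
    #include <string>

    int64_t parseImm(const std::string &S) {
      size_t I = 0;
      std::string Ops;
      while (I < S.size() && (S[I] == '+' || S[I] == '-' || S[I] == '~'))
        Ops.push_back(S[I++]);                 // collect leading unary operators
      int64_t Val = std::stoll(S.substr(I));
      // Apply the unary operators innermost-first (right to left), so the
      // result is pushed as a single immediate, never as a pending operator.
      for (auto It = Ops.rbegin(); It != Ops.rend(); ++It) {
        if (*It == '-') Val = -Val;
        else if (*It == '~') Val = ~Val;
      }
      return Val;
    }

    int main() {
      std::cout << parseImm("~15") << "\n";    // prints -16
      std::cout << parseImm("-~7") << "\n";    // prints 8
    }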
diff --git a/lib/Target/X86/AsmParser/X86Operand.h b/lib/Target/X86/AsmParser/X86Operand.h
index de3be38..1bbfc11 100644
--- a/lib/Target/X86/AsmParser/X86Operand.h
+++ b/lib/Target/X86/AsmParser/X86Operand.h
@@ -13,6 +13,7 @@
#include "X86AsmParserCommon.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
+#include "llvm/ADT/STLExtras.h"
namespace llvm {
@@ -410,20 +411,19 @@ struct X86Operand : public MCParsedAsmOperand {
Inst.addOperand(MCOperand::CreateReg(getMemSegReg()));
}
- static X86Operand *CreateToken(StringRef Str, SMLoc Loc) {
+ static std::unique_ptr<X86Operand> CreateToken(StringRef Str, SMLoc Loc) {
SMLoc EndLoc = SMLoc::getFromPointer(Loc.getPointer() + Str.size());
- X86Operand *Res = new X86Operand(Token, Loc, EndLoc);
+ auto Res = llvm::make_unique<X86Operand>(Token, Loc, EndLoc);
Res->Tok.Data = Str.data();
Res->Tok.Length = Str.size();
return Res;
}
- static X86Operand *CreateReg(unsigned RegNo, SMLoc StartLoc, SMLoc EndLoc,
- bool AddressOf = false,
- SMLoc OffsetOfLoc = SMLoc(),
- StringRef SymName = StringRef(),
- void *OpDecl = nullptr) {
- X86Operand *Res = new X86Operand(Register, StartLoc, EndLoc);
+ static std::unique_ptr<X86Operand>
+ CreateReg(unsigned RegNo, SMLoc StartLoc, SMLoc EndLoc,
+ bool AddressOf = false, SMLoc OffsetOfLoc = SMLoc(),
+ StringRef SymName = StringRef(), void *OpDecl = nullptr) {
+ auto Res = llvm::make_unique<X86Operand>(Register, StartLoc, EndLoc);
Res->Reg.RegNo = RegNo;
Res->AddressOf = AddressOf;
Res->OffsetOfLoc = OffsetOfLoc;
@@ -432,17 +432,18 @@ struct X86Operand : public MCParsedAsmOperand {
return Res;
}
- static X86Operand *CreateImm(const MCExpr *Val, SMLoc StartLoc, SMLoc EndLoc){
- X86Operand *Res = new X86Operand(Immediate, StartLoc, EndLoc);
+ static std::unique_ptr<X86Operand> CreateImm(const MCExpr *Val,
+ SMLoc StartLoc, SMLoc EndLoc) {
+ auto Res = llvm::make_unique<X86Operand>(Immediate, StartLoc, EndLoc);
Res->Imm.Val = Val;
return Res;
}
/// Create an absolute memory operand.
- static X86Operand *CreateMem(const MCExpr *Disp, SMLoc StartLoc, SMLoc EndLoc,
- unsigned Size = 0, StringRef SymName = StringRef(),
- void *OpDecl = nullptr) {
- X86Operand *Res = new X86Operand(Memory, StartLoc, EndLoc);
+ static std::unique_ptr<X86Operand>
+ CreateMem(const MCExpr *Disp, SMLoc StartLoc, SMLoc EndLoc, unsigned Size = 0,
+ StringRef SymName = StringRef(), void *OpDecl = nullptr) {
+ auto Res = llvm::make_unique<X86Operand>(Memory, StartLoc, EndLoc);
Res->Mem.SegReg = 0;
Res->Mem.Disp = Disp;
Res->Mem.BaseReg = 0;
@@ -456,12 +457,11 @@ struct X86Operand : public MCParsedAsmOperand {
}
/// Create a generalized memory operand.
- static X86Operand *CreateMem(unsigned SegReg, const MCExpr *Disp,
- unsigned BaseReg, unsigned IndexReg,
- unsigned Scale, SMLoc StartLoc, SMLoc EndLoc,
- unsigned Size = 0,
- StringRef SymName = StringRef(),
- void *OpDecl = nullptr) {
+ static std::unique_ptr<X86Operand>
+ CreateMem(unsigned SegReg, const MCExpr *Disp, unsigned BaseReg,
+ unsigned IndexReg, unsigned Scale, SMLoc StartLoc, SMLoc EndLoc,
+ unsigned Size = 0, StringRef SymName = StringRef(),
+ void *OpDecl = nullptr) {
// We should never just have a displacement, that should be parsed as an
// absolute memory operand.
assert((SegReg || BaseReg || IndexReg) && "Invalid memory operand!");
@@ -469,7 +469,7 @@ struct X86Operand : public MCParsedAsmOperand {
// The scale should always be one of {1,2,4,8}.
assert(((Scale == 1 || Scale == 2 || Scale == 4 || Scale == 8)) &&
"Invalid scale!");
- X86Operand *Res = new X86Operand(Memory, StartLoc, EndLoc);
+ auto Res = llvm::make_unique<X86Operand>(Memory, StartLoc, EndLoc);
Res->Mem.SegReg = SegReg;
Res->Mem.Disp = Disp;
Res->Mem.BaseReg = BaseReg;
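[Editor's note] The factory methods now hand back std::unique_ptr, so the operand vector owns its elements and the explicit delete calls in the parser go away. A sketch of that ownership shift under simplified types; std::make_unique (C++14) stands in here for the llvm::make_unique used in the patch:

    #include <iostream>
    #include <memory>
    #include <string>
    #include <vector>

    struct Operand {
      std::string Tok;
      static std::unique_ptr<Operand> createToken(std::string S) {
        auto Res = std::make_unique<Operand>();
        Res->Tok = std::move(S);
        return Res;
      }
    };

    using OperandVector = std::vector<std::unique_ptr<Operand>>;

    int main() {
      OperandVector Operands;
      Operands.push_back(Operand::createToken("int"));
      Operands.push_back(Operand::createToken("$3"));
      // Replacing or erasing an element frees the old operand automatically.
      Operands[0] = Operand::createToken("int3");
      Operands.erase(Operands.begin() + 1);
      std::cout << Operands[0]->Tok << "\n";   // prints int3
    }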
diff --git a/lib/Target/X86/CMakeLists.txt b/lib/Target/X86/CMakeLists.txt
index c54fbc1..a09767e 100644
--- a/lib/Target/X86/CMakeLists.txt
+++ b/lib/Target/X86/CMakeLists.txt
@@ -14,6 +14,7 @@ add_public_tablegen_target(X86CommonTableGen)
set(sources
X86AsmPrinter.cpp
+ X86AtomicExpandPass.cpp
X86CodeEmitter.cpp
X86FastISel.cpp
X86FloatingPoint.cpp
diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp
index 804606d..55587d4 100644
--- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp
+++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp
@@ -1620,7 +1620,8 @@ static int readVVVV(struct InternalInstruction* insn) {
int vvvv;
if (insn->vectorExtensionType == TYPE_EVEX)
- vvvv = vvvvFromEVEX3of4(insn->vectorExtensionPrefix[2]);
+ vvvv = (v2FromEVEX4of4(insn->vectorExtensionPrefix[3]) << 4 |
+ vvvvFromEVEX3of4(insn->vectorExtensionPrefix[2]));
else if (insn->vectorExtensionType == TYPE_VEX_3B)
vvvv = vvvvFromVEX3of3(insn->vectorExtensionPrefix[2]);
else if (insn->vectorExtensionType == TYPE_VEX_2B)
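[Editor's note] The decoder fix above widens the EVEX vvvv operand to five bits by prepending the V' bit from the fourth prefix byte, so source registers 16-31 decode correctly. A standalone sketch of the bit assembly; the two extractors are simplified stand-ins for the vvvvFromEVEX3of4 and v2FromEVEX4of4 macros (both fields are stored inverted in the prefix):

    #include <cstdint>
    #include <iostream>

    inline unsigned vvvvOf(uint8_t P1) { return ((~P1 & 0xff) >> 3) & 0xf; } // inverted bits 6:3
    inline unsigned v2Of(uint8_t P2)   { return ((~P2 & 0xff) >> 3) & 0x1; } // inverted bit 3

    unsigned decodeVVVV(uint8_t EVEXByte3, uint8_t EVEXByte4) {
      return (v2Of(EVEXByte4) << 4) | vvvvOf(EVEXByte3);
    }

    int main() {
      // All-zero encoded fields (remember: stored inverted) name register 31.
      std::cout << decodeVVVV(0x00, 0x00) << "\n";   // prints 31
    }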
diff --git a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
index bf30a8e..23bca0d 100644
--- a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
@@ -73,11 +73,12 @@ public:
};
class X86AsmBackend : public MCAsmBackend {
- StringRef CPU;
+ const StringRef CPU;
bool HasNopl;
+ const uint64_t MaxNopLength;
public:
X86AsmBackend(const Target &T, StringRef _CPU)
- : MCAsmBackend(), CPU(_CPU) {
+ : MCAsmBackend(), CPU(_CPU), MaxNopLength(_CPU == "slm" ? 7 : 15) {
HasNopl = CPU != "generic" && CPU != "i386" && CPU != "i486" &&
CPU != "i586" && CPU != "pentium" && CPU != "pentium-mmx" &&
CPU != "i686" && CPU != "k6" && CPU != "k6-2" && CPU != "k6-3" &&
@@ -331,7 +332,7 @@ bool X86AsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const {
// 15 is the longest single nop instruction. Emit as many 15-byte nops as
// needed, then emit a nop of the remaining length.
do {
- const uint8_t ThisNopLength = (uint8_t) std::min(Count, (uint64_t) 15);
+ const uint8_t ThisNopLength = (uint8_t) std::min(Count, MaxNopLength);
const uint8_t Prefixes = ThisNopLength <= 10 ? 0 : ThisNopLength - 10;
for (uint8_t i = 0; i < Prefixes; i++)
OW->Write8(0x66);
@@ -365,6 +366,17 @@ public:
}
};
+class ELFX86_X32AsmBackend : public ELFX86AsmBackend {
+public:
+ ELFX86_X32AsmBackend(const Target &T, uint8_t OSABI, StringRef CPU)
+ : ELFX86AsmBackend(T, OSABI, CPU) {}
+
+ MCObjectWriter *createObjectWriter(raw_ostream &OS) const override {
+ return createX86ELFObjectWriter(OS, /*IsELF64*/ false, OSABI,
+ ELF::EM_X86_64);
+ }
+};
+
class ELFX86_64AsmBackend : public ELFX86AsmBackend {
public:
ELFX86_64AsmBackend(const Target &T, uint8_t OSABI, StringRef CPU)
@@ -717,11 +729,10 @@ public:
};
class DarwinX86_32AsmBackend : public DarwinX86AsmBackend {
- bool SupportsCU;
public:
DarwinX86_32AsmBackend(const Target &T, const MCRegisterInfo &MRI,
- StringRef CPU, bool SupportsCU)
- : DarwinX86AsmBackend(T, MRI, CPU, false), SupportsCU(SupportsCU) {}
+ StringRef CPU)
+ : DarwinX86AsmBackend(T, MRI, CPU, false) {}
MCObjectWriter *createObjectWriter(raw_ostream &OS) const override {
return createX86MachObjectWriter(OS, /*Is64Bit=*/false,
@@ -732,20 +743,16 @@ public:
/// \brief Generate the compact unwind encoding for the CFI instructions.
uint32_t generateCompactUnwindEncoding(
ArrayRef<MCCFIInstruction> Instrs) const override {
- return SupportsCU ? generateCompactUnwindEncodingImpl(Instrs) : 0;
+ return generateCompactUnwindEncodingImpl(Instrs);
}
};
class DarwinX86_64AsmBackend : public DarwinX86AsmBackend {
- bool SupportsCU;
const MachO::CPUSubTypeX86 Subtype;
public:
DarwinX86_64AsmBackend(const Target &T, const MCRegisterInfo &MRI,
- StringRef CPU, bool SupportsCU,
- MachO::CPUSubTypeX86 st)
- : DarwinX86AsmBackend(T, MRI, CPU, true), SupportsCU(SupportsCU),
- Subtype(st) {
- }
+ StringRef CPU, MachO::CPUSubTypeX86 st)
+ : DarwinX86AsmBackend(T, MRI, CPU, true), Subtype(st) {}
MCObjectWriter *createObjectWriter(raw_ostream &OS) const override {
return createX86MachObjectWriter(OS, /*Is64Bit=*/true,
@@ -788,7 +795,7 @@ public:
/// \brief Generate the compact unwind encoding for the CFI instructions.
uint32_t generateCompactUnwindEncoding(
ArrayRef<MCCFIInstruction> Instrs) const override {
- return SupportsCU ? generateCompactUnwindEncodingImpl(Instrs) : 0;
+ return generateCompactUnwindEncodingImpl(Instrs);
}
};
@@ -801,9 +808,7 @@ MCAsmBackend *llvm::createX86_32AsmBackend(const Target &T,
Triple TheTriple(TT);
if (TheTriple.isOSBinFormatMachO())
- return new DarwinX86_32AsmBackend(T, MRI, CPU,
- TheTriple.isMacOSX() &&
- !TheTriple.isMacOSXVersionLT(10, 7));
+ return new DarwinX86_32AsmBackend(T, MRI, CPU);
if (TheTriple.isOSWindows() && !TheTriple.isOSBinFormatELF())
return new WindowsX86AsmBackend(T, false, CPU);
@@ -823,14 +828,15 @@ MCAsmBackend *llvm::createX86_64AsmBackend(const Target &T,
StringSwitch<MachO::CPUSubTypeX86>(TheTriple.getArchName())
.Case("x86_64h", MachO::CPU_SUBTYPE_X86_64_H)
.Default(MachO::CPU_SUBTYPE_X86_64_ALL);
- return new DarwinX86_64AsmBackend(T, MRI, CPU,
- TheTriple.isMacOSX() &&
- !TheTriple.isMacOSXVersionLT(10, 7), CS);
+ return new DarwinX86_64AsmBackend(T, MRI, CPU, CS);
}
if (TheTriple.isOSWindows() && !TheTriple.isOSBinFormatELF())
return new WindowsX86AsmBackend(T, true, CPU);
uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TheTriple.getOS());
+
+ if (TheTriple.getEnvironment() == Triple::GNUX32)
+ return new ELFX86_X32AsmBackend(T, OSABI, CPU);
return new ELFX86_64AsmBackend(T, OSABI, CPU);
}
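The new ELFX86_X32AsmBackend exists because the x32 environment (Triple::GNUX32) keeps the x86-64 instruction set and the EM_X86_64 machine type but must be emitted into a 32-bit ELF container, hence IsELF64 = false above. A rough sketch of that dispatch using stand-in types rather than the MC classes:

#include <cstdio>
#include <string>

struct ObjectFormat { bool Is64BitELF; const char *Machine; };

// Hypothetical helper, not an LLVM API: pick the ELF container for an
// x86-64 target based on its environment string.
static ObjectFormat pickFormat(const std::string &env) {
  if (env == "gnux32")
    return {false, "EM_X86_64"}; // 32-bit ELF, 64-bit machine type
  return {true, "EM_X86_64"};    // ordinary x86-64 ELF
}

int main() {
  ObjectFormat f = pickFormat("gnux32");
  std::printf("ELF64=%d machine=%s\n", f.Is64BitELF, f.Machine);
  return 0;
}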
diff --git a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
index 39480ea..83b2777 100644
--- a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
@@ -74,8 +74,9 @@ X86MCAsmInfoDarwin::X86MCAsmInfoDarwin(const Triple &T) {
// FIXME: this should not depend on the target OS version, but on the ld64
// version in use. From at least >= ld64-97.17 (Xcode 3.2.6) the abs-ified
- // FDE relocs may be used.
- DwarfFDESymbolsUseAbsDiff = T.isMacOSX() && !T.isMacOSXVersionLT(10, 6);
+  // FDE relocs may be used. We also use them for the iOS simulator.
+ DwarfFDESymbolsUseAbsDiff = (T.isMacOSX() && !T.isMacOSXVersionLT(10, 6))
+ || T.isiOS();
UseIntegratedAssembler = true;
}
@@ -142,8 +143,11 @@ getNonexecutableStackSection(MCContext &Ctx) const {
void X86MCAsmInfoMicrosoft::anchor() { }
X86MCAsmInfoMicrosoft::X86MCAsmInfoMicrosoft(const Triple &Triple) {
- if (Triple.getArch() == Triple::x86_64)
+ if (Triple.getArch() == Triple::x86_64) {
PrivateGlobalPrefix = ".L";
+ PointerSize = 8;
+ ExceptionsType = ExceptionHandling::WinEH;
+ }
AssemblerDialect = AsmWriterFlavor;
@@ -157,17 +161,18 @@ X86MCAsmInfoMicrosoft::X86MCAsmInfoMicrosoft(const Triple &Triple) {
void X86MCAsmInfoGNUCOFF::anchor() { }
X86MCAsmInfoGNUCOFF::X86MCAsmInfoGNUCOFF(const Triple &Triple) {
+ assert(Triple.isOSWindows() && "Windows is the only supported COFF target");
if (Triple.getArch() == Triple::x86_64) {
PrivateGlobalPrefix = ".L";
PointerSize = 8;
+ ExceptionsType = ExceptionHandling::WinEH;
+ } else {
+ ExceptionsType = ExceptionHandling::DwarfCFI;
}
AssemblerDialect = AsmWriterFlavor;
TextAlignFillValue = 0x90;
- // Exceptions handling
- ExceptionsType = ExceptionHandling::DwarfCFI;
-
UseIntegratedAssembler = true;
}
diff --git a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
index e63036c..5e29e5c 100644
--- a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
@@ -197,14 +197,13 @@ void X86_MC::DetectFamilyModel(unsigned EAX, unsigned &Family,
}
}
-unsigned X86_MC::getDwarfRegFlavour(StringRef TT, bool isEH) {
- Triple TheTriple(TT);
- if (TheTriple.getArch() == Triple::x86_64)
+unsigned X86_MC::getDwarfRegFlavour(Triple TT, bool isEH) {
+ if (TT.getArch() == Triple::x86_64)
return DWARFFlavour::X86_64;
- if (TheTriple.isOSDarwin())
+ if (TT.isOSDarwin())
return isEH ? DWARFFlavour::X86_32_DarwinEH : DWARFFlavour::X86_32_Generic;
- if (TheTriple.isOSCygMing())
+ if (TT.isOSCygMing())
// Unsupported by now, just quick fallback
return DWARFFlavour::X86_32_Generic;
return DWARFFlavour::X86_32_Generic;
@@ -251,8 +250,8 @@ static MCRegisterInfo *createX86MCRegisterInfo(StringRef TT) {
MCRegisterInfo *X = new MCRegisterInfo();
InitX86MCRegisterInfo(X, RA,
- X86_MC::getDwarfRegFlavour(TT, false),
- X86_MC::getDwarfRegFlavour(TT, true),
+ X86_MC::getDwarfRegFlavour(TheTriple, false),
+ X86_MC::getDwarfRegFlavour(TheTriple, true),
RA);
X86_MC::InitLLVM2SEHRegisterMapping(X);
return X;
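getDwarfRegFlavour now takes an already-parsed Triple rather than the raw triple string, so createX86MCRegisterInfo no longer re-parses the string for each query. A small sketch of the shape of that refactor; ParsedTriple is a stand-in, not LLVM's Triple class:

#include <cstdio>
#include <string>

struct ParsedTriple { std::string Arch, OS; };

// Take the parsed form so callers that already built it pass it directly.
static unsigned dwarfFlavour(const ParsedTriple &T, bool isEH) {
  if (T.Arch == "x86_64") return 0;            // X86_64 flavour
  if (T.OS == "darwin")   return isEH ? 1 : 2; // DarwinEH : Generic
  return 2;                                    // Generic fallback
}

int main() {
  ParsedTriple T{"i386", "darwin"};
  std::printf("%u %u\n", dwarfFlavour(T, true), dwarfFlavour(T, false));
  return 0;
}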
diff --git a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h
index 8fe40fd..ebe74cf 100644
--- a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h
+++ b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h
@@ -28,6 +28,7 @@ class MCSubtargetInfo;
class MCRelocationInfo;
class MCStreamer;
class Target;
+class Triple;
class StringRef;
class raw_ostream;
@@ -64,7 +65,7 @@ namespace X86_MC {
void DetectFamilyModel(unsigned EAX, unsigned &Family, unsigned &Model);
- unsigned getDwarfRegFlavour(StringRef TT, bool isEH);
+ unsigned getDwarfRegFlavour(Triple TT, bool isEH);
void InitLLVM2SEHRegisterMapping(MCRegisterInfo *MRI);
diff --git a/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp b/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp
index c62fd0a..7fa4180 100644
--- a/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp
@@ -19,12 +19,12 @@ public:
raw_ostream &OS)
: MCWinCOFFStreamer(C, AB, *CE, OS) { }
- void EmitWin64EHHandlerData() override;
+ void EmitWinEHHandlerData() override;
void FinishImpl() override;
};
-void X86WinCOFFStreamer::EmitWin64EHHandlerData() {
- MCStreamer::EmitWin64EHHandlerData();
+void X86WinCOFFStreamer::EmitWinEHHandlerData() {
+ MCStreamer::EmitWinEHHandlerData();
// We have to emit the unwind info now, because this directive
// actually switches to the .xdata section!
diff --git a/lib/Target/X86/X86.h b/lib/Target/X86/X86.h
index 64e8ea8..d5522ed 100644
--- a/lib/Target/X86/X86.h
+++ b/lib/Target/X86/X86.h
@@ -24,6 +24,10 @@ class ImmutablePass;
class JITCodeEmitter;
class X86TargetMachine;
+/// createX86AtomicExpandPass - This pass expands atomic operations that cannot
+/// be handled natively into loops built around cmpxchg.
+FunctionPass *createX86AtomicExpandPass(const X86TargetMachine *TM);
+
/// createX86ISelDag - This pass converts a legalized DAG into a
/// X86-specific DAG, ready for instruction scheduling.
///
diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td
index 6912b57..93f516a 100644
--- a/lib/Target/X86/X86.td
+++ b/lib/Target/X86/X86.td
@@ -168,6 +168,8 @@ def FeatureLEAUsesAG : SubtargetFeature<"lea-uses-ag", "LEAUsesAG", "true",
"LEA instruction needs inputs at AG stage">;
def FeatureSlowLEA : SubtargetFeature<"slow-lea", "SlowLEA", "true",
"LEA instruction with certain arguments is slow">;
+def FeatureSlowIncDec : SubtargetFeature<"slow-incdec", "SlowIncDec", "true",
+ "INC and DEC instructions are slower than ADD and SUB">;
//===----------------------------------------------------------------------===//
// X86 processors supported.
@@ -228,7 +230,7 @@ def : ProcessorModel<"slm", SLMModel, [ProcIntelSLM,
FeaturePCLMUL, FeatureAES,
FeatureCallRegIndirect,
FeaturePRFCHW,
- FeatureSlowLEA,
+ FeatureSlowLEA, FeatureSlowIncDec,
FeatureSlowBTMem, FeatureFastUAMem]>;
// "Arrandale" along with corei3 and corei5
def : ProcessorModel<"corei7", SandyBridgeModel,
@@ -271,7 +273,8 @@ def : ProcessorModel<"knl", HaswellModel,
FeatureCMPXCHG16B, FeatureFastUAMem, FeaturePOPCNT,
FeatureAES, FeaturePCLMUL, FeatureRDRAND, FeatureF16C,
FeatureFSGSBase, FeatureMOVBE, FeatureLZCNT, FeatureBMI,
- FeatureBMI2, FeatureFMA, FeatureRTM, FeatureHLE]>;
+ FeatureBMI2, FeatureFMA, FeatureRTM, FeatureHLE,
+ FeatureSlowIncDec]>;
def : Proc<"k6", [FeatureMMX]>;
def : Proc<"k6-2", [Feature3DNow]>;
diff --git a/lib/Target/X86/X86AtomicExpandPass.cpp b/lib/Target/X86/X86AtomicExpandPass.cpp
new file mode 100644
index 0000000..61eefbb
--- /dev/null
+++ b/lib/Target/X86/X86AtomicExpandPass.cpp
@@ -0,0 +1,287 @@
+//===-- X86AtomicExpandPass.cpp - Expand illegal atomic instructions ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a pass (at IR level) to replace atomic instructions which
+// cannot be implemented as a single instruction with cmpxchg-based loops.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86.h"
+#include "X86TargetMachine.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "x86-atomic-expand"
+
+namespace {
+ class X86AtomicExpandPass : public FunctionPass {
+ const X86TargetMachine *TM;
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ explicit X86AtomicExpandPass(const X86TargetMachine *TM)
+ : FunctionPass(ID), TM(TM) {}
+
+ bool runOnFunction(Function &F) override;
+ bool expandAtomicInsts(Function &F);
+
+ bool needsCmpXchgNb(Type *MemType);
+
+ /// There are four kinds of atomic operations. Two never need expanding:
+ /// cmpxchg is what we expand the others *to*, and loads are easily handled
+ /// by ISelLowering. Atomicrmw and store can need expanding in some
+ /// circumstances.
+ bool shouldExpand(Instruction *Inst);
+
+    /// 128-bit atomic stores (64-bit on i686) need to be implemented in terms
+    /// of trivial cmpxchg8b/cmpxchg16b loops. A simple store of that width
+    /// isn't necessarily atomic.
+ bool shouldExpandStore(StoreInst *SI);
+
+ /// Only some atomicrmw instructions need expanding -- some operations
+ /// (e.g. max) have absolutely no architectural support; some (e.g. or) have
+ /// limited support but can't return the previous value; some (e.g. add)
+ /// have complete support in the instruction set.
+ ///
+ /// Also, naturally, 128-bit operations always need to be expanded.
+ bool shouldExpandAtomicRMW(AtomicRMWInst *AI);
+
+ bool expandAtomicRMW(AtomicRMWInst *AI);
+ bool expandAtomicStore(StoreInst *SI);
+ };
+}
+
+char X86AtomicExpandPass::ID = 0;
+
+FunctionPass *llvm::createX86AtomicExpandPass(const X86TargetMachine *TM) {
+ return new X86AtomicExpandPass(TM);
+}
+
+bool X86AtomicExpandPass::runOnFunction(Function &F) {
+ SmallVector<Instruction *, 1> AtomicInsts;
+
+ // Changing control-flow while iterating through it is a bad idea, so gather a
+ // list of all atomic instructions before we start.
+ for (BasicBlock &BB : F)
+ for (Instruction &Inst : BB) {
+ if (isa<AtomicRMWInst>(&Inst) ||
+ (isa<StoreInst>(&Inst) && cast<StoreInst>(&Inst)->isAtomic()))
+ AtomicInsts.push_back(&Inst);
+ }
+
+ bool MadeChange = false;
+ for (Instruction *Inst : AtomicInsts) {
+ if (!shouldExpand(Inst))
+ continue;
+
+ if (AtomicRMWInst *AI = dyn_cast<AtomicRMWInst>(Inst))
+ MadeChange |= expandAtomicRMW(AI);
+ if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
+ MadeChange |= expandAtomicStore(SI);
+
+ assert(MadeChange && "Atomic inst not expanded when it should be?");
+ Inst->eraseFromParent();
+ }
+
+ return MadeChange;
+}
+
+/// Returns true if operations on the given type will need to use either
+/// cmpxchg8b or cmpxchg16b. This occurs if the type is 1 step up from the
+/// native width, and the instructions are available (otherwise we leave them
+/// alone to become __sync_fetch_and_... calls).
+bool X86AtomicExpandPass::needsCmpXchgNb(llvm::Type *MemType) {
+ const X86Subtarget &Subtarget = TM->getSubtarget<X86Subtarget>();
+ if (!Subtarget.hasCmpxchg16b())
+ return false;
+
+ unsigned CmpXchgNbWidth = Subtarget.is64Bit() ? 128 : 64;
+
+ unsigned OpWidth = MemType->getPrimitiveSizeInBits();
+ if (OpWidth == CmpXchgNbWidth)
+ return true;
+
+ return false;
+}
+
+
+bool X86AtomicExpandPass::shouldExpandAtomicRMW(AtomicRMWInst *AI) {
+ const X86Subtarget &Subtarget = TM->getSubtarget<X86Subtarget>();
+ unsigned NativeWidth = Subtarget.is64Bit() ? 64 : 32;
+
+ if (needsCmpXchgNb(AI->getType()))
+ return true;
+
+ if (AI->getType()->getPrimitiveSizeInBits() > NativeWidth)
+ return false;
+
+ AtomicRMWInst::BinOp Op = AI->getOperation();
+ switch (Op) {
+ default:
+ llvm_unreachable("Unknown atomic operation");
+ case AtomicRMWInst::Xchg:
+ case AtomicRMWInst::Add:
+ case AtomicRMWInst::Sub:
+ // It's better to use xadd, xsub or xchg for these in all cases.
+ return false;
+ case AtomicRMWInst::Or:
+ case AtomicRMWInst::And:
+ case AtomicRMWInst::Xor:
+ // If the atomicrmw's result isn't actually used, we can just add a "lock"
+ // prefix to a normal instruction for these operations.
+ return !AI->use_empty();
+ case AtomicRMWInst::Nand:
+ case AtomicRMWInst::Max:
+ case AtomicRMWInst::Min:
+ case AtomicRMWInst::UMax:
+ case AtomicRMWInst::UMin:
+ // These always require a non-trivial set of data operations on x86. We must
+ // use a cmpxchg loop.
+ return true;
+ }
+}
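As the comments in shouldExpandAtomicRMW say, or/and/xor only need the cmpxchg loop when their previous value is actually used; with the result ignored they can become a single lock-prefixed instruction. A plain C++ illustration of that distinction (what a compiler typically makes of it, not the pass itself):

#include <atomic>
#include <cstdio>

std::atomic<unsigned> flags{0};

void setFlag(unsigned bit) {
  flags.fetch_or(bit);                // result unused: a lock or is enough
}

bool setFlagAndTest(unsigned bit) {
  unsigned old = flags.fetch_or(bit); // old value needed: cmpxchg-style loop
  return (old & bit) != 0;
}

int main() {
  setFlag(1u);
  std::printf("%d\n", setFlagAndTest(2u)); // 0: bit 2 was clear before
  return 0;
}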
+
+bool X86AtomicExpandPass::shouldExpandStore(StoreInst *SI) {
+ if (needsCmpXchgNb(SI->getValueOperand()->getType()))
+ return true;
+
+ return false;
+}
+
+bool X86AtomicExpandPass::shouldExpand(Instruction *Inst) {
+ if (AtomicRMWInst *AI = dyn_cast<AtomicRMWInst>(Inst))
+ return shouldExpandAtomicRMW(AI);
+ if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
+ return shouldExpandStore(SI);
+ return false;
+}
+
+/// Emit IR to implement the given atomicrmw operation on values in registers,
+/// returning the new value.
+static Value *performAtomicOp(AtomicRMWInst::BinOp Op, IRBuilder<> &Builder,
+ Value *Loaded, Value *Inc) {
+ Value *NewVal;
+ switch (Op) {
+ case AtomicRMWInst::Xchg:
+ return Inc;
+ case AtomicRMWInst::Add:
+ return Builder.CreateAdd(Loaded, Inc, "new");
+ case AtomicRMWInst::Sub:
+ return Builder.CreateSub(Loaded, Inc, "new");
+ case AtomicRMWInst::And:
+ return Builder.CreateAnd(Loaded, Inc, "new");
+ case AtomicRMWInst::Nand:
+ return Builder.CreateNot(Builder.CreateAnd(Loaded, Inc), "new");
+ case AtomicRMWInst::Or:
+ return Builder.CreateOr(Loaded, Inc, "new");
+ case AtomicRMWInst::Xor:
+ return Builder.CreateXor(Loaded, Inc, "new");
+ case AtomicRMWInst::Max:
+ NewVal = Builder.CreateICmpSGT(Loaded, Inc);
+ return Builder.CreateSelect(NewVal, Loaded, Inc, "new");
+ case AtomicRMWInst::Min:
+ NewVal = Builder.CreateICmpSLE(Loaded, Inc);
+ return Builder.CreateSelect(NewVal, Loaded, Inc, "new");
+ case AtomicRMWInst::UMax:
+ NewVal = Builder.CreateICmpUGT(Loaded, Inc);
+ return Builder.CreateSelect(NewVal, Loaded, Inc, "new");
+ case AtomicRMWInst::UMin:
+ NewVal = Builder.CreateICmpULE(Loaded, Inc);
+ return Builder.CreateSelect(NewVal, Loaded, Inc, "new");
+ default:
+ break;
+ }
+ llvm_unreachable("Unknown atomic op");
+}
+
+bool X86AtomicExpandPass::expandAtomicRMW(AtomicRMWInst *AI) {
+ AtomicOrdering Order =
+ AI->getOrdering() == Unordered ? Monotonic : AI->getOrdering();
+ Value *Addr = AI->getPointerOperand();
+ BasicBlock *BB = AI->getParent();
+ Function *F = BB->getParent();
+ LLVMContext &Ctx = F->getContext();
+
+ // Given: atomicrmw some_op iN* %addr, iN %incr ordering
+ //
+ // The standard expansion we produce is:
+ // [...]
+ // %init_loaded = load atomic iN* %addr
+ // br label %loop
+ // loop:
+ // %loaded = phi iN [ %init_loaded, %entry ], [ %new_loaded, %loop ]
+ // %new = some_op iN %loaded, %incr
+ // %pair = cmpxchg iN* %addr, iN %loaded, iN %new
+ // %new_loaded = extractvalue { iN, i1 } %pair, 0
+ // %success = extractvalue { iN, i1 } %pair, 1
+ // br i1 %success, label %atomicrmw.end, label %loop
+ // atomicrmw.end:
+ // [...]
+ BasicBlock *ExitBB = BB->splitBasicBlock(AI, "atomicrmw.end");
+ BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);
+
+ // This grabs the DebugLoc from AI.
+ IRBuilder<> Builder(AI);
+
+ // The split call above "helpfully" added a branch at the end of BB (to the
+ // wrong place), but we want a load. It's easiest to just remove
+ // the branch entirely.
+ std::prev(BB->end())->eraseFromParent();
+ Builder.SetInsertPoint(BB);
+ LoadInst *InitLoaded = Builder.CreateLoad(Addr);
+ InitLoaded->setAlignment(AI->getType()->getPrimitiveSizeInBits());
+ Builder.CreateBr(LoopBB);
+
+ // Start the main loop block now that we've taken care of the preliminaries.
+ Builder.SetInsertPoint(LoopBB);
+ PHINode *Loaded = Builder.CreatePHI(AI->getType(), 2, "loaded");
+ Loaded->addIncoming(InitLoaded, BB);
+
+ Value *NewVal =
+ performAtomicOp(AI->getOperation(), Builder, Loaded, AI->getValOperand());
+
+ Value *Pair = Builder.CreateAtomicCmpXchg(
+ Addr, Loaded, NewVal, Order,
+ AtomicCmpXchgInst::getStrongestFailureOrdering(Order));
+ Value *NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");
+ Loaded->addIncoming(NewLoaded, LoopBB);
+
+ Value *Success = Builder.CreateExtractValue(Pair, 1, "success");
+ Builder.CreateCondBr(Success, ExitBB, LoopBB);
+
+ AI->replaceAllUsesWith(NewLoaded);
+
+ return true;
+}
+
+bool X86AtomicExpandPass::expandAtomicStore(StoreInst *SI) {
+ // An atomic store might need cmpxchg16b (or 8b on x86) to execute. Express
+ // this in terms of the usual expansion to "atomicrmw xchg".
+ IRBuilder<> Builder(SI);
+ AtomicOrdering Order =
+ SI->getOrdering() == Unordered ? Monotonic : SI->getOrdering();
+ AtomicRMWInst *AI =
+ Builder.CreateAtomicRMW(AtomicRMWInst::Xchg, SI->getPointerOperand(),
+ SI->getValueOperand(), Order);
+
+ // Now we have an appropriate swap instruction, lower it as usual.
+ if (shouldExpandAtomicRMW(AI)) {
+ expandAtomicRMW(AI);
+ AI->eraseFromParent();
+ return true;
+ }
+
+ return AI;
+}
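The comment block inside expandAtomicRMW already shows the IR the pass emits; the same load/compute/cmpxchg/retry shape written in ordinary C++, with std::atomic standing in for the generated IR, looks like this:

#include <atomic>
#include <cstdio>

// "nand" is one of the operations the pass always expands, since x86 has no
// single instruction for it. compare_exchange_weak reloads `loaded` on failure.
static unsigned atomicNand(std::atomic<unsigned> &addr, unsigned incr) {
  unsigned loaded = addr.load();
  unsigned desired;
  do {
    desired = ~(loaded & incr);                        // the "some_op" step
  } while (!addr.compare_exchange_weak(loaded, desired));
  return loaded;                                       // old value, like atomicrmw
}

int main() {
  std::atomic<unsigned> v{0xF0F0u};
  unsigned old = atomicNand(v, 0x00FFu);
  std::printf("old=%#x new=%#x\n", old, v.load());     // old=0xf0f0 new=0xffffff0f
  return 0;
}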
diff --git a/lib/Target/X86/X86CodeEmitter.cpp b/lib/Target/X86/X86CodeEmitter.cpp
index 76718d0..a3ae7ee 100644
--- a/lib/Target/X86/X86CodeEmitter.cpp
+++ b/lib/Target/X86/X86CodeEmitter.cpp
@@ -1113,9 +1113,14 @@ void Emitter<CodeEmitter>::emitInstruction(MachineInstr &MI,
case TargetOpcode::INLINEASM:
// We allow inline assembler nodes with empty bodies - they can
// implicitly define registers, which is ok for JIT.
- if (MI.getOperand(0).getSymbolName()[0])
+ if (MI.getOperand(0).getSymbolName()[0]) {
+ DebugLoc DL = MI.getDebugLoc();
+ DL.print(MI.getParent()->getParent()->getFunction()->getContext(),
+ llvm::errs());
report_fatal_error("JIT does not support inline asm!");
+ }
break;
+ case TargetOpcode::DBG_VALUE:
case TargetOpcode::CFI_INSTRUCTION:
break;
case TargetOpcode::GC_LABEL:
@@ -1126,6 +1131,16 @@ void Emitter<CodeEmitter>::emitInstruction(MachineInstr &MI,
case TargetOpcode::IMPLICIT_DEF:
case TargetOpcode::KILL:
break;
+
+ case X86::SEH_PushReg:
+ case X86::SEH_SaveReg:
+ case X86::SEH_SaveXMM:
+ case X86::SEH_StackAlloc:
+ case X86::SEH_SetFrame:
+ case X86::SEH_PushFrame:
+ case X86::SEH_EndPrologue:
+ break;
+
case X86::MOVPC32r: {
// This emits the "call" portion of this pseudo instruction.
MCE.emitByte(BaseOpcode);
diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp
index 56bcfa3..ce554ba 100644
--- a/lib/Target/X86/X86FastISel.cpp
+++ b/lib/Target/X86/X86FastISel.cpp
@@ -16,10 +16,12 @@
#include "X86.h"
#include "X86CallingConv.h"
#include "X86InstrBuilder.h"
+#include "X86InstrInfo.h"
#include "X86MachineFunctionInfo.h"
#include "X86RegisterInfo.h"
#include "X86Subtarget.h"
#include "X86TargetMachine.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
@@ -78,12 +80,14 @@ public:
private:
bool X86FastEmitCompare(const Value *LHS, const Value *RHS, EVT VT);
- bool X86FastEmitLoad(EVT VT, const X86AddressMode &AM, unsigned &RR);
+ bool X86FastEmitLoad(EVT VT, const X86AddressMode &AM, MachineMemOperand *MMO,
+ unsigned &ResultReg);
bool X86FastEmitStore(EVT VT, const Value *Val, const X86AddressMode &AM,
- bool Aligned = false);
- bool X86FastEmitStore(EVT VT, unsigned ValReg, const X86AddressMode &AM,
- bool Aligned = false);
+ MachineMemOperand *MMO = nullptr, bool Aligned = false);
+ bool X86FastEmitStore(EVT VT, unsigned ValReg, bool ValIsKill,
+ const X86AddressMode &AM,
+ MachineMemOperand *MMO = nullptr, bool Aligned = false);
bool X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT, unsigned Src, EVT SrcVT,
unsigned &ResultReg);
@@ -107,6 +111,12 @@ private:
bool X86SelectDivRem(const Instruction *I);
+ bool X86FastEmitCMoveSelect(MVT RetVT, const Instruction *I);
+
+ bool X86FastEmitSSESelect(MVT RetVT, const Instruction *I);
+
+ bool X86FastEmitPseudoSelect(MVT RetVT, const Instruction *I);
+
bool X86SelectSelect(const Instruction *I);
bool X86SelectTrunc(const Instruction *I);
@@ -147,10 +157,182 @@ private:
bool TryEmitSmallMemcpy(X86AddressMode DestAM,
X86AddressMode SrcAM, uint64_t Len);
+
+ bool foldX86XALUIntrinsic(X86::CondCode &CC, const Instruction *I,
+ const Value *Cond);
};
} // end anonymous namespace.
+static CmpInst::Predicate optimizeCmpPredicate(const CmpInst *CI) {
+ // If both operands are the same, then try to optimize or fold the cmp.
+ CmpInst::Predicate Predicate = CI->getPredicate();
+ if (CI->getOperand(0) != CI->getOperand(1))
+ return Predicate;
+
+ switch (Predicate) {
+ default: llvm_unreachable("Invalid predicate!");
+ case CmpInst::FCMP_FALSE: Predicate = CmpInst::FCMP_FALSE; break;
+ case CmpInst::FCMP_OEQ: Predicate = CmpInst::FCMP_ORD; break;
+ case CmpInst::FCMP_OGT: Predicate = CmpInst::FCMP_FALSE; break;
+ case CmpInst::FCMP_OGE: Predicate = CmpInst::FCMP_ORD; break;
+ case CmpInst::FCMP_OLT: Predicate = CmpInst::FCMP_FALSE; break;
+ case CmpInst::FCMP_OLE: Predicate = CmpInst::FCMP_ORD; break;
+ case CmpInst::FCMP_ONE: Predicate = CmpInst::FCMP_FALSE; break;
+ case CmpInst::FCMP_ORD: Predicate = CmpInst::FCMP_ORD; break;
+ case CmpInst::FCMP_UNO: Predicate = CmpInst::FCMP_UNO; break;
+ case CmpInst::FCMP_UEQ: Predicate = CmpInst::FCMP_TRUE; break;
+ case CmpInst::FCMP_UGT: Predicate = CmpInst::FCMP_UNO; break;
+ case CmpInst::FCMP_UGE: Predicate = CmpInst::FCMP_TRUE; break;
+ case CmpInst::FCMP_ULT: Predicate = CmpInst::FCMP_UNO; break;
+ case CmpInst::FCMP_ULE: Predicate = CmpInst::FCMP_TRUE; break;
+ case CmpInst::FCMP_UNE: Predicate = CmpInst::FCMP_UNO; break;
+ case CmpInst::FCMP_TRUE: Predicate = CmpInst::FCMP_TRUE; break;
+
+ case CmpInst::ICMP_EQ: Predicate = CmpInst::FCMP_TRUE; break;
+ case CmpInst::ICMP_NE: Predicate = CmpInst::FCMP_FALSE; break;
+ case CmpInst::ICMP_UGT: Predicate = CmpInst::FCMP_FALSE; break;
+ case CmpInst::ICMP_UGE: Predicate = CmpInst::FCMP_TRUE; break;
+ case CmpInst::ICMP_ULT: Predicate = CmpInst::FCMP_FALSE; break;
+ case CmpInst::ICMP_ULE: Predicate = CmpInst::FCMP_TRUE; break;
+ case CmpInst::ICMP_SGT: Predicate = CmpInst::FCMP_FALSE; break;
+ case CmpInst::ICMP_SGE: Predicate = CmpInst::FCMP_TRUE; break;
+ case CmpInst::ICMP_SLT: Predicate = CmpInst::FCMP_FALSE; break;
+ case CmpInst::ICMP_SLE: Predicate = CmpInst::FCMP_TRUE; break;
+ }
+
+ return Predicate;
+}
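optimizeCmpPredicate relies on identities that hold when both compare operands are the same value: for floats, x == x is exactly "x is not NaN" (ord) and x != x is exactly "x is NaN" (uno), while the integer forms fold to constant true or false. A quick check of the floating-point identities:

#include <cmath>
#include <cstdio>

int main() {
  double a = 1.5, n = std::nan("");
  // Prints: a==a:1 n==n:0 a!=a:0 n!=n:1
  std::printf("a==a:%d n==n:%d a!=a:%d n!=n:%d\n",
              a == a, n == n, a != a, n != n);
  return 0;
}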
+
+static std::pair<X86::CondCode, bool>
+getX86ConditionCode(CmpInst::Predicate Predicate) {
+ X86::CondCode CC = X86::COND_INVALID;
+ bool NeedSwap = false;
+ switch (Predicate) {
+ default: break;
+ // Floating-point Predicates
+ case CmpInst::FCMP_UEQ: CC = X86::COND_E; break;
+ case CmpInst::FCMP_OLT: NeedSwap = true; // fall-through
+ case CmpInst::FCMP_OGT: CC = X86::COND_A; break;
+ case CmpInst::FCMP_OLE: NeedSwap = true; // fall-through
+ case CmpInst::FCMP_OGE: CC = X86::COND_AE; break;
+ case CmpInst::FCMP_UGT: NeedSwap = true; // fall-through
+ case CmpInst::FCMP_ULT: CC = X86::COND_B; break;
+ case CmpInst::FCMP_UGE: NeedSwap = true; // fall-through
+ case CmpInst::FCMP_ULE: CC = X86::COND_BE; break;
+ case CmpInst::FCMP_ONE: CC = X86::COND_NE; break;
+ case CmpInst::FCMP_UNO: CC = X86::COND_P; break;
+ case CmpInst::FCMP_ORD: CC = X86::COND_NP; break;
+ case CmpInst::FCMP_OEQ: // fall-through
+ case CmpInst::FCMP_UNE: CC = X86::COND_INVALID; break;
+
+ // Integer Predicates
+ case CmpInst::ICMP_EQ: CC = X86::COND_E; break;
+ case CmpInst::ICMP_NE: CC = X86::COND_NE; break;
+ case CmpInst::ICMP_UGT: CC = X86::COND_A; break;
+ case CmpInst::ICMP_UGE: CC = X86::COND_AE; break;
+ case CmpInst::ICMP_ULT: CC = X86::COND_B; break;
+ case CmpInst::ICMP_ULE: CC = X86::COND_BE; break;
+ case CmpInst::ICMP_SGT: CC = X86::COND_G; break;
+ case CmpInst::ICMP_SGE: CC = X86::COND_GE; break;
+ case CmpInst::ICMP_SLT: CC = X86::COND_L; break;
+ case CmpInst::ICMP_SLE: CC = X86::COND_LE; break;
+ }
+
+ return std::make_pair(CC, NeedSwap);
+}
+
+static std::pair<unsigned, bool>
+getX86SSEConditionCode(CmpInst::Predicate Predicate) {
+ unsigned CC;
+ bool NeedSwap = false;
+
+ // SSE Condition code mapping:
+ // 0 - EQ
+ // 1 - LT
+ // 2 - LE
+ // 3 - UNORD
+ // 4 - NEQ
+ // 5 - NLT
+ // 6 - NLE
+ // 7 - ORD
+ switch (Predicate) {
+ default: llvm_unreachable("Unexpected predicate");
+ case CmpInst::FCMP_OEQ: CC = 0; break;
+ case CmpInst::FCMP_OGT: NeedSwap = true; // fall-through
+ case CmpInst::FCMP_OLT: CC = 1; break;
+ case CmpInst::FCMP_OGE: NeedSwap = true; // fall-through
+ case CmpInst::FCMP_OLE: CC = 2; break;
+ case CmpInst::FCMP_UNO: CC = 3; break;
+ case CmpInst::FCMP_UNE: CC = 4; break;
+ case CmpInst::FCMP_ULE: NeedSwap = true; // fall-through
+ case CmpInst::FCMP_UGE: CC = 5; break;
+ case CmpInst::FCMP_ULT: NeedSwap = true; // fall-through
+ case CmpInst::FCMP_UGT: CC = 6; break;
+ case CmpInst::FCMP_ORD: CC = 7; break;
+ case CmpInst::FCMP_UEQ:
+ case CmpInst::FCMP_ONE: CC = 8; break;
+ }
+
+ return std::make_pair(CC, NeedSwap);
+}
+
+/// \brief Check if it is possible to fold the condition from the XALU intrinsic
+/// into the user. The condition code will only be updated on success.
+bool X86FastISel::foldX86XALUIntrinsic(X86::CondCode &CC, const Instruction *I,
+ const Value *Cond) {
+ if (!isa<ExtractValueInst>(Cond))
+ return false;
+
+ const auto *EV = cast<ExtractValueInst>(Cond);
+ if (!isa<IntrinsicInst>(EV->getAggregateOperand()))
+ return false;
+
+ const auto *II = cast<IntrinsicInst>(EV->getAggregateOperand());
+ MVT RetVT;
+ const Function *Callee = II->getCalledFunction();
+ Type *RetTy =
+ cast<StructType>(Callee->getReturnType())->getTypeAtIndex(0U);
+ if (!isTypeLegal(RetTy, RetVT))
+ return false;
+
+ if (RetVT != MVT::i32 && RetVT != MVT::i64)
+ return false;
+
+ X86::CondCode TmpCC;
+ switch (II->getIntrinsicID()) {
+ default: return false;
+ case Intrinsic::sadd_with_overflow:
+ case Intrinsic::ssub_with_overflow:
+ case Intrinsic::smul_with_overflow:
+ case Intrinsic::umul_with_overflow: TmpCC = X86::COND_O; break;
+ case Intrinsic::uadd_with_overflow:
+ case Intrinsic::usub_with_overflow: TmpCC = X86::COND_B; break;
+ }
+
+ // Check if both instructions are in the same basic block.
+ if (II->getParent() != I->getParent())
+ return false;
+
+ // Make sure nothing is in the way
+ BasicBlock::const_iterator Start = I;
+ BasicBlock::const_iterator End = II;
+ for (auto Itr = std::prev(Start); Itr != End; --Itr) {
+ // We only expect extractvalue instructions between the intrinsic and the
+ // instruction to be selected.
+ if (!isa<ExtractValueInst>(Itr))
+ return false;
+
+ // Check that the extractvalue operand comes from the intrinsic.
+ const auto *EVI = cast<ExtractValueInst>(Itr);
+ if (EVI->getAggregateOperand() != II)
+ return false;
+ }
+
+ CC = TmpCC;
+ return true;
+}
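foldX86XALUIntrinsic lets the overflow bit of a *.with_overflow intrinsic feed a branch or select through the CPU flags directly, provided only extractvalues sit between the intrinsic and its user in the same block. At the source level that is the pattern produced by Clang/GCC's checked-arithmetic builtins; a small example (assuming __builtin_add_overflow, which typically lowers to llvm.sadd.with.overflow for int operands):

#include <climits>
#include <cstdio>

// Saturating add: the overflow flag of the builtin feeds the branch directly.
int saturatingAdd(int a, int b) {
  int sum;
  if (__builtin_add_overflow(a, b, &sum)) // extractvalue {i32, i1}, 1
    return a > 0 ? INT_MAX : INT_MIN;
  return sum;                             // extractvalue {i32, i1}, 0
}

int main() {
  std::printf("%d\n", saturatingAdd(INT_MAX - 1, 5)); // clamps to INT_MAX
  return 0;
}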
+
bool X86FastISel::isTypeLegal(Type *Ty, MVT &VT, bool AllowI1) {
EVT evt = TLI.getValueType(Ty, /*HandleUnknown=*/true);
if (evt == MVT::Other || !evt.isSimple())
@@ -180,7 +362,7 @@ bool X86FastISel::isTypeLegal(Type *Ty, MVT &VT, bool AllowI1) {
/// The address is either pre-computed, i.e. Ptr, or a GlobalAddress, i.e. GV.
/// Return true and the result register by reference if it is possible.
bool X86FastISel::X86FastEmitLoad(EVT VT, const X86AddressMode &AM,
- unsigned &ResultReg) {
+ MachineMemOperand *MMO, unsigned &ResultReg) {
// Get opcode and regclass of the output for the given load instruction.
unsigned Opc = 0;
const TargetRegisterClass *RC = nullptr;
@@ -228,8 +410,11 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, const X86AddressMode &AM,
}
ResultReg = createResultReg(RC);
- addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
- DbgLoc, TII.get(Opc), ResultReg), AM);
+ MachineInstrBuilder MIB =
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg);
+ addFullAddress(MIB, AM);
+ if (MMO)
+ MIB->addMemOperand(*FuncInfo.MF, MMO);
return true;
}
@@ -237,9 +422,9 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, const X86AddressMode &AM,
/// type VT. The address is either pre-computed, consisted of a base ptr, Ptr
/// and a displacement offset, or a GlobalAddress,
/// i.e. V. Return true if it is possible.
-bool
-X86FastISel::X86FastEmitStore(EVT VT, unsigned ValReg,
- const X86AddressMode &AM, bool Aligned) {
+bool X86FastISel::X86FastEmitStore(EVT VT, unsigned ValReg, bool ValIsKill,
+ const X86AddressMode &AM,
+ MachineMemOperand *MMO, bool Aligned) {
// Get opcode and regclass of the output for the given store instruction.
unsigned Opc = 0;
switch (VT.getSimpleVT().SimpleTy) {
@@ -249,7 +434,8 @@ X86FastISel::X86FastEmitStore(EVT VT, unsigned ValReg,
// Mask out all but lowest bit.
unsigned AndResult = createResultReg(&X86::GR8RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
- TII.get(X86::AND8ri), AndResult).addReg(ValReg).addImm(1);
+ TII.get(X86::AND8ri), AndResult)
+ .addReg(ValReg, getKillRegState(ValIsKill)).addImm(1);
ValReg = AndResult;
}
// FALLTHROUGH, handling i1 as i8.
@@ -288,13 +474,18 @@ X86FastISel::X86FastEmitStore(EVT VT, unsigned ValReg,
break;
}
- addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
- DbgLoc, TII.get(Opc)), AM).addReg(ValReg);
+ MachineInstrBuilder MIB =
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc));
+ addFullAddress(MIB, AM).addReg(ValReg, getKillRegState(ValIsKill));
+ if (MMO)
+ MIB->addMemOperand(*FuncInfo.MF, MMO);
+
return true;
}
bool X86FastISel::X86FastEmitStore(EVT VT, const Value *Val,
- const X86AddressMode &AM, bool Aligned) {
+ const X86AddressMode &AM,
+ MachineMemOperand *MMO, bool Aligned) {
// Handle 'null' like i32/i64 0.
if (isa<ConstantPointerNull>(Val))
Val = Constant::getNullValue(DL.getIntPtrType(Val->getContext()));
@@ -317,10 +508,12 @@ bool X86FastISel::X86FastEmitStore(EVT VT, const Value *Val,
}
if (Opc) {
- addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
- DbgLoc, TII.get(Opc)), AM)
- .addImm(Signed ? (uint64_t) CI->getSExtValue() :
- CI->getZExtValue());
+ MachineInstrBuilder MIB =
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc));
+ addFullAddress(MIB, AM).addImm(Signed ? (uint64_t) CI->getSExtValue()
+ : CI->getZExtValue());
+ if (MMO)
+ MIB->addMemOperand(*FuncInfo.MF, MMO);
return true;
}
}
@@ -329,7 +522,8 @@ bool X86FastISel::X86FastEmitStore(EVT VT, const Value *Val,
if (ValReg == 0)
return false;
- return X86FastEmitStore(VT, ValReg, AM, Aligned);
+ bool ValKill = hasTrivialKill(Val);
+ return X86FastEmitStore(VT, ValReg, ValKill, AM, MMO, Aligned);
}
/// X86FastEmitExtend - Emit a machine instruction to extend a value Src of
@@ -355,17 +549,8 @@ bool X86FastISel::handleConstantAddresses(const Value *V, X86AddressMode &AM) {
return false;
// Can't handle TLS yet.
- if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
- if (GVar->isThreadLocal())
- return false;
-
- // Can't handle TLS yet, part 2 (this is slightly crazy, but this is how
- // it works...).
- if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
- if (const GlobalVariable *GVar =
- dyn_cast_or_null<GlobalVariable>(GA->getAliasee()))
- if (GVar->isThreadLocal())
- return false;
+ if (GV->isThreadLocal())
+ return false;
// RIP-relative addresses can't have additional register operands, so if
// we've already folded stuff into the addressing mode, just force the
@@ -696,7 +881,7 @@ bool X86FastISel::X86SelectCallAddress(const Value *V, X86AddressMode &AM) {
(AM.Base.Reg != 0 || AM.IndexReg != 0))
return false;
- // Can't handle DbgLocLImport.
+ // Can't handle DLL Import.
if (GV->hasDLLImportStorageClass())
return false;
@@ -749,19 +934,24 @@ bool X86FastISel::X86SelectStore(const Instruction *I) {
if (S->isAtomic())
return false;
- unsigned SABIAlignment =
- DL.getABITypeAlignment(S->getValueOperand()->getType());
- bool Aligned = S->getAlignment() == 0 || S->getAlignment() >= SABIAlignment;
+ const Value *Val = S->getValueOperand();
+ const Value *Ptr = S->getPointerOperand();
MVT VT;
- if (!isTypeLegal(I->getOperand(0)->getType(), VT, /*AllowI1=*/true))
+ if (!isTypeLegal(Val->getType(), VT, /*AllowI1=*/true))
return false;
+ unsigned Alignment = S->getAlignment();
+ unsigned ABIAlignment = DL.getABITypeAlignment(Val->getType());
+ if (Alignment == 0) // Ensure that codegen never sees alignment 0
+ Alignment = ABIAlignment;
+ bool Aligned = Alignment >= ABIAlignment;
+
X86AddressMode AM;
- if (!X86SelectAddress(I->getOperand(1), AM))
+ if (!X86SelectAddress(Ptr, AM))
return false;
- return X86FastEmitStore(VT, I->getOperand(0), AM, Aligned);
+ return X86FastEmitStore(VT, Val, AM, createMachineMemOperandFor(I), Aligned);
}
/// X86SelectRet - Select and emit code to implement ret instructions.
@@ -896,25 +1086,29 @@ bool X86FastISel::X86SelectRet(const Instruction *I) {
/// X86SelectLoad - Select and emit code to implement load instructions.
///
-bool X86FastISel::X86SelectLoad(const Instruction *I) {
+bool X86FastISel::X86SelectLoad(const Instruction *I) {
+ const LoadInst *LI = cast<LoadInst>(I);
+
// Atomic loads need special handling.
- if (cast<LoadInst>(I)->isAtomic())
+ if (LI->isAtomic())
return false;
MVT VT;
- if (!isTypeLegal(I->getType(), VT, /*AllowI1=*/true))
+ if (!isTypeLegal(LI->getType(), VT, /*AllowI1=*/true))
return false;
+ const Value *Ptr = LI->getPointerOperand();
+
X86AddressMode AM;
- if (!X86SelectAddress(I->getOperand(0), AM))
+ if (!X86SelectAddress(Ptr, AM))
return false;
unsigned ResultReg = 0;
- if (X86FastEmitLoad(VT, AM, ResultReg)) {
- UpdateValueMap(I, ResultReg);
- return true;
- }
- return false;
+ if (!X86FastEmitLoad(VT, AM, createMachineMemOperandFor(LI), ResultReg))
+ return false;
+
+ UpdateValueMap(I, ResultReg);
+ return true;
}
static unsigned X86ChooseCmpOpcode(EVT VT, const X86Subtarget *Subtarget) {
@@ -994,73 +1188,89 @@ bool X86FastISel::X86SelectCmp(const Instruction *I) {
if (!isTypeLegal(I->getOperand(0)->getType(), VT))
return false;
- unsigned ResultReg = createResultReg(&X86::GR8RegClass);
- unsigned SetCCOpc;
- bool SwapArgs; // false -> compare Op0, Op1. true -> compare Op1, Op0.
- switch (CI->getPredicate()) {
- case CmpInst::FCMP_OEQ: {
- if (!X86FastEmitCompare(CI->getOperand(0), CI->getOperand(1), VT))
+ // Try to optimize or fold the cmp.
+ CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
+ unsigned ResultReg = 0;
+ switch (Predicate) {
+ default: break;
+ case CmpInst::FCMP_FALSE: {
+ ResultReg = createResultReg(&X86::GR32RegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV32r0),
+ ResultReg);
+ ResultReg = FastEmitInst_extractsubreg(MVT::i8, ResultReg, /*Kill=*/true,
+ X86::sub_8bit);
+ if (!ResultReg)
return false;
+ break;
+ }
+ case CmpInst::FCMP_TRUE: {
+ ResultReg = createResultReg(&X86::GR8RegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV8ri),
+ ResultReg).addImm(1);
+ break;
+ }
+ }
- unsigned EReg = createResultReg(&X86::GR8RegClass);
- unsigned NPReg = createResultReg(&X86::GR8RegClass);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::SETEr), EReg);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
- TII.get(X86::SETNPr), NPReg);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
- TII.get(X86::AND8rr), ResultReg).addReg(NPReg).addReg(EReg);
+ if (ResultReg) {
UpdateValueMap(I, ResultReg);
return true;
}
- case CmpInst::FCMP_UNE: {
- if (!X86FastEmitCompare(CI->getOperand(0), CI->getOperand(1), VT))
+
+ const Value *LHS = CI->getOperand(0);
+ const Value *RHS = CI->getOperand(1);
+
+ // The optimizer might have replaced fcmp oeq %x, %x with fcmp ord %x, 0.0.
+ // We don't have to materialize a zero constant for this case and can just use
+ // %x again on the RHS.
+ if (Predicate == CmpInst::FCMP_ORD || Predicate == CmpInst::FCMP_UNO) {
+ const auto *RHSC = dyn_cast<ConstantFP>(RHS);
+ if (RHSC && RHSC->isNullValue())
+ RHS = LHS;
+ }
+
+ // FCMP_OEQ and FCMP_UNE cannot be checked with a single instruction.
+ static unsigned SETFOpcTable[2][3] = {
+ { X86::SETEr, X86::SETNPr, X86::AND8rr },
+ { X86::SETNEr, X86::SETPr, X86::OR8rr }
+ };
+ unsigned *SETFOpc = nullptr;
+ switch (Predicate) {
+ default: break;
+ case CmpInst::FCMP_OEQ: SETFOpc = &SETFOpcTable[0][0]; break;
+ case CmpInst::FCMP_UNE: SETFOpc = &SETFOpcTable[1][0]; break;
+ }
+
+ ResultReg = createResultReg(&X86::GR8RegClass);
+ if (SETFOpc) {
+ if (!X86FastEmitCompare(LHS, RHS, VT))
return false;
- unsigned NEReg = createResultReg(&X86::GR8RegClass);
- unsigned PReg = createResultReg(&X86::GR8RegClass);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::SETNEr), NEReg);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::SETPr), PReg);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::OR8rr),ResultReg)
- .addReg(PReg).addReg(NEReg);
+ unsigned FlagReg1 = createResultReg(&X86::GR8RegClass);
+ unsigned FlagReg2 = createResultReg(&X86::GR8RegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SETFOpc[0]),
+ FlagReg1);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SETFOpc[1]),
+ FlagReg2);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SETFOpc[2]),
+ ResultReg).addReg(FlagReg1).addReg(FlagReg2);
UpdateValueMap(I, ResultReg);
return true;
}
- case CmpInst::FCMP_OGT: SwapArgs = false; SetCCOpc = X86::SETAr; break;
- case CmpInst::FCMP_OGE: SwapArgs = false; SetCCOpc = X86::SETAEr; break;
- case CmpInst::FCMP_OLT: SwapArgs = true; SetCCOpc = X86::SETAr; break;
- case CmpInst::FCMP_OLE: SwapArgs = true; SetCCOpc = X86::SETAEr; break;
- case CmpInst::FCMP_ONE: SwapArgs = false; SetCCOpc = X86::SETNEr; break;
- case CmpInst::FCMP_ORD: SwapArgs = false; SetCCOpc = X86::SETNPr; break;
- case CmpInst::FCMP_UNO: SwapArgs = false; SetCCOpc = X86::SETPr; break;
- case CmpInst::FCMP_UEQ: SwapArgs = false; SetCCOpc = X86::SETEr; break;
- case CmpInst::FCMP_UGT: SwapArgs = true; SetCCOpc = X86::SETBr; break;
- case CmpInst::FCMP_UGE: SwapArgs = true; SetCCOpc = X86::SETBEr; break;
- case CmpInst::FCMP_ULT: SwapArgs = false; SetCCOpc = X86::SETBr; break;
- case CmpInst::FCMP_ULE: SwapArgs = false; SetCCOpc = X86::SETBEr; break;
-
- case CmpInst::ICMP_EQ: SwapArgs = false; SetCCOpc = X86::SETEr; break;
- case CmpInst::ICMP_NE: SwapArgs = false; SetCCOpc = X86::SETNEr; break;
- case CmpInst::ICMP_UGT: SwapArgs = false; SetCCOpc = X86::SETAr; break;
- case CmpInst::ICMP_UGE: SwapArgs = false; SetCCOpc = X86::SETAEr; break;
- case CmpInst::ICMP_ULT: SwapArgs = false; SetCCOpc = X86::SETBr; break;
- case CmpInst::ICMP_ULE: SwapArgs = false; SetCCOpc = X86::SETBEr; break;
- case CmpInst::ICMP_SGT: SwapArgs = false; SetCCOpc = X86::SETGr; break;
- case CmpInst::ICMP_SGE: SwapArgs = false; SetCCOpc = X86::SETGEr; break;
- case CmpInst::ICMP_SLT: SwapArgs = false; SetCCOpc = X86::SETLr; break;
- case CmpInst::ICMP_SLE: SwapArgs = false; SetCCOpc = X86::SETLEr; break;
- default:
- return false;
- }
- const Value *Op0 = CI->getOperand(0), *Op1 = CI->getOperand(1);
+ X86::CondCode CC;
+ bool SwapArgs;
+ std::tie(CC, SwapArgs) = getX86ConditionCode(Predicate);
+ assert(CC <= X86::LAST_VALID_COND && "Unexpected condition code.");
+ unsigned Opc = X86::getSETFromCond(CC);
+
if (SwapArgs)
- std::swap(Op0, Op1);
+ std::swap(LHS, RHS);
- // Emit a compare of Op0/Op1.
- if (!X86FastEmitCompare(Op0, Op1, VT))
+ // Emit a compare of LHS/RHS.
+ if (!X86FastEmitCompare(LHS, RHS, VT))
return false;
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SetCCOpc), ResultReg);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg);
UpdateValueMap(I, ResultReg);
return true;
}
@@ -1126,73 +1336,88 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) {
// Fold the common case of a conditional branch with a comparison
// in the same block (values defined on other blocks may not have
// initialized registers).
+ X86::CondCode CC;
if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
if (CI->hasOneUse() && CI->getParent() == I->getParent()) {
EVT VT = TLI.getValueType(CI->getOperand(0)->getType());
+ // Try to optimize or fold the cmp.
+ CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
+ switch (Predicate) {
+ default: break;
+ case CmpInst::FCMP_FALSE: FastEmitBranch(FalseMBB, DbgLoc); return true;
+ case CmpInst::FCMP_TRUE: FastEmitBranch(TrueMBB, DbgLoc); return true;
+ }
+
+ const Value *CmpLHS = CI->getOperand(0);
+ const Value *CmpRHS = CI->getOperand(1);
+
+ // The optimizer might have replaced fcmp oeq %x, %x with fcmp ord %x,
+ // 0.0.
+ // We don't have to materialize a zero constant for this case and can just
+ // use %x again on the RHS.
+ if (Predicate == CmpInst::FCMP_ORD || Predicate == CmpInst::FCMP_UNO) {
+ const auto *CmpRHSC = dyn_cast<ConstantFP>(CmpRHS);
+ if (CmpRHSC && CmpRHSC->isNullValue())
+ CmpRHS = CmpLHS;
+ }
+
// Try to take advantage of fallthrough opportunities.
- CmpInst::Predicate Predicate = CI->getPredicate();
if (FuncInfo.MBB->isLayoutSuccessor(TrueMBB)) {
std::swap(TrueMBB, FalseMBB);
Predicate = CmpInst::getInversePredicate(Predicate);
}
- bool SwapArgs; // false -> compare Op0, Op1. true -> compare Op1, Op0.
- unsigned BranchOpc; // Opcode to jump on, e.g. "X86::JA"
-
+ // FCMP_OEQ and FCMP_UNE cannot be expressed with a single flag/condition
+ // code check. Instead two branch instructions are required to check all
+ // the flags. First we change the predicate to a supported condition code,
+      // which will be the first branch. Later on we will emit the second
+ // branch.
+ bool NeedExtraBranch = false;
switch (Predicate) {
+ default: break;
case CmpInst::FCMP_OEQ:
- std::swap(TrueMBB, FalseMBB);
- Predicate = CmpInst::FCMP_UNE;
- // FALL THROUGH
- case CmpInst::FCMP_UNE: SwapArgs = false; BranchOpc = X86::JNE_4; break;
- case CmpInst::FCMP_OGT: SwapArgs = false; BranchOpc = X86::JA_4; break;
- case CmpInst::FCMP_OGE: SwapArgs = false; BranchOpc = X86::JAE_4; break;
- case CmpInst::FCMP_OLT: SwapArgs = true; BranchOpc = X86::JA_4; break;
- case CmpInst::FCMP_OLE: SwapArgs = true; BranchOpc = X86::JAE_4; break;
- case CmpInst::FCMP_ONE: SwapArgs = false; BranchOpc = X86::JNE_4; break;
- case CmpInst::FCMP_ORD: SwapArgs = false; BranchOpc = X86::JNP_4; break;
- case CmpInst::FCMP_UNO: SwapArgs = false; BranchOpc = X86::JP_4; break;
- case CmpInst::FCMP_UEQ: SwapArgs = false; BranchOpc = X86::JE_4; break;
- case CmpInst::FCMP_UGT: SwapArgs = true; BranchOpc = X86::JB_4; break;
- case CmpInst::FCMP_UGE: SwapArgs = true; BranchOpc = X86::JBE_4; break;
- case CmpInst::FCMP_ULT: SwapArgs = false; BranchOpc = X86::JB_4; break;
- case CmpInst::FCMP_ULE: SwapArgs = false; BranchOpc = X86::JBE_4; break;
-
- case CmpInst::ICMP_EQ: SwapArgs = false; BranchOpc = X86::JE_4; break;
- case CmpInst::ICMP_NE: SwapArgs = false; BranchOpc = X86::JNE_4; break;
- case CmpInst::ICMP_UGT: SwapArgs = false; BranchOpc = X86::JA_4; break;
- case CmpInst::ICMP_UGE: SwapArgs = false; BranchOpc = X86::JAE_4; break;
- case CmpInst::ICMP_ULT: SwapArgs = false; BranchOpc = X86::JB_4; break;
- case CmpInst::ICMP_ULE: SwapArgs = false; BranchOpc = X86::JBE_4; break;
- case CmpInst::ICMP_SGT: SwapArgs = false; BranchOpc = X86::JG_4; break;
- case CmpInst::ICMP_SGE: SwapArgs = false; BranchOpc = X86::JGE_4; break;
- case CmpInst::ICMP_SLT: SwapArgs = false; BranchOpc = X86::JL_4; break;
- case CmpInst::ICMP_SLE: SwapArgs = false; BranchOpc = X86::JLE_4; break;
- default:
- return false;
+ std::swap(TrueMBB, FalseMBB); // fall-through
+ case CmpInst::FCMP_UNE:
+ NeedExtraBranch = true;
+ Predicate = CmpInst::FCMP_ONE;
+ break;
}
- const Value *Op0 = CI->getOperand(0), *Op1 = CI->getOperand(1);
+ bool SwapArgs;
+ unsigned BranchOpc;
+ std::tie(CC, SwapArgs) = getX86ConditionCode(Predicate);
+ assert(CC <= X86::LAST_VALID_COND && "Unexpected condition code.");
+
+ BranchOpc = X86::GetCondBranchFromCond(CC);
if (SwapArgs)
- std::swap(Op0, Op1);
+ std::swap(CmpLHS, CmpRHS);
// Emit a compare of the LHS and RHS, setting the flags.
- if (!X86FastEmitCompare(Op0, Op1, VT))
+ if (!X86FastEmitCompare(CmpLHS, CmpRHS, VT))
return false;
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(BranchOpc))
.addMBB(TrueMBB);
- if (Predicate == CmpInst::FCMP_UNE) {
- // X86 requires a second branch to handle UNE (and OEQ,
- // which is mapped to UNE above).
+ // X86 requires a second branch to handle UNE (and OEQ, which is mapped
+ // to UNE above).
+ if (NeedExtraBranch) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::JP_4))
.addMBB(TrueMBB);
}
+ // Obtain the branch weight and add the TrueBB to the successor list.
+ uint32_t BranchWeight = 0;
+ if (FuncInfo.BPI)
+ BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
+ TrueMBB->getBasicBlock());
+ FuncInfo.MBB->addSuccessor(TrueMBB, BranchWeight);
+
+ // Emits an unconditional branch to the FalseBB, obtains the branch
+ // weight, and adds it to the successor list.
FastEmitBranch(FalseMBB, DbgLoc);
- FuncInfo.MBB->addSuccessor(TrueMBB);
+
return true;
}
} else if (TruncInst *TI = dyn_cast<TruncInst>(BI->getCondition())) {
@@ -1224,10 +1449,32 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(JmpOpc))
.addMBB(TrueMBB);
FastEmitBranch(FalseMBB, DbgLoc);
- FuncInfo.MBB->addSuccessor(TrueMBB);
+ uint32_t BranchWeight = 0;
+ if (FuncInfo.BPI)
+ BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
+ TrueMBB->getBasicBlock());
+ FuncInfo.MBB->addSuccessor(TrueMBB, BranchWeight);
return true;
}
}
+ } else if (foldX86XALUIntrinsic(CC, BI, BI->getCondition())) {
+ // Fake request the condition, otherwise the intrinsic might be completely
+ // optimized away.
+ unsigned TmpReg = getRegForValue(BI->getCondition());
+ if (TmpReg == 0)
+ return false;
+
+ unsigned BranchOpc = X86::GetCondBranchFromCond(CC);
+
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(BranchOpc))
+ .addMBB(TrueMBB);
+ FastEmitBranch(FalseMBB, DbgLoc);
+ uint32_t BranchWeight = 0;
+ if (FuncInfo.BPI)
+ BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
+ TrueMBB->getBasicBlock());
+ FuncInfo.MBB->addSuccessor(TrueMBB, BranchWeight);
+ return true;
}
// Otherwise do a clumsy setcc and re-test it.
@@ -1241,7 +1488,11 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::JNE_4))
.addMBB(TrueMBB);
FastEmitBranch(FalseMBB, DbgLoc);
- FuncInfo.MBB->addSuccessor(TrueMBB);
+ uint32_t BranchWeight = 0;
+ if (FuncInfo.BPI)
+ BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
+ TrueMBB->getBasicBlock());
+ FuncInfo.MBB->addSuccessor(TrueMBB, BranchWeight);
return true;
}
@@ -1478,50 +1729,319 @@ bool X86FastISel::X86SelectDivRem(const Instruction *I) {
return true;
}
-bool X86FastISel::X86SelectSelect(const Instruction *I) {
- MVT VT;
- if (!isTypeLegal(I->getType(), VT))
+/// \brief Emit a conditional move instruction (if they are supported) to lower
+/// the select.
+bool X86FastISel::X86FastEmitCMoveSelect(MVT RetVT, const Instruction *I) {
+ // Check if the subtarget supports these instructions.
+ if (!Subtarget->hasCMov())
return false;
- // We only use cmov here, if we don't have a cmov instruction bail.
- if (!Subtarget->hasCMov()) return false;
+ // FIXME: Add support for i8.
+ if (RetVT < MVT::i16 || RetVT > MVT::i64)
+ return false;
- unsigned Opc = 0;
- const TargetRegisterClass *RC = nullptr;
- if (VT == MVT::i16) {
- Opc = X86::CMOVE16rr;
- RC = &X86::GR16RegClass;
- } else if (VT == MVT::i32) {
- Opc = X86::CMOVE32rr;
- RC = &X86::GR32RegClass;
- } else if (VT == MVT::i64) {
- Opc = X86::CMOVE64rr;
- RC = &X86::GR64RegClass;
- } else {
+ const Value *Cond = I->getOperand(0);
+ const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT);
+ bool NeedTest = true;
+ X86::CondCode CC = X86::COND_NE;
+
+ // Optimize conditions coming from a compare if both instructions are in the
+ // same basic block (values defined in other basic blocks may not have
+ // initialized registers).
+ const auto *CI = dyn_cast<CmpInst>(Cond);
+ if (CI && (CI->getParent() == I->getParent())) {
+ CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
+
+ // FCMP_OEQ and FCMP_UNE cannot be checked with a single instruction.
+ static unsigned SETFOpcTable[2][3] = {
+ { X86::SETNPr, X86::SETEr , X86::TEST8rr },
+ { X86::SETPr, X86::SETNEr, X86::OR8rr }
+ };
+ unsigned *SETFOpc = nullptr;
+ switch (Predicate) {
+ default: break;
+ case CmpInst::FCMP_OEQ:
+ SETFOpc = &SETFOpcTable[0][0];
+ Predicate = CmpInst::ICMP_NE;
+ break;
+ case CmpInst::FCMP_UNE:
+ SETFOpc = &SETFOpcTable[1][0];
+ Predicate = CmpInst::ICMP_NE;
+ break;
+ }
+
+ bool NeedSwap;
+ std::tie(CC, NeedSwap) = getX86ConditionCode(Predicate);
+ assert(CC <= X86::LAST_VALID_COND && "Unexpected condition code.");
+
+ const Value *CmpLHS = CI->getOperand(0);
+ const Value *CmpRHS = CI->getOperand(1);
+ if (NeedSwap)
+ std::swap(CmpLHS, CmpRHS);
+
+ EVT CmpVT = TLI.getValueType(CmpLHS->getType());
+ // Emit a compare of the LHS and RHS, setting the flags.
+ if (!X86FastEmitCompare(CmpLHS, CmpRHS, CmpVT))
+ return false;
+
+ if (SETFOpc) {
+ unsigned FlagReg1 = createResultReg(&X86::GR8RegClass);
+ unsigned FlagReg2 = createResultReg(&X86::GR8RegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SETFOpc[0]),
+ FlagReg1);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SETFOpc[1]),
+ FlagReg2);
+ auto const &II = TII.get(SETFOpc[2]);
+ if (II.getNumDefs()) {
+ unsigned TmpReg = createResultReg(&X86::GR8RegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, TmpReg)
+ .addReg(FlagReg2).addReg(FlagReg1);
+ } else {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
+ .addReg(FlagReg2).addReg(FlagReg1);
+ }
+ }
+ NeedTest = false;
+ } else if (foldX86XALUIntrinsic(CC, I, Cond)) {
+ // Fake request the condition, otherwise the intrinsic might be completely
+ // optimized away.
+ unsigned TmpReg = getRegForValue(Cond);
+ if (TmpReg == 0)
+ return false;
+
+ NeedTest = false;
+ }
+
+ if (NeedTest) {
+    // Selects operate on i1; however, CondReg is 8 bits wide and may contain
+    // garbage. Indeed, only the least significant bit is supposed to be
+    // accurate. If we read more than the lsb, we may see non-zero values
+    // whereas the lsb is zero. Therefore, we have to truncate CondReg to i1
+    // for the select. This is achieved by performing a TEST against 1.
+ unsigned CondReg = getRegForValue(Cond);
+ if (CondReg == 0)
+ return false;
+ bool CondIsKill = hasTrivialKill(Cond);
+
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri))
+ .addReg(CondReg, getKillRegState(CondIsKill)).addImm(1);
+ }
+
+ const Value *LHS = I->getOperand(1);
+ const Value *RHS = I->getOperand(2);
+
+ unsigned RHSReg = getRegForValue(RHS);
+ bool RHSIsKill = hasTrivialKill(RHS);
+
+ unsigned LHSReg = getRegForValue(LHS);
+ bool LHSIsKill = hasTrivialKill(LHS);
+
+ if (!LHSReg || !RHSReg)
+ return false;
+
+ unsigned Opc = X86::getCMovFromCond(CC, RC->getSize());
+ unsigned ResultReg = FastEmitInst_rr(Opc, RC, RHSReg, RHSIsKill,
+ LHSReg, LHSIsKill);
+ UpdateValueMap(I, ResultReg);
+ return true;
+}
+
+/// \brief Emit SSE instructions to lower the select.
+///
+/// Try to use SSE1/SSE2 instructions to simulate a select without branches.
+/// This lowers fp selects into a CMP/AND/ANDN/OR sequence when the necessary
+/// SSE instructions are available.
+bool X86FastISel::X86FastEmitSSESelect(MVT RetVT, const Instruction *I) {
+ // Optimize conditions coming from a compare if both instructions are in the
+ // same basic block (values defined in other basic blocks may not have
+ // initialized registers).
+ const auto *CI = dyn_cast<FCmpInst>(I->getOperand(0));
+ if (!CI || (CI->getParent() != I->getParent()))
return false;
+
+ if (I->getType() != CI->getOperand(0)->getType() ||
+ !((Subtarget->hasSSE1() && RetVT == MVT::f32) ||
+ (Subtarget->hasSSE2() && RetVT == MVT::f64) ))
+ return false;
+
+ const Value *CmpLHS = CI->getOperand(0);
+ const Value *CmpRHS = CI->getOperand(1);
+ CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
+
+ // The optimizer might have replaced fcmp oeq %x, %x with fcmp ord %x, 0.0.
+ // We don't have to materialize a zero constant for this case and can just use
+ // %x again on the RHS.
+ if (Predicate == CmpInst::FCMP_ORD || Predicate == CmpInst::FCMP_UNO) {
+ const auto *CmpRHSC = dyn_cast<ConstantFP>(CmpRHS);
+ if (CmpRHSC && CmpRHSC->isNullValue())
+ CmpRHS = CmpLHS;
}
- unsigned Op0Reg = getRegForValue(I->getOperand(0));
- if (Op0Reg == 0) return false;
- unsigned Op1Reg = getRegForValue(I->getOperand(1));
- if (Op1Reg == 0) return false;
- unsigned Op2Reg = getRegForValue(I->getOperand(2));
- if (Op2Reg == 0) return false;
-
- // Selects operate on i1, however, Op0Reg is 8 bits width and may contain
- // garbage. Indeed, only the less significant bit is supposed to be accurate.
- // If we read more than the lsb, we may see non-zero values whereas lsb
- // is zero. Therefore, we have to truncate Op0Reg to i1 for the select.
- // This is achieved by performing TEST against 1.
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri))
- .addReg(Op0Reg).addImm(1);
- unsigned ResultReg = createResultReg(RC);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
- .addReg(Op1Reg).addReg(Op2Reg);
+ unsigned CC;
+ bool NeedSwap;
+ std::tie(CC, NeedSwap) = getX86SSEConditionCode(Predicate);
+ if (CC > 7)
+ return false;
+
+ if (NeedSwap)
+ std::swap(CmpLHS, CmpRHS);
+
+ static unsigned OpcTable[2][2][4] = {
+ { { X86::CMPSSrr, X86::FsANDPSrr, X86::FsANDNPSrr, X86::FsORPSrr },
+ { X86::VCMPSSrr, X86::VFsANDPSrr, X86::VFsANDNPSrr, X86::VFsORPSrr } },
+ { { X86::CMPSDrr, X86::FsANDPDrr, X86::FsANDNPDrr, X86::FsORPDrr },
+ { X86::VCMPSDrr, X86::VFsANDPDrr, X86::VFsANDNPDrr, X86::VFsORPDrr } }
+ };
+
+ bool HasAVX = Subtarget->hasAVX();
+ unsigned *Opc = nullptr;
+ switch (RetVT.SimpleTy) {
+ default: return false;
+ case MVT::f32: Opc = &OpcTable[0][HasAVX][0]; break;
+ case MVT::f64: Opc = &OpcTable[1][HasAVX][0]; break;
+ }
+
+ const Value *LHS = I->getOperand(1);
+ const Value *RHS = I->getOperand(2);
+
+ unsigned LHSReg = getRegForValue(LHS);
+ bool LHSIsKill = hasTrivialKill(LHS);
+
+ unsigned RHSReg = getRegForValue(RHS);
+ bool RHSIsKill = hasTrivialKill(RHS);
+
+ unsigned CmpLHSReg = getRegForValue(CmpLHS);
+ bool CmpLHSIsKill = hasTrivialKill(CmpLHS);
+
+ unsigned CmpRHSReg = getRegForValue(CmpRHS);
+ bool CmpRHSIsKill = hasTrivialKill(CmpRHS);
+
+ if (!LHSReg || !RHSReg || !CmpLHS || !CmpRHS)
+ return false;
+
+ const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT);
+ unsigned CmpReg = FastEmitInst_rri(Opc[0], RC, CmpLHSReg, CmpLHSIsKill,
+ CmpRHSReg, CmpRHSIsKill, CC);
+ unsigned AndReg = FastEmitInst_rr(Opc[1], RC, CmpReg, /*IsKill=*/false,
+ LHSReg, LHSIsKill);
+ unsigned AndNReg = FastEmitInst_rr(Opc[2], RC, CmpReg, /*IsKill=*/true,
+ RHSReg, RHSIsKill);
+ unsigned ResultReg = FastEmitInst_rr(Opc[3], RC, AndNReg, /*IsKill=*/true,
+ AndReg, /*IsKill=*/true);
+ UpdateValueMap(I, ResultReg);
+ return true;
+}
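X86FastEmitSSESelect builds the classic branchless floating-point select: compare to get an all-ones or all-zero mask, AND it with one operand, ANDN it with the other, then OR the halves. The same sequence written with the portable SSE intrinsics, as a sketch of the idea rather than the FastISel code:

#include <xmmintrin.h>
#include <cstdio>

// Returns (a < b) ? x : y without a branch, lane 0 only.
static float selectLT(float a, float b, float x, float y) {
  __m128 mask = _mm_cmplt_ss(_mm_set_ss(a), _mm_set_ss(b));    // CMPSS
  __m128 res  = _mm_or_ps(_mm_and_ps(mask, _mm_set_ss(x)),     // ANDPS, ORPS
                          _mm_andnot_ps(mask, _mm_set_ss(y))); // ANDNPS
  return _mm_cvtss_f32(res);
}

int main() {
  std::printf("%f %f\n", selectLT(1.0f, 2.0f, 10.0f, 20.0f),   // 10
              selectLT(3.0f, 2.0f, 10.0f, 20.0f));             // 20
  return 0;
}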
+
+bool X86FastISel::X86FastEmitPseudoSelect(MVT RetVT, const Instruction *I) {
+ // These are pseudo CMOV instructions and will be later expanded into control-
+ // flow.
+ unsigned Opc;
+ switch (RetVT.SimpleTy) {
+ default: return false;
+ case MVT::i8: Opc = X86::CMOV_GR8; break;
+ case MVT::i16: Opc = X86::CMOV_GR16; break;
+ case MVT::i32: Opc = X86::CMOV_GR32; break;
+ case MVT::f32: Opc = X86::CMOV_FR32; break;
+ case MVT::f64: Opc = X86::CMOV_FR64; break;
+ }
+
+ const Value *Cond = I->getOperand(0);
+ X86::CondCode CC = X86::COND_NE;
+
+ // Optimize conditions coming from a compare if both instructions are in the
+ // same basic block (values defined in other basic blocks may not have
+ // initialized registers).
+ const auto *CI = dyn_cast<CmpInst>(Cond);
+ if (CI && (CI->getParent() == I->getParent())) {
+ bool NeedSwap;
+ std::tie(CC, NeedSwap) = getX86ConditionCode(CI->getPredicate());
+ if (CC > X86::LAST_VALID_COND)
+ return false;
+
+ const Value *CmpLHS = CI->getOperand(0);
+ const Value *CmpRHS = CI->getOperand(1);
+
+ if (NeedSwap)
+ std::swap(CmpLHS, CmpRHS);
+
+ EVT CmpVT = TLI.getValueType(CmpLHS->getType());
+ if (!X86FastEmitCompare(CmpLHS, CmpRHS, CmpVT))
+ return false;
+ } else {
+ unsigned CondReg = getRegForValue(Cond);
+ if (CondReg == 0)
+ return false;
+ bool CondIsKill = hasTrivialKill(Cond);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri))
+ .addReg(CondReg, getKillRegState(CondIsKill)).addImm(1);
+ }
+
+ const Value *LHS = I->getOperand(1);
+ const Value *RHS = I->getOperand(2);
+
+ unsigned LHSReg = getRegForValue(LHS);
+ bool LHSIsKill = hasTrivialKill(LHS);
+
+ unsigned RHSReg = getRegForValue(RHS);
+ bool RHSIsKill = hasTrivialKill(RHS);
+
+ if (!LHSReg || !RHSReg)
+ return false;
+
+ const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT);
+
+ unsigned ResultReg =
+ FastEmitInst_rri(Opc, RC, RHSReg, RHSIsKill, LHSReg, LHSIsKill, CC);
UpdateValueMap(I, ResultReg);
return true;
}
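
When the condition does not come from a compare in the same block, the code above materializes the i1 into an 8-bit register and emits TEST8ri against 1, because only the low bit of that register is defined; the remaining bits may hold garbage. A small sketch of why the mask matters; selectOnLowBit is a made-up helper, not LLVM API:

#include <cstdint>
#include <cstdio>

// The boolean lives in bit 0 of an 8-bit register; the other bits are
// undefined, so the select must test only bit 0 (what TEST8ri ..., 1 does).
static int selectOnLowBit(uint8_t cond, int trueVal, int falseVal) {
  return (cond & 1) ? trueVal : falseVal; // mask first, then pick
}

int main() {
  // 0xFE is non-zero as a whole, but its low bit is 0, so the false value
  // must be chosen; testing the full register would get this wrong.
  std::printf("%d\n", selectOnLowBit(0xFE, 1, 2)); // 2
  std::printf("%d\n", selectOnLowBit(0x01, 1, 2)); // 1
}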
+bool X86FastISel::X86SelectSelect(const Instruction *I) {
+ MVT RetVT;
+ if (!isTypeLegal(I->getType(), RetVT))
+ return false;
+
+ // Check if we can fold the select.
+ if (const auto *CI = dyn_cast<CmpInst>(I->getOperand(0))) {
+ CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
+ const Value *Opnd = nullptr;
+ switch (Predicate) {
+ default: break;
+ case CmpInst::FCMP_FALSE: Opnd = I->getOperand(2); break;
+ case CmpInst::FCMP_TRUE: Opnd = I->getOperand(1); break;
+ }
+ // No need for a select anymore - this is an unconditional move.
+ if (Opnd) {
+ unsigned OpReg = getRegForValue(Opnd);
+ if (OpReg == 0)
+ return false;
+ bool OpIsKill = hasTrivialKill(Opnd);
+ const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT);
+ unsigned ResultReg = createResultReg(RC);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), ResultReg)
+ .addReg(OpReg, getKillRegState(OpIsKill));
+ UpdateValueMap(I, ResultReg);
+ return true;
+ }
+ }
+
+ // First try to use real conditional move instructions.
+ if (X86FastEmitCMoveSelect(RetVT, I))
+ return true;
+
+ // Try to use a sequence of SSE instructions to simulate a conditional move.
+ if (X86FastEmitSSESelect(RetVT, I))
+ return true;
+
+ // Fall-back to pseudo conditional move instructions, which will be later
+ // converted to control-flow.
+ if (X86FastEmitPseudoSelect(RetVT, I))
+ return true;
+
+ return false;
+}
+
bool X86FastISel::X86SelectFPExt(const Instruction *I) {
// fpext from float to double.
if (X86ScalarSSEf64 &&
@@ -1633,8 +2153,8 @@ bool X86FastISel::TryEmitSmallMemcpy(X86AddressMode DestAM,
}
unsigned Reg;
- bool RV = X86FastEmitLoad(VT, SrcAM, Reg);
- RV &= X86FastEmitStore(VT, Reg, DestAM);
+ bool RV = X86FastEmitLoad(VT, SrcAM, nullptr, Reg);
+ RV &= X86FastEmitStore(VT, Reg, /*Kill=*/true, DestAM);
assert(RV && "Failed to emit load or store??");
unsigned Size = VT.getSizeInBits()/8;
@@ -1646,10 +2166,74 @@ bool X86FastISel::TryEmitSmallMemcpy(X86AddressMode DestAM,
return true;
}
+static bool isCommutativeIntrinsic(IntrinsicInst const &I) {
+ switch (I.getIntrinsicID()) {
+ case Intrinsic::sadd_with_overflow:
+ case Intrinsic::uadd_with_overflow:
+ case Intrinsic::smul_with_overflow:
+ case Intrinsic::umul_with_overflow:
+ return true;
+ default:
+ return false;
+ }
+}
+
bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) {
// FIXME: Handle more intrinsics.
switch (I.getIntrinsicID()) {
default: return false;
+ case Intrinsic::frameaddress: {
+ Type *RetTy = I.getCalledFunction()->getReturnType();
+
+ MVT VT;
+ if (!isTypeLegal(RetTy, VT))
+ return false;
+
+ unsigned Opc;
+ const TargetRegisterClass *RC = nullptr;
+
+ switch (VT.SimpleTy) {
+ default: llvm_unreachable("Invalid result type for frameaddress.");
+ case MVT::i32: Opc = X86::MOV32rm; RC = &X86::GR32RegClass; break;
+ case MVT::i64: Opc = X86::MOV64rm; RC = &X86::GR64RegClass; break;
+ }
+
+ // This needs to be set before we call getFrameRegister, otherwise we get
+ // the wrong frame register.
+ MachineFrameInfo *MFI = FuncInfo.MF->getFrameInfo();
+ MFI->setFrameAddressIsTaken(true);
+
+ const X86RegisterInfo *RegInfo =
+ static_cast<const X86RegisterInfo*>(TM.getRegisterInfo());
+ unsigned FrameReg = RegInfo->getFrameRegister(*(FuncInfo.MF));
+ assert(((FrameReg == X86::RBP && VT == MVT::i64) ||
+ (FrameReg == X86::EBP && VT == MVT::i32)) &&
+ "Invalid Frame Register!");
+
+    // Always make a copy of the frame register into a vreg first, so that we
+ // never directly reference the frame register (the TwoAddressInstruction-
+ // Pass doesn't like that).
+ unsigned SrcReg = createResultReg(RC);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), SrcReg).addReg(FrameReg);
+
+ // Now recursively load from the frame address.
+ // movq (%rbp), %rax
+ // movq (%rax), %rax
+ // movq (%rax), %rax
+ // ...
+ unsigned DestReg;
+ unsigned Depth = cast<ConstantInt>(I.getOperand(0))->getZExtValue();
+ while (Depth--) {
+ DestReg = createResultReg(RC);
+ addDirectMem(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(Opc), DestReg), SrcReg);
+ SrcReg = DestReg;
+ }
+
+ UpdateValueMap(&I, SrcReg);
+ return true;
+ }
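
The loop above performs one load through the saved frame pointer per requested depth, exactly the movq chain shown in the comment. A hedged C++ model of that walk over a hand-built fake chain rather than the real stack; walkFrameChain is an illustrative name only:

#include <cstdio>

// Each frame's slot 0 holds the caller's frame pointer, so depth N means
// N dependent loads (the MOV64rm chain emitted above).
static void *walkFrameChain(void *fp, unsigned depth) {
  while (depth--)
    fp = *reinterpret_cast<void **>(fp); // one load per level
  return fp;
}

int main() {
  // Fake three-deep chain: frame0 -> frame1 -> frame2 -> null.
  void *frame2[1] = {nullptr};
  void *frame1[1] = {frame2};
  void *frame0[1] = {frame1};
  std::printf("%d\n", walkFrameChain(frame0, 2) == frame2); // 1
}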
case Intrinsic::memcpy: {
const MemCpyInst &MCI = cast<MemCpyInst>(I);
// Don't handle volatile or variable length memcpys.
@@ -1726,52 +2310,233 @@ bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TRAP));
return true;
}
- case Intrinsic::sadd_with_overflow:
- case Intrinsic::uadd_with_overflow: {
- // FIXME: Should fold immediates.
+ case Intrinsic::sqrt: {
+ if (!Subtarget->hasSSE1())
+ return false;
- // Replace "add with overflow" intrinsics with an "add" instruction followed
- // by a seto/setc instruction.
- const Function *Callee = I.getCalledFunction();
- Type *RetTy =
- cast<StructType>(Callee->getReturnType())->getTypeAtIndex(unsigned(0));
+ Type *RetTy = I.getCalledFunction()->getReturnType();
MVT VT;
if (!isTypeLegal(RetTy, VT))
return false;
- const Value *Op1 = I.getArgOperand(0);
- const Value *Op2 = I.getArgOperand(1);
- unsigned Reg1 = getRegForValue(Op1);
- unsigned Reg2 = getRegForValue(Op2);
+ // Unfortunately we can't use FastEmit_r, because the AVX version of FSQRT
+ // is not generated by FastISel yet.
+ // FIXME: Update this code once tablegen can handle it.
+ static const unsigned SqrtOpc[2][2] = {
+ {X86::SQRTSSr, X86::VSQRTSSr},
+ {X86::SQRTSDr, X86::VSQRTSDr}
+ };
+ bool HasAVX = Subtarget->hasAVX();
+ unsigned Opc;
+ const TargetRegisterClass *RC;
+ switch (VT.SimpleTy) {
+ default: return false;
+ case MVT::f32: Opc = SqrtOpc[0][HasAVX]; RC = &X86::FR32RegClass; break;
+ case MVT::f64: Opc = SqrtOpc[1][HasAVX]; RC = &X86::FR64RegClass; break;
+ }
+
+ const Value *SrcVal = I.getArgOperand(0);
+ unsigned SrcReg = getRegForValue(SrcVal);
- if (Reg1 == 0 || Reg2 == 0)
- // FIXME: Handle values *not* in registers.
+ if (SrcReg == 0)
return false;
- unsigned OpC = 0;
- if (VT == MVT::i32)
- OpC = X86::ADD32rr;
- else if (VT == MVT::i64)
- OpC = X86::ADD64rr;
- else
+ unsigned ImplicitDefReg = 0;
+ if (HasAVX) {
+ ImplicitDefReg = createResultReg(RC);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::IMPLICIT_DEF), ImplicitDefReg);
+ }
+
+ unsigned ResultReg = createResultReg(RC);
+ MachineInstrBuilder MIB;
+ MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc),
+ ResultReg);
+
+ if (ImplicitDefReg)
+ MIB.addReg(ImplicitDefReg);
+
+ MIB.addReg(SrcReg);
+
+ UpdateValueMap(&I, ResultReg);
+ return true;
+ }
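
As with the cvttss2si/cvttsd2si case further down, the sqrt lowering is table-driven: a small static array indexed by element type and AVX availability yields the opcode. A sketch of that selection pattern; the enumerators below are stand-ins, not the real LLVM opcode values:

#include <cstdio>

enum Opcode { SQRTSSr, VSQRTSSr, SQRTSDr, VSQRTSDr, InvalidOp };
enum class FPType { f32, f64, Other };

// Rows select the element type, columns select the AVX form (which takes an
// extra tied source operand, hence the IMPLICIT_DEF emitted above).
static Opcode pickSqrtOpcode(FPType VT, bool HasAVX) {
  static const Opcode Table[2][2] = {
      {SQRTSSr, VSQRTSSr}, // f32
      {SQRTSDr, VSQRTSDr}, // f64
  };
  switch (VT) {
  case FPType::f32: return Table[0][HasAVX];
  case FPType::f64: return Table[1][HasAVX];
  default:          return InvalidOp;
  }
}

int main() {
  std::printf("%d %d\n", pickSqrtOpcode(FPType::f64, /*HasAVX=*/true),
              pickSqrtOpcode(FPType::f32, /*HasAVX=*/false));
}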
+ case Intrinsic::sadd_with_overflow:
+ case Intrinsic::uadd_with_overflow:
+ case Intrinsic::ssub_with_overflow:
+ case Intrinsic::usub_with_overflow:
+ case Intrinsic::smul_with_overflow:
+ case Intrinsic::umul_with_overflow: {
+ // This implements the basic lowering of the xalu with overflow intrinsics
+ // into add/sub/mul followed by either seto or setb.
+ const Function *Callee = I.getCalledFunction();
+ auto *Ty = cast<StructType>(Callee->getReturnType());
+ Type *RetTy = Ty->getTypeAtIndex(0U);
+ Type *CondTy = Ty->getTypeAtIndex(1);
+
+ MVT VT;
+ if (!isTypeLegal(RetTy, VT))
+ return false;
+
+ if (VT < MVT::i8 || VT > MVT::i64)
+ return false;
+
+ const Value *LHS = I.getArgOperand(0);
+ const Value *RHS = I.getArgOperand(1);
+
+ // Canonicalize immediate to the RHS.
+ if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) &&
+ isCommutativeIntrinsic(I))
+ std::swap(LHS, RHS);
+
+ unsigned BaseOpc, CondOpc;
+ switch (I.getIntrinsicID()) {
+ default: llvm_unreachable("Unexpected intrinsic!");
+ case Intrinsic::sadd_with_overflow:
+ BaseOpc = ISD::ADD; CondOpc = X86::SETOr; break;
+ case Intrinsic::uadd_with_overflow:
+ BaseOpc = ISD::ADD; CondOpc = X86::SETBr; break;
+ case Intrinsic::ssub_with_overflow:
+ BaseOpc = ISD::SUB; CondOpc = X86::SETOr; break;
+ case Intrinsic::usub_with_overflow:
+ BaseOpc = ISD::SUB; CondOpc = X86::SETBr; break;
+ case Intrinsic::smul_with_overflow:
+ BaseOpc = X86ISD::SMUL; CondOpc = X86::SETOr; break;
+ case Intrinsic::umul_with_overflow:
+ BaseOpc = X86ISD::UMUL; CondOpc = X86::SETOr; break;
+ }
+
+ unsigned LHSReg = getRegForValue(LHS);
+ if (LHSReg == 0)
return false;
+ bool LHSIsKill = hasTrivialKill(LHS);
- // The call to CreateRegs builds two sequential registers, to store the
- // both the returned values.
- unsigned ResultReg = FuncInfo.CreateRegs(I.getType());
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(OpC), ResultReg)
- .addReg(Reg1).addReg(Reg2);
+ unsigned ResultReg = 0;
+ // Check if we have an immediate version.
+ if (auto const *C = dyn_cast<ConstantInt>(RHS)) {
+ ResultReg = FastEmit_ri(VT, VT, BaseOpc, LHSReg, LHSIsKill,
+ C->getZExtValue());
+ }
- unsigned Opc = X86::SETBr;
- if (I.getIntrinsicID() == Intrinsic::sadd_with_overflow)
- Opc = X86::SETOr;
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc),
- ResultReg + 1);
+ unsigned RHSReg;
+ bool RHSIsKill;
+ if (!ResultReg) {
+ RHSReg = getRegForValue(RHS);
+ if (RHSReg == 0)
+ return false;
+ RHSIsKill = hasTrivialKill(RHS);
+ ResultReg = FastEmit_rr(VT, VT, BaseOpc, LHSReg, LHSIsKill, RHSReg,
+ RHSIsKill);
+ }
+
+ // FastISel doesn't have a pattern for all X86::MUL*r and X86::IMUL*r. Emit
+ // it manually.
+ if (BaseOpc == X86ISD::UMUL && !ResultReg) {
+ static const unsigned MULOpc[] =
+ { X86::MUL8r, X86::MUL16r, X86::MUL32r, X86::MUL64r };
+ static const unsigned Reg[] = { X86::AL, X86::AX, X86::EAX, X86::RAX };
+ // First copy the first operand into RAX, which is an implicit input to
+ // the X86::MUL*r instruction.
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), Reg[VT.SimpleTy-MVT::i8])
+ .addReg(LHSReg, getKillRegState(LHSIsKill));
+ ResultReg = FastEmitInst_r(MULOpc[VT.SimpleTy-MVT::i8],
+ TLI.getRegClassFor(VT), RHSReg, RHSIsKill);
+ } else if (BaseOpc == X86ISD::SMUL && !ResultReg) {
+ static const unsigned MULOpc[] =
+ { X86::IMUL8r, X86::IMUL16rr, X86::IMUL32rr, X86::IMUL64rr };
+ if (VT == MVT::i8) {
+ // Copy the first operand into AL, which is an implicit input to the
+ // X86::IMUL8r instruction.
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), X86::AL)
+ .addReg(LHSReg, getKillRegState(LHSIsKill));
+ ResultReg = FastEmitInst_r(MULOpc[0], TLI.getRegClassFor(VT), RHSReg,
+ RHSIsKill);
+ } else
+ ResultReg = FastEmitInst_rr(MULOpc[VT.SimpleTy-MVT::i8],
+ TLI.getRegClassFor(VT), LHSReg, LHSIsKill,
+ RHSReg, RHSIsKill);
+ }
+
+ if (!ResultReg)
+ return false;
+
+ unsigned ResultReg2 = FuncInfo.CreateRegs(CondTy);
+ assert((ResultReg+1) == ResultReg2 && "Nonconsecutive result registers.");
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CondOpc),
+ ResultReg2);
UpdateValueMap(&I, ResultReg, 2);
return true;
}
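
The condition register written by SETB/SETO can be modelled in scalar code: for unsigned addition the overflow bit is the carry (the wrapped result is smaller than an operand), for signed addition it is set when the infinitely precise sum does not fit the result type. A small sketch under those assumptions; two's-complement wrapping is assumed for the signed case, as on x86:

#include <cstdint>
#include <cstdio>
#include <limits>

// uadd.with.overflow: ADD, then SETB (carry flag).
static void uaddWithOverflow(uint32_t a, uint32_t b, uint32_t &res, bool &ov) {
  res = a + b;
  ov = res < a; // carry out of bit 31
}

// sadd.with.overflow: ADD, then SETO (overflow flag).
static void saddWithOverflow(int32_t a, int32_t b, int32_t &res, bool &ov) {
  int64_t wide = static_cast<int64_t>(a) + b;
  res = static_cast<int32_t>(wide); // wraps on mainstream targets
  ov = wide != res;
}

int main() {
  uint32_t ur; bool uo;
  uaddWithOverflow(0xFFFFFFFFu, 1u, ur, uo);
  int32_t sr; bool so;
  saddWithOverflow(std::numeric_limits<int32_t>::max(), 1, sr, so);
  std::printf("%u %d %d %d\n", ur, uo, sr, so); // 0 1 -2147483648 1
}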
+ case Intrinsic::x86_sse_cvttss2si:
+ case Intrinsic::x86_sse_cvttss2si64:
+ case Intrinsic::x86_sse2_cvttsd2si:
+ case Intrinsic::x86_sse2_cvttsd2si64: {
+ bool IsInputDouble;
+ switch (I.getIntrinsicID()) {
+ default: llvm_unreachable("Unexpected intrinsic.");
+ case Intrinsic::x86_sse_cvttss2si:
+ case Intrinsic::x86_sse_cvttss2si64:
+ if (!Subtarget->hasSSE1())
+ return false;
+ IsInputDouble = false;
+ break;
+ case Intrinsic::x86_sse2_cvttsd2si:
+ case Intrinsic::x86_sse2_cvttsd2si64:
+ if (!Subtarget->hasSSE2())
+ return false;
+ IsInputDouble = true;
+ break;
+ }
+
+ Type *RetTy = I.getCalledFunction()->getReturnType();
+ MVT VT;
+ if (!isTypeLegal(RetTy, VT))
+ return false;
+
+ static const unsigned CvtOpc[2][2][2] = {
+ { { X86::CVTTSS2SIrr, X86::VCVTTSS2SIrr },
+ { X86::CVTTSS2SI64rr, X86::VCVTTSS2SI64rr } },
+ { { X86::CVTTSD2SIrr, X86::VCVTTSD2SIrr },
+ { X86::CVTTSD2SI64rr, X86::VCVTTSD2SI64rr } }
+ };
+ bool HasAVX = Subtarget->hasAVX();
+ unsigned Opc;
+ switch (VT.SimpleTy) {
+ default: llvm_unreachable("Unexpected result type.");
+ case MVT::i32: Opc = CvtOpc[IsInputDouble][0][HasAVX]; break;
+ case MVT::i64: Opc = CvtOpc[IsInputDouble][1][HasAVX]; break;
+ }
+
+ // Check if we can fold insertelement instructions into the convert.
+ const Value *Op = I.getArgOperand(0);
+ while (auto *IE = dyn_cast<InsertElementInst>(Op)) {
+ const Value *Index = IE->getOperand(2);
+ if (!isa<ConstantInt>(Index))
+ break;
+ unsigned Idx = cast<ConstantInt>(Index)->getZExtValue();
+
+ if (Idx == 0) {
+ Op = IE->getOperand(1);
+ break;
+ }
+ Op = IE->getOperand(0);
+ }
+
+ unsigned Reg = getRegForValue(Op);
+ if (Reg == 0)
+ return false;
+
+ unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
+ .addReg(Reg);
+
+ UpdateValueMap(&I, ResultReg);
+ return true;
+ }
}
}
@@ -1794,31 +2559,43 @@ bool X86FastISel::FastLowerArguments() {
return false;
// Only handle simple cases. i.e. Up to 6 i32/i64 scalar arguments.
- unsigned Idx = 1;
- for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
- I != E; ++I, ++Idx) {
- if (Idx > 6)
- return false;
-
+ unsigned GPRCnt = 0;
+ unsigned FPRCnt = 0;
+ unsigned Idx = 0;
+ for (auto const &Arg : F->args()) {
+ // The first argument is at index 1.
+ ++Idx;
if (F->getAttributes().hasAttribute(Idx, Attribute::ByVal) ||
F->getAttributes().hasAttribute(Idx, Attribute::InReg) ||
F->getAttributes().hasAttribute(Idx, Attribute::StructRet) ||
F->getAttributes().hasAttribute(Idx, Attribute::Nest))
return false;
- Type *ArgTy = I->getType();
+ Type *ArgTy = Arg.getType();
if (ArgTy->isStructTy() || ArgTy->isArrayTy() || ArgTy->isVectorTy())
return false;
EVT ArgVT = TLI.getValueType(ArgTy);
if (!ArgVT.isSimple()) return false;
switch (ArgVT.getSimpleVT().SimpleTy) {
+ default: return false;
case MVT::i32:
case MVT::i64:
+ ++GPRCnt;
+ break;
+ case MVT::f32:
+ case MVT::f64:
+ if (!Subtarget->hasSSE1())
+ return false;
+ ++FPRCnt;
break;
- default:
- return false;
}
+
+ if (GPRCnt > 6)
+ return false;
+
+ if (FPRCnt > 8)
+ return false;
}
static const MCPhysReg GPR32ArgRegs[] = {
@@ -1827,24 +2604,33 @@ bool X86FastISel::FastLowerArguments() {
static const MCPhysReg GPR64ArgRegs[] = {
X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8 , X86::R9
};
+ static const MCPhysReg XMMArgRegs[] = {
+ X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
+ X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
+ };
- Idx = 0;
- const TargetRegisterClass *RC32 = TLI.getRegClassFor(MVT::i32);
- const TargetRegisterClass *RC64 = TLI.getRegClassFor(MVT::i64);
- for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
- I != E; ++I, ++Idx) {
- bool is32Bit = TLI.getValueType(I->getType()) == MVT::i32;
- const TargetRegisterClass *RC = is32Bit ? RC32 : RC64;
- unsigned SrcReg = is32Bit ? GPR32ArgRegs[Idx] : GPR64ArgRegs[Idx];
+ unsigned GPRIdx = 0;
+ unsigned FPRIdx = 0;
+ for (auto const &Arg : F->args()) {
+ MVT VT = TLI.getSimpleValueType(Arg.getType());
+ const TargetRegisterClass *RC = TLI.getRegClassFor(VT);
+ unsigned SrcReg;
+ switch (VT.SimpleTy) {
+ default: llvm_unreachable("Unexpected value type.");
+ case MVT::i32: SrcReg = GPR32ArgRegs[GPRIdx++]; break;
+ case MVT::i64: SrcReg = GPR64ArgRegs[GPRIdx++]; break;
+ case MVT::f32: // fall-through
+ case MVT::f64: SrcReg = XMMArgRegs[FPRIdx++]; break;
+ }
unsigned DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC);
// FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
// Without this, EmitLiveInCopies may eliminate the livein if its only
// use is a bitcast (which isn't turned into an instruction).
unsigned ResultReg = createResultReg(RC);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
- TII.get(TargetOpcode::COPY),
- ResultReg).addReg(DstReg, getKillRegState(true));
- UpdateValueMap(I, ResultReg);
+ TII.get(TargetOpcode::COPY), ResultReg)
+ .addReg(DstReg, getKillRegState(true));
+ UpdateValueMap(&Arg, ResultReg);
}
return true;
}
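
The two counters above walk the SysV x86-64 argument registers independently: integer arguments consume RDI, RSI, RDX, RCX, R8, R9 and floating-point arguments consume XMM0 through XMM7, and the earlier loop has already rejected any function needing more than that. A toy version of the assignment; assignArgRegs and ArgClass are illustrative names, not LLVM API:

#include <cstdio>
#include <string>
#include <vector>

enum class ArgClass { I64, F64 };

// Assumes the argument counts were validated beforehand (<= 6 GPR, <= 8 XMM).
static std::vector<std::string>
assignArgRegs(const std::vector<ArgClass> &Args) {
  static const char *GPR[] = {"rdi", "rsi", "rdx", "rcx", "r8", "r9"};
  static const char *XMM[] = {"xmm0", "xmm1", "xmm2", "xmm3",
                              "xmm4", "xmm5", "xmm6", "xmm7"};
  std::vector<std::string> Out;
  unsigned GPRIdx = 0, FPRIdx = 0; // independent cursors, as above
  for (ArgClass C : Args)
    Out.push_back(C == ArgClass::I64 ? GPR[GPRIdx++] : XMM[FPRIdx++]);
  return Out;
}

int main() {
  // f(long, double, long) -> rdi xmm0 rsi
  for (const std::string &R :
       assignArgRegs({ArgClass::I64, ArgClass::F64, ArgClass::I64}))
    std::printf("%s ", R.c_str());
  std::printf("\n");
}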
@@ -2147,7 +2933,7 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) {
if (!X86FastEmitStore(ArgVT, ArgVal, AM))
return false;
} else {
- if (!X86FastEmitStore(ArgVT, Arg, AM))
+ if (!X86FastEmitStore(ArgVT, Arg, /*ValIsKill=*/false, AM))
return false;
}
}
@@ -2430,7 +3216,7 @@ unsigned X86FastISel::TargetMaterializeConstant(const Constant *C) {
return 0;
}
- // Materialize addresses with LEA instructions.
+ // Materialize addresses with LEA/MOV instructions.
if (isa<GlobalValue>(C)) {
X86AddressMode AM;
if (X86SelectAddress(C, AM)) {
@@ -2440,10 +3226,19 @@ unsigned X86FastISel::TargetMaterializeConstant(const Constant *C) {
AM.IndexReg == 0 && AM.Disp == 0 && AM.GV == nullptr)
return AM.Base.Reg;
- Opc = TLI.getPointerTy() == MVT::i32 ? X86::LEA32r : X86::LEA64r;
unsigned ResultReg = createResultReg(RC);
- addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ if (TM.getRelocationModel() == Reloc::Static &&
+ TLI.getPointerTy() == MVT::i64) {
+        // The displacement could be more than 32 bits away, so we need to use
+        // an instruction with a 64-bit immediate.
+ Opc = X86::MOV64ri;
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(Opc), ResultReg).addGlobalAddress(cast<GlobalValue>(C));
+ } else {
+ Opc = TLI.getPointerTy() == MVT::i32 ? X86::LEA32r : X86::LEA64r;
+ addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(Opc), ResultReg), AM);
+ }
return ResultReg;
}
return 0;
@@ -2544,8 +3339,9 @@ unsigned X86FastISel::TargetMaterializeFloatZero(const ConstantFP *CF) {
bool X86FastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
const LoadInst *LI) {
+ const Value *Ptr = LI->getPointerOperand();
X86AddressMode AM;
- if (!X86SelectAddress(LI->getOperand(0), AM))
+ if (!X86SelectAddress(Ptr, AM))
return false;
const X86InstrInfo &XII = (const X86InstrInfo&)TII;
@@ -2553,13 +3349,18 @@ bool X86FastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
unsigned Size = DL.getTypeAllocSize(LI->getType());
unsigned Alignment = LI->getAlignment();
+ if (Alignment == 0) // Ensure that codegen never sees alignment 0
+ Alignment = DL.getABITypeAlignment(LI->getType());
+
SmallVector<MachineOperand, 8> AddrOps;
AM.getFullAddress(AddrOps);
MachineInstr *Result =
XII.foldMemoryOperandImpl(*FuncInfo.MF, MI, OpNo, AddrOps, Size, Alignment);
- if (!Result) return false;
+ if (!Result)
+ return false;
+ Result->addMemOperand(*FuncInfo.MF, createMachineMemOperandFor(LI));
FuncInfo.MBB->insert(FuncInfo.InsertPt, Result);
MI->eraseFromParent();
return true;
diff --git a/lib/Target/X86/X86FixupLEAs.cpp b/lib/Target/X86/X86FixupLEAs.cpp
index 6c5b86f..4be766a 100644
--- a/lib/Target/X86/X86FixupLEAs.cpp
+++ b/lib/Target/X86/X86FixupLEAs.cpp
@@ -32,86 +32,89 @@ using namespace llvm;
STATISTIC(NumLEAs, "Number of LEA instructions created");
namespace {
- class FixupLEAPass : public MachineFunctionPass {
- enum RegUsageState { RU_NotUsed, RU_Write, RU_Read };
- static char ID;
- /// \brief Loop over all of the instructions in the basic block
- /// replacing applicable instructions with LEA instructions,
- /// where appropriate.
- bool processBasicBlock(MachineFunction &MF, MachineFunction::iterator MFI);
+class FixupLEAPass : public MachineFunctionPass {
+ enum RegUsageState { RU_NotUsed, RU_Write, RU_Read };
+ static char ID;
+ /// \brief Loop over all of the instructions in the basic block
+ /// replacing applicable instructions with LEA instructions,
+ /// where appropriate.
+ bool processBasicBlock(MachineFunction &MF, MachineFunction::iterator MFI);
- const char *getPassName() const override { return "X86 Atom LEA Fixup";}
+ const char *getPassName() const override { return "X86 Atom LEA Fixup"; }
- /// \brief Given a machine register, look for the instruction
- /// which writes it in the current basic block. If found,
- /// try to replace it with an equivalent LEA instruction.
- /// If replacement succeeds, then also process the the newly created
- /// instruction.
- void seekLEAFixup(MachineOperand& p, MachineBasicBlock::iterator& I,
- MachineFunction::iterator MFI);
+ /// \brief Given a machine register, look for the instruction
+ /// which writes it in the current basic block. If found,
+ /// try to replace it with an equivalent LEA instruction.
+  /// If replacement succeeds, then also process the newly created
+ /// instruction.
+ void seekLEAFixup(MachineOperand &p, MachineBasicBlock::iterator &I,
+ MachineFunction::iterator MFI);
- /// \brief Given a memory access or LEA instruction
- /// whose address mode uses a base and/or index register, look for
- /// an opportunity to replace the instruction which sets the base or index
- /// register with an equivalent LEA instruction.
- void processInstruction(MachineBasicBlock::iterator& I,
- MachineFunction::iterator MFI);
+ /// \brief Given a memory access or LEA instruction
+ /// whose address mode uses a base and/or index register, look for
+ /// an opportunity to replace the instruction which sets the base or index
+ /// register with an equivalent LEA instruction.
+ void processInstruction(MachineBasicBlock::iterator &I,
+ MachineFunction::iterator MFI);
- /// \brief Given a LEA instruction which is unprofitable
- /// on Silvermont try to replace it with an equivalent ADD instruction
- void processInstructionForSLM(MachineBasicBlock::iterator& I,
- MachineFunction::iterator MFI);
+ /// \brief Given a LEA instruction which is unprofitable
+ /// on Silvermont try to replace it with an equivalent ADD instruction
+ void processInstructionForSLM(MachineBasicBlock::iterator &I,
+ MachineFunction::iterator MFI);
- /// \brief Determine if an instruction references a machine register
- /// and, if so, whether it reads or writes the register.
- RegUsageState usesRegister(MachineOperand& p,
- MachineBasicBlock::iterator I);
+ /// \brief Determine if an instruction references a machine register
+ /// and, if so, whether it reads or writes the register.
+ RegUsageState usesRegister(MachineOperand &p, MachineBasicBlock::iterator I);
- /// \brief Step backwards through a basic block, looking
- /// for an instruction which writes a register within
- /// a maximum of INSTR_DISTANCE_THRESHOLD instruction latency cycles.
- MachineBasicBlock::iterator searchBackwards(MachineOperand& p,
- MachineBasicBlock::iterator& I,
- MachineFunction::iterator MFI);
+ /// \brief Step backwards through a basic block, looking
+ /// for an instruction which writes a register within
+ /// a maximum of INSTR_DISTANCE_THRESHOLD instruction latency cycles.
+ MachineBasicBlock::iterator searchBackwards(MachineOperand &p,
+ MachineBasicBlock::iterator &I,
+ MachineFunction::iterator MFI);
- /// \brief if an instruction can be converted to an
- /// equivalent LEA, insert the new instruction into the basic block
- /// and return a pointer to it. Otherwise, return zero.
- MachineInstr* postRAConvertToLEA(MachineFunction::iterator &MFI,
- MachineBasicBlock::iterator &MBBI) const;
+ /// \brief if an instruction can be converted to an
+ /// equivalent LEA, insert the new instruction into the basic block
+ /// and return a pointer to it. Otherwise, return zero.
+ MachineInstr *postRAConvertToLEA(MachineFunction::iterator &MFI,
+ MachineBasicBlock::iterator &MBBI) const;
- public:
- FixupLEAPass() : MachineFunctionPass(ID) {}
+public:
+ FixupLEAPass() : MachineFunctionPass(ID) {}
- /// \brief Loop over all of the basic blocks,
- /// replacing instructions by equivalent LEA instructions
- /// if needed and when possible.
- bool runOnMachineFunction(MachineFunction &MF) override;
+ /// \brief Loop over all of the basic blocks,
+ /// replacing instructions by equivalent LEA instructions
+ /// if needed and when possible.
+ bool runOnMachineFunction(MachineFunction &MF) override;
- private:
- MachineFunction *MF;
- const TargetMachine *TM;
- const X86InstrInfo *TII; // Machine instruction info.
-
- };
- char FixupLEAPass::ID = 0;
+private:
+ MachineFunction *MF;
+ const TargetMachine *TM;
+ const X86InstrInfo *TII; // Machine instruction info.
+};
+char FixupLEAPass::ID = 0;
}
MachineInstr *
FixupLEAPass::postRAConvertToLEA(MachineFunction::iterator &MFI,
MachineBasicBlock::iterator &MBBI) const {
- MachineInstr* MI = MBBI;
- MachineInstr* NewMI;
+ MachineInstr *MI = MBBI;
+ MachineInstr *NewMI;
switch (MI->getOpcode()) {
case X86::MOV32rr:
case X86::MOV64rr: {
- const MachineOperand& Src = MI->getOperand(1);
- const MachineOperand& Dest = MI->getOperand(0);
+ const MachineOperand &Src = MI->getOperand(1);
+ const MachineOperand &Dest = MI->getOperand(0);
NewMI = BuildMI(*MF, MI->getDebugLoc(),
- TII->get( MI->getOpcode() == X86::MOV32rr ? X86::LEA32r : X86::LEA64r))
- .addOperand(Dest)
- .addOperand(Src).addImm(1).addReg(0).addImm(0).addReg(0);
- MFI->insert(MBBI, NewMI); // Insert the new inst
+ TII->get(MI->getOpcode() == X86::MOV32rr ? X86::LEA32r
+ : X86::LEA64r))
+ .addOperand(Dest)
+ .addOperand(Src)
+ .addImm(1)
+ .addReg(0)
+ .addImm(0)
+ .addReg(0);
+ MFI->insert(MBBI, NewMI); // Insert the new inst
return NewMI;
}
case X86::ADD64ri32:
@@ -144,17 +147,16 @@ FixupLEAPass::postRAConvertToLEA(MachineFunction::iterator &MFI,
return TII->convertToThreeAddress(MFI, MBBI, nullptr);
}
-FunctionPass *llvm::createX86FixupLEAs() {
- return new FixupLEAPass();
-}
+FunctionPass *llvm::createX86FixupLEAs() { return new FixupLEAPass(); }
bool FixupLEAPass::runOnMachineFunction(MachineFunction &Func) {
+ MF = &Func;
TM = &Func.getTarget();
const X86Subtarget &ST = TM->getSubtarget<X86Subtarget>();
if (!ST.LEAusesAG() && !ST.slowLEA())
return false;
- TII = static_cast<const X86InstrInfo*>(TM->getInstrInfo());
+ TII = static_cast<const X86InstrInfo *>(TM->getInstrInfo());
DEBUG(dbgs() << "Start X86FixupLEAs\n";);
// Process all basic blocks.
@@ -165,14 +167,14 @@ bool FixupLEAPass::runOnMachineFunction(MachineFunction &Func) {
return true;
}
-FixupLEAPass::RegUsageState FixupLEAPass::usesRegister(MachineOperand& p,
- MachineBasicBlock::iterator I) {
+FixupLEAPass::RegUsageState
+FixupLEAPass::usesRegister(MachineOperand &p, MachineBasicBlock::iterator I) {
RegUsageState RegUsage = RU_NotUsed;
- MachineInstr* MI = I;
+ MachineInstr *MI = I;
for (unsigned int i = 0; i < MI->getNumOperands(); ++i) {
- MachineOperand& opnd = MI->getOperand(i);
- if (opnd.isReg() && opnd.getReg() == p.getReg()){
+ MachineOperand &opnd = MI->getOperand(i);
+ if (opnd.isReg() && opnd.getReg() == p.getReg()) {
if (opnd.isDef())
return RU_Write;
RegUsage = RU_Read;
@@ -185,23 +187,22 @@ FixupLEAPass::RegUsageState FixupLEAPass::usesRegister(MachineOperand& p,
/// block, return a reference to the previous instruction in the block,
/// wrapping around to the last instruction of the block if the block
/// branches to itself.
-static inline bool getPreviousInstr(MachineBasicBlock::iterator& I,
+static inline bool getPreviousInstr(MachineBasicBlock::iterator &I,
MachineFunction::iterator MFI) {
if (I == MFI->begin()) {
if (MFI->isPredecessor(MFI)) {
I = --MFI->end();
return true;
- }
- else
+ } else
return false;
}
--I;
return true;
}
-MachineBasicBlock::iterator FixupLEAPass::searchBackwards(MachineOperand& p,
- MachineBasicBlock::iterator& I,
- MachineFunction::iterator MFI) {
+MachineBasicBlock::iterator
+FixupLEAPass::searchBackwards(MachineOperand &p, MachineBasicBlock::iterator &I,
+ MachineFunction::iterator MFI) {
int InstrDistance = 1;
MachineBasicBlock::iterator CurInst;
static const int INSTR_DISTANCE_THRESHOLD = 5;
@@ -209,12 +210,12 @@ MachineBasicBlock::iterator FixupLEAPass::searchBackwards(MachineOperand& p,
CurInst = I;
bool Found;
Found = getPreviousInstr(CurInst, MFI);
- while( Found && I != CurInst) {
+ while (Found && I != CurInst) {
if (CurInst->isCall() || CurInst->isInlineAsm())
break;
if (InstrDistance > INSTR_DISTANCE_THRESHOLD)
break; // too far back to make a difference
- if (usesRegister(p, CurInst) == RU_Write){
+ if (usesRegister(p, CurInst) == RU_Write) {
return CurInst;
}
InstrDistance += TII->getInstrLatency(TM->getInstrItineraryData(), CurInst);
@@ -223,32 +224,32 @@ MachineBasicBlock::iterator FixupLEAPass::searchBackwards(MachineOperand& p,
return nullptr;
}
-void FixupLEAPass::processInstruction(MachineBasicBlock::iterator& I,
+void FixupLEAPass::processInstruction(MachineBasicBlock::iterator &I,
MachineFunction::iterator MFI) {
// Process a load, store, or LEA instruction.
MachineInstr *MI = I;
int opcode = MI->getOpcode();
- const MCInstrDesc& Desc = MI->getDesc();
+ const MCInstrDesc &Desc = MI->getDesc();
int AddrOffset = X86II::getMemoryOperandNo(Desc.TSFlags, opcode);
if (AddrOffset >= 0) {
AddrOffset += X86II::getOperandBias(Desc);
- MachineOperand& p = MI->getOperand(AddrOffset + X86::AddrBaseReg);
+ MachineOperand &p = MI->getOperand(AddrOffset + X86::AddrBaseReg);
if (p.isReg() && p.getReg() != X86::ESP) {
seekLEAFixup(p, I, MFI);
}
- MachineOperand& q = MI->getOperand(AddrOffset + X86::AddrIndexReg);
+ MachineOperand &q = MI->getOperand(AddrOffset + X86::AddrIndexReg);
if (q.isReg() && q.getReg() != X86::ESP) {
seekLEAFixup(q, I, MFI);
}
}
}
-void FixupLEAPass::seekLEAFixup(MachineOperand& p,
- MachineBasicBlock::iterator& I,
+void FixupLEAPass::seekLEAFixup(MachineOperand &p,
+ MachineBasicBlock::iterator &I,
MachineFunction::iterator MFI) {
MachineBasicBlock::iterator MBI = searchBackwards(p, I, MFI);
if (MBI) {
- MachineInstr* NewMI = postRAConvertToLEA(MFI, MBI);
+ MachineInstr *NewMI = postRAConvertToLEA(MFI, MBI);
if (NewMI) {
++NumLEAs;
DEBUG(dbgs() << "FixLEA: Candidate to replace:"; MBI->dump(););
@@ -256,7 +257,7 @@ void FixupLEAPass::seekLEAFixup(MachineOperand& p,
DEBUG(dbgs() << "FixLEA: Replaced by: "; NewMI->dump(););
MFI->erase(MBI);
MachineBasicBlock::iterator J =
- static_cast<MachineBasicBlock::iterator> (NewMI);
+ static_cast<MachineBasicBlock::iterator>(NewMI);
processInstruction(J, MFI);
}
}
@@ -299,7 +300,7 @@ void FixupLEAPass::processInstructionForSLM(MachineBasicBlock::iterator &I,
}
DEBUG(dbgs() << "FixLEA: Candidate to replace:"; I->dump(););
DEBUG(dbgs() << "FixLEA: Replaced by: ";);
- MachineInstr *NewMI = 0;
+ MachineInstr *NewMI = nullptr;
const MachineOperand &Dst = MI->getOperand(0);
// Make ADD instruction for two registers writing to LEA's destination
if (SrcR1 != 0 && SrcR2 != 0) {
diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp
index 4c1374f..8c029a8 100644
--- a/lib/Target/X86/X86FrameLowering.cpp
+++ b/lib/Target/X86/X86FrameLowering.cpp
@@ -29,6 +29,7 @@
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/Debug.h"
using namespace llvm;
@@ -45,7 +46,7 @@ bool X86FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
bool X86FrameLowering::hasFP(const MachineFunction &MF) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
const MachineModuleInfo &MMI = MF.getMMI();
- const TargetRegisterInfo *RegInfo = TM.getRegisterInfo();
+ const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo();
return (MF.getTarget().Options.DisableFramePointerElim(MF) ||
RegInfo->needsStackRealignment(MF) ||
@@ -305,65 +306,25 @@ static bool isEAXLiveIn(MachineFunction &MF) {
return false;
}
-void X86FrameLowering::emitCalleeSavedFrameMoves(
- MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, DebugLoc DL,
- unsigned FramePtr) const {
+void
+X86FrameLowering::emitCalleeSavedFrameMoves(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ DebugLoc DL) const {
MachineFunction &MF = *MBB.getParent();
MachineFrameInfo *MFI = MF.getFrameInfo();
MachineModuleInfo &MMI = MF.getMMI();
const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
- const X86InstrInfo &TII = *TM.getInstrInfo();
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
// Add callee saved registers to move list.
const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
if (CSI.empty()) return;
- const X86RegisterInfo *RegInfo = TM.getRegisterInfo();
- bool HasFP = hasFP(MF);
-
- // Calculate amount of bytes used for return address storing.
- int stackGrowth = -RegInfo->getSlotSize();
-
- // FIXME: This is dirty hack. The code itself is pretty mess right now.
- // It should be rewritten from scratch and generalized sometimes.
-
- // Determine maximum offset (minimum due to stack growth).
- int64_t MaxOffset = 0;
- for (std::vector<CalleeSavedInfo>::const_iterator
- I = CSI.begin(), E = CSI.end(); I != E; ++I)
- MaxOffset = std::min(MaxOffset,
- MFI->getObjectOffset(I->getFrameIdx()));
-
// Calculate offsets.
- int64_t saveAreaOffset = (HasFP ? 3 : 2) * stackGrowth;
for (std::vector<CalleeSavedInfo>::const_iterator
I = CSI.begin(), E = CSI.end(); I != E; ++I) {
int64_t Offset = MFI->getObjectOffset(I->getFrameIdx());
unsigned Reg = I->getReg();
- Offset = MaxOffset - Offset + saveAreaOffset;
-
- // Don't output a new machine move if we're re-saving the frame
- // pointer. This happens when the PrologEpilogInserter has inserted an extra
- // "PUSH" of the frame pointer -- the "emitPrologue" method automatically
- // generates one when frame pointers are used. If we generate a "machine
- // move" for this extra "PUSH", the linker will lose track of the fact that
- // the frame pointer should have the value of the first "PUSH" when it's
- // trying to unwind.
- //
- // FIXME: This looks inelegant. It's possibly correct, but it's covering up
- // another bug. I.e., one where we generate a prolog like this:
- //
- // pushl %ebp
- // movl %esp, %ebp
- // pushl %ebp
- // pushl %esi
- // ...
- //
- // The immediate re-push of EBP is unnecessary. At the least, it's an
- // optimization bug. EBP can be used as a scratch register in certain
- // cases, but probably not when we have a frame pointer.
- if (HasFP && FramePtr == Reg)
- continue;
unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);
unsigned CFIIndex =
@@ -395,23 +356,107 @@ static bool usesTheStack(const MachineFunction &MF) {
/// automatically adjust the stack pointer. Adjust the stack pointer to allocate
/// space for local variables. Also emit labels used by the exception handler to
/// generate the exception handling frames.
+
+/*
+ Here's a gist of what gets emitted:
+
+ ; Establish frame pointer, if needed
+ [if needs FP]
+ push %rbp
+ .cfi_def_cfa_offset 16
+ .cfi_offset %rbp, -16
+     .seh_pushreg %rbp
+ mov %rsp, %rbp
+ .cfi_def_cfa_register %rbp
+
+ ; Spill general-purpose registers
+ [for all callee-saved GPRs]
+ pushq %<reg>
+ [if not needs FP]
+ .cfi_def_cfa_offset (offset from RETADDR)
+ .seh_pushreg %<reg>
+
+ ; If the required stack alignment > default stack alignment
+ ; rsp needs to be re-aligned. This creates a "re-alignment gap"
+ ; of unknown size in the stack frame.
+ [if stack needs re-alignment]
+ and $MASK, %rsp
+
+ ; Allocate space for locals
+ [if target is Windows and allocated space > 4096 bytes]
+ ; Windows needs special care for allocations larger
+ ; than one page.
+ mov $NNN, %rax
+ call ___chkstk_ms/___chkstk
+ sub %rax, %rsp
+ [else]
+ sub $NNN, %rsp
+
+ [if needs FP]
+ .seh_stackalloc (size of XMM spill slots)
+ .seh_setframe %rbp, SEHFrameOffset ; = size of all spill slots
+ [else]
+ .seh_stackalloc NNN
+
+ ; Spill XMMs
+  ; Note that while only the Windows 64 ABI specifies XMMs as callee-preserved,
+ ; they may get spilled on any platform, if the current function
+ ; calls @llvm.eh.unwind.init
+ [if needs FP]
+ [for all callee-saved XMM registers]
+ movaps %<xmm reg>, -MMM(%rbp)
+ [for all callee-saved XMM registers]
+ .seh_savexmm %<xmm reg>, (-MMM + SEHFrameOffset)
+ ; i.e. the offset relative to (%rbp - SEHFrameOffset)
+ [else]
+ [for all callee-saved XMM registers]
+ movaps %<xmm reg>, KKK(%rsp)
+ [for all callee-saved XMM registers]
+ .seh_savexmm %<xmm reg>, KKK
+
+ .seh_endprologue
+
+ [if needs base pointer]
+ mov %rsp, %rbx
+
+ ; Emit CFI info
+ [if needs FP]
+ [for all callee-saved registers]
+ .cfi_offset %<reg>, (offset from %rbp)
+ [else]
+ .cfi_def_cfa_offset (offset from RETADDR)
+ [for all callee-saved registers]
+ .cfi_offset %<reg>, (offset from %rsp)
+
+ Notes:
+ - .seh directives are emitted only for Windows 64 ABI
+ - .cfi directives are emitted for all other ABIs
+ - for 32-bit code, substitute %e?? registers for %r??
+*/
+
void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
MachineBasicBlock &MBB = MF.front(); // Prologue goes in entry BB.
MachineBasicBlock::iterator MBBI = MBB.begin();
MachineFrameInfo *MFI = MF.getFrameInfo();
const Function *Fn = MF.getFunction();
- const X86RegisterInfo *RegInfo = TM.getRegisterInfo();
- const X86InstrInfo &TII = *TM.getInstrInfo();
+ const X86RegisterInfo *RegInfo =
+ static_cast<const X86RegisterInfo *>(MF.getTarget().getRegisterInfo());
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
MachineModuleInfo &MMI = MF.getMMI();
X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
- bool needsFrameMoves = MMI.hasDebugInfo() ||
- Fn->needsUnwindTableEntry();
uint64_t MaxAlign = MFI->getMaxAlignment(); // Desired stack alignment.
uint64_t StackSize = MFI->getStackSize(); // Number of bytes to allocate.
bool HasFP = hasFP(MF);
+ const X86Subtarget &STI = MF.getTarget().getSubtarget<X86Subtarget>();
bool Is64Bit = STI.is64Bit();
bool IsLP64 = STI.isTarget64BitLP64();
bool IsWin64 = STI.isTargetWin64();
+ bool IsWinEH =
+ MF.getTarget().getMCAsmInfo()->getExceptionHandlingType() ==
+ ExceptionHandling::WinEH; // Not necessarily synonymous with IsWin64.
+ bool NeedsWinEH = IsWinEH && Fn->needsUnwindTableEntry();
+ bool NeedsDwarfCFI =
+ !IsWinEH && (MMI.hasDebugInfo() || Fn->needsUnwindTableEntry());
bool UseLEA = STI.useLeaForSP();
unsigned StackAlign = getStackAlignment();
unsigned SlotSize = RegInfo->getSlotSize();
@@ -509,7 +554,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
.addReg(FramePtr, RegState::Kill)
.setMIFlag(MachineInstr::FrameSetup);
- if (needsFrameMoves) {
+ if (NeedsDwarfCFI) {
// Mark the place where EBP/RBP was saved.
// Define the current CFA rule to use the provided offset.
assert(StackSize);
@@ -527,13 +572,19 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
.addCFIIndex(CFIIndex);
}
+ if (NeedsWinEH) {
+ BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg))
+ .addImm(FramePtr)
+ .setMIFlag(MachineInstr::FrameSetup);
+ }
+
// Update EBP with the new base value.
BuildMI(MBB, MBBI, DL,
TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr), FramePtr)
.addReg(StackPtr)
.setMIFlag(MachineInstr::FrameSetup);
- if (needsFrameMoves) {
+ if (NeedsDwarfCFI) {
// Mark effective beginning of when frame pointer becomes valid.
// Define the current CFA to use the EBP/RBP register.
unsigned DwarfFramePtr = RegInfo->getDwarfRegNum(FramePtr, true);
@@ -543,9 +594,8 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
.addCFIIndex(CFIIndex);
}
- // Mark the FramePtr as live-in in every block except the entry.
- for (MachineFunction::iterator I = std::next(MF.begin()), E = MF.end();
- I != E; ++I)
+ // Mark the FramePtr as live-in in every block.
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I)
I->addLiveIn(FramePtr);
} else {
NumBytes = StackSize - X86FI->getCalleeSavedFrameSize();
@@ -559,10 +609,10 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
(MBBI->getOpcode() == X86::PUSH32r ||
MBBI->getOpcode() == X86::PUSH64r)) {
PushedRegs = true;
- MBBI->setFlag(MachineInstr::FrameSetup);
+ unsigned Reg = MBBI->getOperand(0).getReg();
++MBBI;
- if (!HasFP && needsFrameMoves) {
+ if (!HasFP && NeedsDwarfCFI) {
// Mark callee-saved push instruction.
// Define the current CFA rule to use the provided offset.
assert(StackSize);
@@ -572,16 +622,15 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
.addCFIIndex(CFIIndex);
StackOffset += stackGrowth;
}
+
+ if (NeedsWinEH) {
+ BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg)).addImm(Reg).setMIFlag(
+ MachineInstr::FrameSetup);
+ }
}
// Realign stack after we pushed callee-saved registers (so that we'll be
// able to calculate their offsets from the frame pointer).
-
- // NOTE: We push the registers before realigning the stack, so
- // vector callee-saved (xmm) registers may be saved w/o proper
- // alignment in this way. However, currently these regs are saved in
- // stack slots (see X86FrameLowering::spillCalleeSavedRegisters()), so
- // this shouldn't be a problem.
if (RegInfo->needsStackRealignment(MF)) {
assert(HasFP && "There should be a frame pointer if stack is realigned.");
MachineInstr *MI =
@@ -680,23 +729,88 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
MI->setFlag(MachineInstr::FrameSetup);
MBB.insert(MBBI, MI);
}
- } else if (NumBytes)
+ } else if (NumBytes) {
emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit, IsLP64,
UseLEA, TII, *RegInfo);
+ }
+
+ int SEHFrameOffset = 0;
+ if (NeedsWinEH) {
+ if (HasFP) {
+ // We need to set frame base offset low enough such that all saved
+ // register offsets would be positive relative to it, but we can't
+ // just use NumBytes, because .seh_setframe offset must be <=240.
+ // So we pretend to have only allocated enough space to spill the
+ // non-volatile registers.
+      // We don't care about the rest of the stack allocation, because the
+      // unwinder will restore SP to (BP - SEHFrameOffset)
+ for (const CalleeSavedInfo &Info : MFI->getCalleeSavedInfo()) {
+ int offset = MFI->getObjectOffset(Info.getFrameIdx());
+ SEHFrameOffset = std::max(SEHFrameOffset, abs(offset));
+ }
+      SEHFrameOffset += SEHFrameOffset % 16; // ensure alignment
+
+ // This only needs to account for XMM spill slots, GPR slots
+ // are covered by the .seh_pushreg's emitted above.
+ unsigned Size = SEHFrameOffset - X86FI->getCalleeSavedFrameSize();
+ if (Size) {
+ BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_StackAlloc))
+ .addImm(Size)
+ .setMIFlag(MachineInstr::FrameSetup);
+ }
+
+ BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SetFrame))
+ .addImm(FramePtr)
+ .addImm(SEHFrameOffset)
+ .setMIFlag(MachineInstr::FrameSetup);
+ } else {
+ // SP will be the base register for restoring XMMs
+ if (NumBytes) {
+ BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_StackAlloc))
+ .addImm(NumBytes)
+ .setMIFlag(MachineInstr::FrameSetup);
+ }
+ }
+ }
+
+ // Skip the rest of register spilling code
+ while (MBBI != MBB.end() && MBBI->getFlag(MachineInstr::FrameSetup))
+ ++MBBI;
+
+ // Emit SEH info for non-GPRs
+ if (NeedsWinEH) {
+ for (const CalleeSavedInfo &Info : MFI->getCalleeSavedInfo()) {
+ unsigned Reg = Info.getReg();
+ if (X86::GR64RegClass.contains(Reg) || X86::GR32RegClass.contains(Reg))
+ continue;
+ assert(X86::FR64RegClass.contains(Reg) && "Unexpected register class");
+
+ int Offset = getFrameIndexOffset(MF, Info.getFrameIdx());
+ Offset += SEHFrameOffset;
+
+ BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SaveXMM))
+ .addImm(Reg)
+ .addImm(Offset)
+ .setMIFlag(MachineInstr::FrameSetup);
+ }
+
+ BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_EndPrologue))
+ .setMIFlag(MachineInstr::FrameSetup);
+ }
// If we need a base pointer, set it up here. It's whatever the value
// of the stack pointer is at this point. Any variable size objects
// will be allocated after this, so we can still use the base pointer
// to reference locals.
if (RegInfo->hasBasePointer(MF)) {
- // Update the frame pointer with the current stack pointer.
+ // Update the base pointer with the current stack pointer.
unsigned Opc = Is64Bit ? X86::MOV64rr : X86::MOV32rr;
BuildMI(MBB, MBBI, DL, TII.get(Opc), BasePtr)
.addReg(StackPtr)
.setMIFlag(MachineInstr::FrameSetup);
}
- if (( (!HasFP && NumBytes) || PushedRegs) && needsFrameMoves) {
+ if (((!HasFP && NumBytes) || PushedRegs) && NeedsDwarfCFI) {
// Mark end of stack pointer adjustment.
if (!HasFP && NumBytes) {
// Define the current CFA rule to use the provided offset.
@@ -711,7 +825,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
// Emit DWARF info specifying the offsets of the callee-saved registers.
if (PushedRegs)
- emitCalleeSavedFrameMoves(MBB, MBBI, DL, HasFP ? FramePtr : StackPtr);
+ emitCalleeSavedFrameMoves(MBB, MBBI, DL);
}
}
@@ -719,12 +833,14 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
- const X86RegisterInfo *RegInfo = TM.getRegisterInfo();
- const X86InstrInfo &TII = *TM.getInstrInfo();
+ const X86RegisterInfo *RegInfo =
+ static_cast<const X86RegisterInfo *>(MF.getTarget().getRegisterInfo());
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
assert(MBBI != MBB.end() && "Returning block has no instructions");
unsigned RetOpcode = MBBI->getOpcode();
DebugLoc DL = MBBI->getDebugLoc();
+ const X86Subtarget &STI = MF.getTarget().getSubtarget<X86Subtarget>();
bool Is64Bit = STI.is64Bit();
bool IsLP64 = STI.isTarget64BitLP64();
bool UseLEA = STI.useLeaForSP();
@@ -969,46 +1085,97 @@ int X86FrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
return getFrameIndexOffset(MF, FI);
}
-bool X86FrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI,
- const TargetRegisterInfo *TRI) const {
- if (CSI.empty())
- return false;
+bool X86FrameLowering::assignCalleeSavedSpillSlots(
+ MachineFunction &MF, const TargetRegisterInfo *TRI,
+ std::vector<CalleeSavedInfo> &CSI) const {
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const X86RegisterInfo *RegInfo =
+ static_cast<const X86RegisterInfo *>(MF.getTarget().getRegisterInfo());
+ unsigned SlotSize = RegInfo->getSlotSize();
+ X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
- DebugLoc DL = MBB.findDebugLoc(MI);
+ unsigned CalleeSavedFrameSize = 0;
+ int SpillSlotOffset = getOffsetOfLocalArea() + X86FI->getTCReturnAddrDelta();
- MachineFunction &MF = *MBB.getParent();
+ if (hasFP(MF)) {
+    // emitPrologue always spills the frame register first.
+ SpillSlotOffset -= SlotSize;
+ MFI->CreateFixedSpillStackObject(SlotSize, SpillSlotOffset);
+
+ // Since emitPrologue and emitEpilogue will handle spilling and restoring of
+ // the frame register, we can delete it from CSI list and not have to worry
+ // about avoiding it later.
+ unsigned FPReg = RegInfo->getFrameRegister(MF);
+ for (unsigned i = 0; i < CSI.size(); ++i) {
+ if (CSI[i].getReg() == FPReg) {
+ CSI.erase(CSI.begin() + i);
+ break;
+ }
+ }
+ }
+
+ // Assign slots for GPRs. It increases frame size.
+ for (unsigned i = CSI.size(); i != 0; --i) {
+ unsigned Reg = CSI[i - 1].getReg();
+
+ if (!X86::GR64RegClass.contains(Reg) && !X86::GR32RegClass.contains(Reg))
+ continue;
- unsigned SlotSize = STI.is64Bit() ? 8 : 4;
- unsigned FPReg = TRI->getFrameRegister(MF);
- unsigned CalleeFrameSize = 0;
+ SpillSlotOffset -= SlotSize;
+ CalleeSavedFrameSize += SlotSize;
+
+ int SlotIndex = MFI->CreateFixedSpillStackObject(SlotSize, SpillSlotOffset);
+ CSI[i - 1].setFrameIdx(SlotIndex);
+ }
+
+ X86FI->setCalleeSavedFrameSize(CalleeSavedFrameSize);
+
+ // Assign slots for XMMs.
+ for (unsigned i = CSI.size(); i != 0; --i) {
+ unsigned Reg = CSI[i - 1].getReg();
+ if (X86::GR64RegClass.contains(Reg) || X86::GR32RegClass.contains(Reg))
+ continue;
+
+ const TargetRegisterClass *RC = RegInfo->getMinimalPhysRegClass(Reg);
+ // ensure alignment
+ SpillSlotOffset -= abs(SpillSlotOffset) % RC->getAlignment();
+ // spill into slot
+ SpillSlotOffset -= RC->getSize();
+ int SlotIndex =
+ MFI->CreateFixedSpillStackObject(RC->getSize(), SpillSlotOffset);
+ CSI[i - 1].setFrameIdx(SlotIndex);
+ MFI->ensureMaxAlignment(RC->getAlignment());
+ }
+
+ return true;
+}
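
assignCalleeSavedSpillSlots grows the fixed-object area downwards: each GPR takes a slot-sized chunk, while an XMM slot is first realigned (the offsets are negative, hence the abs()) and then carved out at its own size. A simplified single-pass model of that bookkeeping, starting from offset 0 rather than the local-area offset; SavedReg and assignSlots are made-up names:

#include <cstdio>
#include <cstdlib>
#include <vector>

struct SavedReg { bool IsXMM; int Offset; };

// Mirrors the descending SpillSlotOffset computation above: realign before a
// wider slot, then subtract the slot size and record the resulting offset.
static void assignSlots(std::vector<SavedReg> &Regs, int SlotSize) {
  int Offset = 0;
  for (SavedReg &R : Regs) {
    int Size  = R.IsXMM ? 16 : SlotSize;
    int Align = R.IsXMM ? 16 : SlotSize;
    Offset -= std::abs(Offset) % Align; // ensure alignment
    Offset -= Size;                     // carve out the slot
    R.Offset = Offset;
  }
}

int main() {
  std::vector<SavedReg> Regs = {{false, 0}, {false, 0}, {true, 0}};
  assignSlots(Regs, /*SlotSize=*/8);
  for (const SavedReg &R : Regs)
    std::printf("%d ", R.Offset); // -8 -16 -32
  std::printf("\n");
}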
+bool X86FrameLowering::spillCalleeSavedRegisters(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const {
+ DebugLoc DL = MBB.findDebugLoc(MI);
+
+ MachineFunction &MF = *MBB.getParent();
const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
- X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
+ const X86Subtarget &STI = MF.getTarget().getSubtarget<X86Subtarget>();
// Push GPRs. It increases frame size.
unsigned Opc = STI.is64Bit() ? X86::PUSH64r : X86::PUSH32r;
for (unsigned i = CSI.size(); i != 0; --i) {
- unsigned Reg = CSI[i-1].getReg();
- if (!X86::GR64RegClass.contains(Reg) &&
- !X86::GR32RegClass.contains(Reg))
+ unsigned Reg = CSI[i - 1].getReg();
+
+ if (!X86::GR64RegClass.contains(Reg) && !X86::GR32RegClass.contains(Reg))
continue;
// Add the callee-saved register as live-in. It's killed at the spill.
MBB.addLiveIn(Reg);
- if (Reg == FPReg)
- // X86RegisterInfo::emitPrologue will handle spilling of frame register.
- continue;
- CalleeFrameSize += SlotSize;
+
BuildMI(MBB, MI, DL, TII.get(Opc)).addReg(Reg, RegState::Kill)
.setMIFlag(MachineInstr::FrameSetup);
}
- X86FI->setCalleeSavedFrameSize(CalleeFrameSize);
-
// Make XMM regs spilled. X86 does not have ability of push/pop XMM.
// It can be done by spilling XMMs to stack frame.
- // Note that only Win64 ABI might spill XMMs.
for (unsigned i = CSI.size(); i != 0; --i) {
unsigned Reg = CSI[i-1].getReg();
if (X86::GR64RegClass.contains(Reg) ||
@@ -1017,8 +1184,12 @@ bool X86FrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
// Add the callee-saved register as live-in. It's killed at the spill.
MBB.addLiveIn(Reg);
const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
- TII.storeRegToStackSlot(MBB, MI, Reg, true, CSI[i-1].getFrameIdx(),
- RC, TRI);
+
+ TII.storeRegToStackSlot(MBB, MI, Reg, true, CSI[i - 1].getFrameIdx(), RC,
+ TRI);
+ --MI;
+ MI->setFlag(MachineInstr::FrameSetup);
+ ++MI;
}
return true;
@@ -1035,6 +1206,7 @@ bool X86FrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineFunction &MF = *MBB.getParent();
const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ const X86Subtarget &STI = MF.getTarget().getSubtarget<X86Subtarget>();
// Reload XMMs from stack frame.
for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
@@ -1042,22 +1214,19 @@ bool X86FrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
if (X86::GR64RegClass.contains(Reg) ||
X86::GR32RegClass.contains(Reg))
continue;
+
const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
- TII.loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(),
- RC, TRI);
+ TII.loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(), RC, TRI);
}
// POP GPRs.
- unsigned FPReg = TRI->getFrameRegister(MF);
unsigned Opc = STI.is64Bit() ? X86::POP64r : X86::POP32r;
for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
unsigned Reg = CSI[i].getReg();
if (!X86::GR64RegClass.contains(Reg) &&
!X86::GR32RegClass.contains(Reg))
continue;
- if (Reg == FPReg)
- // X86RegisterInfo::emitEpilogue will handle restoring of frame register.
- continue;
+
BuildMI(MBB, MI, DL, TII.get(Opc), Reg);
}
return true;
@@ -1065,9 +1234,10 @@ bool X86FrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
void
X86FrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
- RegScavenger *RS) const {
+ RegScavenger *RS) const {
MachineFrameInfo *MFI = MF.getFrameInfo();
- const X86RegisterInfo *RegInfo = TM.getRegisterInfo();
+ const X86RegisterInfo *RegInfo =
+ static_cast<const X86RegisterInfo *>(MF.getTarget().getRegisterInfo());
unsigned SlotSize = RegInfo->getSlotSize();
X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
@@ -1087,22 +1257,6 @@ X86FrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
TailCallReturnAddrDelta - SlotSize, true);
}
- if (hasFP(MF)) {
- assert((TailCallReturnAddrDelta <= 0) &&
- "The Delta should always be zero or negative");
- const TargetFrameLowering &TFI = *MF.getTarget().getFrameLowering();
-
- // Create a frame entry for the EBP register that must be saved.
- int FrameIdx = MFI->CreateFixedObject(SlotSize,
- -(int)SlotSize +
- TFI.getOffsetOfLocalArea() +
- TailCallReturnAddrDelta,
- true);
- assert(FrameIdx == MFI->getObjectIndexBegin() &&
- "Slot for EBP register must be last in order to be found!");
- (void)FrameIdx;
- }
-
// Spill the BasePtr if it's used.
if (RegInfo->hasBasePointer(MF))
MF.getRegInfo().setPhysRegUsed(RegInfo->getBaseRegister());
@@ -1160,8 +1314,9 @@ void
X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const {
MachineBasicBlock &prologueMBB = MF.front();
MachineFrameInfo *MFI = MF.getFrameInfo();
- const X86InstrInfo &TII = *TM.getInstrInfo();
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
uint64_t StackSize;
+ const X86Subtarget &STI = MF.getTarget().getSubtarget<X86Subtarget>();
bool Is64Bit = STI.is64Bit();
unsigned TlsReg, TlsOffset;
DebugLoc DL;
@@ -1368,9 +1523,12 @@ X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const {
/// temp0 = sp - MaxStack
/// if( temp0 < SP_LIMIT(P) ) goto IncStack else goto OldStart
void X86FrameLowering::adjustForHiPEPrologue(MachineFunction &MF) const {
- const X86InstrInfo &TII = *TM.getInstrInfo();
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
MachineFrameInfo *MFI = MF.getFrameInfo();
- const unsigned SlotSize = TM.getRegisterInfo()->getSlotSize();
+ const unsigned SlotSize =
+ static_cast<const X86RegisterInfo *>(MF.getTarget().getRegisterInfo())
+ ->getSlotSize();
+ const X86Subtarget &STI = MF.getTarget().getSubtarget<X86Subtarget>();
const bool Is64Bit = STI.is64Bit();
DebugLoc DL;
// HiPE-specific values
@@ -1499,12 +1657,14 @@ void X86FrameLowering::adjustForHiPEPrologue(MachineFunction &MF) const {
void X86FrameLowering::
eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const {
- const X86InstrInfo &TII = *TM.getInstrInfo();
- const X86RegisterInfo &RegInfo = *TM.getRegisterInfo();
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ const X86RegisterInfo &RegInfo =
+ *static_cast<const X86RegisterInfo *>(MF.getTarget().getRegisterInfo());
unsigned StackPtr = RegInfo.getStackRegister();
bool reseveCallFrame = hasReservedCallFrame(MF);
int Opcode = I->getOpcode();
bool isDestroy = Opcode == TII.getCallFrameDestroyOpcode();
+ const X86Subtarget &STI = MF.getTarget().getSubtarget<X86Subtarget>();
bool IsLP64 = STI.isTarget64BitLP64();
DebugLoc DL = I->getDebugLoc();
uint64_t Amount = !reseveCallFrame ? I->getOperand(0).getImm() : 0;
@@ -1522,7 +1682,8 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
// We need to keep the stack aligned properly. To do this, we round the
// amount of space needed for the outgoing arguments up to the next
// alignment boundary.
- unsigned StackAlign = TM.getFrameLowering()->getStackAlignment();
+ unsigned StackAlign =
+ MF.getTarget().getFrameLowering()->getStackAlignment();
Amount = (Amount + StackAlign - 1) / StackAlign * StackAlign;
MachineInstr *New = nullptr;
diff --git a/lib/Target/X86/X86FrameLowering.h b/lib/Target/X86/X86FrameLowering.h
index 208bb8b..5ad3d4d 100644
--- a/lib/Target/X86/X86FrameLowering.h
+++ b/lib/Target/X86/X86FrameLowering.h
@@ -14,7 +14,6 @@
#ifndef X86_FRAMELOWERING_H
#define X86_FRAMELOWERING_H
-#include "X86Subtarget.h"
#include "llvm/Target/TargetFrameLowering.h"
namespace llvm {
@@ -23,19 +22,13 @@ class MCSymbol;
class X86TargetMachine;
class X86FrameLowering : public TargetFrameLowering {
- const X86TargetMachine &TM;
- const X86Subtarget &STI;
public:
- explicit X86FrameLowering(const X86TargetMachine &tm, const X86Subtarget &sti)
- : TargetFrameLowering(StackGrowsDown,
- sti.getStackAlignment(),
- (sti.is64Bit() ? -8 : -4)),
- TM(tm), STI(sti) {
- }
+ explicit X86FrameLowering(StackDirection D, unsigned StackAl, int LAO)
+ : TargetFrameLowering(StackGrowsDown, StackAl, LAO) {}
void emitCalleeSavedFrameMoves(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI, DebugLoc DL,
- unsigned FramePtr) const;
+ MachineBasicBlock::iterator MBBI,
+ DebugLoc DL) const;
/// emitProlog/emitEpilog - These methods insert prolog and epilog code into
/// the function.
@@ -49,6 +42,11 @@ public:
void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
RegScavenger *RS = nullptr) const override;
+ bool
+ assignCalleeSavedSpillSlots(MachineFunction &MF,
+ const TargetRegisterInfo *TRI,
+ std::vector<CalleeSavedInfo> &CSI) const override;
+
bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
const std::vector<CalleeSavedInfo> &CSI,
diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp
index 74386d3..ba2f5f6 100644
--- a/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -2126,38 +2126,6 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
return getGlobalBaseReg();
- case X86ISD::ATOMOR64_DAG:
- case X86ISD::ATOMXOR64_DAG:
- case X86ISD::ATOMADD64_DAG:
- case X86ISD::ATOMSUB64_DAG:
- case X86ISD::ATOMNAND64_DAG:
- case X86ISD::ATOMAND64_DAG:
- case X86ISD::ATOMMAX64_DAG:
- case X86ISD::ATOMMIN64_DAG:
- case X86ISD::ATOMUMAX64_DAG:
- case X86ISD::ATOMUMIN64_DAG:
- case X86ISD::ATOMSWAP64_DAG: {
- unsigned Opc;
- switch (Opcode) {
- default: llvm_unreachable("Impossible opcode");
- case X86ISD::ATOMOR64_DAG: Opc = X86::ATOMOR6432; break;
- case X86ISD::ATOMXOR64_DAG: Opc = X86::ATOMXOR6432; break;
- case X86ISD::ATOMADD64_DAG: Opc = X86::ATOMADD6432; break;
- case X86ISD::ATOMSUB64_DAG: Opc = X86::ATOMSUB6432; break;
- case X86ISD::ATOMNAND64_DAG: Opc = X86::ATOMNAND6432; break;
- case X86ISD::ATOMAND64_DAG: Opc = X86::ATOMAND6432; break;
- case X86ISD::ATOMMAX64_DAG: Opc = X86::ATOMMAX6432; break;
- case X86ISD::ATOMMIN64_DAG: Opc = X86::ATOMMIN6432; break;
- case X86ISD::ATOMUMAX64_DAG: Opc = X86::ATOMUMAX6432; break;
- case X86ISD::ATOMUMIN64_DAG: Opc = X86::ATOMUMIN6432; break;
- case X86ISD::ATOMSWAP64_DAG: Opc = X86::ATOMSWAP6432; break;
- }
- SDNode *RetVal = SelectAtomic64(Node, Opc);
- if (RetVal)
- return RetVal;
- break;
- }
-
case ISD::ATOMIC_LOAD_XOR:
case ISD::ATOMIC_LOAD_AND:
case ISD::ATOMIC_LOAD_OR:
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index cbaf44e..5ccff20 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -44,11 +44,13 @@
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetOptions.h"
#include <bitset>
+#include <numeric>
#include <cctype>
using namespace llvm;
@@ -56,6 +58,17 @@ using namespace llvm;
STATISTIC(NumTailCalls, "Number of tail calls");
+static cl::opt<bool> ExperimentalVectorWideningLegalization(
+ "x86-experimental-vector-widening-legalization", cl::init(false),
+ cl::desc("Enable an experimental vector type legalization through widening "
+ "rather than promotion."),
+ cl::Hidden);
+
+static cl::opt<bool> ExperimentalVectorShuffleLowering(
+ "x86-experimental-vector-shuffle-lowering", cl::init(false),
+ cl::desc("Enable an experimental vector shuffle lowering code path."),
+ cl::Hidden);
+
// Forward declarations.
static SDValue getMOVL(SelectionDAG &DAG, SDLoc dl, EVT VT, SDValue V1,
SDValue V2);
@@ -178,29 +191,28 @@ static SDValue Concat256BitVectors(SDValue V1, SDValue V2, EVT VT,
return Insert256BitVector(V, V2, NumElems/2, DAG, dl);
}
-static TargetLoweringObjectFile *createTLOF(X86TargetMachine &TM) {
- const X86Subtarget *Subtarget = &TM.getSubtarget<X86Subtarget>();
- bool is64Bit = Subtarget->is64Bit();
-
- if (Subtarget->isTargetMacho()) {
- if (is64Bit)
+static TargetLoweringObjectFile *createTLOF(const Triple &TT) {
+ if (TT.isOSBinFormatMachO()) {
+ if (TT.getArch() == Triple::x86_64)
return new X86_64MachoTargetObjectFile();
return new TargetLoweringObjectFileMachO();
}
- if (Subtarget->isTargetLinux())
+ if (TT.isOSLinux())
return new X86LinuxTargetObjectFile();
- if (Subtarget->isTargetELF())
+ if (TT.isOSBinFormatELF())
return new TargetLoweringObjectFileELF();
- if (Subtarget->isTargetKnownWindowsMSVC())
+ if (TT.isKnownWindowsMSVCEnvironment())
return new X86WindowsTargetObjectFile();
- if (Subtarget->isTargetCOFF())
+ if (TT.isOSBinFormatCOFF())
return new TargetLoweringObjectFileCOFF();
llvm_unreachable("unknown subtarget type");
}
+// FIXME: This should stop caching the target machine as soon as
+// we can remove resetOperationActions et al.
X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
- : TargetLowering(TM, createTLOF(TM)) {
+ : TargetLowering(TM, createTLOF(Triple(TM.getTargetTriple()))) {
Subtarget = &TM.getSubtarget<X86Subtarget>();
X86ScalarSSEf64 = Subtarget->hasSSE2();
X86ScalarSSEf32 = Subtarget->hasSSE1();
@@ -443,7 +455,13 @@ void X86TargetLowering::resetOperationActions() {
setOperationAction(ISD::BR_CC , MVT::i16, Expand);
setOperationAction(ISD::BR_CC , MVT::i32, Expand);
setOperationAction(ISD::BR_CC , MVT::i64, Expand);
- setOperationAction(ISD::SELECT_CC , MVT::Other, Expand);
+ setOperationAction(ISD::SELECT_CC , MVT::f32, Expand);
+ setOperationAction(ISD::SELECT_CC , MVT::f64, Expand);
+ setOperationAction(ISD::SELECT_CC , MVT::f80, Expand);
+ setOperationAction(ISD::SELECT_CC , MVT::i8, Expand);
+ setOperationAction(ISD::SELECT_CC , MVT::i16, Expand);
+ setOperationAction(ISD::SELECT_CC , MVT::i32, Expand);
+ setOperationAction(ISD::SELECT_CC , MVT::i64, Expand);
if (Subtarget->is64Bit())
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16 , Legal);
@@ -497,6 +515,14 @@ void X86TargetLowering::resetOperationActions() {
}
}
+ // Special handling for half-precision floating point conversions.
+ // If we don't have F16C support, then lower half float conversions
+ // into library calls.
+ if (TM.Options.UseSoftFloat || !Subtarget->hasF16C()) {
+ setOperationAction(ISD::FP16_TO_FP32, MVT::f32, Expand);
+ setOperationAction(ISD::FP32_TO_FP16, MVT::i16, Expand);
+ }
+
if (Subtarget->hasPOPCNT()) {
setOperationAction(ISD::CTPOP , MVT::i8 , Promote);
} else {
@@ -575,34 +601,18 @@ void X86TargetLowering::resetOperationActions() {
// Expand certain atomics
for (unsigned i = 0; i != array_lengthof(IntVTs); ++i) {
MVT VT = IntVTs[i];
- setOperationAction(ISD::ATOMIC_CMP_SWAP, VT, Custom);
+ setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, VT, Custom);
setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Custom);
setOperationAction(ISD::ATOMIC_STORE, VT, Custom);
}
- if (!Subtarget->is64Bit()) {
- setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Custom);
- setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i64, Custom);
- setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i64, Custom);
- setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i64, Custom);
- setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i64, Custom);
- setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i64, Custom);
- setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i64, Custom);
- setOperationAction(ISD::ATOMIC_SWAP, MVT::i64, Custom);
- setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i64, Custom);
- setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i64, Custom);
- setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i64, Custom);
- setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i64, Custom);
- }
-
if (Subtarget->hasCmpxchg16b()) {
- setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i128, Custom);
+ setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i128, Custom);
}
// FIXME - use subtarget debug flags
- if (!Subtarget->isTargetDarwin() &&
- !Subtarget->isTargetELF() &&
- !Subtarget->isTargetCygMing()) {
+ if (!Subtarget->isTargetDarwin() && !Subtarget->isTargetELF() &&
+ !Subtarget->isTargetCygMing() && !Subtarget->isTargetWin64()) {
setOperationAction(ISD::EH_LABEL, MVT::Other, Expand);
}
@@ -861,6 +871,7 @@ void X86TargetLowering::resetOperationActions() {
setOperationAction(ISD::ZERO_EXTEND, VT, Expand);
setOperationAction(ISD::ANY_EXTEND, VT, Expand);
setOperationAction(ISD::VSELECT, VT, Expand);
+ setOperationAction(ISD::SELECT_CC, VT, Expand);
for (int InnerVT = MVT::FIRST_VECTOR_VALUETYPE;
InnerVT <= MVT::LAST_VECTOR_VALUETYPE; ++InnerVT)
setTruncStoreAction(VT,
@@ -1433,6 +1444,11 @@ void X86TargetLowering::resetOperationActions() {
setOperationAction(ISD::OR, MVT::v16i32, Legal);
setOperationAction(ISD::XOR, MVT::v16i32, Legal);
+ if (Subtarget->hasCDI()) {
+ setOperationAction(ISD::CTLZ, MVT::v8i64, Legal);
+ setOperationAction(ISD::CTLZ, MVT::v16i32, Legal);
+ }
+
// Custom lower several nodes.
for (int i = MVT::FIRST_VECTOR_VALUETYPE;
i <= MVT::LAST_VECTOR_VALUETYPE; ++i) {
@@ -1563,6 +1579,7 @@ void X86TargetLowering::resetOperationActions() {
setTargetDAGCombine(ISD::SINT_TO_FP);
setTargetDAGCombine(ISD::SETCC);
setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
+ setTargetDAGCombine(ISD::BUILD_VECTOR);
if (Subtarget->is64Bit())
setTargetDAGCombine(ISD::MUL);
setTargetDAGCombine(ISD::XOR);
@@ -1585,6 +1602,16 @@ void X86TargetLowering::resetOperationActions() {
setPrefFunctionAlignment(4); // 2^4 bytes.
}
+TargetLoweringBase::LegalizeTypeAction
+X86TargetLowering::getPreferredVectorAction(EVT VT) const {
+ if (ExperimentalVectorWideningLegalization &&
+ VT.getVectorNumElements() != 1 &&
+ VT.getVectorElementType().getSimpleVT() != MVT::i1)
+ return TypeWidenVector;
+
+ return TargetLoweringBase::getPreferredVectorAction(VT);
+}
+
EVT X86TargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
if (!VT.isVector())
return Subtarget->hasAVX512() ? MVT::i1: MVT::i8;
@@ -1725,7 +1752,7 @@ const MCExpr *
X86TargetLowering::LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
const MachineBasicBlock *MBB,
unsigned uid,MCContext &Ctx) const{
- assert(getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
+ assert(MBB->getParent()->getTarget().getRelocationModel() == Reloc::PIC_ &&
Subtarget->isPICStyleGOT());
// In 32-bit ELF systems, our jump table entries are formed with @GOTOFF
// entries.
@@ -1824,7 +1851,7 @@ X86TargetLowering::CanLowerReturn(CallingConv::ID CallConv,
const SmallVectorImpl<ISD::OutputArg> &Outs,
LLVMContext &Context) const {
SmallVector<CCValAssign, 16> RVLocs;
- CCState CCInfo(CallConv, isVarArg, MF, getTargetMachine(),
+ CCState CCInfo(CallConv, isVarArg, MF, MF.getTarget(),
RVLocs, Context);
return CCInfo.CheckReturn(Outs, RetCC_X86);
}
@@ -1844,7 +1871,7 @@ X86TargetLowering::LowerReturn(SDValue Chain,
X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
SmallVector<CCValAssign, 16> RVLocs;
- CCState CCInfo(CallConv, isVarArg, MF, getTargetMachine(),
+ CCState CCInfo(CallConv, isVarArg, MF, DAG.getTarget(),
RVLocs, *DAG.getContext());
CCInfo.AnalyzeReturn(Outs, RetCC_X86);
@@ -2016,7 +2043,7 @@ X86TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
SmallVector<CCValAssign, 16> RVLocs;
bool Is64Bit = Subtarget->is64Bit();
CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), RVLocs, *DAG.getContext());
+ DAG.getTarget(), RVLocs, *DAG.getContext());
CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
// Copy all of the result registers out of their specified physreg.
@@ -2166,8 +2193,8 @@ X86TargetLowering::LowerMemArgument(SDValue Chain,
unsigned i) const {
// Create the nodes corresponding to a load from this parameter slot.
ISD::ArgFlagsTy Flags = Ins[i].Flags;
- bool AlwaysUseMutable = FuncIsMadeTailCallSafe(CallConv,
- getTargetMachine().Options.GuaranteedTailCallOpt);
+ bool AlwaysUseMutable = FuncIsMadeTailCallSafe(
+ CallConv, DAG.getTarget().Options.GuaranteedTailCallOpt);
bool isImmutable = !AlwaysUseMutable && !Flags.isByVal();
EVT ValVT;
@@ -2224,7 +2251,7 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
// Assign locations to all of the incoming arguments.
SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CallConv, isVarArg, MF, getTargetMachine(),
+ CCState CCInfo(CallConv, isVarArg, MF, DAG.getTarget(),
ArgLocs, *DAG.getContext());
// Allocate shadow area for Win64
@@ -2388,7 +2415,7 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
TotalNumXMMRegs = 0;
if (IsWin64) {
- const TargetFrameLowering &TFI = *getTargetMachine().getFrameLowering();
+ const TargetFrameLowering &TFI = *MF.getTarget().getFrameLowering();
// Get to the caller-allocated home save location. Add 8 to account
// for the return address.
int HomeOffset = TFI.getOffsetOfLocalArea() + 8;
@@ -2587,7 +2614,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CallConv, isVarArg, MF, getTargetMachine(),
+ CCState CCInfo(CallConv, isVarArg, MF, MF.getTarget(),
ArgLocs, *DAG.getContext());
// Allocate shadow area for Win64
@@ -2602,7 +2629,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// This is a sibcall. The memory operands are available in caller's
// own caller's stack.
NumBytes = 0;
- else if (getTargetMachine().Options.GuaranteedTailCallOpt &&
+ else if (MF.getTarget().Options.GuaranteedTailCallOpt &&
IsTailCallConvention(CallConv))
NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG);
@@ -2649,7 +2676,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// Walk the register/memloc assignments, inserting copies/loads. In the case
// of tail call optimization arguments are handle later.
const X86RegisterInfo *RegInfo =
- static_cast<const X86RegisterInfo*>(getTargetMachine().getRegisterInfo());
+ static_cast<const X86RegisterInfo*>(DAG.getTarget().getRegisterInfo());
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
// Skip inalloca arguments, they have already been written.
ISD::ArgFlagsTy Flags = Outs[i].Flags;
@@ -2840,7 +2867,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
InFlag = Chain.getValue(1);
}
- if (getTargetMachine().getCodeModel() == CodeModel::Large) {
+ if (DAG.getTarget().getCodeModel() == CodeModel::Large) {
assert(Is64Bit && "Large code model is only legal in 64-bit mode.");
// In the 64-bit large code model, we have to make all calls
// through a register, since the call instruction's 32-bit
@@ -2864,7 +2891,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// has hidden or protected visibility, or if it is static or local, then
// we don't need to use the PLT - we can directly call it.
if (Subtarget->isTargetELF() &&
- getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
+ DAG.getTarget().getRelocationModel() == Reloc::PIC_ &&
GV->hasDefaultVisibility() && !GV->hasLocalLinkage()) {
OpFlags = X86II::MO_PLT;
} else if (Subtarget->isPICStyleStubAny() &&
@@ -2906,7 +2933,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// On ELF targets, in either X86-64 or X86-32 mode, direct calls to
// external symbols should go through the PLT.
if (Subtarget->isTargetELF() &&
- getTargetMachine().getRelocationModel() == Reloc::PIC_) {
+ DAG.getTarget().getRelocationModel() == Reloc::PIC_) {
OpFlags = X86II::MO_PLT;
} else if (Subtarget->isPICStyleStubAny() &&
(!Subtarget->getTargetTriple().isMacOSX() ||
@@ -2945,7 +2972,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
RegsToPass[i].second.getValueType()));
// Add a register mask operand representing the call-preserved registers.
- const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
+ const TargetRegisterInfo *TRI = DAG.getTarget().getRegisterInfo();
const uint32_t *Mask = TRI->getCallPreservedMask(CallConv);
assert(Mask && "Missing call preserved mask for calling convention");
Ops.push_back(DAG.getRegisterMask(Mask));
@@ -2969,7 +2996,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// Create the CALLSEQ_END node.
unsigned NumBytesForCalleeToPop;
if (X86::isCalleePop(CallConv, Is64Bit, isVarArg,
- getTargetMachine().Options.GuaranteedTailCallOpt))
+ DAG.getTarget().Options.GuaranteedTailCallOpt))
NumBytesForCalleeToPop = NumBytes; // Callee pops everything
else if (!Is64Bit && !IsTailCallConvention(CallConv) &&
!Subtarget->getTargetTriple().isOSMSVCRT() &&
@@ -3140,7 +3167,7 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
bool IsCalleeWin64 = Subtarget->isCallingConvWin64(CalleeCC);
bool IsCallerWin64 = Subtarget->isCallingConvWin64(CallerCC);
- if (getTargetMachine().Options.GuaranteedTailCallOpt) {
+ if (DAG.getTarget().Options.GuaranteedTailCallOpt) {
if (IsTailCallConvention(CalleeCC) && CCMatch)
return true;
return false;
@@ -3152,7 +3179,7 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
// Can't do sibcall if stack needs to be dynamically re-aligned. PEI needs to
// emit a special epilogue.
const X86RegisterInfo *RegInfo =
- static_cast<const X86RegisterInfo*>(getTargetMachine().getRegisterInfo());
+ static_cast<const X86RegisterInfo*>(DAG.getTarget().getRegisterInfo());
if (RegInfo->needsStackRealignment(MF))
return false;
@@ -3181,7 +3208,7 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CalleeCC, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), ArgLocs, *DAG.getContext());
+ DAG.getTarget(), ArgLocs, *DAG.getContext());
CCInfo.AnalyzeCallOperands(Outs, CC_X86);
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i)
@@ -3202,7 +3229,7 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
if (Unused) {
SmallVector<CCValAssign, 16> RVLocs;
CCState CCInfo(CalleeCC, false, DAG.getMachineFunction(),
- getTargetMachine(), RVLocs, *DAG.getContext());
+ DAG.getTarget(), RVLocs, *DAG.getContext());
CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
CCValAssign &VA = RVLocs[i];
@@ -3216,12 +3243,12 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
if (!CCMatch) {
SmallVector<CCValAssign, 16> RVLocs1;
CCState CCInfo1(CalleeCC, false, DAG.getMachineFunction(),
- getTargetMachine(), RVLocs1, *DAG.getContext());
+ DAG.getTarget(), RVLocs1, *DAG.getContext());
CCInfo1.AnalyzeCallResult(Ins, RetCC_X86);
SmallVector<CCValAssign, 16> RVLocs2;
CCState CCInfo2(CallerCC, false, DAG.getMachineFunction(),
- getTargetMachine(), RVLocs2, *DAG.getContext());
+ DAG.getTarget(), RVLocs2, *DAG.getContext());
CCInfo2.AnalyzeCallResult(Ins, RetCC_X86);
if (RVLocs1.size() != RVLocs2.size())
@@ -3248,7 +3275,7 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
// argument is passed on the stack.
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CalleeCC, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), ArgLocs, *DAG.getContext());
+ DAG.getTarget(), ArgLocs, *DAG.getContext());
// Allocate shadow area for Win64
if (IsCalleeWin64)
@@ -3265,7 +3292,7 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
MachineFrameInfo *MFI = MF.getFrameInfo();
const MachineRegisterInfo *MRI = &MF.getRegInfo();
const X86InstrInfo *TII =
- ((const X86TargetMachine&)getTargetMachine()).getInstrInfo();
+ static_cast<const X86InstrInfo *>(DAG.getTarget().getInstrInfo());
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
SDValue Arg = OutVals[i];
@@ -3288,12 +3315,12 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
if (!Subtarget->is64Bit() &&
((!isa<GlobalAddressSDNode>(Callee) &&
!isa<ExternalSymbolSDNode>(Callee)) ||
- getTargetMachine().getRelocationModel() == Reloc::PIC_)) {
+ DAG.getTarget().getRelocationModel() == Reloc::PIC_)) {
unsigned NumInRegs = 0;
// In PIC we need an extra register to formulate the address computation
// for the callee.
unsigned MaxInRegs =
- (getTargetMachine().getRelocationModel() == Reloc::PIC_) ? 2 : 3;
+ (DAG.getTarget().getRelocationModel() == Reloc::PIC_) ? 2 : 3;
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
@@ -3417,7 +3444,7 @@ static SDValue getTargetShuffleNode(unsigned Opc, SDLoc dl, EVT VT,
SDValue X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
const X86RegisterInfo *RegInfo =
- static_cast<const X86RegisterInfo*>(getTargetMachine().getRegisterInfo());
+ static_cast<const X86RegisterInfo*>(DAG.getTarget().getRegisterInfo());
X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
int ReturnAddrIndex = FuncInfo->getRAIndex();
@@ -3967,14 +3994,22 @@ static bool isINSERTPSMask(ArrayRef<int> Mask, MVT VT) {
unsigned CorrectPosV1 = 0;
unsigned CorrectPosV2 = 0;
- for (int i = 0, e = (int)VT.getVectorNumElements(); i != e; ++i)
+ for (int i = 0, e = (int)VT.getVectorNumElements(); i != e; ++i) {
+ if (Mask[i] == -1) {
+ ++CorrectPosV1;
+ ++CorrectPosV2;
+ continue;
+ }
+
if (Mask[i] == i)
++CorrectPosV1;
else if (Mask[i] == i + 4)
++CorrectPosV2;
+ }
if (CorrectPosV1 == 3 || CorrectPosV2 == 3)
- // We have 3 elements from one vector, and one from another.
+ // We have 3 elements (undefs count as elements from any vector) from one
+ // vector, and one from another.
return true;
return false;
@@ -4823,19 +4858,6 @@ static bool ShouldXformToMOVLP(SDNode *V1, SDNode *V2,
return true;
}
-/// isSplatVector - Returns true if N is a BUILD_VECTOR node whose elements are
-/// all the same.
-static bool isSplatVector(SDNode *N) {
- if (N->getOpcode() != ISD::BUILD_VECTOR)
- return false;
-
- SDValue SplatValue = N->getOperand(0);
- for (unsigned i = 1, e = N->getNumOperands(); i != e; ++i)
- if (N->getOperand(i) != SplatValue)
- return false;
- return true;
-}
-
/// isZeroShuffle - Returns true if N is a VECTOR_SHUFFLE that can be resolved
/// to an zero vector.
/// FIXME: move to dag combiner / method on ShuffleVectorSDNode
@@ -5744,18 +5766,22 @@ static SDValue LowerVectorBroadcast(SDValue Op, const X86Subtarget* Subtarget,
return SDValue();
case ISD::BUILD_VECTOR: {
- // The BUILD_VECTOR node must be a splat.
- if (!isSplatVector(Op.getNode()))
+ auto *BVOp = cast<BuildVectorSDNode>(Op.getNode());
+ BitVector UndefElements;
+ SDValue Splat = BVOp->getSplatValue(&UndefElements);
+
+ // We need a splat of a single value to use broadcast, and it doesn't
+ // make any sense if the value is only in one element of the vector.
+ if (!Splat || (VT.getVectorNumElements() - UndefElements.count()) <= 1)
return SDValue();
- Ld = Op.getOperand(0);
+ Ld = Splat;
ConstSplatVal = (Ld.getOpcode() == ISD::Constant ||
- Ld.getOpcode() == ISD::ConstantFP);
+ Ld.getOpcode() == ISD::ConstantFP);
- // The suspected load node has several users. Make sure that all
- // of its users are from the BUILD_VECTOR node.
- // Constants may have multiple users.
- if (!ConstSplatVal && !Ld->hasNUsesOfValue(VT.getVectorNumElements(), 0))
+ // Make sure that all of the users of a non-constant load are from the
+ // BUILD_VECTOR node.
+ if (!ConstSplatVal && !BVOp->isOnlyUserOf(Ld.getNode()))
return SDValue();
break;
}
@@ -6042,6 +6068,433 @@ X86TargetLowering::LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG) const {
return DAG.getNode(ISD::BITCAST, dl, VT, Select);
}
+/// \brief Return true if \p N implements a horizontal binop and return the
+/// operands for the horizontal binop into V0 and V1.
+///
+/// This is a helper function of PerformBUILD_VECTORCombine.
+/// This function checks whether the given build_vector \p N implements a
+/// horizontal operation. Parameter \p Opcode defines the kind of horizontal
+/// operation to match.
+/// For example, if \p Opcode is equal to ISD::ADD, then this function
+/// checks if \p N implements a horizontal arithmetic add; if instead \p Opcode
+/// is equal to ISD::SUB, then this function checks if this is a horizontal
+/// arithmetic sub.
+///
+/// This function only analyzes elements of \p N whose indices are
+/// in range [BaseIdx, LastIdx).
+static bool isHorizontalBinOp(const BuildVectorSDNode *N, unsigned Opcode,
+ SelectionDAG &DAG,
+ unsigned BaseIdx, unsigned LastIdx,
+ SDValue &V0, SDValue &V1) {
+ EVT VT = N->getValueType(0);
+
+ assert(BaseIdx * 2 <= LastIdx && "Invalid Indices in input!");
+ assert(VT.isVector() && VT.getVectorNumElements() >= LastIdx &&
+ "Invalid Vector in input!");
+
+ bool IsCommutable = (Opcode == ISD::ADD || Opcode == ISD::FADD);
+ bool CanFold = true;
+ unsigned ExpectedVExtractIdx = BaseIdx;
+ unsigned NumElts = LastIdx - BaseIdx;
+ V0 = DAG.getUNDEF(VT);
+ V1 = DAG.getUNDEF(VT);
+
+ // Check if N implements a horizontal binop.
+ for (unsigned i = 0, e = NumElts; i != e && CanFold; ++i) {
+ SDValue Op = N->getOperand(i + BaseIdx);
+
+ // Skip UNDEFs.
+ if (Op->getOpcode() == ISD::UNDEF) {
+ // Update the expected vector extract index.
+ if (i * 2 == NumElts)
+ ExpectedVExtractIdx = BaseIdx;
+ ExpectedVExtractIdx += 2;
+ continue;
+ }
+
+ CanFold = Op->getOpcode() == Opcode && Op->hasOneUse();
+
+ if (!CanFold)
+ break;
+
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+
+ // Try to match the following pattern:
+ // (BINOP (extract_vector_elt A, I), (extract_vector_elt A, I+1))
+ CanFold = (Op0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+ Op1.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+ Op0.getOperand(0) == Op1.getOperand(0) &&
+ isa<ConstantSDNode>(Op0.getOperand(1)) &&
+ isa<ConstantSDNode>(Op1.getOperand(1)));
+ if (!CanFold)
+ break;
+
+ unsigned I0 = cast<ConstantSDNode>(Op0.getOperand(1))->getZExtValue();
+ unsigned I1 = cast<ConstantSDNode>(Op1.getOperand(1))->getZExtValue();
+
+ if (i * 2 < NumElts) {
+ if (V0.getOpcode() == ISD::UNDEF)
+ V0 = Op0.getOperand(0);
+ } else {
+ if (V1.getOpcode() == ISD::UNDEF)
+ V1 = Op0.getOperand(0);
+ if (i * 2 == NumElts)
+ ExpectedVExtractIdx = BaseIdx;
+ }
+
+ SDValue Expected = (i * 2 < NumElts) ? V0 : V1;
+ if (I0 == ExpectedVExtractIdx)
+ CanFold = I1 == I0 + 1 && Op0.getOperand(0) == Expected;
+ else if (IsCommutable && I1 == ExpectedVExtractIdx) {
+ // Try to match the following dag sequence:
+ // (BINOP (extract_vector_elt A, I+1), (extract_vector_elt A, I))
+ CanFold = I0 == I1 + 1 && Op1.getOperand(0) == Expected;
+ } else
+ CanFold = false;
+
+ ExpectedVExtractIdx += 2;
+ }
+
+ return CanFold;
+}
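
As a rough scalar model of what this matcher is looking for, here is a minimal sketch assuming HADDPS-style pairwise semantics (low half of the result from the first source, high half from the second); the names below are invented for illustration and are not part of the patch.

#include <array>
#include <cstdio>

// Pairwise sums: result lanes 0-1 come from A, lanes 2-3 from B.
static std::array<float, 4> haddModel(const std::array<float, 4> &A,
                                      const std::array<float, 4> &B) {
  return {A[0] + A[1], A[2] + A[3], B[0] + B[1], B[2] + B[3]};
}

int main() {
  std::array<float, 4> A = {1, 2, 3, 4};
  std::array<float, 4> B = {10, 20, 30, 40};
  std::array<float, 4> R = haddModel(A, B);
  std::printf("%g %g %g %g\n", R[0], R[1], R[2], R[3]); // prints: 3 7 30 70
}

Each build_vector element that isHorizontalBinOp accepts corresponds to one of these pairwise sums (or differences, for the SUB/FSUB flavours).
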
+
+/// \brief Emit a sequence of two 128-bit horizontal add/sub followed by
+/// a concat_vector.
+///
+/// This is a helper function of PerformBUILD_VECTORCombine.
+/// This function expects two 256-bit vectors called V0 and V1.
+/// At first, each vector is split into two separate 128-bit vectors.
+/// Then, the resulting 128-bit vectors are used to implement two
+/// horizontal binary operations.
+///
+/// The kind of horizontal binary operation is defined by \p X86Opcode.
+///
+/// \p Mode specifies how the 128-bit parts of V0 and V1 are passed as inputs to
+/// the two new horizontal binops.
+/// When Mode is set, the first horizontal binop dag node would take as input
+/// the lower 128-bit of V0 and the upper 128-bit of V0. The second
+/// horizontal binop dag node would take as input the lower 128-bit of V1
+/// and the upper 128-bit of V1.
+/// Example:
+/// HADD V0_LO, V0_HI
+/// HADD V1_LO, V1_HI
+///
+/// Otherwise, the first horizontal binop dag node takes as input the lower
+/// 128-bit of V0 and the lower 128-bit of V1, and the second horizontal binop
+/// dag node takes the upper 128-bit of V0 and the upper 128-bit of V1.
+/// Example:
+/// HADD V0_LO, V1_LO
+/// HADD V0_HI, V1_HI
+///
+/// If \p isUndefLO is set, then the algorithm propagates UNDEF to the lower
+/// 128-bits of the result. If \p isUndefHI is set, then UNDEF is propagated to
+/// the upper 128-bits of the result.
+static SDValue ExpandHorizontalBinOp(const SDValue &V0, const SDValue &V1,
+ SDLoc DL, SelectionDAG &DAG,
+ unsigned X86Opcode, bool Mode,
+ bool isUndefLO, bool isUndefHI) {
+ EVT VT = V0.getValueType();
+ assert(VT.is256BitVector() && VT == V1.getValueType() &&
+ "Invalid nodes in input!");
+
+ unsigned NumElts = VT.getVectorNumElements();
+ SDValue V0_LO = Extract128BitVector(V0, 0, DAG, DL);
+ SDValue V0_HI = Extract128BitVector(V0, NumElts/2, DAG, DL);
+ SDValue V1_LO = Extract128BitVector(V1, 0, DAG, DL);
+ SDValue V1_HI = Extract128BitVector(V1, NumElts/2, DAG, DL);
+ EVT NewVT = V0_LO.getValueType();
+
+ SDValue LO = DAG.getUNDEF(NewVT);
+ SDValue HI = DAG.getUNDEF(NewVT);
+
+ if (Mode) {
+ // Don't emit a horizontal binop if the result is expected to be UNDEF.
+ if (!isUndefLO && V0->getOpcode() != ISD::UNDEF)
+ LO = DAG.getNode(X86Opcode, DL, NewVT, V0_LO, V0_HI);
+ if (!isUndefHI && V1->getOpcode() != ISD::UNDEF)
+ HI = DAG.getNode(X86Opcode, DL, NewVT, V1_LO, V1_HI);
+ } else {
+ // Don't emit a horizontal binop if the result is expected to be UNDEF.
+ if (!isUndefLO && (V0_LO->getOpcode() != ISD::UNDEF ||
+ V1_LO->getOpcode() != ISD::UNDEF))
+ LO = DAG.getNode(X86Opcode, DL, NewVT, V0_LO, V1_LO);
+
+ if (!isUndefHI && (V0_HI->getOpcode() != ISD::UNDEF ||
+ V1_HI->getOpcode() != ISD::UNDEF))
+ HI = DAG.getNode(X86Opcode, DL, NewVT, V0_HI, V1_HI);
+ }
+
+ return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, LO, HI);
+}
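
A small sketch of how the two pairing modes differ, using scalar arrays in place of 256-bit vectors and a pairwise add standing in for the generic X86Opcode; undef propagation is omitted and all names are illustrative only.

#include <array>
#include <cstdio>

using V4 = std::array<float, 4>;
using V8 = std::array<float, 8>;

// Pairwise-add model standing in for one 128-bit horizontal binop.
static V4 hadd4(const V4 &A, const V4 &B) {
  return {A[0] + A[1], A[2] + A[3], B[0] + B[1], B[2] + B[3]};
}

// Mode == true pairs each source with itself (V0_LO with V0_HI, then V1_LO
// with V1_HI); Mode == false pairs the two sources half by half.
static V8 expandModel(const V8 &V0, const V8 &V1, bool Mode) {
  V4 V0lo = {V0[0], V0[1], V0[2], V0[3]}, V0hi = {V0[4], V0[5], V0[6], V0[7]};
  V4 V1lo = {V1[0], V1[1], V1[2], V1[3]}, V1hi = {V1[4], V1[5], V1[6], V1[7]};
  V4 Lo = Mode ? hadd4(V0lo, V0hi) : hadd4(V0lo, V1lo);
  V4 Hi = Mode ? hadd4(V1lo, V1hi) : hadd4(V0hi, V1hi);
  V8 R;
  for (int i = 0; i != 4; ++i) {
    R[i] = Lo[i];     // lower 128 bits of the concat
    R[i + 4] = Hi[i]; // upper 128 bits of the concat
  }
  return R;
}

int main() {
  V8 V0 = {1, 2, 3, 4, 5, 6, 7, 8};
  V8 V1 = {10, 20, 30, 40, 50, 60, 70, 80};
  V8 R = expandModel(V0, V1, /*Mode=*/false);
  for (float F : R)
    std::printf("%g ", F); // prints: 3 7 30 70 11 15 110 150
  std::printf("\n");
}
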
+
+/// \brief Try to fold a build_vector that performs an 'addsub' into the
+/// sequence of 'vadd + vsub + blendi'.
+static SDValue matchAddSub(const BuildVectorSDNode *BV, SelectionDAG &DAG,
+ const X86Subtarget *Subtarget) {
+ SDLoc DL(BV);
+ EVT VT = BV->getValueType(0);
+ unsigned NumElts = VT.getVectorNumElements();
+ SDValue InVec0 = DAG.getUNDEF(VT);
+ SDValue InVec1 = DAG.getUNDEF(VT);
+
+ assert((VT == MVT::v8f32 || VT == MVT::v4f64 || VT == MVT::v4f32 ||
+ VT == MVT::v2f64) && "build_vector with an invalid type found!");
+
+ // Don't try to emit a VSELECT that cannot be lowered into a blend.
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (!TLI.isOperationLegalOrCustom(ISD::VSELECT, VT))
+ return SDValue();
+
+ // Odd-numbered elements in the input build vector are obtained from
+ // adding two integer/float elements.
+ // Even-numbered elements in the input build vector are obtained from
+ // subtracting two integer/float elements.
+ unsigned ExpectedOpcode = ISD::FSUB;
+ unsigned NextExpectedOpcode = ISD::FADD;
+ bool AddFound = false;
+ bool SubFound = false;
+
+ for (unsigned i = 0, e = NumElts; i != e; i++) {
+ SDValue Op = BV->getOperand(i);
+
+ // Skip 'undef' values.
+ unsigned Opcode = Op.getOpcode();
+ if (Opcode == ISD::UNDEF) {
+ std::swap(ExpectedOpcode, NextExpectedOpcode);
+ continue;
+ }
+
+ // Early exit if we found an unexpected opcode.
+ if (Opcode != ExpectedOpcode)
+ return SDValue();
+
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+
+ // Try to match the following pattern:
+ // (BINOP (extract_vector_elt A, i), (extract_vector_elt B, i))
+ // Early exit if we cannot match that sequence.
+ if (Op0.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
+ Op1.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
+ !isa<ConstantSDNode>(Op0.getOperand(1)) ||
+ !isa<ConstantSDNode>(Op1.getOperand(1)) ||
+ Op0.getOperand(1) != Op1.getOperand(1))
+ return SDValue();
+
+ unsigned I0 = cast<ConstantSDNode>(Op0.getOperand(1))->getZExtValue();
+ if (I0 != i)
+ return SDValue();
+
+ // We found a valid add/sub node. Update the information accordingly.
+ if (i & 1)
+ AddFound = true;
+ else
+ SubFound = true;
+
+ // Update InVec0 and InVec1.
+ if (InVec0.getOpcode() == ISD::UNDEF)
+ InVec0 = Op0.getOperand(0);
+ if (InVec1.getOpcode() == ISD::UNDEF)
+ InVec1 = Op1.getOperand(0);
+
+ // Make sure that the operands of each add/sub node always
+ // come from the same pair of vectors.
+ if (InVec0 != Op0.getOperand(0)) {
+ if (ExpectedOpcode == ISD::FSUB)
+ return SDValue();
+
+ // FADD is commutable. Try to commute the operands
+ // and then test again.
+ std::swap(Op0, Op1);
+ if (InVec0 != Op0.getOperand(0))
+ return SDValue();
+ }
+
+ if (InVec1 != Op1.getOperand(0))
+ return SDValue();
+
+ // Update the pair of expected opcodes.
+ std::swap(ExpectedOpcode, NextExpectedOpcode);
+ }
+
+ // Don't try to fold this build_vector into a VSELECT if it has
+ // too many UNDEF operands.
+ if (AddFound && SubFound && InVec0.getOpcode() != ISD::UNDEF &&
+ InVec1.getOpcode() != ISD::UNDEF) {
+ // Emit a sequence of vector add and sub followed by a VSELECT.
+ // The new VSELECT will be lowered into a BLENDI.
+ // At ISel stage, we pattern-match the sequence 'add + sub + BLENDI'
+ // and emit a single ADDSUB instruction.
+ SDValue Sub = DAG.getNode(ExpectedOpcode, DL, VT, InVec0, InVec1);
+ SDValue Add = DAG.getNode(NextExpectedOpcode, DL, VT, InVec0, InVec1);
+
+ // Construct the VSELECT mask.
+ EVT MaskVT = VT.changeVectorElementTypeToInteger();
+ EVT SVT = MaskVT.getVectorElementType();
+ unsigned SVTBits = SVT.getSizeInBits();
+ SmallVector<SDValue, 8> Ops;
+
+ for (unsigned i = 0, e = NumElts; i != e; ++i) {
+ APInt Value = i & 1 ? APInt::getNullValue(SVTBits) :
+ APInt::getAllOnesValue(SVTBits);
+ SDValue Constant = DAG.getConstant(Value, SVT);
+ Ops.push_back(Constant);
+ }
+
+ SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, DL, MaskVT, Ops);
+ return DAG.getSelect(DL, VT, Mask, Sub, Add);
+ }
+
+ return SDValue();
+}
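
The even-subtract / odd-add shape that matchAddSub looks for, written out as a scalar sketch; the blend-mask construction above selects the subtraction result for even lanes and the addition result for odd lanes. Names here are illustrative only.

#include <array>
#include <cstdio>

// Even lanes subtract, odd lanes add: the per-element shape the build_vector
// must have for the vadd + vsub + blend fold to apply.
static std::array<float, 4> addsubModel(const std::array<float, 4> &A,
                                        const std::array<float, 4> &B) {
  return {A[0] - B[0], A[1] + B[1], A[2] - B[2], A[3] + B[3]};
}

int main() {
  std::array<float, 4> A = {5, 5, 5, 5};
  std::array<float, 4> B = {1, 2, 3, 4};
  std::array<float, 4> R = addsubModel(A, B);
  std::printf("%g %g %g %g\n", R[0], R[1], R[2], R[3]); // prints: 4 7 2 9
}
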
+
+static SDValue PerformBUILD_VECTORCombine(SDNode *N, SelectionDAG &DAG,
+ const X86Subtarget *Subtarget) {
+ SDLoc DL(N);
+ EVT VT = N->getValueType(0);
+ unsigned NumElts = VT.getVectorNumElements();
+ BuildVectorSDNode *BV = cast<BuildVectorSDNode>(N);
+ SDValue InVec0, InVec1;
+
+ // Try to match an ADDSUB.
+ if ((Subtarget->hasSSE3() && (VT == MVT::v4f32 || VT == MVT::v2f64)) ||
+ (Subtarget->hasAVX() && (VT == MVT::v8f32 || VT == MVT::v4f64))) {
+ SDValue Value = matchAddSub(BV, DAG, Subtarget);
+ if (Value.getNode())
+ return Value;
+ }
+
+ // Try to match horizontal ADD/SUB.
+ unsigned NumUndefsLO = 0;
+ unsigned NumUndefsHI = 0;
+ unsigned Half = NumElts/2;
+
+ // Count the number of UNDEF operands in the build_vector in input.
+ for (unsigned i = 0, e = Half; i != e; ++i)
+ if (BV->getOperand(i)->getOpcode() == ISD::UNDEF)
+ NumUndefsLO++;
+
+ for (unsigned i = Half, e = NumElts; i != e; ++i)
+ if (BV->getOperand(i)->getOpcode() == ISD::UNDEF)
+ NumUndefsHI++;
+
+ // Early exit if this is either a build_vector of all UNDEFs or all the
+ // operands but one are UNDEF.
+ if (NumUndefsLO + NumUndefsHI + 1 >= NumElts)
+ return SDValue();
+
+ if ((VT == MVT::v4f32 || VT == MVT::v2f64) && Subtarget->hasSSE3()) {
+ // Try to match an SSE3 float HADD/HSUB.
+ if (isHorizontalBinOp(BV, ISD::FADD, DAG, 0, NumElts, InVec0, InVec1))
+ return DAG.getNode(X86ISD::FHADD, DL, VT, InVec0, InVec1);
+
+ if (isHorizontalBinOp(BV, ISD::FSUB, DAG, 0, NumElts, InVec0, InVec1))
+ return DAG.getNode(X86ISD::FHSUB, DL, VT, InVec0, InVec1);
+ } else if ((VT == MVT::v4i32 || VT == MVT::v8i16) && Subtarget->hasSSSE3()) {
+ // Try to match an SSSE3 integer HADD/HSUB.
+ if (isHorizontalBinOp(BV, ISD::ADD, DAG, 0, NumElts, InVec0, InVec1))
+ return DAG.getNode(X86ISD::HADD, DL, VT, InVec0, InVec1);
+
+ if (isHorizontalBinOp(BV, ISD::SUB, DAG, 0, NumElts, InVec0, InVec1))
+ return DAG.getNode(X86ISD::HSUB, DL, VT, InVec0, InVec1);
+ }
+
+ if (!Subtarget->hasAVX())
+ return SDValue();
+
+ if ((VT == MVT::v8f32 || VT == MVT::v4f64)) {
+ // Try to match an AVX horizontal add/sub of packed single/double
+ // precision floating point values from 256-bit vectors.
+ SDValue InVec2, InVec3;
+ if (isHorizontalBinOp(BV, ISD::FADD, DAG, 0, Half, InVec0, InVec1) &&
+ isHorizontalBinOp(BV, ISD::FADD, DAG, Half, NumElts, InVec2, InVec3) &&
+ ((InVec0.getOpcode() == ISD::UNDEF ||
+ InVec2.getOpcode() == ISD::UNDEF) || InVec0 == InVec2) &&
+ ((InVec1.getOpcode() == ISD::UNDEF ||
+ InVec3.getOpcode() == ISD::UNDEF) || InVec1 == InVec3))
+ return DAG.getNode(X86ISD::FHADD, DL, VT, InVec0, InVec1);
+
+ if (isHorizontalBinOp(BV, ISD::FSUB, DAG, 0, Half, InVec0, InVec1) &&
+ isHorizontalBinOp(BV, ISD::FSUB, DAG, Half, NumElts, InVec2, InVec3) &&
+ ((InVec0.getOpcode() == ISD::UNDEF ||
+ InVec2.getOpcode() == ISD::UNDEF) || InVec0 == InVec2) &&
+ ((InVec1.getOpcode() == ISD::UNDEF ||
+ InVec3.getOpcode() == ISD::UNDEF) || InVec1 == InVec3))
+ return DAG.getNode(X86ISD::FHSUB, DL, VT, InVec0, InVec1);
+ } else if (VT == MVT::v8i32 || VT == MVT::v16i16) {
+ // Try to match an AVX2 horizontal add/sub of signed integers.
+ SDValue InVec2, InVec3;
+ unsigned X86Opcode;
+ bool CanFold = true;
+
+ if (isHorizontalBinOp(BV, ISD::ADD, DAG, 0, Half, InVec0, InVec1) &&
+ isHorizontalBinOp(BV, ISD::ADD, DAG, Half, NumElts, InVec2, InVec3) &&
+ ((InVec0.getOpcode() == ISD::UNDEF ||
+ InVec2.getOpcode() == ISD::UNDEF) || InVec0 == InVec2) &&
+ ((InVec1.getOpcode() == ISD::UNDEF ||
+ InVec3.getOpcode() == ISD::UNDEF) || InVec1 == InVec3))
+ X86Opcode = X86ISD::HADD;
+ else if (isHorizontalBinOp(BV, ISD::SUB, DAG, 0, Half, InVec0, InVec1) &&
+ isHorizontalBinOp(BV, ISD::SUB, DAG, Half, NumElts, InVec2, InVec3) &&
+ ((InVec0.getOpcode() == ISD::UNDEF ||
+ InVec2.getOpcode() == ISD::UNDEF) || InVec0 == InVec2) &&
+ ((InVec1.getOpcode() == ISD::UNDEF ||
+ InVec3.getOpcode() == ISD::UNDEF) || InVec1 == InVec3))
+ X86Opcode = X86ISD::HSUB;
+ else
+ CanFold = false;
+
+ if (CanFold) {
+ // Fold this build_vector into a single horizontal add/sub.
+ // Do this only if the target has AVX2.
+ if (Subtarget->hasAVX2())
+ return DAG.getNode(X86Opcode, DL, VT, InVec0, InVec1);
+
+ // Do not try to expand this build_vector into a pair of horizontal
+ // add/sub if we can emit a pair of scalar add/sub.
+ if (NumUndefsLO + 1 == Half || NumUndefsHI + 1 == Half)
+ return SDValue();
+
+ // Convert this build_vector into a pair of horizontal binop followed by
+ // a concat vector.
+ bool isUndefLO = NumUndefsLO == Half;
+ bool isUndefHI = NumUndefsHI == Half;
+ return ExpandHorizontalBinOp(InVec0, InVec1, DL, DAG, X86Opcode, false,
+ isUndefLO, isUndefHI);
+ }
+ }
+
+ if ((VT == MVT::v8f32 || VT == MVT::v4f64 || VT == MVT::v8i32 ||
+ VT == MVT::v16i16) && Subtarget->hasAVX()) {
+ unsigned X86Opcode;
+ if (isHorizontalBinOp(BV, ISD::ADD, DAG, 0, NumElts, InVec0, InVec1))
+ X86Opcode = X86ISD::HADD;
+ else if (isHorizontalBinOp(BV, ISD::SUB, DAG, 0, NumElts, InVec0, InVec1))
+ X86Opcode = X86ISD::HSUB;
+ else if (isHorizontalBinOp(BV, ISD::FADD, DAG, 0, NumElts, InVec0, InVec1))
+ X86Opcode = X86ISD::FHADD;
+ else if (isHorizontalBinOp(BV, ISD::FSUB, DAG, 0, NumElts, InVec0, InVec1))
+ X86Opcode = X86ISD::FHSUB;
+ else
+ return SDValue();
+
+ // Don't try to expand this build_vector into a pair of horizontal add/sub
+ // if we can simply emit a pair of scalar add/sub.
+ if (NumUndefsLO + 1 == Half || NumUndefsHI + 1 == Half)
+ return SDValue();
+
+ // Convert this build_vector into two horizontal add/sub followed by
+ // a concat vector.
+ bool isUndefLO = NumUndefsLO == Half;
+ bool isUndefHI = NumUndefsHI == Half;
+ return ExpandHorizontalBinOp(InVec0, InVec1, DL, DAG, X86Opcode, true,
+ isUndefLO, isUndefHI);
+ }
+
+ return SDValue();
+}
+
SDValue
X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
SDLoc dl(Op);
@@ -6429,38 +6882,1160 @@ static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) {
return LowerAVXCONCAT_VECTORS(Op, DAG);
}
-// Try to lower a shuffle node into a simple blend instruction.
-static SDValue
-LowerVECTOR_SHUFFLEtoBlend(ShuffleVectorSDNode *SVOp,
- const X86Subtarget *Subtarget, SelectionDAG &DAG) {
- SDValue V1 = SVOp->getOperand(0);
- SDValue V2 = SVOp->getOperand(1);
- SDLoc dl(SVOp);
- MVT VT = SVOp->getSimpleValueType(0);
+
+//===----------------------------------------------------------------------===//
+// Vector shuffle lowering
+//
+// This is an experimental code path for lowering vector shuffles on x86. It is
+// designed to handle arbitrary vector shuffles and blends, gracefully
+// degrading performance as necessary. It works hard to recognize idiomatic
+// shuffles and lower them to optimal instruction patterns without leaving
+// a framework that allows reasonably efficient handling of all vector shuffle
+// patterns.
+//===----------------------------------------------------------------------===//
+
+/// \brief Tiny helper function to identify a no-op mask.
+///
+/// This is a somewhat boring predicate function. It checks whether the mask
+/// array input, which is assumed to be a single-input shuffle mask of the kind
+/// used by the X86 shuffle instructions (not a fully general
+/// ShuffleVectorSDNode mask) requires any shuffles to occur. Both undef and an
+/// in-place shuffle are 'no-op's.
+static bool isNoopShuffleMask(ArrayRef<int> Mask) {
+ for (int i = 0, Size = Mask.size(); i < Size; ++i)
+ if (Mask[i] != -1 && Mask[i] != i)
+ return false;
+ return true;
+}
+
+/// \brief Helper function to classify a mask as a single-input mask.
+///
+/// This isn't a generic single-input test because in the vector shuffle
+/// lowering we canonicalize single inputs to be the first input operand. This
+/// means we can more quickly test for a single input by only checking whether
+/// an input from the second operand exists. We also assume that the size of
+/// the mask corresponds to the size of the input vectors, which isn't true in
+/// the fully general case.
+static bool isSingleInputShuffleMask(ArrayRef<int> Mask) {
+ for (int M : Mask)
+ if (M >= (int)Mask.size())
+ return false;
+ return true;
+}
+
+/// \brief Get a 4-lane 8-bit shuffle immediate for a mask.
+///
+/// This helper function produces an 8-bit shuffle immediate corresponding to
+/// the ubiquitous shuffle encoding scheme used in x86 instructions for
+/// shuffling 4 lanes. It can be used with most of the PSHUF instructions for
+/// example.
+///
+/// NB: We rely heavily on "undef" masks preserving the input lane.
+static SDValue getV4X86ShuffleImm8ForMask(ArrayRef<int> Mask,
+ SelectionDAG &DAG) {
+ assert(Mask.size() == 4 && "Only 4-lane shuffle masks");
+ assert(Mask[0] >= -1 && Mask[0] < 4 && "Out of bound mask element!");
+ assert(Mask[1] >= -1 && Mask[1] < 4 && "Out of bound mask element!");
+ assert(Mask[2] >= -1 && Mask[2] < 4 && "Out of bound mask element!");
+ assert(Mask[3] >= -1 && Mask[3] < 4 && "Out of bound mask element!");
+
+ unsigned Imm = 0;
+ Imm |= (Mask[0] == -1 ? 0 : Mask[0]) << 0;
+ Imm |= (Mask[1] == -1 ? 1 : Mask[1]) << 2;
+ Imm |= (Mask[2] == -1 ? 2 : Mask[2]) << 4;
+ Imm |= (Mask[3] == -1 ? 3 : Mask[3]) << 6;
+ return DAG.getConstant(Imm, MVT::i8);
+}
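
The same two-bits-per-lane packing, pulled out as a standalone sketch so the encoding can be checked by hand; as the comment above notes, an undef lane is assumed to keep its original position. The helper name is invented for illustration.

#include <array>
#include <cassert>
#include <cstdint>
#include <cstdio>

static uint8_t packShuffleImm8(const std::array<int, 4> &Mask) {
  uint8_t Imm = 0;
  for (int i = 0; i != 4; ++i) {
    int Lane = Mask[i] == -1 ? i : Mask[i]; // undef keeps the input lane
    assert(Lane >= 0 && Lane < 4 && "out of bound mask element");
    Imm |= static_cast<uint8_t>(Lane << (2 * i));
  }
  return Imm;
}

int main() {
  std::array<int, 4> Reverse = {3, 2, 1, 0};
  std::printf("0x%02X\n", packShuffleImm8(Reverse)); // prints: 0x1B
}
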
+
+/// \brief Handle lowering of 2-lane 64-bit floating point shuffles.
+///
+/// This is the basis function for the 2-lane 64-bit shuffles as we have full
+/// support for floating point shuffles but not integer shuffles. These
+/// instructions will incur a domain crossing penalty on some chips though so
+/// it is better to avoid lowering through this for integer vectors where
+/// possible.
+static SDValue lowerV2F64VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
+ const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
+ SDLoc DL(Op);
+ assert(Op.getSimpleValueType() == MVT::v2f64 && "Bad shuffle type!");
+ assert(V1.getSimpleValueType() == MVT::v2f64 && "Bad operand type!");
+ assert(V2.getSimpleValueType() == MVT::v2f64 && "Bad operand type!");
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
+ ArrayRef<int> Mask = SVOp->getMask();
+ assert(Mask.size() == 2 && "Unexpected mask size for v2 shuffle!");
+
+ if (isSingleInputShuffleMask(Mask)) {
+ // Straight shuffle of a single input vector. Simulate this by using the
+ // single input as both of the "inputs" to this instruction.
+ unsigned SHUFPDMask = (Mask[0] == 1) | ((Mask[1] == 1) << 1);
+ return DAG.getNode(X86ISD::SHUFP, SDLoc(Op), MVT::v2f64, V1, V1,
+ DAG.getConstant(SHUFPDMask, MVT::i8));
+ }
+ assert(Mask[0] >= 0 && Mask[0] < 2 && "Non-canonicalized blend!");
+ assert(Mask[1] >= 2 && "Non-canonicalized blend!");
+
+ unsigned SHUFPDMask = (Mask[0] == 1) | (((Mask[1] - 2) == 1) << 1);
+ return DAG.getNode(X86ISD::SHUFP, SDLoc(Op), MVT::v2f64, V1, V2,
+ DAG.getConstant(SHUFPDMask, MVT::i8));
+}
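
For reference, a two-line model of the SHUFPD selection that the immediate built above drives: bit 0 of the immediate picks the low result lane from the first source, bit 1 picks the high result lane from the second source (in the single-input case above, both sources are the same register). Names are illustrative only.

#include <array>
#include <cstdio>

static std::array<double, 2> shufpdModel(const std::array<double, 2> &A,
                                         const std::array<double, 2> &B,
                                         unsigned Imm) {
  return {A[Imm & 1], B[(Imm >> 1) & 1]};
}

int main() {
  std::array<double, 2> A = {1.0, 2.0};
  std::array<double, 2> B = {3.0, 4.0};
  std::array<double, 2> R = shufpdModel(A, B, /*Imm=*/2); // binary 10
  std::printf("%g %g\n", R[0], R[1]); // prints: 1 4
}
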
+
+/// \brief Handle lowering of 2-lane 64-bit integer shuffles.
+///
+/// Tries to lower a 2-lane 64-bit shuffle using shuffle operations provided by
+/// the integer unit to minimize domain crossing penalties. However, for blends
+/// it falls back to the floating point shuffle operation with appropriate bit
+/// casting.
+static SDValue lowerV2I64VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
+ const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
+ SDLoc DL(Op);
+ assert(Op.getSimpleValueType() == MVT::v2i64 && "Bad shuffle type!");
+ assert(V1.getSimpleValueType() == MVT::v2i64 && "Bad operand type!");
+ assert(V2.getSimpleValueType() == MVT::v2i64 && "Bad operand type!");
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
+ ArrayRef<int> Mask = SVOp->getMask();
+ assert(Mask.size() == 2 && "Unexpected mask size for v2 shuffle!");
+
+ if (isSingleInputShuffleMask(Mask)) {
+ // Straight shuffle of a single input vector. For everything from SSE2
+ // onward this has a single fast instruction with no scary immediates.
+ // We have to map the mask as it is actually a v4i32 shuffle instruction.
+ V1 = DAG.getNode(ISD::BITCAST, DL, MVT::v4i32, V1);
+ int WidenedMask[4] = {
+ std::max(Mask[0], 0) * 2, std::max(Mask[0], 0) * 2 + 1,
+ std::max(Mask[1], 0) * 2, std::max(Mask[1], 0) * 2 + 1};
+ return DAG.getNode(
+ ISD::BITCAST, DL, MVT::v2i64,
+ DAG.getNode(X86ISD::PSHUFD, SDLoc(Op), MVT::v4i32, V1,
+ getV4X86ShuffleImm8ForMask(WidenedMask, DAG)));
+ }
+
+ // We implement this with SHUFPD which is pretty lame because it will likely
+ // incur 2 cycles of stall for integer vectors on Nehalem and older chips.
+ // However, all the alternatives are still more cycles and newer chips don't
+ // have this problem. It would be really nice if x86 had better shuffles here.
+ V1 = DAG.getNode(ISD::BITCAST, DL, MVT::v2f64, V1);
+ V2 = DAG.getNode(ISD::BITCAST, DL, MVT::v2f64, V2);
+ return DAG.getNode(ISD::BITCAST, DL, MVT::v2i64,
+ DAG.getVectorShuffle(MVT::v2f64, DL, V1, V2, Mask));
+}
+
+/// \brief Lower 4-lane 32-bit floating point shuffles.
+///
+/// Uses instructions exclusively from the floating point unit to minimize
+/// domain crossing penalties, as these are sufficient to implement all v4f32
+/// shuffles.
+static SDValue lowerV4F32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
+ const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
+ SDLoc DL(Op);
+ assert(Op.getSimpleValueType() == MVT::v4f32 && "Bad shuffle type!");
+ assert(V1.getSimpleValueType() == MVT::v4f32 && "Bad operand type!");
+ assert(V2.getSimpleValueType() == MVT::v4f32 && "Bad operand type!");
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
+ ArrayRef<int> Mask = SVOp->getMask();
+ assert(Mask.size() == 4 && "Unexpected mask size for v4 shuffle!");
+
+ SDValue LowV = V1, HighV = V2;
+ int NewMask[4] = {Mask[0], Mask[1], Mask[2], Mask[3]};
+
+ int NumV2Elements =
+ std::count_if(Mask.begin(), Mask.end(), [](int M) { return M >= 4; });
+
+ if (NumV2Elements == 0)
+ // Straight shuffle of a single input vector. We pass the input vector to
+ // both operands to simulate this with a SHUFPS.
+ return DAG.getNode(X86ISD::SHUFP, DL, MVT::v4f32, V1, V1,
+ getV4X86ShuffleImm8ForMask(Mask, DAG));
+
+ if (NumV2Elements == 1) {
+ int V2Index =
+ std::find_if(Mask.begin(), Mask.end(), [](int M) { return M >= 4; }) -
+ Mask.begin();
+ // Compute the index adjacent to V2Index and in the same half by toggling
+ // the low bit.
+ int V2AdjIndex = V2Index ^ 1;
+
+ if (Mask[V2AdjIndex] == -1) {
+ // Handles all the cases where we have a single V2 element and an undef.
+ // This will only ever happen in the high lanes because we commute the
+ // vector otherwise.
+ if (V2Index < 2)
+ std::swap(LowV, HighV);
+ NewMask[V2Index] -= 4;
+ } else {
+ // Handle the case where the V2 element ends up adjacent to a V1 element.
+ // To make this work, blend them together as the first step.
+ int V1Index = V2AdjIndex;
+ int BlendMask[4] = {Mask[V2Index] - 4, 0, Mask[V1Index], 0};
+ V2 = DAG.getNode(X86ISD::SHUFP, DL, MVT::v4f32, V2, V1,
+ getV4X86ShuffleImm8ForMask(BlendMask, DAG));
+
+ // Now proceed to reconstruct the final blend as we have the necessary
+ // high or low half formed.
+ if (V2Index < 2) {
+ LowV = V2;
+ HighV = V1;
+ } else {
+ HighV = V2;
+ }
+ NewMask[V1Index] = 2; // We put the V1 element in V2[2].
+ NewMask[V2Index] = 0; // We shifted the V2 element into V2[0].
+ }
+ } else if (NumV2Elements == 2) {
+ if (Mask[0] < 4 && Mask[1] < 4) {
+ // Handle the easy case where we have V1 in the low lanes and V2 in the
+ // high lanes. We never see this reversed because we sort the shuffle.
+ NewMask[2] -= 4;
+ NewMask[3] -= 4;
+ } else {
+ // We have a mixture of V1 and V2 in both low and high lanes. Rather than
+ // trying to place elements directly, just blend them and set up the final
+ // shuffle to place them.
+
+ // The first two blend mask elements are for V1, the second two are for
+ // V2.
+ int BlendMask[4] = {Mask[0] < 4 ? Mask[0] : Mask[1],
+ Mask[2] < 4 ? Mask[2] : Mask[3],
+ (Mask[0] >= 4 ? Mask[0] : Mask[1]) - 4,
+ (Mask[2] >= 4 ? Mask[2] : Mask[3]) - 4};
+ V1 = DAG.getNode(X86ISD::SHUFP, DL, MVT::v4f32, V1, V2,
+ getV4X86ShuffleImm8ForMask(BlendMask, DAG));
+
+ // Now we do a normal shuffle of V1 by giving V1 as both operands to
+ // a blend.
+ LowV = HighV = V1;
+ NewMask[0] = Mask[0] < 4 ? 0 : 2;
+ NewMask[1] = Mask[0] < 4 ? 2 : 0;
+ NewMask[2] = Mask[2] < 4 ? 1 : 3;
+ NewMask[3] = Mask[2] < 4 ? 3 : 1;
+ }
+ }
+ return DAG.getNode(X86ISD::SHUFP, DL, MVT::v4f32, LowV, HighV,
+ getV4X86ShuffleImm8ForMask(NewMask, DAG));
+}
+
+/// \brief Lower 4-lane i32 vector shuffles.
+///
+/// We try to handle these with integer-domain shuffles where we can, but for
+/// blends we use the floating point domain blend instructions.
+static SDValue lowerV4I32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
+ const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
+ SDLoc DL(Op);
+ assert(Op.getSimpleValueType() == MVT::v4i32 && "Bad shuffle type!");
+ assert(V1.getSimpleValueType() == MVT::v4i32 && "Bad operand type!");
+ assert(V2.getSimpleValueType() == MVT::v4i32 && "Bad operand type!");
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
+ ArrayRef<int> Mask = SVOp->getMask();
+ assert(Mask.size() == 4 && "Unexpected mask size for v4 shuffle!");
+
+ if (isSingleInputShuffleMask(Mask))
+ // Straight shuffle of a single input vector. For everything from SSE2
+ // onward this has a single fast instruction with no scary immediates.
+ return DAG.getNode(X86ISD::PSHUFD, DL, MVT::v4i32, V1,
+ getV4X86ShuffleImm8ForMask(Mask, DAG));
+
+ // We implement this with SHUFPS because it can blend from two vectors.
+ // Because we're going to eventually use SHUFPS, we use SHUFPS even to build
+ // up the inputs, bypassing domain shift penalties that we would incur if we
+ // directly used PSHUFD on Nehalem and older. For newer chips, this isn't
+ // relevant.
+ return DAG.getNode(ISD::BITCAST, DL, MVT::v4i32,
+ DAG.getVectorShuffle(
+ MVT::v4f32, DL,
+ DAG.getNode(ISD::BITCAST, DL, MVT::v4f32, V1),
+ DAG.getNode(ISD::BITCAST, DL, MVT::v4f32, V2), Mask));
+}
+
+/// \brief Lowering of single-input v8i16 shuffles is the cornerstone of SSE2
+/// shuffle lowering, and the most complex part.
+///
+/// The lowering strategy is to try to form pairs of input lanes which are
+/// targeted at the same half of the final vector, and then use a dword shuffle
+/// to place them onto the right half, and finally unpack the paired lanes into
+/// their final position.
+///
+/// The exact breakdown of how to form these dword pairs and align them on the
+/// correct sides is really tricky. See the comments within the function for
+/// more of the details.
+static SDValue lowerV8I16SingleInputVectorShuffle(
+ SDLoc DL, SDValue V, MutableArrayRef<int> Mask,
+ const X86Subtarget *Subtarget, SelectionDAG &DAG) {
+ assert(V.getSimpleValueType() == MVT::v8i16 && "Bad input type!");
+ MutableArrayRef<int> LoMask = Mask.slice(0, 4);
+ MutableArrayRef<int> HiMask = Mask.slice(4, 4);
+
+ SmallVector<int, 4> LoInputs;
+ std::copy_if(LoMask.begin(), LoMask.end(), std::back_inserter(LoInputs),
+ [](int M) { return M >= 0; });
+ std::sort(LoInputs.begin(), LoInputs.end());
+ LoInputs.erase(std::unique(LoInputs.begin(), LoInputs.end()), LoInputs.end());
+ SmallVector<int, 4> HiInputs;
+ std::copy_if(HiMask.begin(), HiMask.end(), std::back_inserter(HiInputs),
+ [](int M) { return M >= 0; });
+ std::sort(HiInputs.begin(), HiInputs.end());
+ HiInputs.erase(std::unique(HiInputs.begin(), HiInputs.end()), HiInputs.end());
+ int NumLToL =
+ std::lower_bound(LoInputs.begin(), LoInputs.end(), 4) - LoInputs.begin();
+ int NumHToL = LoInputs.size() - NumLToL;
+ int NumLToH =
+ std::lower_bound(HiInputs.begin(), HiInputs.end(), 4) - HiInputs.begin();
+ int NumHToH = HiInputs.size() - NumLToH;
+ MutableArrayRef<int> LToLInputs(LoInputs.data(), NumLToL);
+ MutableArrayRef<int> LToHInputs(HiInputs.data(), NumLToH);
+ MutableArrayRef<int> HToLInputs(LoInputs.data() + NumLToL, NumHToL);
+ MutableArrayRef<int> HToHInputs(HiInputs.data() + NumLToH, NumHToH);
+
+ // Simplify the 1-into-3 and 3-into-1 cases with a single pshufd. For all
+ // such inputs we can swap two of the dwords across the half mark and end up
+ // with <=2 inputs to each half in each half. Once there, we can fall through
+ // to the generic code below. For example:
+ //
+ // Input: [a, b, c, d, e, f, g, h] -PSHUFD[0,2,1,3]-> [a, b, e, f, c, d, g, h]
+ // Mask: [0, 1, 2, 7, 4, 5, 6, 3] -----------------> [0, 1, 4, 7, 2, 3, 6, 5]
+ //
+ // Before we had 3-1 in the low half and 3-1 in the high half. Afterward, 2-2
+ // and 2-2.
+ auto balanceSides = [&](ArrayRef<int> ThreeInputs, int OneInput,
+ int ThreeInputHalfSum, int OneInputHalfOffset) {
+ // Compute the index of dword with only one word among the three inputs in
+ // a half by taking the sum of the half with three inputs and subtracting
+ // the sum of the actual three inputs. The difference is the remaining
+ // slot.
+ int DWordA = (ThreeInputHalfSum -
+ std::accumulate(ThreeInputs.begin(), ThreeInputs.end(), 0)) /
+ 2;
+ int DWordB = OneInputHalfOffset / 2 + (OneInput / 2 + 1) % 2;
+
+ int PSHUFDMask[] = {0, 1, 2, 3};
+ PSHUFDMask[DWordA] = DWordB;
+ PSHUFDMask[DWordB] = DWordA;
+ V = DAG.getNode(ISD::BITCAST, DL, MVT::v8i16,
+ DAG.getNode(X86ISD::PSHUFD, DL, MVT::v4i32,
+ DAG.getNode(ISD::BITCAST, DL, MVT::v4i32, V),
+ getV4X86ShuffleImm8ForMask(PSHUFDMask, DAG)));
+
+ // Adjust the mask to match the new locations of A and B.
+ for (int &M : Mask)
+ if (M != -1 && M/2 == DWordA)
+ M = 2 * DWordB + M % 2;
+ else if (M != -1 && M/2 == DWordB)
+ M = 2 * DWordA + M % 2;
+
+ // Recurse back into this routine to re-compute state now that this isn't
+ // a 3 and 1 problem.
+ return DAG.getVectorShuffle(MVT::v8i16, DL, V, DAG.getUNDEF(MVT::v8i16),
+ Mask);
+ };
+ if (NumLToL == 3 && NumHToL == 1)
+ return balanceSides(LToLInputs, HToLInputs[0], 0 + 1 + 2 + 3, 4);
+ else if (NumLToL == 1 && NumHToL == 3)
+ return balanceSides(HToLInputs, LToLInputs[0], 4 + 5 + 6 + 7, 0);
+ else if (NumLToH == 1 && NumHToH == 3)
+ return balanceSides(HToHInputs, LToHInputs[0], 4 + 5 + 6 + 7, 0);
+ else if (NumLToH == 3 && NumHToH == 1)
+ return balanceSides(LToHInputs, HToHInputs[0], 0 + 1 + 2 + 3, 4);
+
+ // At this point there are at most two inputs to the low and high halves from
+ // each half. That means the inputs can always be grouped into dwords and
+ // those dwords can then be moved to the correct half with a dword shuffle.
+ // We use at most one low and one high word shuffle to collect these paired
+ // inputs into dwords, and finally a dword shuffle to place them.
+ int PSHUFLMask[4] = {-1, -1, -1, -1};
+ int PSHUFHMask[4] = {-1, -1, -1, -1};
+ int PSHUFDMask[4] = {-1, -1, -1, -1};
+
+ // First fix the masks for all the inputs that are staying in their
+ // original halves. This will then dictate the targets of the cross-half
+ // shuffles.
+ auto fixInPlaceInputs = [&PSHUFDMask](
+ ArrayRef<int> InPlaceInputs, MutableArrayRef<int> SourceHalfMask,
+ MutableArrayRef<int> HalfMask, int HalfOffset) {
+ if (InPlaceInputs.empty())
+ return;
+ if (InPlaceInputs.size() == 1) {
+ SourceHalfMask[InPlaceInputs[0] - HalfOffset] =
+ InPlaceInputs[0] - HalfOffset;
+ PSHUFDMask[InPlaceInputs[0] / 2] = InPlaceInputs[0] / 2;
+ return;
+ }
+
+ assert(InPlaceInputs.size() == 2 && "Cannot handle 3 or 4 inputs!");
+ SourceHalfMask[InPlaceInputs[0] - HalfOffset] =
+ InPlaceInputs[0] - HalfOffset;
+ // Put the second input next to the first so that they are packed into
+ // a dword. We find the adjacent index by toggling the low bit.
+ int AdjIndex = InPlaceInputs[0] ^ 1;
+ SourceHalfMask[AdjIndex - HalfOffset] = InPlaceInputs[1] - HalfOffset;
+ std::replace(HalfMask.begin(), HalfMask.end(), InPlaceInputs[1], AdjIndex);
+ PSHUFDMask[AdjIndex / 2] = AdjIndex / 2;
+ };
+ if (!HToLInputs.empty())
+ fixInPlaceInputs(LToLInputs, PSHUFLMask, LoMask, 0);
+ if (!LToHInputs.empty())
+ fixInPlaceInputs(HToHInputs, PSHUFHMask, HiMask, 4);
+
+ // Now gather the cross-half inputs and place them into a free dword of
+ // their target half.
+ // FIXME: This operation could almost certainly be simplified dramatically to
+ // look more like the 3-1 fixing operation.
+ auto moveInputsToRightHalf = [&PSHUFDMask](
+ MutableArrayRef<int> IncomingInputs, ArrayRef<int> ExistingInputs,
+ MutableArrayRef<int> SourceHalfMask, MutableArrayRef<int> HalfMask,
+ int SourceOffset, int DestOffset) {
+ auto isWordClobbered = [](ArrayRef<int> SourceHalfMask, int Word) {
+ return SourceHalfMask[Word] != -1 && SourceHalfMask[Word] != Word;
+ };
+ auto isDWordClobbered = [&isWordClobbered](ArrayRef<int> SourceHalfMask,
+ int Word) {
+ int LowWord = Word & ~1;
+ int HighWord = Word | 1;
+ return isWordClobbered(SourceHalfMask, LowWord) ||
+ isWordClobbered(SourceHalfMask, HighWord);
+ };
+
+ if (IncomingInputs.empty())
+ return;
+
+ if (ExistingInputs.empty()) {
+ // Map each dword containing an incoming input into the destination half.
+ for (int Input : IncomingInputs) {
+ // If the source half mask maps over the inputs, turn those into
+ // swaps and use the swapped lane.
+ if (isWordClobbered(SourceHalfMask, Input - SourceOffset)) {
+ if (SourceHalfMask[SourceHalfMask[Input - SourceOffset]] == -1) {
+ SourceHalfMask[SourceHalfMask[Input - SourceOffset]] =
+ Input - SourceOffset;
+ // We have to swap the uses in our half mask in one sweep.
+ for (int &M : HalfMask)
+ if (M == SourceHalfMask[Input - SourceOffset])
+ M = Input;
+ else if (M == Input)
+ M = SourceHalfMask[Input - SourceOffset] + SourceOffset;
+ } else {
+ assert(SourceHalfMask[SourceHalfMask[Input - SourceOffset]] ==
+ Input - SourceOffset &&
+ "Previous placement doesn't match!");
+ }
+ // Note that this correctly re-maps both when we do a swap and when
+ // we observe the other side of the swap above. We rely on that to
+ // avoid swapping the members of the input list directly.
+ Input = SourceHalfMask[Input - SourceOffset] + SourceOffset;
+ }
+
+ // Map the input's dword into the correct half.
+ if (PSHUFDMask[(Input - SourceOffset + DestOffset) / 2] == -1)
+ PSHUFDMask[(Input - SourceOffset + DestOffset) / 2] = Input / 2;
+ else
+ assert(PSHUFDMask[(Input - SourceOffset + DestOffset) / 2] ==
+ Input / 2 &&
+ "Previous placement doesn't match!");
+ }
+
+ // Then shift any mask elements that pointed at the source half to point
+ // at the destination half instead, since the dword containing each such
+ // element has been mirrored into the same position within that half.
+ for (int &M : HalfMask)
+ if (M >= SourceOffset && M < SourceOffset + 4) {
+ M = M - SourceOffset + DestOffset;
+ assert(M >= 0 && "This should never wrap below zero!");
+ }
+ return;
+ }
+
+ // Ensure we have the input in a viable dword of its current half. This
+ // is particularly tricky because the original position may be clobbered
+ // by inputs being moved and *staying* in that half.
+ if (IncomingInputs.size() == 1) {
+ if (isWordClobbered(SourceHalfMask, IncomingInputs[0] - SourceOffset)) {
+ int InputFixed = std::find(std::begin(SourceHalfMask),
+ std::end(SourceHalfMask), -1) -
+ std::begin(SourceHalfMask) + SourceOffset;
+ SourceHalfMask[InputFixed - SourceOffset] =
+ IncomingInputs[0] - SourceOffset;
+ std::replace(HalfMask.begin(), HalfMask.end(), IncomingInputs[0],
+ InputFixed);
+ IncomingInputs[0] = InputFixed;
+ }
+ } else if (IncomingInputs.size() == 2) {
+ if (IncomingInputs[0] / 2 != IncomingInputs[1] / 2 ||
+ isDWordClobbered(SourceHalfMask, IncomingInputs[0] - SourceOffset)) {
+ int SourceDWordBase = !isDWordClobbered(SourceHalfMask, 0) ? 0 : 2;
+ assert(!isDWordClobbered(SourceHalfMask, SourceDWordBase) &&
+ "Not all dwords can be clobbered!");
+ SourceHalfMask[SourceDWordBase] = IncomingInputs[0] - SourceOffset;
+ SourceHalfMask[SourceDWordBase + 1] = IncomingInputs[1] - SourceOffset;
+ for (int &M : HalfMask)
+ if (M == IncomingInputs[0])
+ M = SourceDWordBase + SourceOffset;
+ else if (M == IncomingInputs[1])
+ M = SourceDWordBase + 1 + SourceOffset;
+ IncomingInputs[0] = SourceDWordBase + SourceOffset;
+ IncomingInputs[1] = SourceDWordBase + 1 + SourceOffset;
+ }
+ } else {
+ llvm_unreachable("Unhandled input size!");
+ }
+
+ // Now hoist the DWord down to the right half.
+ int FreeDWord = (PSHUFDMask[DestOffset / 2] == -1 ? 0 : 1) + DestOffset / 2;
+ assert(PSHUFDMask[FreeDWord] == -1 && "DWord not free");
+ PSHUFDMask[FreeDWord] = IncomingInputs[0] / 2;
+ for (int Input : IncomingInputs)
+ std::replace(HalfMask.begin(), HalfMask.end(), Input,
+ FreeDWord * 2 + Input % 2);
+ };
+ moveInputsToRightHalf(HToLInputs, LToLInputs, PSHUFHMask, LoMask,
+ /*SourceOffset*/ 4, /*DestOffset*/ 0);
+ moveInputsToRightHalf(LToHInputs, HToHInputs, PSHUFLMask, HiMask,
+ /*SourceOffset*/ 0, /*DestOffset*/ 4);
+
+ // Now enact all the shuffles we've computed to move the inputs into their
+ // target half.
+ if (!isNoopShuffleMask(PSHUFLMask))
+ V = DAG.getNode(X86ISD::PSHUFLW, DL, MVT::v8i16, V,
+ getV4X86ShuffleImm8ForMask(PSHUFLMask, DAG));
+ if (!isNoopShuffleMask(PSHUFHMask))
+ V = DAG.getNode(X86ISD::PSHUFHW, DL, MVT::v8i16, V,
+ getV4X86ShuffleImm8ForMask(PSHUFHMask, DAG));
+ if (!isNoopShuffleMask(PSHUFDMask))
+ V = DAG.getNode(ISD::BITCAST, DL, MVT::v8i16,
+ DAG.getNode(X86ISD::PSHUFD, DL, MVT::v4i32,
+ DAG.getNode(ISD::BITCAST, DL, MVT::v4i32, V),
+ getV4X86ShuffleImm8ForMask(PSHUFDMask, DAG)));
+
+ // At this point, each half should contain all its inputs, and we can then
+ // just shuffle them into their final position.
+ assert(std::count_if(LoMask.begin(), LoMask.end(),
+ [](int M) { return M >= 4; }) == 0 &&
+ "Failed to lift all the high half inputs to the low mask!");
+ assert(std::count_if(HiMask.begin(), HiMask.end(),
+ [](int M) { return M >= 0 && M < 4; }) == 0 &&
+ "Failed to lift all the low half inputs to the high mask!");
+
+ // Do a half shuffle for the low mask.
+ if (!isNoopShuffleMask(LoMask))
+ V = DAG.getNode(X86ISD::PSHUFLW, DL, MVT::v8i16, V,
+ getV4X86ShuffleImm8ForMask(LoMask, DAG));
+
+ // Do a half shuffle with the high mask after shifting its values down.
+ for (int &M : HiMask)
+ if (M >= 0)
+ M -= 4;
+ if (!isNoopShuffleMask(HiMask))
+ V = DAG.getNode(X86ISD::PSHUFHW, DL, MVT::v8i16, V,
+ getV4X86ShuffleImm8ForMask(HiMask, DAG));
+
+ return V;
+}
+
+/// \brief Detect whether the mask pattern should be lowered through
+/// interleaving.
+///
+/// This essentially tests whether viewing the mask as an interleaving of two
+/// sub-sequences reduces the cross-input traffic of a blend operation. If so,
+/// lowering it through interleaving is a significantly better strategy.
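+///
+/// For example, the mask <0,8,1,9,2,10,3,11> has zero cross-input results when
+/// viewed as an interleaving (all even elements come from the first input and
+/// all odd elements from the second) but four when split into low and high
+/// halves, so interleaving is preferred.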
+static bool shouldLowerAsInterleaving(ArrayRef<int> Mask) {
+ int NumEvenInputs[2] = {0, 0};
+ int NumOddInputs[2] = {0, 0};
+ int NumLoInputs[2] = {0, 0};
+ int NumHiInputs[2] = {0, 0};
+ for (int i = 0, Size = Mask.size(); i < Size; ++i) {
+ if (Mask[i] < 0)
+ continue;
+
+ int InputIdx = Mask[i] >= Size;
+
+ if (i < Size / 2)
+ ++NumLoInputs[InputIdx];
+ else
+ ++NumHiInputs[InputIdx];
+
+ if ((i % 2) == 0)
+ ++NumEvenInputs[InputIdx];
+ else
+ ++NumOddInputs[InputIdx];
+ }
+
+ // The minimum number of cross-input results for both the interleaved and
+ // split cases. If interleaving results in fewer cross-input results, return
+ // true.
+ int InterleavedCrosses = std::min(NumEvenInputs[1] + NumOddInputs[0],
+ NumEvenInputs[0] + NumOddInputs[1]);
+ int SplitCrosses = std::min(NumLoInputs[1] + NumHiInputs[0],
+ NumLoInputs[0] + NumHiInputs[1]);
+ return InterleavedCrosses < SplitCrosses;
+}
+
+/// \brief Blend two v8i16 vectors using a naive unpack strategy.
+///
+/// This strategy only works when the inputs from each vector fit into a single
+/// half of that vector, and generally there are not so many inputs as to leave
+/// the in-place shuffles required highly constrained (and thus expensive). It
+/// shifts all the inputs into a single side of both input vectors and then
+/// uses an unpack to interleave these inputs in a single vector. At that
+/// point, we will fall back on the generic single input shuffle lowering.
+static SDValue lowerV8I16BasicBlendVectorShuffle(SDLoc DL, SDValue V1,
+ SDValue V2,
+ MutableArrayRef<int> Mask,
+ const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
+ assert(V1.getSimpleValueType() == MVT::v8i16 && "Bad input type!");
+ assert(V2.getSimpleValueType() == MVT::v8i16 && "Bad input type!");
+ SmallVector<int, 3> LoV1Inputs, HiV1Inputs, LoV2Inputs, HiV2Inputs;
+ for (int i = 0; i < 8; ++i)
+ if (Mask[i] >= 0 && Mask[i] < 4)
+ LoV1Inputs.push_back(i);
+ else if (Mask[i] >= 4 && Mask[i] < 8)
+ HiV1Inputs.push_back(i);
+ else if (Mask[i] >= 8 && Mask[i] < 12)
+ LoV2Inputs.push_back(i);
+ else if (Mask[i] >= 12)
+ HiV2Inputs.push_back(i);
+
+ int NumV1Inputs = LoV1Inputs.size() + HiV1Inputs.size();
+ int NumV2Inputs = LoV2Inputs.size() + HiV2Inputs.size();
+ (void)NumV1Inputs;
+ (void)NumV2Inputs;
+ assert(NumV1Inputs > 0 && NumV1Inputs <= 3 && "At most 3 inputs supported");
+ assert(NumV2Inputs > 0 && NumV2Inputs <= 3 && "At most 3 inputs supported");
+ assert(NumV1Inputs + NumV2Inputs <= 4 && "At most 4 combined inputs");
+
+ bool MergeFromLo = LoV1Inputs.size() + LoV2Inputs.size() >=
+ HiV1Inputs.size() + HiV2Inputs.size();
+
+ auto moveInputsToHalf = [&](SDValue V, ArrayRef<int> LoInputs,
+ ArrayRef<int> HiInputs, bool MoveToLo,
+ int MaskOffset) {
+ ArrayRef<int> GoodInputs = MoveToLo ? LoInputs : HiInputs;
+ ArrayRef<int> BadInputs = MoveToLo ? HiInputs : LoInputs;
+ if (BadInputs.empty())
+ return V;
+
+ int MoveMask[] = {-1, -1, -1, -1, -1, -1, -1, -1};
+ int MoveOffset = MoveToLo ? 0 : 4;
+
+ if (GoodInputs.empty()) {
+ for (int BadInput : BadInputs) {
+ MoveMask[Mask[BadInput] % 4 + MoveOffset] = Mask[BadInput] - MaskOffset;
+ Mask[BadInput] = Mask[BadInput] % 4 + MoveOffset + MaskOffset;
+ }
+ } else {
+ if (GoodInputs.size() == 2) {
+ // If the low inputs are spread across two dwords, pack them into
+ // a single dword.
+ MoveMask[Mask[GoodInputs[0]] % 2 + MoveOffset] =
+ Mask[GoodInputs[0]] - MaskOffset;
+ MoveMask[Mask[GoodInputs[1]] % 2 + MoveOffset] =
+ Mask[GoodInputs[1]] - MaskOffset;
+ Mask[GoodInputs[0]] = Mask[GoodInputs[0]] % 2 + MoveOffset + MaskOffset;
+ Mask[GoodInputs[1]] = Mask[GoodInputs[1]] % 2 + MoveOffset + MaskOffset;
+ } else {
+ // Otherwise pin the low inputs.
+ for (int GoodInput : GoodInputs)
+ MoveMask[Mask[GoodInput] - MaskOffset] = Mask[GoodInput] - MaskOffset;
+ }
+
+ int MoveMaskIdx =
+ std::find(std::begin(MoveMask) + MoveOffset, std::end(MoveMask), -1) -
+ std::begin(MoveMask);
+ assert(MoveMaskIdx >= MoveOffset && "Established above");
+
+ if (BadInputs.size() == 2) {
+ assert(MoveMask[MoveMaskIdx] == -1 && "Expected empty slot");
+ assert(MoveMask[MoveMaskIdx + 1] == -1 && "Expected empty slot");
+ MoveMask[MoveMaskIdx + Mask[BadInputs[0]] % 2] =
+ Mask[BadInputs[0]] - MaskOffset;
+ MoveMask[MoveMaskIdx + Mask[BadInputs[1]] % 2] =
+ Mask[BadInputs[1]] - MaskOffset;
+ Mask[BadInputs[0]] = MoveMaskIdx + Mask[BadInputs[0]] % 2 + MaskOffset;
+ Mask[BadInputs[1]] = MoveMaskIdx + Mask[BadInputs[1]] % 2 + MaskOffset;
+ } else {
+ assert(BadInputs.size() == 1 && "All sizes handled");
+ MoveMask[MoveMaskIdx] = Mask[BadInputs[0]] - MaskOffset;
+ Mask[BadInputs[0]] = MoveMaskIdx + MaskOffset;
+ }
+ }
+
+ return DAG.getVectorShuffle(MVT::v8i16, DL, V, DAG.getUNDEF(MVT::v8i16),
+ MoveMask);
+ };
+ V1 = moveInputsToHalf(V1, LoV1Inputs, HiV1Inputs, MergeFromLo,
+ /*MaskOffset*/ 0);
+ V2 = moveInputsToHalf(V2, LoV2Inputs, HiV2Inputs, MergeFromLo,
+ /*MaskOffset*/ 8);
+
+ // FIXME: Select an interleaving of the merge of V1 and V2 that minimizes
+ // cross-half traffic in the final shuffle.
+
+ // Munge the mask to be a single-input mask after the unpack merges the
+ // results.
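+ // After the unpack, lane L of the merged half of V1 lands at result lane
+ // 2*L and the matching lane of V2 at 2*L+1, which is what 2*(M%4)+(M/8)
+ // computes (e.g. with MergeFromLo, M == 2 maps to lane 4 and M == 10 to
+ // lane 5).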
+ for (int &M : Mask)
+ if (M != -1)
+ M = 2 * (M % 4) + (M / 8);
+
+ return DAG.getVectorShuffle(
+ MVT::v8i16, DL, DAG.getNode(MergeFromLo ? X86ISD::UNPCKL : X86ISD::UNPCKH,
+ DL, MVT::v8i16, V1, V2),
+ DAG.getUNDEF(MVT::v8i16), Mask);
+}
+
+/// \brief Generic lowering of 8-lane i16 shuffles.
+///
+/// This handles both single-input shuffles and combined shuffle/blends with
+/// two inputs. The single input shuffles are immediately delegated to
+/// a dedicated lowering routine.
+///
+/// The blends are lowered in one of three fundamental ways. If there are few
+/// enough inputs, it delegates to a basic UNPCK-based strategy. If the shuffle
+/// of the input is significantly cheaper when lowered as an interleaving of
+/// the two inputs, try to interleave them. Otherwise, blend the low and high
+/// halves of the inputs separately (making them have relatively few inputs)
+/// and then concatenate them.
+static SDValue lowerV8I16VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
+ const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
+ SDLoc DL(Op);
+ assert(Op.getSimpleValueType() == MVT::v8i16 && "Bad shuffle type!");
+ assert(V1.getSimpleValueType() == MVT::v8i16 && "Bad operand type!");
+ assert(V2.getSimpleValueType() == MVT::v8i16 && "Bad operand type!");
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
+ ArrayRef<int> OrigMask = SVOp->getMask();
+ int MaskStorage[8] = {OrigMask[0], OrigMask[1], OrigMask[2], OrigMask[3],
+ OrigMask[4], OrigMask[5], OrigMask[6], OrigMask[7]};
+ MutableArrayRef<int> Mask(MaskStorage);
+
+ assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!");
+
+ auto isV1 = [](int M) { return M >= 0 && M < 8; };
+ auto isV2 = [](int M) { return M >= 8; };
+
+ int NumV1Inputs = std::count_if(Mask.begin(), Mask.end(), isV1);
+ int NumV2Inputs = std::count_if(Mask.begin(), Mask.end(), isV2);
+
+ if (NumV2Inputs == 0)
+ return lowerV8I16SingleInputVectorShuffle(DL, V1, Mask, Subtarget, DAG);
+
+ assert(NumV1Inputs > 0 && "All single-input shuffles should be canonicalized "
+ "to be V1-input shuffles.");
+
+ if (NumV1Inputs + NumV2Inputs <= 4)
+ return lowerV8I16BasicBlendVectorShuffle(DL, V1, V2, Mask, Subtarget, DAG);
+
+ // Check whether an interleaving lowering is likely to be more efficient.
+ // This isn't perfect but it is a strong heuristic that tends to work well on
+ // the kinds of shuffles that show up in practice.
+ //
+ // FIXME: Handle 1x, 2x, and 4x interleaving.
+ if (shouldLowerAsInterleaving(Mask)) {
+ // FIXME: Figure out whether we should pack these into the low or high
+ // halves.
+
+ int EMask[8], OMask[8];
+ for (int i = 0; i < 4; ++i) {
+ EMask[i] = Mask[2*i];
+ OMask[i] = Mask[2*i + 1];
+ EMask[i + 4] = -1;
+ OMask[i + 4] = -1;
+ }
+
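+ // For example, the mask <0,8,1,9,2,10,3,11> splits into an even mask of
+ // <0,1,2,3,-1,-1,-1,-1> and an odd mask of <8,9,10,11,-1,-1,-1,-1>, and
+ // unpacking those two partial shuffles low rebuilds the interleaving.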
+ SDValue Evens = DAG.getVectorShuffle(MVT::v8i16, DL, V1, V2, EMask);
+ SDValue Odds = DAG.getVectorShuffle(MVT::v8i16, DL, V1, V2, OMask);
+
+ return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v8i16, Evens, Odds);
+ }
+
+ int LoBlendMask[8] = {-1, -1, -1, -1, -1, -1, -1, -1};
+ int HiBlendMask[8] = {-1, -1, -1, -1, -1, -1, -1, -1};
+
+ for (int i = 0; i < 4; ++i) {
+ LoBlendMask[i] = Mask[i];
+ HiBlendMask[i] = Mask[i + 4];
+ }
+
+ SDValue LoV = DAG.getVectorShuffle(MVT::v8i16, DL, V1, V2, LoBlendMask);
+ SDValue HiV = DAG.getVectorShuffle(MVT::v8i16, DL, V1, V2, HiBlendMask);
+ LoV = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, LoV);
+ HiV = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, HiV);
+
+ return DAG.getNode(ISD::BITCAST, DL, MVT::v8i16,
+ DAG.getNode(X86ISD::UNPCKL, DL, MVT::v2i64, LoV, HiV));
+}
+
+/// \brief Generic lowering of v16i8 shuffles.
+///
+/// This is a hybrid strategy to lower v16i8 vectors. It first attempts to
+/// detect any complexity reducing interleaving. If that doesn't help, it uses
+/// UNPCK to spread the i8 elements across two i16-element vectors, and uses
+/// the existing lowering for v8i16 blends on each half, finally PACK-ing them
+/// back together.
+static SDValue lowerV16I8VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
+ const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
+ SDLoc DL(Op);
+ assert(Op.getSimpleValueType() == MVT::v16i8 && "Bad shuffle type!");
+ assert(V1.getSimpleValueType() == MVT::v16i8 && "Bad operand type!");
+ assert(V2.getSimpleValueType() == MVT::v16i8 && "Bad operand type!");
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
+ ArrayRef<int> OrigMask = SVOp->getMask();
+ assert(OrigMask.size() == 16 && "Unexpected mask size for v16 shuffle!");
+ int MaskStorage[16] = {
+ OrigMask[0], OrigMask[1], OrigMask[2], OrigMask[3],
+ OrigMask[4], OrigMask[5], OrigMask[6], OrigMask[7],
+ OrigMask[8], OrigMask[9], OrigMask[10], OrigMask[11],
+ OrigMask[12], OrigMask[13], OrigMask[14], OrigMask[15]};
+ MutableArrayRef<int> Mask(MaskStorage);
+ MutableArrayRef<int> LoMask = Mask.slice(0, 8);
+ MutableArrayRef<int> HiMask = Mask.slice(8, 8);
+
+ // For single-input shuffles, there are some nicer lowering tricks we can use.
+ if (isSingleInputShuffleMask(Mask)) {
+ // Check whether we can widen this to an i16 shuffle by duplicating bytes.
+ // Notably, this handles splat and partial-splat shuffles more efficiently.
+ // However, it only makes sense if the pre-duplication shuffle simplifies
+ // things significantly. Currently, this means we need to be able to
+ // express the pre-duplication shuffle as an i16 shuffle.
+ //
+ // FIXME: We should check for other patterns which can be widened into an
+ // i16 shuffle as well.
+ auto canWidenViaDuplication = [](ArrayRef<int> Mask) {
+ for (int i = 0; i < 16; i += 2) {
+ if (Mask[i] != Mask[i + 1])
+ return false;
+ }
+ return true;
+ };
+ auto tryToWidenViaDuplication = [&]() -> SDValue {
+ if (!canWidenViaDuplication(Mask))
+ return SDValue();
+ SmallVector<int, 4> LoInputs;
+ std::copy_if(Mask.begin(), Mask.end(), std::back_inserter(LoInputs),
+ [](int M) { return M >= 0 && M < 8; });
+ std::sort(LoInputs.begin(), LoInputs.end());
+ LoInputs.erase(std::unique(LoInputs.begin(), LoInputs.end()),
+ LoInputs.end());
+ SmallVector<int, 4> HiInputs;
+ std::copy_if(Mask.begin(), Mask.end(), std::back_inserter(HiInputs),
+ [](int M) { return M >= 8; });
+ std::sort(HiInputs.begin(), HiInputs.end());
+ HiInputs.erase(std::unique(HiInputs.begin(), HiInputs.end()),
+ HiInputs.end());
+
+ bool TargetLo = LoInputs.size() >= HiInputs.size();
+ ArrayRef<int> InPlaceInputs = TargetLo ? LoInputs : HiInputs;
+ ArrayRef<int> MovingInputs = TargetLo ? HiInputs : LoInputs;
+
+ int PreDupI16Shuffle[] = {-1, -1, -1, -1, -1, -1, -1, -1};
+ SmallDenseMap<int, int, 8> LaneMap;
+ for (int I : InPlaceInputs) {
+ PreDupI16Shuffle[I/2] = I/2;
+ LaneMap[I] = I;
+ }
+ int j = TargetLo ? 0 : 4, je = j + 4;
+ for (int i = 0, ie = MovingInputs.size(); i < ie; ++i) {
+ // Check whether slot j already maps this input's word. This happens when
+ // the mask uses two adjacent bytes of the same word: after placing the low
+ // byte, the high byte finds its word already mapped.
+ if (PreDupI16Shuffle[j] != MovingInputs[i] / 2) {
+ // If we haven't yet mapped the input, search for a slot into which
+ // we can map it.
+ while (j < je && PreDupI16Shuffle[j] != -1)
+ ++j;
+
+ if (j == je)
+ // We can't place the inputs into a single half with a simple
+ // i16 shuffle, so bail.
+ return SDValue();
+
+ // Map this input with the i16 shuffle.
+ PreDupI16Shuffle[j] = MovingInputs[i] / 2;
+ }
+
+ // Update the lane map based on the mapping we ended up with.
+ LaneMap[MovingInputs[i]] = 2 * j + MovingInputs[i] % 2;
+ }
+ V1 = DAG.getNode(
+ ISD::BITCAST, DL, MVT::v16i8,
+ DAG.getVectorShuffle(MVT::v8i16, DL,
+ DAG.getNode(ISD::BITCAST, DL, MVT::v8i16, V1),
+ DAG.getUNDEF(MVT::v8i16), PreDupI16Shuffle));
+
+ // Unpack the bytes to form the i16s that will be shuffled into place.
+ V1 = DAG.getNode(TargetLo ? X86ISD::UNPCKL : X86ISD::UNPCKH, DL,
+ MVT::v16i8, V1, V1);
+
+ int PostDupI16Shuffle[8] = {-1, -1, -1, -1, -1, -1, -1, -1};
+ for (int i = 0; i < 16; i += 2) {
+ if (Mask[i] != -1)
+ PostDupI16Shuffle[i / 2] = LaneMap[Mask[i]] - (TargetLo ? 0 : 8);
+ assert(PostDupI16Shuffle[i / 2] < 8 && "Invalid v8 shuffle mask!");
+ }
+ return DAG.getNode(
+ ISD::BITCAST, DL, MVT::v16i8,
+ DAG.getVectorShuffle(MVT::v8i16, DL,
+ DAG.getNode(ISD::BITCAST, DL, MVT::v8i16, V1),
+ DAG.getUNDEF(MVT::v8i16), PostDupI16Shuffle));
+ };
+ if (SDValue V = tryToWidenViaDuplication())
+ return V;
+ }
+
+ // Check whether an interleaving lowering is likely to be more efficient.
+ // This isn't perfect but it is a strong heuristic that tends to work well on
+ // the kinds of shuffles that show up in practice.
+ //
+ // FIXME: We need to handle other interleaving widths (i16, i32, ...).
+ if (shouldLowerAsInterleaving(Mask)) {
+ // FIXME: Figure out whether we should pack these into the low or high
+ // halves.
+
+ int EMask[16], OMask[16];
+ for (int i = 0; i < 8; ++i) {
+ EMask[i] = Mask[2*i];
+ OMask[i] = Mask[2*i + 1];
+ EMask[i + 8] = -1;
+ OMask[i + 8] = -1;
+ }
+
+ SDValue Evens = DAG.getVectorShuffle(MVT::v16i8, DL, V1, V2, EMask);
+ SDValue Odds = DAG.getVectorShuffle(MVT::v16i8, DL, V1, V2, OMask);
+
+ return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v16i8, Evens, Odds);
+ }
+
+ int V1LoBlendMask[8] = {-1, -1, -1, -1, -1, -1, -1, -1};
+ int V1HiBlendMask[8] = {-1, -1, -1, -1, -1, -1, -1, -1};
+ int V2LoBlendMask[8] = {-1, -1, -1, -1, -1, -1, -1, -1};
+ int V2HiBlendMask[8] = {-1, -1, -1, -1, -1, -1, -1, -1};
+
+ auto buildBlendMasks = [](MutableArrayRef<int> HalfMask,
+ MutableArrayRef<int> V1HalfBlendMask,
+ MutableArrayRef<int> V2HalfBlendMask) {
+ for (int i = 0; i < 8; ++i)
+ if (HalfMask[i] >= 0 && HalfMask[i] < 16) {
+ V1HalfBlendMask[i] = HalfMask[i];
+ HalfMask[i] = i;
+ } else if (HalfMask[i] >= 16) {
+ V2HalfBlendMask[i] = HalfMask[i] - 16;
+ HalfMask[i] = i + 8;
+ }
+ };
+ buildBlendMasks(LoMask, V1LoBlendMask, V2LoBlendMask);
+ buildBlendMasks(HiMask, V1HiBlendMask, V2HiBlendMask);
+
+ SDValue Zero = getZeroVector(MVT::v8i16, Subtarget, DAG, DL);
+
+ auto buildLoAndHiV8s = [&](SDValue V, MutableArrayRef<int> LoBlendMask,
+ MutableArrayRef<int> HiBlendMask) {
+ SDValue V1, V2;
+ // Check if any of the odd lanes in the v16i8 are used. If not, we can mask
+ // them out and avoid using UNPCK{L,H} to extract the elements of V as
+ // i16s.
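+ // (On the little-endian bitcast, byte 2*k of V is the low byte of i16 lane
+ // k, so halving the mask indices below retargets them correctly.)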
+ if (std::none_of(LoBlendMask.begin(), LoBlendMask.end(),
+ [](int M) { return M >= 0 && M % 2 == 1; }) &&
+ std::none_of(HiBlendMask.begin(), HiBlendMask.end(),
+ [](int M) { return M >= 0 && M % 2 == 1; })) {
+ // Use a mask to drop the high bytes.
+ V1 = DAG.getNode(ISD::BITCAST, DL, MVT::v8i16, V);
+ V1 = DAG.getNode(ISD::AND, DL, MVT::v8i16, V1,
+ DAG.getConstant(0x00FF, MVT::v8i16));
+
+ // This will be a single vector shuffle instead of a blend so nuke V2.
+ V2 = DAG.getUNDEF(MVT::v8i16);
+
+ // Squash the masks to point directly into V1.
+ for (int &M : LoBlendMask)
+ if (M >= 0)
+ M /= 2;
+ for (int &M : HiBlendMask)
+ if (M >= 0)
+ M /= 2;
+ } else {
+ // Otherwise just unpack the low half of V into V1 and the high half into
+ // V2 so that we can blend them as i16s.
+ V1 = DAG.getNode(ISD::BITCAST, DL, MVT::v8i16,
+ DAG.getNode(X86ISD::UNPCKL, DL, MVT::v16i8, V, Zero));
+ V2 = DAG.getNode(ISD::BITCAST, DL, MVT::v8i16,
+ DAG.getNode(X86ISD::UNPCKH, DL, MVT::v16i8, V, Zero));
+ }
+
+ SDValue BlendedLo = DAG.getVectorShuffle(MVT::v8i16, DL, V1, V2, LoBlendMask);
+ SDValue BlendedHi = DAG.getVectorShuffle(MVT::v8i16, DL, V1, V2, HiBlendMask);
+ return std::make_pair(BlendedLo, BlendedHi);
+ };
+ SDValue V1Lo, V1Hi, V2Lo, V2Hi;
+ std::tie(V1Lo, V1Hi) = buildLoAndHiV8s(V1, V1LoBlendMask, V1HiBlendMask);
+ std::tie(V2Lo, V2Hi) = buildLoAndHiV8s(V2, V2LoBlendMask, V2HiBlendMask);
+
+ SDValue LoV = DAG.getVectorShuffle(MVT::v8i16, DL, V1Lo, V2Lo, LoMask);
+ SDValue HiV = DAG.getVectorShuffle(MVT::v8i16, DL, V1Hi, V2Hi, HiMask);
+
+ return DAG.getNode(X86ISD::PACKUS, DL, MVT::v16i8, LoV, HiV);
+}
+
+/// \brief Dispatching routine to lower various 128-bit x86 vector shuffles.
+///
+/// This routine breaks down the specific type of 128-bit shuffle and
+/// dispatches to the lowering routines accordingly.
+static SDValue lower128BitVectorShuffle(SDValue Op, SDValue V1, SDValue V2,
+ MVT VT, const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
+ switch (VT.SimpleTy) {
+ case MVT::v2i64:
+ return lowerV2I64VectorShuffle(Op, V1, V2, Subtarget, DAG);
+ case MVT::v2f64:
+ return lowerV2F64VectorShuffle(Op, V1, V2, Subtarget, DAG);
+ case MVT::v4i32:
+ return lowerV4I32VectorShuffle(Op, V1, V2, Subtarget, DAG);
+ case MVT::v4f32:
+ return lowerV4F32VectorShuffle(Op, V1, V2, Subtarget, DAG);
+ case MVT::v8i16:
+ return lowerV8I16VectorShuffle(Op, V1, V2, Subtarget, DAG);
+ case MVT::v16i8:
+ return lowerV16I8VectorShuffle(Op, V1, V2, Subtarget, DAG);
+
+ default:
+ llvm_unreachable("Unimplemented!");
+ }
+}
+
+/// \brief Tiny helper function to test whether adjacent mask elements form
+/// sequential pairs.
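+///
+/// For example, <0,1,4,5> is sequential in adjacent pairs while <0,2,4,6> is
+/// not.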
+static bool areAdjacentMasksSequential(ArrayRef<int> Mask) {
+ for (int i = 0, Size = Mask.size(); i < Size; i += 2)
+ if (Mask[i] + 1 != Mask[i+1])
+ return false;
+
+ return true;
+}
+
+/// \brief Top-level lowering for x86 vector shuffles.
+///
+/// This handles decomposition, canonicalization, and lowering of all x86
+/// vector shuffles. Most of the specific lowering strategies are encapsulated
+/// above in helper routines. The canonicalization attempts to widen shuffles
+/// to involve fewer lanes of wider elements, consolidate symmetric patterns
+/// s.t. only one of the two inputs needs to be tested, etc.
+static SDValue lowerVectorShuffle(SDValue Op, const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
+ ArrayRef<int> Mask = SVOp->getMask();
+ SDValue V1 = Op.getOperand(0);
+ SDValue V2 = Op.getOperand(1);
+ MVT VT = Op.getSimpleValueType();
+ int NumElements = VT.getVectorNumElements();
+ SDLoc dl(Op);
+
+ assert(VT.getSizeInBits() != 64 && "Can't lower MMX shuffles");
+
+ bool V1IsUndef = V1.getOpcode() == ISD::UNDEF;
+ bool V2IsUndef = V2.getOpcode() == ISD::UNDEF;
+ if (V1IsUndef && V2IsUndef)
+ return DAG.getUNDEF(VT);
+
+ // When we create a shuffle node we put the UNDEF node to second operand,
+ // but in some cases the first operand may be transformed to UNDEF.
+ // In this case we should just commute the node.
+ if (V1IsUndef)
+ return CommuteVectorShuffle(SVOp, DAG);
+
+ // Check for non-undef masks pointing at an undef vector and make the masks
+ // undef as well. This makes it easier to match the shuffle based solely on
+ // the mask.
+ if (V2IsUndef)
+ for (int M : Mask)
+ if (M >= NumElements) {
+ SmallVector<int, 8> NewMask(Mask.begin(), Mask.end());
+ for (int &M : NewMask)
+ if (M >= NumElements)
+ M = -1;
+ return DAG.getVectorShuffle(VT, dl, V1, V2, NewMask);
+ }
+
+ // For integer vector shuffles, try to collapse them into a shuffle of fewer
+ // lanes but wider integers. We cap this to not form integers larger than i64
+ // but it might be interesting to form i128 integers to handle flipping the
+ // low and high halves of AVX 256-bit vectors.
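+ // For example, a v8i16 shuffle with the mask <2,3,0,1,6,7,4,5> becomes a
+ // v4i32 shuffle with the mask <1,0,3,2>.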
+ if (VT.isInteger() && VT.getScalarSizeInBits() < 64 &&
+ areAdjacentMasksSequential(Mask)) {
+ SmallVector<int, 8> NewMask;
+ for (int i = 0, Size = Mask.size(); i < Size; i += 2)
+ NewMask.push_back(Mask[i] / 2);
+ MVT NewVT =
+ MVT::getVectorVT(MVT::getIntegerVT(VT.getScalarSizeInBits() * 2),
+ VT.getVectorNumElements() / 2);
+ V1 = DAG.getNode(ISD::BITCAST, dl, NewVT, V1);
+ V2 = DAG.getNode(ISD::BITCAST, dl, NewVT, V2);
+ return DAG.getNode(ISD::BITCAST, dl, VT,
+ DAG.getVectorShuffle(NewVT, dl, V1, V2, NewMask));
+ }
+
+ int NumV1Elements = 0, NumUndefElements = 0, NumV2Elements = 0;
+ for (int M : SVOp->getMask())
+ if (M < 0)
+ ++NumUndefElements;
+ else if (M < NumElements)
+ ++NumV1Elements;
+ else
+ ++NumV2Elements;
+
+ // Commute the shuffle as needed such that more elements come from V1 than
+ // V2. This allows us to match the shuffle pattern strictly on how many
+ // elements come from V1 without handling the symmetric cases.
+ if (NumV2Elements > NumV1Elements)
+ return CommuteVectorShuffle(SVOp, DAG);
+
+ // When the number of V1 and V2 elements are the same, try to minimize the
+ // number of uses of V2 in the low half of the vector.
+ if (NumV1Elements == NumV2Elements) {
+ int LowV1Elements = 0, LowV2Elements = 0;
+ for (int M : SVOp->getMask().slice(0, NumElements / 2))
+ if (M >= NumElements)
+ ++LowV2Elements;
+ else if (M >= 0)
+ ++LowV1Elements;
+ if (LowV2Elements > LowV1Elements)
+ return CommuteVectorShuffle(SVOp, DAG);
+ }
+
+ // For each vector width, delegate to a specialized lowering routine.
+ if (VT.getSizeInBits() == 128)
+ return lower128BitVectorShuffle(Op, V1, V2, VT, Subtarget, DAG);
+
+ llvm_unreachable("Unimplemented!");
+}
+
+
+//===----------------------------------------------------------------------===//
+// Legacy vector shuffle lowering
+//
+// This code is the legacy code handling vector shuffles until the above
+// replaces its functionality and performance.
+//===----------------------------------------------------------------------===//
+
+static bool isBlendMask(ArrayRef<int> MaskVals, MVT VT, bool hasSSE41,
+ bool hasInt256, unsigned *MaskOut = nullptr) {
MVT EltVT = VT.getVectorElementType();
- unsigned NumElems = VT.getVectorNumElements();
// There is no blend with immediate in AVX-512.
if (VT.is512BitVector())
- return SDValue();
+ return false;
- if (!Subtarget->hasSSE41() || EltVT == MVT::i8)
- return SDValue();
- if (!Subtarget->hasInt256() && VT == MVT::v16i16)
- return SDValue();
+ if (!hasSSE41 || EltVT == MVT::i8)
+ return false;
+ if (!hasInt256 && VT == MVT::v16i16)
+ return false;
- // Check the mask for BLEND and build the value.
unsigned MaskValue = 0;
+ unsigned NumElems = VT.getVectorNumElements();
// There are 2 lanes if (NumElems > 8), and 1 lane otherwise.
- unsigned NumLanes = (NumElems-1)/8 + 1;
+ unsigned NumLanes = (NumElems - 1) / 8 + 1;
unsigned NumElemsInLane = NumElems / NumLanes;
 // Blend for v16i16 should be symmetric for both lanes.
for (unsigned i = 0; i < NumElemsInLane; ++i) {
- int SndLaneEltIdx = (NumLanes == 2) ?
- SVOp->getMaskElt(i + NumElemsInLane) : -1;
- int EltIdx = SVOp->getMaskElt(i);
+ int SndLaneEltIdx = (NumLanes == 2) ? MaskVals[i + NumElemsInLane] : -1;
+ int EltIdx = MaskVals[i];
if ((EltIdx < 0 || EltIdx == (int)i) &&
(SndLaneEltIdx < 0 || SndLaneEltIdx == (int)(i + NumElemsInLane)))
@@ -6469,11 +8044,34 @@ LowerVECTOR_SHUFFLEtoBlend(ShuffleVectorSDNode *SVOp,
if (((unsigned)EltIdx == (i + NumElems)) &&
(SndLaneEltIdx < 0 ||
(unsigned)SndLaneEltIdx == i + NumElems + NumElemsInLane))
- MaskValue |= (1<<i);
+ MaskValue |= (1 << i);
else
- return SDValue();
+ return false;
}
+ if (MaskOut)
+ *MaskOut = MaskValue;
+ return true;
+}
+
+// Try to lower a shuffle node into a simple blend instruction.
+// This function assumes isBlendMask returns true for this
+// ShuffleVectorSDNode.
+static SDValue LowerVECTOR_SHUFFLEtoBlend(ShuffleVectorSDNode *SVOp,
+ unsigned MaskValue,
+ const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
+ MVT VT = SVOp->getSimpleValueType(0);
+ MVT EltVT = VT.getVectorElementType();
+ assert(isBlendMask(SVOp->getMask(), VT, Subtarget->hasSSE41(),
+ Subtarget->hasInt256()) &&
+ "Trying to lower a VECTOR_SHUFFLE to a Blend but with the wrong mask");
+ SDValue V1 = SVOp->getOperand(0);
+ SDValue V2 = SVOp->getOperand(1);
+ SDLoc dl(SVOp);
+ unsigned NumElems = VT.getVectorNumElements();
+
// Convert i32 vectors to floating point if it is not AVX2.
// AVX2 introduced VPBLENDD instruction for 128 and 256-bit vectors.
MVT BlendVT = VT;
@@ -7450,8 +9048,9 @@ static SDValue getINSERTPS(ShuffleVectorSDNode *SVOp, SDLoc &dl,
assert((VT == MVT::v4f32 || VT == MVT::v4i32) &&
"unsupported vector type for insertps/pinsrd");
- int FromV1 = std::count_if(Mask.begin(), Mask.end(),
- [](const int &i) { return i < 4; });
+ auto FromV1Predicate = [](const int &i) { return i < 4 && i > -1; };
+ auto FromV2Predicate = [](const int &i) { return i >= 4; };
+ int FromV1 = std::count_if(Mask.begin(), Mask.end(), FromV1Predicate);
SDValue From;
SDValue To;
@@ -7459,23 +9058,26 @@ static SDValue getINSERTPS(ShuffleVectorSDNode *SVOp, SDLoc &dl,
if (FromV1 == 1) {
From = V1;
To = V2;
- DestIndex = std::find_if(Mask.begin(), Mask.end(),
- [](const int &i) { return i < 4; }) -
+ DestIndex = std::find_if(Mask.begin(), Mask.end(), FromV1Predicate) -
Mask.begin();
} else {
+ assert(std::count_if(Mask.begin(), Mask.end(), FromV2Predicate) == 1 &&
+ "More than one element from V1 and from V2, or no elements from one "
+ "of the vectors. This case should not have returned true from "
+ "isINSERTPSMask");
From = V2;
To = V1;
- DestIndex = std::find_if(Mask.begin(), Mask.end(),
- [](const int &i) { return i >= 4; }) -
- Mask.begin();
+ DestIndex =
+ std::find_if(Mask.begin(), Mask.end(), FromV2Predicate) - Mask.begin();
}
+ unsigned SrcIndex = Mask[DestIndex] % 4;
if (MayFoldLoad(From)) {
// Trivial case, when From comes from a load and is only used by the
// shuffle. Make it use insertps from the vector that we need from that
// load.
SDValue NewLoad =
- NarrowVectorLoadToElement(cast<LoadSDNode>(From), DestIndex, DAG);
+ NarrowVectorLoadToElement(cast<LoadSDNode>(From), SrcIndex, DAG);
if (!NewLoad.getNode())
return SDValue();
@@ -7496,7 +9098,6 @@ static SDValue getINSERTPS(ShuffleVectorSDNode *SVOp, SDLoc &dl,
}
// Vector-element-to-vector
- unsigned SrcIndex = Mask[DestIndex] % 4;
SDValue InsertpsMask = DAG.getIntPtrConstant(DestIndex << 4 | SrcIndex << 6);
return DAG.getNode(X86ISD::INSERTPS, dl, VT, To, From, InsertpsMask);
}
@@ -7663,6 +9264,11 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
bool OptForSize = MF.getFunction()->getAttributes().
hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize);
+ // Check if we should use the experimental vector shuffle lowering. If so,
+ // delegate completely to that code path.
+ if (ExperimentalVectorShuffleLowering)
+ return lowerVectorShuffle(Op, Subtarget, DAG);
+
assert(VT.getSizeInBits() != 64 && "Can't lower MMX shuffles");
if (V1IsUndef && V2IsUndef)
@@ -7796,8 +9402,13 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
bool Commuted = false;
// FIXME: This should also accept a bitcast of a splat? Be careful, not
// 1,1,1,1 -> v8i16 though.
- V1IsSplat = isSplatVector(V1.getNode());
- V2IsSplat = isSplatVector(V2.getNode());
+ BitVector UndefElements;
+ if (auto *BVOp = dyn_cast<BuildVectorSDNode>(V1.getNode()))
+ if (BVOp->getConstantSplatNode(&UndefElements) && UndefElements.none())
+ V1IsSplat = true;
+ if (auto *BVOp = dyn_cast<BuildVectorSDNode>(V2.getNode()))
+ if (BVOp->getConstantSplatNode(&UndefElements) && UndefElements.none())
+ V2IsSplat = true;
// Canonicalize the splat or undef, if present, to be on the RHS.
if (!V2IsUndef && V1IsSplat && !V2IsSplat) {
@@ -7873,6 +9484,11 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
getShufflePSHUFLWImmediate(SVOp),
DAG);
+ unsigned MaskValue;
+ if (isBlendMask(M, VT, Subtarget->hasSSE41(), Subtarget->hasInt256(),
+ &MaskValue))
+ return LowerVECTOR_SHUFFLEtoBlend(SVOp, MaskValue, Subtarget, DAG);
+
if (isSHUFPMask(M, VT))
return getTargetShuffleNode(X86ISD::SHUFP, dl, VT, V1, V2,
getShuffleSHUFImmediate(SVOp), DAG);
@@ -7910,10 +9526,6 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
return getTargetShuffleNode(X86ISD::VPERM2X128, dl, VT, V1,
V2, getShuffleVPERM2X128Immediate(SVOp), DAG);
- SDValue BlendOp = LowerVECTOR_SHUFFLEtoBlend(SVOp, Subtarget, DAG);
- if (BlendOp.getNode())
- return BlendOp;
-
if (Subtarget->hasSSE41() && isINSERTPSMask(M, VT))
return getINSERTPS(SVOp, dl, DAG);
@@ -8530,7 +10142,7 @@ X86TargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const {
// global base reg.
unsigned char OpFlag = 0;
unsigned WrapperKind = X86ISD::Wrapper;
- CodeModel::Model M = getTargetMachine().getCodeModel();
+ CodeModel::Model M = DAG.getTarget().getCodeModel();
if (Subtarget->isPICStyleRIPRel() &&
(M == CodeModel::Small || M == CodeModel::Kernel))
@@ -8563,7 +10175,7 @@ SDValue X86TargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
// global base reg.
unsigned char OpFlag = 0;
unsigned WrapperKind = X86ISD::Wrapper;
- CodeModel::Model M = getTargetMachine().getCodeModel();
+ CodeModel::Model M = DAG.getTarget().getCodeModel();
if (Subtarget->isPICStyleRIPRel() &&
(M == CodeModel::Small || M == CodeModel::Kernel))
@@ -8596,7 +10208,7 @@ X86TargetLowering::LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const {
// global base reg.
unsigned char OpFlag = 0;
unsigned WrapperKind = X86ISD::Wrapper;
- CodeModel::Model M = getTargetMachine().getCodeModel();
+ CodeModel::Model M = DAG.getTarget().getCodeModel();
if (Subtarget->isPICStyleRIPRel() &&
(M == CodeModel::Small || M == CodeModel::Kernel)) {
@@ -8617,7 +10229,7 @@ X86TargetLowering::LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const {
Result = DAG.getNode(WrapperKind, DL, getPointerTy(), Result);
// With PIC, the address is actually $g + Offset.
- if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
+ if (DAG.getTarget().getRelocationModel() == Reloc::PIC_ &&
!Subtarget->is64Bit()) {
Result = DAG.getNode(ISD::ADD, DL, getPointerTy(),
DAG.getNode(X86ISD::GlobalBaseReg,
@@ -8639,7 +10251,7 @@ X86TargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const {
// Create the TargetBlockAddressAddress node.
unsigned char OpFlags =
Subtarget->ClassifyBlockAddressReference();
- CodeModel::Model M = getTargetMachine().getCodeModel();
+ CodeModel::Model M = DAG.getTarget().getCodeModel();
const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
int64_t Offset = cast<BlockAddressSDNode>(Op)->getOffset();
SDLoc dl(Op);
@@ -8668,8 +10280,8 @@ X86TargetLowering::LowerGlobalAddress(const GlobalValue *GV, SDLoc dl,
// Create the TargetGlobalAddress node, folding in the constant
// offset if it is legal.
unsigned char OpFlags =
- Subtarget->ClassifyGlobalReference(GV, getTargetMachine());
- CodeModel::Model M = getTargetMachine().getCodeModel();
+ Subtarget->ClassifyGlobalReference(GV, DAG.getTarget());
+ CodeModel::Model M = DAG.getTarget().getCodeModel();
SDValue Result;
if (OpFlags == X86II::MO_NO_FLAG &&
X86::isOffsetSuitableForCodeModel(Offset, M)) {
@@ -8868,7 +10480,7 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
const GlobalValue *GV = GA->getGlobal();
if (Subtarget->isTargetELF()) {
- TLSModel::Model model = getTargetMachine().getTLSModel(GV);
+ TLSModel::Model model = DAG.getTarget().getTLSModel(GV);
switch (model) {
case TLSModel::GeneralDynamic:
@@ -8880,9 +10492,9 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
Subtarget->is64Bit());
case TLSModel::InitialExec:
case TLSModel::LocalExec:
- return LowerToTLSExecModel(GA, DAG, getPointerTy(), model,
- Subtarget->is64Bit(),
- getTargetMachine().getRelocationModel() == Reloc::PIC_);
+ return LowerToTLSExecModel(
+ GA, DAG, getPointerTy(), model, Subtarget->is64Bit(),
+ DAG.getTarget().getRelocationModel() == Reloc::PIC_);
}
llvm_unreachable("Unknown TLS model.");
}
@@ -8895,8 +10507,8 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
// In PIC mode (unless we're in RIPRel PIC mode) we add an offset to the
// global base reg.
- bool PIC32 = (getTargetMachine().getRelocationModel() == Reloc::PIC_) &&
- !Subtarget->is64Bit();
+ bool PIC32 = (DAG.getTarget().getRelocationModel() == Reloc::PIC_) &&
+ !Subtarget->is64Bit();
if (PIC32)
OpFlag = X86II::MO_TLVP_PIC_BASE;
else
@@ -10050,10 +11662,27 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC, SDLoc dl,
break;
case X86::COND_G: case X86::COND_GE:
case X86::COND_L: case X86::COND_LE:
- case X86::COND_O: case X86::COND_NO:
- NeedOF = true;
+ case X86::COND_O: case X86::COND_NO: {
+ // Check if we really need to set the Overflow flag. If NoSignedWrap is
+ // present, it is not actually needed.
+ switch (Op->getOpcode()) {
+ case ISD::ADD:
+ case ISD::SUB:
+ case ISD::MUL:
+ case ISD::SHL: {
+ const BinaryWithFlagsSDNode *BinNode =
+ cast<BinaryWithFlagsSDNode>(Op.getNode());
+ if (BinNode->hasNoSignedWrap())
+ break;
+ }
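+ // Otherwise, fall through and set NeedOF.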
+ default:
+ NeedOF = true;
+ break;
+ }
break;
}
+ }
// See if we can use the EFLAGS value from the operand instead of
// doing a separate TEST. TEST always sets OF and CF to 0, so unless
// we prove that the arithmetic won't overflow, we can't use OF or CF.
@@ -10115,14 +11744,14 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC, SDLoc dl,
if (ConstantSDNode *C =
dyn_cast<ConstantSDNode>(ArithOp.getNode()->getOperand(1))) {
// An add of one will be selected as an INC.
- if (C->getAPIntValue() == 1) {
+ if (C->getAPIntValue() == 1 && !Subtarget->slowIncDec()) {
Opcode = X86ISD::INC;
NumOperands = 1;
break;
}
// An add of negative one (subtract of one) will be selected as a DEC.
- if (C->getAPIntValue().isAllOnesValue()) {
+ if (C->getAPIntValue().isAllOnesValue() && !Subtarget->slowIncDec()) {
Opcode = X86ISD::DEC;
NumOperands = 1;
break;
@@ -10138,7 +11767,7 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC, SDLoc dl,
// If we have a constant logical shift that's only used in a comparison
// against zero turn it into an equivalent AND. This allows turning it into
// a TEST instruction later.
- if ((X86CC == X86::COND_E || X86CC == X86::COND_NE) &&
+ if ((X86CC == X86::COND_E || X86CC == X86::COND_NE) && Op->hasOneUse() &&
isa<ConstantSDNode>(Op->getOperand(1)) && !hasNonFlagsUse(Op)) {
EVT VT = Op.getValueType();
unsigned BitWidth = VT.getSizeInBits();
@@ -11469,8 +13098,9 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
}
if (addTest) {
- CC = DAG.getConstant(X86::COND_NE, MVT::i8);
- Cond = EmitTest(Cond, X86::COND_NE, dl, DAG);
+ X86::CondCode X86Cond = Inverted ? X86::COND_E : X86::COND_NE;
+ CC = DAG.getConstant(X86Cond, MVT::i8);
+ Cond = EmitTest(Cond, X86Cond, dl, DAG);
}
Cond = ConvertCmpIfNecessary(Cond, DAG);
return DAG.getNode(X86ISD::BRCOND, dl, Op.getValueType(),
@@ -11513,7 +13143,7 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, VT);
Chain = SP.getValue(1);
unsigned Align = cast<ConstantSDNode>(Tmp3)->getZExtValue();
- const TargetFrameLowering &TFI = *getTargetMachine().getFrameLowering();
+ const TargetFrameLowering &TFI = *DAG.getTarget().getFrameLowering();
unsigned StackAlign = TFI.getStackAlignment();
Tmp1 = DAG.getNode(ISD::SUB, dl, VT, SP, Size); // Value
if (Align > StackAlign)
@@ -11572,7 +13202,7 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
Chain = DAG.getNode(X86ISD::WIN_ALLOCA, dl, NodeTys, Chain, Flag);
const X86RegisterInfo *RegInfo =
- static_cast<const X86RegisterInfo*>(getTargetMachine().getRegisterInfo());
+ static_cast<const X86RegisterInfo*>(DAG.getTarget().getRegisterInfo());
unsigned SPReg = RegInfo->getStackRegister();
SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, SPTy);
Chain = SP.getValue(1);
@@ -11681,7 +13311,7 @@ SDValue X86TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
if (ArgMode == 2) {
// Sanity Check: Make sure using fp_offset makes sense.
- assert(!getTargetMachine().Options.UseSoftFloat &&
+ assert(!DAG.getTarget().Options.UseSoftFloat &&
!(DAG.getMachineFunction()
.getFunction()->getAttributes()
.hasAttribute(AttributeSet::FunctionIndex,
@@ -12158,11 +13788,37 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) {
Op.getOperand(1), Op.getOperand(2));
}
+ case Intrinsic::x86_sse2_packssdw_128:
+ case Intrinsic::x86_sse2_packsswb_128:
+ case Intrinsic::x86_avx2_packssdw:
+ case Intrinsic::x86_avx2_packsswb:
+ return DAG.getNode(X86ISD::PACKSS, dl, Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2));
+
+ case Intrinsic::x86_sse2_packuswb_128:
+ case Intrinsic::x86_sse41_packusdw:
+ case Intrinsic::x86_avx2_packuswb:
+ case Intrinsic::x86_avx2_packusdw:
+ return DAG.getNode(X86ISD::PACKUS, dl, Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2));
+
case Intrinsic::x86_ssse3_pshuf_b_128:
case Intrinsic::x86_avx2_pshuf_b:
return DAG.getNode(X86ISD::PSHUFB, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
+ case Intrinsic::x86_sse2_pshuf_d:
+ return DAG.getNode(X86ISD::PSHUFD, dl, Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2));
+
+ case Intrinsic::x86_sse2_pshufl_w:
+ return DAG.getNode(X86ISD::PSHUFLW, dl, Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2));
+
+ case Intrinsic::x86_sse2_pshufh_w:
+ return DAG.getNode(X86ISD::PSHUFHW, dl, Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2));
+
case Intrinsic::x86_ssse3_psign_b_128:
case Intrinsic::x86_ssse3_psign_w_128:
case Intrinsic::x86_ssse3_psign_d_128:
@@ -12610,6 +14266,51 @@ static SDValue getPrefetchNode(unsigned Opc, SDValue Op, SelectionDAG &DAG,
return SDValue(Res, 0);
}
+// getReadPerformanceCounter - Handles the lowering of builtin intrinsics that
+// read performance monitor counters (x86_rdpmc).
+static void getReadPerformanceCounter(SDNode *N, SDLoc DL,
+ SelectionDAG &DAG, const X86Subtarget *Subtarget,
+ SmallVectorImpl<SDValue> &Results) {
+ assert(N->getNumOperands() == 3 && "Unexpected number of operands!");
+ SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
+ SDValue LO, HI;
+
+ // The ECX register is used to select the index of the performance counter
+ // to read.
+ SDValue Chain = DAG.getCopyToReg(N->getOperand(0), DL, X86::ECX,
+ N->getOperand(2));
+ SDValue rd = DAG.getNode(X86ISD::RDPMC_DAG, DL, Tys, Chain);
+
+ // Reads the content of a 64-bit performance counter and returns it in the
+ // registers EDX:EAX.
+ if (Subtarget->is64Bit()) {
+ LO = DAG.getCopyFromReg(rd, DL, X86::RAX, MVT::i64, rd.getValue(1));
+ HI = DAG.getCopyFromReg(LO.getValue(1), DL, X86::RDX, MVT::i64,
+ LO.getValue(2));
+ } else {
+ LO = DAG.getCopyFromReg(rd, DL, X86::EAX, MVT::i32, rd.getValue(1));
+ HI = DAG.getCopyFromReg(LO.getValue(1), DL, X86::EDX, MVT::i32,
+ LO.getValue(2));
+ }
+ Chain = HI.getValue(1);
+
+ if (Subtarget->is64Bit()) {
+ // The EAX register is loaded with the low-order 32 bits. The EDX register
+ // is loaded with the supported high-order bits of the counter.
+ SDValue Tmp = DAG.getNode(ISD::SHL, DL, MVT::i64, HI,
+ DAG.getConstant(32, MVT::i8));
+ Results.push_back(DAG.getNode(ISD::OR, DL, MVT::i64, LO, Tmp));
+ Results.push_back(Chain);
+ return;
+ }
+
+ // Use a buildpair to merge the two 32-bit values into a 64-bit one.
+ SDValue Ops[] = { LO, HI };
+ SDValue Pair = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Ops);
+ Results.push_back(Pair);
+ Results.push_back(Chain);
+}
+
// getReadTimeStampCounter - Handles the lowering of builtin intrinsics that
// read the time stamp counter (x86_rdtsc and x86_rdtscp). This function is
// also used to custom lower READCYCLECOUNTER nodes.
@@ -12674,7 +14375,7 @@ static SDValue LowerREADCYCLECOUNTER(SDValue Op, const X86Subtarget *Subtarget,
}
enum IntrinsicType {
- GATHER, SCATTER, PREFETCH, RDSEED, RDRAND, RDTSC, XTEST
+ GATHER, SCATTER, PREFETCH, RDSEED, RDRAND, RDPMC, RDTSC, XTEST
};
struct IntrinsicData {
@@ -12768,6 +14469,8 @@ static void InitIntinsicsMap() {
IntrinsicData(RDTSC, X86ISD::RDTSC_DAG, 0)));
IntrMap.insert(std::make_pair(Intrinsic::x86_rdtscp,
IntrinsicData(RDTSC, X86ISD::RDTSCP_DAG, 0)));
+ IntrMap.insert(std::make_pair(Intrinsic::x86_rdpmc,
+ IntrinsicData(RDPMC, X86ISD::RDPMC_DAG, 0)));
Initialized = true;
}
@@ -12826,7 +14529,7 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget *Subtarget,
case PREFETCH: {
SDValue Hint = Op.getOperand(6);
unsigned HintVal;
- if (dyn_cast<ConstantSDNode> (Hint) == 0 ||
+ if (dyn_cast<ConstantSDNode> (Hint) == nullptr ||
(HintVal = dyn_cast<ConstantSDNode> (Hint)->getZExtValue()) > 1)
llvm_unreachable("Wrong prefetch hint in intrinsic: should be 0 or 1");
unsigned Opcode = (HintVal ? Intr.Opc1 : Intr.Opc0);
@@ -12843,6 +14546,12 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget *Subtarget,
getReadTimeStampCounter(Op.getNode(), dl, Intr.Opc0, DAG, Subtarget, Results);
return DAG.getMergeValues(Results, dl);
}
+ // Read Performance Monitoring Counters.
+ case RDPMC: {
+ SmallVector<SDValue, 2> Results;
+ getReadPerformanceCounter(Op.getNode(), dl, DAG, Subtarget, Results);
+ return DAG.getMergeValues(Results, dl);
+ }
// XTEST intrinsics.
case XTEST: {
SDVTList VTs = DAG.getVTList(Op->getValueType(0), MVT::Other);
@@ -12873,7 +14582,7 @@ SDValue X86TargetLowering::LowerRETURNADDR(SDValue Op,
if (Depth > 0) {
SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
const X86RegisterInfo *RegInfo =
- static_cast<const X86RegisterInfo*>(getTargetMachine().getRegisterInfo());
+ static_cast<const X86RegisterInfo*>(DAG.getTarget().getRegisterInfo());
SDValue Offset = DAG.getConstant(RegInfo->getSlotSize(), PtrVT);
return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
DAG.getNode(ISD::ADD, dl, PtrVT,
@@ -12895,7 +14604,7 @@ SDValue X86TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
SDLoc dl(Op); // FIXME probably not meaningful
unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
const X86RegisterInfo *RegInfo =
- static_cast<const X86RegisterInfo*>(getTargetMachine().getRegisterInfo());
+ static_cast<const X86RegisterInfo*>(DAG.getTarget().getRegisterInfo());
unsigned FrameReg = RegInfo->getFrameRegister(DAG.getMachineFunction());
assert(((FrameReg == X86::RBP && VT == MVT::i64) ||
(FrameReg == X86::EBP && VT == MVT::i32)) &&
@@ -12924,7 +14633,7 @@ unsigned X86TargetLowering::getRegisterByName(const char* RegName,
SDValue X86TargetLowering::LowerFRAME_TO_ARGS_OFFSET(SDValue Op,
SelectionDAG &DAG) const {
const X86RegisterInfo *RegInfo =
- static_cast<const X86RegisterInfo*>(getTargetMachine().getRegisterInfo());
+ static_cast<const X86RegisterInfo*>(DAG.getTarget().getRegisterInfo());
return DAG.getIntPtrConstant(2 * RegInfo->getSlotSize());
}
@@ -12936,7 +14645,7 @@ SDValue X86TargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const {
EVT PtrVT = getPointerTy();
const X86RegisterInfo *RegInfo =
- static_cast<const X86RegisterInfo*>(getTargetMachine().getRegisterInfo());
+ static_cast<const X86RegisterInfo*>(DAG.getTarget().getRegisterInfo());
unsigned FrameReg = RegInfo->getFrameRegister(DAG.getMachineFunction());
assert(((FrameReg == X86::RBP && PtrVT == MVT::i64) ||
(FrameReg == X86::EBP && PtrVT == MVT::i32)) &&
@@ -12983,7 +14692,7 @@ SDValue X86TargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
SDLoc dl (Op);
const Value *TrmpAddr = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
- const TargetRegisterInfo* TRI = getTargetMachine().getRegisterInfo();
+ const TargetRegisterInfo* TRI = DAG.getTarget().getRegisterInfo();
if (Subtarget->is64Bit()) {
SDValue OutChains[6];
@@ -13431,7 +15140,7 @@ SDValue X86TargetLowering::LowerWin64_i128OP(SDValue Op, SelectionDAG &DAG) cons
CLI.setDebugLoc(dl).setChain(InChain)
.setCallee(getLibcallCallingConv(LC),
static_cast<EVT>(MVT::v2i64).getTypeForEVT(*DAG.getContext()),
- Callee, &Args, 0)
+ Callee, std::move(Args), 0)
.setInRegister().setSExtResult(isSigned).setZExtResult(!isSigned);
std::pair<SDValue, SDValue> CallInfo = LowerCallTo(CLI);
@@ -13448,7 +15157,7 @@ static SDValue LowerMUL_LOHI(SDValue Op, const X86Subtarget *Subtarget,
(VT == MVT::v8i32 && Subtarget->hasInt256()));
// Get the high parts.
- const int Mask[] = {1, 2, 3, 4, 5, 6, 7, 8};
+ const int Mask[] = {1, -1, 3, -1, 5, -1, 7, -1};
SDValue Hi0 = DAG.getVectorShuffle(VT, dl, Op0, Op0, Mask);
SDValue Hi1 = DAG.getVectorShuffle(VT, dl, Op1, Op1, Mask);
@@ -13464,10 +15173,18 @@ static SDValue LowerMUL_LOHI(SDValue Op, const X86Subtarget *Subtarget,
DAG.getNode(Opcode, dl, MulVT, Hi0, Hi1));
// Shuffle it back into the right order.
- const int HighMask[] = {1, 5, 3, 7, 9, 13, 11, 15};
- SDValue Highs = DAG.getVectorShuffle(VT, dl, Mul1, Mul2, HighMask);
- const int LowMask[] = {0, 4, 2, 6, 8, 12, 10, 14};
- SDValue Lows = DAG.getVectorShuffle(VT, dl, Mul1, Mul2, LowMask);
+ SDValue Highs, Lows;
+ if (VT == MVT::v8i32) {
+ const int HighMask[] = {1, 9, 3, 11, 5, 13, 7, 15};
+ Highs = DAG.getVectorShuffle(VT, dl, Mul1, Mul2, HighMask);
+ const int LowMask[] = {0, 8, 2, 10, 4, 12, 6, 14};
+ Lows = DAG.getVectorShuffle(VT, dl, Mul1, Mul2, LowMask);
+ } else {
+ const int HighMask[] = {1, 5, 3, 7};
+ Highs = DAG.getVectorShuffle(VT, dl, Mul1, Mul2, HighMask);
+ const int LowMask[] = {0, 4, 2, 6};
+ Lows = DAG.getVectorShuffle(VT, dl, Mul1, Mul2, LowMask);
+ }
// If we have a signed multiply but no PMULDQ fix up the high parts of a
// unsigned multiply.
@@ -13494,10 +15211,9 @@ static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG,
SDValue Amt = Op.getOperand(1);
// Optimize shl/srl/sra with constant shift amount.
- if (isSplatVector(Amt.getNode())) {
- SDValue SclrAmt = Amt->getOperand(0);
- if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(SclrAmt)) {
- uint64_t ShiftAmt = C->getZExtValue();
+ if (auto *BVAmt = dyn_cast<BuildVectorSDNode>(Amt)) {
+ if (auto *ShiftConst = BVAmt->getConstantSplatNode()) {
+ uint64_t ShiftAmt = ShiftConst->getZExtValue();
if (VT == MVT::v2i64 || VT == MVT::v4i32 || VT == MVT::v8i16 ||
(Subtarget->hasInt256() &&
@@ -13804,15 +15520,14 @@ static SDValue LowerScalarVariableShift(SDValue Op, SelectionDAG &DAG,
static SDValue LowerShift(SDValue Op, const X86Subtarget* Subtarget,
SelectionDAG &DAG) {
-
MVT VT = Op.getSimpleValueType();
SDLoc dl(Op);
SDValue R = Op.getOperand(0);
SDValue Amt = Op.getOperand(1);
SDValue V;
- if (!Subtarget->hasSSE2())
- return SDValue();
+ assert(VT.isVector() && "Custom lowering only for vector shifts!");
+ assert(Subtarget->hasSSE2() && "Only custom lower when we have SSE2!");
V = LowerScalarImmediateShift(Op, DAG, Subtarget);
if (V.getNode())
@@ -14254,7 +15969,7 @@ static SDValue LowerCMP_SWAP(SDValue Op, const X86Subtarget *Subtarget,
break;
}
SDValue cpIn = DAG.getCopyToReg(Op.getOperand(0), DL, Reg,
- Op.getOperand(2), SDValue());
+ Op.getOperand(2), SDValue());
SDValue Ops[] = { cpIn.getValue(0),
Op.getOperand(1),
Op.getOperand(3),
@@ -14264,9 +15979,18 @@ static SDValue LowerCMP_SWAP(SDValue Op, const X86Subtarget *Subtarget,
MachineMemOperand *MMO = cast<AtomicSDNode>(Op)->getMemOperand();
SDValue Result = DAG.getMemIntrinsicNode(X86ISD::LCMPXCHG_DAG, DL, Tys,
Ops, T, MMO);
+
SDValue cpOut =
DAG.getCopyFromReg(Result.getValue(0), DL, Reg, T, Result.getValue(1));
- return cpOut;
+ SDValue EFLAGS = DAG.getCopyFromReg(cpOut.getValue(1), DL, X86::EFLAGS,
+ MVT::i32, cpOut.getValue(2));
+ SDValue Success = DAG.getNode(X86ISD::SETCC, DL, Op->getValueType(1),
+ DAG.getConstant(X86::COND_E, MVT::i8), EFLAGS);
+
+ DAG.ReplaceAllUsesOfValueWith(Op.getValue(0), cpOut);
+ DAG.ReplaceAllUsesOfValueWith(Op.getValue(1), Success);
+ DAG.ReplaceAllUsesOfValueWith(Op.getValue(2), EFLAGS.getValue(1));
+ return SDValue();
}
static SDValue LowerBITCAST(SDValue Op, const X86Subtarget *Subtarget,
@@ -14422,7 +16146,7 @@ static SDValue LowerFSINCOS(SDValue Op, const X86Subtarget *Subtarget,
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(dl).setChain(DAG.getEntryNode())
- .setCallee(CallingConv::C, RetTy, Callee, &Args, 0);
+ .setCallee(CallingConv::C, RetTy, Callee, std::move(Args), 0);
std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI);
@@ -14446,7 +16170,8 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
default: llvm_unreachable("Should not custom lower this!");
case ISD::SIGN_EXTEND_INREG: return LowerSIGN_EXTEND_INREG(Op,DAG);
case ISD::ATOMIC_FENCE: return LowerATOMIC_FENCE(Op, Subtarget, DAG);
- case ISD::ATOMIC_CMP_SWAP: return LowerCMP_SWAP(Op, Subtarget, DAG);
+ case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS:
+ return LowerCMP_SWAP(Op, Subtarget, DAG);
case ISD::ATOMIC_LOAD_SUB: return LowerLOAD_SUB(Op,DAG);
case ISD::ATOMIC_STORE: return LowerATOMIC_STORE(Op,DAG);
case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);
@@ -14528,8 +16253,8 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
}
static void ReplaceATOMIC_LOAD(SDNode *Node,
- SmallVectorImpl<SDValue> &Results,
- SelectionDAG &DAG) {
+ SmallVectorImpl<SDValue> &Results,
+ SelectionDAG &DAG) {
SDLoc dl(Node);
EVT VT = cast<AtomicSDNode>(Node)->getMemoryVT();
@@ -14538,38 +16263,16 @@ static void ReplaceATOMIC_LOAD(SDNode *Node,
// (The only way to get a 16-byte load is cmpxchg16b)
// FIXME: 16-byte ATOMIC_CMP_SWAP isn't actually hooked up at the moment.
SDValue Zero = DAG.getConstant(0, VT);
- SDValue Swap = DAG.getAtomic(ISD::ATOMIC_CMP_SWAP, dl, VT,
- Node->getOperand(0),
- Node->getOperand(1), Zero, Zero,
- cast<AtomicSDNode>(Node)->getMemOperand(),
- cast<AtomicSDNode>(Node)->getOrdering(),
- cast<AtomicSDNode>(Node)->getOrdering(),
- cast<AtomicSDNode>(Node)->getSynchScope());
+ SDVTList VTs = DAG.getVTList(VT, MVT::i1, MVT::Other);
+ SDValue Swap =
+ DAG.getAtomicCmpSwap(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, dl, VT, VTs,
+ Node->getOperand(0), Node->getOperand(1), Zero, Zero,
+ cast<AtomicSDNode>(Node)->getMemOperand(),
+ cast<AtomicSDNode>(Node)->getOrdering(),
+ cast<AtomicSDNode>(Node)->getOrdering(),
+ cast<AtomicSDNode>(Node)->getSynchScope());
Results.push_back(Swap.getValue(0));
- Results.push_back(Swap.getValue(1));
-}
-
-static void
-ReplaceATOMIC_BINARY_64(SDNode *Node, SmallVectorImpl<SDValue>&Results,
- SelectionDAG &DAG, unsigned NewOp) {
- SDLoc dl(Node);
- assert (Node->getValueType(0) == MVT::i64 &&
- "Only know how to expand i64 atomics");
-
- SDValue Chain = Node->getOperand(0);
- SDValue In1 = Node->getOperand(1);
- SDValue In2L = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
- Node->getOperand(2), DAG.getIntPtrConstant(0));
- SDValue In2H = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
- Node->getOperand(2), DAG.getIntPtrConstant(1));
- SDValue Ops[] = { Chain, In1, In2L, In2H };
- SDVTList Tys = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
- SDValue Result =
- DAG.getMemIntrinsicNode(NewOp, dl, Tys, Ops, MVT::i64,
- cast<MemSDNode>(Node)->getMemOperand());
- SDValue OpsF[] = { Result.getValue(0), Result.getValue(1)};
- Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, OpsF));
- Results.push_back(Result.getValue(2));
+ Results.push_back(Swap.getValue(2));
}
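ReplaceATOMIC_LOAD now goes through the WITH_SUCCESS node but still relies on the same trick: a compare-exchange of zero against zero never changes the memory contents, and in both the success and failure cases the returned old value is the atomically loaded value. A hedged C++ illustration of the idea, assuming a width at which a plain load would not be atomic; atomic_load_via_cas is an illustrative name, not code from this patch:

    #include <atomic>
    #include <cstdint>

    // Load the value atomically by doing cmpxchg(p, 0, 0). If the location
    // holds 0 the CAS stores 0 back (no visible change); otherwise it fails
    // and 'expected' is refreshed with the current contents. Either way
    // 'expected' ends up holding the loaded value.
    uint64_t atomic_load_via_cas(std::atomic<uint64_t> &p) {
      uint64_t expected = 0;
      p.compare_exchange_strong(expected, 0);
      return expected;
    }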
/// ReplaceNodeResults - Replace a node with an illegal result type
@@ -14656,13 +16359,15 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
case Intrinsic::x86_rdtscp:
return getReadTimeStampCounter(N, dl, X86ISD::RDTSCP_DAG, DAG, Subtarget,
Results);
+ case Intrinsic::x86_rdpmc:
+ return getReadPerformanceCounter(N, dl, DAG, Subtarget, Results);
}
}
case ISD::READCYCLECOUNTER: {
return getReadTimeStampCounter(N, dl, X86ISD::RDTSC_DAG, DAG, Subtarget,
Results);
}
- case ISD::ATOMIC_CMP_SWAP: {
+ case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: {
EVT T = N->getValueType(0);
assert((T == MVT::i64 || T == MVT::i128) && "can only expand cmpxchg pair");
bool Regs64bit = T == MVT::i128;
@@ -14704,61 +16409,33 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
Regs64bit ? X86::RDX : X86::EDX,
HalfT, cpOutL.getValue(2));
SDValue OpsF[] = { cpOutL.getValue(0), cpOutH.getValue(0)};
+
+ SDValue EFLAGS = DAG.getCopyFromReg(cpOutH.getValue(1), dl, X86::EFLAGS,
+ MVT::i32, cpOutH.getValue(2));
+ SDValue Success =
+ DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
+ DAG.getConstant(X86::COND_E, MVT::i8), EFLAGS);
+ Success = DAG.getZExtOrTrunc(Success, dl, N->getValueType(1));
+
Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, T, OpsF));
- Results.push_back(cpOutH.getValue(1));
+ Results.push_back(Success);
+ Results.push_back(EFLAGS.getValue(1));
return;
}
+ case ISD::ATOMIC_SWAP:
case ISD::ATOMIC_LOAD_ADD:
+ case ISD::ATOMIC_LOAD_SUB:
case ISD::ATOMIC_LOAD_AND:
- case ISD::ATOMIC_LOAD_NAND:
case ISD::ATOMIC_LOAD_OR:
- case ISD::ATOMIC_LOAD_SUB:
case ISD::ATOMIC_LOAD_XOR:
- case ISD::ATOMIC_LOAD_MAX:
+ case ISD::ATOMIC_LOAD_NAND:
case ISD::ATOMIC_LOAD_MIN:
- case ISD::ATOMIC_LOAD_UMAX:
+ case ISD::ATOMIC_LOAD_MAX:
case ISD::ATOMIC_LOAD_UMIN:
- case ISD::ATOMIC_SWAP: {
- unsigned Opc;
- switch (N->getOpcode()) {
- default: llvm_unreachable("Unexpected opcode");
- case ISD::ATOMIC_LOAD_ADD:
- Opc = X86ISD::ATOMADD64_DAG;
- break;
- case ISD::ATOMIC_LOAD_AND:
- Opc = X86ISD::ATOMAND64_DAG;
- break;
- case ISD::ATOMIC_LOAD_NAND:
- Opc = X86ISD::ATOMNAND64_DAG;
- break;
- case ISD::ATOMIC_LOAD_OR:
- Opc = X86ISD::ATOMOR64_DAG;
- break;
- case ISD::ATOMIC_LOAD_SUB:
- Opc = X86ISD::ATOMSUB64_DAG;
- break;
- case ISD::ATOMIC_LOAD_XOR:
- Opc = X86ISD::ATOMXOR64_DAG;
- break;
- case ISD::ATOMIC_LOAD_MAX:
- Opc = X86ISD::ATOMMAX64_DAG;
- break;
- case ISD::ATOMIC_LOAD_MIN:
- Opc = X86ISD::ATOMMIN64_DAG;
- break;
- case ISD::ATOMIC_LOAD_UMAX:
- Opc = X86ISD::ATOMUMAX64_DAG;
- break;
- case ISD::ATOMIC_LOAD_UMIN:
- Opc = X86ISD::ATOMUMIN64_DAG;
- break;
- case ISD::ATOMIC_SWAP:
- Opc = X86ISD::ATOMSWAP64_DAG;
- break;
- }
- ReplaceATOMIC_BINARY_64(N, Results, DAG, Opc);
- return;
- }
+ case ISD::ATOMIC_LOAD_UMAX:
+ // Delegate to generic TypeLegalization. Situations we can really handle
+ // should have already been dealt with by X86AtomicExpand.cpp.
+ break;
case ISD::ATOMIC_LOAD: {
ReplaceATOMIC_LOAD(N, Results, DAG);
return;
@@ -14779,6 +16456,13 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
MVT::v2f64, N->getOperand(0));
SDValue ToVecInt = DAG.getNode(ISD::BITCAST, dl, WiderVT, Expanded);
+ if (ExperimentalVectorWideningLegalization) {
+ // If we are legalizing vectors by widening, we already have the desired
+ // legal vector type, just return it.
+ Results.push_back(ToVecInt);
+ return;
+ }
+
SmallVector<SDValue, 8> Elts;
for (unsigned i = 0, e = NumElts; i != e; ++i)
Elts.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, SVT,
@@ -14810,6 +16494,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::FST: return "X86ISD::FST";
case X86ISD::CALL: return "X86ISD::CALL";
case X86ISD::RDTSC_DAG: return "X86ISD::RDTSC_DAG";
+ case X86ISD::RDTSCP_DAG: return "X86ISD::RDTSCP_DAG";
+ case X86ISD::RDPMC_DAG: return "X86ISD::RDPMC_DAG";
case X86ISD::BT: return "X86ISD::BT";
case X86ISD::CMP: return "X86ISD::CMP";
case X86ISD::COMI: return "X86ISD::COMI";
@@ -14863,12 +16549,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::FNSTSW16r: return "X86ISD::FNSTSW16r";
case X86ISD::LCMPXCHG_DAG: return "X86ISD::LCMPXCHG_DAG";
case X86ISD::LCMPXCHG8_DAG: return "X86ISD::LCMPXCHG8_DAG";
- case X86ISD::ATOMADD64_DAG: return "X86ISD::ATOMADD64_DAG";
- case X86ISD::ATOMSUB64_DAG: return "X86ISD::ATOMSUB64_DAG";
- case X86ISD::ATOMOR64_DAG: return "X86ISD::ATOMOR64_DAG";
- case X86ISD::ATOMXOR64_DAG: return "X86ISD::ATOMXOR64_DAG";
- case X86ISD::ATOMAND64_DAG: return "X86ISD::ATOMAND64_DAG";
- case X86ISD::ATOMNAND64_DAG: return "X86ISD::ATOMNAND64_DAG";
+ case X86ISD::LCMPXCHG16_DAG: return "X86ISD::LCMPXCHG16_DAG";
case X86ISD::VZEXT_MOVL: return "X86ISD::VZEXT_MOVL";
case X86ISD::VZEXT_LOAD: return "X86ISD::VZEXT_LOAD";
case X86ISD::VZEXT: return "X86ISD::VZEXT";
@@ -14909,6 +16590,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::TESTM: return "X86ISD::TESTM";
case X86ISD::TESTNM: return "X86ISD::TESTNM";
case X86ISD::KORTEST: return "X86ISD::KORTEST";
+ case X86ISD::PACKSS: return "X86ISD::PACKSS";
+ case X86ISD::PACKUS: return "X86ISD::PACKUS";
case X86ISD::PALIGNR: return "X86ISD::PALIGNR";
case X86ISD::PSHUFD: return "X86ISD::PSHUFD";
case X86ISD::PSHUFHW: return "X86ISD::PSHUFHW";
@@ -15173,7 +16856,8 @@ X86TargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M,
isUNPCKLMask(M, SVT, Subtarget->hasInt256()) ||
isUNPCKHMask(M, SVT, Subtarget->hasInt256()) ||
isUNPCKL_v_undef_Mask(M, SVT, Subtarget->hasInt256()) ||
- isUNPCKH_v_undef_Mask(M, SVT, Subtarget->hasInt256()));
+ isUNPCKH_v_undef_Mask(M, SVT, Subtarget->hasInt256()) ||
+ isBlendMask(M, SVT, Subtarget->hasSSE41(), Subtarget->hasInt256()));
}
bool
@@ -15256,685 +16940,6 @@ static MachineBasicBlock *EmitXBegin(MachineInstr *MI, MachineBasicBlock *MBB,
return sinkMBB;
}
-// Get CMPXCHG opcode for the specified data type.
-static unsigned getCmpXChgOpcode(EVT VT) {
- switch (VT.getSimpleVT().SimpleTy) {
- case MVT::i8: return X86::LCMPXCHG8;
- case MVT::i16: return X86::LCMPXCHG16;
- case MVT::i32: return X86::LCMPXCHG32;
- case MVT::i64: return X86::LCMPXCHG64;
- default:
- break;
- }
- llvm_unreachable("Invalid operand size!");
-}
-
-// Get LOAD opcode for the specified data type.
-static unsigned getLoadOpcode(EVT VT) {
- switch (VT.getSimpleVT().SimpleTy) {
- case MVT::i8: return X86::MOV8rm;
- case MVT::i16: return X86::MOV16rm;
- case MVT::i32: return X86::MOV32rm;
- case MVT::i64: return X86::MOV64rm;
- default:
- break;
- }
- llvm_unreachable("Invalid operand size!");
-}
-
-// Get opcode of the non-atomic one from the specified atomic instruction.
-static unsigned getNonAtomicOpcode(unsigned Opc) {
- switch (Opc) {
- case X86::ATOMAND8: return X86::AND8rr;
- case X86::ATOMAND16: return X86::AND16rr;
- case X86::ATOMAND32: return X86::AND32rr;
- case X86::ATOMAND64: return X86::AND64rr;
- case X86::ATOMOR8: return X86::OR8rr;
- case X86::ATOMOR16: return X86::OR16rr;
- case X86::ATOMOR32: return X86::OR32rr;
- case X86::ATOMOR64: return X86::OR64rr;
- case X86::ATOMXOR8: return X86::XOR8rr;
- case X86::ATOMXOR16: return X86::XOR16rr;
- case X86::ATOMXOR32: return X86::XOR32rr;
- case X86::ATOMXOR64: return X86::XOR64rr;
- }
- llvm_unreachable("Unhandled atomic-load-op opcode!");
-}
-
-// Get opcode of the non-atomic one from the specified atomic instruction with
-// extra opcode.
-static unsigned getNonAtomicOpcodeWithExtraOpc(unsigned Opc,
- unsigned &ExtraOpc) {
- switch (Opc) {
- case X86::ATOMNAND8: ExtraOpc = X86::NOT8r; return X86::AND8rr;
- case X86::ATOMNAND16: ExtraOpc = X86::NOT16r; return X86::AND16rr;
- case X86::ATOMNAND32: ExtraOpc = X86::NOT32r; return X86::AND32rr;
- case X86::ATOMNAND64: ExtraOpc = X86::NOT64r; return X86::AND64rr;
- case X86::ATOMMAX8: ExtraOpc = X86::CMP8rr; return X86::CMOVL32rr;
- case X86::ATOMMAX16: ExtraOpc = X86::CMP16rr; return X86::CMOVL16rr;
- case X86::ATOMMAX32: ExtraOpc = X86::CMP32rr; return X86::CMOVL32rr;
- case X86::ATOMMAX64: ExtraOpc = X86::CMP64rr; return X86::CMOVL64rr;
- case X86::ATOMMIN8: ExtraOpc = X86::CMP8rr; return X86::CMOVG32rr;
- case X86::ATOMMIN16: ExtraOpc = X86::CMP16rr; return X86::CMOVG16rr;
- case X86::ATOMMIN32: ExtraOpc = X86::CMP32rr; return X86::CMOVG32rr;
- case X86::ATOMMIN64: ExtraOpc = X86::CMP64rr; return X86::CMOVG64rr;
- case X86::ATOMUMAX8: ExtraOpc = X86::CMP8rr; return X86::CMOVB32rr;
- case X86::ATOMUMAX16: ExtraOpc = X86::CMP16rr; return X86::CMOVB16rr;
- case X86::ATOMUMAX32: ExtraOpc = X86::CMP32rr; return X86::CMOVB32rr;
- case X86::ATOMUMAX64: ExtraOpc = X86::CMP64rr; return X86::CMOVB64rr;
- case X86::ATOMUMIN8: ExtraOpc = X86::CMP8rr; return X86::CMOVA32rr;
- case X86::ATOMUMIN16: ExtraOpc = X86::CMP16rr; return X86::CMOVA16rr;
- case X86::ATOMUMIN32: ExtraOpc = X86::CMP32rr; return X86::CMOVA32rr;
- case X86::ATOMUMIN64: ExtraOpc = X86::CMP64rr; return X86::CMOVA64rr;
- }
- llvm_unreachable("Unhandled atomic-load-op opcode!");
-}
-
-// Get opcode of the non-atomic one from the specified atomic instruction for
-// 64-bit data type on 32-bit target.
-static unsigned getNonAtomic6432Opcode(unsigned Opc, unsigned &HiOpc) {
- switch (Opc) {
- case X86::ATOMAND6432: HiOpc = X86::AND32rr; return X86::AND32rr;
- case X86::ATOMOR6432: HiOpc = X86::OR32rr; return X86::OR32rr;
- case X86::ATOMXOR6432: HiOpc = X86::XOR32rr; return X86::XOR32rr;
- case X86::ATOMADD6432: HiOpc = X86::ADC32rr; return X86::ADD32rr;
- case X86::ATOMSUB6432: HiOpc = X86::SBB32rr; return X86::SUB32rr;
- case X86::ATOMSWAP6432: HiOpc = X86::MOV32rr; return X86::MOV32rr;
- case X86::ATOMMAX6432: HiOpc = X86::SETLr; return X86::SETLr;
- case X86::ATOMMIN6432: HiOpc = X86::SETGr; return X86::SETGr;
- case X86::ATOMUMAX6432: HiOpc = X86::SETBr; return X86::SETBr;
- case X86::ATOMUMIN6432: HiOpc = X86::SETAr; return X86::SETAr;
- }
- llvm_unreachable("Unhandled atomic-load-op opcode!");
-}
-
-// Get opcode of the non-atomic one from the specified atomic instruction for
-// 64-bit data type on 32-bit target with extra opcode.
-static unsigned getNonAtomic6432OpcodeWithExtraOpc(unsigned Opc,
- unsigned &HiOpc,
- unsigned &ExtraOpc) {
- switch (Opc) {
- case X86::ATOMNAND6432:
- ExtraOpc = X86::NOT32r;
- HiOpc = X86::AND32rr;
- return X86::AND32rr;
- }
- llvm_unreachable("Unhandled atomic-load-op opcode!");
-}
-
-// Get pseudo CMOV opcode from the specified data type.
-static unsigned getPseudoCMOVOpc(EVT VT) {
- switch (VT.getSimpleVT().SimpleTy) {
- case MVT::i8: return X86::CMOV_GR8;
- case MVT::i16: return X86::CMOV_GR16;
- case MVT::i32: return X86::CMOV_GR32;
- default:
- break;
- }
- llvm_unreachable("Unknown CMOV opcode!");
-}
-
-// EmitAtomicLoadArith - emit the code sequence for pseudo atomic instructions.
-// They will be translated into a spin-loop or compare-exchange loop from
-//
-// ...
-// dst = atomic-fetch-op MI.addr, MI.val
-// ...
-//
-// to
-//
-// ...
-// t1 = LOAD MI.addr
-// loop:
-// t4 = phi(t1, t3 / loop)
-// t2 = OP MI.val, t4
-// EAX = t4
-// LCMPXCHG [MI.addr], t2, [EAX is implicitly used & defined]
-// t3 = EAX
-// JNE loop
-// sink:
-// dst = t3
-// ...
-MachineBasicBlock *
-X86TargetLowering::EmitAtomicLoadArith(MachineInstr *MI,
- MachineBasicBlock *MBB) const {
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
- DebugLoc DL = MI->getDebugLoc();
-
- MachineFunction *MF = MBB->getParent();
- MachineRegisterInfo &MRI = MF->getRegInfo();
-
- const BasicBlock *BB = MBB->getBasicBlock();
- MachineFunction::iterator I = MBB;
- ++I;
-
- assert(MI->getNumOperands() <= X86::AddrNumOperands + 4 &&
- "Unexpected number of operands");
-
- assert(MI->hasOneMemOperand() &&
- "Expected atomic-load-op to have one memoperand");
-
- // Memory Reference
- MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin();
- MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end();
-
- unsigned DstReg, SrcReg;
- unsigned MemOpndSlot;
-
- unsigned CurOp = 0;
-
- DstReg = MI->getOperand(CurOp++).getReg();
- MemOpndSlot = CurOp;
- CurOp += X86::AddrNumOperands;
- SrcReg = MI->getOperand(CurOp++).getReg();
-
- const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
- MVT::SimpleValueType VT = *RC->vt_begin();
- unsigned t1 = MRI.createVirtualRegister(RC);
- unsigned t2 = MRI.createVirtualRegister(RC);
- unsigned t3 = MRI.createVirtualRegister(RC);
- unsigned t4 = MRI.createVirtualRegister(RC);
- unsigned PhyReg = getX86SubSuperRegister(X86::EAX, VT);
-
- unsigned LCMPXCHGOpc = getCmpXChgOpcode(VT);
- unsigned LOADOpc = getLoadOpcode(VT);
-
- // For the atomic load-arith operator, we generate
- //
- // thisMBB:
- // t1 = LOAD [MI.addr]
- // mainMBB:
- // t4 = phi(t1 / thisMBB, t3 / mainMBB)
- // t1 = OP MI.val, EAX
- // EAX = t4
- // LCMPXCHG [MI.addr], t1, [EAX is implicitly used & defined]
- // t3 = EAX
- // JNE mainMBB
- // sinkMBB:
- // dst = t3
-
- MachineBasicBlock *thisMBB = MBB;
- MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
- MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
- MF->insert(I, mainMBB);
- MF->insert(I, sinkMBB);
-
- MachineInstrBuilder MIB;
-
- // Transfer the remainder of BB and its successor edges to sinkMBB.
- sinkMBB->splice(sinkMBB->begin(), MBB,
- std::next(MachineBasicBlock::iterator(MI)), MBB->end());
- sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
-
- // thisMBB:
- MIB = BuildMI(thisMBB, DL, TII->get(LOADOpc), t1);
- for (unsigned i = 0; i < X86::AddrNumOperands; ++i) {
- MachineOperand NewMO = MI->getOperand(MemOpndSlot + i);
- if (NewMO.isReg())
- NewMO.setIsKill(false);
- MIB.addOperand(NewMO);
- }
- for (MachineInstr::mmo_iterator MMOI = MMOBegin; MMOI != MMOEnd; ++MMOI) {
- unsigned flags = (*MMOI)->getFlags();
- flags = (flags & ~MachineMemOperand::MOStore) | MachineMemOperand::MOLoad;
- MachineMemOperand *MMO =
- MF->getMachineMemOperand((*MMOI)->getPointerInfo(), flags,
- (*MMOI)->getSize(),
- (*MMOI)->getBaseAlignment(),
- (*MMOI)->getTBAAInfo(),
- (*MMOI)->getRanges());
- MIB.addMemOperand(MMO);
- }
-
- thisMBB->addSuccessor(mainMBB);
-
- // mainMBB:
- MachineBasicBlock *origMainMBB = mainMBB;
-
- // Add a PHI.
- MachineInstr *Phi = BuildMI(mainMBB, DL, TII->get(X86::PHI), t4)
- .addReg(t1).addMBB(thisMBB).addReg(t3).addMBB(mainMBB);
-
- unsigned Opc = MI->getOpcode();
- switch (Opc) {
- default:
- llvm_unreachable("Unhandled atomic-load-op opcode!");
- case X86::ATOMAND8:
- case X86::ATOMAND16:
- case X86::ATOMAND32:
- case X86::ATOMAND64:
- case X86::ATOMOR8:
- case X86::ATOMOR16:
- case X86::ATOMOR32:
- case X86::ATOMOR64:
- case X86::ATOMXOR8:
- case X86::ATOMXOR16:
- case X86::ATOMXOR32:
- case X86::ATOMXOR64: {
- unsigned ARITHOpc = getNonAtomicOpcode(Opc);
- BuildMI(mainMBB, DL, TII->get(ARITHOpc), t2).addReg(SrcReg)
- .addReg(t4);
- break;
- }
- case X86::ATOMNAND8:
- case X86::ATOMNAND16:
- case X86::ATOMNAND32:
- case X86::ATOMNAND64: {
- unsigned Tmp = MRI.createVirtualRegister(RC);
- unsigned NOTOpc;
- unsigned ANDOpc = getNonAtomicOpcodeWithExtraOpc(Opc, NOTOpc);
- BuildMI(mainMBB, DL, TII->get(ANDOpc), Tmp).addReg(SrcReg)
- .addReg(t4);
- BuildMI(mainMBB, DL, TII->get(NOTOpc), t2).addReg(Tmp);
- break;
- }
- case X86::ATOMMAX8:
- case X86::ATOMMAX16:
- case X86::ATOMMAX32:
- case X86::ATOMMAX64:
- case X86::ATOMMIN8:
- case X86::ATOMMIN16:
- case X86::ATOMMIN32:
- case X86::ATOMMIN64:
- case X86::ATOMUMAX8:
- case X86::ATOMUMAX16:
- case X86::ATOMUMAX32:
- case X86::ATOMUMAX64:
- case X86::ATOMUMIN8:
- case X86::ATOMUMIN16:
- case X86::ATOMUMIN32:
- case X86::ATOMUMIN64: {
- unsigned CMPOpc;
- unsigned CMOVOpc = getNonAtomicOpcodeWithExtraOpc(Opc, CMPOpc);
-
- BuildMI(mainMBB, DL, TII->get(CMPOpc))
- .addReg(SrcReg)
- .addReg(t4);
-
- if (Subtarget->hasCMov()) {
- if (VT != MVT::i8) {
- // Native support
- BuildMI(mainMBB, DL, TII->get(CMOVOpc), t2)
- .addReg(SrcReg)
- .addReg(t4);
- } else {
- // Promote i8 to i32 to use CMOV32
- const TargetRegisterInfo* TRI = getTargetMachine().getRegisterInfo();
- const TargetRegisterClass *RC32 =
- TRI->getSubClassWithSubReg(getRegClassFor(MVT::i32), X86::sub_8bit);
- unsigned SrcReg32 = MRI.createVirtualRegister(RC32);
- unsigned AccReg32 = MRI.createVirtualRegister(RC32);
- unsigned Tmp = MRI.createVirtualRegister(RC32);
-
- unsigned Undef = MRI.createVirtualRegister(RC32);
- BuildMI(mainMBB, DL, TII->get(TargetOpcode::IMPLICIT_DEF), Undef);
-
- BuildMI(mainMBB, DL, TII->get(TargetOpcode::INSERT_SUBREG), SrcReg32)
- .addReg(Undef)
- .addReg(SrcReg)
- .addImm(X86::sub_8bit);
- BuildMI(mainMBB, DL, TII->get(TargetOpcode::INSERT_SUBREG), AccReg32)
- .addReg(Undef)
- .addReg(t4)
- .addImm(X86::sub_8bit);
-
- BuildMI(mainMBB, DL, TII->get(CMOVOpc), Tmp)
- .addReg(SrcReg32)
- .addReg(AccReg32);
-
- BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), t2)
- .addReg(Tmp, 0, X86::sub_8bit);
- }
- } else {
- // Use pseudo select and lower them.
- assert((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) &&
- "Invalid atomic-load-op transformation!");
- unsigned SelOpc = getPseudoCMOVOpc(VT);
- X86::CondCode CC = X86::getCondFromCMovOpc(CMOVOpc);
- assert(CC != X86::COND_INVALID && "Invalid atomic-load-op transformation!");
- MIB = BuildMI(mainMBB, DL, TII->get(SelOpc), t2)
- .addReg(SrcReg).addReg(t4)
- .addImm(CC);
- mainMBB = EmitLoweredSelect(MIB, mainMBB);
- // Replace the original PHI node as mainMBB is changed after CMOV
- // lowering.
- BuildMI(*origMainMBB, Phi, DL, TII->get(X86::PHI), t4)
- .addReg(t1).addMBB(thisMBB).addReg(t3).addMBB(mainMBB);
- Phi->eraseFromParent();
- }
- break;
- }
- }
-
- // Copy PhyReg back from virtual register.
- BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), PhyReg)
- .addReg(t4);
-
- MIB = BuildMI(mainMBB, DL, TII->get(LCMPXCHGOpc));
- for (unsigned i = 0; i < X86::AddrNumOperands; ++i) {
- MachineOperand NewMO = MI->getOperand(MemOpndSlot + i);
- if (NewMO.isReg())
- NewMO.setIsKill(false);
- MIB.addOperand(NewMO);
- }
- MIB.addReg(t2);
- MIB.setMemRefs(MMOBegin, MMOEnd);
-
- // Copy PhyReg back to virtual register.
- BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), t3)
- .addReg(PhyReg);
-
- BuildMI(mainMBB, DL, TII->get(X86::JNE_4)).addMBB(origMainMBB);
-
- mainMBB->addSuccessor(origMainMBB);
- mainMBB->addSuccessor(sinkMBB);
-
- // sinkMBB:
- BuildMI(*sinkMBB, sinkMBB->begin(), DL,
- TII->get(TargetOpcode::COPY), DstReg)
- .addReg(t3);
-
- MI->eraseFromParent();
- return sinkMBB;
-}
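The pseudo-instruction expansion removed here open-coded each read-modify-write atomic as a load feeding a CMPXCHG retry loop; after this change that shape is expected to come from the generic atomic expansion machinery instead. For orientation only, a standalone sketch of the loop the block comment above describes, written against std::atomic rather than MachineInstrs (the helper name fetch_and_loop is illustrative):

    #include <atomic>
    #include <cstdint>

    // "dst = atomic-fetch-and addr, val" as a compare-exchange loop: keep
    // retrying until the CAS observes a value nobody else has changed.
    uint32_t fetch_and_loop(std::atomic<uint32_t> &addr, uint32_t val) {
      uint32_t old = addr.load();
      while (!addr.compare_exchange_weak(old, old & val)) {
        // On failure 'old' was refreshed with the current contents; retry.
      }
      return old;
    }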
-
-// EmitAtomicLoadArith6432 - emit the code sequence for pseudo atomic
-// instructions. They will be translated into a spin-loop or compare-exchange
-// loop from
-//
-// ...
-// dst = atomic-fetch-op MI.addr, MI.val
-// ...
-//
-// to
-//
-// ...
-// t1L = LOAD [MI.addr + 0]
-// t1H = LOAD [MI.addr + 4]
-// loop:
-// t4L = phi(t1L, t3L / loop)
-// t4H = phi(t1H, t3H / loop)
-// t2L = OP MI.val.lo, t4L
-// t2H = OP MI.val.hi, t4H
-// EAX = t4L
-// EDX = t4H
-// EBX = t2L
-// ECX = t2H
-// LCMPXCHG8B [MI.addr], [ECX:EBX & EDX:EAX are implicitly used and EDX:EAX is implicitly defined]
-// t3L = EAX
-// t3H = EDX
-// JNE loop
-// sink:
-// dstL = t3L
-// dstH = t3H
-// ...
-MachineBasicBlock *
-X86TargetLowering::EmitAtomicLoadArith6432(MachineInstr *MI,
- MachineBasicBlock *MBB) const {
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
- DebugLoc DL = MI->getDebugLoc();
-
- MachineFunction *MF = MBB->getParent();
- MachineRegisterInfo &MRI = MF->getRegInfo();
-
- const BasicBlock *BB = MBB->getBasicBlock();
- MachineFunction::iterator I = MBB;
- ++I;
-
- assert(MI->getNumOperands() <= X86::AddrNumOperands + 7 &&
- "Unexpected number of operands");
-
- assert(MI->hasOneMemOperand() &&
- "Expected atomic-load-op32 to have one memoperand");
-
- // Memory Reference
- MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin();
- MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end();
-
- unsigned DstLoReg, DstHiReg;
- unsigned SrcLoReg, SrcHiReg;
- unsigned MemOpndSlot;
-
- unsigned CurOp = 0;
-
- DstLoReg = MI->getOperand(CurOp++).getReg();
- DstHiReg = MI->getOperand(CurOp++).getReg();
- MemOpndSlot = CurOp;
- CurOp += X86::AddrNumOperands;
- SrcLoReg = MI->getOperand(CurOp++).getReg();
- SrcHiReg = MI->getOperand(CurOp++).getReg();
-
- const TargetRegisterClass *RC = &X86::GR32RegClass;
- const TargetRegisterClass *RC8 = &X86::GR8RegClass;
-
- unsigned t1L = MRI.createVirtualRegister(RC);
- unsigned t1H = MRI.createVirtualRegister(RC);
- unsigned t2L = MRI.createVirtualRegister(RC);
- unsigned t2H = MRI.createVirtualRegister(RC);
- unsigned t3L = MRI.createVirtualRegister(RC);
- unsigned t3H = MRI.createVirtualRegister(RC);
- unsigned t4L = MRI.createVirtualRegister(RC);
- unsigned t4H = MRI.createVirtualRegister(RC);
-
- unsigned LCMPXCHGOpc = X86::LCMPXCHG8B;
- unsigned LOADOpc = X86::MOV32rm;
-
- // For the atomic load-arith operator, we generate
- //
- // thisMBB:
- // t1L = LOAD [MI.addr + 0]
- // t1H = LOAD [MI.addr + 4]
- // mainMBB:
- // t4L = phi(t1L / thisMBB, t3L / mainMBB)
- // t4H = phi(t1H / thisMBB, t3H / mainMBB)
- // t2L = OP MI.val.lo, t4L
- // t2H = OP MI.val.hi, t4H
- // EBX = t2L
- // ECX = t2H
- // LCMPXCHG8B [MI.addr], [ECX:EBX & EDX:EAX are implicitly used and EDX:EAX is implicitly defined]
- // t3L = EAX
- // t3H = EDX
- // JNE loop
- // sinkMBB:
- // dstL = t3L
- // dstH = t3H
-
- MachineBasicBlock *thisMBB = MBB;
- MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
- MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
- MF->insert(I, mainMBB);
- MF->insert(I, sinkMBB);
-
- MachineInstrBuilder MIB;
-
- // Transfer the remainder of BB and its successor edges to sinkMBB.
- sinkMBB->splice(sinkMBB->begin(), MBB,
- std::next(MachineBasicBlock::iterator(MI)), MBB->end());
- sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
-
- // thisMBB:
- // Lo
- MIB = BuildMI(thisMBB, DL, TII->get(LOADOpc), t1L);
- for (unsigned i = 0; i < X86::AddrNumOperands; ++i) {
- MachineOperand NewMO = MI->getOperand(MemOpndSlot + i);
- if (NewMO.isReg())
- NewMO.setIsKill(false);
- MIB.addOperand(NewMO);
- }
- for (MachineInstr::mmo_iterator MMOI = MMOBegin; MMOI != MMOEnd; ++MMOI) {
- unsigned flags = (*MMOI)->getFlags();
- flags = (flags & ~MachineMemOperand::MOStore) | MachineMemOperand::MOLoad;
- MachineMemOperand *MMO =
- MF->getMachineMemOperand((*MMOI)->getPointerInfo(), flags,
- (*MMOI)->getSize(),
- (*MMOI)->getBaseAlignment(),
- (*MMOI)->getTBAAInfo(),
- (*MMOI)->getRanges());
- MIB.addMemOperand(MMO);
- };
- MachineInstr *LowMI = MIB;
-
- // Hi
- MIB = BuildMI(thisMBB, DL, TII->get(LOADOpc), t1H);
- for (unsigned i = 0; i < X86::AddrNumOperands; ++i) {
- if (i == X86::AddrDisp) {
- MIB.addDisp(MI->getOperand(MemOpndSlot + i), 4); // 4 == sizeof(i32)
- } else {
- MachineOperand NewMO = MI->getOperand(MemOpndSlot + i);
- if (NewMO.isReg())
- NewMO.setIsKill(false);
- MIB.addOperand(NewMO);
- }
- }
- MIB.setMemRefs(LowMI->memoperands_begin(), LowMI->memoperands_end());
-
- thisMBB->addSuccessor(mainMBB);
-
- // mainMBB:
- MachineBasicBlock *origMainMBB = mainMBB;
-
- // Add PHIs.
- MachineInstr *PhiL = BuildMI(mainMBB, DL, TII->get(X86::PHI), t4L)
- .addReg(t1L).addMBB(thisMBB).addReg(t3L).addMBB(mainMBB);
- MachineInstr *PhiH = BuildMI(mainMBB, DL, TII->get(X86::PHI), t4H)
- .addReg(t1H).addMBB(thisMBB).addReg(t3H).addMBB(mainMBB);
-
- unsigned Opc = MI->getOpcode();
- switch (Opc) {
- default:
- llvm_unreachable("Unhandled atomic-load-op6432 opcode!");
- case X86::ATOMAND6432:
- case X86::ATOMOR6432:
- case X86::ATOMXOR6432:
- case X86::ATOMADD6432:
- case X86::ATOMSUB6432: {
- unsigned HiOpc;
- unsigned LoOpc = getNonAtomic6432Opcode(Opc, HiOpc);
- BuildMI(mainMBB, DL, TII->get(LoOpc), t2L).addReg(t4L)
- .addReg(SrcLoReg);
- BuildMI(mainMBB, DL, TII->get(HiOpc), t2H).addReg(t4H)
- .addReg(SrcHiReg);
- break;
- }
- case X86::ATOMNAND6432: {
- unsigned HiOpc, NOTOpc;
- unsigned LoOpc = getNonAtomic6432OpcodeWithExtraOpc(Opc, HiOpc, NOTOpc);
- unsigned TmpL = MRI.createVirtualRegister(RC);
- unsigned TmpH = MRI.createVirtualRegister(RC);
- BuildMI(mainMBB, DL, TII->get(LoOpc), TmpL).addReg(SrcLoReg)
- .addReg(t4L);
- BuildMI(mainMBB, DL, TII->get(HiOpc), TmpH).addReg(SrcHiReg)
- .addReg(t4H);
- BuildMI(mainMBB, DL, TII->get(NOTOpc), t2L).addReg(TmpL);
- BuildMI(mainMBB, DL, TII->get(NOTOpc), t2H).addReg(TmpH);
- break;
- }
- case X86::ATOMMAX6432:
- case X86::ATOMMIN6432:
- case X86::ATOMUMAX6432:
- case X86::ATOMUMIN6432: {
- unsigned HiOpc;
- unsigned LoOpc = getNonAtomic6432Opcode(Opc, HiOpc);
- unsigned cL = MRI.createVirtualRegister(RC8);
- unsigned cH = MRI.createVirtualRegister(RC8);
- unsigned cL32 = MRI.createVirtualRegister(RC);
- unsigned cH32 = MRI.createVirtualRegister(RC);
- unsigned cc = MRI.createVirtualRegister(RC);
- // cl := cmp src_lo, lo
- BuildMI(mainMBB, DL, TII->get(X86::CMP32rr))
- .addReg(SrcLoReg).addReg(t4L);
- BuildMI(mainMBB, DL, TII->get(LoOpc), cL);
- BuildMI(mainMBB, DL, TII->get(X86::MOVZX32rr8), cL32).addReg(cL);
- // ch := cmp src_hi, hi
- BuildMI(mainMBB, DL, TII->get(X86::CMP32rr))
- .addReg(SrcHiReg).addReg(t4H);
- BuildMI(mainMBB, DL, TII->get(HiOpc), cH);
- BuildMI(mainMBB, DL, TII->get(X86::MOVZX32rr8), cH32).addReg(cH);
- // cc := if (src_hi == hi) ? cl : ch;
- if (Subtarget->hasCMov()) {
- BuildMI(mainMBB, DL, TII->get(X86::CMOVE32rr), cc)
- .addReg(cH32).addReg(cL32);
- } else {
- MIB = BuildMI(mainMBB, DL, TII->get(X86::CMOV_GR32), cc)
- .addReg(cH32).addReg(cL32)
- .addImm(X86::COND_E);
- mainMBB = EmitLoweredSelect(MIB, mainMBB);
- }
- BuildMI(mainMBB, DL, TII->get(X86::TEST32rr)).addReg(cc).addReg(cc);
- if (Subtarget->hasCMov()) {
- BuildMI(mainMBB, DL, TII->get(X86::CMOVNE32rr), t2L)
- .addReg(SrcLoReg).addReg(t4L);
- BuildMI(mainMBB, DL, TII->get(X86::CMOVNE32rr), t2H)
- .addReg(SrcHiReg).addReg(t4H);
- } else {
- MIB = BuildMI(mainMBB, DL, TII->get(X86::CMOV_GR32), t2L)
- .addReg(SrcLoReg).addReg(t4L)
- .addImm(X86::COND_NE);
- mainMBB = EmitLoweredSelect(MIB, mainMBB);
- // As the lowered CMOV won't clobber EFLAGS, we could reuse it for the
- // 2nd CMOV lowering.
- mainMBB->addLiveIn(X86::EFLAGS);
- MIB = BuildMI(mainMBB, DL, TII->get(X86::CMOV_GR32), t2H)
- .addReg(SrcHiReg).addReg(t4H)
- .addImm(X86::COND_NE);
- mainMBB = EmitLoweredSelect(MIB, mainMBB);
- // Replace the original PHI node as mainMBB is changed after CMOV
- // lowering.
- BuildMI(*origMainMBB, PhiL, DL, TII->get(X86::PHI), t4L)
- .addReg(t1L).addMBB(thisMBB).addReg(t3L).addMBB(mainMBB);
- BuildMI(*origMainMBB, PhiH, DL, TII->get(X86::PHI), t4H)
- .addReg(t1H).addMBB(thisMBB).addReg(t3H).addMBB(mainMBB);
- PhiL->eraseFromParent();
- PhiH->eraseFromParent();
- }
- break;
- }
- case X86::ATOMSWAP6432: {
- unsigned HiOpc;
- unsigned LoOpc = getNonAtomic6432Opcode(Opc, HiOpc);
- BuildMI(mainMBB, DL, TII->get(LoOpc), t2L).addReg(SrcLoReg);
- BuildMI(mainMBB, DL, TII->get(HiOpc), t2H).addReg(SrcHiReg);
- break;
- }
- }
-
- // Copy EDX:EAX back from HiReg:LoReg
- BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), X86::EAX).addReg(t4L);
- BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), X86::EDX).addReg(t4H);
- // Copy ECX:EBX from t1H:t1L
- BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), X86::EBX).addReg(t2L);
- BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), X86::ECX).addReg(t2H);
-
- MIB = BuildMI(mainMBB, DL, TII->get(LCMPXCHGOpc));
- for (unsigned i = 0; i < X86::AddrNumOperands; ++i) {
- MachineOperand NewMO = MI->getOperand(MemOpndSlot + i);
- if (NewMO.isReg())
- NewMO.setIsKill(false);
- MIB.addOperand(NewMO);
- }
- MIB.setMemRefs(MMOBegin, MMOEnd);
-
- // Copy EDX:EAX back to t3H:t3L
- BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), t3L).addReg(X86::EAX);
- BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), t3H).addReg(X86::EDX);
-
- BuildMI(mainMBB, DL, TII->get(X86::JNE_4)).addMBB(origMainMBB);
-
- mainMBB->addSuccessor(origMainMBB);
- mainMBB->addSuccessor(sinkMBB);
-
- // sinkMBB:
- BuildMI(*sinkMBB, sinkMBB->begin(), DL,
- TII->get(TargetOpcode::COPY), DstLoReg)
- .addReg(t3L);
- BuildMI(*sinkMBB, sinkMBB->begin(), DL,
- TII->get(TargetOpcode::COPY), DstHiReg)
- .addReg(t3H);
-
- MI->eraseFromParent();
- return sinkMBB;
-}
-
// FIXME: When we get size specific XMM0 registers, i.e. XMM0_V16I8
// or XMM0_V32I8 in AVX all of this code can be replaced with that
// in the .td file.
@@ -16068,7 +17073,7 @@ X86TargetLowering::EmitVAARG64WithCustomInserter(
MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end();
// Machine Information
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ const TargetInstrInfo *TII = MBB->getParent()->getTarget().getInstrInfo();
MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
const TargetRegisterClass *AddrRegClass = getRegClassFor(MVT::i64);
const TargetRegisterClass *OffsetRegClass = getRegClassFor(MVT::i32);
@@ -16324,7 +17329,7 @@ X86TargetLowering::EmitVAStartSaveXMMRegsWithCustomInserter(
XMMSaveMBB->addSuccessor(EndMBB);
// Now add the instructions.
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ const TargetInstrInfo *TII = MBB->getParent()->getTarget().getInstrInfo();
DebugLoc DL = MI->getDebugLoc();
unsigned CountReg = MI->getOperand(0).getReg();
@@ -16407,7 +17412,7 @@ static bool checkAndUpdateEFLAGSKill(MachineBasicBlock::iterator SelectItr,
MachineBasicBlock *
X86TargetLowering::EmitLoweredSelect(MachineInstr *MI,
MachineBasicBlock *BB) const {
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ const TargetInstrInfo *TII = BB->getParent()->getTarget().getInstrInfo();
DebugLoc DL = MI->getDebugLoc();
// To "insert" a SELECT_CC instruction, we actually have to insert the
@@ -16433,7 +17438,7 @@ X86TargetLowering::EmitLoweredSelect(MachineInstr *MI,
// If the EFLAGS register isn't dead in the terminator, then claim that it's
// live into the sink and copy blocks.
- const TargetRegisterInfo* TRI = getTargetMachine().getRegisterInfo();
+ const TargetRegisterInfo* TRI = BB->getParent()->getTarget().getRegisterInfo();
if (!MI->killsRegister(X86::EFLAGS) &&
!checkAndUpdateEFLAGSKill(MI, BB, TRI)) {
copy0MBB->addLiveIn(X86::EFLAGS);
@@ -16474,9 +17479,9 @@ X86TargetLowering::EmitLoweredSelect(MachineInstr *MI,
MachineBasicBlock *
X86TargetLowering::EmitLoweredSegAlloca(MachineInstr *MI, MachineBasicBlock *BB,
bool Is64Bit) const {
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
- DebugLoc DL = MI->getDebugLoc();
MachineFunction *MF = BB->getParent();
+ const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
+ DebugLoc DL = MI->getDebugLoc();
const BasicBlock *LLVM_BB = BB->getBasicBlock();
assert(MF->shouldSplitStack());
@@ -16546,7 +17551,7 @@ X86TargetLowering::EmitLoweredSegAlloca(MachineInstr *MI, MachineBasicBlock *BB,
// Calls into a routine in libgcc to allocate more space from the heap.
const uint32_t *RegMask =
- getTargetMachine().getRegisterInfo()->getCallPreservedMask(CallingConv::C);
+ MF->getTarget().getRegisterInfo()->getCallPreservedMask(CallingConv::C);
if (Is64Bit) {
BuildMI(mallocMBB, DL, TII->get(X86::MOV64rr), X86::RDI)
.addReg(sizeVReg);
@@ -16594,8 +17599,8 @@ X86TargetLowering::EmitLoweredSegAlloca(MachineInstr *MI, MachineBasicBlock *BB,
MachineBasicBlock *
X86TargetLowering::EmitLoweredWinAlloca(MachineInstr *MI,
- MachineBasicBlock *BB) const {
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ MachineBasicBlock *BB) const {
+ const TargetInstrInfo *TII = BB->getParent()->getTarget().getInstrInfo();
DebugLoc DL = MI->getDebugLoc();
assert(!Subtarget->isTargetMacho());
@@ -16651,10 +17656,10 @@ X86TargetLowering::EmitLoweredTLSCall(MachineInstr *MI,
// our load from the relocation, sticking it in either RDI (x86-64)
// or EAX and doing an indirect call. The return value will then
// be in the normal return register.
+ MachineFunction *F = BB->getParent();
const X86InstrInfo *TII
- = static_cast<const X86InstrInfo*>(getTargetMachine().getInstrInfo());
+ = static_cast<const X86InstrInfo*>(F->getTarget().getInstrInfo());
DebugLoc DL = MI->getDebugLoc();
- MachineFunction *F = BB->getParent();
assert(Subtarget->isTargetDarwin() && "Darwin only instr emitted?");
assert(MI->getOperand(3).isGlobal() && "This should be a global");
@@ -16663,7 +17668,7 @@ X86TargetLowering::EmitLoweredTLSCall(MachineInstr *MI,
// FIXME: The 32-bit calls have non-standard calling conventions. Use a
// proper register mask.
const uint32_t *RegMask =
- getTargetMachine().getRegisterInfo()->getCallPreservedMask(CallingConv::C);
+ F->getTarget().getRegisterInfo()->getCallPreservedMask(CallingConv::C);
if (Subtarget->is64Bit()) {
MachineInstrBuilder MIB = BuildMI(*BB, MI, DL,
TII->get(X86::MOV64rm), X86::RDI)
@@ -16675,7 +17680,7 @@ X86TargetLowering::EmitLoweredTLSCall(MachineInstr *MI,
MIB = BuildMI(*BB, MI, DL, TII->get(X86::CALL64m));
addDirectMem(MIB, X86::RDI);
MIB.addReg(X86::RAX, RegState::ImplicitDefine).addRegMask(RegMask);
- } else if (getTargetMachine().getRelocationModel() != Reloc::PIC_) {
+ } else if (F->getTarget().getRelocationModel() != Reloc::PIC_) {
MachineInstrBuilder MIB = BuildMI(*BB, MI, DL,
TII->get(X86::MOV32rm), X86::EAX)
.addReg(0)
@@ -16707,9 +17712,8 @@ MachineBasicBlock *
X86TargetLowering::emitEHSjLjSetJmp(MachineInstr *MI,
MachineBasicBlock *MBB) const {
DebugLoc DL = MI->getDebugLoc();
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
-
MachineFunction *MF = MBB->getParent();
+ const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
MachineRegisterInfo &MRI = MF->getRegInfo();
const BasicBlock *BB = MBB->getBasicBlock();
@@ -16771,8 +17775,8 @@ X86TargetLowering::emitEHSjLjSetJmp(MachineInstr *MI,
unsigned PtrStoreOpc = 0;
unsigned LabelReg = 0;
const int64_t LabelOffset = 1 * PVT.getStoreSize();
- Reloc::Model RM = getTargetMachine().getRelocationModel();
- bool UseImmLabel = (getTargetMachine().getCodeModel() == CodeModel::Small) &&
+ Reloc::Model RM = MF->getTarget().getRelocationModel();
+ bool UseImmLabel = (MF->getTarget().getCodeModel() == CodeModel::Small) &&
(RM == Reloc::Static || RM == Reloc::DynamicNoPIC);
// Prepare IP either in reg or imm.
@@ -16816,7 +17820,7 @@ X86TargetLowering::emitEHSjLjSetJmp(MachineInstr *MI,
.addMBB(restoreMBB);
const X86RegisterInfo *RegInfo =
- static_cast<const X86RegisterInfo*>(getTargetMachine().getRegisterInfo());
+ static_cast<const X86RegisterInfo*>(MF->getTarget().getRegisterInfo());
MIB.addRegMask(RegInfo->getNoPreservedMask());
thisMBB->addSuccessor(mainMBB);
thisMBB->addSuccessor(restoreMBB);
@@ -16845,9 +17849,8 @@ MachineBasicBlock *
X86TargetLowering::emitEHSjLjLongJmp(MachineInstr *MI,
MachineBasicBlock *MBB) const {
DebugLoc DL = MI->getDebugLoc();
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
-
MachineFunction *MF = MBB->getParent();
+ const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
MachineRegisterInfo &MRI = MF->getRegInfo();
// Memory Reference
@@ -16863,7 +17866,7 @@ X86TargetLowering::emitEHSjLjLongJmp(MachineInstr *MI,
unsigned Tmp = MRI.createVirtualRegister(RC);
// Since FP is only updated here but NOT referenced, it's treated as GPR.
const X86RegisterInfo *RegInfo =
- static_cast<const X86RegisterInfo*>(getTargetMachine().getRegisterInfo());
+ static_cast<const X86RegisterInfo*>(MF->getTarget().getRegisterInfo());
unsigned FP = (PVT == MVT::i64) ? X86::RBP : X86::EBP;
unsigned SP = RegInfo->getStackRegister();
@@ -17038,12 +18041,12 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
case X86::FP80_TO_INT16_IN_MEM:
case X86::FP80_TO_INT32_IN_MEM:
case X86::FP80_TO_INT64_IN_MEM: {
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ MachineFunction *F = BB->getParent();
+ const TargetInstrInfo *TII = F->getTarget().getInstrInfo();
DebugLoc DL = MI->getDebugLoc();
// Change the floating point control register to use "round towards zero"
// mode when truncating to an integer value.
- MachineFunction *F = BB->getParent();
int CWFrameIdx = F->getFrameInfo()->CreateStackObject(2, 2, false);
addFrameReference(BuildMI(*BB, MI, DL,
TII->get(X86::FNSTCW16m)), CWFrameIdx);
@@ -17123,7 +18126,7 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
case X86::VPCMPESTRM128MEM:
assert(Subtarget->hasSSE42() &&
"Target must have SSE4.2 or AVX features enabled");
- return EmitPCMPSTRM(MI, BB, getTargetMachine().getInstrInfo());
+ return EmitPCMPSTRM(MI, BB, BB->getParent()->getTarget().getInstrInfo());
// String/text processing lowering.
case X86::PCMPISTRIREG:
@@ -17136,71 +18139,15 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
case X86::VPCMPESTRIMEM:
assert(Subtarget->hasSSE42() &&
"Target must have SSE4.2 or AVX features enabled");
- return EmitPCMPSTRI(MI, BB, getTargetMachine().getInstrInfo());
+ return EmitPCMPSTRI(MI, BB, BB->getParent()->getTarget().getInstrInfo());
// Thread synchronization.
case X86::MONITOR:
- return EmitMonitor(MI, BB, getTargetMachine().getInstrInfo(), Subtarget);
+ return EmitMonitor(MI, BB, BB->getParent()->getTarget().getInstrInfo(), Subtarget);
// xbegin
case X86::XBEGIN:
- return EmitXBegin(MI, BB, getTargetMachine().getInstrInfo());
-
- // Atomic Lowering.
- case X86::ATOMAND8:
- case X86::ATOMAND16:
- case X86::ATOMAND32:
- case X86::ATOMAND64:
- // Fall through
- case X86::ATOMOR8:
- case X86::ATOMOR16:
- case X86::ATOMOR32:
- case X86::ATOMOR64:
- // Fall through
- case X86::ATOMXOR16:
- case X86::ATOMXOR8:
- case X86::ATOMXOR32:
- case X86::ATOMXOR64:
- // Fall through
- case X86::ATOMNAND8:
- case X86::ATOMNAND16:
- case X86::ATOMNAND32:
- case X86::ATOMNAND64:
- // Fall through
- case X86::ATOMMAX8:
- case X86::ATOMMAX16:
- case X86::ATOMMAX32:
- case X86::ATOMMAX64:
- // Fall through
- case X86::ATOMMIN8:
- case X86::ATOMMIN16:
- case X86::ATOMMIN32:
- case X86::ATOMMIN64:
- // Fall through
- case X86::ATOMUMAX8:
- case X86::ATOMUMAX16:
- case X86::ATOMUMAX32:
- case X86::ATOMUMAX64:
- // Fall through
- case X86::ATOMUMIN8:
- case X86::ATOMUMIN16:
- case X86::ATOMUMIN32:
- case X86::ATOMUMIN64:
- return EmitAtomicLoadArith(MI, BB);
-
- // This group does 64-bit operations on a 32-bit host.
- case X86::ATOMAND6432:
- case X86::ATOMOR6432:
- case X86::ATOMXOR6432:
- case X86::ATOMNAND6432:
- case X86::ATOMADD6432:
- case X86::ATOMSUB6432:
- case X86::ATOMMAX6432:
- case X86::ATOMMIN6432:
- case X86::ATOMUMAX6432:
- case X86::ATOMUMIN6432:
- case X86::ATOMSWAP6432:
- return EmitAtomicLoadArith6432(MI, BB);
+ return EmitXBegin(MI, BB, BB->getParent()->getTarget().getInstrInfo());
case X86::VASTART_SAVE_XMM_REGS:
return EmitVAStartSaveXMMRegsWithCustomInserter(MI, BB);
@@ -17473,13 +18420,385 @@ static SDValue PerformShuffleCombine256(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
+/// \brief Get the PSHUF-style mask from a PSHUF node.
+///
+/// This is a very minor wrapper around getTargetShuffleMask to ease forming v4
+/// PSHUF-style masks that can be reused with such instructions.
+static SmallVector<int, 4> getPSHUFShuffleMask(SDValue N) {
+ SmallVector<int, 4> Mask;
+ bool IsUnary;
+ bool HaveMask = getTargetShuffleMask(N.getNode(), N.getSimpleValueType(), Mask, IsUnary);
+ (void)HaveMask;
+ assert(HaveMask);
+
+ switch (N.getOpcode()) {
+ case X86ISD::PSHUFD:
+ return Mask;
+ case X86ISD::PSHUFLW:
+ Mask.resize(4);
+ return Mask;
+ case X86ISD::PSHUFHW:
+ Mask.erase(Mask.begin(), Mask.begin() + 4);
+ for (int &M : Mask)
+ M -= 4;
+ return Mask;
+ default:
+ llvm_unreachable("No valid shuffle instruction found!");
+ }
+}
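getPSHUFShuffleMask recovers a four-lane mask from an existing node; in the other direction, the immediate consumed by PSHUFD/PSHUFLW/PSHUFHW packs such a mask two bits per destination lane, lane 0 in the lowest bits, which is what getV4X86ShuffleImm8ForMask builds as a DAG constant. A small sketch of that packing; encodePSHUFImm is an illustrative helper, not an LLVM function:

    #include <array>
    #include <cassert>
    #include <cstdint>

    // Pack a 4-lane shuffle mask into the 8-bit immediate used by PSHUFD and
    // friends: destination lane i is selected by bits [2*i+1 : 2*i].
    uint8_t encodePSHUFImm(const std::array<int, 4> &Mask) {
      uint8_t Imm = 0;
      for (int i = 0; i < 4; ++i) {
        assert(Mask[i] >= 0 && Mask[i] < 4 && "mask element out of range");
        Imm |= static_cast<uint8_t>(Mask[i]) << (2 * i);
      }
      return Imm;
    }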
+
+/// \brief Search for a combinable shuffle across a chain ending in pshufd.
+///
+/// We walk up the chain and look for a combinable shuffle, skipping over
+/// shuffles that we could hoist this shuffle's transformation past without
+/// altering anything.
+static bool combineRedundantDWordShuffle(SDValue N, MutableArrayRef<int> Mask,
+ SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ assert(N.getOpcode() == X86ISD::PSHUFD &&
+ "Called with something other than an x86 128-bit half shuffle!");
+ SDLoc DL(N);
+
+ // Walk up a single-use chain looking for a combinable shuffle.
+ SDValue V = N.getOperand(0);
+ for (; V.hasOneUse(); V = V.getOperand(0)) {
+ switch (V.getOpcode()) {
+ default:
+ return false; // Nothing combined!
+
+ case ISD::BITCAST:
+ // Skip bitcasts as we always know the type for the target specific
+ // instructions.
+ continue;
+
+ case X86ISD::PSHUFD:
+ // Found another dword shuffle.
+ break;
+
+ case X86ISD::PSHUFLW:
+ // Check that the low words (being shuffled) are the identity in the
+ // dword shuffle, and the high words are self-contained.
+ if (Mask[0] != 0 || Mask[1] != 1 ||
+ !(Mask[2] >= 2 && Mask[2] < 4 && Mask[3] >= 2 && Mask[3] < 4))
+ return false;
+
+ continue;
+
+ case X86ISD::PSHUFHW:
+ // Check that the high words (being shuffled) are the identity in the
+ // dword shuffle, and the low words are self-contained.
+ if (Mask[2] != 2 || Mask[3] != 3 ||
+ !(Mask[0] >= 0 && Mask[0] < 2 && Mask[1] >= 0 && Mask[1] < 2))
+ return false;
+
+ continue;
+
+ case X86ISD::UNPCKL:
+ case X86ISD::UNPCKH:
+ // For either i8 -> i16 or i16 -> i32 unpacks, we can combine a dword
+ // shuffle into a preceding word shuffle.
+ if (V.getValueType() != MVT::v16i8 && V.getValueType() != MVT::v8i16)
+ return false;
+
+ // Search for a half-shuffle which we can combine with.
+ unsigned CombineOp =
+ V.getOpcode() == X86ISD::UNPCKL ? X86ISD::PSHUFLW : X86ISD::PSHUFHW;
+ if (V.getOperand(0) != V.getOperand(1) ||
+ !V->isOnlyUserOf(V.getOperand(0).getNode()))
+ return false;
+ V = V.getOperand(0);
+ do {
+ switch (V.getOpcode()) {
+ default:
+ return false; // Nothing to combine.
+
+ case X86ISD::PSHUFLW:
+ case X86ISD::PSHUFHW:
+ if (V.getOpcode() == CombineOp)
+ break;
+
+ // Fallthrough!
+ case ISD::BITCAST:
+ V = V.getOperand(0);
+ continue;
+ }
+ break;
+ } while (V.hasOneUse());
+ break;
+ }
+ // Break out of the loop if we break out of the switch.
+ break;
+ }
+
+ if (!V.hasOneUse())
+ // We fell out of the loop without finding a viable combining instruction.
+ return false;
+
+ // Record the old value to use in RAUW-ing.
+ SDValue Old = V;
+
+ // Merge this node's mask and our incoming mask.
+ SmallVector<int, 4> VMask = getPSHUFShuffleMask(V);
+ for (int &M : Mask)
+ M = VMask[M];
+ V = DAG.getNode(V.getOpcode(), DL, V.getValueType(), V.getOperand(0),
+ getV4X86ShuffleImm8ForMask(Mask, DAG));
+
+ // It is possible that one of the combinable shuffles was completely absorbed
+ // by the other, just replace it and revisit all users in that case.
+ if (Old.getNode() == V.getNode()) {
+ DCI.CombineTo(N.getNode(), N.getOperand(0), /*AddTo=*/true);
+ return true;
+ }
+
+ // Replace N with its operand as we're going to combine that shuffle away.
+ DAG.ReplaceAllUsesWith(N, N.getOperand(0));
+
+ // Replace the combinable shuffle with the combined one, updating all users
+ // so that we re-evaluate the chain here.
+ DCI.CombineTo(Old.getNode(), V, /*AddTo*/ true);
+ return true;
+}
+
+/// \brief Search for a combinable shuffle across a chain ending in pshuflw or pshufhw.
+///
+/// We walk up the chain, skipping shuffles of the other half and looking
+/// through shuffles which switch halves, trying to find a shuffle of the same
+/// pair of dwords.
+static bool combineRedundantHalfShuffle(SDValue N, MutableArrayRef<int> Mask,
+ SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ assert(
+ (N.getOpcode() == X86ISD::PSHUFLW || N.getOpcode() == X86ISD::PSHUFHW) &&
+ "Called with something other than an x86 128-bit half shuffle!");
+ SDLoc DL(N);
+ unsigned CombineOpcode = N.getOpcode();
+
+ // Walk up a single-use chain looking for a combinable shuffle.
+ SDValue V = N.getOperand(0);
+ for (; V.hasOneUse(); V = V.getOperand(0)) {
+ switch (V.getOpcode()) {
+ default:
+ return false; // Nothing combined!
+
+ case ISD::BITCAST:
+ // Skip bitcasts as we always know the type for the target specific
+ // instructions.
+ continue;
+
+ case X86ISD::PSHUFLW:
+ case X86ISD::PSHUFHW:
+ if (V.getOpcode() == CombineOpcode)
+ break;
+
+ // Other-half shuffles are no-ops.
+ continue;
+
+ case X86ISD::PSHUFD: {
+ // We can only handle pshufd if the half we are combining either stays in
+ // its half, or switches to the other half. Bail if one of these isn't
+ // true.
+ SmallVector<int, 4> VMask = getPSHUFShuffleMask(V);
+ int DOffset = CombineOpcode == X86ISD::PSHUFLW ? 0 : 2;
+ if (!((VMask[DOffset + 0] < 2 && VMask[DOffset + 1] < 2) ||
+ (VMask[DOffset + 0] >= 2 && VMask[DOffset + 1] >= 2)))
+ return false;
+
+ // Map the mask through the pshufd and keep walking up the chain.
+ for (int i = 0; i < 4; ++i)
+ Mask[i] = 2 * (VMask[DOffset + Mask[i] / 2] % 2) + Mask[i] % 2;
+
+ // Switch halves if the pshufd does.
+ CombineOpcode =
+ VMask[DOffset + Mask[0] / 2] < 2 ? X86ISD::PSHUFLW : X86ISD::PSHUFHW;
+ continue;
+ }
+ }
+ // Break out of the loop if we break out of the switch.
+ break;
+ }
+
+ if (!V.hasOneUse())
+ // We fell out of the loop without finding a viable combining instruction.
+ return false;
+
+ // Record the old value to use in RAUW-ing.
+ SDValue Old = V;
+
+ // Merge this node's mask and our incoming mask (adjusted to account for all
+ // the pshufd instructions encountered).
+ SmallVector<int, 4> VMask = getPSHUFShuffleMask(V);
+ for (int &M : Mask)
+ M = VMask[M];
+ V = DAG.getNode(V.getOpcode(), DL, MVT::v8i16, V.getOperand(0),
+ getV4X86ShuffleImm8ForMask(Mask, DAG));
+
+ // Replace N with its operand as we're going to combine that shuffle away.
+ DAG.ReplaceAllUsesWith(N, N.getOperand(0));
+
+ // Replace the combinable shuffle with the combined one, updating all users
+ // so that we re-evaluate the chain here.
+ DCI.CombineTo(Old.getNode(), V, /*AddTo*/ true);
+ return true;
+}
+
+/// \brief Try to combine x86 target specific shuffles.
+static SDValue PerformTargetShuffleCombine(SDValue N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const X86Subtarget *Subtarget) {
+ SDLoc DL(N);
+ MVT VT = N.getSimpleValueType();
+ SmallVector<int, 4> Mask;
+
+ switch (N.getOpcode()) {
+ case X86ISD::PSHUFD:
+ case X86ISD::PSHUFLW:
+ case X86ISD::PSHUFHW:
+ Mask = getPSHUFShuffleMask(N);
+ assert(Mask.size() == 4);
+ break;
+ default:
+ return SDValue();
+ }
+
+ // Nuke no-op shuffles that show up after combining.
+ if (isNoopShuffleMask(Mask))
+ return DCI.CombineTo(N.getNode(), N.getOperand(0), /*AddTo*/ true);
+
+ // Look for simplifications involving one or two shuffle instructions.
+ SDValue V = N.getOperand(0);
+ switch (N.getOpcode()) {
+ default:
+ break;
+ case X86ISD::PSHUFLW:
+ case X86ISD::PSHUFHW:
+ assert(VT == MVT::v8i16);
+ (void)VT;
+
+ if (combineRedundantHalfShuffle(N, Mask, DAG, DCI))
+ return SDValue(); // We combined away this shuffle, so we're done.
+
+ // See if this reduces to a PSHUFD which is no more expensive and can
+ // combine with more operations.
+ if (Mask[0] % 2 == 0 && Mask[2] % 2 == 0 &&
+ areAdjacentMasksSequential(Mask)) {
+ int DMask[] = {-1, -1, -1, -1};
+ int DOffset = N.getOpcode() == X86ISD::PSHUFLW ? 0 : 2;
+ DMask[DOffset + 0] = DOffset + Mask[0] / 2;
+ DMask[DOffset + 1] = DOffset + Mask[2] / 2;
+ V = DAG.getNode(ISD::BITCAST, DL, MVT::v4i32, V);
+ DCI.AddToWorklist(V.getNode());
+ V = DAG.getNode(X86ISD::PSHUFD, DL, MVT::v4i32, V,
+ getV4X86ShuffleImm8ForMask(DMask, DAG));
+ DCI.AddToWorklist(V.getNode());
+ return DAG.getNode(ISD::BITCAST, DL, MVT::v8i16, V);
+ }
+
+ // Look for shuffle patterns which can be implemented as a single unpack.
+ // FIXME: This doesn't handle the location of the PSHUFD generically, and
+ // only works when we have a PSHUFD followed by two half-shuffles.
+ if (Mask[0] == Mask[1] && Mask[2] == Mask[3] &&
+ (V.getOpcode() == X86ISD::PSHUFLW ||
+ V.getOpcode() == X86ISD::PSHUFHW) &&
+ V.getOpcode() != N.getOpcode() &&
+ V.hasOneUse()) {
+ SDValue D = V.getOperand(0);
+ while (D.getOpcode() == ISD::BITCAST && D.hasOneUse())
+ D = D.getOperand(0);
+ if (D.getOpcode() == X86ISD::PSHUFD && D.hasOneUse()) {
+ SmallVector<int, 4> VMask = getPSHUFShuffleMask(V);
+ SmallVector<int, 4> DMask = getPSHUFShuffleMask(D);
+ int NOffset = N.getOpcode() == X86ISD::PSHUFLW ? 0 : 4;
+ int VOffset = V.getOpcode() == X86ISD::PSHUFLW ? 0 : 4;
+ int WordMask[8];
+ for (int i = 0; i < 4; ++i) {
+ WordMask[i + NOffset] = Mask[i] + NOffset;
+ WordMask[i + VOffset] = VMask[i] + VOffset;
+ }
+ // Map the word mask through the DWord mask.
+ int MappedMask[8];
+ for (int i = 0; i < 8; ++i)
+ MappedMask[i] = 2 * DMask[WordMask[i] / 2] + WordMask[i] % 2;
+ const int UnpackLoMask[] = {0, 0, 1, 1, 2, 2, 3, 3};
+ const int UnpackHiMask[] = {4, 4, 5, 5, 6, 6, 7, 7};
+ if (std::equal(std::begin(MappedMask), std::end(MappedMask),
+ std::begin(UnpackLoMask)) ||
+ std::equal(std::begin(MappedMask), std::end(MappedMask),
+ std::begin(UnpackHiMask))) {
+ // We can replace all three shuffles with an unpack.
+ V = DAG.getNode(ISD::BITCAST, DL, MVT::v8i16, D.getOperand(0));
+ DCI.AddToWorklist(V.getNode());
+ return DAG.getNode(MappedMask[0] == 0 ? X86ISD::UNPCKL
+ : X86ISD::UNPCKH,
+ DL, MVT::v8i16, V, V);
+ }
+ }
+ }
+
+ break;
+
+ case X86ISD::PSHUFD:
+ if (combineRedundantDWordShuffle(N, Mask, DAG, DCI))
+ return SDValue(); // We combined away this shuffle.
+
+ break;
+ }
+
+ return SDValue();
+}
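Both redundant-shuffle walks above end the same way: the incoming mask is composed with the mask of the shuffle found further up the chain (the "M = VMask[M]" loop) and a single combined node is emitted. A minimal sketch of that composition on plain arrays, assuming both masks index the same four lanes; composeMasks is illustrative only:

    #include <array>

    // Shuffling with 'Inner' first and then with 'Outer' is equivalent to a
    // single shuffle whose mask is Result[i] = Inner[Outer[i]].
    std::array<int, 4> composeMasks(const std::array<int, 4> &Outer,
                                    const std::array<int, 4> &Inner) {
      std::array<int, 4> Result;
      for (int i = 0; i < 4; ++i)
        Result[i] = Inner[Outer[i]];
      return Result;
    }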
+
/// PerformShuffleCombine - Performs several different shuffle combines.
static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget *Subtarget) {
SDLoc dl(N);
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
EVT VT = N->getValueType(0);
+ // Canonicalize shuffles that perform 'addsub' on packed float vectors
+ // according to the rule:
+ // (shuffle (FADD A, B), (FSUB A, B), Mask) ->
+ // (shuffle (FSUB A, -B), (FADD A, -B), Mask)
+ //
+ // Where 'Mask' is:
+ // <0,5,2,7> -- for v4f32 and v4f64 shuffles;
+ // <0,3> -- for v2f64 shuffles;
+ // <0,9,2,11,4,13,6,15> -- for v8f32 shuffles.
+ //
+ // This helps pattern-matching more SSE3/AVX ADDSUB instructions
+ // during the ISel stage.
+ if (N->getOpcode() == ISD::VECTOR_SHUFFLE &&
+ ((Subtarget->hasSSE3() && (VT == MVT::v4f32 || VT == MVT::v2f64)) ||
+ (Subtarget->hasAVX() && (VT == MVT::v8f32 || VT == MVT::v4f64))) &&
+ N0->getOpcode() == ISD::FADD && N1->getOpcode() == ISD::FSUB &&
+ // Operands to the FADD and FSUB must be the same.
+ ((N0->getOperand(0) == N1->getOperand(0) &&
+ N0->getOperand(1) == N1->getOperand(1)) ||
+ // FADD is commutable. See if by commuting the operands of the FADD
+ // we would still be able to match the operands of the FSUB dag node.
+ (N0->getOperand(1) == N1->getOperand(0) &&
+ N0->getOperand(0) == N1->getOperand(1))) &&
+ N0->getOperand(0)->getOpcode() != ISD::UNDEF &&
+ N0->getOperand(1)->getOpcode() != ISD::UNDEF) {
+
+ ShuffleVectorSDNode *SV = cast<ShuffleVectorSDNode>(N);
+ unsigned NumElts = VT.getVectorNumElements();
+ ArrayRef<int> Mask = SV->getMask();
+ bool CanFold = true;
+
+ for (unsigned i = 0, e = NumElts; i != e && CanFold; ++i)
+ CanFold = Mask[i] == (int)((i & 1) ? i + NumElts : i);
+
+ if (CanFold) {
+ SDValue Op0 = N1->getOperand(0);
+ SDValue Op1 = DAG.getNode(ISD::FNEG, dl, VT, N1->getOperand(1));
+ SDValue Sub = DAG.getNode(ISD::FSUB, dl, VT, Op0, Op1);
+ SDValue Add = DAG.getNode(ISD::FADD, dl, VT, Op0, Op1);
+ return DAG.getVectorShuffle(VT, dl, Sub, Add, Mask);
+ }
+ }
+
// Don't create instructions with illegal types after legalize types has run.
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (!DCI.isBeforeLegalize() && !TLI.isTypeLegal(VT.getVectorElementType()))
@@ -17490,6 +18809,57 @@ static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG,
N->getOpcode() == ISD::VECTOR_SHUFFLE)
return PerformShuffleCombine256(N, DAG, DCI, Subtarget);
+ // During Type Legalization, when promoting illegal vector types,
+ // the backend might introduce new shuffle dag nodes and bitcasts.
+ //
+ // This code performs the following transformation:
+ // fold: (shuffle (bitcast (BINOP A, B)), Undef, <Mask>) ->
+ // (shuffle (BINOP (bitcast A), (bitcast B)), Undef, <Mask>)
+ //
+ // We do this only if both the bitcast and the BINOP dag nodes have
+ // one use. Also, perform this transformation only if the new binary
+ // operation is legal. This is to avoid introducing dag nodes that
+ // potentially need to be further expanded (or custom lowered) into a
+ // less optimal sequence of dag nodes.
+ if (!DCI.isBeforeLegalize() && DCI.isBeforeLegalizeOps() &&
+ N1.getOpcode() == ISD::UNDEF && N0.hasOneUse() &&
+ N0.getOpcode() == ISD::BITCAST) {
+ SDValue BC0 = N0.getOperand(0);
+ EVT SVT = BC0.getValueType();
+ unsigned Opcode = BC0.getOpcode();
+ unsigned NumElts = VT.getVectorNumElements();
+
+ if (BC0.hasOneUse() && SVT.isVector() &&
+ SVT.getVectorNumElements() * 2 == NumElts &&
+ TLI.isOperationLegal(Opcode, VT)) {
+ bool CanFold = false;
+ switch (Opcode) {
+ default : break;
+ case ISD::ADD :
+ case ISD::FADD :
+ case ISD::SUB :
+ case ISD::FSUB :
+ case ISD::MUL :
+ case ISD::FMUL :
+ CanFold = true;
+ }
+
+ unsigned SVTNumElts = SVT.getVectorNumElements();
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
+ for (unsigned i = 0, e = SVTNumElts; i != e && CanFold; ++i)
+ CanFold = SVOp->getMaskElt(i) == (int)(i * 2);
+ for (unsigned i = SVTNumElts, e = NumElts; i != e && CanFold; ++i)
+ CanFold = SVOp->getMaskElt(i) < 0;
+
+ if (CanFold) {
+ SDValue BC00 = DAG.getNode(ISD::BITCAST, dl, VT, BC0.getOperand(0));
+ SDValue BC01 = DAG.getNode(ISD::BITCAST, dl, VT, BC0.getOperand(1));
+ SDValue NewBinOp = DAG.getNode(BC0.getOpcode(), dl, VT, BC00, BC01);
+ return DAG.getVectorShuffle(VT, dl, NewBinOp, N1, &SVOp->getMask()[0]);
+ }
+ }
+ }
+
// Only handle 128 wide vector from here on.
if (!VT.is128BitVector())
return SDValue();
@@ -17501,7 +18871,18 @@ static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG,
for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i)
Elts.push_back(getShuffleScalarElt(N, i, DAG, 0));
- return EltsFromConsecutiveLoads(VT, Elts, dl, DAG, true);
+ SDValue LD = EltsFromConsecutiveLoads(VT, Elts, dl, DAG, true);
+ if (LD.getNode())
+ return LD;
+
+ if (isTargetShuffle(N->getOpcode())) {
+ SDValue Shuffle =
+ PerformTargetShuffleCombine(SDValue(N, 0), DAG, DCI, Subtarget);
+ if (Shuffle.getNode())
+ return Shuffle;
+ }
+
+ return SDValue();
}
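The canonicalization added at the top of PerformShuffleCombine targets the alternating subtract/add pattern implemented by SSE3 ADDSUBPS/ADDSUBPD (subtract in the even lanes, add in the odd lanes). As a rough source-level illustration, and assuming the vectorizer produces the FADD/FSUB-plus-shuffle form the combine looks for, the scalar loop below has exactly that lane pattern:

    // Even lanes subtract, odd lanes add -- the shape ADDSUBPS implements.
    void addsub4(float *r, const float *a, const float *b) {
      for (int i = 0; i < 4; i += 2) {
        r[i]     = a[i]     - b[i];     // FSUB in even lanes
        r[i + 1] = a[i + 1] + b[i + 1]; // FADD in odd lanes
      }
    }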
/// PerformTruncateCombine - Converts truncate operation to
@@ -18155,28 +19536,34 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
Other->getOpcode() == ISD::SUB && DAG.isEqualTo(OpRHS, CondRHS))
return DAG.getNode(X86ISD::SUBUS, DL, VT, OpLHS, OpRHS);
- // If the RHS is a constant we have to reverse the const canonicalization.
- // x > C-1 ? x+-C : 0 --> subus x, C
- if (CC == ISD::SETUGT && Other->getOpcode() == ISD::ADD &&
- isSplatVector(CondRHS.getNode()) && isSplatVector(OpRHS.getNode())) {
- APInt A = cast<ConstantSDNode>(OpRHS.getOperand(0))->getAPIntValue();
- if (CondRHS.getConstantOperandVal(0) == -A-1)
- return DAG.getNode(X86ISD::SUBUS, DL, VT, OpLHS,
- DAG.getConstant(-A, VT));
- }
-
- // Another special case: If C was a sign bit, the sub has been
- // canonicalized into a xor.
- // FIXME: Would it be better to use computeKnownBits to determine whether
- // it's safe to decanonicalize the xor?
- // x s< 0 ? x^C : 0 --> subus x, C
- if (CC == ISD::SETLT && Other->getOpcode() == ISD::XOR &&
- ISD::isBuildVectorAllZeros(CondRHS.getNode()) &&
- isSplatVector(OpRHS.getNode())) {
- APInt A = cast<ConstantSDNode>(OpRHS.getOperand(0))->getAPIntValue();
- if (A.isSignBit())
- return DAG.getNode(X86ISD::SUBUS, DL, VT, OpLHS, OpRHS);
- }
+ if (auto *OpRHSBV = dyn_cast<BuildVectorSDNode>(OpRHS))
+ if (auto *OpRHSConst = OpRHSBV->getConstantSplatNode()) {
+ if (auto *CondRHSBV = dyn_cast<BuildVectorSDNode>(CondRHS))
+ if (auto *CondRHSConst = CondRHSBV->getConstantSplatNode())
+ // If the RHS is a constant we have to reverse the const
+ // canonicalization.
+ // x > C-1 ? x+-C : 0 --> subus x, C
+ if (CC == ISD::SETUGT && Other->getOpcode() == ISD::ADD &&
+ CondRHSConst->getAPIntValue() ==
+ (-OpRHSConst->getAPIntValue() - 1))
+ return DAG.getNode(
+ X86ISD::SUBUS, DL, VT, OpLHS,
+ DAG.getConstant(-OpRHSConst->getAPIntValue(), VT));
+
+ // Another special case: If C was a sign bit, the sub has been
+ // canonicalized into a xor.
+ // FIXME: Would it be better to use computeKnownBits to determine
+ // whether it's safe to decanonicalize the xor?
+ // x s< 0 ? x^C : 0 --> subus x, C
+ if (CC == ISD::SETLT && Other->getOpcode() == ISD::XOR &&
+ ISD::isBuildVectorAllZeros(CondRHS.getNode()) &&
+ OpRHSConst->getAPIntValue().isSignBit())
+ // Note that we have to rebuild the RHS constant here to ensure we
+ // don't rely on particular values of undef lanes.
+ return DAG.getNode(
+ X86ISD::SUBUS, DL, VT, OpLHS,
+ DAG.getConstant(OpRHSConst->getAPIntValue(), VT));
+ }
}
}
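(Editorial sketch, not part of the patch.) A minimal scalar model of the PSUBUS identities being matched above; the byte width and the concrete constant are assumptions for illustration:

#include <cstdint>
// subus(x, c): unsigned saturating subtract, i.e. x - c clamped at 0.
static inline uint8_t subus(uint8_t x, uint8_t c) { return x > c ? x - c : 0; }
// 1)  x >u (C-1) ? x + (-C) : 0   ==  subus(x, C)
//     (x >u C-1 is x >=u C, and x + (-C) wraps to exactly x - C in that case)
// 2)  x <s 0 ? x ^ C : 0          ==  subus(x, C)   when C is the sign bit;
//     e.g. C = 0x80: if x >=u 0x80 then x ^ 0x80 == x - 0x80, otherwise 0.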
@@ -18743,6 +20130,8 @@ static SDValue PerformINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG,
if (C->isAllOnesValue())
return Op1;
}
+
+ return SDValue();
}
// Packed SSE2/AVX2 arithmetic shift immediate intrinsics.
@@ -18882,16 +20271,15 @@ static SDValue PerformSHLCombine(SDNode *N, SelectionDAG &DAG) {
// vector operations in many cases. Also, on sandybridge ADD is faster than
// shl.
// (shl V, 1) -> add V,V
- if (isSplatVector(N1.getNode())) {
- assert(N0.getValueType().isVector() && "Invalid vector shift type");
- ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1->getOperand(0));
- // We shift all of the values by one. In many cases we do not have
- // hardware support for this operation. This is better expressed as an ADD
- // of two values.
- if (N1C && (1 == N1C->getZExtValue())) {
- return DAG.getNode(ISD::ADD, SDLoc(N), VT, N0, N0);
+ if (auto *N1BV = dyn_cast<BuildVectorSDNode>(N1))
+ if (auto *N1SplatC = N1BV->getConstantSplatNode()) {
+ assert(N0.getValueType().isVector() && "Invalid vector shift type");
+ // We shift all of the values by one. In many cases we do not have
+ // hardware support for this operation. This is better expressed as an ADD
+ // of two values.
+ if (N1SplatC->getZExtValue() == 1)
+ return DAG.getNode(ISD::ADD, SDLoc(N), VT, N0, N0);
}
- }
return SDValue();
}
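(Editorial sketch, not part of the patch.) The same splat-detection idiom recurs throughout this patch: BuildVectorSDNode::getConstantSplatNode() replaces the old isSplatVector + getOperand(0) pattern and, unlike it, also recognizes splats that contain undef lanes. A minimal sketch of the idiom in a generic combine, where Op and useSplatValue are hypothetical placeholders:

// Op is some vector-typed SDValue inspected by a combine (hypothetical name).
if (auto *BV = dyn_cast<BuildVectorSDNode>(Op))
  if (ConstantSDNode *C = BV->getConstantSplatNode()) {
    // C is the splatted constant; undef lanes are tolerated, so do not reuse
    // the original BUILD_VECTOR operands (their undef lanes are unconstrained)
    // -- rebuild any constant you need from C->getAPIntValue() instead.
    useSplatValue(C->getAPIntValue());   // hypothetical consumer
  }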
@@ -18910,10 +20298,9 @@ static SDValue performShiftToAllZeros(SDNode *N, SelectionDAG &DAG,
SDValue Amt = N->getOperand(1);
SDLoc DL(N);
- if (isSplatVector(Amt.getNode())) {
- SDValue SclrAmt = Amt->getOperand(0);
- if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(SclrAmt)) {
- APInt ShiftAmt = C->getAPIntValue();
+ if (auto *AmtBV = dyn_cast<BuildVectorSDNode>(Amt))
+ if (auto *AmtSplat = AmtBV->getConstantSplatNode()) {
+ APInt ShiftAmt = AmtSplat->getAPIntValue();
unsigned MaxAmount = VT.getVectorElementType().getSizeInBits();
// SSE2/AVX2 logical shifts always return a vector of 0s
@@ -18923,7 +20310,6 @@ static SDValue performShiftToAllZeros(SDNode *N, SelectionDAG &DAG,
if (ShiftAmt.trunc(8).uge(MaxAmount))
return getZeroVector(VT, Subtarget, DAG, DL);
}
- }
return SDValue();
}
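(Editorial sketch, not part of the patch.) Worth spelling out for the combine above: unlike scalar x86 shifts, which mask the count, the SSE2/AVX2 packed logical shifts produce all zeros once the count reaches the element width, so folding to a zero vector is safe. A per-element scalar model, with the 16-bit width chosen for illustration:

#include <cstdint>
// Packed logical shift semantics modeled on one 16-bit element:
static inline uint16_t psllw_element(uint16_t x, unsigned amt) {
  return amt >= 16 ? 0 : (uint16_t)(x << amt);   // count >= width yields 0
}
// By contrast the scalar SHL/SHR instructions mask the count (amt & 31 or
// amt & 63), and a C shift by >= the bit width is undefined behavior.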
@@ -19117,9 +20503,10 @@ static SDValue WidenMaskArithmetic(SDNode *N, SelectionDAG &DAG,
// The right side has to be a 'trunc' or a constant vector.
bool RHSTrunc = N1.getOpcode() == ISD::TRUNCATE;
- bool RHSConst = (isSplatVector(N1.getNode()) &&
- isa<ConstantSDNode>(N1->getOperand(0)));
- if (!RHSTrunc && !RHSConst)
+ ConstantSDNode *RHSConstSplat = nullptr;
+ if (auto *RHSBV = dyn_cast<BuildVectorSDNode>(N1))
+ RHSConstSplat = RHSBV->getConstantSplatNode();
+ if (!RHSTrunc && !RHSConstSplat)
return SDValue();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
@@ -19129,9 +20516,9 @@ static SDValue WidenMaskArithmetic(SDNode *N, SelectionDAG &DAG,
// Set N0 and N1 to hold the inputs to the new wide operation.
N0 = N0->getOperand(0);
- if (RHSConst) {
+ if (RHSConstSplat) {
N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT.getScalarType(),
- N1->getOperand(0));
+ SDValue(RHSConstSplat, 0));
SmallVector<SDValue, 8> C(WideVT.getVectorNumElements(), N1);
N1 = DAG.getNode(ISD::BUILD_VECTOR, DL, WideVT, C);
} else if (RHSTrunc) {
@@ -19277,12 +20664,9 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG,
unsigned EltBits = MaskVT.getVectorElementType().getSizeInBits();
unsigned SraAmt = ~0;
if (Mask.getOpcode() == ISD::SRA) {
- SDValue Amt = Mask.getOperand(1);
- if (isSplatVector(Amt.getNode())) {
- SDValue SclrAmt = Amt->getOperand(0);
- if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(SclrAmt))
- SraAmt = C->getZExtValue();
- }
+ if (auto *AmtBV = dyn_cast<BuildVectorSDNode>(Mask.getOperand(1)))
+ if (auto *AmtConst = AmtBV->getConstantSplatNode())
+ SraAmt = AmtConst->getZExtValue();
} else if (Mask.getOpcode() == X86ISD::VSRAI) {
SDValue SraC = Mask.getOperand(1);
SraAmt = cast<ConstantSDNode>(SraC)->getZExtValue();
@@ -20642,6 +22026,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
return PerformINTRINSIC_WO_CHAINCombine(N, DAG, Subtarget);
case X86ISD::INSERTPS:
return PerformINSERTPSCombine(N, DAG, Subtarget);
+ case ISD::BUILD_VECTOR: return PerformBUILD_VECTORCombine(N, DAG, Subtarget);
}
return SDValue();
@@ -21146,8 +22531,8 @@ void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
const GlobalValue *GV = GA->getGlobal();
// If we require an extra load to get this address, as in PIC mode, we
// can't accept it.
- if (isGlobalStubReference(Subtarget->ClassifyGlobalReference(GV,
- getTargetMachine())))
+ if (isGlobalStubReference(
+ Subtarget->ClassifyGlobalReference(GV, DAG.getTarget())))
return;
Result = DAG.getTargetGlobalAddress(GV, SDLoc(Op),
@@ -21425,3 +22810,7 @@ int X86TargetLowering::getScalingFactorCost(const AddrMode &AM,
return AM.Scale != 0;
return -1;
}
+
+bool X86TargetLowering::isTargetFTOL() const {
+ return Subtarget->isTargetKnownWindowsMSVC() && !Subtarget->is64Bit();
+}
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index 9f51b53..c8cdce7 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -15,13 +15,13 @@
#ifndef X86ISELLOWERING_H
#define X86ISELLOWERING_H
-#include "X86Subtarget.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetOptions.h"
namespace llvm {
+ class X86Subtarget;
class X86TargetMachine;
namespace X86ISD {
@@ -86,6 +86,9 @@ namespace llvm {
/// X86 Read Time-Stamp Counter and Processor ID.
RDTSCP_DAG,
+ /// X86 Read Performance Monitoring Counters.
+ RDPMC_DAG,
+
/// X86 compare and logical compare instructions.
CMP, COMI, UCOMI,
@@ -315,6 +318,8 @@ namespace llvm {
KORTEST,
// Several flavors of instructions with vector shuffle behaviors.
+ PACKSS,
+ PACKUS,
PALIGNR,
PSHUFD,
PSHUFHW,
@@ -400,23 +405,8 @@ namespace llvm {
// XTEST - Test if in transactional execution.
XTEST,
- // ATOMADD64_DAG, ATOMSUB64_DAG, ATOMOR64_DAG, ATOMAND64_DAG,
- // ATOMXOR64_DAG, ATOMNAND64_DAG, ATOMSWAP64_DAG -
- // Atomic 64-bit binary operations.
- ATOMADD64_DAG = ISD::FIRST_TARGET_MEMORY_OPCODE,
- ATOMSUB64_DAG,
- ATOMOR64_DAG,
- ATOMXOR64_DAG,
- ATOMAND64_DAG,
- ATOMNAND64_DAG,
- ATOMMAX64_DAG,
- ATOMMIN64_DAG,
- ATOMUMAX64_DAG,
- ATOMUMIN64_DAG,
- ATOMSWAP64_DAG,
-
// LCMPXCHG_DAG, LCMPXCHG8_DAG, LCMPXCHG16_DAG - Compare and swap.
- LCMPXCHG_DAG,
+ LCMPXCHG_DAG = ISD::FIRST_TARGET_MEMORY_OPCODE,
LCMPXCHG8_DAG,
LCMPXCHG16_DAG,
@@ -766,9 +756,7 @@ namespace llvm {
/// isTargetFTOL - Return true if the target uses the MSVC _ftol2 routine
/// for fptoui.
- bool isTargetFTOL() const {
- return Subtarget->isTargetKnownWindowsMSVC() && !Subtarget->is64Bit();
- }
+ bool isTargetFTOL() const;
/// isIntegerTypeFTOL - Return true if the MSVC _ftol2 routine should be
/// used for fptoui to the given type.
@@ -808,6 +796,9 @@ namespace llvm {
/// \brief Reset the operation actions based on target options.
void resetOperationActions() override;
+ /// \brief Customize the preferred legalization strategy for certain types.
+ LegalizeTypeAction getPreferredVectorAction(EVT VT) const override;
+
protected:
std::pair<const TargetRegisterClass*, uint8_t>
findRepresentativeClass(MVT VT) const override;
diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td
index 37bcc52..41e900e 100644
--- a/lib/Target/X86/X86InstrAVX512.td
+++ b/lib/Target/X86/X86InstrAVX512.td
@@ -476,6 +476,28 @@ defm VPBROADCASTQZ : avx512_int_broadcast_rm<0x59, "vpbroadcastq", i64mem,
loadi64, VR512, v8i64, v2i64, VK8WM>, EVEX_V512, VEX_W,
EVEX_CD8<64, CD8VT1>;
+multiclass avx512_int_subvec_broadcast_rm<bits<8> opc, string OpcodeStr,
+ X86MemOperand x86memop, PatFrag ld_frag,
+ RegisterClass KRC> {
+ let mayLoad = 1 in {
+ def rm : AVX5128I<opc, MRMSrcMem, (outs VR512:$dst), (ins x86memop:$src),
+ !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
+ []>, EVEX;
+ def krm : AVX5128I<opc, MRMSrcMem, (outs VR512:$dst), (ins KRC:$mask,
+ x86memop:$src),
+ !strconcat(OpcodeStr,
+ " \t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}"),
+ []>, EVEX, EVEX_KZ;
+ }
+}
+
+defm VBROADCASTI32X4 : avx512_int_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
+ i128mem, loadv2i64, VK16WM>,
+ EVEX_V512, EVEX_CD8<32, CD8VT4>;
+defm VBROADCASTI64X4 : avx512_int_subvec_broadcast_rm<0x5b, "vbroadcasti64x4",
+ i256mem, loadv4i64, VK16WM>, VEX_W,
+ EVEX_V512, EVEX_CD8<64, CD8VT4>;
+
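(Editorial sketch, not part of the patch.) The two new subvector broadcasts load a 128-bit (vbroadcasti32x4) or 256-bit (vbroadcasti64x4) memory operand and replicate it to fill the 512-bit destination; the krm forms additionally apply a zeroing write-mask. A byte-level model of the unmasked load form:

#include <cstdint>
#include <cstring>
// chunkBytes is 16 for vbroadcasti32x4 and 32 for vbroadcasti64x4.
void subvectorBroadcast(uint8_t dst[64], const uint8_t *src, unsigned chunkBytes) {
  for (unsigned off = 0; off != 64; off += chunkBytes)
    std::memcpy(dst + off, src, chunkBytes);
}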
def : Pat<(v16i32 (int_x86_avx512_pbroadcastd_512 (v4i32 VR128X:$src))),
(VPBROADCASTDZrr VR128X:$src)>;
def : Pat<(v8i64 (int_x86_avx512_pbroadcastq_512 (v2i64 VR128X:$src))),
@@ -517,10 +539,12 @@ def rr : AVX512XS8I<opc, MRMDestReg, (outs DstRC:$dst), (ins KRC:$src),
[]>, EVEX;
}
+let Predicates = [HasCDI] in {
defm VPBROADCASTMW2D : avx512_mask_broadcast<0x3A, "vpbroadcastmw2d", VR512,
VK16, v16i32, v16i1>, EVEX_V512;
defm VPBROADCASTMB2Q : avx512_mask_broadcast<0x2A, "vpbroadcastmb2q", VR512,
VK8, v8i64, v8i1>, EVEX_V512, VEX_W;
+}
//===----------------------------------------------------------------------===//
// AVX-512 - VPERM
@@ -585,7 +609,7 @@ defm VPERMPDZ : avx512_perm<0x16, "vpermpd", VR512, memopv8f64, f512mem,
// -- VPERM2I - 3 source operands form --
multiclass avx512_perm_3src<bits<8> opc, string OpcodeStr, RegisterClass RC,
PatFrag mem_frag, X86MemOperand x86memop,
- SDNode OpNode, ValueType OpVT> {
+ SDNode OpNode, ValueType OpVT, RegisterClass KRC> {
let Constraints = "$src1 = $dst" in {
def rr : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, RC:$src2, RC:$src3),
@@ -595,48 +619,107 @@ let Constraints = "$src1 = $dst" in {
(OpVT (OpNode RC:$src1, RC:$src2, RC:$src3)))]>,
EVEX_4V;
+ def rrk : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
+ (ins RC:$src1, KRC:$mask, RC:$src2, RC:$src3),
+ !strconcat(OpcodeStr,
+ " \t{$src3, $src2, $dst {${mask}}|"
+ "$dst {${mask}}, $src2, $src3}"),
+ [(set RC:$dst, (OpVT (vselect KRC:$mask,
+ (OpNode RC:$src1, RC:$src2,
+ RC:$src3),
+ RC:$src1)))]>,
+ EVEX_4V, EVEX_K;
+
+ let AddedComplexity = 30 in // Prefer over VMOV*rrkz Pat<>
+ def rrkz : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
+ (ins RC:$src1, KRC:$mask, RC:$src2, RC:$src3),
+ !strconcat(OpcodeStr,
+ " \t{$src3, $src2, $dst {${mask}} {z} |",
+ "$dst {${mask}} {z}, $src2, $src3}"),
+ [(set RC:$dst, (OpVT (vselect KRC:$mask,
+ (OpNode RC:$src1, RC:$src2,
+ RC:$src3),
+ (OpVT (bitconvert
+ (v16i32 immAllZerosV))))))]>,
+ EVEX_4V, EVEX_KZ;
+
def rm : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, RC:$src2, x86memop:$src3),
!strconcat(OpcodeStr,
" \t{$src3, $src2, $dst|$dst, $src2, $src3}"),
[(set RC:$dst,
- (OpVT (OpNode RC:$src1, RC:$src2,
+ (OpVT (OpNode RC:$src1, RC:$src2,
(mem_frag addr:$src3))))]>, EVEX_4V;
+
+ def rmk : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
+ (ins RC:$src1, KRC:$mask, RC:$src2, x86memop:$src3),
+ !strconcat(OpcodeStr,
+ " \t{$src3, $src2, $dst {${mask}}|"
+ "$dst {${mask}}, $src2, $src3}"),
+ [(set RC:$dst,
+ (OpVT (vselect KRC:$mask,
+ (OpNode RC:$src1, RC:$src2,
+ (mem_frag addr:$src3)),
+ RC:$src1)))]>,
+ EVEX_4V, EVEX_K;
+
+ let AddedComplexity = 10 in // Prefer over the rrkz variant
+ def rmkz : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
+ (ins RC:$src1, KRC:$mask, RC:$src2, x86memop:$src3),
+ !strconcat(OpcodeStr,
+ " \t{$src3, $src2, $dst {${mask}} {z}|"
+ "$dst {${mask}} {z}, $src2, $src3}"),
+ [(set RC:$dst,
+ (OpVT (vselect KRC:$mask,
+ (OpNode RC:$src1, RC:$src2,
+ (mem_frag addr:$src3)),
+ (OpVT (bitconvert
+ (v16i32 immAllZerosV))))))]>,
+ EVEX_4V, EVEX_KZ;
}
}
-defm VPERMI2D : avx512_perm_3src<0x76, "vpermi2d", VR512, memopv16i32, i512mem,
- X86VPermiv3, v16i32>, EVEX_V512, EVEX_CD8<32, CD8VF>;
-defm VPERMI2Q : avx512_perm_3src<0x76, "vpermi2q", VR512, memopv8i64, i512mem,
- X86VPermiv3, v8i64>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
-defm VPERMI2PS : avx512_perm_3src<0x77, "vpermi2ps", VR512, memopv16f32, i512mem,
- X86VPermiv3, v16f32>, EVEX_V512, EVEX_CD8<32, CD8VF>;
-defm VPERMI2PD : avx512_perm_3src<0x77, "vpermi2pd", VR512, memopv8f64, i512mem,
- X86VPermiv3, v8f64>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
-
-defm VPERMT2D : avx512_perm_3src<0x7E, "vpermt2d", VR512, memopv16i32, i512mem,
- X86VPermv3, v16i32>, EVEX_V512, EVEX_CD8<32, CD8VF>;
-defm VPERMT2Q : avx512_perm_3src<0x7E, "vpermt2q", VR512, memopv8i64, i512mem,
- X86VPermv3, v8i64>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
-defm VPERMT2PS : avx512_perm_3src<0x7F, "vpermt2ps", VR512, memopv16f32, i512mem,
- X86VPermv3, v16f32>, EVEX_V512, EVEX_CD8<32, CD8VF>;
-defm VPERMT2PD : avx512_perm_3src<0x7F, "vpermt2pd", VR512, memopv8f64, i512mem,
- X86VPermv3, v8f64>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
-
-def : Pat<(v16f32 (int_x86_avx512_mask_vpermt_ps_512 (v16i32 VR512:$idx),
- (v16f32 VR512:$src1), (v16f32 VR512:$src2), (i16 -1))),
- (VPERMT2PSrr VR512:$src1, VR512:$idx, VR512:$src2)>;
-
-def : Pat<(v16i32 (int_x86_avx512_mask_vpermt_d_512 (v16i32 VR512:$idx),
- (v16i32 VR512:$src1), (v16i32 VR512:$src2), (i16 -1))),
- (VPERMT2Drr VR512:$src1, VR512:$idx, VR512:$src2)>;
-
-def : Pat<(v8f64 (int_x86_avx512_mask_vpermt_pd_512 (v8i64 VR512:$idx),
- (v8f64 VR512:$src1), (v8f64 VR512:$src2), (i8 -1))),
- (VPERMT2PDrr VR512:$src1, VR512:$idx, VR512:$src2)>;
-
-def : Pat<(v8i64 (int_x86_avx512_mask_vpermt_q_512 (v8i64 VR512:$idx),
- (v8i64 VR512:$src1), (v8i64 VR512:$src2), (i8 -1))),
- (VPERMT2Qrr VR512:$src1, VR512:$idx, VR512:$src2)>;
+defm VPERMI2D : avx512_perm_3src<0x76, "vpermi2d", VR512, memopv16i32,
+ i512mem, X86VPermiv3, v16i32, VK16WM>,
+ EVEX_V512, EVEX_CD8<32, CD8VF>;
+defm VPERMI2Q : avx512_perm_3src<0x76, "vpermi2q", VR512, memopv8i64,
+ i512mem, X86VPermiv3, v8i64, VK8WM>,
+ EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
+defm VPERMI2PS : avx512_perm_3src<0x77, "vpermi2ps", VR512, memopv16f32,
+ i512mem, X86VPermiv3, v16f32, VK16WM>,
+ EVEX_V512, EVEX_CD8<32, CD8VF>;
+defm VPERMI2PD : avx512_perm_3src<0x77, "vpermi2pd", VR512, memopv8f64,
+ i512mem, X86VPermiv3, v8f64, VK8WM>,
+ EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
+
+multiclass avx512_perm_table_3src<bits<8> opc, string Suffix, RegisterClass RC,
+ PatFrag mem_frag, X86MemOperand x86memop,
+ SDNode OpNode, ValueType OpVT, RegisterClass KRC,
+ ValueType MaskVT, RegisterClass MRC> :
+ avx512_perm_3src<opc, "vpermt2"##Suffix, RC, mem_frag, x86memop, OpNode,
+ OpVT, KRC> {
+ def : Pat<(OpVT (!cast<Intrinsic>("int_x86_avx512_mask_vpermt_"##Suffix##"_512")
+ VR512:$idx, VR512:$src1, VR512:$src2, -1)),
+ (!cast<Instruction>(NAME#rr) VR512:$src1, VR512:$idx, VR512:$src2)>;
+
+ def : Pat<(OpVT (!cast<Intrinsic>("int_x86_avx512_mask_vpermt_"##Suffix##"_512")
+ VR512:$idx, VR512:$src1, VR512:$src2, MRC:$mask)),
+ (!cast<Instruction>(NAME#rrk) VR512:$src1,
+ (MaskVT (COPY_TO_REGCLASS MRC:$mask, KRC)), VR512:$idx, VR512:$src2)>;
+}
+
+defm VPERMT2D : avx512_perm_table_3src<0x7E, "d", VR512, memopv16i32, i512mem,
+ X86VPermv3, v16i32, VK16WM, v16i1, GR16>,
+ EVEX_V512, EVEX_CD8<32, CD8VF>;
+defm VPERMT2Q : avx512_perm_table_3src<0x7E, "q", VR512, memopv8i64, i512mem,
+ X86VPermv3, v8i64, VK8WM, v8i1, GR8>,
+ EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
+defm VPERMT2PS : avx512_perm_table_3src<0x7F, "ps", VR512, memopv16f32, i512mem,
+ X86VPermv3, v16f32, VK16WM, v16i1, GR16>,
+ EVEX_V512, EVEX_CD8<32, CD8VF>;
+defm VPERMT2PD : avx512_perm_table_3src<0x7F, "pd", VR512, memopv8f64, i512mem,
+ X86VPermv3, v8f64, VK8WM, v8i1, GR8>,
+ EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
+
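(Editorial sketch, not part of the patch.) The new rrk/rmk and rrkz/rmkz variants above follow the standard EVEX masking convention, which the vselect patterns spell out: merge-masking keeps $src1 in lanes whose mask bit is clear, zero-masking writes zero there. A per-element model, with names chosen for illustration:

#include <cstddef>
// result[] stands for the unmasked permute result, passthru[] for $src1.
template <typename T, size_t N>
void applyWriteMask(T (&dst)[N], const bool (&k)[N], const T (&result)[N],
                    const T (&passthru)[N], bool zeroMasking) {
  for (size_t i = 0; i != N; ++i)
    dst[i] = k[i] ? result[i] : (zeroMasking ? T(0) : passthru[i]);
}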
//===----------------------------------------------------------------------===//
// AVX-512 - BLEND using mask
//
@@ -790,52 +873,61 @@ def : Pat<(v8i1 (X86pcmpeqm (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
(v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)),
(v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm))), VK8)>;
-multiclass avx512_icmp_cc<bits<8> opc, RegisterClass KRC,
+multiclass avx512_icmp_cc<bits<8> opc, RegisterClass WMRC, RegisterClass KRC,
RegisterClass RC, X86MemOperand x86memop, PatFrag memop_frag,
- SDNode OpNode, ValueType vt, Operand CC, string asm,
- string asm_alt> {
+ SDNode OpNode, ValueType vt, Operand CC, string Suffix> {
def rri : AVX512AIi8<opc, MRMSrcReg,
- (outs KRC:$dst), (ins RC:$src1, RC:$src2, CC:$cc), asm,
+ (outs KRC:$dst), (ins RC:$src1, RC:$src2, CC:$cc),
+ !strconcat("vpcmp${cc}", Suffix,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set KRC:$dst, (OpNode (vt RC:$src1), (vt RC:$src2), imm:$cc))],
IIC_SSE_ALU_F32P_RR>, EVEX_4V;
def rmi : AVX512AIi8<opc, MRMSrcMem,
- (outs KRC:$dst), (ins RC:$src1, x86memop:$src2, CC:$cc), asm,
+ (outs KRC:$dst), (ins RC:$src1, x86memop:$src2, CC:$cc),
+ !strconcat("vpcmp${cc}", Suffix,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set KRC:$dst, (OpNode (vt RC:$src1), (memop_frag addr:$src2),
imm:$cc))], IIC_SSE_ALU_F32P_RM>, EVEX_4V;
// Accept explicit immediate argument form instead of comparison code.
let isAsmParserOnly = 1, hasSideEffects = 0 in {
def rri_alt : AVX512AIi8<opc, MRMSrcReg,
(outs KRC:$dst), (ins RC:$src1, RC:$src2, i8imm:$cc),
- asm_alt, [], IIC_SSE_ALU_F32P_RR>, EVEX_4V;
+ !strconcat("vpcmp", Suffix,
+ "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
+ [], IIC_SSE_ALU_F32P_RR>, EVEX_4V;
+ def rrik_alt : AVX512AIi8<opc, MRMSrcReg,
+ (outs KRC:$dst), (ins WMRC:$mask, RC:$src1, RC:$src2, i8imm:$cc),
+ !strconcat("vpcmp", Suffix,
+ "\t{$cc, $src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2, $cc}"),
+ [], IIC_SSE_ALU_F32P_RR>, EVEX_4V, EVEX_K;
def rmi_alt : AVX512AIi8<opc, MRMSrcMem,
(outs KRC:$dst), (ins RC:$src1, x86memop:$src2, i8imm:$cc),
- asm_alt, [], IIC_SSE_ALU_F32P_RM>, EVEX_4V;
+ !strconcat("vpcmp", Suffix,
+ "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
+ [], IIC_SSE_ALU_F32P_RM>, EVEX_4V;
+ def rmik_alt : AVX512AIi8<opc, MRMSrcMem,
+ (outs KRC:$dst), (ins WMRC:$mask, RC:$src1, x86memop:$src2, i8imm:$cc),
+ !strconcat("vpcmp", Suffix,
+ "\t{$cc, $src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2, $cc}"),
+ [], IIC_SSE_ALU_F32P_RM>, EVEX_4V, EVEX_K;
}
}
-defm VPCMPDZ : avx512_icmp_cc<0x1F, VK16, VR512, i512mem, memopv16i32,
- X86cmpm, v16i32, AVXCC,
- "vpcmp${cc}d\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- "vpcmpd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}">,
- EVEX_V512, EVEX_CD8<32, CD8VF>;
-defm VPCMPUDZ : avx512_icmp_cc<0x1E, VK16, VR512, i512mem, memopv16i32,
- X86cmpmu, v16i32, AVXCC,
- "vpcmp${cc}ud\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- "vpcmpud\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}">,
- EVEX_V512, EVEX_CD8<32, CD8VF>;
-
-defm VPCMPQZ : avx512_icmp_cc<0x1F, VK8, VR512, i512mem, memopv8i64,
- X86cmpm, v8i64, AVXCC,
- "vpcmp${cc}q\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- "vpcmpq\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}">,
- VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;
-defm VPCMPUQZ : avx512_icmp_cc<0x1E, VK8, VR512, i512mem, memopv8i64,
- X86cmpmu, v8i64, AVXCC,
- "vpcmp${cc}uq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- "vpcmpuq\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}">,
- VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;
-
-// avx512_cmp_packed - sse 1 & 2 compare packed instructions
+defm VPCMPDZ : avx512_icmp_cc<0x1F, VK16WM, VK16, VR512, i512mem, memopv16i32,
+ X86cmpm, v16i32, AVXCC, "d">,
+ EVEX_V512, EVEX_CD8<32, CD8VF>;
+defm VPCMPUDZ : avx512_icmp_cc<0x1E, VK16WM, VK16, VR512, i512mem, memopv16i32,
+ X86cmpmu, v16i32, AVXCC, "ud">,
+ EVEX_V512, EVEX_CD8<32, CD8VF>;
+
+defm VPCMPQZ : avx512_icmp_cc<0x1F, VK8WM, VK8, VR512, i512mem, memopv8i64,
+ X86cmpm, v8i64, AVXCC, "q">,
+ VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;
+defm VPCMPUQZ : avx512_icmp_cc<0x1E, VK8WM, VK8, VR512, i512mem, memopv8i64,
+ X86cmpmu, v8i64, AVXCC, "uq">,
+ VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;
+
+// avx512_cmp_packed - compare packed instructions
multiclass avx512_cmp_packed<RegisterClass KRC, RegisterClass RC,
X86MemOperand x86memop, ValueType vt,
string suffix, Domain d> {
@@ -859,11 +951,11 @@ multiclass avx512_cmp_packed<RegisterClass KRC, RegisterClass RC,
// Accept explicit immediate argument form instead of comparison code.
let isAsmParserOnly = 1, hasSideEffects = 0 in {
def rri_alt : AVX512PIi8<0xC2, MRMSrcReg,
- (outs RC:$dst), (ins RC:$src1, RC:$src2, i8imm:$cc),
+ (outs KRC:$dst), (ins RC:$src1, RC:$src2, i8imm:$cc),
!strconcat("vcmp", suffix,
" \t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"), [], d>;
def rmi_alt : AVX512PIi8<0xC2, MRMSrcMem,
- (outs RC:$dst), (ins RC:$src1, x86memop:$src2, i8imm:$cc),
+ (outs KRC:$dst), (ins RC:$src1, x86memop:$src2, i8imm:$cc),
!strconcat("vcmp", suffix,
" \t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"), [], d>;
}
@@ -1788,6 +1880,46 @@ def : Pat<(v8i64 (X86Vinsert undef, GR64:$src2, (iPTR 0))),
(SUBREG_TO_REG (i32 0), (VMOV64toPQIZrr GR64:$src2), sub_xmm)>;
//===----------------------------------------------------------------------===//
+// AVX-512 - Non-temporals
+//===----------------------------------------------------------------------===//
+
+def VMOVNTDQAZrm : AVX5128I<0x2A, MRMSrcMem, (outs VR512:$dst),
+ (ins i512mem:$src),
+ "vmovntdqa\t{$src, $dst|$dst, $src}",
+ [(set VR512:$dst,
+ (int_x86_avx512_movntdqa addr:$src))]>,
+ EVEX, EVEX_V512, EVEX_CD8<64, CD8VF>;
+
+// Prefer non-temporal over temporal versions
+let AddedComplexity = 400, SchedRW = [WriteStore] in {
+
+def VMOVNTPSZmr : AVX512PSI<0x2B, MRMDestMem, (outs),
+ (ins f512mem:$dst, VR512:$src),
+ "vmovntps\t{$src, $dst|$dst, $src}",
+ [(alignednontemporalstore (v16f32 VR512:$src),
+ addr:$dst)],
+ IIC_SSE_MOVNT>,
+ EVEX, EVEX_V512, EVEX_CD8<32, CD8VF>;
+
+def VMOVNTPDZmr : AVX512PDI<0x2B, MRMDestMem, (outs),
+ (ins f512mem:$dst, VR512:$src),
+ "vmovntpd\t{$src, $dst|$dst, $src}",
+ [(alignednontemporalstore (v8f64 VR512:$src),
+ addr:$dst)],
+ IIC_SSE_MOVNT>,
+ EVEX, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
+
+
+def VMOVNTDQZmr : AVX512BI<0xE7, MRMDestMem, (outs),
+ (ins i512mem:$dst, VR512:$src),
+ "vmovntdq\t{$src, $dst|$dst, $src}",
+ [(alignednontemporalstore (v8i64 VR512:$src),
+ addr:$dst)],
+ IIC_SSE_MOVNT>,
+ EVEX, EVEX_V512, EVEX_CD8<64, CD8VF>;
+}
+
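(Editorial sketch, not part of the patch.) The store patterns above only match aligned non-temporal stores; the usual way to obtain one from user code is a streaming-store intrinsic. A minimal example, assuming AVX-512F, a 64-byte-aligned pointer, and <immintrin.h>:

#include <immintrin.h>
// Streams v to a 64-byte-aligned destination, bypassing the caches; with the
// pattern above this is intended to select vmovntps.
void streamStore512(float *dst, __m512 v) {
  _mm512_stream_ps(dst, v);
}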
+//===----------------------------------------------------------------------===//
// AVX-512 - Integer arithmetic
//
multiclass avx512_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
@@ -3161,6 +3293,10 @@ def : Pat<(v4f32 (uint_to_fp (v4i32 VR128X:$src1))),
(EXTRACT_SUBREG (v16f32 (VCVTUDQ2PSZrr
(v16i32 (SUBREG_TO_REG (i32 0), VR128X:$src1, sub_xmm)))), sub_xmm)>;
+def : Pat<(v4f64 (uint_to_fp (v4i32 VR128X:$src1))),
+ (EXTRACT_SUBREG (v8f64 (VCVTUDQ2PDZrr
+ (v8i32 (SUBREG_TO_REG (i32 0), VR128X:$src1, sub_xmm)))), sub_ymm)>;
+
def : Pat<(v16f32 (int_x86_avx512_mask_cvtdq2ps_512 (v16i32 VR512:$src),
(bc_v16f32 (v16i32 immAllZerosV)), (i16 -1), imm:$rc)),
(VCVTDQ2PSZrrb VR512:$src, imm:$rc)>;
@@ -4343,6 +4479,37 @@ def : Pat<(int_x86_avx512_mask_conflict_q_512 VR512:$src2, VR512:$src1,
(VPCONFLICTQrrk VR512:$src1,
(v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)), VR512:$src2)>;
+let Predicates = [HasCDI] in {
+defm VPLZCNTD : avx512_conflict<0x44, "vplzcntd", VR512, VK16WM,
+ i512mem, i32mem, "{1to16}">,
+ EVEX_V512, EVEX_CD8<32, CD8VF>;
+
+
+defm VPLZCNTQ : avx512_conflict<0x44, "vplzcntq", VR512, VK8WM,
+ i512mem, i64mem, "{1to8}">,
+ EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
+
+}
+
+def : Pat<(int_x86_avx512_mask_lzcnt_d_512 VR512:$src2, VR512:$src1,
+ GR16:$mask),
+ (VPLZCNTDrrk VR512:$src1,
+ (v16i1 (COPY_TO_REGCLASS GR16:$mask, VK16WM)), VR512:$src2)>;
+
+def : Pat<(int_x86_avx512_mask_lzcnt_q_512 VR512:$src2, VR512:$src1,
+ GR8:$mask),
+ (VPLZCNTQrrk VR512:$src1,
+ (v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)), VR512:$src2)>;
+
+def : Pat<(v16i32 (ctlz (memopv16i32 addr:$src))),
+ (VPLZCNTDrm addr:$src)>;
+def : Pat<(v16i32 (ctlz (v16i32 VR512:$src))),
+ (VPLZCNTDrr VR512:$src)>;
+def : Pat<(v8i64 (ctlz (memopv8i64 addr:$src))),
+ (VPLZCNTQrm addr:$src)>;
+def : Pat<(v8i64 (ctlz (v8i64 VR512:$src))),
+ (VPLZCNTQrr VR512:$src)>;
+
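(Editorial sketch, not part of the patch.) The ctlz patterns just added map the generic count-leading-zeros node straight onto vplzcntd/vplzcntq. Per element they behave like the helpers below; note the instructions define the zero-input case, unlike the GCC/Clang builtins assumed here:

#include <cstdint>
static inline uint32_t lzcnt32(uint32_t x) {
  return x ? (uint32_t)__builtin_clz(x) : 32;    // vplzcntd gives 32 for 0
}
static inline uint64_t lzcnt64(uint64_t x) {
  return x ? (uint64_t)__builtin_clzll(x) : 64;  // vplzcntq gives 64 for 0
}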
def : Pat<(store (i1 -1), addr:$dst), (MOV8mi addr:$dst, (i8 1))>;
def : Pat<(store (i1 1), addr:$dst), (MOV8mi addr:$dst, (i8 1))>;
def : Pat<(store (i1 0), addr:$dst), (MOV8mi addr:$dst, (i8 0))>;
diff --git a/lib/Target/X86/X86InstrArithmetic.td b/lib/Target/X86/X86InstrArithmetic.td
index 368e14b..f2574cc 100644
--- a/lib/Target/X86/X86InstrArithmetic.td
+++ b/lib/Target/X86/X86InstrArithmetic.td
@@ -1278,8 +1278,10 @@ let isCompare = 1 in {
def TEST64mi32 : BinOpMI_F<"test", Xi64, X86testpat, MRM0m, 0xF6>;
// When testing the result of EXTRACT_SUBREG sub_8bit_hi, make sure the
- // register class is constrained to GR8_NOREX.
- let isPseudo = 1 in
+ // register class is constrained to GR8_NOREX. This pseudo is explicitly
+ // marked side-effect free, since it doesn't have an isel pattern like
+ // other test instructions.
+ let isPseudo = 1, hasSideEffects = 0 in
def TEST8ri_NOREX : I<0, Pseudo, (outs), (ins GR8_NOREX:$src, i8imm:$mask),
"", [], IIC_BIN_NONMEM>, Sched<[WriteALU]>;
} // Defs = [EFLAGS]
diff --git a/lib/Target/X86/X86InstrCompiler.td b/lib/Target/X86/X86InstrCompiler.td
index 34d8fb9..ca4f608 100644
--- a/lib/Target/X86/X86InstrCompiler.td
+++ b/lib/Target/X86/X86InstrCompiler.td
@@ -110,7 +110,7 @@ let Defs = [EAX, ESP, EFLAGS], Uses = [ESP] in
// When using segmented stacks these are lowered into instructions which first
// check if the current stacklet has enough free memory. If it does, memory is
-// allocated by bumping the stack pointer. Otherwise memory is allocated from
+// allocated by bumping the stack pointer. Otherwise memory is allocated from
// the heap.
let Defs = [EAX, ESP, EFLAGS], Uses = [ESP] in
@@ -197,6 +197,26 @@ let isBranch = 1, isTerminator = 1, isCodeGenOnly = 1 in {
}
//===----------------------------------------------------------------------===//
+// Pseudo instructions used by unwind info.
+//
+let isPseudo = 1 in {
+ def SEH_PushReg : I<0, Pseudo, (outs), (ins i32imm:$reg),
+ "#SEH_PushReg $reg", []>;
+ def SEH_SaveReg : I<0, Pseudo, (outs), (ins i32imm:$reg, i32imm:$dst),
+ "#SEH_SaveReg $reg, $dst", []>;
+ def SEH_SaveXMM : I<0, Pseudo, (outs), (ins i32imm:$reg, i32imm:$dst),
+ "#SEH_SaveXMM $reg, $dst", []>;
+ def SEH_StackAlloc : I<0, Pseudo, (outs), (ins i32imm:$size),
+ "#SEH_StackAlloc $size", []>;
+ def SEH_SetFrame : I<0, Pseudo, (outs), (ins i32imm:$reg, i32imm:$offset),
+ "#SEH_SetFrame $reg, $offset", []>;
+ def SEH_PushFrame : I<0, Pseudo, (outs), (ins i1imm:$mode),
+ "#SEH_PushFrame $mode", []>;
+ def SEH_EndPrologue : I<0, Pseudo, (outs), (ins),
+ "#SEH_EndPrologue", []>;
+}
+
+//===----------------------------------------------------------------------===//
// Pseudo instructions used by segmented stacks.
//
@@ -371,7 +391,7 @@ let Defs = [RCX,RDI], isCodeGenOnly = 1 in {
def REP_STOSD_64 : I<0xAB, RawFrm, (outs), (ins), "{rep;stosl|rep stosd}",
[(X86rep_stos i32)], IIC_REP_STOS>, REP, OpSize32,
Requires<[In64BitMode]>;
-
+
let Uses = [RAX,RCX,RDI] in
def REP_STOSQ_64 : RI<0xAB, RawFrm, (outs), (ins), "{rep;stosq|rep stosq}",
[(X86rep_stos i64)], IIC_REP_STOS>, REP,
@@ -502,83 +522,6 @@ def CMOV_RFP80 : I<0, Pseudo,
//===----------------------------------------------------------------------===//
-// Atomic Instruction Pseudo Instructions
-//===----------------------------------------------------------------------===//
-
-// Pseudo atomic instructions
-
-multiclass PSEUDO_ATOMIC_LOAD_BINOP<string mnemonic> {
- let usesCustomInserter = 1, mayLoad = 1, mayStore = 1 in {
- let Defs = [EFLAGS, AL] in
- def NAME#8 : I<0, Pseudo, (outs GR8:$dst),
- (ins i8mem:$ptr, GR8:$val),
- !strconcat(mnemonic, "8 PSEUDO!"), []>;
- let Defs = [EFLAGS, AX] in
- def NAME#16 : I<0, Pseudo,(outs GR16:$dst),
- (ins i16mem:$ptr, GR16:$val),
- !strconcat(mnemonic, "16 PSEUDO!"), []>;
- let Defs = [EFLAGS, EAX] in
- def NAME#32 : I<0, Pseudo, (outs GR32:$dst),
- (ins i32mem:$ptr, GR32:$val),
- !strconcat(mnemonic, "32 PSEUDO!"), []>;
- let Defs = [EFLAGS, RAX] in
- def NAME#64 : I<0, Pseudo, (outs GR64:$dst),
- (ins i64mem:$ptr, GR64:$val),
- !strconcat(mnemonic, "64 PSEUDO!"), []>;
- }
-}
-
-multiclass PSEUDO_ATOMIC_LOAD_BINOP_PATS<string name, string frag> {
- def : Pat<(!cast<PatFrag>(frag # "_8") addr:$ptr, GR8:$val),
- (!cast<Instruction>(name # "8") addr:$ptr, GR8:$val)>;
- def : Pat<(!cast<PatFrag>(frag # "_16") addr:$ptr, GR16:$val),
- (!cast<Instruction>(name # "16") addr:$ptr, GR16:$val)>;
- def : Pat<(!cast<PatFrag>(frag # "_32") addr:$ptr, GR32:$val),
- (!cast<Instruction>(name # "32") addr:$ptr, GR32:$val)>;
- def : Pat<(!cast<PatFrag>(frag # "_64") addr:$ptr, GR64:$val),
- (!cast<Instruction>(name # "64") addr:$ptr, GR64:$val)>;
-}
-
-// Atomic exchange, and, or, xor
-defm ATOMAND : PSEUDO_ATOMIC_LOAD_BINOP<"#ATOMAND">;
-defm ATOMOR : PSEUDO_ATOMIC_LOAD_BINOP<"#ATOMOR">;
-defm ATOMXOR : PSEUDO_ATOMIC_LOAD_BINOP<"#ATOMXOR">;
-defm ATOMNAND : PSEUDO_ATOMIC_LOAD_BINOP<"#ATOMNAND">;
-defm ATOMMAX : PSEUDO_ATOMIC_LOAD_BINOP<"#ATOMMAX">;
-defm ATOMMIN : PSEUDO_ATOMIC_LOAD_BINOP<"#ATOMMIN">;
-defm ATOMUMAX : PSEUDO_ATOMIC_LOAD_BINOP<"#ATOMUMAX">;
-defm ATOMUMIN : PSEUDO_ATOMIC_LOAD_BINOP<"#ATOMUMIN">;
-
-defm : PSEUDO_ATOMIC_LOAD_BINOP_PATS<"ATOMAND", "atomic_load_and">;
-defm : PSEUDO_ATOMIC_LOAD_BINOP_PATS<"ATOMOR", "atomic_load_or">;
-defm : PSEUDO_ATOMIC_LOAD_BINOP_PATS<"ATOMXOR", "atomic_load_xor">;
-defm : PSEUDO_ATOMIC_LOAD_BINOP_PATS<"ATOMNAND", "atomic_load_nand">;
-defm : PSEUDO_ATOMIC_LOAD_BINOP_PATS<"ATOMMAX", "atomic_load_max">;
-defm : PSEUDO_ATOMIC_LOAD_BINOP_PATS<"ATOMMIN", "atomic_load_min">;
-defm : PSEUDO_ATOMIC_LOAD_BINOP_PATS<"ATOMUMAX", "atomic_load_umax">;
-defm : PSEUDO_ATOMIC_LOAD_BINOP_PATS<"ATOMUMIN", "atomic_load_umin">;
-
-multiclass PSEUDO_ATOMIC_LOAD_BINOP6432<string mnemonic> {
- let usesCustomInserter = 1, Defs = [EFLAGS, EAX, EDX],
- mayLoad = 1, mayStore = 1, hasSideEffects = 0 in
- def NAME#6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2),
- (ins i64mem:$ptr, GR32:$val1, GR32:$val2),
- !strconcat(mnemonic, "6432 PSEUDO!"), []>;
-}
-
-defm ATOMAND : PSEUDO_ATOMIC_LOAD_BINOP6432<"#ATOMAND">;
-defm ATOMOR : PSEUDO_ATOMIC_LOAD_BINOP6432<"#ATOMOR">;
-defm ATOMXOR : PSEUDO_ATOMIC_LOAD_BINOP6432<"#ATOMXOR">;
-defm ATOMNAND : PSEUDO_ATOMIC_LOAD_BINOP6432<"#ATOMNAND">;
-defm ATOMADD : PSEUDO_ATOMIC_LOAD_BINOP6432<"#ATOMADD">;
-defm ATOMSUB : PSEUDO_ATOMIC_LOAD_BINOP6432<"#ATOMSUB">;
-defm ATOMMAX : PSEUDO_ATOMIC_LOAD_BINOP6432<"#ATOMMAX">;
-defm ATOMMIN : PSEUDO_ATOMIC_LOAD_BINOP6432<"#ATOMMIN">;
-defm ATOMUMAX : PSEUDO_ATOMIC_LOAD_BINOP6432<"#ATOMUMAX">;
-defm ATOMUMIN : PSEUDO_ATOMIC_LOAD_BINOP6432<"#ATOMUMIN">;
-defm ATOMSWAP : PSEUDO_ATOMIC_LOAD_BINOP6432<"#ATOMSWAP">;
-
-//===----------------------------------------------------------------------===//
// Normal-Instructions-With-Lock-Prefix Pseudo Instructions
//===----------------------------------------------------------------------===//
@@ -1696,20 +1639,34 @@ def : Pat<(mul (loadi64 addr:$src1), i64immSExt32:$src2),
(IMUL64rmi32 addr:$src1, i64immSExt32:$src2)>;
// Increment reg.
-def : Pat<(add GR8 :$src, 1), (INC8r GR8 :$src)>;
-def : Pat<(add GR16:$src, 1), (INC16r GR16:$src)>, Requires<[Not64BitMode]>;
-def : Pat<(add GR16:$src, 1), (INC64_16r GR16:$src)>, Requires<[In64BitMode]>;
-def : Pat<(add GR32:$src, 1), (INC32r GR32:$src)>, Requires<[Not64BitMode]>;
-def : Pat<(add GR32:$src, 1), (INC64_32r GR32:$src)>, Requires<[In64BitMode]>;
-def : Pat<(add GR64:$src, 1), (INC64r GR64:$src)>;
+// Do not make INC if it is slow
+def : Pat<(add GR8:$src, 1),
+ (INC8r GR8:$src)>, Requires<[NotSlowIncDec]>;
+def : Pat<(add GR16:$src, 1),
+ (INC16r GR16:$src)>, Requires<[NotSlowIncDec, Not64BitMode]>;
+def : Pat<(add GR16:$src, 1),
+ (INC64_16r GR16:$src)>, Requires<[NotSlowIncDec, In64BitMode]>;
+def : Pat<(add GR32:$src, 1),
+ (INC32r GR32:$src)>, Requires<[NotSlowIncDec, Not64BitMode]>;
+def : Pat<(add GR32:$src, 1),
+ (INC64_32r GR32:$src)>, Requires<[NotSlowIncDec, In64BitMode]>;
+def : Pat<(add GR64:$src, 1),
+ (INC64r GR64:$src)>, Requires<[NotSlowIncDec]>;
// Decrement reg.
-def : Pat<(add GR8 :$src, -1), (DEC8r GR8 :$src)>;
-def : Pat<(add GR16:$src, -1), (DEC16r GR16:$src)>, Requires<[Not64BitMode]>;
-def : Pat<(add GR16:$src, -1), (DEC64_16r GR16:$src)>, Requires<[In64BitMode]>;
-def : Pat<(add GR32:$src, -1), (DEC32r GR32:$src)>, Requires<[Not64BitMode]>;
-def : Pat<(add GR32:$src, -1), (DEC64_32r GR32:$src)>, Requires<[In64BitMode]>;
-def : Pat<(add GR64:$src, -1), (DEC64r GR64:$src)>;
+// Do not make DEC if it is slow
+def : Pat<(add GR8:$src, -1),
+ (DEC8r GR8:$src)>, Requires<[NotSlowIncDec]>;
+def : Pat<(add GR16:$src, -1),
+ (DEC16r GR16:$src)>, Requires<[NotSlowIncDec, Not64BitMode]>;
+def : Pat<(add GR16:$src, -1),
+ (DEC64_16r GR16:$src)>, Requires<[NotSlowIncDec, In64BitMode]>;
+def : Pat<(add GR32:$src, -1),
+ (DEC32r GR32:$src)>, Requires<[NotSlowIncDec, Not64BitMode]>;
+def : Pat<(add GR32:$src, -1),
+ (DEC64_32r GR32:$src)>, Requires<[NotSlowIncDec, In64BitMode]>;
+def : Pat<(add GR64:$src, -1),
+ (DEC64r GR64:$src)>, Requires<[NotSlowIncDec]>;
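(Editorial sketch, not part of the patch, and hedged since the diff does not spell it out.) Background for the NotSlowIncDec predicate used above: INC/DEC update all arithmetic flags except CF, and on some microarchitectures that partial EFLAGS update is more expensive than the full update done by ADD/SUB with an immediate of 1, which appears to be what slowIncDec() models. Both forms compute the same value:

// Either instruction implements this; the predicate only changes which one
// the patterns are allowed to select:
//   inc  %eax        ; leaves CF unchanged (partial flag update)
//   addl $1, %eax    ; rewrites all arithmetic flags
unsigned increment(unsigned x) { return x + 1; }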
// or reg/reg.
def : Pat<(or GR8 :$src1, GR8 :$src2), (OR8rr GR8 :$src1, GR8 :$src2)>;
diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td
index 1582f43..6f0fa94 100644
--- a/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -224,6 +224,10 @@ def X86Movhlps : SDNode<"X86ISD::MOVHLPS", SDTShuff2Op>;
def X86Movlps : SDNode<"X86ISD::MOVLPS", SDTShuff2Op>;
def X86Movlpd : SDNode<"X86ISD::MOVLPD", SDTShuff2Op>;
+def SDTPack : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>, SDTCisSameAs<2, 1>]>;
+def X86Packss : SDNode<"X86ISD::PACKSS", SDTPack>;
+def X86Packus : SDNode<"X86ISD::PACKUS", SDTPack>;
+
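(Editorial sketch, not part of the patch.) The two new nodes model the SSE/AVX pack instructions, which narrow each source lane with saturation and concatenate the two narrowed inputs. A scalar model of the word-to-byte case; the other width works the same way per lane:

#include <algorithm>
#include <cstdint>
// packsswb lane: signed 16-bit -> signed 8-bit with signed saturation.
static inline int8_t packssLane(int16_t v) {
  return (int8_t)std::clamp<int16_t>(v, -128, 127);
}
// packuswb lane: signed 16-bit -> unsigned 8-bit with unsigned saturation.
static inline uint8_t packusLane(int16_t v) {
  return (uint8_t)std::clamp<int16_t>(v, 0, 255);
}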
def X86Unpckl : SDNode<"X86ISD::UNPCKL", SDTShuff2Op>;
def X86Unpckh : SDNode<"X86ISD::UNPCKH", SDTShuff2Op>;
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index 6993577..0d3afc4 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -28,6 +28,7 @@
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
@@ -97,14 +98,11 @@ struct X86OpTblEntry {
// Pin the vtable to this file.
void X86InstrInfo::anchor() {}
-X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
- : X86GenInstrInfo((tm.getSubtarget<X86Subtarget>().is64Bit()
- ? X86::ADJCALLSTACKDOWN64
- : X86::ADJCALLSTACKDOWN32),
- (tm.getSubtarget<X86Subtarget>().is64Bit()
- ? X86::ADJCALLSTACKUP64
- : X86::ADJCALLSTACKUP32)),
- TM(tm), RI(tm) {
+X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
+ : X86GenInstrInfo(
+ (STI.is64Bit() ? X86::ADJCALLSTACKDOWN64 : X86::ADJCALLSTACKDOWN32),
+ (STI.is64Bit() ? X86::ADJCALLSTACKUP64 : X86::ADJCALLSTACKUP32)),
+ Subtarget(STI), RI(STI) {
static const X86OpTblEntry OpTbl2Addr[] = {
{ X86::ADC32ri, X86::ADC32mi, 0 },
@@ -1472,7 +1470,7 @@ X86InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
case X86::MOVSX32rr8:
case X86::MOVZX32rr8:
case X86::MOVSX64rr8:
- if (!TM.getSubtarget<X86Subtarget>().is64Bit())
+ if (!Subtarget.is64Bit())
// It's not always legal to reference the low 8-bit of the larger
// register in 32-bit mode.
return false;
@@ -1950,7 +1948,7 @@ X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc,
MachineRegisterInfo &RegInfo = MFI->getParent()->getRegInfo();
unsigned leaOutReg = RegInfo.createVirtualRegister(&X86::GR32RegClass);
unsigned Opc, leaInReg;
- if (TM.getSubtarget<X86Subtarget>().is64Bit()) {
+ if (Subtarget.is64Bit()) {
Opc = X86::LEA64_32r;
leaInReg = RegInfo.createVirtualRegister(&X86::GR64_NOSPRegClass);
} else {
@@ -2006,7 +2004,7 @@ X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc,
// just a single insert_subreg.
addRegReg(MIB, leaInReg, true, leaInReg, false);
} else {
- if (TM.getSubtarget<X86Subtarget>().is64Bit())
+ if (Subtarget.is64Bit())
leaInReg2 = RegInfo.createVirtualRegister(&X86::GR64_NOSPRegClass);
else
leaInReg2 = RegInfo.createVirtualRegister(&X86::GR32_NOSPRegClass);
@@ -2076,13 +2074,13 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
// we have better subtarget support, enable the 16-bit LEA generation here.
// 16-bit LEA is also slow on Core2.
bool DisableLEA16 = true;
- bool is64Bit = TM.getSubtarget<X86Subtarget>().is64Bit();
+ bool is64Bit = Subtarget.is64Bit();
unsigned MIOpc = MI->getOpcode();
switch (MIOpc) {
case X86::SHUFPSrri: {
assert(MI->getNumOperands() == 4 && "Unknown shufps instruction!");
- if (!TM.getSubtarget<X86Subtarget>().hasSSE2()) return nullptr;
+ if (!Subtarget.hasSSE2()) return nullptr;
unsigned B = MI->getOperand(1).getReg();
unsigned C = MI->getOperand(2).getReg();
@@ -2094,7 +2092,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
}
case X86::SHUFPDrri: {
assert(MI->getNumOperands() == 4 && "Unknown shufpd instruction!");
- if (!TM.getSubtarget<X86Subtarget>().hasSSE2()) return nullptr;
+ if (!Subtarget.hasSSE2()) return nullptr;
unsigned B = MI->getOperand(1).getReg();
unsigned C = MI->getOperand(2).getReg();
@@ -2672,8 +2670,7 @@ static X86::CondCode getSwappedCondition(X86::CondCode CC) {
/// getSETFromCond - Return a set opcode for the given condition and
/// whether it has memory operand.
-static unsigned getSETFromCond(X86::CondCode CC,
- bool HasMemoryOperand) {
+unsigned X86::getSETFromCond(CondCode CC, bool HasMemoryOperand) {
static const uint16_t Opc[16][2] = {
{ X86::SETAr, X86::SETAm },
{ X86::SETAEr, X86::SETAEm },
@@ -2693,14 +2690,14 @@ static unsigned getSETFromCond(X86::CondCode CC,
{ X86::SETSr, X86::SETSm }
};
- assert(CC < 16 && "Can only handle standard cond codes");
+ assert(CC <= LAST_VALID_COND && "Can only handle standard cond codes");
return Opc[CC][HasMemoryOperand ? 1 : 0];
}
/// getCMovFromCond - Return a cmov opcode for the given condition,
/// register size in bytes, and operand type.
-static unsigned getCMovFromCond(X86::CondCode CC, unsigned RegBytes,
- bool HasMemoryOperand) {
+unsigned X86::getCMovFromCond(CondCode CC, unsigned RegBytes,
+ bool HasMemoryOperand) {
static const uint16_t Opc[32][3] = {
{ X86::CMOVA16rr, X86::CMOVA32rr, X86::CMOVA64rr },
{ X86::CMOVAE16rr, X86::CMOVAE32rr, X86::CMOVAE64rr },
@@ -2976,7 +2973,7 @@ canInsertSelect(const MachineBasicBlock &MBB,
unsigned TrueReg, unsigned FalseReg,
int &CondCycles, int &TrueCycles, int &FalseCycles) const {
// Not all subtargets have cmov instructions.
- if (!TM.getSubtarget<X86Subtarget>().hasCMov())
+ if (!Subtarget.hasCMov())
return false;
if (Cond.size() != 1)
return false;
@@ -3027,8 +3024,7 @@ static bool isHReg(unsigned Reg) {
// Try and copy between VR128/VR64 and GR64 registers.
static unsigned CopyToFromAsymmetricReg(unsigned DestReg, unsigned SrcReg,
- const X86Subtarget& Subtarget) {
-
+ const X86Subtarget &Subtarget) {
// SrcReg(VR128) -> DestReg(GR64)
// SrcReg(VR64) -> DestReg(GR64)
@@ -3107,8 +3103,8 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
unsigned DestReg, unsigned SrcReg,
bool KillSrc) const {
// First deal with the normal symmetric copies.
- bool HasAVX = TM.getSubtarget<X86Subtarget>().hasAVX();
- bool HasAVX512 = TM.getSubtarget<X86Subtarget>().hasAVX512();
+ bool HasAVX = Subtarget.hasAVX();
+ bool HasAVX512 = Subtarget.hasAVX512();
unsigned Opc = 0;
if (X86::GR64RegClass.contains(DestReg, SrcReg))
Opc = X86::MOV64rr;
@@ -3120,7 +3116,7 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
// Copying to or from a physical H register on x86-64 requires a NOREX
// move. Otherwise use a normal move.
if ((isHReg(DestReg) || isHReg(SrcReg)) &&
- TM.getSubtarget<X86Subtarget>().is64Bit()) {
+ Subtarget.is64Bit()) {
Opc = X86::MOV8rr_NOREX;
// Both operands must be encodable without an REX prefix.
assert(X86::GR8_NOREXRegClass.contains(SrcReg, DestReg) &&
@@ -3137,7 +3133,7 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
else if (X86::VR256RegClass.contains(DestReg, SrcReg))
Opc = X86::VMOVAPSYrr;
if (!Opc)
- Opc = CopyToFromAsymmetricReg(DestReg, SrcReg, TM.getSubtarget<X86Subtarget>());
+ Opc = CopyToFromAsymmetricReg(DestReg, SrcReg, Subtarget);
if (Opc) {
BuildMI(MBB, MI, DL, get(Opc), DestReg)
@@ -3183,9 +3179,9 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
static unsigned getLoadStoreRegOpcode(unsigned Reg,
const TargetRegisterClass *RC,
bool isStackAligned,
- const TargetMachine &TM,
+ const X86Subtarget &STI,
bool load) {
- if (TM.getSubtarget<X86Subtarget>().hasAVX512()) {
+ if (STI.hasAVX512()) {
if (X86::VK8RegClass.hasSubClassEq(RC) ||
X86::VK16RegClass.hasSubClassEq(RC))
return load ? X86::KMOVWkm : X86::KMOVWmk;
@@ -3197,13 +3193,13 @@ static unsigned getLoadStoreRegOpcode(unsigned Reg,
return load ? X86::VMOVUPSZrm : X86::VMOVUPSZmr;
}
- bool HasAVX = TM.getSubtarget<X86Subtarget>().hasAVX();
+ bool HasAVX = STI.hasAVX();
switch (RC->getSize()) {
default:
llvm_unreachable("Unknown spill size");
case 1:
assert(X86::GR8RegClass.hasSubClassEq(RC) && "Unknown 1-byte regclass");
- if (TM.getSubtarget<X86Subtarget>().is64Bit())
+ if (STI.is64Bit())
// Copying to or from a physical H register on x86-64 requires a NOREX
// move. Otherwise use a normal move.
if (isHReg(Reg) || X86::GR8_ABCD_HRegClass.hasSubClassEq(RC))
@@ -3270,16 +3266,16 @@ static unsigned getLoadStoreRegOpcode(unsigned Reg,
static unsigned getStoreRegOpcode(unsigned SrcReg,
const TargetRegisterClass *RC,
bool isStackAligned,
- TargetMachine &TM) {
- return getLoadStoreRegOpcode(SrcReg, RC, isStackAligned, TM, false);
+ const X86Subtarget &STI) {
+ return getLoadStoreRegOpcode(SrcReg, RC, isStackAligned, STI, false);
}
static unsigned getLoadRegOpcode(unsigned DestReg,
const TargetRegisterClass *RC,
bool isStackAligned,
- const TargetMachine &TM) {
- return getLoadStoreRegOpcode(DestReg, RC, isStackAligned, TM, true);
+ const X86Subtarget &STI) {
+ return getLoadStoreRegOpcode(DestReg, RC, isStackAligned, STI, true);
}
void X86InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
@@ -3291,9 +3287,10 @@ void X86InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
assert(MF.getFrameInfo()->getObjectSize(FrameIdx) >= RC->getSize() &&
"Stack slot too small for store");
unsigned Alignment = std::max<uint32_t>(RC->getSize(), 16);
- bool isAligned = (TM.getFrameLowering()->getStackAlignment() >= Alignment) ||
- RI.canRealignStack(MF);
- unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, TM);
+ bool isAligned =
+ (MF.getTarget().getFrameLowering()->getStackAlignment() >= Alignment) ||
+ RI.canRealignStack(MF);
+ unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, Subtarget);
DebugLoc DL = MBB.findDebugLoc(MI);
addFrameReference(BuildMI(MBB, MI, DL, get(Opc)), FrameIdx)
.addReg(SrcReg, getKillRegState(isKill));
@@ -3309,7 +3306,7 @@ void X86InstrInfo::storeRegToAddr(MachineFunction &MF, unsigned SrcReg,
unsigned Alignment = std::max<uint32_t>(RC->getSize(), 16);
bool isAligned = MMOBegin != MMOEnd &&
(*MMOBegin)->getAlignment() >= Alignment;
- unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, TM);
+ unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, Subtarget);
DebugLoc DL;
MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc));
for (unsigned i = 0, e = Addr.size(); i != e; ++i)
@@ -3327,9 +3324,10 @@ void X86InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
const TargetRegisterInfo *TRI) const {
const MachineFunction &MF = *MBB.getParent();
unsigned Alignment = std::max<uint32_t>(RC->getSize(), 16);
- bool isAligned = (TM.getFrameLowering()->getStackAlignment() >= Alignment) ||
- RI.canRealignStack(MF);
- unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, TM);
+ bool isAligned =
+ (MF.getTarget().getFrameLowering()->getStackAlignment() >= Alignment) ||
+ RI.canRealignStack(MF);
+ unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, Subtarget);
DebugLoc DL = MBB.findDebugLoc(MI);
addFrameReference(BuildMI(MBB, MI, DL, get(Opc), DestReg), FrameIdx);
}
@@ -3343,7 +3341,7 @@ void X86InstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
unsigned Alignment = std::max<uint32_t>(RC->getSize(), 16);
bool isAligned = MMOBegin != MMOEnd &&
(*MMOBegin)->getAlignment() >= Alignment;
- unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, TM);
+ unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, Subtarget);
DebugLoc DL;
MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc), DestReg);
for (unsigned i = 0, e = Addr.size(); i != e; ++i)
@@ -3741,7 +3739,7 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2,
continue;
// EFLAGS is used by this instruction.
- X86::CondCode OldCC;
+ X86::CondCode OldCC = X86::COND_INVALID;
bool OpcIsSET = false;
if (IsCmpZero || IsSwapped) {
// We decode the condition code from opcode.
@@ -3964,7 +3962,7 @@ static bool Expand2AddrUndef(MachineInstrBuilder &MIB,
}
bool X86InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
- bool HasAVX = TM.getSubtarget<X86Subtarget>().hasAVX();
+ bool HasAVX = Subtarget.hasAVX();
MachineInstrBuilder MIB(*MI->getParent()->getParent(), MI);
switch (MI->getOpcode()) {
case X86::MOV32r0:
@@ -4075,7 +4073,7 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
unsigned Size, unsigned Align) const {
const DenseMap<unsigned,
std::pair<unsigned,unsigned> > *OpcodeTablePtr = nullptr;
- bool isCallRegIndirect = TM.getSubtarget<X86Subtarget>().callRegIndirect();
+ bool isCallRegIndirect = Subtarget.callRegIndirect();
bool isTwoAddrFold = false;
// Atom favors register form of call. So, we do not fold loads into calls
@@ -4316,7 +4314,7 @@ breakPartialRegDependency(MachineBasicBlock::iterator MI, unsigned OpNum,
if (X86::VR128RegClass.contains(Reg)) {
// These instructions are all floating point domain, so xorps is the best
// choice.
- bool HasAVX = TM.getSubtarget<X86Subtarget>().hasAVX();
+ bool HasAVX = Subtarget.hasAVX();
unsigned Opc = HasAVX ? X86::VXORPSrr : X86::XORPSrr;
BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), get(Opc), Reg)
.addReg(Reg, RegState::Undef).addReg(Reg, RegState::Undef);
@@ -4352,7 +4350,8 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
// If the function stack isn't realigned we don't want to fold instructions
// that need increased alignment.
if (!RI.needsStackRealignment(MF))
- Alignment = std::min(Alignment, TM.getFrameLowering()->getStackAlignment());
+ Alignment = std::min(
+ Alignment, MF.getTarget().getFrameLowering()->getStackAlignment());
if (Ops.size() == 2 && Ops[0] == 0 && Ops[1] == 1) {
unsigned NewOpc = 0;
unsigned RCSize = 0;
@@ -4453,14 +4452,14 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
// Create a constant-pool entry and operands to load from it.
// Medium and large mode can't fold loads this way.
- if (TM.getCodeModel() != CodeModel::Small &&
- TM.getCodeModel() != CodeModel::Kernel)
+ if (MF.getTarget().getCodeModel() != CodeModel::Small &&
+ MF.getTarget().getCodeModel() != CodeModel::Kernel)
return nullptr;
// x86-32 PIC requires a PIC base register for constant pools.
unsigned PICBase = 0;
- if (TM.getRelocationModel() == Reloc::PIC_) {
- if (TM.getSubtarget<X86Subtarget>().is64Bit())
+ if (MF.getTarget().getRelocationModel() == Reloc::PIC_) {
+ if (Subtarget.is64Bit())
PICBase = X86::RIP;
else
// FIXME: PICBase = getGlobalBaseReg(&MF);
@@ -4600,7 +4599,7 @@ bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
const TargetRegisterClass *RC = getRegClass(MCID, Index, &RI, MF);
if (!MI->hasOneMemOperand() &&
RC == &X86::VR128RegClass &&
- !TM.getSubtarget<X86Subtarget>().isUnalignedMemAccessFast())
+ !Subtarget.isUnalignedMemAccessFast())
// Without memoperands, loadRegFromAddr and storeRegToStackSlot will
// conservatively assume the address is unaligned. That's bad for
// performance.
@@ -4748,13 +4747,13 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
cast<MachineSDNode>(N)->memoperands_end());
if (!(*MMOs.first) &&
RC == &X86::VR128RegClass &&
- !TM.getSubtarget<X86Subtarget>().isUnalignedMemAccessFast())
+ !Subtarget.isUnalignedMemAccessFast())
// Do not introduce a slow unaligned load.
return false;
unsigned Alignment = RC->getSize() == 32 ? 32 : 16;
bool isAligned = (*MMOs.first) &&
(*MMOs.first)->getAlignment() >= Alignment;
- Load = DAG.getMachineNode(getLoadRegOpcode(0, RC, isAligned, TM), dl,
+ Load = DAG.getMachineNode(getLoadRegOpcode(0, RC, isAligned, Subtarget), dl,
VT, MVT::Other, AddrOps);
NewNodes.push_back(Load);
@@ -4791,15 +4790,15 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
cast<MachineSDNode>(N)->memoperands_end());
if (!(*MMOs.first) &&
RC == &X86::VR128RegClass &&
- !TM.getSubtarget<X86Subtarget>().isUnalignedMemAccessFast())
+ !Subtarget.isUnalignedMemAccessFast())
// Do not introduce a slow unaligned store.
return false;
unsigned Alignment = RC->getSize() == 32 ? 32 : 16;
bool isAligned = (*MMOs.first) &&
(*MMOs.first)->getAlignment() >= Alignment;
- SDNode *Store = DAG.getMachineNode(getStoreRegOpcode(0, DstRC,
- isAligned, TM),
- dl, MVT::Other, AddrOps);
+ SDNode *Store =
+ DAG.getMachineNode(getStoreRegOpcode(0, DstRC, isAligned, Subtarget),
+ dl, MVT::Other, AddrOps);
NewNodes.push_back(Store);
// Preserve memory reference information.
@@ -4960,7 +4959,7 @@ bool X86InstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
default:
// XMM registers. In 64-bit mode we can be a bit more aggressive since we
// have 16 of them to play with.
- if (TM.getSubtargetImpl()->is64Bit()) {
+ if (Subtarget.is64Bit()) {
if (NumLoads >= 3)
return false;
} else if (NumLoads) {
@@ -4986,7 +4985,7 @@ bool X86InstrInfo::shouldScheduleAdjacent(MachineInstr* First,
// Check if this processor supports macro-fusion. Since this is a minor
// heuristic, we haven't specifically reserved a feature. hasAVX is a decent
// proxy for SandyBridge+.
- if (!TM.getSubtarget<X86Subtarget>().hasAVX())
+ if (!Subtarget.hasAVX())
return false;
enum {
@@ -5038,6 +5037,7 @@ bool X86InstrInfo::shouldScheduleAdjacent(MachineInstr* First,
case X86::TEST16rm:
case X86::TEST32rm:
case X86::TEST64rm:
+ case X86::TEST8ri_NOREX:
case X86::AND16i16:
case X86::AND16ri:
case X86::AND16ri8:
@@ -5168,7 +5168,7 @@ isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const {
/// TODO: Eliminate this and move the code to X86MachineFunctionInfo.
///
unsigned X86InstrInfo::getGlobalBaseReg(MachineFunction *MF) const {
- assert(!TM.getSubtarget<X86Subtarget>().is64Bit() &&
+ assert(!Subtarget.is64Bit() &&
"X86-64 PIC uses RIP relative addressing");
X86MachineFunctionInfo *X86FI = MF->getInfo<X86MachineFunctionInfo>();
@@ -5271,7 +5271,7 @@ static const uint16_t *lookupAVX2(unsigned opcode, unsigned domain) {
std::pair<uint16_t, uint16_t>
X86InstrInfo::getExecutionDomain(const MachineInstr *MI) const {
uint16_t domain = (MI->getDesc().TSFlags >> X86II::SSEDomainShift) & 3;
- bool hasAVX2 = TM.getSubtarget<X86Subtarget>().hasAVX2();
+ bool hasAVX2 = Subtarget.hasAVX2();
uint16_t validDomains = 0;
if (domain && lookup(MI->getOpcode(), domain))
validDomains = 0xe;
@@ -5286,7 +5286,7 @@ void X86InstrInfo::setExecutionDomain(MachineInstr *MI, unsigned Domain) const {
assert(dom && "Not an SSE instruction");
const uint16_t *table = lookup(MI->getOpcode(), dom);
if (!table) { // try the other table
- assert((TM.getSubtarget<X86Subtarget>().hasAVX2() || Domain < 3) &&
+ assert((Subtarget.hasAVX2() || Domain < 3) &&
"256-bit vector operations only available in AVX2");
table = lookupAVX2(MI->getOpcode(), dom);
}
@@ -5299,6 +5299,16 @@ void X86InstrInfo::getNoopForMachoTarget(MCInst &NopInst) const {
NopInst.setOpcode(X86::NOOP);
}
+void X86InstrInfo::getUnconditionalBranch(
+ MCInst &Branch, const MCSymbolRefExpr *BranchTarget) const {
+ Branch.setOpcode(X86::JMP_4);
+ Branch.addOperand(MCOperand::CreateExpr(BranchTarget));
+}
+
+void X86InstrInfo::getTrap(MCInst &MI) const {
+ MI.setOpcode(X86::TRAP);
+}
+
bool X86InstrInfo::isHighLatencyDef(int opc) const {
switch (opc) {
default: return false;
diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h
index 5f34915..c177e3a 100644
--- a/lib/Target/X86/X86InstrInfo.h
+++ b/lib/Target/X86/X86InstrInfo.h
@@ -24,7 +24,7 @@
namespace llvm {
class X86RegisterInfo;
- class X86TargetMachine;
+ class X86Subtarget;
namespace X86 {
// X86 specific condition code. These correspond to X86_*_COND in
@@ -46,6 +46,7 @@ namespace X86 {
COND_O = 13,
COND_P = 14,
COND_S = 15,
+ LAST_VALID_COND = COND_S,
// Artificial condition codes. These are used by AnalyzeBranch
// to indicate a block terminated with two conditional branches to
@@ -61,12 +62,21 @@ namespace X86 {
// Turn condition code into conditional branch opcode.
unsigned GetCondBranchFromCond(CondCode CC);
+ /// \brief Return a set opcode for the given condition and whether it has
+ /// a memory operand.
+ unsigned getSETFromCond(CondCode CC, bool HasMemoryOperand = false);
+
+ /// \brief Return a cmov opcode for the given condition, register size in
+ /// bytes, and operand type.
+ unsigned getCMovFromCond(CondCode CC, unsigned RegBytes,
+ bool HasMemoryOperand = false);
+
// Turn CMov opcode into condition code.
CondCode getCondFromCMovOpc(unsigned Opc);
/// GetOppositeBranchCondition - Return the inverse of the specified cond,
/// e.g. turning COND_E to COND_NE.
- CondCode GetOppositeBranchCondition(X86::CondCode CC);
+ CondCode GetOppositeBranchCondition(CondCode CC);
} // end namespace X86;
@@ -129,7 +139,7 @@ inline static bool isMem(const MachineInstr *MI, unsigned Op) {
}
class X86InstrInfo final : public X86GenInstrInfo {
- X86TargetMachine &TM;
+ X86Subtarget &Subtarget;
const X86RegisterInfo RI;
/// RegOp2MemOpTable3Addr, RegOp2MemOpTable0, RegOp2MemOpTable1,
@@ -156,7 +166,7 @@ class X86InstrInfo final : public X86GenInstrInfo {
virtual void anchor();
public:
- explicit X86InstrInfo(X86TargetMachine &tm);
+ explicit X86InstrInfo(X86Subtarget &STI);
/// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
/// such, whenever a client has an instance of instruction info, it should
@@ -396,6 +406,12 @@ public:
const SmallVectorImpl<MachineOperand> &MOs,
unsigned Size, unsigned Alignment) const;
+ void
+ getUnconditionalBranch(MCInst &Branch,
+ const MCSymbolRefExpr *BranchTarget) const override;
+
+ void getTrap(MCInst &MI) const override;
+
bool isHighLatencyDef(int opc) const override;
bool hasHighOperandLatency(const InstrItineraryData *ItinData,
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index 0d97669..e7b532c 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -155,27 +155,6 @@ def X86cas16 : SDNode<"X86ISD::LCMPXCHG16_DAG", SDTX86caspair,
[SDNPHasChain, SDNPInGlue, SDNPOutGlue, SDNPMayStore,
SDNPMayLoad, SDNPMemOperand]>;
-def X86AtomAdd64 : SDNode<"X86ISD::ATOMADD64_DAG", SDTX86atomicBinary,
- [SDNPHasChain, SDNPMayStore,
- SDNPMayLoad, SDNPMemOperand]>;
-def X86AtomSub64 : SDNode<"X86ISD::ATOMSUB64_DAG", SDTX86atomicBinary,
- [SDNPHasChain, SDNPMayStore,
- SDNPMayLoad, SDNPMemOperand]>;
-def X86AtomOr64 : SDNode<"X86ISD::ATOMOR64_DAG", SDTX86atomicBinary,
- [SDNPHasChain, SDNPMayStore,
- SDNPMayLoad, SDNPMemOperand]>;
-def X86AtomXor64 : SDNode<"X86ISD::ATOMXOR64_DAG", SDTX86atomicBinary,
- [SDNPHasChain, SDNPMayStore,
- SDNPMayLoad, SDNPMemOperand]>;
-def X86AtomAnd64 : SDNode<"X86ISD::ATOMAND64_DAG", SDTX86atomicBinary,
- [SDNPHasChain, SDNPMayStore,
- SDNPMayLoad, SDNPMemOperand]>;
-def X86AtomNand64 : SDNode<"X86ISD::ATOMNAND64_DAG", SDTX86atomicBinary,
- [SDNPHasChain, SDNPMayStore,
- SDNPMayLoad, SDNPMemOperand]>;
-def X86AtomSwap64 : SDNode<"X86ISD::ATOMSWAP64_DAG", SDTX86atomicBinary,
- [SDNPHasChain, SDNPMayStore,
- SDNPMayLoad, SDNPMemOperand]>;
def X86retflag : SDNode<"X86ISD::RET_FLAG", SDTX86Ret,
[SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
@@ -208,6 +187,8 @@ def X86rdtsc : SDNode<"X86ISD::RDTSC_DAG", SDTX86Void,
[SDNPHasChain, SDNPOutGlue, SDNPSideEffect]>;
def X86rdtscp : SDNode<"X86ISD::RDTSCP_DAG", SDTX86Void,
[SDNPHasChain, SDNPOutGlue, SDNPSideEffect]>;
+def X86rdpmc : SDNode<"X86ISD::RDPMC_DAG", SDTX86Void,
+ [SDNPHasChain, SDNPOutGlue, SDNPSideEffect]>;
def X86Wrapper : SDNode<"X86ISD::Wrapper", SDTX86Wrapper>;
def X86WrapperRIP : SDNode<"X86ISD::WrapperRIP", SDTX86Wrapper>;
@@ -795,6 +776,7 @@ def OptForSpeed : Predicate<"!OptForSize">;
def FastBTMem : Predicate<"!Subtarget->isBTMemSlow()">;
def CallImmAddr : Predicate<"Subtarget->IsLegalToCallImmediateAddr(TM)">;
def FavorMemIndirectCall : Predicate<"!Subtarget->callRegIndirect()">;
+def NotSlowIncDec : Predicate<"!Subtarget->slowIncDec()">;
//===----------------------------------------------------------------------===//
// X86 Instruction Format Definitions.
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index 1eb0485..f9a5ae1 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -4337,20 +4337,6 @@ defm PCMPGTD : PDI_binop_all<0x66, "pcmpgtd", X86pcmpgt, v4i32, v8i32,
SSE_INTALU_ITINS_P, 0>;
//===---------------------------------------------------------------------===//
-// SSE2 - Packed Integer Pack Instructions
-//===---------------------------------------------------------------------===//
-
-defm PACKSSWB : PDI_binop_all_int<0x63, "packsswb", int_x86_sse2_packsswb_128,
- int_x86_avx2_packsswb,
- SSE_INTALU_ITINS_SHUFF_P, 0>;
-defm PACKSSDW : PDI_binop_all_int<0x6B, "packssdw", int_x86_sse2_packssdw_128,
- int_x86_avx2_packssdw,
- SSE_INTALU_ITINS_SHUFF_P, 0>;
-defm PACKUSWB : PDI_binop_all_int<0x67, "packuswb", int_x86_sse2_packuswb_128,
- int_x86_avx2_packuswb,
- SSE_INTALU_ITINS_SHUFF_P, 0>;
-
-//===---------------------------------------------------------------------===//
// SSE2 - Packed Integer Shuffle Instructions
//===---------------------------------------------------------------------===//
@@ -4432,6 +4418,136 @@ let Predicates = [UseSSE2] in {
}
//===---------------------------------------------------------------------===//
+// Packed Integer Pack Instructions (SSE & AVX)
+//===---------------------------------------------------------------------===//
+
+let ExeDomain = SSEPackedInt in {
+multiclass sse2_pack<bits<8> opc, string OpcodeStr, ValueType OutVT,
+ ValueType ArgVT, SDNode OpNode, PatFrag bc_frag,
+ bit Is2Addr = 1> {
+ def rr : PDI<opc, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set VR128:$dst,
+ (OutVT (OpNode (ArgVT VR128:$src1), VR128:$src2)))]>,
+ Sched<[WriteShuffle]>;
+ def rm : PDI<opc, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set VR128:$dst,
+ (OutVT (OpNode VR128:$src1,
+ (bc_frag (memopv2i64 addr:$src2)))))]>,
+ Sched<[WriteShuffleLd, ReadAfterLd]>;
+}
+
+multiclass sse2_pack_y<bits<8> opc, string OpcodeStr, ValueType OutVT,
+ ValueType ArgVT, SDNode OpNode, PatFrag bc_frag> {
+ def Yrr : PDI<opc, MRMSrcReg,
+ (outs VR256:$dst), (ins VR256:$src1, VR256:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR256:$dst,
+ (OutVT (OpNode (ArgVT VR256:$src1), VR256:$src2)))]>,
+ Sched<[WriteShuffle]>;
+ def Yrm : PDI<opc, MRMSrcMem,
+ (outs VR256:$dst), (ins VR256:$src1, i256mem:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR256:$dst,
+ (OutVT (OpNode VR256:$src1,
+ (bc_frag (memopv4i64 addr:$src2)))))]>,
+ Sched<[WriteShuffleLd, ReadAfterLd]>;
+}
+
+multiclass sse4_pack<bits<8> opc, string OpcodeStr, ValueType OutVT,
+ ValueType ArgVT, SDNode OpNode, PatFrag bc_frag,
+ bit Is2Addr = 1> {
+ def rr : SS48I<opc, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set VR128:$dst,
+ (OutVT (OpNode (ArgVT VR128:$src1), VR128:$src2)))]>,
+ Sched<[WriteShuffle]>;
+ def rm : SS48I<opc, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set VR128:$dst,
+ (OutVT (OpNode VR128:$src1,
+ (bc_frag (memopv2i64 addr:$src2)))))]>,
+ Sched<[WriteShuffleLd, ReadAfterLd]>;
+}
+
+multiclass sse4_pack_y<bits<8> opc, string OpcodeStr, ValueType OutVT,
+ ValueType ArgVT, SDNode OpNode, PatFrag bc_frag> {
+ def Yrr : SS48I<opc, MRMSrcReg,
+ (outs VR256:$dst), (ins VR256:$src1, VR256:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR256:$dst,
+ (OutVT (OpNode (ArgVT VR256:$src1), VR256:$src2)))]>,
+ Sched<[WriteShuffle]>;
+ def Yrm : SS48I<opc, MRMSrcMem,
+ (outs VR256:$dst), (ins VR256:$src1, i256mem:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR256:$dst,
+ (OutVT (OpNode VR256:$src1,
+ (bc_frag (memopv4i64 addr:$src2)))))]>,
+ Sched<[WriteShuffleLd, ReadAfterLd]>;
+}
+
+let Predicates = [HasAVX] in {
+ defm VPACKSSWB : sse2_pack<0x63, "vpacksswb", v16i8, v8i16, X86Packss,
+ bc_v8i16, 0>, VEX_4V;
+ defm VPACKSSDW : sse2_pack<0x6B, "vpackssdw", v8i16, v4i32, X86Packss,
+ bc_v4i32, 0>, VEX_4V;
+
+ defm VPACKUSWB : sse2_pack<0x67, "vpackuswb", v16i8, v8i16, X86Packus,
+ bc_v8i16, 0>, VEX_4V;
+ defm VPACKUSDW : sse4_pack<0x2B, "vpackusdw", v8i16, v4i32, X86Packus,
+ bc_v4i32, 0>, VEX_4V;
+}
+
+let Predicates = [HasAVX2] in {
+ defm VPACKSSWB : sse2_pack_y<0x63, "vpacksswb", v32i8, v16i16, X86Packss,
+ bc_v16i16>, VEX_4V, VEX_L;
+ defm VPACKSSDW : sse2_pack_y<0x6B, "vpackssdw", v16i16, v8i32, X86Packss,
+ bc_v8i32>, VEX_4V, VEX_L;
+
+ defm VPACKUSWB : sse2_pack_y<0x67, "vpackuswb", v32i8, v16i16, X86Packus,
+ bc_v16i16>, VEX_4V, VEX_L;
+ defm VPACKUSDW : sse4_pack_y<0x2B, "vpackusdw", v16i16, v8i32, X86Packus,
+ bc_v8i32>, VEX_4V, VEX_L;
+}
+
+let Constraints = "$src1 = $dst" in {
+ defm PACKSSWB : sse2_pack<0x63, "packsswb", v16i8, v8i16, X86Packss,
+ bc_v8i16>;
+ defm PACKSSDW : sse2_pack<0x6B, "packssdw", v8i16, v4i32, X86Packss,
+ bc_v4i32>;
+
+ defm PACKUSWB : sse2_pack<0x67, "packuswb", v16i8, v8i16, X86Packus,
+ bc_v8i16>;
+
+ let Predicates = [HasSSE41] in
+ defm PACKUSDW : sse4_pack<0x2B, "packusdw", v8i16, v4i32, X86Packus,
+ bc_v4i32>;
+}
+} // ExeDomain = SSEPackedInt
+
+//===---------------------------------------------------------------------===//
// SSE2 - Packed Integer Unpack Instructions
//===---------------------------------------------------------------------===//
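The sse2_pack/sse4_pack multiclasses above re-express the pack instructions as patterns over the generic X86Packss/X86Packus nodes instead of the removed intrinsic-based definitions. As a reference for what those nodes compute, here is a small standalone C++ model of the 128-bit PACKSSWB case (an illustrative sketch, not code from this patch):

  #include <algorithm>
  #include <array>
  #include <cstdint>

  // Clamp one signed 16-bit lane to the signed 8-bit range.
  static int8_t SatS16ToS8(int16_t V) {
    return static_cast<int8_t>(std::min<int>(std::max<int>(V, -128), 127));
  }

  // packsswb: saturate every i16 lane of both sources to i8 and concatenate,
  // first source in the low half of the result, second source in the high half.
  static std::array<int8_t, 16> PackSSWB(const std::array<int16_t, 8> &A,
                                         const std::array<int16_t, 8> &B) {
    std::array<int8_t, 16> R;
    for (int I = 0; I < 8; ++I) {
      R[I] = SatS16ToS8(A[I]);
      R[I + 8] = SatS16ToS8(B[I]);
    }
    return R;
  }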
@@ -5239,6 +5355,60 @@ let Constraints = "$src1 = $dst", Predicates = [UseSSE3] in {
f128mem, SSE_ALU_F64P>, PD;
}
+// Patterns used to select 'addsub' instructions.
+let Predicates = [HasAVX] in {
+ // Constant 170 corresponds to the binary mask '10101010'.
+ // When used as a blend mask, it allows selecting eight elements from two
+  // input vectors as follows:
+ // - Even-numbered values in the destination are copied from
+ // the corresponding elements in the first input vector;
+ // - Odd-numbered values in the destination are copied from
+ // the corresponding elements in the second input vector.
+
+ def : Pat<(v8f32 (X86Blendi (v8f32 (fsub VR256:$lhs, VR256:$rhs)),
+ (v8f32 (fadd VR256:$lhs, VR256:$rhs)), (i32 170))),
+ (VADDSUBPSYrr VR256:$lhs, VR256:$rhs)>;
+
+ // Constant 10 corresponds to the binary mask '1010'.
+  // In the two patterns below, constant 10 is used as a blend mask to select
+ // - the 1st and 3rd element from the first input vector (the 'fsub' node);
+ // - the 2nd and 4th element from the second input vector (the 'fadd' node).
+
+ def : Pat<(v4f64 (X86Blendi (v4f64 (fsub VR256:$lhs, VR256:$rhs)),
+ (v4f64 (fadd VR256:$lhs, VR256:$rhs)), (i32 10))),
+ (VADDSUBPDYrr VR256:$lhs, VR256:$rhs)>;
+ def : Pat<(v4f32 (X86Blendi (v4f32 (fsub VR128:$lhs, VR128:$rhs)),
+ (v4f32 (fadd VR128:$lhs, VR128:$rhs)), (i32 10))),
+ (VADDSUBPSrr VR128:$lhs, VR128:$rhs)>;
+ def : Pat<(v2f64 (X86Blendi (v2f64 (fsub VR128:$lhs, VR128:$rhs)),
+ (v2f64 (fadd VR128:$lhs, VR128:$rhs)), (i32 2))),
+ (VADDSUBPDrr VR128:$lhs, VR128:$rhs)>;
+ def : Pat<(v2f64 (X86Movsd (v2f64 (fadd VR128:$lhs, VR128:$rhs)),
+ (v2f64 (fsub VR128:$lhs, VR128:$rhs)))),
+ (VADDSUBPDrr VR128:$lhs, VR128:$rhs)>;
+}
+
+let Predicates = [UseSSE3] in {
+ // Constant 10 corresponds to the binary mask '1010'.
+ // In the pattern below, it is used as a blend mask to select:
+ // - the 1st and 3rd element from the first input vector (the fsub node);
+ // - the 2nd and 4th element from the second input vector (the fadd node).
+
+ def : Pat<(v4f32 (X86Blendi (v4f32 (fsub VR128:$lhs, VR128:$rhs)),
+ (v4f32 (fadd VR128:$lhs, VR128:$rhs)), (i32 10))),
+ (ADDSUBPSrr VR128:$lhs, VR128:$rhs)>;
+
+ def : Pat<(v2f64 (X86Blendi (v2f64 (fsub VR128:$lhs, VR128:$rhs)),
+ (v2f64 (fadd VR128:$lhs, VR128:$rhs)), (i32 2))),
+ (ADDSUBPDrr VR128:$lhs, VR128:$rhs)>;
+ def : Pat<(v2f64 (X86Movsd (v2f64 (fadd VR128:$lhs, VR128:$rhs)),
+ (v2f64 (fsub VR128:$lhs, VR128:$rhs)))),
+ (ADDSUBPDrr VR128:$lhs, VR128:$rhs)>;
+}
+
//===---------------------------------------------------------------------===//
// SSE3 Instructions
//===---------------------------------------------------------------------===//
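The patterns above recognise a blend of an fadd and an fsub of the same two operands as a single ADDSUB instruction. A scalar model of the four-lane case (an illustrative sketch, not code from this patch): even result lanes take lhs - rhs and odd lanes take lhs + rhs, which is exactly what blending the fsub and fadd results with mask 0b1010 produces.

  #include <array>

  // Scalar model of ADDSUBPS: even lanes subtract, odd lanes add.
  static std::array<float, 4> AddSubPS(const std::array<float, 4> &L,
                                       const std::array<float, 4> &R) {
    std::array<float, 4> Out;
    for (int I = 0; I < 4; ++I)
      Out[I] = (I & 1) ? L[I] + R[I] : L[I] - R[I];
    return Out;
  }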
@@ -7053,8 +7223,6 @@ multiclass SS48I_binop_rm2<bits<8> opc, string OpcodeStr, SDNode OpNode,
let Predicates = [HasAVX] in {
let isCommutable = 0 in
- defm VPACKUSDW : SS41I_binop_rm_int<0x2B, "vpackusdw", int_x86_sse41_packusdw,
- 0, DEFAULT_ITINS_SHUFFLESCHED>, VEX_4V;
defm VPMINSB : SS48I_binop_rm<0x38, "vpminsb", X86smin, v16i8, VR128,
loadv2i64, i128mem, 0, SSE_INTALU_ITINS_P>,
VEX_4V;
@@ -7086,9 +7254,6 @@ let Predicates = [HasAVX] in {
let Predicates = [HasAVX2] in {
let isCommutable = 0 in
- defm VPACKUSDW : SS41I_binop_rm_int_y<0x2B, "vpackusdw",
- int_x86_avx2_packusdw, WriteShuffle>,
- VEX_4V, VEX_L;
defm VPMINSBY : SS48I_binop_rm<0x38, "vpminsb", X86smin, v32i8, VR256,
loadv4i64, i256mem, 0, SSE_INTALU_ITINS_P>,
VEX_4V, VEX_L;
@@ -7120,8 +7285,6 @@ let Predicates = [HasAVX2] in {
let Constraints = "$src1 = $dst" in {
let isCommutable = 0 in
- defm PACKUSDW : SS41I_binop_rm_int<0x2B, "packusdw", int_x86_sse41_packusdw,
- 1, DEFAULT_ITINS_SHUFFLESCHED>;
defm PMINSB : SS48I_binop_rm<0x38, "pminsb", X86smin, v16i8, VR128,
memopv2i64, i128mem, 1, SSE_INTALU_ITINS_P>;
defm PMINSD : SS48I_binop_rm<0x39, "pminsd", X86smin, v4i32, VR128,
@@ -7969,6 +8132,16 @@ class avx_broadcast<bits<8> opc, string OpcodeStr, RegisterClass RC,
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set RC:$dst, (Int addr:$src))]>, Sched<[Sched]>, VEX;
+class avx_broadcast_no_int<bits<8> opc, string OpcodeStr, RegisterClass RC,
+ X86MemOperand x86memop, ValueType VT,
+ PatFrag ld_frag, SchedWrite Sched> :
+ AVX8I<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set RC:$dst, (VT (X86VBroadcast (ld_frag addr:$src))))]>,
+ Sched<[Sched]>, VEX {
+ let mayLoad = 1;
+}
+
// AVX2 adds register forms
class avx2_broadcast_reg<bits<8> opc, string OpcodeStr, RegisterClass RC,
Intrinsic Int, SchedWrite Sched> :
@@ -7977,16 +8150,15 @@ class avx2_broadcast_reg<bits<8> opc, string OpcodeStr, RegisterClass RC,
[(set RC:$dst, (Int VR128:$src))]>, Sched<[Sched]>, VEX;
let ExeDomain = SSEPackedSingle in {
- def VBROADCASTSSrm : avx_broadcast<0x18, "vbroadcastss", VR128, f32mem,
- int_x86_avx_vbroadcast_ss, WriteLoad>;
- def VBROADCASTSSYrm : avx_broadcast<0x18, "vbroadcastss", VR256, f32mem,
- int_x86_avx_vbroadcast_ss_256,
- WriteFShuffleLd>, VEX_L;
+ def VBROADCASTSSrm : avx_broadcast_no_int<0x18, "vbroadcastss", VR128,
+ f32mem, v4f32, loadf32, WriteLoad>;
+ def VBROADCASTSSYrm : avx_broadcast_no_int<0x18, "vbroadcastss", VR256,
+ f32mem, v8f32, loadf32,
+ WriteFShuffleLd>, VEX_L;
}
let ExeDomain = SSEPackedDouble in
-def VBROADCASTSDYrm : avx_broadcast<0x19, "vbroadcastsd", VR256, f64mem,
- int_x86_avx_vbroadcast_sd_256,
- WriteFShuffleLd>, VEX_L;
+def VBROADCASTSDYrm : avx_broadcast_no_int<0x19, "vbroadcastsd", VR256, f64mem,
+ v4f64, loadf64, WriteFShuffleLd>, VEX_L;
def VBROADCASTF128 : avx_broadcast<0x1A, "vbroadcastf128", VR256, f128mem,
int_x86_avx_vbroadcastf128_pd_256,
WriteFShuffleLd>, VEX_L;
@@ -8366,6 +8538,21 @@ let Predicates = [HasF16C] in {
(VCVTPH2PSrm addr:$src)>;
}
+// Patterns for matching conversions from float to half-float and vice versa.
+let Predicates = [HasF16C] in {
+ def : Pat<(f32_to_f16 FR32:$src),
+ (i16 (EXTRACT_SUBREG (VMOVPDI2DIrr (VCVTPS2PHrr
+ (COPY_TO_REGCLASS FR32:$src, VR128), 0)), sub_16bit))>;
+
+ def : Pat<(f16_to_f32 GR16:$src),
+ (f32 (COPY_TO_REGCLASS (VCVTPH2PSrr
+ (COPY_TO_REGCLASS (MOVSX32rr16 GR16:$src), VR128)), FR32)) >;
+
+ def : Pat<(f16_to_f32 (i16 (f32_to_f16 FR32:$src))),
+ (f32 (COPY_TO_REGCLASS (VCVTPH2PSrr
+ (VCVTPS2PHrr (COPY_TO_REGCLASS FR32:$src, VR128), 0)), FR32)) >;
+}
+
//===----------------------------------------------------------------------===//
// AVX2 Instructions
//===----------------------------------------------------------------------===//
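The F16C patterns above let scalar f32_to_f16/f16_to_f32 nodes be selected onto VCVTPS2PH/VCVTPH2PS without going through the vector intrinsics. For reference, the user-level equivalent of those scalar conversions, assuming an F16C-capable host and compilation with -mf16c (an illustrative sketch, not code from this patch):

  #include <immintrin.h>

  // Convert a float to an IEEE half, using rounding immediate 0 to mirror
  // the immediate used by the VCVTPS2PHrr pattern above.
  static unsigned short FloatToHalf(float F) { return _cvtss_sh(F, 0); }

  // Widen an IEEE half back to a float.
  static float HalfToFloat(unsigned short H) { return _cvtsh_ss(H); }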
@@ -8543,13 +8730,6 @@ def : Pat<(v4i32 (X86VBroadcast (loadi32 addr:$src))),
}
let Predicates = [HasAVX] in {
-def : Pat<(v8f32 (X86VBroadcast (loadf32 addr:$src))),
- (VBROADCASTSSYrm addr:$src)>;
-def : Pat<(v4f64 (X86VBroadcast (loadf64 addr:$src))),
- (VBROADCASTSDYrm addr:$src)>;
-def : Pat<(v4f32 (X86VBroadcast (loadf32 addr:$src))),
- (VBROADCASTSSrm addr:$src)>;
-
// Provide fallback in case the load node that is used in the patterns above
// is used by additional users, which prevents the pattern selection.
let AddedComplexity = 20 in {
diff --git a/lib/Target/X86/X86InstrSystem.td b/lib/Target/X86/X86InstrSystem.td
index b5595cb..5402780 100644
--- a/lib/Target/X86/X86InstrSystem.td
+++ b/lib/Target/X86/X86InstrSystem.td
@@ -439,7 +439,10 @@ def LLDT16m : I<0x00, MRM2m, (outs), (ins i16mem:$src),
let SchedRW = [WriteSystem] in {
def WRMSR : I<0x30, RawFrm, (outs), (ins), "wrmsr", [], IIC_WRMSR>, TB;
def RDMSR : I<0x32, RawFrm, (outs), (ins), "rdmsr", [], IIC_RDMSR>, TB;
-def RDPMC : I<0x33, RawFrm, (outs), (ins), "rdpmc", [], IIC_RDPMC>, TB;
+
+let Defs = [RAX, RDX], Uses = [ECX] in
+ def RDPMC : I<0x33, RawFrm, (outs), (ins), "rdpmc", [(X86rdpmc)], IIC_RDPMC>,
+ TB;
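For context on the new Defs/Uses lists (background, not part of the patch text): rdpmc reads the performance-monitoring counter selected by ECX and returns it in EDX:EAX, the low halves of RDX:RAX, so modelling RAX and RDX as defs and ECX as a use gives the X86rdpmc node accurate register dependencies.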
def SMSW16r : I<0x01, MRM4r, (outs GR16:$dst), (ins),
"smsw{w}\t$dst", [], IIC_SMSW>, OpSize16, TB;
diff --git a/lib/Target/X86/X86JITInfo.cpp b/lib/Target/X86/X86JITInfo.cpp
index e969ef2..a082c4f 100644
--- a/lib/Target/X86/X86JITInfo.cpp
+++ b/lib/Target/X86/X86JITInfo.cpp
@@ -432,7 +432,7 @@ X86JITInfo::getLazyResolverFunction(JITCompilerFn F) {
// SSE Callback should be called for SSE-enabled LLVM.
return X86CompilationCallback_SSE;
#else
- if (Subtarget->hasSSE1())
+ if (useSSE)
return X86CompilationCallback_SSE;
#endif
#endif
@@ -440,8 +440,8 @@ X86JITInfo::getLazyResolverFunction(JITCompilerFn F) {
return X86CompilationCallback;
}
-X86JITInfo::X86JITInfo(X86TargetMachine &tm) : TM(tm) {
- Subtarget = &TM.getSubtarget<X86Subtarget>();
+X86JITInfo::X86JITInfo(bool UseSSE) {
+ useSSE = UseSSE;
useGOT = 0;
TLSOffset = nullptr;
}
diff --git a/lib/Target/X86/X86JITInfo.h b/lib/Target/X86/X86JITInfo.h
index 4d279de..564343f 100644
--- a/lib/Target/X86/X86JITInfo.h
+++ b/lib/Target/X86/X86JITInfo.h
@@ -19,16 +19,14 @@
#include "llvm/Target/TargetJITInfo.h"
namespace llvm {
- class X86TargetMachine;
class X86Subtarget;
class X86JITInfo : public TargetJITInfo {
- X86TargetMachine &TM;
- const X86Subtarget *Subtarget;
uintptr_t PICBase;
- char* TLSOffset;
+ char *TLSOffset;
+ bool useSSE;
public:
- explicit X86JITInfo(X86TargetMachine &tm);
+ explicit X86JITInfo(bool UseSSE);
/// replaceMachineCodeForFunction - Make it so that calling the function
/// whose machine code is at OLD turns into a call to NEW, perhaps by
diff --git a/lib/Target/X86/X86MCInstLower.cpp b/lib/Target/X86/X86MCInstLower.cpp
index 0190080..2bd70a9 100644
--- a/lib/Target/X86/X86MCInstLower.cpp
+++ b/lib/Target/X86/X86MCInstLower.cpp
@@ -13,6 +13,7 @@
//===----------------------------------------------------------------------===//
#include "X86AsmPrinter.h"
+#include "X86RegisterInfo.h"
#include "InstPrinter/X86ATTInstPrinter.h"
#include "MCTargetDesc/X86BaseInfo.h"
#include "llvm/ADT/SmallString.h"
@@ -779,6 +780,9 @@ static void LowerPATCHPOINT(MCStreamer &OS, StackMaps &SM,
void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
X86MCInstLower MCInstLowering(*MF, *this);
+ const X86RegisterInfo *RI =
+ static_cast<const X86RegisterInfo *>(TM.getRegisterInfo());
+
switch (MI->getOpcode()) {
case TargetOpcode::DBG_VALUE:
llvm_unreachable("Should be handled target independently");
@@ -883,6 +887,37 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
.addReg(X86::R10)
.addReg(X86::RAX));
return;
+
+ case X86::SEH_PushReg:
+ OutStreamer.EmitWinCFIPushReg(RI->getSEHRegNum(MI->getOperand(0).getImm()));
+ return;
+
+ case X86::SEH_SaveReg:
+ OutStreamer.EmitWinCFISaveReg(RI->getSEHRegNum(MI->getOperand(0).getImm()),
+ MI->getOperand(1).getImm());
+ return;
+
+ case X86::SEH_SaveXMM:
+ OutStreamer.EmitWinCFISaveXMM(RI->getSEHRegNum(MI->getOperand(0).getImm()),
+ MI->getOperand(1).getImm());
+ return;
+
+ case X86::SEH_StackAlloc:
+ OutStreamer.EmitWinCFIAllocStack(MI->getOperand(0).getImm());
+ return;
+
+ case X86::SEH_SetFrame:
+ OutStreamer.EmitWinCFISetFrame(RI->getSEHRegNum(MI->getOperand(0).getImm()),
+ MI->getOperand(1).getImm());
+ return;
+
+ case X86::SEH_PushFrame:
+ OutStreamer.EmitWinCFIPushFrame(MI->getOperand(0).getImm());
+ return;
+
+ case X86::SEH_EndPrologue:
+ OutStreamer.EmitWinCFIEndProlog();
+ return;
}
MCInst TmpInst;
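For orientation (not part of the patch): each SEH_* pseudo-instruction handled above maps onto one Win64 unwind directive, so a prologue that pushes a callee-saved register and then allocates stack space would come out in the assembly stream as .seh_pushreg followed by .seh_stackalloc, with .seh_endprologue emitted once SEH_EndPrologue is reached.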
diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp
index a83e1e4..e8a7e84 100644
--- a/lib/Target/X86/X86RegisterInfo.cpp
+++ b/lib/Target/X86/X86RegisterInfo.cpp
@@ -53,20 +53,18 @@ static cl::opt<bool>
EnableBasePointer("x86-use-base-pointer", cl::Hidden, cl::init(true),
cl::desc("Enable use of a base pointer for complex stack frames"));
-X86RegisterInfo::X86RegisterInfo(X86TargetMachine &tm)
- : X86GenRegisterInfo((tm.getSubtarget<X86Subtarget>().is64Bit()
- ? X86::RIP : X86::EIP),
- X86_MC::getDwarfRegFlavour(tm.getTargetTriple(), false),
- X86_MC::getDwarfRegFlavour(tm.getTargetTriple(), true),
- (tm.getSubtarget<X86Subtarget>().is64Bit()
- ? X86::RIP : X86::EIP)),
- TM(tm) {
+X86RegisterInfo::X86RegisterInfo(const X86Subtarget &STI)
+ : X86GenRegisterInfo(
+ (STI.is64Bit() ? X86::RIP : X86::EIP),
+ X86_MC::getDwarfRegFlavour(STI.getTargetTriple(), false),
+ X86_MC::getDwarfRegFlavour(STI.getTargetTriple(), true),
+ (STI.is64Bit() ? X86::RIP : X86::EIP)),
+ Subtarget(STI) {
X86_MC::InitLLVM2SEHRegisterMapping(this);
// Cache some information.
- const X86Subtarget *Subtarget = &TM.getSubtarget<X86Subtarget>();
- Is64Bit = Subtarget->is64Bit();
- IsWin64 = Subtarget->isTargetWin64();
+ Is64Bit = Subtarget.is64Bit();
+ IsWin64 = Subtarget.isTargetWin64();
if (Is64Bit) {
SlotSize = 8;
@@ -83,21 +81,6 @@ X86RegisterInfo::X86RegisterInfo(X86TargetMachine &tm)
BasePtr = Is64Bit ? X86::RBX : X86::ESI;
}
-/// getCompactUnwindRegNum - This function maps the register to the number for
-/// compact unwind encoding. Return -1 if the register isn't valid.
-int X86RegisterInfo::getCompactUnwindRegNum(unsigned RegNum, bool isEH) const {
- switch (getLLVMRegNum(RegNum, isEH)) {
- case X86::EBX: case X86::RBX: return 1;
- case X86::ECX: case X86::R12: return 2;
- case X86::EDX: case X86::R13: return 3;
- case X86::EDI: case X86::R14: return 4;
- case X86::ESI: case X86::R15: return 5;
- case X86::EBP: case X86::RBP: return 6;
- }
-
- return -1;
-}
-
bool
X86RegisterInfo::trackLivenessAfterRegAlloc(const MachineFunction &MF) const {
// ExeDepsFixer and PostRAScheduler require liveness.
@@ -173,9 +156,8 @@ X86RegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC) const{
}
const TargetRegisterClass *
-X86RegisterInfo::getPointerRegClass(const MachineFunction &MF, unsigned Kind)
- const {
- const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
+X86RegisterInfo::getPointerRegClass(const MachineFunction &MF,
+ unsigned Kind) const {
switch (Kind) {
default: llvm_unreachable("Unexpected Kind in getPointerRegClass!");
case 0: // Normal GPRs.
@@ -225,7 +207,7 @@ X86RegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
case X86::GR64RegClassID:
return 12 - FPDiff;
case X86::VR128RegClassID:
- return TM.getSubtarget<X86Subtarget>().is64Bit() ? 10 : 4;
+ return Subtarget.is64Bit() ? 10 : 4;
case X86::VR64RegClassID:
return 4;
}
@@ -233,8 +215,8 @@ X86RegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
const MCPhysReg *
X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
- bool HasAVX = TM.getSubtarget<X86Subtarget>().hasAVX();
- bool HasAVX512 = TM.getSubtarget<X86Subtarget>().hasAVX512();
+ bool HasAVX = Subtarget.hasAVX();
+ bool HasAVX512 = Subtarget.hasAVX512();
assert(MF && "MachineFunction required");
switch (MF->getFunction()->getCallingConv()) {
@@ -287,8 +269,8 @@ X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
const uint32_t*
X86RegisterInfo::getCallPreservedMask(CallingConv::ID CC) const {
- bool HasAVX = TM.getSubtarget<X86Subtarget>().hasAVX();
- bool HasAVX512 = TM.getSubtarget<X86Subtarget>().hasAVX512();
+ bool HasAVX = Subtarget.hasAVX();
+ bool HasAVX512 = Subtarget.hasAVX512();
switch (CC) {
case CallingConv::GHC:
@@ -406,7 +388,7 @@ BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
Reserved.set(*AI);
}
}
- if (!Is64Bit || !TM.getSubtarget<X86Subtarget>().hasAVX512()) {
+ if (!Is64Bit || !Subtarget.hasAVX512()) {
for (unsigned n = 16; n != 32; ++n) {
for (MCRegAliasIterator AI(X86::XMM0 + n, this, true); AI.isValid(); ++AI)
Reserved.set(*AI);
@@ -459,7 +441,7 @@ bool X86RegisterInfo::canRealignStack(const MachineFunction &MF) const {
bool X86RegisterInfo::needsStackRealignment(const MachineFunction &MF) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
const Function *F = MF.getFunction();
- unsigned StackAlign = TM.getFrameLowering()->getStackAlignment();
+ unsigned StackAlign = MF.getTarget().getFrameLowering()->getStackAlignment();
bool requiresRealignment =
((MFI->getMaxAlignment() > StackAlign) ||
F->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
diff --git a/lib/Target/X86/X86RegisterInfo.h b/lib/Target/X86/X86RegisterInfo.h
index 2289d91..74efd1f 100644
--- a/lib/Target/X86/X86RegisterInfo.h
+++ b/lib/Target/X86/X86RegisterInfo.h
@@ -22,11 +22,11 @@
namespace llvm {
class Type;
class TargetInstrInfo;
- class X86TargetMachine;
+ class X86Subtarget;
class X86RegisterInfo final : public X86GenRegisterInfo {
public:
- X86TargetMachine &TM;
+ const X86Subtarget &Subtarget;
private:
/// Is64Bit - Is the target 64-bits.
@@ -55,15 +55,11 @@ private:
unsigned BasePtr;
public:
- X86RegisterInfo(X86TargetMachine &tm);
+ X86RegisterInfo(const X86Subtarget &STI);
// FIXME: This should be tablegen'd like getDwarfRegNum is
int getSEHRegNum(unsigned i) const;
- /// getCompactUnwindRegNum - This function maps the register to the number for
- /// compact unwind encoding. Return -1 if the register isn't valid.
- int getCompactUnwindRegNum(unsigned RegNum, bool isEH) const override;
-
/// Code Generation virtual methods...
///
bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const override;
diff --git a/lib/Target/X86/X86SelectionDAGInfo.cpp b/lib/Target/X86/X86SelectionDAGInfo.cpp
index 744890d..a83dd9b 100644
--- a/lib/Target/X86/X86SelectionDAGInfo.cpp
+++ b/lib/Target/X86/X86SelectionDAGInfo.cpp
@@ -11,21 +11,23 @@
//
//===----------------------------------------------------------------------===//
-#include "X86TargetMachine.h"
+#include "X86InstrInfo.h"
+#include "X86ISelLowering.h"
+#include "X86RegisterInfo.h"
+#include "X86Subtarget.h"
+#include "X86SelectionDAGInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/IR/DerivedTypes.h"
+#include "llvm/Target/TargetLowering.h"
+
using namespace llvm;
#define DEBUG_TYPE "x86-selectiondag-info"
-X86SelectionDAGInfo::X86SelectionDAGInfo(const X86TargetMachine &TM) :
- TargetSelectionDAGInfo(TM),
- Subtarget(&TM.getSubtarget<X86Subtarget>()),
- TLI(*TM.getTargetLowering()) {
-}
+X86SelectionDAGInfo::X86SelectionDAGInfo(const DataLayout &DL)
+ : TargetSelectionDAGInfo(&DL) {}
-X86SelectionDAGInfo::~X86SelectionDAGInfo() {
-}
+X86SelectionDAGInfo::~X86SelectionDAGInfo() {}
SDValue
X86SelectionDAGInfo::EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl,
@@ -35,6 +37,7 @@ X86SelectionDAGInfo::EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl,
bool isVolatile,
MachinePointerInfo DstPtrInfo) const {
ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
+ const X86Subtarget &Subtarget = DAG.getTarget().getSubtarget<X86Subtarget>();
// If to a segment-relative address space, use the default lowering.
if (DstPtrInfo.getAddrSpace() >= 256)
@@ -43,16 +46,14 @@ X86SelectionDAGInfo::EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl,
// If not DWORD aligned or size is more than the threshold, call the library.
// The libc version is likely to be faster for these cases. It can use the
// address value and run time information about the CPU.
- if ((Align & 3) != 0 ||
- !ConstantSize ||
- ConstantSize->getZExtValue() >
- Subtarget->getMaxInlineSizeThreshold()) {
+ if ((Align & 3) != 0 || !ConstantSize ||
+ ConstantSize->getZExtValue() > Subtarget.getMaxInlineSizeThreshold()) {
// Check to see if there is a specialized entry-point for memory zeroing.
ConstantSDNode *V = dyn_cast<ConstantSDNode>(Src);
if (const char *bzeroEntry = V &&
- V->isNullValue() ? Subtarget->getBZeroEntry() : nullptr) {
- EVT IntPtr = TLI.getPointerTy();
+ V->isNullValue() ? Subtarget.getBZeroEntry() : nullptr) {
+ EVT IntPtr = DAG.getTargetLoweringInfo().getPointerTy();
Type *IntPtrTy = getDataLayout()->getIntPtrType(*DAG.getContext());
TargetLowering::ArgListTy Args;
TargetLowering::ArgListEntry Entry;
@@ -65,10 +66,11 @@ X86SelectionDAGInfo::EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl,
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(dl).setChain(Chain)
.setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()),
- DAG.getExternalSymbol(bzeroEntry, IntPtr), &Args, 0)
+ DAG.getExternalSymbol(bzeroEntry, IntPtr), std::move(Args),
+ 0)
.setDiscardResult();
- std::pair<SDValue,SDValue> CallResult = TLI.LowerCallTo(CLI);
+ std::pair<SDValue,SDValue> CallResult = DAG.getTargetLoweringInfo().LowerCallTo(CLI);
return CallResult.second;
}
@@ -99,7 +101,7 @@ X86SelectionDAGInfo::EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl,
ValReg = X86::EAX;
Val = (Val << 8) | Val;
Val = (Val << 16) | Val;
- if (Subtarget->is64Bit() && ((Align & 0x7) == 0)) { // QWORD aligned
+ if (Subtarget.is64Bit() && ((Align & 0x7) == 0)) { // QWORD aligned
AVT = MVT::i64;
ValReg = X86::RAX;
Val = (Val << 32) | Val;
@@ -128,13 +130,11 @@ X86SelectionDAGInfo::EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl,
InFlag = Chain.getValue(1);
}
- Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RCX :
- X86::ECX,
- Count, InFlag);
+ Chain = DAG.getCopyToReg(Chain, dl, Subtarget.is64Bit() ? X86::RCX : X86::ECX,
+ Count, InFlag);
InFlag = Chain.getValue(1);
- Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RDI :
- X86::EDI,
- Dst, InFlag);
+ Chain = DAG.getCopyToReg(Chain, dl, Subtarget.is64Bit() ? X86::RDI : X86::EDI,
+ Dst, InFlag);
InFlag = Chain.getValue(1);
SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
@@ -182,10 +182,11 @@ X86SelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl,
// This requires the copy size to be a constant, preferably
// within a subtarget-specific limit.
ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
+ const X86Subtarget &Subtarget = DAG.getTarget().getSubtarget<X86Subtarget>();
if (!ConstantSize)
return SDValue();
uint64_t SizeVal = ConstantSize->getZExtValue();
- if (!AlwaysInline && SizeVal > Subtarget->getMaxInlineSizeThreshold())
+ if (!AlwaysInline && SizeVal > Subtarget.getMaxInlineSizeThreshold())
return SDValue();
/// If not DWORD aligned, it is more efficient to call the library. However
@@ -218,7 +219,7 @@ X86SelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl,
AVT = MVT::i32;
else
// QWORD aligned
- AVT = Subtarget->is64Bit() ? MVT::i64 : MVT::i32;
+ AVT = Subtarget.is64Bit() ? MVT::i64 : MVT::i32;
unsigned UBytes = AVT.getSizeInBits() / 8;
unsigned CountVal = SizeVal / UBytes;
@@ -226,15 +227,15 @@ X86SelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl,
unsigned BytesLeft = SizeVal % UBytes;
SDValue InFlag;
- Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RCX :
+ Chain = DAG.getCopyToReg(Chain, dl, Subtarget.is64Bit() ? X86::RCX :
X86::ECX,
Count, InFlag);
InFlag = Chain.getValue(1);
- Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RDI :
+ Chain = DAG.getCopyToReg(Chain, dl, Subtarget.is64Bit() ? X86::RDI :
X86::EDI,
Dst, InFlag);
InFlag = Chain.getValue(1);
- Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RSI :
+ Chain = DAG.getCopyToReg(Chain, dl, Subtarget.is64Bit() ? X86::RSI :
X86::ESI,
Src, InFlag);
InFlag = Chain.getValue(1);
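A standalone model of the fill-value replication used in the memset lowering above (an illustrative sketch; the patch itself builds the value as SelectionDAG constants):

  #include <cstdint>

  // Replicate an 8-bit fill value across the widest store unit allowed:
  // 32 bits by default, 64 bits when the target is 64-bit and the
  // destination is QWORD aligned.
  static uint64_t SplatFillByte(uint8_t Byte, bool UseQWord) {
    uint64_t Val = Byte;
    Val = (Val << 8) | Val;    // 0xAB -> 0xABAB
    Val = (Val << 16) | Val;   // 0xABAB -> 0xABABABAB
    if (UseQWord)
      Val = (Val << 32) | Val; // 0xABABABAB -> 0xABABABABABABABAB
    return Val;
  }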
diff --git a/lib/Target/X86/X86SelectionDAGInfo.h b/lib/Target/X86/X86SelectionDAGInfo.h
index 0d5dc38..c12555a 100644
--- a/lib/Target/X86/X86SelectionDAGInfo.h
+++ b/lib/Target/X86/X86SelectionDAGInfo.h
@@ -23,14 +23,8 @@ class X86TargetMachine;
class X86Subtarget;
class X86SelectionDAGInfo : public TargetSelectionDAGInfo {
- /// Subtarget - Keep a pointer to the X86Subtarget around so that we can
- /// make the right decision when generating code for different targets.
- const X86Subtarget *Subtarget;
-
- const X86TargetLowering &TLI;
-
public:
- explicit X86SelectionDAGInfo(const X86TargetMachine &TM);
+ explicit X86SelectionDAGInfo(const DataLayout &DL);
~X86SelectionDAGInfo();
SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl,
diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp
index 989e0d6..79b7e68 100644
--- a/lib/Target/X86/X86Subtarget.cpp
+++ b/lib/Target/X86/X86Subtarget.cpp
@@ -291,13 +291,60 @@ void X86Subtarget::initializeEnvironment() {
CallRegIndirect = false;
LEAUsesAG = false;
SlowLEA = false;
+ SlowIncDec = false;
stackAlignment = 4;
// FIXME: this is a known good value for Yonah. How about others?
MaxInlineSizeThreshold = 128;
}
+static std::string computeDataLayout(const X86Subtarget &ST) {
+ // X86 is little endian
+ std::string Ret = "e";
+
+ Ret += DataLayout::getManglingComponent(ST.getTargetTriple());
+ // X86 and x32 have 32 bit pointers.
+ if (ST.isTarget64BitILP32() || !ST.is64Bit())
+ Ret += "-p:32:32";
+
+ // Some ABIs align 64 bit integers and doubles to 64 bits, others to 32.
+ if (ST.is64Bit() || ST.isOSWindows() || ST.isTargetNaCl())
+ Ret += "-i64:64";
+ else
+ Ret += "-f64:32:64";
+
+ // Some ABIs align long double to 128 bits, others to 32.
+ if (ST.isTargetNaCl())
+ ; // No f80
+ else if (ST.is64Bit() || ST.isTargetDarwin())
+ Ret += "-f80:128";
+ else
+ Ret += "-f80:32";
+
+ // The registers can hold 8, 16, 32 or, in x86-64, 64 bits.
+ if (ST.is64Bit())
+ Ret += "-n8:16:32:64";
+ else
+ Ret += "-n8:16:32";
+
+ // The stack is aligned to 32 bits on some ABIs and 128 bits on others.
+ if (!ST.is64Bit() && ST.isOSWindows())
+ Ret += "-S32";
+ else
+ Ret += "-S128";
+
+ return Ret;
+}
+
+X86Subtarget &X86Subtarget::initializeSubtargetDependencies(StringRef CPU,
+ StringRef FS) {
+ initializeEnvironment();
+ resetSubtargetFeatures(CPU, FS);
+ return *this;
+}
+
X86Subtarget::X86Subtarget(const std::string &TT, const std::string &CPU,
- const std::string &FS, unsigned StackAlignOverride)
+ const std::string &FS, X86TargetMachine &TM,
+ unsigned StackAlignOverride)
: X86GenSubtargetInfo(TT, CPU, FS), X86ProcFamily(Others),
PICStyle(PICStyles::None), TargetTriple(TT),
StackAlignOverride(StackAlignOverride),
@@ -305,10 +352,12 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &CPU,
In32BitMode(TargetTriple.getArch() == Triple::x86 &&
TargetTriple.getEnvironment() != Triple::CODE16),
In16BitMode(TargetTriple.getArch() == Triple::x86 &&
- TargetTriple.getEnvironment() == Triple::CODE16) {
- initializeEnvironment();
- resetSubtargetFeatures(CPU, FS);
-}
+ TargetTriple.getEnvironment() == Triple::CODE16),
+ DL(computeDataLayout(*this)), TSInfo(DL),
+ InstrInfo(initializeSubtargetDependencies(CPU, FS)), TLInfo(TM),
+ FrameLowering(TargetFrameLowering::StackGrowsDown, getStackAlignment(),
+ is64Bit() ? -8 : -4),
+ JITInfo(hasSSE1()) {}
bool
X86Subtarget::enablePostRAScheduler(CodeGenOpt::Level OptLevel,
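As a worked example of computeDataLayout, which now lives here in X86Subtarget.cpp: for a 64-bit ELF target the string should come out along the lines of e-m:e-i64:64-f80:128-n8:16:32:64-S128, assuming getManglingComponent contributes "-m:e" for ELF: little-endian, 64-bit-aligned i64, 128-bit-aligned f80, native integer widths of 8/16/32/64, and a 128-bit-aligned stack.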
diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h
index 703559a..09db0eb 100644
--- a/lib/Target/X86/X86Subtarget.h
+++ b/lib/Target/X86/X86Subtarget.h
@@ -14,6 +14,11 @@
#ifndef X86SUBTARGET_H
#define X86SUBTARGET_H
+#include "X86FrameLowering.h"
+#include "X86ISelLowering.h"
+#include "X86InstrInfo.h"
+#include "X86JITInfo.h"
+#include "X86SelectionDAGInfo.h"
#include "llvm/ADT/Triple.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/Target/TargetSubtargetInfo.h"
@@ -40,6 +45,7 @@ enum Style {
}
class X86Subtarget final : public X86GenSubtargetInfo {
+
protected:
enum X86SSEEnum {
NoMMXSSE, MMX, SSE1, SSE2, SSE3, SSSE3, SSE41, SSE42, AVX, AVX2, AVX512F
@@ -181,6 +187,9 @@ protected:
/// SlowLEA - True if the LEA instruction with certain arguments is slow
bool SlowLEA;
+ /// SlowIncDec - True if INC and DEC instructions are slow when writing to flags
+ bool SlowIncDec;
+
/// Processor has AVX-512 PreFetch Instructions
bool HasPFI;
@@ -217,14 +226,31 @@ private:
/// In16BitMode - True if compiling for 16-bit, false for 32-bit or 64-bit.
bool In16BitMode;
+ // Calculates type size & alignment
+ const DataLayout DL;
+ X86SelectionDAGInfo TSInfo;
+ // Ordering here is important. X86InstrInfo initializes X86RegisterInfo which
+ // X86TargetLowering needs.
+ X86InstrInfo InstrInfo;
+ X86TargetLowering TLInfo;
+ X86FrameLowering FrameLowering;
+ X86JITInfo JITInfo;
+
public:
/// This constructor initializes the data members to match that
/// of the specified triple.
///
X86Subtarget(const std::string &TT, const std::string &CPU,
- const std::string &FS,
+ const std::string &FS, X86TargetMachine &TM,
unsigned StackAlignOverride);
+ const X86TargetLowering *getTargetLowering() const { return &TLInfo; }
+ const X86InstrInfo *getInstrInfo() const { return &InstrInfo; }
+ const DataLayout *getDataLayout() const { return &DL; }
+ const X86FrameLowering *getFrameLowering() const { return &FrameLowering; }
+ const X86SelectionDAGInfo *getSelectionDAGInfo() const { return &TSInfo; }
+ X86JITInfo *getJITInfo() { return &JITInfo; }
+
/// getStackAlignment - Returns the minimum alignment known to hold of the
/// stack frame on entry to the function and which must be maintained by every
/// function for this subtarget.
@@ -241,6 +267,9 @@ public:
/// \brief Reset the features for the X86 target.
void resetSubtargetFeatures(const MachineFunction *MF) override;
private:
+ /// \brief Initialize the full set of dependencies so we can use an initializer
+ /// list for X86Subtarget.
+ X86Subtarget &initializeSubtargetDependencies(StringRef CPU, StringRef FS);
void initializeEnvironment();
void resetSubtargetFeatures(StringRef CPU, StringRef FS);
public:
@@ -319,6 +348,7 @@ public:
bool callRegIndirect() const { return CallRegIndirect; }
bool LEAusesAG() const { return LEAUsesAG; }
bool slowLEA() const { return SlowLEA; }
+ bool slowIncDec() const { return SlowIncDec; }
bool hasCDI() const { return HasCDI; }
bool hasPFI() const { return HasPFI; }
bool hasERI() const { return HasERI; }
diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp
index 93760ef..f12140f 100644
--- a/lib/Target/X86/X86TargetMachine.cpp
+++ b/lib/Target/X86/X86TargetMachine.cpp
@@ -29,61 +29,14 @@ extern "C" void LLVMInitializeX86Target() {
void X86TargetMachine::anchor() { }
-static std::string computeDataLayout(const X86Subtarget &ST) {
- // X86 is little endian
- std::string Ret = "e";
-
- Ret += DataLayout::getManglingComponent(ST.getTargetTriple());
- // X86 and x32 have 32 bit pointers.
- if (ST.isTarget64BitILP32() || !ST.is64Bit())
- Ret += "-p:32:32";
-
- // Some ABIs align 64 bit integers and doubles to 64 bits, others to 32.
- if (ST.is64Bit() || ST.isTargetCygMing() || ST.isTargetKnownWindowsMSVC() ||
- ST.isTargetNaCl())
- Ret += "-i64:64";
- else
- Ret += "-f64:32:64";
-
- // Some ABIs align long double to 128 bits, others to 32.
- if (ST.isTargetNaCl())
- ; // No f80
- else if (ST.is64Bit() || ST.isTargetDarwin())
- Ret += "-f80:128";
- else
- Ret += "-f80:32";
-
- // The registers can hold 8, 16, 32 or, in x86-64, 64 bits.
- if (ST.is64Bit())
- Ret += "-n8:16:32:64";
- else
- Ret += "-n8:16:32";
-
- // The stack is aligned to 32 bits on some ABIs and 128 bits on others.
- if (!ST.is64Bit() && (ST.isTargetCygMing() || ST.isTargetKnownWindowsMSVC()))
- Ret += "-S32";
- else
- Ret += "-S128";
-
- return Ret;
-}
-
/// X86TargetMachine ctor - Create an X86 target.
///
-X86TargetMachine::X86TargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
- const TargetOptions &Options,
+X86TargetMachine::X86TargetMachine(const Target &T, StringRef TT, StringRef CPU,
+ StringRef FS, const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL)
- : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
- Subtarget(TT, CPU, FS, Options.StackAlignmentOverride),
- FrameLowering(*this, Subtarget),
- InstrItins(Subtarget.getInstrItineraryData()),
- DL(computeDataLayout(*getSubtargetImpl())),
- InstrInfo(*this),
- TLInfo(*this),
- TSInfo(*this),
- JITInfo(*this) {
+ : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
+ Subtarget(TT, CPU, FS, *this, Options.StackAlignmentOverride) {
// Determine the PICStyle based on the target selected.
if (getRelocationModel() == Reloc::Static) {
// Unless we're in PIC or DynamicNoPIC mode, set the PIC style to None.
@@ -158,6 +111,7 @@ public:
return *getX86TargetMachine().getSubtargetImpl();
}
+ void addIRPasses() override;
bool addInstSelector() override;
bool addILPOpts() override;
bool addPreRegAlloc() override;
@@ -170,6 +124,12 @@ TargetPassConfig *X86TargetMachine::createPassConfig(PassManagerBase &PM) {
return new X86PassConfig(this, PM);
}
+void X86PassConfig::addIRPasses() {
+ addPass(createX86AtomicExpandPass(&getX86TargetMachine()));
+
+ TargetPassConfig::addIRPasses();
+}
+
bool X86PassConfig::addInstSelector() {
// Install an instruction selector.
addPass(createX86ISelDag(getX86TargetMachine(), getOptLevel()));
diff --git a/lib/Target/X86/X86TargetMachine.h b/lib/Target/X86/X86TargetMachine.h
index 57e6eda..41d5157 100644
--- a/lib/Target/X86/X86TargetMachine.h
+++ b/lib/Target/X86/X86TargetMachine.h
@@ -13,12 +13,7 @@
#ifndef X86TARGETMACHINE_H
#define X86TARGETMACHINE_H
-
-#include "X86FrameLowering.h"
-#include "X86ISelLowering.h"
#include "X86InstrInfo.h"
-#include "X86JITInfo.h"
-#include "X86SelectionDAGInfo.h"
#include "X86Subtarget.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/Target/TargetMachine.h"
@@ -30,13 +25,6 @@ class StringRef;
class X86TargetMachine final : public LLVMTargetMachine {
virtual void anchor();
X86Subtarget Subtarget;
- X86FrameLowering FrameLowering;
- InstrItineraryData InstrItins;
- const DataLayout DL; // Calculates type size & alignment
- X86InstrInfo InstrInfo;
- X86TargetLowering TLInfo;
- X86SelectionDAGInfo TSInfo;
- X86JITInfo JITInfo;
public:
X86TargetMachine(const Target &T, StringRef TT,
@@ -44,28 +32,28 @@ public:
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL);
- const DataLayout *getDataLayout() const override { return &DL; }
+ const DataLayout *getDataLayout() const override {
+ return getSubtargetImpl()->getDataLayout();
+ }
const X86InstrInfo *getInstrInfo() const override {
- return &InstrInfo;
+ return getSubtargetImpl()->getInstrInfo();
}
const TargetFrameLowering *getFrameLowering() const override {
- return &FrameLowering;
- }
- X86JITInfo *getJITInfo() override {
- return &JITInfo;
+ return getSubtargetImpl()->getFrameLowering();
}
+ X86JITInfo *getJITInfo() override { return Subtarget.getJITInfo(); }
const X86Subtarget *getSubtargetImpl() const override { return &Subtarget; }
const X86TargetLowering *getTargetLowering() const override {
- return &TLInfo;
+ return getSubtargetImpl()->getTargetLowering();
}
const X86SelectionDAGInfo *getSelectionDAGInfo() const override {
- return &TSInfo;
+ return getSubtargetImpl()->getSelectionDAGInfo();
}
const X86RegisterInfo *getRegisterInfo() const override {
return &getInstrInfo()->getRegisterInfo();
}
const InstrItineraryData *getInstrItineraryData() const override {
- return &InstrItins;
+ return &getSubtargetImpl()->getInstrItineraryData();
}
/// \brief Register X86 analysis passes with a pass manager.
diff --git a/lib/Target/X86/X86TargetTransformInfo.cpp b/lib/Target/X86/X86TargetTransformInfo.cpp
index 91b9d40..c961e2f 100644
--- a/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -102,6 +102,8 @@ public:
unsigned getReductionCost(unsigned Opcode, Type *Ty,
bool IsPairwiseForm) const override;
+ unsigned getIntImmCost(int64_t) const;
+
unsigned getIntImmCost(const APInt &Imm, Type *Ty) const override;
unsigned getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
@@ -142,13 +144,17 @@ unsigned X86TTI::getNumberOfRegisters(bool Vector) const {
if (Vector && !ST->hasSSE1())
return 0;
- if (ST->is64Bit())
+ if (ST->is64Bit()) {
+ if (Vector && ST->hasAVX512())
+ return 32;
return 16;
+ }
return 8;
}
unsigned X86TTI::getRegisterBitWidth(bool Vector) const {
if (Vector) {
+ if (ST->hasAVX512()) return 512;
if (ST->hasAVX()) return 256;
if (ST->hasSSE1()) return 128;
return 0;
@@ -400,17 +406,117 @@ unsigned X86TTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
unsigned X86TTI::getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
Type *SubTp) const {
- // We only estimate the cost of reverse shuffles.
- if (Kind != SK_Reverse)
+ // We only estimate the cost of reverse and alternate shuffles.
+ if (Kind != SK_Reverse && Kind != SK_Alternate)
return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp);
- std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Tp);
- unsigned Cost = 1;
- if (LT.second.getSizeInBits() > 128)
- Cost = 3; // Extract + insert + copy.
+ if (Kind == SK_Reverse) {
+ std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Tp);
+ unsigned Cost = 1;
+ if (LT.second.getSizeInBits() > 128)
+ Cost = 3; // Extract + insert + copy.
+
+    // Multiply by the number of parts.
+ return Cost * LT.first;
+ }
+
+ if (Kind == SK_Alternate) {
+ // 64-bit packed float vectors (v2f32) are widened to type v4f32.
+ // 64-bit packed integer vectors (v2i32) are promoted to type v2i64.
+ std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Tp);
+
+    // The backend knows how to generate a single VEX.256 version of the
+    // VPBLENDW instruction if the target supports AVX2.
+ if (ST->hasAVX2() && LT.second == MVT::v16i16)
+ return LT.first;
+
+ static const CostTblEntry<MVT::SimpleValueType> AVXAltShuffleTbl[] = {
+ {ISD::VECTOR_SHUFFLE, MVT::v4i64, 1}, // vblendpd
+ {ISD::VECTOR_SHUFFLE, MVT::v4f64, 1}, // vblendpd
+
+ {ISD::VECTOR_SHUFFLE, MVT::v8i32, 1}, // vblendps
+ {ISD::VECTOR_SHUFFLE, MVT::v8f32, 1}, // vblendps
+
+ // This shuffle is custom lowered into a sequence of:
+ // 2x vextractf128 , 2x vpblendw , 1x vinsertf128
+ {ISD::VECTOR_SHUFFLE, MVT::v16i16, 5},
+
+ // This shuffle is custom lowered into a long sequence of:
+ // 2x vextractf128 , 4x vpshufb , 2x vpor , 1x vinsertf128
+ {ISD::VECTOR_SHUFFLE, MVT::v32i8, 9}
+ };
+
+ if (ST->hasAVX()) {
+ int Idx = CostTableLookup(AVXAltShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second);
+ if (Idx != -1)
+ return LT.first * AVXAltShuffleTbl[Idx].Cost;
+ }
+
+ static const CostTblEntry<MVT::SimpleValueType> SSE41AltShuffleTbl[] = {
+ // These are lowered into movsd.
+ {ISD::VECTOR_SHUFFLE, MVT::v2i64, 1},
+ {ISD::VECTOR_SHUFFLE, MVT::v2f64, 1},
+
+ // packed float vectors with four elements are lowered into BLENDI dag
+ // nodes. A v4i32/v4f32 BLENDI generates a single 'blendps'/'blendpd'.
+ {ISD::VECTOR_SHUFFLE, MVT::v4i32, 1},
+ {ISD::VECTOR_SHUFFLE, MVT::v4f32, 1},
+
+ // This shuffle generates a single pshufw.
+ {ISD::VECTOR_SHUFFLE, MVT::v8i16, 1},
+
+ // There is no instruction that matches a v16i8 alternate shuffle.
+ // The backend will expand it into the sequence 'pshufb + pshufb + or'.
+ {ISD::VECTOR_SHUFFLE, MVT::v16i8, 3}
+ };
+
+ if (ST->hasSSE41()) {
+ int Idx = CostTableLookup(SSE41AltShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second);
+ if (Idx != -1)
+ return LT.first * SSE41AltShuffleTbl[Idx].Cost;
+ }
+
+ static const CostTblEntry<MVT::SimpleValueType> SSSE3AltShuffleTbl[] = {
+ {ISD::VECTOR_SHUFFLE, MVT::v2i64, 1}, // movsd
+ {ISD::VECTOR_SHUFFLE, MVT::v2f64, 1}, // movsd
+
+ // SSE3 doesn't have 'blendps'. The following shuffles are expanded into
+ // the sequence 'shufps + pshufd'
+ {ISD::VECTOR_SHUFFLE, MVT::v4i32, 2},
+ {ISD::VECTOR_SHUFFLE, MVT::v4f32, 2},
- // Multiple by the number of parts.
- return Cost * LT.first;
+ {ISD::VECTOR_SHUFFLE, MVT::v8i16, 3}, // pshufb + pshufb + or
+ {ISD::VECTOR_SHUFFLE, MVT::v16i8, 3} // pshufb + pshufb + or
+ };
+
+ if (ST->hasSSSE3()) {
+ int Idx = CostTableLookup(SSSE3AltShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second);
+ if (Idx != -1)
+ return LT.first * SSSE3AltShuffleTbl[Idx].Cost;
+ }
+
+ static const CostTblEntry<MVT::SimpleValueType> SSEAltShuffleTbl[] = {
+ {ISD::VECTOR_SHUFFLE, MVT::v2i64, 1}, // movsd
+ {ISD::VECTOR_SHUFFLE, MVT::v2f64, 1}, // movsd
+
+ {ISD::VECTOR_SHUFFLE, MVT::v4i32, 2}, // shufps + pshufd
+ {ISD::VECTOR_SHUFFLE, MVT::v4f32, 2}, // shufps + pshufd
+
+ // This is expanded into a long sequence of four extract + four insert.
+ {ISD::VECTOR_SHUFFLE, MVT::v8i16, 8}, // 4 x pextrw + 4 pinsrw.
+
+ // 8 x (pinsrw + pextrw + and + movb + movzb + or)
+ {ISD::VECTOR_SHUFFLE, MVT::v16i8, 48}
+ };
+
+ // Fall-back (SSE3 and SSE2).
+ int Idx = CostTableLookup(SSEAltShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second);
+ if (Idx != -1)
+ return LT.first * SSEAltShuffleTbl[Idx].Cost;
+ return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp);
+ }
+
+ return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp);
}
unsigned X86TTI::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const {
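A worked example of the SK_Alternate costing added above (numbers taken from the tables in the hunk): an alternate shuffle of v16i16 on a plain SSE2 target legalizes to two v8i16 operations (LT.first == 2), and the SSE fall-back table prices each v8i16 shuffle at 8 (4 x pextrw + 4 x pinsrw), so the returned cost would be 2 * 8 = 16; on an AVX2 target the same type is handled by a single VPBLENDW and the cost is just LT.first.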
@@ -808,6 +914,19 @@ unsigned X86TTI::getReductionCost(unsigned Opcode, Type *ValTy,
return TargetTransformInfo::getReductionCost(Opcode, ValTy, IsPairwise);
}
+/// \brief Calculate the cost of materializing a 64-bit value. This helper
+/// method might only calculate a fraction of a larger immediate. Therefore it
+/// is valid to return a cost of ZERO.
+unsigned X86TTI::getIntImmCost(int64_t Val) const {
+ if (Val == 0)
+ return TCC_Free;
+
+ if (isInt<32>(Val))
+ return TCC_Basic;
+
+ return 2 * TCC_Basic;
+}
+
unsigned X86TTI::getIntImmCost(const APInt &Imm, Type *Ty) const {
assert(Ty->isIntegerTy());
@@ -825,11 +944,21 @@ unsigned X86TTI::getIntImmCost(const APInt &Imm, Type *Ty) const {
if (Imm == 0)
return TCC_Free;
- if (Imm.getBitWidth() <= 64 &&
- (isInt<32>(Imm.getSExtValue()) || isUInt<32>(Imm.getZExtValue())))
- return TCC_Basic;
- else
- return 2 * TCC_Basic;
+  // Sign-extend all constants to a multiple of 64 bits.
+ APInt ImmVal = Imm;
+ if (BitSize & 0x3f)
+ ImmVal = Imm.sext((BitSize + 63) & ~0x3fU);
+
+ // Split the constant into 64-bit chunks and calculate the cost for each
+ // chunk.
+ unsigned Cost = 0;
+ for (unsigned ShiftVal = 0; ShiftVal < BitSize; ShiftVal += 64) {
+ APInt Tmp = ImmVal.ashr(ShiftVal).sextOrTrunc(64);
+ int64_t Val = Tmp.getSExtValue();
+ Cost += getIntImmCost(Val);
+ }
+  // We need at least one instruction to materialize the constant.
+ return std::max(1U, Cost);
}
unsigned X86TTI::getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
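A standalone sketch of the per-chunk costing introduced above, specialised to a 128-bit immediate supplied as two 64-bit words (hypothetical helper names, not the patch's API):

  #include <algorithm>
  #include <cstdint>

  // Cost of one 64-bit chunk: free if zero, one basic op if it fits in a
  // sign-extended 32-bit immediate, two basic ops otherwise. This mirrors
  // getIntImmCost(int64_t) above with TCC_Free = 0 and TCC_Basic = 1.
  static unsigned ChunkCost(int64_t V) {
    if (V == 0)
      return 0;
    if (V >= INT32_MIN && V <= INT32_MAX)
      return 1;
    return 2;
  }

  // Example: for the i128 value 2^64 the low chunk (0) is free and the high
  // chunk (1) costs one basic op; at least one instruction is always needed,
  // so the total is 1.
  static unsigned ImmCost128(int64_t Lo, int64_t Hi) {
    return std::max(1u, ChunkCost(Lo) + ChunkCost(Hi));
  }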
@@ -889,9 +1018,13 @@ unsigned X86TTI::getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
break;
}
- if ((Idx == ImmIdx) &&
- Imm.getBitWidth() <= 64 && isInt<32>(Imm.getSExtValue()))
- return TCC_Free;
+ if (Idx == ImmIdx) {
+ unsigned NumConstants = (BitSize + 63) / 64;
+ unsigned Cost = X86TTI::getIntImmCost(Imm, Ty);
+ return (Cost <= NumConstants * TCC_Basic)
+ ? static_cast<unsigned>(TCC_Free)
+ : Cost;
+ }
return X86TTI::getIntImmCost(Imm, Ty);
}
diff --git a/lib/Target/XCore/XCoreFrameLowering.cpp b/lib/Target/XCore/XCoreFrameLowering.cpp
index 5499aba..e694736 100644
--- a/lib/Target/XCore/XCoreFrameLowering.cpp
+++ b/lib/Target/XCore/XCoreFrameLowering.cpp
@@ -228,7 +228,9 @@ void XCoreFrameLowering::emitPrologue(MachineFunction &MF) const {
const XCoreInstrInfo &TII =
*static_cast<const XCoreInstrInfo*>(MF.getTarget().getInstrInfo());
XCoreFunctionInfo *XFI = MF.getInfo<XCoreFunctionInfo>();
- DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
+ // Debug location must be unknown since the first debug location is used
+ // to determine the end of the prologue.
+ DebugLoc dl;
if (MFI->getMaxAlignment() > getStackAlignment())
report_fatal_error("emitPrologue unsupported alignment: "
@@ -416,7 +418,7 @@ spillCalleeSavedRegisters(MachineBasicBlock &MBB,
bool emitFrameMoves = XCoreRegisterInfo::needsFrameMoves(*MF);
DebugLoc DL;
- if (MI != MBB.end())
+ if (MI != MBB.end() && !MI->isDebugValue())
DL = MI->getDebugLoc();
for (std::vector<CalleeSavedInfo>::const_iterator it = CSI.begin();
diff --git a/lib/Target/XCore/XCoreISelLowering.cpp b/lib/Target/XCore/XCoreISelLowering.cpp
index 9d78586..be7ef64 100644
--- a/lib/Target/XCore/XCoreISelLowering.cpp
+++ b/lib/Target/XCore/XCoreISelLowering.cpp
@@ -68,10 +68,9 @@ getTargetNodeName(unsigned Opcode) const
}
}
-XCoreTargetLowering::XCoreTargetLowering(XCoreTargetMachine &XTM)
- : TargetLowering(XTM, new XCoreTargetObjectFile()),
- TM(XTM),
- Subtarget(*XTM.getSubtargetImpl()) {
+XCoreTargetLowering::XCoreTargetLowering(const TargetMachine &TM)
+ : TargetLowering(TM, new XCoreTargetObjectFile()), TM(TM),
+ Subtarget(TM.getSubtarget<XCoreSubtarget>()) {
// Set up the register classes.
addRegisterClass(MVT::i32, &XCore::GRRegsRegClass);
@@ -92,15 +91,12 @@ XCoreTargetLowering::XCoreTargetLowering(XCoreTargetMachine &XTM)
// XCore does not have the NodeTypes below.
setOperationAction(ISD::BR_CC, MVT::i32, Expand);
- setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
+ setOperationAction(ISD::SELECT_CC, MVT::i32, Expand);
setOperationAction(ISD::ADDC, MVT::i32, Expand);
setOperationAction(ISD::ADDE, MVT::i32, Expand);
setOperationAction(ISD::SUBC, MVT::i32, Expand);
setOperationAction(ISD::SUBE, MVT::i32, Expand);
- // Stop the combiner recombining select and set_cc
- setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
-
// 64bit
setOperationAction(ISD::ADD, MVT::i64, Custom);
setOperationAction(ISD::SUB, MVT::i64, Custom);
@@ -217,7 +213,6 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::BR_JT: return LowerBR_JT(Op, DAG);
case ISD::LOAD: return LowerLOAD(Op, DAG);
case ISD::STORE: return LowerSTORE(Op, DAG);
- case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
case ISD::VAARG: return LowerVAARG(Op, DAG);
case ISD::VASTART: return LowerVASTART(Op, DAG);
case ISD::SMUL_LOHI: return LowerSMUL_LOHI(Op, DAG);
@@ -258,33 +253,21 @@ void XCoreTargetLowering::ReplaceNodeResults(SDNode *N,
// Misc Lower Operation implementation
//===----------------------------------------------------------------------===//
-SDValue XCoreTargetLowering::
-LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const
-{
- SDLoc dl(Op);
- SDValue Cond = DAG.getNode(ISD::SETCC, dl, MVT::i32, Op.getOperand(2),
- Op.getOperand(3), Op.getOperand(4));
- return DAG.getNode(ISD::SELECT, dl, MVT::i32, Cond, Op.getOperand(0),
- Op.getOperand(1));
-}
-
SDValue XCoreTargetLowering::getGlobalAddressWrapper(SDValue GA,
const GlobalValue *GV,
SelectionDAG &DAG) const {
// FIXME there is no actual debug info here
SDLoc dl(GA);
- const GlobalValue *UnderlyingGV = GV;
- // If GV is an alias then use the aliasee to determine the wrapper type
- if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
- UnderlyingGV = GA->getAliasee();
- if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(UnderlyingGV)) {
- if ((GVar->isConstant() && GV->hasLocalLinkage()) ||
- (GVar->hasSection() &&
- StringRef(GVar->getSection()).startswith(".cp.")))
- return DAG.getNode(XCoreISD::CPRelativeWrapper, dl, MVT::i32, GA);
- return DAG.getNode(XCoreISD::DPRelativeWrapper, dl, MVT::i32, GA);
- }
- return DAG.getNode(XCoreISD::PCRelativeWrapper, dl, MVT::i32, GA);
+
+ if (GV->getType()->getElementType()->isFunctionTy())
+ return DAG.getNode(XCoreISD::PCRelativeWrapper, dl, MVT::i32, GA);
+
+ const auto *GVar = dyn_cast<GlobalVariable>(GV);
+ if ((GV->hasSection() && StringRef(GV->getSection()).startswith(".cp.")) ||
+ (GVar && GVar->isConstant() && GV->hasLocalLinkage()))
+ return DAG.getNode(XCoreISD::CPRelativeWrapper, dl, MVT::i32, GA);
+
+ return DAG.getNode(XCoreISD::DPRelativeWrapper, dl, MVT::i32, GA);
}
static bool IsSmallObject(const GlobalValue *GV, const XCoreTargetLowering &XTL) {
@@ -508,7 +491,7 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
CLI.setDebugLoc(DL).setChain(Chain)
.setCallee(CallingConv::C, IntPtrTy,
DAG.getExternalSymbol("__misaligned_load", getPointerTy()),
- &Args, 0);
+ std::move(Args), 0);
std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
SDValue Ops[] = { CallResult.first, CallResult.second };
@@ -568,7 +551,7 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG) const
CLI.setDebugLoc(dl).setChain(Chain)
.setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()),
DAG.getExternalSymbol("__misaligned_store", getPointerTy()),
- &Args, 0);
+ std::move(Args), 0);
std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
return CallResult.second;
diff --git a/lib/Target/XCore/XCoreISelLowering.h b/lib/Target/XCore/XCoreISelLowering.h
index d28715b..62b89c3 100644
--- a/lib/Target/XCore/XCoreISelLowering.h
+++ b/lib/Target/XCore/XCoreISelLowering.h
@@ -94,7 +94,7 @@ namespace llvm {
{
public:
- explicit XCoreTargetLowering(XCoreTargetMachine &TM);
+ explicit XCoreTargetLowering(const TargetMachine &TM);
using TargetLowering::isZExtFree;
bool isZExtFree(SDValue Val, EVT VT2) const override;
@@ -123,7 +123,7 @@ namespace llvm {
bool isLegalAddressingMode(const AddrMode &AM, Type *Ty) const override;
private:
- const XCoreTargetMachine &TM;
+ const TargetMachine &TM;
const XCoreSubtarget &Subtarget;
// Lower Operand helpers
@@ -157,7 +157,6 @@ namespace llvm {
SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerUMUL_LOHI(SDValue Op, SelectionDAG &DAG) const;
diff --git a/lib/Target/XCore/XCoreInstrInfo.cpp b/lib/Target/XCore/XCoreInstrInfo.cpp
index 984f0cd..36ea9a0 100644
--- a/lib/Target/XCore/XCoreInstrInfo.cpp
+++ b/lib/Target/XCore/XCoreInstrInfo.cpp
@@ -373,7 +373,8 @@ void XCoreInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
const TargetRegisterInfo *TRI) const
{
DebugLoc DL;
- if (I != MBB.end()) DL = I->getDebugLoc();
+ if (I != MBB.end() && !I->isDebugValue())
+ DL = I->getDebugLoc();
MachineFunction *MF = MBB.getParent();
const MachineFrameInfo &MFI = *MF->getFrameInfo();
MachineMemOperand *MMO =
@@ -395,7 +396,8 @@ void XCoreInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
const TargetRegisterInfo *TRI) const
{
DebugLoc DL;
- if (I != MBB.end()) DL = I->getDebugLoc();
+ if (I != MBB.end() && !I->isDebugValue())
+ DL = I->getDebugLoc();
MachineFunction *MF = MBB.getParent();
const MachineFrameInfo &MFI = *MF->getFrameInfo();
MachineMemOperand *MMO =
@@ -440,7 +442,8 @@ MachineBasicBlock::iterator XCoreInstrInfo::loadImmediate(
MachineBasicBlock::iterator MI,
unsigned Reg, uint64_t Value) const {
DebugLoc dl;
- if (MI != MBB.end()) dl = MI->getDebugLoc();
+ if (MI != MBB.end() && !MI->isDebugValue())
+ dl = MI->getDebugLoc();
if (isImmMskBitp(Value)) {
int N = Log2_32(Value) + 1;
return BuildMI(MBB, MI, dl, get(XCore::MKMSK_rus), Reg).addImm(N);
diff --git a/lib/Target/XCore/XCoreSelectionDAGInfo.cpp b/lib/Target/XCore/XCoreSelectionDAGInfo.cpp
index 5a6bbe7..91b33fd 100644
--- a/lib/Target/XCore/XCoreSelectionDAGInfo.cpp
+++ b/lib/Target/XCore/XCoreSelectionDAGInfo.cpp
@@ -16,9 +16,8 @@ using namespace llvm;
#define DEBUG_TYPE "xcore-selectiondag-info"
-XCoreSelectionDAGInfo::XCoreSelectionDAGInfo(const XCoreTargetMachine &TM)
- : TargetSelectionDAGInfo(TM) {
-}
+XCoreSelectionDAGInfo::XCoreSelectionDAGInfo(const DataLayout &DL)
+ : TargetSelectionDAGInfo(&DL) {}
XCoreSelectionDAGInfo::~XCoreSelectionDAGInfo() {
}
@@ -47,7 +46,7 @@ EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl, SDValue Chain,
.setCallee(TLI.getLibcallCallingConv(RTLIB::MEMCPY),
Type::getVoidTy(*DAG.getContext()),
DAG.getExternalSymbol("__memcpy_4", TLI.getPointerTy()),
- &Args, 0)
+ std::move(Args), 0)
.setDiscardResult();
std::pair<SDValue,SDValue> CallResult = TLI.LowerCallTo(CLI);
diff --git a/lib/Target/XCore/XCoreSelectionDAGInfo.h b/lib/Target/XCore/XCoreSelectionDAGInfo.h
index ea6af98..0079de1 100644
--- a/lib/Target/XCore/XCoreSelectionDAGInfo.h
+++ b/lib/Target/XCore/XCoreSelectionDAGInfo.h
@@ -22,7 +22,7 @@ class XCoreTargetMachine;
class XCoreSelectionDAGInfo : public TargetSelectionDAGInfo {
public:
- explicit XCoreSelectionDAGInfo(const XCoreTargetMachine &TM);
+ explicit XCoreSelectionDAGInfo(const DataLayout &DL);
~XCoreSelectionDAGInfo();
SDValue
diff --git a/lib/Target/XCore/XCoreSubtarget.cpp b/lib/Target/XCore/XCoreSubtarget.cpp
index 89ea03a..7227411 100644
--- a/lib/Target/XCore/XCoreSubtarget.cpp
+++ b/lib/Target/XCore/XCoreSubtarget.cpp
@@ -25,8 +25,8 @@ using namespace llvm;
void XCoreSubtarget::anchor() { }
-XCoreSubtarget::XCoreSubtarget(const std::string &TT,
- const std::string &CPU, const std::string &FS)
- : XCoreGenSubtargetInfo(TT, CPU, FS)
-{
-}
+XCoreSubtarget::XCoreSubtarget(const std::string &TT, const std::string &CPU,
+ const std::string &FS, const TargetMachine &TM)
+ : XCoreGenSubtargetInfo(TT, CPU, FS),
+ DL("e-m:e-p:32:32-i1:8:32-i8:8:32-i16:16:32-i64:32-f64:32-a:0:32-n32"),
+ InstrInfo(), FrameLowering(*this), TLInfo(TM), TSInfo(DL) {}
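The subtarget now owns the DataLayout and the per-target codegen helpers that previously lived on XCoreTargetMachine. One detail worth spelling out: TargetSelectionDAGInfo keeps only a pointer to the DataLayout, and C++ initializes members in declaration order, so DL must be declared before TSInfo (as it is in the header hunk below) for TSInfo(DL) to see a fully constructed object. A sketch of that dependency, with comments marking what each member needs:

  class XCoreSubtarget : public XCoreGenSubtargetInfo {
    const DataLayout DL;               // must be declared before TSInfo,
    XCoreInstrInfo InstrInfo;          //   which stores a pointer to DL
    XCoreFrameLowering FrameLowering;  // built from *this (base already set up)
    XCoreTargetLowering TLInfo;        // built from the owning TargetMachine
    XCoreSelectionDAGInfo TSInfo;      // built from DL, declared last on purpose
    // accessors as in the header hunk below
  };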
diff --git a/lib/Target/XCore/XCoreSubtarget.h b/lib/Target/XCore/XCoreSubtarget.h
index 5ac4dbc..1e9810b 100644
--- a/lib/Target/XCore/XCoreSubtarget.h
+++ b/lib/Target/XCore/XCoreSubtarget.h
@@ -14,6 +14,11 @@
#ifndef XCORESUBTARGET_H
#define XCORESUBTARGET_H
+#include "XCoreFrameLowering.h"
+#include "XCoreISelLowering.h"
+#include "XCoreInstrInfo.h"
+#include "XCoreSelectionDAGInfo.h"
+#include "llvm/IR/DataLayout.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include <string>
@@ -26,17 +31,31 @@ class StringRef;
class XCoreSubtarget : public XCoreGenSubtargetInfo {
virtual void anchor();
+ const DataLayout DL; // Calculates type size & alignment
+ XCoreInstrInfo InstrInfo;
+ XCoreFrameLowering FrameLowering;
+ XCoreTargetLowering TLInfo;
+ XCoreSelectionDAGInfo TSInfo;
public:
/// This constructor initializes the data members to match that
/// of the specified triple.
///
XCoreSubtarget(const std::string &TT, const std::string &CPU,
- const std::string &FS);
+ const std::string &FS, const TargetMachine &TM);
/// ParseSubtargetFeatures - Parses features string setting specified
/// subtarget options. Definition of function is auto generated by tblgen.
void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
+
+ const XCoreInstrInfo *getInstrInfo() const { return &InstrInfo; }
+ const XCoreFrameLowering *getFrameLowering() const { return &FrameLowering; }
+ const XCoreTargetLowering *getTargetLowering() const { return &TLInfo; }
+ const XCoreSelectionDAGInfo *getSelectionDAGInfo() const { return &TSInfo; }
+ const TargetRegisterInfo *getRegisterInfo() const {
+ return &InstrInfo.getRegisterInfo();
+ }
+ const DataLayout *getDataLayout() const { return &DL; }
};
} // End llvm namespace
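With these accessors in place, backend code that used to reach the codegen objects through XCoreTargetMachine can take them from the subtarget instead. A hedged usage sketch; the getSubtarget<> template is the generic lookup available in this tree, and exact call sites vary:

  const XCoreSubtarget &ST = TM.getSubtarget<XCoreSubtarget>();
  const XCoreInstrInfo *TII = ST.getInstrInfo();
  const TargetRegisterInfo *TRI = ST.getRegisterInfo();
  const DataLayout *DL = ST.getDataLayout();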
diff --git a/lib/Target/XCore/XCoreTargetMachine.cpp b/lib/Target/XCore/XCoreTargetMachine.cpp
index 0fb21c5..8d8bb38 100644
--- a/lib/Target/XCore/XCoreTargetMachine.cpp
+++ b/lib/Target/XCore/XCoreTargetMachine.cpp
@@ -25,13 +25,8 @@ XCoreTargetMachine::XCoreTargetMachine(const Target &T, StringRef TT,
const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL)
- : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
- Subtarget(TT, CPU, FS),
- DL("e-m:e-p:32:32-i1:8:32-i8:8:32-i16:16:32-i64:32-f64:32-a:0:32-n32"),
- InstrInfo(),
- FrameLowering(Subtarget),
- TLInfo(*this),
- TSInfo(*this) {
+ : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
+ Subtarget(TT, CPU, FS, *this) {
initAsmInfo();
}
diff --git a/lib/Target/XCore/XCoreTargetMachine.h b/lib/Target/XCore/XCoreTargetMachine.h
index a57ca55..14c43bf 100644
--- a/lib/Target/XCore/XCoreTargetMachine.h
+++ b/lib/Target/XCore/XCoreTargetMachine.h
@@ -14,46 +14,38 @@
#ifndef XCORETARGETMACHINE_H
#define XCORETARGETMACHINE_H
-#include "XCoreFrameLowering.h"
-#include "XCoreISelLowering.h"
-#include "XCoreInstrInfo.h"
-#include "XCoreSelectionDAGInfo.h"
#include "XCoreSubtarget.h"
-#include "llvm/IR/DataLayout.h"
#include "llvm/Target/TargetMachine.h"
namespace llvm {
class XCoreTargetMachine : public LLVMTargetMachine {
XCoreSubtarget Subtarget;
- const DataLayout DL; // Calculates type size & alignment
- XCoreInstrInfo InstrInfo;
- XCoreFrameLowering FrameLowering;
- XCoreTargetLowering TLInfo;
- XCoreSelectionDAGInfo TSInfo;
public:
XCoreTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS, const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL);
- const XCoreInstrInfo *getInstrInfo() const override { return &InstrInfo; }
+ const XCoreInstrInfo *getInstrInfo() const override {
+ return getSubtargetImpl()->getInstrInfo();
+ }
const XCoreFrameLowering *getFrameLowering() const override {
- return &FrameLowering;
+ return getSubtargetImpl()->getFrameLowering();
}
const XCoreSubtarget *getSubtargetImpl() const override { return &Subtarget; }
const XCoreTargetLowering *getTargetLowering() const override {
- return &TLInfo;
+ return getSubtargetImpl()->getTargetLowering();
}
-
const XCoreSelectionDAGInfo* getSelectionDAGInfo() const override {
- return &TSInfo;
+ return getSubtargetImpl()->getSelectionDAGInfo();
}
-
const TargetRegisterInfo *getRegisterInfo() const override {
- return &InstrInfo.getRegisterInfo();
+ return getSubtargetImpl()->getRegisterInfo();
+ }
+ const DataLayout *getDataLayout() const override {
+ return getSubtargetImpl()->getDataLayout();
}
- const DataLayout *getDataLayout() const override { return &DL; }
// Pass Pipeline Configuration
TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
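Net effect of the XCoreTargetMachine hunks: the machine no longer stores any of these objects, and every override is a one-line forward into the subtarget, so existing callers of the TargetMachine accessors should see no behavioral change. Roughly (illustrative only):

  const DataLayout *DL = TM.getDataLayout();            // now Subtarget.getDataLayout()
  const TargetRegisterInfo *TRI = TM.getRegisterInfo(); // now InstrInfo's register info,
                                                        // reached through the subtarget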